diff options
56 files changed, 4169 insertions, 2638 deletions
diff --git a/usr/src/cmd/dfs.cmds/sharemgr/commands.c b/usr/src/cmd/dfs.cmds/sharemgr/commands.c index 79be97f149..06b8fb54f0 100644 --- a/usr/src/cmd/dfs.cmds/sharemgr/commands.c +++ b/usr/src/cmd/dfs.cmds/sharemgr/commands.c @@ -22,7 +22,11 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + */ + +/* * Copyright 2012 Milan Jurik. All rights reserved. + * Copyright 2018 Nexenta Systems, Inc. * Copyright 2019, Joyent, Inc. */ @@ -2167,7 +2171,6 @@ static void show_group(sa_group_t group, int verbose, int properties, char *proto, char *subgroup) { - sa_share_t share; char *groupname; char *zfs = NULL; int iszfs = 0; @@ -2175,6 +2178,8 @@ show_group(sa_group_t group, int verbose, int properties, char *proto, groupname = sa_get_group_attr(group, "name"); if (groupname != NULL) { + sa_share_t share; + if (proto != NULL && !has_protocol(group, proto)) { sa_free_attr_string(groupname); return; @@ -2191,7 +2196,7 @@ show_group(sa_group_t group, int verbose, int properties, char *proto, iszfs = 1; sa_free_attr_string(zfs); } - share = sa_get_share(group, NULL); + if (subgroup == NULL) (void) printf("%s", groupname); else diff --git a/usr/src/cmd/fs.d/nfs/Makefile b/usr/src/cmd/fs.d/nfs/Makefile index 7f9ae26ae2..d18ddec4dd 100644 --- a/usr/src/cmd/fs.d/nfs/Makefile +++ b/usr/src/cmd/fs.d/nfs/Makefile @@ -23,6 +23,8 @@ # Copyright 2009 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # +# Copyright 2018 Nexenta Systems, Inc. All rights reserved. 
+# # cmd/fs.d/nfs/Makefile # # cmd/fs.d/nfs is the directory of all nfs specific commands @@ -33,7 +35,7 @@ include $(SRC)/Makefile.master SUBDIR1= exportfs nfsd rquotad \ statd nfsstat mountd dfshares \ - nfsfind nfs4cbd share tests + nfsfind nfs4cbd share tests dtrace # These do "make catalog" SUBDIR2= clear_locks lockd umount showmount \ @@ -52,14 +54,13 @@ all:= TARGET= all install:= TARGET= install clean:= TARGET= clean clobber:= TARGET= clobber -lint:= TARGET= lint catalog:= TARGET= catalog .KEEP_STATE: .PARALLEL: $(SUBDIRS) -all install clean clobber lint: $(SUBDIRS) +all install clean clobber: $(SUBDIRS) catalog: $(SUBDIR2) $(RM) $(POFILE) diff --git a/usr/src/cmd/fs.d/nfs/dtrace/Makefile b/usr/src/cmd/fs.d/nfs/dtrace/Makefile new file mode 100644 index 0000000000..0882ae8b9d --- /dev/null +++ b/usr/src/cmd/fs.d/nfs/dtrace/Makefile @@ -0,0 +1,37 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2018 Nexenta Systems, Inc. All rights reserved. 
+# + +SRCS=nfs-trace.d nfs-time.d + +include $(SRC)/cmd/Makefile.cmd + +ROOTNFSDTRACEDIR = $(ROOTLIB)/nfs/dtrace +ROOTNFSDTRACEFILE = $(SRCS:%=$(ROOTNFSDTRACEDIR)/%) + +$(ROOTNFSDTRACEFILE):= FILEMODE = 0555 + +$(ROOTNFSDTRACEDIR): + $(INS.dir) + +$(ROOTNFSDTRACEDIR)/%: % + $(INS.file) + +all: + +clean: + +include $(SRC)/cmd/Makefile.targ + +install: all $(ROOTNFSDTRACEDIR) .WAIT $(ROOTNFSDTRACEFILE) diff --git a/usr/src/cmd/fs.d/nfs/dtrace/nfs-time.d b/usr/src/cmd/fs.d/nfs/dtrace/nfs-time.d new file mode 100755 index 0000000000..7d7c33d153 --- /dev/null +++ b/usr/src/cmd/fs.d/nfs/dtrace/nfs-time.d @@ -0,0 +1,78 @@ +#!/usr/sbin/dtrace -s + +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2018 Nexenta Systems, Inc. All rights reserved. + */ + +/* + * Quantize the time spent in each NFSv3 andf NFSv4 operation, + * optionally for a specified client, share and zone. + * + * Usage: nfs-time.d [<client ip>|all [<share path>|all] [<zone id>]]] + * + * example: nfs_time.d 192.168.123.1 /mypool/fs1 0 + * + * It is valid to specify <client ip> or <share path> as "all" + * to quantize data for all clients and/or all shares. + * Omitting <zone id> will quantize data for all zones. + */ + +#pragma D option flowindent +#pragma D option defaultargs + +dtrace:::BEGIN +{ + all_clients = (($$1 == NULL) || ($$1 == "all")) ? 1 : 0; + all_shares = (($$2 == NULL) || ($$2 == "all")) ? 1 : 0; + all_zones = ($$3 == NULL) ? 1 : 0; + + client = $$1; + share = $$2; + zoneid = $3; + + printf("%Y - client=%s share=%s zone=%s)\n", walltimestamp, + (all_clients) ? "all" : client, + (all_shares) ? 
"all" : share, + (all_zones) ? "all" : $$3); +} + +nfsv3:::op-*-start, +nfsv4:::op-*-start +{ + self->ts[probefunc] = timestamp; +} + +nfsv3:::op-*-done, +nfsv4:::op-*-done +/ ((all_clients) || (args[0]->ci_remote == client)) && + ((all_shares) || (args[1]->noi_shrpath == share)) && + ((all_zones) || (args[1]->noi_zoneid == zoneid)) / +{ + elapsed = (timestamp - self->ts[probefunc]); + @q[probefunc]=quantize(elapsed); +} + +tick-5s +{ + printa(@q); + /* + * uncomment "clear" to quantize per 5s interval + * rather than cumulative for duration of script. + * clear(@q); + */ +} + +dtrace:::END +{ +} diff --git a/usr/src/cmd/fs.d/nfs/dtrace/nfs-trace.d b/usr/src/cmd/fs.d/nfs/dtrace/nfs-trace.d new file mode 100755 index 0000000000..a89fed3424 --- /dev/null +++ b/usr/src/cmd/fs.d/nfs/dtrace/nfs-trace.d @@ -0,0 +1,248 @@ +#!/usr/sbin/dtrace -s + +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2018 Nexenta Systems, Inc. All rights reserved. + */ + +/* + * Print input and output values for each NFSv3 andf NFSv4 operation, + * optionally for a specified client, share and zone. + * + * Usage: nfs-trace.d [<client ip>|all [<share path>|all] [<zone id>]]] + * + * example: nfs_trace.d 192.168.123.1 /mypool/fs1 0 + * + * It is valid to specify <client ip> or <share path> as "all" + * to quantize data for all clients and/or all shares. + * Omitting <zone id> will quantize data for all zones. 
+ */ + +/* + * Unfortunately, trying to write this script using wildcards, for example: + * nfsv3:::op-*-start {} + * nfsv3:::op-*-done {} + * prints the operation-specific args[2] structure as the incorrect type. + * Until this is resolved it is necessary to explicitly list each operation. + * + * See nfs-time.d for an example of using the wildcard format when there are + * no operation-specific args (args[2]) being traced. + */ + +#pragma D option flowindent +#pragma D option defaultargs + +dtrace:::BEGIN +{ + all_clients = (($$1 == NULL) || ($$1 == "all")) ? 1 : 0; + all_shares = (($$2 == NULL) || ($$2 == "all")) ? 1 : 0; + all_zones = ($$3 == NULL) ? 1 : 0; + + client = $$1; + share = $$2; + zoneid = $3; + + printf("%Y - client=%s share=%s zone=%s)\n", walltimestamp, + (all_clients) ? "all" : client, + (all_shares) ? "all" : share, + (all_zones) ? "all" : $$3); +} + +nfsv3:::op-getattr-start, +nfsv3:::op-setattr-start, +nfsv3:::op-lookup-start, +nfsv3:::op-access-start, +nfsv3:::op-commit-start, +nfsv3:::op-create-start, +nfsv3:::op-fsinfo-start, +nfsv3:::op-fsstat-start, +nfsv3:::op-link-start, +nfsv3:::op-mkdir-start, +nfsv3:::op-mknod-start, +nfsv3:::op-pathconf-start, +nfsv3:::op-read-start, +nfsv3:::op-readdir-start, +nfsv3:::op-readdirplus-start, +nfsv3:::op-readlink-start, +nfsv3:::op-remove-start, +nfsv3:::op-rename-start, +nfsv3:::op-rmdir-start, +nfsv3:::op-symlink-start, +nfsv3:::op-write-start +/ ((all_clients) || (args[0]->ci_remote == client)) && + ((all_shares) || (args[1]->noi_shrpath == share)) && + ((all_zones) || (args[1]->noi_zoneid == zoneid)) / +{ + printf("\n"); + print(*args[0]); + printf("\n"); + print(*args[1]); + printf("\n"); + print(*args[2]); + printf("\n"); +} + +nfsv3:::op-getattr-done, +nfsv3:::op-setattr-done, +nfsv3:::op-lookup-done, +nfsv3:::op-access-done, +nfsv3:::op-commit-done, +nfsv3:::op-create-done, +nfsv3:::op-fsinfo-done, +nfsv3:::op-fsstat-done, +nfsv3:::op-link-done, +nfsv3:::op-mkdir-done, +nfsv3:::op-mknod-done, 
+nfsv3:::op-pathconf-done, +nfsv3:::op-read-done, +nfsv3:::op-readdir-done, +nfsv3:::op-readdirplus-done, +nfsv3:::op-readlink-done, +nfsv3:::op-remove-done, +nfsv3:::op-rename-done, +nfsv3:::op-rmdir-done, +nfsv3:::op-symlink-done, +nfsv3:::op-write-done +/ ((all_clients) || (args[0]->ci_remote == client)) && + ((all_shares) || (args[1]->noi_shrpath == share)) && + ((all_zones) || (args[1]->noi_zoneid == zoneid)) / +{ + /* + printf("\n"); + print(*args[0]); + printf("\n"); + print(*args[1]); + */ + printf("\n"); + print(*args[2]); + printf("\n"); +} + +nfsv4:::op-access-start, +nfsv4:::op-close-start, +nfsv4:::op-commit-start, +nfsv4:::op-create-start, +nfsv4:::op-delegpurge-start, +nfsv4:::op-delegreturn-start, +nfsv4:::op-getattr-start, +nfsv4:::op-link-start, +nfsv4:::op-lock-start, +nfsv4:::op-lockt-start, +nfsv4:::op-locku-start, +nfsv4:::op-lookup-start, +nfsv4:::op-nverify-start, +nfsv4:::op-open-start, +nfsv4:::op-open-confirm-start, +nfsv4:::op-open-downgrade-start, +nfsv4:::op-openattr-start, +nfsv4:::op-putfh-start, +nfsv4:::op-read-start, +nfsv4:::op-readdir-start, +nfsv4:::op-release-lockowner-start, +nfsv4:::op-remove-start, +nfsv4:::op-rename-start, +nfsv4:::op-renew-start, +nfsv4:::op-secinfo-start, +nfsv4:::op-setattr-start, +nfsv4:::op-setclientid-start, +nfsv4:::op-setclientid-confirm-start, +nfsv4:::op-verify-start, +nfsv4:::op-write-start +/ ((all_clients) || (args[0]->ci_remote == client)) && + ((all_shares) || (args[1]->noi_shrpath == share)) && + ((all_zones) || (args[1]->noi_zoneid == zoneid)) / +{ + printf("\n"); + print(*args[0]); + printf("\n"); + print(*args[1]); + printf("\n"); + print(*args[2]); + printf("\n"); +} + +/* These operations do not have args[2] */ +nfsv4:::op-getfh-start, +nfsv4:::op-lookupp-start, +nfsv4:::op-putpubfh-start, +nfsv4:::op-putrootfh-start, +nfsv4:::op-readlink-start, +nfsv4:::op-restorefh-start, +nfsv4:::op-savefh-start +/ ((all_clients) || (args[0]->ci_remote == client)) && + ((all_shares) || 
(args[1]->noi_shrpath == share)) && + ((all_zones) || (args[1]->noi_zoneid == zoneid)) / +{ + printf("\n"); + print(*args[0]); + printf("\n"); + print(*args[1]); + printf("\n"); +} + + +nfsv4:::op-access-done, +nfsv4:::op-close-done, +nfsv4:::op-commit-done, +nfsv4:::op-create-done, +nfsv4:::op-delegpurge-done, +nfsv4:::op-delegreturn-done, +nfsv4:::op-getattr-done, +nfsv4:::op-getfh-done, +nfsv4:::op-link-done, +nfsv4:::op-lock-done, +nfsv4:::op-lockt-done, +nfsv4:::op-locku-done, +nfsv4:::op-lookup-done, +nfsv4:::op-lookupp-done, +nfsv4:::op-nverify-done, +nfsv4:::op-open-done, +nfsv4:::op-open-confirm-done, +nfsv4:::op-open-downgrade-done, +nfsv4:::op-openattr-done, +nfsv4:::op-putfh-done, +nfsv4:::op-putpubfh-done, +nfsv4:::op-putrootfh-done, +nfsv4:::op-read-done, +nfsv4:::op-readdir-done, +nfsv4:::op-readlink-done, +nfsv4:::op-release-lockowner-done, +nfsv4:::op-remove-done, +nfsv4:::op-rename-done, +nfsv4:::op-renew-done, +nfsv4:::op-restorefh-done, +nfsv4:::op-savefh-done, +nfsv4:::op-secinfo-done, +nfsv4:::op-setattr-done, +nfsv4:::op-setclientid-done, +nfsv4:::op-setclientid-confirm-done, +nfsv4:::op-verify-done, +nfsv4:::op-write-done +/ ((all_clients) || (args[0]->ci_remote == client)) && + ((all_shares) || (args[1]->noi_shrpath == share)) && + ((all_zones) || (args[1]->noi_zoneid == zoneid)) / +{ + /* + printf("\n"); + print(*args[0]); + printf("\n"); + print(*args[1]); + */ + printf("\n"); + print(*args[2]); + printf("\n"); +} + +dtrace:::END +{ +} diff --git a/usr/src/cmd/fs.d/nfs/svc/nfs-server b/usr/src/cmd/fs.d/nfs/svc/nfs-server index 11a54fea8a..bf8e73be98 100644 --- a/usr/src/cmd/fs.d/nfs/svc/nfs-server +++ b/usr/src/cmd/fs.d/nfs/svc/nfs-server @@ -22,8 +22,8 @@ # # Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. -# Copyright 2015 Nexenta Systems, Inc. All rights reserved. # Copyright 2016 Hans Rosenfeld <rosenfeld@grumpf.hope-2000.org> +# Copyright 2018 Nexenta Systems, Inc. All rights reserved. 
# # Start/stop processes required for server NFS @@ -53,7 +53,7 @@ configure_ipfilter() # # Nothing to do if: - # - ipfilter isn't online + # - ipfilter isn't online # - global policy is 'custom' # - service's policy is 'use_global' # @@ -66,14 +66,6 @@ configure_ipfilter() case "$1" in 'start') - # The NFS server is not supported in a local zone - if smf_is_nonglobalzone; then - /usr/sbin/svcadm disable -t svc:/network/nfs/server - echo "The NFS server is not supported in a local zone" - sleep 5 & - exit $SMF_EXIT_OK - fi - # Share all file systems enabled for sharing. sharemgr understands # regular shares and ZFS shares and will handle both. Technically, # the shares would have been started long before getting here since diff --git a/usr/src/cmd/fs.d/nfs/svc/nlockmgr.xml b/usr/src/cmd/fs.d/nfs/svc/nlockmgr.xml index b5165e7ad5..957ff00856 100644 --- a/usr/src/cmd/fs.d/nfs/svc/nlockmgr.xml +++ b/usr/src/cmd/fs.d/nfs/svc/nlockmgr.xml @@ -22,6 +22,8 @@ Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. Copyright (c) 2012, 2015 by Delphix. All rights reserved. + Copyright 2018 Nexenta Systems, Inc. All rights reserved. + Copyright 2019 Nexenta by DDN, Inc. All rights reserved. 
NOTE: This service manifest is not editable; its contents will be overwritten by package or patch operations, including @@ -65,7 +67,7 @@ grouping='require_all' restart_on='error' type='service'> - <service_fmri value='svc:/system/filesystem/minimal' /> + <service_fmri value='svc:/system/filesystem/minimal' /> </dependency> <exec_method @@ -93,7 +95,7 @@ </property_group> <instance name='default' enabled='false'> <property_group name='nfs-props' type='com.oracle.nfs,props'> - <propval name='grace_period' type='integer' value='90'/> + <propval name='grace_period' type='integer' value='60'/> <propval name='lockd_listen_backlog' type='integer' value='32'/> <propval name='lockd_retransmit_timeout' type='integer' value='5'/> <propval name='lockd_servers' type='integer' value='256'/> diff --git a/usr/src/cmd/smbsrv/dtrace/smb-trace.d b/usr/src/cmd/smbsrv/dtrace/smb-trace.d index 32162dca2b..40b3cba219 100644 --- a/usr/src/cmd/smbsrv/dtrace/smb-trace.d +++ b/usr/src/cmd/smbsrv/dtrace/smb-trace.d @@ -1,3 +1,4 @@ +#!/usr/sbin/dtrace -s /* * This file and its contents are supplied under the terms of the * Common Development and Distribution License ("CDDL"), version 1.0. @@ -10,7 +11,7 @@ */ /* - * Copyright 2017 Nexenta Systems, Inc. All rights reserved. + * Copyright 2018 Nexenta Systems, Inc. All rights reserved. */ /* @@ -24,9 +25,38 @@ * args[2] smb_name_args_t * args[2] smb_open_args_t * args[2] smb_rw_args_t + * + * Usage: smb-trace.d [<client ip>|all [<share path>|all] [<zone id>]]] + * + * example: smb_trace.d 192.168.012.001 mypool_fs1 0 + * + * It is valid to specify <client ip> or <share path> as "all" to + * print data for all clients and/or all shares. + * Omitting <zone id> will print data for all zones. */ +#pragma D option defaultargs + +dtrace:::BEGIN +{ + all_clients = (($$1 == NULL) || ($$1 == "all")) ? 1 : 0; + all_shares = (($$2 == NULL) || ($$2 == "all")) ? 1 : 0; + all_zones = ($$3 == NULL) ? 
1 : 0; + + client = $$1; + share = $$2; + zoneid = $3; + + printf("%Y - client=%s share=%s zone=%s)\n", walltimestamp, + (all_clients) ? "all" : client, + (all_shares) ? "all" : share, + (all_zones) ? "all" : $$3); +} + smb:::op-*-start +/ ((all_clients) || (args[0]->ci_remote == client)) && + ((all_shares) || (args[1]->soi_share == share)) && + ((all_zones) || (args[1]->soi_zoneid == zoneid)) / { printf("clnt=%s mid=0x%x uid=0x%x tid=0x%x\n", args[0]->ci_remote, @@ -36,9 +66,16 @@ smb:::op-*-start } smb:::op-*-done +/ ((all_clients) || (args[0]->ci_remote == client)) && + ((all_shares) || (args[1]->soi_share == share)) && + ((all_zones) || (args[1]->soi_zoneid == zoneid)) / { printf("clnt=%s mid=0x%x status=0x%x\n", args[0]->ci_remote, args[1]->soi_mid, args[1]->soi_status); } + +dtrace:::END +{ +} diff --git a/usr/src/cmd/smbsrv/dtrace/smb2-trace.d b/usr/src/cmd/smbsrv/dtrace/smb2-trace.d index 4e83216785..409d878dea 100644 --- a/usr/src/cmd/smbsrv/dtrace/smb2-trace.d +++ b/usr/src/cmd/smbsrv/dtrace/smb2-trace.d @@ -1,3 +1,4 @@ +#!/usr/sbin/dtrace -s /* * This file and its contents are supplied under the terms of the * Common Development and Distribution License ("CDDL"), version 1.0. @@ -10,7 +11,7 @@ */ /* - * Copyright 2017 Nexenta Systems, Inc. All rights reserved. + * Copyright 2018 Nexenta Systems, Inc. All rights reserved. */ /* @@ -23,9 +24,38 @@ * Some also provide one of: (not used here) * args[2] smb_open_args_t * args[2] smb_rw_args_t + * + * Usage: smb2-trace.d [<client ip>|all [<share path>|all] [<zone id>]]] + * + * example: smb2_trace.d 192.168.012.001 mypool_fs1 0 + * + * It is valid to specify <client ip> or <share path> as "all" to + * print data for all clients and/or all shares. + * Omitting <zone id> will print data for all zones. */ +#pragma D option defaultargs + +dtrace:::BEGIN +{ + all_clients = (($$1 == NULL) || ($$1 == "all")) ? 1 : 0; + all_shares = (($$2 == NULL) || ($$2 == "all")) ? 1 : 0; + all_zones = ($$3 == NULL) ? 
1 : 0; + + client = $$1; + share = $$2; + zoneid = $3; + + printf("%Y - client=%s share=%s zone=%s)\n", walltimestamp, + (all_clients) ? "all" : client, + (all_shares) ? "all" : share, + (all_zones) ? "all" : $$3); +} + smb2:::op-*-start +/ ((all_clients == 1) || (args[0]->ci_remote == client)) && + ((all_shares == 1) || (args[1]->soi_share == share)) && + ((all_zones == 1) || (args[1]->soi_zoneid == zoneid)) / { printf("clnt=%s mid=0x%x uid=0x%x tid=0x%x\n", args[0]->ci_remote, @@ -35,9 +65,16 @@ smb2:::op-*-start } smb2:::op-*-done +/ ((all_clients == 1) || (args[0]->ci_remote == client)) && + ((all_shares == 1) || (args[1]->soi_share == share)) && + ((all_zones == 1) || (args[1]->soi_zoneid == zoneid)) / { printf("clnt=%s mid=0x%x status=0x%x\n", args[0]->ci_remote, args[1]->soi_mid, args[1]->soi_status); } + +dtrace:::END +{ +} diff --git a/usr/src/lib/brand/ipkg/zone/platform.xml b/usr/src/lib/brand/ipkg/zone/platform.xml index af417212f0..4075fe10d2 100644 --- a/usr/src/lib/brand/ipkg/zone/platform.xml +++ b/usr/src/lib/brand/ipkg/zone/platform.xml @@ -20,8 +20,8 @@ CDDL HEADER END - Copyright 2011 Nexenta Systems, Inc. All rights reserved. Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + Copyright 2018 Nexenta Systems, Inc. DO NOT EDIT THIS FILE. --> @@ -41,6 +41,7 @@ <mount special="mnttab" directory="/etc/mnttab" type="mntfs" /> <mount special="objfs" directory="/system/object" type="objfs" /> <mount special="swap" directory="/etc/svc/volatile" type="tmpfs" /> + <mount special="sharefs" directory="/etc/dfs/sharetab" type="sharefs" /> <!-- Devices to create under /dev --> <device match="arp" /> diff --git a/usr/src/lib/libdtrace/common/nfs.d b/usr/src/lib/libdtrace/common/nfs.d index d14c706aca..7a97cd5e6d 100644 --- a/usr/src/lib/libdtrace/common/nfs.d +++ b/usr/src/lib/libdtrace/common/nfs.d @@ -23,6 +23,9 @@ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ +/* + * Copyright 2018 Nexenta Systems, Inc. All rights reserved. + */ #pragma D depends_on library ip.d #pragma D depends_on library net.d @@ -35,6 +38,8 @@ typedef struct nfsv4opinfo { uint64_t noi_xid; /* unique transation ID */ cred_t *noi_cred; /* credentials for operation */ string noi_curpath; /* current file handle path (if any) */ + string noi_shrpath; /* current share path */ + zoneid_t noi_zoneid; /* zone identifier */ } nfsv4opinfo_t; typedef struct nfsv4cbinfo { @@ -101,12 +106,17 @@ translator nfsv4opinfo_t < struct compound_state *P > { noi_xid = P->req->rq_xprt->xp_xid; noi_cred = P->basecr; noi_curpath = (P->vp == NULL) ? "<unknown>" : P->vp->v_path; + noi_shrpath = (P->exi == NULL || P->exi->exi_export.ex_path == NULL) ? + "<unknown>" : P->exi->exi_export.ex_path; + noi_zoneid = (P->exi == NULL) ? -1 : P->exi->exi_zoneid; }; typedef struct nfsv3opinfo { uint64_t noi_xid; /* unique transation ID */ cred_t *noi_cred; /* credentials for operation */ string noi_curpath; /* current file handle path (if any) */ + string noi_shrpath; /* current share path */ + zoneid_t noi_zoneid; /* zone identifier */ } nfsv3opinfo_t; typedef struct nfsv3oparg nfsv3oparg_t; @@ -117,4 +127,9 @@ translator nfsv3opinfo_t < nfsv3oparg_t *P > { noi_cred = (cred_t *)arg1; noi_curpath = (arg2 == 0 || ((vnode_t *)arg2)->v_path == NULL) ? "<unknown>" : ((vnode_t *)arg2)->v_path; + noi_shrpath = + (arg3 == 0 || ((exportinfo_t *)arg3)->exi_export.ex_path == NULL) ? + "<unknown>" : ((exportinfo_t *)arg3)->exi_export.ex_path; + noi_zoneid = + (arg3 == 0) ? -1 : ((exportinfo_t *)arg3)->exi_zoneid; }; diff --git a/usr/src/lib/libdtrace/common/smb.d b/usr/src/lib/libdtrace/common/smb.d index c58cb4bf1c..9b2f8bdc1f 100644 --- a/usr/src/lib/libdtrace/common/smb.d +++ b/usr/src/lib/libdtrace/common/smb.d @@ -23,7 +23,7 @@ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * - * Copyright 2017 Nexenta Systems, Inc. All rights reserved. 
+ * Copyright 2018 Nexenta Systems, Inc. All rights reserved. */ #pragma D depends_on library ip.d @@ -58,6 +58,7 @@ typedef struct smbopinfo { uint16_t soi_fid; /* file id */ uint16_t soi_flags2; /* flags2 */ uint8_t soi_flags; /* flags */ + zoneid_t soi_zoneid; /* zone identifier */ } smbopinfo_t; #pragma D binding "1.5" translator @@ -72,6 +73,7 @@ translator smbopinfo_t < struct smb_request *P > { soi_fid = P->smb_fid; soi_flags2 = P->smb_flg2; soi_flags = P->smb_flg; + soi_zoneid = P->sr_server->sv_zid; soi_share = (P->tid_tree == NULL) ? "<NULL>" : P->tid_tree->t_sharename; @@ -137,6 +139,7 @@ typedef struct smb2opinfo { uint32_t soi_tid; /* tree ID */ uint32_t soi_status; uint32_t soi_flags; + zoneid_t soi_zoneid; /* zone identifier */ } smb2opinfo_t; #pragma D binding "1.5" translator @@ -149,6 +152,7 @@ translator smb2opinfo_t < struct smb_request *P > { soi_tid = P->smb_tid; soi_status = P->smb2_status; soi_flags = P->smb2_hdr_flags; + soi_zoneid = P->sr_server->sv_zid; soi_share = (P->tid_tree == NULL) ? "<NULL>" : P->tid_tree->t_sharename; diff --git a/usr/src/lib/libshare/common/libshare_zfs.c b/usr/src/lib/libshare/common/libshare_zfs.c index a4fda8c424..0db6a56cd2 100644 --- a/usr/src/lib/libshare/common/libshare_zfs.c +++ b/usr/src/lib/libshare/common/libshare_zfs.c @@ -22,10 +22,11 @@ /* * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. */ + /* - * Copyright 2012 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2012, 2016 by Delphix. All rights reserved. * Copyright 2017 RackTop Systems. + * Copyright 2019 Nexenta Systems, Inc. 
*/ #include <stdio.h> @@ -33,6 +34,7 @@ #include <string.h> #include <strings.h> #include <errno.h> +#include <zone.h> #include <libshare.h> #include "libshare_impl.h" #include <libintl.h> @@ -238,6 +240,7 @@ get_legacy_mountpoint(const char *path, char *dataset, size_t dlen, { FILE *fp; struct mnttab entry; + int rc = 1; if ((fp = fopen(MNTTAB, "r")) == NULL) { return (1); @@ -256,11 +259,12 @@ get_legacy_mountpoint(const char *path, char *dataset, size_t dlen, if (dlen > 0) (void) strlcpy(dataset, entry.mnt_special, dlen); + rc = 0; break; } } (void) fclose(fp); - return (1); + return (rc); } @@ -817,6 +821,13 @@ sa_get_zfs_share_common(sa_handle_t handle, zfs_handle_t *fs_handle, char *path, if (!zfs_is_mounted(fs_handle, NULL)) return (SA_SYSTEM_ERR); + /* + * Ignore "zoned" datasets in global zone. + */ + if (getzoneid() == GLOBAL_ZONEID && + zfs_prop_get_int(fs_handle, ZFS_PROP_ZONED)) + return (SA_SYSTEM_ERR); + nfs = nfs_inherited = B_FALSE; if (zfs_prop_get(fs_handle, ZFS_PROP_SHARENFS, nfsshareopts, @@ -977,6 +988,29 @@ sa_get_zfs_shares(sa_handle_t handle, char *groupname) } /* + * Initializes shares for only the dataset specified fs_handle. + * This is used as a performance optimization relative to sa_get_zfs_shares. + */ +int +sa_get_zfs_share(sa_handle_t handle, char *groupname, zfs_handle_t *fs_handle) +{ + sa_group_t zfsgroup; + libzfs_handle_t *zfs_libhandle; + int err; + + if ((err = prep_zfs_handle_and_group(handle, groupname, &zfs_libhandle, + &zfsgroup, &err)) != SA_OK) { + return (err); + } + /* Not an error, this could be a legacy condition */ + if (zfsgroup == NULL) + return (SA_OK); + + err = sa_get_zfs_share_common(handle, fs_handle, NULL, zfsgroup); + return (err); +} + +/* * Initializes only the handles specified in the sharearg for use with libshare. * This is used as a performance optimization relative to sa_get_zfs_shares. 
*/ diff --git a/usr/src/lib/libshare/common/mapfile-vers b/usr/src/lib/libshare/common/mapfile-vers index c661589eda..48a62e0bfd 100644 --- a/usr/src/lib/libshare/common/mapfile-vers +++ b/usr/src/lib/libshare/common/mapfile-vers @@ -21,7 +21,7 @@ # # Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. # Copyright (c) 2016 by Delphix. All rights reserved. -# Copyright 2018 Nexenta Systems, Inc. All rights reserved. +# Copyright 2019 Nexenta Systems, Inc. All rights reserved. # # @@ -152,7 +152,7 @@ SYMBOL_VERSION SUNWprivate { sa_proto_delete_section; sa_needs_refresh; sa_get_zfs_handle; - sa_zfs_process_share; + sa_get_zfs_share; sa_update_sharetab_ts; sa_zfs_setprop; local: diff --git a/usr/src/lib/libshare/nfs/libshare_nfs.c b/usr/src/lib/libshare/nfs/libshare_nfs.c index 484244e6ba..9f6acce19f 100644 --- a/usr/src/lib/libshare/nfs/libshare_nfs.c +++ b/usr/src/lib/libshare/nfs/libshare_nfs.c @@ -21,19 +21,19 @@ /* * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2016 Nexenta Systems, Inc. * Copyright (c) 2014, 2016 by Delphix. All rights reserved. + * Copyright 2018 Nexenta Systems, Inc. */ /* * NFS specific functions */ + #include <stdio.h> #include <string.h> #include <ctype.h> #include <stdlib.h> #include <unistd.h> -#include <zone.h> #include <errno.h> #include <locale.h> #include <signal.h> @@ -1905,12 +1905,7 @@ nfs_enable_share(sa_share_t share) sa_free_attr_string(sectype); } } - /* - * when we get here, we can do the exportfs system call and - * initiate things. We probably want to enable the - * svc:/network/nfs/server service first if it isn't running. 
- */ - /* check svc:/network/nfs/server status and start if needed */ + /* now add the share to the internal tables */ printarg(path, &export); /* @@ -1920,52 +1915,17 @@ nfs_enable_share(sa_share_t share) if (iszfs) { struct exportfs_args ea; share_t sh; - char *str; - priv_set_t *priv_effective; - int privileged; - - /* - * If we aren't a privileged user - * and NFS server service isn't running - * then print out an error message - * and return EPERM - */ - priv_effective = priv_allocset(); - (void) getppriv(PRIV_EFFECTIVE, priv_effective); - - privileged = (priv_isfullset(priv_effective) == B_TRUE); - priv_freeset(priv_effective); - - if (!privileged && - (str = smf_get_state(NFS_SERVER_SVC)) != NULL) { - err = 0; - if (strcmp(str, SCF_STATE_STRING_ONLINE) != 0) { - (void) printf(dgettext(TEXT_DOMAIN, - "NFS: Cannot share remote " - "filesystem: %s\n"), path); - (void) printf(dgettext(TEXT_DOMAIN, - "NFS: Service needs to be enabled " - "by a privileged user\n")); - err = SA_SYSTEM_ERR; - errno = EPERM; - } - free(str); - } - - if (err == 0) { - ea.dname = path; - ea.uex = &export; + ea.dname = path; + ea.uex = &export; - (void) sa_sharetab_fill_zfs(share, &sh, "nfs"); - err = sa_share_zfs(share, NULL, path, &sh, - &ea, ZFS_SHARE_NFS); - if (err != SA_OK) { - errno = err; - err = -1; - } - sa_emptyshare(&sh); + (void) sa_sharetab_fill_zfs(share, &sh, "nfs"); + err = sa_share_zfs(share, NULL, path, &sh, &ea, ZFS_SHARE_NFS); + if (err != SA_OK) { + errno = err; + err = -1; } + sa_emptyshare(&sh); } else { err = exportfs(path, &export); } @@ -1973,20 +1933,7 @@ nfs_enable_share(sa_share_t share) if (err < 0) { err = SA_SYSTEM_ERR; switch (errno) { - case EREMOTE: - (void) printf(dgettext(TEXT_DOMAIN, - "NFS: Cannot share filesystems " - "in non-global zones: %s\n"), path); - err = SA_NOT_SUPPORTED; - break; case EPERM: - if (getzoneid() != GLOBAL_ZONEID) { - (void) printf(dgettext(TEXT_DOMAIN, - "NFS: Cannot share file systems " - "in non-global zones: %s\n"), 
path); - err = SA_NOT_SUPPORTED; - break; - } err = SA_NO_PERMISSION; break; case EEXIST: @@ -2098,9 +2045,6 @@ nfs_disable_share(sa_share_t share, char *path) case EPERM: case EACCES: ret = SA_NO_PERMISSION; - if (getzoneid() != GLOBAL_ZONEID) { - ret = SA_NOT_SUPPORTED; - } break; case EINVAL: case ENOENT: diff --git a/usr/src/lib/libshare/smb/libshare_smb.c b/usr/src/lib/libshare/smb/libshare_smb.c index f567e7818b..da14ea94c3 100644 --- a/usr/src/lib/libshare/smb/libshare_smb.c +++ b/usr/src/lib/libshare/smb/libshare_smb.c @@ -391,9 +391,7 @@ smb_enable_share(sa_share_t share) smb_share_t si; sa_resource_t resource; boolean_t iszfs; - boolean_t privileged; int err = SA_OK; - priv_set_t *priv_effective; boolean_t online; /* @@ -405,11 +403,6 @@ smb_enable_share(sa_share_t share) return (SA_NOT_SUPPORTED); } - priv_effective = priv_allocset(); - (void) getppriv(PRIV_EFFECTIVE, priv_effective); - privileged = (priv_isfullset(priv_effective) == B_TRUE); - priv_freeset(priv_effective); - /* get the path since it is important in several places */ path = sa_get_share_attr(share, "path"); if (path == NULL) @@ -424,29 +417,7 @@ smb_enable_share(sa_share_t share) iszfs = sa_path_is_zfs(path); - if (iszfs) { - - if (privileged == B_FALSE && !online) { - - if (!online) { - (void) printf(dgettext(TEXT_DOMAIN, - "SMB: Cannot share remove " - "file system: %s\n"), path); - (void) printf(dgettext(TEXT_DOMAIN, - "SMB: Service needs to be enabled " - "by a privileged user\n")); - err = SA_NO_PERMISSION; - errno = EPERM; - } - if (err) { - sa_free_attr_string(path); - return (err); - } - - } - } - - if (privileged == B_TRUE && !online) { + if (!online) { err = smb_enable_service(); if (err != SA_OK) { (void) printf(dgettext(TEXT_DOMAIN, diff --git a/usr/src/lib/libzfs/common/libzfs_dataset.c b/usr/src/lib/libzfs/common/libzfs_dataset.c index 6810679055..4e5ef6f292 100644 --- a/usr/src/lib/libzfs/common/libzfs_dataset.c +++ b/usr/src/lib/libzfs/common/libzfs_dataset.c @@ -21,14 
+21,17 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2019 Joyent, Inc. + */ + +/* + * Copyright (c) 2013, Joyent, Inc. All rights reserved. * Copyright (c) 2011, 2016 by Delphix. All rights reserved. * Copyright (c) 2012 DEY Storage Systems, Inc. All rights reserved. * Copyright (c) 2011-2012 Pawel Jakub Dawidek. All rights reserved. * Copyright (c) 2013 Martin Matuska. All rights reserved. * Copyright (c) 2013 Steven Hartland. All rights reserved. * Copyright (c) 2014 Integros [integros.com] - * Copyright 2017 Nexenta Systems, Inc. + * Copyright 2018 Nexenta Systems, Inc. * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com> * Copyright 2017-2018 RackTop Systems. */ @@ -1311,8 +1314,8 @@ badlabel: * global zone non-global zone * -------------------------------------------------- * zoned=on mountpoint (no) mountpoint (yes) - * sharenfs (no) sharenfs (no) - * sharesmb (no) sharesmb (no) + * sharenfs (no) sharenfs (yes) + * sharesmb (no) sharesmb (yes) * * zoned=off mountpoint (yes) N/A * sharenfs (yes) @@ -1327,14 +1330,6 @@ badlabel: (void) zfs_error(hdl, EZFS_ZONED, errbuf); goto error; - } else if (prop == ZFS_PROP_SHARENFS || - prop == ZFS_PROP_SHARESMB) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "'%s' cannot be set in " - "a non-global zone"), propname); - (void) zfs_error(hdl, EZFS_ZONED, - errbuf); - goto error; } } else if (getzoneid() != GLOBAL_ZONEID) { /* diff --git a/usr/src/lib/libzfs/common/libzfs_mount.c b/usr/src/lib/libzfs/common/libzfs_mount.c index aea93fb91e..faa750f121 100644 --- a/usr/src/lib/libzfs/common/libzfs_mount.c +++ b/usr/src/lib/libzfs/common/libzfs_mount.c @@ -20,9 +20,12 @@ */ /* - * Copyright 2015 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, 2017 by Delphix. All rights reserved. + */ + +/* + * Copyright 2019 Nexenta Systems, Inc. + * Copyright (c) 2014, 2016 by Delphix. 
All rights reserved. * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com> * Copyright 2017 Joyent, Inc. * Copyright 2017 RackTop Systems. @@ -621,8 +624,7 @@ static char *(*_sa_errorstr)(int); static int (*_sa_parse_legacy_options)(sa_group_t, char *, char *); static boolean_t (*_sa_needs_refresh)(sa_handle_t *); static libzfs_handle_t *(*_sa_get_zfs_handle)(sa_handle_t); -static int (*_sa_zfs_process_share)(sa_handle_t, sa_group_t, sa_share_t, - char *, char *, zprop_source_t, char *, char *, char *); +static int (* _sa_get_zfs_share)(sa_handle_t, char *, zfs_handle_t *); static void (*_sa_update_sharetab_ts)(sa_handle_t); /* @@ -670,9 +672,8 @@ _zfs_init_libshare(void) dlsym(libshare, "sa_needs_refresh"); _sa_get_zfs_handle = (libzfs_handle_t *(*)(sa_handle_t)) dlsym(libshare, "sa_get_zfs_handle"); - _sa_zfs_process_share = (int (*)(sa_handle_t, sa_group_t, - sa_share_t, char *, char *, zprop_source_t, char *, - char *, char *))dlsym(libshare, "sa_zfs_process_share"); + _sa_get_zfs_share = (int (*)(sa_handle_t, char *, + zfs_handle_t *)) dlsym(libshare, "sa_get_zfs_share"); _sa_update_sharetab_ts = (void (*)(sa_handle_t)) dlsym(libshare, "sa_update_sharetab_ts"); if (_sa_init == NULL || _sa_init_arg == NULL || @@ -680,7 +681,7 @@ _zfs_init_libshare(void) _sa_enable_share == NULL || _sa_disable_share == NULL || _sa_errorstr == NULL || _sa_parse_legacy_options == NULL || _sa_needs_refresh == NULL || _sa_get_zfs_handle == NULL || - _sa_zfs_process_share == NULL || _sa_service == NULL || + _sa_get_zfs_share == NULL || _sa_service == NULL || _sa_update_sharetab_ts == NULL) { _sa_init = NULL; _sa_init_arg = NULL; @@ -693,7 +694,7 @@ _zfs_init_libshare(void) (void) dlclose(libshare); _sa_needs_refresh = NULL; _sa_get_zfs_handle = NULL; - _sa_zfs_process_share = NULL; + _sa_get_zfs_share = NULL; _sa_update_sharetab_ts = NULL; } } @@ -880,30 +881,17 @@ zfs_share_proto(zfs_handle_t *zhp, zfs_share_proto_t *proto) return (-1); } - /* - * If the 'zoned' property is set, 
then zfs_is_mountable() - * will have already bailed out if we are in the global zone. - * But local zones cannot be NFS servers, so we ignore it for - * local zones as well. - */ - if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) - continue; - share = zfs_sa_find_share(hdl->libzfs_sharehdl, mountpoint); if (share == NULL) { /* * This may be a new file system that was just - * created so isn't in the internal cache - * (second time through). Rather than - * reloading the entire configuration, we can - * assume ZFS has done the checking and it is - * safe to add this to the internal - * configuration. + * created so isn't in the internal cache. + * Rather than reloading the entire configuration, + * we can add just this one share to the cache. */ - if (_sa_zfs_process_share(hdl->libzfs_sharehdl, - NULL, NULL, mountpoint, - proto_table[*curr_proto].p_name, sourcetype, - shareopts, sourcestr, zhp->zfs_name) != SA_OK) { + if ((_sa_get_zfs_share == NULL) || + (_sa_get_zfs_share(hdl->libzfs_sharehdl, "zfs", zhp) + != SA_OK)) { (void) zfs_error_fmt(hdl, proto_table[*curr_proto].p_share_err, dgettext(TEXT_DOMAIN, "cannot share '%s'"), diff --git a/usr/src/pkg/manifests/service-file-system-nfs.mf b/usr/src/pkg/manifests/service-file-system-nfs.mf index 7d3a408bac..c5937467c8 100644 --- a/usr/src/pkg/manifests/service-file-system-nfs.mf +++ b/usr/src/pkg/manifests/service-file-system-nfs.mf @@ -21,7 +21,7 @@ # # Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. -# Copyright 2015 Nexenta Systems, Inc. All rights reserved. +# Copyright 2018 Nexenta Systems, Inc. All rights reserved. 
# set name=pkg.fmri value=pkg:/service/file-system/nfs@$(PKGVERS) @@ -48,6 +48,7 @@ dir path=usr/lib/fs group=sys dir path=usr/lib/fs/nfs group=sys dir path=usr/lib/fs/nfs/$(ARCH64) group=sys dir path=usr/lib/nfs group=sys +dir path=usr/lib/nfs/dtrace group=bin dir path=usr/lib/reparse group=bin dir path=usr/lib/reparse/$(ARCH64) group=sys dir path=usr/sbin @@ -65,6 +66,8 @@ file path=lib/svc/manifest/network/nfs/server.xml group=sys mode=0444 file path=lib/svc/method/nfs-server mode=0555 file path=usr/lib/fs/nfs/$(ARCH64)/libshare_nfs.so.1 file path=usr/lib/fs/nfs/libshare_nfs.so.1 +file path=usr/lib/nfs/dtrace/nfs-time.d mode=0555 +file path=usr/lib/nfs/dtrace/nfs-trace.d mode=0555 file path=usr/lib/nfs/mountd mode=0555 file path=usr/lib/nfs/nfsd mode=0555 file path=usr/lib/nfs/nfslogd mode=0555 diff --git a/usr/src/uts/common/dtrace/sdt_subr.c b/usr/src/uts/common/dtrace/sdt_subr.c index 76f403673b..61e66681b7 100644 --- a/usr/src/uts/common/dtrace/sdt_subr.c +++ b/usr/src/uts/common/dtrace/sdt_subr.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. - * Copyright 2017 Nexenta Systems, Inc. All rights reserved. + * Copyright 2018 Nexenta Systems, Inc. All rights reserved. 
*/ #include <sys/sdt_impl.h> @@ -98,6 +98,14 @@ static dtrace_pattr_t iscsi_attr = { { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, }; +static dtrace_pattr_t nfs_attr = { +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, +}; + static dtrace_pattr_t smb_attr = { { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, @@ -120,8 +128,8 @@ sdt_provider_t sdt_providers[] = { { "mib", "__mib_", &stab_attr }, { "fsinfo", "__fsinfo_", &fsinfo_attr }, { "iscsi", "__iscsi_", &iscsi_attr }, - { "nfsv3", "__nfsv3_", &stab_attr }, - { "nfsv4", "__nfsv4_", &stab_attr }, + { "nfsv3", "__nfsv3_", &nfs_attr }, + { "nfsv4", "__nfsv4_", &nfs_attr }, { "smb", "__smb_", &smb_attr }, { "smb2", "__smb2_", &smb_attr }, { "xpv", "__xpv_", &xpv_attr }, @@ -267,627 +275,490 @@ sdt_argdesc_t sdt_args[] = { { "iscsi", "xfer-done", 7, 6, "uint32_t"}, { "iscsi", "xfer-done", 8, 7, "int"}, - { "nfsv3", "op-getattr-start", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-getattr-start", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-getattr-start", 2, 3, "GETATTR3args *" }, - { "nfsv3", "op-getattr-done", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-getattr-done", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-getattr-done", 2, 3, "GETATTR3res *" }, - { "nfsv3", "op-setattr-start", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-setattr-start", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-setattr-start", 2, 3, "SETATTR3args *" }, - { "nfsv3", 
"op-setattr-done", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-setattr-done", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-setattr-done", 2, 3, "SETATTR3res *" }, - { "nfsv3", "op-lookup-start", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-lookup-start", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-lookup-start", 2, 3, "LOOKUP3args *" }, - { "nfsv3", "op-lookup-done", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-lookup-done", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-lookup-done", 2, 3, "LOOKUP3res *" }, - { "nfsv3", "op-access-start", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-access-start", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-access-start", 2, 3, "ACCESS3args *" }, - { "nfsv3", "op-access-done", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-access-done", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-access-done", 2, 3, "ACCESS3res *" }, - { "nfsv3", "op-commit-start", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-commit-start", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-commit-start", 2, 3, "COMMIT3args *" }, - { "nfsv3", "op-commit-done", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-commit-done", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-commit-done", 2, 3, "COMMIT3res *" }, - { "nfsv3", "op-create-start", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-create-start", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-create-start", 2, 3, "CREATE3args *" }, - { "nfsv3", "op-create-done", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-create-done", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-create-done", 2, 3, "CREATE3res *" }, - { "nfsv3", "op-fsinfo-start", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", 
"op-fsinfo-start", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-fsinfo-start", 2, 3, "FSINFO3args *" }, - { "nfsv3", "op-fsinfo-done", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-fsinfo-done", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-fsinfo-done", 2, 3, "FSINFO3res *" }, - { "nfsv3", "op-fsstat-start", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-fsstat-start", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-fsstat-start", 2, 3, "FSSTAT3args *" }, - { "nfsv3", "op-fsstat-done", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-fsstat-done", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-fsstat-done", 2, 3, "FSSTAT3res *" }, - { "nfsv3", "op-link-start", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-link-start", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-link-start", 2, 3, "LINK3args *" }, - { "nfsv3", "op-link-done", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-link-done", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-link-done", 2, 3, "LINK3res *" }, - { "nfsv3", "op-mkdir-start", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-mkdir-start", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-mkdir-start", 2, 3, "MKDIR3args *" }, - { "nfsv3", "op-mkdir-done", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-mkdir-done", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-mkdir-done", 2, 3, "MKDIR3res *" }, - { "nfsv3", "op-mknod-start", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-mknod-start", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-mknod-start", 2, 3, "MKNOD3args *" }, - { "nfsv3", "op-mknod-done", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-mknod-done", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-mknod-done", 2, 3, "MKNOD3res *" }, - 
{ "nfsv3", "op-null-start", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-null-start", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-null-done", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-null-done", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-pathconf-start", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-pathconf-start", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-pathconf-start", 2, 3, "PATHCONF3args *" }, - { "nfsv3", "op-pathconf-done", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-pathconf-done", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-pathconf-done", 2, 3, "PATHCONF3res *" }, - { "nfsv3", "op-read-start", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-read-start", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-read-start", 2, 3, "READ3args *" }, - { "nfsv3", "op-read-done", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-read-done", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-read-done", 2, 3, "READ3res *" }, - { "nfsv3", "op-readdir-start", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-readdir-start", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-readdir-start", 2, 3, "READDIR3args *" }, - { "nfsv3", "op-readdir-done", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-readdir-done", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-readdir-done", 2, 3, "READDIR3res *" }, - { "nfsv3", "op-readdirplus-start", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-readdirplus-start", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-readdirplus-start", 2, 3, "READDIRPLUS3args *" }, - { "nfsv3", "op-readdirplus-done", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-readdirplus-done", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", 
"op-readdirplus-done", 2, 3, "READDIRPLUS3res *" }, - { "nfsv3", "op-readlink-start", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-readlink-start", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-readlink-start", 2, 3, "READLINK3args *" }, - { "nfsv3", "op-readlink-done", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-readlink-done", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-readlink-done", 2, 3, "READLINK3res *" }, - { "nfsv3", "op-remove-start", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-remove-start", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-remove-start", 2, 3, "REMOVE3args *" }, - { "nfsv3", "op-remove-done", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-remove-done", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-remove-done", 2, 3, "REMOVE3res *" }, - { "nfsv3", "op-rename-start", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-rename-start", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-rename-start", 2, 3, "RENAME3args *" }, - { "nfsv3", "op-rename-done", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-rename-done", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-rename-done", 2, 3, "RENAME3res *" }, - { "nfsv3", "op-rmdir-start", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-rmdir-start", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-rmdir-start", 2, 3, "RMDIR3args *" }, - { "nfsv3", "op-rmdir-done", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-rmdir-done", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-rmdir-done", 2, 3, "RMDIR3res *" }, - { "nfsv3", "op-setattr-start", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-setattr-start", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-setattr-start", 2, 3, "SETATTR3args *" }, - { "nfsv3", 
"op-setattr-done", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-setattr-done", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-setattr-done", 2, 3, "SETATTR3res *" }, - { "nfsv3", "op-symlink-start", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-symlink-start", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-symlink-start", 2, 3, "SYMLINK3args *" }, - { "nfsv3", "op-symlink-done", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-symlink-done", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-symlink-done", 2, 3, "SYMLINK3res *" }, - { "nfsv3", "op-write-start", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-write-start", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-write-start", 2, 3, "WRITE3args *" }, - { "nfsv3", "op-write-done", 0, 0, "struct svc_req *", - "conninfo_t *" }, - { "nfsv3", "op-write-done", 1, 1, "nfsv3oparg_t *", - "nfsv3opinfo_t *" }, - { "nfsv3", "op-write-done", 2, 3, "WRITE3res *" }, + /* Tables like this get really ugly when line-wrapped. 
*/ + /* BEGIN CSTYLED */ + { "nfsv3", "op-getattr-start", 0, 0, "struct svc_req *", "conninfo_t *" }, + { "nfsv3", "op-getattr-start", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" }, + { "nfsv3", "op-getattr-start", 2, 4, "GETATTR3args *" }, + + { "nfsv3", "op-getattr-done", 0, 0, "struct svc_req *", "conninfo_t *" }, + { "nfsv3", "op-getattr-done", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" }, + { "nfsv3", "op-getattr-done", 2, 4, "GETATTR3res *" }, + + { "nfsv3", "op-setattr-start", 0, 0, "struct svc_req *", "conninfo_t *" }, + { "nfsv3", "op-setattr-start", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" }, + { "nfsv3", "op-setattr-start", 2, 4, "SETATTR3args *" }, + + { "nfsv3", "op-setattr-done", 0, 0, "struct svc_req *", "conninfo_t *" }, + { "nfsv3", "op-setattr-done", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" }, + { "nfsv3", "op-setattr-done", 2, 4, "SETATTR3res *" }, + + { "nfsv3", "op-lookup-start", 0, 0, "struct svc_req *", "conninfo_t *" }, + { "nfsv3", "op-lookup-start", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" }, + { "nfsv3", "op-lookup-start", 2, 4, "LOOKUP3args *" }, + + { "nfsv3", "op-lookup-done", 0, 0, "struct svc_req *", "conninfo_t *" }, + { "nfsv3", "op-lookup-done", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" }, + { "nfsv3", "op-lookup-done", 2, 4, "LOOKUP3res *" }, + + { "nfsv3", "op-access-start", 0, 0, "struct svc_req *", "conninfo_t *" }, + { "nfsv3", "op-access-start", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" }, + { "nfsv3", "op-access-start", 2, 4, "ACCESS3args *" }, + + { "nfsv3", "op-access-done", 0, 0, "struct svc_req *", "conninfo_t *" }, + { "nfsv3", "op-access-done", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" }, + { "nfsv3", "op-access-done", 2, 4, "ACCESS3res *" }, + + { "nfsv3", "op-commit-start", 0, 0, "struct svc_req *", "conninfo_t *" }, + { "nfsv3", "op-commit-start", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" }, + { "nfsv3", "op-commit-start", 2, 4, "COMMIT3args *" }, + + { "nfsv3", "op-commit-done", 0, 0, "struct svc_req *", 
"conninfo_t *" }, + { "nfsv3", "op-commit-done", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" }, + { "nfsv3", "op-commit-done", 2, 4, "COMMIT3res *" }, + + { "nfsv3", "op-create-start", 0, 0, "struct svc_req *", "conninfo_t *" }, + { "nfsv3", "op-create-start", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" }, + { "nfsv3", "op-create-start", 2, 4, "CREATE3args *" }, + + { "nfsv3", "op-create-done", 0, 0, "struct svc_req *", "conninfo_t *" }, + { "nfsv3", "op-create-done", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" }, + { "nfsv3", "op-create-done", 2, 4, "CREATE3res *" }, + + { "nfsv3", "op-fsinfo-start", 0, 0, "struct svc_req *", "conninfo_t *" }, + { "nfsv3", "op-fsinfo-start", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" }, + { "nfsv3", "op-fsinfo-start", 2, 4, "FSINFO3args *" }, + + { "nfsv3", "op-fsinfo-done", 0, 0, "struct svc_req *", "conninfo_t *" }, + { "nfsv3", "op-fsinfo-done", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" }, + { "nfsv3", "op-fsinfo-done", 2, 4, "FSINFO3res *" }, + + { "nfsv3", "op-fsstat-start", 0, 0, "struct svc_req *", "conninfo_t *" }, + { "nfsv3", "op-fsstat-start", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" }, + { "nfsv3", "op-fsstat-start", 2, 4, "FSSTAT3args *" }, + + { "nfsv3", "op-fsstat-done", 0, 0, "struct svc_req *", "conninfo_t *" }, + { "nfsv3", "op-fsstat-done", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" }, + { "nfsv3", "op-fsstat-done", 2, 4, "FSSTAT3res *" }, + + { "nfsv3", "op-link-start", 0, 0, "struct svc_req *", "conninfo_t *" }, + { "nfsv3", "op-link-start", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" }, + { "nfsv3", "op-link-start", 2, 4, "LINK3args *" }, + + { "nfsv3", "op-link-done", 0, 0, "struct svc_req *", "conninfo_t *" }, + { "nfsv3", "op-link-done", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" }, + { "nfsv3", "op-link-done", 2, 4, "LINK3res *" }, + + { "nfsv3", "op-mkdir-start", 0, 0, "struct svc_req *", "conninfo_t *" }, + { "nfsv3", "op-mkdir-start", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" }, + { "nfsv3", "op-mkdir-start", 2, 
4, "MKDIR3args *" }, + + { "nfsv3", "op-mkdir-done", 0, 0, "struct svc_req *", "conninfo_t *" }, + { "nfsv3", "op-mkdir-done", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" }, + { "nfsv3", "op-mkdir-done", 2, 4, "MKDIR3res *" }, + + { "nfsv3", "op-mknod-start", 0, 0, "struct svc_req *", "conninfo_t *" }, + { "nfsv3", "op-mknod-start", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" }, + { "nfsv3", "op-mknod-start", 2, 4, "MKNOD3args *" }, + + { "nfsv3", "op-mknod-done", 0, 0, "struct svc_req *", "conninfo_t *" }, + { "nfsv3", "op-mknod-done", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" }, + { "nfsv3", "op-mknod-done", 2, 4, "MKNOD3res *" }, + + { "nfsv3", "op-null-start", 0, 0, "struct svc_req *", "conninfo_t *" }, + { "nfsv3", "op-null-start", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" }, + + { "nfsv3", "op-null-done", 0, 0, "struct svc_req *", "conninfo_t *" }, + { "nfsv3", "op-null-done", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" }, + + { "nfsv3", "op-pathconf-start", 0, 0, "struct svc_req *", "conninfo_t *" }, + { "nfsv3", "op-pathconf-start", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" }, + { "nfsv3", "op-pathconf-start", 2, 4, "PATHCONF3args *" }, + + { "nfsv3", "op-pathconf-done", 0, 0, "struct svc_req *", "conninfo_t *" }, + { "nfsv3", "op-pathconf-done", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" }, + { "nfsv3", "op-pathconf-done", 2, 4, "PATHCONF3res *" }, + + { "nfsv3", "op-read-start", 0, 0, "struct svc_req *", "conninfo_t *" }, + { "nfsv3", "op-read-start", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" }, + { "nfsv3", "op-read-start", 2, 4, "READ3args *" }, + + { "nfsv3", "op-read-done", 0, 0, "struct svc_req *", "conninfo_t *" }, + { "nfsv3", "op-read-done", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" }, + { "nfsv3", "op-read-done", 2, 4, "READ3res *" }, + + { "nfsv3", "op-readdir-start", 0, 0, "struct svc_req *", "conninfo_t *" }, + { "nfsv3", "op-readdir-start", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" }, + { "nfsv3", "op-readdir-start", 2, 4, "READDIR3args *" }, + + { 
"nfsv3", "op-readdir-done", 0, 0, "struct svc_req *", "conninfo_t *" }, + { "nfsv3", "op-readdir-done", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" }, + { "nfsv3", "op-readdir-done", 2, 4, "READDIR3res *" }, + + { "nfsv3", "op-readdirplus-start", 0, 0, "struct svc_req *", "conninfo_t *" }, + { "nfsv3", "op-readdirplus-start", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" }, + { "nfsv3", "op-readdirplus-start", 2, 4, "READDIRPLUS3args *" }, + + { "nfsv3", "op-readdirplus-done", 0, 0, "struct svc_req *", "conninfo_t *" }, + { "nfsv3", "op-readdirplus-done", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" }, + { "nfsv3", "op-readdirplus-done", 2, 4, "READDIRPLUS3res *" }, + + { "nfsv3", "op-readlink-start", 0, 0, "struct svc_req *", "conninfo_t *" }, + { "nfsv3", "op-readlink-start", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" }, + { "nfsv3", "op-readlink-start", 2, 4, "READLINK3args *" }, + + { "nfsv3", "op-readlink-done", 0, 0, "struct svc_req *", "conninfo_t *" }, + { "nfsv3", "op-readlink-done", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" }, + { "nfsv3", "op-readlink-done", 2, 4, "READLINK3res *" }, + + { "nfsv3", "op-remove-start", 0, 0, "struct svc_req *", "conninfo_t *" }, + { "nfsv3", "op-remove-start", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" }, + { "nfsv3", "op-remove-start", 2, 4, "REMOVE3args *" }, + + { "nfsv3", "op-remove-done", 0, 0, "struct svc_req *", "conninfo_t *" }, + { "nfsv3", "op-remove-done", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" }, + { "nfsv3", "op-remove-done", 2, 4, "REMOVE3res *" }, + + { "nfsv3", "op-rename-start", 0, 0, "struct svc_req *", "conninfo_t *" }, + { "nfsv3", "op-rename-start", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" }, + { "nfsv3", "op-rename-start", 2, 4, "RENAME3args *" }, + + { "nfsv3", "op-rename-done", 0, 0, "struct svc_req *", "conninfo_t *" }, + { "nfsv3", "op-rename-done", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" }, + { "nfsv3", "op-rename-done", 2, 4, "RENAME3res *" }, + + { "nfsv3", "op-rmdir-start", 0, 0, "struct svc_req 
*", "conninfo_t *" }, + { "nfsv3", "op-rmdir-start", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" }, + { "nfsv3", "op-rmdir-start", 2, 4, "RMDIR3args *" }, + + { "nfsv3", "op-rmdir-done", 0, 0, "struct svc_req *", "conninfo_t *" }, + { "nfsv3", "op-rmdir-done", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" }, + { "nfsv3", "op-rmdir-done", 2, 4, "RMDIR3res *" }, + + { "nfsv3", "op-symlink-start", 0, 0, "struct svc_req *", "conninfo_t *" }, + { "nfsv3", "op-symlink-start", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" }, + { "nfsv3", "op-symlink-start", 2, 4, "SYMLINK3args *" }, + + { "nfsv3", "op-symlink-done", 0, 0, "struct svc_req *", "conninfo_t *" }, + { "nfsv3", "op-symlink-done", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" }, + { "nfsv3", "op-symlink-done", 2, 4, "SYMLINK3res *" }, + + { "nfsv3", "op-write-start", 0, 0, "struct svc_req *", "conninfo_t *" }, + { "nfsv3", "op-write-start", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" }, + { "nfsv3", "op-write-start", 2, 4, "WRITE3args *" }, + + { "nfsv3", "op-write-done", 0, 0, "struct svc_req *", "conninfo_t *" }, + { "nfsv3", "op-write-done", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" }, + { "nfsv3", "op-write-done", 2, 4, "WRITE3res *" }, { "nfsv4", "null-start", 0, 0, "struct svc_req *", "conninfo_t *" }, { "nfsv4", "null-done", 0, 0, "struct svc_req *", "conninfo_t *" }, - { "nfsv4", "compound-start", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "compound-start", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "compound-start", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "compound-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "compound-start", 2, 1, "COMPOUND4args *" }, - { "nfsv4", "compound-done", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "compound-done", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "compound-done", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", 
"compound-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "compound-done", 2, 1, "COMPOUND4res *" }, - { "nfsv4", "op-access-start", 0, 0, "struct compound_state *", - "conninfo_t *"}, - { "nfsv4", "op-access-start", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-access-start", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-access-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-access-start", 2, 1, "ACCESS4args *" }, - { "nfsv4", "op-access-done", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-access-done", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-access-done", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-access-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-access-done", 2, 1, "ACCESS4res *" }, - { "nfsv4", "op-close-start", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-close-start", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-close-start", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-close-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-close-start", 2, 1, "CLOSE4args *" }, - { "nfsv4", "op-close-done", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-close-done", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-close-done", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-close-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-close-done", 2, 1, "CLOSE4res *" }, - { "nfsv4", "op-commit-start", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-commit-start", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-commit-start", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-commit-start", 1, 0, "struct 
compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-commit-start", 2, 1, "COMMIT4args *" }, - { "nfsv4", "op-commit-done", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-commit-done", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-commit-done", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-commit-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-commit-done", 2, 1, "COMMIT4res *" }, - { "nfsv4", "op-create-start", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-create-start", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-create-start", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-create-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-create-start", 2, 1, "CREATE4args *" }, - { "nfsv4", "op-create-done", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-create-done", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-create-done", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-create-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-create-done", 2, 1, "CREATE4res *" }, - { "nfsv4", "op-delegpurge-start", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-delegpurge-start", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-delegpurge-start", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-delegpurge-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-delegpurge-start", 2, 1, "DELEGPURGE4args *" }, - { "nfsv4", "op-delegpurge-done", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-delegpurge-done", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-delegpurge-done", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", 
"op-delegpurge-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-delegpurge-done", 2, 1, "DELEGPURGE4res *" }, - { "nfsv4", "op-delegreturn-start", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-delegreturn-start", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-delegreturn-start", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-delegreturn-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-delegreturn-start", 2, 1, "DELEGRETURN4args *" }, - { "nfsv4", "op-delegreturn-done", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-delegreturn-done", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-delegreturn-done", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-delegreturn-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-delegreturn-done", 2, 1, "DELEGRETURN4res *" }, - { "nfsv4", "op-getattr-start", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-getattr-start", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-getattr-start", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-getattr-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-getattr-start", 2, 1, "GETATTR4args *" }, - { "nfsv4", "op-getattr-done", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-getattr-done", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-getattr-done", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-getattr-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-getattr-done", 2, 1, "GETATTR4res *" }, - { "nfsv4", "op-getfh-start", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-getfh-start", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, - { "nfsv4", "op-getfh-done", 0, 0, 
"struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-getfh-done", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-getfh-start", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-getfh-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, + + { "nfsv4", "op-getfh-done", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-getfh-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-getfh-done", 2, 1, "GETFH4res *" }, - { "nfsv4", "op-link-start", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-link-start", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-link-start", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-link-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-link-start", 2, 1, "LINK4args *" }, - { "nfsv4", "op-link-done", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-link-done", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-link-done", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-link-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-link-done", 2, 1, "LINK4res *" }, - { "nfsv4", "op-lock-start", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-lock-start", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-lock-start", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-lock-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-lock-start", 2, 1, "LOCK4args *" }, - { "nfsv4", "op-lock-done", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-lock-done", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-lock-done", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-lock-done", 1, 0, "struct compound_state *", 
"nfsv4opinfo_t *" }, { "nfsv4", "op-lock-done", 2, 1, "LOCK4res *" }, - { "nfsv4", "op-lockt-start", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-lockt-start", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-lockt-start", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-lockt-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-lockt-start", 2, 1, "LOCKT4args *" }, - { "nfsv4", "op-lockt-done", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-lockt-done", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-lockt-done", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-lockt-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-lockt-done", 2, 1, "LOCKT4res *" }, - { "nfsv4", "op-locku-start", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-locku-start", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-locku-start", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-locku-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-locku-start", 2, 1, "LOCKU4args *" }, - { "nfsv4", "op-locku-done", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-locku-done", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-locku-done", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-locku-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-locku-done", 2, 1, "LOCKU4res *" }, - { "nfsv4", "op-lookup-start", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-lookup-start", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-lookup-start", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-lookup-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", 
"op-lookup-start", 2, 1, "LOOKUP4args *" }, - { "nfsv4", "op-lookup-done", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-lookup-done", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-lookup-done", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-lookup-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-lookup-done", 2, 1, "LOOKUP4res *" }, - { "nfsv4", "op-lookupp-start", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-lookupp-start", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, - { "nfsv4", "op-lookupp-done", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-lookupp-done", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-lookupp-start", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-lookupp-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, + + { "nfsv4", "op-lookupp-done", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-lookupp-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-lookupp-done", 2, 1, "LOOKUPP4res *" }, - { "nfsv4", "op-nverify-start", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-nverify-start", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-nverify-start", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-nverify-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-nverify-start", 2, 1, "NVERIFY4args *" }, - { "nfsv4", "op-nverify-done", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-nverify-done", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-nverify-done", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-nverify-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-nverify-done", 2, 1, "NVERIFY4res *" }, - { 
"nfsv4", "op-open-start", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-open-start", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-open-start", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-open-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-open-start", 2, 1, "OPEN4args *" }, - { "nfsv4", "op-open-done", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-open-done", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-open-done", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-open-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-open-done", 2, 1, "OPEN4res *" }, - { "nfsv4", "op-open-confirm-start", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-open-confirm-start", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-open-confirm-start", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-open-confirm-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-open-confirm-start", 2, 1, "OPEN_CONFIRM4args *" }, - { "nfsv4", "op-open-confirm-done", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-open-confirm-done", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-open-confirm-done", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-open-confirm-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-open-confirm-done", 2, 1, "OPEN_CONFIRM4res *" }, - { "nfsv4", "op-open-downgrade-start", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-open-downgrade-start", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-open-downgrade-start", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-open-downgrade-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t 
*" }, { "nfsv4", "op-open-downgrade-start", 2, 1, "OPEN_DOWNGRADE4args *" }, - { "nfsv4", "op-open-downgrade-done", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-open-downgrade-done", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-open-downgrade-done", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-open-downgrade-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-open-downgrade-done", 2, 1, "OPEN_DOWNGRADE4res *" }, - { "nfsv4", "op-openattr-start", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-openattr-start", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-openattr-start", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-openattr-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-openattr-start", 2, 1, "OPENATTR4args *" }, - { "nfsv4", "op-openattr-done", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-openattr-done", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-openattr-done", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-openattr-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-openattr-done", 2, 1, "OPENATTR4res *" }, - { "nfsv4", "op-putfh-start", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-putfh-start", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-putfh-start", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-putfh-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-putfh-start", 2, 1, "PUTFH4args *" }, - { "nfsv4", "op-putfh-done", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-putfh-done", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-putfh-done", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", 
"op-putfh-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-putfh-done", 2, 1, "PUTFH4res *" }, - { "nfsv4", "op-putpubfh-start", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-putpubfh-start", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, - { "nfsv4", "op-putpubfh-done", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-putpubfh-done", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-putpubfh-start", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-putpubfh-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, + + { "nfsv4", "op-putpubfh-done", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-putpubfh-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-putpubfh-done", 2, 1, "PUTPUBFH4res *" }, - { "nfsv4", "op-putrootfh-start", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-putrootfh-start", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, - { "nfsv4", "op-putrootfh-done", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-putrootfh-done", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-putrootfh-start", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-putrootfh-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, + + { "nfsv4", "op-putrootfh-done", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-putrootfh-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-putrootfh-done", 2, 1, "PUTROOTFH4res *" }, - { "nfsv4", "op-read-start", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-read-start", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-read-start", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-read-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", 
"op-read-start", 2, 1, "READ4args *" }, - { "nfsv4", "op-read-done", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-read-done", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-read-done", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-read-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-read-done", 2, 1, "READ4res *" }, - { "nfsv4", "op-readdir-start", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-readdir-start", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-readdir-start", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-readdir-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-readdir-start", 2, 1, "READDIR4args *" }, - { "nfsv4", "op-readdir-done", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-readdir-done", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-readdir-done", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-readdir-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-readdir-done", 2, 1, "READDIR4res *" }, - { "nfsv4", "op-readlink-start", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-readlink-start", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, - { "nfsv4", "op-readlink-done", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-readlink-done", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-readlink-start", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-readlink-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, + + { "nfsv4", "op-readlink-done", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-readlink-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-readlink-done", 2, 1, "READLINK4res *" }, - { "nfsv4", 
"op-release-lockowner-start", 0, 0, - "struct compound_state *", "conninfo_t *" }, - { "nfsv4", "op-release-lockowner-start", 1, 0, - "struct compound_state *", "nfsv4opinfo_t *" }, - { "nfsv4", "op-release-lockowner-start", 2, 1, - "RELEASE_LOCKOWNER4args *" }, - { "nfsv4", "op-release-lockowner-done", 0, 0, - "struct compound_state *", "conninfo_t *" }, - { "nfsv4", "op-release-lockowner-done", 1, 0, - "struct compound_state *", "nfsv4opinfo_t *" }, - { "nfsv4", "op-release-lockowner-done", 2, 1, - "RELEASE_LOCKOWNER4res *" }, - { "nfsv4", "op-remove-start", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-remove-start", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-release-lockowner-start", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-release-lockowner-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, + { "nfsv4", "op-release-lockowner-start", 2, 1, "RELEASE_LOCKOWNER4args *" }, + + { "nfsv4", "op-release-lockowner-done", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-release-lockowner-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, + { "nfsv4", "op-release-lockowner-done", 2, 1, "RELEASE_LOCKOWNER4res *" }, + + { "nfsv4", "op-remove-start", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-remove-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-remove-start", 2, 1, "REMOVE4args *" }, - { "nfsv4", "op-remove-done", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-remove-done", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-remove-done", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-remove-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-remove-done", 2, 1, "REMOVE4res *" }, - { "nfsv4", "op-rename-start", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-rename-start", 1, 0, "struct 
compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-rename-start", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-rename-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-rename-start", 2, 1, "RENAME4args *" }, - { "nfsv4", "op-rename-done", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-rename-done", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-rename-done", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-rename-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-rename-done", 2, 1, "RENAME4res *" }, - { "nfsv4", "op-renew-start", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-renew-start", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-renew-start", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-renew-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-renew-start", 2, 1, "RENEW4args *" }, - { "nfsv4", "op-renew-done", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-renew-done", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-renew-done", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-renew-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-renew-done", 2, 1, "RENEW4res *" }, - { "nfsv4", "op-restorefh-start", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-restorefh-start", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, - { "nfsv4", "op-restorefh-done", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-restorefh-done", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-restorefh-start", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-restorefh-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, + + { "nfsv4", 
"op-restorefh-done", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-restorefh-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-restorefh-done", 2, 1, "RESTOREFH4res *" }, - { "nfsv4", "op-savefh-start", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-savefh-start", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, - { "nfsv4", "op-savefh-done", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-savefh-done", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-savefh-start", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-savefh-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, + + { "nfsv4", "op-savefh-done", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-savefh-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-savefh-done", 2, 1, "SAVEFH4res *" }, - { "nfsv4", "op-secinfo-start", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-secinfo-start", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-secinfo-start", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-secinfo-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-secinfo-start", 2, 1, "SECINFO4args *" }, - { "nfsv4", "op-secinfo-done", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-secinfo-done", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-secinfo-done", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-secinfo-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-secinfo-done", 2, 1, "SECINFO4res *" }, - { "nfsv4", "op-setattr-start", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-setattr-start", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-setattr-start", 0, 0, "struct compound_state 
*", "conninfo_t *" }, + { "nfsv4", "op-setattr-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-setattr-start", 2, 1, "SETATTR4args *" }, - { "nfsv4", "op-setattr-done", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-setattr-done", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-setattr-done", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-setattr-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-setattr-done", 2, 1, "SETATTR4res *" }, - { "nfsv4", "op-setclientid-start", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-setclientid-start", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-setclientid-start", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-setclientid-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-setclientid-start", 2, 1, "SETCLIENTID4args *" }, - { "nfsv4", "op-setclientid-done", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-setclientid-done", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-setclientid-done", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-setclientid-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-setclientid-done", 2, 1, "SETCLIENTID4res *" }, - { "nfsv4", "op-setclientid-confirm-start", 0, 0, - "struct compound_state *", "conninfo_t *" }, - { "nfsv4", "op-setclientid-confirm-start", 1, 0, - "struct compound_state *", "nfsv4opinfo_t *" }, - { "nfsv4", "op-setclientid-confirm-start", 2, 1, - "SETCLIENTID_CONFIRM4args *" }, - { "nfsv4", "op-setclientid-confirm-done", 0, 0, - "struct compound_state *", "conninfo_t *" }, - { "nfsv4", "op-setclientid-confirm-done", 1, 0, - "struct compound_state *", "nfsv4opinfo_t *" }, - { "nfsv4", "op-setclientid-confirm-done", 2, 1, - "SETCLIENTID_CONFIRM4res *" }, - { "nfsv4", 
"op-verify-start", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-verify-start", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-setclientid-confirm-start", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-setclientid-confirm-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, + { "nfsv4", "op-setclientid-confirm-start", 2, 1, "SETCLIENTID_CONFIRM4args *" }, + + { "nfsv4", "op-setclientid-confirm-done", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-setclientid-confirm-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, + { "nfsv4", "op-setclientid-confirm-done", 2, 1, "SETCLIENTID_CONFIRM4res *" }, + + { "nfsv4", "op-verify-start", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-verify-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-verify-start", 2, 1, "VERIFY4args *" }, - { "nfsv4", "op-verify-done", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-verify-done", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-verify-done", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-verify-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-verify-done", 2, 1, "VERIFY4res *" }, - { "nfsv4", "op-write-start", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-write-start", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-write-start", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-write-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-write-start", 2, 1, "WRITE4args *" }, - { "nfsv4", "op-write-done", 0, 0, "struct compound_state *", - "conninfo_t *" }, - { "nfsv4", "op-write-done", 1, 0, "struct compound_state *", - "nfsv4opinfo_t *" }, + + { "nfsv4", "op-write-done", 0, 0, "struct compound_state *", "conninfo_t *" }, + { "nfsv4", 
"op-write-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" }, { "nfsv4", "op-write-done", 2, 1, "WRITE4res *" }, - { "nfsv4", "cb-recall-start", 0, 0, "rfs4_client_t *", - "conninfo_t *" }, - { "nfsv4", "cb-recall-start", 1, 1, "rfs4_deleg_state_t *", - "nfsv4cbinfo_t *" }, + + { "nfsv4", "cb-recall-start", 0, 0, "rfs4_client_t *", "conninfo_t *" }, + { "nfsv4", "cb-recall-start", 1, 1, "rfs4_deleg_state_t *", "nfsv4cbinfo_t *" }, { "nfsv4", "cb-recall-start", 2, 2, "CB_RECALL4args *" }, - { "nfsv4", "cb-recall-done", 0, 0, "rfs4_client_t *", - "conninfo_t *" }, - { "nfsv4", "cb-recall-done", 1, 1, "rfs4_deleg_state_t *", - "nfsv4cbinfo_t *" }, + + { "nfsv4", "cb-recall-done", 0, 0, "rfs4_client_t *", "conninfo_t *" }, + { "nfsv4", "cb-recall-done", 1, 1, "rfs4_deleg_state_t *", "nfsv4cbinfo_t *" }, { "nfsv4", "cb-recall-done", 2, 2, "CB_RECALL4res *" }, - /* Tables like this get really ugly when line-wrapped. */ - /* BEGIN CSTYLED */ { "smb", "op-Close-start", 0, 0, "smb_request_t *", "conninfo_t *" }, { "smb", "op-Close-start", 1, 0, "smb_request_t *", "smbopinfo_t *" }, { "smb", "op-Close-done", 0, 0, "smb_request_t *", "conninfo_t *" }, diff --git a/usr/src/uts/common/fs/nfs/nfs3_srv.c b/usr/src/uts/common/fs/nfs/nfs3_srv.c index 7f5f4611b3..89d65a4d0f 100644 --- a/usr/src/uts/common/fs/nfs/nfs3_srv.c +++ b/usr/src/uts/common/fs/nfs/nfs3_srv.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2015 Nexenta Systems, Inc. All rights reserved. + * Copyright 2018 Nexenta Systems, Inc. * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved. 
*/ @@ -28,6 +28,7 @@ /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ + #include <sys/param.h> #include <sys/types.h> #include <sys/systm.h> @@ -68,13 +69,18 @@ #include <inet/ip6.h> /* + * Zone global variables of NFSv3 server + */ +typedef struct nfs3_srv { + writeverf3 write3verf; +} nfs3_srv_t; + +/* * These are the interface routines for the server side of the * Network File System. See the NFS version 3 protocol specification * for a description of this interface. */ -static writeverf3 write3verf; - static int sattr3_to_vattr(sattr3 *, struct vattr *); static int vattr_to_fattr3(struct vattr *, fattr3 *); static int vattr_to_wcc_attr(struct vattr *, wcc_attr *); @@ -86,6 +92,15 @@ extern int nfs_loaned_buffers; u_longlong_t nfs3_srv_caller_id; +static nfs3_srv_t * +nfs3_get_srv(void) +{ + nfs_globals_t *ng = nfs_srv_getzg(); + nfs3_srv_t *srv = ng->nfs3_srv; + ASSERT(srv != NULL); + return (srv); +} + /* ARGSUSED */ void rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi, @@ -97,8 +112,9 @@ rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi, vp = nfs3_fhtovp(&args->object, exi); - DTRACE_NFSV3_4(op__getattr__start, struct svc_req *, req, - cred_t *, cr, vnode_t *, vp, GETATTR3args *, args); + DTRACE_NFSV3_5(op__getattr__start, struct svc_req *, req, + cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, + GETATTR3args *, args); if (vp == NULL) { error = ESTALE; @@ -119,8 +135,9 @@ rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi, goto out; resp->status = NFS3_OK; - DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req, - cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp); + DTRACE_NFSV3_5(op__getattr__done, struct svc_req *, req, + cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, + GETATTR3res *, resp); VN_RELE(vp); @@ -134,8 +151,9 @@ out: } else resp->status = puterrno3(error); - DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, 
req, - cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp); + DTRACE_NFSV3_5(op__getattr__done, struct svc_req *, req, + cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, + GETATTR3res *, resp); if (vp != NULL) VN_RELE(vp); @@ -168,8 +186,9 @@ rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi, vp = nfs3_fhtovp(&args->object, exi); - DTRACE_NFSV3_4(op__setattr__start, struct svc_req *, req, - cred_t *, cr, vnode_t *, vp, SETATTR3args *, args); + DTRACE_NFSV3_5(op__setattr__start, struct svc_req *, req, + cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, + SETATTR3args *, args); if (vp == NULL) { error = ESTALE; @@ -330,8 +349,9 @@ rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi, resp->status = NFS3_OK; vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc); - DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req, - cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp); + DTRACE_NFSV3_5(op__setattr__done, struct svc_req *, req, + cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, + SETATTR3res *, resp); VN_RELE(vp); @@ -344,8 +364,9 @@ out: } else resp->status = puterrno3(error); out1: - DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req, - cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp); + DTRACE_NFSV3_5(op__setattr__done, struct svc_req *, req, + cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, + SETATTR3res *, resp); if (vp != NULL) { if (in_crit) @@ -390,16 +411,19 @@ rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi, * location of the public filehandle. 
*/ if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) { - dvp = rootdir; + ASSERT3U(exi->exi_zoneid, ==, curzone->zone_id); + dvp = ZONE_ROOTVP(); VN_HOLD(dvp); - DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req, - cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args); + DTRACE_NFSV3_5(op__lookup__start, struct svc_req *, req, + cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi, + LOOKUP3args *, args); } else { dvp = nfs3_fhtovp(&args->what.dir, exi); - DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req, - cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args); + DTRACE_NFSV3_5(op__lookup__start, struct svc_req *, req, + cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi, + LOOKUP3args *, args); if (dvp == NULL) { error = ESTALE; @@ -421,10 +445,11 @@ rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi, } fhp = &args->what.dir; + ASSERT3U(curzone->zone_id, ==, exi->exi_zoneid); /* exi is non-NULL */ if (strcmp(args->what.name, "..") == 0 && EQFID(&exi->exi_fid, FH3TOFIDP(fhp))) { if ((exi->exi_export.ex_flags & EX_NOHIDE) && - (dvp->v_flag & VROOT)) { + ((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp))) { /* * special case for ".." and 'nohide'exported root */ @@ -455,6 +480,7 @@ rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi, publicfh_flag = TRUE; exi_rele(exi); + exi = NULL; error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &exi, &sec); @@ -538,7 +564,6 @@ rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi, va.va_mask = AT_ALL; vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? 
NULL : &va; - exi_rele(exi); VN_RELE(vp); resp->status = NFS3_OK; @@ -553,9 +578,11 @@ rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi, if (auth_weak) resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR; - DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req, - cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp); + DTRACE_NFSV3_5(op__lookup__done, struct svc_req *, req, + cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi, + LOOKUP3res *, resp); VN_RELE(dvp); + exi_rele(exi); return; @@ -566,12 +593,13 @@ out: } else resp->status = puterrno3(error); out1: + DTRACE_NFSV3_5(op__lookup__done, struct svc_req *, req, + cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi, + LOOKUP3res *, resp); + if (exi != NULL) exi_rele(exi); - DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req, - cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp); - if (dvp != NULL) VN_RELE(dvp); vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes); @@ -603,8 +631,9 @@ rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi, vp = nfs3_fhtovp(&args->object, exi); - DTRACE_NFSV3_4(op__access__start, struct svc_req *, req, - cred_t *, cr, vnode_t *, vp, ACCESS3args *, args); + DTRACE_NFSV3_5(op__access__start, struct svc_req *, req, + cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, + ACCESS3args *, args); if (vp == NULL) { error = ESTALE; @@ -714,8 +743,9 @@ rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi, resp->status = NFS3_OK; vattr_to_post_op_attr(vap, &resp->resok.obj_attributes); - DTRACE_NFSV3_4(op__access__done, struct svc_req *, req, - cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp); + DTRACE_NFSV3_5(op__access__done, struct svc_req *, req, + cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, + ACCESS3res *, resp); VN_RELE(vp); @@ -727,8 +757,9 @@ out: resp->status = NFS3ERR_JUKEBOX; } else resp->status = puterrno3(error); - DTRACE_NFSV3_4(op__access__done, struct svc_req *, req, - cred_t *, cr, vnode_t *, 
vp, ACCESS3res *, resp); + DTRACE_NFSV3_5(op__access__done, struct svc_req *, req, + cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, + ACCESS3res *, resp); if (vp != NULL) VN_RELE(vp); vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes); @@ -761,8 +792,9 @@ rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi, vp = nfs3_fhtovp(&args->symlink, exi); - DTRACE_NFSV3_4(op__readlink__start, struct svc_req *, req, - cred_t *, cr, vnode_t *, vp, READLINK3args *, args); + DTRACE_NFSV3_5(op__readlink__start, struct svc_req *, req, + cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, + READLINK3args *, args); if (vp == NULL) { error = ESTALE; @@ -811,10 +843,11 @@ rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi, if (is_referral) { char *s; size_t strsz; + kstat_named_t *stat = exi->exi_ne->ne_globals->svstat[NFS_V3]; /* Get an artificial symlink based on a referral */ s = build_symlink(vp, cr, &strsz); - global_svstat_ptr[3][NFS_REFERLINKS].value.ui64++; + stat[NFS_REFERLINKS].value.ui64++; DTRACE_PROBE2(nfs3serv__func__referral__reflink, vnode_t *, vp, char *, s); if (s == NULL) @@ -882,8 +915,9 @@ rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi, vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes); resp->resok.data = name; - DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req, - cred_t *, cr, vnode_t *, vp, READLINK3res *, resp); + DTRACE_NFSV3_5(op__readlink__done, struct svc_req *, req, + cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, + READLINK3res *, resp); VN_RELE(vp); if (name != data) @@ -898,8 +932,9 @@ out: } else resp->status = puterrno3(error); out1: - DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req, - cred_t *, cr, vnode_t *, vp, READLINK3res *, resp); + DTRACE_NFSV3_5(op__readlink__done, struct svc_req *, req, + cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, + READLINK3res *, resp); if (vp != NULL) VN_RELE(vp); 
vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes); @@ -949,8 +984,10 @@ rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi, vp = nfs3_fhtovp(&args->file, exi); - DTRACE_NFSV3_4(op__read__start, struct svc_req *, req, - cred_t *, cr, vnode_t *, vp, READ3args *, args); + DTRACE_NFSV3_5(op__read__start, struct svc_req *, req, + cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, + READ3args *, args); + if (vp == NULL) { error = ESTALE; @@ -1205,8 +1242,9 @@ doio_read: } done: - DTRACE_NFSV3_4(op__read__done, struct svc_req *, req, - cred_t *, cr, vnode_t *, vp, READ3res *, resp); + DTRACE_NFSV3_5(op__read__done, struct svc_req *, req, + cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, + READ3res *, resp); VN_RELE(vp); @@ -1222,8 +1260,9 @@ out: } else resp->status = puterrno3(error); out1: - DTRACE_NFSV3_4(op__read__done, struct svc_req *, req, - cred_t *, cr, vnode_t *, vp, READ3res *, resp); + DTRACE_NFSV3_5(op__read__done, struct svc_req *, req, + cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, + READ3res *, resp); if (vp != NULL) { if (need_rwunlock) @@ -1268,6 +1307,7 @@ void rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) { + nfs3_srv_t *ns; int error; vnode_t *vp; struct vattr *bvap = NULL; @@ -1288,14 +1328,18 @@ rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi, vp = nfs3_fhtovp(&args->file, exi); - DTRACE_NFSV3_4(op__write__start, struct svc_req *, req, - cred_t *, cr, vnode_t *, vp, WRITE3args *, args); + DTRACE_NFSV3_5(op__write__start, struct svc_req *, req, + cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, + WRITE3args *, args); if (vp == NULL) { error = ESTALE; goto err; } + ASSERT3U(curzone->zone_id, ==, exi->exi_zoneid); /* exi is non-NULL. 
*/ + ns = nfs3_get_srv(); + if (is_system_labeled()) { bslabel_t *clabel = req->rq_label; @@ -1383,7 +1427,7 @@ rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi, vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc); resp->resok.count = 0; resp->resok.committed = args->stable; - resp->resok.verf = write3verf; + resp->resok.verf = ns->write3verf; goto out; } @@ -1485,7 +1529,7 @@ rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi, vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc); resp->resok.count = args->count - uio.uio_resid; resp->resok.committed = args->stable; - resp->resok.verf = write3verf; + resp->resok.verf = ns->write3verf; goto out; err: @@ -1497,8 +1541,9 @@ err: err1: vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc); out: - DTRACE_NFSV3_4(op__write__done, struct svc_req *, req, - cred_t *, cr, vnode_t *, vp, WRITE3res *, resp); + DTRACE_NFSV3_5(op__write__done, struct svc_req *, req, + cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, + WRITE3res *, resp); if (vp != NULL) { if (rwlock_ret != -1) @@ -1543,8 +1588,9 @@ rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi, dvp = nfs3_fhtovp(&args->where.dir, exi); - DTRACE_NFSV3_4(op__create__start, struct svc_req *, req, - cred_t *, cr, vnode_t *, dvp, CREATE3args *, args); + DTRACE_NFSV3_5(op__create__start, struct svc_req *, req, + cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi, + CREATE3args *, args); if (dvp == NULL) { error = ESTALE; @@ -1843,8 +1889,9 @@ tryagain: vattr_to_post_op_attr(vap, &resp->resok.obj_attributes); vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc); - DTRACE_NFSV3_4(op__create__done, struct svc_req *, req, - cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp); + DTRACE_NFSV3_5(op__create__done, struct svc_req *, req, + cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi, + CREATE3res *, resp); VN_RELE(dvp); return; @@ -1856,8 +1903,9 @@ out: } else resp->status = puterrno3(error); out1: - 
DTRACE_NFSV3_4(op__create__done, struct svc_req *, req, - cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp); + DTRACE_NFSV3_5(op__create__done, struct svc_req *, req, + cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi, + CREATE3res *, resp); if (name != NULL && name != args->where.name) kmem_free(name, MAXPATHLEN + 1); @@ -1900,8 +1948,9 @@ rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi, dvp = nfs3_fhtovp(&args->where.dir, exi); - DTRACE_NFSV3_4(op__mkdir__start, struct svc_req *, req, - cred_t *, cr, vnode_t *, dvp, MKDIR3args *, args); + DTRACE_NFSV3_5(op__mkdir__start, struct svc_req *, req, + cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi, + MKDIR3args *, args); if (dvp == NULL) { error = ESTALE; @@ -2000,8 +2049,9 @@ rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi, vattr_to_post_op_attr(vap, &resp->resok.obj_attributes); vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc); - DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req, - cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp); + DTRACE_NFSV3_5(op__mkdir__done, struct svc_req *, req, + cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi, + MKDIR3res *, resp); VN_RELE(dvp); return; @@ -2013,8 +2063,9 @@ out: } else resp->status = puterrno3(error); out1: - DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req, - cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp); + DTRACE_NFSV3_5(op__mkdir__done, struct svc_req *, req, + cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi, + MKDIR3res *, resp); if (dvp != NULL) VN_RELE(dvp); vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc); @@ -2049,8 +2100,9 @@ rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi, dvp = nfs3_fhtovp(&args->where.dir, exi); - DTRACE_NFSV3_4(op__symlink__start, struct svc_req *, req, - cred_t *, cr, vnode_t *, dvp, SYMLINK3args *, args); + DTRACE_NFSV3_5(op__symlink__start, struct svc_req *, req, + cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi, + 
SYMLINK3args *, args); if (dvp == NULL) { error = ESTALE; @@ -2187,8 +2239,9 @@ out: if (symdata != NULL && symdata != args->symlink.symlink_data) kmem_free(symdata, MAXPATHLEN + 1); - DTRACE_NFSV3_4(op__symlink__done, struct svc_req *, req, - cred_t *, cr, vnode_t *, dvp, SYMLINK3res *, resp); + DTRACE_NFSV3_5(op__symlink__done, struct svc_req *, req, + cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi, + SYMLINK3res *, resp); if (dvp != NULL) VN_RELE(dvp); @@ -2225,8 +2278,9 @@ rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi, dvp = nfs3_fhtovp(&args->where.dir, exi); - DTRACE_NFSV3_4(op__mknod__start, struct svc_req *, req, - cred_t *, cr, vnode_t *, dvp, MKNOD3args *, args); + DTRACE_NFSV3_5(op__mknod__start, struct svc_req *, req, + cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi, + MKNOD3args *, args); if (dvp == NULL) { error = ESTALE; @@ -2372,8 +2426,9 @@ rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi, vattr_to_post_op_attr(vap, &resp->resok.obj_attributes); vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc); - DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req, - cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp); + DTRACE_NFSV3_5(op__mknod__done, struct svc_req *, req, + cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi, + MKNOD3res *, resp); VN_RELE(dvp); return; @@ -2384,8 +2439,9 @@ out: } else resp->status = puterrno3(error); out1: - DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req, - cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp); + DTRACE_NFSV3_5(op__mknod__done, struct svc_req *, req, + cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi, + MKNOD3res *, resp); if (dvp != NULL) VN_RELE(dvp); vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc); @@ -2417,8 +2473,9 @@ rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi, vp = nfs3_fhtovp(&args->object.dir, exi); - DTRACE_NFSV3_4(op__remove__start, struct svc_req *, req, - cred_t *, cr, vnode_t *, vp, 
REMOVE3args *, args); + DTRACE_NFSV3_5(op__remove__start, struct svc_req *, req, + cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, + REMOVE3args *, args); if (vp == NULL) { error = ESTALE; @@ -2526,8 +2583,9 @@ err: err1: vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc); out: - DTRACE_NFSV3_4(op__remove__done, struct svc_req *, req, - cred_t *, cr, vnode_t *, vp, REMOVE3res *, resp); + DTRACE_NFSV3_5(op__remove__done, struct svc_req *, req, + cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, + REMOVE3res *, resp); if (name != NULL && name != args->object.name) kmem_free(name, MAXPATHLEN + 1); @@ -2561,8 +2619,9 @@ rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi, vp = nfs3_fhtovp(&args->object.dir, exi); - DTRACE_NFSV3_4(op__rmdir__start, struct svc_req *, req, - cred_t *, cr, vnode_t *, vp, RMDIR3args *, args); + DTRACE_NFSV3_5(op__rmdir__start, struct svc_req *, req, + cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, + RMDIR3args *, args); if (vp == NULL) { error = ESTALE; @@ -2618,7 +2677,8 @@ rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi, goto err1; } - error = VOP_RMDIR(vp, name, rootdir, cr, NULL, 0); + ASSERT3U(exi->exi_zoneid, ==, curzone->zone_id); + error = VOP_RMDIR(vp, name, ZONE_ROOTVP(), cr, NULL, 0); if (name != args->object.name) kmem_free(name, MAXPATHLEN + 1); @@ -2656,8 +2716,9 @@ err: err1: vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc); out: - DTRACE_NFSV3_4(op__rmdir__done, struct svc_req *, req, - cred_t *, cr, vnode_t *, vp, RMDIR3res *, resp); + DTRACE_NFSV3_5(op__rmdir__done, struct svc_req *, req, + cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, + RMDIR3res *, resp); if (vp != NULL) VN_RELE(vp); @@ -2702,8 +2763,9 @@ rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi, fvp = nfs3_fhtovp(&args->from.dir, exi); - DTRACE_NFSV3_4(op__rename__start, struct svc_req *, req, - cred_t *, cr, vnode_t *, fvp, RENAME3args *, args); + 
DTRACE_NFSV3_5(op__rename__start, struct svc_req *, req, + cred_t *, cr, vnode_t *, fvp, struct exportinfo *, exi, + RENAME3args *, args); if (fvp == NULL) { error = ESTALE; @@ -2820,10 +2882,10 @@ rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi, } /* - * Check for renaming over a delegated file. Check rfs4_deleg_policy + * Check for renaming over a delegated file. Check nfs4_deleg_policy * first to avoid VOP_LOOKUP if possible. */ - if (rfs4_deleg_policy != SRV_NEVER_DELEGATE && + if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE && VOP_LOOKUP(tvp, toname, &targvp, NULL, 0, NULL, cr, NULL, NULL, NULL) == 0) { @@ -2887,8 +2949,9 @@ out: if (toname != NULL && toname != args->to.name) kmem_free(toname, MAXPATHLEN + 1); - DTRACE_NFSV3_4(op__rename__done, struct svc_req *, req, - cred_t *, cr, vnode_t *, fvp, RENAME3res *, resp); + DTRACE_NFSV3_5(op__rename__done, struct svc_req *, req, + cred_t *, cr, vnode_t *, fvp, struct exportinfo *, exi, + RENAME3res *, resp); if (fvp != NULL) VN_RELE(fvp); if (tvp != NULL) @@ -2928,8 +2991,9 @@ rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi, vp = nfs3_fhtovp(&args->file, exi); - DTRACE_NFSV3_4(op__link__start, struct svc_req *, req, - cred_t *, cr, vnode_t *, vp, LINK3args *, args); + DTRACE_NFSV3_5(op__link__start, struct svc_req *, req, + cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, + LINK3args *, args); if (vp == NULL) { error = ESTALE; @@ -3041,8 +3105,9 @@ rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi, vattr_to_post_op_attr(vap, &resp->resok.file_attributes); vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc); - DTRACE_NFSV3_4(op__link__done, struct svc_req *, req, - cred_t *, cr, vnode_t *, vp, LINK3res *, resp); + DTRACE_NFSV3_5(op__link__done, struct svc_req *, req, + cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, + LINK3res *, resp); VN_RELE(vp); @@ -3058,8 +3123,9 @@ out1: if (name != NULL && name != args->link.name) kmem_free(name, 
MAXPATHLEN + 1); - DTRACE_NFSV3_4(op__link__done, struct svc_req *, req, - cred_t *, cr, vnode_t *, vp, LINK3res *, resp); + DTRACE_NFSV3_5(op__link__done, struct svc_req *, req, + cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, + LINK3res *, resp); if (vp != NULL) VN_RELE(vp); @@ -3127,8 +3193,9 @@ rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi, vp = nfs3_fhtovp(&args->dir, exi); - DTRACE_NFSV3_4(op__readdir__start, struct svc_req *, req, - cred_t *, cr, vnode_t *, vp, READDIR3args *, args); + DTRACE_NFSV3_5(op__readdir__start, struct svc_req *, req, + cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, + READDIR3args *, args); if (vp == NULL) { error = ESTALE; @@ -3292,8 +3359,9 @@ rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi, resp->resok.count = args->count; resp->resok.freecount = count; - DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req, - cred_t *, cr, vnode_t *, vp, READDIR3res *, resp); + DTRACE_NFSV3_5(op__readdir__done, struct svc_req *, req, + cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, + READDIR3res *, resp); VN_RELE(vp); @@ -3306,8 +3374,11 @@ out: } else resp->status = puterrno3(error); out1: - DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req, - cred_t *, cr, vnode_t *, vp, READDIR3res *, resp); + vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes); + + DTRACE_NFSV3_5(op__readdir__done, struct svc_req *, req, + cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, + READDIR3res *, resp); if (vp != NULL) { VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); @@ -3398,8 +3469,9 @@ rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp, vp = nfs3_fhtovp(&args->dir, exi); - DTRACE_NFSV3_4(op__readdirplus__start, struct svc_req *, req, - cred_t *, cr, vnode_t *, vp, READDIRPLUS3args *, args); + DTRACE_NFSV3_5(op__readdirplus__start, struct svc_req *, req, + cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, + READDIRPLUS3args *, args); if (vp == NULL) { 
error = ESTALE; @@ -3681,11 +3753,9 @@ good: resp->resok.count = args->dircount - ret; resp->resok.maxcount = args->maxcount; - DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req, - cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp); - if (ndata != data) - kmem_free(data, args->dircount); - + DTRACE_NFSV3_5(op__readdirplus__done, struct svc_req *, req, + cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, + READDIRPLUS3res *, resp); VN_RELE(vp); @@ -3699,8 +3769,11 @@ out: resp->status = puterrno3(error); } out1: - DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req, - cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp); + vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes); + + DTRACE_NFSV3_5(op__readdirplus__done, struct svc_req *, req, + cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, + READDIRPLUS3res *, resp); if (vp != NULL) { VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); @@ -3746,8 +3819,9 @@ rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi, vp = nfs3_fhtovp(&args->fsroot, exi); - DTRACE_NFSV3_4(op__fsstat__start, struct svc_req *, req, - cred_t *, cr, vnode_t *, vp, FSSTAT3args *, args); + DTRACE_NFSV3_5(op__fsstat__start, struct svc_req *, req, + cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, + FSSTAT3args *, args); if (vp == NULL) { error = ESTALE; @@ -3797,8 +3871,9 @@ rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi, resp->resok.afiles = (size3)sb.f_favail; resp->resok.invarsec = 0; - DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req, - cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp); + DTRACE_NFSV3_5(op__fsstat__done, struct svc_req *, req, + cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, + FSSTAT3res *, resp); VN_RELE(vp); return; @@ -3810,8 +3885,9 @@ out: } else resp->status = puterrno3(error); out1: - DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req, - cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp); + DTRACE_NFSV3_5(op__fsstat__done, 
struct svc_req *, req, + cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, + FSSTAT3res *, resp); if (vp != NULL) VN_RELE(vp); @@ -3839,8 +3915,9 @@ rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi, vp = nfs3_fhtovp(&args->fsroot, exi); - DTRACE_NFSV3_4(op__fsinfo__start, struct svc_req *, req, - cred_t *, cr, vnode_t *, vp, FSINFO3args *, args); + DTRACE_NFSV3_5(op__fsinfo__start, struct svc_req *, req, + cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, + FSINFO3args *, args); if (vp == NULL) { if (curthread->t_flag & T_WOULDBLOCK) { @@ -3914,16 +3991,18 @@ rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi, resp->resok.properties = FSF3_LINK | FSF3_SYMLINK | FSF3_HOMOGENEOUS | FSF3_CANSETTIME; - DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req, - cred_t *, cr, vnode_t *, vp, FSINFO3res *, resp); + DTRACE_NFSV3_5(op__fsinfo__done, struct svc_req *, req, + cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, + FSINFO3res *, resp); VN_RELE(vp); return; out: - DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req, - cred_t *, cr, vnode_t *, NULL, FSINFO3res *, resp); + DTRACE_NFSV3_5(op__fsinfo__done, struct svc_req *, req, + cred_t *, cr, vnode_t *, NULL, struct exportinfo *, exi, + FSINFO3res *, resp); if (vp != NULL) VN_RELE(vp); } @@ -3949,8 +4028,9 @@ rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi, vp = nfs3_fhtovp(&args->object, exi); - DTRACE_NFSV3_4(op__pathconf__start, struct svc_req *, req, - cred_t *, cr, vnode_t *, vp, PATHCONF3args *, args); + DTRACE_NFSV3_5(op__pathconf__start, struct svc_req *, req, + cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, + PATHCONF3args *, args); if (vp == NULL) { error = ESTALE; @@ -4006,8 +4086,9 @@ rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi, vattr_to_post_op_attr(vap, &resp->resok.obj_attributes); resp->resok.info.case_insensitive = FALSE; resp->resok.info.case_preserving = TRUE; - 
DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req, - cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp); + DTRACE_NFSV3_5(op__pathconf__done, struct svc_req *, req, + cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, + PATHCONF3res *, resp); VN_RELE(vp); return; @@ -4018,8 +4099,9 @@ out: } else resp->status = puterrno3(error); out1: - DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req, - cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp); + DTRACE_NFSV3_5(op__pathconf__done, struct svc_req *, req, + cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, + PATHCONF3res *, resp); if (vp != NULL) VN_RELE(vp); vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes); @@ -4036,6 +4118,7 @@ void rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) { + nfs3_srv_t *ns; int error; vnode_t *vp; struct vattr *bvap; @@ -4048,14 +4131,17 @@ rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi, vp = nfs3_fhtovp(&args->file, exi); - DTRACE_NFSV3_4(op__commit__start, struct svc_req *, req, - cred_t *, cr, vnode_t *, vp, COMMIT3args *, args); + DTRACE_NFSV3_5(op__commit__start, struct svc_req *, req, + cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, + COMMIT3args *, args); if (vp == NULL) { error = ESTALE; goto out; } + ASSERT3U(curzone->zone_id, ==, exi->exi_zoneid); /* exi is non-NULL. 
*/ + ns = nfs3_get_srv(); bva.va_mask = AT_ALL; error = VOP_GETATTR(vp, &bva, 0, cr, NULL); @@ -4108,10 +4194,11 @@ rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi, resp->status = NFS3_OK; vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc); - resp->resok.verf = write3verf; + resp->resok.verf = ns->write3verf; - DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req, - cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp); + DTRACE_NFSV3_5(op__commit__done, struct svc_req *, req, + cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, + COMMIT3res *, resp); VN_RELE(vp); @@ -4124,8 +4211,9 @@ out: } else resp->status = puterrno3(error); out1: - DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req, - cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp); + DTRACE_NFSV3_5(op__commit__done, struct svc_req *, req, + cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi, + COMMIT3res *, resp); if (vp != NULL) VN_RELE(vp); @@ -4203,7 +4291,7 @@ sattr3_to_vattr(sattr3 *sap, struct vattr *vap) return (0); } -static ftype3 vt_to_nf3[] = { +static const ftype3 vt_to_nf3[] = { 0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0 }; @@ -4285,20 +4373,39 @@ vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap) static void vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp) { - vattr_to_pre_op_attr(bvap, &wccp->before); vattr_to_post_op_attr(avap, &wccp->after); } +static int +rdma_setup_read_data3(READ3args *args, READ3resok *rok) +{ + struct clist *wcl; + int wlist_len; + count3 count = rok->count; + + wcl = args->wlist; + if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) + return (FALSE); + + wcl = args->wlist; + rok->wlist_len = wlist_len; + rok->wlist = wcl; + return (TRUE); +} + void -rfs3_srvrinit(void) +rfs3_srv_zone_init(nfs_globals_t *ng) { + nfs3_srv_t *ns; struct rfs3_verf_overlay { uint_t id; /* a "unique" identifier */ int ts; /* a unique timestamp */ } *verfp; timestruc_t now; + ns = 
kmem_zalloc(sizeof (*ns), KM_SLEEP); + /* * The following algorithm attempts to find a unique verifier * to be used as the write verifier returned from the server @@ -4322,37 +4429,34 @@ rfs3_srvrinit(void) * We ASSERT that this constant logic expression is * always true because in the past, it wasn't. */ - ASSERT(sizeof (*verfp) <= sizeof (write3verf)); + ASSERT(sizeof (*verfp) <= sizeof (ns->write3verf)); #endif gethrestime(&now); - verfp = (struct rfs3_verf_overlay *)&write3verf; + verfp = (struct rfs3_verf_overlay *)&ns->write3verf; verfp->ts = (int)now.tv_sec; verfp->id = zone_get_hostid(NULL); if (verfp->id == 0) verfp->id = (uint_t)now.tv_nsec; - nfs3_srv_caller_id = fs_new_caller_id(); - + ng->nfs3_srv = ns; } -static int -rdma_setup_read_data3(READ3args *args, READ3resok *rok) +void +rfs3_srv_zone_fini(nfs_globals_t *ng) { - struct clist *wcl; - int wlist_len; - count3 count = rok->count; + nfs3_srv_t *ns = ng->nfs3_srv; - wcl = args->wlist; - if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) { - return (FALSE); - } + ng->nfs3_srv = NULL; - wcl = args->wlist; - rok->wlist_len = wlist_len; - rok->wlist = wcl; - return (TRUE); + kmem_free(ns, sizeof (*ns)); +} + +void +rfs3_srvrinit(void) +{ + nfs3_srv_caller_id = fs_new_caller_id(); } void diff --git a/usr/src/uts/common/fs/nfs/nfs4_callback.c b/usr/src/uts/common/fs/nfs/nfs4_callback.c index 729c8658fe..0d604fca70 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_callback.c +++ b/usr/src/uts/common/fs/nfs/nfs4_callback.c @@ -560,13 +560,17 @@ cb_compound(CB_COMPOUND4args *args, CB_COMPOUND4res *resp, struct svc_req *req, cs.cont = TRUE; /* - * Form a reply tag by copying over the reqeuest tag. + * Form a reply tag by copying over the request tag. 
*/ resp->tag.utf8string_len = args->tag.utf8string_len; - resp->tag.utf8string_val = kmem_alloc(resp->tag.utf8string_len, - KM_SLEEP); - bcopy(args->tag.utf8string_val, resp->tag.utf8string_val, - args->tag.utf8string_len); + if (args->tag.utf8string_len != 0) { + resp->tag.utf8string_val = + kmem_alloc(resp->tag.utf8string_len, KM_SLEEP); + bcopy(args->tag.utf8string_val, resp->tag.utf8string_val, + args->tag.utf8string_len); + } else { + resp->tag.utf8string_val = NULL; + } /* * XXX for now, minorversion should be zero diff --git a/usr/src/uts/common/fs/nfs/nfs4_db.c b/usr/src/uts/common/fs/nfs/nfs4_db.c index fbecb86f64..b97567be70 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_db.c +++ b/usr/src/uts/common/fs/nfs/nfs4_db.c @@ -18,10 +18,15 @@ * * CDDL HEADER END */ + /* * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. */ +/* + * Copyright 2018 Nexenta Systems, Inc. + */ + #include <sys/systm.h> #include <sys/cmn_err.h> #include <sys/kmem.h> @@ -249,6 +254,50 @@ rfs4_database_destroy(rfs4_database_t *db) kmem_free(db, sizeof (rfs4_database_t)); } +/* + * Used to get the correct kmem_cache database for the state table being + * created. + * Helper function for rfs4_table_create + */ +static kmem_cache_t * +get_db_mem_cache(char *name) +{ + int i; + + for (i = 0; i < RFS4_DB_MEM_CACHE_NUM; i++) { + if (strcmp(name, rfs4_db_mem_cache_table[i].r_db_name) == 0) + return (rfs4_db_mem_cache_table[i].r_db_mem_cache); + } + /* + * There is no associated kmem cache for this NFS4 server state + * table name + */ + return (NULL); +} + +/* + * Used to initialize the global NFSv4 server state database. + * Helper funtion for rfs4_state_g_init and called when module is loaded. 
+ */ +kmem_cache_t * +/* CSTYLED */ +nfs4_init_mem_cache(char *cache_name, uint32_t idxcnt, uint32_t size, uint32_t idx) +{ + kmem_cache_t *mem_cache = kmem_cache_create(cache_name, + sizeof (rfs4_dbe_t) + idxcnt * sizeof (rfs4_link_t) + size, + 0, + rfs4_dbe_kmem_constructor, + rfs4_dbe_kmem_destructor, + NULL, + NULL, + NULL, + 0); + (void) strlcpy(rfs4_db_mem_cache_table[idx].r_db_name, cache_name, + strlen(cache_name) + 1); + rfs4_db_mem_cache_table[idx].r_db_mem_cache = mem_cache; + return (mem_cache); +} + rfs4_table_t * rfs4_table_create(rfs4_database_t *db, char *tabname, time_t max_cache_time, uint32_t idxcnt, bool_t (*create)(rfs4_entry_t, void *), @@ -304,15 +353,11 @@ rfs4_table_create(rfs4_database_t *db, char *tabname, time_t max_cache_time, table->dbt_destroy = destroy; table->dbt_expiry = expiry; - table->dbt_mem_cache = kmem_cache_create(cache_name, - sizeof (rfs4_dbe_t) + idxcnt * sizeof (rfs4_link_t) + size, - 0, - rfs4_dbe_kmem_constructor, - rfs4_dbe_kmem_destructor, - NULL, - table, - NULL, - 0); + /* + * get the correct kmem_cache for this table type based on the name. + */ + table->dbt_mem_cache = get_db_mem_cache(cache_name); + kmem_free(cache_name, len+13); table->dbt_debug = db->db_debug_flags; @@ -364,7 +409,7 @@ rfs4_table_destroy(rfs4_database_t *db, rfs4_table_t *table) kmem_free(table->dbt_name, strlen(table->dbt_name) + 1); if (table->dbt_id_space) id_space_destroy(table->dbt_id_space); - kmem_cache_destroy(table->dbt_mem_cache); + table->dbt_mem_cache = NULL; kmem_free(table, sizeof (rfs4_table_t)); } @@ -683,12 +728,14 @@ retry: boolean_t rfs4_cpr_callb(void *arg, int code) { - rfs4_table_t *table = rfs4_client_tab; rfs4_bucket_t *buckets, *bp; rfs4_link_t *l; rfs4_client_t *cp; int i; + nfs4_srv_t *nsrv4 = nfs4_get_srv(); + rfs4_table_t *table = nsrv4->rfs4_client_tab; + /* * We get called for Suspend and Resume events. * For the suspend case we simply don't care! 
Nor do we care if @@ -879,6 +926,7 @@ reaper_thread(caddr_t *arg) table->dbt_db->db_shutdown_count--; cv_signal(&table->dbt_db->db_shutdown_wait); mutex_exit(table->dbt_db->db_lock); + zthread_exit(); } static void @@ -887,7 +935,7 @@ rfs4_start_reaper(rfs4_table_t *table) if (table->dbt_max_cache_time == 0) return; - (void) thread_create(NULL, 0, reaper_thread, table, 0, &p0, TS_RUN, + (void) zthread_create(NULL, 0, reaper_thread, table, 0, minclsyspri); } diff --git a/usr/src/uts/common/fs/nfs/nfs4_dispatch.c b/usr/src/uts/common/fs/nfs/nfs4_dispatch.c index fbff936e09..1fdfd0f601 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_dispatch.c +++ b/usr/src/uts/common/fs/nfs/nfs4_dispatch.c @@ -24,6 +24,10 @@ * Use is subject to license terms. */ +/* + * Copyright 2018 Nexenta Systems, Inc. + */ + #include <sys/systm.h> #include <sys/sdt.h> #include <rpc/types.h> @@ -39,11 +43,6 @@ #define NFS4_MAX_MINOR_VERSION 0 /* - * This is the duplicate request cache for NFSv4 - */ -rfs4_drc_t *nfs4_drc = NULL; - -/* * The default size of the duplicate request cache */ uint32_t nfs4_drc_max = 8 * 1024; @@ -94,12 +93,12 @@ rfs4_init_drc(uint32_t drc_size, uint32_t drc_hash_size) * Destroy a duplicate request cache. 
*/ void -rfs4_fini_drc(rfs4_drc_t *drc) +rfs4_fini_drc(void) { + nfs4_srv_t *nsrv4 = nfs4_get_srv(); + rfs4_drc_t *drc = nsrv4->nfs4_drc; rfs4_dupreq_t *drp, *drp_next; - ASSERT(drc); - /* iterate over the dr_cache and free the enties */ for (drp = list_head(&(drc->dr_cache)); drp != NULL; drp = drp_next) { @@ -356,25 +355,25 @@ rfs4_find_dr(struct svc_req *req, rfs4_drc_t *drc, rfs4_dupreq_t **dup) * * Passed into this function are:- * - * disp A pointer to our dispatch table entry - * req The request to process - * xprt The server transport handle - * ap A pointer to the arguments + * disp A pointer to our dispatch table entry + * req The request to process + * xprt The server transport handle + * ap A pointer to the arguments * * * When appropriate this function is responsible for inserting * the reply into the duplicate cache or replaying an existing * cached reply. * - * dr_stat reflects the state of the duplicate request that - * has been inserted into or retrieved from the cache + * dr_stat reflects the state of the duplicate request that + * has been inserted into or retrieved from the cache * * drp is the duplicate request entry * */ int -rfs4_dispatch(struct rpcdisp *disp, struct svc_req *req, - SVCXPRT *xprt, char *ap) +rfs4_dispatch(struct rpcdisp *disp, struct svc_req *req, SVCXPRT *xprt, + char *ap) { COMPOUND4res res_buf; @@ -386,6 +385,8 @@ rfs4_dispatch(struct rpcdisp *disp, struct svc_req *req, int dr_stat = NFS4_NOT_DUP; rfs4_dupreq_t *drp = NULL; int rv; + nfs4_srv_t *nsrv4 = nfs4_get_srv(); + rfs4_drc_t *nfs4_drc = nsrv4->nfs4_drc; ASSERT(disp); @@ -544,13 +545,17 @@ rfs4_minorvers_mismatch(struct svc_req *req, SVCXPRT *xprt, void *args) resp = &res_buf; /* - * Form a reply tag by copying over the reqeuest tag. + * Form a reply tag by copying over the request tag. 
*/ - resp->tag.utf8string_val = - kmem_alloc(argsp->tag.utf8string_len, KM_SLEEP); resp->tag.utf8string_len = argsp->tag.utf8string_len; - bcopy(argsp->tag.utf8string_val, resp->tag.utf8string_val, - resp->tag.utf8string_len); + if (argsp->tag.utf8string_len != 0) { + resp->tag.utf8string_val = + kmem_alloc(argsp->tag.utf8string_len, KM_SLEEP); + bcopy(argsp->tag.utf8string_val, resp->tag.utf8string_val, + resp->tag.utf8string_len); + } else { + resp->tag.utf8string_val = NULL; + } resp->array_len = 0; resp->array = NULL; resp->status = NFS4ERR_MINOR_VERS_MISMATCH; @@ -575,11 +580,15 @@ rfs4_resource_err(struct svc_req *req, COMPOUND4args *argsp) /* * Form a reply tag by copying over the request tag. */ - rbp->tag.utf8string_val = - kmem_alloc(argsp->tag.utf8string_len, KM_SLEEP); rbp->tag.utf8string_len = argsp->tag.utf8string_len; - bcopy(argsp->tag.utf8string_val, rbp->tag.utf8string_val, - rbp->tag.utf8string_len); + if (argsp->tag.utf8string_len != 0) { + rbp->tag.utf8string_val = + kmem_alloc(argsp->tag.utf8string_len, KM_SLEEP); + bcopy(argsp->tag.utf8string_val, rbp->tag.utf8string_val, + rbp->tag.utf8string_len); + } else { + rbp->tag.utf8string_val = NULL; + } rbp->array_len = 1; rbp->array = kmem_zalloc(rbp->array_len * sizeof (nfs_resop4), diff --git a/usr/src/uts/common/fs/nfs/nfs4_srv.c b/usr/src/uts/common/fs/nfs/nfs4_srv.c index 30c45a71cd..757964eb84 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_srv.c +++ b/usr/src/uts/common/fs/nfs/nfs4_srv.c @@ -20,9 +20,7 @@ */ /* - * Copyright 2016 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2016 by Delphix. All rights reserved. */ /* @@ -30,6 +28,12 @@ * All Rights Reserved */ +/* + * Copyright (c) 2012, 2016 by Delphix. All rights reserved. + * Copyright 2019 Nexenta Systems, Inc. + * Copyright 2019 Nexenta by DDN, Inc. 
+ */ + #include <sys/param.h> #include <sys/types.h> #include <sys/systm.h> @@ -66,10 +70,12 @@ #include <rpc/svc.h> #include <nfs/nfs.h> +#include <nfs/nfssys.h> #include <nfs/export.h> #include <nfs/nfs_cmd.h> #include <nfs/lm.h> #include <nfs/nfs4.h> +#include <nfs/nfs4_drc.h> #include <sys/strsubr.h> #include <sys/strsun.h> @@ -114,8 +120,8 @@ static int rdma_setup_read_data4(READ4args *, READ4res *); * sizeof nfsstat4 (4 bytes) + * sizeof verifier4 (8 bytes) + * sizeof entry4list bool (4 bytes) + - * sizeof entry4 (36 bytes) + - * sizeof eof bool (4 bytes) + * sizeof entry4 (36 bytes) + + * sizeof eof bool (4 bytes) * * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to * VOP_READDIR. Its value is the size of the maximum possible dirent @@ -147,108 +153,105 @@ static int rdma_setup_read_data4(READ4args *, READ4res *); #define DIRENT64_TO_DIRCOUNT(dp) \ (3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen)) -time_t rfs4_start_time; /* Initialized in rfs4_srvrinit */ -static sysid_t lockt_sysid; /* dummy sysid for all LOCKT calls */ +static sysid_t lockt_sysid; /* dummy sysid for all LOCKT calls */ u_longlong_t nfs4_srv_caller_id; uint_t nfs4_srv_vkey = 0; -verifier4 Write4verf; -verifier4 Readdir4verf; - void rfs4_init_compound_state(struct compound_state *); static void nullfree(caddr_t); static void rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void 
rfs4_op_create_free(nfs_resop4 *resop); static void rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *, - struct svc_req *, struct compound_state *); + struct svc_req *, struct compound_state *); static void rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *, - struct svc_req *, struct compound_state *); + struct svc_req *, struct compound_state *); static void rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_getattr_free(nfs_resop4 *); static void rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_getfh_free(nfs_resop4 *); static void rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void lock_denied_free(nfs_resop4 *); static void rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, - struct svc_req *req, struct compound_state *cs); + struct svc_req *req, struct compound_state *cs); static void rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct 
svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *, - struct svc_req *, struct compound_state *); + struct svc_req *, struct compound_state *); static void rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *, - struct svc_req *, struct compound_state *); + struct svc_req *, struct compound_state *); static void rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_read_free(nfs_resop4 *); static void rfs4_op_readdir_free(nfs_resop4 *resop); static void rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_readlink_free(nfs_resop4 *); static void rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *, - struct svc_req *, struct compound_state *); + struct svc_req *, struct compound_state *); static void rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void 
rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *, - struct svc_req *, struct compound_state *); + struct svc_req *, struct compound_state *); static void rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *, - struct svc_req *req, struct compound_state *); + struct svc_req *req, struct compound_state *); static void rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *, - struct compound_state *); + struct compound_state *); static void rfs4_op_secinfo_free(nfs_resop4 *); -static nfsstat4 check_open_access(uint32_t, - struct compound_state *, struct svc_req *); -nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *); -void rfs4_ss_clid(rfs4_client_t *); +static nfsstat4 check_open_access(uint32_t, struct compound_state *, + struct svc_req *); +nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *); +void rfs4_ss_clid(nfs4_srv_t *, rfs4_client_t *); + /* * translation table for attrs @@ -262,19 +265,17 @@ struct nfs4_ntov_table { static void nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp); static void nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp, - struct nfs4_svgetit_arg *sargp); + struct nfs4_svgetit_arg *sargp); static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs, struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd); +static void hanfsv4_failover(nfs4_srv_t *); + fem_t *deleg_rdops; fem_t *deleg_wrops; -rfs4_servinst_t *rfs4_cur_servinst = NULL; /* current server instance */ -kmutex_t rfs4_servinst_lock; /* protects linked list */ -int rfs4_seen_first_compound; /* set first time we see one */ - /* 
* NFS4 op dispatch table */ @@ -466,7 +467,7 @@ static char *rfs4_op_string[] = { }; #endif -void rfs4_ss_chkclid(rfs4_client_t *); +void rfs4_ss_chkclid(nfs4_srv_t *, rfs4_client_t *); extern size_t strlcpy(char *dst, const char *src, size_t dstsize); @@ -499,13 +500,22 @@ static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = { NULL, NULL }; -int -rfs4_srvrinit(void) +nfs4_srv_t * +nfs4_get_srv(void) +{ + nfs_globals_t *ng = nfs_srv_getzg(); + nfs4_srv_t *srv = ng->nfs4_srv; + ASSERT(srv != NULL); + return (srv); +} + +void +rfs4_srv_zone_init(nfs_globals_t *ng) { + nfs4_srv_t *nsrv4; timespec32_t verf; - int error; - extern void rfs4_attr_init(); - extern krwlock_t rfs4_deleg_policy_lock; + + nsrv4 = kmem_zalloc(sizeof (*nsrv4), KM_SLEEP); /* * The following algorithm attempts to find a unique verifier @@ -535,61 +545,113 @@ rfs4_srvrinit(void) verf.tv_sec = (time_t)tverf.tv_sec; verf.tv_nsec = tverf.tv_nsec; } + nsrv4->write4verf = *(uint64_t *)&verf; + + /* Used to manage create/destroy of server state */ + nsrv4->nfs4_server_state = NULL; + nsrv4->nfs4_cur_servinst = NULL; + nsrv4->nfs4_deleg_policy = SRV_NEVER_DELEGATE; + mutex_init(&nsrv4->deleg_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&nsrv4->state_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&nsrv4->servinst_lock, NULL, MUTEX_DEFAULT, NULL); + rw_init(&nsrv4->deleg_policy_lock, NULL, RW_DEFAULT, NULL); + + ng->nfs4_srv = nsrv4; +} - Write4verf = *(uint64_t *)&verf; +void +rfs4_srv_zone_fini(nfs_globals_t *ng) +{ + nfs4_srv_t *nsrv4 = ng->nfs4_srv; - rfs4_attr_init(); - mutex_init(&rfs4_deleg_lock, NULL, MUTEX_DEFAULT, NULL); + ng->nfs4_srv = NULL; - /* Used to manage create/destroy of server state */ - mutex_init(&rfs4_state_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_destroy(&nsrv4->deleg_lock); + mutex_destroy(&nsrv4->state_lock); + mutex_destroy(&nsrv4->servinst_lock); + rw_destroy(&nsrv4->deleg_policy_lock); - /* Used to manage access to server instance linked list */ - 
mutex_init(&rfs4_servinst_lock, NULL, MUTEX_DEFAULT, NULL); + kmem_free(nsrv4, sizeof (*nsrv4)); +} - /* Used to manage access to rfs4_deleg_policy */ - rw_init(&rfs4_deleg_policy_lock, NULL, RW_DEFAULT, NULL); +void +rfs4_srvrinit(void) +{ + extern void rfs4_attr_init(); - error = fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops); - if (error != 0) { + rfs4_attr_init(); + + if (fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops) != 0) { rfs4_disable_delegation(); - } else { - error = fem_create("deleg_wrops", nfs4_wr_deleg_tmpl, - &deleg_wrops); - if (error != 0) { - rfs4_disable_delegation(); - fem_free(deleg_rdops); - } + } else if (fem_create("deleg_wrops", nfs4_wr_deleg_tmpl, + &deleg_wrops) != 0) { + rfs4_disable_delegation(); + fem_free(deleg_rdops); } nfs4_srv_caller_id = fs_new_caller_id(); - lockt_sysid = lm_alloc_sysidt(); - vsd_create(&nfs4_srv_vkey, NULL); - - return (0); + rfs4_state_g_init(); } void rfs4_srvrfini(void) { - extern krwlock_t rfs4_deleg_policy_lock; - if (lockt_sysid != LM_NOSYSID) { lm_free_sysidt(lockt_sysid); lockt_sysid = LM_NOSYSID; } - mutex_destroy(&rfs4_deleg_lock); - mutex_destroy(&rfs4_state_lock); - rw_destroy(&rfs4_deleg_policy_lock); + rfs4_state_g_fini(); fem_free(deleg_rdops); fem_free(deleg_wrops); } void +rfs4_do_server_start(int server_upordown, + int srv_delegation, int cluster_booted) +{ + nfs4_srv_t *nsrv4 = nfs4_get_srv(); + + /* Is this a warm start? */ + if (server_upordown == NFS_SERVER_QUIESCED) { + cmn_err(CE_NOTE, "nfs4_srv: " + "server was previously quiesced; " + "existing NFSv4 state will be re-used"); + + /* + * HA-NFSv4: this is also the signal + * that a Resource Group failover has + * occurred. 
+ */ + if (cluster_booted) + hanfsv4_failover(nsrv4); + } else { + /* Cold start */ + nsrv4->rfs4_start_time = 0; + rfs4_state_zone_init(nsrv4); + nsrv4->nfs4_drc = rfs4_init_drc(nfs4_drc_max, + nfs4_drc_hash); + + /* + * The nfsd service was started with the -s option + * we need to pull in any state from the paths indicated. + */ + if (curzone == global_zone && rfs4_dss_numnewpaths > 0) { + /* read in the stable storage state from these paths */ + rfs4_dss_readstate(nsrv4, rfs4_dss_numnewpaths, + rfs4_dss_newpaths); + } + } + + /* Check if delegation is to be enabled */ + if (srv_delegation != FALSE) + rfs4_set_deleg_policy(nsrv4, SRV_NORMAL_DELEGATE); +} + +void rfs4_init_compound_state(struct compound_state *cs) { bzero(cs, sizeof (*cs)); @@ -652,34 +714,35 @@ rfs4_clnt_in_grace(rfs4_client_t *cp) * reset all currently active grace periods */ void -rfs4_grace_reset_all(void) +rfs4_grace_reset_all(nfs4_srv_t *nsrv4) { rfs4_servinst_t *sip; - mutex_enter(&rfs4_servinst_lock); - for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) + mutex_enter(&nsrv4->servinst_lock); + for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) if (rfs4_servinst_in_grace(sip)) rfs4_grace_start(sip); - mutex_exit(&rfs4_servinst_lock); + mutex_exit(&nsrv4->servinst_lock); } /* * start any new instances' grace periods */ void -rfs4_grace_start_new(void) +rfs4_grace_start_new(nfs4_srv_t *nsrv4) { rfs4_servinst_t *sip; - mutex_enter(&rfs4_servinst_lock); - for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) + mutex_enter(&nsrv4->servinst_lock); + for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) if (rfs4_servinst_grace_new(sip)) rfs4_grace_start(sip); - mutex_exit(&rfs4_servinst_lock); + mutex_exit(&nsrv4->servinst_lock); } static rfs4_dss_path_t * -rfs4_dss_newpath(rfs4_servinst_t *sip, char *path, unsigned index) +rfs4_dss_newpath(nfs4_srv_t *nsrv4, rfs4_servinst_t *sip, + char *path, unsigned index) { size_t len; rfs4_dss_path_t *dss_path; @@ 
-703,15 +766,15 @@ rfs4_dss_newpath(rfs4_servinst_t *sip, char *path, unsigned index) * Add to list of served paths. * No locking required, as we're only ever called at startup. */ - if (rfs4_dss_pathlist == NULL) { + if (nsrv4->dss_pathlist == NULL) { /* this is the first dss_path_t */ /* needed for insque/remque */ dss_path->next = dss_path->prev = dss_path; - rfs4_dss_pathlist = dss_path; + nsrv4->dss_pathlist = dss_path; } else { - insque(dss_path, rfs4_dss_pathlist); + insque(dss_path, nsrv4->dss_pathlist); } return (dss_path); @@ -723,7 +786,8 @@ rfs4_dss_newpath(rfs4_servinst_t *sip, char *path, unsigned index) * recovery window. */ void -rfs4_servinst_create(int start_grace, int dss_npaths, char **dss_paths) +rfs4_servinst_create(nfs4_srv_t *nsrv4, int start_grace, + int dss_npaths, char **dss_paths) { unsigned i; rfs4_servinst_t *sip; @@ -754,21 +818,22 @@ rfs4_servinst_create(int start_grace, int dss_npaths, char **dss_paths) sizeof (rfs4_dss_path_t *), KM_SLEEP); for (i = 0; i < dss_npaths; i++) { - sip->dss_paths[i] = rfs4_dss_newpath(sip, dss_paths[i], i); + sip->dss_paths[i] = + rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i); } - mutex_enter(&rfs4_servinst_lock); - if (rfs4_cur_servinst != NULL) { + mutex_enter(&nsrv4->servinst_lock); + if (nsrv4->nfs4_cur_servinst != NULL) { /* add to linked list */ - sip->prev = rfs4_cur_servinst; - rfs4_cur_servinst->next = sip; + sip->prev = nsrv4->nfs4_cur_servinst; + nsrv4->nfs4_cur_servinst->next = sip; } if (start_grace) rfs4_grace_start(sip); /* make the new instance "current" */ - rfs4_cur_servinst = sip; + nsrv4->nfs4_cur_servinst = sip; - mutex_exit(&rfs4_servinst_lock); + mutex_exit(&nsrv4->servinst_lock); } /* @@ -776,31 +841,47 @@ rfs4_servinst_create(int start_grace, int dss_npaths, char **dss_paths) * all instances directly. 
*/ void -rfs4_servinst_destroy_all(void) +rfs4_servinst_destroy_all(nfs4_srv_t *nsrv4) { rfs4_servinst_t *sip, *prev, *current; #ifdef DEBUG int n = 0; #endif - mutex_enter(&rfs4_servinst_lock); - ASSERT(rfs4_cur_servinst != NULL); - current = rfs4_cur_servinst; - rfs4_cur_servinst = NULL; + mutex_enter(&nsrv4->servinst_lock); + ASSERT(nsrv4->nfs4_cur_servinst != NULL); + current = nsrv4->nfs4_cur_servinst; + nsrv4->nfs4_cur_servinst = NULL; for (sip = current; sip != NULL; sip = prev) { prev = sip->prev; rw_destroy(&sip->rwlock); if (sip->oldstate) kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t)); - if (sip->dss_paths) + if (sip->dss_paths) { + int i = sip->dss_npaths; + + while (i > 0) { + i--; + if (sip->dss_paths[i] != NULL) { + char *path = sip->dss_paths[i]->path; + + if (path != NULL) { + kmem_free(path, + strlen(path) + 1); + } + kmem_free(sip->dss_paths[i], + sizeof (rfs4_dss_path_t)); + } + } kmem_free(sip->dss_paths, sip->dss_npaths * sizeof (rfs4_dss_path_t *)); + } kmem_free(sip, sizeof (rfs4_servinst_t)); #ifdef DEBUG n++; #endif } - mutex_exit(&rfs4_servinst_lock); + mutex_exit(&nsrv4->servinst_lock); } /* @@ -808,7 +889,8 @@ rfs4_servinst_destroy_all(void) * Should be called with cp->rc_dbe held. */ void -rfs4_servinst_assign(rfs4_client_t *cp, rfs4_servinst_t *sip) +rfs4_servinst_assign(nfs4_srv_t *nsrv4, rfs4_client_t *cp, + rfs4_servinst_t *sip) { ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0); @@ -816,9 +898,9 @@ rfs4_servinst_assign(rfs4_client_t *cp, rfs4_servinst_t *sip) * The lock ensures that if the current instance is in the process * of changing, we will see the new one. 
*/ - mutex_enter(&rfs4_servinst_lock); + mutex_enter(&nsrv4->servinst_lock); cp->rc_server_instance = sip; - mutex_exit(&rfs4_servinst_lock); + mutex_exit(&nsrv4->servinst_lock); } rfs4_servinst_t * @@ -871,7 +953,7 @@ do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp) { int error, different_export = 0; vnode_t *dvp, *vp; - struct exportinfo *exi = NULL; + struct exportinfo *exi; fid_t fid; uint_t count, i; secinfo4 *resok_val; @@ -879,8 +961,11 @@ do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp) seconfig_t *si; bool_t did_traverse = FALSE; int dotdot, walk; + nfs_export_t *ne = nfs_get_export(); dvp = cs->vp; + exi = cs->exi; + ASSERT(exi != NULL); dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0'); /* @@ -888,25 +973,27 @@ do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp) * root of a filesystem, or above an export point. */ if (dotdot) { + vnode_t *zone_rootvp = ne->exi_root->exi_vp; + ASSERT3U(exi->exi_zoneid, ==, ne->exi_root->exi_zoneid); /* * If dotdotting at the root of a filesystem, then * need to traverse back to the mounted-on filesystem * and do the dotdot lookup there. */ - if (cs->vp->v_flag & VROOT) { + if ((dvp->v_flag & VROOT) || VN_CMP(dvp, zone_rootvp)) { /* * If at the system root, then can * go up no further. */ - if (VN_CMP(dvp, rootdir)) + if (VN_CMP(dvp, zone_rootvp)) return (puterrno4(ENOENT)); /* * Traverse back to the mounted-on filesystem */ - dvp = untraverse(cs->vp); + dvp = untraverse(dvp, zone_rootvp); /* * Set the different_export flag so we remember @@ -920,7 +1007,7 @@ do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp) * If dotdotting above an export point then set * the different_export to get new export info. 
*/ - different_export = nfs_exported(cs->exi, cs->vp); + different_export = nfs_exported(exi, dvp); } } @@ -939,9 +1026,9 @@ do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp) * check whether this vnode is visible. */ if (!different_export && - (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) || + (PSEUDO(exi) || !is_exported_sec(cs->nfsflavor, exi) || cs->access & CS_ACCESS_LIMITED)) { - if (! nfs_visible(cs->exi, vp, &different_export)) { + if (! nfs_visible(exi, vp, &different_export)) { VN_RELE(vp); return (puterrno4(ENOENT)); } @@ -983,6 +1070,7 @@ do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp) return (puterrno4(error)); } + /* We'll need to reassign "exi". */ if (dotdot) exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE); else @@ -1003,8 +1091,6 @@ do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp) return (puterrno4(EACCES)); } } - } else { - exi = cs->exi; } ASSERT(exi != NULL); @@ -1017,7 +1103,7 @@ do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp) * For a real export node, return the flavor that the client * has access with. 
*/ - ASSERT(RW_LOCK_HELD(&exported_lock)); + ASSERT(RW_LOCK_HELD(&ne->exported_lock)); if (PSEUDO(exi)) { count = exi->exi_export.ex_seccnt; /* total sec count */ resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP); @@ -1380,6 +1466,7 @@ rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, vnode_t *vp = cs->vp; cred_t *cr = cs->cr; vattr_t va; + nfs4_srv_t *nsrv4; DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs, COMMIT4args *, args); @@ -1436,8 +1523,9 @@ rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, goto out; } + nsrv4 = nfs4_get_srv(); *cs->statusp = resp->status = NFS4_OK; - resp->writeverf = Write4verf; + resp->writeverf = nsrv4->write4verf; out: DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs, COMMIT4res *, resp); @@ -2633,25 +2721,28 @@ do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs) * export point. */ if (dotdot) { + vnode_t *zone_rootvp; + ASSERT(cs->exi != NULL); + zone_rootvp = cs->exi->exi_ne->exi_root->exi_vp; /* * If dotdotting at the root of a filesystem, then * need to traverse back to the mounted-on filesystem * and do the dotdot lookup there. */ - if (cs->vp->v_flag & VROOT) { + if ((cs->vp->v_flag & VROOT) || VN_CMP(cs->vp, zone_rootvp)) { /* * If at the system root, then can * go up no further. 
*/ - if (VN_CMP(cs->vp, rootdir)) + if (VN_CMP(cs->vp, zone_rootvp)) return (puterrno4(ENOENT)); /* * Traverse back to the mounted-on filesystem */ - cs->vp = untraverse(cs->vp); + cs->vp = untraverse(cs->vp, zone_rootvp); /* * Set the different_export flag so we remember @@ -3409,6 +3500,7 @@ rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req, vnode_t *vp; struct exportinfo *exi, *sav_exi; nfs_fh4_fmt_t *fh_fmtp; + nfs_export_t *ne = nfs_get_export(); DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs); @@ -3422,19 +3514,19 @@ rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req, cs->cr = crdup(cs->basecr); - vp = exi_public->exi_vp; + vp = ne->exi_public->exi_vp; if (vp == NULL) { *cs->statusp = resp->status = NFS4ERR_SERVERFAULT; goto out; } - error = makefh4(&cs->fh, vp, exi_public); + error = makefh4(&cs->fh, vp, ne->exi_public); if (error != 0) { *cs->statusp = resp->status = puterrno4(error); goto out; } sav_exi = cs->exi; - if (exi_public == exi_root) { + if (ne->exi_public == ne->exi_root) { /* * No filesystem is actually shared public, so we default * to exi_root. In this case, we must check whether root @@ -3449,12 +3541,12 @@ rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req, */ exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen, NULL); - cs->exi = ((exi != NULL) ? exi : exi_public); + cs->exi = ((exi != NULL) ? 
exi : ne->exi_public); } else { /* * it's a properly shared filesystem */ - cs->exi = exi_public; + cs->exi = ne->exi_public; } if (is_system_labeled()) { @@ -3596,7 +3688,7 @@ rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, */ bzero(&fid, sizeof (fid)); fid.fid_len = MAXFIDSZ; - error = vop_fid_pseudo(rootdir, &fid); + error = vop_fid_pseudo(ZONE_ROOTVP(), &fid); if (error != 0) { *cs->statusp = resp->status = puterrno4(error); goto out; @@ -3610,7 +3702,7 @@ rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, * one or more exports further down in the server's * file tree. */ - exi = checkexport4(&rootdir->v_vfsp->vfs_fsid, &fid, NULL); + exi = checkexport4(&ZONE_ROOTVP()->v_vfsp->vfs_fsid, &fid, NULL); if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) { NFS4_DEBUG(rfs4_debug, (CE_WARN, "rfs4_op_putrootfh: export check failure")); @@ -3622,7 +3714,7 @@ rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, * Now make a filehandle based on the root * export and root vnode. 
*/ - error = makefh4(&cs->fh, rootdir, exi); + error = makefh4(&cs->fh, ZONE_ROOTVP(), exi); if (error != 0) { *cs->statusp = resp->status = puterrno4(error); goto out; @@ -3631,11 +3723,11 @@ rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, sav_exi = cs->exi; cs->exi = exi; - VN_HOLD(rootdir); - cs->vp = rootdir; + VN_HOLD(ZONE_ROOTVP()); + cs->vp = ZONE_ROOTVP(); if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) { - VN_RELE(rootdir); + VN_RELE(cs->vp); cs->vp = NULL; cs->exi = sav_exi; goto out; @@ -3721,10 +3813,12 @@ rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, if (is_referral) { char *s; size_t strsz; + kstat_named_t *stat = + cs->exi->exi_ne->ne_globals->svstat[NFS_V4]; /* Get an artificial symlink based on a referral */ s = build_symlink(vp, cs->cr, &strsz); - global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++; + stat[NFS_REFERLINKS].value.ui64++; DTRACE_PROBE2(nfs4serv__func__referral__reflink, vnode_t *, vp, char *, s); if (s == NULL) @@ -4171,7 +4265,7 @@ rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to * transmit over the wire. 
*/ - if ((error = VOP_RMDIR(dvp, name, rootdir, cs->cr, + if ((error = VOP_RMDIR(dvp, name, ZONE_ROOTVP(), cs->cr, NULL, 0)) == EEXIST) error = ENOTEMPTY; } @@ -4283,7 +4377,7 @@ rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, int error; vnode_t *odvp; vnode_t *ndvp; - vnode_t *srcvp, *targvp; + vnode_t *srcvp, *targvp, *tvp; struct vattr obdva, oidva, oadva; struct vattr nbdva, nidva, nadva; char *onm, *nnm; @@ -4291,6 +4385,7 @@ rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, rfs4_file_t *fp, *sfp; int in_crit_src, in_crit_targ; int fp_rele_grant_hold, sfp_rele_grant_hold; + int unlinked; bslabel_t *clabel; struct sockaddr *ca; char *converted_onm = NULL; @@ -4301,9 +4396,10 @@ rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, RENAME4args *, args); fp = sfp = NULL; - srcvp = targvp = NULL; + srcvp = targvp = tvp = NULL; in_crit_src = in_crit_targ = 0; fp_rele_grant_hold = sfp_rele_grant_hold = 0; + unlinked = 0; /* CURRENT_FH: target directory */ ndvp = cs->vp; @@ -4476,7 +4572,6 @@ rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, } fp_rele_grant_hold = 1; - /* Check for NBMAND lock on both source and target */ if (nbl_need_check(srcvp)) { nbl_start_crit(srcvp, RW_READER); @@ -4511,11 +4606,15 @@ rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime) NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime) - if ((error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm, - cs->cr, NULL, 0)) == 0 && fp != NULL) { - struct vattr va; - vnode_t *tvp; + error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm, cs->cr, + NULL, 0); + /* + * If target existed and was unlinked by VOP_RENAME, state will need + * closed. To avoid deadlock, rfs4_close_all_state will be done after + * any necessary nbl_end_crit on srcvp and tgtvp. 
+ */ + if (error == 0 && fp != NULL) { rfs4_dbe_lock(fp->rf_dbe); tvp = fp->rf_vp; if (tvp) @@ -4523,17 +4622,23 @@ rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, rfs4_dbe_unlock(fp->rf_dbe); if (tvp) { + struct vattr va; va.va_mask = AT_NLINK; + if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) && va.va_nlink == 0) { - /* The file is gone and so should the state */ - if (in_crit_targ) { - nbl_end_crit(targvp); - in_crit_targ = 0; + unlinked = 1; + + /* DEBUG data */ + if ((srcvp == targvp) || (tvp != targvp)) { + cmn_err(CE_WARN, "rfs4_op_rename: " + "srcvp %p, targvp: %p, tvp: %p", + (void *)srcvp, (void *)targvp, + (void *)tvp); } - rfs4_close_all_state(fp); + } else { + VN_RELE(tvp); } - VN_RELE(tvp); } } if (error == 0) @@ -4548,6 +4653,21 @@ rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, if (targvp) VN_RELE(targvp); + if (unlinked) { + ASSERT(fp != NULL); + ASSERT(tvp != NULL); + + /* DEBUG data */ + if (RW_READ_HELD(&tvp->v_nbllock)) { + cmn_err(CE_WARN, "rfs4_op_rename: " + "RW_READ_HELD(%p)", (void *)tvp); + } + + /* The file is gone and so should the state */ + rfs4_close_all_state(fp); + VN_RELE(tvp); + } + if (sfp) { rfs4_clear_dont_grant(sfp); rfs4_file_rele(sfp); @@ -5484,6 +5604,7 @@ rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, nfsstat4 stat; int in_crit = 0; caller_context_t ct; + nfs4_srv_t *nsrv4; DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs, WRITE4args *, args); @@ -5554,11 +5675,12 @@ rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, goto out; } + nsrv4 = nfs4_get_srv(); if (args->data_len == 0) { *cs->statusp = resp->status = NFS4_OK; resp->count = 0; resp->committed = args->stable; - resp->writeverf = Write4verf; + resp->writeverf = nsrv4->write4verf; goto out; } @@ -5654,7 +5776,7 @@ rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req, else resp->committed = FILE_SYNC4; - resp->writeverf = Write4verf; + 
resp->writeverf = nsrv4->write4verf; out: if (in_crit) @@ -5674,18 +5796,24 @@ rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi, { uint_t i; struct compound_state cs; + nfs4_srv_t *nsrv4; + nfs_export_t *ne = nfs_get_export(); if (rv != NULL) *rv = 0; rfs4_init_compound_state(&cs); /* - * Form a reply tag by copying over the reqeuest tag. + * Form a reply tag by copying over the request tag. */ - resp->tag.utf8string_val = - kmem_alloc(args->tag.utf8string_len, KM_SLEEP); resp->tag.utf8string_len = args->tag.utf8string_len; - bcopy(args->tag.utf8string_val, resp->tag.utf8string_val, - resp->tag.utf8string_len); + if (args->tag.utf8string_len != 0) { + resp->tag.utf8string_val = + kmem_alloc(args->tag.utf8string_len, KM_SLEEP); + bcopy(args->tag.utf8string_val, resp->tag.utf8string_val, + resp->tag.utf8string_len); + } else { + resp->tag.utf8string_val = NULL; + } cs.statusp = &resp->status; cs.req = req; @@ -5731,6 +5859,7 @@ rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi, KM_SLEEP); cs.basecr = cr; + nsrv4 = nfs4_get_srv(); DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs, COMPOUND4args *, args); @@ -5745,26 +5874,27 @@ rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi, * ops along with unexport. This lock will be removed as * part of the NFSv4 phase 2 namespace redesign work. */ - rw_enter(&exported_lock, RW_READER); + rw_enter(&ne->exported_lock, RW_READER); /* * If this is the first compound we've seen, we need to start all * new instances' grace periods. */ - if (rfs4_seen_first_compound == 0) { - rfs4_grace_start_new(); + if (nsrv4->seen_first_compound == 0) { + rfs4_grace_start_new(nsrv4); /* * This must be set after rfs4_grace_start_new(), otherwise * another thread could proceed past here before the former * is finished. 
*/ - rfs4_seen_first_compound = 1; + nsrv4->seen_first_compound = 1; } for (i = 0; i < args->array_len && cs.cont; i++) { nfs_argop4 *argop; nfs_resop4 *resop; uint_t op; + kstat_named_t *stat = ne->ne_globals->rfsproccnt[NFS_V4]; argop = &args->array[i]; resop = &resp->array[i]; @@ -5776,7 +5906,7 @@ rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi, * Count the individual ops here; NULL and COMPOUND * are counted in common_dispatch() */ - rfsproccnt_v4_ptr[op].value.ui64++; + stat[op].value.ui64++; NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "Executing %s", rfs4_op_string[op])); @@ -5793,7 +5923,7 @@ rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi, * day when XDR code doesn't verify v4 opcodes. */ op = OP_ILLEGAL; - rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++; + stat[OP_ILLEGAL_IDX].value.ui64++; rfs4_op_illegal(argop, resop, req, &cs); cs.cont = FALSE; @@ -5816,15 +5946,22 @@ rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi, } } - rw_exit(&exported_lock); - - DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs, - COMPOUND4res *, resp); + rw_exit(&ne->exported_lock); + /* + * clear exportinfo and vnode fields from compound_state before dtrace + * probe, to avoid tracing residual values for path and share path. + */ if (cs.vp) VN_RELE(cs.vp); if (cs.saved_vp) VN_RELE(cs.saved_vp); + cs.exi = cs.saved_exi = NULL; + cs.vp = cs.saved_vp = NULL; + + DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs, + COMPOUND4res *, resp); + if (cs.saved_fh.nfs_fh4_val) kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE); @@ -6528,25 +6665,27 @@ rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs, if (trunc) { int in_crit = 0; rfs4_file_t *fp; + nfs4_srv_t *nsrv4; bool_t create = FALSE; /* * We are writing over an existing file. * Check to see if we need to recall a delegation. 
*/ - rfs4_hold_deleg_policy(); + nsrv4 = nfs4_get_srv(); + rfs4_hold_deleg_policy(nsrv4); if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) { if (rfs4_check_delegated_byfp(FWRITE, fp, (reqsize == 0), FALSE, FALSE, &clientid)) { rfs4_file_rele(fp); - rfs4_rele_deleg_policy(); + rfs4_rele_deleg_policy(nsrv4); VN_RELE(vp); *attrset = 0; return (NFS4ERR_DELAY); } rfs4_file_rele(fp); } - rfs4_rele_deleg_policy(); + rfs4_rele_deleg_policy(nsrv4); if (nbl_need_check(vp)) { in_crit = 1; @@ -8104,11 +8243,13 @@ rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop, SETCLIENTID_CONFIRM4res *res = &resop->nfs_resop4_u.opsetclientid_confirm; rfs4_client_t *cp, *cptoclose = NULL; + nfs4_srv_t *nsrv4; DTRACE_NFSV4_2(op__setclientid__confirm__start, struct compound_state *, cs, SETCLIENTID_CONFIRM4args *, args); + nsrv4 = nfs4_get_srv(); *cs->statusp = res->status = NFS4_OK; cp = rfs4_findclient_by_id(args->clientid, TRUE); @@ -8144,14 +8285,14 @@ rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop, * Update the client's associated server instance, if it's changed * since the client was created. */ - if (rfs4_servinst(cp) != rfs4_cur_servinst) - rfs4_servinst_assign(cp, rfs4_cur_servinst); + if (rfs4_servinst(cp) != nsrv4->nfs4_cur_servinst) + rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst); /* * Record clientid in stable storage. * Must be done after server instance has been assigned. */ - rfs4_ss_clid(cp); + rfs4_ss_clid(nsrv4, cp); rfs4_dbe_unlock(cp->rc_dbe); @@ -8166,7 +8307,7 @@ rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop, /* * Check to see if client can perform reclaims */ - rfs4_ss_chkclid(cp); + rfs4_ss_chkclid(nsrv4, cp); rfs4_client_rele(cp); @@ -9811,3 +9952,167 @@ client_is_downrev(struct svc_req *req) rfs4_dbe_rele(ci->ri_dbe); return (is_downrev); } + +/* + * Do the main work of handling HA-NFSv4 Resource Group failover on + * Sun Cluster. 
+ * We need to detect whether any RG admin paths have been added or removed, + * and adjust resources accordingly. + * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In + * order to scale, the list and array of paths need to be held in more + * suitable data structures. + */ +static void +hanfsv4_failover(nfs4_srv_t *nsrv4) +{ + int i, start_grace, numadded_paths = 0; + char **added_paths = NULL; + rfs4_dss_path_t *dss_path; + + /* + * Note: currently, dss_pathlist cannot be NULL, since + * it will always include an entry for NFS4_DSS_VAR_DIR. If we + * make the latter dynamically specified too, the following will + * need to be adjusted. + */ + + /* + * First, look for removed paths: RGs that have been failed-over + * away from this node. + * Walk the "currently-serving" dss_pathlist and, for each + * path, check if it is on the "passed-in" rfs4_dss_newpaths array + * from nfsd. If not, that RG path has been removed. + * + * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed + * any duplicates. + */ + dss_path = nsrv4->dss_pathlist; + do { + int found = 0; + char *path = dss_path->path; + + /* used only for non-HA so may not be removed */ + if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) { + dss_path = dss_path->next; + continue; + } + + for (i = 0; i < rfs4_dss_numnewpaths; i++) { + int cmpret; + char *newpath = rfs4_dss_newpaths[i]; + + /* + * Since nfsd has sorted rfs4_dss_newpaths for us, + * once the return from strcmp is negative we know + * we've passed the point where "path" should be, + * and can stop searching: "path" has been removed. + */ + cmpret = strcmp(path, newpath); + if (cmpret < 0) + break; + if (cmpret == 0) { + found = 1; + break; + } + } + + if (found == 0) { + unsigned index = dss_path->index; + rfs4_servinst_t *sip = dss_path->sip; + rfs4_dss_path_t *path_next = dss_path->next; + + /* + * This path has been removed. 
+ * We must clear out the servinst reference to + * it, since it's now owned by another + * node: we should not attempt to touch it. + */ + ASSERT(dss_path == sip->dss_paths[index]); + sip->dss_paths[index] = NULL; + + /* remove from "currently-serving" list, and destroy */ + remque(dss_path); + /* allow for NUL */ + kmem_free(dss_path->path, strlen(dss_path->path) + 1); + kmem_free(dss_path, sizeof (rfs4_dss_path_t)); + + dss_path = path_next; + } else { + /* path was found; not removed */ + dss_path = dss_path->next; + } + } while (dss_path != nsrv4->dss_pathlist); + + /* + * Now, look for added paths: RGs that have been failed-over + * to this node. + * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and, + * for each path, check if it is on the "currently-serving" + * dss_pathlist. If not, that RG path has been added. + * + * Note: we don't do duplicate detection here; nfsd does that for us. + * + * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us + * an upper bound for the size needed for added_paths[numadded_paths]. + */ + + /* probably more space than we need, but guaranteed to be enough */ + if (rfs4_dss_numnewpaths > 0) { + size_t sz = rfs4_dss_numnewpaths * sizeof (char *); + added_paths = kmem_zalloc(sz, KM_SLEEP); + } + + /* walk the "passed-in" rfs4_dss_newpaths array from nfsd */ + for (i = 0; i < rfs4_dss_numnewpaths; i++) { + int found = 0; + char *newpath = rfs4_dss_newpaths[i]; + + dss_path = nsrv4->dss_pathlist; + do { + char *path = dss_path->path; + + /* used only for non-HA */ + if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) { + dss_path = dss_path->next; + continue; + } + + if (strncmp(path, newpath, strlen(path)) == 0) { + found = 1; + break; + } + + dss_path = dss_path->next; + } while (dss_path != nsrv4->dss_pathlist); + + if (found == 0) { + added_paths[numadded_paths] = newpath; + numadded_paths++; + } + } + + /* did we find any added paths? 
*/ + if (numadded_paths > 0) { + + /* create a new server instance, and start its grace period */ + start_grace = 1; + /* CSTYLED */ + rfs4_servinst_create(nsrv4, start_grace, numadded_paths, added_paths); + + /* read in the stable storage state from these paths */ + rfs4_dss_readstate(nsrv4, numadded_paths, added_paths); + + /* + * Multiple failovers during a grace period will cause + * clients of the same resource group to be partitioned + * into different server instances, with different + * grace periods. Since clients of the same resource + * group must be subject to the same grace period, + * we need to reset all currently active grace periods. + */ + rfs4_grace_reset_all(nsrv4); + } + + if (rfs4_dss_numnewpaths > 0) + kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *)); +} diff --git a/usr/src/uts/common/fs/nfs/nfs4_srv_attr.c b/usr/src/uts/common/fs/nfs/nfs4_srv_attr.c index 7240faa356..a9ee217a8b 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_srv_attr.c +++ b/usr/src/uts/common/fs/nfs/nfs4_srv_attr.c @@ -18,12 +18,14 @@ * * CDDL HEADER END */ + /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ + /* - * Copyright 2012 Nexenta Systems, Inc. All rights reserved. + * Copyright 2018 Nexenta Systems, Inc. */ #include <sys/systm.h> @@ -133,6 +135,11 @@ rfs4_attr_init() struct statvfs64 sb; rfs4_init_compound_state(&cs); + /* + * This is global state checking, called once. We might be in + * non-global-zone context here (say a modload happens from a zone + * process) so in this case, we want the global-zone root vnode. + */ cs.vp = rootvp; cs.fh.nfs_fh4_val = NULL; cs.cr = kcred; @@ -1301,22 +1308,29 @@ rfs4_get_mntdfileid(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sarg) vp = sarg->cs->vp; sarg->mntdfid_set = FALSE; - /* VROOT object, must untraverse */ - if (vp->v_flag & VROOT) { + /* + * VROOT object or zone's root, must untraverse. + * + * NOTE: Not doing reality checks on curzone vs. 
compound + * state vnode because it will mismatch once at initialization + * if a non-global-zone triggers the module load, BUT in that case + * the vp is literally "/" which has VROOT set. + */ + if ((vp->v_flag & VROOT) || VN_IS_CURZONEROOT(vp)) { /* extra hold for vp since untraverse might rele */ VN_HOLD(vp); - stubvp = untraverse(vp); + stubvp = untraverse(vp, ZONE_ROOTVP()); /* - * If vp/stubvp are same, we must be at system + * If vp/stubvp are same, we must be at system-or-zone * root because untraverse returned same vp * for a VROOT object. sarg->vap was setup * before we got here, so there's no need to do * another getattr -- just use the one in sarg. */ if (VN_CMP(vp, stubvp)) { - ASSERT(VN_CMP(vp, rootdir)); + ASSERT(VN_IS_CURZONEROOT(vp)); vap = sarg->vap; } else { va.va_mask = AT_NODEID; @@ -1375,10 +1389,10 @@ rfs4_fattr4_mounted_on_fileid(nfs4_attr_cmd_t cmd, break; /* this attr is supported */ case NFS4ATTR_GETIT: case NFS4ATTR_VERIT: - if (! sarg->mntdfid_set) + if (!sarg->mntdfid_set) error = rfs4_get_mntdfileid(cmd, sarg); - if (! 
error && sarg->mntdfid_set) { + if (!error && sarg->mntdfid_set) { if (cmd == NFS4ATTR_GETIT) na->mounted_on_fileid = sarg->mounted_on_fileid; else @@ -1595,6 +1609,10 @@ rfs4_fattr4_fs_locations(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sarg, break; /* this attr is supported */ case NFS4ATTR_GETIT: + { + kstat_named_t *stat = + sarg->cs->exi->exi_ne->ne_globals->svstat[NFS_V4]; + fsl = fetch_referral(sarg->cs->vp, sarg->cs->cr); if (fsl == NULL) (void) memset(&(na->fs_locations), 0, @@ -1603,9 +1621,9 @@ rfs4_fattr4_fs_locations(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sarg, na->fs_locations = *fsl; kmem_free(fsl, sizeof (fs_locations4)); } - global_svstat_ptr[4][NFS_REFERRALS].value.ui64++; + stat[NFS_REFERRALS].value.ui64++; break; - + } case NFS4ATTR_FREEIT: if (sarg->op == NFS4ATTR_SETIT || sarg->op == NFS4ATTR_VERIT) error = EINVAL; diff --git a/usr/src/uts/common/fs/nfs/nfs4_srv_deleg.c b/usr/src/uts/common/fs/nfs/nfs4_srv_deleg.c index bb3f1bdd95..ea50f18d77 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_srv_deleg.c +++ b/usr/src/uts/common/fs/nfs/nfs4_srv_deleg.c @@ -22,7 +22,10 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright 2014 Nexenta Systems, Inc. All rights reserved. + */ + +/* + * Copyright 2018 Nexenta Systems, Inc. 
*/ #include <sys/systm.h> @@ -48,10 +51,7 @@ #define MAX_READ_DELEGATIONS 5 -krwlock_t rfs4_deleg_policy_lock; -srv_deleg_policy_t rfs4_deleg_policy = SRV_NEVER_DELEGATE; static int rfs4_deleg_wlp = 5; -kmutex_t rfs4_deleg_lock; static int rfs4_deleg_disabled; static int rfs4_max_setup_cb_tries = 5; @@ -138,23 +138,30 @@ uaddr2sockaddr(int af, char *ua, void *ap, in_port_t *pp) * value of "new_policy" */ void -rfs4_set_deleg_policy(srv_deleg_policy_t new_policy) +rfs4_set_deleg_policy(nfs4_srv_t *nsrv4, srv_deleg_policy_t new_policy) { - rw_enter(&rfs4_deleg_policy_lock, RW_WRITER); - rfs4_deleg_policy = new_policy; - rw_exit(&rfs4_deleg_policy_lock); + rw_enter(&nsrv4->deleg_policy_lock, RW_WRITER); + nsrv4->nfs4_deleg_policy = new_policy; + rw_exit(&nsrv4->deleg_policy_lock); } void -rfs4_hold_deleg_policy(void) +rfs4_hold_deleg_policy(nfs4_srv_t *nsrv4) { - rw_enter(&rfs4_deleg_policy_lock, RW_READER); + rw_enter(&nsrv4->deleg_policy_lock, RW_READER); } void -rfs4_rele_deleg_policy(void) +rfs4_rele_deleg_policy(nfs4_srv_t *nsrv4) { - rw_exit(&rfs4_deleg_policy_lock); + rw_exit(&nsrv4->deleg_policy_lock); +} + +srv_deleg_policy_t +nfs4_get_deleg_policy() +{ + nfs4_srv_t *nsrv4 = nfs4_get_srv(); + return (nsrv4->nfs4_deleg_policy); } @@ -210,7 +217,7 @@ rfs4_do_cb_null(rfs4_client_t *cp) if (cbp->cb_nullcaller == TRUE) { mutex_exit(cbp->cb_lock); rfs4_client_rele(cp); - return; + zthread_exit(); } /* Mark the cbinfo as having a thread in the NULL callback */ @@ -278,7 +285,7 @@ retry: cbp->cb_nullcaller = FALSE; mutex_exit(cbp->cb_lock); rfs4_client_rele(cp); - return; + zthread_exit(); } /* mark rfs4_client_t as CALLBACK NULL in progress */ @@ -320,8 +327,8 @@ retry: cv_broadcast(cbp->cb_cv); /* start up the other threads */ cbp->cb_nullcaller = FALSE; mutex_exit(cbp->cb_lock); - rfs4_client_rele(cp); + zthread_exit(); } /* @@ -687,7 +694,7 @@ rfs4_deleg_cb_check(rfs4_client_t *cp) rfs4_dbe_hold(cp->rc_dbe); /* hold the client struct for thread */ - (void) 
thread_create(NULL, 0, rfs4_do_cb_null, cp, 0, &p0, TS_RUN, + (void) zthread_create(NULL, 0, rfs4_do_cb_null, cp, 0, minclsyspri); } @@ -948,8 +955,8 @@ do_recall(struct recall_arg *arg) mutex_destroy(&cpr_lock); rfs4_deleg_state_rele(dsp); /* release the hold for this thread */ - kmem_free(arg, sizeof (struct recall_arg)); + zthread_exit(); } struct master_recall_args { @@ -977,7 +984,7 @@ do_recall_file(struct master_recall_args *map) rfs4_dbe_rele_nolock(fp->rf_dbe); rfs4_dbe_unlock(fp->rf_dbe); kmem_free(map, sizeof (struct master_recall_args)); - return; + zthread_exit(); } mutex_exit(fp->rf_dinfo.rd_recall_lock); @@ -1010,7 +1017,7 @@ do_recall_file(struct master_recall_args *map) recall_count++; - (void) thread_create(NULL, 0, do_recall, arg, 0, &p0, TS_RUN, + (void) zthread_create(NULL, 0, do_recall, arg, 0, minclsyspri); } @@ -1035,6 +1042,7 @@ do_recall_file(struct master_recall_args *map) mutex_enter(&cpr_lock); CALLB_CPR_EXIT(&cpr_info); mutex_destroy(&cpr_lock); + zthread_exit(); } static void @@ -1070,7 +1078,7 @@ rfs4_recall_file(rfs4_file_t *fp, args->recall = recall; args->trunc = trunc; - (void) thread_create(NULL, 0, do_recall_file, args, 0, &p0, TS_RUN, + (void) zthread_create(NULL, 0, do_recall_file, args, 0, minclsyspri); } @@ -1206,12 +1214,12 @@ rfs4_check_delegation(rfs4_state_t *sp, rfs4_file_t *fp) * determine the actual delegation type to return. 
*/ static open_delegation_type4 -rfs4_delegation_policy(open_delegation_type4 dtype, +rfs4_delegation_policy(nfs4_srv_t *nsrv4, open_delegation_type4 dtype, rfs4_dinfo_t *dinfo, clientid4 cid) { time_t elapsed; - if (rfs4_deleg_policy != SRV_NORMAL_DELEGATE) + if (nsrv4->nfs4_deleg_policy != SRV_NORMAL_DELEGATE) return (OPEN_DELEGATE_NONE); /* @@ -1254,6 +1262,7 @@ rfs4_delegation_policy(open_delegation_type4 dtype, rfs4_deleg_state_t * rfs4_grant_delegation(delegreq_t dreq, rfs4_state_t *sp, int *recall) { + nfs4_srv_t *nsrv4; rfs4_file_t *fp = sp->rs_finfo; open_delegation_type4 dtype; int no_delegation; @@ -1261,14 +1270,18 @@ rfs4_grant_delegation(delegreq_t dreq, rfs4_state_t *sp, int *recall) ASSERT(rfs4_dbe_islocked(sp->rs_dbe)); ASSERT(rfs4_dbe_islocked(fp->rf_dbe)); + nsrv4 = nfs4_get_srv(); + /* Is the server even providing delegations? */ - if (rfs4_deleg_policy == SRV_NEVER_DELEGATE || dreq == DELEG_NONE) + if (nsrv4->nfs4_deleg_policy == SRV_NEVER_DELEGATE || + dreq == DELEG_NONE) { return (NULL); + } /* Check to see if delegations have been temporarily disabled */ - mutex_enter(&rfs4_deleg_lock); + mutex_enter(&nsrv4->deleg_lock); no_delegation = rfs4_deleg_disabled; - mutex_exit(&rfs4_deleg_lock); + mutex_exit(&nsrv4->deleg_lock); if (no_delegation) return (NULL); @@ -1349,7 +1362,7 @@ rfs4_grant_delegation(delegreq_t dreq, rfs4_state_t *sp, int *recall) * Based on policy and the history of the file get the * actual delegation. */ - dtype = rfs4_delegation_policy(dtype, &fp->rf_dinfo, + dtype = rfs4_delegation_policy(nsrv4, dtype, &fp->rf_dinfo, sp->rs_owner->ro_client->rc_clientid); if (dtype == OPEN_DELEGATE_NONE) @@ -1438,8 +1451,10 @@ rfs4_check_delegated_byfp(int mode, rfs4_file_t *fp, { rfs4_deleg_state_t *dsp; + nfs4_srv_t *nsrv4 = nfs4_get_srv(); + /* Is delegation enabled? */ - if (rfs4_deleg_policy == SRV_NEVER_DELEGATE) + if (nsrv4->nfs4_deleg_policy == SRV_NEVER_DELEGATE) return (FALSE); /* do we have a delegation on this file? 
*/ @@ -1504,14 +1519,16 @@ rfs4_check_delegated_byfp(int mode, rfs4_file_t *fp, bool_t rfs4_check_delegated(int mode, vnode_t *vp, bool_t trunc) { + nfs4_srv_t *nsrv4; rfs4_file_t *fp; bool_t create = FALSE; bool_t rc = FALSE; - rfs4_hold_deleg_policy(); + nsrv4 = nfs4_get_srv(); + rfs4_hold_deleg_policy(nsrv4); /* Is delegation enabled? */ - if (rfs4_deleg_policy != SRV_NEVER_DELEGATE) { + if (nsrv4->nfs4_deleg_policy != SRV_NEVER_DELEGATE) { fp = rfs4_findfile(vp, NULL, &create); if (fp != NULL) { if (rfs4_check_delegated_byfp(mode, fp, trunc, @@ -1521,7 +1538,7 @@ rfs4_check_delegated(int mode, vnode_t *vp, bool_t trunc) rfs4_file_rele(fp); } } - rfs4_rele_deleg_policy(); + rfs4_rele_deleg_policy(nsrv4); return (rc); } @@ -1533,7 +1550,9 @@ rfs4_check_delegated(int mode, vnode_t *vp, bool_t trunc) void rfs4_clear_dont_grant(rfs4_file_t *fp) { - if (rfs4_deleg_policy == SRV_NEVER_DELEGATE) + nfs4_srv_t *nsrv4 = nfs4_get_srv(); + + if (nsrv4->nfs4_deleg_policy == SRV_NEVER_DELEGATE) return; rfs4_dbe_lock(fp->rf_dbe); ASSERT(fp->rf_dinfo.rd_hold_grant > 0); @@ -1869,18 +1888,24 @@ rfs4_is_deleg(rfs4_state_t *sp) void rfs4_disable_delegation(void) { - mutex_enter(&rfs4_deleg_lock); + nfs4_srv_t *nsrv4; + + nsrv4 = nfs4_get_srv(); + mutex_enter(&nsrv4->deleg_lock); rfs4_deleg_disabled++; - mutex_exit(&rfs4_deleg_lock); + mutex_exit(&nsrv4->deleg_lock); } void rfs4_enable_delegation(void) { - mutex_enter(&rfs4_deleg_lock); + nfs4_srv_t *nsrv4; + + nsrv4 = nfs4_get_srv(); + mutex_enter(&nsrv4->deleg_lock); ASSERT(rfs4_deleg_disabled > 0); rfs4_deleg_disabled--; - mutex_exit(&rfs4_deleg_lock); + mutex_exit(&nsrv4->deleg_lock); } void diff --git a/usr/src/uts/common/fs/nfs/nfs4_srv_ns.c b/usr/src/uts/common/fs/nfs/nfs4_srv_ns.c index 4ad799be46..920ebeca53 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_srv_ns.c +++ b/usr/src/uts/common/fs/nfs/nfs4_srv_ns.c @@ -20,8 +20,11 @@ */ /* - * Copyright 2014 Nexenta Systems, Inc. All rights reserved. 
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +/* + * Copyright 2018 Nexenta Systems, Inc. * Copyright (c) 2015, Joyent, Inc. */ @@ -139,12 +142,12 @@ nfs4_vget_pseudo(struct exportinfo *exi, vnode_t **vpp, fid_t *fidp) * * A visible list has a per-file-system scope. Any exportinfo * struct (real or pseudo) can have a visible list as long as - * a) its export root is VROOT + * a) its export root is VROOT, or is the zone's root for in-zone NFS service * b) a descendant of the export root is shared */ struct exportinfo * -pseudo_exportfs(vnode_t *vp, fid_t *fid, struct exp_visible *vis_head, - struct exportdata *exdata) +pseudo_exportfs(nfs_export_t *ne, vnode_t *vp, fid_t *fid, + struct exp_visible *vis_head, struct exportdata *exdata) { struct exportinfo *exi; struct exportdata *kex; @@ -152,7 +155,7 @@ pseudo_exportfs(vnode_t *vp, fid_t *fid, struct exp_visible *vis_head, int vpathlen; int i; - ASSERT(RW_WRITE_HELD(&exported_lock)); + ASSERT(RW_WRITE_HELD(&ne->exported_lock)); fsid = vp->v_vfsp->vfs_fsid; exi = kmem_zalloc(sizeof (*exi), KM_SLEEP); @@ -162,6 +165,7 @@ pseudo_exportfs(vnode_t *vp, fid_t *fid, struct exp_visible *vis_head, VN_HOLD(exi->exi_vp); exi->exi_visible = vis_head; exi->exi_count = 1; + exi->exi_zoneid = ne->ne_globals->nfs_zoneid; exi->exi_volatile_dev = (vfssw[vp->v_vfsp->vfs_fstype].vsw_flag & VSW_VOLATILEDEV) ? 1 : 0; mutex_init(&exi->exi_lock, NULL, MUTEX_DEFAULT, NULL); @@ -205,7 +209,15 @@ pseudo_exportfs(vnode_t *vp, fid_t *fid, struct exp_visible *vis_head, /* * Insert the new entry at the front of the export list */ - export_link(exi); + export_link(ne, exi); + + /* + * Initialize exi_id and exi_kstats + */ + mutex_enter(&nfs_exi_id_lock); + exi->exi_id = exi_id_get_next(); + avl_add(&exi_id_tree, exi); + mutex_exit(&nfs_exi_id_lock); return (exi); } @@ -281,14 +293,14 @@ tree_prepend_node(treenode_t *n, exp_visible_t *v, exportinfo_t *e) * they should be already freed. 
*/ static void -tree_remove_node(treenode_t *node) +tree_remove_node(nfs_export_t *ne, treenode_t *node) { treenode_t *parent = node->tree_parent; treenode_t *s; /* s for sibling */ if (parent == NULL) { kmem_free(node, sizeof (*node)); - ns_root = NULL; + ne->ns_root = NULL; return; } /* This node is first child */ @@ -437,6 +449,7 @@ more_visible(struct exportinfo *exi, treenode_t *tree_head) struct exp_visible *vp1, *vp2, *vis_head, *tail, *next; int found; treenode_t *child, *curr, *connect_point; + nfs_export_t *ne = nfs_get_export(); vis_head = tree_head->tree_vis; connect_point = exi->exi_tree; @@ -450,7 +463,7 @@ more_visible(struct exportinfo *exi, treenode_t *tree_head) exi->exi_visible = vis_head; /* Update the change timestamp */ - tree_update_change(connect_point, &vis_head->vis_change); + tree_update_change(ne, connect_point, &vis_head->vis_change); return; } @@ -510,7 +523,7 @@ more_visible(struct exportinfo *exi, treenode_t *tree_head) tree_add_child(connect_point, curr); /* Update the change timestamp */ - tree_update_change(connect_point, + tree_update_change(ne, connect_point, &curr->tree_vis->vis_change); connect_point = NULL; @@ -627,8 +640,11 @@ treeclimb_export(struct exportinfo *exip) struct vattr va; treenode_t *tree_head = NULL; timespec_t now; + nfs_export_t *ne; - ASSERT(RW_WRITE_HELD(&exported_lock)); + ne = exip->exi_ne; + ASSERT3P(ne, ==, nfs_get_export()); /* curzone reality check */ + ASSERT(RW_WRITE_HELD(&ne->exported_lock)); gethrestime(&now); @@ -644,11 +660,14 @@ treeclimb_export(struct exportinfo *exip) if (error) break; + /* XXX KEBE ASKS DO WE NEED THIS?!? */ + ASSERT3U(exip->exi_zoneid, ==, curzone->zone_id); /* - * The root of the file system needs special handling + * The root of the file system, or the zone's root for + * in-zone NFS service needs special handling */ - if (vp->v_flag & VROOT) { - if (! 
exportdir) { + if (vp->v_flag & VROOT || vp == EXI_TO_ZONEROOTVP(exip)) { + if (!exportdir) { struct exportinfo *exi; /* @@ -677,23 +696,23 @@ treeclimb_export(struct exportinfo *exip) * this as a pseudo export so that an NFS v4 * client can do lookups in it. */ - new_exi = pseudo_exportfs(vp, &fid, vis_head, - NULL); + new_exi = pseudo_exportfs(ne, vp, &fid, + vis_head, NULL); vis_head = NULL; } - if (VN_CMP(vp, rootdir)) { + if (VN_IS_CURZONEROOT(vp)) { /* at system root */ /* * If sharing "/", new_exi is shared exportinfo * (exip). Otherwise, new_exi is exportinfo * created by pseudo_exportfs() above. */ - ns_root = tree_prepend_node(tree_head, NULL, + ne->ns_root = tree_prepend_node(tree_head, NULL, new_exi); /* Update the change timestamp */ - tree_update_change(ns_root, &now); + tree_update_change(ne, ne->ns_root, &now); break; } @@ -702,7 +721,7 @@ treeclimb_export(struct exportinfo *exip) * Traverse across the mountpoint and continue the * climb on the mounted-on filesystem. */ - vp = untraverse(vp); + vp = untraverse(vp, ne->exi_root->exi_vp); exportdir = 0; continue; } @@ -788,7 +807,10 @@ treeclimb_export(struct exportinfo *exip) exportinfo_t *e = tree_head->tree_exi; /* exip will be freed in exportfs() */ if (e && e != exip) { - export_unlink(e); + mutex_enter(&nfs_exi_id_lock); + avl_remove(&exi_id_tree, e); + mutex_exit(&nfs_exi_id_lock); + export_unlink(ne, e); exi_rele(e); } tree_head = tree_head->tree_child_first; @@ -809,17 +831,30 @@ treeclimb_export(struct exportinfo *exip) * node was a leaf node. * Deleting of nodes will finish when we reach a node which * has children or is a real export, then we might still need - * to continue releasing visibles, until we reach VROOT node. + * to continue releasing visibles, until we reach VROOT or zone's root node. 
*/ void -treeclimb_unexport(struct exportinfo *exip) +treeclimb_unexport(nfs_export_t *ne, struct exportinfo *exip) { treenode_t *tnode, *old_nd; treenode_t *connect_point = NULL; - ASSERT(RW_WRITE_HELD(&exported_lock)); + ASSERT(RW_WRITE_HELD(&ne->exported_lock)); + ASSERT(curzone->zone_id == exip->exi_zoneid || + curzone->zone_id == global_zone->zone_id); + /* + * exi_tree can be null for the zone root + * which means we're already at the "top" + * and there's nothing more to "climb". + */ tnode = exip->exi_tree; + if (tnode == NULL) { + /* Should only happen for... */ + ASSERT(exip == ne->exi_root); + return; + } + /* * The unshared exportinfo was unlinked in unexport(). * Zeroing tree_exi ensures that we will skip it. @@ -831,7 +866,10 @@ treeclimb_unexport(struct exportinfo *exip) while (tnode != NULL) { - /* Stop at VROOT node which is exported or has child */ + /* + * Stop at VROOT (or zone root) node which is exported or has + * child. + */ if (TREE_ROOT(tnode) && (TREE_EXPORTED(tnode) || tnode->tree_child_first != NULL)) break; @@ -839,8 +877,12 @@ treeclimb_unexport(struct exportinfo *exip) /* Release pseudo export if it has no child */ if (TREE_ROOT(tnode) && !TREE_EXPORTED(tnode) && tnode->tree_child_first == NULL) { - export_unlink(tnode->tree_exi); + mutex_enter(&nfs_exi_id_lock); + avl_remove(&exi_id_tree, tnode->tree_exi); + mutex_exit(&nfs_exi_id_lock); + export_unlink(ne, tnode->tree_exi); exi_rele(tnode->tree_exi); + tnode->tree_exi = NULL; } /* Release visible in parent's exportinfo */ @@ -854,14 +896,14 @@ treeclimb_unexport(struct exportinfo *exip) /* Remove itself, if this is a leaf and non-exported node */ if (old_nd->tree_child_first == NULL && !TREE_EXPORTED(old_nd)) { - tree_remove_node(old_nd); + tree_remove_node(ne, old_nd); connect_point = tnode; } } /* Update the change timestamp */ if (connect_point != NULL) - tree_update_change(connect_point, NULL); + tree_update_change(ne, connect_point, NULL); } /* @@ -870,13 +912,13 @@ 
treeclimb_unexport(struct exportinfo *exip) * vnode. */ vnode_t * -untraverse(vnode_t *vp) +untraverse(vnode_t *vp, vnode_t *zone_rootvp) { vnode_t *tvp, *nextvp; tvp = vp; for (;;) { - if (! (tvp->v_flag & VROOT)) + if (!(tvp->v_flag & VROOT) && !VN_CMP(tvp, zone_rootvp)) break; /* lock vfs to prevent unmount of this vfs */ @@ -907,7 +949,7 @@ untraverse(vnode_t *vp) /* * Given an exportinfo, climb up to find the exportinfo for the VROOT - * of the filesystem. + * (or zone root) of the filesystem. * * e.g. / * | @@ -924,7 +966,7 @@ untraverse(vnode_t *vp) * * If d is shared, then c will be put into a's visible list. * Note: visible list is per filesystem and is attached to the - * VROOT exportinfo. + * VROOT exportinfo. Returned exi does NOT have a new hold. */ struct exportinfo * get_root_export(struct exportinfo *exip) @@ -956,12 +998,15 @@ has_visible(struct exportinfo *exi, vnode_t *vp) vp_is_exported = VN_CMP(vp, exi->exi_vp); /* - * An exported root vnode has a sub-dir shared if it has a visible list. - * i.e. if it does not have a visible list, then there is no node in - * this filesystem leads to any other shared node. + * An exported root vnode has a sub-dir shared if it has a visible + * list. i.e. if it does not have a visible list, then there is no + * node in this filesystem leads to any other shared node. */ - if (vp_is_exported && (vp->v_flag & VROOT)) + ASSERT3P(curzone->zone_id, ==, exi->exi_zoneid); + if (vp_is_exported && + ((vp->v_flag & VROOT) || VN_IS_CURZONEROOT(vp))) { return (exi->exi_visible ? 1 : 0); + } /* * Only the exportinfo of a fs root node may have a visible list. @@ -1034,7 +1079,7 @@ nfs_visible(struct exportinfo *exi, vnode_t *vp, int *expseudo) * Only a PSEUDO node has a visible list or an exported VROOT * node may have a visible list. */ - if (! 
PSEUDO(exi)) + if (!PSEUDO(exi)) exi = get_root_export(exi); /* Get the fid of the vnode */ @@ -1142,7 +1187,7 @@ nfs_visible_inode(struct exportinfo *exi, ino64_t ino, * Only a PSEUDO node has a visible list or an exported VROOT * node may have a visible list. */ - if (! PSEUDO(exi)) + if (!PSEUDO(exi)) exi = get_root_export(exi); for (*visp = exi->exi_visible; *visp != NULL; *visp = (*visp)->vis_next) @@ -1154,14 +1199,6 @@ nfs_visible_inode(struct exportinfo *exi, ino64_t ino, } /* - * The change attribute value of the root of nfs pseudo namespace. - * - * The ns_root_change is protected by exported_lock because all of the treenode - * operations are protected by exported_lock too. - */ -static timespec_t ns_root_change; - -/* * Get the change attribute from visible and returns TRUE. * If the change value is not available returns FALSE. */ @@ -1171,6 +1208,7 @@ nfs_visible_change(struct exportinfo *exi, vnode_t *vp, timespec_t *change) struct exp_visible *visp; fid_t fid; treenode_t *node; + nfs_export_t *ne = nfs_get_export(); /* * First check to see if vp is export root. @@ -1215,14 +1253,13 @@ nfs_visible_change(struct exportinfo *exi, vnode_t *vp, timespec_t *change) exproot: /* The VROOT export have its visible available through treenode */ node = exi->exi_tree; - if (node != ns_root) { + if (node != ne->ns_root) { ASSERT(node->tree_vis != NULL); *change = node->tree_vis->vis_change; } else { ASSERT(node->tree_vis == NULL); - *change = ns_root_change; + *change = ne->ns_root_change; } - return (TRUE); } @@ -1234,15 +1271,15 @@ exproot: * If the change value is not supplied, the current time is used. 
*/ void -tree_update_change(treenode_t *tnode, timespec_t *change) +tree_update_change(nfs_export_t *ne, treenode_t *tnode, timespec_t *change) { timespec_t *vis_change; ASSERT(tnode != NULL); - ASSERT((tnode != ns_root && tnode->tree_vis != NULL) || - (tnode == ns_root && tnode->tree_vis == NULL)); + ASSERT((tnode != ne->ns_root && tnode->tree_vis != NULL) || + (tnode == ne->ns_root && tnode->tree_vis == NULL)); - vis_change = tnode == ns_root ? &ns_root_change + vis_change = tnode == ne->ns_root ? &ne->ns_root_change : &tnode->tree_vis->vis_change; if (change != NULL) diff --git a/usr/src/uts/common/fs/nfs/nfs4_state.c b/usr/src/uts/common/fs/nfs/nfs4_state.c index 47941454bc..0c1efb26df 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_state.c +++ b/usr/src/uts/common/fs/nfs/nfs4_state.c @@ -18,9 +18,14 @@ * * CDDL HEADER END */ + /* * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2014 Nexenta Systems, Inc. All rights reserved. + */ + +/* + * Copyright 2018 Nexenta Systems, Inc. + * Copyright 2019 Nexenta by DDN, Inc. 
*/ #include <sys/systm.h> @@ -41,7 +46,6 @@ extern u_longlong_t nfs4_srv_caller_id; -extern time_t rfs4_start_time; extern uint_t nfs4_srv_vkey; stateid4 special0 = { @@ -72,7 +76,8 @@ int rfs4_debug; static uint32_t rfs4_database_debug = 0x00; -static void rfs4_ss_clid_write(rfs4_client_t *cp, char *leaf); +/* CSTYLED */ +static void rfs4_ss_clid_write(nfs4_srv_t *nsrv4, rfs4_client_t *cp, char *leaf); static void rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dir, char *leaf); static void rfs4_dss_clear_oldstate(rfs4_servinst_t *sip); static void rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip); @@ -121,11 +126,6 @@ rfs4_sw_exit(rfs4_state_wait_t *swp) mutex_exit(swp->sw_cv_lock); } -/* - * CPR callback id -- not related to v4 callbacks - */ -static callb_id_t cpr_id = 0; - static void deep_lock_copy(LOCK4res *dres, LOCK4res *sres) { @@ -138,6 +138,11 @@ deep_lock_copy(LOCK4res *dres, LOCK4res *sres) } } +/* + * CPR callback id -- not related to v4 callbacks + */ +static callb_id_t cpr_id = 0; + static void deep_lock_free(LOCK4res *res) { @@ -273,41 +278,6 @@ rfs4_copy_reply(nfs_resop4 *dst, nfs_resop4 *src) #define ADDRHASH(key) ((unsigned long)(key) >> 3) -/* Used to serialize create/destroy of rfs4_server_state database */ -kmutex_t rfs4_state_lock; -static rfs4_database_t *rfs4_server_state = NULL; - -/* Used to serialize lookups of clientids */ -static krwlock_t rfs4_findclient_lock; - -/* - * For now this "table" is exposed so that the CPR callback - * function can tromp through it.. 
- */ -rfs4_table_t *rfs4_client_tab; - -static rfs4_index_t *rfs4_clientid_idx; -static rfs4_index_t *rfs4_nfsclnt_idx; -static rfs4_table_t *rfs4_clntip_tab; -static rfs4_index_t *rfs4_clntip_idx; -static rfs4_table_t *rfs4_openowner_tab; -static rfs4_index_t *rfs4_openowner_idx; -static rfs4_table_t *rfs4_state_tab; -static rfs4_index_t *rfs4_state_idx; -static rfs4_index_t *rfs4_state_owner_file_idx; -static rfs4_index_t *rfs4_state_file_idx; -static rfs4_table_t *rfs4_lo_state_tab; -static rfs4_index_t *rfs4_lo_state_idx; -static rfs4_index_t *rfs4_lo_state_owner_idx; -static rfs4_table_t *rfs4_lockowner_tab; -static rfs4_index_t *rfs4_lockowner_idx; -static rfs4_index_t *rfs4_lockowner_pid_idx; -static rfs4_table_t *rfs4_file_tab; -static rfs4_index_t *rfs4_file_idx; -static rfs4_table_t *rfs4_deleg_state_tab; -static rfs4_index_t *rfs4_deleg_idx; -static rfs4_index_t *rfs4_deleg_state_idx; - #define MAXTABSZ 1024*1024 /* The values below are rfs4_lease_time units */ @@ -330,16 +300,25 @@ static rfs4_index_t *rfs4_deleg_state_idx; #define DELEG_STATE_CACHE_TIME 1 #endif +/* + * NFSv4 server state databases + * + * Initilized when the module is loaded and used by NFSv4 state tables. + * These kmem_cache databases are global, the tables that make use of these + * are per zone. 
+ */ +kmem_cache_t *rfs4_client_mem_cache; +kmem_cache_t *rfs4_clntIP_mem_cache; +kmem_cache_t *rfs4_openown_mem_cache; +kmem_cache_t *rfs4_openstID_mem_cache; +kmem_cache_t *rfs4_lockstID_mem_cache; +kmem_cache_t *rfs4_lockown_mem_cache; +kmem_cache_t *rfs4_file_mem_cache; +kmem_cache_t *rfs4_delegstID_mem_cache; -static time_t rfs4_client_cache_time = 0; -static time_t rfs4_clntip_cache_time = 0; -static time_t rfs4_openowner_cache_time = 0; -static time_t rfs4_state_cache_time = 0; -static time_t rfs4_lo_state_cache_time = 0; -static time_t rfs4_lockowner_cache_time = 0; -static time_t rfs4_file_cache_time = 0; -static time_t rfs4_deleg_state_cache_time = 0; - +/* + * NFSv4 state table functions + */ static bool_t rfs4_client_create(rfs4_entry_t, void *); static void rfs4_dss_remove_cpleaf(rfs4_client_t *); static void rfs4_dss_remove_leaf(rfs4_servinst_t *, char *, char *); @@ -424,7 +403,7 @@ static rfs4_ss_pn_t * rfs4_ss_pnalloc(char *dir, char *leaf) { rfs4_ss_pn_t *ss_pn; - int dir_len, leaf_len; + int dir_len, leaf_len; /* * validate we have a resonable path @@ -705,29 +684,29 @@ out: } static void -rfs4_ss_init(void) +rfs4_ss_init(nfs4_srv_t *nsrv4) { int npaths = 1; char *default_dss_path = NFS4_DSS_VAR_DIR; /* read the default stable storage state */ - rfs4_dss_readstate(npaths, &default_dss_path); + rfs4_dss_readstate(nsrv4, npaths, &default_dss_path); rfs4_ss_enabled = 1; } static void -rfs4_ss_fini(void) +rfs4_ss_fini(nfs4_srv_t *nsrv4) { rfs4_servinst_t *sip; - mutex_enter(&rfs4_servinst_lock); - sip = rfs4_cur_servinst; + mutex_enter(&nsrv4->servinst_lock); + sip = nsrv4->nfs4_cur_servinst; while (sip != NULL) { rfs4_dss_clear_oldstate(sip); sip = sip->next; } - mutex_exit(&rfs4_servinst_lock); + mutex_exit(&nsrv4->servinst_lock); } /* @@ -771,7 +750,7 @@ rfs4_dss_clear_oldstate(rfs4_servinst_t *sip) * Form the state and oldstate paths, and read in the stable storage files. 
*/ void -rfs4_dss_readstate(int npaths, char **paths) +rfs4_dss_readstate(nfs4_srv_t *nsrv4, int npaths, char **paths) { int i; char *state, *oldstate; @@ -795,8 +774,10 @@ rfs4_dss_readstate(int npaths, char **paths) * and move the latter's contents to old state * directory. */ - rfs4_ss_oldstate(rfs4_cur_servinst->oldstate, oldstate, NULL); - rfs4_ss_oldstate(rfs4_cur_servinst->oldstate, state, oldstate); + /* CSTYLED */ + rfs4_ss_oldstate(nsrv4->nfs4_cur_servinst->oldstate, oldstate, NULL); + /* CSTYLED */ + rfs4_ss_oldstate(nsrv4->nfs4_cur_servinst->oldstate, state, oldstate); } kmem_free(state, MAXPATHLEN); @@ -809,7 +790,7 @@ rfs4_dss_readstate(int npaths, char **paths) * granted permission to perform reclaims. */ void -rfs4_ss_chkclid(rfs4_client_t *cp) +rfs4_ss_chkclid(nfs4_srv_t *nsrv4, rfs4_client_t *cp) { rfs4_servinst_t *sip; @@ -830,15 +811,15 @@ rfs4_ss_chkclid(rfs4_client_t *cp) * Start at the current instance, and walk the list backwards * to the first. */ - mutex_enter(&rfs4_servinst_lock); - for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) { + mutex_enter(&nsrv4->servinst_lock); + for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) { rfs4_ss_chkclid_sip(cp, sip); /* if the above check found this client, we're done */ if (cp->rc_can_reclaim) break; } - mutex_exit(&rfs4_servinst_lock); + mutex_exit(&nsrv4->servinst_lock); } static void @@ -888,7 +869,7 @@ rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip) * the server-generated short-hand clientid. 
*/ void -rfs4_ss_clid(rfs4_client_t *cp) +rfs4_ss_clid(nfs4_srv_t *nsrv4, rfs4_client_t *cp) { const char *kinet_ntop6(uchar_t *, char *, size_t); char leaf[MAXNAMELEN], buf[INET6_ADDRSTRLEN]; @@ -920,7 +901,7 @@ rfs4_ss_clid(rfs4_client_t *cp) (void) snprintf(leaf, MAXNAMELEN, "%s-%llx", buf, (longlong_t)cp->rc_clientid); - rfs4_ss_clid_write(cp, leaf); + rfs4_ss_clid_write(nsrv4, cp, leaf); } /* @@ -929,7 +910,7 @@ rfs4_ss_clid(rfs4_client_t *cp) * multiple directories. */ static void -rfs4_ss_clid_write(rfs4_client_t *cp, char *leaf) +rfs4_ss_clid_write(nfs4_srv_t *nsrv4, rfs4_client_t *cp, char *leaf) { rfs4_servinst_t *sip; @@ -943,8 +924,8 @@ rfs4_ss_clid_write(rfs4_client_t *cp, char *leaf) * to all instances' paths. Start at the current instance, and * walk the list backwards to the first. */ - mutex_enter(&rfs4_servinst_lock); - for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) { + mutex_enter(&nsrv4->servinst_lock); + for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) { int i, npaths = sip->dss_npaths; /* write the leaf file to all DSS paths */ @@ -958,7 +939,7 @@ rfs4_ss_clid_write(rfs4_client_t *cp, char *leaf) rfs4_ss_clid_write_one(cp, dss_path->path, leaf); } } - mutex_exit(&rfs4_servinst_lock); + mutex_exit(&nsrv4->servinst_lock); } /* @@ -1151,34 +1132,107 @@ rfs4_client_scrub(rfs4_entry_t ent, void *arg) void rfs4_clear_client_state(struct nfs4clrst_args *clr) { - (void) rfs4_dbe_walk(rfs4_client_tab, rfs4_client_scrub, clr); + nfs4_srv_t *nsrv4; + nsrv4 = nfs4_get_srv(); + (void) rfs4_dbe_walk(nsrv4->rfs4_client_tab, rfs4_client_scrub, clr); } /* * Used to initialize the NFSv4 server's state or database. All of - * the tables are created and timers are set. Only called when NFSv4 - * service is provided. + * the tables are created and timers are set. 
*/ void -rfs4_state_init() +rfs4_state_g_init() { - int start_grace; extern boolean_t rfs4_cpr_callb(void *, int); - char *dss_path = NFS4_DSS_VAR_DIR; - time_t start_time; + /* + * Add a CPR callback so that we can update client + * access times to extend the lease after a suspend + * and resume (using the same class as rpcmod/connmgr) + */ + cpr_id = callb_add(rfs4_cpr_callb, 0, CB_CL_CPR_RPC, "rfs4"); - mutex_enter(&rfs4_state_lock); + /* + * NFSv4 server state databases + * + * Initialized when the module is loaded and used by NFSv4 state + * tables. These kmem_cache free pools are used globally, the NFSv4 + * state tables which make use of these kmem_cache free pools are per + * zone. + * + * initialize the global kmem_cache free pools which will be used by + * the NFSv4 state tables. + */ + /* CSTYLED */ + rfs4_client_mem_cache = nfs4_init_mem_cache("Client_entry_cache", 2, sizeof (rfs4_client_t), 0); + /* CSTYLED */ + rfs4_clntIP_mem_cache = nfs4_init_mem_cache("ClntIP_entry_cache", 1, sizeof (rfs4_clntip_t), 1); + /* CSTYLED */ + rfs4_openown_mem_cache = nfs4_init_mem_cache("OpenOwner_entry_cache", 1, sizeof (rfs4_openowner_t), 2); + /* CSTYLED */ + rfs4_openstID_mem_cache = nfs4_init_mem_cache("OpenStateID_entry_cache", 3, sizeof (rfs4_state_t), 3); + /* CSTYLED */ + rfs4_lockstID_mem_cache = nfs4_init_mem_cache("LockStateID_entry_cache", 3, sizeof (rfs4_lo_state_t), 4); + /* CSTYLED */ + rfs4_lockown_mem_cache = nfs4_init_mem_cache("Lockowner_entry_cache", 2, sizeof (rfs4_lockowner_t), 5); + /* CSTYLED */ + rfs4_file_mem_cache = nfs4_init_mem_cache("File_entry_cache", 1, sizeof (rfs4_file_t), 6); + /* CSTYLED */ + rfs4_delegstID_mem_cache = nfs4_init_mem_cache("DelegStateID_entry_cache", 2, sizeof (rfs4_deleg_state_t), 7); + rfs4_client_clrst = rfs4_clear_client_state; +} + + +/* + * Used at server shutdown to cleanup all of the NFSv4 server's structures + * and other state. 
+ */ +void +rfs4_state_g_fini() +{ + int i; /* - * If the server state database has already been initialized, - * skip it + * Cleanup the CPR callback. */ - if (rfs4_server_state != NULL) { - mutex_exit(&rfs4_state_lock); - return; + if (cpr_id) + (void) callb_delete(cpr_id); + + rfs4_client_clrst = NULL; + + /* free the NFSv4 state databases */ + for (i = 0; i < RFS4_DB_MEM_CACHE_NUM; i++) { + kmem_cache_destroy(rfs4_db_mem_cache_table[i].r_db_mem_cache); + rfs4_db_mem_cache_table[i].r_db_mem_cache = NULL; } - rw_init(&rfs4_findclient_lock, NULL, RW_DEFAULT, NULL); + rfs4_client_mem_cache = NULL; + rfs4_clntIP_mem_cache = NULL; + rfs4_openown_mem_cache = NULL; + rfs4_openstID_mem_cache = NULL; + rfs4_lockstID_mem_cache = NULL; + rfs4_lockown_mem_cache = NULL; + rfs4_file_mem_cache = NULL; + rfs4_delegstID_mem_cache = NULL; + + /* DSS: distributed stable storage */ + nvlist_free(rfs4_dss_oldpaths); + nvlist_free(rfs4_dss_paths); + rfs4_dss_paths = rfs4_dss_oldpaths = NULL; +} + +/* + * Used to initialize the per zone NFSv4 server's state + */ +void +rfs4_state_zone_init(nfs4_srv_t *nsrv4) +{ + time_t start_time; + int start_grace; + char *dss_path = NFS4_DSS_VAR_DIR; + + /* DSS: distributed stable storage: initialise served paths list */ + nsrv4->dss_pathlist = NULL; /* * Set the boot time. If the server @@ -1187,13 +1241,10 @@ rfs4_state_init() * regardless. A small window but it exists... */ start_time = gethrestime_sec(); - if (rfs4_start_time < start_time) - rfs4_start_time = start_time; + if (nsrv4->rfs4_start_time < start_time) + nsrv4->rfs4_start_time = start_time; else - rfs4_start_time++; - - /* DSS: distributed stable storage: initialise served paths list */ - rfs4_dss_pathlist = NULL; + nsrv4->rfs4_start_time++; /* * Create the first server instance, or a new one if the server has @@ -1202,42 +1253,67 @@ rfs4_state_init() * clients' recovery window. 
*/ start_grace = 0; - rfs4_servinst_create(start_grace, 1, &dss_path); + if (curzone == global_zone && rfs4_dss_numnewpaths > 0) { + int i; + char **dss_allpaths = NULL; + dss_allpaths = kmem_alloc(sizeof (char *) * + (rfs4_dss_numnewpaths + 1), KM_SLEEP); + /* + * Add the default path into the list of paths for saving + * state informantion. + */ + dss_allpaths[0] = dss_path; + for (i = 0; i < rfs4_dss_numnewpaths; i++) { + dss_allpaths[i + 1] = rfs4_dss_newpaths[i]; + } + rfs4_servinst_create(nsrv4, start_grace, + (rfs4_dss_numnewpaths + 1), dss_allpaths); + kmem_free(dss_allpaths, + (sizeof (char *) * (rfs4_dss_numnewpaths + 1))); + } else { + rfs4_servinst_create(nsrv4, start_grace, 1, &dss_path); + } /* reset the "first NFSv4 request" status */ - rfs4_seen_first_compound = 0; + nsrv4->seen_first_compound = 0; + + mutex_enter(&nsrv4->state_lock); /* - * Add a CPR callback so that we can update client - * access times to extend the lease after a suspend - * and resume (using the same class as rpcmod/connmgr) + * If the server state database has already been initialized, + * skip it */ - cpr_id = callb_add(rfs4_cpr_callb, 0, CB_CL_CPR_RPC, "rfs4"); + if (nsrv4->nfs4_server_state != NULL) { + mutex_exit(&nsrv4->state_lock); + return; + } + + rw_init(&nsrv4->rfs4_findclient_lock, NULL, RW_DEFAULT, NULL); /* set the various cache timers for table creation */ - if (rfs4_client_cache_time == 0) - rfs4_client_cache_time = CLIENT_CACHE_TIME; - if (rfs4_openowner_cache_time == 0) - rfs4_openowner_cache_time = OPENOWNER_CACHE_TIME; - if (rfs4_state_cache_time == 0) - rfs4_state_cache_time = STATE_CACHE_TIME; - if (rfs4_lo_state_cache_time == 0) - rfs4_lo_state_cache_time = LO_STATE_CACHE_TIME; - if (rfs4_lockowner_cache_time == 0) - rfs4_lockowner_cache_time = LOCKOWNER_CACHE_TIME; - if (rfs4_file_cache_time == 0) - rfs4_file_cache_time = FILE_CACHE_TIME; - if (rfs4_deleg_state_cache_time == 0) - rfs4_deleg_state_cache_time = DELEG_STATE_CACHE_TIME; + if 
(nsrv4->rfs4_client_cache_time == 0) + nsrv4->rfs4_client_cache_time = CLIENT_CACHE_TIME; + if (nsrv4->rfs4_openowner_cache_time == 0) + nsrv4->rfs4_openowner_cache_time = OPENOWNER_CACHE_TIME; + if (nsrv4->rfs4_state_cache_time == 0) + nsrv4->rfs4_state_cache_time = STATE_CACHE_TIME; + if (nsrv4->rfs4_lo_state_cache_time == 0) + nsrv4->rfs4_lo_state_cache_time = LO_STATE_CACHE_TIME; + if (nsrv4->rfs4_lockowner_cache_time == 0) + nsrv4->rfs4_lockowner_cache_time = LOCKOWNER_CACHE_TIME; + if (nsrv4->rfs4_file_cache_time == 0) + nsrv4->rfs4_file_cache_time = FILE_CACHE_TIME; + if (nsrv4->rfs4_deleg_state_cache_time == 0) + nsrv4->rfs4_deleg_state_cache_time = DELEG_STATE_CACHE_TIME; /* Create the overall database to hold all server state */ - rfs4_server_state = rfs4_database_create(rfs4_database_debug); + nsrv4->nfs4_server_state = rfs4_database_create(rfs4_database_debug); /* Now create the individual tables */ - rfs4_client_cache_time *= rfs4_lease_time; - rfs4_client_tab = rfs4_table_create(rfs4_server_state, + nsrv4->rfs4_client_cache_time *= rfs4_lease_time; + nsrv4->rfs4_client_tab = rfs4_table_create(nsrv4->nfs4_server_state, "Client", - rfs4_client_cache_time, + nsrv4->rfs4_client_cache_time, 2, rfs4_client_create, rfs4_client_destroy, @@ -1245,19 +1321,19 @@ rfs4_state_init() sizeof (rfs4_client_t), TABSIZE, MAXTABSZ/8, 100); - rfs4_nfsclnt_idx = rfs4_index_create(rfs4_client_tab, + nsrv4->rfs4_nfsclnt_idx = rfs4_index_create(nsrv4->rfs4_client_tab, "nfs_client_id4", nfsclnt_hash, nfsclnt_compare, nfsclnt_mkkey, TRUE); - rfs4_clientid_idx = rfs4_index_create(rfs4_client_tab, + nsrv4->rfs4_clientid_idx = rfs4_index_create(nsrv4->rfs4_client_tab, "client_id", clientid_hash, clientid_compare, clientid_mkkey, FALSE); - rfs4_clntip_cache_time = 86400 * 365; /* about a year */ - rfs4_clntip_tab = rfs4_table_create(rfs4_server_state, + nsrv4->rfs4_clntip_cache_time = 86400 * 365; /* about a year */ + nsrv4->rfs4_clntip_tab = 
rfs4_table_create(nsrv4->nfs4_server_state, "ClntIP", - rfs4_clntip_cache_time, + nsrv4->rfs4_clntip_cache_time, 1, rfs4_clntip_create, rfs4_clntip_destroy, @@ -1265,15 +1341,15 @@ rfs4_state_init() sizeof (rfs4_clntip_t), TABSIZE, MAXTABSZ, 100); - rfs4_clntip_idx = rfs4_index_create(rfs4_clntip_tab, + nsrv4->rfs4_clntip_idx = rfs4_index_create(nsrv4->rfs4_clntip_tab, "client_ip", clntip_hash, clntip_compare, clntip_mkkey, TRUE); - rfs4_openowner_cache_time *= rfs4_lease_time; - rfs4_openowner_tab = rfs4_table_create(rfs4_server_state, + nsrv4->rfs4_openowner_cache_time *= rfs4_lease_time; + nsrv4->rfs4_openowner_tab = rfs4_table_create(nsrv4->nfs4_server_state, "OpenOwner", - rfs4_openowner_cache_time, + nsrv4->rfs4_openowner_cache_time, 1, rfs4_openowner_create, rfs4_openowner_destroy, @@ -1281,15 +1357,15 @@ rfs4_state_init() sizeof (rfs4_openowner_t), TABSIZE, MAXTABSZ, 100); - rfs4_openowner_idx = rfs4_index_create(rfs4_openowner_tab, + nsrv4->rfs4_openowner_idx = rfs4_index_create(nsrv4->rfs4_openowner_tab, "open_owner4", openowner_hash, openowner_compare, openowner_mkkey, TRUE); - rfs4_state_cache_time *= rfs4_lease_time; - rfs4_state_tab = rfs4_table_create(rfs4_server_state, + nsrv4->rfs4_state_cache_time *= rfs4_lease_time; + nsrv4->rfs4_state_tab = rfs4_table_create(nsrv4->nfs4_server_state, "OpenStateID", - rfs4_state_cache_time, + nsrv4->rfs4_state_cache_time, 3, rfs4_state_create, rfs4_state_destroy, @@ -1298,25 +1374,26 @@ rfs4_state_init() TABSIZE, MAXTABSZ, 100); - rfs4_state_owner_file_idx = rfs4_index_create(rfs4_state_tab, + /* CSTYLED */ + nsrv4->rfs4_state_owner_file_idx = rfs4_index_create(nsrv4->rfs4_state_tab, "Openowner-File", state_owner_file_hash, state_owner_file_compare, state_owner_file_mkkey, TRUE); - rfs4_state_idx = rfs4_index_create(rfs4_state_tab, + nsrv4->rfs4_state_idx = rfs4_index_create(nsrv4->rfs4_state_tab, "State-id", state_hash, state_compare, state_mkkey, FALSE); - rfs4_state_file_idx = rfs4_index_create(rfs4_state_tab, 
+ nsrv4->rfs4_state_file_idx = rfs4_index_create(nsrv4->rfs4_state_tab, "File", state_file_hash, state_file_compare, state_file_mkkey, FALSE); - rfs4_lo_state_cache_time *= rfs4_lease_time; - rfs4_lo_state_tab = rfs4_table_create(rfs4_server_state, + nsrv4->rfs4_lo_state_cache_time *= rfs4_lease_time; + nsrv4->rfs4_lo_state_tab = rfs4_table_create(nsrv4->nfs4_server_state, "LockStateID", - rfs4_lo_state_cache_time, + nsrv4->rfs4_lo_state_cache_time, 2, rfs4_lo_state_create, rfs4_lo_state_destroy, @@ -1325,22 +1402,23 @@ rfs4_state_init() TABSIZE, MAXTABSZ, 100); - rfs4_lo_state_owner_idx = rfs4_index_create(rfs4_lo_state_tab, + /* CSTYLED */ + nsrv4->rfs4_lo_state_owner_idx = rfs4_index_create(nsrv4->rfs4_lo_state_tab, "lockownerxstate", lo_state_lo_hash, lo_state_lo_compare, lo_state_lo_mkkey, TRUE); - rfs4_lo_state_idx = rfs4_index_create(rfs4_lo_state_tab, + nsrv4->rfs4_lo_state_idx = rfs4_index_create(nsrv4->rfs4_lo_state_tab, "State-id", lo_state_hash, lo_state_compare, lo_state_mkkey, FALSE); - rfs4_lockowner_cache_time *= rfs4_lease_time; + nsrv4->rfs4_lockowner_cache_time *= rfs4_lease_time; - rfs4_lockowner_tab = rfs4_table_create(rfs4_server_state, + nsrv4->rfs4_lockowner_tab = rfs4_table_create(nsrv4->nfs4_server_state, "Lockowner", - rfs4_lockowner_cache_time, + nsrv4->rfs4_lockowner_cache_time, 2, rfs4_lockowner_create, rfs4_lockowner_destroy, @@ -1349,20 +1427,21 @@ rfs4_state_init() TABSIZE, MAXTABSZ, 100); - rfs4_lockowner_idx = rfs4_index_create(rfs4_lockowner_tab, + nsrv4->rfs4_lockowner_idx = rfs4_index_create(nsrv4->rfs4_lockowner_tab, "lock_owner4", lockowner_hash, lockowner_compare, lockowner_mkkey, TRUE); - rfs4_lockowner_pid_idx = rfs4_index_create(rfs4_lockowner_tab, + /* CSTYLED */ + nsrv4->rfs4_lockowner_pid_idx = rfs4_index_create(nsrv4->rfs4_lockowner_tab, "pid", pid_hash, pid_compare, pid_mkkey, FALSE); - rfs4_file_cache_time *= rfs4_lease_time; - rfs4_file_tab = rfs4_table_create(rfs4_server_state, + nsrv4->rfs4_file_cache_time *= 
rfs4_lease_time; + nsrv4->rfs4_file_tab = rfs4_table_create(nsrv4->nfs4_server_state, "File", - rfs4_file_cache_time, + nsrv4->rfs4_file_cache_time, 1, rfs4_file_create, rfs4_file_destroy, @@ -1371,14 +1450,15 @@ rfs4_state_init() TABSIZE, MAXTABSZ, -1); - rfs4_file_idx = rfs4_index_create(rfs4_file_tab, + nsrv4->rfs4_file_idx = rfs4_index_create(nsrv4->rfs4_file_tab, "Filehandle", file_hash, file_compare, file_mkkey, TRUE); - rfs4_deleg_state_cache_time *= rfs4_lease_time; - rfs4_deleg_state_tab = rfs4_table_create(rfs4_server_state, + nsrv4->rfs4_deleg_state_cache_time *= rfs4_lease_time; + /* CSTYLED */ + nsrv4->rfs4_deleg_state_tab = rfs4_table_create(nsrv4->nfs4_server_state, "DelegStateID", - rfs4_deleg_state_cache_time, + nsrv4->rfs4_deleg_state_cache_time, 2, rfs4_deleg_state_create, rfs4_deleg_state_destroy, @@ -1386,87 +1466,89 @@ rfs4_state_init() sizeof (rfs4_deleg_state_t), TABSIZE, MAXTABSZ, 100); - rfs4_deleg_idx = rfs4_index_create(rfs4_deleg_state_tab, + nsrv4->rfs4_deleg_idx = rfs4_index_create(nsrv4->rfs4_deleg_state_tab, "DelegByFileClient", deleg_hash, deleg_compare, deleg_mkkey, TRUE); - rfs4_deleg_state_idx = rfs4_index_create(rfs4_deleg_state_tab, + /* CSTYLED */ + nsrv4->rfs4_deleg_state_idx = rfs4_index_create(nsrv4->rfs4_deleg_state_tab, "DelegState", deleg_state_hash, deleg_state_compare, deleg_state_mkkey, FALSE); + mutex_exit(&nsrv4->state_lock); + /* * Init the stable storage. */ - rfs4_ss_init(); - - rfs4_client_clrst = rfs4_clear_client_state; - - mutex_exit(&rfs4_state_lock); + rfs4_ss_init(nsrv4); } - /* - * Used at server shutdown to cleanup all of the NFSv4 server's structures - * and other state. + * Used at server shutdown to cleanup all of NFSv4 server's zone structures + * and state. 
*/ void -rfs4_state_fini() +rfs4_state_zone_fini() { rfs4_database_t *dbp; + nfs4_srv_t *nsrv4; + nsrv4 = nfs4_get_srv(); + + rfs4_set_deleg_policy(nsrv4, SRV_NEVER_DELEGATE); + + /* + * Clean up any dangling stable storage structures BEFORE calling + * rfs4_servinst_destroy_all() so there are no dangling structures + * (i.e. the srvinsts are all cleared of danglers BEFORE they get + * freed). + */ + rfs4_ss_fini(nsrv4); - mutex_enter(&rfs4_state_lock); + mutex_enter(&nsrv4->state_lock); - if (rfs4_server_state == NULL) { - mutex_exit(&rfs4_state_lock); + if (nsrv4->nfs4_server_state == NULL) { + mutex_exit(&nsrv4->state_lock); return; } - rfs4_client_clrst = NULL; + /* destroy server instances and current instance ptr */ + rfs4_servinst_destroy_all(nsrv4); - rfs4_set_deleg_policy(SRV_NEVER_DELEGATE); - dbp = rfs4_server_state; - rfs4_server_state = NULL; + /* reset the "first NFSv4 request" status */ + nsrv4->seen_first_compound = 0; - /* - * Cleanup the CPR callback. - */ - if (cpr_id) - (void) callb_delete(cpr_id); + dbp = nsrv4->nfs4_server_state; + nsrv4->nfs4_server_state = NULL; - rw_destroy(&rfs4_findclient_lock); + rw_destroy(&nsrv4->rfs4_findclient_lock); /* First stop all of the reaper threads in the database */ rfs4_database_shutdown(dbp); - /* clean up any dangling stable storage structures */ - rfs4_ss_fini(); - /* Now actually destroy/release the database and its tables */ + + /* + * WARNING: There may be consumers of the rfs4 database still + * active as we destroy these. IF that's the case, consider putting + * some of their _zone_fini()-like functions into the zsd key as + * ~~SHUTDOWN~~ functions instead of ~~DESTROY~~ functions. We can + * maintain some ordering guarantees better that way. 
+ */ + /* Now destroy/release the database tables */ rfs4_database_destroy(dbp); /* Reset the cache timers for next time */ - rfs4_client_cache_time = 0; - rfs4_openowner_cache_time = 0; - rfs4_state_cache_time = 0; - rfs4_lo_state_cache_time = 0; - rfs4_lockowner_cache_time = 0; - rfs4_file_cache_time = 0; - rfs4_deleg_state_cache_time = 0; - - mutex_exit(&rfs4_state_lock); - - /* destroy server instances and current instance ptr */ - rfs4_servinst_destroy_all(); - - /* reset the "first NFSv4 request" status */ - rfs4_seen_first_compound = 0; + nsrv4->rfs4_client_cache_time = 0; + nsrv4->rfs4_openowner_cache_time = 0; + nsrv4->rfs4_state_cache_time = 0; + nsrv4->rfs4_lo_state_cache_time = 0; + nsrv4->rfs4_lockowner_cache_time = 0; + nsrv4->rfs4_file_cache_time = 0; + nsrv4->rfs4_deleg_state_cache_time = 0; - /* DSS: distributed stable storage */ - nvlist_free(rfs4_dss_oldpaths); - nvlist_free(rfs4_dss_paths); - rfs4_dss_paths = rfs4_dss_oldpaths = NULL; + mutex_exit(&nsrv4->state_lock); } typedef union { @@ -1581,6 +1663,7 @@ rfs4_client_expiry(rfs4_entry_t u_entry) static void rfs4_dss_remove_cpleaf(rfs4_client_t *cp) { + nfs4_srv_t *nsrv4; rfs4_servinst_t *sip; char *leaf = cp->rc_ss_pn->leaf; @@ -1590,12 +1673,13 @@ rfs4_dss_remove_cpleaf(rfs4_client_t *cp) * from all server instances. 
*/ - mutex_enter(&rfs4_servinst_lock); - for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) { + nsrv4 = nfs4_get_srv(); + mutex_enter(&nsrv4->servinst_lock); + for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) { /* remove the leaf file associated with this server instance */ rfs4_dss_remove_leaf(sip, NFS4_DSS_STATE_LEAF, leaf); } - mutex_exit(&rfs4_servinst_lock); + mutex_exit(&nsrv4->servinst_lock); } static void @@ -1663,10 +1747,13 @@ rfs4_client_create(rfs4_entry_t u_entry, void *arg) struct sockaddr *ca; cid *cidp; scid_confirm_verf *scvp; + nfs4_srv_t *nsrv4; + + nsrv4 = nfs4_get_srv(); /* Get a clientid to give to the client */ cidp = (cid *)&cp->rc_clientid; - cidp->impl_id.start_time = rfs4_start_time; + cidp->impl_id.start_time = nsrv4->rfs4_start_time; cidp->impl_id.c_id = (uint32_t)rfs4_dbe_getid(cp->rc_dbe); /* If we are booted as a cluster node, embed our nodeid */ @@ -1724,7 +1811,7 @@ rfs4_client_create(rfs4_entry_t u_entry, void *arg) * rfs4_servinst_assign(). In this case it's not strictly necessary. 
*/ rfs4_dbe_hold(cp->rc_dbe); - rfs4_servinst_assign(cp, rfs4_cur_servinst); + rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst); rfs4_dbe_rele(cp->rc_dbe); return (TRUE); @@ -1755,22 +1842,24 @@ rfs4_client_t * rfs4_findclient(nfs_client_id4 *client, bool_t *create, rfs4_client_t *oldcp) { rfs4_client_t *cp; + nfs4_srv_t *nsrv4; + nsrv4 = nfs4_get_srv(); if (oldcp) { - rw_enter(&rfs4_findclient_lock, RW_WRITER); + rw_enter(&nsrv4->rfs4_findclient_lock, RW_WRITER); rfs4_dbe_hide(oldcp->rc_dbe); } else { - rw_enter(&rfs4_findclient_lock, RW_READER); + rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER); } - cp = (rfs4_client_t *)rfs4_dbsearch(rfs4_nfsclnt_idx, client, + cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_nfsclnt_idx, client, create, (void *)client, RFS4_DBS_VALID); if (oldcp) rfs4_dbe_unhide(oldcp->rc_dbe); - rw_exit(&rfs4_findclient_lock); + rw_exit(&nsrv4->rfs4_findclient_lock); return (cp); } @@ -1781,17 +1870,18 @@ rfs4_findclient_by_id(clientid4 clientid, bool_t find_unconfirmed) rfs4_client_t *cp; bool_t create = FALSE; cid *cidp = (cid *)&clientid; + nfs4_srv_t *nsrv4 = nfs4_get_srv(); /* If we're a cluster and the nodeid isn't right, short-circuit */ if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp)) return (NULL); - rw_enter(&rfs4_findclient_lock, RW_READER); + rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER); - cp = (rfs4_client_t *)rfs4_dbsearch(rfs4_clientid_idx, &clientid, + cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx, &clientid, &create, NULL, RFS4_DBS_VALID); - rw_exit(&rfs4_findclient_lock); + rw_exit(&nsrv4->rfs4_findclient_lock); if (cp && cp->rc_need_confirm && find_unconfirmed == FALSE) { rfs4_client_rele(cp); @@ -1899,13 +1989,16 @@ rfs4_clntip_t * rfs4_find_clntip(struct sockaddr *addr, bool_t *create) { rfs4_clntip_t *cp; + nfs4_srv_t *nsrv4; - rw_enter(&rfs4_findclient_lock, RW_READER); + nsrv4 = nfs4_get_srv(); - cp = (rfs4_clntip_t *)rfs4_dbsearch(rfs4_clntip_idx, addr, + 
rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER); + + cp = (rfs4_clntip_t *)rfs4_dbsearch(nsrv4->rfs4_clntip_idx, addr, create, addr, RFS4_DBS_VALID); - rw_exit(&rfs4_findclient_lock); + rw_exit(&nsrv4->rfs4_findclient_lock); return (cp); } @@ -1915,19 +2008,20 @@ rfs4_invalidate_clntip(struct sockaddr *addr) { rfs4_clntip_t *cp; bool_t create = FALSE; + nfs4_srv_t *nsrv4 = nfs4_get_srv(); - rw_enter(&rfs4_findclient_lock, RW_READER); + rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER); - cp = (rfs4_clntip_t *)rfs4_dbsearch(rfs4_clntip_idx, addr, + cp = (rfs4_clntip_t *)rfs4_dbsearch(nsrv4->rfs4_clntip_idx, addr, &create, NULL, RFS4_DBS_VALID); if (cp == NULL) { - rw_exit(&rfs4_findclient_lock); + rw_exit(&nsrv4->rfs4_findclient_lock); return; } rfs4_dbe_invalidate(cp->ri_dbe); rfs4_dbe_rele(cp->ri_dbe); - rw_exit(&rfs4_findclient_lock); + rw_exit(&nsrv4->rfs4_findclient_lock); } bool_t @@ -2075,14 +2169,15 @@ rfs4_openowner_create(rfs4_entry_t u_entry, void *arg) seqid4 seqid = argp->ro_open_seqid; rfs4_client_t *cp; bool_t create = FALSE; + nfs4_srv_t *nsrv4 = nfs4_get_srv(); - rw_enter(&rfs4_findclient_lock, RW_READER); + rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER); - cp = (rfs4_client_t *)rfs4_dbsearch(rfs4_clientid_idx, + cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx, &openowner->clientid, &create, NULL, RFS4_DBS_VALID); - rw_exit(&rfs4_findclient_lock); + rw_exit(&nsrv4->rfs4_findclient_lock); if (cp == NULL) return (FALSE); @@ -2124,10 +2219,12 @@ rfs4_findopenowner(open_owner4 *openowner, bool_t *create, seqid4 seqid) { rfs4_openowner_t *oo; rfs4_openowner_t arg; + nfs4_srv_t *nsrv4 = nfs4_get_srv(); arg.ro_owner = *openowner; arg.ro_open_seqid = seqid; - oo = (rfs4_openowner_t *)rfs4_dbsearch(rfs4_openowner_idx, openowner, + /* CSTYLED */ + oo = (rfs4_openowner_t *)rfs4_dbsearch(nsrv4->rfs4_openowner_idx, openowner, create, &arg, RFS4_DBS_VALID); return (oo); @@ -2270,14 +2367,15 @@ rfs4_lockowner_create(rfs4_entry_t u_entry, void 
*arg) lock_owner4 *lockowner = (lock_owner4 *)arg; rfs4_client_t *cp; bool_t create = FALSE; + nfs4_srv_t *nsrv4 = nfs4_get_srv(); - rw_enter(&rfs4_findclient_lock, RW_READER); + rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER); - cp = (rfs4_client_t *)rfs4_dbsearch(rfs4_clientid_idx, + cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx, &lockowner->clientid, &create, NULL, RFS4_DBS_VALID); - rw_exit(&rfs4_findclient_lock); + rw_exit(&nsrv4->rfs4_findclient_lock); if (cp == NULL) return (FALSE); @@ -2298,8 +2396,10 @@ rfs4_lockowner_t * rfs4_findlockowner(lock_owner4 *lockowner, bool_t *create) { rfs4_lockowner_t *lo; + nfs4_srv_t *nsrv4 = nfs4_get_srv(); - lo = (rfs4_lockowner_t *)rfs4_dbsearch(rfs4_lockowner_idx, lockowner, + /* CSTYLED */ + lo = (rfs4_lockowner_t *)rfs4_dbsearch(nsrv4->rfs4_lockowner_idx, lockowner, create, lockowner, RFS4_DBS_VALID); return (lo); @@ -2310,8 +2410,9 @@ rfs4_findlockowner_by_pid(pid_t pid) { rfs4_lockowner_t *lo; bool_t create = FALSE; + nfs4_srv_t *nsrv4 = nfs4_get_srv(); - lo = (rfs4_lockowner_t *)rfs4_dbsearch(rfs4_lockowner_pid_idx, + lo = (rfs4_lockowner_t *)rfs4_dbsearch(nsrv4->rfs4_lockowner_pid_idx, (void *)(uintptr_t)pid, &create, NULL, RFS4_DBS_VALID); return (lo); @@ -2422,12 +2523,14 @@ rfs4_findfile(vnode_t *vp, nfs_fh4 *fh, bool_t *create) { rfs4_file_t *fp; rfs4_fcreate_arg arg; + nfs4_srv_t *nsrv4 = nfs4_get_srv(); arg.vp = vp; arg.fh = fh; if (*create == TRUE) - fp = (rfs4_file_t *)rfs4_dbsearch(rfs4_file_idx, vp, create, + /* CSTYLED */ + fp = (rfs4_file_t *)rfs4_dbsearch(nsrv4->rfs4_file_idx, vp, create, &arg, RFS4_DBS_VALID); else { mutex_enter(&vp->v_vsd_lock); @@ -2462,6 +2565,7 @@ rfs4_findfile_withlock(vnode_t *vp, nfs_fh4 *fh, bool_t *create) rfs4_file_t *fp; rfs4_fcreate_arg arg; bool_t screate = *create; + nfs4_srv_t *nsrv4 = nfs4_get_srv(); if (screate == FALSE) { mutex_enter(&vp->v_vsd_lock); @@ -2492,8 +2596,8 @@ retry: arg.vp = vp; arg.fh = fh; - fp = (rfs4_file_t 
*)rfs4_dbsearch(rfs4_file_idx, vp, create, - &arg, RFS4_DBS_VALID); + fp = (rfs4_file_t *)rfs4_dbsearch(nsrv4->rfs4_file_idx, vp, + create, &arg, RFS4_DBS_VALID); if (fp != NULL) { rw_enter(&fp->rf_file_rwlock, RW_WRITER); if (fp->rf_vp == NULL) { @@ -2648,8 +2752,9 @@ rfs4_findlo_state(stateid_t *id, bool_t lock_fp) { rfs4_lo_state_t *lsp; bool_t create = FALSE; + nfs4_srv_t *nsrv4 = nfs4_get_srv(); - lsp = (rfs4_lo_state_t *)rfs4_dbsearch(rfs4_lo_state_idx, id, + lsp = (rfs4_lo_state_t *)rfs4_dbsearch(nsrv4->rfs4_lo_state_idx, id, &create, NULL, RFS4_DBS_VALID); if (lock_fp == TRUE && lsp != NULL) rw_enter(&lsp->rls_state->rs_finfo->rf_file_rwlock, RW_READER); @@ -2688,12 +2793,13 @@ rfs4_findlo_state_by_owner(rfs4_lockowner_t *lo, rfs4_state_t *sp, { rfs4_lo_state_t *lsp; rfs4_lo_state_t arg; + nfs4_srv_t *nsrv4 = nfs4_get_srv(); arg.rls_locker = lo; arg.rls_state = sp; - lsp = (rfs4_lo_state_t *)rfs4_dbsearch(rfs4_lo_state_owner_idx, &arg, - create, &arg, RFS4_DBS_VALID); + lsp = (rfs4_lo_state_t *)rfs4_dbsearch(nsrv4->rfs4_lo_state_owner_idx, + &arg, create, &arg, RFS4_DBS_VALID); return (lsp); } @@ -2702,8 +2808,11 @@ static stateid_t get_stateid(id_t eid) { stateid_t id; + nfs4_srv_t *nsrv4; + + nsrv4 = nfs4_get_srv(); - id.bits.boottime = rfs4_start_time; + id.bits.boottime = nsrv4->rfs4_start_time; id.bits.ident = eid; id.bits.chgseq = 0; id.bits.type = 0; @@ -2959,11 +3068,12 @@ rfs4_deleg_state_t * rfs4_finddeleg(rfs4_state_t *sp, bool_t *create) { rfs4_deleg_state_t ds, *dsp; + nfs4_srv_t *nsrv4 = nfs4_get_srv(); ds.rds_client = sp->rs_owner->ro_client; ds.rds_finfo = sp->rs_finfo; - dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(rfs4_deleg_idx, &ds, + dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(nsrv4->rfs4_deleg_idx, &ds, create, &ds, RFS4_DBS_VALID); return (dsp); @@ -2974,9 +3084,10 @@ rfs4_finddelegstate(stateid_t *id) { rfs4_deleg_state_t *dsp; bool_t create = FALSE; + nfs4_srv_t *nsrv4 = nfs4_get_srv(); - dsp = (rfs4_deleg_state_t 
*)rfs4_dbsearch(rfs4_deleg_state_idx, id, - &create, NULL, RFS4_DBS_VALID); + dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(nsrv4->rfs4_deleg_state_idx, + id, &create, NULL, RFS4_DBS_VALID); return (dsp); } @@ -3091,16 +3202,17 @@ state_file_mkkey(rfs4_entry_t u_entry) rfs4_state_t * rfs4_findstate_by_owner_file(rfs4_openowner_t *oo, rfs4_file_t *fp, - bool_t *create) + bool_t *create) { rfs4_state_t *sp; rfs4_state_t key; + nfs4_srv_t *nsrv4 = nfs4_get_srv(); key.rs_owner = oo; key.rs_finfo = fp; - sp = (rfs4_state_t *)rfs4_dbsearch(rfs4_state_owner_file_idx, &key, - create, &key, RFS4_DBS_VALID); + sp = (rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_owner_file_idx, + &key, create, &key, RFS4_DBS_VALID); return (sp); } @@ -3110,8 +3222,9 @@ static rfs4_state_t * rfs4_findstate_by_file(rfs4_file_t *fp) { bool_t create = FALSE; + nfs4_srv_t *nsrv4 = nfs4_get_srv(); - return ((rfs4_state_t *)rfs4_dbsearch(rfs4_state_file_idx, fp, + return ((rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_file_idx, fp, &create, fp, RFS4_DBS_VALID)); } @@ -3162,8 +3275,9 @@ rfs4_findstate(stateid_t *id, rfs4_dbsearch_type_t find_invalid, bool_t lock_fp) { rfs4_state_t *sp; bool_t create = FALSE; + nfs4_srv_t *nsrv4 = nfs4_get_srv(); - sp = (rfs4_state_t *)rfs4_dbsearch(rfs4_state_idx, id, + sp = (rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_idx, id, &create, NULL, find_invalid); if (lock_fp == TRUE && sp != NULL) rw_enter(&sp->rs_finfo->rf_file_rwlock, RW_READER); @@ -3231,6 +3345,9 @@ nfsstat4 rfs4_check_clientid(clientid4 *cp, int setclid_confirm) { cid *cidp = (cid *) cp; + nfs4_srv_t *nsrv4; + + nsrv4 = nfs4_get_srv(); /* * If we are booted as a cluster node, check the embedded nodeid. @@ -3245,7 +3362,8 @@ rfs4_check_clientid(clientid4 *cp, int setclid_confirm) * by the client (via the clientid) and this is NOT a * setclientid_confirm then return EXPIRED. 
*/ - if (!setclid_confirm && cidp->impl_id.start_time == rfs4_start_time) + if (!setclid_confirm && + cidp->impl_id.start_time == nsrv4->rfs4_start_time) return (NFS4ERR_EXPIRED); return (NFS4ERR_STALE_CLIENTID); @@ -3259,6 +3377,10 @@ rfs4_check_clientid(clientid4 *cp, int setclid_confirm) static nfsstat4 what_stateid_error(stateid_t *id, stateid_type_t type) { + nfs4_srv_t *nsrv4; + + nsrv4 = nfs4_get_srv(); + /* If we are booted as a cluster node, was stateid locally generated? */ if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id)) return (NFS4ERR_STALE_STATEID); @@ -3268,7 +3390,7 @@ what_stateid_error(stateid_t *id, stateid_type_t type) return (NFS4ERR_BAD_STATEID); /* From a different server instantiation, return STALE */ - if (id->bits.boottime != rfs4_start_time) + if (id->bits.boottime != nsrv4->rfs4_start_time) return (NFS4ERR_STALE_STATEID); /* @@ -3283,7 +3405,7 @@ what_stateid_error(stateid_t *id, stateid_type_t type) * that has been revoked, the server should return BAD_STATEID * instead of the more common EXPIRED error. */ - if (id->bits.boottime == rfs4_start_time) { + if (id->bits.boottime == nsrv4->rfs4_start_time) { if (type == DELEGID) return (NFS4ERR_BAD_STATEID); else @@ -3785,7 +3907,7 @@ rfs4_close_all_state(rfs4_file_t *fp) #ifdef DEBUG /* only applies when server is handing out delegations */ - if (rfs4_deleg_policy != SRV_NEVER_DELEGATE) + if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE) ASSERT(fp->rf_dinfo.rd_hold_grant > 0); #endif @@ -3991,21 +4113,34 @@ rfs4_file_walk_callout(rfs4_entry_t u_entry, void *e) * state in the server that refers to objects residing underneath this * particular export. The ordering of the release is important. * Lock_owner, then state and then file. + * + * NFS zones note: nfs_export.c:unexport() calls this from a + * thread in the global zone for NGZ data structures, so we + * CANNOT use zone_getspecific anywhere in this code path. 
*/ void -rfs4_clean_state_exi(struct exportinfo *exi) +rfs4_clean_state_exi(nfs_export_t *ne, struct exportinfo *exi) { - mutex_enter(&rfs4_state_lock); + nfs_globals_t *ng; + nfs4_srv_t *nsrv4; + + ng = ne->ne_globals; + ASSERT(ng->nfs_zoneid == exi->exi_zoneid); + nsrv4 = ng->nfs4_srv; + + mutex_enter(&nsrv4->state_lock); - if (rfs4_server_state == NULL) { - mutex_exit(&rfs4_state_lock); + if (nsrv4->nfs4_server_state == NULL) { + mutex_exit(&nsrv4->state_lock); return; } - rfs4_dbe_walk(rfs4_lo_state_tab, rfs4_lo_state_walk_callout, exi); - rfs4_dbe_walk(rfs4_state_tab, rfs4_state_walk_callout, exi); - rfs4_dbe_walk(rfs4_deleg_state_tab, rfs4_deleg_state_walk_callout, exi); - rfs4_dbe_walk(rfs4_file_tab, rfs4_file_walk_callout, exi); + rfs4_dbe_walk(nsrv4->rfs4_lo_state_tab, + rfs4_lo_state_walk_callout, exi); + rfs4_dbe_walk(nsrv4->rfs4_state_tab, rfs4_state_walk_callout, exi); + rfs4_dbe_walk(nsrv4->rfs4_deleg_state_tab, + rfs4_deleg_state_walk_callout, exi); + rfs4_dbe_walk(nsrv4->rfs4_file_tab, rfs4_file_walk_callout, exi); - mutex_exit(&rfs4_state_lock); + mutex_exit(&nsrv4->state_lock); } diff --git a/usr/src/uts/common/fs/nfs/nfs_auth.c b/usr/src/uts/common/fs/nfs/nfs_auth.c index b363ba37d3..7ac3c3318b 100644 --- a/usr/src/uts/common/fs/nfs/nfs_auth.c +++ b/usr/src/uts/common/fs/nfs/nfs_auth.c @@ -20,10 +20,10 @@ */ /* - * Copyright 2016 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2015 by Delphix. All rights reserved. * Copyright (c) 2015 Joyent, Inc. All rights reserved. + * Copyright 2018 Nexenta Systems, Inc. All rights reserved. 
*/ #include <sys/param.h> @@ -53,10 +53,12 @@ static struct kmem_cache *exi_cache_handle; static void exi_cache_reclaim(void *); +static void exi_cache_reclaim_zone(nfs_globals_t *); static void exi_cache_trim(struct exportinfo *exi); extern pri_t minclsyspri; +/* NFS auth cache statistics */ volatile uint_t nfsauth_cache_hit; volatile uint_t nfsauth_cache_miss; volatile uint_t nfsauth_cache_refresh; @@ -120,9 +122,8 @@ typedef struct refreshq_auth_node { } refreshq_auth_node_t; /* - * Used to manipulate things on the refreshq_queue. - * Note that the refresh thread will effectively - * pop a node off of the queue, at which point it + * Used to manipulate things on the refreshq_queue. Note that the refresh + * thread will effectively pop a node off of the queue, at which point it * will no longer need to hold the mutex. */ static kmutex_t refreshq_lock; @@ -130,102 +131,130 @@ static list_t refreshq_queue; static kcondvar_t refreshq_cv; /* - * If there is ever a problem with loading the - * module, then nfsauth_fini() needs to be called - * to remove state. In that event, since the - * refreshq thread has been started, they need to - * work together to get rid of state. + * If there is ever a problem with loading the module, then nfsauth_fini() + * needs to be called to remove state. In that event, since the refreshq + * thread has been started, they need to work together to get rid of state. */ typedef enum nfsauth_refreshq_thread_state { REFRESHQ_THREAD_RUNNING, REFRESHQ_THREAD_FINI_REQ, - REFRESHQ_THREAD_HALTED + REFRESHQ_THREAD_HALTED, + REFRESHQ_THREAD_NEED_CREATE } nfsauth_refreshq_thread_state_t; -nfsauth_refreshq_thread_state_t -refreshq_thread_state = REFRESHQ_THREAD_HALTED; +typedef struct nfsauth_globals { + kmutex_t mountd_lock; + door_handle_t mountd_dh; + + /* + * Used to manipulate things on the refreshq_queue. Note that the + * refresh thread will effectively pop a node off of the queue, + * at which point it will no longer need to hold the mutex. 
+ */ + kmutex_t refreshq_lock; + list_t refreshq_queue; + kcondvar_t refreshq_cv; + + /* + * A list_t would be overkill. These are auth_cache entries which are + * no longer linked to an exi. It should be the case that all of their + * states are NFS_AUTH_INVALID, i.e., the only way to be put on this + * list is iff their state indicated that they had been placed on the + * refreshq_queue. + * + * Note that while there is no link from the exi or back to the exi, + * the exi can not go away until these entries are harvested. + */ + struct auth_cache *refreshq_dead_entries; + nfsauth_refreshq_thread_state_t refreshq_thread_state; + +} nfsauth_globals_t; static void nfsauth_free_node(struct auth_cache *); -static void nfsauth_refresh_thread(void); +static void nfsauth_refresh_thread(nfsauth_globals_t *); static int nfsauth_cache_compar(const void *, const void *); -/* - * mountd is a server-side only daemon. This will need to be - * revisited if the NFS server is ever made zones-aware. - */ -kmutex_t mountd_lock; -door_handle_t mountd_dh; +static nfsauth_globals_t * +nfsauth_get_zg(void) +{ + nfs_globals_t *ng = nfs_srv_getzg(); + nfsauth_globals_t *nag = ng->nfs_auth; + ASSERT(nag != NULL); + return (nag); +} void mountd_args(uint_t did) { - mutex_enter(&mountd_lock); - if (mountd_dh != NULL) - door_ki_rele(mountd_dh); - mountd_dh = door_ki_lookup(did); - mutex_exit(&mountd_lock); + nfsauth_globals_t *nag; + + nag = nfsauth_get_zg(); + mutex_enter(&nag->mountd_lock); + if (nag->mountd_dh != NULL) + door_ki_rele(nag->mountd_dh); + nag->mountd_dh = door_ki_lookup(did); + mutex_exit(&nag->mountd_lock); } void nfsauth_init(void) { - /* - * mountd can be restarted by smf(5). 
We need to make sure - * the updated door handle will safely make it to mountd_dh - */ - mutex_init(&mountd_lock, NULL, MUTEX_DEFAULT, NULL); + exi_cache_handle = kmem_cache_create("exi_cache_handle", + sizeof (struct auth_cache), 0, NULL, NULL, + exi_cache_reclaim, NULL, NULL, 0); +} - mutex_init(&refreshq_lock, NULL, MUTEX_DEFAULT, NULL); - list_create(&refreshq_queue, sizeof (refreshq_exi_node_t), - offsetof(refreshq_exi_node_t, ren_node)); +void +nfsauth_fini(void) +{ + kmem_cache_destroy(exi_cache_handle); +} - cv_init(&refreshq_cv, NULL, CV_DEFAULT, NULL); +void +nfsauth_zone_init(nfs_globals_t *ng) +{ + nfsauth_globals_t *nag; + + nag = kmem_zalloc(sizeof (*nag), KM_SLEEP); /* - * Allocate nfsauth cache handle + * mountd can be restarted by smf(5). We need to make sure + * the updated door handle will safely make it to mountd_dh. */ - exi_cache_handle = kmem_cache_create("exi_cache_handle", - sizeof (struct auth_cache), 0, NULL, NULL, - exi_cache_reclaim, NULL, NULL, 0); + mutex_init(&nag->mountd_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&nag->refreshq_lock, NULL, MUTEX_DEFAULT, NULL); + list_create(&nag->refreshq_queue, sizeof (refreshq_exi_node_t), + offsetof(refreshq_exi_node_t, ren_node)); + cv_init(&nag->refreshq_cv, NULL, CV_DEFAULT, NULL); + nag->refreshq_thread_state = REFRESHQ_THREAD_NEED_CREATE; - refreshq_thread_state = REFRESHQ_THREAD_RUNNING; - (void) zthread_create(NULL, 0, nfsauth_refresh_thread, - NULL, 0, minclsyspri); + ng->nfs_auth = nag; } -/* - * Finalization routine for nfsauth. It is important to call this routine - * before destroying the exported_lock. - */ void -nfsauth_fini(void) +nfsauth_zone_shutdown(nfs_globals_t *ng) { refreshq_exi_node_t *ren; + nfsauth_globals_t *nag = ng->nfs_auth; - /* - * Prevent the nfsauth_refresh_thread from getting new - * work. 
- */ - mutex_enter(&refreshq_lock); - if (refreshq_thread_state != REFRESHQ_THREAD_HALTED) { - refreshq_thread_state = REFRESHQ_THREAD_FINI_REQ; - cv_broadcast(&refreshq_cv); + /* Prevent the nfsauth_refresh_thread from getting new work */ + mutex_enter(&nag->refreshq_lock); + if (nag->refreshq_thread_state == REFRESHQ_THREAD_RUNNING) { + nag->refreshq_thread_state = REFRESHQ_THREAD_FINI_REQ; + cv_broadcast(&nag->refreshq_cv); - /* - * Also, wait for nfsauth_refresh_thread() to exit. - */ - while (refreshq_thread_state != REFRESHQ_THREAD_HALTED) { - cv_wait(&refreshq_cv, &refreshq_lock); - } + /* Wait for nfsauth_refresh_thread() to exit */ + while (nag->refreshq_thread_state != REFRESHQ_THREAD_HALTED) + cv_wait(&nag->refreshq_cv, &nag->refreshq_lock); } - mutex_exit(&refreshq_lock); + mutex_exit(&nag->refreshq_lock); /* * Walk the exi_list and in turn, walk the auth_lists and free all * lists. In addition, free INVALID auth_cache entries. */ - while ((ren = list_remove_head(&refreshq_queue))) { + while ((ren = list_remove_head(&nag->refreshq_queue))) { refreshq_auth_node_t *ran; while ((ran = list_remove_head(&ren->ren_authlist)) != NULL) { @@ -233,24 +262,30 @@ nfsauth_fini(void) if (p->auth_state == NFS_AUTH_INVALID) nfsauth_free_node(p); strfree(ran->ran_netid); - kmem_free(ran, sizeof (refreshq_auth_node_t)); + kmem_free(ran, sizeof (*ran)); } list_destroy(&ren->ren_authlist); exi_rele(ren->ren_exi); - kmem_free(ren, sizeof (refreshq_exi_node_t)); + kmem_free(ren, sizeof (*ren)); } - list_destroy(&refreshq_queue); - - cv_destroy(&refreshq_cv); - mutex_destroy(&refreshq_lock); - - mutex_destroy(&mountd_lock); +} - /* - * Deallocate nfsauth cache handle - */ - kmem_cache_destroy(exi_cache_handle); +void +nfsauth_zone_fini(nfs_globals_t *ng) +{ + nfsauth_globals_t *nag = ng->nfs_auth; + + ng->nfs_auth = NULL; + + list_destroy(&nag->refreshq_queue); + cv_destroy(&nag->refreshq_cv); + mutex_destroy(&nag->refreshq_lock); + mutex_destroy(&nag->mountd_lock); + /* 
Extra cleanup. */ + if (nag->mountd_dh != NULL) + door_ki_rele(nag->mountd_dh); + kmem_free(nag, sizeof (*nag)); } /* @@ -342,9 +377,10 @@ sys_log(const char *msg) * Callup to the mountd to get access information in the kernel. */ static bool_t -nfsauth_retrieve(struct exportinfo *exi, char *req_netid, int flavor, - struct netbuf *addr, int *access, cred_t *clnt_cred, uid_t *srv_uid, - gid_t *srv_gid, uint_t *srv_gids_cnt, gid_t **srv_gids) +nfsauth_retrieve(nfsauth_globals_t *nag, struct exportinfo *exi, + char *req_netid, int flavor, struct netbuf *addr, int *access, + cred_t *clnt_cred, uid_t *srv_uid, gid_t *srv_gid, uint_t *srv_gids_cnt, + gid_t **srv_gids) { varg_t varg = {0}; nfsauth_res_t res = {0}; @@ -417,11 +453,11 @@ nfsauth_retrieve(struct exportinfo *exi, char *req_netid, int flavor, da.rsize = 1; retry: - mutex_enter(&mountd_lock); - dh = mountd_dh; + mutex_enter(&nag->mountd_lock); + dh = nag->mountd_dh; if (dh != NULL) door_ki_hold(dh); - mutex_exit(&mountd_lock); + mutex_exit(&nag->mountd_lock); if (dh == NULL) { /* @@ -491,12 +527,12 @@ retry: * chance to restart mountd(1m) * and establish a new door handle. 
*/ - mutex_enter(&mountd_lock); - if (dh == mountd_dh) { - door_ki_rele(mountd_dh); - mountd_dh = NULL; + mutex_enter(&nag->mountd_lock); + if (dh == nag->mountd_dh) { + door_ki_rele(nag->mountd_dh); + nag->mountd_dh = NULL; } - mutex_exit(&mountd_lock); + mutex_exit(&nag->mountd_lock); delay(hz); goto retry; } @@ -593,7 +629,7 @@ fail: } static void -nfsauth_refresh_thread(void) +nfsauth_refresh_thread(nfsauth_globals_t *nag) { refreshq_exi_node_t *ren; refreshq_auth_node_t *ran; @@ -605,25 +641,25 @@ nfsauth_refresh_thread(void) callb_cpr_t cprinfo; - CALLB_CPR_INIT(&cprinfo, &refreshq_lock, callb_generic_cpr, + CALLB_CPR_INIT(&cprinfo, &nag->refreshq_lock, callb_generic_cpr, "nfsauth_refresh"); for (;;) { - mutex_enter(&refreshq_lock); - if (refreshq_thread_state != REFRESHQ_THREAD_RUNNING) { + mutex_enter(&nag->refreshq_lock); + if (nag->refreshq_thread_state != REFRESHQ_THREAD_RUNNING) { /* Keep the hold on the lock! */ break; } - ren = list_remove_head(&refreshq_queue); + ren = list_remove_head(&nag->refreshq_queue); if (ren == NULL) { CALLB_CPR_SAFE_BEGIN(&cprinfo); - cv_wait(&refreshq_cv, &refreshq_lock); - CALLB_CPR_SAFE_END(&cprinfo, &refreshq_lock); - mutex_exit(&refreshq_lock); + cv_wait(&nag->refreshq_cv, &nag->refreshq_lock); + CALLB_CPR_SAFE_END(&cprinfo, &nag->refreshq_lock); + mutex_exit(&nag->refreshq_lock); continue; } - mutex_exit(&refreshq_lock); + mutex_exit(&nag->refreshq_lock); exi = ren->ren_exi; ASSERT(exi != NULL); @@ -670,7 +706,8 @@ nfsauth_refresh_thread(void) * shutdown. */ if (p->auth_state == NFS_AUTH_INVALID || - refreshq_thread_state != REFRESHQ_THREAD_RUNNING) { + nag->refreshq_thread_state != + REFRESHQ_THREAD_RUNNING) { mutex_exit(&p->auth_lock); if (p->auth_state == NFS_AUTH_INVALID) @@ -705,7 +742,7 @@ nfsauth_refresh_thread(void) * of the request which triggered the * refresh attempt. 
*/ - retrieval = nfsauth_retrieve(exi, netid, + retrieval = nfsauth_retrieve(nag, exi, netid, p->auth_flavor, &p->auth_clnt->authc_addr, &access, p->auth_clnt_cred, &uid, &gid, &ngids, &gids); @@ -752,9 +789,10 @@ nfsauth_refresh_thread(void) kmem_free(ren, sizeof (refreshq_exi_node_t)); } - refreshq_thread_state = REFRESHQ_THREAD_HALTED; - cv_broadcast(&refreshq_cv); + nag->refreshq_thread_state = REFRESHQ_THREAD_HALTED; + cv_broadcast(&nag->refreshq_cv); CALLB_CPR_EXIT(&cprinfo); + DTRACE_PROBE(nfsauth__nfsauth__refresh__thread__exit); zthread_exit(); } @@ -826,6 +864,7 @@ static int nfsauth_cache_get(struct exportinfo *exi, struct svc_req *req, int flavor, cred_t *cr, uid_t *uid, gid_t *gid, uint_t *ngids, gid_t **gids) { + nfsauth_globals_t *nag; struct netbuf *taddrmask; struct netbuf addr; /* temporary copy of client's address */ const struct netbuf *claddr; @@ -845,6 +884,9 @@ nfsauth_cache_get(struct exportinfo *exi, struct svc_req *req, int flavor, ASSERT(cr != NULL); + ASSERT3P(curzone->zone_id, ==, exi->exi_zoneid); + nag = nfsauth_get_zg(); + /* * Now check whether this client already * has an entry for this flavor in the cache @@ -857,8 +899,12 @@ nfsauth_cache_get(struct exportinfo *exi, struct svc_req *req, int flavor, claddr = svc_getrpccaller(req->rq_xprt); addr = *claddr; - addr.buf = kmem_alloc(addr.maxlen, KM_SLEEP); - bcopy(claddr->buf, addr.buf, claddr->len); + if (claddr->len != 0) { + addr.buf = kmem_alloc(addr.maxlen, KM_SLEEP); + bcopy(claddr->buf, addr.buf, claddr->len); + } else { + addr.buf = NULL; + } SVC_GETADDRMASK(req->rq_xprt, SVC_TATTR_ADDRMASK, (void **)&taddrmask); ASSERT(taddrmask != NULL); @@ -1004,8 +1050,9 @@ nfsauth_cache_get(struct exportinfo *exi, struct svc_req *req, int flavor, atomic_inc_uint(&nfsauth_cache_miss); - res = nfsauth_retrieve(exi, svc_getnetid(req->rq_xprt), flavor, - &addr, &access, cr, &tmpuid, &tmpgid, &tmpngids, &tmpgids); + res = nfsauth_retrieve(nag, exi, svc_getnetid(req->rq_xprt), + flavor, &addr, 
&access, cr, &tmpuid, &tmpgid, &tmpngids, + &tmpgids); p->auth_access = access; p->auth_time = p->auth_freshness = gethrestime_sec(); @@ -1090,21 +1137,33 @@ nfsauth_cache_get(struct exportinfo *exi, struct svc_req *req, int flavor, ran->ran_auth = p; ran->ran_netid = strdup(svc_getnetid(req->rq_xprt)); - mutex_enter(&refreshq_lock); + mutex_enter(&nag->refreshq_lock); + + if (nag->refreshq_thread_state == + REFRESHQ_THREAD_NEED_CREATE) { + /* Launch nfsauth refresh thread */ + nag->refreshq_thread_state = + REFRESHQ_THREAD_RUNNING; + (void) zthread_create(NULL, 0, + nfsauth_refresh_thread, nag, 0, + minclsyspri); + } + /* - * We should not add a work queue - * item if the thread is not - * accepting them. + * We should not add a work queue item if the thread + * is not accepting them. */ - if (refreshq_thread_state == REFRESHQ_THREAD_RUNNING) { + if (nag->refreshq_thread_state == + REFRESHQ_THREAD_RUNNING) { refreshq_exi_node_t *ren; /* * Is there an existing exi_list? */ - for (ren = list_head(&refreshq_queue); + for (ren = list_head(&nag->refreshq_queue); ren != NULL; - ren = list_next(&refreshq_queue, ren)) { + ren = list_next(&nag->refreshq_queue, + ren)) { if (ren->ren_exi == exi) { list_insert_tail( &ren->ren_authlist, ran); @@ -1127,16 +1186,17 @@ nfsauth_cache_get(struct exportinfo *exi, struct svc_req *req, int flavor, list_insert_tail(&ren->ren_authlist, ran); - list_insert_tail(&refreshq_queue, ren); + list_insert_tail(&nag->refreshq_queue, + ren); } - cv_broadcast(&refreshq_cv); + cv_broadcast(&nag->refreshq_cv); } else { strfree(ran->ran_netid); kmem_free(ran, sizeof (refreshq_auth_node_t)); } - mutex_exit(&refreshq_lock); + mutex_exit(&nag->refreshq_lock); } else { mutex_exit(&p->auth_lock); } @@ -1162,8 +1222,8 @@ retrieve: atomic_inc_uint(&nfsauth_cache_miss); - if (nfsauth_retrieve(exi, svc_getnetid(req->rq_xprt), flavor, &addr, - &access, cr, &tmpuid, &tmpgid, &tmpngids, &tmpgids)) { + if (nfsauth_retrieve(nag, exi, svc_getnetid(req->rq_xprt), 
flavor, + &addr, &access, cr, &tmpuid, &tmpgid, &tmpngids, &tmpgids)) { if (uid != NULL) *uid = tmpuid; if (gid != NULL) @@ -1410,32 +1470,55 @@ nfsauth_cache_free(struct exportinfo *exi) } /* - * Called by the kernel memory allocator when - * memory is low. Free unused cache entries. - * If that's not enough, the VM system will - * call again for some more. + * Called by the kernel memory allocator when memory is low. + * Free unused cache entries. If that's not enough, the VM system + * will call again for some more. + * + * This needs to operate on all zones, so we take a reader lock + * on the list of zones and walk the list. This is OK here + * becuase exi_cache_trim doesn't block or cause new objects + * to be allocated (basically just frees lots of stuff). + * Use care if nfssrv_globals_rwl is taken as reader in any + * other cases because it will block nfs_server_zone_init + * and nfs_server_zone_fini, which enter as writer. */ /*ARGSUSED*/ void exi_cache_reclaim(void *cdrarg) { + nfs_globals_t *ng; + + rw_enter(&nfssrv_globals_rwl, RW_READER); + + ng = list_head(&nfssrv_globals_list); + while (ng != NULL) { + exi_cache_reclaim_zone(ng); + ng = list_next(&nfssrv_globals_list, ng); + } + + rw_exit(&nfssrv_globals_rwl); +} + +static void +exi_cache_reclaim_zone(nfs_globals_t *ng) +{ int i; struct exportinfo *exi; + nfs_export_t *ne = ng->nfs_export; - rw_enter(&exported_lock, RW_READER); + rw_enter(&ne->exported_lock, RW_READER); for (i = 0; i < EXPTABLESIZE; i++) { - for (exi = exptable[i]; exi; exi = exi->fid_hash.next) { + for (exi = ne->exptable[i]; exi; exi = exi->fid_hash.next) exi_cache_trim(exi); - } } - rw_exit(&exported_lock); + rw_exit(&ne->exported_lock); atomic_inc_uint(&nfsauth_cache_reclaim); } -void +static void exi_cache_trim(struct exportinfo *exi) { struct auth_cache_clnt *c; diff --git a/usr/src/uts/common/fs/nfs/nfs_client.c b/usr/src/uts/common/fs/nfs/nfs_client.c index b034aa4a77..c88cff3739 100644 --- 
a/usr/src/uts/common/fs/nfs/nfs_client.c +++ b/usr/src/uts/common/fs/nfs/nfs_client.c @@ -18,13 +18,20 @@ * * CDDL HEADER END */ + /* * Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved. - * - * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. + */ + +/* + * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. * All rights reserved. */ +/* + * Copyright 2018 Nexenta Systems, Inc. + */ + #include <sys/param.h> #include <sys/types.h> #include <sys/systm.h> @@ -60,6 +67,7 @@ #include <nfs/nfs.h> #include <nfs/nfs_clnt.h> +#include <nfs/nfs_cmd.h> #include <nfs/rnode.h> #include <nfs/nfs_acl.h> @@ -2796,7 +2804,7 @@ nfs_mi_zonelist_remove(mntinfo_t *mi) * NFS Client initialization routine. This routine should only be called * once. It performs the following tasks: * - Initalize all global locks - * - Call sub-initialization routines (localize access to variables) + * - Call sub-initialization routines (localize access to variables) */ int nfs_clntinit(void) @@ -2827,6 +2835,8 @@ nfs_clntinit(void) nfs4_clnt_init(); + nfscmd_init(); + #ifdef DEBUG nfs_clntup = B_TRUE; #endif @@ -2846,6 +2856,7 @@ nfs_clntfini(void) nfs_subrfini(); nfs_vfsfini(); nfs4_clnt_fini(); + nfscmd_fini(); } /* @@ -3346,7 +3357,7 @@ nfs_free_delmapcall(nfs_delmapcall_t *delmap_call) * Returns: * 0 if the caller wasn't found * 1 if the caller was found, removed and freed. *errp is set to what - * the result of the delmap was. + * the result of the delmap was. */ int nfs_find_and_delete_delmapcall(rnode_t *rp, int *errp) diff --git a/usr/src/uts/common/fs/nfs/nfs_cmd.c b/usr/src/uts/common/fs/nfs/nfs_cmd.c index 343bbd491a..40775bb231 100644 --- a/usr/src/uts/common/fs/nfs/nfs_cmd.c +++ b/usr/src/uts/common/fs/nfs/nfs_cmd.c @@ -18,11 +18,16 @@ * * CDDL HEADER END */ + /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright 2018 Nexenta Systems, Inc. 
+ */ + #include <sys/param.h> #include <sys/types.h> #include <sys/pathname.h> @@ -45,32 +50,65 @@ #endif #define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen)) -kmutex_t nfscmd_lock; -door_handle_t nfscmd_dh; +typedef struct nfscmd_globals { + kmutex_t nfscmd_lock; + door_handle_t nfscmd_dh; +} nfscmd_globals_t; + +static zone_key_t nfscmd_zone_key; static struct charset_cache *nfscmd_charmap(exportinfo_t *exi, struct sockaddr *sp); - +static void *nfscmd_zone_init(zoneid_t); +static void nfscmd_zone_fini(zoneid_t, void *); void nfscmd_args(uint_t did) { - mutex_enter(&nfscmd_lock); - if (nfscmd_dh) - door_ki_rele(nfscmd_dh); - nfscmd_dh = door_ki_lookup(did); - mutex_exit(&nfscmd_lock); + nfscmd_globals_t *ncg = zone_getspecific(nfscmd_zone_key, curzone); + + mutex_enter(&ncg->nfscmd_lock); + if (ncg->nfscmd_dh != NULL) + door_ki_rele(ncg->nfscmd_dh); + ncg->nfscmd_dh = door_ki_lookup(did); + mutex_exit(&ncg->nfscmd_lock); } void nfscmd_init(void) { - mutex_init(&nfscmd_lock, NULL, MUTEX_DEFAULT, NULL); + zone_key_create(&nfscmd_zone_key, nfscmd_zone_init, + NULL, nfscmd_zone_fini); } void nfscmd_fini(void) { + (void) zone_key_delete(nfscmd_zone_key); +} + +/*ARGSUSED*/ +static void * +nfscmd_zone_init(zoneid_t zoneid) +{ + nfscmd_globals_t *ncg; + + ncg = kmem_zalloc(sizeof (*ncg), KM_SLEEP); + mutex_init(&ncg->nfscmd_lock, NULL, MUTEX_DEFAULT, NULL); + + return (ncg); +} + +/*ARGSUSED*/ +static void +nfscmd_zone_fini(zoneid_t zoneid, void *data) +{ + nfscmd_globals_t *ncg = data; + + mutex_destroy(&ncg->nfscmd_lock); + if (ncg->nfscmd_dh) + door_ki_rele(ncg->nfscmd_dh); + kmem_free(ncg, sizeof (*ncg)); } /* @@ -88,13 +126,14 @@ nfscmd_send(nfscmd_arg_t *arg, nfscmd_res_t *res) door_info_t di; int ntries = 0; int last = 0; + nfscmd_globals_t *ncg = zone_getspecific(nfscmd_zone_key, curzone); retry: - mutex_enter(&nfscmd_lock); - dh = nfscmd_dh; + mutex_enter(&ncg->nfscmd_lock); + dh = ncg->nfscmd_dh; if (dh != NULL) door_ki_hold(dh); - 
mutex_exit(&nfscmd_lock); + mutex_exit(&ncg->nfscmd_lock); if (dh == NULL) { /* @@ -141,10 +180,10 @@ retry: * chance to restart mountd(1m) * and establish a new door handle. */ - mutex_enter(&nfscmd_lock); - if (dh == nfscmd_dh) - nfscmd_dh = NULL; - mutex_exit(&nfscmd_lock); + mutex_enter(&ncg->nfscmd_lock); + if (dh == ncg->nfscmd_dh) + ncg->nfscmd_dh = NULL; + mutex_exit(&ncg->nfscmd_lock); door_ki_rele(dh); delay(hz); goto retry; diff --git a/usr/src/uts/common/fs/nfs/nfs_export.c b/usr/src/uts/common/fs/nfs/nfs_export.c index 200ef6668d..080dfe1adf 100644 --- a/usr/src/uts/common/fs/nfs/nfs_export.c +++ b/usr/src/uts/common/fs/nfs/nfs_export.c @@ -20,15 +20,17 @@ */ /* - * Copyright 2015 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved. */ /* - * Copyright 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T. + * Copyright 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T. * All rights reserved. */ +/* + * Copyright 2018 Nexenta Systems, Inc. + */ #include <sys/types.h> #include <sys/param.h> @@ -65,14 +67,25 @@ #include <nfs/nfs_log.h> #include <nfs/lm.h> #include <sys/sunddi.h> -#include <sys/pkp_hash.h> - -treenode_t *ns_root; -struct exportinfo *exptable_path_hash[PKP_HASH_SIZE]; -struct exportinfo *exptable[EXPTABLESIZE]; +/* + * exi_id support + * + * exi_id_next The next exi_id available. + * exi_id_overflow The exi_id_next already overflowed, so we should + * thoroughly check for duplicates. + * exi_id_tree AVL tree indexed by exi_id. + * nfs_exi_id_lock Lock to protect the export ID list + * + * All exi_id_next, exi_id_overflow, and exi_id_tree are protected by + * nfs_exi_id_lock. 
+ */ +static int exi_id_next; +static bool_t exi_id_overflow; +avl_tree_t exi_id_tree; +kmutex_t nfs_exi_id_lock; -static int unexport(exportinfo_t *); +static int unexport(nfs_export_t *, exportinfo_t *); static void exportfree(exportinfo_t *); static int loadindex(exportdata_t *); @@ -80,31 +93,18 @@ extern void nfsauth_cache_free(exportinfo_t *); extern int sec_svc_loadrootnames(int, int, caddr_t **, model_t); extern void sec_svc_freerootnames(int, int, caddr_t *); -static int build_seclist_nodups(exportdata_t *, secinfo_t *, int); -static void srv_secinfo_add(secinfo_t **, int *, secinfo_t *, int, int); -static void srv_secinfo_remove(secinfo_t **, int *, secinfo_t *, int); -static void srv_secinfo_treeclimb(exportinfo_t *, secinfo_t *, int, bool_t); +static int build_seclist_nodups(exportdata_t *, secinfo_t *, int); +static void srv_secinfo_add(secinfo_t **, int *, secinfo_t *, int, int); +static void srv_secinfo_remove(secinfo_t **, int *, secinfo_t *, int); +static void srv_secinfo_treeclimb(nfs_export_t *, exportinfo_t *, + secinfo_t *, int, bool_t); #ifdef VOLATILE_FH_TEST static struct ex_vol_rename *find_volrnm_fh(exportinfo_t *, nfs_fh4 *); static uint32_t find_volrnm_fh_id(exportinfo_t *, nfs_fh4 *); -static void free_volrnm_list(exportinfo_t *); +static void free_volrnm_list(exportinfo_t *); #endif /* VOLATILE_FH_TEST */ -/* - * exported_lock Read/Write lock that protects the exportinfo list. - * This lock must be held when searching or modifiying - * the exportinfo list. 
- */ -krwlock_t exported_lock; - -/* - * "public" and default (root) location for public filehandle - */ -struct exportinfo *exi_public, *exi_root; - -fid_t exi_rootfid; /* for checking the default public file handle */ - fhandle_t nullfh2; /* for comparing V2 filehandles */ /* @@ -117,6 +117,15 @@ fhandle_t nullfh2; /* for comparing V2 filehandles */ #define exptablehash(fsid, fid) (nfs_fhhash((fsid), (fid)) & (EXPTABLESIZE - 1)) +extern nfs_export_t * +nfs_get_export(void) +{ + nfs_globals_t *ng = nfs_srv_getzg(); + nfs_export_t *ne = ng->nfs_export; + ASSERT(ne != NULL); + return (ne); +} + static uint8_t xor_hash(uint8_t *data, int len) { @@ -693,7 +702,8 @@ vis2exi(treenode_t *tnode) } } - ASSERT(exi_ret); /* Every visible should have its home exportinfo */ + /* Every visible should have its home exportinfo */ + ASSERT(exi_ret != NULL); return (exi_ret); } @@ -702,14 +712,25 @@ vis2exi(treenode_t *tnode) * Add or remove the newly exported or unexported security flavors of the * given exportinfo from its ancestors upto the system root. */ -void -srv_secinfo_treeclimb(exportinfo_t *exip, secinfo_t *sec, int seccnt, - bool_t isadd) +static void +srv_secinfo_treeclimb(nfs_export_t *ne, exportinfo_t *exip, secinfo_t *sec, + int seccnt, bool_t isadd) { - treenode_t *tnode = exip->exi_tree; + treenode_t *tnode; - ASSERT(RW_WRITE_HELD(&exported_lock)); - ASSERT(tnode != NULL); + ASSERT(RW_WRITE_HELD(&ne->exported_lock)); + + /* + * exi_tree can be null for the zone root + * which means we're already at the "top" + * and there's nothing more to "climb". + */ + tnode = exip->exi_tree; + if (tnode == NULL) { + /* Should only happen for... 
*/ + ASSERT(exip == ne->exi_root); + return; + } if (seccnt == 0) return; @@ -722,6 +743,7 @@ srv_secinfo_treeclimb(exportinfo_t *exip, secinfo_t *sec, int seccnt, * transferred from the PSEUDO export in exportfs() */ if (isadd && !(exip->exi_vp->v_flag & VROOT) && + !VN_CMP(exip->exi_vp, EXI_TO_ZONEROOTVP(exip)) && tnode->tree_vis->vis_seccnt > 0) { srv_secinfo_add(&exip->exi_export.ex_secinfo, &exip->exi_export.ex_seccnt, tnode->tree_vis->vis_secinfo, @@ -782,108 +804,302 @@ srv_secinfo_treeclimb(exportinfo_t *exip, secinfo_t *sec, int seccnt, *(bucket) = (exi); void -export_link(exportinfo_t *exi) +export_link(nfs_export_t *ne, exportinfo_t *exi) { exportinfo_t **bckt; - bckt = &exptable[exptablehash(&exi->exi_fsid, &exi->exi_fid)]; + ASSERT(RW_WRITE_HELD(&ne->exported_lock)); + + bckt = &ne->exptable[exptablehash(&exi->exi_fsid, &exi->exi_fid)]; exp_hash_link(exi, fid_hash, bckt); - bckt = &exptable_path_hash[pkp_tab_hash(exi->exi_export.ex_path, + bckt = &ne->exptable_path_hash[pkp_tab_hash(exi->exi_export.ex_path, strlen(exi->exi_export.ex_path))]; exp_hash_link(exi, path_hash, bckt); + exi->exi_ne = ne; } /* - * Initialization routine for export routines. Should only be called once. + * Helper functions for exi_id handling */ +static int +exi_id_compar(const void *v1, const void *v2) +{ + const struct exportinfo *e1 = v1; + const struct exportinfo *e2 = v2; + + if (e1->exi_id < e2->exi_id) + return (-1); + if (e1->exi_id > e2->exi_id) + return (1); + + return (0); +} + int -nfs_exportinit(void) +exi_id_get_next() +{ + struct exportinfo e; + int ret = exi_id_next; + + ASSERT(MUTEX_HELD(&nfs_exi_id_lock)); + + do { + exi_id_next++; + if (exi_id_next == 0) + exi_id_overflow = TRUE; + + if (!exi_id_overflow) + break; + + if (exi_id_next == ret) + cmn_err(CE_PANIC, "exi_id exhausted"); + + e.exi_id = exi_id_next; + } while (avl_find(&exi_id_tree, &e, NULL) != NULL); + + return (ret); +} + +/* + * Get the root file handle for this zone. 
+ * Called when nfs_svc() starts + */ +int +nfs_export_get_rootfh(nfs_globals_t *g) +{ + nfs_export_t *ne = g->nfs_export; + int err; + + ne->exi_rootfid.fid_len = MAXFIDSZ; + err = vop_fid_pseudo(ne->exi_root->exi_vp, &ne->exi_rootfid); + if (err != 0) { + ne->exi_rootfid.fid_len = 0; + return (err); + } + + /* Setup the fhandle template exi_fh */ + ne->exi_root->exi_fh.fh_fsid = rootdir->v_vfsp->vfs_fsid; + ne->exi_root->exi_fh.fh_xlen = ne->exi_rootfid.fid_len; + bcopy(ne->exi_rootfid.fid_data, ne->exi_root->exi_fh.fh_xdata, + ne->exi_rootfid.fid_len); + ne->exi_root->exi_fh.fh_len = sizeof (ne->exi_root->exi_fh.fh_data); + + return (0); +} + +void +nfs_export_zone_init(nfs_globals_t *ng) { - int error; int i; + nfs_export_t *ne; + zone_t *zone; + + ne = kmem_zalloc(sizeof (*ne), KM_SLEEP); - rw_init(&exported_lock, NULL, RW_DEFAULT, NULL); + rw_init(&ne->exported_lock, NULL, RW_DEFAULT, NULL); + + ne->ne_globals = ng; /* "up" pointer */ /* * Allocate the place holder for the public file handle, which * is all zeroes. It is initially set to the root filesystem. 
*/ - exi_root = kmem_zalloc(sizeof (*exi_root), KM_SLEEP); - exi_public = exi_root; + ne->exi_root = kmem_zalloc(sizeof (*ne->exi_root), KM_SLEEP); + ne->exi_public = ne->exi_root; - exi_root->exi_export.ex_flags = EX_PUBLIC; - exi_root->exi_export.ex_pathlen = 1; /* length of "/" */ - exi_root->exi_export.ex_path = - kmem_alloc(exi_root->exi_export.ex_pathlen + 1, KM_SLEEP); - exi_root->exi_export.ex_path[0] = '/'; - exi_root->exi_export.ex_path[1] = '\0'; + ne->exi_root->exi_export.ex_flags = EX_PUBLIC; + ne->exi_root->exi_export.ex_pathlen = 1; /* length of "/" */ + ne->exi_root->exi_export.ex_path = + kmem_alloc(ne->exi_root->exi_export.ex_pathlen + 1, KM_SLEEP); + ne->exi_root->exi_export.ex_path[0] = '/'; + ne->exi_root->exi_export.ex_path[1] = '\0'; - exi_root->exi_count = 1; - mutex_init(&exi_root->exi_lock, NULL, MUTEX_DEFAULT, NULL); + ne->exi_root->exi_count = 1; + mutex_init(&ne->exi_root->exi_lock, NULL, MUTEX_DEFAULT, NULL); - exi_root->exi_vp = rootdir; - exi_rootfid.fid_len = MAXFIDSZ; - error = vop_fid_pseudo(exi_root->exi_vp, &exi_rootfid); - if (error) { - mutex_destroy(&exi_root->exi_lock); - kmem_free(exi_root, sizeof (*exi_root)); - return (error); - } + /* + * Because we cannot: + * ASSERT(curzone->zone_id == ng->nfs_zoneid); + * We grab the zone pointer explicitly (like netstacks do) and + * set the rootvp here. + * + * Subsequent exportinfo_t's that get export_link()ed to "ne" also + * will backpoint to "ne" such that exi->exi_ne->exi_root->exi_vp + * will get the zone's rootvp for a given exportinfo_t. + */ + zone = zone_find_by_id_nolock(ng->nfs_zoneid); + ne->exi_root->exi_vp = zone->zone_rootvp; + ne->exi_root->exi_zoneid = ng->nfs_zoneid; /* - * Initialize auth cache and auth cache lock + * Fill in ne->exi_rootfid later, in nfs_export_get_rootfid + * because we can't correctly return errors here. 
*/ + + /* Initialize auth cache and auth cache lock */ for (i = 0; i < AUTH_TABLESIZE; i++) { - exi_root->exi_cache[i] = kmem_alloc(sizeof (avl_tree_t), + ne->exi_root->exi_cache[i] = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP); - avl_create(exi_root->exi_cache[i], nfsauth_cache_clnt_compar, - sizeof (struct auth_cache_clnt), + avl_create(ne->exi_root->exi_cache[i], + nfsauth_cache_clnt_compar, sizeof (struct auth_cache_clnt), offsetof(struct auth_cache_clnt, authc_link)); } - rw_init(&exi_root->exi_cache_lock, NULL, RW_DEFAULT, NULL); + rw_init(&ne->exi_root->exi_cache_lock, NULL, RW_DEFAULT, NULL); - /* setup the fhandle template */ - exi_root->exi_fh.fh_fsid = rootdir->v_vfsp->vfs_fsid; - exi_root->exi_fh.fh_xlen = exi_rootfid.fid_len; - bcopy(exi_rootfid.fid_data, exi_root->exi_fh.fh_xdata, - exi_rootfid.fid_len); - exi_root->exi_fh.fh_len = sizeof (exi_root->exi_fh.fh_data); + /* setup exi_fh later, in nfs_export_get_rootfid */ - /* - * Publish the exportinfo in the hash table - */ - export_link(exi_root); + rw_enter(&ne->exported_lock, RW_WRITER); - nfslog_init(); - ns_root = NULL; + /* Publish the exportinfo in the hash table */ + export_link(ne, ne->exi_root); - return (0); + /* Initialize exi_id and exi_kstats */ + mutex_enter(&nfs_exi_id_lock); + ne->exi_root->exi_id = exi_id_get_next(); + avl_add(&exi_id_tree, ne->exi_root); + mutex_exit(&nfs_exi_id_lock); + + rw_exit(&ne->exported_lock); + ne->ns_root = NULL; + + ng->nfs_export = ne; } /* - * Finalization routine for export routines. Called to cleanup previously - * initialization work when the NFS server module could not be loaded correctly. 
+ * During zone shutdown, remove exports */ void -nfs_exportfini(void) +nfs_export_zone_shutdown(nfs_globals_t *ng) +{ + nfs_export_t *ne = ng->nfs_export; + struct exportinfo *exi, *nexi; + int i, errors; + + rw_enter(&ne->exported_lock, RW_READER); + + errors = 0; + for (i = 0; i < EXPTABLESIZE; i++) { + + exi = ne->exptable[i]; + if (exi != NULL) + exi_hold(exi); + + while (exi != NULL) { + + /* + * Get and hold next export before + * dropping the rwlock and unexport + */ + nexi = exi->fid_hash.next; + if (nexi != NULL) + exi_hold(nexi); + + rw_exit(&ne->exported_lock); + + /* + * Skip ne->exi_root which gets special + * create/destroy handling. + */ + if (exi != ne->exi_root && + unexport(ne, exi) != 0) + errors++; + exi_rele(exi); + + rw_enter(&ne->exported_lock, RW_READER); + exi = nexi; + } + } + if (errors > 0) { + cmn_err(CE_NOTE, "NFS: failed un-exports in zone %d", + (int)ng->nfs_zoneid); + } + + rw_exit(&ne->exported_lock); +} + +void +nfs_export_zone_fini(nfs_globals_t *ng) { int i; + nfs_export_t *ne = ng->nfs_export; + struct exportinfo *exi; + + ng->nfs_export = NULL; + + rw_enter(&ne->exported_lock, RW_WRITER); + + mutex_enter(&nfs_exi_id_lock); + avl_remove(&exi_id_tree, ne->exi_root); + mutex_exit(&nfs_exi_id_lock); + + export_unlink(ne, ne->exi_root); + + rw_exit(&ne->exported_lock); + + /* Deallocate the place holder for the public file handle */ + srv_secinfo_list_free(ne->exi_root->exi_export.ex_secinfo, + ne->exi_root->exi_export.ex_seccnt); + mutex_destroy(&ne->exi_root->exi_lock); + + rw_destroy(&ne->exi_root->exi_cache_lock); + for (i = 0; i < AUTH_TABLESIZE; i++) { + avl_destroy(ne->exi_root->exi_cache[i]); + kmem_free(ne->exi_root->exi_cache[i], sizeof (avl_tree_t)); + } + + kmem_free(ne->exi_root->exi_export.ex_path, + ne->exi_root->exi_export.ex_pathlen + 1); + kmem_free(ne->exi_root, sizeof (*ne->exi_root)); /* - * Deallocate the place holder for the public file handle. 
+ * The shutdown hook should have left the exi_id_tree + * with nothing belonging to this zone. */ - srv_secinfo_list_free(exi_root->exi_export.ex_secinfo, - exi_root->exi_export.ex_seccnt); - mutex_destroy(&exi_root->exi_lock); - rw_destroy(&exi_root->exi_cache_lock); - for (i = 0; i < AUTH_TABLESIZE; i++) { - avl_destroy(exi_root->exi_cache[i]); - kmem_free(exi_root->exi_cache[i], sizeof (avl_tree_t)); + mutex_enter(&nfs_exi_id_lock); + i = 0; + exi = avl_first(&exi_id_tree); + while (exi != NULL) { + if (exi->exi_zoneid == ng->nfs_zoneid) + i++; + exi = AVL_NEXT(&exi_id_tree, exi); } - kmem_free(exi_root, sizeof (*exi_root)); + mutex_exit(&nfs_exi_id_lock); + if (i > 0) { + cmn_err(CE_NOTE, + "NFS: zone %d has %d export IDs left after shutdown", + (int)ng->nfs_zoneid, i); + } + rw_destroy(&ne->exported_lock); + kmem_free(ne, sizeof (*ne)); +} + +/* + * Initialization routine for export routines. + * Should only be called once. + */ +void +nfs_exportinit(void) +{ + mutex_init(&nfs_exi_id_lock, NULL, MUTEX_DEFAULT, NULL); + + /* exi_id handling initialization */ + exi_id_next = 0; + exi_id_overflow = FALSE; + avl_create(&exi_id_tree, exi_id_compar, sizeof (struct exportinfo), + offsetof(struct exportinfo, exi_id_link)); + + nfslog_init(); +} - rw_destroy(&exported_lock); +/* + * Finalization routine for export routines. + */ +void +nfs_exportfini(void) +{ + avl_destroy(&exi_id_tree); + mutex_destroy(&nfs_exi_id_lock); } /* @@ -922,6 +1138,7 @@ rfs_gsscallback(struct svc_req *req, gss_cred_id_t deleg, void *gss_context, int i, j; rpc_gss_rawcred_t *raw_cred; struct exportinfo *exi; + nfs_export_t *ne = nfs_get_export(); /* * We don't deal with delegated credentials. 
@@ -932,9 +1149,10 @@ rfs_gsscallback(struct svc_req *req, gss_cred_id_t deleg, void *gss_context, raw_cred = lock->raw_cred; *cookie = NULL; - rw_enter(&exported_lock, RW_READER); + rw_enter(&ne->exported_lock, RW_READER); + for (i = 0; i < EXPTABLESIZE; i++) { - exi = exptable[i]; + exi = ne->exptable[i]; while (exi) { if (exi->exi_export.ex_seccnt > 0) { struct secinfo *secp; @@ -974,7 +1192,7 @@ rfs_gsscallback(struct svc_req *req, gss_cred_id_t deleg, void *gss_context, } } done: - rw_exit(&exported_lock); + rw_exit(&ne->exported_lock); /* * If no nfs pseudo number mapping can be found in the export @@ -1041,6 +1259,7 @@ exportfs(struct exportfs_args *args, model_t model, cred_t *cr) int oldcnt; int i; struct pathname lookpn; + nfs_export_t *ne = nfs_get_export(); STRUCT_SET_HANDLE(uap, model, args); @@ -1049,25 +1268,25 @@ exportfs(struct exportfs_args *args, model_t model, cred_t *cr) return (error); /* Walk the export list looking for that pathname */ - rw_enter(&exported_lock, RW_READER); + rw_enter(&ne->exported_lock, RW_READER); DTRACE_PROBE(nfss__i__exported_lock1_start); - for (ex1 = exptable_path_hash[pkp_tab_hash(lookpn.pn_path, + for (ex1 = ne->exptable_path_hash[pkp_tab_hash(lookpn.pn_path, strlen(lookpn.pn_path))]; ex1; ex1 = ex1->path_hash.next) { - if (ex1 != exi_root && 0 == + if (ex1 != ne->exi_root && 0 == strcmp(ex1->exi_export.ex_path, lookpn.pn_path)) { exi_hold(ex1); break; } } DTRACE_PROBE(nfss__i__exported_lock1_stop); - rw_exit(&exported_lock); + rw_exit(&ne->exported_lock); /* Is this an unshare? */ if (STRUCT_FGETP(uap, uex) == NULL) { pn_free(&lookpn); if (ex1 == NULL) return (EINVAL); - error = unexport(ex1); + error = unexport(ne, ex1); exi_rele(ex1); return (error); } @@ -1163,15 +1382,15 @@ exportfs(struct exportfs_args *args, model_t model, cred_t *cr) * Do not allow re-sharing a shared vnode under a different path * PSEUDO export has ex_path fabricated, e.g. "/tmp (pseudo)", skip it. 
*/ - rw_enter(&exported_lock, RW_READER); + rw_enter(&ne->exported_lock, RW_READER); DTRACE_PROBE(nfss__i__exported_lock2_start); - for (ex2 = exptable[exptablehash(&fsid, &fid)]; ex2; + for (ex2 = ne->exptable[exptablehash(&fsid, &fid)]; ex2; ex2 = ex2->fid_hash.next) { - if (ex2 != exi_root && !PSEUDO(ex2) && + if (ex2 != ne->exi_root && !PSEUDO(ex2) && VN_CMP(ex2->exi_vp, vp) && strcmp(ex2->exi_export.ex_path, lookpn.pn_path) != 0) { DTRACE_PROBE(nfss__i__exported_lock2_stop); - rw_exit(&exported_lock); + rw_exit(&ne->exported_lock); VN_RELE(vp); if (dvp != NULL) VN_RELE(dvp); @@ -1180,7 +1399,7 @@ exportfs(struct exportfs_args *args, model_t model, cred_t *cr) } } DTRACE_PROBE(nfss__i__exported_lock2_stop); - rw_exit(&exported_lock); + rw_exit(&ne->exported_lock); pn_free(&lookpn); exi = kmem_zalloc(sizeof (*exi), KM_SLEEP); @@ -1188,6 +1407,8 @@ exportfs(struct exportfs_args *args, model_t model, cred_t *cr) exi->exi_fid = fid; exi->exi_vp = vp; exi->exi_count = 1; + exi->exi_zoneid = crgetzoneid(cr); + ASSERT3U(exi->exi_zoneid, ==, curzone->zone_id); exi->exi_volatile_dev = (vfssw[vp->v_vfsp->vfs_fstype].vsw_flag & VSW_VOLATILEDEV) ? 1 : 0; mutex_init(&exi->exi_lock, NULL, MUTEX_DEFAULT, NULL); @@ -1461,10 +1682,10 @@ exportfs(struct exportfs_args *args, model_t model, cred_t *cr) /* * Insert the new entry at the front of the export list */ - rw_enter(&exported_lock, RW_WRITER); + rw_enter(&ne->exported_lock, RW_WRITER); DTRACE_PROBE(nfss__i__exported_lock3_start); - export_link(exi); + export_link(ne, exi); /* * Check the rest of the list for an old entry for the fs. @@ -1472,8 +1693,11 @@ exportfs(struct exportfs_args *args, model_t model, cred_t *cr) * only reference and then free it. 
*/ for (ex = exi->fid_hash.next; ex != NULL; ex = ex->fid_hash.next) { - if (ex != exi_root && VN_CMP(ex->exi_vp, vp)) { - export_unlink(ex); + if (ex != ne->exi_root && VN_CMP(ex->exi_vp, vp)) { + mutex_enter(&nfs_exi_id_lock); + avl_remove(&exi_id_tree, ex); + mutex_exit(&nfs_exi_id_lock); + export_unlink(ne, ex); break; } } @@ -1482,8 +1706,8 @@ exportfs(struct exportfs_args *args, model_t model, cred_t *cr) * If the public filehandle is pointing at the * old entry, then point it back at the root. */ - if (ex != NULL && ex == exi_public) - exi_public = exi_root; + if (ex != NULL && ex == ne->exi_public) + ne->exi_public = ne->exi_root; /* * If the public flag is on, make the global exi_public @@ -1491,7 +1715,7 @@ exportfs(struct exportfs_args *args, model_t model, cred_t *cr) * we can distinguish it from the place holder export. */ if (kex->ex_flags & EX_PUBLIC) { - exi_public = exi; + ne->exi_public = exi; kex->ex_flags &= ~EX_PUBLIC; } @@ -1523,7 +1747,7 @@ exportfs(struct exportfs_args *args, model_t model, cred_t *cr) exi->exi_tree->tree_exi = exi; /* Update the change timestamp */ - tree_update_change(exi->exi_tree, NULL); + tree_update_change(ne, exi->exi_tree, NULL); } /* @@ -1533,7 +1757,7 @@ exportfs(struct exportfs_args *args, model_t model, cred_t *cr) */ newcnt = build_seclist_nodups(&exi->exi_export, newsec, FALSE); - srv_secinfo_treeclimb(exi, newsec, newcnt, TRUE); + srv_secinfo_treeclimb(ne, exi, newsec, newcnt, TRUE); /* * If re-sharing an old export entry, update the secinfo data @@ -1558,7 +1782,7 @@ exportfs(struct exportfs_args *args, model_t model, cred_t *cr) * Remove old flavor refs last. 
*/ srv_secinfo_exp2exp(&exi->exi_export, oldsec, oldcnt); - srv_secinfo_treeclimb(ex, oldsec, oldcnt, FALSE); + srv_secinfo_treeclimb(ne, ex, oldsec, oldcnt, FALSE); } } @@ -1571,10 +1795,24 @@ exportfs(struct exportfs_args *args, model_t model, cred_t *cr) ex->exi_visible = NULL; } + /* + * Initialize exi_id and exi_kstats + */ + if (ex != NULL) { + exi->exi_id = ex->exi_id; + } else { + mutex_enter(&nfs_exi_id_lock); + exi->exi_id = exi_id_get_next(); + mutex_exit(&nfs_exi_id_lock); + } + mutex_enter(&nfs_exi_id_lock); + avl_add(&exi_id_tree, exi); + mutex_exit(&nfs_exi_id_lock); + DTRACE_PROBE(nfss__i__exported_lock3_stop); - rw_exit(&exported_lock); + rw_exit(&ne->exported_lock); - if (exi_public == exi || kex->ex_flags & EX_LOG) { + if (ne->exi_public == exi || kex->ex_flags & EX_LOG) { /* * Log share operation to this buffer only. */ @@ -1588,9 +1826,9 @@ exportfs(struct exportfs_args *args, model_t model, cred_t *cr) out7: /* Unlink the new export in exptable. */ - export_unlink(exi); + export_unlink(ne, exi); DTRACE_PROBE(nfss__i__exported_lock3_stop); - rw_exit(&exported_lock); + rw_exit(&ne->exported_lock); out6: if (kex->ex_flags & EX_INDEX) kmem_free(kex->ex_index, strlen(kex->ex_index) + 1); @@ -1634,40 +1872,44 @@ out1: * Remove the exportinfo from the export list */ void -export_unlink(struct exportinfo *exi) +export_unlink(nfs_export_t *ne, struct exportinfo *exi) { - ASSERT(RW_WRITE_HELD(&exported_lock)); + ASSERT(RW_WRITE_HELD(&ne->exported_lock)); exp_hash_unlink(exi, fid_hash); exp_hash_unlink(exi, path_hash); + ASSERT3P(exi->exi_ne, ==, ne); + exi->exi_ne = NULL; } /* * Unexport an exported filesystem */ static int -unexport(struct exportinfo *exi) +unexport(nfs_export_t *ne, struct exportinfo *exi) { struct secinfo cursec[MAX_FLAVORS]; int curcnt; - rw_enter(&exported_lock, RW_WRITER); + rw_enter(&ne->exported_lock, RW_WRITER); /* Check if exi is still linked in the export table */ if (!EXP_LINKED(exi) || PSEUDO(exi)) { - 
rw_exit(&exported_lock); + rw_exit(&ne->exported_lock); return (EINVAL); } - export_unlink(exi); + mutex_enter(&nfs_exi_id_lock); + avl_remove(&exi_id_tree, exi); + mutex_exit(&nfs_exi_id_lock); + export_unlink(ne, exi); /* * Remove security flavors before treeclimb_unexport() is called * because srv_secinfo_treeclimb needs the namespace tree */ curcnt = build_seclist_nodups(&exi->exi_export, cursec, TRUE); - - srv_secinfo_treeclimb(exi, cursec, curcnt, FALSE); + srv_secinfo_treeclimb(ne, exi, cursec, curcnt, FALSE); /* * If there's a visible list, then need to leave @@ -1677,7 +1919,7 @@ unexport(struct exportinfo *exi) if (exi->exi_visible != NULL) { struct exportinfo *newexi; - newexi = pseudo_exportfs(exi->exi_vp, &exi->exi_fid, + newexi = pseudo_exportfs(ne, exi->exi_vp, &exi->exi_fid, exi->exi_visible, &exi->exi_export); exi->exi_visible = NULL; @@ -1686,12 +1928,12 @@ unexport(struct exportinfo *exi) newexi->exi_tree->tree_exi = newexi; /* Update the change timestamp */ - tree_update_change(exi->exi_tree, NULL); + tree_update_change(ne, exi->exi_tree, NULL); } else { - treeclimb_unexport(exi); + treeclimb_unexport(ne, exi); } - rw_exit(&exported_lock); + rw_exit(&ne->exported_lock); /* * Need to call into the NFSv4 server and release all data @@ -1699,7 +1941,7 @@ unexport(struct exportinfo *exi) * the v4 server may be holding file locks or vnodes under * this export. */ - rfs4_clean_state_exi(exi); + rfs4_clean_state_exi(ne, exi); /* * Notify the lock manager that the filesystem is being @@ -1711,15 +1953,19 @@ unexport(struct exportinfo *exi) * If this was a public export, restore * the public filehandle to the root. */ - if (exi == exi_public) { - exi_public = exi_root; - nfslog_share_record(exi_public, CRED()); + /* + * XXX KEBE ASKS --> Should CRED() instead be + * exi->exi_zone->zone_kcred? 
+ */ + if (exi == ne->exi_public) { + ne->exi_public = ne->exi_root; + + nfslog_share_record(ne->exi_public, CRED()); } - if (exi->exi_export.ex_flags & EX_LOG) { + if (exi->exi_export.ex_flags & EX_LOG) nfslog_unshare_record(exi, CRED()); - } exi_rele(exi); return (0); @@ -1946,7 +2192,8 @@ nfs_vptoexi(vnode_t *dvp, vnode_t *vp, cred_t *cr, int *walk, * If we're at the root of this filesystem, then * it's time to stop (with failure). */ - if (vp->v_flag & VROOT) { + ASSERT3P(vp->v_vfsp->vfs_zone, ==, curzone); + if ((vp->v_flag & VROOT) || VN_IS_CURZONEROOT(vp)) { error = EINVAL; break; } @@ -2446,9 +2693,10 @@ struct exportinfo * checkexport(fsid_t *fsid, fid_t *fid) { struct exportinfo *exi; + nfs_export_t *ne = nfs_get_export(); - rw_enter(&exported_lock, RW_READER); - for (exi = exptable[exptablehash(fsid, fid)]; + rw_enter(&ne->exported_lock, RW_READER); + for (exi = ne->exptable[exptablehash(fsid, fid)]; exi != NULL; exi = exi->fid_hash.next) { if (exportmatch(exi, fsid, fid)) { @@ -2459,15 +2707,15 @@ checkexport(fsid_t *fsid, fid_t *fid) * handle. */ if (exi->exi_export.ex_flags & EX_PUBLIC) { - exi = exi_public; + exi = ne->exi_public; } exi_hold(exi); - rw_exit(&exported_lock); + rw_exit(&ne->exported_lock); return (exi); } } - rw_exit(&exported_lock); + rw_exit(&ne->exported_lock); return (NULL); } @@ -2483,10 +2731,11 @@ struct exportinfo * checkexport4(fsid_t *fsid, fid_t *fid, vnode_t *vp) { struct exportinfo *exi; + nfs_export_t *ne = nfs_get_export(); - ASSERT(RW_LOCK_HELD(&exported_lock)); + ASSERT(RW_LOCK_HELD(&ne->exported_lock)); - for (exi = exptable[exptablehash(fsid, fid)]; + for (exi = ne->exptable[exptablehash(fsid, fid)]; exi != NULL; exi = exi->fid_hash.next) { if (exportmatch(exi, fsid, fid)) { @@ -2497,7 +2746,7 @@ checkexport4(fsid_t *fsid, fid_t *fid, vnode_t *vp) * handle. 
*/ if (exi->exi_export.ex_flags & EX_PUBLIC) { - exi = exi_public; + exi = ne->exi_public; } /* diff --git a/usr/src/uts/common/fs/nfs/nfs_log.c b/usr/src/uts/common/fs/nfs/nfs_log.c index 7cf0fe24e9..a314f4319a 100644 --- a/usr/src/uts/common/fs/nfs/nfs_log.c +++ b/usr/src/uts/common/fs/nfs/nfs_log.c @@ -18,10 +18,15 @@ * * CDDL HEADER END */ + /* * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. */ +/* + * Copyright 2018 Nexenta Systems, Inc. + */ + #include <sys/cred.h> #include <sys/cmn_err.h> #include <sys/debug.h> @@ -43,8 +48,6 @@ #define NUM_RECORDS_TO_WRITE 256 #define NUM_BYTES_TO_WRITE 65536 -extern krwlock_t exported_lock; - static int nfslog_num_records_to_write = NUM_RECORDS_TO_WRITE; static int nfslog_num_bytes_to_write = NUM_BYTES_TO_WRITE; @@ -595,11 +598,8 @@ log_file_rele(struct log_file *lfp) */ /* ARGSUSED */ void * -nfslog_record_alloc( - struct exportinfo *exi, - int alloc_indx, - void **cookie, - int flags) +nfslog_record_alloc(struct exportinfo *exi, int alloc_indx, void **cookie, + int flags) { struct lr_alloc *lrp; @@ -652,7 +652,7 @@ nfslog_record_alloc( */ void nfslog_record_put(void *cookie, size_t size, bool_t sync, - unsigned int which_buffers) + unsigned int which_buffers) { struct lr_alloc *lrp = (struct lr_alloc *)cookie; struct log_buffer *lbp = lrp->lb; @@ -768,8 +768,8 @@ nfslog_records_flush_to_disk_nolock(struct log_buffer *lbp) * them to the end of the log file. 
*/ static int -nfslog_write_logrecords(struct log_file *lfp, - struct lr_alloc *lrp_writers, int num_recs) +nfslog_write_logrecords(struct log_file *lfp, struct lr_alloc *lrp_writers, + int num_recs) { struct uio uio; struct iovec *iovp; @@ -1161,8 +1161,8 @@ nfsl_flush(struct nfsl_flush_args *args, model_t model) /* * Do the work asynchronously */ - (void) thread_create(NULL, 0, nfslog_do_flush, - tparams, 0, &p0, TS_RUN, minclsyspri); + (void) zthread_create(NULL, 0, nfslog_do_flush, + tparams, 0, minclsyspri); } return (error); @@ -1249,8 +1249,7 @@ out: */ kmem_free(args->buff, args->buff_len); kmem_free(tparams, sizeof (*tparams)); - thread_exit(); - /* NOTREACHED */ + zthread_exit(); } tparams->tp_error = error; @@ -1529,6 +1528,7 @@ static int nfslog_dispatch_table_arglen = sizeof (nfslog_dispatch_table) / */ struct exportinfo * nfslog_get_exi( + nfs_export_t *ne, struct exportinfo *exi, struct svc_req *req, caddr_t res, @@ -1560,7 +1560,7 @@ nfslog_get_exi( return (exi); } - if (exi != exi_public) + if (exi != ne->exi_public) return (NULL); /* @@ -1625,8 +1625,8 @@ static long long rfslog_records_ignored = 0; */ void nfslog_write_record(struct exportinfo *exi, struct svc_req *req, - caddr_t args, caddr_t res, cred_t *cr, struct netbuf *pnb, - unsigned int record_id, unsigned int which_buffers) + caddr_t args, caddr_t res, cred_t *cr, struct netbuf *pnb, + unsigned int record_id, unsigned int which_buffers) { struct nfslog_prog_disp *progtable; /* prog struct */ struct nfslog_vers_disp *verstable; /* version struct */ @@ -1764,17 +1764,17 @@ nfslog_write_record(struct exportinfo *exi, struct svc_req *req, static char * get_publicfh_path(int *alloc_length) { - extern struct exportinfo *exi_public; char *pubpath; + nfs_export_t *ne = nfs_get_export(); - rw_enter(&exported_lock, RW_READER); + rw_enter(&ne->exported_lock, RW_READER); - *alloc_length = exi_public->exi_export.ex_pathlen + 1; + *alloc_length = ne->exi_public->exi_export.ex_pathlen + 1; pubpath = 
kmem_alloc(*alloc_length, KM_SLEEP); - (void) strcpy(pubpath, exi_public->exi_export.ex_path); + (void) strcpy(pubpath, ne->exi_public->exi_export.ex_path); - rw_exit(&exported_lock); + rw_exit(&ne->exported_lock); return (pubpath); } @@ -1870,11 +1870,8 @@ nfslog_unshare_record(struct exportinfo *exi, cred_t *cr) void -nfslog_getfh(struct exportinfo *exi, - fhandle *fh, - char *fname, - enum uio_seg seg, - cred_t *cr) +nfslog_getfh(struct exportinfo *exi, fhandle *fh, char *fname, enum uio_seg seg, + cred_t *cr) { struct svc_req req; int res = 0; diff --git a/usr/src/uts/common/fs/nfs/nfs_server.c b/usr/src/uts/common/fs/nfs/nfs_server.c index 476da6685a..5b7658d048 100644 --- a/usr/src/uts/common/fs/nfs/nfs_server.c +++ b/usr/src/uts/common/fs/nfs/nfs_server.c @@ -22,8 +22,8 @@ * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011 Bayard G. Bell. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved. - * Copyright 2014 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2017 Joyent Inc + * Copyright 2019 Nexenta by DDN, Inc. */ /* @@ -83,7 +83,6 @@ #include <nfs/nfs_clnt.h> #include <nfs/nfs_acl.h> #include <nfs/nfs_log.h> -#include <nfs/nfs_cmd.h> #include <nfs/lm.h> #include <nfs/nfs_dispatch.h> #include <nfs/nfs4_drc.h> @@ -109,6 +108,10 @@ static struct modlinkage modlinkage = { MODREV_1, (void *)&modlmisc, NULL }; +zone_key_t nfssrv_zone_key; +list_t nfssrv_globals_list; +krwlock_t nfssrv_globals_rwl; + kmem_cache_t *nfs_xuio_cache; int nfs_loaned_buffers = 0; @@ -117,10 +120,7 @@ _init(void) { int status; - if ((status = nfs_srvinit()) != 0) { - cmn_err(CE_WARN, "_init: nfs_srvinit failed"); - return (status); - } + nfs_srvinit(); status = mod_install((struct modlinkage *)&modlinkage); if (status != 0) { @@ -177,27 +177,26 @@ _info(struct modinfo *modinfop) * supports RPC_PUBLICFH_OK, and if the filesystem is explicitly exported * public (i.e., not the placeholder). 
*/ -#define PUBLICFH_CHECK(disp, exi, fsid, xfid) \ +#define PUBLICFH_CHECK(ne, disp, exi, fsid, xfid) \ ((disp->dis_flags & RPC_PUBLICFH_OK) && \ ((exi->exi_export.ex_flags & EX_PUBLIC) || \ - (exi == exi_public && exportmatch(exi_root, \ + (exi == ne->exi_public && exportmatch(ne->exi_root, \ fsid, xfid)))) static void nfs_srv_shutdown_all(int); -static void rfs4_server_start(int); +static void rfs4_server_start(nfs_globals_t *, int); static void nullfree(void); static void rfs_dispatch(struct svc_req *, SVCXPRT *); static void acl_dispatch(struct svc_req *, SVCXPRT *); -static void common_dispatch(struct svc_req *, SVCXPRT *, - rpcvers_t, rpcvers_t, char *, - struct rpc_disptable *); -static void hanfsv4_failover(void); static int checkauth(struct exportinfo *, struct svc_req *, cred_t *, int, bool_t, bool_t *); static char *client_name(struct svc_req *req); static char *client_addr(struct svc_req *req, char *buf); extern int sec_svc_getcred(struct svc_req *, cred_t *cr, char **, int *); extern bool_t sec_svc_inrootlist(int, caddr_t, int, caddr_t *); +static void *nfs_server_zone_init(zoneid_t); +static void nfs_server_zone_fini(zoneid_t, void *); +static void nfs_server_zone_shutdown(zoneid_t, void *); #define NFSLOG_COPY_NETBUF(exi, xprt, nb) { \ (nb)->maxlen = (xprt)->xp_rtaddr.maxlen; \ @@ -248,24 +247,6 @@ static SVC_CALLOUT __nfs_sc_rdma[] = { static SVC_CALLOUT_TABLE nfs_sct_rdma = { sizeof (__nfs_sc_rdma) / sizeof (__nfs_sc_rdma[0]), FALSE, __nfs_sc_rdma }; -rpcvers_t nfs_versmin = NFS_VERSMIN_DEFAULT; -rpcvers_t nfs_versmax = NFS_VERSMAX_DEFAULT; - -/* - * Used to track the state of the server so that initialization - * can be done properly. 
- */ -typedef enum { - NFS_SERVER_STOPPED, /* server state destroyed */ - NFS_SERVER_STOPPING, /* server state being destroyed */ - NFS_SERVER_RUNNING, - NFS_SERVER_QUIESCED, /* server state preserved */ - NFS_SERVER_OFFLINE /* server pool offline */ -} nfs_server_running_t; - -static nfs_server_running_t nfs_server_upordown; -static kmutex_t nfs_server_upordown_lock; -static kcondvar_t nfs_server_upordown_cv; /* * DSS: distributed stable storage @@ -277,10 +258,24 @@ int rfs4_dispatch(struct rpcdisp *, struct svc_req *, SVCXPRT *, char *); bool_t rfs4_minorvers_mismatch(struct svc_req *, SVCXPRT *, void *); /* - * RDMA wait variables. + * Stash NFS zone globals in TSD to avoid some lock contention + * from frequent zone_getspecific calls. */ -static kcondvar_t rdma_wait_cv; -static kmutex_t rdma_wait_mutex; +static uint_t nfs_server_tsd_key; + +nfs_globals_t * +nfs_srv_getzg(void) +{ + nfs_globals_t *ng; + + ng = tsd_get(nfs_server_tsd_key); + if (ng == NULL) { + ng = zone_getspecific(nfssrv_zone_key, curzone); + (void) tsd_set(nfs_server_tsd_key, ng); + } + + return (ng); +} /* * Will be called at the point the server pool is being unregistered @@ -291,11 +286,15 @@ static kmutex_t rdma_wait_mutex; void nfs_srv_offline(void) { - mutex_enter(&nfs_server_upordown_lock); - if (nfs_server_upordown == NFS_SERVER_RUNNING) { - nfs_server_upordown = NFS_SERVER_OFFLINE; + nfs_globals_t *ng; + + ng = nfs_srv_getzg(); + + mutex_enter(&ng->nfs_server_upordown_lock); + if (ng->nfs_server_upordown == NFS_SERVER_RUNNING) { + ng->nfs_server_upordown = NFS_SERVER_OFFLINE; } - mutex_exit(&nfs_server_upordown_lock); + mutex_exit(&ng->nfs_server_upordown_lock); } /* @@ -324,15 +323,18 @@ nfs_srv_quiesce_all(void) } static void -nfs_srv_shutdown_all(int quiesce) { - mutex_enter(&nfs_server_upordown_lock); +nfs_srv_shutdown_all(int quiesce) +{ + nfs_globals_t *ng = nfs_srv_getzg(); + + mutex_enter(&ng->nfs_server_upordown_lock); if (quiesce) { - if (nfs_server_upordown == 
NFS_SERVER_RUNNING || - nfs_server_upordown == NFS_SERVER_OFFLINE) { - nfs_server_upordown = NFS_SERVER_QUIESCED; - cv_signal(&nfs_server_upordown_cv); + if (ng->nfs_server_upordown == NFS_SERVER_RUNNING || + ng->nfs_server_upordown == NFS_SERVER_OFFLINE) { + ng->nfs_server_upordown = NFS_SERVER_QUIESCED; + cv_signal(&ng->nfs_server_upordown_cv); - /* reset DSS state, for subsequent warm restart */ + /* reset DSS state */ rfs4_dss_numnewpaths = 0; rfs4_dss_newpaths = NULL; @@ -340,22 +342,27 @@ nfs_srv_shutdown_all(int quiesce) { "NFSv4 state has been preserved"); } } else { - if (nfs_server_upordown == NFS_SERVER_OFFLINE) { - nfs_server_upordown = NFS_SERVER_STOPPING; - mutex_exit(&nfs_server_upordown_lock); - rfs4_state_fini(); - rfs4_fini_drc(nfs4_drc); - mutex_enter(&nfs_server_upordown_lock); - nfs_server_upordown = NFS_SERVER_STOPPED; - cv_signal(&nfs_server_upordown_cv); + if (ng->nfs_server_upordown == NFS_SERVER_OFFLINE) { + ng->nfs_server_upordown = NFS_SERVER_STOPPING; + mutex_exit(&ng->nfs_server_upordown_lock); + rfs4_state_zone_fini(); + rfs4_fini_drc(); + mutex_enter(&ng->nfs_server_upordown_lock); + ng->nfs_server_upordown = NFS_SERVER_STOPPED; + + /* reset DSS state */ + rfs4_dss_numnewpaths = 0; + rfs4_dss_newpaths = NULL; + + cv_signal(&ng->nfs_server_upordown_cv); } } - mutex_exit(&nfs_server_upordown_lock); + mutex_exit(&ng->nfs_server_upordown_lock); } static int nfs_srv_set_sc_versions(struct file *fp, SVC_CALLOUT_TABLE **sctpp, - rpcvers_t versmin, rpcvers_t versmax) + rpcvers_t versmin, rpcvers_t versmax) { struct strioctl strioc; struct T_info_ack tinfo; @@ -418,6 +425,7 @@ nfs_srv_set_sc_versions(struct file *fp, SVC_CALLOUT_TABLE **sctpp, int nfs_svc(struct nfs_svc_args *arg, model_t model) { + nfs_globals_t *ng; file_t *fp; SVCMASTERXPRT *xprt; int error; @@ -432,6 +440,7 @@ nfs_svc(struct nfs_svc_args *arg, model_t model) model = model; /* STRUCT macros don't always refer to it */ #endif + ng = nfs_srv_getzg(); STRUCT_SET_HANDLE(uap, 
model, arg); /* Check privileges in nfssys() */ @@ -439,6 +448,10 @@ nfs_svc(struct nfs_svc_args *arg, model_t model) if ((fp = getf(STRUCT_FGET(uap, fd))) == NULL) return (EBADF); + /* Setup global file handle in nfs_export */ + if ((error = nfs_export_get_rootfh(ng)) != 0) + return (error); + /* * Set read buffer size to rsize * and add room for RPC headers. @@ -465,27 +478,27 @@ nfs_svc(struct nfs_svc_args *arg, model_t model) return (error); } - nfs_versmin = STRUCT_FGET(uap, versmin); - nfs_versmax = STRUCT_FGET(uap, versmax); + ng->nfs_versmin = STRUCT_FGET(uap, versmin); + ng->nfs_versmax = STRUCT_FGET(uap, versmax); /* Double check the vers min/max ranges */ - if ((nfs_versmin > nfs_versmax) || - (nfs_versmin < NFS_VERSMIN) || - (nfs_versmax > NFS_VERSMAX)) { - nfs_versmin = NFS_VERSMIN_DEFAULT; - nfs_versmax = NFS_VERSMAX_DEFAULT; + if ((ng->nfs_versmin > ng->nfs_versmax) || + (ng->nfs_versmin < NFS_VERSMIN) || + (ng->nfs_versmax > NFS_VERSMAX)) { + ng->nfs_versmin = NFS_VERSMIN_DEFAULT; + ng->nfs_versmax = NFS_VERSMAX_DEFAULT; } - if (error = - nfs_srv_set_sc_versions(fp, &sctp, nfs_versmin, nfs_versmax)) { + if (error = nfs_srv_set_sc_versions(fp, &sctp, ng->nfs_versmin, + ng->nfs_versmax)) { releasef(STRUCT_FGET(uap, fd)); kmem_free(addrmask.buf, addrmask.maxlen); return (error); } /* Initialize nfsv4 server */ - if (nfs_versmax == (rpcvers_t)NFS_V4) - rfs4_server_start(STRUCT_FGET(uap, delegation)); + if (ng->nfs_versmax == (rpcvers_t)NFS_V4) + rfs4_server_start(ng, STRUCT_FGET(uap, delegation)); /* Create a transport handle. 
*/ error = svc_tli_kcreate(fp, readsize, buf, &addrmask, &xprt, @@ -504,59 +517,36 @@ nfs_svc(struct nfs_svc_args *arg, model_t model) } static void -rfs4_server_start(int nfs4_srv_delegation) +rfs4_server_start(nfs_globals_t *ng, int nfs4_srv_delegation) { /* * Determine if the server has previously been "started" and * if not, do the per instance initialization */ - mutex_enter(&nfs_server_upordown_lock); + mutex_enter(&ng->nfs_server_upordown_lock); - if (nfs_server_upordown != NFS_SERVER_RUNNING) { + if (ng->nfs_server_upordown != NFS_SERVER_RUNNING) { /* Do we need to stop and wait on the previous server? */ - while (nfs_server_upordown == NFS_SERVER_STOPPING || - nfs_server_upordown == NFS_SERVER_OFFLINE) - cv_wait(&nfs_server_upordown_cv, - &nfs_server_upordown_lock); + while (ng->nfs_server_upordown == NFS_SERVER_STOPPING || + ng->nfs_server_upordown == NFS_SERVER_OFFLINE) + cv_wait(&ng->nfs_server_upordown_cv, + &ng->nfs_server_upordown_lock); - if (nfs_server_upordown != NFS_SERVER_RUNNING) { + if (ng->nfs_server_upordown != NFS_SERVER_RUNNING) { (void) svc_pool_control(NFS_SVCPOOL_ID, SVCPSET_UNREGISTER_PROC, (void *)&nfs_srv_offline); (void) svc_pool_control(NFS_SVCPOOL_ID, SVCPSET_SHUTDOWN_PROC, (void *)&nfs_srv_stop_all); - /* is this an nfsd warm start? */ - if (nfs_server_upordown == NFS_SERVER_QUIESCED) { - cmn_err(CE_NOTE, "nfs_server: " - "server was previously quiesced; " - "existing NFSv4 state will be re-used"); - - /* - * HA-NFSv4: this is also the signal - * that a Resource Group failover has - * occurred. 
- */ - if (cluster_bootflags & CLUSTER_BOOTED) - hanfsv4_failover(); - } else { - /* cold start */ - rfs4_state_init(); - nfs4_drc = rfs4_init_drc(nfs4_drc_max, - nfs4_drc_hash); - } - - /* - * Check to see if delegation is to be - * enabled at the server - */ - if (nfs4_srv_delegation != FALSE) - rfs4_set_deleg_policy(SRV_NORMAL_DELEGATE); + rfs4_do_server_start(ng->nfs_server_upordown, + nfs4_srv_delegation, + cluster_bootflags & CLUSTER_BOOTED); - nfs_server_upordown = NFS_SERVER_RUNNING; + ng->nfs_server_upordown = NFS_SERVER_RUNNING; } - cv_signal(&nfs_server_upordown_cv); + cv_signal(&ng->nfs_server_upordown_cv); } - mutex_exit(&nfs_server_upordown_lock); + mutex_exit(&ng->nfs_server_upordown_lock); } /* @@ -566,6 +556,7 @@ rfs4_server_start(int nfs4_srv_delegation) int rdma_start(struct rdma_svc_args *rsa) { + nfs_globals_t *ng; int error; rdma_xprt_group_t started_rdma_xprts; rdma_stat stat; @@ -578,8 +569,10 @@ rdma_start(struct rdma_svc_args *rsa) rsa->nfs_versmin = NFS_VERSMIN_DEFAULT; rsa->nfs_versmax = NFS_VERSMAX_DEFAULT; } - nfs_versmin = rsa->nfs_versmin; - nfs_versmax = rsa->nfs_versmax; + + ng = nfs_srv_getzg(); + ng->nfs_versmin = rsa->nfs_versmin; + ng->nfs_versmax = rsa->nfs_versmax; /* Set the versions in the callout table */ __nfs_sc_rdma[0].sc_versmin = rsa->nfs_versmin; @@ -593,7 +586,7 @@ rdma_start(struct rdma_svc_args *rsa) /* Initialize nfsv4 server */ if (rsa->nfs_versmax == (rpcvers_t)NFS_V4) - rfs4_server_start(rsa->delegation); + rfs4_server_start(ng, rsa->delegation); started_rdma_xprts.rtg_count = 0; started_rdma_xprts.rtg_listhead = NULL; @@ -610,7 +603,7 @@ restart: /* * wait till either interrupted by a signal on * nfs service stop/restart or signalled by a - * rdma plugin attach/detatch. + * rdma attach/detatch. 
*/ stat = rdma_kwait(); @@ -661,10 +654,10 @@ void rpc_null_v3(caddr_t *argp, caddr_t *resp, struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro) { - DTRACE_NFSV3_3(op__null__start, struct svc_req *, req, - cred_t *, cr, vnode_t *, NULL); - DTRACE_NFSV3_3(op__null__done, struct svc_req *, req, - cred_t *, cr, vnode_t *, NULL); + DTRACE_NFSV3_4(op__null__start, struct svc_req *, req, + cred_t *, cr, vnode_t *, NULL, struct exportinfo *, exi); + DTRACE_NFSV3_4(op__null__done, struct svc_req *, req, + cred_t *, cr, vnode_t *, NULL, struct exportinfo *, exi); } /* ARGSUSED */ @@ -1342,13 +1335,13 @@ union rfs_res { static struct rpc_disptable rfs_disptable[] = { {sizeof (rfsdisptab_v2) / sizeof (rfsdisptab_v2[0]), rfscallnames_v2, - &rfsproccnt_v2_ptr, rfsdisptab_v2}, + rfsdisptab_v2}, {sizeof (rfsdisptab_v3) / sizeof (rfsdisptab_v3[0]), rfscallnames_v3, - &rfsproccnt_v3_ptr, rfsdisptab_v3}, + rfsdisptab_v3}, {sizeof (rfsdisptab_v4) / sizeof (rfsdisptab_v4[0]), rfscallnames_v4, - &rfsproccnt_v4_ptr, rfsdisptab_v4}, + rfsdisptab_v4}, }; /* @@ -1367,7 +1360,6 @@ static int cred_hits = 0; static int cred_misses = 0; #endif - #ifdef DEBUG /* * Debug code to allow disabling of rfs_dispatch() use of @@ -1471,11 +1463,9 @@ auth_tooweak(struct svc_req *req, char *res) return (FALSE); } - static void common_dispatch(struct svc_req *req, SVCXPRT *xprt, rpcvers_t min_vers, - rpcvers_t max_vers, char *pgmname, - struct rpc_disptable *disptable) + rpcvers_t max_vers, char *pgmname, struct rpc_disptable *disptable) { int which; rpcvers_t vers; @@ -1508,9 +1498,18 @@ common_dispatch(struct svc_req *req, SVCXPRT *xprt, rpcvers_t min_vers, char **procnames; char cbuf[INET6_ADDRSTRLEN]; /* to hold both IPv4 and IPv6 addr */ bool_t ro = FALSE; + nfs_globals_t *ng = nfs_srv_getzg(); + nfs_export_t *ne = ng->nfs_export; + kstat_named_t *svstat, *procstat; + + ASSERT(req->rq_prog == NFS_PROGRAM || req->rq_prog == NFS_ACL_PROGRAM); vers = req->rq_vers; + svstat = 
ng->svstat[req->rq_vers]; + procstat = (req->rq_prog == NFS_PROGRAM) ? + ng->rfsproccnt[vers] : ng->aclproccnt[vers]; + if (vers < min_vers || vers > max_vers) { svcerr_progvers(req->rq_xprt, min_vers, max_vers); error++; @@ -1526,7 +1525,7 @@ common_dispatch(struct svc_req *req, SVCXPRT *xprt, rpcvers_t min_vers, goto done; } - (*(disptable[(int)vers].dis_proccntp))[which].value.ui64++; + procstat[which].value.ui64++; disp = &disptable[(int)vers].dis_table[which]; procnames = disptable[(int)vers].dis_procnames; @@ -1632,13 +1631,15 @@ common_dispatch(struct svc_req *req, SVCXPRT *xprt, rpcvers_t min_vers, cr = xprt->xp_cred; ASSERT(cr != NULL); #ifdef DEBUG - if (crgetref(cr) != 1) { - crfree(cr); - cr = crget(); - xprt->xp_cred = cr; - cred_misses++; - } else - cred_hits++; + { + if (crgetref(cr) != 1) { + crfree(cr); + cr = crget(); + xprt->xp_cred = cr; + cred_misses++; + } else + cred_hits++; + } #else if (crgetref(cr) != 1) { crfree(cr); @@ -1650,7 +1651,7 @@ common_dispatch(struct svc_req *req, SVCXPRT *xprt, rpcvers_t min_vers, exi = checkexport(fsid, xfid); if (exi != NULL) { - publicfh_ok = PUBLICFH_CHECK(disp, exi, fsid, xfid); + publicfh_ok = PUBLICFH_CHECK(ne, disp, exi, fsid, xfid); /* * Don't allow non-V4 clients access @@ -1763,7 +1764,7 @@ common_dispatch(struct svc_req *req, SVCXPRT *xprt, rpcvers_t min_vers, * file system. */ if (nfslog_buffer_list != NULL) { - nfslog_exi = nfslog_get_exi(exi, req, res, &nfslog_rec_id); + nfslog_exi = nfslog_get_exi(ne, exi, req, res, &nfslog_rec_id); /* * Is logging enabled? 
*/ @@ -1846,9 +1847,8 @@ done: if (exi != NULL) exi_rele(exi); - global_svstat_ptr[req->rq_vers][NFS_BADCALLS].value.ui64 += error; - - global_svstat_ptr[req->rq_vers][NFS_CALLS].value.ui64++; + svstat[NFS_BADCALLS].value.ui64 += error; + svstat[NFS_CALLS].value.ui64++; } static void @@ -1971,10 +1971,10 @@ static struct rpcdisp acldisptab_v3[] = { static struct rpc_disptable acl_disptable[] = { {sizeof (acldisptab_v2) / sizeof (acldisptab_v2[0]), aclcallnames_v2, - &aclproccnt_v2_ptr, acldisptab_v2}, + acldisptab_v2}, {sizeof (acldisptab_v3) / sizeof (acldisptab_v3[0]), aclcallnames_v3, - &aclproccnt_v3_ptr, acldisptab_v3}, + acldisptab_v3}, }; static void @@ -2568,31 +2568,29 @@ client_addr(struct svc_req *req, char *buf) * - Initialize all locks * - initialize the version 3 write verifier */ -int +void nfs_srvinit(void) { - int error; - error = nfs_exportinit(); - if (error != 0) - return (error); - error = rfs4_srvrinit(); - if (error != 0) { - nfs_exportfini(); - return (error); - } + /* Truly global stuff in this module (not per zone) */ + rw_init(&nfssrv_globals_rwl, NULL, RW_DEFAULT, NULL); + list_create(&nfssrv_globals_list, sizeof (nfs_globals_t), + offsetof(nfs_globals_t, nfs_g_link)); + tsd_create(&nfs_server_tsd_key, NULL); + + /* The order here is important */ + nfs_exportinit(); rfs_srvrinit(); rfs3_srvrinit(); + rfs4_srvrinit(); nfsauth_init(); - /* Init the stuff to control start/stop */ - nfs_server_upordown = NFS_SERVER_STOPPED; - mutex_init(&nfs_server_upordown_lock, NULL, MUTEX_DEFAULT, NULL); - cv_init(&nfs_server_upordown_cv, NULL, CV_DEFAULT, NULL); - mutex_init(&rdma_wait_mutex, NULL, MUTEX_DEFAULT, NULL); - cv_init(&rdma_wait_cv, NULL, CV_DEFAULT, NULL); - - return (0); + /* + * NFS server zone-specific global variables + * Note the zone_init is called for the GZ here. 
+ */ + zone_key_create(&nfssrv_zone_key, nfs_server_zone_init, + nfs_server_zone_shutdown, nfs_server_zone_fini); } /* @@ -2603,21 +2601,126 @@ nfs_srvinit(void) void nfs_srvfini(void) { + + /* + * NFS server zone-specific global variables + * Note the zone_fini is called for the GZ here. + */ + (void) zone_key_delete(nfssrv_zone_key); + + /* The order here is important (reverse of init) */ nfsauth_fini(); + rfs4_srvrfini(); rfs3_srvrfini(); rfs_srvrfini(); nfs_exportfini(); - mutex_destroy(&nfs_server_upordown_lock); - cv_destroy(&nfs_server_upordown_cv); - mutex_destroy(&rdma_wait_mutex); - cv_destroy(&rdma_wait_cv); + /* Truly global stuff in this module (not per zone) */ + tsd_destroy(&nfs_server_tsd_key); + list_destroy(&nfssrv_globals_list); + rw_destroy(&nfssrv_globals_rwl); } /* - * Set up an iovec array of up to cnt pointers. + * Zone init, shutdown, fini functions for the NFS server + * + * This design is careful to create the entire hierarhcy of + * NFS server "globals" (including those created by various + * per-module *_zone_init functions, etc.) so that all these + * objects have exactly the same lifetime. + * + * These objects are also kept on a list for two reasons: + * 1: It makes finding these in mdb _much_ easier. + * 2: It allows operating across all zone globals for + * functions like nfs_auth.c:exi_cache_reclaim */ +static void * +nfs_server_zone_init(zoneid_t zoneid) +{ + nfs_globals_t *ng; + + ng = kmem_zalloc(sizeof (*ng), KM_SLEEP); + + ng->nfs_versmin = NFS_VERSMIN_DEFAULT; + ng->nfs_versmax = NFS_VERSMAX_DEFAULT; + + /* Init the stuff to control start/stop */ + ng->nfs_server_upordown = NFS_SERVER_STOPPED; + mutex_init(&ng->nfs_server_upordown_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&ng->nfs_server_upordown_cv, NULL, CV_DEFAULT, NULL); + mutex_init(&ng->rdma_wait_mutex, NULL, MUTEX_DEFAULT, NULL); + cv_init(&ng->rdma_wait_cv, NULL, CV_DEFAULT, NULL); + + ng->nfs_zoneid = zoneid; + + /* + * Order here is important. 
+ * export init must precede srv init calls. + */ + nfs_export_zone_init(ng); + rfs_stat_zone_init(ng); + rfs_srv_zone_init(ng); + rfs3_srv_zone_init(ng); + rfs4_srv_zone_init(ng); + nfsauth_zone_init(ng); + + rw_enter(&nfssrv_globals_rwl, RW_WRITER); + list_insert_tail(&nfssrv_globals_list, ng); + rw_exit(&nfssrv_globals_rwl); + + return (ng); +} +/* ARGSUSED */ +static void +nfs_server_zone_shutdown(zoneid_t zoneid, void *data) +{ + nfs_globals_t *ng; + + ng = (nfs_globals_t *)data; + + /* + * Order is like _fini, but only + * some modules need this hook. + */ + nfsauth_zone_shutdown(ng); + nfs_export_zone_shutdown(ng); +} + +/* ARGSUSED */ +static void +nfs_server_zone_fini(zoneid_t zoneid, void *data) +{ + nfs_globals_t *ng; + + ng = (nfs_globals_t *)data; + + rw_enter(&nfssrv_globals_rwl, RW_WRITER); + list_remove(&nfssrv_globals_list, ng); + rw_exit(&nfssrv_globals_rwl); + + /* + * Order here is important. + * reverse order from init + */ + nfsauth_zone_fini(ng); + rfs4_srv_zone_fini(ng); + rfs3_srv_zone_fini(ng); + rfs_srv_zone_fini(ng); + rfs_stat_zone_fini(ng); + nfs_export_zone_fini(ng); + + mutex_destroy(&ng->nfs_server_upordown_lock); + cv_destroy(&ng->nfs_server_upordown_cv); + mutex_destroy(&ng->rdma_wait_mutex); + cv_destroy(&ng->rdma_wait_cv); + + kmem_free(ng, sizeof (*ng)); +} + +/* + * Set up an iovec array of up to cnt pointers. + */ void mblk_to_iov(mblk_t *m, int cnt, struct iovec *iovp) { @@ -2855,11 +2958,13 @@ rfs_publicfh_mclookup(char *p, vnode_t *dvp, cred_t *cr, vnode_t **vpp, /* Release the reference on the old exi value */ ASSERT(*exi != NULL); exi_rele(*exi); + *exi = NULL; if (error = nfs_check_vpexi(mc_dvp, *vpp, kcred, exi)) { VN_RELE(*vpp); goto publicfh_done; } + /* Have a new *exi */ } } @@ -2886,6 +2991,8 @@ rfs_pathname( struct pathname pn; int error; + ASSERT3U(crgetzoneid(cr), ==, curzone->zone_id); + /* * If pathname starts with '/', then set startdvp to root. 
*/ @@ -2893,7 +3000,7 @@ rfs_pathname( while (*path == '/') path++; - startdvp = rootdir; + startdvp = ZONE_ROOTVP(); } error = pn_get_buf(path, UIO_SYSSPACE, &pn, namebuf, sizeof (namebuf)); @@ -2916,7 +3023,7 @@ rfs_pathname( } VN_HOLD(startdvp); error = lookuppnvp(&pn, NULL, NO_FOLLOW, dirvpp, compvpp, - rootdir, startdvp, cr); + ZONE_ROOTVP(), startdvp, cr); } if (error == ENAMETOOLONG) { /* @@ -2933,7 +3040,7 @@ rfs_pathname( } VN_HOLD(startdvp); error = lookuppnvp(&pn, NULL, NO_FOLLOW, dirvpp, compvpp, - rootdir, startdvp, cr); + ZONE_ROOTVP(), startdvp, cr); pn_free(&pn); } @@ -3038,168 +3145,6 @@ nfs_check_vpexi(vnode_t *mc_dvp, vnode_t *vp, cred_t *cr, } /* - * Do the main work of handling HA-NFSv4 Resource Group failover on - * Sun Cluster. - * We need to detect whether any RG admin paths have been added or removed, - * and adjust resources accordingly. - * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In - * order to scale, the list and array of paths need to be held in more - * suitable data structures. - */ -static void -hanfsv4_failover(void) -{ - int i, start_grace, numadded_paths = 0; - char **added_paths = NULL; - rfs4_dss_path_t *dss_path; - - /* - * Note: currently, rfs4_dss_pathlist cannot be NULL, since - * it will always include an entry for NFS4_DSS_VAR_DIR. If we - * make the latter dynamically specified too, the following will - * need to be adjusted. - */ - - /* - * First, look for removed paths: RGs that have been failed-over - * away from this node. - * Walk the "currently-serving" rfs4_dss_pathlist and, for each - * path, check if it is on the "passed-in" rfs4_dss_newpaths array - * from nfsd. If not, that RG path has been removed. - * - * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed - * any duplicates. 
- */ - dss_path = rfs4_dss_pathlist; - do { - int found = 0; - char *path = dss_path->path; - - /* used only for non-HA so may not be removed */ - if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) { - dss_path = dss_path->next; - continue; - } - - for (i = 0; i < rfs4_dss_numnewpaths; i++) { - int cmpret; - char *newpath = rfs4_dss_newpaths[i]; - - /* - * Since nfsd has sorted rfs4_dss_newpaths for us, - * once the return from strcmp is negative we know - * we've passed the point where "path" should be, - * and can stop searching: "path" has been removed. - */ - cmpret = strcmp(path, newpath); - if (cmpret < 0) - break; - if (cmpret == 0) { - found = 1; - break; - } - } - - if (found == 0) { - unsigned index = dss_path->index; - rfs4_servinst_t *sip = dss_path->sip; - rfs4_dss_path_t *path_next = dss_path->next; - - /* - * This path has been removed. - * We must clear out the servinst reference to - * it, since it's now owned by another - * node: we should not attempt to touch it. - */ - ASSERT(dss_path == sip->dss_paths[index]); - sip->dss_paths[index] = NULL; - - /* remove from "currently-serving" list, and destroy */ - remque(dss_path); - /* allow for NUL */ - kmem_free(dss_path->path, strlen(dss_path->path) + 1); - kmem_free(dss_path, sizeof (rfs4_dss_path_t)); - - dss_path = path_next; - } else { - /* path was found; not removed */ - dss_path = dss_path->next; - } - } while (dss_path != rfs4_dss_pathlist); - - /* - * Now, look for added paths: RGs that have been failed-over - * to this node. - * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and, - * for each path, check if it is on the "currently-serving" - * rfs4_dss_pathlist. If not, that RG path has been added. - * - * Note: we don't do duplicate detection here; nfsd does that for us. - * - * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us - * an upper bound for the size needed for added_paths[numadded_paths]. 
- */ - - /* probably more space than we need, but guaranteed to be enough */ - if (rfs4_dss_numnewpaths > 0) { - size_t sz = rfs4_dss_numnewpaths * sizeof (char *); - added_paths = kmem_zalloc(sz, KM_SLEEP); - } - - /* walk the "passed-in" rfs4_dss_newpaths array from nfsd */ - for (i = 0; i < rfs4_dss_numnewpaths; i++) { - int found = 0; - char *newpath = rfs4_dss_newpaths[i]; - - dss_path = rfs4_dss_pathlist; - do { - char *path = dss_path->path; - - /* used only for non-HA */ - if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) { - dss_path = dss_path->next; - continue; - } - - if (strncmp(path, newpath, strlen(path)) == 0) { - found = 1; - break; - } - - dss_path = dss_path->next; - } while (dss_path != rfs4_dss_pathlist); - - if (found == 0) { - added_paths[numadded_paths] = newpath; - numadded_paths++; - } - } - - /* did we find any added paths? */ - if (numadded_paths > 0) { - /* create a new server instance, and start its grace period */ - start_grace = 1; - rfs4_servinst_create(start_grace, numadded_paths, added_paths); - - /* read in the stable storage state from these paths */ - rfs4_dss_readstate(numadded_paths, added_paths); - - /* - * Multiple failovers during a grace period will cause - * clients of the same resource group to be partitioned - * into different server instances, with different - * grace periods. Since clients of the same resource - * group must be subject to the same grace period, - * we need to reset all currently active grace periods. - */ - rfs4_grace_reset_all(); - } - - if (rfs4_dss_numnewpaths > 0) - kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *)); -} - -/* * Used by NFSv3 and NFSv4 server to query label of * a pathname component during lookup/access ops. */ diff --git a/usr/src/uts/common/fs/nfs/nfs_srv.c b/usr/src/uts/common/fs/nfs/nfs_srv.c index 2535ab3219..b320f599df 100644 --- a/usr/src/uts/common/fs/nfs/nfs_srv.c +++ b/usr/src/uts/common/fs/nfs/nfs_srv.c @@ -30,6 +30,11 @@ * All rights reserved. 
*/ +/* + * Copyright 2018 Nexenta Systems, Inc. + * Copyright (c) 2016 by Delphix. All rights reserved. + */ + #include <sys/param.h> #include <sys/types.h> #include <sys/systm.h> @@ -70,6 +75,21 @@ #include <sys/strsubr.h> +struct rfs_async_write_list; + +/* + * Zone globals of NFSv2 server + */ +typedef struct nfs_srv { + kmutex_t async_write_lock; + struct rfs_async_write_list *async_write_head; + + /* + * enables write clustering if == 1 + */ + int write_async; +} nfs_srv_t; + /* * These are the interface routines for the server side of the * Network File System. See the NFS version 2 protocol specification @@ -80,6 +100,7 @@ static int sattr_to_vattr(struct nfssattr *, struct vattr *); static void acl_perm(struct vnode *, struct exportinfo *, struct vattr *, cred_t *); + /* * Some "over the wire" UNIX file types. These are encoded * into the mode. This needs to be fixed in the next rev. @@ -91,6 +112,15 @@ static void acl_perm(struct vnode *, struct exportinfo *, struct vattr *, u_longlong_t nfs2_srv_caller_id; +static nfs_srv_t * +nfs_get_srv(void) +{ + nfs_globals_t *ng = nfs_srv_getzg(); + nfs_srv_t *srv = ng->nfs_srv; + ASSERT(srv != NULL); + return (srv); +} + /* * Get file attributes. * Returns the current attributes of the file with the given fhandle. @@ -386,17 +416,20 @@ rfs_climb_crossmnt(vnode_t **dvpp, struct exportinfo **exip, cred_t *cr) { struct exportinfo *exi; vnode_t *dvp = *dvpp; + vnode_t *zone_rootvp; - ASSERT(dvp->v_flag & VROOT); + zone_rootvp = (*exip)->exi_ne->exi_root->exi_vp; + ASSERT((dvp->v_flag & VROOT) || VN_CMP(zone_rootvp, dvp)); VN_HOLD(dvp); - dvp = untraverse(dvp); + dvp = untraverse(dvp, zone_rootvp); exi = nfs_vptoexi(NULL, dvp, cr, NULL, NULL, FALSE); if (exi == NULL) { VN_RELE(dvp); return (-1); } + ASSERT3U(exi->exi_zoneid, ==, (*exip)->exi_zoneid); exi_rele(*exip); *exip = exi; VN_RELE(*dvpp); @@ -446,7 +479,7 @@ rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr, * location of the public filehandle. 
*/ if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) { - dvp = rootdir; + dvp = ZONE_ROOTVP(); VN_HOLD(dvp); } else { dvp = nfs_fhtovp(fhp, exi); @@ -457,6 +490,7 @@ rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr, } exi_hold(exi); + ASSERT3U(exi->exi_zoneid, ==, curzone->zone_id); /* * Not allow lookup beyond root. @@ -466,7 +500,7 @@ rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr, if (strcmp(da->da_name, "..") == 0 && EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) { if ((exi->exi_export.ex_flags & EX_NOHIDE) && - (dvp->v_flag & VROOT)) { + ((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp))) { /* * special case for ".." and 'nohide'exported root */ @@ -502,6 +536,7 @@ rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr, publicfh_flag = TRUE; exi_rele(exi); + exi = NULL; error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &exi, &sec); @@ -635,10 +670,12 @@ rfs_readlink(fhandle_t *fhp, struct nfsrdlnres *rl, struct exportinfo *exi, if (is_referral) { char *s; size_t strsz; + kstat_named_t *stat = + exi->exi_ne->ne_globals->svstat[NFS_VERSION]; /* Get an artificial symlink based on a referral */ s = build_symlink(vp, cr, &strsz); - global_svstat_ptr[2][NFS_REFERLINKS].value.ui64++; + stat[NFS_REFERLINKS].value.ui64++; DTRACE_PROBE2(nfs2serv__func__referral__reflink, vnode_t *, vp, char *, s); if (s == NULL) @@ -775,6 +812,8 @@ rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr, /* check if a monitor detected a delegation conflict */ if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { + if (in_crit) + nbl_end_crit(vp); VN_RELE(vp); /* mark as wouldblock so response is dropped */ curthread->t_flag |= T_WOULDBLOCK; @@ -1100,10 +1139,7 @@ rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns, /* check if a monitor detected a delegation conflict */ if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) { - VN_RELE(vp); - /* mark as wouldblock so response is dropped */ - curthread->t_flag |= T_WOULDBLOCK; - return; + goto 
out; } if (wa->wa_data || wa->wa_rlist) { @@ -1143,6 +1179,7 @@ rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns, error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct); curthread->t_cred = savecred; } else { + iovcnt = 0; for (m = wa->wa_mblk; m != NULL; m = m->b_cont) iovcnt++; @@ -1286,8 +1323,11 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns, cred_t *savecred; int in_crit = 0; caller_context_t ct; + nfs_srv_t *nsrv; - if (!rfs_write_async) { + ASSERT(exi == NULL || exi->exi_zoneid == curzone->zone_id); + nsrv = nfs_get_srv(); + if (!nsrv->write_async) { rfs_write_sync(wa, ns, exi, req, cr, ro); return; } @@ -1312,8 +1352,8 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns, * Look to see if there is already a cluster started * for this file. */ - mutex_enter(&rfs_async_write_lock); - for (lp = rfs_async_write_head; lp != NULL; lp = lp->next) { + mutex_enter(&nsrv->async_write_lock); + for (lp = nsrv->async_write_head; lp != NULL; lp = lp->next) { if (bcmp(&wa->wa_fhandle, lp->fhp, sizeof (fhandle_t)) == 0) break; @@ -1339,8 +1379,8 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns, else trp->list = nrp; while (nrp->ns->ns_status == RFSWRITE_INITVAL) - cv_wait(&lp->cv, &rfs_async_write_lock); - mutex_exit(&rfs_async_write_lock); + cv_wait(&lp->cv, &nsrv->async_write_lock); + mutex_exit(&nsrv->async_write_lock); return; } @@ -1357,15 +1397,15 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns, nlp->list = nrp; nlp->next = NULL; - if (rfs_async_write_head == NULL) { - rfs_async_write_head = nlp; + if (nsrv->async_write_head == NULL) { + nsrv->async_write_head = nlp; } else { - lp = rfs_async_write_head; + lp = nsrv->async_write_head; while (lp->next != NULL) lp = lp->next; lp->next = nlp; } - mutex_exit(&rfs_async_write_lock); + mutex_exit(&nsrv->async_write_lock); /* * Convert the file handle common to all of the requests @@ -1373,11 +1413,11 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns, */ vp = 
nfs_fhtovp(&wa->wa_fhandle, exi); if (vp == NULL) { - mutex_enter(&rfs_async_write_lock); - if (rfs_async_write_head == nlp) - rfs_async_write_head = nlp->next; + mutex_enter(&nsrv->async_write_lock); + if (nsrv->async_write_head == nlp) + nsrv->async_write_head = nlp->next; else { - lp = rfs_async_write_head; + lp = nsrv->async_write_head; while (lp->next != nlp) lp = lp->next; lp->next = nlp->next; @@ -1388,7 +1428,7 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns, rp->thread->t_flag |= t_flag; } cv_broadcast(&nlp->cv); - mutex_exit(&rfs_async_write_lock); + mutex_exit(&nsrv->async_write_lock); return; } @@ -1399,11 +1439,11 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns, */ if (vp->v_type != VREG) { VN_RELE(vp); - mutex_enter(&rfs_async_write_lock); - if (rfs_async_write_head == nlp) - rfs_async_write_head = nlp->next; + mutex_enter(&nsrv->async_write_lock); + if (nsrv->async_write_head == nlp) + nsrv->async_write_head = nlp->next; else { - lp = rfs_async_write_head; + lp = nsrv->async_write_head; while (lp->next != nlp) lp = lp->next; lp->next = nlp->next; @@ -1414,7 +1454,7 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns, rp->thread->t_flag |= t_flag; } cv_broadcast(&nlp->cv); - mutex_exit(&rfs_async_write_lock); + mutex_exit(&nsrv->async_write_lock); return; } @@ -1446,11 +1486,11 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns, VN_RELE(vp); /* mark as wouldblock so response is dropped */ curthread->t_flag |= T_WOULDBLOCK; - mutex_enter(&rfs_async_write_lock); - if (rfs_async_write_head == nlp) - rfs_async_write_head = nlp->next; + mutex_enter(&nsrv->async_write_lock); + if (nsrv->async_write_head == nlp) + nsrv->async_write_head = nlp->next; else { - lp = rfs_async_write_head; + lp = nsrv->async_write_head; while (lp->next != nlp) lp = lp->next; lp->next = nlp->next; @@ -1462,7 +1502,7 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns, } } cv_broadcast(&nlp->cv); - 
mutex_exit(&rfs_async_write_lock); + mutex_exit(&nsrv->async_write_lock); return; } @@ -1484,16 +1524,16 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns, * will allow more requests to be clustered in this * second cluster. */ - mutex_enter(&rfs_async_write_lock); - if (rfs_async_write_head == nlp) - rfs_async_write_head = nlp->next; + mutex_enter(&nsrv->async_write_lock); + if (nsrv->async_write_head == nlp) + nsrv->async_write_head = nlp->next; else { - lp = rfs_async_write_head; + lp = nsrv->async_write_head; while (lp->next != nlp) lp = lp->next; lp->next = nlp->next; } - mutex_exit(&rfs_async_write_lock); + mutex_exit(&nsrv->async_write_lock); /* * Step through the list of requests in this cluster. @@ -1738,7 +1778,7 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns, VN_RELE(vp); t_flag = curthread->t_flag & T_WOULDBLOCK; - mutex_enter(&rfs_async_write_lock); + mutex_enter(&nsrv->async_write_lock); for (rp = nlp->list; rp != NULL; rp = rp->list) { if (rp->ns->ns_status == RFSWRITE_INITVAL) { rp->ns->ns_status = puterrno(error); @@ -1746,7 +1786,7 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns, } } cv_broadcast(&nlp->cv); - mutex_exit(&rfs_async_write_lock); + mutex_exit(&nsrv->async_write_lock); } @@ -2211,7 +2251,7 @@ rfs_rename(struct nfsrnmargs *args, enum nfsstat *status, /* Check for delegation on the file being renamed over, if it exists */ - if (rfs4_deleg_policy != SRV_NEVER_DELEGATE && + if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE && VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr, NULL, NULL, NULL) == 0) { @@ -2578,7 +2618,7 @@ rfs_rmdir(struct nfsdiropargs *da, enum nfsstat *status, * supplying a vnode known to exist and illegal to * remove. */ - error = VOP_RMDIR(vp, da->da_name, rootdir, cr, NULL, 0); + error = VOP_RMDIR(vp, da->da_name, ZONE_ROOTVP(), cr, NULL, 0); /* * Force modified data and metadata out to stable storage. 
@@ -2853,7 +2893,7 @@ sattr_to_vattr(struct nfssattr *sa, struct vattr *vap) return (0); } -static enum nfsftype vt_to_nf[] = { +static const enum nfsftype vt_to_nf[] = { 0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0 }; @@ -3072,14 +3112,38 @@ acl_perm(struct vnode *vp, struct exportinfo *exi, struct vattr *va, cred_t *cr) void rfs_srvrinit(void) { - mutex_init(&rfs_async_write_lock, NULL, MUTEX_DEFAULT, NULL); nfs2_srv_caller_id = fs_new_caller_id(); } void rfs_srvrfini(void) { - mutex_destroy(&rfs_async_write_lock); +} + +/* ARGSUSED */ +void +rfs_srv_zone_init(nfs_globals_t *ng) +{ + nfs_srv_t *ns; + + ns = kmem_zalloc(sizeof (*ns), KM_SLEEP); + + mutex_init(&ns->async_write_lock, NULL, MUTEX_DEFAULT, NULL); + ns->write_async = 1; + + ng->nfs_srv = ns; +} + +/* ARGSUSED */ +void +rfs_srv_zone_fini(nfs_globals_t *ng) +{ + nfs_srv_t *ns = ng->nfs_srv; + + ng->nfs_srv = NULL; + + mutex_destroy(&ns->async_write_lock); + kmem_free(ns, sizeof (*ns)); } static int diff --git a/usr/src/uts/common/fs/nfs/nfs_stats.c b/usr/src/uts/common/fs/nfs/nfs_stats.c index baaf47a82a..97f820d756 100644 --- a/usr/src/uts/common/fs/nfs/nfs_stats.c +++ b/usr/src/uts/common/fs/nfs/nfs_stats.c @@ -45,8 +45,8 @@ zone_key_t nfsstat_zone_key; */ static kstat_named_t * nfsstat_zone_init_common(zoneid_t zoneid, const char *module, int vers, - const char *name, const kstat_named_t *template, - size_t template_size) + const char *name, const kstat_named_t *template, + size_t template_size) { kstat_t *ksp; kstat_named_t *ks_data; @@ -68,7 +68,7 @@ nfsstat_zone_init_common(zoneid_t zoneid, const char *module, int vers, */ static void nfsstat_zone_fini_common(zoneid_t zoneid, const char *module, int vers, - const char *name) + const char *name) { kstat_delete_byname_zone(module, vers, name, zoneid); } @@ -89,38 +89,22 @@ static const kstat_named_t svstat_tmpl[] = { { "referlinks", KSTAT_DATA_UINT64 }, }; -/* Points to the global zone server kstat data for all nfs versions */ 
-kstat_named_t *global_svstat_ptr[NFS_VERSMAX + 1]; - static void nfsstat_zone_init_server(zoneid_t zoneid, kstat_named_t *svstatp[]) { int vers; - /* - * first two indexes of these arrays are not used, so initialize - * to NULL - */ - svstatp[0] = NULL; - svstatp[1] = NULL; - global_svstat_ptr[0] = NULL; - global_svstat_ptr[0] = NULL; - for (vers = NFS_VERSION; vers <= NFS_V4; vers++) { svstatp[vers] = nfsstat_zone_init_common(zoneid, "nfs", vers, "nfs_server", svstat_tmpl, sizeof (svstat_tmpl)); - if (zoneid == GLOBAL_ZONEID) - global_svstat_ptr[vers] = svstatp[vers]; } } static void -nfsstat_zone_fini_server(zoneid_t zoneid, kstat_named_t **svstatp) +nfsstat_zone_fini_server(zoneid_t zoneid, kstat_named_t *svstatp[]) { int vers; for (vers = NFS_VERSION; vers <= NFS_V4; vers++) { - if (zoneid == GLOBAL_ZONEID) - global_svstat_ptr[vers] = NULL; nfsstat_zone_fini_common(zoneid, "nfs", vers, "nfs_server"); kmem_free(svstatp[vers], sizeof (svstat_tmpl)); } @@ -188,29 +172,6 @@ static const kstat_named_t rfsproccnt_v2_tmpl[] = { { "statfs", KSTAT_DATA_UINT64 } }; -kstat_named_t *rfsproccnt_v2_ptr; - -static void -nfsstat_zone_init_rfsproc_v2(zoneid_t zoneid, struct nfs_version_stats *statsp) -{ - kstat_named_t *ks_data; - - ks_data = nfsstat_zone_init_common(zoneid, "nfs", 0, "rfsproccnt_v2", - rfsproccnt_v2_tmpl, sizeof (rfsproccnt_v2_tmpl)); - statsp->rfsproccnt_ptr = ks_data; - if (zoneid == GLOBAL_ZONEID) - rfsproccnt_v2_ptr = ks_data; -} - -static void -nfsstat_zone_fini_rfsproc_v2(zoneid_t zoneid, struct nfs_version_stats *statsp) -{ - if (zoneid == GLOBAL_ZONEID) - rfsproccnt_v2_ptr = NULL; - nfsstat_zone_fini_common(zoneid, "nfs", 0, "rfsproccnt_v2"); - kmem_free(statsp->rfsproccnt_ptr, sizeof (rfsproccnt_v2_tmpl)); -} - /* * NFSv2 client ACL stats */ @@ -249,30 +210,6 @@ static const kstat_named_t aclproccnt_v2_tmpl[] = { { "getxattrdir", KSTAT_DATA_UINT64 } }; -kstat_named_t *aclproccnt_v2_ptr; - -static void -nfsstat_zone_init_aclproc_v2(zoneid_t zoneid, 
struct nfs_version_stats *statsp) -{ - kstat_named_t *ks_data; - - ks_data = nfsstat_zone_init_common(zoneid, "nfs_acl", 0, - "aclproccnt_v2", aclproccnt_v2_tmpl, - sizeof (aclproccnt_v2_tmpl)); - statsp->aclproccnt_ptr = ks_data; - if (zoneid == GLOBAL_ZONEID) - aclproccnt_v2_ptr = ks_data; -} - -static void -nfsstat_zone_fini_aclproc_v2(zoneid_t zoneid, struct nfs_version_stats *statsp) -{ - if (zoneid == GLOBAL_ZONEID) - aclproccnt_v2_ptr = NULL; - nfsstat_zone_fini_common(zoneid, "nfs_acl", 0, "aclproccnt_v2"); - kmem_free(statsp->aclproccnt_ptr, sizeof (aclproccnt_v2_tmpl)); -} - /* * NFSv3 client stats */ @@ -343,29 +280,6 @@ static const kstat_named_t rfsproccnt_v3_tmpl[] = { { "commit", KSTAT_DATA_UINT64 } }; -kstat_named_t *rfsproccnt_v3_ptr; - -static void -nfsstat_zone_init_rfsproc_v3(zoneid_t zoneid, struct nfs_version_stats *statsp) -{ - kstat_named_t *ks_data; - - ks_data = nfsstat_zone_init_common(zoneid, "nfs", 0, "rfsproccnt_v3", - rfsproccnt_v3_tmpl, sizeof (rfsproccnt_v3_tmpl)); - statsp->rfsproccnt_ptr = ks_data; - if (zoneid == GLOBAL_ZONEID) - rfsproccnt_v3_ptr = ks_data; -} - -static void -nfsstat_zone_fini_rfsproc_v3(zoneid_t zoneid, struct nfs_version_stats *statsp) -{ - if (zoneid == GLOBAL_ZONEID) - rfsproccnt_v3_ptr = NULL; - nfsstat_zone_fini_common(zoneid, "nfs", 0, "rfsproccnt_v3"); - kmem_free(statsp->rfsproccnt_ptr, sizeof (rfsproccnt_v3_tmpl)); -} - /* * NFSv3 client ACL stats */ @@ -400,30 +314,6 @@ static const kstat_named_t aclproccnt_v3_tmpl[] = { { "getxattrdir", KSTAT_DATA_UINT64 } }; -kstat_named_t *aclproccnt_v3_ptr; - -static void -nfsstat_zone_init_aclproc_v3(zoneid_t zoneid, struct nfs_version_stats *statsp) -{ - kstat_named_t *ks_data; - - ks_data = nfsstat_zone_init_common(zoneid, "nfs_acl", 0, - "aclproccnt_v3", aclproccnt_v3_tmpl, - sizeof (aclproccnt_v3_tmpl)); - statsp->aclproccnt_ptr = ks_data; - if (zoneid == GLOBAL_ZONEID) - aclproccnt_v3_ptr = ks_data; -} - -static void -nfsstat_zone_fini_aclproc_v3(zoneid_t 
zoneid, struct nfs_version_stats *statsp) -{ - if (zoneid == GLOBAL_ZONEID) - aclproccnt_v3_ptr = NULL; - nfsstat_zone_fini_common(zoneid, "nfs_acl", 0, "aclproccnt_v3"); - kmem_free(statsp->aclproccnt_ptr, sizeof (aclproccnt_v3_tmpl)); -} - /* * NFSv4 client stats */ @@ -530,29 +420,6 @@ static const kstat_named_t rfsproccnt_v4_tmpl[] = { { "illegal", KSTAT_DATA_UINT64 }, }; -kstat_named_t *rfsproccnt_v4_ptr; - -static void -nfsstat_zone_init_rfsproc_v4(zoneid_t zoneid, struct nfs_version_stats *statsp) -{ - kstat_named_t *ks_data; - - ks_data = nfsstat_zone_init_common(zoneid, "nfs", 0, "rfsproccnt_v4", - rfsproccnt_v4_tmpl, sizeof (rfsproccnt_v4_tmpl)); - statsp->rfsproccnt_ptr = ks_data; - if (zoneid == GLOBAL_ZONEID) - rfsproccnt_v4_ptr = ks_data; -} - -static void -nfsstat_zone_fini_rfsproc_v4(zoneid_t zoneid, struct nfs_version_stats *statsp) -{ - if (zoneid == GLOBAL_ZONEID) - rfsproccnt_v4_ptr = NULL; - nfsstat_zone_fini_common(zoneid, "nfs", 0, "rfsproccnt_v4"); - kmem_free(statsp->rfsproccnt_ptr, sizeof (rfsproccnt_v4_tmpl)); -} - /* * NFSv4 client ACL stats */ @@ -577,39 +444,6 @@ nfsstat_zone_fini_aclreq_v4(zoneid_t zoneid, struct nfs_version_stats *statsp) } /* - * NFSv4 server ACL stats - */ -static const kstat_named_t aclproccnt_v4_tmpl[] = { - { "null", KSTAT_DATA_UINT64 }, - { "getacl", KSTAT_DATA_UINT64 }, - { "setacl", KSTAT_DATA_UINT64 } -}; - -kstat_named_t *aclproccnt_v4_ptr; - -static void -nfsstat_zone_init_aclproc_v4(zoneid_t zoneid, struct nfs_version_stats *statsp) -{ - kstat_named_t *ks_data; - - ks_data = nfsstat_zone_init_common(zoneid, "nfs_acl", 0, - "aclproccnt_v4", aclproccnt_v4_tmpl, - sizeof (aclproccnt_v4_tmpl)); - statsp->aclproccnt_ptr = ks_data; - if (zoneid == GLOBAL_ZONEID) - aclproccnt_v4_ptr = ks_data; -} - -static void -nfsstat_zone_fini_aclproc_v4(zoneid_t zoneid, struct nfs_version_stats *statsp) -{ - if (zoneid == GLOBAL_ZONEID) - aclproccnt_v4_ptr = NULL; - nfsstat_zone_fini_common(zoneid, "nfs_acl", 0, 
"aclproccnt_v4"); - kmem_free(statsp->aclproccnt_ptr, sizeof (aclproccnt_v4_tmpl)); -} - -/* * Zone initializer callback to setup the kstats. */ void * @@ -620,31 +454,20 @@ nfsstat_zone_init(zoneid_t zoneid) nfs_stats_ptr = kmem_zalloc(sizeof (*nfs_stats_ptr), KM_SLEEP); /* - * Initialize all versions of the nfs_server - */ - nfsstat_zone_init_server(zoneid, nfs_stats_ptr->nfs_stats_svstat_ptr); - - /* * Initialize v2 stats */ nfsstat_zone_init_rfsreq_v2(zoneid, &nfs_stats_ptr->nfs_stats_v2); - nfsstat_zone_init_rfsproc_v2(zoneid, &nfs_stats_ptr->nfs_stats_v2); nfsstat_zone_init_aclreq_v2(zoneid, &nfs_stats_ptr->nfs_stats_v2); - nfsstat_zone_init_aclproc_v2(zoneid, &nfs_stats_ptr->nfs_stats_v2); /* * Initialize v3 stats */ nfsstat_zone_init_rfsreq_v3(zoneid, &nfs_stats_ptr->nfs_stats_v3); - nfsstat_zone_init_rfsproc_v3(zoneid, &nfs_stats_ptr->nfs_stats_v3); nfsstat_zone_init_aclreq_v3(zoneid, &nfs_stats_ptr->nfs_stats_v3); - nfsstat_zone_init_aclproc_v3(zoneid, &nfs_stats_ptr->nfs_stats_v3); /* * Initialize v4 stats */ nfsstat_zone_init_rfsreq_v4(zoneid, &nfs_stats_ptr->nfs_stats_v4); - nfsstat_zone_init_rfsproc_v4(zoneid, &nfs_stats_ptr->nfs_stats_v4); nfsstat_zone_init_aclreq_v4(zoneid, &nfs_stats_ptr->nfs_stats_v4); - nfsstat_zone_init_aclproc_v4(zoneid, &nfs_stats_ptr->nfs_stats_v4); return (nfs_stats_ptr); } @@ -658,31 +481,74 @@ nfsstat_zone_fini(zoneid_t zoneid, void *data) struct nfs_stats *nfs_stats_ptr = data; /* - * Free nfs:0:nfs_server stats - */ - nfsstat_zone_fini_server(zoneid, nfs_stats_ptr->nfs_stats_svstat_ptr); - - /* * Free v2 stats */ nfsstat_zone_fini_rfsreq_v2(zoneid, &nfs_stats_ptr->nfs_stats_v2); - nfsstat_zone_fini_rfsproc_v2(zoneid, &nfs_stats_ptr->nfs_stats_v2); nfsstat_zone_fini_aclreq_v2(zoneid, &nfs_stats_ptr->nfs_stats_v2); - nfsstat_zone_fini_aclproc_v2(zoneid, &nfs_stats_ptr->nfs_stats_v2); /* * Free v3 stats */ nfsstat_zone_fini_rfsreq_v3(zoneid, &nfs_stats_ptr->nfs_stats_v3); - nfsstat_zone_fini_rfsproc_v3(zoneid, 
&nfs_stats_ptr->nfs_stats_v3); nfsstat_zone_fini_aclreq_v3(zoneid, &nfs_stats_ptr->nfs_stats_v3); - nfsstat_zone_fini_aclproc_v3(zoneid, &nfs_stats_ptr->nfs_stats_v3); /* * Free v4 stats */ nfsstat_zone_fini_rfsreq_v4(zoneid, &nfs_stats_ptr->nfs_stats_v4); - nfsstat_zone_fini_rfsproc_v4(zoneid, &nfs_stats_ptr->nfs_stats_v4); nfsstat_zone_fini_aclreq_v4(zoneid, &nfs_stats_ptr->nfs_stats_v4); - nfsstat_zone_fini_aclproc_v4(zoneid, &nfs_stats_ptr->nfs_stats_v4); kmem_free(nfs_stats_ptr, sizeof (*nfs_stats_ptr)); } + +void +rfs_stat_zone_init(nfs_globals_t *ng) +{ + zoneid_t zoneid = ng->nfs_zoneid; + + /* Initialize all versions of the nfs_server */ + nfsstat_zone_init_server(zoneid, ng->svstat); + + /* NFS proc */ + ng->rfsproccnt[NFS_V2] = nfsstat_zone_init_common(zoneid, "nfs", 0, + "rfsproccnt_v2", rfsproccnt_v2_tmpl, sizeof (rfsproccnt_v2_tmpl)); + + ng->rfsproccnt[NFS_V3] = nfsstat_zone_init_common(zoneid, "nfs", 0, + "rfsproccnt_v3", rfsproccnt_v3_tmpl, sizeof (rfsproccnt_v3_tmpl)); + + ng->rfsproccnt[NFS_V4] = nfsstat_zone_init_common(zoneid, "nfs", 0, + "rfsproccnt_v4", rfsproccnt_v4_tmpl, sizeof (rfsproccnt_v4_tmpl)); + + /* ACL proc */ + ng->aclproccnt[NFS_V2] = nfsstat_zone_init_common(zoneid, "nfs_acl", 0, + "aclproccnt_v2", aclproccnt_v2_tmpl, sizeof (aclproccnt_v2_tmpl)); + + ng->aclproccnt[NFS_V3] = nfsstat_zone_init_common(zoneid, "nfs_acl", 0, + "aclproccnt_v3", aclproccnt_v3_tmpl, sizeof (aclproccnt_v3_tmpl)); + +} + +void +rfs_stat_zone_fini(nfs_globals_t *ng) +{ + zoneid_t zoneid = ng->nfs_zoneid; + + /* Free nfs:x:nfs_server stats */ + nfsstat_zone_fini_server(zoneid, ng->svstat); + + /* NFS */ + nfsstat_zone_fini_common(zoneid, "nfs", 0, "rfsproccnt_v2"); + kmem_free(ng->rfsproccnt[NFS_V2], sizeof (rfsproccnt_v2_tmpl)); + + nfsstat_zone_fini_common(zoneid, "nfs", 0, "rfsproccnt_v3"); + kmem_free(ng->rfsproccnt[NFS_V3], sizeof (rfsproccnt_v3_tmpl)); + + nfsstat_zone_fini_common(zoneid, "nfs", 0, "rfsproccnt_v4"); + 
kmem_free(ng->rfsproccnt[NFS_V4], sizeof (rfsproccnt_v4_tmpl)); + + /* ACL */ + nfsstat_zone_fini_common(zoneid, "nfs_acl", 0, "aclproccnt_v2"); + kmem_free(ng->aclproccnt[NFS_V2], sizeof (aclproccnt_v2_tmpl)); + + nfsstat_zone_fini_common(zoneid, "nfs_acl", 0, "aclproccnt_v3"); + kmem_free(ng->aclproccnt[NFS_V3], sizeof (aclproccnt_v3_tmpl)); + +} diff --git a/usr/src/uts/common/fs/nfs/nfs_sys.c b/usr/src/uts/common/fs/nfs/nfs_sys.c index e6ff4a2e0b..434c9a2a3e 100644 --- a/usr/src/uts/common/fs/nfs/nfs_sys.c +++ b/usr/src/uts/common/fs/nfs/nfs_sys.c @@ -18,14 +18,21 @@ * * CDDL HEADER END */ + /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * + */ + +/* * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. * All rights reserved. */ +/* + * Copyright 2018 Nexenta Systems, Inc. + */ + #include <sys/types.h> #include <rpc/types.h> #include <sys/systm.h> @@ -80,8 +87,6 @@ nfs_export(void *arg) { STRUCT_DECL(exportfs_args, ea); - if (!INGLOBALZONE(curproc)) - return (set_errno(EPERM)); STRUCT_INIT(ea, get_udatamodel()); if (copyin(arg, STRUCT_BUF(ea), STRUCT_SIZE(ea))) return (set_errno(EFAULT)); @@ -111,9 +116,6 @@ nfssys(enum nfssys_op opcode, void *arg) break; } - if (!INGLOBALZONE(curproc)) - return (set_errno(EPERM)); - STRUCT_INIT(u_clr, get_udatamodel()); if (copyin(arg, STRUCT_BUF(u_clr), STRUCT_SIZE(u_clr))) @@ -164,8 +166,6 @@ nfssys(enum nfssys_op opcode, void *arg) struct rdma_svc_args rsa; char netstore[20] = "tcp"; - if (!INGLOBALZONE(curproc)) - return (set_errno(EPERM)); if (get_udatamodel() != DATAMODEL_NATIVE) { STRUCT_DECL(rdma_svc_args, ursa); @@ -189,9 +189,6 @@ nfssys(enum nfssys_op opcode, void *arg) case NFS_SVC: { /* NFS server daemon */ STRUCT_DECL(nfs_svc_args, nsa); - - if (!INGLOBALZONE(curproc)) - return (set_errno(EPERM)); STRUCT_INIT(nsa, get_udatamodel()); if (copyin(arg, STRUCT_BUF(nsa), STRUCT_SIZE(nsa))) @@ -209,8 +206,6 @@ nfssys(enum nfssys_op opcode, void *arg) case 
NFS_GETFH: { /* get a file handle */ STRUCT_DECL(nfs_getfh_args, nga); - if (!INGLOBALZONE(curproc)) - return (set_errno(EPERM)); STRUCT_INIT(nga, get_udatamodel()); if (copyin(arg, STRUCT_BUF(nga), STRUCT_SIZE(nga))) return (set_errno(EFAULT)); diff --git a/usr/src/uts/common/fs/sharefs/sharefs_vfsops.c b/usr/src/uts/common/fs/sharefs/sharefs_vfsops.c index 1fa1617ec8..1065d86719 100644 --- a/usr/src/uts/common/fs/sharefs/sharefs_vfsops.c +++ b/usr/src/uts/common/fs/sharefs/sharefs_vfsops.c @@ -23,6 +23,10 @@ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. */ +/* + * Copyright 2018 Nexenta Systems, Inc. + */ + #include <sys/atomic.h> #include <sys/cmn_err.h> #include <sys/errno.h> @@ -243,16 +247,6 @@ sharefs_unmount(vfs_t *vfsp, int flag, struct cred *cr) return (EBUSY); /* - * Only allow an unmount iff there are no entries in memory. - */ - rw_enter(&sharetab_lock, RW_READER); - if (sharetab_size != 0) { - rw_exit(&sharetab_lock); - return (EBUSY); - } - rw_exit(&sharetab_lock); - - /* * Release the last hold on the root vnode */ VN_RELE(data->sharefs_vfs_root); diff --git a/usr/src/uts/common/fs/sharefs/sharefs_vnops.c b/usr/src/uts/common/fs/sharefs/sharefs_vnops.c index 2ca3f293a5..8e5a9a2cc7 100644 --- a/usr/src/uts/common/fs/sharefs/sharefs_vnops.c +++ b/usr/src/uts/common/fs/sharefs/sharefs_vnops.c @@ -24,6 +24,10 @@ * Use is subject to license terms. */ +/* + * Copyright 2018 Nexenta Systems, Inc. + */ + #include <fs/fs_subr.h> #include <sys/errno.h> @@ -45,7 +49,7 @@ * the shares enumerated. 
*/ static int -sharefs_snap_create(shnode_t *sft) +sharefs_snap_create(sharetab_globals_t *sg, shnode_t *sft) { sharetab_t *sht; share_t *sh; @@ -53,16 +57,16 @@ sharefs_snap_create(shnode_t *sft) int iCount = 0; char *buf; - rw_enter(&sharefs_lock, RW_WRITER); - rw_enter(&sharetab_lock, RW_READER); + rw_enter(&sg->sharefs_lock, RW_WRITER); + rw_enter(&sg->sharetab_lock, RW_READER); if (sft->sharefs_snap) { /* * Nothing has changed, so no need to grab a new copy! */ - if (sft->sharefs_generation == sharetab_generation) { - rw_exit(&sharetab_lock); - rw_exit(&sharefs_lock); + if (sft->sharefs_generation == sg->sharetab_generation) { + rw_exit(&sg->sharetab_lock); + rw_exit(&sg->sharefs_lock); return (0); } @@ -71,12 +75,12 @@ sharefs_snap_create(shnode_t *sft) sft->sharefs_snap = NULL; } - sft->sharefs_size = sharetab_size; - sft->sharefs_count = sharetab_count; + sft->sharefs_size = sg->sharetab_size; + sft->sharefs_count = sg->sharetab_count; if (sft->sharefs_size == 0) { - rw_exit(&sharetab_lock); - rw_exit(&sharefs_lock); + rw_exit(&sg->sharetab_lock); + rw_exit(&sg->sharefs_lock); return (0); } @@ -87,7 +91,7 @@ sharefs_snap_create(shnode_t *sft) /* * Walk the Sharetab, dumping each entry. */ - for (sht = sharefs_sharetab; sht != NULL; sht = sht->s_next) { + for (sht = sg->sharefs_sharetab; sht != NULL; sht = sht->s_next) { int i; for (i = 0; i < SHARETAB_HASHES; i++) { @@ -132,14 +136,14 @@ sharefs_snap_create(shnode_t *sft) * We want to record the generation number and * mtime inside this snapshot. 
*/ - gethrestime(&sharetab_snap_time); - sft->sharefs_snap_time = sharetab_snap_time; - sft->sharefs_generation = sharetab_generation; + gethrestime(&sg->sharetab_snap_time); + sft->sharefs_snap_time = sg->sharetab_snap_time; + sft->sharefs_generation = sg->sharetab_generation; ASSERT(iCount == sft->sharefs_count); - rw_exit(&sharetab_lock); - rw_exit(&sharefs_lock); + rw_exit(&sg->sharetab_lock); + rw_exit(&sg->sharefs_lock); return (0); error_fault: @@ -148,8 +152,8 @@ error_fault: sft->sharefs_size = 0; sft->sharefs_count = 0; sft->sharefs_snap = NULL; - rw_exit(&sharetab_lock); - rw_exit(&sharefs_lock); + rw_exit(&sg->sharetab_lock); + rw_exit(&sg->sharefs_lock); return (EFAULT); } @@ -161,13 +165,14 @@ sharefs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, { timestruc_t now; shnode_t *sft = VTOSH(vp); + sharetab_globals_t *sg = sharetab_get_globals(vp->v_vfsp->vfs_zone); vap->va_type = VREG; vap->va_mode = S_IRUSR | S_IRGRP | S_IROTH; vap->va_nodeid = SHAREFS_INO_FILE; vap->va_nlink = 1; - rw_enter(&sharefs_lock, RW_READER); + rw_enter(&sg->sharefs_lock, RW_READER); /* * If we get asked about a snapped vnode, then @@ -177,15 +182,15 @@ sharefs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, * sharetab. */ if (sft->sharefs_real_vp) { - rw_enter(&sharetab_lock, RW_READER); - vap->va_size = sharetab_size; - vap->va_mtime = sharetab_mtime; - rw_exit(&sharetab_lock); + rw_enter(&sg->sharetab_lock, RW_READER); + vap->va_size = sg->sharetab_size; + vap->va_mtime = sg->sharetab_mtime; + rw_exit(&sg->sharetab_lock); } else { vap->va_size = sft->sharefs_size; vap->va_mtime = sft->sharefs_snap_time; } - rw_exit(&sharefs_lock); + rw_exit(&sg->sharefs_lock); gethrestime(&now); vap->va_atime = vap->va_ctime = now; @@ -259,7 +264,8 @@ sharefs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) * are dumping an extremely huge sharetab, we make a copy * of it here and use it to dump instead. 
*/ - error = sharefs_snap_create(sft); + error = sharefs_snap_create(sharetab_get_globals(vp->v_vfsp->vfs_zone), + sft); return (error); } @@ -270,11 +276,12 @@ sharefs_close(vnode_t *vp, int flag, int count, offset_t off, cred_t *cr, caller_context_t *ct) { shnode_t *sft = VTOSH(vp); + sharetab_globals_t *sg = sharetab_get_globals(vp->v_vfsp->vfs_zone); if (count > 1) return (0); - rw_enter(&sharefs_lock, RW_WRITER); + rw_enter(&sg->sharefs_lock, RW_WRITER); if (vp->v_count == 1) { if (sft->sharefs_snap != NULL) { kmem_free(sft->sharefs_snap, sft->sharefs_size + 1); @@ -284,7 +291,7 @@ sharefs_close(vnode_t *vp, int flag, int count, } } atomic_dec_32(&sft->sharefs_refs); - rw_exit(&sharefs_lock); + rw_exit(&sg->sharefs_lock); return (0); } @@ -292,30 +299,31 @@ sharefs_close(vnode_t *vp, int flag, int count, /* ARGSUSED */ static int sharefs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, - caller_context_t *ct) + caller_context_t *ct) { shnode_t *sft = VTOSH(vp); off_t off = uio->uio_offset; size_t len = uio->uio_resid; int error = 0; + sharetab_globals_t *sg = sharetab_get_globals(vp->v_vfsp->vfs_zone); - rw_enter(&sharefs_lock, RW_READER); + rw_enter(&sg->sharefs_lock, RW_READER); /* * First check to see if we need to grab a new snapshot. 
*/ if (off == (off_t)0) { - rw_exit(&sharefs_lock); - error = sharefs_snap_create(sft); + rw_exit(&sg->sharefs_lock); + error = sharefs_snap_create(sg, sft); if (error) { return (EFAULT); } - rw_enter(&sharefs_lock, RW_READER); + rw_enter(&sg->sharefs_lock, RW_READER); } /* LINTED */ if (len <= 0 || off >= sft->sharefs_size) { - rw_exit(&sharefs_lock); + rw_exit(&sg->sharefs_lock); return (error); } @@ -323,7 +331,7 @@ sharefs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, len = sft->sharefs_size - off; if (off < 0 || len > sft->sharefs_size) { - rw_exit(&sharefs_lock); + rw_exit(&sg->sharefs_lock); return (EFAULT); } @@ -332,7 +340,7 @@ sharefs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, len, UIO_READ, uio); } - rw_exit(&sharefs_lock); + rw_exit(&sg->sharefs_lock); return (error); } @@ -342,16 +350,17 @@ sharefs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *tx) { gfs_file_t *fp = vp->v_data; shnode_t *sft; + sharetab_globals_t *sg = sharetab_get_globals(vp->v_vfsp->vfs_zone); sft = (shnode_t *)gfs_file_inactive(vp); if (sft) { - rw_enter(&sharefs_lock, RW_WRITER); + rw_enter(&sg->sharefs_lock, RW_WRITER); if (sft->sharefs_snap != NULL) { kmem_free(sft->sharefs_snap, sft->sharefs_size + 1); } kmem_free(sft, fp->gfs_size); - rw_exit(&sharefs_lock); + rw_exit(&sg->sharefs_lock); } } diff --git a/usr/src/uts/common/fs/sharefs/sharetab.c b/usr/src/uts/common/fs/sharefs/sharetab.c index 0f8543641c..d66c1aa85f 100644 --- a/usr/src/uts/common/fs/sharefs/sharetab.c +++ b/usr/src/uts/common/fs/sharefs/sharetab.c @@ -23,6 +23,11 @@ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. */ +/* + * Copyright 2018 Nexenta Systems, Inc. + * Copyright 2020 Joyent, Inc. 
+ */ + #include <sys/types.h> #include <sys/types32.h> #include <sys/param.h> @@ -47,15 +52,13 @@ */ #define SHARETAB_COPYIN(field) \ if (copyinstr(STRUCT_FGETP(u_sh, sh_##field), \ - buf, \ - bufsz + 1, /* Add one for extra NUL */ \ - &len)) { \ + buf, \ + bufsz + 1, /* Add one for extra NUL */ \ + &len)) { \ error = EFAULT; \ goto cleanup; \ } \ - /* \ - * Need to remove 1 because copyinstr() counts the NUL. \ - */ \ + /* Need to remove 1 because copyinstr() counts the NUL */ \ len--; \ sh->sh_##field = kmem_alloc(len + 1, KM_SLEEP); \ bcopy(buf, sh->sh_##field, len); \ @@ -64,24 +67,13 @@ sh->sh_size += shl.shl_##field; /* Debug counting */ #define SHARETAB_DELETE_FIELD(field) \ - if (sh->sh_##field) { \ + if (sh->sh_##field != NULL) { \ kmem_free(sh->sh_##field, \ - shl ? shl->shl_##field + 1 : \ - strlen(sh->sh_##field) + 1); \ + shl ? shl->shl_##field + 1 : \ + strlen(sh->sh_##field) + 1); \ } -sharetab_t *sharefs_sharetab = NULL; /* The incore sharetab. */ -size_t sharetab_size; -uint_t sharetab_count; - -krwlock_t sharetab_lock; /* lock to protect the cached sharetab */ - -krwlock_t sharefs_lock; /* lock to protect the vnode ops */ - -timestruc_t sharetab_mtime; -timestruc_t sharetab_snap_time; - -uint_t sharetab_generation; /* Only increments and wraps! */ +static zone_key_t sharetab_zone_key; /* * Take care of cleaning up a share. @@ -91,7 +83,7 @@ uint_t sharetab_generation; /* Only increments and wraps! */ static void sharefree(share_t *sh, sharefs_lens_t *shl) { - if (!sh) + if (sh == NULL) return; SHARETAB_DELETE_FIELD(path); @@ -100,7 +92,7 @@ sharefree(share_t *sh, sharefs_lens_t *shl) SHARETAB_DELETE_FIELD(opts); SHARETAB_DELETE_FIELD(descr); - kmem_free(sh, sizeof (share_t)); + kmem_free(sh, sizeof (*sh)); } /* @@ -108,7 +100,7 @@ sharefree(share_t *sh, sharefs_lens_t *shl) * cleaning up the memory associated with the share argument. 
*/ static int -sharefs_remove(share_t *sh, sharefs_lens_t *shl) +sharefs_remove(sharetab_globals_t *sg, share_t *sh, sharefs_lens_t *shl) { int iHash; sharetab_t *sht; @@ -118,23 +110,22 @@ sharefs_remove(share_t *sh, sharefs_lens_t *shl) if (!sh) return (ENOENT); - rw_enter(&sharetab_lock, RW_WRITER); - for (sht = sharefs_sharetab; sht != NULL; sht = sht->s_next) { - if (strcmp(sh->sh_fstype, sht->s_fstype) == 0) { + rw_enter(&sg->sharetab_lock, RW_WRITER); + for (sht = sg->sharefs_sharetab; sht != NULL; sht = sht->s_next) { + if (strcmp(sh->sh_fstype, sht->s_fstype) == 0) break; - } } /* * There does not exist a fstype in memory which * matches the share passed in. */ - if (!sht) { - rw_exit(&sharetab_lock); + if (sht == NULL) { + rw_exit(&sg->sharetab_lock); return (ENOENT); } - iPath = shl ? shl->shl_path : strlen(sh->sh_path); + iPath = shl != NULL ? shl->shl_path : strlen(sh->sh_path); iHash = pkp_tab_hash(sh->sh_path, strlen(sh->sh_path)); /* @@ -147,22 +138,21 @@ sharefs_remove(share_t *sh, sharefs_lens_t *shl) */ if (strcmp(sh->sh_path, s->sh_path) == 0 && strlen(s->sh_path) == iPath) { - if (p) { + if (p != NULL) p->sh_next = s->sh_next; - } else { + else sht->s_buckets[iHash].ssh_sh = s->sh_next; - } ASSERT(sht->s_buckets[iHash].ssh_count != 0); atomic_dec_32(&sht->s_buckets[iHash].ssh_count); atomic_dec_32(&sht->s_count); - atomic_dec_32(&sharetab_count); + atomic_dec_32(&sg->sharetab_count); - ASSERT(sharetab_size >= s->sh_size); - sharetab_size -= s->sh_size; + ASSERT(sg->sharetab_size >= s->sh_size); + sg->sharetab_size -= s->sh_size; - gethrestime(&sharetab_mtime); - atomic_inc_32(&sharetab_generation); + gethrestime(&sg->sharetab_mtime); + atomic_inc_32(&sg->sharetab_generation); break; } @@ -170,18 +160,15 @@ sharefs_remove(share_t *sh, sharefs_lens_t *shl) p = s; } - rw_exit(&sharetab_lock); + rw_exit(&sg->sharetab_lock); - if (!s) { + if (s == NULL) return (ENOENT); - } s->sh_next = NULL; sharefree(s, NULL); - /* - * We need to free the share 
for the caller. - */ + /* We need to free the share for the caller */ sharefree(sh, shl); return (0); @@ -191,7 +178,7 @@ sharefs_remove(share_t *sh, sharefs_lens_t *shl) * The caller must have allocated memory for us to use. */ static int -sharefs_add(share_t *sh, sharefs_lens_t *shl) +sharefs_add(sharetab_globals_t *sg, share_t *sh, sharefs_lens_t *shl) { int iHash; sharetab_t *sht; @@ -199,41 +186,31 @@ sharefs_add(share_t *sh, sharefs_lens_t *shl) int iPath; int n; - if (!sh) { + if (sh == NULL) return (ENOENT); - } - /* - * We need to find the hash buckets for the fstype. - */ - rw_enter(&sharetab_lock, RW_WRITER); - for (sht = sharefs_sharetab; sht != NULL; sht = sht->s_next) { - if (strcmp(sh->sh_fstype, sht->s_fstype) == 0) { + /* We need to find the hash buckets for the fstype */ + rw_enter(&sg->sharetab_lock, RW_WRITER); + for (sht = sg->sharefs_sharetab; sht != NULL; sht = sht->s_next) { + if (strcmp(sh->sh_fstype, sht->s_fstype) == 0) break; - } } - /* - * Did not exist, so allocate one and add it to the - * sharetab. - */ - if (!sht) { + /* Did not exist, so allocate one and add it to the sharetab */ + if (sht == NULL) { sht = kmem_zalloc(sizeof (*sht), KM_SLEEP); n = strlen(sh->sh_fstype); sht->s_fstype = kmem_zalloc(n + 1, KM_SLEEP); (void) strncpy(sht->s_fstype, sh->sh_fstype, n); - sht->s_next = sharefs_sharetab; - sharefs_sharetab = sht; + sht->s_next = sg->sharefs_sharetab; + sg->sharefs_sharetab = sht; } - /* - * Now we need to find where we have to add the entry. - */ + /* Now we need to find where we have to add the entry */ + iPath = shl != NULL ? shl->shl_path : strlen(sh->sh_path); iHash = pkp_tab_hash(sh->sh_path, strlen(sh->sh_path)); - iPath = shl ? 
shl->shl_path : strlen(sh->sh_path); - if (shl) { sh->sh_size = shl->shl_path + shl->shl_res + shl->shl_fstype + shl->shl_opts + shl->shl_descr; @@ -243,15 +220,10 @@ sharefs_add(share_t *sh, sharefs_lens_t *shl) strlen(sh->sh_opts) + strlen(sh->sh_descr); } - /* - * We need to account for field seperators and - * the EOL. - */ + /* We need to account for field separators and the EOL */ sh->sh_size += 5; - /* - * Now walk down the hash table and add the new entry! - */ + /* Now walk down the hash table and add the new entry */ for (p = NULL, s = sht->s_buckets[iHash].ssh_sh; s != NULL; s = s->sh_next) { /* @@ -263,28 +235,25 @@ sharefs_add(share_t *sh, sharefs_lens_t *shl) */ if (strcmp(sh->sh_path, s->sh_path) == 0 && strlen(s->sh_path) == iPath) { - if (p) { + if (p != NULL) p->sh_next = sh; - } else { + else sht->s_buckets[iHash].ssh_sh = sh; - } sh->sh_next = s->sh_next; - ASSERT(sharetab_size >= s->sh_size); - sharetab_size -= s->sh_size; - sharetab_size += sh->sh_size; + ASSERT(sg->sharetab_size >= s->sh_size); + sg->sharetab_size -= s->sh_size; + sg->sharetab_size += sh->sh_size; - /* - * Get rid of the old node. 
- */ + /* Get rid of the old node */ sharefree(s, NULL); - gethrestime(&sharetab_mtime); - atomic_inc_32(&sharetab_generation); + gethrestime(&sg->sharetab_mtime); + atomic_inc_32(&sg->sharetab_generation); ASSERT(sht->s_buckets[iHash].ssh_count != 0); - rw_exit(&sharetab_lock); + rw_exit(&sg->sharetab_lock); return (0); } @@ -300,29 +269,82 @@ sharefs_add(share_t *sh, sharefs_lens_t *shl) sht->s_buckets[iHash].ssh_sh = sh; atomic_inc_32(&sht->s_buckets[iHash].ssh_count); atomic_inc_32(&sht->s_count); - atomic_inc_32(&sharetab_count); - sharetab_size += sh->sh_size; + atomic_inc_32(&sg->sharetab_count); + sg->sharetab_size += sh->sh_size; - gethrestime(&sharetab_mtime); - atomic_inc_32(&sharetab_generation); + gethrestime(&sg->sharetab_mtime); + atomic_inc_32(&sg->sharetab_generation); - rw_exit(&sharetab_lock); + rw_exit(&sg->sharetab_lock); return (0); } +/* ARGSUSED */ +static void * +sharetab_zone_init(zoneid_t zoneid) +{ + sharetab_globals_t *sg; + + sg = kmem_zalloc(sizeof (*sg), KM_SLEEP); + + rw_init(&sg->sharetab_lock, NULL, RW_DEFAULT, NULL); + rw_init(&sg->sharefs_lock, NULL, RW_DEFAULT, NULL); + + sg->sharetab_size = 0; + sg->sharetab_count = 0; + sg->sharetab_generation = 1; + + gethrestime(&sg->sharetab_mtime); + gethrestime(&sg->sharetab_snap_time); + + return (sg); +} + +/* ARGSUSED */ +static void +sharetab_zone_fini(zoneid_t zoneid, void *data) +{ + sharetab_globals_t *sg = data; + + rw_destroy(&sg->sharefs_lock); + rw_destroy(&sg->sharetab_lock); + + /* ALL of the allocated things must be cleaned before we free sg. 
*/ + while (sg->sharefs_sharetab != NULL) { + int i; + sharetab_t *freeing = sg->sharefs_sharetab; + + sg->sharefs_sharetab = freeing->s_next; + kmem_free(freeing->s_fstype, strlen(freeing->s_fstype) + 1); + for (i = 0; i < PKP_HASH_SIZE; i++) { + sharefs_hash_head_t *bucket; + + bucket = &(freeing->s_buckets[i]); + while (bucket->ssh_sh != NULL) { + share_t *share = bucket->ssh_sh; + + bucket->ssh_sh = share->sh_next; + sharefree(share, NULL); + } + } + kmem_free(freeing, sizeof (*freeing)); + } + + kmem_free(sg, sizeof (*sg)); +} + void sharefs_sharetab_init(void) { - rw_init(&sharetab_lock, NULL, RW_DEFAULT, NULL); - rw_init(&sharefs_lock, NULL, RW_DEFAULT, NULL); - - sharetab_size = 0; - sharetab_count = 0; - sharetab_generation = 1; + zone_key_create(&sharetab_zone_key, sharetab_zone_init, + NULL, sharetab_zone_fini); +} - gethrestime(&sharetab_mtime); - gethrestime(&sharetab_snap_time); +sharetab_globals_t * +sharetab_get_globals(zone_t *zone) +{ + return (zone_getspecific(sharetab_zone_key, zone)); } int @@ -332,12 +354,10 @@ sharefs_impl(enum sharefs_sys_op opcode, share_t *sh_in, uint32_t iMaxLen) size_t len; size_t bufsz; share_t *sh; - sharefs_lens_t shl; - model_t model; - char *buf = NULL; + sharetab_globals_t *sg = sharetab_get_globals(curzone); STRUCT_DECL(share, u_sh); @@ -347,20 +367,20 @@ sharefs_impl(enum sharefs_sys_op opcode, share_t *sh_in, uint32_t iMaxLen) * Before we do anything, lets make sure we have * a sharetab in memory if we need one. 
*/ - rw_enter(&sharetab_lock, RW_READER); + rw_enter(&sg->sharetab_lock, RW_READER); switch (opcode) { - case (SHAREFS_REMOVE) : - case (SHAREFS_REPLACE) : - if (!sharefs_sharetab) { - rw_exit(&sharetab_lock); + case SHAREFS_REMOVE: + case SHAREFS_REPLACE: + if (!sg->sharefs_sharetab) { + rw_exit(&sg->sharetab_lock); return (set_errno(ENOENT)); } break; - case (SHAREFS_ADD) : - default : + case SHAREFS_ADD: + default: break; } - rw_exit(&sharetab_lock); + rw_exit(&sg->sharetab_lock); model = get_udatamodel(); @@ -368,49 +388,37 @@ sharefs_impl(enum sharefs_sys_op opcode, share_t *sh_in, uint32_t iMaxLen) * Initialize the data pointers. */ STRUCT_INIT(u_sh, model); - if (copyin(sh_in, STRUCT_BUF(u_sh), STRUCT_SIZE(u_sh))) { + if (copyin(sh_in, STRUCT_BUF(u_sh), STRUCT_SIZE(u_sh))) return (set_errno(EFAULT)); - } - /* - * Get the share. - */ + /* Get the share */ sh = kmem_zalloc(sizeof (share_t), KM_SLEEP); - /* - * Get some storage for copying in the strings. - */ + /* Get some storage for copying in the strings */ buf = kmem_zalloc(bufsz + 1, KM_SLEEP); bzero(&shl, sizeof (sharefs_lens_t)); - /* - * Only grab these two until we know what we want. - */ + /* Only grab these two until we know what we want */ SHARETAB_COPYIN(path); SHARETAB_COPYIN(fstype); switch (opcode) { - case (SHAREFS_ADD) : - case (SHAREFS_REPLACE) : + case SHAREFS_ADD: + case SHAREFS_REPLACE: SHARETAB_COPYIN(res); SHARETAB_COPYIN(opts); SHARETAB_COPYIN(descr); - - error = sharefs_add(sh, &shl); + error = sharefs_add(sg, sh, &shl); break; - - case (SHAREFS_REMOVE) : - - error = sharefs_remove(sh, &shl); + case SHAREFS_REMOVE: + error = sharefs_remove(sg, sh, &shl); break; - default: error = EINVAL; break; } cleanup: - /* * If there is no error, then we have stashed the structure * away in the sharetab hash table or have deleted it. @@ -418,22 +426,38 @@ cleanup: * Either way, the only reason to blow away the data is if * there was an error. 
*/ - if (error != 0) { + if (error != 0) sharefree(sh, &shl); - } - if (buf) { + if (buf != NULL) kmem_free(buf, bufsz + 1); - } - return ((error != 0) ? set_errno(error) : 0); + return (error != 0 ? set_errno(error) : 0); } int sharefs(enum sharefs_sys_op opcode, share_t *sh_in, uint32_t iMaxLen) { - if (secpolicy_sys_config(CRED(), B_FALSE) != 0) - return (set_errno(EPERM)); + /* + * If we're in the global zone PRIV_SYS_CONFIG gives us the + * privileges needed to act on sharetab. However if we're in + * a non-global zone PRIV_SYS_CONFIG is not allowed. To work + * around this issue PRIV_SYS_NFS is used in this case. + * + * TODO: This basically overloads the definition/use of + * PRIV_SYS_NFS to work around the limitation of PRIV_SYS_CONFIG + * in a zone. Solaris 11 solved this by implementing a PRIV_SYS_SHARE + * we should do the same and replace the use of PRIV_SYS_NFS here and + * in zfs_secpolicy_share. + */ + if (INGLOBALZONE(curproc)) { + if (secpolicy_sys_config(CRED(), B_FALSE) != 0) + return (set_errno(EPERM)); + } else { + /* behave like zfs_secpolicy_share() */ + if (secpolicy_nfs(CRED()) != 0) + return (set_errno(EPERM)); + } return (sharefs_impl(opcode, sh_in, iMaxLen)); } diff --git a/usr/src/uts/common/fs/zfs/zfs_ioctl.c b/usr/src/uts/common/fs/zfs/zfs_ioctl.c index a46390cceb..1d723c89f6 100644 --- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c +++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c @@ -21,10 +21,13 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +/* * Copyright (c) 2011-2012 Pawel Jakub Dawidek. All rights reserved. * Portions Copyright 2011 Martin Matuska * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved. - * Copyright 2016 Nexenta Systems, Inc. All rights reserved. + * Copyright 2018 Nexenta Systems, Inc. All rights reserved. * Copyright 2019 Joyent, Inc. * Copyright (c) 2011, 2017 by Delphix. All rights reserved. * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. 
@@ -787,9 +790,6 @@ zfs_secpolicy_deleg_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) int zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { - if (!INGLOBALZONE(curproc)) - return (SET_ERROR(EPERM)); - if (secpolicy_nfs(cr) == 0) { return (0); } else { @@ -800,9 +800,6 @@ zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) int zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { - if (!INGLOBALZONE(curproc)) - return (SET_ERROR(EPERM)); - if (secpolicy_smb(cr) == 0) { return (0); } else { diff --git a/usr/src/uts/common/klm/klmmod.c b/usr/src/uts/common/klm/klmmod.c index 51ed43e198..ca6dd6f3aa 100644 --- a/usr/src/uts/common/klm/klmmod.c +++ b/usr/src/uts/common/klm/klmmod.c @@ -92,6 +92,7 @@ lm_zone_init(zoneid_t zoneid) g->lockd_pid = 0; g->run_status = NLM_ST_DOWN; + g->nlm_zoneid = zoneid; nlm_globals_register(g); return (g); @@ -103,6 +104,8 @@ lm_zone_fini(zoneid_t zoneid, void *data) { struct nlm_globals *g = data; + nlm_globals_unregister(g); + ASSERT(avl_is_empty(&g->nlm_hosts_tree)); avl_destroy(&g->nlm_hosts_tree); mod_hash_destroy_idhash(g->nlm_hosts_hash); @@ -113,7 +116,6 @@ lm_zone_fini(zoneid_t zoneid, void *data) cv_destroy(&g->nlm_gc_finish_cv); mutex_destroy(&g->clean_lock); - nlm_globals_unregister(g); kmem_free(g, sizeof (*g)); } diff --git a/usr/src/uts/common/klm/nlm_impl.c b/usr/src/uts/common/klm/nlm_impl.c index 1e9033a17c..bab08acdae 100644 --- a/usr/src/uts/common/klm/nlm_impl.c +++ b/usr/src/uts/common/klm/nlm_impl.c @@ -843,7 +843,7 @@ nlm_nsm_init(struct nlm_nsm *nsm, struct knetconfig *knc, struct netbuf *nb) * statd using the status monitor protocol. 
*/ error = clnt_tli_kcreate(&nsm->ns_knc, &nsm->ns_addr, SM_PROG, SM_VERS, - 0, NLM_RPC_RETRIES, kcred, &nsm->ns_handle); + 0, NLM_RPC_RETRIES, zone_kcred(), &nsm->ns_handle); if (error != 0) goto error; @@ -852,7 +852,8 @@ nlm_nsm_init(struct nlm_nsm *nsm, struct knetconfig *knc, struct netbuf *nb) * local statd using the address registration protocol. */ error = clnt_tli_kcreate(&nsm->ns_knc, &nsm->ns_addr, NSM_ADDR_PROGRAM, - NSM_ADDR_V1, 0, NLM_RPC_RETRIES, kcred, &nsm->ns_addr_handle); + NSM_ADDR_V1, 0, NLM_RPC_RETRIES, zone_kcred(), + &nsm->ns_addr_handle); if (error != 0) goto error; @@ -861,8 +862,11 @@ nlm_nsm_init(struct nlm_nsm *nsm, struct knetconfig *knc, struct netbuf *nb) error: kmem_free(nsm->ns_addr.buf, nsm->ns_addr.maxlen); - if (nsm->ns_handle) + if (nsm->ns_handle) { + ASSERT(nsm->ns_handle->cl_auth != NULL); + auth_destroy(nsm->ns_handle->cl_auth); CLNT_DESTROY(nsm->ns_handle); + } return (error); } @@ -871,8 +875,12 @@ static void nlm_nsm_fini(struct nlm_nsm *nsm) { kmem_free(nsm->ns_addr.buf, nsm->ns_addr.maxlen); + if (nsm->ns_addr_handle->cl_auth != NULL) + auth_destroy(nsm->ns_addr_handle->cl_auth); CLNT_DESTROY(nsm->ns_addr_handle); nsm->ns_addr_handle = NULL; + if (nsm->ns_handle->cl_auth != NULL) + auth_destroy(nsm->ns_handle->cl_auth); CLNT_DESTROY(nsm->ns_handle); nsm->ns_handle = NULL; sema_destroy(&nsm->ns_sem); @@ -2562,14 +2570,17 @@ nlm_vp_active(const vnode_t *vp) * on them. 
*/ void -nlm_unexport(struct exportinfo *exi) +nlm_zone_unexport(struct nlm_globals *g, struct exportinfo *exi) { - struct nlm_globals *g; struct nlm_host *hostp; - g = zone_getspecific(nlm_zone_key, curzone); - mutex_enter(&g->lock); + if (g->run_status != NLM_ST_UP) { + /* nothing to do */ + mutex_exit(&g->lock); + return; + } + hostp = avl_first(&g->nlm_hosts_tree); while (hostp != NULL) { struct nlm_vhold *nvp; @@ -2617,6 +2628,28 @@ nlm_unexport(struct exportinfo *exi) mutex_exit(&g->lock); } +void +nlm_unexport(struct exportinfo *exi) +{ + struct nlm_globals *g; + + rw_enter(&lm_lck, RW_READER); + TAILQ_FOREACH(g, &nlm_zones_list, nlm_link) { + if (g->nlm_zoneid == exi->exi_zoneid) { + /* + * NOTE: If we want to drop lm_lck before + * calling nlm_zone_unexport(), we should break, + * and have a post-rw_exit() snippet like: + * if (g != NULL) + * nlm_zone_unexport(g, exi); + */ + nlm_zone_unexport(g, exi); + break; /* Only going to match once! */ + } + } + rw_exit(&lm_lck); +} + /* * Allocate new unique sysid. * In case of failure (no available sysids) @@ -2785,7 +2818,7 @@ static void nlm_nsm_clnt_init(CLIENT *clnt, struct nlm_nsm *nsm) { (void) clnt_tli_kinit(clnt, &nsm->ns_knc, &nsm->ns_addr, 0, - NLM_RPC_RETRIES, kcred); + NLM_RPC_RETRIES, zone_kcred()); } static void diff --git a/usr/src/uts/common/klm/nlm_impl.h b/usr/src/uts/common/klm/nlm_impl.h index 6b2df7f8b0..fd5d656f4e 100644 --- a/usr/src/uts/common/klm/nlm_impl.h +++ b/usr/src/uts/common/klm/nlm_impl.h @@ -459,6 +459,7 @@ struct nlm_globals { int cn_idle_tmo; /* (z) */ int grace_period; /* (z) */ int retrans_tmo; /* (z) */ + zoneid_t nlm_zoneid; /* (c) */ kmutex_t clean_lock; /* (c) */ TAILQ_ENTRY(nlm_globals) nlm_link; /* (g) */ }; diff --git a/usr/src/uts/common/nfs/export.h b/usr/src/uts/common/nfs/export.h index 60c6320db7..41bd4ab74f 100644 --- a/usr/src/uts/common/nfs/export.h +++ b/usr/src/uts/common/nfs/export.h @@ -23,6 +23,7 @@ * Copyright (c) 1989, 2010, Oracle and/or its affiliates. 
All rights reserved. * Copyright 2016 Nexenta Systems, Inc. All rights reserved. * Copyright 2016 Jason King. + * Copyright 2018 Nexenta Systems, Inc. All rights reserved. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -37,6 +38,11 @@ #include <nfs/nfs4.h> #include <sys/kiconv.h> #include <sys/avl.h> +#include <sys/zone.h> + +#ifdef _KERNEL +#include <sys/pkp_hash.h> /* for PKP_HASH_SIZE */ +#endif /* _KERNEL */ #ifdef __cplusplus extern "C" { @@ -467,19 +473,25 @@ typedef struct treenode { } treenode_t; /* - * TREE_ROOT checks if the node corresponds to a filesystem root + * Now that we have links to chase, we can get the zone rootvp just from + * an export. No current-zone-context needed. + */ +#define EXI_TO_ZONEROOTVP(exi) ((exi)->exi_ne->exi_root->exi_vp) + +/* + * TREE_ROOT checks if the node corresponds to a filesystem root or + * the zone's root directory. * TREE_EXPORTED checks if the node is explicitly shared */ #define TREE_ROOT(t) \ - ((t)->tree_exi && (t)->tree_exi->exi_vp->v_flag & VROOT) + ((t)->tree_exi != NULL && \ + (((t)->tree_exi->exi_vp->v_flag & VROOT) || \ + VN_CMP(EXI_TO_ZONEROOTVP((t)->tree_exi), (t)->tree_exi->exi_vp))) #define TREE_EXPORTED(t) \ ((t)->tree_exi && !PSEUDO((t)->tree_exi)) -/* Root of nfs pseudo namespace */ -extern treenode_t *ns_root; - #define EXPTABLESIZE 256 struct exp_hash { @@ -517,6 +529,7 @@ struct exportinfo { krwlock_t exi_cache_lock; kmutex_t exi_lock; uint_t exi_count; + zoneid_t exi_zoneid; vnode_t *exi_vp; vnode_t *exi_dvp; avl_tree_t *exi_cache[AUTH_TABLESIZE]; @@ -525,11 +538,20 @@ struct exportinfo { struct charset_cache *exi_charset; unsigned exi_volatile_dev:1; unsigned exi_moved:1; + int exi_id; + avl_node_t exi_id_link; + /* + * Soft-reference/backpointer to zone's nfs_export_t. + * This allows us access to the zone's rootvp (stored in + * exi_ne->exi_root->exi_vp) even if the current thread isn't in + * same-zone context. 
+ */ + struct nfs_export *exi_ne; #ifdef VOLATILE_FH_TEST uint32_t exi_volatile_id; struct ex_vol_rename *exi_vol_rename; kmutex_t exi_vol_rename_lock; -#endif /* VOLATILE_FH_TEST */ +#endif /* VOLATILE_FH_TEST -- keep last! */ }; typedef struct exportinfo exportinfo_t; @@ -608,8 +630,12 @@ extern int nfsauth4_secinfo_access(struct exportinfo *, struct svc_req *, int, int, cred_t *); extern int nfsauth_cache_clnt_compar(const void *, const void *); extern int nfs_fhbcmp(char *, char *, int); -extern int nfs_exportinit(void); +extern void nfs_exportinit(void); extern void nfs_exportfini(void); +extern void nfs_export_zone_init(nfs_globals_t *); +extern void nfs_export_zone_fini(nfs_globals_t *); +extern void nfs_export_zone_shutdown(nfs_globals_t *); +extern int nfs_export_get_rootfh(nfs_globals_t *); extern int chk_clnt_sec(struct exportinfo *, struct svc_req *); extern int makefh(fhandle_t *, struct vnode *, struct exportinfo *); extern int makefh_ol(fhandle_t *, struct exportinfo *, uint_t); @@ -625,32 +651,61 @@ extern struct exportinfo *nfs_vptoexi(vnode_t *, vnode_t *, cred_t *, int *, int *, bool_t); extern int nfs_check_vpexi(vnode_t *, vnode_t *, cred_t *, struct exportinfo **); -extern void export_link(struct exportinfo *); -extern void export_unlink(struct exportinfo *); -extern vnode_t *untraverse(vnode_t *); +extern vnode_t *untraverse(vnode_t *, vnode_t *); extern int vn_is_nfs_reparse(vnode_t *, cred_t *); extern int client_is_downrev(struct svc_req *); extern char *build_symlink(vnode_t *, cred_t *, size_t *); +extern fhandle_t nullfh2; /* for comparing V2 filehandles */ + +typedef struct nfs_export { + /* Root of nfs pseudo namespace */ + treenode_t *ns_root; + + nfs_globals_t *ne_globals; /* "up" pointer */ + + struct exportinfo *exptable_path_hash[PKP_HASH_SIZE]; + struct exportinfo *exptable[EXPTABLESIZE]; + + /* + * Read/Write lock that protects the exportinfo list. This lock + * must be held when searching or modifying the exportinfo list. 
+ */ + krwlock_t exported_lock; + + /* "public" and default (root) location for public filehandle */ + struct exportinfo *exi_public; + struct exportinfo *exi_root; + /* For checking default public file handle */ + fid_t exi_rootfid; + /* For comparing V2 filehandles */ + fhandle_t nullfh2; + + /* The change attribute value of the root of nfs pseudo namespace */ + timespec_t ns_root_change; +} nfs_export_t; + /* * Functions that handle the NFSv4 server namespace */ extern exportinfo_t *vis2exi(treenode_t *); extern int treeclimb_export(struct exportinfo *); -extern void treeclimb_unexport(struct exportinfo *); +extern void treeclimb_unexport(nfs_export_t *, struct exportinfo *); extern int nfs_visible(struct exportinfo *, vnode_t *, int *); extern int nfs_visible_inode(struct exportinfo *, ino64_t, - struct exp_visible **); + struct exp_visible **); extern int has_visible(struct exportinfo *, vnode_t *); extern void free_visible(struct exp_visible *); extern int nfs_exported(struct exportinfo *, vnode_t *); -extern struct exportinfo *pseudo_exportfs(vnode_t *, fid_t *, - struct exp_visible *, struct exportdata *); +extern struct exportinfo *pseudo_exportfs(nfs_export_t *, vnode_t *, fid_t *, + struct exp_visible *, struct exportdata *); extern int vop_fid_pseudo(vnode_t *, fid_t *); extern int nfs4_vget_pseudo(struct exportinfo *, vnode_t **, fid_t *); extern bool_t nfs_visible_change(struct exportinfo *, vnode_t *, - timespec_t *); -extern void tree_update_change(treenode_t *, timespec_t *); + timespec_t *); +extern void tree_update_change(nfs_export_t *, treenode_t *, timespec_t *); +extern void rfs4_clean_state_exi(nfs_export_t *, struct exportinfo *); + /* * Functions that handle the NFSv4 server namespace security flavors * information. 
@@ -658,13 +713,16 @@ extern void tree_update_change(treenode_t *, timespec_t *); extern void srv_secinfo_exp2pseu(struct exportdata *, struct exportdata *); extern void srv_secinfo_list_free(struct secinfo *, int); +extern nfs_export_t *nfs_get_export(); +extern void export_link(nfs_export_t *, struct exportinfo *); +extern void export_unlink(nfs_export_t *, struct exportinfo *); + /* - * "public" and default (root) location for public filehandle + * exi_id support */ -extern struct exportinfo *exi_public, *exi_root; -extern fhandle_t nullfh2; /* for comparing V2 filehandles */ -extern krwlock_t exported_lock; -extern struct exportinfo *exptable[]; +extern kmutex_t nfs_exi_id_lock; +extern avl_tree_t exi_id_tree; +extern int exi_id_get_next(void); /* * Two macros for identifying public filehandles. diff --git a/usr/src/uts/common/nfs/nfs.h b/usr/src/uts/common/nfs/nfs.h index e26025b003..6f76fc93ea 100644 --- a/usr/src/uts/common/nfs/nfs.h +++ b/usr/src/uts/common/nfs/nfs.h @@ -20,19 +20,20 @@ */ /* - * Copyright 2014 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013 by Delphix. All rights reserved. */ /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ +/* + * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright 2019 Nexenta by DDN, Inc. All rights reserved. + */ + #ifndef _NFS_NFS_H #define _NFS_NFS_H -/* nfs.h 2.38 88/08/19 SMI */ - #include <sys/isa_defs.h> #include <sys/vfs.h> #include <sys/stream.h> @@ -72,8 +73,56 @@ extern "C" { #define NFS_VERSMIN_DEFAULT ((rpcvers_t)2) #define NFS_VERSMAX_DEFAULT ((rpcvers_t)4) -extern rpcvers_t nfs_versmin; -extern rpcvers_t nfs_versmax; +/* + * Used to track the state of the server so that initialization + * can be done properly. 
+ */ +typedef enum { + NFS_SERVER_STOPPED, /* server state destroyed */ + NFS_SERVER_STOPPING, /* server state being destroyed */ + NFS_SERVER_RUNNING, + NFS_SERVER_QUIESCED, /* server state preserved */ + NFS_SERVER_OFFLINE /* server pool offline */ +} nfs_server_running_t; + +/* Forward declarations for nfs_globals */ +struct nfs_export; +struct nfs_srv; +struct nfs3_srv; +struct nfs4_srv; +struct nfsauth_globals; + +/* + * Zone globals variables of NFS server + */ +typedef struct nfs_globals { + list_node_t nfs_g_link; /* all globals list */ + + rpcvers_t nfs_versmin; + rpcvers_t nfs_versmax; + + /* NFS server locks and state */ + nfs_server_running_t nfs_server_upordown; + kmutex_t nfs_server_upordown_lock; + kcondvar_t nfs_server_upordown_cv; + + /* RDMA wait variables */ + kcondvar_t rdma_wait_cv; + kmutex_t rdma_wait_mutex; + + zoneid_t nfs_zoneid; + /* Per-zone data structures private to each module */ + struct nfs_export *nfs_export; /* nfs_export.c */ + struct nfs_srv *nfs_srv; /* nfs_srv.c */ + struct nfs3_srv *nfs3_srv; /* nfs3_srv.c */ + struct nfs4_srv *nfs4_srv; /* nfs4_srv.c */ + struct nfsauth_globals *nfs_auth; /* nfs_auth.c */ + + /* statistic: nfs_stat.c, etc. 
*/ + kstat_named_t *svstat[NFS_VERSMAX + 1]; + kstat_named_t *rfsproccnt[NFS_VERSMAX + 1]; + kstat_named_t *aclproccnt[NFS_VERSMAX + 1]; +} nfs_globals_t; /* * Default delegation setting for the server ==> "on" @@ -872,6 +921,8 @@ extern void rfs_statfs(fhandle_t *, struct nfsstatfs *, struct exportinfo *, extern void *rfs_statfs_getfh(fhandle_t *); extern void rfs_srvrinit(void); extern void rfs_srvrfini(void); +extern void rfs_srv_zone_init(nfs_globals_t *); +extern void rfs_srv_zone_fini(nfs_globals_t *); /* * flags to define path types during Multi Component Lookups @@ -884,6 +935,8 @@ extern void rfs_srvrfini(void); /* index for svstat_ptr */ enum nfs_svccounts {NFS_CALLS, NFS_BADCALLS, NFS_REFERRALS, NFS_REFERLINKS}; +#define NFS_V2 NFS_VERSION + /* function defs for NFS kernel */ extern int nfs_waitfor_purge_complete(vnode_t *); extern int nfs_validate_caches(vnode_t *, cred_t *); @@ -904,7 +957,7 @@ extern int nfs_async_stop_sig(struct vfs *); extern int nfs_clntinit(void); extern void nfs_clntfini(void); extern int nfstsize(void); -extern int nfs_srvinit(void); +extern void nfs_srvinit(void); extern void nfs_srvfini(void); extern int vattr_to_sattr(struct vattr *, struct nfssattr *); extern void setdiropargs(struct nfsdiropargs *, char *, vnode_t *); @@ -942,10 +995,14 @@ extern int nfsauth_access(struct exportinfo *, struct svc_req *, cred_t *, uid_t *, gid_t *, uint_t *, gid_t **); extern void nfsauth_init(void); extern void nfsauth_fini(void); +extern void nfsauth_zone_init(nfs_globals_t *); +extern void nfsauth_zone_fini(nfs_globals_t *); +extern void nfsauth_zone_shutdown(nfs_globals_t *); extern int nfs_setopts(vnode_t *, model_t, struct nfs_args *); extern int nfs_mount_label_policy(vfs_t *, struct netbuf *, struct knetconfig *, cred_t *); extern boolean_t nfs_has_ctty(void); +extern nfs_globals_t *nfs_srv_getzg(void); extern void nfs_srv_stop_all(void); extern void nfs_srv_quiesce_all(void); extern int rfs4_dss_setpaths(char *, size_t); @@ -957,9 
+1014,12 @@ extern nvlist_t *rfs4_dss_paths, *rfs4_dss_oldpaths; extern kstat_named_t *global_svstat_ptr[]; +extern zone_key_t nfssrv_zone_key; +extern list_t nfssrv_globals_list; +extern krwlock_t nfssrv_globals_rwl; + extern krwlock_t rroklock; extern vtype_t nf_to_vt[]; -extern kstat_named_t *rfsproccnt_v2_ptr; extern kmutex_t nfs_minor_lock; extern int nfs_major; extern int nfs_minor; @@ -975,16 +1035,13 @@ extern int (*nfs_srv_dss_func)(char *, size_t); */ struct nfs_version_stats { kstat_named_t *aclreqcnt_ptr; /* nfs_acl:0:aclreqcnt_v? */ - kstat_named_t *aclproccnt_ptr; /* nfs_acl:0:aclproccnt_v? */ kstat_named_t *rfsreqcnt_ptr; /* nfs:0:rfsreqcnt_v? */ - kstat_named_t *rfsproccnt_ptr; /* nfs:0:rfsproccnt_v? */ }; /* * A bit of asymmetry: nfs:0:nfs_client isn't part of this structure. */ struct nfs_stats { - kstat_named_t *nfs_stats_svstat_ptr[NFS_VERSMAX + 1]; struct nfs_version_stats nfs_stats_v2; struct nfs_version_stats nfs_stats_v3; struct nfs_version_stats nfs_stats_v4; @@ -1001,6 +1058,9 @@ extern zone_key_t nfsstat_zone_key; extern void *nfsstat_zone_init(zoneid_t); extern void nfsstat_zone_fini(zoneid_t, void *); +extern void rfs_stat_zone_init(nfs_globals_t *); +extern void rfs_stat_zone_fini(nfs_globals_t *); + #endif /* _KERNEL */ /* @@ -2248,6 +2308,8 @@ extern void rfs3_commit(COMMIT3args *, COMMIT3res *, struct exportinfo *, extern void *rfs3_commit_getfh(COMMIT3args *); extern void rfs3_srvrinit(void); extern void rfs3_srvrfini(void); +extern void rfs3_srv_zone_init(nfs_globals_t *); +extern void rfs3_srv_zone_fini(nfs_globals_t *); extern int nfs3_validate_caches(vnode_t *, cred_t *); extern void nfs3_cache_post_op_attr(vnode_t *, post_op_attr *, hrtime_t, @@ -2282,7 +2344,6 @@ extern int rfs_cross_mnt(vnode_t **, struct exportinfo **); extern int rfs_climb_crossmnt(vnode_t **, struct exportinfo **, cred_t *); extern vtype_t nf3_to_vt[]; -extern kstat_named_t *rfsproccnt_v3_ptr; extern vfsops_t *nfs3_vfsops; extern struct vnodeops 
*nfs3_vnodeops; extern const struct fs_operation_def nfs3_vnodeops_template[]; @@ -2312,11 +2373,11 @@ extern bool_t rfs4_check_delegated(int mode, vnode_t *, bool_t trunc); * if no delegation is present. */ extern int rfs4_delegated_getattr(vnode_t *, vattr_t *, int, cred_t *); -extern void rfs4_hold_deleg_policy(void); -extern void rfs4_rele_deleg_policy(void); extern int do_xattr_exists_check(vnode_t *, ulong_t *, cred_t *); +extern int protect_zfs_mntpt(vnode_t *); + extern ts_label_t *nfs_getflabel(vnode_t *, struct exportinfo *); extern boolean_t do_rfs_label_check(bslabel_t *, vnode_t *, int, struct exportinfo *); diff --git a/usr/src/uts/common/nfs/nfs4.h b/usr/src/uts/common/nfs/nfs4.h index 2dee6d22f4..ce09473f95 100644 --- a/usr/src/uts/common/nfs/nfs4.h +++ b/usr/src/uts/common/nfs/nfs4.h @@ -20,12 +20,13 @@ */ /* - * Copyright 2015 Nexenta Systems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright 2018 Nexenta Systems, Inc. + * Copyright 2019 Nexenta by DDN, Inc. */ #ifndef _NFS4_H @@ -39,6 +40,7 @@ #ifdef _KERNEL #include <nfs/nfs4_kprot.h> +#include <nfs/nfs4_drc.h> #include <sys/nvpair.h> #else #include <rpcsvc/nfs4_prot.h> @@ -119,7 +121,21 @@ typedef struct { /* opaque entry type for later use */ rfs4_dbe_t *dbe; } *rfs4_entry_t; -extern rfs4_table_t *rfs4_client_tab; +/* + * NFSv4 server state databases + * + * Initialized when the module is loaded and used by NFSv4 state tables. + * These kmem_cache free pools are used globally, the NFSv4 state tables + * which make use of these kmem_cache free pools are per zone. 
+ */ +extern kmem_cache_t *rfs4_client_mem_cache; +extern kmem_cache_t *rfs4_clntIP_mem_cache; +extern kmem_cache_t *rfs4_openown_mem_cache; +extern kmem_cache_t *rfs4_openstID_mem_cache; +extern kmem_cache_t *rfs4_lockstID_mem_cache; +extern kmem_cache_t *rfs4_lockown_mem_cache; +extern kmem_cache_t *rfs4_file_mem_cache; +extern kmem_cache_t *rfs4_delegstID_mem_cache; /* database, table, index creation entry points */ extern rfs4_database_t *rfs4_database_create(uint32_t); @@ -128,6 +144,8 @@ extern void rfs4_database_destroy(rfs4_database_t *); extern void rfs4_database_destroy(rfs4_database_t *); +extern kmem_cache_t *nfs4_init_mem_cache(char *, uint32_t, uint32_t, + uint32_t); extern rfs4_table_t *rfs4_table_create(rfs4_database_t *, char *, time_t, uint32_t, bool_t (*create)(rfs4_entry_t, void *), @@ -369,12 +387,6 @@ typedef struct rfs4_dss_path { char **rfs4_dss_newpaths; uint_t rfs4_dss_numnewpaths; -/* - * Circular doubly-linked list of paths for currently-served RGs. - * No locking required: only changed on warmstart. Managed with insque/remque. 
- */ -rfs4_dss_path_t *rfs4_dss_pathlist; - /* nvlists of all DSS paths: current, and before last warmstart */ nvlist_t *rfs4_dss_paths, *rfs4_dss_oldpaths; @@ -740,26 +752,8 @@ typedef struct rfs4_file { krwlock_t rf_file_rwlock; } rfs4_file_t; -extern int rfs4_seen_first_compound; /* set first time we see one */ - -extern rfs4_servinst_t *rfs4_cur_servinst; /* current server instance */ -extern kmutex_t rfs4_servinst_lock; /* protects linked list */ -extern void rfs4_servinst_create(int, int, char **); -extern void rfs4_servinst_destroy_all(void); -extern void rfs4_servinst_assign(rfs4_client_t *, - rfs4_servinst_t *); -extern rfs4_servinst_t *rfs4_servinst(rfs4_client_t *); -extern int rfs4_clnt_in_grace(rfs4_client_t *); -extern int rfs4_servinst_in_grace(rfs4_servinst_t *); -extern int rfs4_servinst_grace_new(rfs4_servinst_t *); -extern void rfs4_grace_start(rfs4_servinst_t *); -extern void rfs4_grace_start_new(void); -extern void rfs4_grace_reset_all(void); -extern void rfs4_ss_oldstate(rfs4_oldstate_t *, char *, char *); -extern void rfs4_dss_readstate(int, char **); - /* - * rfs4_deleg_policy is used to signify the server's global delegation + * nfs4_deleg_policy is used to signify the server's global delegation * policy. The default is to NEVER delegate files and the * administrator must configure the server to enable delegations. 
* @@ -771,8 +765,6 @@ typedef enum { SRV_NORMAL_DELEGATE = 1 } srv_deleg_policy_t; -extern srv_deleg_policy_t rfs4_deleg_policy; -extern kmutex_t rfs4_deleg_lock; extern void rfs4_disable_delegation(void), rfs4_enable_delegation(void); /* @@ -790,11 +782,124 @@ typedef enum { #define NFS4_DELEG4TYPE2REQTYPE(x) (delegreq_t)(x) /* + * Zone global variables of NFSv4 server + */ +typedef struct nfs4_srv { + /* Unique write verifier */ + verifier4 write4verf; + /* Delegation lock */ + kmutex_t deleg_lock; + /* Used to serialize create/destroy of nfs4_server_state database */ + kmutex_t state_lock; + rfs4_database_t *nfs4_server_state; + /* Used to manage access to server instance linked list */ + kmutex_t servinst_lock; + rfs4_servinst_t *nfs4_cur_servinst; + /* Used to manage access to nfs4_deleg_policy */ + krwlock_t deleg_policy_lock; + srv_deleg_policy_t nfs4_deleg_policy; + /* Set first time we see one */ + int seen_first_compound; + /* + * Circular double-linked list of paths for currently-served RGs. + * No locking required -- only changed on server start. + * Managed with insque/remque. 
+ */ + rfs4_dss_path_t *dss_pathlist; + /* Duplicate request cache */ + rfs4_drc_t *nfs4_drc; + /* nfsv4 server start time */ + time_t rfs4_start_time; + /* Used to serialize lookups of clientids */ + krwlock_t rfs4_findclient_lock; + + /* NFSv4 server state client tables */ + /* table expiry times */ + time_t rfs4_client_cache_time; + time_t rfs4_openowner_cache_time; + time_t rfs4_state_cache_time; + time_t rfs4_lo_state_cache_time; + time_t rfs4_lockowner_cache_time; + time_t rfs4_file_cache_time; + time_t rfs4_deleg_state_cache_time; + time_t rfs4_clntip_cache_time; + /* tables and indexes */ + /* client table */ + rfs4_table_t *rfs4_client_tab; + rfs4_index_t *rfs4_clientid_idx; + rfs4_index_t *rfs4_nfsclnt_idx; + /* client IP table */ + rfs4_table_t *rfs4_clntip_tab; + rfs4_index_t *rfs4_clntip_idx; + /* Open Owner table */ + rfs4_table_t *rfs4_openowner_tab; + rfs4_index_t *rfs4_openowner_idx; + /* Open State ID table */ + rfs4_table_t *rfs4_state_tab; + rfs4_index_t *rfs4_state_idx; + rfs4_index_t *rfs4_state_owner_file_idx; + rfs4_index_t *rfs4_state_file_idx; + /* Lock State ID table */ + rfs4_table_t *rfs4_lo_state_tab; + rfs4_index_t *rfs4_lo_state_idx; + rfs4_index_t *rfs4_lo_state_owner_idx; + /* Lock owner table */ + rfs4_table_t *rfs4_lockowner_tab; + rfs4_index_t *rfs4_lockowner_idx; + rfs4_index_t *rfs4_lockowner_pid_idx; + /* File table */ + rfs4_table_t *rfs4_file_tab; + rfs4_index_t *rfs4_file_idx; + /* Deleg State table */ + rfs4_table_t *rfs4_deleg_state_tab; + rfs4_index_t *rfs4_deleg_idx; + rfs4_index_t *rfs4_deleg_state_idx; + + /* client stable storage */ + int rfs4_ss_enabled; +} nfs4_srv_t; + +/* + * max length of the NFSv4 server database name + */ +#define RFS4_MAX_MEM_CACHE_NAME 48 + +/* + * global NFSv4 server kmem caches + * r_db_name - The name of the state database and the table that will use it + * These tables are defined in nfs4_srv_t + * r_db_mem_cache - The kmem cache associated with the state database name + */ +typedef 
struct rfs4_db_mem_cache { + char r_db_name[RFS4_MAX_MEM_CACHE_NAME]; + kmem_cache_t *r_db_mem_cache; +} rfs4_db_mem_cache_t; + +#define RFS4_DB_MEM_CACHE_NUM 8 + +rfs4_db_mem_cache_t rfs4_db_mem_cache_table[RFS4_DB_MEM_CACHE_NUM]; + + +extern srv_deleg_policy_t nfs4_get_deleg_policy(); + +extern void rfs4_servinst_create(nfs4_srv_t *, int, int, char **); +extern void rfs4_servinst_destroy_all(nfs4_srv_t *); +extern void rfs4_servinst_assign(nfs4_srv_t *, rfs4_client_t *, + rfs4_servinst_t *); +extern rfs4_servinst_t *rfs4_servinst(rfs4_client_t *); +extern int rfs4_clnt_in_grace(rfs4_client_t *); +extern int rfs4_servinst_in_grace(rfs4_servinst_t *); +extern int rfs4_servinst_grace_new(rfs4_servinst_t *); +extern void rfs4_grace_start(rfs4_servinst_t *); +extern void rfs4_grace_start_new(nfs4_srv_t *); +extern void rfs4_grace_reset_all(nfs4_srv_t *); +extern void rfs4_ss_oldstate(rfs4_oldstate_t *, char *, char *); +extern void rfs4_dss_readstate(nfs4_srv_t *, int, char **); + +/* * Various interfaces to manipulate the state structures introduced * above */ -extern kmutex_t rfs4_state_lock; -extern void rfs4_clean_state_exi(struct exportinfo *exi); extern void rfs4_free_reply(nfs_resop4 *); extern void rfs4_copy_reply(nfs_resop4 *, nfs_resop4 *); @@ -946,7 +1051,10 @@ extern fem_t *deleg_wrops; extern int rfs4_share(rfs4_state_t *, uint32_t, uint32_t); extern int rfs4_unshare(rfs4_state_t *); -extern void rfs4_set_deleg_policy(srv_deleg_policy_t); +extern void rfs4_set_deleg_policy(nfs4_srv_t *, srv_deleg_policy_t); +extern void rfs4_hold_deleg_policy(nfs4_srv_t *); +extern void rfs4_rele_deleg_policy(nfs4_srv_t *); + #ifdef DEBUG #define NFS4_DEBUG(var, args) if (var) cmn_err args @@ -1348,7 +1456,6 @@ extern vtype_t nf4_to_vt[]; extern struct nfs4_ntov_map nfs4_ntov_map[]; extern uint_t nfs4_ntov_map_size; -extern kstat_named_t *rfsproccnt_v4_ptr; extern struct vfsops *nfs4_vfsops; extern struct vnodeops *nfs4_vnodeops; extern const struct fs_operation_def 
nfs4_vnodeops_template[]; @@ -1377,15 +1484,21 @@ extern stateid4 clnt_special1; * The NFS Version 4 service procedures. */ +extern void rfs4_do_server_start(int, int, int); extern void rfs4_compound(COMPOUND4args *, COMPOUND4res *, struct exportinfo *, struct svc_req *, cred_t *, int *); extern void rfs4_compound_free(COMPOUND4res *); extern void rfs4_compound_flagproc(COMPOUND4args *, int *); -extern int rfs4_srvrinit(void); +extern void rfs4_srvrinit(void); extern void rfs4_srvrfini(void); -extern void rfs4_state_init(void); -extern void rfs4_state_fini(void); +extern void rfs4_srv_zone_init(nfs_globals_t *); +extern void rfs4_srv_zone_fini(nfs_globals_t *); +extern void rfs4_state_g_init(void); +extern void rfs4_state_zone_init(nfs4_srv_t *); +extern void rfs4_state_g_fini(void); +extern void rfs4_state_zone_fini(void); +extern nfs4_srv_t *nfs4_get_srv(void); #endif #ifdef __cplusplus diff --git a/usr/src/uts/common/nfs/nfs4_drc.h b/usr/src/uts/common/nfs/nfs4_drc.h index a77fb60818..f42867d197 100644 --- a/usr/src/uts/common/nfs/nfs4_drc.h +++ b/usr/src/uts/common/nfs/nfs4_drc.h @@ -18,16 +18,19 @@ * * CDDL HEADER END */ + /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright 2018 Nexenta Systems, Inc. + */ + #ifndef _NFS4_DRC_H #define _NFS4_DRC_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus extern "C" { #endif @@ -36,26 +39,26 @@ extern "C" { * NFSv4 Duplicate Request cache. */ typedef struct rfs4_drc { - kmutex_t lock; + kmutex_t lock; uint32_t dr_hash; - uint32_t max_size; - uint32_t in_use; + uint32_t max_size; + uint32_t in_use; list_t dr_cache; - list_t *dr_buckets; + list_t *dr_buckets; } rfs4_drc_t; /* * NFSv4 Duplicate request cache entry. 
*/ typedef struct rfs4_dupreq { - list_node_t dr_bkt_next; + list_node_t dr_bkt_next; list_node_t dr_next; list_t *dr_bkt; rfs4_drc_t *drc; int dr_state; uint32_t dr_xid; struct netbuf dr_addr; - COMPOUND4res dr_res; + COMPOUND4res dr_res; } rfs4_dupreq_t; /* @@ -67,15 +70,14 @@ typedef struct rfs4_dupreq { #define NFS4_DUP_PENDING 2 #define NFS4_DUP_FREE 3 -#define NFS4_DUP_REPLAY 4 +#define NFS4_DUP_REPLAY 4 #define NFS4_DUP_INUSE 5 -extern rfs4_drc_t *nfs4_drc; extern uint32_t nfs4_drc_max; extern uint32_t nfs4_drc_hash; rfs4_drc_t *rfs4_init_drc(uint32_t, uint32_t); -void rfs4_fini_drc(rfs4_drc_t *); +void rfs4_fini_drc(void); void rfs4_dr_chstate(rfs4_dupreq_t *, int); rfs4_dupreq_t *rfs4_alloc_dr(rfs4_drc_t *); int rfs4_find_dr(struct svc_req *, rfs4_drc_t *, rfs4_dupreq_t **); diff --git a/usr/src/uts/common/nfs/nfs_acl.h b/usr/src/uts/common/nfs/nfs_acl.h index a9dd2e3635..f3ab476467 100644 --- a/usr/src/uts/common/nfs/nfs_acl.h +++ b/usr/src/uts/common/nfs/nfs_acl.h @@ -379,8 +379,6 @@ extern void nfs_acl_free(vsecattr_t *); #ifdef _KERNEL /* server and client data structures */ -extern kstat_named_t *aclproccnt_v2_ptr; -extern kstat_named_t *aclproccnt_v3_ptr; extern char *aclnames_v2[]; extern uchar_t acl_call_type_v2[]; diff --git a/usr/src/uts/common/nfs/nfs_cmd.h b/usr/src/uts/common/nfs/nfs_cmd.h index fe1dda528f..feb2e5a9fc 100644 --- a/usr/src/uts/common/nfs/nfs_cmd.h +++ b/usr/src/uts/common/nfs/nfs_cmd.h @@ -18,6 +18,7 @@ * * CDDL HEADER END */ + /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. @@ -26,6 +27,10 @@ /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ +/* + * Copyright 2018 Nexenta Systems, Inc. 
+ */ + #ifndef _NFS_CMD_H #define _NFS_CMD_H @@ -80,6 +85,7 @@ extern struct charset_cache *nfscmd_findmap(struct exportinfo *, struct sockaddr *); extern char *nfscmd_convname(struct sockaddr *, struct exportinfo *, char *, int, size_t); + extern char *nfscmd_convdirent(struct sockaddr *, struct exportinfo *, char *, size_t, enum nfsstat3 *); extern size_t nfscmd_convdirplus(struct sockaddr *, struct exportinfo *, char *, @@ -87,6 +93,9 @@ extern size_t nfscmd_convdirplus(struct sockaddr *, struct exportinfo *, char *, extern size_t nfscmd_countents(char *, size_t); extern size_t nfscmd_dropped_entrysize(struct dirent64 *, size_t, size_t); +extern void nfscmd_init(void); +extern void nfscmd_fini(void); + #endif #ifdef __cplusplus diff --git a/usr/src/uts/common/nfs/nfs_dispatch.h b/usr/src/uts/common/nfs/nfs_dispatch.h index 16475fea47..d1757fdb28 100644 --- a/usr/src/uts/common/nfs/nfs_dispatch.h +++ b/usr/src/uts/common/nfs/nfs_dispatch.h @@ -73,7 +73,6 @@ typedef struct rpcdisp { typedef struct rpc_disptable { int dis_nprocs; char **dis_procnames; - kstat_named_t **dis_proccntp; struct rpcdisp *dis_table; } rpc_disptable_t; diff --git a/usr/src/uts/common/nfs/nfs_log.h b/usr/src/uts/common/nfs/nfs_log.h index ff0f38a455..2bb90b37af 100644 --- a/usr/src/uts/common/nfs/nfs_log.h +++ b/usr/src/uts/common/nfs/nfs_log.h @@ -19,16 +19,19 @@ * * CDDL HEADER END */ + /* * Copyright 2004 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright 2018 Nexenta Systems, Inc. + */ + #ifndef _NFS_LOG_H #define _NFS_LOG_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus extern "C" { #endif @@ -654,7 +657,7 @@ extern void nfslog_dprint(const int, const char *fmt, ...) 
extern void *nfslog_record_alloc(struct exportinfo *, int, void **, int); extern void nfslog_record_free(void *, void *, size_t); -extern struct exportinfo *nfslog_get_exi(struct exportinfo *, +extern struct exportinfo *nfslog_get_exi(nfs_export_t *, struct exportinfo *, struct svc_req *, caddr_t, unsigned int *); extern void nfslog_write_record(struct exportinfo *, struct svc_req *, caddr_t, caddr_t, cred_t *, struct netbuf *, unsigned int, diff --git a/usr/src/uts/common/sharefs/sharefs.h b/usr/src/uts/common/sharefs/sharefs.h index 3587504c5e..d222227cb8 100644 --- a/usr/src/uts/common/sharefs/sharefs.h +++ b/usr/src/uts/common/sharefs/sharefs.h @@ -24,11 +24,13 @@ * Use is subject to license terms. */ +/* + * Copyright 2018 Nexenta Systems, Inc. + */ + #ifndef _SHAREFS_SHAREFS_H #define _SHAREFS_SHAREFS_H -#pragma ident "%Z%%M% %I% %E% SMI" - /* * This header provides service for the sharefs module. */ @@ -67,24 +69,27 @@ typedef struct sharefs_vfs { #define SHAREFS_NAME_MAX MAXNAMELEN -/* - * The lock ordering whenever sharefs_lock and sharetab_lock both - * need to be held is: sharefs_lock and then sharetab_lock. - */ -extern krwlock_t sharefs_lock; /* lock for the vnode ops */ -extern sharetab_t *sharefs_sharetab; /* The sharetab. */ +typedef struct sharetab_globals { + /* + * The lock ordering whenever sharefs_lock and sharetab_lock both + * need to be held is: sharefs_lock and then sharetab_lock. + */ + krwlock_t sharefs_lock; /* lock for the vnode ops */ + sharetab_t *sharefs_sharetab; /* The sharetab. */ -extern uint_t sharetab_count; /* How many shares? */ -extern krwlock_t sharetab_lock; /* lock for the cached sharetab */ -extern size_t sharetab_size; /* How big is the sharetab file? */ + uint_t sharetab_count; /* How many shares? */ + krwlock_t sharetab_lock; /* lock for the cached sharetab */ + size_t sharetab_size; /* How big is the sharetab file? 
*/ -extern timestruc_t sharetab_mtime; /* Last mod to sharetab */ -extern timestruc_t sharetab_snap_time; /* Last snap */ -extern uint_t sharetab_generation; /* Which copy is it? */ + timestruc_t sharetab_mtime; /* Last mod to sharetab */ + timestruc_t sharetab_snap_time; /* Last snap */ + uint_t sharetab_generation; /* Which copy is it? */ +} sharetab_globals_t; #define SHAREFS_INO_FILE 0x80 extern vnode_t *sharefs_create_root_file(vfs_t *); +extern sharetab_globals_t *sharetab_get_globals(zone_t *zone); /* * Sharetab file diff --git a/usr/src/uts/common/sys/sdt.h b/usr/src/uts/common/sys/sdt.h index 1535734a5c..d0f423ca8b 100644 --- a/usr/src/uts/common/sys/sdt.h +++ b/usr/src/uts/common/sys/sdt.h @@ -21,7 +21,7 @@ /* * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved. - * Copyright 2017 Nexenta Systems, Inc. All rights reserved. + * Copyright 2018 Nexenta Systems, Inc. All rights reserved. */ #ifndef _SYS_SDT_H @@ -231,11 +231,17 @@ extern "C" { type3, arg3) \ DTRACE_PROBE3(__nfsv3_##name, type1, arg1, type2, arg2, \ type3, arg3); + #define DTRACE_NFSV3_4(name, type1, arg1, type2, arg2, \ type3, arg3, type4, arg4) \ DTRACE_PROBE4(__nfsv3_##name, type1, arg1, type2, arg2, \ type3, arg3, type4, arg4); +#define DTRACE_NFSV3_5(name, type1, arg1, type2, arg2, \ + type3, arg3, type4, arg4, type5, arg5) \ + DTRACE_PROBE5(__nfsv3_##name, type1, arg1, type2, arg2, \ + type3, arg3, type4, arg4, type5, arg5); + #define DTRACE_NFSV4_1(name, type1, arg1) \ DTRACE_PROBE1(__nfsv4_##name, type1, arg1); diff --git a/usr/src/uts/common/sys/zone.h b/usr/src/uts/common/sys/zone.h index 9eeb9a0db8..15b416429e 100644 --- a/usr/src/uts/common/sys/zone.h +++ b/usr/src/uts/common/sys/zone.h @@ -18,11 +18,12 @@ * * CDDL HEADER END */ + /* * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2018 Joyent, Inc. - * Copyright 2019 Nexenta Systems, Inc. All rights reserved. 
* Copyright 2014 Igor Kozhukhov <ikozhukhov@gmail.com>. + * Copyright 2019 Nexenta Systems, Inc. All rights reserved. + * Copyright 2020 Joyent, Inc. */ #ifndef _SYS_ZONE_H @@ -639,7 +640,6 @@ typedef struct zone { */ #define ZONE_PS_INVAL PS_MYID - extern zone_t zone0; extern zone_t *global_zone; extern uint_t maxzones; @@ -781,6 +781,26 @@ struct zsd_entry { #define ZONE_SPECIALPID(x) ((x) == 0 || (x) == 1) /* + * A root vnode of the current zone. + * + * NOTE: It may be necessary (initialization time for file sharing where an + * NGZ loads a file-sharing kernel module that does zsd initialization) to NOT + * use this macro. One should ASSERT() that curzone == active ZSD (an + * ASSERTion that's not always true at ZSD initialization time) during regular + * use of this macro. + */ +#define ZONE_ROOTVP() (curzone->zone_rootvp) + +/* + * Since a zone's root isn't necessarily an actual filesystem boundary + * (i.e. VROOT may not be set on zone->zone_rootvp) we need to not assume it. + * This macro helps in checking if a vnode is the current zone's rootvp. + * NOTE: Using the VN_ prefix, even though it's defined here in zone.h. + * NOTE2: See above warning about ZONE_ROOTVP(). + */ +#define VN_IS_CURZONEROOT(vp) (VN_CMP(vp, ZONE_ROOTVP())) + +/* * Zone-safe version of thread_create() to be used when the caller wants to * create a kernel thread to run within the current zone's context. */ |