diff options
author | Dan Kruchinin <dan.kruchinin@nexenta.com> | 2013-08-26 20:42:32 -0800 |
---|---|---|
committer | Christopher Siden <chris.siden@delphix.com> | 2013-08-26 21:42:32 -0700 |
commit | bbaa8b60dd95d714741fc474adad3cf710ef4efd (patch) | |
tree | 996c4e12a768ed7d389f5a56b8605318955067b3 | |
parent | 69962b5647e4a8b9b14998733b765925381b727e (diff) | |
download | illumos-gate-bbaa8b60dd95d714741fc474adad3cf710ef4efd.tar.gz |
195 Need replacement for nfs/lockd+klm
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
Reviewed by: Jeremy Jones <jeremy@delphix.com>
Reviewed by: Jeff Biseda <jbiseda@delphix.com>
Approved by: Garrett D'Amore <garrett@damore.org>
48 files changed, 11165 insertions, 386 deletions
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_nlm.c b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_nlm.c index 2834788a8c..45c23ee116 100644 --- a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_nlm.c +++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_nlm.c @@ -24,6 +24,11 @@ * All rights reserved. */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. + */ + #include <sys/types.h> #include <setjmp.h> #include <string.h> @@ -818,17 +823,17 @@ interpret_nlm_4(flags, type, xid, vers, proc, data, len) procnames_short_4[proc]); line += strlen(line); switch (proc) { - case NLMPROC4_TEST: - case NLMPROC4_GRANTED: - case NLMPROC4_TEST_MSG: - case NLMPROC4_GRANTED_MSG: + case NLM4_TEST: + case NLM4_GRANTED: + case NLM4_TEST_MSG: + case NLM4_GRANTED_MSG: /* testargs */ (void) strcat(line, sum_netobj("OH")); (void) getxdr_bool(); /* Excl */ (void) strcat(line, sum_lock4()); break; - case NLMPROC4_LOCK: - case NLMPROC4_LOCK_MSG: + case NLM4_LOCK: + case NLM4_LOCK_MSG: /* lockargs */ (void) strcat(line, sum_netobj("OH")); (void) getxdr_bool(); /* Block */ @@ -836,43 +841,43 @@ interpret_nlm_4(flags, type, xid, vers, proc, data, len) (void) strcat(line, sum_lock4()); /* ignore reclaim, state fields */ break; - case NLMPROC4_CANCEL: - case NLMPROC4_CANCEL_MSG: + case NLM4_CANCEL: + case NLM4_CANCEL_MSG: /* cancargs */ (void) strcat(line, sum_netobj("OH")); (void) getxdr_bool(); /* Block */ (void) getxdr_bool(); /* Excl */ (void) strcat(line, sum_lock4()); break; - case NLMPROC4_UNLOCK: - case NLMPROC4_UNLOCK_MSG: + case NLM4_UNLOCK: + case NLM4_UNLOCK_MSG: /* unlockargs */ (void) strcat(line, sum_netobj("OH")); (void) strcat(line, sum_lock4()); break; - case NLMPROC4_TEST_RES: + case NLM4_TEST_RES: /* testres */ (void) strcat(line, sum_netobj("OH")); (void) strcat(line, " "); (void) strcat(line, nameof_stat4(getxdr_u_long())); break; - case NLMPROC4_LOCK_RES: - case NLMPROC4_CANCEL_RES: - case 
NLMPROC4_UNLOCK_RES: - case NLMPROC4_GRANTED_RES: + case NLM4_LOCK_RES: + case NLM4_CANCEL_RES: + case NLM4_UNLOCK_RES: + case NLM4_GRANTED_RES: /* res */ (void) strcat(line, sum_netobj("OH")); (void) strcat(line, " "); (void) strcat(line, nameof_stat4(getxdr_u_long())); break; - case NLMPROC4_SHARE: - case NLMPROC4_UNSHARE: + case NLM4_SHARE: + case NLM4_UNSHARE: (void) strcat(line, sum_netobj("OH")); (void) strcat(line, sum_share()); break; - case NLMPROC4_NM_LOCK: + case NLM4_NM_LOCK: /* lockargs */ skip_netobj(); /* Cookie */ (void) getxdr_bool(); /* Block */ @@ -880,7 +885,7 @@ interpret_nlm_4(flags, type, xid, vers, proc, data, len) (void) strcat(line, sum_lock4()); /* skip reclaim & state fields */ break; - case NLMPROC4_FREE_ALL: + case NLM4_FREE_ALL: (void) sprintf(line, " %s", sum_notify()); break; @@ -891,33 +896,33 @@ interpret_nlm_4(flags, type, xid, vers, proc, data, len) procnames_short_4[proc]); line += strlen(line); switch (proc) { - case NLMPROC4_TEST: + case NLM4_TEST: /* testres */ (void) strcat(line, sum_netobj("OH")); (void) strcat(line, " "); (void) strcat(line, nameof_stat4(getxdr_u_long())); break; - case NLMPROC4_LOCK: - case NLMPROC4_CANCEL: - case NLMPROC4_UNLOCK: - case NLMPROC4_GRANTED: - case NLMPROC4_NM_LOCK: + case NLM4_LOCK: + case NLM4_CANCEL: + case NLM4_UNLOCK: + case NLM4_GRANTED: + case NLM4_NM_LOCK: /* res */ (void) strcat(line, sum_netobj("OH")); (void) strcat(line, " "); (void) strcat(line, nameof_stat4(getxdr_u_long())); break; - case NLMPROC4_SHARE: - case NLMPROC4_UNSHARE: + case NLM4_SHARE: + case NLM4_UNSHARE: /* shareres */ pl = sum_netobj("OH"); i = getxdr_u_long(); sprintf(line, "%s %s %ld", pl, nameof_stat4(i), getxdr_long()); break; - case NLMPROC4_FREE_ALL: + case NLM4_FREE_ALL: break; } } @@ -934,64 +939,64 @@ interpret_nlm_4(flags, type, xid, vers, proc, data, len) proc, procnames_long_4[proc]); if (type == CALL) { switch (proc) { - case NLMPROC4_TEST: - case NLMPROC4_GRANTED: - case NLMPROC4_TEST_MSG: - case 
NLMPROC4_GRANTED_MSG: + case NLM4_TEST: + case NLM4_GRANTED: + case NLM4_TEST_MSG: + case NLM4_GRANTED_MSG: show_testargs4(); break; - case NLMPROC4_LOCK: - case NLMPROC4_LOCK_MSG: - case NLMPROC4_NM_LOCK: + case NLM4_LOCK: + case NLM4_LOCK_MSG: + case NLM4_NM_LOCK: show_lockargs4(); break; - case NLMPROC4_CANCEL: - case NLMPROC4_CANCEL_MSG: + case NLM4_CANCEL: + case NLM4_CANCEL_MSG: show_cancargs4(); break; - case NLMPROC4_UNLOCK: - case NLMPROC4_UNLOCK_MSG: + case NLM4_UNLOCK: + case NLM4_UNLOCK_MSG: show_unlockargs4(); break; - case NLMPROC4_TEST_RES: + case NLM4_TEST_RES: show_testres4(); break; - case NLMPROC4_LOCK_RES: - case NLMPROC4_CANCEL_RES: - case NLMPROC4_UNLOCK_RES: - case NLMPROC4_GRANTED_RES: + case NLM4_LOCK_RES: + case NLM4_CANCEL_RES: + case NLM4_UNLOCK_RES: + case NLM4_GRANTED_RES: show_res4(); break; - case NLMPROC4_SHARE: - case NLMPROC4_UNSHARE: + case NLM4_SHARE: + case NLM4_UNSHARE: show_shareargs(); break; - case NLMPROC4_FREE_ALL: + case NLM4_FREE_ALL: show_notify(); break; } } else { switch (proc) { - case NLMPROC4_TEST: + case NLM4_TEST: show_testres4(); break; - case NLMPROC4_LOCK: - case NLMPROC4_CANCEL: - case NLMPROC4_UNLOCK: - case NLMPROC4_GRANTED: + case NLM4_LOCK: + case NLM4_CANCEL: + case NLM4_UNLOCK: + case NLM4_GRANTED: case NLM_NM_LOCK: show_res4(); break; - case NLMPROC4_TEST_MSG: - case NLMPROC4_LOCK_MSG: - case NLMPROC4_CANCEL_MSG: - case NLMPROC4_UNLOCK_MSG: - case NLMPROC4_GRANTED_MSG: - case NLMPROC4_TEST_RES: - case NLMPROC4_LOCK_RES: - case NLMPROC4_CANCEL_RES: - case NLMPROC4_UNLOCK_RES: - case NLMPROC4_GRANTED_RES: + case NLM4_TEST_MSG: + case NLM4_LOCK_MSG: + case NLM4_CANCEL_MSG: + case NLM4_UNLOCK_MSG: + case NLM4_GRANTED_MSG: + case NLM4_TEST_RES: + case NLM4_LOCK_RES: + case NLM4_CANCEL_RES: + case NLM4_UNLOCK_RES: + case NLM4_GRANTED_RES: break; case NLM_SHARE: case NLM_UNSHARE: @@ -1082,16 +1087,16 @@ nameof_stat4(s) ulong_t s; { switch ((enum nlm4_stats) s) { - case NLM4_GRANTED: return ("granted"); - 
case NLM4_DENIED: return ("denied"); - case NLM4_DENIED_NOLOCKS:return ("denied (no locks)"); - case NLM4_BLOCKED: return ("blocked"); - case NLM4_DENIED_GRACE_PERIOD: return ("denied (grace period)"); - case NLM4_DEADLCK: return ("deadlock"); - case NLM4_ROFS: return ("read-only fs"); - case NLM4_STALE_FH: return ("stale fh"); - case NLM4_FBIG: return ("file too big"); - case NLM4_FAILED: return ("failed"); + case nlm4_granted: return ("granted"); + case nlm4_denied: return ("denied"); + case nlm4_denied_nolocks:return ("denied (no locks)"); + case nlm4_blocked: return ("blocked"); + case nlm4_denied_grace_period: return ("denied (grace period)"); + case nlm4_deadlck: return ("deadlock"); + case nlm4_rofs: return ("read-only fs"); + case nlm4_stale_fh: return ("stale fh"); + case nlm4_fbig: return ("file too big"); + case nlm4_failed: return ("failed"); default: return ("?"); } } diff --git a/usr/src/cmd/fs.d/nfs/Makefile b/usr/src/cmd/fs.d/nfs/Makefile index 4687ae9d04..20b6690023 100644 --- a/usr/src/cmd/fs.d/nfs/Makefile +++ b/usr/src/cmd/fs.d/nfs/Makefile @@ -34,7 +34,9 @@ include $(SRC)/Makefile.master SUBDIR1= exportfs nfsd rquotad \ statd nfsstat mountd dfshares \ nfsfind nfs4cbd share -SUBDIR2= clear_locks umount showmount \ + +# These do "make catalog" +SUBDIR2= clear_locks lockd umount showmount \ mount dfmounts nfslog nfsmapid \ nfsref rp_basic @@ -46,11 +48,6 @@ SUBDIRS= $(SUBDIR1) $(SUBDIR2) $(SUBDIR3) POFILES= $(SUBDIR2:%=%/%.po) POFILE= nfs.po -LOCKD= $(CLOSED)/cmd/fs.d/nfs/lockd -$(CLOSED_BUILD)CLOSED_SUBDIR2= $(LOCKD) -$(CLOSED_BUILD)POFILES += $(LOCKD)/lockd.po -$(CLOSED_BUILD)SUBDIRS += $(CLOSED_SUBDIR2) - all:= TARGET= all install:= TARGET= install clean:= TARGET= clean @@ -64,7 +61,7 @@ catalog:= TARGET= catalog all install clean clobber lint: $(SUBDIRS) -catalog: $(SUBDIR2) $(CLOSED_SUBDIR2) +catalog: $(SUBDIR2) $(RM) $(POFILE) cat $(POFILES) > $(POFILE) diff --git a/usr/src/cmd/fs.d/nfs/lib/nfs_tbind.c b/usr/src/cmd/fs.d/nfs/lib/nfs_tbind.c 
index dc1e2b3b57..d14bb0329c 100644 --- a/usr/src/cmd/fs.d/nfs/lib/nfs_tbind.c +++ b/usr/src/cmd/fs.d/nfs/lib/nfs_tbind.c @@ -18,11 +18,11 @@ * * CDDL HEADER END */ + /* * Copyright (c) 1996, 2010, Oracle and/or its affiliates. All rights reserved. - */ -/* * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ @@ -1698,9 +1698,9 @@ bind_to_proto(NETSELDECL(proto), char *serv, struct netbuf **addr, * to all-ones. The port number part of the mask is zeroes. */ static int -set_addrmask(fd, nconf, mask) - struct netconfig *nconf; - struct netbuf *mask; +set_addrmask(int fd, + struct netconfig *nconf, + struct netbuf *mask) { struct t_info info; @@ -1725,8 +1725,7 @@ set_addrmask(fd, nconf, mask) return (0); } - syslog(LOG_ERR, "set_addrmask: address size: %ld", - info.addr); + syslog(LOG_ERR, "set_addrmask: address size: %ld", info.addr); return (-1); } @@ -1743,17 +1742,17 @@ set_addrmask(fd, nconf, mask) */ /* LINTED pointer alignment */ ((struct sockaddr_in *)mask->buf)->sin_addr.s_addr = - (ulong_t)~0; + (ulong_t)~0; /* LINTED pointer alignment */ ((struct sockaddr_in *)mask->buf)->sin_family = - (ushort_t)~0; + (ushort_t)~0; } else if (strcmp(nconf->nc_protofmly, NC_INET6) == 0) { /* LINTED pointer alignment */ (void) memset(&((struct sockaddr_in6 *)mask->buf)->sin6_addr, - (uchar_t)~0, sizeof (struct in6_addr)); + (uchar_t)~0, sizeof (struct in6_addr)); /* LINTED pointer alignment */ ((struct sockaddr_in6 *)mask->buf)->sin6_family = - (ushort_t)~0; + (ushort_t)~0; } else { /* diff --git a/usr/src/cmd/fs.d/nfs/lib/smfcfg.c b/usr/src/cmd/fs.d/nfs/lib/smfcfg.c index 76576add59..ba2420362a 100644 --- a/usr/src/cmd/fs.d/nfs/lib/smfcfg.c +++ b/usr/src/cmd/fs.d/nfs/lib/smfcfg.c @@ -18,8 +18,10 @@ * * CDDL HEADER END */ + /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 
*/ #include <stdio.h> #include <stdlib.h> @@ -358,6 +360,27 @@ nfs_smf_get_prop(char *prop_name, char *propbuf, char *instance, instance, sctype, svc_name, bufsz)); } +/* Get an integer (base 10) property */ +int +nfs_smf_get_iprop(char *prop_name, int *rvp, char *instance, + scf_type_t sctype, char *svc_name) +{ + char propbuf[32]; + int bufsz, rc, val; + + bufsz = sizeof (propbuf); + rc = fs_smf_get_prop(NFS_SMF, prop_name, propbuf, + instance, sctype, svc_name, &bufsz); + if (rc != SA_OK) + return (rc); + errno = 0; + val = strtol(propbuf, NULL, 10); + if (errno != 0) + return (SA_BAD_VALUE); + *rvp = val; + return (SA_OK); +} + int nfs_smf_set_prop(char *prop_name, char *value, char *instance, scf_type_t type, char *svc_name) diff --git a/usr/src/cmd/fs.d/nfs/lib/smfcfg.h b/usr/src/cmd/fs.d/nfs/lib/smfcfg.h index 46855ab8ed..c06327d801 100644 --- a/usr/src/cmd/fs.d/nfs/lib/smfcfg.h +++ b/usr/src/cmd/fs.d/nfs/lib/smfcfg.h @@ -90,6 +90,7 @@ typedef struct fs_smfhandle { #define SMF_NO_PERMISSION 2 #define SMF_NO_PGTYPE 3 +extern int nfs_smf_get_iprop(char *, int *, char *, scf_type_t, char *); extern int nfs_smf_get_prop(char *, char *, char *, scf_type_t, char *, int *); extern int fs_smf_get_prop(smf_fstype_t, char *, char *, char *, scf_type_t, char *, int *); diff --git a/usr/src/cmd/fs.d/nfs/lockd/Makefile b/usr/src/cmd/fs.d/nfs/lockd/Makefile new file mode 100644 index 0000000000..ce725cd336 --- /dev/null +++ b/usr/src/cmd/fs.d/nfs/lockd/Makefile @@ -0,0 +1,83 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2012 by Delphix. All rights reserved. +# + +FSTYPE = nfs +TYPEPROG = lockd +ATTMK = $(TYPEPROG) + +include ../../Makefile.fstype + +LOCAL = lockd.o +OBJS = $(LOCAL) daemon.o nfs_tbind.o smfcfg.o thrpool.o + +POFILE = lockd.po + +SRCS = $(LOCAL:%.o=%.c) ../lib/daemon.c ../lib/nfs_tbind.c \ + ../lib/smfcfg.c ../lib/thrpool.c +LDLIBS += -lnsl -lscf +CPPFLAGS += -I../lib +C99MODE = $(C99_ENABLE) + +CERRWARN += -_gcc=-Wno-parentheses +CERRWARN += -_gcc=-Wno-switch +CERRWARN += -_gcc=-Wno-unused-variable +CERRWARN += -_gcc=-Wno-uninitialized + +$(TYPEPROG): $(OBJS) + $(LINK.c) -o $@ $(OBJS) $(LDLIBS) + $(POST_PROCESS) + +lockd.o: lockd.c + $(COMPILE.c) lockd.c + +nfs_tbind.o: ../lib/nfs_tbind.c + $(COMPILE.c) ../lib/nfs_tbind.c + +thrpool.o: ../lib/thrpool.c + $(COMPILE.c) ../lib/thrpool.c + +daemon.o: ../lib/daemon.c + $(COMPILE.c) ../lib/daemon.c + +smfcfg.o: ../lib/smfcfg.c + $(COMPILE.c) ../lib/smfcfg.c + +# +# message catalog +# +catalog: $(POFILE) + +$(POFILE): $(SRCS) + $(RM) $@ + $(COMPILE.cpp) $(SRCS) > $(POFILE).i + $(XGETTEXT) $(XGETFLAGS) $(POFILE).i + sed "/^domain/d" messages.po > $@ + $(RM) $(POFILE).i messages.po + +lint: + $(LINT.c) $(SRCS) $(LDLIBS) + +clean: + $(RM) $(OBJS) $(DOBJ) diff --git a/usr/src/cmd/fs.d/nfs/lockd/lockd.c b/usr/src/cmd/fs.d/nfs/lockd/lockd.c new file mode 100644 index 0000000000..2fa73e73e3 --- /dev/null +++ b/usr/src/cmd/fs.d/nfs/lockd/lockd.c @@ -0,0 +1,550 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common 
Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. + */ + +/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ +/* All Rights Reserved */ + +/* + * University Copyright- Copyright (c) 1982, 1986, 1988 + * The Regents of the University of California + * All Rights Reserved + * + * University Acknowledgment- Portions of this document are derived from + * software developed by the University of California, Berkeley, and its + * contributors. + */ + +/* LINTLIBRARY */ +/* PROTOLIB1 */ + +/* + * NLM server + * + * Most of this copied from ../nfsd/nfsd.c + * and then s:NFS:NLM: applied, etc. 
+ */ + +#include <sys/param.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <syslog.h> +#include <tiuser.h> +#include <rpc/rpc.h> +#include <errno.h> +#include <thread.h> +#include <sys/time.h> +#include <sys/file.h> +#include <nfs/nfs.h> +#include <nfs/nfssys.h> +#include <stdio.h> +#include <stdio_ext.h> +#include <stdlib.h> +#include <signal.h> +#include <netconfig.h> +#include <netdir.h> +#include <string.h> +#include <unistd.h> +#include <stropts.h> +#include <sys/tihdr.h> +#include <poll.h> +#include <priv_utils.h> +#include <sys/tiuser.h> +#include <netinet/tcp.h> +#include <deflt.h> +#include <rpcsvc/daemon_utils.h> +#include <rpcsvc/nlm_prot.h> +#include <libintl.h> +#include <libscf.h> +#include <libshare.h> +#include "nfs_tbind.h" +#include "thrpool.h" +#include "smfcfg.h" + +/* Option defaults. See nfssys.h */ +struct lm_svc_args lmargs = { + .version = LM_SVC_CUR_VERS, + /* fd, n_fmly, n_proto, n_rdev (below) */ + .debug = 0, + .timout = 5 * 60, + .grace = 60, + .retransmittimeout = 15 +}; +int max_servers = 20; + + +#define RET_OK 0 /* return code for no error */ +#define RET_ERR 33 /* return code for error(s) */ + +static int nlmsvc(int fd, struct netbuf addrmask, + struct netconfig *nconf); +static int nlmsvcpool(int max_servers); +static void usage(void); + +extern int _nfssys(int, void *); +static void sigterm_handler(void); +static void shutdown_lockd(void); + +extern int daemonize_init(void); +extern void daemonize_fini(int fd); + +static char *MyName; + +/* + * We want to bind to these TLI providers, and in this order, + * because the kernel NLM needs the loopback first for its + * initialization. (It uses it to talk to statd.) + */ +static NETSELDECL(defaultproviders)[] = { + "/dev/ticotsord", + "/dev/tcp", + "/dev/udp", + "/dev/tcp6", + "/dev/udp6", + NULL +}; + +/* + * The following are all globals used by routines in nfs_tbind.c. 
+ */ +size_t end_listen_fds; /* used by conn_close_oldest() */ +size_t num_fds = 0; /* used by multiple routines */ +int listen_backlog = 32; /* used by bind_to_{provider,proto}() */ +int (*Mysvc)(int, struct netbuf, struct netconfig *) = nlmsvc; + /* used by cots_listen_event() */ +int max_conns_allowed = -1; /* used by cots_listen_event() */ + +int +main(int ac, char *av[]) +{ + char *propname = NULL; + char *dir = "/"; + char *provider = (char *)NULL; + struct protob *protobp; + NETSELPDECL(providerp); + sigset_t sgset; + int i, c, pid, ret, val; + int pipe_fd = -1; + struct sigaction act; + + MyName = *av; + + /* + * Initializations that require more privileges than we need to run. + */ + (void) _create_daemon_lock(LOCKD, DAEMON_UID, DAEMON_GID); + svcsetprio(); + + if (__init_daemon_priv(PU_RESETGROUPS|PU_CLEARLIMITSET, + DAEMON_UID, DAEMON_GID, PRIV_SYS_NFS, NULL) == -1) { + (void) fprintf(stderr, "%s should be run with" + " sufficient privileges\n", av[0]); + exit(1); + } + + (void) enable_extended_FILE_stdio(-1, -1); + + /* + * Read in the values from SMF first before we check + * command line options so the options override SMF values. + */ + + /* How long to keep idle connections. */ + propname = "conn_idle_timeout"; /* also -t */ + ret = nfs_smf_get_iprop(propname, &val, + DEFAULT_INSTANCE, SCF_TYPE_INTEGER, LOCKD); + if (ret == SA_OK) { + if (val <= 0) + fprintf(stderr, gettext( + "Invalid %s from SMF"), propname); + else + lmargs.timout = val; + } + + /* Note: debug_level can only be set by args. */ + + /* How long to wait for clients to re-establish locks. 
*/ + propname = "grace_period"; /* also -g */ + ret = nfs_smf_get_iprop(propname, &val, + DEFAULT_INSTANCE, SCF_TYPE_INTEGER, LOCKD); + if (ret == SA_OK) { + if (val <= 0) + fprintf(stderr, gettext( + "Invalid %s from SMF"), propname); + else + lmargs.grace = val; + } + + propname = "listen_backlog"; /* also -l */ + ret = nfs_smf_get_iprop(propname, &val, + DEFAULT_INSTANCE, SCF_TYPE_INTEGER, LOCKD); + if (ret == SA_OK) { + if (val <= 0) + fprintf(stderr, gettext( + "Invalid %s from SMF"), propname); + else + listen_backlog = val; + } + + propname = "max_connections"; /* also -c */ + ret = nfs_smf_get_iprop(propname, &val, + DEFAULT_INSTANCE, SCF_TYPE_INTEGER, LOCKD); + if (ret == SA_OK) { + if (val <= 0) + fprintf(stderr, gettext( + "Invalid %s from SMF"), propname); + else + max_conns_allowed = val; + } + + propname = "max_servers"; /* also argv[1] */ + ret = nfs_smf_get_iprop(propname, &val, + DEFAULT_INSTANCE, SCF_TYPE_INTEGER, LOCKD); + if (ret == SA_OK) { + if (val <= 0) + fprintf(stderr, gettext( + "Invalid %s from SMF"), propname); + else + max_servers = val; + } + + propname = "retrans_timeout"; /* also -r */ + ret = nfs_smf_get_iprop(propname, &val, + DEFAULT_INSTANCE, SCF_TYPE_INTEGER, LOCKD); + if (ret == SA_OK) { + if (val <= 0) + fprintf(stderr, gettext( + "Invalid %s from SMF"), propname); + else + lmargs.retransmittimeout = val; + } + + + while ((c = getopt(ac, av, "c:d:g:l:r:t:")) != EOF) + switch (c) { + case 'c': /* max_connections */ + if ((val = atoi(optarg)) <= 0) + goto badval; + max_conns_allowed = val; + break; + + case 'd': /* debug */ + lmargs.debug = atoi(optarg); + break; + + case 'g': /* grace_period */ + if ((val = atoi(optarg)) <= 0) + goto badval; + lmargs.grace = val; + break; + + case 'l': /* listen_backlog */ + if ((val = atoi(optarg)) <= 0) + goto badval; + listen_backlog = val; + break; + + case 'r': /* retrans_timeout */ + if ((val = atoi(optarg)) <= 0) + goto badval; + lmargs.retransmittimeout = val; + break; + + case 't': /* 
conn_idle_timeout */ + if ((val = atoi(optarg)) <= 0) + goto badval; + lmargs.timout = val; + break; + + badval: + fprintf(stderr, gettext( + "Invalid -%c option value"), c); + /* FALLTHROUGH */ + default: + usage(); + /* NOTREACHED */ + } + + /* + * If there is exactly one more argument, it is the number of + * servers. + */ + if (optind < ac) { + val = atoi(av[optind]); + if (val <= 0) { + fprintf(stderr, gettext( + "Invalid max_servers argument")); + usage(); + } + max_servers = val; + optind++; + } + /* + * If there are two or more arguments, then this is a usage error. + */ + if (optind != ac) + usage(); + + if (lmargs.debug) { + printf("%s: debug= %d, conn_idle_timout= %d," + " grace_period= %d, listen_backlog= %d," + " max_connections= %d, max_servers= %d," + " retrans_timeout= %d\n", + MyName, lmargs.debug, lmargs.timout, + lmargs.grace, listen_backlog, + max_conns_allowed, max_servers, + lmargs.retransmittimeout); + } + + /* + * Set current dir to server root + */ + if (chdir(dir) < 0) { + (void) fprintf(stderr, "%s: ", MyName); + perror(dir); + exit(1); + } + + /* Daemonize, if not debug. */ + if (lmargs.debug == 0) + pipe_fd = daemonize_init(); + + openlog(MyName, LOG_PID | LOG_NDELAY, LOG_DAEMON); + + /* + * establish our lock on the lock file and write our pid to it. + * exit if some other process holds the lock, or if there's any + * error in writing/locking the file. + */ + pid = _enter_daemon_lock(LOCKD); + switch (pid) { + case 0: + break; + case -1: + fprintf(stderr, "error locking for %s: %s", LOCKD, + strerror(errno)); + exit(2); + default: + /* daemon was already running */ + exit(0); + } + + /* + * Block all signals till we spawn other + * threads. + */ + (void) sigfillset(&sgset); + (void) thr_sigsetmask(SIG_BLOCK, &sgset, NULL); + + /* Unregister any previous versions. */ + for (i = NLM_VERS; i < NLM4_VERS; i++) { + svc_unreg(NLM_PROG, i); + } + + /* + * Set up kernel RPC thread pool for the NLM server. 
+ */ + if (nlmsvcpool(max_servers)) { + fprintf(stderr, "Can't set up kernel NLM service: %s. Exiting", + strerror(errno)); + exit(1); + } + + /* + * Set up blocked thread to do LWP creation on behalf of the kernel. + */ + if (svcwait(NLM_SVCPOOL_ID)) { + fprintf(stderr, "Can't set up NLM pool creator: %s. Exiting", + strerror(errno)); + exit(1); + } + + /* + * Install atexit and sigterm handlers + */ + act.sa_handler = sigterm_handler; + act.sa_flags = 0; + + (void) sigaction(SIGTERM, &act, NULL); + (void) atexit(shutdown_lockd); + + /* + * Now open up for signal delivery + */ + (void) thr_sigsetmask(SIG_UNBLOCK, &sgset, NULL); + + /* + * Build a protocol block list for registration. + */ + protobp = (struct protob *)malloc(sizeof (struct protob)); + protobp->serv = "NLM"; + protobp->versmin = NLM_VERS; + protobp->versmax = NLM4_VERS; + protobp->program = NLM_PROG; + protobp->next = (struct protob *)NULL; + + for (providerp = defaultproviders; + *providerp != NULL; providerp++) { + provider = *providerp; + do_one(provider, NULL, protobp, nlmsvc); + } + + free(protobp); + + if (num_fds == 0) { + fprintf(stderr, "Could not start NLM service for any protocol." + " Exiting"); + exit(1); + } + + end_listen_fds = num_fds; + + /* + * lockd is up and running as far as we are concerned. + */ + if (lmargs.debug == 0) + daemonize_fini(pipe_fd); + + /* + * Get rid of unneeded privileges. + */ + __fini_daemon_priv(PRIV_PROC_FORK, PRIV_PROC_EXEC, PRIV_PROC_SESSION, + PRIV_FILE_LINK_ANY, PRIV_PROC_INFO, (char *)NULL); + + /* + * Poll for non-data control events on the transport descriptors. + */ + poll_for_action(); + + /* + * If we get here, something failed in poll_for_action(). 
+ */ + return (1); +} + +static int +nlmsvcpool(int maxservers) +{ + struct svcpool_args npa; + + npa.id = NLM_SVCPOOL_ID; + npa.maxthreads = maxservers; + npa.redline = 0; + npa.qsize = 0; + npa.timeout = 0; + npa.stksize = 0; + npa.max_same_xprt = 0; + return (_nfssys(SVCPOOL_CREATE, &npa)); +} + +static int +ncfmly_to_lmfmly(const char *ncfmly) +{ + if (0 == strcmp(ncfmly, NC_INET)) + return (LM_INET); + if (0 == strcmp(ncfmly, NC_INET6)) + return (LM_INET6); + if (0 == strcmp(ncfmly, NC_LOOPBACK)) + return (LM_LOOPBACK); + return (-1); +} + +static int +nctype_to_lmprot(uint_t semantics) +{ + switch (semantics) { + case NC_TPI_CLTS: + return (LM_UDP); + case NC_TPI_COTS_ORD: + return (LM_TCP); + } + return (-1); +} + +static dev_t +ncdev_to_rdev(const char *ncdev) +{ + struct stat st; + + if (stat(ncdev, &st) < 0) + return (NODEV); + return (st.st_rdev); +} + +static void +sigterm_handler(void) +{ + /* to call atexit handler */ + exit(0); +} + +static void +shutdown_lockd(void) +{ + (void) _nfssys(KILL_LOCKMGR, NULL); +} + + +/* + * Establish NLM service thread. + */ +static int +nlmsvc(int fd, struct netbuf addrmask, struct netconfig *nconf) +{ + struct lm_svc_args lma; + + lma = lmargs; /* init by struct copy */ + + /* + * The kernel code needs to reconstruct a complete + * knetconfig from n_fmly, n_proto. We use these + * two fields to convey the family and semantics. 
+ */ + lma.fd = fd; + lma.n_fmly = ncfmly_to_lmfmly(nconf->nc_protofmly); + lma.n_proto = nctype_to_lmprot(nconf->nc_semantics); + lma.n_rdev = ncdev_to_rdev(nconf->nc_device); + + return (_nfssys(LM_SVC, &lma)); +} + +static void +usage(void) +{ + (void) fprintf(stderr, gettext( + "usage: %s [options] [max_servers]\n"), MyName); + (void) fprintf(stderr, gettext( + "options: (see SMF property descriptions)\n")); + /* Note: don't translate these */ + (void) fprintf(stderr, "\t-c max_connections\n"); + (void) fprintf(stderr, "\t-d debug_level\n"); + (void) fprintf(stderr, "\t-g grace_period\n"); + (void) fprintf(stderr, "\t-l listen_backlog\n"); + (void) fprintf(stderr, "\t-r retrans_timeout\n"); + (void) fprintf(stderr, "\t-t conn_idle_timeout\n"); + + exit(1); +} diff --git a/usr/src/cmd/fs.d/nfs/mount/Makefile b/usr/src/cmd/fs.d/nfs/mount/Makefile index dd28ff761d..dad33922a3 100644 --- a/usr/src/cmd/fs.d/nfs/mount/Makefile +++ b/usr/src/cmd/fs.d/nfs/mount/Makefile @@ -118,5 +118,5 @@ install: $(ROOTETCPROG) lint: webnfs.h webnfs_xdr.c webnfs_client.c lint_SRCS -clean: +clean: $(RM) $(OBJS) webnfs.x webnfs.h webnfs_xdr.c webnfs_client.c diff --git a/usr/src/cmd/fs.d/nfs/statd/sm_proc.c b/usr/src/cmd/fs.d/nfs/statd/sm_proc.c index 492cb9c639..22592eb95d 100644 --- a/usr/src/cmd/fs.d/nfs/statd/sm_proc.c +++ b/usr/src/cmd/fs.d/nfs/statd/sm_proc.c @@ -25,6 +25,7 @@ */ /* * Copyright 2013 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. 
*/ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -59,6 +60,8 @@ #include <netdir.h> #include <synch.h> #include <thread.h> +#include <ifaddrs.h> +#include <errno.h> #include <assert.h> #include "sm_statd.h" @@ -89,14 +92,12 @@ extern struct lifconf *getmyaddrs(void); /* ARGSUSED */ void -sm_status(namep, resp) - sm_name *namep; - sm_stat_res *resp; +sm_stat_svc(sm_name *namep, sm_stat_res *resp) { if (debug) (void) printf("proc sm_stat: mon_name = %s\n", - namep->mon_name); + namep->mon_name); resp->res_stat = stat_succ; resp->state = LOCAL_STATE; @@ -104,9 +105,7 @@ sm_status(namep, resp) /* ARGSUSED */ void -sm_mon(monp, resp) - mon *monp; - sm_stat_res *resp; +sm_mon_svc(mon *monp, sm_stat_res *resp) { mon_id *monidp; monidp = &monp->mon_id; @@ -114,7 +113,7 @@ sm_mon(monp, resp) rw_rdlock(&thr_rwlock); if (debug) { (void) printf("proc sm_mon: mon_name = %s, id = %d\n", - monidp->mon_name, * ((int *)monp->priv)); + monidp->mon_name, * ((int *)monp->priv)); pr_mon(monp->mon_id.mon_name); } @@ -132,17 +131,15 @@ sm_mon(monp, resp) /* ARGSUSED */ void -sm_unmon(monidp, resp) - mon_id *monidp; - sm_stat *resp; +sm_unmon_svc(mon_id *monidp, sm_stat *resp) { rw_rdlock(&thr_rwlock); if (debug) { (void) printf( - "proc sm_unmon: mon_name = %s, [%s, %d, %d, %d]\n", - monidp->mon_name, monidp->my_id.my_name, - monidp->my_id.my_prog, monidp->my_id.my_vers, - monidp->my_id.my_proc); + "proc sm_unmon: mon_name = %s, [%s, %d, %d, %d]\n", + monidp->mon_name, monidp->my_id.my_name, + monidp->my_id.my_prog, monidp->my_id.my_vers, + monidp->my_id.my_proc); pr_mon(monidp->mon_name); } @@ -154,16 +151,14 @@ sm_unmon(monidp, resp) /* ARGSUSED */ void -sm_unmon_all(myidp, resp) - my_id *myidp; - sm_stat *resp; +sm_unmon_all_svc(my_id *myidp, sm_stat *resp) { rw_rdlock(&thr_rwlock); if (debug) (void) printf("proc sm_unmon_all: [%s, %d, %d, %d]\n", - myidp->my_name, - myidp->my_prog, myidp->my_vers, - myidp->my_proc); + myidp->my_name, + myidp->my_prog, myidp->my_vers, + 
myidp->my_proc); delete_mon((char *)NULL, myidp); pr_mon(NULL); resp->state = local_state; @@ -174,21 +169,19 @@ sm_unmon_all(myidp, resp) * Notifies lockd specified by name that state has changed for this server. */ void -sm_notify(ntfp) - stat_chge *ntfp; +sm_notify_svc(stat_chge *ntfp) { rw_rdlock(&thr_rwlock); if (debug) (void) printf("sm_notify: %s state =%d\n", - ntfp->mon_name, ntfp->state); + ntfp->mon_name, ntfp->state); send_notice(ntfp->mon_name, ntfp->state); rw_unlock(&thr_rwlock); } /* ARGSUSED */ void -sm_simu_crash(myidp) - void *myidp; +sm_simu_crash_svc(void *myidp) { int i; struct mon_entry *monitor_q; @@ -727,7 +720,6 @@ thr_send_notice(void *arg) moninfo_t *minfop; minfop = (moninfo_t *)arg; - if (statd_call_lockd(&minfop->id, minfop->state) == -1) { if (debug && minfop->id.mon_id.mon_name) (void) printf("problem with notifying %s failure, " @@ -759,7 +751,7 @@ statd_call_lockd(monp, state) { enum clnt_stat clnt_stat; struct timeval tottimeout; - struct status stat; + struct sm_status stat; my_id *my_idp; char *mon_name; int i; @@ -768,7 +760,7 @@ statd_call_lockd(monp, state) mon_name = monp->mon_id.mon_name; my_idp = &monp->mon_id.my_id; - (void) memset(&stat, 0, sizeof (struct status)); + (void) memset(&stat, 0, sizeof (stat)); stat.mon_name = mon_name; stat.state = state; for (i = 0; i < 16; i++) { @@ -781,12 +773,14 @@ statd_call_lockd(monp, state) tottimeout.tv_sec = SM_RPC_TIMEOUT; tottimeout.tv_usec = 0; - if ((clnt = create_client(my_idp->my_name, my_idp->my_prog, - my_idp->my_vers, &tottimeout)) == (CLIENT *) NULL) { - return (-1); + clnt = create_client(my_idp->my_name, my_idp->my_prog, my_idp->my_vers, + "ticotsord", &tottimeout); + if (clnt == NULL) { + return (-1); } - clnt_stat = clnt_call(clnt, my_idp->my_proc, xdr_status, (char *)&stat, + clnt_stat = clnt_call(clnt, my_idp->my_proc, + xdr_sm_status, (char *)&stat, xdr_void, NULL, tottimeout); if (debug) { (void) printf("clnt_stat=%s(%d)\n", @@ -808,21 +802,35 @@ 
statd_call_lockd(monp, state) * Client handle created. */ CLIENT * -create_client(host, prognum, versnum, utimeout) - char *host; - int prognum; - int versnum; - struct timeval *utimeout; +create_client(char *host, int prognum, int versnum, char *netid, + struct timeval *utimeout) { int fd; struct timeval timeout; CLIENT *client; struct t_info tinfo; - if ((client = clnt_create_timed(host, prognum, versnum, - "netpath", utimeout)) == NULL) { + if (netid == NULL) { + client = clnt_create_timed(host, prognum, versnum, + "netpath", utimeout); + } else { + struct netconfig *nconf; + + nconf = getnetconfigent(netid); + if (nconf == NULL) { + return (NULL); + } + + client = clnt_tp_create_timed(host, prognum, versnum, nconf, + utimeout); + + freenetconfigent(nconf); + } + + if (client == NULL) { return (NULL); } + (void) CLNT_CONTROL(client, CLGET_FD, (caddr_t)&fd); if (t_getinfo(fd, &tinfo) != -1) { if (tinfo.servtype == T_CLTS) { @@ -832,7 +840,7 @@ create_client(host, prognum, versnum, utimeout) timeout.tv_usec = 0; timeout.tv_sec = SM_CLTS_TIMEOUT; (void) CLNT_CONTROL(client, - CLSET_RETRY_TIMEOUT, (caddr_t)&timeout); + CLSET_RETRY_TIMEOUT, (caddr_t)&timeout); } } else return (NULL); @@ -1301,3 +1309,72 @@ str_cmp_address_specifier(char *specifier1, char *specifier2) } return (1); } + +/* + * Add IP address strings to the host_name list. + */ +void +merge_ips(void) +{ + struct ifaddrs *ifap, *cifap; + int error; + + error = getifaddrs(&ifap); + if (error) { + syslog(LOG_WARNING, "getifaddrs error: '%s'", + strerror(errno)); + return; + } + + for (cifap = ifap; cifap != NULL; cifap = cifap->ifa_next) { + struct sockaddr *sa = cifap->ifa_addr; + char addr_str[INET6_ADDRSTRLEN]; + void *addr = NULL; + + switch (sa->sa_family) { + case AF_INET: { + struct sockaddr_in *sin = (struct sockaddr_in *)sa; + + /* Skip loopback addresses. 
*/ + if (sin->sin_addr.s_addr == htonl(INADDR_LOOPBACK)) { + continue; + } + + addr = &sin->sin_addr; + break; + } + + case AF_INET6: { + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa; + + /* Skip loopback addresses. */ + if (IN6_IS_ADDR_LOOPBACK(&sin6->sin6_addr)) { + continue; + } + + addr = &sin6->sin6_addr; + break; + } + + default: + syslog(LOG_WARNING, "Unknown address family %d for " + "interface %s", sa->sa_family, cifap->ifa_name); + continue; + } + + if (inet_ntop(sa->sa_family, addr, addr_str, sizeof (addr_str)) + == NULL) { + syslog(LOG_WARNING, "Failed to convert address into " + "string representation for interface '%s' " + "address family %d", cifap->ifa_name, + sa->sa_family); + continue; + } + + if (!in_host_array(addr_str)) { + add_to_host_array(addr_str); + } + } + + freeifaddrs(ifap); +} diff --git a/usr/src/cmd/fs.d/nfs/statd/sm_statd.c b/usr/src/cmd/fs.d/nfs/statd/sm_statd.c index 89dce611f2..420dd68620 100644 --- a/usr/src/cmd/fs.d/nfs/statd/sm_statd.c +++ b/usr/src/cmd/fs.d/nfs/statd/sm_statd.c @@ -36,7 +36,9 @@ * contributors. */ -#pragma ident "%Z%%M% %I% %E% SMI" +/* + * Copyright (c) 2012 by Delphix. All rights reserved. 
+ */ /* * sm_statd.c consists of routines used for the intermediate @@ -130,20 +132,20 @@ statd_init() for (i = 0; i < pathix; i++) { (void) sprintf(state_file, "%s/statmon/state", path_name[i]); if ((fp_tmp = fopen(state_file, "r+")) == (FILE *)NULL) { - if ((fp_tmp = fopen(state_file, "w+")) - == (FILE *)NULL) { + if ((fp_tmp = fopen(state_file, "w+")) == + (FILE *)NULL) { if (debug) - syslog(LOG_ERR, - "can't open %s: %m", - state_file); + syslog(LOG_ERR, + "can't open %s: %m", + state_file); continue; } else (void) chmod(state_file, 0644); } if ((fscanf(fp_tmp, "%d", &tmp_state)) == EOF) { if (debug) - syslog(LOG_ERR, - "statd: %s: file empty\n", state_file); + syslog(LOG_ERR, + "statd: %s: file empty\n", state_file); (void) fclose(fp_tmp); continue; } @@ -151,7 +153,7 @@ statd_init() LOCAL_STATE = tmp_state; if (debug) (void) printf("Update LOCAL STATE: %d\n", - tmp_state); + tmp_state); } (void) fclose(fp_tmp); } @@ -180,8 +182,8 @@ statd_init() for (i = 0; i < pathix; i++) { (void) sprintf(state_file, "%s/statmon/state", path_name[i]); if ((fp_tmp = fopen(state_file, "r+")) == (FILE *)NULL) { - if ((fp_tmp = fopen(state_file, "w+")) - == (FILE *)NULL) { + if ((fp_tmp = fopen(state_file, "w+")) == + (FILE *)NULL) { syslog(LOG_ERR, "can't open %s: %m", state_file); continue; @@ -223,8 +225,8 @@ statd_init() while ((dirp = readdir(dp)) != NULL) { if (strcmp(dirp->d_name, ".") != 0 && - strcmp(dirp->d_name, "..") != 0) { - /* rename all entries from CURRENT to BACKUP */ + strcmp(dirp->d_name, "..") != 0) { + /* rename all entries from CURRENT to BACKUP */ (void) move_file(CURRENT, dirp->d_name, BACKUP); } } @@ -297,17 +299,17 @@ thr_statd_init() name = strdup(dirp->d_name); if (name == (char *)NULL) { syslog(LOG_ERR, - "statd: unable to allocate space for name %s\n", - dirp->d_name); + "statd: unable to allocate space for name %s\n", + dirp->d_name); continue; } /* Create a thread to do a statd_call_statd for name */ if (thr_create(NULL, NULL, thr_call_statd, - 
(void *) name, 0, 0)) { + (void *) name, 0, 0)) { syslog(LOG_ERR, - "statd: unable to create thr_call_statd() for name %s.\n", - dirp->d_name); + "statd: unable to create thr_call_statd() " + "for name %s.\n", dirp->d_name); free(name); continue; } @@ -334,7 +336,7 @@ thr_statd_init() num_threads = 0; while ((dirp = readdir(dp)) != NULL) { if (strcmp(dirp->d_name, ".") == 0 || - strcmp(dirp->d_name, "..") == 0) { + strcmp(dirp->d_name, "..") == 0) { continue; } @@ -350,7 +352,7 @@ thr_statd_init() if (debug) { (void) printf("thr_statd_init: legacy %s\n", - dirp->d_name); + dirp->d_name); } /* @@ -372,17 +374,17 @@ thr_statd_init() name = strdup(dirp->d_name); if (name == (char *)NULL) { syslog(LOG_ERR, - "statd: unable to allocate space for name %s\n", - dirp->d_name); + "statd: unable to allocate space for name %s\n", + dirp->d_name); continue; } /* Create a thread to do a statd_call_statd for name */ if (thr_create(NULL, NULL, thr_call_statd, - (void *) name, 0, 0)) { + (void *) name, 0, 0)) { syslog(LOG_ERR, - "statd: unable to create thr_call_statd() for name %s.\n", - dirp->d_name); + "statd: unable to create thr_call_statd() " + "for name %s.\n", dirp->d_name); free(name); continue; } @@ -410,7 +412,7 @@ thr_statd_init() if ((mkdir(buf, SM_DIRECTORY_MODE)) == -1) { if (errno != EEXIST) syslog(LOG_ERR, "statd: mkdir %s error %m\n", - buf); + buf); else copydir_from_to(BACKUP, buf); } else @@ -434,7 +436,7 @@ thr_statd_init() /* Continue to notify statd on hosts that were unreachable. 
*/ if (thr_create(NULL, NULL, sm_try, NULL, THR_DETACHED, 0)) syslog(LOG_ERR, - "statd: unable to create thread for sm_try().\n"); + "statd: unable to create thread for sm_try().\n"); thr_exit((void *) 0); #ifdef lint return (0); @@ -489,8 +491,8 @@ thr_call_statd(void *namep) if (n <= 0) { if (debug >= 2) { (void) printf( - "thr_call_statd: can't read link %s\n", - path); + "thr_call_statd: can't read " + "link %s\n", path); } } else { rname[n] = '\0'; @@ -581,8 +583,8 @@ statd_call_statd(name) tottimeout.tv_sec = SM_RPC_TIMEOUT; tottimeout.tv_usec = 0; - if ((clnt = create_client(name_or_addr, SM_PROG, SM_VERS, - &tottimeout)) == (CLIENT *) NULL) { + if ((clnt = create_client(name_or_addr, SM_PROG, SM_VERS, NULL, + &tottimeout)) == NULL) { return (-1); } @@ -675,8 +677,8 @@ sm_try() */ if (delay == 0) syslog(LOG_WARNING, - "statd: host %s is not responding\n", - nl->name); + "statd: host %s is not " + "responding\n", nl->name); } } /* @@ -1035,16 +1037,16 @@ remove_single_name(char *name, char *dir1, char *dir2) char dirpath[MAXPATHLEN]; char rname[MAXNAMELEN + 1]; /* +1 for NULL term */ - if (strlen(name) + strlen(dir1) + (dir2 != NULL ? strlen(dir2) : 0) - + 3 > MAXPATHLEN) { + if (strlen(name) + strlen(dir1) + (dir2 != NULL ? 
strlen(dir2) : 0) + + 3 > MAXPATHLEN) { if (dir2 != NULL) syslog(LOG_ERR, - "statd: pathname too long: %s/%s/%s\n", - dir1, dir2, name); + "statd: pathname too long: %s/%s/%s\n", + dir1, dir2, name); else syslog(LOG_ERR, - "statd: pathname too long: %s/%s\n", - dir1, name); + "statd: pathname too long: %s/%s\n", + dir1, name); return; } @@ -1078,12 +1080,13 @@ remove_single_name(char *name, char *dir1, char *dir2) if (debug >= 2) { if (error < 0) { (void) printf( - "remove_name: can't unlink %s\n", - dirpath); + "remove_name: can't " + "unlink %s\n", + dirpath); } else { (void) printf( - "remove_name: unlinked %s\n", - dirpath); + "remove_name: unlinked ", + "%s\n", dirpath); } } } @@ -1093,7 +1096,7 @@ remove_single_name(char *name, char *dir1, char *dir2) * here for analysis by the system administrator. */ syslog(LOG_ERR, - "statd: can't read link %s: %m\n", path); + "statd: can't read link %s: %m\n", path); } } @@ -1122,13 +1125,13 @@ count_symlinks(char *dir, char *name, int *count) if ((dp = opendir(dir)) == (DIR *)NULL) { syslog(LOG_ERR, "count_symlinks: open %s dir, error %m\n", - dir); + dir); return (-1); } while ((dirp = readdir(dp)) != NULL) { if (strcmp(dirp->d_name, ".") == 0 || - strcmp(dirp->d_name, "..") == 0) { + strcmp(dirp->d_name, "..") == 0) { continue; } @@ -1141,8 +1144,8 @@ count_symlinks(char *dir, char *name, int *count) if (n <= 0) { if (debug >= 2) { (void) printf( - "count_symlinks: can't read link %s\n", - lpath); + "count_symlinks: can't read link " + "%s\n", lpath); } continue; } @@ -1329,15 +1332,15 @@ record_addr(char *name, sa_family_t family, struct netobj *ah) (void) printf("record_addr: addr= %x\n", addr.s_addr); else if (family == AF_INET6) (void) printf("record_addr: addr= %x\n", \ - ((struct in6_addr *)addr6)->s6_addr); + ((struct in6_addr *)addr6)->s6_addr); } if (family == AF_INET) { if (addr.s_addr == INADDR_ANY || ((addr.s_addr && 0xff000000) == 0)) { syslog(LOG_DEBUG, - "record_addr: illegal IP address %x\n", - 
addr.s_addr); + "record_addr: illegal IP address %x\n", + addr.s_addr); return; } } @@ -1346,28 +1349,27 @@ record_addr(char *name, sa_family_t family, struct netobj *ah) famstr = family2string(family); if (famstr == NULL) { syslog(LOG_DEBUG, - "record_addr: unsupported address family %d\n", - family); + "record_addr: unsupported address family %d\n", + family); return; } switch (family) { char abuf[INET6_ADDRSTRLEN]; - case AF_INET: + case AF_INET: (void) sprintf(ascii_addr, "%s.%s", famstr, inet_ntoa(addr)); break; - case AF_INET6: + case AF_INET6: (void) sprintf(ascii_addr, "%s.%s", famstr,\ inet_ntop(family, addr6, abuf, sizeof (abuf))); break; - default: + default: if (debug) { (void) printf( - "record_addr: family2string supports unknown family %d (%s)\n", - family, - famstr); + "record_addr: family2string supports unknown " + "family %d (%s)\n", family, famstr); } free(famstr); return; @@ -1389,13 +1391,13 @@ record_addr(char *name, sa_family_t family, struct netobj *ah) */ for (i = 0; i < pathix; i++) { path_len = strlen(path_name[i]) + - strlen("/statmon/sm/") + - strlen(name) + 1; + strlen("/statmon/sm/") + + strlen(name) + 1; if (path_len > MAXPATHLEN) { syslog(LOG_ERR, - "statd: pathname too long: %s/statmon/sm/%s\n", - path_name[i], name); + "statd: pathname too long: %s/statmon/sm/%s\n", + path_name[i], name); continue; } (void) strcpy(path, path_name[i]); diff --git a/usr/src/cmd/fs.d/nfs/statd/sm_statd.h b/usr/src/cmd/fs.d/nfs/statd/sm_statd.h index 8a5b73d144..e1a5974678 100644 --- a/usr/src/cmd/fs.d/nfs/statd/sm_statd.h +++ b/usr/src/cmd/fs.d/nfs/statd/sm_statd.h @@ -37,11 +37,14 @@ * contributors. */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. 
+ */ + #ifndef _SM_STATD_H #define _SM_STATD_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus extern "C" { #endif @@ -182,20 +185,27 @@ extern int create_file(char *name); extern void delete_file(char *name); extern void record_name(char *name, int op); extern void sm_crash(void); -extern void sm_notify(stat_chge *ntfp); extern void statd_init(); extern void merge_hosts(void); -extern CLIENT *create_client(char *, int, int, struct timeval *); +extern void merge_ips(void); +extern CLIENT *create_client(char *, int, int, char *, struct timeval *); extern char *xmalloc(unsigned); -extern void sm_status(sm_name *namep, sm_stat_res *resp); -extern void sm_mon(mon *monp, sm_stat_res *resp); -extern void sm_unmon(mon_id *monidp, sm_stat *resp); -extern void sm_unmon_all(my_id *myidp, sm_stat *resp); -extern void sm_simu_crash(void *myidp); + +/* + * RPC service functions, slightly different here than the + * generated ones in sm_inter.h + */ +extern void nsmaddrproc1_reg(reg1args *, reg1res *); +extern void sm_stat_svc(sm_name *namep, sm_stat_res *resp); +extern void sm_mon_svc(mon *monp, sm_stat_res *resp); +extern void sm_unmon_svc(mon_id *monidp, sm_stat *resp); +extern void sm_unmon_all_svc(my_id *myidp, sm_stat *resp); +extern void sm_simu_crash_svc(void *myidp); +extern void sm_notify_svc(stat_chge *ntfp); + extern void sm_inithash(); extern void copydir_from_to(char *from_dir, char *to_dir); extern int str_cmp_unqual_hostname(char *, char *); -extern void nsmaddrproc1_reg(reg1args *, reg1res *); extern void record_addr(char *name, sa_family_t family, struct netobj *ah); extern int is_symlink(char *file); extern int create_symlink(char *todir, char *rname, char *lname); diff --git a/usr/src/cmd/fs.d/nfs/statd/sm_svc.c b/usr/src/cmd/fs.d/nfs/statd/sm_svc.c index b2d8a8171c..1f657a0a48 100644 --- a/usr/src/cmd/fs.d/nfs/statd/sm_svc.c +++ b/usr/src/cmd/fs.d/nfs/statd/sm_svc.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 1989, 2010, Oracle and/or its affiliates. 
All rights reserved. + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -35,6 +36,10 @@ * contributors. */ +/* + * Copyright (c) 2012 by Delphix. All rights reserved. + */ + #include <stdio.h> #include <stdio_ext.h> #include <stdlib.h> @@ -219,11 +224,13 @@ sm_prog_1(rqstp, transp) local = (char *(*)()) nsmaddrproc1_reg; break; + case NSMADDRPROC1_UNREG: /* Not impl. */ default: svcerr_noproc(transp); return; } } else { + /* Must be SM_PROG */ switch (rqstp->rq_proc) { case NULLPROC: svc_sendreply(transp, xdr_void, (caddr_t)NULL); @@ -232,37 +239,37 @@ sm_prog_1(rqstp, transp) case SM_STAT: xdr_argument = xdr_sm_name; xdr_result = xdr_sm_stat_res; - local = (char *(*)()) sm_status; + local = (char *(*)()) sm_stat_svc; break; case SM_MON: xdr_argument = xdr_mon; xdr_result = xdr_sm_stat_res; - local = (char *(*)()) sm_mon; + local = (char *(*)()) sm_mon_svc; break; case SM_UNMON: xdr_argument = xdr_mon_id; xdr_result = xdr_sm_stat; - local = (char *(*)()) sm_unmon; + local = (char *(*)()) sm_unmon_svc; break; case SM_UNMON_ALL: xdr_argument = xdr_my_id; xdr_result = xdr_sm_stat; - local = (char *(*)()) sm_unmon_all; + local = (char *(*)()) sm_unmon_all_svc; break; case SM_SIMU_CRASH: xdr_argument = xdr_void; xdr_result = xdr_void; - local = (char *(*)()) sm_simu_crash; + local = (char *(*)()) sm_simu_crash_svc; break; case SM_NOTIFY: xdr_argument = xdr_stat_chge; xdr_result = xdr_void; - local = (char *(*)()) sm_notify; + local = (char *(*)()) sm_notify_svc; break; default: @@ -284,8 +291,8 @@ sm_prog_1(rqstp, transp) } if (!svc_freeargs(transp, xdr_argument, (caddr_t)&argument)) { - syslog(LOG_ERR, "statd: unable to free arguments\n"); - } + syslog(LOG_ERR, "statd: unable to free arguments\n"); + } } /* @@ -584,6 +591,9 @@ main(int argc, char *argv[]) /* Get other aliases from each interface. */ merge_hosts(); + /* Get all of the configured IP addresses. 
*/ + merge_ips(); + /* * Set to automatic mode such that threads are automatically * created diff --git a/usr/src/head/Makefile b/usr/src/head/Makefile index 62f8ee08d7..21a5dfc1f9 100644 --- a/usr/src/head/Makefile +++ b/usr/src/head/Makefile @@ -263,7 +263,6 @@ RPCSVC_GEN_HDRS = \ nfs_prot.h \ nfs4_prot.h \ nis.h \ - nlm_prot.h \ rex.h \ rquota.h \ rstat.h \ diff --git a/usr/src/lib/librpcsvc/common/mapfile-vers b/usr/src/lib/librpcsvc/common/mapfile-vers index 400ad97038..1925ffb4d9 100644 --- a/usr/src/lib/librpcsvc/common/mapfile-vers +++ b/usr/src/lib/librpcsvc/common/mapfile-vers @@ -20,6 +20,7 @@ # # # Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. +# Copyright 2011 Nexenta Systems, Inc. All rights reserved. # # @@ -76,8 +77,6 @@ SYMBOL_VERSION SUNWprivate_1.1 { xdr_fhandle; xdr_fhandle3; xdr_fhstatus; - xdr_fsh4_access; - xdr_fsh4_mode; xdr_fsh_access; xdr_fsh_mode; xdr_groupnode; @@ -127,17 +126,17 @@ SYMBOL_VERSION SUNWprivate_1.1 { xdr_ppathcnf; xdr_reg1args; xdr_reg1res; - xdr_res; xdr_rstat_timeval; xdr_rusers_utmp; xdr_sm_name; + xdr_sm_res; xdr_sm_stat; xdr_sm_stat_res; + xdr_sm_status; xdr_sprayarr; xdr_spraycumul; xdr_spraytimeval; xdr_stat_chge; - xdr_status; xdr_timeval; xdr_uint32; xdr_uint64; diff --git a/usr/src/uts/Makefile b/usr/src/uts/Makefile index 4def667e08..9197d3c8e6 100644 --- a/usr/src/uts/Makefile +++ b/usr/src/uts/Makefile @@ -135,6 +135,7 @@ COMMON_HDRDIRS= common/avs \ common/fs \ common/gssapi \ common/idmap \ + common/klm \ common/inet \ common/inet/ipf/netinet \ common/inet/kssl \ @@ -161,11 +162,14 @@ $(CLOSED_BUILD)COMMON_HDRDIRS += $(CLOSED)/uts/common/sys # # Subset of COMMON_HDRDIRS in which at least one header is generated -# at runtime (e.g., rpcgen). (This is a partial list; there are -# other directories that should be included and do not yet have the -# necessary Makefile support. See 6414855.) +# at runtime (e.g., rpcgen), and in which "make clean" should run. 
+# Other directories should be included here, but do not yet have the +# necessary Makefile support (make clean). See 6414855. # -DYNHDRDIRS = common/rpcsvc common/idmap common/sys +DYNHDRDIRS = common/idmap \ + common/klm \ + common/rpcsvc \ + common/sys sparc_HDRDIRS= sun/sys i386_HDRDIRS= i86pc/vm i86xpv/vm @@ -185,6 +189,7 @@ all_h: FRC @cd common/rpcsvc; pwd; $(MAKE) $@ @cd common/gssapi; pwd; $(MAKE) $@ @cd common/idmap; pwd; $(MAKE) $@ + @cd common/klm; pwd; $(MAKE) $@ clean clobber: $($(MACH)_ARCHITECTURES) $(DYNHDRDIRS) @if [ '$(PATCH_BUILD)' != '#' ] ; then \ diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files index 9981ef6e9c..a5616b9ad4 100644 --- a/usr/src/uts/common/Makefile.files +++ b/usr/src/uts/common/Makefile.files @@ -1467,6 +1467,23 @@ RPCMOD_OBJS += rpcmod.o clnt_cots.o clnt_clts.o \ rpcsys.o xdr_sizeof.o clnt_rdma.o svc_rdma.o \ xdr_rdma.o rdma_subr.o xdrrdma_sizeof.o +KLMMOD_OBJS += klmmod.o \ + nlm_impl.o \ + nlm_rpc_handle.o \ + nlm_dispatch.o \ + nlm_rpc_svc.o \ + nlm_client.o \ + nlm_service.o \ + nlm_prot_clnt.o \ + nlm_prot_xdr.o \ + nlm_rpc_clnt.o \ + nsm_addr_clnt.o \ + nsm_addr_xdr.o \ + sm_inter_clnt.o \ + sm_inter_xdr.o + +KLMOPS_OBJS += klmops.o + TLIMOD_OBJS += tlimod.o t_kalloc.o t_kbind.o t_kclose.o \ t_kconnect.o t_kfree.o t_kgtstate.o t_kopen.o \ t_krcvudat.o t_ksndudat.o t_kspoll.o t_kunbind.o \ diff --git a/usr/src/uts/common/Makefile.rules b/usr/src/uts/common/Makefile.rules index cf94832f03..b7e8aa2ea3 100644 --- a/usr/src/uts/common/Makefile.rules +++ b/usr/src/uts/common/Makefile.rules @@ -1392,6 +1392,10 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/common/kiconv/kiconv_tc/%.c $(COMPILE.c) -o $@ $< $(CTFCONVERT_O) +$(OBJS_DIR)/%.o: $(UTSBASE)/common/klm/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + $(OBJS_DIR)/%.o: $(UTSBASE)/common/kmdb/%.c $(COMPILE.c) -o $@ $< $(CTFCONVERT_O) @@ -2592,6 +2596,9 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/common/kiconv/kiconv_sc/%.c $(LINTS_DIR)/%.ln: 
$(UTSBASE)/common/kiconv/kiconv_tc/%.c @($(LHEAD) $(LINT.c) $< $(LTAIL)) +$(LINTS_DIR)/%.ln: $(UTSBASE)/common/klm/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) + $(LINTS_DIR)/%.ln: $(UTSBASE)/common/kmdb/%.c @($(LHEAD) $(LINT.c) $< $(LTAIL)) diff --git a/usr/src/uts/common/io/tl.c b/usr/src/uts/common/io/tl.c index d675187e71..f5cd181284 100644 --- a/usr/src/uts/common/io/tl.c +++ b/usr/src/uts/common/io/tl.c @@ -24,6 +24,7 @@ */ /* * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ /* @@ -4252,7 +4253,7 @@ tl_addr_req(mblk_t *mp, tl_endpt_t *tep) static void tl_connected_cots_addr_req(mblk_t *mp, tl_endpt_t *tep) { - tl_endpt_t *peer_tep; + tl_endpt_t *peer_tep = tep->te_conp; size_t ack_sz; mblk_t *ackmp; struct T_addr_ack *taa; @@ -4263,11 +4264,15 @@ tl_connected_cots_addr_req(mblk_t *mp, tl_endpt_t *tep) return; } + if (peer_tep == NULL || peer_tep->te_closing) { + tl_error_ack(tep->te_wq, mp, TSYSERR, ECONNRESET, T_ADDR_REQ); + return; + } + ASSERT(tep->te_state >= TS_IDLE); ack_sz = sizeof (struct T_addr_ack); ack_sz += T_ALIGN(tep->te_alen); - peer_tep = tep->te_conp; ack_sz += peer_tep->te_alen; ackmp = tpi_ack_alloc(mp, ack_sz, M_PCPROTO, T_ADDR_ACK); diff --git a/usr/src/uts/common/klm/Makefile b/usr/src/uts/common/klm/Makefile new file mode 100644 index 0000000000..3c6d5eb673 --- /dev/null +++ b/usr/src/uts/common/klm/Makefile @@ -0,0 +1,73 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy is of the CDDL is also available via the Internet +# at http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2010 Nexenta Systems, Inc. All rights reserved. +# Copyright (c) 2012 by Delphix. All rights reserved. 
+# + +include ../../../Makefile.master + +NLM_PROT_X= ../rpcsvc/nlm_prot.x +SM_INTER_X= ../rpcsvc/sm_inter.x +NSM_ADDR_X= ../rpcsvc/nsm_addr.x + +RPCGENFLAGS = -C -M -i 0 +SED_INCL='/^.include/s:\.\..*/rpcsvc:rpcsvc:' + +DERIVED_FILES= nlm_prot_clnt.c nlm_prot_xdr.c \ + sm_inter_clnt.c sm_inter_xdr.c \ + nsm_addr_clnt.c nsm_addr_xdr.c + +install_h: all_h + +all_h: $(DERIVED_FILES) + +nlm_prot_clnt.c : $(NLM_PROT_X) nlm_prot_clnt.sed + $(RPCGEN) $(RPCGENFLAGS) -l -o $@.tmp $(NLM_PROT_X) + sed -f nlm_prot_clnt.sed < $@.tmp > $@ + $(RM) -f $@.tmp + +nlm_prot_xdr.c : $(NLM_PROT_X) + $(RPCGEN) $(RPCGENFLAGS) -c -o $@.tmp $(NLM_PROT_X) + sed -e $(SED_INCL) < $@.tmp > $@ + $(RM) -f $@.tmp + +sm_inter_clnt.c : $(SM_INTER_X) sm_inter_clnt.sed + $(RPCGEN) $(RPCGENFLAGS) -l -o $@.tmp $(SM_INTER_X) + sed -f sm_inter_clnt.sed < $@.tmp > $@ + $(RM) -f $@.tmp + +sm_inter_xdr.c : $(SM_INTER_X) + $(RPCGEN) $(RPCGENFLAGS) -c -o $@.tmp $(SM_INTER_X) + sed -e $(SED_INCL) < $@.tmp > $@ + $(RM) -f $@.tmp + +nsm_addr_clnt.c : $(NSM_ADDR_X) nsm_addr_clnt.sed + $(RPCGEN) $(RPCGENFLAGS) -l -o $@.tmp $(NSM_ADDR_X) + sed -f nsm_addr_clnt.sed < $@.tmp > $@ + $(RM) -f $@.tmp + +nsm_addr_xdr.c : $(NSM_ADDR_X) + $(RPCGEN) $(RPCGENFLAGS) -c -o $@.tmp $(NSM_ADDR_X) + sed -e $(SED_INCL) < $@.tmp > $@ + $(RM) -f $@.tmp + +check: + +clean: + $(RM) $(DERIVED_FILES) + +clobber: clean + +lint: + +.KEEP_STATE: diff --git a/usr/src/uts/common/klm/klmmod.c b/usr/src/uts/common/klm/klmmod.c new file mode 100644 index 0000000000..51ed43e198 --- /dev/null +++ b/usr/src/uts/common/klm/klmmod.c @@ -0,0 +1,533 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. 
A copy is of the CDDL is also available via the Internet + * at http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. + */ + +/* + * NFS Lock Manager, server-side and common. + * + * This file contains all the external entry points of klmmod. + * Basically, this is the "glue" to the BSD nlm code. + */ + +#include <sys/types.h> +#include <sys/errno.h> +#include <sys/modctl.h> +#include <sys/flock.h> + +#include <nfs/nfs.h> +#include <nfs/nfssys.h> +#include <nfs/lm.h> +#include <rpcsvc/nlm_prot.h> +#include "nlm_impl.h" + +static struct modlmisc modlmisc = { + &mod_miscops, "lock mgr common module" +}; + +static struct modlinkage modlinkage = { + MODREV_1, &modlmisc, NULL +}; + +/* + * Cluster node ID. Zero unless we're part of a cluster. + * Set by lm_set_nlmid_flk. Pass to lm_set_nlm_status. + * We're not yet doing "clustered" NLM stuff. + */ +int lm_global_nlmid = 0; + +/* + * Call-back hook for clusters: Set lock manager status. + * If this hook is set, call this instead of the usual + * flk_set_lockmgr_status(FLK_LOCKMGR_UP / DOWN); + */ +void (*lm_set_nlm_status)(int nlm_id, flk_nlm_status_t) = NULL; + +/* + * Call-back hook for clusters: Delete all locks held by sysid. + * Call from code that drops all client locks (for which we're + * the server) i.e. after the SM tells us a client has crashed. 
+ */ +void (*lm_remove_file_locks)(int) = NULL; + +krwlock_t lm_lck; +zone_key_t nlm_zone_key; + +/* + * Init/fini per-zone stuff for klm + */ +/* ARGSUSED */ +void * +lm_zone_init(zoneid_t zoneid) +{ + struct nlm_globals *g; + + g = kmem_zalloc(sizeof (*g), KM_SLEEP); + + avl_create(&g->nlm_hosts_tree, nlm_host_cmp, + sizeof (struct nlm_host), + offsetof(struct nlm_host, nh_by_addr)); + + g->nlm_hosts_hash = mod_hash_create_idhash("nlm_host_by_sysid", + 64, mod_hash_null_valdtor); + + TAILQ_INIT(&g->nlm_idle_hosts); + TAILQ_INIT(&g->nlm_slocks); + + mutex_init(&g->lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&g->nlm_gc_sched_cv, NULL, CV_DEFAULT, NULL); + cv_init(&g->nlm_gc_finish_cv, NULL, CV_DEFAULT, NULL); + mutex_init(&g->clean_lock, NULL, MUTEX_DEFAULT, NULL); + + g->lockd_pid = 0; + g->run_status = NLM_ST_DOWN; + + nlm_globals_register(g); + return (g); +} + +/* ARGSUSED */ +void +lm_zone_fini(zoneid_t zoneid, void *data) +{ + struct nlm_globals *g = data; + + ASSERT(avl_is_empty(&g->nlm_hosts_tree)); + avl_destroy(&g->nlm_hosts_tree); + mod_hash_destroy_idhash(g->nlm_hosts_hash); + + ASSERT(g->nlm_gc_thread == NULL); + mutex_destroy(&g->lock); + cv_destroy(&g->nlm_gc_sched_cv); + cv_destroy(&g->nlm_gc_finish_cv); + mutex_destroy(&g->clean_lock); + + nlm_globals_unregister(g); + kmem_free(g, sizeof (*g)); +} + + + +/* + * **************************************************************** + * module init, fini, info + */ +int +_init() +{ + int retval; + + rw_init(&lm_lck, NULL, RW_DEFAULT, NULL); + nlm_init(); + + zone_key_create(&nlm_zone_key, lm_zone_init, NULL, lm_zone_fini); + /* Per-zone lockmgr data. See: os/flock.c */ + zone_key_create(&flock_zone_key, flk_zone_init, NULL, flk_zone_fini); + + retval = mod_install(&modlinkage); + if (retval == 0) + return (0); + + /* + * mod_install failed! 
undo above, reverse order + */ + + (void) zone_key_delete(flock_zone_key); + flock_zone_key = ZONE_KEY_UNINITIALIZED; + (void) zone_key_delete(nlm_zone_key); + rw_destroy(&lm_lck); + + return (retval); +} + +int +_fini() +{ + /* Don't unload. */ + return (EBUSY); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&modlinkage, modinfop)); +} + + + +/* + * **************************************************************** + * Stubs listed in modstubs.s + */ + +/* + * klm system calls. Start service on some endpoint. + * Called by nfssys() LM_SVC, from lockd. + */ +int +lm_svc(struct lm_svc_args *args) +{ + struct knetconfig knc; + const char *netid; + struct nlm_globals *g; + struct file *fp = NULL; + int err = 0; + + /* Get our "globals" */ + g = zone_getspecific(nlm_zone_key, curzone); + + /* + * Check version of lockd calling. + */ + if (args->version != LM_SVC_CUR_VERS) { + NLM_ERR("lm_svc: Version mismatch " + "(given 0x%x, expected 0x%x)\n", + args->version, LM_SVC_CUR_VERS); + return (EINVAL); + } + + /* + * Build knetconfig, checking arg values. + * Also come up with the "netid" string. + * (With some knowledge of /etc/netconfig) + */ + bzero(&knc, sizeof (knc)); + switch (args->n_proto) { + case LM_TCP: + knc.knc_semantics = NC_TPI_COTS_ORD; + knc.knc_proto = NC_TCP; + break; + case LM_UDP: + knc.knc_semantics = NC_TPI_CLTS; + knc.knc_proto = NC_UDP; + break; + default: + NLM_ERR("nlm_build_knetconfig: Unknown " + "lm_proto=0x%x\n", args->n_proto); + return (EINVAL); + } + + switch (args->n_fmly) { + case LM_INET: + knc.knc_protofmly = NC_INET; + break; + case LM_INET6: + knc.knc_protofmly = NC_INET6; + break; + case LM_LOOPBACK: + knc.knc_protofmly = NC_LOOPBACK; + /* Override what we set above. 
*/ + knc.knc_proto = NC_NOPROTO; + break; + default: + NLM_ERR("nlm_build_knetconfig: Unknown " + "lm_fmly=0x%x\n", args->n_fmly); + return (EINVAL); + } + + knc.knc_rdev = args->n_rdev; + netid = nlm_knc_to_netid(&knc); + if (!netid) + return (EINVAL); + + /* + * Setup service on the passed transport. + * NB: must releasef(fp) after this. + */ + if ((fp = getf(args->fd)) == NULL) + return (EBADF); + + mutex_enter(&g->lock); + /* + * Don't try to start while still shutting down, + * or lots of things will fail... + */ + if (g->run_status == NLM_ST_STOPPING) { + err = EAGAIN; + goto out; + } + + /* + * There is no separate "initialize" sub-call for nfssys, + * and we want to do some one-time work when the first + * binding comes in from lockd. + */ + if (g->run_status == NLM_ST_DOWN) { + g->run_status = NLM_ST_STARTING; + g->lockd_pid = curproc->p_pid; + + /* Save the options. */ + g->cn_idle_tmo = args->timout; + g->grace_period = args->grace; + g->retrans_tmo = args->retransmittimeout; + + /* See nfs_sys.c (not yet per-zone) */ + if (INGLOBALZONE(curproc)) { + rfs4_grace_period = args->grace; + rfs4_lease_time = args->grace; + } + + mutex_exit(&g->lock); + err = nlm_svc_starting(g, fp, netid, &knc); + mutex_enter(&g->lock); + } else { + /* + * If KLM is not started and the very first endpoint lockd + * tries to add is not a loopback device, report an error. + */ + if (g->run_status != NLM_ST_UP) { + err = ENOTACTIVE; + goto out; + } + if (g->lockd_pid != curproc->p_pid) { + /* Check if caller has the same PID lockd does */ + err = EPERM; + goto out; + } + + err = nlm_svc_add_ep(fp, netid, &knc); + } + +out: + mutex_exit(&g->lock); + if (fp != NULL) + releasef(args->fd); + + return (err); +} + +/* + * klm system calls. Kill the lock manager. + * Called by nfssys() KILL_LOCKMGR, + * liblm:lm_shutdown() <- unused? 
+ */ +int +lm_shutdown(void) +{ + struct nlm_globals *g; + proc_t *p; + pid_t pid; + + /* Get our "globals" */ + g = zone_getspecific(nlm_zone_key, curzone); + + mutex_enter(&g->lock); + if (g->run_status != NLM_ST_UP) { + mutex_exit(&g->lock); + return (EBUSY); + } + + g->run_status = NLM_ST_STOPPING; + pid = g->lockd_pid; + mutex_exit(&g->lock); + nlm_svc_stopping(g); + + mutex_enter(&pidlock); + p = prfind(pid); + if (p != NULL) + psignal(p, SIGTERM); + + mutex_exit(&pidlock); + return (0); +} + +/* + * Cleanup remote locks on FS un-export. + * + * NOTE: called from nfs_export.c:unexport() + * right before the share is going to + * be unexported. + */ +void +lm_unexport(struct exportinfo *exi) +{ + nlm_unexport(exi); +} + +/* + * CPR suspend/resume hooks. + * See:cpr_suspend, cpr_resume + * + * Before suspend, get current state from "statd" on + * all remote systems for which we have locks. + * + * After resume, check with those systems again, + * and either reclaim locks, or do SIGLOST. + */ +void +lm_cprsuspend(void) +{ + nlm_cprsuspend(); +} + +void +lm_cprresume(void) +{ + nlm_cprresume(); +} + +/* + * Add the nlm_id bits to the sysid (by ref). + */ +void +lm_set_nlmid_flk(int *new_sysid) +{ + if (lm_global_nlmid != 0) + *new_sysid |= (lm_global_nlmid << BITS_IN_SYSID); +} + +/* + * It seems that closed source klmmod used + * this function to release knetconfig stored + * in mntinfo structure (see mntinfo's mi_klmconfig + * field). + * We store knetconfigs differently, thus we don't + * need this function. + */ +void +lm_free_config(struct knetconfig *knc) +{ + _NOTE(ARGUNUSED(knc)); +} + +/* + * Called by NFS4 delegation code to check if there are any + * NFSv2/v3 locks for the file, so it should not delegate. + * + * NOTE: called from NFSv4 code + * (see nfs4_srv_deleg.c:rfs4_bgrant_delegation()) + */ +int +lm_vp_active(const vnode_t *vp) +{ + return (nlm_vp_active(vp)); +} + +/* + * Find or create a "sysid" for given knc+addr. + * name is optional. 
Sets nc_changed if the + * found knc_proto is different from passed. + * Increments the reference count. + * + * Called internally, and in nfs4_find_sysid() + */ +struct lm_sysid * +lm_get_sysid(struct knetconfig *knc, struct netbuf *addr, + char *name, bool_t *nc_changed) +{ + struct nlm_globals *g; + const char *netid; + struct nlm_host *hostp; + + /* + * nc_changed is kept in the signature but this implementation + * never reads or writes it. + */ + _NOTE(ARGUNUSED(nc_changed)); + netid = nlm_knc_to_netid(knc); + if (netid == NULL) + return (NULL); + + g = zone_getspecific(nlm_zone_key, curzone); + + hostp = nlm_host_findcreate(g, name, netid, addr); + if (hostp == NULL) + return (NULL); + + /* The opaque lm_sysid handle is the nlm_host pointer itself. */ + return ((struct lm_sysid *)hostp); +} + +/* + * Release a reference on a "sysid". + */ +void +lm_rel_sysid(struct lm_sysid *sysid) +{ + struct nlm_globals *g; + + g = zone_getspecific(nlm_zone_key, curzone); + nlm_host_release(g, (struct nlm_host *)sysid); +} + +/* + * Alloc/free a sysid_t (a unique number between + * LM_SYSID and LM_SYSID_MAX). + * + * Used by NFSv4 rfs4_op_lockt and smbsrv/smb_fsop_frlock, + * both to represent non-local locks outside of klm. + * + * NOTE: called from NFSv4 and SMBFS to allocate unique + * sysid. + */ +sysid_t +lm_alloc_sysidt(void) +{ + return (nlm_sysid_alloc()); +} + +/* Give back a sysid_t obtained from lm_alloc_sysidt(). */ +void +lm_free_sysidt(sysid_t sysid) +{ + nlm_sysid_free(sysid); +} + +/* Access private member lms->sysid */ +sysid_t +lm_sysidt(struct lm_sysid *lms) +{ + return (((struct nlm_host *)lms)->nh_sysid); +} + +/* + * Called by nfs_frlock to check lock constraints. + * Return non-zero if the lock request is "safe", i.e. + * the range is not mapped, not MANDLOCK, etc. + * + * NOTE: called from NFSv3/NFSv2 frlock() functions to + * determine whether it's safe to add new lock. + */ +int +lm_safelock(vnode_t *vp, const struct flock64 *fl, cred_t *cr) +{ + return (nlm_safelock(vp, fl, cr)); +} + +/* + * Called by nfs_lockcompletion to check whether it's "safe" + * to map the file (and cache it's data). Walks the list of + * file locks looking for any that are not "whole file".
+ * + * NOTE: called from nfs_client.c:nfs_lockcompletion() + */ +int +lm_safemap(const vnode_t *vp) +{ + /* Pure pass-through to nlm_safemap(). */ + return (nlm_safemap(vp)); +} + +/* + * Called by nfs_map() for the MANDLOCK case. + * Return non-zero if the file has any locks with a + * blocked request (sleep). + * + * NOTE: called from NFSv3/NFSv2 map() functions in + * order to determine whether it's safe to add new + * mapping. + */ +int +lm_has_sleep(const vnode_t *vp) +{ + /* Pure pass-through to nlm_has_sleep(). */ + return (nlm_has_sleep(vp)); +} + +/* + * **************************************************************** + * Stuff needed by klmops? + */ diff --git a/usr/src/uts/common/klm/klmops.c b/usr/src/uts/common/klm/klmops.c new file mode 100644 index 0000000000..a8adbe76c1 --- /dev/null +++ b/usr/src/uts/common/klm/klmops.c @@ -0,0 +1,170 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy is of the CDDL is also available via the Internet + * at http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ + +/* + * NFS Lock Manager, client-side + * Note: depends on (links with) klmmod + * + * This file contains all the external entry points of klmops. + * Basically, this is the "glue" to the BSD nlm code.
+ */ + +#include <sys/types.h> +#include <sys/errno.h> +#include <sys/modctl.h> +#include <sys/flock.h> + +#include <nfs/lm.h> +#include <rpcsvc/nlm_prot.h> +#include "nlm_impl.h" + + +/* Module description; referenced by modlinkage below. */ +static struct modlmisc modlmisc = { + &mod_miscops, "lock mgr calls" +}; + +/* Linkage handed to mod_install() and mod_info(). */ +static struct modlinkage modlinkage = { + MODREV_1, &modlmisc, NULL +}; + + + +/* + * **************************************************************** + * module init, fini, info + */ +int +_init() +{ + /* The result of mod_install() is returned unchanged. */ + return (mod_install(&modlinkage)); +} + +int +_fini() +{ + /* Don't unload. */ + return (EBUSY); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&modlinkage, modinfop)); +} + + + +/* + * **************************************************************** + * Stubs listed in modstubs.s + * These are called from fs/nfs + */ + +/* + * NFSv2 lock/unlock. Called by nfs_frlock() + * Uses NLM version 1 (NLM_VERS) + */ +int +lm_frlock(struct vnode *vp, int cmd, struct flock64 *flk, int flags, + u_offset_t off, struct cred *cr, struct netobj *fh, + struct flk_callback *flcb) +{ + /* Same as lm4_frlock() except for the protocol version passed down. */ + return (nlm_frlock(vp, cmd, flk, flags, off, + cr, fh, flcb, NLM_VERS)); +} + +/* + * NFSv3 lock/unlock. Called by nfs3_frlock() + * Uses NLM version 4 (NLM4_VERS) + */ +int +lm4_frlock(struct vnode *vp, int cmd, struct flock64 *flk, int flags, + u_offset_t off, struct cred *cr, struct netobj *fh, + struct flk_callback *flcb) +{ + int err; + err = nlm_frlock(vp, cmd, flk, flags, off, + cr, fh, flcb, NLM4_VERS); + return (err); +} + +/* + * NFSv2 shrlk/unshrlk. See nfs_shrlock + * Uses NLM version 3 (NLM_VERSX) + */ +int +lm_shrlock(struct vnode *vp, int cmd, + struct shrlock *shr, int flags, struct netobj *fh) +{ + return (nlm_shrlock(vp, cmd, shr, flags, fh, NLM_VERSX)); +} + +/* + * NFSv3 shrlk/unshrlk.
See nfs3_shrlock + * Uses NLM version 4 (NLM4_VERS) + */ +int +lm4_shrlock(struct vnode *vp, int cmd, + struct shrlock *shr, int flags, struct netobj *fh) +{ + return (nlm_shrlock(vp, cmd, shr, flags, fh, NLM4_VERS)); +} + +/* + * Helper for lm_frlock, lm4_frlock, nfs_lockrelease + * After getting a lock from a remote lock manager, + * register the lock locally. + */ +void +lm_register_lock_locally(struct vnode *vp, struct lm_sysid *ls, + struct flock64 *flk, int flags, u_offset_t offset) +{ + /* The opaque lm_sysid handle is cast back to struct nlm_host here. */ + nlm_register_lock_locally(vp, (struct nlm_host *)ls, + flk, flags, offset); +} + +/* + * Old RPC service dispatch functions, no longer used. + * Here only to satisfy modstubs.s references. + */ +void +lm_nlm_dispatch(struct svc_req *req, SVCXPRT *xprt) +{ + _NOTE(ARGUNUSED(req, xprt)) +} + +/* NLM version 4 flavour of the empty dispatch stub. */ +void +lm_nlm4_dispatch(struct svc_req *req, SVCXPRT *xprt) +{ + _NOTE(ARGUNUSED(req, xprt)) +} + +/* + * Old internal functions used for reclaiming locks + * our NFS client holds after some server restarts. + * The new NLM code does this differently, so these + * are here only to satisfy modstubs.s references. + */ +void +lm_nlm_reclaim(struct vnode *vp, struct flock64 *flkp) +{ + _NOTE(ARGUNUSED(vp, flkp)) +} + +/* NLM version 4 flavour of the empty reclaim stub. */ +void +lm_nlm4_reclaim(struct vnode *vp, struct flock64 *flkp) +{ + _NOTE(ARGUNUSED(vp, flkp)) +} diff --git a/usr/src/uts/common/klm/mapfile-mod b/usr/src/uts/common/klm/mapfile-mod new file mode 100644 index 0000000000..0debe6d986 --- /dev/null +++ b/usr/src/uts/common/klm/mapfile-mod @@ -0,0 +1,55 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy is of the CDDL is also available via the Internet +# at http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+# + + +$mapfile_version 2 + +SYMBOL_SCOPE { + global: +# loadable module linkage + _fini; + _info; + _init; +# These are all the symbols referenced in ml/modstubs.s +# If we want to remain a drop-in replacement for the old +# (closed source) klm, we need to define all of these. + lm_alloc_sysidt; + lm_cprresume; + lm_cprsuspend; + lm_free_config; + lm_free_sysidt; + lm_get_sysid; + lm_global_nlmid; + lm_has_sleep; + lm_rel_sysid; + lm_remove_file_locks; + lm_safelock; + lm_safemap; + lm_set_nlmid_flk; + lm_shutdown; + lm_svc; + lm_sysidt; + lm_unexport; + lm_vp_active; +# The following three functions are not mentioned in modstubs.s +# files, because they are not entry points to KLM. They +# are called from klmops only. + nlm_frlock; + nlm_register_lock_locally; + nlm_shrlock; + + local: + *; +}; diff --git a/usr/src/uts/common/klm/mapfile-ops b/usr/src/uts/common/klm/mapfile-ops new file mode 100644 index 0000000000..7696c1a7ce --- /dev/null +++ b/usr/src/uts/common/klm/mapfile-ops @@ -0,0 +1,40 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy is of the CDDL is also available via the Internet +# at http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2011 Nexenta Systems, Inc. All rights reserved. +# + + +$mapfile_version 2 + +SYMBOL_SCOPE { + global: +# loadable module linkage + _fini; + _info; + _init; +# These are all the symbols referenced in ml/modstubs.s +# If we want to remain a drop-in replacement for the old +# (closed source) klm, we need to define all of these.
+ + lm4_frlock; + lm4_shrlock; + lm_frlock; + lm_nlm4_dispatch; + lm_nlm4_reclaim; + lm_nlm_dispatch; + lm_nlm_reclaim; + lm_register_lock_locally; + + local: + *; +}; diff --git a/usr/src/uts/common/klm/nlm_client.c b/usr/src/uts/common/klm/nlm_client.c new file mode 100644 index 0000000000..ca513afb15 --- /dev/null +++ b/usr/src/uts/common/klm/nlm_client.c @@ -0,0 +1,1622 @@ +/* + * Copyright (c) 2008 Isilon Inc http://www.isilon.com/ + * Authors: Doug Rabson <dfr@rabson.org> + * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. 
+ */ + +/* + * Client-side support for (NFS) VOP_FRLOCK, VOP_SHRLOCK. + * (called via klmops.c: lm_frlock, lm4_frlock) + * + * Source code derived from FreeBSD nlm_advlock.c + */ + +#include <sys/param.h> +#include <sys/fcntl.h> +#include <sys/lock.h> +#include <sys/flock.h> +#include <sys/mount.h> +#include <sys/mutex.h> +#include <sys/proc.h> +#include <sys/share.h> +#include <sys/syslog.h> +#include <sys/systm.h> +#include <sys/unistd.h> +#include <sys/vnode.h> +#include <sys/queue.h> +#include <sys/sdt.h> +#include <netinet/in.h> + +#include <fs/fs_subr.h> +#include <rpcsvc/nlm_prot.h> + +#include <nfs/nfs.h> +#include <nfs/nfs_clnt.h> +#include <nfs/export.h> +#include <nfs/rnode.h> +#include <nfs/lm.h> + +#include "nlm_impl.h" + +/* Extra flags for nlm_call_lock() - xflags */ +#define NLM_X_RECLAIM 1 +#define NLM_X_BLOCKING 2 + +/* + * Max. number of retries nlm_call_cancel() does + * when NLM server is in grace period or doesn't + * respond correctly. + */ +#define NLM_CANCEL_NRETRS 5 + +/* + * Determines whether given lock "flp" is safe. + * The lock is considered to be safe when it + * acquires the whole file (i.e. its start + * and len are zeroes).
+ */ +#define NLM_FLOCK_IS_SAFE(flp) \ + ((flp)->l_start == 0 && (flp)->l_len == 0) + +/* + * Source of the cookies sent with NLM requests; nlm_call_lock() + * and nlm_call_cancel() advance it with atomic_inc_32_nv(). + */ +static volatile uint32_t nlm_xid = 1; + +static int nlm_init_fh_by_vp(vnode_t *, struct netobj *, rpcvers_t *); +static int nlm_map_status(nlm4_stats); +static int nlm_map_clnt_stat(enum clnt_stat); +static void nlm_send_siglost(pid_t); + +static int nlm_frlock_getlk(struct nlm_host *, vnode_t *, + struct flock64 *, int, u_offset_t, struct netobj *, int); + +static int nlm_frlock_setlk(struct nlm_host *, vnode_t *, + struct flock64 *, int, u_offset_t, struct netobj *, + struct flk_callback *, int, bool_t); + +static int nlm_reclaim_lock(struct nlm_host *, vnode_t *, + struct flock64 *, int32_t); + +static void nlm_init_lock(struct nlm4_lock *, + const struct flock64 *, struct netobj *, + struct nlm_owner_handle *); + +static int nlm_call_lock(vnode_t *, struct flock64 *, + struct nlm_host *, struct netobj *, + struct flk_callback *, int, int); +static int nlm_call_unlock(struct flock64 *, struct nlm_host *, + struct netobj *, int); +static int nlm_call_test(struct flock64 *, struct nlm_host *, + struct netobj *, int); +static int nlm_call_cancel(struct nlm4_lockargs *, + struct nlm_host *, int); + +static int nlm_local_getlk(vnode_t *, struct flock64 *, int); +static int nlm_local_setlk(vnode_t *, struct flock64 *, int); +static void nlm_local_cancelk(vnode_t *, struct flock64 *); + +static void nlm_init_share(struct nlm4_share *, + const struct shrlock *, struct netobj *); + +static int nlm_call_share(struct shrlock *, struct nlm_host *, + struct netobj *, int, int); +static int nlm_call_unshare(struct shrlock *, struct nlm_host *, + struct netobj *, int); +static int nlm_reclaim_share(struct nlm_host *, vnode_t *, + struct shrlock *, uint32_t); +static int nlm_local_shrlock(vnode_t *, struct shrlock *, int, int); +static void nlm_local_shrcancel(vnode_t *, struct shrlock *); + +/* + * Reclaim locks/shares acquired by the client side + * on the given server represented by hostp.
+ * The function is called from a dedicated thread + * when server reports us that it's entered grace + * period. + */ +void +nlm_reclaim_client(struct nlm_globals *g, struct nlm_host *hostp) +{ + int32_t state; + int error, sysid; + struct locklist *llp_head, *llp; + struct nlm_shres *nsp_head, *nsp; + bool_t restart; + + sysid = hostp->nh_sysid | LM_SYSID_CLIENT; + do { + error = 0; + restart = FALSE; + state = nlm_host_get_state(hostp); + + DTRACE_PROBE3(reclaim__iter, struct nlm_globals *, g, + struct nlm_host *, hostp, int, state); + + /* + * We cancel all sleeping locks that were + * done by the host, because we don't allow + * reclamation of sleeping locks. The reason + * we do this is that allowing of sleeping locks + * reclamation can potentially break locks recovery + * order. + * + * Imagine that we have two client machines A and B + * and an NLM server machine. A adds a non sleeping + * lock to the file F and aquires this file. Machine + * B in its turn adds sleeping lock to the file + * F and blocks because F is already aquired by + * the machine A. Then server crashes and after the + * reboot it notifies its clients about the crash. + * If we would allow sleeping locks reclamation, + * there would be possible that machine B recovers + * its lock faster than machine A (by some reason). + * So that B aquires the file F after server crash and + * machine A (that by some reason recovers slower) fails + * to recover its non sleeping lock. Thus the original + * locks order becames broken. + */ + nlm_host_cancel_slocks(g, hostp); + + /* + * Try to reclaim all active locks we have + */ + llp_head = llp = flk_get_active_locks(sysid, NOPID); + while (llp != NULL) { + error = nlm_reclaim_lock(hostp, llp->ll_vp, + &llp->ll_flock, state); + + if (error == 0) { + llp = llp->ll_next; + continue; + } else if (error == ERESTART) { + restart = TRUE; + break; + } else { + /* + * Critical error occurred, the lock + * can not be recovered, just take it away. 
+ */ + nlm_local_cancelk(llp->ll_vp, &llp->ll_flock); + } + + llp = llp->ll_next; + } + + flk_free_locklist(llp_head); + if (restart) { + /* + * Lock reclamation fucntion reported us that + * the server state was changed (again), so + * try to repeat the whole reclamation process. + */ + continue; + } + + nsp_head = nsp = nlm_get_active_shres(hostp); + while (nsp != NULL) { + error = nlm_reclaim_share(hostp, nsp->ns_vp, + nsp->ns_shr, state); + + if (error == 0) { + nsp = nsp->ns_next; + continue; + } else if (error == ERESTART) { + break; + } else { + /* Failed to reclaim share */ + nlm_shres_untrack(hostp, nsp->ns_vp, + nsp->ns_shr); + nlm_local_shrcancel(nsp->ns_vp, + nsp->ns_shr); + } + + nsp = nsp->ns_next; + } + + nlm_free_shrlist(nsp_head); + } while (state != nlm_host_get_state(hostp)); +} + +/* + * nlm_frlock -- + * NFS advisory byte-range locks. + * Called in klmops.c + * + * Note that the local locking code (os/flock.c) is used to + * keep track of remote locks granted by some server, so we + * can reclaim those locks after a server restarts. We can + * also sometimes use this as a cache of lock information. + * + * Was: nlm_advlock() + */ +/* ARGSUSED */ +int +nlm_frlock(struct vnode *vp, int cmd, struct flock64 *flkp, + int flags, u_offset_t offset, struct cred *crp, + struct netobj *fhp, struct flk_callback *flcb, int vers) +{ + mntinfo_t *mi; + servinfo_t *sv; + const char *netid; + struct nlm_host *hostp; + int error; + struct nlm_globals *g; + + mi = VTOMI(vp); + sv = mi->mi_curr_serv; + + netid = nlm_knc_to_netid(sv->sv_knconf); + if (netid == NULL) { + NLM_ERR("nlm_frlock: unknown NFS netid"); + return (ENOSYS); + } + + g = zone_getspecific(nlm_zone_key, curzone); + hostp = nlm_host_findcreate(g, sv->sv_hostname, netid, &sv->sv_addr); + if (hostp == NULL) + return (ENOSYS); + + /* + * Purge cached attributes in order to make sure that + * future calls of convoff()/VOP_GETATTR() will get the + * latest data. 
+ */ + if (flkp->l_whence == SEEK_END) + PURGE_ATTRCACHE(vp); + + /* Now flk0 is the zero-based lock request. */ + switch (cmd) { + case F_GETLK: + error = nlm_frlock_getlk(hostp, vp, flkp, flags, + offset, fhp, vers); + break; + + case F_SETLK: + case F_SETLKW: + error = nlm_frlock_setlk(hostp, vp, flkp, flags, + offset, fhp, flcb, vers, (cmd == F_SETLKW)); + if (error == 0) + nlm_host_monitor(g, hostp, 0); + break; + + default: + error = EINVAL; + break; + } + + nlm_host_release(g, hostp); + return (error); +} + +static int +nlm_frlock_getlk(struct nlm_host *hostp, vnode_t *vp, + struct flock64 *flkp, int flags, u_offset_t offset, + struct netobj *fhp, int vers) +{ + struct flock64 flk0; + int error; + + /* + * Check local (cached) locks first. + * If we find one, no need for RPC. + */ + flk0 = *flkp; + flk0.l_pid = curproc->p_pid; + error = nlm_local_getlk(vp, &flk0, flags); + if (error != 0) + return (error); + if (flk0.l_type != F_UNLCK) { + *flkp = flk0; + return (0); + } + + /* Not found locally. Try remote. */ + flk0 = *flkp; + flk0.l_pid = curproc->p_pid; + error = convoff(vp, &flk0, 0, (offset_t)offset); + if (error != 0) + return (error); + + error = nlm_call_test(&flk0, hostp, fhp, vers); + if (error != 0) + return (error); + + if (flk0.l_type == F_UNLCK) { + /* + * Update the caller's *flkp with information + * on the conflicting lock (or lack thereof). + */ + flkp->l_type = F_UNLCK; + } else { + /* + * Found a conflicting lock. Set the + * caller's *flkp with the info, first + * converting to the caller's whence. 
+ */ + (void) convoff(vp, &flk0, flkp->l_whence, (offset_t)offset); + *flkp = flk0; + } + + return (0); +} + +static int +nlm_frlock_setlk(struct nlm_host *hostp, vnode_t *vp, + struct flock64 *flkp, int flags, u_offset_t offset, + struct netobj *fhp, struct flk_callback *flcb, + int vers, bool_t do_block) +{ + int error, xflags; + + error = convoff(vp, flkp, 0, (offset_t)offset); + if (error != 0) + return (error); + + /* + * NFS v2 clients should not request locks where any part + * of the lock range is beyond 0xffffffff. The NFS code + * checks that (see nfs_frlock, flk_check_lock_data), but + * as that's outside this module, let's check here too. + * This check ensures that we will be able to convert this + * lock request into 32-bit form without change, and that + * (more importantly) when the granted call back arrives, + * it's unchanged when converted back into 64-bit form. + * If this lock range were to change in any way during + * either of those conversions, the "granted" call back + * from the NLM server would not find our sleeping lock. + */ + if (vers < NLM4_VERS) { + if (flkp->l_start > MAX_UOFF32 || + flkp->l_start + flkp->l_len > MAX_UOFF32 + 1) + return (EINVAL); + } + + /* + * Fill in l_sysid for the local locking calls. + * Also, let's not trust the caller's l_pid. + */ + flkp->l_sysid = hostp->nh_sysid | LM_SYSID_CLIENT; + flkp->l_pid = curproc->p_pid; + + if (flkp->l_type == F_UNLCK) { + /* + * Purge local (cached) lock information first, + * then clear the remote lock. + */ + (void) nlm_local_setlk(vp, flkp, flags); + error = nlm_call_unlock(flkp, hostp, fhp, vers); + + return (error); + } + + if (!do_block) { + /* + * This is a non-blocking "set" request, + * so we can check locally first, and + * sometimes avoid an RPC call. + */ + struct flock64 flk0; + + flk0 = *flkp; + error = nlm_local_getlk(vp, &flk0, flags); + if (error != 0 && flk0.l_type != F_UNLCK) { + /* Found a conflicting lock. 
*/ + return (EAGAIN); + } + + xflags = 0; + } else { + xflags = NLM_X_BLOCKING; + } + + nfs_add_locking_id(vp, curproc->p_pid, RLMPL_PID, + (char *)&curproc->p_pid, sizeof (pid_t)); + + error = nlm_call_lock(vp, flkp, hostp, fhp, flcb, vers, xflags); + if (error != 0) + return (error); + + /* + * Save the lock locally. This should not fail, + * because the server is authoritative about locks + * and it just told us we have the lock! + */ + error = nlm_local_setlk(vp, flkp, flags); + if (error != 0) { + /* + * That's unexpected situation. Just ignore the error. + */ + NLM_WARN("nlm_frlock_setlk: Failed to set local lock. " + "[err=%d]\n", error); + error = 0; + } + + return (error); +} + +/* + * Cancel all client side remote locks/shares on the + * given host. Report to the processes that own + * cancelled locks that they are removed by force + * by sending SIGLOST. + */ +void +nlm_client_cancel_all(struct nlm_globals *g, struct nlm_host *hostp) +{ + struct locklist *llp_head, *llp; + struct nlm_shres *nsp_head, *nsp; + struct netobj lm_fh; + rpcvers_t vers; + int error, sysid; + + sysid = hostp->nh_sysid | LM_SYSID_CLIENT; + nlm_host_cancel_slocks(g, hostp); + + /* + * Destroy all active locks + */ + llp_head = llp = flk_get_active_locks(sysid, NOPID); + while (llp != NULL) { + llp->ll_flock.l_type = F_UNLCK; + + error = nlm_init_fh_by_vp(llp->ll_vp, &lm_fh, &vers); + if (error == 0) + (void) nlm_call_unlock(&llp->ll_flock, hostp, + &lm_fh, vers); + + nlm_local_cancelk(llp->ll_vp, &llp->ll_flock); + llp = llp->ll_next; + } + + flk_free_locklist(llp_head); + + /* + * Destroy all active share reservations + */ + nsp_head = nsp = nlm_get_active_shres(hostp); + while (nsp != NULL) { + error = nlm_init_fh_by_vp(nsp->ns_vp, &lm_fh, &vers); + if (error == 0) + (void) nlm_call_unshare(nsp->ns_shr, hostp, + &lm_fh, vers); + + nlm_local_shrcancel(nsp->ns_vp, nsp->ns_shr); + nlm_shres_untrack(hostp, nsp->ns_vp, nsp->ns_shr); + nsp = nsp->ns_next; + } + + 
nlm_free_shrlist(nsp_head); +} + +/* + * The function determines whether the lock "fl" can + * be safely applied to the file vnode "vp" corresponds to. + * The lock can be "safely" applied if all the conditions + * above are held: + * - It's not a mandatory lock + * - The vnode wasn't mapped by anyone + * - The vnode was mapped, but it hasn't any locks on it. + * - The vnode was mapped and all locks it has occupies + * the whole file. + */ +int +nlm_safelock(vnode_t *vp, const struct flock64 *fl, cred_t *cr) +{ + rnode_t *rp = VTOR(vp); + struct vattr va; + int err; + + if ((rp->r_mapcnt > 0) && (fl->l_start != 0 || fl->l_len != 0)) + return (0); + + va.va_mask = AT_MODE; + err = VOP_GETATTR(vp, &va, 0, cr, NULL); + if (err != 0) + return (0); + + /* NLM4 doesn't allow mandatory file locking */ + if (MANDLOCK(vp, va.va_mode)) + return (0); + + return (1); +} + +/* + * The function determines whether it's safe to map + * a file correspoding to vnode vp. + * The mapping is considered to be "safe" if file + * either has no any locks on it or all locks it + * has occupy the whole file. 
+ */ +int +nlm_safemap(const vnode_t *vp) +{ + struct locklist *llp, *llp_next; + struct nlm_slock *nslp; + struct nlm_globals *g; + int safe = 1; + + /* Check active locks at first */ + llp = flk_active_locks_for_vp(vp); + /* + * Every entry is released and freed as it is visited, so the + * walk always runs to the end of the list even once an unsafe + * lock has been seen. + */ + while (llp != NULL) { + if ((llp->ll_vp == vp) && + !NLM_FLOCK_IS_SAFE(&llp->ll_flock)) + safe = 0; + + llp_next = llp->ll_next; + VN_RELE(llp->ll_vp); + kmem_free(llp, sizeof (*llp)); + llp = llp_next; + } + if (!safe) + return (safe); + + /* Then check sleeping locks if any */ + g = zone_getspecific(nlm_zone_key, curzone); + mutex_enter(&g->lock); + TAILQ_FOREACH(nslp, &g->nlm_slocks, nsl_link) { + if (nslp->nsl_state == NLM_SL_BLOCKED && + nslp->nsl_vp == vp && + (nslp->nsl_lock.l_offset != 0 || + nslp->nsl_lock.l_len != 0)) { + safe = 0; + break; + } + } + + mutex_exit(&g->lock); + return (safe); +} + +/* + * Returns TRUE when the sleeping-lock list of the current zone + * holds an entry in the NLM_SL_BLOCKED state for vnode vp, + * FALSE otherwise. The list is walked under g->lock. + */ +int +nlm_has_sleep(const vnode_t *vp) +{ + struct nlm_globals *g; + struct nlm_slock *nslp; + int has_slocks = FALSE; + + g = zone_getspecific(nlm_zone_key, curzone); + mutex_enter(&g->lock); + TAILQ_FOREACH(nslp, &g->nlm_slocks, nsl_link) { + if (nslp->nsl_state == NLM_SL_BLOCKED && + nslp->nsl_vp == vp) { + has_slocks = TRUE; + break; + } + } + + mutex_exit(&g->lock); + return (has_slocks); +} + +/* + * Record the lock described by flk in the local lock list. + * flk is modified: its l_sysid is overwritten (with the client + * sysid of hostp, or 0 when hostp is NULL) and it is converted + * to SEEK_SET form. Errors from the conversion and from the + * local registration are ignored. + */ +void +nlm_register_lock_locally(struct vnode *vp, struct nlm_host *hostp, + struct flock64 *flk, int flags, u_offset_t offset) +{ + int sysid = 0; + + if (hostp != NULL) { + sysid = hostp->nh_sysid | LM_SYSID_CLIENT; + } + + flk->l_sysid = sysid; + (void) convoff(vp, flk, 0, (offset_t)offset); + (void) nlm_local_setlk(vp, flk, flags); +} + + +/* + * The BSD code had functions here to "reclaim" (destroy) + * remote locks when a vnode is being forcibly destroyed. + * We just keep vnodes around until statd tells us the + * client has gone away.
+ */ + +/* + * Re-request one lock from hostp with the NLM_X_RECLAIM flag. + * Returns ERESTART when the host state no longer equals + * orig_state, the error from nlm_init_fh_by_vp() when no file + * handle can be built, and the result of nlm_call_lock() otherwise. + */ +static int +nlm_reclaim_lock(struct nlm_host *hostp, vnode_t *vp, + struct flock64 *flp, int32_t orig_state) +{ + struct netobj lm_fh; + int error, state; + rpcvers_t vers; + + /* + * If the remote NSM state changes during recovery, the host + * must have rebooted a second time. In that case, we must + * restart the recovery. + */ + state = nlm_host_get_state(hostp); + if (state != orig_state) + return (ERESTART); + + error = nlm_init_fh_by_vp(vp, &lm_fh, &vers); + if (error != 0) + return (error); + + return (nlm_call_lock(vp, flp, hostp, &lm_fh, + NULL, vers, NLM_X_RECLAIM)); +} + +/* + * Get local lock information for some NFS server. + * + * This gets (checks for) a local conflicting lock. + * Note: Modifies passed flock, if a conflict is found, + * but the caller expects that. + */ +static int +nlm_local_getlk(vnode_t *vp, struct flock64 *fl, int flags) +{ + /* Callers must pass a SEEK_SET based request; reclock() gets offset 0. */ + VERIFY(fl->l_whence == SEEK_SET); + return (reclock(vp, fl, 0, flags, 0, NULL)); +} + +/* + * Set local lock information for some NFS server. + * + * Called after a lock request (set or clear) succeeded. We record the + * details in the local lock manager. Note that since the remote + * server has granted the lock, we can be sure that it doesn't + * conflict with any other locks we have in the local lock manager. + * + * Since it is possible that host may also make NLM client requests to + * our NLM server, we use a different sysid value to record our own + * client locks. + * + * Note that since it is possible for us to receive replies from the + * server in a different order than the locks were granted (e.g. if + * many local threads are contending for the same lock), we must use a + * blocking operation when registering with the local lock manager. + * We expect that any actual wait will be rare and short hence we + * ignore signals for this.
+ */ +static int +nlm_local_setlk(vnode_t *vp, struct flock64 *fl, int flags) +{ + VERIFY(fl->l_whence == SEEK_SET); + return (reclock(vp, fl, SETFLCK, flags, 0, NULL)); +} + +/* + * Cancel local lock and send send SIGLOST signal + * to the lock owner. + * + * NOTE: modifies flp + */ +static void +nlm_local_cancelk(vnode_t *vp, struct flock64 *flp) +{ + flp->l_type = F_UNLCK; + (void) nlm_local_setlk(vp, flp, FREAD | FWRITE); + nlm_send_siglost(flp->l_pid); +} + +/* + * Do NLM_LOCK call. + * Was: nlm_setlock() + * + * NOTE: nlm_call_lock() function should care about locking/unlocking + * of rnode->r_lkserlock which should be released before nlm_call_lock() + * sleeps on waiting lock and acquired when it wakes up. + */ +static int +nlm_call_lock(vnode_t *vp, struct flock64 *flp, + struct nlm_host *hostp, struct netobj *fhp, + struct flk_callback *flcb, int vers, int xflags) +{ + struct nlm4_lockargs args; + struct nlm_owner_handle oh; + struct nlm_globals *g; + rnode_t *rnp = VTOR(vp); + struct nlm_slock *nslp = NULL; + uint32_t xid; + int error = 0; + + bzero(&args, sizeof (args)); + g = zone_getspecific(nlm_zone_key, curzone); + nlm_init_lock(&args.alock, flp, fhp, &oh); + + args.exclusive = (flp->l_type == F_WRLCK); + args.reclaim = xflags & NLM_X_RECLAIM; + args.state = g->nsm_state; + args.cookie.n_len = sizeof (xid); + args.cookie.n_bytes = (char *)&xid; + + oh.oh_sysid = hostp->nh_sysid; + xid = atomic_inc_32_nv(&nlm_xid); + + if (xflags & NLM_X_BLOCKING) { + args.block = TRUE; + nslp = nlm_slock_register(g, hostp, &args.alock, vp); + } + + for (;;) { + nlm_rpc_t *rpcp; + enum clnt_stat stat; + struct nlm4_res res; + enum nlm4_stats nlm_err; + + error = nlm_host_get_rpc(hostp, vers, &rpcp); + if (error != 0) { + error = ENOLCK; + goto out; + } + + bzero(&res, sizeof (res)); + stat = nlm_lock_rpc(&args, &res, rpcp->nr_handle, vers); + nlm_host_rele_rpc(hostp, rpcp); + + error = nlm_map_clnt_stat(stat); + if (error != 0) { + if (error == EAGAIN) + continue; + + 
goto out; + } + + DTRACE_PROBE1(lock__res, enum nlm4_stats, res.stat.stat); + nlm_err = res.stat.stat; + xdr_free((xdrproc_t)xdr_nlm4_res, (void *)&res); + if (nlm_err == nlm4_denied_grace_period) { + if (args.reclaim) { + error = ENOLCK; + goto out; + } + + error = nlm_host_wait_grace(hostp); + if (error != 0) + goto out; + + continue; + } + + switch (nlm_err) { + case nlm4_granted: + case nlm4_blocked: + error = 0; + break; + + case nlm4_denied: + if (nslp != NULL) { + NLM_WARN("nlm_call_lock: got nlm4_denied for " + "blocking lock\n"); + } + + error = EAGAIN; + break; + + default: + error = nlm_map_status(nlm_err); + } + + /* + * If we deal with either non-blocking lock or + * with a blocking locks that wasn't blocked on + * the server side (by some reason), our work + * is finished. + */ + if (nslp == NULL || + nlm_err != nlm4_blocked || + error != 0) + goto out; + + /* + * Before releasing the r_lkserlock of rnode, we should + * check whether the new lock is "safe". If it's not + * safe, disable caching for the given vnode. That is done + * for sleeping locks only that are waiting for a GRANT reply + * from the NLM server. + * + * NOTE: the vnode cache can be enabled back later if an + * unsafe lock will be merged with existent locks so that + * it will become safe. This condition is checked in the + * NFSv3 code (see nfs_lockcompletion). + */ + if (!NLM_FLOCK_IS_SAFE(flp)) { + mutex_enter(&vp->v_lock); + vp->v_flag &= ~VNOCACHE; + mutex_exit(&vp->v_lock); + } + + /* + * The server should call us back with a + * granted message when the lock succeeds. + * In order to deal with broken servers, + * lost granted messages, or server reboots, + * we will also re-try every few seconds. + * + * Note: We're supposed to call these + * flk_invoke_callbacks when blocking. + * Take care on rnode->r_lkserlock, we should + * release it before going to sleep. 
+ */ + (void) flk_invoke_callbacks(flcb, FLK_BEFORE_SLEEP); + nfs_rw_exit(&rnp->r_lkserlock); + + error = nlm_slock_wait(g, nslp, g->retrans_tmo); + + /* + * NFS expects that we return with rnode->r_lkserlock + * locked on write, lock it back. + * + * NOTE: nfs_rw_enter_sig() can be either interruptible + * or not. It depends on options of NFS mount. Here + * we're _always_ uninterruptible (independently of mount + * options), because nfs_frlock/nfs3_frlock expects that + * we return with rnode->r_lkserlock acquired. So we don't + * want our lock attempt to be interrupted by a signal. + */ + (void) nfs_rw_enter_sig(&rnp->r_lkserlock, RW_WRITER, 0); + (void) flk_invoke_callbacks(flcb, FLK_AFTER_SLEEP); + + if (error == 0) { + break; + } else if (error == EINTR) { + /* + * We need to call the server to cancel our + * lock request. + */ + DTRACE_PROBE1(cancel__lock, int, error); + (void) nlm_call_cancel(&args, hostp, vers); + break; + } else { + /* + * Timeout happened, resend the lock request to + * the server. Well, we're a bit paranoid here, + * but keep in mind previous request could lost + * (especially with conectionless transport). + */ + + ASSERT(error == ETIMEDOUT); + continue; + } + } + + /* + * We could disable the vnode cache for the given _sleeping_ + * (codition: nslp != NULL) lock if it was unsafe. Normally, + * nfs_lockcompletion() function can enable the vnode cache + * back if the lock becomes safe after activativation. But it + * will not happen if any error occurs on the locking path. + * + * Here we enable the vnode cache back if the error occurred + * and if there aren't any unsafe locks on the given vnode. + * Note that if error happened, sleeping lock was derigistered. + */ + if (error != 0 && nslp != NULL && nlm_safemap(vp)) { + mutex_enter(&vp->v_lock); + vp->v_flag |= VNOCACHE; + mutex_exit(&vp->v_lock); + } + +out: + if (nslp != NULL) + nlm_slock_unregister(g, nslp); + + return (error); +} + +/* + * Do NLM_CANCEL call. 
+ * Helper for nlm_call_lock() error recovery. + */ +static int +nlm_call_cancel(struct nlm4_lockargs *largs, + struct nlm_host *hostp, int vers) +{ + nlm4_cancargs cargs; + uint32_t xid; + int error, retries; + + bzero(&cargs, sizeof (cargs)); + + xid = atomic_inc_32_nv(&nlm_xid); + cargs.cookie.n_len = sizeof (xid); + cargs.cookie.n_bytes = (char *)&xid; + cargs.block = largs->block; + cargs.exclusive = largs->exclusive; + cargs.alock = largs->alock; + + /* + * Unlike all other nlm_call_* functions, nlm_call_cancel + * doesn't spin forever until it gets reasonable response + * from NLM server. It makes limited number of retries and + * if server doesn't send a reasonable reply, it returns an + * error. It behaves like that because it's called from nlm_call_lock + * with blocked signals and thus it can not be interrupted from + * user space. + */ + for (retries = 0; retries < NLM_CANCEL_NRETRS; retries++) { + nlm_rpc_t *rpcp; + enum clnt_stat stat; + struct nlm4_res res; + + error = nlm_host_get_rpc(hostp, vers, &rpcp); + if (error != 0) + return (ENOLCK); + + bzero(&res, sizeof (res)); + stat = nlm_cancel_rpc(&cargs, &res, rpcp->nr_handle, vers); + nlm_host_rele_rpc(hostp, rpcp); + + DTRACE_PROBE1(cancel__rloop_end, enum clnt_stat, stat); + error = nlm_map_clnt_stat(stat); + if (error != 0) { + if (error == EAGAIN) + continue; + + return (error); + } + + DTRACE_PROBE1(cancel__res, enum nlm4_stats, res.stat.stat); + switch (res.stat.stat) { + /* + * There was nothing to cancel. We are going to go ahead + * and assume we got the lock. + */ + case nlm_denied: + /* + * The server has recently rebooted. Treat this as a + * successful cancellation. + */ + case nlm4_denied_grace_period: + /* + * We managed to cancel. + */ + case nlm4_granted: + error = 0; + break; + + default: + /* + * Broken server implementation. Can't really do + * anything here. 
+ */ + error = EIO; + break; + } + + xdr_free((xdrproc_t)xdr_nlm4_res, (void *)&res); + break; + } + + return (error); +} + +/* + * Do NLM_UNLOCK call. + * Was: nlm_clearlock + */ +static int +nlm_call_unlock(struct flock64 *flp, struct nlm_host *hostp, + struct netobj *fhp, int vers) +{ + struct nlm4_unlockargs args; + struct nlm_owner_handle oh; + enum nlm4_stats nlm_err; + uint32_t xid; + int error; + + bzero(&args, sizeof (args)); + nlm_init_lock(&args.alock, flp, fhp, &oh); + + oh.oh_sysid = hostp->nh_sysid; + xid = atomic_inc_32_nv(&nlm_xid); + args.cookie.n_len = sizeof (xid); + args.cookie.n_bytes = (char *)&xid; + + for (;;) { + nlm_rpc_t *rpcp; + struct nlm4_res res; + enum clnt_stat stat; + + error = nlm_host_get_rpc(hostp, vers, &rpcp); + if (error != 0) + return (ENOLCK); + + bzero(&res, sizeof (res)); + stat = nlm_unlock_rpc(&args, &res, rpcp->nr_handle, vers); + nlm_host_rele_rpc(hostp, rpcp); + + error = nlm_map_clnt_stat(stat); + if (error != 0) { + if (error == EAGAIN) + continue; + + return (error); + } + + DTRACE_PROBE1(unlock__res, enum nlm4_stats, res.stat.stat); + nlm_err = res.stat.stat; + xdr_free((xdrproc_t)xdr_nlm4_res, (void *)&res); + if (nlm_err == nlm4_denied_grace_period) { + error = nlm_host_wait_grace(hostp); + if (error != 0) + return (error); + + continue; + } + + break; + } + + /* special cases */ + switch (nlm_err) { + case nlm4_denied: + error = EINVAL; + break; + default: + error = nlm_map_status(nlm_err); + break; + } + + return (error); +} + +/* + * Do NLM_TEST call. 
+ * Was: nlm_getlock() + */ +static int +nlm_call_test(struct flock64 *flp, struct nlm_host *hostp, + struct netobj *fhp, int vers) +{ + struct nlm4_testargs args; + struct nlm4_holder h; + struct nlm_owner_handle oh; + enum nlm4_stats nlm_err; + uint32_t xid; + int error; + + bzero(&args, sizeof (args)); + nlm_init_lock(&args.alock, flp, fhp, &oh); + + args.exclusive = (flp->l_type == F_WRLCK); + oh.oh_sysid = hostp->nh_sysid; + xid = atomic_inc_32_nv(&nlm_xid); + args.cookie.n_len = sizeof (xid); + args.cookie.n_bytes = (char *)&xid; + + for (;;) { + nlm_rpc_t *rpcp; + struct nlm4_testres res; + enum clnt_stat stat; + + error = nlm_host_get_rpc(hostp, vers, &rpcp); + if (error != 0) + return (ENOLCK); + + bzero(&res, sizeof (res)); + stat = nlm_test_rpc(&args, &res, rpcp->nr_handle, vers); + nlm_host_rele_rpc(hostp, rpcp); + + error = nlm_map_clnt_stat(stat); + if (error != 0) { + if (error == EAGAIN) + continue; + + return (error); + } + + DTRACE_PROBE1(test__res, enum nlm4_stats, res.stat.stat); + nlm_err = res.stat.stat; + bcopy(&res.stat.nlm4_testrply_u.holder, &h, sizeof (h)); + xdr_free((xdrproc_t)xdr_nlm4_testres, (void *)&res); + if (nlm_err == nlm4_denied_grace_period) { + error = nlm_host_wait_grace(hostp); + if (error != 0) + return (error); + + continue; + } + + break; + } + + switch (nlm_err) { + case nlm4_granted: + flp->l_type = F_UNLCK; + error = 0; + break; + + case nlm4_denied: + flp->l_start = h.l_offset; + flp->l_len = h.l_len; + flp->l_pid = h.svid; + flp->l_type = (h.exclusive) ? F_WRLCK : F_RDLCK; + flp->l_whence = SEEK_SET; + flp->l_sysid = 0; + error = 0; + break; + + default: + error = nlm_map_status(nlm_err); + break; + } + + return (error); +} + + +static void +nlm_init_lock(struct nlm4_lock *lock, + const struct flock64 *fl, struct netobj *fh, + struct nlm_owner_handle *oh) +{ + + /* Caller converts to zero-base. 
*/ + VERIFY(fl->l_whence == SEEK_SET); + bzero(lock, sizeof (*lock)); + bzero(oh, sizeof (*oh)); + + lock->caller_name = uts_nodename(); + lock->fh.n_len = fh->n_len; + lock->fh.n_bytes = fh->n_bytes; + lock->oh.n_len = sizeof (*oh); + lock->oh.n_bytes = (void *)oh; + lock->svid = fl->l_pid; + lock->l_offset = fl->l_start; + lock->l_len = fl->l_len; +} + +/* ************************************************************** */ + +int +nlm_shrlock(struct vnode *vp, int cmd, struct shrlock *shr, + int flags, struct netobj *fh, int vers) +{ + struct shrlock shlk; + mntinfo_t *mi; + servinfo_t *sv; + const char *netid; + struct nlm_host *host = NULL; + int error; + struct nlm_globals *g; + + mi = VTOMI(vp); + sv = mi->mi_curr_serv; + + netid = nlm_knc_to_netid(sv->sv_knconf); + if (netid == NULL) { + NLM_ERR("nlm_shrlock: unknown NFS netid\n"); + return (ENOSYS); + } + + g = zone_getspecific(nlm_zone_key, curzone); + host = nlm_host_findcreate(g, sv->sv_hostname, netid, &sv->sv_addr); + if (host == NULL) + return (ENOSYS); + + /* + * Fill in s_sysid for the local locking calls. + * Also, let's not trust the caller's l_pid. + */ + shlk = *shr; + shlk.s_sysid = host->nh_sysid | LM_SYSID_CLIENT; + shlk.s_pid = curproc->p_pid; + + if (cmd == F_UNSHARE) { + /* + * Purge local (cached) share information first, + * then clear the remote share. + */ + (void) nlm_local_shrlock(vp, &shlk, cmd, flags); + nlm_shres_untrack(host, vp, &shlk); + error = nlm_call_unshare(&shlk, host, fh, vers); + goto out; + } + + nfs_add_locking_id(vp, curproc->p_pid, RLMPL_OWNER, + shr->s_owner, shr->s_own_len); + + error = nlm_call_share(&shlk, host, fh, vers, FALSE); + if (error != 0) + goto out; + + /* + * Save the share locally. This should not fail, + * because the server is authoritative about shares + * and it just told us we have the share reservation! + */ + error = nlm_local_shrlock(vp, shr, cmd, flags); + if (error != 0) { + /* + * Oh oh, we really don't expect an error here. 
+ */ + NLM_WARN("nlm_shrlock: set locally, err %d\n", error); + error = 0; + } + + nlm_shres_track(host, vp, &shlk); + nlm_host_monitor(g, host, 0); + +out: + nlm_host_release(g, host); + + return (error); +} + +static int +nlm_reclaim_share(struct nlm_host *hostp, vnode_t *vp, + struct shrlock *shr, uint32_t orig_state) +{ + struct netobj lm_fh; + int error, state; + rpcvers_t vers; + + state = nlm_host_get_state(hostp); + if (state != orig_state) { + /* + * It seems that NLM server rebooted while + * we were busy with recovery. + */ + return (ERESTART); + } + + error = nlm_init_fh_by_vp(vp, &lm_fh, &vers); + if (error != 0) + return (error); + + return (nlm_call_share(shr, hostp, &lm_fh, vers, 1)); +} + +/* + * Set local share information for some NFS server. + * + * Called after a share request (set or clear) succeeded. We record + * the details in the local lock manager. Note that since the remote + * server has granted the share, we can be sure that it doesn't + * conflict with any other shares we have in the local lock manager. + * + * Since it is possible that host may also make NLM client requests to + * our NLM server, we use a different sysid value to record our own + * client shares. + */ +int +nlm_local_shrlock(vnode_t *vp, struct shrlock *shr, int cmd, int flags) +{ + return (fs_shrlock(vp, cmd, shr, flags, CRED(), NULL)); +} + +static void +nlm_local_shrcancel(vnode_t *vp, struct shrlock *shr) +{ + (void) nlm_local_shrlock(vp, shr, F_UNSHARE, FREAD | FWRITE); + nlm_send_siglost(shr->s_pid); +} + +/* + * Do NLM_SHARE call. 
+ * Was: nlm_setshare() + */ +static int +nlm_call_share(struct shrlock *shr, struct nlm_host *host, + struct netobj *fh, int vers, int reclaim) +{ + struct nlm4_shareargs args; + enum nlm4_stats nlm_err; + uint32_t xid; + int error; + + bzero(&args, sizeof (args)); + nlm_init_share(&args.share, shr, fh); + + args.reclaim = reclaim; + xid = atomic_inc_32_nv(&nlm_xid); + args.cookie.n_len = sizeof (xid); + args.cookie.n_bytes = (char *)&xid; + + + for (;;) { + nlm_rpc_t *rpcp; + struct nlm4_shareres res; + enum clnt_stat stat; + + error = nlm_host_get_rpc(host, vers, &rpcp); + if (error != 0) + return (ENOLCK); + + bzero(&res, sizeof (res)); + stat = nlm_share_rpc(&args, &res, rpcp->nr_handle, vers); + nlm_host_rele_rpc(host, rpcp); + + error = nlm_map_clnt_stat(stat); + if (error != 0) { + if (error == EAGAIN) + continue; + + return (error); + } + + DTRACE_PROBE1(share__res, enum nlm4_stats, res.stat); + nlm_err = res.stat; + xdr_free((xdrproc_t)xdr_nlm4_shareres, (void *)&res); + if (nlm_err == nlm4_denied_grace_period) { + if (args.reclaim) + return (ENOLCK); + + error = nlm_host_wait_grace(host); + if (error != 0) + return (error); + + continue; + } + + break; + } + + switch (nlm_err) { + case nlm4_granted: + error = 0; + break; + case nlm4_blocked: + case nlm4_denied: + error = EAGAIN; + break; + case nlm4_denied_nolocks: + case nlm4_deadlck: + error = ENOLCK; + break; + default: + error = EINVAL; + break; + } + + return (error); +} + +/* + * Do NLM_UNSHARE call. 
+ */ +static int +nlm_call_unshare(struct shrlock *shr, struct nlm_host *host, + struct netobj *fh, int vers) +{ + struct nlm4_shareargs args; + enum nlm4_stats nlm_err; + uint32_t xid; + int error; + + bzero(&args, sizeof (args)); + nlm_init_share(&args.share, shr, fh); + + xid = atomic_inc_32_nv(&nlm_xid); + args.cookie.n_len = sizeof (xid); + args.cookie.n_bytes = (char *)&xid; + + for (;;) { + nlm_rpc_t *rpcp; + struct nlm4_shareres res; + enum clnt_stat stat; + + error = nlm_host_get_rpc(host, vers, &rpcp); + if (error != 0) + return (ENOLCK); + + bzero(&res, sizeof (res)); + stat = nlm_unshare_rpc(&args, &res, rpcp->nr_handle, vers); + nlm_host_rele_rpc(host, rpcp); + + error = nlm_map_clnt_stat(stat); + if (error != 0) { + if (error == EAGAIN) + continue; + + return (error); + } + + DTRACE_PROBE1(unshare__res, enum nlm4_stats, res.stat); + nlm_err = res.stat; + xdr_free((xdrproc_t)xdr_nlm4_res, (void *)&res); + if (nlm_err == nlm4_denied_grace_period) { + error = nlm_host_wait_grace(host); + if (error != 0) + return (error); + + continue; + } + + break; + } + + switch (nlm_err) { + case nlm4_granted: + error = 0; + break; + case nlm4_denied: + error = EAGAIN; + break; + case nlm4_denied_nolocks: + error = ENOLCK; + break; + default: + error = EINVAL; + break; + } + + return (error); +} + +static void +nlm_init_share(struct nlm4_share *args, + const struct shrlock *shr, struct netobj *fh) +{ + + bzero(args, sizeof (*args)); + + args->caller_name = uts_nodename(); + args->fh.n_len = fh->n_len; + args->fh.n_bytes = fh->n_bytes; + args->oh.n_len = shr->s_own_len; + args->oh.n_bytes = (void *)shr->s_owner; + + switch (shr->s_deny) { + default: + case F_NODNY: + args->mode = fsm_DN; + break; + case F_RDDNY: + args->mode = fsm_DR; + break; + case F_WRDNY: + args->mode = fsm_DW; + break; + case F_RWDNY: + args->mode = fsm_DRW; + break; + } + + switch (shr->s_access) { + default: + case 0: /* seen with F_UNSHARE */ + args->access = fsa_NONE; + break; + case F_RDACC: 
+ args->access = fsa_R; + break; + case F_WRACC: + args->access = fsa_W; + break; + case F_RWACC: + args->access = fsa_RW; + break; + } +} + +/* + * Initialize filehandle according to the version + * of NFS vnode was created on. The version of + * NLM that can be used with given NFS version + * is saved to lm_vers. + */ +static int +nlm_init_fh_by_vp(vnode_t *vp, struct netobj *fh, rpcvers_t *lm_vers) +{ + mntinfo_t *mi = VTOMI(vp); + + /* + * Too bad the NFS code doesn't just carry the FH + * in a netobj or a netbuf. + */ + switch (mi->mi_vers) { + case NFS_V3: + /* See nfs3_frlock() */ + *lm_vers = NLM4_VERS; + fh->n_len = VTOFH3(vp)->fh3_length; + fh->n_bytes = (char *)&(VTOFH3(vp)->fh3_u.data); + break; + + case NFS_VERSION: + /* See nfs_frlock() */ + *lm_vers = NLM_VERS; + fh->n_len = sizeof (fhandle_t); + /* LINTED E_BAD_PTR_CAST_ALIGN */ + fh->n_bytes = (char *)VTOFH(vp); + break; + default: + return (ENOSYS); + } + + return (0); +} + +/* + * Send SIGLOST to the process identified by pid. + * NOTE: called when NLM decides to remove lock + * or share reservation ownder by the process + * by force. 
+ */ +static void +nlm_send_siglost(pid_t pid) +{ + proc_t *p; + + mutex_enter(&pidlock); + p = prfind(pid); + if (p != NULL) + psignal(p, SIGLOST); + + mutex_exit(&pidlock); +} + +static int +nlm_map_clnt_stat(enum clnt_stat stat) +{ + switch (stat) { + case RPC_SUCCESS: + return (0); + + case RPC_TIMEDOUT: + case RPC_PROGUNAVAIL: + return (EAGAIN); + + case RPC_INTR: + return (EINTR); + + default: + return (EINVAL); + } +} + +static int +nlm_map_status(enum nlm4_stats stat) +{ + switch (stat) { + case nlm4_granted: + return (0); + + case nlm4_denied: + return (EAGAIN); + + case nlm4_denied_nolocks: + return (ENOLCK); + + case nlm4_blocked: + return (EAGAIN); + + case nlm4_denied_grace_period: + return (EAGAIN); + + case nlm4_deadlck: + return (EDEADLK); + + case nlm4_rofs: + return (EROFS); + + case nlm4_stale_fh: + return (ESTALE); + + case nlm4_fbig: + return (EFBIG); + + case nlm4_failed: + return (EACCES); + + default: + return (EINVAL); + } +} diff --git a/usr/src/uts/common/klm/nlm_dispatch.c b/usr/src/uts/common/klm/nlm_dispatch.c new file mode 100644 index 0000000000..a0ca2a56c4 --- /dev/null +++ b/usr/src/uts/common/klm/nlm_dispatch.c @@ -0,0 +1,755 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy is of the CDDL is also available via the Internet + * at http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2013 Nexenta Systems, Inc. All rights reserved. + */ + +/* + * NFS Lock Manager, server-side dispatch tables and + * dispatch programs: nlm_prog_3, nlm_prog4 + * + * These are called by RPC framework after the RPC service + * endpoints setup done in nlm_impl.c: nlm_svc_add_ep(). + * + * Originally from rpcgen, then reduced. 
+ */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/sdt.h> +#include <rpcsvc/nlm_prot.h> +#include "nlm_impl.h" + +/* + * Dispatch entry function pointers. + */ +typedef bool_t (*nlm_svc_func_t)(void *, void *, struct svc_req *); +typedef void (*nlm_freeres_func_t)(void *); + +/* + * Entries in the dispatch tables below. + */ +struct dispatch_entry { + nlm_svc_func_t de_svc; /* service routine function */ + xdrproc_t de_xargs; /* XDR args decode function */ + xdrproc_t de_xres; /* XDR res encode function */ + nlm_freeres_func_t de_resfree; /* free res function */ + int de_ressz; /* size of result */ + uint_t de_flags; /* flags */ +}; + +/* Flag bits in de_flags */ +#define NLM_DISP_NOREMOTE 1 /* Local calls only */ + +/* + * Cast macros for dispatch table function pointers. + */ +#define NLM_SVC_FUNC(func) (nlm_svc_func_t)func +#define NLM_FREERES_FUNC(func) (nlm_freeres_func_t)func + +/* ARGSUSED */ +static bool_t +nlm_null_svc(void *args, void *resp, struct svc_req *sr) +{ + return (TRUE); +} + +/* + * The common NLM service dispatch function, used by + * both: nlm_prog_3, nlm_prog_4 + */ +void +nlm_dispatch( + struct svc_req *rqstp, + SVCXPRT *transp, + const struct dispatch_entry *de) +{ + union { + /* All the arg types */ + nlm_cancargs au_cancargs; + nlm_lockargs au_lockargs; + nlm_notify au_notify; + nlm_res au_res; + nlm_shareargs au_shareargs; + nlm_sm_status au_sm_status; + nlm_testargs au_testargs; + nlm_testres au_testres; + nlm_unlockargs au_unlockargs; + nlm4_cancargs au_cancargs4; + nlm4_lockargs au_lockargs4; + nlm4_notify au_notify4; + nlm4_res au_res4; + nlm4_shareargs au_shareargs4; + nlm4_testargs au_testargs4; + nlm4_testres au_testres4; + nlm4_unlockargs au_unlockargs4; + } argu; + void *args = &argu; + union { + /* All the ret types */ + int ru_int; + nlm_res ru_res; + nlm_shareres ru_shareres; + nlm_testres ru_testres; + nlm4_res ru_res4; + nlm4_shareres ru_shareres4; + nlm4_testres ru_testres4; + + } resu; + void *res = 
&resu; + nlm_svc_func_t func; + bool_t do_reply = FALSE; + bool_t dupcached = FALSE; + struct dupreq *dr; + int dupstat; + + if ((func = de->de_svc) == NULL) { + svcerr_noproc(transp); + return; + } + + if ((de->de_flags & NLM_DISP_NOREMOTE) && + !nlm_caller_is_local(transp)) { + svcerr_noproc(transp); + return; + } + + /* + * This section from rpcgen, and then modified slightly. + * + * Dispatch entries that should _never_ send a response + * (i.e. all the _MSG and _RES entries) put NULL in the + * de_xres field to indicate that. For such entries, we + * will NOT call svc_sendreply nor xdr_free(). Normal + * dispatch entries skip svc_sendreply if the dispatch + * function returns zero, but always call xdr_free(). + * + * There are more complex cases where some dispatch + * functions need to send their own reply. We chose + * to indicate those by returning false from the + * service routine. + */ + bzero(&argu, sizeof (argu)); + if (!SVC_GETARGS(transp, de->de_xargs, args)) { + svcerr_decode(transp); + return; + } + + /* + * Duplicate request cache. + * + * Since none of the NLM replies are very large we have simplified the + * DRC by not distinguishing between idempotent and non-idempotent + * requests. + */ + dupstat = SVC_DUP_EXT(transp, rqstp, res, de->de_ressz, &dr, + &dupcached); + + switch (dupstat) { + case DUP_ERROR: + svcerr_systemerr(transp); + break; + case DUP_INPROGRESS: + break; + case DUP_NEW: + case DUP_DROP: + /* + * When UFS is quiescing it uses lockfs to block vnode + * operations until it has finished quiescing. Set the + * thread's T_DONTPEND flag to prevent the service routine + * from blocking due to a lockfs lock. 
(See ufs_check_lockfs) + */ + curthread->t_flag |= T_DONTPEND; + + bzero(&resu, sizeof (resu)); + do_reply = (*func)(args, res, rqstp); + + curthread->t_flag &= ~T_DONTPEND; + if (curthread->t_flag & T_WOULDBLOCK) { + curthread->t_flag &= ~T_WOULDBLOCK; + SVC_DUPDONE_EXT(transp, dr, res, NULL, + de->de_ressz, DUP_DROP); + do_reply = FALSE; + break; + } + SVC_DUPDONE_EXT(transp, dr, res, de->de_resfree, + de->de_ressz, DUP_DONE); + dupcached = TRUE; + break; + case DUP_DONE: + /* + * The service routine may have been responsible for sending + * the reply for the original request but for a re-xmitted + * request we don't invoke the service routine so we must + * re-xmit the reply from the dispatch function. + * + * If de_xres is NULL this is a one-way message so no reply is + * needed. + */ + if (de->de_xres != NULL_xdrproc_t) { + do_reply = TRUE; + } + break; + } + + if (do_reply) { + ASSERT(de->de_xres != NULL_xdrproc_t); + DTRACE_PROBE3(sendreply, struct svc_req *, rqstp, + SVCXPRT *, transp, struct dispatch_entry *, de); + + if (!svc_sendreply(transp, de->de_xres, res)) { + svcerr_systemerr(transp); + NLM_ERR("nlm_dispatch(): svc_sendreply() failed!\n"); + } + + if (!dupcached) { + xdr_free(de->de_xres, res); + } + } + + if (!SVC_FREEARGS(transp, de->de_xargs, args)) + NLM_WARN("nlm_dispatch(): unable to free arguments"); +} + +/* + * Result free functions. The functions are called by the RPC duplicate + * request cache code when an entry is being evicted from the cache. 
+ */ +static void +nlm_res_free(nlm_res *resp) +{ + xdr_free(xdr_nlm_res, (char *)resp); +} + +static void +nlm_shareres_free(nlm_shareres *resp) +{ + xdr_free(xdr_nlm_shareres, (char *)resp); +} + +static void +nlm_testres_free(nlm_testres *resp) +{ + xdr_free(xdr_nlm_testres, (char *)resp); +} + +static void +nlm4_res_free(nlm4_res *resp) +{ + xdr_free(xdr_nlm4_res, (char *)resp); +} + +static void +nlm4_shareres_free(nlm4_shareres *resp) +{ + xdr_free(xdr_nlm4_shareres, (char *)resp); +} + +static void +nlm4_testres_free(nlm4_testres *resp) +{ + xdr_free(xdr_nlm4_testres, (char *)resp); +} + +/* + * Dispatch tables for each program version. + * + * The tables here were all originally from rpcgen, + * but then arg/resp sizes removed, flags added. + */ + +/* + * Dispatch table for versions 1, 2, 3 + * (NLM_VERS, NLM_SM, NLM_VERSX) + */ +static const struct dispatch_entry +nlm_prog_3_dtable[] = { + + /* + * Version 1 (NLM_VERS) entries. + */ + + { /* 0: NULLPROC */ + NLM_SVC_FUNC(nlm_null_svc), + (xdrproc_t)xdr_void, + (xdrproc_t)xdr_void, + NULL, + 0, + 0 }, + + { /* 1: NLM_TEST */ + NLM_SVC_FUNC(nlm_test_1_svc), + (xdrproc_t)xdr_nlm_testargs, + (xdrproc_t)xdr_nlm_testres, + NLM_FREERES_FUNC(nlm_testres_free), + sizeof (nlm_testres), + 0 }, + + { /* 2: NLM_LOCK */ + NLM_SVC_FUNC(nlm_lock_1_svc), + (xdrproc_t)xdr_nlm_lockargs, + (xdrproc_t)xdr_nlm_res, + NLM_FREERES_FUNC(nlm_res_free), + sizeof (nlm_res), + 0 }, + + { /* 3: NLM_CANCEL */ + NLM_SVC_FUNC(nlm_cancel_1_svc), + (xdrproc_t)xdr_nlm_cancargs, + (xdrproc_t)xdr_nlm_res, + NLM_FREERES_FUNC(nlm_res_free), + sizeof (nlm_res), + 0 }, + + { /* 4: NLM_UNLOCK */ + NLM_SVC_FUNC(nlm_unlock_1_svc), + (xdrproc_t)xdr_nlm_unlockargs, + (xdrproc_t)xdr_nlm_res, + NLM_FREERES_FUNC(nlm_res_free), + sizeof (nlm_res), + 0 }, + + { /* 5: NLM_GRANTED */ + NLM_SVC_FUNC(nlm_granted_1_svc), + (xdrproc_t)xdr_nlm_testargs, + (xdrproc_t)xdr_nlm_res, + NLM_FREERES_FUNC(nlm_res_free), + sizeof (nlm_res), + 0 }, + + /* + * All the _MSG 
and _RES entries are "one way" calls that + * skip the usual RPC reply. We give them a null xdr_res + * function so the dispatcher will not send a reply. + */ + + { /* 6: NLM_TEST_MSG */ + NLM_SVC_FUNC(nlm_test_msg_1_svc), + (xdrproc_t)xdr_nlm_testargs, + (xdrproc_t)0, + NULL, + 0, + 0 }, + + { /* 7: NLM_LOCK_MSG */ + NLM_SVC_FUNC(nlm_lock_msg_1_svc), + (xdrproc_t)xdr_nlm_lockargs, + (xdrproc_t)0, + NULL, + 0, + 0 }, + + { /* 8: NLM_CANCEL_MSG */ + NLM_SVC_FUNC(nlm_cancel_msg_1_svc), + (xdrproc_t)xdr_nlm_cancargs, + (xdrproc_t)0, + NULL, + 0, + 0 }, + + { /* 9: NLM_UNLOCK_MSG */ + NLM_SVC_FUNC(nlm_unlock_msg_1_svc), + (xdrproc_t)xdr_nlm_unlockargs, + (xdrproc_t)0, + NULL, + 0, + 0 }, + + { /* 10: NLM_GRANTED_MSG */ + NLM_SVC_FUNC(nlm_granted_msg_1_svc), + (xdrproc_t)xdr_nlm_testargs, + (xdrproc_t)0, + NULL, + 0, + 0 }, + + { /* 11: NLM_TEST_RES */ + NLM_SVC_FUNC(nlm_test_res_1_svc), + (xdrproc_t)xdr_nlm_testres, + (xdrproc_t)0, + NULL, + 0, + 0 }, + + { /* 12: NLM_LOCK_RES */ + NLM_SVC_FUNC(nlm_lock_res_1_svc), + (xdrproc_t)xdr_nlm_res, + (xdrproc_t)0, + NULL, + 0, + 0 }, + + { /* 13: NLM_CANCEL_RES */ + NLM_SVC_FUNC(nlm_cancel_res_1_svc), + (xdrproc_t)xdr_nlm_res, + (xdrproc_t)0, + NULL, + 0, + 0 }, + + { /* 14: NLM_UNLOCK_RES */ + NLM_SVC_FUNC(nlm_unlock_res_1_svc), + (xdrproc_t)xdr_nlm_res, + (xdrproc_t)0, + NULL, + 0, + 0 }, + + { /* 15: NLM_GRANTED_RES */ + NLM_SVC_FUNC(nlm_granted_res_1_svc), + (xdrproc_t)xdr_nlm_res, + (xdrproc_t)0, + NULL, + 0, + 0 }, + + { /* 16: not used */ + NLM_SVC_FUNC(0), + (xdrproc_t)0, + (xdrproc_t)0, + NULL, + 0, + 0 }, + + { /* 17: NLM_SM_NOTIFY1 */ + NLM_SVC_FUNC(nlm_sm_notify1_2_svc), + (xdrproc_t)xdr_nlm_sm_status, + (xdrproc_t)xdr_void, + NULL, + 0, + NLM_DISP_NOREMOTE }, + + { /* 18: NLM_SM_NOTIFY2 */ + NLM_SVC_FUNC(nlm_sm_notify2_2_svc), + (xdrproc_t)xdr_nlm_sm_status, + (xdrproc_t)xdr_void, + NULL, + 0, + NLM_DISP_NOREMOTE }, + + /* + * Version 3 (NLM_VERSX) entries. 
+ */ + + { /* 19: not used */ + NLM_SVC_FUNC(0), + (xdrproc_t)0, + (xdrproc_t)0, + NULL, + 0, + 0 }, + + { /* 20: NLM_SHARE */ + NLM_SVC_FUNC(nlm_share_3_svc), + (xdrproc_t)xdr_nlm_shareargs, + (xdrproc_t)xdr_nlm_shareres, + NLM_FREERES_FUNC(nlm_shareres_free), + sizeof (nlm_shareres), + 0 }, + + { /* 21: NLM_UNSHARE */ + NLM_SVC_FUNC(nlm_unshare_3_svc), + (xdrproc_t)xdr_nlm_shareargs, + (xdrproc_t)xdr_nlm_shareres, + NLM_FREERES_FUNC(nlm_shareres_free), + sizeof (nlm_shareres), + 0 }, + + { /* 22: NLM_NM_LOCK */ + NLM_SVC_FUNC(nlm_nm_lock_3_svc), + (xdrproc_t)xdr_nlm_lockargs, + (xdrproc_t)xdr_nlm_res, + NLM_FREERES_FUNC(nlm_res_free), + sizeof (nlm_res), + 0 }, + + { /* 23: NLM_FREE_ALL */ + NLM_SVC_FUNC(nlm_free_all_3_svc), + (xdrproc_t)xdr_nlm_notify, + (xdrproc_t)xdr_void, + NULL, + 0, + 0 }, +}; +static int nlm_prog_3_dtsize = + sizeof (nlm_prog_3_dtable) / + sizeof (nlm_prog_3_dtable[0]); + +/* + * RPC dispatch function for nlm_prot versions: 1,2,3 + */ +void +nlm_prog_3(struct svc_req *rqstp, register SVCXPRT *transp) +{ + const struct dispatch_entry *de; + rpcproc_t max_proc; + + switch (rqstp->rq_vers) { + case NLM_VERS: + max_proc = NLM_GRANTED_RES; + break; + case NLM_SM: + max_proc = NLM_SM_NOTIFY2; + break; + case NLM_VERSX: + max_proc = NLM_FREE_ALL; + break; + default: + /* Our svc registration should prevent this. 
*/ + ASSERT(0); /* paranoid */ + svcerr_noprog(transp); + return; + } + ASSERT(max_proc < nlm_prog_3_dtsize); + + if (rqstp->rq_proc > max_proc) { + svcerr_noproc(transp); + return; + } + + de = &nlm_prog_3_dtable[rqstp->rq_proc]; + + nlm_dispatch(rqstp, transp, de); +} + +/* + * Dispatch table for version 4 (NLM4_VERS) + */ +static const struct dispatch_entry +nlm_prog_4_dtable[] = { + + { /* 0: NULLPROC */ + NLM_SVC_FUNC(nlm_null_svc), + (xdrproc_t)xdr_void, + (xdrproc_t)xdr_void, + NULL, + 0, + 0 }, + + { /* 1: NLM4_TEST */ + NLM_SVC_FUNC(nlm4_test_4_svc), + (xdrproc_t)xdr_nlm4_testargs, + (xdrproc_t)xdr_nlm4_testres, + NLM_FREERES_FUNC(nlm4_testres_free), + sizeof (nlm4_testres), + 0 }, + + { /* 2: NLM4_LOCK */ + NLM_SVC_FUNC(nlm4_lock_4_svc), + (xdrproc_t)xdr_nlm4_lockargs, + (xdrproc_t)xdr_nlm4_res, + NLM_FREERES_FUNC(nlm4_res_free), + sizeof (nlm4_res), + 0 }, + + { /* 3: NLM4_CANCEL */ + NLM_SVC_FUNC(nlm4_cancel_4_svc), + (xdrproc_t)xdr_nlm4_cancargs, + (xdrproc_t)xdr_nlm4_res, + NLM_FREERES_FUNC(nlm4_res_free), + sizeof (nlm4_res), + 0 }, + + { /* 4: NLM4_UNLOCK */ + NLM_SVC_FUNC(nlm4_unlock_4_svc), + (xdrproc_t)xdr_nlm4_unlockargs, + (xdrproc_t)xdr_nlm4_res, + NLM_FREERES_FUNC(nlm4_res_free), + sizeof (nlm4_res), + 0 }, + + { /* 5: NLM4_GRANTED */ + NLM_SVC_FUNC(nlm4_granted_4_svc), + (xdrproc_t)xdr_nlm4_testargs, + (xdrproc_t)xdr_nlm4_res, + NLM_FREERES_FUNC(nlm4_res_free), + sizeof (nlm4_res), + 0 }, + + /* + * All the _MSG and _RES entries are "one way" calls that + * skip the usual RPC reply. We give them a null xdr_res + * function so the dispatcher will not send a reply. 
+ */ + + { /* 6: NLM4_TEST_MSG */ + NLM_SVC_FUNC(nlm4_test_msg_4_svc), + (xdrproc_t)xdr_nlm4_testargs, + (xdrproc_t)0, + NULL, + 0, + 0 }, + + { /* 7: NLM4_LOCK_MSG */ + NLM_SVC_FUNC(nlm4_lock_msg_4_svc), + (xdrproc_t)xdr_nlm4_lockargs, + (xdrproc_t)0, + NULL, + 0, + 0 }, + + { /* 8: NLM4_CANCEL_MSG */ + NLM_SVC_FUNC(nlm4_cancel_msg_4_svc), + (xdrproc_t)xdr_nlm4_cancargs, + (xdrproc_t)0, + NULL, + 0, + 0 }, + + { /* 9: NLM4_UNLOCK_MSG */ + NLM_SVC_FUNC(nlm4_unlock_msg_4_svc), + (xdrproc_t)xdr_nlm4_unlockargs, + (xdrproc_t)0, + NULL, + 0, + 0 }, + + { /* 10: NLM4_GRANTED_MSG */ + NLM_SVC_FUNC(nlm4_granted_msg_4_svc), + (xdrproc_t)xdr_nlm4_testargs, + (xdrproc_t)0, + NULL, + 0, + 0 }, + + { /* 11: NLM4_TEST_RES */ + NLM_SVC_FUNC(nlm4_test_res_4_svc), + (xdrproc_t)xdr_nlm4_testres, + (xdrproc_t)0, + NULL, + 0, + 0 }, + + { /* 12: NLM4_LOCK_RES */ + NLM_SVC_FUNC(nlm4_lock_res_4_svc), + (xdrproc_t)xdr_nlm4_res, + (xdrproc_t)0, + NULL, + 0, + 0 }, + + { /* 13: NLM4_CANCEL_RES */ + NLM_SVC_FUNC(nlm4_cancel_res_4_svc), + (xdrproc_t)xdr_nlm4_res, + (xdrproc_t)0, + NULL, + 0, + 0 }, + + { /* 14: NLM4_UNLOCK_RES */ + NLM_SVC_FUNC(nlm4_unlock_res_4_svc), + (xdrproc_t)xdr_nlm4_res, + (xdrproc_t)0, + NULL, + 0, + 0 }, + + { /* 15: NLM4_GRANTED_RES */ + NLM_SVC_FUNC(nlm4_granted_res_4_svc), + (xdrproc_t)xdr_nlm4_res, + (xdrproc_t)0, + NULL, + 0, + 0 }, + + { /* 16: not used */ + NLM_SVC_FUNC(0), + (xdrproc_t)0, + (xdrproc_t)0, + NULL, + 0, + 0 }, + + { /* 17: NLM_SM_NOTIFY1 (not in v4) */ + NLM_SVC_FUNC(0), + (xdrproc_t)0, + (xdrproc_t)0, + NULL, + 0, + 0 }, + + { /* 18: NLM_SM_NOTIFY2 (not in v4) */ + NLM_SVC_FUNC(0), + (xdrproc_t)0, + (xdrproc_t)0, + NULL, + 0, + 0 }, + + { /* 19: not used */ + NLM_SVC_FUNC(0), + (xdrproc_t)0, + (xdrproc_t)0, + NULL, + 0, + 0 }, + + { /* 20: NLM4_SHARE */ + NLM_SVC_FUNC(nlm4_share_4_svc), + (xdrproc_t)xdr_nlm4_shareargs, + (xdrproc_t)xdr_nlm4_shareres, + NLM_FREERES_FUNC(nlm4_shareres_free), + sizeof (nlm4_shareres), + 0 }, + + { /* 21: 
NLM4_UNSHARE */ + NLM_SVC_FUNC(nlm4_unshare_4_svc), + (xdrproc_t)xdr_nlm4_shareargs, + (xdrproc_t)xdr_nlm4_shareres, + NLM_FREERES_FUNC(nlm4_shareres_free), + sizeof (nlm4_shareres), + 0 }, + + { /* 22: NLM4_NM_LOCK */ + NLM_SVC_FUNC(nlm4_nm_lock_4_svc), + (xdrproc_t)xdr_nlm4_lockargs, + (xdrproc_t)xdr_nlm4_res, + NLM_FREERES_FUNC(nlm4_res_free), + sizeof (nlm4_res), + 0 }, + + { /* 23: NLM4_FREE_ALL */ + NLM_SVC_FUNC(nlm4_free_all_4_svc), + (xdrproc_t)xdr_nlm4_notify, + (xdrproc_t)xdr_void, + NULL, + 0, + 0 }, +}; +static int nlm_prog_4_dtsize = + sizeof (nlm_prog_4_dtable) / + sizeof (nlm_prog_4_dtable[0]); + +/* + * RPC dispatch function for nlm_prot version 4. + */ +void +nlm_prog_4(struct svc_req *rqstp, register SVCXPRT *transp) +{ + const struct dispatch_entry *de; + + if (rqstp->rq_vers != NLM4_VERS) { + /* Our svc registration should prevent this. */ + ASSERT(0); /* paranoid */ + svcerr_noprog(transp); + return; + } + + if (rqstp->rq_proc >= nlm_prog_4_dtsize) { + svcerr_noproc(transp); + return; + } + + de = &nlm_prog_4_dtable[rqstp->rq_proc]; + + nlm_dispatch(rqstp, transp, de); +} diff --git a/usr/src/uts/common/klm/nlm_impl.c b/usr/src/uts/common/klm/nlm_impl.c new file mode 100644 index 0000000000..16051875aa --- /dev/null +++ b/usr/src/uts/common/klm/nlm_impl.c @@ -0,0 +1,2740 @@ +/* + * Copyright (c) 2008 Isilon Inc http://www.isilon.com/ + * Authors: Doug Rabson <dfr@rabson.org> + * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2013 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. + */ + +/* + * NFS LockManager, start/stop, support functions, etc. + * Most of the interesting code is here. 
+ * + * Source code derived from FreeBSD nlm_prot_impl.c + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/thread.h> +#include <sys/fcntl.h> +#include <sys/flock.h> +#include <sys/mount.h> +#include <sys/priv.h> +#include <sys/proc.h> +#include <sys/share.h> +#include <sys/socket.h> +#include <sys/syscall.h> +#include <sys/syslog.h> +#include <sys/systm.h> +#include <sys/class.h> +#include <sys/unistd.h> +#include <sys/vnode.h> +#include <sys/vfs.h> +#include <sys/queue.h> +#include <sys/bitmap.h> +#include <sys/sdt.h> +#include <netinet/in.h> + +#include <rpc/rpc.h> +#include <rpc/xdr.h> +#include <rpc/pmap_prot.h> +#include <rpc/pmap_clnt.h> +#include <rpc/rpcb_prot.h> + +#include <rpcsvc/nlm_prot.h> +#include <rpcsvc/sm_inter.h> +#include <rpcsvc/nsm_addr.h> + +#include <nfs/nfs.h> +#include <nfs/nfs_clnt.h> +#include <nfs/export.h> +#include <nfs/rnode.h> +#include <nfs/lm.h> + +#include "nlm_impl.h" + +struct nlm_knc { + struct knetconfig n_knc; + const char *n_netid; +}; + +/* + * Number of attempts NLM tries to obtain RPC binding + * of local statd. + */ +#define NLM_NSM_RPCBIND_RETRIES 10 + +/* + * Timeout (in seconds) NLM waits before making another + * attempt to obtain RPC binding of local statd. + */ +#define NLM_NSM_RPCBIND_TIMEOUT 5 + +/* + * Total number of sysids in NLM sysid bitmap + */ +#define NLM_BMAP_NITEMS (LM_SYSID_MAX + 1) + +/* + * Number of ulong_t words in bitmap that is used + * for allocation of sysid numbers. 
/*
 * Number of ulong_t words in the bitmap that is used for
 * allocation of sysid numbers.
 *
 * The division is rounded up so that each of the NLM_BMAP_NITEMS
 * bits has backing storage even when that count is not an exact
 * multiple of BT_NBIPUL (bits per ulong_t).  A truncating division
 * would leave the highest sysids pointing past the end of the array.
 */
#define	NLM_BMAP_WORDS	((NLM_BMAP_NITEMS + BT_NBIPUL - 1) / BT_NBIPUL)
/*
 * Static table pairing each knetconfig the network lock manager
 * can work with and its netid string.  It is consulted when a
 * knetconfig has to be obtained from a netid and vice versa.
 *
 * Every entry starts out with NODEV in the device slot of its
 * knetconfig.
 *
 * Knetconfigs are activated either by the call from
 * user-space lockd daemon (server side) or by taking
 * knetconfig from NFS mountinfo (client side)
 */
static struct nlm_knc nlm_netconfigs[] = { /* (g) */
	/* UDP */
	{
		{ NC_TPI_CLTS, NC_INET, NC_UDP, NODEV },
		"udp",
	},
	/* TCP */
	{
		{ NC_TPI_COTS_ORD, NC_INET, NC_TCP, NODEV },
		"tcp",
	},
	/* UDP over IPv6 */
	{
		{ NC_TPI_CLTS, NC_INET6, NC_UDP, NODEV },
		"udp6",
	},
	/* TCP over IPv6 */
	{
		{ NC_TPI_COTS_ORD, NC_INET6, NC_TCP, NODEV },
		"tcp6",
	},
	/* ticlts: loopback family, NC_TPI_CLTS semantics, no protocol */
	{
		{ NC_TPI_CLTS, NC_LOOPBACK, NC_NOPROTO, NODEV },
		"ticlts",
	},
	/* ticotsord: loopback family, NC_TPI_COTS_ORD semantics, no protocol */
	{
		{ NC_TPI_COTS_ORD, NC_LOOPBACK, NC_NOPROTO, NODEV },
		"ticotsord",
	},
};
nlm_host_dtor(void *, void *); +static void nlm_host_destroy(struct nlm_host *); +static struct nlm_host *nlm_host_create(char *, const char *, + struct knetconfig *, struct netbuf *); +static struct nlm_host *nlm_host_find_locked(struct nlm_globals *, + const char *, struct netbuf *, avl_index_t *); +static void nlm_host_unregister(struct nlm_globals *, struct nlm_host *); +static void nlm_host_gc_vholds(struct nlm_host *); +static bool_t nlm_host_has_srv_locks(struct nlm_host *); +static bool_t nlm_host_has_cli_locks(struct nlm_host *); +static bool_t nlm_host_has_locks(struct nlm_host *); + +/* + * NLM vhold functions + */ +static int nlm_vhold_ctor(void *, void *, int); +static void nlm_vhold_dtor(void *, void *); +static void nlm_vhold_destroy(struct nlm_host *, + struct nlm_vhold *); +static bool_t nlm_vhold_busy(struct nlm_host *, struct nlm_vhold *); +static void nlm_vhold_clean(struct nlm_vhold *, int); + +/* + * NLM client/server sleeping locks/share reservation functions + */ +struct nlm_slreq *nlm_slreq_find_locked(struct nlm_host *, + struct nlm_vhold *, struct flock64 *); +static struct nlm_shres *nlm_shres_create_item(struct shrlock *, vnode_t *); +static void nlm_shres_destroy_item(struct nlm_shres *); +static bool_t nlm_shres_equal(struct shrlock *, struct shrlock *); + +/* + * NLM initialization functions. + */ +void +nlm_init(void) +{ + nlm_hosts_cache = kmem_cache_create("nlm_host_cache", + sizeof (struct nlm_host), 0, nlm_host_ctor, nlm_host_dtor, + nlm_kmem_reclaim, NULL, NULL, 0); + + nlm_vhold_cache = kmem_cache_create("nlm_vhold_cache", + sizeof (struct nlm_vhold), 0, nlm_vhold_ctor, nlm_vhold_dtor, + NULL, NULL, NULL, 0); + + nlm_rpc_init(); + TAILQ_INIT(&nlm_zones_list); + + /* initialize sysids bitmap */ + bzero(nlm_sysid_bmap, sizeof (nlm_sysid_bmap)); + nlm_sysid_nidx = 1; + + /* + * Reserv the sysid #0, because it's associated + * with local locks only. Don't let to allocate + * it for remote locks. 
+ */ + BT_SET(nlm_sysid_bmap, 0); +} + +void +nlm_globals_register(struct nlm_globals *g) +{ + rw_enter(&lm_lck, RW_WRITER); + TAILQ_INSERT_TAIL(&nlm_zones_list, g, nlm_link); + rw_exit(&lm_lck); +} + +void +nlm_globals_unregister(struct nlm_globals *g) +{ + rw_enter(&lm_lck, RW_WRITER); + TAILQ_REMOVE(&nlm_zones_list, g, nlm_link); + rw_exit(&lm_lck); +} + +/* ARGSUSED */ +static void +nlm_kmem_reclaim(void *cdrarg) +{ + struct nlm_globals *g; + + rw_enter(&lm_lck, RW_READER); + TAILQ_FOREACH(g, &nlm_zones_list, nlm_link) + cv_broadcast(&g->nlm_gc_sched_cv); + + rw_exit(&lm_lck); +} + +/* + * NLM garbage collector thread (GC). + * + * NLM GC periodically checks whether there're any host objects + * that can be cleaned up. It also releases stale vnodes that + * live on the server side (under protection of vhold objects). + * + * NLM host objects are cleaned up from GC thread because + * operations helping us to determine whether given host has + * any locks can be quite expensive and it's not good to call + * them every time the very last reference to the host is dropped. + * Thus we use "lazy" approach for hosts cleanup. + * + * The work of GC is to release stale vnodes on the server side + * and destroy hosts that haven't any locks and any activity for + * some time (i.e. idle hosts). + */ +static void +nlm_gc(struct nlm_globals *g) +{ + struct nlm_host *hostp; + clock_t now, idle_period; + + idle_period = SEC_TO_TICK(g->cn_idle_tmo); + mutex_enter(&g->lock); + for (;;) { + /* + * GC thread can be explicitly scheduled from + * memory reclamation function. + */ + (void) cv_timedwait(&g->nlm_gc_sched_cv, &g->lock, + ddi_get_lbolt() + idle_period); + + /* + * NLM is shutting down, time to die. + */ + if (g->run_status == NLM_ST_STOPPING) + break; + + now = ddi_get_lbolt(); + DTRACE_PROBE2(gc__start, struct nlm_globals *, g, + clock_t, now); + + /* + * Handle all hosts that are unused at the moment + * until we meet one with idle timeout in future. 
+ */ + while ((hostp = TAILQ_FIRST(&g->nlm_idle_hosts)) != NULL) { + bool_t has_locks = FALSE; + + if (hostp->nh_idle_timeout > now) + break; + + /* + * Drop global lock while doing expensive work + * on this host. We'll re-check any conditions + * that might change after retaking the global + * lock. + */ + mutex_exit(&g->lock); + mutex_enter(&hostp->nh_lock); + + /* + * nlm_globals lock was dropped earlier because + * garbage collecting of vholds and checking whether + * host has any locks/shares are expensive operations. + */ + nlm_host_gc_vholds(hostp); + has_locks = nlm_host_has_locks(hostp); + + mutex_exit(&hostp->nh_lock); + mutex_enter(&g->lock); + + /* + * While we were doing expensive operations outside of + * nlm_globals critical section, somebody could + * take the host, add lock/share to one of its vnodes + * and release the host back. If so, host's idle timeout + * is renewed and our information about locks on the + * given host is outdated. + */ + if (hostp->nh_idle_timeout > now) + continue; + + /* + * If either host has locks or somebody has began to + * use it while we were outside the nlm_globals critical + * section. In both cases we have to renew host's + * timeout and put it to the end of LRU list. + */ + if (has_locks || hostp->nh_refs > 0) { + TAILQ_REMOVE(&g->nlm_idle_hosts, + hostp, nh_link); + hostp->nh_idle_timeout = now + idle_period; + TAILQ_INSERT_TAIL(&g->nlm_idle_hosts, + hostp, nh_link); + continue; + } + + /* + * We're here if all the following conditions hold: + * 1) Host hasn't any locks or share reservations + * 2) Host is unused + * 3) Host wasn't touched by anyone at least for + * g->cn_idle_tmo seconds. + * + * So, now we can destroy it. 
+ */ + nlm_host_unregister(g, hostp); + mutex_exit(&g->lock); + + nlm_host_unmonitor(g, hostp); + nlm_host_destroy(hostp); + mutex_enter(&g->lock); + if (g->run_status == NLM_ST_STOPPING) + break; + + } + + DTRACE_PROBE(gc__end); + } + + DTRACE_PROBE1(gc__exit, struct nlm_globals *, g); + + /* Let others know that GC has died */ + g->nlm_gc_thread = NULL; + mutex_exit(&g->lock); + + cv_broadcast(&g->nlm_gc_finish_cv); + zthread_exit(); +} + +/* + * Thread reclaim locks/shares acquired by the client side + * on the given server represented by hostp. + */ +static void +nlm_reclaimer(struct nlm_host *hostp) +{ + struct nlm_globals *g; + + mutex_enter(&hostp->nh_lock); + hostp->nh_reclaimer = curthread; + mutex_exit(&hostp->nh_lock); + + g = zone_getspecific(nlm_zone_key, curzone); + nlm_reclaim_client(g, hostp); + + mutex_enter(&hostp->nh_lock); + hostp->nh_flags &= ~NLM_NH_RECLAIM; + hostp->nh_reclaimer = NULL; + cv_broadcast(&hostp->nh_recl_cv); + mutex_exit(&hostp->nh_lock); + + /* + * Host was explicitly referenced before + * nlm_reclaim() was called, release it + * here. + */ + nlm_host_release(g, hostp); + zthread_exit(); +} + +/* + * Copy a struct netobj. (see xdr.h) + */ +void +nlm_copy_netobj(struct netobj *dst, struct netobj *src) +{ + dst->n_len = src->n_len; + dst->n_bytes = kmem_alloc(src->n_len, KM_SLEEP); + bcopy(src->n_bytes, dst->n_bytes, src->n_len); +} + +/* + * An NLM specificw replacement for clnt_call(). + * nlm_clnt_call() is used by all RPC functions generated + * from nlm_prot.x specification. The function is aware + * about some pitfalls of NLM RPC procedures and has a logic + * that handles them properly. 
+ */ +enum clnt_stat +nlm_clnt_call(CLIENT *clnt, rpcproc_t procnum, xdrproc_t xdr_args, + caddr_t argsp, xdrproc_t xdr_result, caddr_t resultp, struct timeval wait) +{ + k_sigset_t oldmask; + enum clnt_stat stat; + bool_t sig_blocked = FALSE; + + /* + * If NLM RPC procnum is one of the NLM _RES procedures + * that are used to reply to asynchronous NLM RPC + * (MSG calls), explicitly set RPC timeout to zero. + * Client doesn't send a reply to RES procedures, so + * we don't need to wait anything. + * + * NOTE: we ignore NLM4_*_RES procnums because they are + * equal to NLM_*_RES numbers. + */ + if (procnum >= NLM_TEST_RES && procnum <= NLM_GRANTED_RES) + wait = nlm_rpctv_zero; + + /* + * We need to block signals in case of NLM_CANCEL RPC + * in order to prevent interruption of network RPC + * calls. + */ + if (procnum == NLM_CANCEL) { + k_sigset_t newmask; + + sigfillset(&newmask); + sigreplace(&newmask, &oldmask); + sig_blocked = TRUE; + } + + stat = clnt_call(clnt, procnum, xdr_args, + argsp, xdr_result, resultp, wait); + + /* + * Restore signal mask back if signals were blocked + */ + if (sig_blocked) + sigreplace(&oldmask, (k_sigset_t *)NULL); + + return (stat); +} + +/* + * Suspend NLM client/server in the given zone. + * + * During suspend operation we mark those hosts + * that have any locks with NLM_NH_SUSPEND flags, + * so that they can be checked later, when resume + * operation occurs. + */ +static void +nlm_suspend_zone(struct nlm_globals *g) +{ + struct nlm_host *hostp; + struct nlm_host_list all_hosts; + + /* + * Note that while we're doing suspend, GC thread is active + * and it can destroy some hosts while we're walking through + * the hosts tree. To prevent that and make suspend logic + * a bit more simple we put all hosts to local "all_hosts" + * list and increment reference counter of each host. + * This guaranties that no hosts will be released while + * we're doing suspend. 
/*
 * Resume NLM hosts for the given zone.
 *
 * nlm_resume_zone() is called after hosts were suspended
 * (see nlm_suspend_zone) and its main purpose is to check
 * whether remote locks owned by hosts are still in a consistent
 * state.  If they aren't, the resume function tries to reclaim
 * locks (for client side hosts) and clean locks (for
 * server side hosts).
 *
 * Every host in g->nlm_hosts_tree is visited.  For hosts carrying
 * NLM_NH_SUSPEND an NSM handle is set up from the host's own
 * knetconfig and address and SM_STAT is called through it; the
 * returned state is compared with hostp->nh_state.  Whatever the
 * outcome, the flag is cleared and the host is released once
 * (nlm_host_release), which pairs with the reference taken by
 * nlm_suspend_zone().
 *
 * NOTE(review): nh_flags is read and modified here without
 * nh_lock, while nlm_reclaimer() changes the same field under
 * that lock -- confirm this is safe in the resume path.
 */
static void
nlm_resume_zone(struct nlm_globals *g)
{
	struct nlm_host *hostp, *h_next;

	mutex_enter(&g->lock);
	hostp = avl_first(&g->nlm_hosts_tree);

	/*
	 * In nlm_suspend_zone() the reference counter of each
	 * host was incremented, so we can safely iterate through
	 * all hosts without worrying that any host we touch will
	 * be removed at the moment.
	 */
	while (hostp != NULL) {
		struct nlm_nsm nsm;
		enum clnt_stat stat;
		int32_t sm_state;
		int error;
		bool_t resume_failed = FALSE;

		/*
		 * Pick the successor while g->lock is still held;
		 * the lock is dropped for the RPC work below.
		 */
		h_next = AVL_NEXT(&g->nlm_hosts_tree, hostp);
		mutex_exit(&g->lock);

		DTRACE_PROBE1(resume__host, struct nlm_host *, hostp);

		/*
		 * Suspend operation marked that the host doesn't
		 * have any locks. Skip it.
		 */
		if (!(hostp->nh_flags & NLM_NH_SUSPEND))
			goto cycle_end;

		error = nlm_nsm_init(&nsm, &hostp->nh_knc, &hostp->nh_addr);
		if (error != 0) {
			/* Nothing to tear down: the handle was never set up */
			NLM_ERR("Resume: Failed to contact to NSM of host %s "
			    "[error=%d]\n", hostp->nh_name, error);
			resume_failed = TRUE;
			goto cycle_end;
		}

		stat = nlm_nsm_stat(&nsm, &sm_state);
		if (stat != RPC_SUCCESS) {
			NLM_ERR("Resume: Failed to call SM_STAT operation for "
			    "host %s [stat=%d]\n", hostp->nh_name, stat);
			resume_failed = TRUE;
			nlm_nsm_fini(&nsm);
			goto cycle_end;
		}

		if (sm_state != hostp->nh_state) {
			/*
			 * Current SM state of the host isn't equal
			 * to the one host had when it was suspended.
			 * Probably it was rebooted. Try to reclaim
			 * locks if the host has any on its client side.
			 * Also try to clean up its server side locks
			 * (if the host has any).
			 */
			nlm_host_notify_client(hostp, sm_state);
			nlm_host_notify_server(hostp, sm_state);
		}

		nlm_nsm_fini(&nsm);

cycle_end:
		if (resume_failed) {
			/*
			 * Resume failed for the given host.
			 * Just clean up all resources it owns.
			 */
			nlm_host_notify_server(hostp, 0);
			nlm_client_cancel_all(g, hostp);
		}

		hostp->nh_flags &= ~NLM_NH_SUSPEND;
		nlm_host_release(g, hostp);
		hostp = h_next;
		mutex_enter(&g->lock);
	}

	mutex_exit(&g->lock);
}
+ */ +static int +nlm_init_local_knc(struct knetconfig *knc) +{ + int error; + vnode_t *vp; + + bzero(knc, sizeof (*knc)); + error = lookupname("/dev/tcp", UIO_SYSSPACE, + FOLLOW, NULLVPP, &vp); + if (error != 0) + return (error); + + knc->knc_semantics = NC_TPI_COTS; + knc->knc_protofmly = NC_INET; + knc->knc_proto = NC_TCP; + knc->knc_rdev = vp->v_rdev; + VN_RELE(vp); + + + return (0); +} + +/* + * Initialize NSM handle that will be used to talk + * to local statd via loopback interface. + */ +static int +nlm_nsm_init_local(struct nlm_nsm *nsm) +{ + int error; + struct knetconfig knc; + struct sockaddr_in sin; + struct netbuf nb; + + error = nlm_init_local_knc(&knc); + if (error != 0) + return (error); + + bzero(&sin, sizeof (sin)); + sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + sin.sin_family = AF_INET; + + nb.buf = (char *)&sin; + nb.len = nb.maxlen = sizeof (sin); + + return (nlm_nsm_init(nsm, &knc, &nb)); +} + +/* + * Initialize NSM handle used for talking to statd + */ +static int +nlm_nsm_init(struct nlm_nsm *nsm, struct knetconfig *knc, struct netbuf *nb) +{ + enum clnt_stat stat; + int error, retries; + + bzero(nsm, sizeof (*nsm)); + nsm->ns_knc = *knc; + nlm_copy_netbuf(&nsm->ns_addr, nb); + + /* + * Try several times to get the port of statd service, + * If rpcbind_getaddr returns RPC_PROGNOTREGISTERED, + * retry an attempt, but wait for NLM_NSM_RPCBIND_TIMEOUT + * seconds berofore. + */ + for (retries = 0; retries < NLM_NSM_RPCBIND_RETRIES; retries++) { + stat = rpcbind_getaddr(&nsm->ns_knc, SM_PROG, + SM_VERS, &nsm->ns_addr); + if (stat != RPC_SUCCESS) { + if (stat == RPC_PROGNOTREGISTERED) { + delay(SEC_TO_TICK(NLM_NSM_RPCBIND_TIMEOUT)); + continue; + } + } + + break; + } + + if (stat != RPC_SUCCESS) { + DTRACE_PROBE2(rpcbind__error, enum clnt_stat, stat, + int, retries); + error = ENOENT; + goto error; + } + + /* + * Create an RPC handle that'll be used for communication with local + * statd using the status monitor protocol. 
+ */ + error = clnt_tli_kcreate(&nsm->ns_knc, &nsm->ns_addr, SM_PROG, SM_VERS, + 0, NLM_RPC_RETRIES, kcred, &nsm->ns_handle); + if (error != 0) + goto error; + + /* + * Create an RPC handle that'll be used for communication with the + * local statd using the address registration protocol. + */ + error = clnt_tli_kcreate(&nsm->ns_knc, &nsm->ns_addr, NSM_ADDR_PROGRAM, + NSM_ADDR_V1, 0, NLM_RPC_RETRIES, kcred, &nsm->ns_addr_handle); + if (error != 0) + goto error; + + sema_init(&nsm->ns_sem, 1, NULL, SEMA_DEFAULT, NULL); + return (0); + +error: + kmem_free(nsm->ns_addr.buf, nsm->ns_addr.maxlen); + if (nsm->ns_handle) + CLNT_DESTROY(nsm->ns_handle); + + return (error); +} + +static void +nlm_nsm_fini(struct nlm_nsm *nsm) +{ + kmem_free(nsm->ns_addr.buf, nsm->ns_addr.maxlen); + CLNT_DESTROY(nsm->ns_addr_handle); + nsm->ns_addr_handle = NULL; + CLNT_DESTROY(nsm->ns_handle); + nsm->ns_handle = NULL; + sema_destroy(&nsm->ns_sem); +} + +static enum clnt_stat +nlm_nsm_simu_crash(struct nlm_nsm *nsm) +{ + enum clnt_stat stat; + + sema_p(&nsm->ns_sem); + nlm_nsm_clnt_init(nsm->ns_handle, nsm); + stat = sm_simu_crash_1(NULL, NULL, nsm->ns_handle); + sema_v(&nsm->ns_sem); + + return (stat); +} + +static enum clnt_stat +nlm_nsm_stat(struct nlm_nsm *nsm, int32_t *out_stat) +{ + struct sm_name args; + struct sm_stat_res res; + enum clnt_stat stat; + + args.mon_name = uts_nodename(); + bzero(&res, sizeof (res)); + + sema_p(&nsm->ns_sem); + nlm_nsm_clnt_init(nsm->ns_handle, nsm); + stat = sm_stat_1(&args, &res, nsm->ns_handle); + sema_v(&nsm->ns_sem); + + if (stat == RPC_SUCCESS) + *out_stat = res.state; + + return (stat); +} + +static enum clnt_stat +nlm_nsm_mon(struct nlm_nsm *nsm, char *hostname, uint16_t priv) +{ + struct mon args; + struct sm_stat_res res; + enum clnt_stat stat; + + bzero(&args, sizeof (args)); + bzero(&res, sizeof (res)); + + args.mon_id.mon_name = hostname; + args.mon_id.my_id.my_name = uts_nodename(); + args.mon_id.my_id.my_prog = NLM_PROG; + 
args.mon_id.my_id.my_vers = NLM_SM; + args.mon_id.my_id.my_proc = NLM_SM_NOTIFY1; + bcopy(&priv, args.priv, sizeof (priv)); + + sema_p(&nsm->ns_sem); + nlm_nsm_clnt_init(nsm->ns_handle, nsm); + stat = sm_mon_1(&args, &res, nsm->ns_handle); + sema_v(&nsm->ns_sem); + + return (stat); +} + +static enum clnt_stat +nlm_nsm_unmon(struct nlm_nsm *nsm, char *hostname) +{ + struct mon_id args; + struct sm_stat res; + enum clnt_stat stat; + + bzero(&args, sizeof (args)); + bzero(&res, sizeof (res)); + + args.mon_name = hostname; + args.my_id.my_name = uts_nodename(); + args.my_id.my_prog = NLM_PROG; + args.my_id.my_vers = NLM_SM; + args.my_id.my_proc = NLM_SM_NOTIFY1; + + sema_p(&nsm->ns_sem); + nlm_nsm_clnt_init(nsm->ns_handle, nsm); + stat = sm_unmon_1(&args, &res, nsm->ns_handle); + sema_v(&nsm->ns_sem); + + return (stat); +} + +static enum clnt_stat +nlm_nsmaddr_reg(struct nlm_nsm *nsm, char *name, int family, netobj *address) +{ + struct reg1args args = { 0 }; + struct reg1res res = { 0 }; + enum clnt_stat stat; + + args.family = family; + args.name = name; + args.address = *address; + + sema_p(&nsm->ns_sem); + nlm_nsm_clnt_init(nsm->ns_addr_handle, nsm); + stat = nsmaddrproc1_reg_1(&args, &res, nsm->ns_addr_handle); + sema_v(&nsm->ns_sem); + + return (stat); +} + +/* + * Get NLM vhold object corresponding to vnode "vp". + * If no such object was found, create a new one. + * + * The purpose of this function is to associate vhold + * object with given vnode, so that: + * 1) vnode is hold (VN_HOLD) while vhold object is alive. + * 2) host has a track of all vnodes it touched by lock + * or share operations. These vnodes are accessible + * via collection of vhold objects. 
+ */ +struct nlm_vhold * +nlm_vhold_get(struct nlm_host *hostp, vnode_t *vp) +{ + struct nlm_vhold *nvp, *new_nvp = NULL; + + mutex_enter(&hostp->nh_lock); + nvp = nlm_vhold_find_locked(hostp, vp); + if (nvp != NULL) + goto out; + + /* nlm_vhold wasn't found, then create a new one */ + mutex_exit(&hostp->nh_lock); + new_nvp = kmem_cache_alloc(nlm_vhold_cache, KM_SLEEP); + + /* + * Check if another thread has already + * created the same nlm_vhold. + */ + mutex_enter(&hostp->nh_lock); + nvp = nlm_vhold_find_locked(hostp, vp); + if (nvp == NULL) { + nvp = new_nvp; + new_nvp = NULL; + + TAILQ_INIT(&nvp->nv_slreqs); + nvp->nv_vp = vp; + nvp->nv_refcnt = 1; + VN_HOLD(nvp->nv_vp); + + VERIFY(mod_hash_insert(hostp->nh_vholds_by_vp, + (mod_hash_key_t)vp, (mod_hash_val_t)nvp) == 0); + TAILQ_INSERT_TAIL(&hostp->nh_vholds_list, nvp, nv_link); + } + +out: + mutex_exit(&hostp->nh_lock); + if (new_nvp != NULL) + kmem_cache_free(nlm_vhold_cache, new_nvp); + + return (nvp); +} + +/* + * Drop a reference to vhold object nvp. + */ +void +nlm_vhold_release(struct nlm_host *hostp, struct nlm_vhold *nvp) +{ + if (nvp == NULL) + return; + + mutex_enter(&hostp->nh_lock); + ASSERT(nvp->nv_refcnt > 0); + nvp->nv_refcnt--; + mutex_exit(&hostp->nh_lock); +} + +/* + * Clean all locks and share reservations on the + * given vhold object that were acquired by the + * given sysid + */ +static void +nlm_vhold_clean(struct nlm_vhold *nvp, int sysid) +{ + cleanlocks(nvp->nv_vp, IGN_PID, sysid); + cleanshares_by_sysid(nvp->nv_vp, sysid); +} + +static void +nlm_vhold_destroy(struct nlm_host *hostp, struct nlm_vhold *nvp) +{ + ASSERT(MUTEX_HELD(&hostp->nh_lock)); + + VERIFY(mod_hash_remove(hostp->nh_vholds_by_vp, + (mod_hash_key_t)nvp->nv_vp, + (mod_hash_val_t)&nvp) == 0); + + TAILQ_REMOVE(&hostp->nh_vholds_list, nvp, nv_link); + VN_RELE(nvp->nv_vp); + nvp->nv_vp = NULL; + + kmem_cache_free(nlm_vhold_cache, nvp); +} + +/* + * Return TRUE if the given vhold is busy. 
+ * Vhold object is considered to be "busy" when + * all the following conditions hold: + * 1) No one uses it at the moment; + * 2) It hasn't any locks; + * 3) It hasn't any share reservations; + */ +static bool_t +nlm_vhold_busy(struct nlm_host *hostp, struct nlm_vhold *nvp) +{ + vnode_t *vp; + int sysid; + + ASSERT(MUTEX_HELD(&hostp->nh_lock)); + + if (nvp->nv_refcnt > 0) + return (TRUE); + + vp = nvp->nv_vp; + sysid = hostp->nh_sysid; + if (flk_has_remote_locks_for_sysid(vp, sysid) || + shr_has_remote_shares(vp, sysid)) + return (TRUE); + + return (FALSE); +} + +/* ARGSUSED */ +static int +nlm_vhold_ctor(void *datap, void *cdrarg, int kmflags) +{ + struct nlm_vhold *nvp = (struct nlm_vhold *)datap; + + bzero(nvp, sizeof (*nvp)); + return (0); +} + +/* ARGSUSED */ +static void +nlm_vhold_dtor(void *datap, void *cdrarg) +{ + struct nlm_vhold *nvp = (struct nlm_vhold *)datap; + + ASSERT(nvp->nv_refcnt == 0); + ASSERT(TAILQ_EMPTY(&nvp->nv_slreqs)); + ASSERT(nvp->nv_vp == NULL); +} + +struct nlm_vhold * +nlm_vhold_find_locked(struct nlm_host *hostp, const vnode_t *vp) +{ + struct nlm_vhold *nvp = NULL; + + ASSERT(MUTEX_HELD(&hostp->nh_lock)); + (void) mod_hash_find(hostp->nh_vholds_by_vp, + (mod_hash_key_t)vp, + (mod_hash_val_t)&nvp); + + if (nvp != NULL) + nvp->nv_refcnt++; + + return (nvp); +} + +/* + * NLM host functions + */ +static void +nlm_copy_netbuf(struct netbuf *dst, struct netbuf *src) +{ + ASSERT(src->len <= src->maxlen); + + dst->maxlen = src->maxlen; + dst->len = src->len; + dst->buf = kmem_zalloc(src->maxlen, KM_SLEEP); + bcopy(src->buf, dst->buf, src->len); +} + +/* ARGSUSED */ +static int +nlm_host_ctor(void *datap, void *cdrarg, int kmflags) +{ + struct nlm_host *hostp = (struct nlm_host *)datap; + + bzero(hostp, sizeof (*hostp)); + return (0); +} + +/* ARGSUSED */ +static void +nlm_host_dtor(void *datap, void *cdrarg) +{ + struct nlm_host *hostp = (struct nlm_host *)datap; + ASSERT(hostp->nh_refs == 0); +} + +static void 
/*
 * Free resources used by a host. This is called after the reference
 * count has reached zero so it doesn't need to worry about locks.
 *
 * Released here: the name and netid strings, the address buffer,
 * the sysid (unless it is LM_NOSYSID), the host's RPC handle cache,
 * the by-vnode vhold hash, the mutex and both condition variables,
 * and finally the host object itself, which goes back to
 * nlm_hosts_cache.
 *
 * The host must not own any vholds at this point; that is asserted
 * on entry and once more right before the vhold hash is destroyed.
 */
static void
nlm_host_destroy(struct nlm_host *hostp)
{
	ASSERT(hostp->nh_name != NULL);
	ASSERT(hostp->nh_netid != NULL);
	ASSERT(TAILQ_EMPTY(&hostp->nh_vholds_list));

	strfree(hostp->nh_name);
	strfree(hostp->nh_netid);
	kmem_free(hostp->nh_addr.buf, hostp->nh_addr.maxlen);

	/* A host that never got a sysid has nothing to give back */
	if (hostp->nh_sysid != LM_NOSYSID)
		nlm_sysid_free(hostp->nh_sysid);

	nlm_rpc_cache_destroy(hostp);

	ASSERT(TAILQ_EMPTY(&hostp->nh_vholds_list));
	mod_hash_destroy_ptrhash(hostp->nh_vholds_by_vp);

	mutex_destroy(&hostp->nh_lock);
	cv_destroy(&hostp->nh_rpcb_cv);
	cv_destroy(&hostp->nh_recl_cv);

	kmem_cache_free(nlm_hosts_cache, hostp);
}
/*
 * Cleanup SERVER-side state after a client restarts,
 * or becomes unresponsive, or whatever.
 *
 * For every vhold of the host, all queued sleeping lock requests
 * are detached and the locks and share reservations held under
 * the host's sysid are removed from the vnode (nlm_vhold_clean).
 *
 * When "state" is non-zero it is recorded as the host's new state
 * first.  When "state" is 0 the host's state is left alone and
 * only the cleanup is done, which makes the function usable for
 * plain resource cleanup.
 */
void
nlm_host_notify_server(struct nlm_host *hostp, int32_t state)
{
	struct nlm_vhold *nvp;
	struct nlm_slreq *slr;
	struct nlm_slreq_list slreqs2free;

	TAILQ_INIT(&slreqs2free);
	mutex_enter(&hostp->nh_lock);
	if (state != 0)
		hostp->nh_state = state;

	TAILQ_FOREACH(nvp, &hostp->nh_vholds_list, nv_link) {

		/* cleanup sleeping requests at first */
		while ((slr = TAILQ_FIRST(&nvp->nv_slreqs)) != NULL) {
			TAILQ_REMOVE(&nvp->nv_slreqs, slr, nsr_link);

			/*
			 * Instead of freeing cancelled sleeping request
			 * here, we add it to the linked list created
			 * on the stack in order to do all frees outside
			 * the critical section.
			 */
			TAILQ_INSERT_TAIL(&slreqs2free, slr, nsr_link);
		}

		/*
		 * Pin the vhold while nh_lock is dropped for the
		 * cleanup call: nlm_vhold_busy() reports a vhold with
		 * a non-zero reference count as busy, so
		 * nlm_host_gc_vholds() will not destroy it and the
		 * list walk can continue from it afterwards.
		 */
		nvp->nv_refcnt++;
		mutex_exit(&hostp->nh_lock);

		nlm_vhold_clean(nvp, hostp->nh_sysid);

		mutex_enter(&hostp->nh_lock);
		nvp->nv_refcnt--;
	}

	mutex_exit(&hostp->nh_lock);

	/* Free the collected sleeping requests with no lock held */
	while ((slr = TAILQ_FIRST(&slreqs2free)) != NULL) {
		TAILQ_REMOVE(&slreqs2free, slr, nsr_link);
		kmem_free(slr, sizeof (*slr));
	}
}
+ */ + hostp->nh_refs++; + mutex_exit(&hostp->nh_lock); + + (void) zthread_create(NULL, 0, nlm_reclaimer, + hostp, 0, minclsyspri); +} + +/* + * The function is called when NLM client detects that + * server has entered in grace period and client needs + * to wait until reclamation process (if any) does + * its job. + */ +int +nlm_host_wait_grace(struct nlm_host *hostp) +{ + struct nlm_globals *g; + int error = 0; + + g = zone_getspecific(nlm_zone_key, curzone); + mutex_enter(&hostp->nh_lock); + + do { + int rc; + + rc = cv_timedwait_sig(&hostp->nh_recl_cv, + &hostp->nh_lock, ddi_get_lbolt() + + SEC_TO_TICK(g->retrans_tmo)); + + if (rc == 0) { + error = EINTR; + break; + } + } while (hostp->nh_flags & NLM_NH_RECLAIM); + + mutex_exit(&hostp->nh_lock); + return (error); +} + +/* + * Create a new NLM host. + * + * NOTE: The in-kernel RPC (kRPC) subsystem uses TLI/XTI, + * which needs both a knetconfig and an address when creating + * endpoints. Thus host object stores both knetconfig and + * netid. + */ +static struct nlm_host * +nlm_host_create(char *name, const char *netid, + struct knetconfig *knc, struct netbuf *naddr) +{ + struct nlm_host *host; + + host = kmem_cache_alloc(nlm_hosts_cache, KM_SLEEP); + + mutex_init(&host->nh_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&host->nh_rpcb_cv, NULL, CV_DEFAULT, NULL); + cv_init(&host->nh_recl_cv, NULL, CV_DEFAULT, NULL); + + host->nh_sysid = LM_NOSYSID; + host->nh_refs = 1; + host->nh_name = strdup(name); + host->nh_netid = strdup(netid); + host->nh_knc = *knc; + nlm_copy_netbuf(&host->nh_addr, naddr); + + host->nh_state = 0; + host->nh_rpcb_state = NRPCB_NEED_UPDATE; + host->nh_flags = 0; + + host->nh_vholds_by_vp = mod_hash_create_ptrhash("nlm vholds hash", + 32, mod_hash_null_valdtor, sizeof (vnode_t)); + + TAILQ_INIT(&host->nh_vholds_list); + TAILQ_INIT(&host->nh_rpchc); + + return (host); +} + +/* + * Cancel all client side sleeping locks owned by given host. 
+ */
+void
+nlm_host_cancel_slocks(struct nlm_globals *g, struct nlm_host *hostp)
+{
+	struct nlm_slock *nslp;
+
+	mutex_enter(&g->lock);
+	TAILQ_FOREACH(nslp, &g->nlm_slocks, nsl_link) {
+		if (nslp->nsl_host == hostp) {
+			nslp->nsl_state = NLM_SL_CANCELLED;
+			cv_broadcast(&nslp->nsl_cond);
+		}
+	}
+
+	mutex_exit(&g->lock);
+}
+
+/*
+ * Garbage collect stale vhold objects.
+ *
+ * In other words, check whether the vnodes held by the
+ * host's vhold objects still have any locks or shares or
+ * are still in use. Every vhold that nlm_vhold_busy() does
+ * not report as busy is destroyed.
+ *
+ * The caller must hold hostp->nh_lock (asserted). The next
+ * list element is fetched before nlm_vhold_destroy() is
+ * called on the current one.
+ */
+static void
+nlm_host_gc_vholds(struct nlm_host *hostp)
+{
+	struct nlm_vhold *nvp;
+
+	ASSERT(MUTEX_HELD(&hostp->nh_lock));
+
+	nvp = TAILQ_FIRST(&hostp->nh_vholds_list);
+	while (nvp != NULL) {
+		struct nlm_vhold *nvp_tmp;
+
+		if (nlm_vhold_busy(hostp, nvp)) {
+			nvp = TAILQ_NEXT(nvp, nv_link);
+			continue;
+		}
+
+		nvp_tmp = TAILQ_NEXT(nvp, nv_link);
+		nlm_vhold_destroy(hostp, nvp);
+		nvp = nvp_tmp;
+	}
+}
+
+/*
+ * Check whether the given host has any
+ * server side locks or share reservations.
+ * The caller must hold hostp->nh_lock (asserted).
+ */
+static bool_t
+nlm_host_has_srv_locks(struct nlm_host *hostp)
+{
+	/*
+	 * It's cheap and simple: if server has
+	 * any locks/shares there must be vhold
+	 * object storing the affected vnode.
+	 *
+	 * NOTE: We don't need to check sleeping
+	 * locks on the server side, because if
+	 * server side sleeping lock is alive,
+	 * there must be a vhold object corresponding
+	 * to target vnode.
+	 */
+	ASSERT(MUTEX_HELD(&hostp->nh_lock));
+	if (!TAILQ_EMPTY(&hostp->nh_vholds_list))
+		return (TRUE);
+
+	return (FALSE);
+}
+
+/*
+ * Check whether the given host has any client side
+ * locks or share reservations.
+ * The caller must hold hostp->nh_lock (asserted).
+ */
+static bool_t
+nlm_host_has_cli_locks(struct nlm_host *hostp)
+{
+	ASSERT(MUTEX_HELD(&hostp->nh_lock));
+
+	/*
+	 * XXX: It's not the way I'd like to do the check,
+	 * because flk_sysid_has_locks() can be very
+	 * expensive by design. Unfortunately it iterates
+	 * through all locks on the system, no matter whether
+	 * they were made on a remote system via NLM or
+	 * on the local system via reclock. To understand the
+	 * problem, consider that there are tens of thousands
+	 * of locks made on some ZFS dataset. And there's
+	 * another dataset shared by NFS where NLM client had locks
+	 * some time ago, but doesn't have them now.
+	 * In this case flk_sysid_has_locks() will iterate
+	 * through tens of thousands of locks until it returns
+	 * FALSE to us.
+	 * Oh, I hope that in shiny future somebody will make
+	 * local lock manager (os/flock.c) better, so that
+	 * it'd be more friendly to remote locks and
+	 * flk_sysid_has_locks() wouldn't be so expensive.
+	 */
+	if (flk_sysid_has_locks(hostp->nh_sysid |
+	    LM_SYSID_CLIENT, FLK_QUERY_ACTIVE))
+		return (TRUE);
+
+	/*
+	 * Check whether host has any share reservations
+	 * registered on the client side.
+	 */
+	if (hostp->nh_shrlist != NULL)
+		return (TRUE);
+
+	return (FALSE);
+}
+
+/*
+ * Determine whether the given host owns any
+ * locks or share reservations, on the server side
+ * or on the client side. Both helpers called here
+ * assert that hostp->nh_lock is held.
+ */
+static bool_t
+nlm_host_has_locks(struct nlm_host *hostp)
+{
+	if (nlm_host_has_srv_locks(hostp))
+		return (TRUE);
+
+	return (nlm_host_has_cli_locks(hostp));
+}
+
+/*
+ * This function compares only addresses of two netbufs
+ * that belong to NC_TCP[6] or NC_UDP[6] protofamily.
+ * Port part of netbuf is ignored.
+ *
+ * Return values:
+ * -1: nb1's address is "smaller" than nb2's
+ * 0: addresses are equal
+ * 1: nb1's address is "greater" than nb2's
+ */
+static int
+nlm_netbuf_addrs_cmp(struct netbuf *nb1, struct netbuf *nb2)
+{
+	union nlm_addr {
+		struct sockaddr sa;
+		struct sockaddr_in sin;
+		struct sockaddr_in6 sin6;
+	} *na1, *na2;
+	int res;
+
+	/* LINTED E_BAD_PTR_CAST_ALIGN */
+	na1 = (union nlm_addr *)nb1->buf;
+	/* LINTED E_BAD_PTR_CAST_ALIGN */
+	na2 = (union nlm_addr *)nb2->buf;
+
+	if (na1->sa.sa_family < na2->sa.sa_family)
+		return (-1);
+	if (na1->sa.sa_family > na2->sa.sa_family)
+		return (1);
+
+	switch (na1->sa.sa_family) {
+	case AF_INET:
+		res = memcmp(&na1->sin.sin_addr, &na2->sin.sin_addr,
+		    sizeof (na1->sin.sin_addr));
+		break;
+	case AF_INET6:
+		res = memcmp(&na1->sin6.sin6_addr, &na2->sin6.sin6_addr,
+		    sizeof (na1->sin6.sin6_addr));
+		break;
+	default:
+		VERIFY(0);
+		return (0);
+	}
+
+	return (SIGN(res));
+}
+
+/*
+ * Compare two nlm hosts.
+ *
+ * Hosts are ordered by nh_netid (strcmp) first and, when the
+ * netids are equal, by address via nlm_netbuf_addrs_cmp(),
+ * i.e. the port and the host name take no part in the order.
+ * (Presumably this is the comparator of g->nlm_hosts_tree -
+ * confirm where the tree is created.)
+ *
+ * Return values:
+ * -1: host1 is "smaller" than host2
+ * 0: host1 is equal to host2
+ * 1: host1 is "greater" than host2
+ */
+int
+nlm_host_cmp(const void *p1, const void *p2)
+{
+	struct nlm_host *h1 = (struct nlm_host *)p1;
+	struct nlm_host *h2 = (struct nlm_host *)p2;
+	int res;
+
+	res = strcmp(h1->nh_netid, h2->nh_netid);
+	if (res != 0)
+		return (SIGN(res));
+
+	res = nlm_netbuf_addrs_cmp(&h1->nh_addr, &h2->nh_addr);
+	return (res);
+}
+
+/*
+ * Find the host specified by the <netid, address> pair in
+ * the hosts AVL tree. The caller must hold g->lock (asserted).
+ *
+ * If found, the host is taken off the idle hosts list (when
+ * it is there) and its ref count is incremented.
+ * If wherep is not NULL, the position reported by avl_find()
+ * is stored there, so that the caller can pass it to
+ * avl_insert() when the host was not found.
+ */
+static struct nlm_host *
+nlm_host_find_locked(struct nlm_globals *g, const char *netid,
+    struct netbuf *naddr, avl_index_t *wherep)
+{
+	struct nlm_host *hostp, key;
+	avl_index_t pos;
+
+	ASSERT(MUTEX_HELD(&g->lock));
+
+	key.nh_netid = (char *)netid;
+	key.nh_addr.buf = naddr->buf;
+	key.nh_addr.len = naddr->len;
+	key.nh_addr.maxlen = naddr->maxlen;
+
+	hostp = avl_find(&g->nlm_hosts_tree, &key, &pos);
+
+	if (hostp != NULL) {
+		/*
+		 * Host is in use now. Remove it from idle
+		 * hosts list if needed.
+		 */
+		if (hostp->nh_flags & NLM_NH_INIDLE) {
+			TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, nh_link);
+			hostp->nh_flags &= ~NLM_NH_INIDLE;
+		}
+
+		hostp->nh_refs++;
+	}
+	if (wherep != NULL)
+		*wherep = pos;
+
+	return (hostp);
+}
+
+/*
+ * Find NLM host for the given netid and address.
+ *
+ * Returns the host with a new reference, or NULL if the
+ * host is not known or NLM is not in NLM_ST_UP state.
+ */
+struct nlm_host *
+nlm_host_find(struct nlm_globals *g, const char *netid,
+    struct netbuf *addr)
+{
+	struct nlm_host *hostp = NULL;
+
+	mutex_enter(&g->lock);
+	if (g->run_status != NLM_ST_UP)
+		goto out;
+
+	hostp = nlm_host_find_locked(g, netid, addr, NULL);
+
+out:
+	mutex_exit(&g->lock);
+	return (hostp);
+}
+
+
+/*
+ * Find or create an NLM host for the given name and address.
+ *
+ * The remote host is determined by all of: name, netid, address.
+ * Note that the netid is whatever nlm_svc_add_ep() gave to
+ * svc_tli_kcreate() for the service binding. If any of these
+ * are different, allocate a new host (new sysid).
+ *
+ * NOTE(review): the lookup done here goes through
+ * nlm_host_find_locked(), which keys only on netid and address
+ * (see nlm_host_cmp()); "name" is only used when a new host is
+ * created. Confirm whether the name is meant to be part of the
+ * host identity.
+ *
+ * Returns the host with a reference held, or NULL if NLM is
+ * not in NLM_ST_UP state, if there is no knetconfig for the
+ * netid, or if no sysid could be allocated.
+ */
+struct nlm_host *
+nlm_host_findcreate(struct nlm_globals *g, char *name,
+    const char *netid, struct netbuf *addr)
+{
+	int err;
+	struct nlm_host *host, *newhost = NULL;
+	struct knetconfig knc;
+	avl_index_t where;
+
+	mutex_enter(&g->lock);
+	if (g->run_status != NLM_ST_UP) {
+		mutex_exit(&g->lock);
+		return (NULL);
+	}
+
+	host = nlm_host_find_locked(g, netid, addr, NULL);
+	mutex_exit(&g->lock);
+	if (host != NULL)
+		return (host);
+
+	err = nlm_knc_from_netid(netid, &knc);
+	if (err != 0)
+		return (NULL);
+	/*
+	 * Do allocations (etc.) outside of mutex,
+	 * and then check again before inserting.
+	 * If somebody else inserted the host in the
+	 * meantime, our new host is destroyed below.
+	 */
+	newhost = nlm_host_create(name, netid, &knc, addr);
+	newhost->nh_sysid = nlm_sysid_alloc();
+	if (newhost->nh_sysid == LM_NOSYSID)
+		goto out;
+
+	mutex_enter(&g->lock);
+	host = nlm_host_find_locked(g, netid, addr, &where);
+	if (host == NULL) {
+		host = newhost;
+		newhost = NULL;
+
+		/*
+		 * Insert host to the hosts AVL tree that is
+		 * used to lookup by <netid, address> pair.
+		 */
+		avl_insert(&g->nlm_hosts_tree, host, where);
+
+		/*
+		 * Insert host into the hosts hash table that is
+		 * used to lookup host by sysid.
+		 */
+		VERIFY(mod_hash_insert(g->nlm_hosts_hash,
+		    (mod_hash_key_t)(uintptr_t)host->nh_sysid,
+		    (mod_hash_val_t)host) == 0);
+	}
+
+	mutex_exit(&g->lock);
+
+out:
+	if (newhost != NULL)
+		nlm_host_destroy(newhost);
+
+	return (host);
+}
+
+/*
+ * Find the NLM host that matches the value of 'sysid'.
+ * If found, return it with a new ref,
+ * else return NULL. NULL is also returned when NLM
+ * is not in NLM_ST_UP state.
+ */
+struct nlm_host *
+nlm_host_find_by_sysid(struct nlm_globals *g, sysid_t sysid)
+{
+	struct nlm_host *hostp = NULL;
+
+	mutex_enter(&g->lock);
+	if (g->run_status != NLM_ST_UP)
+		goto out;
+
+	(void) mod_hash_find(g->nlm_hosts_hash,
+	    (mod_hash_key_t)(uintptr_t)sysid,
+	    (mod_hash_val_t)&hostp);
+
+	if (hostp == NULL)
+		goto out;
+
+	/*
+	 * Host is in use now. Remove it
+	 * from idle hosts list if needed.
+	 */
+	if (hostp->nh_flags & NLM_NH_INIDLE) {
+		TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, nh_link);
+		hostp->nh_flags &= ~NLM_NH_INIDLE;
+	}
+
+	hostp->nh_refs++;
+
+out:
+	mutex_exit(&g->lock);
+	return (hostp);
+}
+
+/*
+ * Release the given host.
+ * I.e. drop a reference that was taken earlier by one of
+ * the following functions: nlm_host_findcreate(), nlm_host_find(),
+ * nlm_host_find_by_sysid().
+ *
+ * When the very last reference is dropped, host is moved to
+ * so-called "idle state". All hosts that are in idle state
+ * have an idle timeout. If timeout is expired, GC thread
+ * checks whether hosts have any locks and if they haven't
+ * any, it removes them.
+ * NOTE: only unused hosts can be in idle state.
+ *
+ * A NULL hostp is accepted and ignored.
+ */
+void
+nlm_host_release(struct nlm_globals *g, struct nlm_host *hostp)
+{
+	if (hostp == NULL)
+		return;
+
+	mutex_enter(&g->lock);
+	ASSERT(hostp->nh_refs > 0);
+
+	hostp->nh_refs--;
+	if (hostp->nh_refs != 0) {
+		mutex_exit(&g->lock);
+		return;
+	}
+
+	/*
+	 * The very last reference to the host was dropped,
+	 * thus host is unused now. Set its idle timeout
+	 * and move it to the idle hosts LRU list.
+	 */
+	hostp->nh_idle_timeout = ddi_get_lbolt() +
+	    SEC_TO_TICK(g->cn_idle_tmo);
+
+	ASSERT((hostp->nh_flags & NLM_NH_INIDLE) == 0);
+	TAILQ_INSERT_TAIL(&g->nlm_idle_hosts, hostp, nh_link);
+	hostp->nh_flags |= NLM_NH_INIDLE;
+	mutex_exit(&g->lock);
+}
+
+/*
+ * Unregister this NLM host (NFS client) with the local statd
+ * due to idleness (no locks held for a while).
+ *
+ * The host must have no references (VERIFYed). Nothing is
+ * done unless NLM_NH_MONITORED is set. The flag is cleared
+ * before statd is contacted, and a failure to contact statd
+ * is only logged. No lock is taken in this function.
+ */
+void
+nlm_host_unmonitor(struct nlm_globals *g, struct nlm_host *host)
+{
+	enum clnt_stat stat;
+
+	VERIFY(host->nh_refs == 0);
+	if (!(host->nh_flags & NLM_NH_MONITORED))
+		return;
+
+	host->nh_flags &= ~NLM_NH_MONITORED;
+	stat = nlm_nsm_unmon(&g->nlm_nsm, host->nh_name);
+	if (stat != RPC_SUCCESS) {
+		NLM_WARN("NLM: Failed to contact statd, stat=%d\n", stat);
+		return;
+	}
+}
+
+/*
+ * Ask the local NFS statd to begin monitoring this host.
+ * It will call us back when that host restarts, using the
+ * prog,vers,proc specified below, i.e. NLM_SM_NOTIFY1,
+ * which is handled in nlm_do_notify1().
+ *
+ * Nothing is done if the host is already marked
+ * NLM_NH_MONITORED. Otherwise the flag is set before the RPCs
+ * are issued and cleared again if either nlm_nsmaddr_reg() or
+ * nlm_nsm_mon() fails.
+ *
+ * NOTE(review): the flag is set under host->nh_lock but is
+ * cleared on the failure paths under g->lock, and nh_state is
+ * assigned before any lock is taken. Confirm which lock is
+ * meant to protect these fields.
+ */
+void
+nlm_host_monitor(struct nlm_globals *g, struct nlm_host *host, int state)
+{
+	int family;
+	netobj obj;
+	enum clnt_stat stat;
+
+	if (state != 0 && host->nh_state == 0) {
+		/*
+		 * This is the first time we have seen an NSM state
+		 * value for this host. We record it here to help
+		 * detect host reboots.
+		 */
+		host->nh_state = state;
+	}
+
+	mutex_enter(&host->nh_lock);
+	if (host->nh_flags & NLM_NH_MONITORED) {
+		mutex_exit(&host->nh_lock);
+		return;
+	}
+
+	host->nh_flags |= NLM_NH_MONITORED;
+	mutex_exit(&host->nh_lock);
+
+	/*
+	 * Before we begin monitoring the host register the network address
+	 * associated with this hostname.
+	 */
+	nlm_netbuf_to_netobj(&host->nh_addr, &family, &obj);
+	stat = nlm_nsmaddr_reg(&g->nlm_nsm, host->nh_name, family, &obj);
+	if (stat != RPC_SUCCESS) {
+		NLM_WARN("Failed to register address, stat=%d\n", stat);
+		mutex_enter(&g->lock);
+		host->nh_flags &= ~NLM_NH_MONITORED;
+		mutex_exit(&g->lock);
+
+		return;
+	}
+
+	/*
+	 * Tell statd how to call us with status updates for
+	 * this host. Updates arrive via nlm_do_notify1().
+	 *
+	 * We put our assigned system ID value in the priv field to
+	 * make it simpler to find the host if we are notified of a
+	 * host restart.
+	 */
+	stat = nlm_nsm_mon(&g->nlm_nsm, host->nh_name, host->nh_sysid);
+	if (stat != RPC_SUCCESS) {
+		NLM_WARN("Failed to contact local NSM, stat=%d\n", stat);
+		mutex_enter(&g->lock);
+		host->nh_flags &= ~NLM_NH_MONITORED;
+		mutex_exit(&g->lock);
+
+		return;
+	}
+}
+/*
+ * Return the state value recorded for the host (nh_state).
+ * The field is read without taking any lock.
+ */
+int
+nlm_host_get_state(struct nlm_host *hostp)
+{
+
+	return (hostp->nh_state);
+}
+
+/*
+ * NLM client/server sleeping locks
+ */
+
+/*
+ * Register client side sleeping lock.
+ *
+ * Our client code calls this to keep information
+ * about a sleeping lock somewhere. When it receives
+ * a grant callback from the server or when it just
+ * needs to remove all sleeping locks from a vnode,
+ * it uses this information to remove or apply the
+ * lock properly.
+ *
+ * The new entry holds a by-value copy of *lock plus its own
+ * copy of the file handle in nsl_fh (nlm_copy_netobj()), starts
+ * in NLM_SL_BLOCKED state and is appended to g->nlm_slocks
+ * under g->lock. Allocation uses KM_SLEEP.
+ */
+struct nlm_slock *
+nlm_slock_register(
+	struct nlm_globals *g,
+	struct nlm_host *host,
+	struct nlm4_lock *lock,
+	struct vnode *vp)
+{
+	struct nlm_slock *nslp;
+
+	nslp = kmem_zalloc(sizeof (*nslp), KM_SLEEP);
+	cv_init(&nslp->nsl_cond, NULL, CV_DEFAULT, NULL);
+	nslp->nsl_lock = *lock;
+	nlm_copy_netobj(&nslp->nsl_fh, &nslp->nsl_lock.fh);
+	nslp->nsl_state = NLM_SL_BLOCKED;
+	nslp->nsl_host = host;
+	nslp->nsl_vp = vp;
+
+	mutex_enter(&g->lock);
+	TAILQ_INSERT_TAIL(&g->nlm_slocks, nslp, nsl_link);
+	mutex_exit(&g->lock);
+
+	return (nslp);
+}
+
+/*
+ * Remove this lock from the wait list and destroy it.
+ */
+void
+nlm_slock_unregister(struct nlm_globals *g, struct nlm_slock *nslp)
+{
+	mutex_enter(&g->lock);
+	TAILQ_REMOVE(&g->nlm_slocks, nslp, nsl_link);
+	mutex_exit(&g->lock);
+
+	kmem_free(nslp->nsl_fh.n_bytes, nslp->nsl_fh.n_len);
+	cv_destroy(&nslp->nsl_cond);
+	kmem_free(nslp, sizeof (*nslp));
+}
+
+/*
+ * Wait for a granted callback or cancellation event
+ * for a sleeping lock.
+ *
+ * If a signal interrupted the wait or if the lock
+ * was cancelled, return EINTR - the caller must arrange to send
+ * a cancellation to the server.
+ *
+ * If timeout occurred, return ETIMEDOUT - the caller must
+ * resend the lock request to the server.
+ *
+ * On success return 0.
+ *
+ * The wait is a single cv_timedwait_sig() on nsl_cond with
+ * g->lock held; it is skipped when the lock is no longer in
+ * NLM_SL_BLOCKED state on entry.
+ */
+int
+nlm_slock_wait(struct nlm_globals *g,
+    struct nlm_slock *nslp, uint_t timeo_secs)
+{
+	clock_t timeo_ticks;
+	int cv_res, error;
+
+	/*
+	 * If the granted message arrived before we got here,
+	 * nslp->nsl_state will be NLM_SL_GRANTED - in that case,
+	 * don't sleep. cv_res is preset to 1 so that the code
+	 * below treats "didn't block" like a normal wakeup.
+	 */
+	cv_res = 1;
+	timeo_ticks = ddi_get_lbolt() + SEC_TO_TICK(timeo_secs);
+
+	mutex_enter(&g->lock);
+	if (nslp->nsl_state == NLM_SL_BLOCKED) {
+		cv_res = cv_timedwait_sig(&nslp->nsl_cond,
+		    &g->lock, timeo_ticks);
+	}
+
+	/*
+	 * No matter why we woke up, if the lock was
+	 * cancelled, let the caller know about it
+	 * by returning EINTR.
+	 */
+	if (nslp->nsl_state == NLM_SL_CANCELLED) {
+		error = EINTR;
+		goto out;
+	}
+
+	if (cv_res <= 0) {
+		/* We were woken up either by timeout or interrupt */
+		error = (cv_res < 0) ? ETIMEDOUT : EINTR;
+
+		/*
+		 * The granted message may arrive after the
+		 * interrupt/timeout but before we manage to lock the
+		 * mutex. Detect this by examining nslp.
+		 */
+		if (nslp->nsl_state == NLM_SL_GRANTED)
+			error = 0;
+	} else { /* woken up via the condvar, or didn't block */
+		error = 0;
+		VERIFY(nslp->nsl_state == NLM_SL_GRANTED);
+	}
+
+out:
+	mutex_exit(&g->lock);
+	return (error);
+}
+
+/*
+ * Mark client side sleeping lock as granted
+ * and wake up a process blocked on the lock.
+ * Called from server side NLM_GRANT handler.
+ *
+ * If sleeping lock is found return 0, otherwise
+ * return ENOENT.
+ */
+int
+nlm_slock_grant(struct nlm_globals *g,
+    struct nlm_host *hostp, struct nlm4_lock *alock)
+{
+	struct nlm_slock *nslp;
+	int error = ENOENT;
+
+	mutex_enter(&g->lock);
+	TAILQ_FOREACH(nslp, &g->nlm_slocks, nsl_link) {
+		if ((nslp->nsl_state != NLM_SL_BLOCKED) ||
+		    (nslp->nsl_host != hostp))
+			continue;
+
+		if (alock->svid == nslp->nsl_lock.svid &&
+		    alock->l_offset == nslp->nsl_lock.l_offset &&
+		    alock->l_len == nslp->nsl_lock.l_len &&
+		    alock->fh.n_len == nslp->nsl_lock.fh.n_len &&
+		    bcmp(alock->fh.n_bytes, nslp->nsl_lock.fh.n_bytes,
+		    nslp->nsl_lock.fh.n_len) == 0) {
+			nslp->nsl_state = NLM_SL_GRANTED;
+			cv_broadcast(&nslp->nsl_cond);
+			error = 0;
+			break;
+		}
+	}
+
+	mutex_exit(&g->lock);
+	return (error);
+}
+
+/*
+ * Register sleeping lock request corresponding to
+ * flp on the given vhold object.
+ * On success function returns 0, otherwise (if
+ * lock request with the same flp is already
+ * registered) function returns EEXIST.
+ *
+ * The new request is allocated (KM_SLEEP) with nh_lock
+ * dropped, so the list is searched a second time after the
+ * lock is re-taken. If a matching request showed up in the
+ * meantime, the new one is freed and EEXIST is returned.
+ */
+int
+nlm_slreq_register(struct nlm_host *hostp, struct nlm_vhold *nvp,
+    struct flock64 *flp)
+{
+	struct nlm_slreq *slr, *new_slr = NULL;
+	int ret = EEXIST;
+
+	mutex_enter(&hostp->nh_lock);
+	slr = nlm_slreq_find_locked(hostp, nvp, flp);
+	if (slr != NULL)
+		goto out;
+
+	mutex_exit(&hostp->nh_lock);
+	new_slr = kmem_zalloc(sizeof (*slr), KM_SLEEP);
+	bcopy(flp, &new_slr->nsr_fl, sizeof (*flp));
+
+	mutex_enter(&hostp->nh_lock);
+	slr = nlm_slreq_find_locked(hostp, nvp, flp);
+	if (slr == NULL) {
+		slr = new_slr;
+		new_slr = NULL;
+		ret = 0;
+
+		TAILQ_INSERT_TAIL(&nvp->nv_slreqs, slr, nsr_link);
+	}
+
+out:
+	mutex_exit(&hostp->nh_lock);
+	if (new_slr != NULL)
+		kmem_free(new_slr, sizeof (*new_slr));
+
+	return (ret);
+}
+
+/*
+ * Unregister sleeping lock request corresponding
+ * to flp from the given vhold object.
+ * On success function returns 0, otherwise (if
+ * lock request corresponding to flp isn't found
+ * on the given vhold) function returns ENOENT.
+ */
+int
+nlm_slreq_unregister(struct nlm_host *hostp, struct nlm_vhold *nvp,
+    struct flock64 *flp)
+{
+	struct nlm_slreq *slr;
+
+	mutex_enter(&hostp->nh_lock);
+	slr = nlm_slreq_find_locked(hostp, nvp, flp);
+	if (slr == NULL) {
+		mutex_exit(&hostp->nh_lock);
+		return (ENOENT);
+	}
+
+	TAILQ_REMOVE(&nvp->nv_slreqs, slr, nsr_link);
+	mutex_exit(&hostp->nh_lock);
+
+	kmem_free(slr, sizeof (*slr));
+	return (0);
+}
+
+/*
+ * Find sleeping lock request on the given vhold object by flp.
+ *
+ * The caller must hold hostp->nh_lock (asserted). A request
+ * matches when its l_start, l_len, l_pid and l_type are all
+ * equal to those of flp. Returns the matching request, or
+ * NULL when the list holds no match.
+ */
+struct nlm_slreq *
+nlm_slreq_find_locked(struct nlm_host *hostp, struct nlm_vhold *nvp,
+    struct flock64 *flp)
+{
+	struct nlm_slreq *slr = NULL;
+
+	ASSERT(MUTEX_HELD(&hostp->nh_lock));
+	TAILQ_FOREACH(slr, &nvp->nv_slreqs, nsr_link) {
+		if (slr->nsr_fl.l_start == flp->l_start &&
+		    slr->nsr_fl.l_len == flp->l_len &&
+		    slr->nsr_fl.l_pid == flp->l_pid &&
+		    slr->nsr_fl.l_type == flp->l_type)
+			break;
+	}
+
+	return (slr);
+}
+
+/*
+ * NLM tracks active share reservations made on the client side.
+ * It needs to have a track of share reservations for two purposes
+ * 1) to determine if nlm_host is busy (if it has active locks and/or
+ * share reservations, it is)
+ * 2) to recover active share reservations when NLM server reports
+ * that it has rebooted.
+ *
+ * Unfortunately Illumos local share reservations manager (see os/share.c)
+ * doesn't have an ability to lookup all reservations on the system
+ * by sysid (like local lock manager) or get all reservations by sysid.
+ * It tracks reservations per vnode and is able to get/looup them
+ * on particular vnode. It's not what NLM needs. Thus it has that ugly
+ * share reservations tracking scheme.
+ */
+
+void
+nlm_shres_track(struct nlm_host *hostp, vnode_t *vp, struct shrlock *shrp)
+{
+	struct nlm_shres *nsp, *nsp_new;
+
+	/*
+	 * NFS code must fill the s_owner, so that
+	 * s_own_len is never 0.
+	 *
+	 * The new list item is created before nh_lock is
+	 * taken; if an equal reservation is already tracked
+	 * for this vnode, the item is destroyed again after
+	 * the lock is dropped.
+	 */
+	ASSERT(shrp->s_own_len > 0);
+	nsp_new = nlm_shres_create_item(shrp, vp);
+
+	mutex_enter(&hostp->nh_lock);
+	for (nsp = hostp->nh_shrlist; nsp != NULL; nsp = nsp->ns_next)
+		if (nsp->ns_vp == vp && nlm_shres_equal(shrp, nsp->ns_shr))
+			break;
+
+	if (nsp != NULL) {
+		/*
+		 * Found a duplicate. Do nothing:
+		 * nsp_new is released at "out".
+		 */
+
+		goto out;
+	}
+
+	nsp = nsp_new;
+	nsp_new = NULL;
+	nsp->ns_next = hostp->nh_shrlist;
+	hostp->nh_shrlist = nsp;
+
+out:
+	mutex_exit(&hostp->nh_lock);
+	if (nsp_new != NULL)
+		nlm_shres_destroy_item(nsp_new);
+}
+/*
+ * Stop tracking a share reservation: every item of the host's
+ * nh_shrlist that refers to vp and is equal to shrp (see
+ * nlm_shres_equal()) is unlinked and destroyed. The whole list
+ * is walked under nh_lock; nothing happens if there is no match.
+ */
+void
+nlm_shres_untrack(struct nlm_host *hostp, vnode_t *vp, struct shrlock *shrp)
+{
+	struct nlm_shres *nsp, *nsp_prev = NULL;
+
+	mutex_enter(&hostp->nh_lock);
+	nsp = hostp->nh_shrlist;
+	while (nsp != NULL) {
+		if (nsp->ns_vp == vp && nlm_shres_equal(shrp, nsp->ns_shr)) {
+			struct nlm_shres *nsp_del;
+
+			nsp_del = nsp;
+			nsp = nsp->ns_next;
+			if (nsp_prev != NULL)
+				nsp_prev->ns_next = nsp;
+			else
+				hostp->nh_shrlist = nsp;
+
+			nlm_shres_destroy_item(nsp_del);
+			continue;
+		}
+
+		nsp_prev = nsp;
+		nsp = nsp->ns_next;
+	}
+
+	mutex_exit(&hostp->nh_lock);
+}
+
+/*
+ * Get a _copy_ of the list of all active share reservations
+ * made by the given host.
+ * NOTE: the list function returns _must_ be released using
+ * nlm_free_shrlist().
+ */
+struct nlm_shres *
+nlm_get_active_shres(struct nlm_host *hostp)
+{
+	struct nlm_shres *nsp, *nslist = NULL;
+
+	mutex_enter(&hostp->nh_lock);
+	for (nsp = hostp->nh_shrlist; nsp != NULL; nsp = nsp->ns_next) {
+		struct nlm_shres *nsp_new;
+
+		nsp_new = nlm_shres_create_item(nsp->ns_shr, nsp->ns_vp);
+		nsp_new->ns_next = nslist;
+		nslist = nsp_new;
+	}
+
+	mutex_exit(&hostp->nh_lock);
+	return (nslist);
+}
+
+/*
+ * Free memory allocated for the active share reservations
+ * list created by nlm_get_active_shres() function.
+ * Every item of the list is destroyed with
+ * nlm_shres_destroy_item(); a NULL list is a no-op.
+ */
+void
+nlm_free_shrlist(struct nlm_shres *nslist)
+{
+	struct nlm_shres *nsp;
+
+	while (nslist != NULL) {
+		nsp = nslist;
+		nslist = nslist->ns_next;
+
+		nlm_shres_destroy_item(nsp);
+	}
+}
+/*
+ * Two share reservations are considered equal when their
+ * s_sysid, s_pid and s_own_len match and the s_owner bytes
+ * compare equal over s_own_len bytes.
+ */
+static bool_t
+nlm_shres_equal(struct shrlock *shrp1, struct shrlock *shrp2)
+{
+	if (shrp1->s_sysid == shrp2->s_sysid &&
+	    shrp1->s_pid == shrp2->s_pid &&
+	    shrp1->s_own_len == shrp2->s_own_len &&
+	    bcmp(shrp1->s_owner, shrp2->s_owner,
+	    shrp1->s_own_len) == 0)
+		return (TRUE);
+
+	return (FALSE);
+}
+/*
+ * Allocate a list item holding a private copy of *shrp,
+ * including a separately allocated copy of its s_owner
+ * buffer, and the vnode pointer vp. All allocations use
+ * KM_SLEEP. ns_next is left for the caller to set.
+ */
+static struct nlm_shres *
+nlm_shres_create_item(struct shrlock *shrp, vnode_t *vp)
+{
+	struct nlm_shres *nsp;
+
+	nsp = kmem_alloc(sizeof (*nsp), KM_SLEEP);
+	nsp->ns_shr = kmem_alloc(sizeof (*shrp), KM_SLEEP);
+	bcopy(shrp, nsp->ns_shr, sizeof (*shrp));
+	nsp->ns_shr->s_owner = kmem_alloc(shrp->s_own_len, KM_SLEEP);
+	bcopy(shrp->s_owner, nsp->ns_shr->s_owner, shrp->s_own_len);
+	nsp->ns_vp = vp;
+
+	return (nsp);
+}
+/*
+ * Free a list item made by nlm_shres_create_item():
+ * the owner buffer, the shrlock copy and the item itself.
+ */
+static void
+nlm_shres_destroy_item(struct nlm_shres *nsp)
+{
+	kmem_free(nsp->ns_shr->s_owner,
+	    nsp->ns_shr->s_own_len);
+	kmem_free(nsp->ns_shr, sizeof (struct shrlock));
+	kmem_free(nsp, sizeof (*nsp));
+}
+
+/*
+ * Called by klmmod.c when lockd adds a network endpoint
+ * on which we should begin RPC services.
+ */
+int
+nlm_svc_add_ep(struct file *fp, const char *netid, struct knetconfig *knc)
+{
+	SVCMASTERXPRT *xprt = NULL;
+	int error;
+
+	error = svc_tli_kcreate(fp, 0, (char *)netid, NULL, &xprt,
+	    &nlm_sct, NULL, NLM_SVCPOOL_ID, FALSE);
+	if (error != 0)
+		return (error);
+
+	(void) nlm_knc_to_netid(knc);
+	return (0);
+}
+
+/*
+ * Start NLM service.
+ *
+ * Must be called in NLM_ST_STARTING state with no GC thread
+ * running (both VERIFYed). On success run_status becomes
+ * NLM_ST_UP and 0 is returned.
+ *
+ * If the NSM handle can't be initialized, run_status is set to
+ * NLM_ST_DOWN and the error is returned. Any later failure
+ * switches NLM to NLM_ST_STOPPING and tears it down again via
+ * nlm_svc_stopping(); failed statd RPCs are reported as EIO.
+ */
+int
+nlm_svc_starting(struct nlm_globals *g, struct file *fp,
+    const char *netid, struct knetconfig *knc)
+{
+	int error;
+	enum clnt_stat stat;
+
+	VERIFY(g->run_status == NLM_ST_STARTING);
+	VERIFY(g->nlm_gc_thread == NULL);
+
+	error = nlm_nsm_init_local(&g->nlm_nsm);
+	if (error != 0) {
+		NLM_ERR("Failed to initialize NSM handler "
+		    "(error=%d)\n", error);
+		g->run_status = NLM_ST_DOWN;
+		return (error);
+	}
+
+	error = EIO;
+
+	/*
+	 * Create an NLM garbage collector thread that will
+	 * clean up stale vholds and hosts objects.
+	 */
+	g->nlm_gc_thread = zthread_create(NULL, 0, nlm_gc,
+	    g, 0, minclsyspri);
+
+	/*
+	 * Send SIMU_CRASH to local statd to report that
+	 * NLM started, so that statd can notify other hosts
+	 * about the NLM state change.
+	 */
+
+	stat = nlm_nsm_simu_crash(&g->nlm_nsm);
+	if (stat != RPC_SUCCESS) {
+		NLM_ERR("Failed to connect to local statd "
+		    "(rpcerr=%d)\n", stat);
+		goto shutdown_lm;
+	}
+
+	stat = nlm_nsm_stat(&g->nlm_nsm, &g->nsm_state);
+	if (stat != RPC_SUCCESS) {
+		NLM_ERR("Failed to get the status of local statd "
+		    "(rpcerr=%d)\n", stat);
+		goto shutdown_lm;
+	}
+
+	g->grace_threshold = ddi_get_lbolt() +
+	    SEC_TO_TICK(g->grace_period);
+
+	/* Register endpoint used for communications with local NLM */
+	error = nlm_svc_add_ep(fp, netid, knc);
+	if (error != 0)
+		goto shutdown_lm;
+
+	(void) svc_pool_control(NLM_SVCPOOL_ID,
+	    SVCPSET_SHUTDOWN_PROC, (void *)nlm_pool_shutdown);
+	g->run_status = NLM_ST_UP;
+	return (0);
+
+shutdown_lm:
+	mutex_enter(&g->lock);
+	g->run_status = NLM_ST_STOPPING;
+	mutex_exit(&g->lock);
+
+	nlm_svc_stopping(g);
+	return (error);
+}
+
+/*
+ * Called when the server pool is destroyed, so that
+ * all transports are closed and no server threads
+ * exist any more.
+ *
+ * Just call lm_shutdown() to shut NLM down properly.
+ */
+static void
+nlm_pool_shutdown(void)
+{
+	(void) lm_shutdown();
+}
+
+/*
+ * Stop NLM service, cleanup all resources
+ * NLM owns at the moment.
+ *
+ * NOTE: NFS code can call NLM while it's
+ * stopping or even if it's shut down. Any attempt
+ * to lock file either on client or on the server
+ * will fail if NLM isn't in NLM_ST_UP state.
+ *
+ * The caller must have set run_status to NLM_ST_STOPPING
+ * (asserted). On return run_status is NLM_ST_DOWN.
+ */
+void
+nlm_svc_stopping(struct nlm_globals *g)
+{
+	mutex_enter(&g->lock);
+	ASSERT(g->run_status == NLM_ST_STOPPING);
+
+	/*
+	 * Ask NLM GC thread to exit and wait until it dies.
+	 */
+	cv_signal(&g->nlm_gc_sched_cv);
+	while (g->nlm_gc_thread != NULL)
+		cv_wait(&g->nlm_gc_finish_cv, &g->lock);
+
+	mutex_exit(&g->lock);
+
+	/*
+	 * Cleanup locks owned by NLM hosts.
+	 * NOTE: New hosts won't be created while
+	 * NLM is stopping.
+	 */
+	while (!avl_is_empty(&g->nlm_hosts_tree)) {
+		struct nlm_host *hostp;
+		int busy_hosts = 0;
+
+		/*
+		 * Iterate through all NLM hosts in the system
+		 * and drop the locks they own by force.
+		 */
+		hostp = avl_first(&g->nlm_hosts_tree);
+		while (hostp != NULL) {
+			/* Cleanup all client and server side locks */
+			nlm_client_cancel_all(g, hostp);
+			nlm_host_notify_server(hostp, 0);
+
+			mutex_enter(&hostp->nh_lock);
+			nlm_host_gc_vholds(hostp);
+			if (hostp->nh_refs > 0 || nlm_host_has_locks(hostp)) {
+				/*
+				 * Oh, it seems the host is still busy, give
+				 * it some time to release and go to the
+				 * next one.
+				 */
+
+				mutex_exit(&hostp->nh_lock);
+				hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp);
+				busy_hosts++;
+				continue;
+			}
+
+			mutex_exit(&hostp->nh_lock);
+			hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp);
+		}
+
+		/*
+		 * All hosts go to nlm_idle_hosts list after
+		 * all locks they own are cleaned up and last references
+		 * were dropped. Just destroy all hosts in nlm_idle_hosts
+		 * list, they can not be removed from there while we're
+		 * in stopping state.
+		 */
+		while ((hostp = TAILQ_FIRST(&g->nlm_idle_hosts)) != NULL) {
+			nlm_host_unregister(g, hostp);
+			nlm_host_destroy(hostp);
+		}
+
+		if (busy_hosts > 0) {
+			/*
+			 * There are some hosts that weren't cleaned
+			 * up. Probably they're in resource cleanup
+			 * process. Give them some time to drop
+			 * their references.
+			 */
+			delay(MSEC_TO_TICK(500));
+		}
+	}
+
+	ASSERT(TAILQ_EMPTY(&g->nlm_slocks));
+
+	nlm_nsm_fini(&g->nlm_nsm);
+	g->lockd_pid = 0;
+	g->run_status = NLM_ST_DOWN;
+}
+
+/*
+ * Returns TRUE (1) if the given vnode has
+ * any active or sleeping locks, otherwise 0.
+ * The check is done for the NLM instance of the
+ * current zone.
+ */
+int
+nlm_vp_active(const vnode_t *vp)
+{
+	struct nlm_globals *g;
+	struct nlm_host *hostp;
+	struct nlm_vhold *nvp;
+	int active = 0;
+
+	g = zone_getspecific(nlm_zone_key, curzone);
+
+	/*
+	 * Server side NLM has locks on the given vnode
+	 * if there exists a vhold object that holds
+	 * the given vnode "vp" in one of NLM hosts.
+	 */
+	mutex_enter(&g->lock);
+	hostp = avl_first(&g->nlm_hosts_tree);
+	while (hostp != NULL) {
+		mutex_enter(&hostp->nh_lock);
+		nvp = nlm_vhold_find_locked(hostp, vp);
+		mutex_exit(&hostp->nh_lock);
+		if (nvp != NULL) {
+			active = 1;
+			break;
+		}
+
+		hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp);
+	}
+
+	mutex_exit(&g->lock);
+	return (active);
+}
+
+/*
+ * Called right before NFS export is going to
+ * disappear. The function finds all vnodes
+ * belonging to the given export and cleans
+ * all remote locks and share reservations
+ * on them.
+ *
+ * g->lock is held over the whole walk of the hosts
+ * tree. For each vhold nv_refcnt is raised while
+ * nh_lock is dropped around the cleanup.
+ */
+void
+nlm_unexport(struct exportinfo *exi)
+{
+	struct nlm_globals *g;
+	struct nlm_host *hostp;
+
+	g = zone_getspecific(nlm_zone_key, curzone);
+
+	mutex_enter(&g->lock);
+	hostp = avl_first(&g->nlm_hosts_tree);
+	while (hostp != NULL) {
+		struct nlm_vhold *nvp;
+
+		mutex_enter(&hostp->nh_lock);
+		TAILQ_FOREACH(nvp, &hostp->nh_vholds_list, nv_link) {
+			vnode_t *vp;
+
+			nvp->nv_refcnt++;
+			mutex_exit(&hostp->nh_lock);
+
+			vp = nvp->nv_vp;
+
+			if (!EQFSID(&exi->exi_fsid, &vp->v_vfsp->vfs_fsid))
+				goto next_iter;
+
+			/*
+			 * Ok, we found out that vnode vp belongs to
+			 * the file system exported by exi, now we need
+			 * to drop all locks from this vnode, let's
+			 * do it.
+			 */
+			nlm_vhold_clean(nvp, hostp->nh_sysid);
+
+		next_iter:
+			mutex_enter(&hostp->nh_lock);
+			nvp->nv_refcnt--;
+		}
+
+		mutex_exit(&hostp->nh_lock);
+		hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp);
+	}
+
+	mutex_exit(&g->lock);
+}
+
+/*
+ * Allocate new unique sysid.
+ * In case of failure (no available sysids)
+ * return LM_NOSYSID.
+ */
+sysid_t
+nlm_sysid_alloc(void)
+{
+	sysid_t ret_sysid = LM_NOSYSID;
+
+	rw_enter(&lm_lck, RW_WRITER);
+	if (nlm_sysid_nidx > LM_SYSID_MAX)
+		nlm_sysid_nidx = LM_SYSID;
+
+	if (!BT_TEST(nlm_sysid_bmap, nlm_sysid_nidx)) {
+		BT_SET(nlm_sysid_bmap, nlm_sysid_nidx);
+		ret_sysid = nlm_sysid_nidx++;
+	} else {
+		index_t id;
+
+		id = bt_availbit(nlm_sysid_bmap, NLM_BMAP_NITEMS);
+		if (id > 0) {
+			nlm_sysid_nidx = id + 1;
+			ret_sysid = id;
+			BT_SET(nlm_sysid_bmap, id);
+		}
+	}
+
+	rw_exit(&lm_lck);
+	return (ret_sysid);
+}
+/*
+ * Return a sysid obtained from nlm_sysid_alloc() to the pool
+ * by clearing its bit in nlm_sysid_bmap under lm_lck (writer).
+ * The sysid must lie in [LM_SYSID, LM_SYSID_MAX] and must
+ * currently be marked as allocated (both asserted).
+ */
+void
+nlm_sysid_free(sysid_t sysid)
+{
+	ASSERT(sysid >= LM_SYSID && sysid <= LM_SYSID_MAX);
+
+	rw_enter(&lm_lck, RW_WRITER);
+	ASSERT(BT_TEST(nlm_sysid_bmap, sysid));
+	BT_CLEAR(nlm_sysid_bmap, sysid);
+	rw_exit(&lm_lck);
+}
+
+/*
+ * Return true if the request came from a local caller.
+ * By necessity, this "knows" the netid names invented
+ * in lm_svc() and nlm_netid_from_knetconfig().
+ *
+ * The "ticlts" and "ticotsord" transports are always
+ * treated as local. For "tcp"/"udp" the caller's address
+ * must be INADDR_LOOPBACK, for "tcp6"/"udp6" it must be the
+ * IPv6 loopback address. A NULL netid or any other netid
+ * yields FALSE.
+ */
+bool_t
+nlm_caller_is_local(SVCXPRT *transp)
+{
+	char *netid;
+	struct netbuf *rtaddr;
+
+	netid = svc_getnetid(transp);
+	rtaddr = svc_getrpccaller(transp);
+
+	if (netid == NULL)
+		return (FALSE);
+
+	if (strcmp(netid, "ticlts") == 0 ||
+	    strcmp(netid, "ticotsord") == 0)
+		return (TRUE);
+
+	if (strcmp(netid, "tcp") == 0 || strcmp(netid, "udp") == 0) {
+		struct sockaddr_in *sin = (void *)rtaddr->buf;
+		if (sin->sin_addr.s_addr == htonl(INADDR_LOOPBACK))
+			return (TRUE);
+	}
+	if (strcmp(netid, "tcp6") == 0 || strcmp(netid, "udp6") == 0) {
+		struct sockaddr_in6 *sin6 = (void *)rtaddr->buf;
+		if (IN6_IS_ADDR_LOOPBACK(&sin6->sin6_addr))
+			return (TRUE);
+	}
+
+	return (FALSE); /* non-loopback address or unknown transport */
+}
+
+/*
+ * Get netid string correspondig to the given knetconfig.
+ * If not done already, save knc->knc_rdev in our table.
+ */ +const char * +nlm_knc_to_netid(struct knetconfig *knc) +{ + int i; + dev_t rdev; + struct nlm_knc *nc; + const char *netid = NULL; + + rw_enter(&lm_lck, RW_READER); + for (i = 0; i < NLM_KNCS; i++) { + nc = &nlm_netconfigs[i]; + + if (nc->n_knc.knc_semantics == knc->knc_semantics && + strcmp(nc->n_knc.knc_protofmly, + knc->knc_protofmly) == 0) { + netid = nc->n_netid; + rdev = nc->n_knc.knc_rdev; + break; + } + } + rw_exit(&lm_lck); + + if (netid != NULL && rdev == NODEV) { + rw_enter(&lm_lck, RW_WRITER); + if (nc->n_knc.knc_rdev == NODEV) + nc->n_knc.knc_rdev = knc->knc_rdev; + rw_exit(&lm_lck); + } + + return (netid); +} + +/* + * Get a knetconfig corresponding to the given netid. + * If there's no knetconfig for this netid, ENOENT + * is returned. + */ +int +nlm_knc_from_netid(const char *netid, struct knetconfig *knc) +{ + int i, ret; + + ret = ENOENT; + for (i = 0; i < NLM_KNCS; i++) { + struct nlm_knc *nknc; + + nknc = &nlm_netconfigs[i]; + if (strcmp(netid, nknc->n_netid) == 0 && + nknc->n_knc.knc_rdev != NODEV) { + *knc = nknc->n_knc; + ret = 0; + break; + } + } + + return (ret); +} + +void +nlm_cprsuspend(void) +{ + struct nlm_globals *g; + + rw_enter(&lm_lck, RW_READER); + TAILQ_FOREACH(g, &nlm_zones_list, nlm_link) + nlm_suspend_zone(g); + + rw_exit(&lm_lck); +} + +void +nlm_cprresume(void) +{ + struct nlm_globals *g; + + rw_enter(&lm_lck, RW_READER); + TAILQ_FOREACH(g, &nlm_zones_list, nlm_link) + nlm_resume_zone(g); + + rw_exit(&lm_lck); +} + +static void +nlm_nsm_clnt_init(CLIENT *clnt, struct nlm_nsm *nsm) +{ + (void) clnt_tli_kinit(clnt, &nsm->ns_knc, &nsm->ns_addr, 0, + NLM_RPC_RETRIES, kcred); +} + +static void +nlm_netbuf_to_netobj(struct netbuf *addr, int *family, netobj *obj) +{ + /* LINTED pointer alignment */ + struct sockaddr *sa = (struct sockaddr *)addr->buf; + + *family = sa->sa_family; + + switch (sa->sa_family) { + case AF_INET: { + /* LINTED pointer alignment */ + struct sockaddr_in *sin = (struct sockaddr_in *)sa; + + obj->n_len 
= sizeof (sin->sin_addr); + obj->n_bytes = (char *)&sin->sin_addr; + break; + } + + case AF_INET6: { + /* LINTED pointer alignment */ + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa; + + obj->n_len = sizeof (sin6->sin6_addr); + obj->n_bytes = (char *)&sin6->sin6_addr; + break; + } + + default: + VERIFY(0); + break; + } +} diff --git a/usr/src/uts/common/klm/nlm_impl.h b/usr/src/uts/common/klm/nlm_impl.h new file mode 100644 index 0000000000..6b2df7f8b0 --- /dev/null +++ b/usr/src/uts/common/klm/nlm_impl.h @@ -0,0 +1,642 @@ +/* + * Copyright (c) 2008 Isilon Inc http://www.isilon.com/ + * Authors: Doug Rabson <dfr@rabson.org> + * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * Copyright 2012 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. + */ + +/* + * NFS Lock Manager (NLM) private declarations, etc. + * + * Source code derived from FreeBSD nlm.h + */ + +#ifndef _NLM_NLM_H_ +#define _NLM_NLM_H_ + +#include <sys/cmn_err.h> +#include <sys/queue.h> +#include <sys/modhash.h> +#include <sys/avl.h> + +#define RPC_MSGOUT(args...) cmn_err(CE_NOTE, args) +#define NLM_ERR(...) cmn_err(CE_NOTE, __VA_ARGS__) +#define NLM_WARN(...) cmn_err(CE_WARN, __VA_ARGS__) + +#ifndef SEEK_SET +#define SEEK_SET 0 +#endif +#ifndef SEEK_CUR +#define SEEK_CUR 1 +#endif +#ifndef SEEK_END +#define SEEK_END 2 +#endif + +/* + * Maximum offset supported by NLM calls using the older + * (32-bit) versions of the protocol. + */ +#define MAX_UOFF32 0xffffffffULL + +struct nlm_host; +struct vnode; +struct exportinfo; +struct shrlock; +struct _kthread; + +/* + * How to read the code: probably the best point to start + * is the nlm_host structure, which is the most important + * structure in klmmod. nlm_host is closely tied with all + * other NLM structures. + * + * There're three major locks we use inside NLM: + * 1) Global read-write lock (lm_lck) that is used to + * protect operations with sysid allocation and + * management of zone globals structures for each + * zone.
+ * 2) Zone global lock: (nlm_globals->lock) is a mutex + * used to protect all operations inside particular + * zone. + * 3) Host's lock: (nlm_host->nh_lock) is per-host mutex + * used to protect host's internal fields and all + * operations with the given host. + * + * Locks order _must_ obey the following scheme: + * lm_lck then nlm_globals->lock then nlm_host->nh_lock + * + * Locks: + * (g) locked by lm_lck + * (z) locked by nlm_globals->lock + * (l) locked by host->nh_lock + * (c) const until freeing + */ + +/* + * Callback functions for nlm_do_lock() and others. + * + * Calls to nlm_do_lock are unusual, because it needs to handle + * the reply itself, instead of letting it happen the normal way. + * It also needs to make an RPC call _back_ to the client when a + * blocked lock request completes. + * + * We pass three callback functions to nlm_do_lock: + * nlm_reply_cb: send a normal RPC reply + * nlm_res_cb: do a _res (message style) RPC (call) + * nlm_testargs_cb: do a "granted" RPC call (after blocking) + * Only one of the 1st or 2nd is used. + * The 3rd is used only for blocking + * + * We also use callback functions for all the _msg variants + * of the NLM svc calls, where the reply is a reverse call. + * The nlm_testres_cb is used by the _test_msg svc calls. + * The nlm_res_cb type is used by the other _msg calls. + */ +typedef bool_t (*nlm_reply_cb)(SVCXPRT *, nlm4_res *); +typedef enum clnt_stat (*nlm_res_cb)(nlm4_res *, void *, CLIENT *); +typedef enum clnt_stat (*nlm_testargs_cb)(nlm4_testargs *, void *, CLIENT *); +typedef enum clnt_stat (*nlm_testres_cb)(nlm4_testres *, void *, CLIENT *); + +/* + * NLM sleeping lock request. + * + * Sleeping lock requests are server side only objects + * that are created when client asks server to add new + * sleeping lock and when this lock needs to block. + * Server keeps a track of these requests in order to be + * able to cancel them or clean them up. 
+ * + * Sleeping lock requests are closely tied with particular + * vnode or, strictly speaking, NLM vhold object that holds + * the vnode. + * + * struct nlm_slreq: + * nsr_fl: information about the file lock + * nsr_link: a list node to store lock requests + * in vhold object. + */ +struct nlm_slreq { + struct flock64 nsr_fl; + TAILQ_ENTRY(nlm_slreq) nsr_link; +}; +TAILQ_HEAD(nlm_slreq_list, nlm_slreq); + +/* + * NLM vhold object is a sort of wrapper on vnodes remote + * clients have locked (or added share reservation) + * on NLM server. Vhold keeps vnode held (by VN_HOLD()) + * while vnode has any locks or shares made by parent host. + * Vholds are used for two purposes: + * 1) Hold vnode (with VN_HOLD) while it has any locks; + * 2) Keep track of all vnodes remote host touched + * with lock/share operations on NLM server, so that NLM + * can know what vnodes are potentially locked; + * + * Vholds are used on server side only. For server side it's really + * important to keep vnodes held while they potentially have + * any locks/shares. In contrast, it's not important for client + * side at all. When particular vnode comes to the NLM client side + * code, it's already held (VN_HOLD) by the process calling + * lock/share function (it's referenced because client calls open() + * before making locks or shares). + * + * Each NLM host object has a collection of vholds associated + * with vnodes host touched earlier by adding locks or shares. + * Having this collection allows us to decide if host is still + * in use. When it has any vhold objects it's considered to be + * in use. Otherwise we're free to destroy it. + * + * Vholds are destroyed by the NLM garbage collector thread that + * periodically checks whether they have any locks or shares. + * Checking occurs when parent host is untouched by client + * or server for some period of time.
+ * + * struct nlm_vhold: + * nv_vp: a pointer to vnode that is held by given nlm_vhold + * nv_refcnt: reference counter (non zero when vhold is inuse) + * nv_slreqs: sleeping lock requests that were made on the nv_vp + * nv_link: list node to store vholds in host's nh_vholds_list + */ +struct nlm_vhold { + vnode_t *nv_vp; /* (c) */ + int nv_refcnt; /* (l) */ + struct nlm_slreq_list nv_slreqs; /* (l) */ + TAILQ_ENTRY(nlm_vhold) nv_link; /* (l) */ +}; +TAILQ_HEAD(nlm_vhold_list, nlm_vhold); + +/* + * Client side sleeping lock state. + * - NLM_SL_BLOCKED: some thread is blocked on this lock + * - NLM_SL_GRANTED: server granted us the lock + * - NLM_SL_CANCELLED: the lock is cancelled (i.e. invalid/inactive) + */ +typedef enum nlm_slock_state { + NLM_SL_UNKNOWN = 0, + NLM_SL_BLOCKED, + NLM_SL_GRANTED, + NLM_SL_CANCELLED +} nlm_slock_state_t; + +/* + * A client side sleeping lock request (set by F_SETLKW) + * stored in nlm_slocks collection of nlm_globals. + * + * struct nlm_slock: + * nsl_state: Sleeping lock state. + * (see nlm_slock_state for more information) + * nsl_cond: Condvar that is used when sleeping lock + * needs to wait for a GRANT callback + * or cancellation event. + * nsl_lock: nlm4_lock structure that is sent to the server + * nsl_fh: Filehandle that corresponds to nsl_vp + * nsl_host: A host owning this sleeping lock + * nsl_vp: A vnode sleeping lock is waiting on. + * nsl_link: A list node for nlm_globals->nlm_slocks list. + */ +struct nlm_slock { + nlm_slock_state_t nsl_state; /* (z) */ + kcondvar_t nsl_cond; /* (z) */ + nlm4_lock nsl_lock; /* (c) */ + struct netobj nsl_fh; /* (c) */ + struct nlm_host *nsl_host; /* (c) */ + struct vnode *nsl_vp; /* (c) */ + TAILQ_ENTRY(nlm_slock) nsl_link; /* (z) */ +}; +TAILQ_HEAD(nlm_slock_list, nlm_slock); + +/* + * Share reservation description. NLM tracks all active + * share reservations made by the client side, so that + * they can be easily recovered if remote NLM server + * reboots.
Share reservations tracking is also useful + * when NLM needs to determine whether host owns any + * resources on the system and can't be destroyed. + * + * nlm_shres: + * ns_shr: share reservation description + * ns_vp: a pointer to vnode where share reservation is located + * ns_next: next nlm_shres instance (or NULL if next item isn't + * present). + */ +struct nlm_shres { + struct shrlock *ns_shr; + vnode_t *ns_vp; + struct nlm_shres *ns_next; +}; + +/* + * NLM RPC handle object. + * + * In kRPC subsystem it's unsafe to use one RPC handle by + * several threads simultaneously. It was designed so that + * each thread has to create an RPC handle that it'll use. + * RPC handle creation can be quite expensive operation, especially + * with session oriented protocols (such as TCP) that need to + * establish session at first. NLM RPC handle object is a sort of + * wrapper on kRPC handle object that can be cached and used in + * future. We store all created RPC handles for given host in a + * host's RPC handles cache, so that to make new requests threads + * can simply take ready objects from the cache. That improves + * NLM performance. + * + * nlm_rpc_t: + * nr_handle: a kRPC handle itself. + * nr_vers: a version of NLM protocol kRPC handle was + * created for. + * nr_link: a list node to store NLM RPC handles in the host + * RPC handles cache. + */ +typedef struct nlm_rpc { + CLIENT *nr_handle; /* (l) */ + rpcvers_t nr_vers; /* (c) */ + TAILQ_ENTRY(nlm_rpc) nr_link; /* (l) */ +} nlm_rpc_t; +TAILQ_HEAD(nlm_rpch_list, nlm_rpc); + +/* + * Describes the state of NLM host's RPC binding. + * RPC binding can be in one of three states: + * 1) NRPCB_NEED_UPDATE: + * Binding is either not initialized or stale. + * 2) NRPCB_UPDATE_INPROGRESS: + * When some thread updates host's RPC binding, + * it sets binding's state to NRPCB_UPDATE_INPROGRESS + * which denotes that other threads must wait until + * update process is finished. 
+ * 3) NRPCB_UPDATED: + * Denotes that host's RPC binding is both initialized + * and fresh. + */ +enum nlm_rpcb_state { + NRPCB_NEED_UPDATE = 0, + NRPCB_UPDATE_INPROGRESS, + NRPCB_UPDATED +}; + +/* + * NLM host flags + */ +#define NLM_NH_MONITORED 0x01 +#define NLM_NH_RECLAIM 0x02 +#define NLM_NH_INIDLE 0x04 +#define NLM_NH_SUSPEND 0x08 + +/* + * NLM host object is the most major structure in NLM. + * It identifies remote client or remote server or both. + * NLM host object keep a track of all vnodes client/server + * locked and all sleeping locks it has. All lock/unlock + * operations are done using host object. + * + * nlm_host: + * nh_lock: a mutex protecting host object fields + * nh_refs: reference counter. Identifies how many threads + * uses this host object. + * nh_link: a list node for keeping host in zone-global list. + * nh_by_addr: an AVL tree node for keeping host in zone-global tree. + * Host can be looked up in the tree by <netid, address> + * pair. + * nh_name: host name. + * nh_netid: netid string identifying type of transport host uses. + * nh_knc: host's knetconfig (used by kRPC subsystem). + * nh_addr: host's address (either IPv4 or IPv6). + * nh_sysid: unique sysid associated with this host. + * nh_state: last seen host's state reported by NSM. + * nh_flags: ORed host flags. + * nh_idle_timeout: host idle timeout. When expired host is freed. + * nh_recl_cv: condition variable used for reporting that reclamation + * process is finished. + * nh_rpcb_cv: condition variable that is used to make sure + * that only one thread renews host's RPC binding. + * nh_rpcb_ustat: error code returned by RPC binding update operation. + * nh_rpcb_state: host's RPC binding state (see enum nlm_rpcb_state + * for more details). + * nh_rpchc: host's RPC handles cache. + * nh_vholds_by_vp: a hash table of all vholds host owns. (used for lookup) + * nh_vholds_list: a linked list of all vholds host owns. 
(used for iteration) + * nh_shrlist: a list of all active share reservations on the client side. + * nh_reclaimer: a pointer to reclamation thread (kthread_t) + * NULL if reclamation thread doesn't exist + */ +struct nlm_host { + kmutex_t nh_lock; /* (c) */ + volatile uint_t nh_refs; /* (z) */ + TAILQ_ENTRY(nlm_host) nh_link; /* (z) */ + avl_node_t nh_by_addr; /* (z) */ + char *nh_name; /* (c) */ + char *nh_netid; /* (c) */ + struct knetconfig nh_knc; /* (c) */ + struct netbuf nh_addr; /* (c) */ + sysid_t nh_sysid; /* (c) */ + int32_t nh_state; /* (z) */ + clock_t nh_idle_timeout; /* (z) */ + uint8_t nh_flags; /* (z) */ + kcondvar_t nh_recl_cv; /* (z) */ + kcondvar_t nh_rpcb_cv; /* (l) */ + enum clnt_stat nh_rpcb_ustat; /* (l) */ + enum nlm_rpcb_state nh_rpcb_state; /* (l) */ + struct nlm_rpch_list nh_rpchc; /* (l) */ + mod_hash_t *nh_vholds_by_vp; /* (l) */ + struct nlm_vhold_list nh_vholds_list; /* (l) */ + struct nlm_shres *nh_shrlist; /* (l) */ + kthread_t *nh_reclaimer; /* (l) */ +}; +TAILQ_HEAD(nlm_host_list, nlm_host); + +/* + * nlm_nsm structure describes RPC client handle that can be + * used to communicate with local NSM via kRPC. + * + * We need to wrap handle with nlm_nsm structure because kRPC + * can not share one handle between several threads. It's assumed + * that NLM uses only one NSM handle per zone, thus all RPC operations + * on NSM's handle are serialized using nlm_nsm->ns_sem semaphore. + * + * nlm_nsm also contains refcnt field used for reference counting. + * It's used because there exists a possibility of simultaneous + * execution of NLM shutdown operation and host monitor/unmonitor + * operations. + * NOTE(review): struct nlm_nsm as declared in this header has no + * refcnt field, so the paragraph above looks stale -- confirm.
+ * + * struct nlm_nsm: + * ns_sem: a semaphore for serializing network operations to statd + * ns_knc: a knetconfig describing transport that is used for communication + * ns_addr: an address of local statd we're talking to + * ns_handle: an RPC handle used for talking to local statd using the status + * monitor protocol (SM_PROG) + * ns_addr_handle: an RPC handle used for talking to local statd using the + * address registration protocol (NSM_ADDR_PROGRAM) + */ +struct nlm_nsm { + ksema_t ns_sem; + struct knetconfig ns_knc; /* (c) */ + struct netbuf ns_addr; /* (c) */ + CLIENT *ns_handle; /* (c) */ + CLIENT *ns_addr_handle; /* (c) */ +}; + +/* + * Could use flock.h flk_nlm_status_t instead, but + * prefer our own enum with initial zero... + */ +typedef enum { + NLM_ST_DOWN = 0, + NLM_ST_STOPPING, + NLM_ST_UP, + NLM_ST_STARTING +} nlm_run_status_t; + +/* + * nlm_globals structure allows NLM to be zone aware. The structure + * collects all "global variables" NLM has for each zone. + * + * struct nlm_globals: + * lock: mutex protecting all operations inside given zone + * grace_threshold: grace period expiration time (in ticks) + * lockd_pid: PID of lockd user space daemon + * run_status: run status of klmmod inside given zone + * nsm_state: state obtained from local statd during klmmod startup + * nlm_gc_thread: garbage collector thread + * nlm_gc_sched_cv: condvar that can be signalled to wakeup GC + * nlm_gc_finish_cv: condvar that is signalled just before GC thread exits + * nlm_nsm: an object describing RPC handle used for talking to local statd + * nlm_hosts_tree: an AVL tree of all hosts in the given zone + * (used for hosts lookup by <netid, address> pair) + * nlm_hosts_hash: a hash table of all hosts in the given zone + * (used for hosts lookup by sysid) + * nlm_idle_hosts: a list of all hosts that are in idle state (i.e.
unused) + * nlm_slocks: a list of all client-side sleeping locks in the zone + * cn_idle_tmo: a value of idle timeout (in seconds) obtained from lockd + * grace_period: a value of grace period (in seconds) obtained from lockd + * retrans_tmo: a value of retransmission timeout (in seconds) obtained + * from lockd. + * clean_lock: mutex used to serialize clear_locks calls. + * nlm_link: a list node used for keeping all nlm_globals objects + * in one global linked list. + */ +struct nlm_globals { + kmutex_t lock; + clock_t grace_threshold; /* (z) */ + pid_t lockd_pid; /* (z) */ + nlm_run_status_t run_status; /* (z) */ + int32_t nsm_state; /* (z) */ + kthread_t *nlm_gc_thread; /* (z) */ + kcondvar_t nlm_gc_sched_cv; /* (z) */ + kcondvar_t nlm_gc_finish_cv; /* (z) */ + struct nlm_nsm nlm_nsm; /* (z) */ + avl_tree_t nlm_hosts_tree; /* (z) */ + mod_hash_t *nlm_hosts_hash; /* (z) */ + struct nlm_host_list nlm_idle_hosts; /* (z) */ + struct nlm_slock_list nlm_slocks; /* (z) */ + int cn_idle_tmo; /* (z) */ + int grace_period; /* (z) */ + int retrans_tmo; /* (z) */ + kmutex_t clean_lock; /* (c) */ + TAILQ_ENTRY(nlm_globals) nlm_link; /* (g) */ +}; +TAILQ_HEAD(nlm_globals_list, nlm_globals); + + +/* + * This is what we pass as the "owner handle" for NLM_LOCK. + * This lets us find the blocked lock in NLM_GRANTED. + * It also exposes on the wire what we're using as the + * sysid for any server, which can be very helpful for + * problem diagnosis. (Observability is good). + */ +struct nlm_owner_handle { + sysid_t oh_sysid; /* of remote host */ +}; + +/* + * Number of times an NLM RPC call is retried in case of failure. + * (used in case of connectionless transport).
+ */ +#define NLM_RPC_RETRIES 5 + +/* + * Klmmod global variables + */ +extern krwlock_t lm_lck; +extern zone_key_t nlm_zone_key; + +/* + * NLM interface functions (called directly by + * either klmmod or klmops) + */ +extern int nlm_frlock(struct vnode *, int, struct flock64 *, int, u_offset_t, + struct cred *, struct netobj *, struct flk_callback *, int); +extern int nlm_shrlock(struct vnode *, int, struct shrlock *, int, + struct netobj *, int); +extern int nlm_safemap(const vnode_t *); +extern int nlm_safelock(vnode_t *, const struct flock64 *, cred_t *); +extern int nlm_has_sleep(const vnode_t *); +extern void nlm_register_lock_locally(struct vnode *, struct nlm_host *, + struct flock64 *, int, u_offset_t); +int nlm_vp_active(const vnode_t *vp); +void nlm_sysid_free(sysid_t); +int nlm_vp_active(const vnode_t *); +void nlm_unexport(struct exportinfo *); + +/* + * NLM startup/shutdown + */ +int nlm_svc_starting(struct nlm_globals *, struct file *, + const char *, struct knetconfig *); +void nlm_svc_stopping(struct nlm_globals *); +int nlm_svc_add_ep(struct file *, const char *, struct knetconfig *); + +/* + * NLM suspend/resume + */ +void nlm_cprsuspend(void); +void nlm_cprresume(void); + +/* + * NLM internal functions for initialization.
+ */ +void nlm_init(void); +void nlm_rpc_init(void); +void nlm_rpc_cache_destroy(struct nlm_host *); +void nlm_globals_register(struct nlm_globals *); +void nlm_globals_unregister(struct nlm_globals *); +sysid_t nlm_sysid_alloc(void); + +/* + * Client reclamation/cancelation + */ +void nlm_reclaim_client(struct nlm_globals *, struct nlm_host *); +void nlm_client_cancel_all(struct nlm_globals *, struct nlm_host *); + +/* (nlm_rpc_clnt.c) */ +enum clnt_stat nlm_null_rpc(CLIENT *, rpcvers_t); +enum clnt_stat nlm_test_rpc(nlm4_testargs *, nlm4_testres *, + CLIENT *, rpcvers_t); +enum clnt_stat nlm_lock_rpc(nlm4_lockargs *, nlm4_res *, + CLIENT *, rpcvers_t); +enum clnt_stat nlm_cancel_rpc(nlm4_cancargs *, nlm4_res *, + CLIENT *, rpcvers_t); +enum clnt_stat nlm_unlock_rpc(nlm4_unlockargs *, nlm4_res *, + CLIENT *, rpcvers_t); +enum clnt_stat nlm_share_rpc(nlm4_shareargs *, nlm4_shareres *, + CLIENT *, rpcvers_t); +enum clnt_stat nlm_unshare_rpc(nlm4_shareargs *, nlm4_shareres *, + CLIENT *, rpcvers_t); + + +/* + * RPC service functions. 
+ * nlm_dispatch.c + */ +void nlm_prog_3(struct svc_req *rqstp, SVCXPRT *transp); +void nlm_prog_4(struct svc_req *rqstp, SVCXPRT *transp); + +/* + * Functions for working with knetconfigs (nlm_netconfig.c) + */ +const char *nlm_knc_to_netid(struct knetconfig *); +int nlm_knc_from_netid(const char *, struct knetconfig *); + +/* + * NLM host functions (nlm_impl.c) + */ +struct nlm_host *nlm_host_findcreate(struct nlm_globals *, char *, + const char *, struct netbuf *); +struct nlm_host *nlm_host_find(struct nlm_globals *, + const char *, struct netbuf *); +struct nlm_host *nlm_host_find_by_sysid(struct nlm_globals *, sysid_t); +void nlm_host_release(struct nlm_globals *, struct nlm_host *); + +void nlm_host_monitor(struct nlm_globals *, struct nlm_host *, int); +void nlm_host_unmonitor(struct nlm_globals *, struct nlm_host *); + +void nlm_host_notify_server(struct nlm_host *, int32_t); +void nlm_host_notify_client(struct nlm_host *, int32_t); + +int nlm_host_get_state(struct nlm_host *); + +struct nlm_vhold *nlm_vhold_get(struct nlm_host *, vnode_t *); +void nlm_vhold_release(struct nlm_host *, struct nlm_vhold *); +struct nlm_vhold *nlm_vhold_find_locked(struct nlm_host *, const vnode_t *); + +struct nlm_slock *nlm_slock_register(struct nlm_globals *, + struct nlm_host *, struct nlm4_lock *, struct vnode *); +void nlm_slock_unregister(struct nlm_globals *, struct nlm_slock *); +int nlm_slock_wait(struct nlm_globals *, struct nlm_slock *, uint_t); +int nlm_slock_grant(struct nlm_globals *, + struct nlm_host *, struct nlm4_lock *); +void nlm_host_cancel_slocks(struct nlm_globals *, struct nlm_host *); + +int nlm_slreq_register(struct nlm_host *, + struct nlm_vhold *, struct flock64 *); +int nlm_slreq_unregister(struct nlm_host *, + struct nlm_vhold *, struct flock64 *); + +void nlm_shres_track(struct nlm_host *, vnode_t *, struct shrlock *); +void nlm_shres_untrack(struct nlm_host *, vnode_t *, struct shrlock *); +struct nlm_shres *nlm_get_active_shres(struct 
nlm_host *); +void nlm_free_shrlist(struct nlm_shres *); + +int nlm_host_wait_grace(struct nlm_host *); +int nlm_host_cmp(const void *, const void *); +void nlm_copy_netobj(struct netobj *, struct netobj *); + +int nlm_host_get_rpc(struct nlm_host *, int, nlm_rpc_t **); +void nlm_host_rele_rpc(struct nlm_host *, nlm_rpc_t *); + +/* + * NLM server functions (nlm_service.c) + */ +int nlm_vp_active(const vnode_t *vp); +void nlm_do_notify1(nlm_sm_status *, void *, struct svc_req *); +void nlm_do_notify2(nlm_sm_status *, void *, struct svc_req *); +void nlm_do_test(nlm4_testargs *, nlm4_testres *, + struct svc_req *, nlm_testres_cb); +void nlm_do_lock(nlm4_lockargs *, nlm4_res *, struct svc_req *, + nlm_reply_cb, nlm_res_cb, nlm_testargs_cb); +void nlm_do_cancel(nlm4_cancargs *, nlm4_res *, + struct svc_req *, nlm_res_cb); +void nlm_do_unlock(nlm4_unlockargs *, nlm4_res *, + struct svc_req *, nlm_res_cb); +void nlm_do_granted(nlm4_testargs *, nlm4_res *, + struct svc_req *, nlm_res_cb); +void nlm_do_share(nlm4_shareargs *, nlm4_shareres *, struct svc_req *); +void nlm_do_unshare(nlm4_shareargs *, nlm4_shareres *, struct svc_req *); +void nlm_do_free_all(nlm4_notify *, void *, struct svc_req *); + +/* + * NLM RPC functions + */ +enum clnt_stat nlm_clnt_call(CLIENT *, rpcproc_t, xdrproc_t, + caddr_t, xdrproc_t, caddr_t, struct timeval); +bool_t nlm_caller_is_local(SVCXPRT *); + +#endif /* _NLM_NLM_H_ */ diff --git a/usr/src/uts/common/klm/nlm_prot_clnt.sed b/usr/src/uts/common/klm/nlm_prot_clnt.sed new file mode 100644 index 0000000000..5e1e0ec8cc --- /dev/null +++ b/usr/src/uts/common/klm/nlm_prot_clnt.sed @@ -0,0 +1,31 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. 
A copy is of the CDDL is also available via the Internet +# at http://www.illumos.org/license/CDDL. +# +# +# Copyright 2011 Nexenta Systems, Inc. All rights reserved. +# + +# +# This sed script is run on the client code generated by rpcgen +# from nlm_prot.x before it is compiled. +# + +6{ +i\ +#include <sys/param.h> +i\ +#include <sys/systm.h> +i\ +#include <rpcsvc/nlm_prot.h> +i\ +#include "nlm_impl.h" +} +/^.include/,/^.endif/d +s/clnt_call/nlm_clnt_call/g diff --git a/usr/src/uts/common/klm/nlm_rpc_clnt.c b/usr/src/uts/common/klm/nlm_rpc_clnt.c new file mode 100644 index 0000000000..d7f4420533 --- /dev/null +++ b/usr/src/uts/common/klm/nlm_rpc_clnt.c @@ -0,0 +1,285 @@ +/* + * Copyright (c) 2008 Isilon Inc http://www.isilon.com/ + * Authors: Doug Rabson <dfr@rabson.org> + * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ + +/* + * Client-side RPC wrappers (nlm_..._rpc) + * Called from nlm_client.c + * + * Source code derived from FreeBSD nlm_advlock.c + */ + +#include <sys/param.h> +#include <sys/fcntl.h> +#include <sys/lock.h> +#include <sys/flock.h> +#include <sys/mount.h> +#include <sys/mutex.h> +#include <sys/proc.h> +#include <sys/syslog.h> +#include <sys/systm.h> +#include <sys/unistd.h> +#include <sys/vnode.h> +#include <sys/queue.h> + +#include <rpcsvc/nlm_prot.h> + +#include <nfs/nfs.h> +#include <nfs/nfs_clnt.h> +#include <nfs/export.h> +#include <nfs/rnode.h> + +#include "nlm_impl.h" + +static void +nlm_convert_to_nlm_lock(struct nlm_lock *dst, struct nlm4_lock *src) +{ + dst->caller_name = src->caller_name; + dst->fh = src->fh; + dst->oh = src->oh; + dst->svid = src->svid; + dst->l_offset = src->l_offset; + dst->l_len = src->l_len; +} + +static void +nlm_convert_to_nlm4_holder(struct nlm4_holder *dst, struct nlm_holder *src) +{ + dst->exclusive = src->exclusive; + dst->svid = src->svid; + dst->oh = src->oh; + dst->l_offset = src->l_offset; + dst->l_len = src->l_len; +} + +static void +nlm_convert_to_nlm4_res(struct nlm4_res *dst, struct nlm_res *src) +{ + dst->cookie = src->cookie; + dst->stat.stat = (enum nlm4_stats) src->stat.stat; +} + +enum clnt_stat +nlm_test_rpc(nlm4_testargs *args, nlm4_testres *res, + CLIENT *client, rpcvers_t vers) +{ + if (vers == NLM4_VERS) { 
+ return (nlm4_test_4(args, res, client)); + } else { + nlm_testargs args1; + nlm_testres res1; + enum clnt_stat stat; + + args1.cookie = args->cookie; + args1.exclusive = args->exclusive; + nlm_convert_to_nlm_lock(&args1.alock, &args->alock); + (void) memset(&res1, 0, sizeof (res1)); + + stat = nlm_test_1(&args1, &res1, client); + + if (stat == RPC_SUCCESS) { + res->cookie = res1.cookie; + res->stat.stat = (enum nlm4_stats) res1.stat.stat; + if (res1.stat.stat == nlm_denied) + nlm_convert_to_nlm4_holder( + &res->stat.nlm4_testrply_u.holder, + &res1.stat.nlm_testrply_u.holder); + } + + return (stat); + } +} + +enum clnt_stat +nlm_lock_rpc(nlm4_lockargs *args, nlm4_res *res, + CLIENT *client, rpcvers_t vers) +{ + if (vers == NLM4_VERS) { + return (nlm4_lock_4(args, res, client)); + } else { + nlm_lockargs args1; + nlm_res res1; + enum clnt_stat stat; + + args1.cookie = args->cookie; + args1.block = args->block; + args1.exclusive = args->exclusive; + nlm_convert_to_nlm_lock(&args1.alock, &args->alock); + args1.reclaim = args->reclaim; + args1.state = args->state; + (void) memset(&res1, 0, sizeof (res1)); + + stat = nlm_lock_1(&args1, &res1, client); + + if (stat == RPC_SUCCESS) { + nlm_convert_to_nlm4_res(res, &res1); + } + + return (stat); + } +} + +enum clnt_stat +nlm_cancel_rpc(nlm4_cancargs *args, nlm4_res *res, + CLIENT *client, rpcvers_t vers) +{ + if (vers == NLM4_VERS) { + return (nlm4_cancel_4(args, res, client)); + } else { + nlm_cancargs args1; + nlm_res res1; + enum clnt_stat stat; + + args1.cookie = args->cookie; + args1.block = args->block; + args1.exclusive = args->exclusive; + nlm_convert_to_nlm_lock(&args1.alock, &args->alock); + (void) memset(&res1, 0, sizeof (res1)); + + stat = nlm_cancel_1(&args1, &res1, client); + + if (stat == RPC_SUCCESS) { + nlm_convert_to_nlm4_res(res, &res1); + } + + return (stat); + } +} + +enum clnt_stat +nlm_unlock_rpc(nlm4_unlockargs *args, nlm4_res *res, + CLIENT *client, rpcvers_t vers) +{ + if (vers == NLM4_VERS) { + 
return (nlm4_unlock_4(args, res, client)); + } else { + nlm_unlockargs args1; + nlm_res res1; + enum clnt_stat stat; + + args1.cookie = args->cookie; + nlm_convert_to_nlm_lock(&args1.alock, &args->alock); + (void) memset(&res1, 0, sizeof (res1)); + + stat = nlm_unlock_1(&args1, &res1, client); + + if (stat == RPC_SUCCESS) { + nlm_convert_to_nlm4_res(res, &res1); + } + + return (stat); + } +} + +enum clnt_stat +nlm_null_rpc(CLIENT *client, rpcvers_t vers) +{ + if (vers == NLM4_VERS) + return (nlm4_null_4(NULL, NULL, client)); + + return (nlm_null_1(NULL, NULL, client)); +} + +/* + * Share reservations + */ + +static void +nlm_convert_to_nlm_share(struct nlm_share *dst, struct nlm4_share *src) +{ + + dst->caller_name = src->caller_name; + dst->fh = src->fh; + dst->oh = src->oh; + dst->mode = src->mode; + dst->access = src->access; +} + +static void +nlm_convert_to_nlm4_shres(struct nlm4_shareres *dst, + struct nlm_shareres *src) +{ + dst->cookie = src->cookie; + dst->stat = (enum nlm4_stats) src->stat; + dst->sequence = src->sequence; +} + + +enum clnt_stat +nlm_share_rpc(nlm4_shareargs *args, nlm4_shareres *res, + CLIENT *client, rpcvers_t vers) +{ + if (vers == NLM4_VERS) { + return (nlm4_share_4(args, res, client)); + } else { + nlm_shareargs args3; + nlm_shareres res3; + enum clnt_stat stat; + + args3.cookie = args->cookie; + nlm_convert_to_nlm_share(&args3.share, &args->share); + args3.reclaim = args->reclaim; + (void) memset(&res3, 0, sizeof (res3)); + + stat = nlm_share_3(&args3, &res3, client); + + if (stat == RPC_SUCCESS) { + nlm_convert_to_nlm4_shres(res, &res3); + } + + return (stat); + } +} + +enum clnt_stat +nlm_unshare_rpc(nlm4_shareargs *args, nlm4_shareres *res, + CLIENT *client, rpcvers_t vers) +{ + if (vers == NLM4_VERS) { + return (nlm4_unshare_4(args, res, client)); + } else { + nlm_shareargs args3; + nlm_shareres res3; + enum clnt_stat stat; + + args3.cookie = args->cookie; + nlm_convert_to_nlm_share(&args3.share, &args->share); + args3.reclaim = 
args->reclaim; + (void) memset(&res3, 0, sizeof (res3)); + + stat = nlm_unshare_3(&args3, &res3, client); + + if (stat == RPC_SUCCESS) { + nlm_convert_to_nlm4_shres(res, &res3); + } + + return (stat); + } +} diff --git a/usr/src/uts/common/klm/nlm_rpc_handle.c b/usr/src/uts/common/klm/nlm_rpc_handle.c new file mode 100644 index 0000000000..611c4b5b2d --- /dev/null +++ b/usr/src/uts/common/klm/nlm_rpc_handle.c @@ -0,0 +1,362 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. 
+ */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/socket.h> +#include <sys/syslog.h> +#include <sys/systm.h> +#include <sys/unistd.h> +#include <sys/queue.h> +#include <sys/sdt.h> +#include <netinet/in.h> + +#include <rpc/rpc.h> +#include <rpc/xdr.h> +#include <rpc/pmap_prot.h> +#include <rpc/pmap_clnt.h> +#include <rpc/rpcb_prot.h> + +#include <rpcsvc/nlm_prot.h> +#include <rpcsvc/sm_inter.h> + +#include "nlm_impl.h" + +/* + * The following errors codes from nlm_null_rpc indicate that the port we have + * cached for the client's NLM service is stale and that we need to establish + * a new RPC client. + */ +#define NLM_STALE_CLNT(_status) \ + ((_status) == RPC_PROGUNAVAIL || \ + (_status) == RPC_PROGVERSMISMATCH || \ + (_status) == RPC_PROCUNAVAIL || \ + (_status) == RPC_CANTCONNECT || \ + (_status) == RPC_XPRTFAILED) + +static struct kmem_cache *nlm_rpch_cache = NULL; + +static int nlm_rpch_ctor(void *, void *, int); +static void nlm_rpch_dtor(void *, void *); +static void destroy_rpch(nlm_rpc_t *); +static nlm_rpc_t *get_nlm_rpc_fromcache(struct nlm_host *, int); +static void update_host_rpcbinding(struct nlm_host *, int); +static int refresh_nlm_rpc(struct nlm_host *, nlm_rpc_t *); +static void nlm_host_rele_rpc_locked(struct nlm_host *, nlm_rpc_t *); + +static nlm_rpc_t * +get_nlm_rpc_fromcache(struct nlm_host *hostp, int vers) +{ + nlm_rpc_t *rpcp; + bool_t found = FALSE; + + ASSERT(MUTEX_HELD(&hostp->nh_lock)); + if (TAILQ_EMPTY(&hostp->nh_rpchc)) + return (NULL); + + TAILQ_FOREACH(rpcp, &hostp->nh_rpchc, nr_link) { + if (rpcp->nr_vers == vers) { + found = TRUE; + break; + } + } + + if (!found) + return (NULL); + + TAILQ_REMOVE(&hostp->nh_rpchc, rpcp, nr_link); + return (rpcp); +} + +/* + * Update host's RPC binding (host->nh_addr). + * The function is executed by only one thread at time. 
+ */ +static void +update_host_rpcbinding(struct nlm_host *hostp, int vers) +{ + enum clnt_stat stat; + + ASSERT(MUTEX_HELD(&hostp->nh_lock)); + + /* + * Mark RPC binding state as "update in progress" in order + * to say other threads that they need to wait until binding + * is fully updated. + */ + hostp->nh_rpcb_state = NRPCB_UPDATE_INPROGRESS; + hostp->nh_rpcb_ustat = RPC_SUCCESS; + mutex_exit(&hostp->nh_lock); + + stat = rpcbind_getaddr(&hostp->nh_knc, NLM_PROG, vers, &hostp->nh_addr); + mutex_enter(&hostp->nh_lock); + + hostp->nh_rpcb_state = ((stat == RPC_SUCCESS) ? + NRPCB_UPDATED : NRPCB_NEED_UPDATE); + + hostp->nh_rpcb_ustat = stat; + cv_broadcast(&hostp->nh_rpcb_cv); +} + +/* + * Refresh RPC handle taken from host handles cache. + * This function is called when an RPC handle is either + * uninitialized or was initialized using a binding that's + * no longer current. + */ +static int +refresh_nlm_rpc(struct nlm_host *hostp, nlm_rpc_t *rpcp) +{ + int ret; + + if (rpcp->nr_handle == NULL) { + bool_t clset = TRUE; + + ret = clnt_tli_kcreate(&hostp->nh_knc, &hostp->nh_addr, + NLM_PROG, rpcp->nr_vers, 0, NLM_RPC_RETRIES, + CRED(), &rpcp->nr_handle); + + /* + * Set the client's CLSET_NODELAYONERR option to true. The + * RPC clnt_call interface creates an artificial delay for + * certain call errors in order to prevent RPC consumers + * from getting into tight retry loops. Since this function is + * called by the NLM service routines we would like to avoid + * this artificial delay when possible. We do not retry if the + * NULL request fails so it is safe for us to turn this option + * on. + */ + if (clnt_control(rpcp->nr_handle, CLSET_NODELAYONERR, + (char *)&clset)) { + NLM_ERR("Unable to set CLSET_NODELAYONERR\n"); + } + } else { + ret = clnt_tli_kinit(rpcp->nr_handle, &hostp->nh_knc, + &hostp->nh_addr, 0, NLM_RPC_RETRIES, CRED()); + if (ret == 0) { + enum clnt_stat stat; + + /* + * Check whether host's RPC binding is still + * fresh, i.e. 
if remote program is still sits + * on the same port we assume. Call NULL proc + * to do it. + * + * Note: Even though we set no delay on error on the + * client handle the call to nlm_null_rpc can still + * delay for 10 seconds before returning an error. For + * example the no delay on error option is not honored + * for RPC_XPRTFAILED errors (see clnt_cots_kcallit). + */ + stat = nlm_null_rpc(rpcp->nr_handle, rpcp->nr_vers); + if (NLM_STALE_CLNT(stat)) { + ret = ESTALE; + } + } + } + + return (ret); +} + +/* + * Get RPC handle that can be used to talk to the NLM + * of given version running on given host. + * Saves obtained RPC handle to rpcpp argument. + * + * If error occures, return nonzero error code. + */ +int +nlm_host_get_rpc(struct nlm_host *hostp, int vers, nlm_rpc_t **rpcpp) +{ + nlm_rpc_t *rpcp = NULL; + int rc; + + mutex_enter(&hostp->nh_lock); + + /* + * If this handle is either uninitialized, or was + * initialized using binding that's now stale + * do the init or re-init. + * See comments to enum nlm_rpcb_state for more + * details. + */ +again: + while (hostp->nh_rpcb_state != NRPCB_UPDATED) { + if (hostp->nh_rpcb_state == NRPCB_UPDATE_INPROGRESS) { + rc = cv_wait_sig(&hostp->nh_rpcb_cv, &hostp->nh_lock); + if (rc == 0) { + mutex_exit(&hostp->nh_lock); + return (EINTR); + } + } + + /* + * Check if RPC binding was marked for update. + * If so, start RPC binding update operation. + * NOTE: the operation can be executed by only + * one thread at time. + */ + if (hostp->nh_rpcb_state == NRPCB_NEED_UPDATE) + update_host_rpcbinding(hostp, vers); + + /* + * Check if RPC error occured during RPC binding + * update operation. If so, report a correspoding + * error. + */ + if (hostp->nh_rpcb_ustat != RPC_SUCCESS) { + mutex_exit(&hostp->nh_lock); + return (ENOENT); + } + } + + rpcp = get_nlm_rpc_fromcache(hostp, vers); + mutex_exit(&hostp->nh_lock); + if (rpcp == NULL) { + /* + * There weren't any RPC handles in a host + * cache. 
No luck, just create a new one. + */ + rpcp = kmem_cache_alloc(nlm_rpch_cache, KM_SLEEP); + rpcp->nr_vers = vers; + } + + /* + * Refresh RPC binding + */ + rc = refresh_nlm_rpc(hostp, rpcp); + if (rc != 0) { + if (rc == ESTALE) { + /* + * Host's RPC binding is stale, we have + * to update it. Put the RPC handle back + * to the cache and mark the host as + * "need update". + */ + mutex_enter(&hostp->nh_lock); + hostp->nh_rpcb_state = NRPCB_NEED_UPDATE; + nlm_host_rele_rpc_locked(hostp, rpcp); + goto again; + } + + destroy_rpch(rpcp); + return (rc); + } + + DTRACE_PROBE2(end, struct nlm_host *, hostp, + nlm_rpc_t *, rpcp); + + *rpcpp = rpcp; + return (0); +} + +void +nlm_host_rele_rpc(struct nlm_host *hostp, nlm_rpc_t *rpcp) +{ + mutex_enter(&hostp->nh_lock); + nlm_host_rele_rpc_locked(hostp, rpcp); + mutex_exit(&hostp->nh_lock); +} + +static void +nlm_host_rele_rpc_locked(struct nlm_host *hostp, nlm_rpc_t *rpcp) +{ + ASSERT(mutex_owned(&hostp->nh_lock)); + TAILQ_INSERT_HEAD(&hostp->nh_rpchc, rpcp, nr_link); +} + +/* + * The function invalidates host's RPC binding by marking it + * as not fresh. In this case another time thread tries to + * get RPC handle from host's handles cache, host's RPC binding + * will be updated. + * + * The function should be executed when RPC call invoked via + * handle taken from RPC cache returns RPC_PROCUNAVAIL. + */ +void +nlm_host_invalidate_binding(struct nlm_host *hostp) +{ + mutex_enter(&hostp->nh_lock); + hostp->nh_rpcb_state = NRPCB_NEED_UPDATE; + mutex_exit(&hostp->nh_lock); +} + +void +nlm_rpc_init(void) +{ + nlm_rpch_cache = kmem_cache_create("nlm_rpch_cache", + sizeof (nlm_rpc_t), 0, nlm_rpch_ctor, nlm_rpch_dtor, + NULL, NULL, NULL, 0); +} + +void +nlm_rpc_cache_destroy(struct nlm_host *hostp) +{ + nlm_rpc_t *rpcp; + + /* + * There's no need to lock host's mutex here, + * nlm_rpc_cache_destroy() should be called from + * only one place: nlm_host_destroy, when all + * resources host owns are already cleaned up. 
+ * So there shouldn't be any raises. + */ + while ((rpcp = TAILQ_FIRST(&hostp->nh_rpchc)) != NULL) { + TAILQ_REMOVE(&hostp->nh_rpchc, rpcp, nr_link); + destroy_rpch(rpcp); + } +} + +/* ARGSUSED */ +static int +nlm_rpch_ctor(void *datap, void *cdrarg, int kmflags) +{ + nlm_rpc_t *rpcp = (nlm_rpc_t *)datap; + + bzero(rpcp, sizeof (*rpcp)); + return (0); +} + +/* ARGSUSED */ +static void +nlm_rpch_dtor(void *datap, void *cdrarg) +{ + nlm_rpc_t *rpcp = (nlm_rpc_t *)datap; + ASSERT(rpcp->nr_handle == NULL); +} + +static void +destroy_rpch(nlm_rpc_t *rpcp) +{ + if (rpcp->nr_handle != NULL) { + AUTH_DESTROY(rpcp->nr_handle->cl_auth); + CLNT_DESTROY(rpcp->nr_handle); + rpcp->nr_handle = NULL; + } + + kmem_cache_free(nlm_rpch_cache, rpcp); +} diff --git a/usr/src/uts/common/klm/nlm_rpc_svc.c b/usr/src/uts/common/klm/nlm_rpc_svc.c new file mode 100644 index 0000000000..2911b31877 --- /dev/null +++ b/usr/src/uts/common/klm/nlm_rpc_svc.c @@ -0,0 +1,932 @@ +/* + * Copyright (c) 2008 Isilon Inc http://www.isilon.com/ + * Authors: Doug Rabson <dfr@rabson.org> + * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2013 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. + */ + +/* + * NFS Lock Manager, RPC service functions (nlm_..._svc) + * Called via nlm_dispatch.c tables. + * + * Source code derived from FreeBSD nlm_prot_server.c + * + * The real service functions all use nlm4_... args and return + * data types. These wrappers convert older forms to and from + * the new forms and call the nlm_do_... service functions. + */ + +#include <sys/param.h> +#include <sys/systm.h> + +#include <rpcsvc/nlm_prot.h> +#include "nlm_impl.h" + +/* + * Convert between various versions of the protocol structures. + */ + +/* + * Down-convert, for granted_1 call + * + * This converts a 64-bit lock to 32-bit form for our granted + * call-back when we're dealing with a 32-bit NLM client. + * Our NLM_LOCK handler ensures that any lock we grant to a + * 32-bit client can be represented in 32-bits. If the + * ASSERTs here fire, then the call to nlm_init_flock in + * nlm_do_lock has failed to restrict a 32-bit client to + * 32-bit lock ranges. 
+ */ +static void +nlm_convert_to_nlm_lock(struct nlm_lock *dst, struct nlm4_lock *src) +{ + dst->caller_name = src->caller_name; + dst->fh = src->fh; + dst->oh = src->oh; + dst->svid = src->svid; + ASSERT(src->l_offset <= MAX_UOFF32); + dst->l_offset = (uint32_t)src->l_offset; + ASSERT(src->l_len <= MAX_UOFF32); + dst->l_len = (uint32_t)src->l_len; +} + +/* + * Up-convert for v1 svc functions with a 32-bit lock range arg. + * Note that lock range checks (like overflow) are done later, + * in nlm_init_flock(). + */ +static void +nlm_convert_to_nlm4_lock(struct nlm4_lock *dst, struct nlm_lock *src) +{ + + dst->caller_name = src->caller_name; + dst->fh = src->fh; + dst->oh = src->oh; + dst->svid = src->svid; + dst->l_offset = src->l_offset; + dst->l_len = src->l_len; +} + +static void +nlm_convert_to_nlm4_share(struct nlm4_share *dst, struct nlm_share *src) +{ + + dst->caller_name = src->caller_name; + dst->fh = src->fh; + dst->oh = src->oh; + dst->mode = src->mode; + dst->access = src->access; +} + +/* + * Down-convert for v1 NLM_TEST or NLM_TEST_MSG response. + * Note that nlm_do_test is careful to give us lock ranges + * that can be represented with 32-bit values. If the + * ASSERTs here fire, then the code in nlm_do_test that + * builds an nlm4_holder for a 32-bit client has failed to + * restrict the reported conflicting lock range so it's a + * valid 32-bit lock range. 
+ */ +static void +nlm_convert_to_nlm_holder(struct nlm_holder *dst, struct nlm4_holder *src) +{ + dst->exclusive = src->exclusive; + dst->svid = src->svid; + dst->oh = src->oh; + ASSERT(src->l_offset <= MAX_UOFF32); + dst->l_offset = (uint32_t)src->l_offset; + ASSERT(src->l_len <= MAX_UOFF32); + dst->l_len = (uint32_t)src->l_len; +} + +static enum nlm_stats +nlm_convert_to_nlm_stats(enum nlm4_stats src) +{ + if (src > nlm4_deadlck) + return (nlm_denied); + return ((enum nlm_stats)src); +} + +static void +nlm_convert_to_nlm_res(struct nlm_res *dst, struct nlm4_res *src) +{ + dst->cookie = src->cookie; + dst->stat.stat = nlm_convert_to_nlm_stats(src->stat.stat); +} + +/* ******************************************************************** */ + +/* + * Version 1 svc functions + */ + +bool_t +nlm_test_1_svc(struct nlm_testargs *argp, nlm_testres *resp, + struct svc_req *sr) +{ + nlm4_testargs args4; + nlm4_testres res4; + + bzero(&args4, sizeof (args4)); + bzero(&res4, sizeof (res4)); + + args4.cookie = argp->cookie; + args4.exclusive = argp->exclusive; + nlm_convert_to_nlm4_lock(&args4.alock, &argp->alock); + + nlm_do_test(&args4, &res4, sr, NULL); + + resp->cookie = res4.cookie; + resp->stat.stat = nlm_convert_to_nlm_stats(res4.stat.stat); + if (resp->stat.stat == nlm_denied) + nlm_convert_to_nlm_holder( + &resp->stat.nlm_testrply_u.holder, + &res4.stat.nlm4_testrply_u.holder); + + return (TRUE); +} + +/* + * Callback functions for nlm_lock_1_svc + */ +static bool_t nlm_lock_1_reply(SVCXPRT *, nlm4_res *); +static enum clnt_stat nlm_granted_1_cb(nlm4_testargs *, void *, CLIENT *); + +bool_t +nlm_lock_1_svc(nlm_lockargs *argp, nlm_res *resp, + struct svc_req *sr) +{ + nlm4_lockargs args4; + nlm4_res res4; + + bzero(&res4, sizeof (res4)); + + args4.cookie = argp->cookie; + args4.block = argp->block; + args4.exclusive = argp->exclusive; + nlm_convert_to_nlm4_lock(&args4.alock, &argp->alock); + args4.reclaim = argp->reclaim; + args4.state = argp->state; + + /* NLM_LOCK 
*/ + nlm_do_lock(&args4, &res4, sr, + nlm_lock_1_reply, NULL, + nlm_granted_1_cb); + + /* for freeresult */ + nlm_convert_to_nlm_res(resp, &res4); + + /* above does its own reply */ + return (FALSE); +} + +static bool_t +nlm_lock_1_reply(SVCXPRT *transp, nlm4_res *resp) +{ + nlm_res res1; + + nlm_convert_to_nlm_res(&res1, resp); + return (svc_sendreply(transp, xdr_nlm_res, (char *)&res1)); +} + +static enum clnt_stat +nlm_granted_1_cb(nlm4_testargs *argp, void *resp, CLIENT *clnt) +{ + nlm_testargs args1; + nlm_res res1; + int rv; + + bzero(&res1, sizeof (res1)); + + args1.cookie = argp->cookie; + args1.exclusive = argp->exclusive; + nlm_convert_to_nlm_lock(&args1.alock, &argp->alock); + + rv = nlm_granted_1(&args1, &res1, clnt); + + /* NB: We have a result our caller will not free. */ + xdr_free((xdrproc_t)xdr_nlm_res, (void *)&res1); + (void) resp; + + return (rv); +} + +bool_t +nlm_cancel_1_svc(struct nlm_cancargs *argp, nlm_res *resp, + struct svc_req *sr) +{ + nlm4_cancargs args4; + nlm4_res res4; + + bzero(&res4, sizeof (res4)); + + args4.cookie = argp->cookie; + args4.block = argp->block; + args4.exclusive = argp->exclusive; + nlm_convert_to_nlm4_lock(&args4.alock, &argp->alock); + + nlm_do_cancel(&args4, &res4, sr, NULL); + + nlm_convert_to_nlm_res(resp, &res4); + + return (TRUE); +} + +bool_t +nlm_unlock_1_svc(struct nlm_unlockargs *argp, nlm_res *resp, + struct svc_req *sr) +{ + nlm4_unlockargs args4; + nlm4_res res4; + + bzero(&res4, sizeof (res4)); + + args4.cookie = argp->cookie; + nlm_convert_to_nlm4_lock(&args4.alock, &argp->alock); + + nlm_do_unlock(&args4, &res4, sr, NULL); + + nlm_convert_to_nlm_res(resp, &res4); + + return (TRUE); +} + +bool_t +nlm_granted_1_svc(struct nlm_testargs *argp, nlm_res *resp, + struct svc_req *sr) +{ + nlm4_testargs args4; + nlm4_res res4; + + bzero(&res4, sizeof (res4)); + + args4.cookie = argp->cookie; + args4.exclusive = argp->exclusive; + nlm_convert_to_nlm4_lock(&args4.alock, &argp->alock); + + 
nlm_do_granted(&args4, &res4, sr, NULL); + + nlm_convert_to_nlm_res(resp, &res4); + + return (TRUE); +} + +/* + * The _msg_ calls get no reply. Instead, these callers + * expect an RPC call to the corresponding _res function. + * We pass this callback function to nlm_do_test so it will + * use it to do the RPC callback, with the correct res type. + * + * The callback functions have nearly the same arg signature + * as the client call functions so that many of those can be + * optimized to nothing by the compiler. Also, passing the + * null result arg for these just to reduce warnings. + * + * See similar callbacks for other _msg functions below. + */ + +static enum clnt_stat nlm_test_res_1_cb(nlm4_testres *, void *, CLIENT *); + +bool_t +nlm_test_msg_1_svc(struct nlm_testargs *argp, void *resp, + struct svc_req *sr) +{ + nlm4_testargs args4; + nlm4_testres res4; + + bzero(&res4, sizeof (res4)); + + args4.cookie = argp->cookie; + args4.exclusive = argp->exclusive; + nlm_convert_to_nlm4_lock(&args4.alock, &argp->alock); + + nlm_do_test(&args4, &res4, sr, + nlm_test_res_1_cb); + + /* NB: We have a result our caller will not free. */ + xdr_free((xdrproc_t)xdr_nlm4_testres, (void *)&res4); + (void) resp; + + /* The _msg_ calls get no reply. 
*/ + return (FALSE); +} + +static enum clnt_stat +nlm_test_res_1_cb(nlm4_testres *res4, void *null, CLIENT *clnt) +{ + nlm_testres res1; + + res1.cookie = res4->cookie; + res1.stat.stat = nlm_convert_to_nlm_stats(res4->stat.stat); + if (res1.stat.stat == nlm_denied) + nlm_convert_to_nlm_holder( + &res1.stat.nlm_testrply_u.holder, + &res4->stat.nlm4_testrply_u.holder); + + return (nlm_test_res_1(&res1, null, clnt)); +} + +/* + * Callback functions for nlm_lock_msg_1_svc + */ +static enum clnt_stat nlm_lock_res_1_cb(nlm4_res *, void *, CLIENT *); +static enum clnt_stat nlm_granted_msg_1_cb(nlm4_testargs *, void *, CLIENT *); + +bool_t +nlm_lock_msg_1_svc(nlm_lockargs *argp, void *resp, + struct svc_req *sr) +{ + nlm4_lockargs args4; + nlm4_res res4; + + bzero(&res4, sizeof (res4)); + + args4.cookie = argp->cookie; + args4.block = argp->block; + args4.exclusive = argp->exclusive; + nlm_convert_to_nlm4_lock(&args4.alock, &argp->alock); + args4.reclaim = argp->reclaim; + args4.state = argp->state; + + /* NLM_LOCK_MSG */ + nlm_do_lock(&args4, &res4, sr, + NULL, nlm_lock_res_1_cb, + nlm_granted_msg_1_cb); + + /* NB: We have a result our caller will not free. */ + xdr_free((xdrproc_t)xdr_nlm4_res, (void *)&res4); + (void) resp; + + /* The _msg_ calls get no reply. 
*/ + return (FALSE); +} + +static enum clnt_stat +nlm_lock_res_1_cb(nlm4_res *resp, void *null, CLIENT *clnt) +{ + nlm_res res1; + + nlm_convert_to_nlm_res(&res1, resp); + return (nlm_lock_res_1(&res1, null, clnt)); +} + +static enum clnt_stat +nlm_granted_msg_1_cb(nlm4_testargs *argp, void *null, CLIENT *clnt) +{ + nlm_testargs args1; + + args1.cookie = argp->cookie; + args1.exclusive = argp->exclusive; + nlm_convert_to_nlm_lock(&args1.alock, &argp->alock); + + return (nlm_granted_msg_1(&args1, null, clnt)); + +} + + +static enum clnt_stat nlm_cancel_res_1_cb(nlm4_res *, void *, CLIENT *); + +bool_t +nlm_cancel_msg_1_svc(struct nlm_cancargs *argp, void *resp, + struct svc_req *sr) +{ + nlm4_cancargs args4; + nlm4_res res4; + + bzero(&res4, sizeof (res4)); + + args4.cookie = argp->cookie; + args4.block = argp->block; + args4.exclusive = argp->exclusive; + nlm_convert_to_nlm4_lock(&args4.alock, &argp->alock); + + nlm_do_cancel(&args4, &res4, sr, + nlm_cancel_res_1_cb); + + /* NB: We have a result our caller will not free. */ + xdr_free((xdrproc_t)xdr_nlm4_res, (void *)&res4); + (void) resp; + + /* The _msg_ calls get no reply. */ + return (FALSE); +} + +static enum clnt_stat +nlm_cancel_res_1_cb(nlm4_res *res4, void *null, CLIENT *clnt) +{ + nlm_res res1; + + nlm_convert_to_nlm_res(&res1, res4); + return (nlm_cancel_res_1(&res1, null, clnt)); +} + + +static enum clnt_stat nlm_unlock_res_1_cb(nlm4_res *, void *, CLIENT *); + +bool_t +nlm_unlock_msg_1_svc(struct nlm_unlockargs *argp, void *resp, + struct svc_req *sr) +{ + nlm4_unlockargs args4; + nlm4_res res4; + + bzero(&res4, sizeof (res4)); + + args4.cookie = argp->cookie; + nlm_convert_to_nlm4_lock(&args4.alock, &argp->alock); + + nlm_do_unlock(&args4, &res4, sr, + nlm_unlock_res_1_cb); + + /* NB: We have a result our caller will not free. */ + xdr_free((xdrproc_t)xdr_nlm4_res, (void *)&res4); + (void) resp; + + /* The _msg_ calls get no reply. 
*/ + return (FALSE); +} + +static enum clnt_stat +nlm_unlock_res_1_cb(nlm4_res *res4, void *null, CLIENT *clnt) +{ + nlm_res res1; + + nlm_convert_to_nlm_res(&res1, res4); + return (nlm_unlock_res_1(&res1, null, clnt)); +} + + +static enum clnt_stat nlm_granted_res_1_cb(nlm4_res *, void *, CLIENT *); + +bool_t +nlm_granted_msg_1_svc(struct nlm_testargs *argp, void *resp, + struct svc_req *sr) +{ + nlm4_testargs args4; + nlm4_res res4; + + bzero(&res4, sizeof (res4)); + + args4.cookie = argp->cookie; + args4.exclusive = argp->exclusive; + nlm_convert_to_nlm4_lock(&args4.alock, &argp->alock); + + nlm_do_granted(&args4, &res4, sr, + nlm_granted_res_1_cb); + + /* NB: We have a result our caller will not free. */ + xdr_free((xdrproc_t)xdr_nlm4_res, (void *)&res4); + (void) resp; + + /* The _msg_ calls get no reply. */ + return (FALSE); +} + +static enum clnt_stat +nlm_granted_res_1_cb(nlm4_res *res4, void *null, CLIENT *clnt) +{ + nlm_res res1; + + nlm_convert_to_nlm_res(&res1, res4); + return (nlm_granted_res_1(&res1, null, clnt)); +} + +/* + * The _res_ calls get no reply. These RPC calls are + * "call backs" in response to RPC _msg_ calls. + * We don't care about these responses. + */ + +/* ARGSUSED */ +bool_t +nlm_test_res_1_svc(nlm_testres *argp, void *resp, struct svc_req *sr) +{ + /* The _res_ calls get no reply. */ + return (FALSE); +} + +/* ARGSUSED */ +bool_t +nlm_lock_res_1_svc(nlm_res *argp, void *resp, struct svc_req *sr) +{ + /* The _res_ calls get no reply. */ + return (FALSE); +} + +/* ARGSUSED */ +bool_t +nlm_cancel_res_1_svc(nlm_res *argp, void *resp, struct svc_req *sr) +{ + /* The _res_ calls get no reply. */ + return (FALSE); +} + +/* ARGSUSED */ +bool_t +nlm_unlock_res_1_svc(nlm_res *argp, void *resp, struct svc_req *sr) +{ + /* The _res_ calls get no reply. */ + return (FALSE); +} + +/* ARGSUSED */ +bool_t +nlm_granted_res_1_svc(nlm_res *argp, void *resp, struct svc_req *sr) +{ + /* The _res_ calls get no reply. 
*/ + return (FALSE); +} + +/* + * Version 2 svc functions (used by local statd) + */ + +bool_t +nlm_sm_notify1_2_svc(struct nlm_sm_status *argp, void *resp, + struct svc_req *sr) +{ + nlm_do_notify1(argp, resp, sr); + return (TRUE); +} + +bool_t +nlm_sm_notify2_2_svc(struct nlm_sm_status *argp, void *resp, + struct svc_req *sr) +{ + nlm_do_notify2(argp, resp, sr); + return (TRUE); +} + +/* + * Version 3 svc functions + */ + +bool_t +nlm_share_3_svc(nlm_shareargs *argp, nlm_shareres *resp, + struct svc_req *sr) +{ + nlm4_shareargs args4; + nlm4_shareres res4; + + bzero(&res4, sizeof (res4)); + + args4.cookie = argp->cookie; + nlm_convert_to_nlm4_share(&args4.share, &argp->share); + args4.reclaim = argp->reclaim; + + nlm_do_share(&args4, &res4, sr); + + resp->cookie = res4.cookie; + resp->stat = nlm_convert_to_nlm_stats(res4.stat); + resp->sequence = res4.sequence; + + return (TRUE); +} + +bool_t +nlm_unshare_3_svc(nlm_shareargs *argp, nlm_shareres *resp, + struct svc_req *sr) +{ + nlm4_shareargs args4; + nlm4_shareres res4; + + bzero(&res4, sizeof (res4)); + + args4.cookie = argp->cookie; + nlm_convert_to_nlm4_share(&args4.share, &argp->share); + args4.reclaim = argp->reclaim; + + nlm_do_unshare(&args4, &res4, sr); + + resp->cookie = res4.cookie; + resp->stat = nlm_convert_to_nlm_stats(res4.stat); + resp->sequence = res4.sequence; + + return (TRUE); +} + +bool_t +nlm_nm_lock_3_svc(nlm_lockargs *argp, nlm_res *resp, struct svc_req *sr) +{ + nlm4_lockargs args4; + nlm4_res res4; + + bzero(&res4, sizeof (res4)); + + args4.cookie = argp->cookie; + args4.block = argp->block; + args4.exclusive = argp->exclusive; + nlm_convert_to_nlm4_lock(&args4.alock, &argp->alock); + args4.reclaim = argp->reclaim; + args4.state = argp->state; + + /* + * Don't allow blocking for non-monitored (nm_lock) calls. + * These clients don't handle any callbacks, including + * the granted call we make after a blocking lock. 
+ * Same reply callback as nlm_lock_1_svc + */ + args4.block = FALSE; + + /* NLM_NM_LOCK */ + nlm_do_lock(&args4, &res4, sr, + nlm_lock_1_reply, NULL, + NULL); /* indicates non-monitored */ + + /* for freeresult */ + nlm_convert_to_nlm_res(resp, &res4); + + /* above does its own reply */ + return (FALSE); +} + +bool_t +nlm_free_all_3_svc(nlm_notify *argp, void *resp, struct svc_req *sr) +{ + struct nlm4_notify args4; + + args4.name = argp->name; + args4.state = argp->state; + + nlm_do_free_all(&args4, resp, sr); + + return (TRUE); +} + +/* + * Version 4 svc functions + */ + +bool_t +nlm4_test_4_svc(nlm4_testargs *argp, nlm4_testres *resp, struct svc_req *sr) +{ + nlm_do_test(argp, resp, sr, NULL); + return (TRUE); +} + +/* + * Callback functions for nlm4_lock_4_svc + */ +static bool_t nlm4_lock_4_reply(SVCXPRT *, nlm4_res *); +static enum clnt_stat nlm4_granted_4_cb(nlm4_testargs *, void *, CLIENT *); + +bool_t +nlm4_lock_4_svc(nlm4_lockargs *argp, nlm4_res *resp, + struct svc_req *sr) +{ + + /* NLM4_LOCK */ + nlm_do_lock(argp, resp, sr, + nlm4_lock_4_reply, NULL, + nlm4_granted_4_cb); + + /* above does its own reply */ + return (FALSE); +} + +static bool_t +nlm4_lock_4_reply(SVCXPRT *transp, nlm4_res *resp) +{ + return (svc_sendreply(transp, xdr_nlm4_res, (char *)resp)); +} + +static enum clnt_stat +nlm4_granted_4_cb(nlm4_testargs *argp, void *resp, CLIENT *clnt) +{ + nlm4_res res4; + int rv; + + bzero(&res4, sizeof (res4)); + rv = nlm4_granted_4(argp, &res4, clnt); + + /* NB: We have a result our caller will not free. 
*/ + xdr_free((xdrproc_t)xdr_nlm4_res, (void *)&res4); + (void) resp; + + return (rv); +} + +bool_t +nlm4_cancel_4_svc(nlm4_cancargs *argp, nlm4_res *resp, struct svc_req *sr) +{ + nlm_do_cancel(argp, resp, sr, NULL); + return (TRUE); +} + +bool_t +nlm4_unlock_4_svc(nlm4_unlockargs *argp, nlm4_res *resp, struct svc_req *sr) +{ + nlm_do_unlock(argp, resp, sr, NULL); + return (TRUE); +} + +bool_t +nlm4_granted_4_svc(nlm4_testargs *argp, nlm4_res *resp, struct svc_req *sr) +{ + nlm_do_granted(argp, resp, sr, NULL); + return (TRUE); +} + +bool_t +nlm4_test_msg_4_svc(nlm4_testargs *argp, void *resp, struct svc_req *sr) +{ + nlm4_testres res4; + + bzero(&res4, sizeof (res4)); + nlm_do_test(argp, &res4, sr, + nlm4_test_res_4); + + /* NB: We have a result our caller will not free. */ + xdr_free((xdrproc_t)xdr_nlm4_testres, (void *)&res4); + (void) resp; + + /* The _msg_ calls get no reply. */ + return (FALSE); +} + +/* + * Callback functions for nlm4_lock_msg_4_svc + * (using the RPC client stubs directly) + */ + +bool_t +nlm4_lock_msg_4_svc(nlm4_lockargs *argp, void *resp, + struct svc_req *sr) +{ + nlm4_res res4; + + /* NLM4_LOCK_MSG */ + bzero(&res4, sizeof (res4)); + nlm_do_lock(argp, &res4, sr, + NULL, nlm4_lock_res_4, + nlm4_granted_msg_4); + + /* NB: We have a result our caller will not free. */ + xdr_free((xdrproc_t)xdr_nlm4_res, (void *)&res4); + (void) resp; + + /* The _msg_ calls get no reply. */ + return (FALSE); +} + +bool_t +nlm4_cancel_msg_4_svc(nlm4_cancargs *argp, void *resp, struct svc_req *sr) +{ + nlm4_res res4; + + bzero(&res4, sizeof (res4)); + nlm_do_cancel(argp, &res4, sr, + nlm4_cancel_res_4); + + /* NB: We have a result our caller will not free. */ + xdr_free((xdrproc_t)xdr_nlm4_res, (void *)&res4); + (void) resp; + + /* The _msg_ calls get no reply. 
*/ + return (FALSE); +} + +bool_t +nlm4_unlock_msg_4_svc(nlm4_unlockargs *argp, void *resp, struct svc_req *sr) +{ + nlm4_res res4; + + bzero(&res4, sizeof (res4)); + nlm_do_unlock(argp, &res4, sr, + nlm4_unlock_res_4); + + /* NB: We have a result our caller will not free. */ + xdr_free((xdrproc_t)xdr_nlm4_res, (void *)&res4); + (void) resp; + + /* The _msg_ calls get no reply. */ + return (FALSE); +} + +bool_t +nlm4_granted_msg_4_svc(nlm4_testargs *argp, void *resp, struct svc_req *sr) +{ + nlm4_res res4; + + bzero(&res4, sizeof (res4)); + nlm_do_granted(argp, &res4, sr, + nlm4_granted_res_4); + + /* NB: We have a result our caller will not free. */ + xdr_free((xdrproc_t)xdr_nlm4_res, (void *)&res4); + (void) resp; + + /* The _msg_ calls get no reply. */ + return (FALSE); +} + +/* ARGSUSED */ +bool_t +nlm4_test_res_4_svc(nlm4_testres *argp, void *resp, struct svc_req *sr) +{ + /* The _res_ calls get no reply. */ + return (FALSE); +} + +/* ARGSUSED */ +bool_t +nlm4_lock_res_4_svc(nlm4_res *argp, void *resp, struct svc_req *sr) +{ + /* The _res_ calls get no reply. */ + return (FALSE); +} + +/* ARGSUSED */ +bool_t +nlm4_cancel_res_4_svc(nlm4_res *argp, void *resp, struct svc_req *sr) +{ + /* The _res_ calls get no reply. */ + return (FALSE); +} + +/* ARGSUSED */ +bool_t +nlm4_unlock_res_4_svc(nlm4_res *argp, void *resp, struct svc_req *sr) +{ + /* The _res_ calls get no reply. */ + return (FALSE); +} + +/* ARGSUSED */ +bool_t +nlm4_granted_res_4_svc(nlm4_res *argp, void *resp, struct svc_req *sr) +{ + /* The _res_ calls get no reply. 
*/ + return (FALSE); +} + +/* ARGSUSED */ +bool_t +nlm4_share_4_svc(nlm4_shareargs *argp, nlm4_shareres *resp, + struct svc_req *sr) +{ + nlm_do_share(argp, resp, sr); + return (TRUE); +} + +/* ARGSUSED */ +bool_t +nlm4_unshare_4_svc(nlm4_shareargs *argp, nlm4_shareres *resp, + struct svc_req *sr) +{ + nlm_do_unshare(argp, resp, sr); + return (TRUE); +} + +bool_t +nlm4_nm_lock_4_svc(nlm4_lockargs *argp, nlm4_res *resp, struct svc_req *sr) +{ + + /* + * Don't allow blocking for non-monitored (nm_lock) calls. + * These clients don't handle any callbacks, including + * the granted call we make after a blocking lock. + * Same reply callback as nlm4_lock_4_svc + */ + argp->block = FALSE; + + /* NLM4_NM_LOCK */ + nlm_do_lock(argp, resp, sr, + nlm4_lock_4_reply, NULL, + NULL); /* indicates non-monitored */ + + /* above does its own reply */ + return (FALSE); +} + +bool_t +nlm4_free_all_4_svc(nlm4_notify *argp, void *resp, struct svc_req *sr) +{ + nlm_do_free_all(argp, resp, sr); + return (TRUE); +} diff --git a/usr/src/uts/common/klm/nlm_service.c b/usr/src/uts/common/klm/nlm_service.c new file mode 100644 index 0000000000..6526f244af --- /dev/null +++ b/usr/src/uts/common/klm/nlm_service.c @@ -0,0 +1,1223 @@ +/* + * Copyright (c) 2008 Isilon Inc http://www.isilon.com/ + * Authors: Doug Rabson <dfr@rabson.org> + * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. + */ + +/* + * NFS Lock Manager service functions (nlm_do_...) + * Called from nlm_rpc_svc.c wrappers. 
+ * + * Source code derived from FreeBSD nlm_prot_impl.c + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/thread.h> +#include <sys/fcntl.h> +#include <sys/flock.h> +#include <sys/mount.h> +#include <sys/priv.h> +#include <sys/proc.h> +#include <sys/share.h> +#include <sys/socket.h> +#include <sys/syscall.h> +#include <sys/syslog.h> +#include <sys/systm.h> +#include <sys/taskq.h> +#include <sys/unistd.h> +#include <sys/vnode.h> +#include <sys/vfs.h> +#include <sys/queue.h> +#include <sys/sdt.h> +#include <netinet/in.h> + +#include <rpc/rpc.h> +#include <rpc/xdr.h> +#include <rpc/pmap_prot.h> +#include <rpc/pmap_clnt.h> +#include <rpc/rpcb_prot.h> + +#include <rpcsvc/nlm_prot.h> +#include <rpcsvc/sm_inter.h> + +#include <nfs/nfs.h> +#include <nfs/nfs_clnt.h> +#include <nfs/export.h> +#include <nfs/rnode.h> + +#include "nlm_impl.h" + +#define NLM_IN_GRACE(g) (ddi_get_lbolt() < (g)->grace_threshold) + +struct nlm_block_cb_data { + struct nlm_host *hostp; + struct nlm_vhold *nvp; + struct flock64 *flp; +}; + +/* + * Invoke an asyncronous RPC callbeck + * (used when NLM server needs to reply to MSG NLM procedure). 
+ */ +#define NLM_INVOKE_CALLBACK(descr, rpcp, resp, callb) \ + do { \ + enum clnt_stat _stat; \ + \ + _stat = (*(callb))(resp, NULL, (rpcp)->nr_handle); \ + if (_stat != RPC_SUCCESS && _stat != RPC_TIMEDOUT) { \ + struct rpc_err _err; \ + \ + CLNT_GETERR((rpcp)->nr_handle, &_err); \ + NLM_ERR("NLM: %s callback failed: " \ + "stat %d, err %d\n", descr, _stat, \ + _err.re_errno); \ + } \ + \ + _NOTE(CONSTCOND) } while (0) + +static void nlm_block( + nlm4_lockargs *lockargs, + struct nlm_host *host, + struct nlm_vhold *nvp, + nlm_rpc_t *rpcp, + struct flock64 *fl, + nlm_testargs_cb grant_cb); + +static vnode_t *nlm_fh_to_vp(struct netobj *); +static struct nlm_vhold *nlm_fh_to_vhold(struct nlm_host *, struct netobj *); +static void nlm_init_shrlock(struct shrlock *, nlm4_share *, struct nlm_host *); +static callb_cpr_t *nlm_block_callback(flk_cb_when_t, void *); +static int nlm_vop_frlock(vnode_t *, int, flock64_t *, int, offset_t, + struct flk_callback *, cred_t *, caller_context_t *); + +/* + * Convert a lock from network to local form, and + * check for valid range (no overflow). + */ +static int +nlm_init_flock(struct flock64 *fl, struct nlm4_lock *nl, + struct nlm_host *host, rpcvers_t vers, short type) +{ + uint64_t off, len; + + bzero(fl, sizeof (*fl)); + off = nl->l_offset; + len = nl->l_len; + + if (vers < NLM4_VERS) { + if (off > MAX_UOFF32 || len > MAX_UOFF32) + return (EINVAL); + if (off + len > MAX_UOFF32 + 1) + return (EINVAL); + } else { + /* + * Check range for 64-bit client (no overflow). + * Again allow len == ~0 to mean lock to EOF. + */ + if (len == MAX_U_OFFSET_T) + len = 0; + if (len != 0 && off + (len - 1) < off) + return (EINVAL); + } + + fl->l_type = type; + fl->l_whence = SEEK_SET; + fl->l_start = off; + fl->l_len = len; + fl->l_sysid = host->nh_sysid; + fl->l_pid = nl->svid; + /* l_pad */ + + return (0); +} + +/* + * Gets vnode from client's filehandle + * NOTE: Holds vnode, it _must_ be explicitly + * released by VN_RELE(). 
+ */ +static vnode_t * +nlm_fh_to_vp(struct netobj *fh) +{ + fhandle_t *fhp; + + /* + * Get a vnode pointer for the given NFS file handle. + * Note that it could be an NFSv2 for NFSv3 handle, + * which means the size might vary. (don't copy) + */ + if (fh->n_len < sizeof (*fhp)) + return (NULL); + + /* We know this is aligned (kmem_alloc) */ + /* LINTED E_BAD_PTR_CAST_ALIGN */ + fhp = (fhandle_t *)fh->n_bytes; + return (lm_fhtovp(fhp)); +} + +/* + * Get vhold from client's filehandle, but in contrast to + * The function tries to check some access rights as well. + * + * NOTE: vhold object _must_ be explicitly released by + * nlm_vhold_release(). + */ +static struct nlm_vhold * +nlm_fh_to_vhold(struct nlm_host *hostp, struct netobj *fh) +{ + vnode_t *vp; + struct nlm_vhold *nvp; + + vp = nlm_fh_to_vp(fh); + if (vp == NULL) + return (NULL); + + + nvp = nlm_vhold_get(hostp, vp); + + /* + * Both nlm_fh_to_vp() and nlm_vhold_get() + * do VN_HOLD(), so we need to drop one + * reference on vnode. + */ + VN_RELE(vp); + return (nvp); +} + +/* ******************************************************************* */ + +/* + * NLM implementation details, called from the RPC svc code. + */ + +/* + * Call-back from NFS statd, used to notify that one of our + * hosts had a status change. The host can be either an + * NFS client, NFS server or both. + * According to NSM protocol description, the state is a + * number that is increases monotonically each time the + * state of host changes. An even number indicates that + * the host is down, while an odd number indicates that + * the host is up. + * + * Here we ignore this even/odd difference of status number + * reported by the NSM, we launch notification handlers + * every time the state is changed. The reason we why do so + * is that client and server can talk to each other using + * connectionless transport and it's easy to lose packet + * containing NSM notification with status number update. 
+ * + * In nlm_host_monitor(), we put the sysid in the private data + * that statd carries in this callback, so we can easliy find + * the host this call applies to. + */ +/* ARGSUSED */ +void +nlm_do_notify1(nlm_sm_status *argp, void *res, struct svc_req *sr) +{ + struct nlm_globals *g; + struct nlm_host *host; + uint16_t sysid; + + g = zone_getspecific(nlm_zone_key, curzone); + bcopy(&argp->priv, &sysid, sizeof (sysid)); + + DTRACE_PROBE2(nsm__notify, uint16_t, sysid, + int, argp->state); + + host = nlm_host_find_by_sysid(g, (sysid_t)sysid); + if (host == NULL) + return; + + nlm_host_notify_server(host, argp->state); + nlm_host_notify_client(host, argp->state); + nlm_host_release(g, host); +} + +/* + * Another available call-back for NFS statd. + * Not currently used. + */ +/* ARGSUSED */ +void +nlm_do_notify2(nlm_sm_status *argp, void *res, struct svc_req *sr) +{ + ASSERT(0); +} + + +/* + * NLM_TEST, NLM_TEST_MSG, + * NLM4_TEST, NLM4_TEST_MSG, + * Client inquiry about locks, non-blocking. 
+ */ +void +nlm_do_test(nlm4_testargs *argp, nlm4_testres *resp, + struct svc_req *sr, nlm_testres_cb cb) +{ + struct nlm_globals *g; + struct nlm_host *host; + struct nlm4_holder *lh; + struct nlm_owner_handle *oh; + nlm_rpc_t *rpcp = NULL; + vnode_t *vp = NULL; + struct netbuf *addr; + char *netid; + char *name; + int error; + struct flock64 fl; + + nlm_copy_netobj(&resp->cookie, &argp->cookie); + + name = argp->alock.caller_name; + netid = svc_getnetid(sr->rq_xprt); + addr = svc_getrpccaller(sr->rq_xprt); + + g = zone_getspecific(nlm_zone_key, curzone); + host = nlm_host_findcreate(g, name, netid, addr); + if (host == NULL) { + resp->stat.stat = nlm4_denied_nolocks; + return; + } + if (cb != NULL) { + error = nlm_host_get_rpc(host, sr->rq_vers, &rpcp); + if (error != 0) { + resp->stat.stat = nlm4_denied_nolocks; + goto out; + } + } + + vp = nlm_fh_to_vp(&argp->alock.fh); + if (vp == NULL) { + resp->stat.stat = nlm4_stale_fh; + goto out; + } + + if (NLM_IN_GRACE(g)) { + resp->stat.stat = nlm4_denied_grace_period; + goto out; + } + + /* Convert to local form. */ + error = nlm_init_flock(&fl, &argp->alock, host, sr->rq_vers, + (argp->exclusive) ? F_WRLCK : F_RDLCK); + if (error) { + resp->stat.stat = nlm4_failed; + goto out; + } + + /* BSD: VOP_ADVLOCK(nv->nv_vp, NULL, F_GETLK, &fl, F_REMOTE); */ + error = nlm_vop_frlock(vp, F_GETLK, &fl, + F_REMOTELOCK | FREAD | FWRITE, + (u_offset_t)0, NULL, CRED(), NULL); + if (error) { + resp->stat.stat = nlm4_failed; + goto out; + } + + if (fl.l_type == F_UNLCK) { + resp->stat.stat = nlm4_granted; + goto out; + } + resp->stat.stat = nlm4_denied; + + /* + * This lock "test" fails due to a conflicting lock. + * + * If this is a v1 client, make sure the conflicting + * lock range we report can be expressed with 32-bit + * offsets. The lock range requested was expressed + * as 32-bit offset and length, so at least part of + * the conflicting lock should lie below MAX_UOFF32. 
+ * If the conflicting lock extends past that, we'll + * trim the range to end at MAX_UOFF32 so this lock + * can be represented in a 32-bit response. Check + * the start also (paranoid, but a low cost check). + */ + if (sr->rq_vers < NLM4_VERS) { + uint64 maxlen; + if (fl.l_start > MAX_UOFF32) + fl.l_start = MAX_UOFF32; + maxlen = MAX_UOFF32 + 1 - fl.l_start; + if (fl.l_len > maxlen) + fl.l_len = maxlen; + } + + /* + * Build the nlm4_holder result structure. + * + * Note that lh->oh is freed via xdr_free, + * xdr_nlm4_holder, xdr_netobj, xdr_bytes. + */ + oh = kmem_zalloc(sizeof (*oh), KM_SLEEP); + oh->oh_sysid = (sysid_t)fl.l_sysid; + lh = &resp->stat.nlm4_testrply_u.holder; + lh->exclusive = (fl.l_type == F_WRLCK); + lh->svid = fl.l_pid; + lh->oh.n_len = sizeof (*oh); + lh->oh.n_bytes = (void *)oh; + lh->l_offset = fl.l_start; + lh->l_len = fl.l_len; + +out: + /* + * If we have a callback funtion, use that to + * deliver the response via another RPC call. + */ + if (cb != NULL && rpcp != NULL) + NLM_INVOKE_CALLBACK("test", rpcp, resp, cb); + + if (vp != NULL) + VN_RELE(vp); + if (rpcp != NULL) + nlm_host_rele_rpc(host, rpcp); + + nlm_host_release(g, host); +} + +/* + * NLM_LOCK, NLM_LOCK_MSG, NLM_NM_LOCK + * NLM4_LOCK, NLM4_LOCK_MSG, NLM4_NM_LOCK + * + * Client request to set a lock, possibly blocking. + * + * If the lock needs to block, we return status blocked to + * this RPC call, and then later call back the client with + * a "granted" callback. Tricky aspects of this include: + * sending a reply before this function returns, and then + * borrowing this thread from the RPC service pool for the + * wait on the lock and doing the later granted callback. + * + * We also have to keep a list of locks (pending + granted) + * both to handle retransmitted requests, and to keep the + * vnodes for those locks active. 
+ */ +void +nlm_do_lock(nlm4_lockargs *argp, nlm4_res *resp, struct svc_req *sr, + nlm_reply_cb reply_cb, nlm_res_cb res_cb, nlm_testargs_cb grant_cb) +{ + struct nlm_globals *g; + struct flock64 fl; + struct nlm_host *host = NULL; + struct netbuf *addr; + struct nlm_vhold *nvp = NULL; + nlm_rpc_t *rpcp = NULL; + char *netid; + char *name; + int error, flags; + bool_t do_blocking = FALSE; + bool_t do_mon_req = FALSE; + enum nlm4_stats status; + + nlm_copy_netobj(&resp->cookie, &argp->cookie); + + name = argp->alock.caller_name; + netid = svc_getnetid(sr->rq_xprt); + addr = svc_getrpccaller(sr->rq_xprt); + + g = zone_getspecific(nlm_zone_key, curzone); + host = nlm_host_findcreate(g, name, netid, addr); + if (host == NULL) { + DTRACE_PROBE4(no__host, struct nlm_globals *, g, + char *, name, char *, netid, struct netbuf *, addr); + status = nlm4_denied_nolocks; + goto doreply; + } + + DTRACE_PROBE3(start, struct nlm_globals *, g, + struct nlm_host *, host, nlm4_lockargs *, argp); + + /* + * If we may need to do _msg_ call needing an RPC + * callback, get the RPC client handle now, + * so we know if we can bind to the NLM service on + * this client. + * + * Note: host object carries transport type. + * One client using multiple transports gets + * separate sysids for each of its transports. + */ + if (res_cb != NULL || (grant_cb != NULL && argp->block == TRUE)) { + error = nlm_host_get_rpc(host, sr->rq_vers, &rpcp); + if (error != 0) { + status = nlm4_denied_nolocks; + goto doreply; + } + } + + /* + * During the "grace period", only allow reclaim. + */ + if (argp->reclaim == 0 && NLM_IN_GRACE(g)) { + status = nlm4_denied_grace_period; + goto doreply; + } + + /* + * Check whether we missed host shutdown event + */ + if (nlm_host_get_state(host) != argp->state) + nlm_host_notify_server(host, argp->state); + + /* + * Get a hold on the vnode for a lock operation. + * Only lock() and share() need vhold objects. 
+ */ + nvp = nlm_fh_to_vhold(host, &argp->alock.fh); + if (nvp == NULL) { + status = nlm4_stale_fh; + goto doreply; + } + + /* Convert to local form. */ + error = nlm_init_flock(&fl, &argp->alock, host, sr->rq_vers, + (argp->exclusive) ? F_WRLCK : F_RDLCK); + if (error) { + status = nlm4_failed; + goto doreply; + } + + /* + * Try to lock non-blocking first. If we succeed + * getting the lock, we can reply with the granted + * status directly and avoid the complications of + * making the "granted" RPC callback later. + * + * This also let's us find out now about some + * possible errors like EROFS, etc. + */ + flags = F_REMOTELOCK | FREAD | FWRITE; + error = nlm_vop_frlock(nvp->nv_vp, F_SETLK, &fl, flags, + (u_offset_t)0, NULL, CRED(), NULL); + + DTRACE_PROBE3(setlk__res, struct flock64 *, &fl, + int, flags, int, error); + + switch (error) { + case 0: + /* Got it without waiting! */ + status = nlm4_granted; + do_mon_req = TRUE; + break; + + /* EINPROGRESS too? */ + case EAGAIN: + /* We did not get the lock. Should we block? */ + if (argp->block == FALSE || grant_cb == NULL) { + status = nlm4_denied; + break; + } + /* + * Should block. Try to reserve this thread + * so we can use it to wait for the lock and + * later send the granted message. If this + * reservation fails, say "no resources". + */ + if (!svc_reserve_thread(sr->rq_xprt)) { + status = nlm4_denied_nolocks; + break; + } + /* + * OK, can detach this thread, so this call + * will block below (after we reply). + */ + status = nlm4_blocked; + do_blocking = TRUE; + do_mon_req = TRUE; + break; + + case ENOLCK: + /* Failed for lack of resources. 
*/ + status = nlm4_denied_nolocks; + break; + + case EROFS: + /* read-only file system */ + status = nlm4_rofs; + break; + + case EFBIG: + /* file too big */ + status = nlm4_fbig; + break; + + case EDEADLK: + /* dead lock condition */ + status = nlm4_deadlck; + break; + + default: + status = nlm4_denied; + break; + } + +doreply: + resp->stat.stat = status; + + /* + * We get one of two function pointers; one for a + * normal RPC reply, and another for doing an RPC + * "callback" _res reply for a _msg function. + * Use either of those to send the reply now. + * + * If sending this reply fails, just leave the + * lock in the list for retransmitted requests. + * Cleanup is via unlock or host rele (statmon). + */ + if (reply_cb != NULL) { + /* i.e. nlm_lock_1_reply */ + if (!(*reply_cb)(sr->rq_xprt, resp)) + svcerr_systemerr(sr->rq_xprt); + } + if (res_cb != NULL && rpcp != NULL) + NLM_INVOKE_CALLBACK("lock", rpcp, resp, res_cb); + + /* + * The reply has been sent to the client. + * Start monitoring this client (maybe). + * + * Note that the non-monitored (NM) calls pass grant_cb=NULL + * indicating that the client doesn't support RPC callbacks. + * No monitoring for these (lame) clients. + */ + if (do_mon_req && grant_cb != NULL) + nlm_host_monitor(g, host, argp->state); + + if (do_blocking) { + /* + * We need to block on this lock, and when that + * completes, do the granted RPC call. Note that + * we "reserved" this thread above, so we can now + * "detach" it from the RPC SVC pool, allowing it + * to block indefinitely if needed. 
+ */ + ASSERT(rpcp != NULL); + (void) svc_detach_thread(sr->rq_xprt); + nlm_block(argp, host, nvp, rpcp, &fl, grant_cb); + } + + DTRACE_PROBE3(lock__end, struct nlm_globals *, g, + struct nlm_host *, host, nlm4_res *, resp); + + if (rpcp != NULL) + nlm_host_rele_rpc(host, rpcp); + + nlm_vhold_release(host, nvp); + nlm_host_release(g, host); +} + +/* + * Helper for nlm_do_lock(), partly for observability, + * (we'll see a call blocked in this function) and + * because nlm_do_lock() was getting quite long. + */ +static void +nlm_block(nlm4_lockargs *lockargs, + struct nlm_host *host, + struct nlm_vhold *nvp, + nlm_rpc_t *rpcp, + struct flock64 *flp, + nlm_testargs_cb grant_cb) +{ + nlm4_testargs args; + int error; + flk_callback_t flk_cb; + struct nlm_block_cb_data cb_data; + + /* + * Keep a list of blocked locks on nh_pending, and use it + * to cancel these threads in nlm_destroy_client_pending. + * + * Check to see if this lock is already in the list + * and if not, add an entry for it. Allocate first, + * then if we don't insert, free the new one. + * Caller already has vp held. + */ + + error = nlm_slreq_register(host, nvp, flp); + if (error != 0) { + /* + * Sleeping lock request with given fl is already + * registered by someone else. This means that + * some other thread is handling the request, let + * him to do its work. + */ + ASSERT(error == EEXIST); + return; + } + + cb_data.hostp = host; + cb_data.nvp = nvp; + cb_data.flp = flp; + flk_init_callback(&flk_cb, nlm_block_callback, &cb_data); + + /* BSD: VOP_ADVLOCK(vp, NULL, F_SETLK, fl, F_REMOTE); */ + error = nlm_vop_frlock(nvp->nv_vp, F_SETLKW, flp, + F_REMOTELOCK | FREAD | FWRITE, + (u_offset_t)0, &flk_cb, CRED(), NULL); + + if (error != 0) { + /* + * We failed getting the lock, but have no way to + * tell the client about that. Let 'em time out. + */ + (void) nlm_slreq_unregister(host, nvp, flp); + return; + } + + /* + * Do the "granted" call-back to the client. 
+ */ + args.cookie = lockargs->cookie; + args.exclusive = lockargs->exclusive; + args.alock = lockargs->alock; + + NLM_INVOKE_CALLBACK("grant", rpcp, &args, grant_cb); +} + +/* + * The function that is used as flk callback when NLM server + * sets new sleeping lock. The function unregisters NLM + * sleeping lock request (nlm_slreq) associated with the + * sleeping lock _before_ lock becomes active. It prevents + * potential race condition between nlm_block() and + * nlm_do_cancel(). + */ +static callb_cpr_t * +nlm_block_callback(flk_cb_when_t when, void *data) +{ + struct nlm_block_cb_data *cb_data; + + cb_data = (struct nlm_block_cb_data *)data; + if (when == FLK_AFTER_SLEEP) { + (void) nlm_slreq_unregister(cb_data->hostp, + cb_data->nvp, cb_data->flp); + } + + return (0); +} + +/* + * NLM_CANCEL, NLM_CANCEL_MSG, + * NLM4_CANCEL, NLM4_CANCEL_MSG, + * Client gives up waiting for a blocking lock. + */ +void +nlm_do_cancel(nlm4_cancargs *argp, nlm4_res *resp, + struct svc_req *sr, nlm_res_cb cb) +{ + struct nlm_globals *g; + struct nlm_host *host; + struct netbuf *addr; + struct nlm_vhold *nvp = NULL; + nlm_rpc_t *rpcp = NULL; + char *netid; + char *name; + int error; + struct flock64 fl; + + nlm_copy_netobj(&resp->cookie, &argp->cookie); + netid = svc_getnetid(sr->rq_xprt); + addr = svc_getrpccaller(sr->rq_xprt); + name = argp->alock.caller_name; + + g = zone_getspecific(nlm_zone_key, curzone); + host = nlm_host_findcreate(g, name, netid, addr); + if (host == NULL) { + resp->stat.stat = nlm4_denied_nolocks; + return; + } + if (cb != NULL) { + error = nlm_host_get_rpc(host, sr->rq_vers, &rpcp); + if (error != 0) { + resp->stat.stat = nlm4_denied_nolocks; + return; + } + } + + DTRACE_PROBE3(start, struct nlm_globals *, g, + struct nlm_host *, host, nlm4_cancargs *, argp); + + if (NLM_IN_GRACE(g)) { + resp->stat.stat = nlm4_denied_grace_period; + goto out; + } + + nvp = nlm_fh_to_vhold(host, &argp->alock.fh); + if (nvp == NULL) { + resp->stat.stat = nlm4_stale_fh; + 
goto out; + } + + /* Convert to local form. */ + error = nlm_init_flock(&fl, &argp->alock, host, sr->rq_vers, + (argp->exclusive) ? F_WRLCK : F_RDLCK); + if (error) { + resp->stat.stat = nlm4_failed; + goto out; + } + + error = nlm_slreq_unregister(host, nvp, &fl); + if (error != 0) { + /* + * There's no sleeping lock request corresponding + * to the lock. Then requested sleeping lock + * doesn't exist. + */ + resp->stat.stat = nlm4_denied; + goto out; + } + + fl.l_type = F_UNLCK; + error = nlm_vop_frlock(nvp->nv_vp, F_SETLK, &fl, + F_REMOTELOCK | FREAD | FWRITE, + (u_offset_t)0, NULL, CRED(), NULL); + + resp->stat.stat = (error == 0) ? + nlm4_granted : nlm4_denied; + +out: + /* + * If we have a callback funtion, use that to + * deliver the response via another RPC call. + */ + if (cb != NULL && rpcp != NULL) + NLM_INVOKE_CALLBACK("cancel", rpcp, resp, cb); + + DTRACE_PROBE3(cancel__end, struct nlm_globals *, g, + struct nlm_host *, host, nlm4_res *, resp); + + if (rpcp != NULL) + nlm_host_rele_rpc(host, rpcp); + + nlm_vhold_release(host, nvp); + nlm_host_release(g, host); +} + +/* + * NLM_UNLOCK, NLM_UNLOCK_MSG, + * NLM4_UNLOCK, NLM4_UNLOCK_MSG, + * Client removes one of their locks. + */ +void +nlm_do_unlock(nlm4_unlockargs *argp, nlm4_res *resp, + struct svc_req *sr, nlm_res_cb cb) +{ + struct nlm_globals *g; + struct nlm_host *host; + struct netbuf *addr; + nlm_rpc_t *rpcp = NULL; + vnode_t *vp = NULL; + char *netid; + char *name; + int error; + struct flock64 fl; + + nlm_copy_netobj(&resp->cookie, &argp->cookie); + + netid = svc_getnetid(sr->rq_xprt); + addr = svc_getrpccaller(sr->rq_xprt); + name = argp->alock.caller_name; + + /* + * NLM_UNLOCK operation doesn't have an error code + * denoting that operation failed, so we always + * return nlm4_granted except when the server is + * in a grace period. 
+ */ + resp->stat.stat = nlm4_granted; + + g = zone_getspecific(nlm_zone_key, curzone); + host = nlm_host_findcreate(g, name, netid, addr); + if (host == NULL) + return; + + if (cb != NULL) { + error = nlm_host_get_rpc(host, sr->rq_vers, &rpcp); + if (error != 0) + goto out; + } + + DTRACE_PROBE3(start, struct nlm_globals *, g, + struct nlm_host *, host, nlm4_unlockargs *, argp); + + if (NLM_IN_GRACE(g)) { + resp->stat.stat = nlm4_denied_grace_period; + goto out; + } + + vp = nlm_fh_to_vp(&argp->alock.fh); + if (vp == NULL) + goto out; + + /* Convert to local form. */ + error = nlm_init_flock(&fl, &argp->alock, host, sr->rq_vers, F_UNLCK); + if (error) + goto out; + + /* BSD: VOP_ADVLOCK(nv->nv_vp, NULL, F_UNLCK, &fl, F_REMOTE); */ + error = nlm_vop_frlock(vp, F_SETLK, &fl, + F_REMOTELOCK | FREAD | FWRITE, + (u_offset_t)0, NULL, CRED(), NULL); + + DTRACE_PROBE1(unlock__res, int, error); +out: + /* + * If we have a callback funtion, use that to + * deliver the response via another RPC call. + */ + if (cb != NULL && rpcp != NULL) + NLM_INVOKE_CALLBACK("unlock", rpcp, resp, cb); + + DTRACE_PROBE3(unlock__end, struct nlm_globals *, g, + struct nlm_host *, host, nlm4_res *, resp); + + if (vp != NULL) + VN_RELE(vp); + if (rpcp != NULL) + nlm_host_rele_rpc(host, rpcp); + + nlm_host_release(g, host); +} + +/* + * NLM_GRANTED, NLM_GRANTED_MSG, + * NLM4_GRANTED, NLM4_GRANTED_MSG, + * + * This service routine is special. It's the only one that's + * really part of our NLM _client_ support, used by _servers_ + * to "call back" when a blocking lock from this NLM client + * is granted by the server. In this case, we _know_ there is + * already an nlm_host allocated and held by the client code. + * We want to find that nlm_host here. + * + * Over in nlm_call_lock(), the client encoded the sysid for this + * server in the "owner handle" netbuf sent with our lock request. + * We can now use that to find the nlm_host object we used there. 
+ * (NB: The owner handle is opaque to the server.) + */ +void +nlm_do_granted(nlm4_testargs *argp, nlm4_res *resp, + struct svc_req *sr, nlm_res_cb cb) +{ + struct nlm_globals *g; + struct nlm_owner_handle *oh; + struct nlm_host *host; + nlm_rpc_t *rpcp = NULL; + int error; + + nlm_copy_netobj(&resp->cookie, &argp->cookie); + resp->stat.stat = nlm4_denied; + + g = zone_getspecific(nlm_zone_key, curzone); + oh = (void *) argp->alock.oh.n_bytes; + if (oh == NULL) + return; + + host = nlm_host_find_by_sysid(g, oh->oh_sysid); + if (host == NULL) + return; + + if (cb != NULL) { + error = nlm_host_get_rpc(host, sr->rq_vers, &rpcp); + if (error != 0) + goto out; + } + + if (NLM_IN_GRACE(g)) { + resp->stat.stat = nlm4_denied_grace_period; + goto out; + } + + error = nlm_slock_grant(g, host, &argp->alock); + if (error == 0) + resp->stat.stat = nlm4_granted; + +out: + /* + * If we have a callback funtion, use that to + * deliver the response via another RPC call. + */ + if (cb != NULL && rpcp != NULL) + NLM_INVOKE_CALLBACK("do_granted", rpcp, resp, cb); + + if (rpcp != NULL) + nlm_host_rele_rpc(host, rpcp); + + nlm_host_release(g, host); +} + +/* + * NLM_FREE_ALL, NLM4_FREE_ALL + * + * Destroy all lock state for the calling client. + */ +void +nlm_do_free_all(nlm4_notify *argp, void *res, struct svc_req *sr) +{ + struct nlm_globals *g; + struct nlm_host_list host_list; + struct nlm_host *hostp; + + TAILQ_INIT(&host_list); + g = zone_getspecific(nlm_zone_key, curzone); + + /* Serialize calls to clean locks. */ + mutex_enter(&g->clean_lock); + + /* + * Find all hosts that have the given node name and put them on a + * local list. + */ + mutex_enter(&g->lock); + for (hostp = avl_first(&g->nlm_hosts_tree); hostp != NULL; + hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp)) { + if (strcasecmp(hostp->nh_name, argp->name) == 0) { + /* + * If needed take the host out of the idle list since + * we are taking a reference. 
+ */ + if (hostp->nh_flags & NLM_NH_INIDLE) { + TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, + nh_link); + hostp->nh_flags &= ~NLM_NH_INIDLE; + } + hostp->nh_refs++; + + TAILQ_INSERT_TAIL(&host_list, hostp, nh_link); + } + } + mutex_exit(&g->lock); + + /* Free locks for all hosts on the local list. */ + while (!TAILQ_EMPTY(&host_list)) { + hostp = TAILQ_FIRST(&host_list); + TAILQ_REMOVE(&host_list, hostp, nh_link); + + /* + * Note that this does not do client-side cleanup. + * We want to do that ONLY if statd tells us the + * server has restarted. + */ + nlm_host_notify_server(hostp, argp->state); + nlm_host_release(g, hostp); + } + + mutex_exit(&g->clean_lock); + + (void) res; + (void) sr; +} + +static void +nlm_init_shrlock(struct shrlock *shr, + nlm4_share *nshare, struct nlm_host *host) +{ + + switch (nshare->access) { + default: + case fsa_NONE: + shr->s_access = 0; + break; + case fsa_R: + shr->s_access = F_RDACC; + break; + case fsa_W: + shr->s_access = F_WRACC; + break; + case fsa_RW: + shr->s_access = F_RWACC; + break; + } + + switch (nshare->mode) { + default: + case fsm_DN: + shr->s_deny = F_NODNY; + break; + case fsm_DR: + shr->s_deny = F_RDDNY; + break; + case fsm_DW: + shr->s_deny = F_WRDNY; + break; + case fsm_DRW: + shr->s_deny = F_RWDNY; + break; + } + + shr->s_sysid = host->nh_sysid; + shr->s_pid = 0; + shr->s_own_len = nshare->oh.n_len; + shr->s_owner = nshare->oh.n_bytes; +} + +/* + * NLM_SHARE, NLM4_SHARE + * + * Request a DOS-style share reservation + */ +void +nlm_do_share(nlm4_shareargs *argp, nlm4_shareres *resp, struct svc_req *sr) +{ + struct nlm_globals *g; + struct nlm_host *host; + struct netbuf *addr; + struct nlm_vhold *nvp = NULL; + char *netid; + char *name; + int error; + struct shrlock shr; + + nlm_copy_netobj(&resp->cookie, &argp->cookie); + + name = argp->share.caller_name; + netid = svc_getnetid(sr->rq_xprt); + addr = svc_getrpccaller(sr->rq_xprt); + + g = zone_getspecific(nlm_zone_key, curzone); + host = nlm_host_findcreate(g, 
name, netid, addr); + if (host == NULL) { + resp->stat = nlm4_denied_nolocks; + return; + } + + DTRACE_PROBE3(share__start, struct nlm_globals *, g, + struct nlm_host *, host, nlm4_shareargs *, argp); + + if (argp->reclaim == 0 && NLM_IN_GRACE(g)) { + resp->stat = nlm4_denied_grace_period; + goto out; + } + + /* + * Get holded vnode when on lock operation. + * Only lock() and share() need vhold objects. + */ + nvp = nlm_fh_to_vhold(host, &argp->share.fh); + if (nvp == NULL) { + resp->stat = nlm4_stale_fh; + goto out; + } + + /* Convert to local form. */ + nlm_init_shrlock(&shr, &argp->share, host); + error = VOP_SHRLOCK(nvp->nv_vp, F_SHARE, &shr, + FREAD | FWRITE, CRED(), NULL); + + if (error == 0) { + resp->stat = nlm4_granted; + nlm_host_monitor(g, host, 0); + } else { + resp->stat = nlm4_denied; + } + +out: + DTRACE_PROBE3(share__end, struct nlm_globals *, g, + struct nlm_host *, host, nlm4_shareres *, resp); + + nlm_vhold_release(host, nvp); + nlm_host_release(g, host); +} + +/* + * NLM_UNSHARE, NLM4_UNSHARE + * + * Release a DOS-style share reservation + */ +void +nlm_do_unshare(nlm4_shareargs *argp, nlm4_shareres *resp, struct svc_req *sr) +{ + struct nlm_globals *g; + struct nlm_host *host; + struct netbuf *addr; + vnode_t *vp = NULL; + char *netid; + int error; + struct shrlock shr; + + nlm_copy_netobj(&resp->cookie, &argp->cookie); + + netid = svc_getnetid(sr->rq_xprt); + addr = svc_getrpccaller(sr->rq_xprt); + + g = zone_getspecific(nlm_zone_key, curzone); + host = nlm_host_find(g, netid, addr); + if (host == NULL) { + resp->stat = nlm4_denied_nolocks; + return; + } + + DTRACE_PROBE3(unshare__start, struct nlm_globals *, g, + struct nlm_host *, host, nlm4_shareargs *, argp); + + if (NLM_IN_GRACE(g)) { + resp->stat = nlm4_denied_grace_period; + goto out; + } + + vp = nlm_fh_to_vp(&argp->share.fh); + if (vp == NULL) { + resp->stat = nlm4_stale_fh; + goto out; + } + + /* Convert to local form. 
*/ + nlm_init_shrlock(&shr, &argp->share, host); + error = VOP_SHRLOCK(vp, F_UNSHARE, &shr, + FREAD | FWRITE, CRED(), NULL); + + (void) error; + resp->stat = nlm4_granted; + +out: + DTRACE_PROBE3(unshare__end, struct nlm_globals *, g, + struct nlm_host *, host, nlm4_shareres *, resp); + + if (vp != NULL) + VN_RELE(vp); + + nlm_host_release(g, host); +} + +/* + * NLM wrapper to VOP_FRLOCK that checks the validity of the lock before + * invoking the vnode operation. + */ +static int +nlm_vop_frlock(vnode_t *vp, int cmd, flock64_t *bfp, int flag, offset_t offset, + struct flk_callback *flk_cbp, cred_t *cr, caller_context_t *ct) +{ + if (bfp->l_len != 0 && bfp->l_start + (bfp->l_len - 1) < bfp->l_start) { + return (EOVERFLOW); + } + + return (VOP_FRLOCK(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct)); +} diff --git a/usr/src/uts/common/klm/nsm_addr_clnt.sed b/usr/src/uts/common/klm/nsm_addr_clnt.sed new file mode 100644 index 0000000000..e04cee6cfb --- /dev/null +++ b/usr/src/uts/common/klm/nsm_addr_clnt.sed @@ -0,0 +1,29 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy is of the CDDL is also available via the Internet +# at http://www.illumos.org/license/CDDL. +# +# +# Copyright (c) 2012 by Delphix. All rights reserved. +# + +# +# This sed script is run on the client code generated by rpcgen +# from nsm_addr.x before it is compiled. 
+# + +6{ +i\ +#include <sys/param.h> +i\ +#include <sys/systm.h> +i\ +#include <rpcsvc/nsm_addr.h> +} +/^.include/,/^.endif/d + diff --git a/usr/src/uts/common/klm/sm_inter_clnt.sed b/usr/src/uts/common/klm/sm_inter_clnt.sed new file mode 100644 index 0000000000..6954efe1a6 --- /dev/null +++ b/usr/src/uts/common/klm/sm_inter_clnt.sed @@ -0,0 +1,28 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet +# at http://www.illumos.org/license/CDDL. +# +# +# Copyright 2011 Nexenta Systems, Inc. All rights reserved. +# + +# +# This sed script is run on the client code generated by rpcgen +# from sm_inter.x before it is compiled. +# + +6{ +i\ +#include <sys/param.h> +i\ +#include <sys/systm.h> +i\ +#include <rpcsvc/sm_inter.h> +} +/^.include/,/^.endif/d diff --git a/usr/src/uts/common/nfs/lm.h b/usr/src/uts/common/nfs/lm.h index 1556b03494..fa467faa4b 100644 --- a/usr/src/uts/common/nfs/lm.h +++ b/usr/src/uts/common/nfs/lm.h @@ -24,11 +24,13 @@ * Use is subject to license terms. */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ + #ifndef _NFS_LM_H #define _NFS_LM_H -#pragma ident "%Z%%M% %I% %E% SMI" - /* * Interface definitions for the NFSv2/v3 lock manager. */ @@ -42,7 +44,6 @@ extern "C" { #include <sys/types.h> #include <sys/vnode.h> #include <rpc/rpc.h> -#include <nfs/export.h> #ifdef _KERNEL @@ -50,6 +51,8 @@ extern "C" { * Common interfaces. */ +struct exportinfo; + /* * The numeric sysid is used to identify a host and transport. 
* @@ -154,7 +157,7 @@ extern int lm_vp_active(const struct vnode *); extern sysid_t lm_alloc_sysidt(void); extern void lm_free_sysidt(sysid_t); -#else /* _KERNEL */ +#endif /* _KERNEL */ #ifdef __STDC__ extern int lm_shutdown(void); @@ -162,8 +165,6 @@ extern int lm_shutdown(void); extern int lm_shutdown(); #endif /* __STDC__ */ -#endif /* _KERNEL */ - #ifdef __cplusplus } #endif diff --git a/usr/src/uts/common/os/flock.c b/usr/src/uts/common/os/flock.c index 4cb6ea8d82..5dad4abb61 100644 --- a/usr/src/uts/common/os/flock.c +++ b/usr/src/uts/common/os/flock.c @@ -27,6 +27,10 @@ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ + #include <sys/flock_impl.h> #include <sys/vfs.h> #include <sys/t_lock.h> /* for <sys/callb.h> */ @@ -260,8 +264,8 @@ reclock(vnode_t *vp, * Check access permissions */ if ((cmd & SETFLCK) && - ((lckdat->l_type == F_RDLCK && (flag & FREAD) == 0) || - (lckdat->l_type == F_WRLCK && (flag & FWRITE) == 0))) + ((lckdat->l_type == F_RDLCK && (flag & FREAD) == 0) || + (lckdat->l_type == F_WRLCK && (flag & FWRITE) == 0))) return (EBADF); /* @@ -269,10 +273,10 @@ reclock(vnode_t *vp, */ if ((lckdat->l_type == F_UNLCK) || - !((cmd & INOFLCK) || (cmd & SETFLCK))) { + !((cmd & INOFLCK) || (cmd & SETFLCK))) { lock_request = &stack_lock_request; (void) bzero((caddr_t)lock_request, - sizeof (lock_descriptor_t)); + sizeof (lock_descriptor_t)); /* * following is added to make the assertions in @@ -302,17 +306,17 @@ reclock(vnode_t *vp, ASSERT(lckdat->l_whence == 0); lock_request->l_start = lckdat->l_start; lock_request->l_end = (lckdat->l_len == 0) ? 
MAX_U_OFFSET_T : - lckdat->l_start + (lckdat->l_len - 1); + lckdat->l_start + (lckdat->l_len - 1); } else { /* check the validity of the lock range */ error = flk_convert_lock_data(vp, lckdat, - &lock_request->l_start, &lock_request->l_end, - offset); + &lock_request->l_start, &lock_request->l_end, + offset); if (error) { goto done; } error = flk_check_lock_data(lock_request->l_start, - lock_request->l_end, MAXEND); + lock_request->l_end, MAXEND); if (error) { goto done; } @@ -342,7 +346,7 @@ reclock(vnode_t *vp, } if (!((cmd & SETFLCK) || (cmd & INOFLCK))) { if (lock_request->l_type == F_RDLCK || - lock_request->l_type == F_WRLCK) + lock_request->l_type == F_WRLCK) lock_request->l_state |= QUERY_LOCK; } lock_request->l_flock = (*lckdat); @@ -378,10 +382,10 @@ reclock(vnode_t *vp, * to the registry. */ if (FLK_REGISTRY_IS_NLM_UNKNOWN(nlm_reg_status, - nlmid)) { + nlmid)) { FLK_REGISTRY_ADD_NLMID(nlm_reg_status, nlmid); } else if (!FLK_REGISTRY_IS_NLM_UP(nlm_reg_status, - nlmid)) { + nlmid)) { /* * If the NLM server is already known (has made * previous lock requests) and its state is @@ -407,8 +411,8 @@ reclock(vnode_t *vp, if (IS_IO_LOCK(lock_request)) { VOP_RWUNLOCK(vp, - (lock_request->l_type == F_RDLCK) ? - V_WRITELOCK_FALSE : V_WRITELOCK_TRUE, NULL); + (lock_request->l_type == F_RDLCK) ? + V_WRITELOCK_FALSE : V_WRITELOCK_TRUE, NULL); } mutex_enter(&gp->gp_mutex); @@ -481,8 +485,8 @@ reclock(vnode_t *vp, if (IS_IO_LOCK(lock_request)) { (void) VOP_RWLOCK(vp, - (lock_request->l_type == F_RDLCK) ? - V_WRITELOCK_FALSE : V_WRITELOCK_TRUE, NULL); + (lock_request->l_type == F_RDLCK) ? 
+ V_WRITELOCK_FALSE : V_WRITELOCK_TRUE, NULL); if (!error) { lckdat->l_type = F_UNLCK; @@ -618,7 +622,7 @@ flk_init(void) uint_t i; flk_edge_cache = kmem_cache_create("flk_edges", - sizeof (struct edge), 0, NULL, NULL, NULL, NULL, NULL, 0); + sizeof (struct edge), 0, NULL, NULL, NULL, NULL, NULL, 0); if (flk_edge_cache == NULL) { cmn_err(CE_PANIC, "Couldn't create flk_edge_cache\n"); } @@ -641,8 +645,8 @@ flk_init(void) if (nlm_status_size != 0) { /* booted as a cluster */ nlm_reg_status = (flk_nlm_status_t *) - kmem_alloc(sizeof (flk_nlm_status_t) * nlm_status_size, - KM_SLEEP); + kmem_alloc(sizeof (flk_nlm_status_t) * nlm_status_size, + KM_SLEEP); /* initialize all NLM states in array to NLM_UNKNOWN */ for (i = 0; i < nlm_status_size; i++) { @@ -807,8 +811,8 @@ flk_process_request(lock_descriptor_t *request) */ if (SAME_OWNER(lock, request) && - COVERS(lock, request) && - (request->l_type == F_RDLCK)) + COVERS(lock, request) && + (request->l_type == F_RDLCK)) return (flk_execute_request(request)); lock = lock->l_next; } while (lock->l_vnode == vp); @@ -913,7 +917,7 @@ block: if (!request_will_wait) return (EAGAIN); if (COVERS(lock, request) && - lock->l_type == F_WRLCK) { + lock->l_type == F_WRLCK) { if (found_covering_lock && !SAME_OWNER(lock, covered_by)) { found_covering_lock++; @@ -923,12 +927,12 @@ block: covered_by = lock; } if (found_covering_lock && - !SAME_OWNER(lock, covered_by)) { + !SAME_OWNER(lock, covered_by)) { lock = lock->l_next; continue; } if ((error = flk_add_edge(request, lock, - !found_covering_lock, 0))) + !found_covering_lock, 0))) return (error); } lock = lock->l_next; @@ -949,12 +953,12 @@ block: do { if (BLOCKS(lock, request)) { if (found_covering_lock && - !SAME_OWNER(lock, covered_by)) { + !SAME_OWNER(lock, covered_by)) { lock = lock->l_next; continue; } if ((error = flk_add_edge(request, lock, - CHECK_CYCLE, 0))) + CHECK_CYCLE, 0))) return (error); } lock = lock->l_next; @@ -1106,7 +1110,7 @@ 
flk_wait_execute_request(lock_descriptor_t *request) mutex_exit(&gp->gp_mutex); cprp = flk_invoke_callbacks(request->l_callbacks, - FLK_BEFORE_SLEEP); + FLK_BEFORE_SLEEP); mutex_enter(&gp->gp_mutex); @@ -1124,7 +1128,7 @@ flk_wait_execute_request(lock_descriptor_t *request) mutex_exit(&gp->gp_mutex); (void) flk_invoke_callbacks(request->l_callbacks, - FLK_AFTER_SLEEP); + FLK_AFTER_SLEEP); mutex_enter(&gp->gp_mutex); } else { wait_for_lock(request); @@ -1136,7 +1140,7 @@ flk_wait_execute_request(lock_descriptor_t *request) * error that will encourage the client to retransmit. */ if (fg->lockmgr_status[index] != FLK_LOCKMGR_UP && - !IS_GRANTED(request)) { + !IS_GRANTED(request)) { flk_cancel_sleeping_lock(request, 1); return (ENOLCK); } @@ -1234,8 +1238,8 @@ flk_add_edge(lock_descriptor_t *from_lock, lock_descriptor_t *to_lock, STACK_POP(vertex_stack, l_stack); for (ep = FIRST_ADJ(vertex); - ep != HEAD(vertex); - ep = NEXT_ADJ(ep)) { + ep != HEAD(vertex); + ep = NEXT_ADJ(ep)) { if (COLORED(ep->to_vertex)) continue; COLOR(ep->to_vertex); @@ -1324,17 +1328,17 @@ flk_relation(lock_descriptor_t *lock, lock_descriptor_t *request) if (request->l_type == F_UNLCK) lock_effect = FLK_UNLOCK; else if (request->l_type == F_RDLCK && - lock->l_type == F_WRLCK) + lock->l_type == F_WRLCK) lock_effect = FLK_DOWNGRADE; else if (request->l_type == F_WRLCK && - lock->l_type == F_RDLCK) + lock->l_type == F_RDLCK) lock_effect = FLK_UPGRADE; else lock_effect = FLK_STAY_SAME; if (lock->l_end < request->l_start) { if (lock->l_end == request->l_start - 1 && - lock_effect == FLK_STAY_SAME) { + lock_effect == FLK_STAY_SAME) { topology[0] = request; request->l_start = lock->l_start; nvertex = 1; @@ -1346,7 +1350,7 @@ flk_relation(lock_descriptor_t *lock, lock_descriptor_t *request) if (lock->l_start > request->l_end) { if (request->l_end == lock->l_start - 1 && - lock_effect == FLK_STAY_SAME) { + lock_effect == FLK_STAY_SAME) { topology[0] = request; request->l_end = lock->l_end; nvertex = 1; @@ 
-1544,7 +1548,7 @@ flk_insert_active_lock(lock_descriptor_t *new_lock) if (first_lock != NULL) { for (; (lock->l_vnode == vp && - lock->l_start < new_lock->l_start); lock = lock->l_next) + lock->l_start < new_lock->l_start); lock = lock->l_next) ; } else { lock = ACTIVE_HEAD(gp); @@ -1587,8 +1591,8 @@ flk_delete_active_lock(lock_descriptor_t *lock, int free_lock) if (vp->v_filocks == (struct filock *)lock) { vp->v_filocks = (struct filock *) - ((lock->l_next->l_vnode == vp) ? lock->l_next : - NULL); + ((lock->l_next->l_vnode == vp) ? lock->l_next : + NULL); } lock->l_next->l_prev = lock->l_prev; lock->l_prev->l_next = lock->l_next; @@ -1617,7 +1621,7 @@ flk_insert_sleeping_lock(lock_descriptor_t *request) ASSERT(IS_INITIAL(request)); for (lock = gp->sleeping_locks.l_next; (lock != &gp->sleeping_locks && - lock->l_vnode < vp); lock = lock->l_next) + lock->l_vnode < vp); lock = lock->l_next) ; lock->l_prev->l_next = request; @@ -1660,7 +1664,7 @@ flk_cancel_sleeping_lock(lock_descriptor_t *request, int remove_from_queue) while ((vertex = STACK_TOP(vertex_stack)) != NULL) { STACK_POP(vertex_stack, l_stack); for (ep = FIRST_ADJ(vertex); ep != HEAD(vertex); - ep = NEXT_ADJ(ep)) { + ep = NEXT_ADJ(ep)) { if (IS_RECOMPUTE(ep->to_vertex)) continue; ep->to_vertex->l_state |= RECOMPUTE_LOCK; @@ -1675,7 +1679,7 @@ flk_cancel_sleeping_lock(lock_descriptor_t *request, int remove_from_queue) if (nvertex) { topology = kmem_zalloc(nvertex * sizeof (lock_descriptor_t *), - KM_SLEEP); + KM_SLEEP); } /* @@ -1754,7 +1758,7 @@ flk_cancel_sleeping_lock(lock_descriptor_t *request, int remove_from_queue) */ if (nvertex) kmem_free((void *)topology, - (nvertex * sizeof (lock_descriptor_t *))); + (nvertex * sizeof (lock_descriptor_t *))); /* * Possibility of some locks unblocked now */ @@ -1785,11 +1789,11 @@ flk_graph_uncolor(graph_t *gp) if (gp->mark == UINT_MAX) { gp->mark = 1; for (lock = ACTIVE_HEAD(gp)->l_next; lock != ACTIVE_HEAD(gp); - lock = lock->l_next) + lock = lock->l_next) 
lock->l_color = 0; for (lock = SLEEPING_HEAD(gp)->l_next; lock != SLEEPING_HEAD(gp); - lock = lock->l_next) + lock = lock->l_next) lock->l_color = 0; } else { gp->mark++; @@ -1920,7 +1924,7 @@ flk_recompute_dependencies(lock_descriptor_t *request, next_in_edge: if (count == nvertex || - vertex->l_sedge == HEAD(vertex)) { + vertex->l_sedge == HEAD(vertex)) { /* prune the tree below this */ STACK_POP(vertex_stack, l_stack); vertex->l_state &= ~RECOMPUTE_DONE; @@ -1966,7 +1970,7 @@ flk_color_reachables(lock_descriptor_t *vertex) STACK_POP(vertex_stack, l_stack1); for (ep = FIRST_ADJ(ver); ep != HEAD(ver); - ep = NEXT_ADJ(ep)) { + ep = NEXT_ADJ(ep)) { lock = ep->to_vertex; if (COLORED(lock)) continue; @@ -1999,7 +2003,7 @@ flk_update_barriers(lock_descriptor_t *lock) while ((vertex = STACK_TOP(vertex_stack)) != NULL) { STACK_POP(vertex_stack, l_stack1); for (ep = FIRST_IN(vertex); ep != HEAD(vertex); - ep = NEXT_IN(ep)) { + ep = NEXT_IN(ep)) { lck = ep->from_vertex; if (COLORED(lck)) { if (IS_BARRIER(lck)) { @@ -2044,7 +2048,7 @@ flk_find_barriers(lock_descriptor_t *lock) while ((vertex = STACK_TOP(vertex_stack)) != NULL) { STACK_POP(vertex_stack, l_stack1); for (ep = FIRST_IN(vertex); ep != HEAD(vertex); - ep = NEXT_IN(ep)) { + ep = NEXT_IN(ep)) { lck = ep->from_vertex; if (COLORED(lck)) { /* this is a barrier */ @@ -2290,6 +2294,57 @@ done: } /* + * Determine whether there are any locks for the given vnode with a remote + * sysid matching given sysid. 
+ * Used by the new (open source) NFS Lock Manager (NLM) + */ +int +flk_has_remote_locks_for_sysid(vnode_t *vp, int sysid) +{ + lock_descriptor_t *lock; + int result = 0; + graph_t *gp; + + if (sysid == 0) + return (0); + + gp = flk_get_lock_graph(vp, FLK_USE_GRAPH); + if (gp == NULL) { + return (0); + } + + mutex_enter(&gp->gp_mutex); + + SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); + + if (lock) { + while (lock->l_vnode == vp) { + if (lock->l_flock.l_sysid == sysid) { + result = 1; + goto done; + } + lock = lock->l_next; + } + } + + SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); + + if (lock) { + while (lock->l_vnode == vp) { + if (lock->l_flock.l_sysid == sysid) { + result = 1; + goto done; + } + lock = lock->l_next; + } + } + +done: + mutex_exit(&gp->gp_mutex); + return (result); +} + +/* * Determine if there are any locks owned by the given sysid. * Returns zero if not, non-zero if there are. Note that this return code * could be derived from flk_get_{sleeping,active}_locks, but this routine @@ -2328,8 +2383,8 @@ flk_sysid_has_locks(int sysid, int lck_type) if (lck_type & FLK_QUERY_SLEEPING) { for (lock = SLEEPING_HEAD(gp)->l_next; - lock != SLEEPING_HEAD(gp) && !has_locks; - lock = lock->l_next) { + lock != SLEEPING_HEAD(gp) && !has_locks; + lock = lock->l_next) { if (lock->l_flock.l_sysid == sysid) has_locks = 1; } @@ -2534,8 +2589,8 @@ flk_canceled(lock_descriptor_t *request) while (lock->l_vnode == vp) { nlock = lock->l_next; if (SAME_OWNER(lock, request) && - lock->l_start == request->l_start && - lock->l_end == request->l_end) { + lock->l_start == request->l_start && + lock->l_end == request->l_end) { INTERRUPT_WAKEUP(lock); return (1); } @@ -2573,8 +2628,8 @@ cleanlocks(vnode_t *vp, pid_t pid, int sysid) do { nlock = lock->l_next; if ((lock->l_flock.l_pid == pid || - pid == IGN_PID) && - lock->l_flock.l_sysid == sysid) { + pid == IGN_PID) && + lock->l_flock.l_sysid == sysid) { CANCEL_WAKEUP(lock); } lock = nlock; @@ -2587,8 +2642,8 @@ cleanlocks(vnode_t *vp, 
pid_t pid, int sysid) do { nlock = lock->l_next; if ((lock->l_flock.l_pid == pid || - pid == IGN_PID) && - lock->l_flock.l_sysid == sysid) { + pid == IGN_PID) && + lock->l_flock.l_sysid == sysid) { flk_delete_active_lock(lock, 0); STACK_PUSH(link_stack, lock, l_stack); } @@ -2817,7 +2872,7 @@ deadlock: flk_free_edge(ep); ppep = start_vertex->edge; for (pep = start_vertex->edge; pep != NULL; ppep = pep, - pep = ppep->next) { + pep = ppep->next) { if (pep->to_proc == adj_proc) { pep->refcount--; if (pep->refcount == 0) { @@ -2845,7 +2900,7 @@ deadlock: flk_free_edge(ep); ppep = in_proc->edge; for (pep = in_proc->edge; pep != NULL; ppep = pep, - pep = ppep->next) { + pep = ppep->next) { if (pep->to_proc == start_vertex) { pep->refcount--; if (pep->refcount == 0) { @@ -2911,14 +2966,14 @@ flk_get_proc_vertex(lock_descriptor_t *lock) } } palloc = kmem_zalloc((pgraph.gcount + PROC_CHUNK) * - sizeof (proc_vertex_t *), KM_SLEEP); + sizeof (proc_vertex_t *), KM_SLEEP); if (pgraph.proc) { bcopy(pgraph.proc, palloc, - pgraph.gcount * sizeof (proc_vertex_t *)); + pgraph.gcount * sizeof (proc_vertex_t *)); kmem_free(pgraph.proc, - pgraph.gcount * sizeof (proc_vertex_t *)); + pgraph.gcount * sizeof (proc_vertex_t *)); } pgraph.proc = palloc; pgraph.free += (PROC_CHUNK - 1); @@ -3122,14 +3177,14 @@ cl_flk_set_nlm_status(int nlmid, flk_nlm_status_t nlm_state) * add it to the registry in the nlm shutting down state. */ FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid, - FLK_NLM_SHUTTING_DOWN); + FLK_NLM_SHUTTING_DOWN); } else { /* * Change the state of the NLM server identified by "nlmid" * in the NLM registry to the argument "nlm_state." */ FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid, - nlm_state); + nlm_state); } /* @@ -3314,7 +3369,7 @@ get_lock_list(int list_type, int lock_state, int sysid, boolean_t use_sysid, mutex_enter(&gp->gp_mutex); graph_head = (list_type == FLK_ACTIVE_STATE) ? 
- ACTIVE_HEAD(gp) : SLEEPING_HEAD(gp); + ACTIVE_HEAD(gp) : SLEEPING_HEAD(gp); for (lock = graph_head->l_next; lock != graph_head; lock = lock->l_next) { @@ -3362,14 +3417,14 @@ locklist_t * flk_get_sleeping_locks(int sysid, pid_t pid) { return (get_lock_list(FLK_SLEEPING_STATE, 0, sysid, B_TRUE, pid, NULL, - ALL_ZONES)); + ALL_ZONES)); } locklist_t * flk_get_active_locks(int sysid, pid_t pid) { return (get_lock_list(FLK_ACTIVE_STATE, 0, sysid, B_TRUE, pid, NULL, - ALL_ZONES)); + ALL_ZONES)); } /* @@ -3386,7 +3441,7 @@ locklist_t * flk_active_locks_for_vp(const vnode_t *vp) { return (get_lock_list(FLK_ACTIVE_STATE, 0, 0, B_FALSE, NOPID, vp, - ALL_ZONES)); + ALL_ZONES)); } /* @@ -3400,7 +3455,7 @@ locklist_t * flk_active_nbmand_locks_for_vp(const vnode_t *vp) { return (get_lock_list(FLK_ACTIVE_STATE, NBMAND_LOCK, 0, B_FALSE, - NOPID, vp, ALL_ZONES)); + NOPID, vp, ALL_ZONES)); } /* @@ -3417,7 +3472,7 @@ locklist_t * flk_active_nbmand_locks(pid_t pid) { return (get_lock_list(FLK_ACTIVE_STATE, NBMAND_LOCK, 0, B_FALSE, - pid, NULL, ALL_ZONES)); + pid, NULL, ALL_ZONES)); } /* @@ -3546,10 +3601,10 @@ cl_flk_wakeup_sleeping_nlm_locks(int nlmid) if (IS_LOCKMGR(lock)) { /* get NLM id */ lock_nlmid = - GETNLMID(lock->l_flock.l_sysid); + GETNLMID(lock->l_flock.l_sysid); if (nlmid == lock_nlmid) { SET_NLM_STATE(lock, - FLK_NLM_SHUTTING_DOWN); + FLK_NLM_SHUTTING_DOWN); INTERRUPT_WAKEUP(lock); } } @@ -3722,7 +3777,7 @@ create_flock(lock_descriptor_t *lp, flock64_t *flp) flp->l_whence = 0; flp->l_start = lp->l_start; flp->l_len = (lp->l_end == MAX_U_OFFSET_T) ? 
0 : - (lp->l_end - lp->l_start + 1); + (lp->l_end - lp->l_start + 1); flp->l_sysid = lp->l_flock.l_sysid; flp->l_pid = lp->l_flock.l_pid; } @@ -3803,7 +3858,7 @@ flk_check_lock_data(u_offset_t start, u_offset_t end, offset_t max) return (EINVAL); } if (start > end) { - return (EINVAL); + return (EINVAL); } return (0); } @@ -3853,7 +3908,7 @@ report_blocker(lock_descriptor_t *blocker, lock_descriptor_t *request) flrp->l_len = 0; else flrp->l_len = blocker->l_end - - blocker->l_start + 1; + blocker->l_start + 1; } } } @@ -3929,7 +3984,7 @@ nbl_lock_conflict(vnode_t *vp, nbl_op_t op, u_offset_t offset, (lock->l_flock.l_sysid != sysid || lock->l_flock.l_pid != pid) && lock_blocks_io(op, offset, length, - lock->l_type, lock->l_start, lock->l_end)) { + lock->l_type, lock->l_start, lock->l_end)) { conflict = 1; break; } @@ -3969,7 +4024,7 @@ check_active_locks(graph_t *gp) edge_t *ep; for (lock = ACTIVE_HEAD(gp)->l_next; lock != ACTIVE_HEAD(gp); - lock = lock->l_next) { + lock = lock->l_next) { ASSERT(IS_ACTIVE(lock)); ASSERT(NOT_BLOCKED(lock)); ASSERT(!IS_BARRIER(lock)); @@ -3983,7 +4038,7 @@ check_active_locks(graph_t *gp) } for (lock1 = lock->l_next; lock1 != ACTIVE_HEAD(gp); - lock1 = lock1->l_next) { + lock1 = lock1->l_next) { if (lock1->l_vnode == lock->l_vnode) { if (BLOCKS(lock1, lock)) { cmn_err(CE_PANIC, @@ -4078,10 +4133,10 @@ check_sleeping_locks(graph_t *gp) lock_descriptor_t *lock1, *lock2; edge_t *ep; for (lock1 = SLEEPING_HEAD(gp)->l_next; lock1 != SLEEPING_HEAD(gp); - lock1 = lock1->l_next) { + lock1 = lock1->l_next) { ASSERT(!IS_BARRIER(lock1)); for (lock2 = lock1->l_next; lock2 != SLEEPING_HEAD(gp); - lock2 = lock2->l_next) { + lock2 = lock2->l_next) { if (lock1->l_vnode == lock2->l_vnode) { if (BLOCKS(lock2, lock1)) { ASSERT(!IS_GRANTED(lock1)); @@ -4092,7 +4147,7 @@ check_sleeping_locks(graph_t *gp) } for (lock2 = ACTIVE_HEAD(gp)->l_next; lock2 != ACTIVE_HEAD(gp); - lock2 = lock2->l_next) { + lock2 = lock2->l_next) { ASSERT(!IS_BARRIER(lock1)); if 
(lock1->l_vnode == lock2->l_vnode) { if (BLOCKS(lock2, lock1)) { @@ -4133,7 +4188,7 @@ level_two_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2, int no_path) while ((vertex = STACK_TOP(vertex_stack)) != NULL) { STACK_POP(vertex_stack, l_dstack); for (ep = FIRST_ADJ(vertex); ep != HEAD(vertex); - ep = NEXT_ADJ(ep)) { + ep = NEXT_ADJ(ep)) { if (COLORED(ep->to_vertex)) continue; COLOR(ep->to_vertex); diff --git a/usr/src/uts/common/os/share.c b/usr/src/uts/common/os/share.c index 2fcbae4872..3acc10bf7d 100644 --- a/usr/src/uts/common/os/share.c +++ b/usr/src/uts/common/os/share.c @@ -23,6 +23,10 @@ * Use is subject to license terms. */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ + #include <sys/types.h> #include <sys/sysmacros.h> #include <sys/param.h> @@ -46,6 +50,8 @@ static void print_share(struct shrlock *); #endif static int isreadonly(struct vnode *); +static void do_cleanshares(struct vnode *, pid_t, int32_t); + /* * Add the share reservation shr to vp. @@ -335,6 +341,32 @@ del_share(struct vnode *vp, struct shrlock *shr) void cleanshares(struct vnode *vp, pid_t pid) { + do_cleanshares(vp, pid, 0); +} + +/* + * Cleanup all remote share reservations that + * were made by the given sysid on given vnode. + */ +void +cleanshares_by_sysid(struct vnode *vp, int32_t sysid) +{ + if (sysid == 0) + return; + + do_cleanshares(vp, 0, sysid); +} + +/* + * Cleanup share reservations on given vnode made + * by either the given pid or sysid. + * If sysid is 0, remove all shares made by given pid, + * otherwise all shares made by the given sysid will + * be removed. 
+ */ +static void +do_cleanshares(struct vnode *vp, pid_t pid, int32_t sysid) +{ struct shrlock shr; if (vp->v_shrlocks == NULL) @@ -343,7 +375,7 @@ cleanshares(struct vnode *vp, pid_t pid) shr.s_access = 0; shr.s_deny = 0; shr.s_pid = pid; - shr.s_sysid = 0; + shr.s_sysid = sysid; shr.s_own_len = 0; shr.s_owner = NULL; diff --git a/usr/src/uts/common/rpcsvc/Makefile b/usr/src/uts/common/rpcsvc/Makefile index b6e0608a1e..4c134208d9 100644 --- a/usr/src/uts/common/rpcsvc/Makefile +++ b/usr/src/uts/common/rpcsvc/Makefile @@ -22,6 +22,8 @@ # Copyright 2009 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # +# Copyright (c) 2012 by Delphix. All rights reserved. +# # uts/common/rpcsvc/Makefile # This makefile installs system header files that go into # /usr/include/rpcsvc. @@ -34,7 +36,8 @@ include ../../../Makefile.master # Also, the v3 headers have been hacked so that they no longer # quite reflect what goes over the wire. IDMAP_PROT_X= idmap_prot.x -RPCGEN_SRC= autofs_prot.x sm_inter.x nsm_addr.x $(IDMAP_PROT_X) +RPCGEN_SRC= autofs_prot.x nlm_prot.x sm_inter.x nsm_addr.x \ + $(IDMAP_PROT_X) DERIVED_HDRS= $(RPCGEN_SRC:%.x=%.h) @@ -44,6 +47,12 @@ ROOTDIRS= $(ROOT)/usr/include/rpcsvc ROOTHDRS= $(ALLHDRS:%=$(ROOTDIRS)/%) +RPCGENFLAGS = -C +idmap_prot.h := RPCGENFLAGS += -MN +nlm_prot.h := RPCGENFLAGS += -M +sm_inter.h := RPCGENFLAGS += -M +nsm_addr.h := RPCGENFLAGS += -M + $(ROOTDIRS)/%: % $(INS.file) @@ -68,7 +77,4 @@ $(ROOTDIRS): $(INS.dir) %.h: %.x - $(RPCGEN) -C -h $< -o $@ - -idmap_prot.h: $(IDMAP_PROT_X) - $(RPCGEN) -CMNh -o $@ $(IDMAP_PROT_X) + $(RPCGEN) $(RPCGENFLAGS) -h $< -o $@ diff --git a/usr/src/head/rpcsvc/nlm_prot.x b/usr/src/uts/common/rpcsvc/nlm_prot.x index 1583fead44..1ce59d416b 100644 --- a/usr/src/head/rpcsvc/nlm_prot.x +++ b/usr/src/uts/common/rpcsvc/nlm_prot.x @@ -20,10 +20,11 @@ * CDDL HEADER END */ /* - * Network lock manager protocol definition * Copyright (C) 1986, 1992, 1993, 1997, 1999 by Sun Microsystems, Inc. 
* All rights reserved. * + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * * Protocol used between local lock manager and remote lock manager. * * There are currently 3 versions of the protocol in use. Versions 1 @@ -35,8 +36,6 @@ * changes in monitored systems.) */ -%#pragma ident "%Z%%M% %I% %E% SMI" - #if RPC_HDR % %#include <rpc/rpc_sztypes.h> @@ -201,20 +200,20 @@ struct nlm_notify { */ enum nlm4_stats { - NLM4_GRANTED = 0, /* lock was granted */ - NLM4_DENIED = 1, /* lock was not granted, usually */ + nlm4_granted = 0, /* lock was granted */ + nlm4_denied = 1, /* lock was not granted, usually */ /* due to conflicting lock */ - NLM4_DENIED_NOLOCKS = 2, /* not granted: out of resources */ - NLM4_BLOCKED = 3, /* not granted: expect callback */ + nlm4_denied_nolocks = 2, /* not granted: out of resources */ + nlm4_blocked = 3, /* not granted: expect callback */ /* when granted */ - NLM4_DENIED_GRACE_PERIOD = 4, /* not granted: server is */ + nlm4_denied_grace_period = 4, /* not granted: server is */ /* reestablishing old locks */ - NLM4_DEADLCK = 5, /* not granted: deadlock detected */ - NLM4_ROFS = 6, /* not granted: read-only filesystem */ - NLM4_STALE_FH = 7, /* not granted: stale file handle */ - NLM4_FBIG = 8, /* not granted: offset or length */ + nlm4_deadlck = 5, /* not granted: deadlock detected */ + nlm4_rofs = 6, /* not granted: read-only filesystem */ + nlm4_stale_fh = 7, /* not granted: stale file handle */ + nlm4_fbig = 8, /* not granted: offset or length */ /* too big */ - NLM4_FAILED = 9 /* not granted: some other error */ + nlm4_failed = 9 /* not granted: some other error */ }; /* @@ -230,7 +229,7 @@ struct nlm4_holder { }; union nlm4_testrply switch (nlm4_stats stat) { - case NLM4_DENIED: + case nlm4_denied: struct nlm4_holder holder; default: void; @@ -286,33 +285,12 @@ struct nlm4_unlockargs { struct nlm4_lock alock; }; -#ifdef RPC_HDR -%/* -% * The following enums are actually bit encoded for efficient -% * boolean algebra.... 
DON'T change them..... -% */ -#endif - -enum fsh4_mode { - FSM_DN = 0, /* deny none */ - FSM_DR = 1, /* deny read */ - FSM_DW = 2, /* deny write */ - FSM_DRW = 3 /* deny read/write */ -}; - -enum fsh4_access { - FSA_NONE = 0, /* for completeness */ - FSA_R = 1, /* read only */ - FSA_W = 2, /* write only */ - FSA_RW = 3 /* read/write */ -}; - struct nlm4_share { string caller_name<LM_MAXSTRLEN>; netobj fh; netobj oh; - fsh4_mode mode; - fsh4_access access; + fsh_mode mode; + fsh_access access; }; struct nlm4_shareargs { @@ -333,12 +311,29 @@ struct nlm4_notify { }; /* + * Argument for the NLM call-back procedure called by rpc.statd + * when a monitored host status changes. The statd calls the + * NLM prog,vers,proc specified in the SM_MON call. + * NB: This struct must exactly match sm_inter.x:sm_status + * and requires LM_MAXSTRLEN == SM_MAXSTRLEN + */ +struct nlm_sm_status { + string mon_name<LM_MAXSTRLEN>; /* name of host */ + int32 state; /* new state */ + opaque priv[16]; /* private data */ +}; + +/* * Over-the-wire protocol used between the network lock managers */ program NLM_PROG { + version NLM_VERS { + void + NLM_NULL(void) = 0; + nlm_testres NLM_TEST(nlm_testargs) = 1; @@ -382,6 +377,17 @@ program NLM_PROG { NLM_GRANTED_RES(nlm_res) = 15; } = 1; + /* + * Private (loopback-only) call-backs from statd, + * used to notify that some machine has restarted. + * The meaning of these is up to the lock manager + * implementation. (See the SM_MON calls.) 
+ */ + version NLM_SM { + void NLM_SM_NOTIFY1(struct nlm_sm_status) = 17; + void NLM_SM_NOTIFY2(struct nlm_sm_status) = 18; + } = 2; + version NLM_VERSX { nlm_shareres NLM_SHARE(nlm_shareargs) = 20; @@ -395,58 +401,58 @@ program NLM_PROG { version NLM4_VERS { void - NLMPROC4_NULL(void) = 0; + NLM4_NULL(void) = 0; nlm4_testres - NLMPROC4_TEST(nlm4_testargs) = 1; + NLM4_TEST(nlm4_testargs) = 1; nlm4_res - NLMPROC4_LOCK(nlm4_lockargs) = 2; + NLM4_LOCK(nlm4_lockargs) = 2; nlm4_res - NLMPROC4_CANCEL(nlm4_cancargs) = 3; + NLM4_CANCEL(nlm4_cancargs) = 3; nlm4_res - NLMPROC4_UNLOCK(nlm4_unlockargs) = 4; + NLM4_UNLOCK(nlm4_unlockargs) = 4; /* * remote lock manager call-back to grant lock */ nlm4_res - NLMPROC4_GRANTED(nlm4_testargs) = 5; + NLM4_GRANTED(nlm4_testargs) = 5; /* * message passing style of requesting lock */ void - NLMPROC4_TEST_MSG(nlm4_testargs) = 6; + NLM4_TEST_MSG(nlm4_testargs) = 6; void - NLMPROC4_LOCK_MSG(nlm4_lockargs) = 7; + NLM4_LOCK_MSG(nlm4_lockargs) = 7; void - NLMPROC4_CANCEL_MSG(nlm4_cancargs) = 8; + NLM4_CANCEL_MSG(nlm4_cancargs) = 8; void - NLMPROC4_UNLOCK_MSG(nlm4_unlockargs) = 9; + NLM4_UNLOCK_MSG(nlm4_unlockargs) = 9; void - NLMPROC4_GRANTED_MSG(nlm4_testargs) = 10; + NLM4_GRANTED_MSG(nlm4_testargs) = 10; void - NLMPROC4_TEST_RES(nlm4_testres) = 11; + NLM4_TEST_RES(nlm4_testres) = 11; void - NLMPROC4_LOCK_RES(nlm4_res) = 12; + NLM4_LOCK_RES(nlm4_res) = 12; void - NLMPROC4_CANCEL_RES(nlm4_res) = 13; + NLM4_CANCEL_RES(nlm4_res) = 13; void - NLMPROC4_UNLOCK_RES(nlm4_res) = 14; + NLM4_UNLOCK_RES(nlm4_res) = 14; void - NLMPROC4_GRANTED_RES(nlm4_res) = 15; + NLM4_GRANTED_RES(nlm4_res) = 15; /* * DOS-style file sharing */ nlm4_shareres - NLMPROC4_SHARE(nlm4_shareargs) = 20; + NLM4_SHARE(nlm4_shareargs) = 20; nlm4_shareres - NLMPROC4_UNSHARE(nlm4_shareargs) = 21; + NLM4_UNSHARE(nlm4_shareargs) = 21; nlm4_res - NLMPROC4_NM_LOCK(nlm4_lockargs) = 22; + NLM4_NM_LOCK(nlm4_lockargs) = 22; void - NLMPROC4_FREE_ALL(nlm4_notify) = 23; + 
NLM4_FREE_ALL(nlm4_notify) = 23; } = 4; } = 100021; diff --git a/usr/src/uts/common/rpcsvc/sm_inter.x b/usr/src/uts/common/rpcsvc/sm_inter.x index 8104f852e2..7d9f62736c 100644 --- a/usr/src/uts/common/rpcsvc/sm_inter.x +++ b/usr/src/uts/common/rpcsvc/sm_inter.x @@ -26,12 +26,6 @@ %/* from sm_inter.x */ -#ifdef RPC_HDR -% -%#pragma ident "%Z%%M% %I% %E% SMI" -% -#endif - /* * Status monitor protocol specification */ @@ -98,13 +92,13 @@ struct sm_stat { int state; /* state # of status monitor */ }; -enum res { +enum sm_res { stat_succ = 0, /* status monitor agrees to monitor */ stat_fail = 1 /* status monitor cannot monitor */ }; struct sm_stat_res { - res res_stat; + sm_res res_stat; int state; }; @@ -112,7 +106,7 @@ struct sm_stat_res { * structure of the status message sent by the status monitor to the * requesting program when a monitored site changes status. */ -struct status { +struct sm_status { string mon_name<SM_MAXSTRLEN>; int state; opaque priv[16]; /* stored private information */ diff --git a/usr/src/uts/common/sys/flock.h b/usr/src/uts/common/sys/flock.h index 673396963d..3ea7afb23b 100644 --- a/usr/src/uts/common/sys/flock.h +++ b/usr/src/uts/common/sys/flock.h @@ -27,12 +27,13 @@ * Copyright 2004 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 
+ */ #ifndef _SYS_FLOCK_H #define _SYS_FLOCK_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/types.h> #include <sys/fcntl.h> #include <sys/vnode.h> @@ -238,6 +239,7 @@ int flk_check_lock_data(u_offset_t, u_offset_t, offset_t); int flk_has_remote_locks(struct vnode *vp); void flk_set_lockmgr_status(flk_lockmgr_status_t status); int flk_sysid_has_locks(int sysid, int chklck); +int flk_has_remote_locks_for_sysid(vnode_t *vp, int); void flk_init_callback(flk_callback_t *, callb_cpr_t *(*)(flk_cb_when_t, void *), void *); void flk_add_callback(flk_callback_t *, diff --git a/usr/src/uts/common/sys/share.h b/usr/src/uts/common/sys/share.h index ed4d822065..4de5f5ce65 100644 --- a/usr/src/uts/common/sys/share.h +++ b/usr/src/uts/common/sys/share.h @@ -19,6 +19,9 @@ * CDDL HEADER END */ /* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -66,6 +69,7 @@ struct flock64; extern int add_share(struct vnode *, struct shrlock *); extern int del_share(struct vnode *, struct shrlock *); extern void cleanshares(struct vnode *, pid_t); +extern void cleanshares_by_sysid(struct vnode *, int32_t); extern int shr_has_remote_shares(vnode_t *, int32_t); extern int proc_has_nbmand_share_on_vp(vnode_t *, pid_t); #endif /* _KERNEL */ diff --git a/usr/src/uts/intel/Makefile.intel.shared b/usr/src/uts/intel/Makefile.intel.shared index da586e2184..551d013e3a 100644 --- a/usr/src/uts/intel/Makefile.intel.shared +++ b/usr/src/uts/intel/Makefile.intel.shared @@ -654,7 +654,8 @@ MISC_KMODS += hwa1480_fw MISC_KMODS += uathfw MISC_KMODS += uwba -$(CLOSED_BUILD)CLOSED_MISC_KMODS += klmmod klmops +MISC_KMODS += klmmod klmops + $(CLOSED_BUILD)CLOSED_MISC_KMODS += scsi_vhci_f_asym_lsi $(CLOSED_BUILD)CLOSED_MISC_KMODS += scsi_vhci_f_asym_emc $(CLOSED_BUILD)CLOSED_MISC_KMODS += scsi_vhci_f_sym_emc diff --git a/usr/src/uts/intel/klmmod/Makefile b/usr/src/uts/intel/klmmod/Makefile 
new file mode 100644 index 0000000000..43e24b29b1 --- /dev/null +++ b/usr/src/uts/intel/klmmod/Makefile @@ -0,0 +1,94 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# Copyright (c) 2012 by Delphix. All rights reserved. +# +# This makefile drives the production of the network lock manager server +# specific kernel module. +# +# intel implementation architecture dependent +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +# +# Define the module and object file sets. +# +MODULE = klmmod +OBJECTS = $(KLMMOD_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(KLMMOD_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_MISC_DIR)/$(MODULE) + +# +# Include common rules. +# +include $(UTSBASE)/intel/Makefile.intel + +# +# Define targets +# +ALL_TARGET = $(BINARY) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +# +# Overrides. +# +LDFLAGS += -dy -Nstrmod/rpcmod -Nfs/nfs +LDFLAGS += -M $(UTSBASE)/common/klm/mapfile-mod +CTFMRGFLAGS += -f + +# +# Code generated by rpcgen triggers the -Wswitch warning. 
+# +CERRWARN += -_gcc=-Wno-switch + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. +# +include $(UTSBASE)/intel/Makefile.targ diff --git a/usr/src/uts/intel/klmops/Makefile b/usr/src/uts/intel/klmops/Makefile new file mode 100644 index 0000000000..e62d6ce160 --- /dev/null +++ b/usr/src/uts/intel/klmops/Makefile @@ -0,0 +1,89 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# Copyright (c) 2012 by Delphix. All rights reserved. +# +# This makefile drives the production of the network lock manager client +# side module. +# +# intel implementation architecture dependent +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +# +# Define the module and object file sets. 
+# +MODULE = klmops +OBJECTS = $(KLMOPS_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(KLMOPS_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_MISC_DIR)/$(MODULE) + +# +# Include common rules. +# +include $(UTSBASE)/intel/Makefile.intel + +# +# Define targets +# +ALL_TARGET = $(BINARY) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +# +# Overrides. +# +LDFLAGS += -dy -Nstrmod/rpcmod -Nfs/nfs -Nmisc/klmmod +LDFLAGS += -M $(UTSBASE)/common/klm/mapfile-ops +CTFMRGFLAGS += -f + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. +# +include $(UTSBASE)/intel/Makefile.targ diff --git a/usr/src/uts/sparc/Makefile.sparc.shared b/usr/src/uts/sparc/Makefile.sparc.shared index 8af720cd34..71fe0467fc 100644 --- a/usr/src/uts/sparc/Makefile.sparc.shared +++ b/usr/src/uts/sparc/Makefile.sparc.shared @@ -434,7 +434,8 @@ MISC_KMODS += spuni MISC_KMODS += hwa1480_fw uwba MISC_KMODS += mii -$(CLOSED_BUILD)CLOSED_MISC_KMODS += klmmod klmops +MISC_KMODS += klmmod klmops + $(CLOSED_BUILD)CLOSED_MISC_KMODS += scsi_vhci_f_asym_lsi $(CLOSED_BUILD)CLOSED_MISC_KMODS += scsi_vhci_f_asym_emc $(CLOSED_BUILD)CLOSED_MISC_KMODS += scsi_vhci_f_sym_emc diff --git a/usr/src/uts/sparc/klmmod/Makefile b/usr/src/uts/sparc/klmmod/Makefile new file mode 100644 index 0000000000..ed0ac4c675 --- /dev/null +++ b/usr/src/uts/sparc/klmmod/Makefile @@ -0,0 +1,95 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. 
+# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# Copyright (c) 2012 by Delphix. All rights reserved. +# +# This makefile drives the production of the server-side network lock +# manager kernel module. +# +# sparc architecture dependent +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +# +# Define the module and object file sets. +# +MODULE = klmmod +OBJECTS = $(KLMMOD_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(KLMMOD_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_MISC_DIR)/$(MODULE) + +# +# Include common rules. +# +include $(UTSBASE)/sparc/Makefile.sparc + +# +# Define targets +# +ALL_TARGET = $(BINARY) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +# +# Overrides. +# +CFLAGS += $(CCVERBOSE) +LDFLAGS += -dy -Nstrmod/rpcmod -Nfs/nfs +LDFLAGS += -M $(UTSBASE)/common/klm/mapfile-mod +CTFMRGFLAGS += -f + +# +# Code generated by rpcgen triggers the -Wswitch warning. +# +CERRWARN += -_gcc=-Wno-switch + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. 
+# +include $(UTSBASE)/sparc/Makefile.targ diff --git a/usr/src/uts/sparc/klmops/Makefile b/usr/src/uts/sparc/klmops/Makefile new file mode 100644 index 0000000000..6c44483c85 --- /dev/null +++ b/usr/src/uts/sparc/klmops/Makefile @@ -0,0 +1,90 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# Copyright (c) 2012 by Delphix. All rights reserved. +# +# This makefile drives the production of the client-side network lock +# manager kernel module. +# +# sparc architecture dependent +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +# +# Define the module and object file sets. +# +MODULE = klmops +OBJECTS = $(KLMOPS_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(KLMOPS_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_MISC_DIR)/$(MODULE) + +# +# Include common rules. +# +include $(UTSBASE)/sparc/Makefile.sparc + +# +# Define targets +# +ALL_TARGET = $(BINARY) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +# +# Overrides. 
+# +CFLAGS += $(CCVERBOSE) +LDFLAGS += -dy -Nstrmod/rpcmod -Nfs/nfs -Nmisc/klmmod +LDFLAGS += -M $(UTSBASE)/common/klm/mapfile-ops +CTFMRGFLAGS += -f + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. +# +include $(UTSBASE)/sparc/Makefile.targ |