summaryrefslogtreecommitdiff
path: root/usr/src
diff options
context:
space:
mode:
author calum <none@none> 2006-05-22 15:43:31 -0700
committer calum <none@none> 2006-05-22 15:43:31 -0700
commit cee8668251d5ec44fd1c6d6ddeb9c1d1821a57d2 (patch)
tree 8d4780a53c331cd3c2879233916bc1608c9a6df0 /usr/src
parent a4ac8bb3f5b2fff60581bee101792ac7a34bad8c (diff)
download illumos-gate-cee8668251d5ec44fd1c6d6ddeb9c1d1821a57d2.tar.gz
PSARC/2006/313 NFSv4: nfsd "-s" distributed stable storage
6244819 NFSv4 needs distributed stable storage to work on Cluster HA-NFS
Diffstat (limited to 'usr/src')
-rw-r--r-- usr/src/cmd/fs.d/nfs/nfsd/Makefile 9
-rw-r--r-- usr/src/cmd/fs.d/nfs/nfsd/nfsd.c 283
-rw-r--r-- usr/src/uts/common/fs/nfs/nfs4_srv.c 137
-rw-r--r-- usr/src/uts/common/fs/nfs/nfs4_state.c 532
-rw-r--r-- usr/src/uts/common/fs/nfs/nfs_server.c 208
-rw-r--r-- usr/src/uts/common/fs/nfs/nfs_sys.c 87
-rw-r--r-- usr/src/uts/common/nfs/nfs.h 5
-rw-r--r-- usr/src/uts/common/nfs/nfs4.h 37
-rw-r--r-- usr/src/uts/common/nfs/nfssys.h 11
9 files changed, 999 insertions, 310 deletions
diff --git a/usr/src/cmd/fs.d/nfs/nfsd/Makefile b/usr/src/cmd/fs.d/nfs/nfsd/Makefile
index 81a54a572d..1061f6d286 100644
--- a/usr/src/cmd/fs.d/nfs/nfsd/Makefile
+++ b/usr/src/cmd/fs.d/nfs/nfsd/Makefile
@@ -2,9 +2,8 @@
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License"). You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
# CDDL HEADER END
#
#
-# Copyright 1989,2001-2003 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
# ident "%Z%%M% %I% %E% SMI"
@@ -32,7 +31,7 @@ ATTMK= $(TYPEPROG)
include ../../Makefile.fstype
-LDLIBS += -lnsl -lcmd
+LDLIBS += -lnsl -lcmd -lnvpair
LOCAL= nfsd.o
OBJS= $(LOCAL) nfs_tbind.o thrpool.o
diff --git a/usr/src/cmd/fs.d/nfs/nfsd/nfsd.c b/usr/src/cmd/fs.d/nfs/nfsd/nfsd.c
index 6dbb8053c4..169a3cd544 100644
--- a/usr/src/cmd/fs.d/nfs/nfsd/nfsd.c
+++ b/usr/src/cmd/fs.d/nfs/nfsd/nfsd.c
@@ -45,6 +45,7 @@
#include <sys/param.h>
#include <sys/types.h>
+#include <sys/stat.h>
#include <syslog.h>
#include <tiuser.h>
#include <rpc/rpc.h>
@@ -73,15 +74,22 @@
#include <deflt.h>
#include <rpcsvc/daemon_utils.h>
#include <rpcsvc/nfs4_prot.h>
+#include <libnvpair.h>
#include "nfs_tbind.h"
#include "thrpool.h"
/* quiesce requests will be ignored if nfs_server_vers_max < QUIESCE_VERSMIN */
#define QUIESCE_VERSMIN 4
+/* DSS: distributed stable storage */
+#define DSS_VERSMIN 4
static int nfssvc(int, struct netbuf, struct netconfig *);
-static int nfssvcpool(int maxservers);
+static int nfssvcpool(int maxservers);
+static int dss_init(uint_t npaths, char **pathnames);
+static void dss_mkleafdirs(uint_t npaths, char **pathnames);
+static void dss_mkleafdir(char *dir, char *leaf, char *path);
static void usage(void);
+int qstrcmp(const void *s1, const void *s2);
extern int _nfssys(int, void *);
@@ -138,6 +146,8 @@ main(int ac, char *av[])
NETSELPDECL(providerp);
char *defval;
boolean_t can_do_mlp;
+ uint_t dss_npaths = 0;
+ char **dss_pathnames = NULL;
MyName = *av;
@@ -239,7 +249,7 @@ main(int ac, char *av[])
}
opt_cnt = 0;
- while ((i = getopt(ac, av, "ac:p:t:l:")) != EOF) {
+ while ((i = getopt(ac, av, "ac:p:s:t:l:")) != EOF) {
switch (i) {
case 'a':
free(df_proto);
@@ -261,6 +271,39 @@ main(int ac, char *av[])
opt_cnt++;
break;
+ /*
+ * DSS: NFSv4 distributed stable storage.
+ *
+ * This is a Contracted Project Private interface, for
+ * the sole use of Sun Cluster HA-NFS. See PSARC/2006/313.
+ */
+ case 's':
+ if (strlen(optarg) < MAXPATHLEN) {
+ /* first "-s" option encountered? */
+ if (dss_pathnames == NULL) {
+ /*
+ * Allocate maximum possible space
+ * required given cmdline arg count;
+ * "-s <path>" consumes two args.
+ */
+ size_t sz = (ac / 2) * sizeof (char *);
+ dss_pathnames = (char **)malloc(sz);
+ if (dss_pathnames == NULL) {
+ (void) fprintf(stderr, "%s: "
+ "dss paths malloc failed\n",
+ av[0]);
+ exit(1);
+ }
+ (void) memset(dss_pathnames, 0, sz);
+ }
+ dss_pathnames[dss_npaths] = optarg;
+ dss_npaths++;
+ } else {
+ (void) fprintf(stderr,
+ "%s: -s pathname too long.\n", av[0]);
+ }
+ break;
+
case 't':
provider = optarg;
df_allflag = 0;
@@ -410,6 +453,18 @@ main(int ac, char *av[])
exit(0);
}
+ /*
+ * If we've been given a list of paths to be used for distributed
+ * stable storage, and provided we're going to run a version
+ * that supports it, setup the DSS paths.
+ */
+ if (dss_pathnames != NULL && nfs_server_vers_max >= DSS_VERSMIN) {
+ if (dss_init(dss_npaths, dss_pathnames) != 0) {
+ syslog(LOG_ERR, "dss_init failed. Exiting.");
+ exit(1);
+ }
+ }
+
sigset(SIGTERM, sigflush);
sigset(SIGUSR1, quiesce);
@@ -520,7 +575,7 @@ done:
if (num_fds == 0) {
(void) syslog(LOG_ERR,
- "Could not start NFS service for any protocol. Exiting.");
+ "Could not start NFS service for any protocol. Exiting");
exit(1);
}
@@ -643,7 +698,12 @@ sigflush(int sig)
/*
* SIGUSR1 handler.
- * Request server quiesce, then exit. For subsequent warm start.
+ *
+ * Request that server quiesce, then (nfsd) exit. For subsequent warm start.
+ *
+ * This is a Contracted Project Private interface, for the sole use
+ * of Sun Cluster HA-NFS. See PSARC/2004/497.
+ *
* Equivalent to SIGTERM handler if nfs_server_vers_max < QUIESCE_VERSMIN.
*/
static void
@@ -654,10 +714,10 @@ quiesce(int sig)
if (nfs_server_vers_max >= QUIESCE_VERSMIN) {
/* Request server quiesce at next shutdown */
- error = _nfssys(NFS_SVC_REQUEST_QUIESCE, &id);
+ error = _nfssys(NFS4_SVC_REQUEST_QUIESCE, &id);
if (error) {
syslog(LOG_ERR,
- "_nfssys(NFS_SVC_REQUEST_QUIESCE) failed: %s\n",
+ "_nfssys(NFS4_SVC_REQUEST_QUIESCE) failed: %s",
strerror(errno));
return;
}
@@ -668,3 +728,214 @@ quiesce(int sig)
exit(0);
}
+
+/*
+ * DSS: distributed stable storage.
+ * Create leaf directories as required, keeping an eye on path
+ * lengths. Calls exit(1) on failure.
+ * The pathnames passed in must already exist, and must be writeable by nfsd.
+ * Note: the leaf directories under NFS4_VAR_DIR are not created here;
+ * they're created at pkg install.
+ *
+ * npaths:    number of entries in pathnames
+ * pathnames: array of npaths directory names; for each one, both the
+ *            NFS4_DSS_STATE_LEAF and NFS4_DSS_OLDSTATE_LEAF directories
+ *            are created via dss_mkleafdir()
+ */
+static void
+dss_mkleafdirs(uint_t npaths, char **pathnames)
+{
+ uint_t i;
+ char *tmppath = NULL;
+
+ /*
+ * Create the temporary storage used by dss_mkleafdir() here,
+ * rather than in that function, so that it only needs to be
+ * done once, rather than once for each call. Too big to put
+ * on the function's stack.
+ */
+ tmppath = (char *)malloc(MAXPATHLEN);
+ if (tmppath == NULL) {
+ syslog(LOG_ERR, "tmppath malloc failed. Exiting");
+ exit(1);
+ }
+
+ /* index is unsigned to match npaths and avoid a mixed-sign compare */
+ for (i = 0; i < npaths; i++) {
+ char *p = pathnames[i];
+
+ dss_mkleafdir(p, NFS4_DSS_STATE_LEAF, tmppath);
+ dss_mkleafdir(p, NFS4_DSS_OLDSTATE_LEAF, tmppath);
+ }
+
+ free(tmppath);
+}
+
+/*
+ * Create "leaf" in "dir" (which must already exist).
+ *
+ * dir:     existing parent directory
+ * leaf:    name of the directory to create; it is joined to dir with a
+ *          '/' separator, so it need not begin with one
+ * tmppath: caller-supplied scratch buffer of at least MAXPATHLEN bytes,
+ *          overwritten with the full path
+ *
+ * Calls exit(1) if the joined path would not fit in MAXPATHLEN, or if
+ * mkdir() fails for any reason other than EEXIST.
+ */
+static void
+dss_mkleafdir(char *dir, char *leaf, char *tmppath)
+{
+ /*
+ * MAXPATHLEN includes the terminating NUL; the extra 1 accounts
+ * for the '/' separator that snprintf() inserts below.
+ */
+ if (strlen(dir) + 1 + strlen(leaf) > MAXPATHLEN - 1) {
+ syslog(LOG_ERR, "stable storage path too long: %s/%s. Exiting",
+ dir, leaf);
+ exit(1);
+ }
+
+ (void) snprintf(tmppath, MAXPATHLEN, "%s/%s", dir, leaf);
+
+ /* the directory may already exist: that's OK */
+ if (mkdir(tmppath, NFS4_DSS_DIR_MODE) == -1 && errno != EEXIST) {
+ /* path first, then the reason, to match the message wording */
+ syslog(LOG_ERR, "error creating stable storage directory: "
+ "%s: %s. Exiting", tmppath, strerror(errno));
+ exit(1);
+ }
+}
+
+/*
+ * Create the storage dirs, and pass the path list to the kernel.
+ * This requires the nfssrv module to be loaded; the _nfssys() syscall
+ * will fail ENOTSUP if it is not.
+ * Use libnvpair(3LIB) to pass the data to the kernel.
+ *
+ * npaths:    number of entries in pathnames
+ * pathnames: malloc()ed array of path strings. It is sorted in place and,
+ *            if duplicates are found, free()d and replaced locally by a
+ *            de-duplicated copy; the caller must not reuse its pointer
+ *            after this call.
+ *
+ * Returns 0 on success, 1 if any libnvpair or _nfssys() call fails.
+ * Calls exit(1) if memory cannot be allocated or a leaf directory cannot
+ * be created.
+ */
+static int
+dss_init(uint_t npaths, char **pathnames)
+{
+ int i, j, nskipped, error;
+ char *bufp;
+ uint32_t bufsize;
+ size_t buflen;
+ nvlist_t *nvl;
+
+ if (npaths > 1) {
+ /*
+ * We need to remove duplicate paths; this might be user error
+ * in the general case, but HA-NFSv4 can also cause this.
+ * Sort the pathnames array, and NULL out duplicates,
+ * then write the non-NULL entries to a new array.
+ * Sorting will also allow the kernel to optimise its searches.
+ */
+
+ qsort(pathnames, npaths, sizeof (char *), qstrcmp);
+
+ /* now NULL out any duplicates */
+ i = 0; j = 1; nskipped = 0;
+ while (j < npaths) {
+ /* strcmp() returns an int: compare against 0, not NULL */
+ if (strcmp(pathnames[i], pathnames[j]) == 0) {
+ pathnames[j] = NULL;
+ j++;
+ nskipped++;
+ continue;
+ }
+
+ /* skip i over any of its NULLed duplicates */
+ i = j++;
+ }
+
+ /* finally, write the non-NULL entries to a new array */
+ if (nskipped > 0) {
+ int nreal;
+ size_t sz;
+ char **tmp_pathnames;
+
+ nreal = npaths - nskipped;
+
+ sz = nreal * sizeof (char *);
+ tmp_pathnames = (char **)malloc(sz);
+ if (tmp_pathnames == NULL) {
+ syslog(LOG_ERR, "tmp_pathnames malloc failed");
+ exit(1);
+ }
+
+ for (i = 0, j = 0; i < npaths; i++)
+ if (pathnames[i] != NULL)
+ tmp_pathnames[j++] = pathnames[i];
+ free(pathnames);
+ pathnames = tmp_pathnames;
+ npaths = nreal;
+ }
+
+ }
+
+ /* Create directories to store the distributed state files */
+ dss_mkleafdirs(npaths, pathnames);
+
+ /*
+ * Create the name-value pair list.
+ * The libnvpair calls below return the error number directly rather
+ * than setting errno, so report the returned value.
+ */
+ error = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0);
+ if (error) {
+ syslog(LOG_ERR, "nvlist_alloc failed: %s.", strerror(error));
+ return (1);
+ }
+
+ /* Add the pathnames array as a single name-value pair */
+ error = nvlist_add_string_array(nvl, NFS4_DSS_NVPAIR_NAME,
+ pathnames, npaths);
+ if (error) {
+ syslog(LOG_ERR, "nvlist_add_string_array failed: %s.",
+ strerror(error));
+ nvlist_free(nvl);
+ return (1);
+ }
+
+ /*
+ * Pack list into contiguous memory, for passing to kernel.
+ * nvlist_pack() will allocate the memory for the buffer,
+ * which we should free() when no longer needed.
+ * NV_ENCODE_XDR for safety across ILP32/LP64 kernel boundary.
+ */
+ bufp = NULL;
+ error = nvlist_pack(nvl, &bufp, &buflen, NV_ENCODE_XDR, 0);
+ if (error) {
+ syslog(LOG_ERR, "nvlist_pack failed: %s.", strerror(error));
+ nvlist_free(nvl);
+ return (1);
+ }
+
+ /* Now we have the packed buffer, we no longer need the list */
+ nvlist_free(nvl);
+
+ /*
+ * Let the kernel know in advance how big the buffer is.
+ * NOTE: we cannot just pass buflen, since size_t is a long, and
+ * thus a different size between ILP32 userland and LP64 kernel.
+ * Use an int for the transfer, since that should be big enough;
+ * this is a no-op at the moment, here, since nfsd is 32-bit, but
+ * that could change.
+ */
+ bufsize = (uint32_t)buflen;
+ error = _nfssys(NFS4_DSS_SETPATHS_SIZE, &bufsize);
+ if (error) {
+ syslog(LOG_ERR,
+ "_nfssys(NFS4_DSS_SETPATHS_SIZE) failed: %s. ",
+ strerror(errno));
+ free(bufp);
+ return (1);
+ }
+
+ /* Pass the packed buffer to the kernel */
+ error = _nfssys(NFS4_DSS_SETPATHS, bufp);
+ if (error) {
+ syslog(LOG_ERR,
+ "_nfssys(NFS4_DSS_SETPATHS) failed: %s. ", strerror(errno));
+ free(bufp);
+ return (1);
+ }
+
+ /*
+ * The kernel has now unpacked the buffer and extracted the
+ * pathnames array, we no longer need the buffer.
+ */
+ free(bufp);
+
+ return (0);
+}
+
+/*
+ * Quick sort string compare routine, for qsort.
+ * Needed to make arg types correct.
+ *
+ * p1, p2: pointers to the array elements being compared; each element
+ *         is itself a pointer to a NUL-terminated string.
+ * Returns the strcmp() result for the two strings.
+ */
+int
+qstrcmp(const void *p1, const void *p2)
+{
+ /* keep the const qualification qsort() hands us */
+ const char *s1 = *((char * const *)p1);
+ const char *s2 = *((char * const *)p2);
+
+ return (strcmp(s1, s2));
+}
diff --git a/usr/src/uts/common/fs/nfs/nfs4_srv.c b/usr/src/uts/common/fs/nfs/nfs4_srv.c
index c0222cc6e2..0646bace0f 100644
--- a/usr/src/uts/common/fs/nfs/nfs4_srv.c
+++ b/usr/src/uts/common/fs/nfs/nfs4_srv.c
@@ -54,6 +54,7 @@
#include <sys/policy.h>
#include <sys/fem.h>
#include <sys/sdt.h>
+#include <sys/ddi.h>
#include <rpc/types.h>
#include <rpc/auth.h>
@@ -272,10 +273,6 @@ rfs4_servinst_t *rfs4_cur_servinst = NULL; /* current server instance */
kmutex_t rfs4_servinst_lock; /* protects linked list */
int rfs4_seen_first_compound; /* set first time we see one */
-#ifdef DEBUG
-int rfs4_servinst_debug = 0;
-#endif
-
/*
* NFS4 op dispatch table
*/
@@ -470,6 +467,8 @@ static char *rfs4_op_string[] = {
void rfs4_ss_chkclid(rfs4_client_t *);
+extern size_t strlcpy(char *dst, const char *src, size_t dstsize);
+
#ifdef nextdp
#undef nextdp
#endif
@@ -601,9 +600,6 @@ rfs4_grace_start(rfs4_servinst_t *sip)
{
time_t now = gethrestime_sec();
- NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
- "rfs4_grace_start: inst %p: 0x%lx", (void *)sip, now));
-
rw_enter(&sip->rwlock, RW_WRITER);
sip->start_time = now;
sip->grace_period = rfs4_grace_period;
@@ -655,24 +651,13 @@ rfs4_clnt_in_grace(rfs4_client_t *cp)
void
rfs4_grace_reset_all(void)
{
-#ifdef DEBUG
- int n = 0;
-#endif
rfs4_servinst_t *sip;
mutex_enter(&rfs4_servinst_lock);
- for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) {
- if (rfs4_servinst_in_grace(sip)) {
+ for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
+ if (rfs4_servinst_in_grace(sip))
rfs4_grace_start(sip);
-#ifdef DEBUG
- n++;
-#endif
- }
- }
mutex_exit(&rfs4_servinst_lock);
-
- NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
- "rfs4_grace_reset_all: reset %d instances", n));
}
/*
@@ -681,23 +666,52 @@ rfs4_grace_reset_all(void)
void
rfs4_grace_start_new(void)
{
-#ifdef DEBUG
- int n = 0;
-#endif
rfs4_servinst_t *sip;
mutex_enter(&rfs4_servinst_lock);
- for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) {
+ for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
if (rfs4_servinst_grace_new(sip))
rfs4_grace_start(sip);
-#ifdef DEBUG
- n++;
-#endif
- }
mutex_exit(&rfs4_servinst_lock);
+}
+
+static rfs4_dss_path_t *
+rfs4_dss_newpath(rfs4_servinst_t *sip, char *path, unsigned index)
+{
+ size_t len;
+ rfs4_dss_path_t *dss_path;
+
+ dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
+
+ /*
+ * Take a copy of the string, since the original may be overwritten.
+ * Sadly, no strdup() in the kernel.
+ */
+ /* allow for NUL */
+ len = strlen(path) + 1;
+ dss_path->path = kmem_alloc(len, KM_SLEEP);
+ (void) strlcpy(dss_path->path, path, len);
+
+ /* associate with servinst */
+ dss_path->sip = sip;
+ dss_path->index = index;
+
+ /*
+ * Add to list of served paths.
+ * No locking required, as we're only ever called at startup.
+ */
+ if (rfs4_dss_pathlist == NULL) {
+ /* this is the first dss_path_t */
- NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
- "rfs4_grace_start_new: started %d new instances", n));
+ /* needed for insque/remque */
+ dss_path->next = dss_path->prev = dss_path;
+
+ rfs4_dss_pathlist = dss_path;
+ } else {
+ insque(dss_path, rfs4_dss_pathlist);
+ }
+
+ return (dss_path);
}
/*
@@ -706,9 +720,11 @@ rfs4_grace_start_new(void)
* recovery window.
*/
void
-rfs4_servinst_create(int start_grace)
+rfs4_servinst_create(int start_grace, int dss_npaths, char **dss_paths)
{
+ unsigned i;
rfs4_servinst_t *sip;
+ rfs4_oldstate_t *oldstate;
sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
@@ -718,11 +734,28 @@ rfs4_servinst_create(int start_grace)
sip->next = NULL;
sip->prev = NULL;
+ rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
+ /*
+ * This initial dummy entry is required to setup for insque/remque.
+ * It must be skipped over whenever the list is traversed.
+ */
+ oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
+ /* insque/remque require initial list entry to be self-terminated */
+ oldstate->next = oldstate;
+ oldstate->prev = oldstate;
+ sip->oldstate = oldstate;
+
+
+ sip->dss_npaths = dss_npaths;
+ sip->dss_paths = kmem_alloc(dss_npaths *
+ sizeof (rfs4_dss_path_t *), KM_SLEEP);
+
+ for (i = 0; i < dss_npaths; i++) {
+ sip->dss_paths[i] = rfs4_dss_newpath(sip, dss_paths[i], i);
+ }
+
mutex_enter(&rfs4_servinst_lock);
- if (rfs4_cur_servinst == NULL) {
- NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
- "rfs4_servinst_create: creating first instance"));
- } else {
+ if (rfs4_cur_servinst != NULL) {
/* add to linked list */
sip->prev = rfs4_cur_servinst;
rfs4_cur_servinst->next = sip;
@@ -731,11 +764,8 @@ rfs4_servinst_create(int start_grace)
rfs4_grace_start(sip);
/* make the new instance "current" */
rfs4_cur_servinst = sip;
- mutex_exit(&rfs4_servinst_lock);
- NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
- "rfs4_servinst_create: new current instance: %p; start_grace: %d",
- (void *)sip, start_grace));
+ mutex_exit(&rfs4_servinst_lock);
}
/*
@@ -757,15 +787,17 @@ rfs4_servinst_destroy_all(void)
for (sip = current; sip != NULL; sip = prev) {
prev = sip->prev;
rw_destroy(&sip->rwlock);
+ if (sip->oldstate)
+ kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
+ if (sip->dss_paths)
+ kmem_free(sip->dss_paths,
+ sip->dss_npaths * sizeof (rfs4_dss_path_t *));
kmem_free(sip, sizeof (rfs4_servinst_t));
#ifdef DEBUG
n++;
#endif
}
mutex_exit(&rfs4_servinst_lock);
-
- NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
- "rfs4_servinst_destroy_all: destroyed %d instances", n));
}
/*
@@ -777,10 +809,6 @@ rfs4_servinst_assign(rfs4_client_t *cp, rfs4_servinst_t *sip)
{
ASSERT(rfs4_dbe_refcnt(cp->dbe) > 0);
- NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
- "rfs4_servinst_assign: client: %p, old: %p, new: %p", (void *)cp,
- (void *)cp->server_instance, (void *)sip));
-
/*
* The lock ensures that if the current instance is in the process
* of changing, we will see the new one.
@@ -7486,7 +7514,15 @@ rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
}
/*
- * Record clientid in stable storage
+ * Update the client's associated server instance, if it's changed
+ * since the client was created.
+ */
+ if (rfs4_servinst(cp) != rfs4_cur_servinst)
+ rfs4_servinst_assign(cp, rfs4_cur_servinst);
+
+ /*
+ * Record clientid in stable storage.
+ * Must be done after server instance has been assigned.
*/
rfs4_ss_clid(cp, req);
@@ -7501,13 +7537,6 @@ rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
rfs4_update_lease(cp);
/*
- * Update the client's associated server instance, if it's changed
- * since the client was created.
- */
- if (rfs4_servinst(cp) != rfs4_cur_servinst)
- rfs4_servinst_assign(cp, rfs4_cur_servinst);
-
- /*
* Check to see if client can perform reclaims
*/
rfs4_ss_chkclid(cp);
diff --git a/usr/src/uts/common/fs/nfs/nfs4_state.c b/usr/src/uts/common/fs/nfs/nfs4_state.c
index 463cc89e6f..dd78ad7b74 100644
--- a/usr/src/uts/common/fs/nfs/nfs4_state.c
+++ b/usr/src/uts/common/fs/nfs/nfs4_state.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -39,7 +38,7 @@
#include <nfs/nfssys.h>
#include <nfs/lm.h>
#include <sys/pathname.h>
-
+#include <sys/nvpair.h>
extern time_t rfs4_start_time;
@@ -72,6 +71,11 @@ int rfs4_debug;
static uint32_t rfs4_database_debug = 0x00;
+static void rfs4_ss_clid_write(rfs4_client_t *cp, char *leaf);
+static void rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dir, char *leaf);
+static void rfs4_dss_clear_oldstate(rfs4_servinst_t *sip);
+static void rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip);
+
/*
* Couple of simple init/destroy functions for a general waiter
*/
@@ -333,6 +337,8 @@ static time_t rfs4_file_cache_time = 0;
static time_t rfs4_deleg_state_cache_time = 0;
static bool_t rfs4_client_create(rfs4_entry_t, void *);
+static void rfs4_dss_remove_cpleaf(rfs4_client_t *);
+static void rfs4_dss_remove_leaf(rfs4_servinst_t *, char *, char *);
static void rfs4_client_destroy(rfs4_entry_t);
static bool_t rfs4_client_expiry(rfs4_entry_t);
static uint32_t clientid_hash(void *);
@@ -394,15 +400,8 @@ static void *deleg_state_mkkey(rfs4_entry_t);
static void rfs4_state_rele_nounlock(rfs4_state_t *);
-static rfs4_oldstate_t *rfs4_oldstate = NULL;
-static krwlock_t rfs4_oldstate_lock;
static int rfs4_ss_enabled = 0;
-#define NFS4_VAR_DIR "/var/nfs"
-#define NFS4_STATE_DIR NFS4_VAR_DIR"/v4_state"
-#define NFS4_OLDSTATE_DIR NFS4_VAR_DIR"/v4_oldstate"
-#define NFS4_SS_DIR_MODE 0755
-
extern void (*rfs4_client_clrst)(struct nfs4clrst_args *);
void
@@ -411,24 +410,6 @@ rfs4_ss_pnfree(rfs4_ss_pn_t *ss_pn)
kmem_free(ss_pn, sizeof (rfs4_ss_pn_t));
}
-/*
- * Free all malloced rsf4_oldstate_t memory
- */
-void
-rfs4_oldstate_free(rfs4_oldstate_t *ros)
-{
- if (ros == NULL)
- return;
-
- if (ros->cl_id4.id_val)
- kmem_free(ros->cl_id4.id_val, ros->cl_id4.id_len);
-
- if (ros->ss_pn)
- kmem_free(ros->ss_pn, sizeof (rfs4_ss_pn_t));
-
- kmem_free(ros, sizeof (rfs4_oldstate_t));
-}
-
static rfs4_ss_pn_t *
rfs4_ss_pnalloc(char *dir, char *leaf)
{
@@ -465,9 +446,8 @@ rfs4_ss_movestate(char *sdir, char *ddir, char *leaf)
{
rfs4_ss_pn_t *src, *dst;
- if ((src = rfs4_ss_pnalloc(sdir, leaf)) == NULL) {
+ if ((src = rfs4_ss_pnalloc(sdir, leaf)) == NULL)
return (NULL);
- }
if ((dst = rfs4_ss_pnalloc(ddir, leaf)) == NULL) {
rfs4_ss_pnfree(src);
@@ -500,9 +480,8 @@ rfs4_ss_getstate(vnode_t *dvp, rfs4_ss_pn_t *ss_pn)
uint_t id_len;
int err, kill_file, file_vers;
- if (ss_pn == NULL) {
+ if (ss_pn == NULL)
return (NULL);
- }
/*
* open the state file.
@@ -554,7 +533,7 @@ rfs4_ss_getstate(vnode_t *dvp, rfs4_ss_pn_t *ss_pn)
*/
iov[0].iov_base = (caddr_t)&file_vers;
iov[0].iov_len = sizeof (int);
- iov[1].iov_base = (caddr_t)cl_ss;
+ iov[1].iov_base = (caddr_t)&cl_ss->cl_id4.verifier;
iov[1].iov_len = NFS4_VERIFIER_SIZE;
iov[2].iov_base = (caddr_t)&id_len;
iov[2].iov_len = sizeof (uint_t);
@@ -626,9 +605,11 @@ rfs4_ss_getstate(vnode_t *dvp, rfs4_ss_pn_t *ss_pn)
#define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
/*
+ * Add entries from statedir to supplied oldstate list.
+ * Optionally, move all entries from statedir -> destdir.
*/
void
-rfs4_ss_oldstate(char *dir, int do_move)
+rfs4_ss_oldstate(rfs4_oldstate_t *oldstate, char *statedir, char *destdir)
{
rfs4_ss_pn_t *ss_pn;
rfs4_oldstate_t *cl_ss = NULL;
@@ -643,24 +624,11 @@ rfs4_ss_oldstate(char *dir, int do_move)
/*
* open the state directory
*/
- if (err = vn_open(dir, UIO_SYSSPACE, FREAD, 0, &dvp, 0, 0)) {
- return;
- }
-
- /*
- * if this is not a directory return
- */
- if (dvp->v_type != VDIR) {
- (void) VOP_CLOSE(dvp, FREAD, 1, (offset_t)0, CRED());
- VN_RELE(dvp);
+ if (vn_open(statedir, UIO_SYSSPACE, FREAD, 0, &dvp, 0, 0))
return;
- }
- err = VOP_ACCESS(dvp, VREAD, 0, CRED());
- if (err) {
- /* Can't read the directory. So get the heck out. */
+ if (dvp->v_type != VDIR || VOP_ACCESS(dvp, VREAD, 0, CRED()))
goto out;
- }
dirt = kmem_alloc(RFS4_SS_DIRSIZE, KM_SLEEP);
@@ -678,12 +646,9 @@ rfs4_ss_oldstate(char *dir, int do_move)
uio.uio_resid = RFS4_SS_DIRSIZE;
err = VOP_READDIR(dvp, &uio, CRED(), &dir_eof);
-
VOP_RWUNLOCK(dvp, V_WRITELOCK_FALSE, NULL);
-
- if (err) {
+ if (err)
goto out;
- }
size = RFS4_SS_DIRSIZE - uio.uio_resid;
@@ -700,131 +665,136 @@ rfs4_ss_oldstate(char *dir, int do_move)
/*
* Skip '.' and '..'
*/
- if (NFS_IS_DOTNAME(dep->d_name)) {
+ if (NFS_IS_DOTNAME(dep->d_name))
continue;
- }
- if ((ss_pn = rfs4_ss_pnalloc(dir, dep->d_name))
- == NULL) {
+ ss_pn = rfs4_ss_pnalloc(statedir, dep->d_name);
+ if (ss_pn == NULL)
continue;
- }
if (cl_ss = rfs4_ss_getstate(dvp, ss_pn)) {
- if (do_move) {
+ if (destdir != NULL) {
rfs4_ss_pnfree(ss_pn);
cl_ss->ss_pn = rfs4_ss_movestate(
- NFS4_STATE_DIR,
- NFS4_OLDSTATE_DIR,
- dep->d_name);
+ statedir, destdir, dep->d_name);
} else {
cl_ss->ss_pn = ss_pn;
}
- insque(cl_ss, rfs4_oldstate);
+ insque(cl_ss, oldstate);
} else {
rfs4_ss_pnfree(ss_pn);
}
}
}
-out:
+out:
(void) VOP_CLOSE(dvp, FREAD, 1, (offset_t)0, CRED());
VN_RELE(dvp);
if (dirt)
kmem_free((caddr_t)dirt, RFS4_SS_DIRSIZE);
}
-/*
- * Validates that the needed directories exist
- */
-bool_t
-rfs4_validate_var(void)
+static void
+rfs4_ss_init(void)
{
- vnode_t *vp;
- int i;
- char *dnp;
- bool_t ret_val = TRUE;
- char *dir_names[] = {
- NFS4_VAR_DIR,
- NFS4_STATE_DIR,
- NFS4_OLDSTATE_DIR,
- NULL
- };
+ int npaths = 1;
+ char *default_dss_path = NFS4_DSS_VAR_DIR;
- for (i = 0, dnp = dir_names[i]; dnp; i++) {
- if (lookupname(dnp, UIO_SYSSPACE,
- NO_FOLLOW, NULLVPP, &vp) != 0) {
- cmn_err(CE_WARN, "!NFS4 stable storage directory "
- "missing!: %s", dnp);
- ret_val = FALSE;
- } else {
- VN_RELE(vp);
- }
- dnp = dir_names[i];
+ /* read the default stable storage state */
+ rfs4_dss_readstate(npaths, &default_dss_path);
+
+ rfs4_ss_enabled = 1;
+}
+
+/*
+ * Clear the oldstate data of every server instance, holding
+ * rfs4_servinst_lock for the duration of the walk.
+ */
+static void
+rfs4_ss_fini(void)
+{
+ rfs4_servinst_t *sip;
+
+ /*
+ * rfs4_cur_servinst is the most recently created instance and is
+ * created with next == NULL; older instances are reached through
+ * the prev links, as in the other servinst list walks.
+ */
+ mutex_enter(&rfs4_servinst_lock);
+ sip = rfs4_cur_servinst;
+ while (sip != NULL) {
+ rfs4_dss_clear_oldstate(sip);
+ sip = sip->prev;
}
- return (ret_val);
+ mutex_exit(&rfs4_servinst_lock);
}
/*
- *
+ * Remove all oldstate files referenced by this servinst.
+ *
+ * Takes sip->oldstate_lock as writer, removes the oldstate leaf file for
+ * each list entry, frees every entry (including the dummy head) and sets
+ * sip->oldstate to NULL. Does nothing if the list is already NULL.
*/
static void
-rfs4_ss_init(void)
+rfs4_dss_clear_oldstate(rfs4_servinst_t *sip)
{
- rw_init(&rfs4_oldstate_lock, NULL, RW_DEFAULT, NULL);
+ rfs4_oldstate_t *os_head, *osp;
+
+ rw_enter(&sip->oldstate_lock, RW_WRITER);
+ os_head = sip->oldstate;
- if (rfs4_validate_var() == FALSE) {
- rfs4_oldstate = NULL;
+ if (os_head == NULL) {
+ /* nothing to clear; do not return with the lock held */
+ rw_exit(&sip->oldstate_lock);
return;
}
- rfs4_oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
- rfs4_oldstate->next = rfs4_oldstate;
- rfs4_oldstate->prev = rfs4_oldstate;
+ /* skip dummy entry */
+ osp = os_head->next;
+ while (osp != os_head) {
+ rfs4_oldstate_t *os_next;
- /*
- * load info from the OLD directory
- */
- rfs4_ss_oldstate(NFS4_OLDSTATE_DIR, 0);
+ /*
+ * ss_pn can be NULL (rfs4_ss_movestate() may return NULL),
+ * so only remove the leaf file when a pathname exists.
+ */
+ if (osp->ss_pn != NULL) {
+ rfs4_dss_remove_leaf(sip, NFS4_DSS_OLDSTATE_LEAF,
+ osp->ss_pn->leaf);
+ kmem_free(osp->ss_pn, sizeof (rfs4_ss_pn_t));
+ }
- /*
- * Gather and move NFS4_STATE_DIR to NFS4_OLDSTATE_DIR
- */
- rfs4_ss_oldstate(NFS4_STATE_DIR, 1);
+ if (osp->cl_id4.id_val)
+ kmem_free(osp->cl_id4.id_val, osp->cl_id4.id_len);
- rfs4_ss_enabled = 1;
+ os_next = osp->next;
+ remque(osp);
+ kmem_free(osp, sizeof (rfs4_oldstate_t));
+ osp = os_next;
+ }
+
+ /* free dummy entry */
+ kmem_free(osp, sizeof (rfs4_oldstate_t));
+
+ sip->oldstate = NULL;
+
+ rw_exit(&sip->oldstate_lock);
}
-static void
-rfs4_ss_fini(void)
+/*
+ * Form the state and oldstate paths, and read in the stable storage files.
+ */
+void
+rfs4_dss_readstate(int npaths, char **paths)
{
+ int i;
+ char *state, *oldstate;
- rfs4_oldstate_t *ost, *osp, *os_head;
+ state = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+ oldstate = kmem_alloc(MAXPATHLEN, KM_SLEEP);
- rw_destroy(&rfs4_oldstate_lock);
+ for (i = 0; i < npaths; i++) {
+ char *path = paths[i];
- /*
- * short circuit everything if we have no
- * remaining oldstate!
- */
- if (rfs4_oldstate == NULL) {
- return;
- }
-
- /*
- * It is possible to start and immediately stop the server
- * in which case we would not have cleaned up the oldstate
- * circular queue so we may do it here.
- */
- os_head = rfs4_oldstate;
- osp = os_head->next;
+ (void) sprintf(state, "%s/%s", path, NFS4_DSS_STATE_LEAF);
+ (void) sprintf(oldstate, "%s/%s", path, NFS4_DSS_OLDSTATE_LEAF);
- while (osp != os_head) {
- ost = osp->next;
- remque(osp);
- rfs4_oldstate_free(osp);
- osp = ost;
+ /*
+ * Populate the current server instance's oldstate list.
+ *
+ * 1. Read stable storage data from old state directory,
+ * leaving its contents alone.
+ *
+ * 2. Read stable storage data from state directory,
+ * and move the latter's contents to old state
+ * directory.
+ */
+ rfs4_ss_oldstate(rfs4_cur_servinst->oldstate, oldstate, NULL);
+ rfs4_ss_oldstate(rfs4_cur_servinst->oldstate, state, oldstate);
}
- kmem_free(os_head, sizeof (rfs4_oldstate_t));
+
+ kmem_free(state, MAXPATHLEN);
+ kmem_free(oldstate, MAXPATHLEN);
}
@@ -835,63 +805,63 @@ rfs4_ss_fini(void)
void
rfs4_ss_chkclid(rfs4_client_t *cp)
{
- rfs4_oldstate_t *ost, *osp, *os_head;
+ rfs4_servinst_t *sip;
/*
- * short circuit everything if we have no
- * oldstate!
+ * It should be sufficient to check the oldstate data for just
+ * this client's instance. However, since our per-instance
+ * client grouping is solely temporal, HA-NFSv4 RG failover
+ * might result in clients of the same RG being partitioned into
+ * separate instances.
+ *
+ * Until the client grouping is improved, we must check the
+ * oldstate data for all instances with an active grace period.
+ *
+ * This also serves as the mechanism to remove stale oldstate data.
+ * The first time we check an instance after its grace period has
+ * expired, the oldstate data should be cleared.
+ *
+ * Start at the current instance, and walk the list backwards
+ * to the first.
*/
- if (rfs4_oldstate == NULL) {
- return;
- }
+ mutex_enter(&rfs4_servinst_lock);
+ for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) {
+ rfs4_ss_chkclid_sip(cp, sip);
- /*
- * if we are not in the grace_period then
- * we can destroy and mutilate all the old state.
- */
- if (!rfs4_clnt_in_grace(cp)) {
- rw_enter(&rfs4_oldstate_lock, RW_WRITER);
- if (rfs4_oldstate == NULL) {
- /*
- * some other thread is killing
- * the state so we get to just return.
- */
- rw_exit(&rfs4_oldstate_lock);
- return;
- }
-
- os_head = rfs4_oldstate;
- rfs4_oldstate = NULL;
- rw_exit(&rfs4_oldstate_lock);
+ /* if the above check found this client, we're done */
+ if (cp->can_reclaim)
+ break;
+ }
+ mutex_exit(&rfs4_servinst_lock);
+}
- /*
- * Now ditch the state files and structures
- * we've malloc()'d
- */
- osp = os_head->next;
+static void
+rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip)
+{
+ rfs4_oldstate_t *osp, *os_head;
- while (osp != os_head) {
- if (osp->ss_pn != NULL) {
- (void) vn_remove(osp->ss_pn->pn,
- UIO_SYSSPACE, RMFILE);
- }
- ost = osp->next;
- remque(osp);
- rfs4_oldstate_free(osp);
- osp = ost;
- }
- kmem_free(os_head, sizeof (rfs4_oldstate_t));
+ /* short circuit everything if this server instance has no oldstate */
+ rw_enter(&sip->oldstate_lock, RW_READER);
+ os_head = sip->oldstate;
+ rw_exit(&sip->oldstate_lock);
+ if (os_head == NULL)
return;
- }
/*
- * we're still in grace, search for the clientid
+ * If this server instance is no longer in a grace period then
+ * the client won't be able to reclaim. No further need for this
+ * instance's oldstate data, so it can be cleared.
*/
- rw_enter(&rfs4_oldstate_lock, RW_READER);
+ if (!rfs4_servinst_in_grace(sip))
+ return;
- os_head = rfs4_oldstate;
- osp = os_head->next;
+ /* this instance is still in grace; search for the clientid */
+
+ rw_enter(&sip->oldstate_lock, RW_READER);
+ os_head = sip->oldstate;
+ /* skip dummy entry */
+ osp = os_head->next;
while (osp != os_head) {
if (osp->cl_id4.id_len == cp->nfs_client.id_len) {
if (bcmp(osp->cl_id4.id_val, cp->nfs_client.id_val,
@@ -903,25 +873,19 @@ rfs4_ss_chkclid(rfs4_client_t *cp)
osp = osp->next;
}
- rw_exit(&rfs4_oldstate_lock);
+ rw_exit(&sip->oldstate_lock);
}
/*
- * Place client information into stable storage.
+ * Place client information into stable storage: 1/3.
+ * First, generate the leaf filename, from the client's IP address and
+ * the server-generated short-hand clientid.
*/
void
rfs4_ss_clid(rfs4_client_t *cp, struct svc_req *req)
{
const char *kinet_ntop6(uchar_t *, char *, size_t);
-
- nfs_client_id4 *cl_id4;
- rfs4_ss_pn_t *ss_pn;
char leaf[MAXNAMELEN], buf[INET6_ADDRSTRLEN];
- vnode_t *vp;
- struct uio uio;
- struct iovec iov[4];
- int file_vers = NFS4_SS_VERSION;
- int ioflag;
struct sockaddr *ca;
uchar_t *b;
@@ -959,10 +923,70 @@ rfs4_ss_clid(rfs4_client_t *cp, struct svc_req *req)
(void) snprintf(leaf, MAXNAMELEN, "%s-%llx", buf,
(longlong_t)cp->clientid);
+ rfs4_ss_clid_write(cp, leaf);
+}
- if ((ss_pn = rfs4_ss_pnalloc(NFS4_STATE_DIR, leaf)) == NULL) {
- return;
+/*
+ * Place client information into stable storage: 2/3.
+ * DSS: distributed stable storage: the file may need to be written to
+ * multiple directories.
+ *
+ * cp and leaf are handed unchanged to rfs4_ss_clid_write_one(), once for
+ * every non-NULL DSS path of every server instance reachable by walking
+ * ->prev from rfs4_cur_servinst.
+ */
+static void
+rfs4_ss_clid_write(rfs4_client_t *cp, char *leaf)
+{
+ rfs4_servinst_t *sip;
+
+ /*
+ * It should be sufficient to write the leaf file to (all) DSS paths
+ * associated with just this client's instance. However, since our
+ * per-instance client grouping is solely temporal, HA-NFSv4 RG
+ * failover might result in us losing DSS data.
+ *
+ * Until the client grouping is improved, we must write the DSS data
+ * to all instances' paths. Start at the current instance, and
+ * walk the list backwards to the first.
+ */
+ /* rfs4_servinst_lock stays held for the whole walk, writes included */
+ mutex_enter(&rfs4_servinst_lock);
+ for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) {
+ int i, npaths = sip->dss_npaths;
+
+ /* write the leaf file to all DSS paths */
+ for (i = 0; i < npaths; i++) {
+ rfs4_dss_path_t *dss_path = sip->dss_paths[i];
+
+ /* HA-NFSv4 path might have been failed-away from us */
+ if (dss_path == NULL)
+ continue;
+
+ rfs4_ss_clid_write_one(cp, dss_path->path, leaf);
+ }
}
+ mutex_exit(&rfs4_servinst_lock);
+}
+
+/*
+ * Place client information into stable storage: 3/3.
+ * Write the stable storage data to the requested file.
+ */
+static void
+rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dss_path, char *leaf)
+{
+ int ioflag;
+ int file_vers = NFS4_SS_VERSION;
+ struct uio uio;
+ struct iovec iov[4];
+ char *dir;
+ rfs4_ss_pn_t *ss_pn;
+ vnode_t *vp;
+ nfs_client_id4 *cl_id4 = &(cp->nfs_client);
+
+ /* allow 2 extra bytes for '/' & NUL */
+ dir = kmem_alloc(strlen(dss_path) + strlen(NFS4_DSS_STATE_LEAF) + 2,
+ KM_SLEEP);
+ (void) sprintf(dir, "%s/%s", dss_path, NFS4_DSS_STATE_LEAF);
+
+ if ((ss_pn = rfs4_ss_pnalloc(dir, leaf)) == NULL)
+ return;
if (vn_open(ss_pn->pn, UIO_SYSSPACE, FCREAT|FWRITE, 0600, &vp,
CRCREAT, 0)) {
@@ -970,19 +994,31 @@ rfs4_ss_clid(rfs4_client_t *cp, struct svc_req *req)
return;
}
- if (cp->ss_pn)
- rfs4_ss_pnfree(cp->ss_pn);
-
- cp->ss_pn = ss_pn;
-
- cl_id4 = &(cp->nfs_client);
+ /*
+ * We need to record leaf - i.e. the filename - so that we know
+ * what to remove, in the future. However, the dir part of cp->ss_pn
+ * should never be referenced directly, since it's potentially only
+ * one of several paths with this leaf in it.
+ */
+ if (cp->ss_pn != NULL) {
+ if (strcmp(cp->ss_pn->leaf, leaf) == 0) {
+ /* we've already recorded *this* leaf */
+ rfs4_ss_pnfree(ss_pn);
+ } else {
+ /* replace with this leaf */
+ rfs4_ss_pnfree(cp->ss_pn);
+ cp->ss_pn = ss_pn;
+ }
+ } else {
+ cp->ss_pn = ss_pn;
+ }
/*
* Build a scatter list that points to the nfs_client_id4
*/
iov[0].iov_base = (caddr_t)&file_vers;
iov[0].iov_len = sizeof (int);
- iov[1].iov_base = (caddr_t)cl_id4;
+ iov[1].iov_base = (caddr_t)&(cl_id4->verifier);
iov[1].iov_len = NFS4_VERIFIER_SIZE;
iov[2].iov_base = (caddr_t)&(cl_id4->id_len);
iov[2].iov_len = sizeof (uint_t);
@@ -1010,6 +1046,45 @@ rfs4_ss_clid(rfs4_client_t *cp, struct svc_req *req)
}
/*
+ * DSS: distributed stable storage.
+ * Unpack the list of paths passed by nfsd.
+ * Use nvlist_alloc(9F) to manage the data.
+ * The caller is responsible for allocating and freeing the buffer.
+ */
+int
+rfs4_dss_setpaths(char *buf, size_t buflen)
+{
+	int error;
+
+	/*
+	 * buf/buflen describe a packed nvlist; the caller keeps ownership
+	 * of buf. Returns 0 on success, otherwise the error returned by
+	 * nvlist_unpack() or nvlist_lookup_string_array().
+	 */
+
+	/*
+	 * If this is a "warm start", i.e. we previously had DSS paths,
+	 * preserve the old paths.
+	 */
+	if (rfs4_dss_paths != NULL) {
+		/*
+		 * Before we lose the ptr, destroy the nvlist and pathnames
+		 * array from the warm start before this one.
+		 */
+		if (rfs4_dss_oldpaths)
+			nvlist_free(rfs4_dss_oldpaths);
+		rfs4_dss_oldpaths = rfs4_dss_paths;
+
+		/*
+		 * The list is now owned by rfs4_dss_oldpaths. Drop the alias
+		 * so that, should the unpack below fail without storing a
+		 * new list, the two globals never refer to the same nvlist;
+		 * otherwise the next call here (or any code that frees both
+		 * pointers) would free it twice.
+		 */
+		rfs4_dss_paths = NULL;
+	}
+
+	/* unpack the buffer into a searchable nvlist */
+	error = nvlist_unpack(buf, buflen, &rfs4_dss_paths, KM_SLEEP);
+	if (error)
+		return (error);
+
+	/*
+	 * Search the nvlist for the pathnames nvpair (which is the only
+	 * nvpair in the list), and record its location.
+	 */
+	error = nvlist_lookup_string_array(rfs4_dss_paths,
+	    NFS4_DSS_NVPAIR_NAME, &rfs4_dss_newpaths, &rfs4_dss_numnewpaths);
+	return (error);
+}
+
+/*
* Ultimately the nfssys() call NFS4_CLR_STATE endsup here
* to find and mark the client for forced expire.
*/
@@ -1089,6 +1164,7 @@ rfs4_state_init()
{
int start_grace;
extern boolean_t rfs4_cpr_callb(void *, int);
+ char *dss_path = NFS4_DSS_VAR_DIR;
mutex_enter(&rfs4_state_lock);
@@ -1114,6 +1190,9 @@ rfs4_state_init()
else
rfs4_start_time++;
+ /* DSS: distributed stable storage: initialise served paths list */
+ rfs4_dss_pathlist = NULL;
+
/*
* Create the first server instance, or a new one if the server has
* been restarted; see above comments on rfs4_start_time. Don't
@@ -1121,7 +1200,7 @@ rfs4_state_init()
* clients' recovery window.
*/
start_grace = 0;
- rfs4_servinst_create(start_grace);
+ rfs4_servinst_create(start_grace, 1, &dss_path);
/* reset the "first NFSv4 request" status */
rfs4_seen_first_compound = 0;
@@ -1355,6 +1434,13 @@ rfs4_state_fini()
/* reset the "first NFSv4 request" status */
rfs4_seen_first_compound = 0;
+
+ /* DSS: distributed stable storage */
+ if (rfs4_dss_oldpaths)
+ nvlist_free(rfs4_dss_oldpaths);
+ if (rfs4_dss_paths)
+ nvlist_free(rfs4_dss_paths);
+ rfs4_dss_paths = rfs4_dss_oldpaths = NULL;
}
typedef union {
@@ -1455,11 +1541,50 @@ rfs4_client_expiry(rfs4_entry_t u_entry)
cp_expired = (cp->forced_expire ||
(gethrestime_sec() - cp->last_access
> rfs4_lease_time));
+
if (!cp->ss_remove && cp_expired)
cp->ss_remove = 1;
return (cp_expired);
}
+/*
+ * Delete this client's stable storage leaf file from the v4_state
+ * directory of every DSS path held by the client's server instance.
+ */
+static void
+rfs4_dss_remove_cpleaf(rfs4_client_t *cp)
+{
+	rfs4_dss_remove_leaf(cp->server_instance, NFS4_DSS_STATE_LEAF,
+	    cp->ss_pn->leaf);
+}
+
+static void
+rfs4_dss_remove_leaf(rfs4_servinst_t *sip, char *dir_leaf, char *leaf)
+{
+	int slot;
+	int nslots = sip->dss_npaths;
+
+	/*
+	 * For each DSS path of this instance, build "<path>/<dir_leaf>/<leaf>"
+	 * and remove that file. The result of the removal is ignored.
+	 */
+	for (slot = 0; slot < nslots; slot++) {
+		rfs4_dss_path_t *entry = sip->dss_paths[slot];
+
+		/* an empty slot: the HA-NFSv4 path has been failed-over away */
+		if (entry != NULL) {
+			char *base = entry->path;
+			char *victim;
+			size_t victimsz;
+
+			/* two '/' separators and the terminating NUL: 3 bytes */
+			victimsz = strlen(base) + strlen(dir_leaf) +
+			    strlen(leaf) + 3;
+			victim = kmem_alloc(victimsz, KM_SLEEP);
+			(void) sprintf(victim, "%s/%s/%s", base, dir_leaf,
+			    leaf);
+
+			(void) vn_remove(victim, UIO_SYSSPACE, RMFILE);
+
+			kmem_free(victim, victimsz);
+		}
+	}
+}
+
static void
rfs4_client_destroy(rfs4_entry_t u_entry)
{
@@ -1476,12 +1601,9 @@ rfs4_client_destroy(rfs4_entry_t u_entry)
rfs4_client_rele(cp->cp_confirmed);
if (cp->ss_pn) {
- /*
- * check if the stable storage file needs
- * to be removed
- */
+ /* check if the stable storage files need to be removed */
if (cp->ss_remove)
- (void) vn_remove(cp->ss_pn->pn, UIO_SYSSPACE, RMFILE);
+ rfs4_dss_remove_cpleaf(cp);
rfs4_ss_pnfree(cp->ss_pn);
}
diff --git a/usr/src/uts/common/fs/nfs/nfs_server.c b/usr/src/uts/common/fs/nfs/nfs_server.c
index 42d7e071ab..7b382608f5 100644
--- a/usr/src/uts/common/fs/nfs/nfs_server.c
+++ b/usr/src/uts/common/fs/nfs/nfs_server.c
@@ -106,6 +106,9 @@ static struct modlinkage modlinkage = {
char _depends_on[] = "misc/klmmod";
+/* for testing RG failover code path on non-Cluster system */
+int hanfsv4_force = 0;
+
int
_init(void)
{
@@ -125,7 +128,19 @@ _init(void)
nfs_srvfini();
}
+ /*
+ * Initialise some placeholders for nfssys() calls. These have
+ * to be declared by the nfs module, since that handles nfssys()
+ * calls - also used by NFS clients - but are provided by this
+ * nfssrv module. These also then serve as confirmation to the
+ * relevant code in nfs that nfssrv has been loaded, as they're
+ * initially NULL.
+ */
nfs_srv_quiesce_func = nfs_srv_quiesce_all;
+ nfs_srv_dss_func = rfs4_dss_setpaths;
+
+ /* setup DSS paths here; must be done before initial server startup */
+ rfs4_dss_paths = rfs4_dss_oldpaths = NULL;
return (status);
}
@@ -166,6 +181,7 @@ static void acl_dispatch(struct svc_req *, SVCXPRT *);
static void common_dispatch(struct svc_req *, SVCXPRT *,
rpcvers_t, rpcvers_t, char *,
struct rpc_disptable *);
+static void hanfsv4_failover(void);
static int checkauth(struct exportinfo *, struct svc_req *, cred_t *, int,
bool_t);
static char *client_name(struct svc_req *req);
@@ -241,6 +257,12 @@ static nfs_server_running_t nfs_server_upordown;
static kmutex_t nfs_server_upordown_lock;
static kcondvar_t nfs_server_upordown_cv;
+/*
+ * DSS: distributed stable storage
+ * lists of all DSS paths: current, and before last warmstart
+ */
+nvlist_t *rfs4_dss_paths, *rfs4_dss_oldpaths;
+
int rfs4_dispatch(struct rpcdisp *, struct svc_req *, SVCXPRT *, char *);
/*
@@ -298,6 +320,11 @@ nfs_srv_shutdown_all(int quiesce) {
nfs_server_upordown == NFS_SERVER_OFFLINE) {
nfs_server_upordown = NFS_SERVER_QUIESCED;
cv_signal(&nfs_server_upordown_cv);
+
+ /* reset DSS state, for subsequent warm restart */
+ rfs4_dss_numnewpaths = 0;
+ rfs4_dss_newpaths = NULL;
+
cmn_err(CE_NOTE, "nfs_server: server is now quiesced; "
"NFSv4 state has been preserved");
}
@@ -458,7 +485,7 @@ nfs_svc(struct nfs_svc_args *arg, model_t model)
releasef(STRUCT_FGET(uap, fd));
- /* save the cluster nodeid */
+ /* HA-NFSv4: save the cluster nodeid */
if (cluster_bootflags & CLUSTER_BOOTED)
lm_global_nlmid = clconf_get_nodeid();
@@ -489,28 +516,20 @@ rfs4_server_start(int nfs4_srv_delegation)
/* is this an nfsd warm start? */
if (nfs_server_upordown == NFS_SERVER_QUIESCED) {
- int start_grace;
-
cmn_err(CE_NOTE, "nfs_server: "
"server was previously quiesced; "
"existing NFSv4 state will be re-used");
/*
- * Cluster: this is also the signal that
- * a failover has occurred, so create a new
- * server instance, and start its grace period.
- * We also need to reset all currently
- * active grace periods in case of multiple
- * failovers within the grace duration,
- * to avoid partitioning clients of the same
- * resource into different instances.
+ * HA-NFSv4: this is also the signal
+ * that a Resource Group failover has
+ * occurred.
*/
- if (cluster_bootflags & CLUSTER_BOOTED) {
- rfs4_grace_reset_all();
- start_grace = 1;
- rfs4_servinst_create(start_grace);
- }
+ if (cluster_bootflags & CLUSTER_BOOTED ||
+ hanfsv4_force)
+ hanfsv4_failover();
} else {
+ /* cold start */
rfs4_state_init();
nfs4_drc = rfs4_init_drc(nfs4_drc_max,
nfs4_drc_hash,
@@ -2836,3 +2855,160 @@ nfs_check_vpexi(vnode_t *mc_dvp, vnode_t *vp, cred_t *cr,
return (error);
}
+
+/*
+ * Do the main work of handling HA-NFSv4 Resource Group failover on
+ * Sun Cluster.
+ * We need to detect whether any RG admin paths have been added or removed,
+ * and adjust resources accordingly.
+ * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
+ * order to scale, the list and array of paths need to be held in more
+ * suitable data structures.
+ *
+ * Inputs are globals: rfs4_dss_pathlist (paths currently served) and
+ * rfs4_dss_newpaths[0 .. rfs4_dss_numnewpaths - 1] (paths passed in by
+ * nfsd). In both passes a served path and a passed-in path are the same
+ * path only if the two strings are equal in full.
+ *
+ * NOTE(review): rfs4_dss_pathlist is dereferenced without a NULL check;
+ * presumably the list always holds at least one entry by the time this
+ * runs - confirm against the callers.
+ */
+static void
+hanfsv4_failover(void)
+{
+	int i, start_grace, numadded_paths = 0;
+	char **added_paths = NULL;
+	rfs4_dss_path_t *dss_path;
+
+	/*
+	 * First, look for removed paths: RGs that have been failed-over
+	 * away from this node.
+	 * Walk the "currently-serving" rfs4_dss_pathlist and, for each
+	 * path, check if it is on the "passed-in" rfs4_dss_newpaths array
+	 * from nfsd. If not, that RG path has been removed.
+	 *
+	 * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
+	 * any duplicates.
+	 *
+	 * NOTE(review): the loop ends when the walk returns to
+	 * rfs4_dss_pathlist. If the head entry itself were ever removed
+	 * and freed below, that test would compare against a stale
+	 * pointer; this looks safe only if the head is never a removable
+	 * path - confirm.
+	 */
+	dss_path = rfs4_dss_pathlist;
+	do {
+		int found = 0;
+		char *path = dss_path->path;
+
+		/* used only for non-HA so may not be removed */
+		if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
+			dss_path = dss_path->next;
+			continue;
+		}
+
+		for (i = 0; i < rfs4_dss_numnewpaths; i++) {
+			int cmpret;
+			char *newpath = rfs4_dss_newpaths[i];
+
+			cmpret = strcmp(path, newpath);
+
+			/*
+			 * Since nfsd has sorted rfs4_dss_newpaths for us,
+			 * once the return from strcmp is negative we know
+			 * we've passed the point where "path" should be,
+			 * and can stop searching: "path" has been removed.
+			 */
+			if (cmpret < 0)
+				break;
+
+			if (cmpret == 0) {
+				found = 1;
+				break;
+			}
+		}
+
+		if (found == 0) {
+			unsigned index = dss_path->index;
+			rfs4_servinst_t *sip = dss_path->sip;
+			rfs4_dss_path_t *path_next = dss_path->next;
+
+			/*
+			 * This path has been removed.
+			 * We must clear out the servinst reference to
+			 * it, since it's now owned by another
+			 * node: we should not attempt to touch it.
+			 */
+			ASSERT(dss_path == sip->dss_paths[index]);
+			sip->dss_paths[index] = NULL;
+
+			/*
+			 * remove from "currently-serving" list, and destroy
+			 *
+			 * NOTE(review): only the list node is freed here;
+			 * dss_path->path is not - confirm who owns that
+			 * string.
+			 */
+			remque(dss_path);
+			kmem_free(dss_path, sizeof (rfs4_dss_path_t));
+
+			dss_path = path_next;
+		} else {
+			/* path was found; not removed */
+			dss_path = dss_path->next;
+		}
+	} while (dss_path != rfs4_dss_pathlist);
+
+	/*
+	 * Now, look for added paths: RGs that have been failed-over
+	 * to this node.
+	 * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
+	 * for each path, check if it is on the "currently-serving"
+	 * rfs4_dss_pathlist. If not, that RG path has been added.
+	 *
+	 * Note: we don't do duplicate detection here; nfsd does that for us.
+	 *
+	 * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
+	 * an upper bound for the size needed for added_paths[numadded_paths].
+	 */
+
+	/* probably more space than we need, but guaranteed to be enough */
+	if (rfs4_dss_numnewpaths > 0) {
+		size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
+		added_paths = kmem_zalloc(sz, KM_SLEEP);
+	}
+
+	/* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
+	for (i = 0; i < rfs4_dss_numnewpaths; i++) {
+		int found = 0;
+		char *newpath = rfs4_dss_newpaths[i];
+
+		dss_path = rfs4_dss_pathlist;
+		do {
+			char *path = dss_path->path;
+
+			/* used only for non-HA */
+			if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
+				dss_path = dss_path->next;
+				continue;
+			}
+
+			/*
+			 * Compare the whole string. A prefix-only match
+			 * would treat a new path such as "/rg10" as already
+			 * served whenever "/rg1" is served, so it would get
+			 * no server instance, no state read and no grace
+			 * period.
+			 */
+			if (strcmp(path, newpath) == 0) {
+				found = 1;
+				break;
+			}
+
+			dss_path = dss_path->next;
+		} while (dss_path != rfs4_dss_pathlist);
+
+		if (found == 0) {
+			/* records the nfsd-supplied pointer; no copy is made */
+			added_paths[numadded_paths] = newpath;
+			numadded_paths++;
+		}
+	}
+
+	/* did we find any added paths? */
+	if (numadded_paths > 0) {
+		/* create a new server instance, and start its grace period */
+		start_grace = 1;
+		rfs4_servinst_create(start_grace, numadded_paths, added_paths);
+
+		/* read in the stable storage state from these paths */
+		rfs4_dss_readstate(numadded_paths, added_paths);
+
+		/*
+		 * Multiple failovers during a grace period will cause
+		 * clients of the same resource group to be partitioned
+		 * into different server instances, with different
+		 * grace periods. Since clients of the same resource
+		 * group must be subject to the same grace period,
+		 * we need to reset all currently active grace periods.
+		 */
+		rfs4_grace_reset_all();
+	}
+
+	/* free with the same size used for the allocation above */
+	if (rfs4_dss_numnewpaths > 0)
+		kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
+}
diff --git a/usr/src/uts/common/fs/nfs/nfs_sys.c b/usr/src/uts/common/fs/nfs/nfs_sys.c
index af32a7a7f3..5101da02ed 100644
--- a/usr/src/uts/common/fs/nfs/nfs_sys.c
+++ b/usr/src/uts/common/fs/nfs/nfs_sys.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*
* Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
@@ -38,6 +37,7 @@
#include <sys/policy.h>
#include <sys/siginfo.h>
#include <sys/proc.h> /* for exit() declaration */
+#include <sys/kmem.h>
#include <nfs/nfs4.h>
#include <nfs/nfssys.h>
#include <sys/thread.h>
@@ -70,6 +70,12 @@ void (*nfs_srv_quiesce_func)(void) = NULL;
time_t rfs4_lease_time = RFS4_LEASETIME;
time_t rfs4_grace_period = RFS4_LEASETIME;
+/* DSS: distributed stable storage */
+size_t nfs4_dss_buflen = 0;
+/* This is filled in by nfssrv:_init() */
+int (*nfs_srv_dss_func)(char *, size_t) = NULL;
+
+
int
nfssys(enum nfssys_op opcode, void *arg)
{
@@ -182,22 +188,6 @@ nfssys(enum nfssys_op opcode, void *arg)
break;
}
- /* Request that NFS server quiesce on next shutdown */
- case NFS_SVC_REQUEST_QUIESCE: {
- int id;
-
- /* check that nfssrv module is loaded */
- if (nfs_srv_quiesce_func == NULL)
- return (set_errno(ENOTSUP));
-
- if (copyin(arg, &id, sizeof (id)))
- return (set_errno(EFAULT));
-
- error = svc_pool_control(id, SVCPSET_SHUTDOWN_PROC,
- (void *)nfs_srv_quiesce_func);
- break;
- }
-
case EXPORTFS: { /* export a file system */
STRUCT_DECL(exportfs_args, ea);
@@ -295,6 +285,22 @@ nfssys(enum nfssys_op opcode, void *arg)
break;
}
+ /* Request that NFSv4 server quiesce on next shutdown */
+ case NFS4_SVC_REQUEST_QUIESCE: {
+ int id;
+
+ /* check that nfssrv module is loaded */
+ if (nfs_srv_quiesce_func == NULL)
+ return (set_errno(ENOTSUP));
+
+ if (copyin(arg, &id, sizeof (id)))
+ return (set_errno(EFAULT));
+
+ error = svc_pool_control(id, SVCPSET_SHUTDOWN_PROC,
+ (void *)nfs_srv_quiesce_func);
+ break;
+ }
+
case NFS_IDMAP: {
struct nfsidmap_args idm;
@@ -306,6 +312,47 @@ nfssys(enum nfssys_op opcode, void *arg)
break;
}
+ case NFS4_DSS_SETPATHS_SIZE: {
+ /* crosses ILP32/LP64 boundary */
+ uint32_t nfs4_dss_bufsize = 0;
+
+ if (copyin(arg, &nfs4_dss_bufsize, sizeof (nfs4_dss_bufsize)))
+ return (set_errno(EFAULT));
+ nfs4_dss_buflen = (long)nfs4_dss_bufsize;
+ error = 0;
+ break;
+ }
+
+ case NFS4_DSS_SETPATHS: {
+ char *nfs4_dss_bufp;
+
+ /* check that nfssrv module is loaded */
+ if (nfs_srv_dss_func == NULL)
+ return (set_errno(ENOTSUP));
+
+ /*
+ * NFS4_DSS_SETPATHS_SIZE must be called before
+ * NFS4_DSS_SETPATHS, to tell us how big a buffer we need
+ * to allocate.
+ */
+ if (nfs4_dss_buflen == 0)
+ return (set_errno(EINVAL));
+ nfs4_dss_bufp = kmem_alloc(nfs4_dss_buflen, KM_SLEEP);
+ if (nfs4_dss_bufp == NULL)
+ return (set_errno(ENOMEM));
+
+ if (copyin(arg, nfs4_dss_bufp, nfs4_dss_buflen)) {
+ kmem_free(nfs4_dss_bufp, nfs4_dss_buflen);
+ return (set_errno(EFAULT));
+ }
+
+ /* unpack the buffer and extract the pathnames */
+ error = nfs_srv_dss_func(nfs4_dss_bufp, nfs4_dss_buflen);
+ kmem_free(nfs4_dss_bufp, nfs4_dss_buflen);
+
+ break;
+ }
+
default:
error = EINVAL;
break;
diff --git a/usr/src/uts/common/nfs/nfs.h b/usr/src/uts/common/nfs/nfs.h
index 043014ff37..eda293574e 100644
--- a/usr/src/uts/common/nfs/nfs.h
+++ b/usr/src/uts/common/nfs/nfs.h
@@ -44,6 +44,7 @@
#include <sys/dirent.h>
#include <sys/zone.h>
#include <sys/tsol/label.h>
+#include <sys/nvpair.h>
#include <nfs/mount.h>
#endif
#include <vm/page.h>
@@ -933,8 +934,12 @@ extern int nfs_mount_label_policy(vfs_t *vfsp, struct netbuf *addr,
extern void nfs_srv_stop_all(void);
extern void nfs_srv_quiesce_all(void);
extern void (*nfs_srv_quiesce_func)(void);
+extern int rfs4_dss_setpaths(char *, size_t);
+extern int (*nfs_srv_dss_func)(char *, size_t);
extern time_t rfs4_lease_time;
extern time_t rfs4_grace_period;
+extern nvlist_t *rfs4_dss_paths, *rfs4_dss_oldpaths;
+
extern kstat_named_t *global_svstat_ptr[];
diff --git a/usr/src/uts/common/nfs/nfs4.h b/usr/src/uts/common/nfs/nfs4.h
index eacbefe747..0950547bc9 100644
--- a/usr/src/uts/common/nfs/nfs4.h
+++ b/usr/src/uts/common/nfs/nfs4.h
@@ -36,6 +36,7 @@
#ifdef _KERNEL
#include <nfs/nfs4_kprot.h>
+#include <sys/nvpair.h>
#else
#include <rpcsvc/nfs4_prot.h>
#endif
@@ -324,17 +325,46 @@ typedef struct {
*
* Currently used only for Sun Cluster HA-NFS support, to group clients
* on NFS resource failover so each set of clients gets its own dedicated
- * grace period.
+ * grace period and distributed stable storage data.
*/
typedef struct rfs4_servinst {
+ int dss_npaths; /* number of slots in dss_paths[] */
krwlock_t rwlock;
+ krwlock_t oldstate_lock; /* held when reading oldstate */
time_t start_time;
time_t grace_period;
+ rfs4_oldstate_t *oldstate; /* oldstate list head, or NULL */
+ struct rfs4_dss_path **dss_paths; /* NULL slot: path failed away */
struct rfs4_servinst *next;
struct rfs4_servinst *prev;
} rfs4_servinst_t;
/*
+ * DSS: distributed stable storage
+ */
+
+typedef struct rfs4_dss_path {
+ struct rfs4_dss_path *next; /* for insque/remque */
+ struct rfs4_dss_path *prev; /* for insque/remque */
+ char *path; /* DSS directory pathname */
+ struct rfs4_servinst *sip; /* instance whose dss_paths[index] is us */
+ unsigned index; /* offset in servinst's array */
+} rfs4_dss_path_t;
+
+/*
+ * NOTE(review): the variables below are written without "extern", so
+ * every file that includes this header emits its own tentative
+ * definition of each of them. Confirm that this is intended, rather than
+ * "extern" declarations here plus one definition in a single .c file.
+ */
+
+/* array of paths passed-in from nfsd command-line; stored in nvlist */
+char **rfs4_dss_newpaths;
+uint_t rfs4_dss_numnewpaths;
+
+/*
+ * Circular doubly-linked list of paths for currently-served RGs.
+ * No locking required: only changed on warmstart. Managed with insque/remque.
+ */
+rfs4_dss_path_t *rfs4_dss_pathlist;
+
+/* nvlists of all DSS paths: current, and before last warmstart */
+nvlist_t *rfs4_dss_paths, *rfs4_dss_oldpaths;
+
+/*
* List declarations (suitable for insque/remque) used to link the
* various datastructs listed below.
*/
@@ -712,12 +742,11 @@ typedef struct rfs4_file {
krwlock_t file_rwlock;
} rfs4_file_t;
-extern int rfs4_servinst_debug;
extern int rfs4_seen_first_compound; /* set first time we see one */
extern rfs4_servinst_t *rfs4_cur_servinst; /* current server instance */
extern kmutex_t rfs4_servinst_lock; /* protects linked list */
-extern void rfs4_servinst_create(int);
+extern void rfs4_servinst_create(int, int, char **);
extern void rfs4_servinst_destroy_all(void);
extern void rfs4_servinst_assign(rfs4_client_t *,
rfs4_servinst_t *);
@@ -728,6 +757,8 @@ extern int rfs4_servinst_grace_new(rfs4_servinst_t *);
extern void rfs4_grace_start(rfs4_servinst_t *);
extern void rfs4_grace_start_new(void);
extern void rfs4_grace_reset_all(void);
+extern void rfs4_ss_oldstate(rfs4_oldstate_t *, char *, char *);
+extern void rfs4_dss_readstate(int, char **);
/*
* rfs4_deleg_policy is used to signify the server's global delegation
diff --git a/usr/src/uts/common/nfs/nfssys.h b/usr/src/uts/common/nfs/nfssys.h
index 4d3794f1a9..931990fcf5 100644
--- a/usr/src/uts/common/nfs/nfssys.h
+++ b/usr/src/uts/common/nfs/nfssys.h
@@ -50,7 +50,8 @@ enum nfssys_op { OLD_NFS_SVC, OLD_ASYNC_DAEMON, EXPORTFS, OLD_NFS_GETFH,
OLD_NFS_CNVT, NFS_REVAUTH, OLD_NFS_FH_TO_FID, OLD_LM_SVC, KILL_LOCKMGR,
LOG_FLUSH, SVCPOOL_CREATE, NFS_SVC, LM_SVC, SVCPOOL_WAIT, SVCPOOL_RUN,
NFS4_SVC, RDMA_SVC_INIT, NFS4_CLR_STATE, NFS_IDMAP,
- NFS_SVC_REQUEST_QUIESCE, NFS_GETFH };
+ NFS4_SVC_REQUEST_QUIESCE, NFS_GETFH, NFS4_DSS_SETPATHS,
+ NFS4_DSS_SETPATHS_SIZE };
struct nfs_svc_args {
int fd; /* Connection endpoint */
@@ -294,6 +295,14 @@ struct nfs4_svc_args32 {
#define NFS4_SETPORT 2
#define NFS4_DQUERY 4
+/* DSS: distributed stable storage */
+#define NFS4_DSS_STATE_LEAF "v4_state"
+#define NFS4_DSS_OLDSTATE_LEAF "v4_oldstate"
+#define NFS4_DSS_DIR_MODE 0755
+#define NFS4_DSS_NVPAIR_NAME "dss_pathname_array"
+/* default storage dir */
+#define NFS4_DSS_VAR_DIR "/var/nfs"
+
#ifdef _KERNEL
#include <sys/systm.h> /* for rval_t typedef */