author     Jerry Jelinek <jerry.jelinek@joyent.com>  2020-02-10 12:46:03 +0000
committer  Jerry Jelinek <jerry.jelinek@joyent.com>  2020-02-10 12:46:03 +0000
commit     78121a0c3d633d344c4092336d357036c0e19f96 (patch)
tree       c6691bd7e380d723ada73f9b61dc4ef7634ee081
parent     471ea867225b8c01fb430e738f6cf835294c99fc (diff)
parent     19ee9cd1f5161e227951200cab5ecbff45fd5d71 (diff)
download   illumos-joyent-78121a0c3d633d344c4092336d357036c0e19f96.tar.gz
[illumos-gate merge]
commit 19ee9cd1f5161e227951200cab5ecbff45fd5d71
    12277 /proc/<PID>/fdinfo should resolve paths relative to current process
commit 52aec5b9758f6352670ab269980b437a987f4822
    12276 smatch-clean sockfs
commit 0dfe541e13279d277d838d6a27e55188b9486cb1
    11083 support NFS server in zone
commit d8ab6e129d75d7c3f21a7909bf811a3de65faea8
    12235 Add libzutil for libzfs or libzpool consumers
commit fdefee4c75361dc5ea202f7e1f7c49f8a27ea043
    12266 loader: rewrite zfs reader zap code to use malloc
commit 8af765f5897d30449b941438e8d833f02dac74f8
    12252 remove sunfire-specific code from kstat(1m)
commit 34173ec8d0147391124bee4bbc60ac00807c0377
    12284 errors in compress(1) and pack(1) man pages

Conflicts:
    usr/src/uts/common/klm/nlm_impl.h
    usr/src/uts/common/fs/nfs/nfs_server.c
    usr/src/lib/Makefile
    usr/src/cmd/fs.d/nfs/svc/nfs-server
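Illustrative note (not part of the merge): change 12235 splits libzutil out as a standalone library for libzfs/libzpool consumers. The fstyp.c and dumpadm hunks below switch to it by including <libzutil.h>, linking with -lzutil, and calling zpool_read_label() with a new third argument (passed as NULL in fstyp.c). The following is a minimal consumer sketch, written under the assumption that the third parameter reports how many valid labels were found; the authoritative prototypes live in the new usr/src/lib/libzutil/common/libzutil.h, whose contents are not reproduced in this diff.

/*
 * Hypothetical libzutil consumer (illustration only, not from this merge).
 * Build roughly as: cc -I usr/src/lib/libzutil/common label.c -lzutil -lnvpair
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <libnvpair.h>
#include <libzutil.h>

int
main(int argc, char **argv)
{
	nvlist_t *config = NULL;
	int nlabels = 0;	/* assumed meaning of the new third argument */
	int fd;

	if (argc != 2 || (fd = open(argv[1], O_RDONLY)) < 0)
		return (1);

	/* Same call shape as the updated fstyp.c below; NULL is also accepted. */
	if (zpool_read_label(fd, &config, &nlabels) == 0 && config != NULL) {
		(void) printf("%d valid label(s) found\n", nlabels);
		dump_nvlist(config, 4);		/* pretty-print via libnvpair */
		nvlist_free(config);
	}
	(void) close(fd);
	return (0);
}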
-rw-r--r--  manifest  11
-rw-r--r--  usr/src/Targetdirs  11
-rw-r--r--  usr/src/boot/Makefile.version  2
-rw-r--r--  usr/src/boot/lib/libstand/zfs/zfsimpl.c  436
-rw-r--r--  usr/src/boot/sys/cddl/boot/zfs/zfsimpl.h  11
-rw-r--r--  usr/src/cmd/dfs.cmds/sharemgr/commands.c  9
-rw-r--r--  usr/src/cmd/dumpadm/Makefile  5
-rw-r--r--  usr/src/cmd/dumpadm/dconf.c  3
-rw-r--r--  usr/src/cmd/fs.d/nfs/Makefile  7
-rw-r--r--  usr/src/cmd/fs.d/nfs/dtrace/Makefile  37
-rwxr-xr-x  usr/src/cmd/fs.d/nfs/dtrace/nfs-time.d  78
-rwxr-xr-x  usr/src/cmd/fs.d/nfs/dtrace/nfs-trace.d  248
-rw-r--r--  usr/src/cmd/fs.d/nfs/svc/nfs-server  13
-rw-r--r--  usr/src/cmd/fs.d/nfs/svc/nlockmgr.xml  6
-rw-r--r--  usr/src/cmd/fs.d/zfs/fstyp/Makefile  11
-rw-r--r--  usr/src/cmd/fs.d/zfs/fstyp/fstyp.c  7
-rw-r--r--  usr/src/cmd/perl/contrib/Sun/Solaris/Kstat/Kstat.xs  174
-rw-r--r--  usr/src/cmd/smbsrv/dtrace/smb-trace.d  39
-rw-r--r--  usr/src/cmd/smbsrv/dtrace/smb2-trace.d  39
-rw-r--r--  usr/src/cmd/stat/kstat/Makefile  3
-rw-r--r--  usr/src/cmd/stat/kstat/kstat.c  168
-rw-r--r--  usr/src/cmd/stat/kstat/kstat.h  20
-rw-r--r--  usr/src/cmd/zdb/Makefile.com  5
-rw-r--r--  usr/src/cmd/zdb/zdb.c  10
-rw-r--r--  usr/src/cmd/zfs/Makefile  5
-rw-r--r--  usr/src/cmd/zfs/zfs_main.c  1
-rw-r--r--  usr/src/cmd/zhack/Makefile.com  4
-rw-r--r--  usr/src/cmd/zhack/zhack.c  13
-rw-r--r--  usr/src/cmd/zinject/Makefile.com  3
-rw-r--r--  usr/src/cmd/zinject/translate.c  126
-rw-r--r--  usr/src/cmd/zinject/zinject.c  2
-rw-r--r--  usr/src/cmd/zpool/Makefile  5
-rw-r--r--  usr/src/cmd/zpool/zpool_iter.c  1
-rw-r--r--  usr/src/cmd/zpool/zpool_main.c  52
-rw-r--r--  usr/src/cmd/zpool/zpool_vdev.c  3
-rw-r--r--  usr/src/cmd/ztest/Makefile.com  5
-rw-r--r--  usr/src/cmd/ztest/ztest.c  8
-rw-r--r--  usr/src/lib/Makefile  12
-rw-r--r--  usr/src/lib/brand/ipkg/zone/platform.xml  3
-rw-r--r--  usr/src/lib/libdtrace/common/nfs.d  15
-rw-r--r--  usr/src/lib/libdtrace/common/smb.d  6
-rw-r--r--  usr/src/lib/libshare/common/libshare_zfs.c  38
-rw-r--r--  usr/src/lib/libshare/common/mapfile-vers  4
-rw-r--r--  usr/src/lib/libshare/nfs/libshare_nfs.c  78
-rw-r--r--  usr/src/lib/libshare/smb/libshare_smb.c  31
-rw-r--r--  usr/src/lib/libzfs/Makefile.com  7
-rw-r--r--  usr/src/lib/libzfs/common/libzfs.h  36
-rw-r--r--  usr/src/lib/libzfs/common/libzfs_dataset.c  21
-rw-r--r--  usr/src/lib/libzfs/common/libzfs_import.c  1364
-rw-r--r--  usr/src/lib/libzfs/common/libzfs_iter.c  1
-rw-r--r--  usr/src/lib/libzfs/common/libzfs_mount.c  44
-rw-r--r--  usr/src/lib/libzfs/common/libzfs_pool.c  42
-rw-r--r--  usr/src/lib/libzfs/common/libzfs_sendrecv.c  1
-rw-r--r--  usr/src/lib/libzfs/common/libzfs_status.c  66
-rw-r--r--  usr/src/lib/libzfs/common/libzfs_util.c  32
-rw-r--r--  usr/src/lib/libzfs/common/mapfile-vers  12
-rw-r--r--  usr/src/lib/libzfs_jni/Makefile.com  7
-rw-r--r--  usr/src/lib/libzfs_jni/common/libzfs_jni_pool.c  29
-rw-r--r--  usr/src/lib/libzpool/Makefile.com  6
-rw-r--r--  usr/src/lib/libzpool/common/kernel.c  4
-rw-r--r--  usr/src/lib/libzpool/common/util.c  55
-rw-r--r--  usr/src/lib/libzutil/Makefile  51
-rw-r--r--  usr/src/lib/libzutil/Makefile.com  49
-rw-r--r--  usr/src/lib/libzutil/amd64/Makefile  19
-rw-r--r--  usr/src/lib/libzutil/common/libzutil.h  122
-rw-r--r--  usr/src/lib/libzutil/common/mapfile-vers  48
-rw-r--r--  usr/src/lib/libzutil/common/zutil_import.c  1548
-rw-r--r--  usr/src/lib/libzutil/common/zutil_import.h  76
-rw-r--r--  usr/src/lib/libzutil/common/zutil_nicenum.c  172
-rw-r--r--  usr/src/lib/libzutil/common/zutil_pool.c  165
-rw-r--r--  usr/src/lib/libzutil/i386/Makefile  18
-rw-r--r--  usr/src/lib/libzutil/inc.flg  19
-rw-r--r--  usr/src/lib/libzutil/sparc/Makefile  18
-rw-r--r--  usr/src/lib/libzutil/sparcv9/Makefile  21
-rw-r--r--  usr/src/man/man1/compress.1  40
-rw-r--r--  usr/src/man/man1/pack.1  30
-rw-r--r--  usr/src/pkg/manifests/service-file-system-nfs.mf  5
-rw-r--r--  usr/src/pkg/manifests/system-file-system-zfs.mf  11
-rw-r--r--  usr/src/uts/common/dtrace/sdt_subr.c  977
-rw-r--r--  usr/src/uts/common/fs/nfs/nfs3_srv.c  414
-rw-r--r--  usr/src/uts/common/fs/nfs/nfs4_callback.c  14
-rw-r--r--  usr/src/uts/common/fs/nfs/nfs4_db.c  72
-rw-r--r--  usr/src/uts/common/fs/nfs/nfs4_dispatch.c  59
-rw-r--r--  usr/src/uts/common/fs/nfs/nfs4_srv.c  671
-rw-r--r--  usr/src/uts/common/fs/nfs/nfs4_srv_attr.c  38
-rw-r--r--  usr/src/uts/common/fs/nfs/nfs4_srv_deleg.c  95
-rw-r--r--  usr/src/uts/common/fs/nfs/nfs4_srv_ns.c  143
-rw-r--r--  usr/src/uts/common/fs/nfs/nfs4_state.c  637
-rw-r--r--  usr/src/uts/common/fs/nfs/nfs_auth.c  327
-rw-r--r--  usr/src/uts/common/fs/nfs/nfs_client.c  19
-rw-r--r--  usr/src/uts/common/fs/nfs/nfs_cmd.c  71
-rw-r--r--  usr/src/uts/common/fs/nfs/nfs_export.c  537
-rw-r--r--  usr/src/uts/common/fs/nfs/nfs_log.c  51
-rw-r--r--  usr/src/uts/common/fs/nfs/nfs_server.c  588
-rw-r--r--  usr/src/uts/common/fs/nfs/nfs_srv.c  154
-rw-r--r--  usr/src/uts/common/fs/nfs/nfs_stats.c  250
-rw-r--r--  usr/src/uts/common/fs/nfs/nfs_sys.c  20
-rw-r--r--  usr/src/uts/common/fs/proc/prsubr.c  27
-rw-r--r--  usr/src/uts/common/fs/proc/prvnops.c  114
-rw-r--r--  usr/src/uts/common/fs/sharefs/sharefs_vfsops.c  14
-rw-r--r--  usr/src/uts/common/fs/sharefs/sharefs_vnops.c  83
-rw-r--r--  usr/src/uts/common/fs/sharefs/sharetab.c  300
-rw-r--r--  usr/src/uts/common/fs/sockfs/nl7curi.c  12
-rw-r--r--  usr/src/uts/common/fs/sockfs/sockfilter.c  23
-rw-r--r--  usr/src/uts/common/fs/sockfs/socktpi.c  10
-rw-r--r--  usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h  1
-rw-r--r--  usr/src/uts/common/fs/zfs/zfs_ioctl.c  11
-rw-r--r--  usr/src/uts/common/fs/zfs/zio_inject.c  69
-rw-r--r--  usr/src/uts/common/klm/klmmod.c  4
-rw-r--r--  usr/src/uts/common/klm/nlm_impl.c  49
-rw-r--r--  usr/src/uts/common/klm/nlm_impl.h  2
-rw-r--r--  usr/src/uts/common/nfs/export.h  100
-rw-r--r--  usr/src/uts/common/nfs/nfs.h  89
-rw-r--r--  usr/src/uts/common/nfs/nfs4.h  189
-rw-r--r--  usr/src/uts/common/nfs/nfs4_drc.h  24
-rw-r--r--  usr/src/uts/common/nfs/nfs_acl.h  2
-rw-r--r--  usr/src/uts/common/nfs/nfs_cmd.h  9
-rw-r--r--  usr/src/uts/common/nfs/nfs_dispatch.h  1
-rw-r--r--  usr/src/uts/common/nfs/nfs_log.h  9
-rw-r--r--  usr/src/uts/common/sharefs/sharefs.h  33
-rw-r--r--  usr/src/uts/common/sys/prsystm.h  2
-rw-r--r--  usr/src/uts/common/sys/sdt.h  8
-rw-r--r--  usr/src/uts/common/sys/zone.h  26
-rw-r--r--  usr/src/uts/intel/sockfs/Makefile  24
-rw-r--r--  usr/src/uts/sparc/sockfs/Makefile  21
125 files changed, 7224 insertions, 5076 deletions
diff --git a/manifest b/manifest
index 5833bd27d0..8fcb2c018c 100644
--- a/manifest
+++ b/manifest
@@ -1379,6 +1379,8 @@ f lib/amd64/libzfs.so.1 0755 root bin
s lib/amd64/libzfs.so=libzfs.so.1
s lib/amd64/libzfs_core.so.1=../../usr/lib/amd64/libzfs_core.so.1
s lib/amd64/libzfs_core.so=../../usr/lib/amd64/libzfs_core.so
+f lib/amd64/libzutil.so.1 0755 root bin
+s lib/amd64/libzutil.so=libzutil.so.1
f lib/amd64/nss_compat.so.1 0755 root bin
f lib/amd64/nss_dns.so.1 0755 root bin
f lib/amd64/nss_files.so.1 0755 root bin
@@ -1584,6 +1586,8 @@ f lib/libzfs.so.1 0755 root bin
s lib/libzfs.so=libzfs.so.1
s lib/libzfs_core.so.1=../usr/lib/libzfs_core.so
s lib/libzfs_core.so=../usr/lib/libzfs_core.so
+f lib/libzutil.so.1 0755 root bin
+s lib/libzutil.so=libzutil.so.1
d lib/mpxio 0755 root bin
f lib/mpxio/stmsboot_util 0555 root bin
f lib/nss_compat.so.1 0755 root bin
@@ -5430,6 +5434,8 @@ f usr/lib/amd64/libzonecfg.so.1 0755 root bin
s usr/lib/amd64/libzonecfg.so=libzonecfg.so.1
f usr/lib/amd64/libzpool.so.1 0755 root bin
s usr/lib/amd64/libzpool.so=libzpool.so.1
+s usr/lib/amd64/libzutil.so.1=../../../lib/amd64/libzutil.so.1
+s usr/lib/amd64/libzutil.so=../../../lib/amd64/libzutil.so.1
f usr/lib/amd64/lx_brand.so.1 0755 root sys
f usr/lib/amd64/madv.so.1 0755 root bin
f usr/lib/amd64/mpss.so.1 0755 root bin
@@ -6957,6 +6963,8 @@ f usr/lib/libzoneinfo.so.1 0755 root bin
s usr/lib/libzoneinfo.so=libzoneinfo.so.1
f usr/lib/libzpool.so.1 0755 root bin
s usr/lib/libzpool.so=libzpool.so.1
+s usr/lib/libzutil.so.1=../../lib/libzutil.so.1
+s usr/lib/libzutil.so=../../lib/libzutil.so.1
d usr/lib/link_audit 0755 root bin
s usr/lib/link_audit/32=.
s usr/lib/link_audit/64=amd64
@@ -10127,6 +10135,9 @@ f usr/lib/netsvc/yp/ypxfr_2perday 0555 root sys
f usr/lib/netsvc/yp/ypxfrd 0555 root sys
f usr/lib/newsyslog 0555 root sys
d usr/lib/nfs 0755 root sys
+d usr/lib/nfs/dtrace 0755 root sys
+f usr/lib/nfs/dtrace/nfs-time.d 0555 root bin
+f usr/lib/nfs/dtrace/nfs-trace.d 0555 root bin
f usr/lib/nfs/libmapid.so.1 0755 root bin
s usr/lib/nfs/libmapid.so=libmapid.so.1
f usr/lib/nfs/lockd 0555 root bin
diff --git a/usr/src/Targetdirs b/usr/src/Targetdirs
index bb212bbeec..699a2b8dc9 100644
--- a/usr/src/Targetdirs
+++ b/usr/src/Targetdirs
@@ -30,6 +30,7 @@
# Copyright 2017 RackTop Systems.
# Copyright 2019 Joyent, Inc.
# Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
+# Copyright 2020 Joyent, Inc.
#
#
@@ -926,6 +927,8 @@ $(ROOT)/usr/lib/libzfs.so.1:= REALPATH=../../lib/libzfs.so.1
$(ROOT)/usr/lib/libzfs.so:= REALPATH=../../lib/libzfs.so.1
$(ROOT)/usr/lib/libzfs_core.so.1:= REALPATH=../../lib/libzfs_core.so.1
$(ROOT)/usr/lib/libzfs_core.so:= REALPATH=../../lib/libzfs_core.so.1
+$(ROOT)/usr/lib/libzutil.so.1:= REALPATH=../../lib/libzutil.so.1
+$(ROOT)/usr/lib/libzutil.so:= REALPATH=../../lib/libzutil.so.1
$(ROOT)/usr/lib/nss_compat.so.1:= REALPATH=../../lib/nss_compat.so.1
$(ROOT)/usr/lib/nss_dns.so.1:= REALPATH=../../lib/nss_dns.so.1
$(ROOT)/usr/lib/nss_files.so.1:= REALPATH=../../lib/nss_files.so.1
@@ -1194,6 +1197,10 @@ $(ROOT)/usr/lib/$(MACH64)/libzfs_core.so:= \
REALPATH=../../../lib/$(MACH64)/libzfs_core.so.1
$(ROOT)/usr/lib/$(MACH64)/libzfs_core.so.1:= \
REALPATH=../../../lib/$(MACH64)/libzfs_core.so.1
+$(ROOT)/usr/lib/$(MACH64)/libzutil.so:= \
+ REALPATH=../../../lib/$(MACH64)/libzutil.so.1
+$(ROOT)/usr/lib/$(MACH64)/libzutil.so.1:= \
+ REALPATH=../../../lib/$(MACH64)/libzutil.so.1
$(ROOT)/usr/lib/$(MACH64)/libfakekernel.so:= \
REALPATH=../../../lib/$(MACH64)/libfakekernel.so.1
$(ROOT)/usr/lib/$(MACH64)/libfakekernel.so.1:= \
@@ -1357,6 +1364,8 @@ SYM.USRLIB= \
/usr/lib/libzfs.so.1 \
/usr/lib/libzfs_core.so \
/usr/lib/libzfs_core.so.1 \
+ /usr/lib/libzutil.so \
+ /usr/lib/libzutil.so.1 \
/usr/lib/nss_compat.so.1 \
/usr/lib/nss_dns.so.1 \
/usr/lib/nss_files.so.1 \
@@ -1506,6 +1515,8 @@ SYM.USRLIB64= \
/usr/lib/$(MACH64)/libzfs.so.1 \
/usr/lib/$(MACH64)/libzfs_core.so \
/usr/lib/$(MACH64)/libzfs_core.so.1 \
+ /usr/lib/$(MACH64)/libzutil.so \
+ /usr/lib/$(MACH64)/libzutil.so.1 \
/usr/lib/$(MACH64)/nss_compat.so.1 \
/usr/lib/$(MACH64)/nss_dns.so.1 \
/usr/lib/$(MACH64)/nss_files.so.1 \
diff --git a/usr/src/boot/Makefile.version b/usr/src/boot/Makefile.version
index b4fb4691f4..4c3af63cb9 100644
--- a/usr/src/boot/Makefile.version
+++ b/usr/src/boot/Makefile.version
@@ -33,4 +33,4 @@ LOADER_VERSION = 1.1
# Use date like formatting here, YYYY.MM.DD.XX, without leading zeroes.
# The version is processed from left to right, the version number can only
# be increased.
-BOOT_VERSION = $(LOADER_VERSION)-2020.02.02.1
+BOOT_VERSION = $(LOADER_VERSION)-2020.02.03.1
diff --git a/usr/src/boot/lib/libstand/zfs/zfsimpl.c b/usr/src/boot/lib/libstand/zfs/zfsimpl.c
index 212b5faa52..c291ef771e 100644
--- a/usr/src/boot/lib/libstand/zfs/zfsimpl.c
+++ b/usr/src/boot/lib/libstand/zfs/zfsimpl.c
@@ -137,7 +137,6 @@ static spa_list_t zfs_pools;
static const dnode_phys_t *dnode_cache_obj;
static uint64_t dnode_cache_bn;
static char *dnode_cache_buf;
-static char *zap_scratch;
static char *zfs_temp_buf, *zfs_temp_end, *zfs_temp_ptr;
#define TEMP_SIZE (1024 * 1024)
@@ -167,7 +166,6 @@ zfs_init(void)
zfs_temp_end = zfs_temp_buf + TEMP_SIZE;
zfs_temp_ptr = zfs_temp_buf;
dnode_cache_buf = malloc(SPA_MAXBLOCKSIZE);
- zap_scratch = malloc(SPA_MAXBLOCKSIZE);
zfs_init_crc();
}
@@ -2303,26 +2301,20 @@ dnode_read(const spa_t *spa, const dnode_phys_t *dnode, off_t offset,
}
/*
- * Lookup a value in a microzap directory. Assumes that the zap
- * scratch buffer contains the directory contents.
+ * Lookup a value in a microzap directory.
*/
static int
-mzap_lookup(const dnode_phys_t *dnode, const char *name, uint64_t *value)
+mzap_lookup(const mzap_phys_t *mz, size_t size, const char *name,
+ uint64_t *value)
{
- const mzap_phys_t *mz;
const mzap_ent_phys_t *mze;
- size_t size;
int chunks, i;
/*
* Microzap objects use exactly one block. Read the whole
* thing.
*/
- size = dnode->dn_datablkszsec * 512;
-
- mz = (const mzap_phys_t *) zap_scratch;
chunks = size / MZAP_ENT_LEN - 1;
-
for (i = 0; i < chunks; i++) {
mze = &mz->mz_chunk[i];
if (strcmp(mze->mze_name, name) == 0) {
@@ -2465,91 +2457,166 @@ fzap_check_size(uint64_t integer_size, uint64_t num_integers)
return (0);
}
-/*
- * Lookup a value in a fatzap directory. Assumes that the zap scratch
- * buffer contains the directory header.
- */
+static void
+zap_leaf_free(zap_leaf_t *leaf)
+{
+ free(leaf->l_phys);
+ free(leaf);
+}
+
static int
-fzap_lookup(const spa_t *spa, const dnode_phys_t *dnode, const char *name,
- uint64_t integer_size, uint64_t num_integers, void *value)
+zap_get_leaf_byblk(fat_zap_t *zap, uint64_t blk, zap_leaf_t **lp)
{
- int bsize = dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT;
- zap_phys_t zh = *(zap_phys_t *)zap_scratch;
- fat_zap_t z;
- uint64_t *ptrtbl;
- uint64_t hash;
- int rc;
+ int bs = FZAP_BLOCK_SHIFT(zap);
+ int err;
- if (zh.zap_magic != ZAP_MAGIC)
- return (EIO);
+ *lp = malloc(sizeof (**lp));
+ if (*lp == NULL)
+ return (ENOMEM);
- if ((rc = fzap_check_size(integer_size, num_integers)) != 0)
- return (rc);
+ (*lp)->l_bs = bs;
+ (*lp)->l_phys = malloc(1 << bs);
- z.zap_block_shift = ilog2(bsize);
- z.zap_phys = (zap_phys_t *)zap_scratch;
+ if ((*lp)->l_phys == NULL) {
+ free(*lp);
+ return (ENOMEM);
+ }
+ err = dnode_read(zap->zap_spa, zap->zap_dnode, blk << bs, (*lp)->l_phys,
+ 1 << bs);
+ if (err != 0) {
+ zap_leaf_free(*lp);
+ }
+ return (err);
+}
- /*
- * Figure out where the pointer table is and read it in if necessary.
- */
- if (zh.zap_ptrtbl.zt_blk) {
- rc = dnode_read(spa, dnode, zh.zap_ptrtbl.zt_blk * bsize,
- zap_scratch, bsize);
- if (rc)
- return (rc);
- ptrtbl = (uint64_t *)zap_scratch;
+static int
+zap_table_load(fat_zap_t *zap, zap_table_phys_t *tbl, uint64_t idx,
+ uint64_t *valp)
+{
+ int bs = FZAP_BLOCK_SHIFT(zap);
+ uint64_t blk = idx >> (bs - 3);
+ uint64_t off = idx & ((1 << (bs - 3)) - 1);
+ uint64_t *buf;
+ int rc;
+
+ buf = malloc(1 << zap->zap_block_shift);
+ if (buf == NULL)
+ return (ENOMEM);
+ rc = dnode_read(zap->zap_spa, zap->zap_dnode, (tbl->zt_blk + blk) << bs,
+ buf, 1 << zap->zap_block_shift);
+ if (rc == 0)
+ *valp = buf[off];
+ free(buf);
+ return (rc);
+}
+
+static int
+zap_idx_to_blk(fat_zap_t *zap, uint64_t idx, uint64_t *valp)
+{
+ if (zap->zap_phys->zap_ptrtbl.zt_numblks == 0) {
+ *valp = ZAP_EMBEDDED_PTRTBL_ENT(zap, idx);
+ return (0);
} else {
- ptrtbl = &ZAP_EMBEDDED_PTRTBL_ENT(&z, 0);
+ return (zap_table_load(zap, &zap->zap_phys->zap_ptrtbl,
+ idx, valp));
}
+}
- hash = zap_hash(zh.zap_salt, name);
+#define ZAP_HASH_IDX(hash, n) (((n) == 0) ? 0 : ((hash) >> (64 - (n))))
+static int
+zap_deref_leaf(fat_zap_t *zap, uint64_t h, zap_leaf_t **lp)
+{
+ uint64_t idx, blk;
+ int err;
- zap_leaf_t zl;
- zl.l_bs = z.zap_block_shift;
+ idx = ZAP_HASH_IDX(h, zap->zap_phys->zap_ptrtbl.zt_shift);
+ err = zap_idx_to_blk(zap, idx, &blk);
+ if (err != 0)
+ return (err);
+ return (zap_get_leaf_byblk(zap, blk, lp));
+}
- off_t off = ptrtbl[hash >> (64 - zh.zap_ptrtbl.zt_shift)] << zl.l_bs;
- zap_leaf_chunk_t *zc;
+#define CHAIN_END 0xffff /* end of the chunk chain */
+#define LEAF_HASH(l, h) \
+ ((ZAP_LEAF_HASH_NUMENTRIES(l)-1) & \
+ ((h) >> \
+ (64 - ZAP_LEAF_HASH_SHIFT(l) - (l)->l_phys->l_hdr.lh_prefix_len)))
+#define LEAF_HASH_ENTPTR(l, h) (&(l)->l_phys->l_hash[LEAF_HASH(l, h)])
- rc = dnode_read(spa, dnode, off, zap_scratch, bsize);
- if (rc)
- return (rc);
-
- zl.l_phys = (zap_leaf_phys_t *)zap_scratch;
+static int
+zap_leaf_lookup(zap_leaf_t *zl, uint64_t hash, const char *name,
+ uint64_t integer_size, uint64_t num_integers, void *value)
+{
+ int rc;
+ uint16_t *chunkp;
+ struct zap_leaf_entry *le;
/*
* Make sure this chunk matches our hash.
*/
- if (zl.l_phys->l_hdr.lh_prefix_len > 0 &&
- zl.l_phys->l_hdr.lh_prefix !=
- hash >> (64 - zl.l_phys->l_hdr.lh_prefix_len))
- return (ENOENT);
+ if (zl->l_phys->l_hdr.lh_prefix_len > 0 &&
+ zl->l_phys->l_hdr.lh_prefix !=
+ hash >> (64 - zl->l_phys->l_hdr.lh_prefix_len))
+ return (EIO);
- /*
- * Hash within the chunk to find our entry.
- */
- int shift = (64 - ZAP_LEAF_HASH_SHIFT(&zl) -
- zl.l_phys->l_hdr.lh_prefix_len);
- int h = (hash >> shift) & ((1 << ZAP_LEAF_HASH_SHIFT(&zl)) - 1);
- h = zl.l_phys->l_hash[h];
- if (h == 0xffff)
- return (ENOENT);
- zc = &ZAP_LEAF_CHUNK(&zl, h);
- while (zc->l_entry.le_hash != hash) {
- if (zc->l_entry.le_next == 0xffff) {
- zc = 0;
+ rc = ENOENT;
+ for (chunkp = LEAF_HASH_ENTPTR(zl, hash);
+ *chunkp != CHAIN_END; chunkp = &le->le_next) {
+ zap_leaf_chunk_t *zc;
+ uint16_t chunk = *chunkp;
+
+ le = ZAP_LEAF_ENTRY(zl, chunk);
+ if (le->le_hash != hash)
+ continue;
+ zc = &ZAP_LEAF_CHUNK(zl, chunk);
+ if (fzap_name_equal(zl, zc, name)) {
+ if (zc->l_entry.le_value_intlen > integer_size) {
+ rc = EINVAL;
+ } else {
+ fzap_leaf_array(zl, zc, integer_size,
+ num_integers, value);
+ rc = 0;
+ }
break;
}
- zc = &ZAP_LEAF_CHUNK(&zl, zc->l_entry.le_next);
}
- if (fzap_name_equal(&zl, zc, name)) {
- if (zc->l_entry.le_value_intlen > integer_size)
- return (EINVAL);
+ return (rc);
+}
- fzap_leaf_array(&zl, zc, integer_size, num_integers, value);
- return (0);
- }
+/*
+ * Lookup a value in a fatzap directory.
+ */
+static int
+fzap_lookup(const spa_t *spa, const dnode_phys_t *dnode, zap_phys_t *zh,
+ const char *name, uint64_t integer_size, uint64_t num_integers,
+ void *value)
+{
+ int bsize = dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT;
+ fat_zap_t z;
+ zap_leaf_t *zl;
+ uint64_t hash;
+ int rc;
- return (ENOENT);
+ if (zh->zap_magic != ZAP_MAGIC)
+ return (EIO);
+
+ if ((rc = fzap_check_size(integer_size, num_integers)) != 0)
+ return (rc);
+
+ z.zap_block_shift = ilog2(bsize);
+ z.zap_phys = zh;
+ z.zap_spa = spa;
+ z.zap_dnode = dnode;
+
+ hash = zap_hash(zh->zap_salt, name);
+ rc = zap_deref_leaf(&z, hash, &zl);
+ if (rc != 0)
+ return (rc);
+
+ rc = zap_leaf_lookup(zl, hash, name, integer_size, num_integers, value);
+
+ zap_leaf_free(zl);
+ return (rc);
}
/*
@@ -2560,74 +2627,79 @@ zap_lookup(const spa_t *spa, const dnode_phys_t *dnode, const char *name,
uint64_t integer_size, uint64_t num_integers, void *value)
{
int rc;
- uint64_t zap_type;
+ zap_phys_t *zap;
size_t size = dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT;
- rc = dnode_read(spa, dnode, 0, zap_scratch, size);
+ zap = malloc(size);
+ if (zap == NULL)
+ return (ENOMEM);
+
+ rc = dnode_read(spa, dnode, 0, zap, size);
if (rc)
- return (rc);
+ goto done;
- zap_type = *(uint64_t *)zap_scratch;
- if (zap_type == ZBT_MICRO)
- return (mzap_lookup(dnode, name, value));
- else if (zap_type == ZBT_HEADER) {
- return (fzap_lookup(spa, dnode, name, integer_size,
- num_integers, value));
+ switch (zap->zap_block_type) {
+ case ZBT_MICRO:
+ rc = mzap_lookup((const mzap_phys_t *)zap, size, name, value);
+ break;
+ case ZBT_HEADER:
+ rc = fzap_lookup(spa, dnode, zap, name, integer_size,
+ num_integers, value);
+ break;
+ default:
+ printf("ZFS: invalid zap_type=%" PRIx64 "\n",
+ zap->zap_block_type);
+ rc = EIO;
}
- printf("ZFS: invalid zap_type=%d\n", (int)zap_type);
- return (EIO);
+done:
+ free(zap);
+ return (rc);
}
/*
- * List a microzap directory. Assumes that the zap scratch buffer contains
- * the directory contents.
+ * List a microzap directory.
*/
static int
-mzap_list(const dnode_phys_t *dnode, int (*callback)(const char *, uint64_t))
+mzap_list(const mzap_phys_t *mz, size_t size,
+ int (*callback)(const char *, uint64_t))
{
- const mzap_phys_t *mz;
const mzap_ent_phys_t *mze;
- size_t size;
int chunks, i, rc;
/*
* Microzap objects use exactly one block. Read the whole
* thing.
*/
- size = dnode->dn_datablkszsec * 512;
- mz = (const mzap_phys_t *) zap_scratch;
+ rc = 0;
chunks = size / MZAP_ENT_LEN - 1;
-
for (i = 0; i < chunks; i++) {
mze = &mz->mz_chunk[i];
if (mze->mze_name[0]) {
rc = callback(mze->mze_name, mze->mze_value);
if (rc != 0)
- return (rc);
+ break;
}
}
- return (0);
+ return (rc);
}
/*
- * List a fatzap directory. Assumes that the zap scratch buffer contains
- * the directory header.
+ * List a fatzap directory.
*/
static int
-fzap_list(const spa_t *spa, const dnode_phys_t *dnode,
+fzap_list(const spa_t *spa, const dnode_phys_t *dnode, zap_phys_t *zh,
int (*callback)(const char *, uint64_t))
{
int bsize = dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT;
- zap_phys_t zh = *(zap_phys_t *)zap_scratch;
fat_zap_t z;
int i, j, rc;
- if (zh.zap_magic != ZAP_MAGIC)
+ if (zh->zap_magic != ZAP_MAGIC)
return (EIO);
z.zap_block_shift = ilog2(bsize);
- z.zap_phys = (zap_phys_t *)zap_scratch;
+ z.zap_phys = zh;
/*
* This assumes that the leaf blocks start at block 1. The
@@ -2635,15 +2707,19 @@ fzap_list(const spa_t *spa, const dnode_phys_t *dnode,
*/
zap_leaf_t zl;
zl.l_bs = z.zap_block_shift;
- for (i = 0; i < zh.zap_num_leafs; i++) {
+ zl.l_phys = malloc(bsize);
+ if (zl.l_phys == NULL)
+ return (ENOMEM);
+
+ for (i = 0; i < zh->zap_num_leafs; i++) {
off_t off = ((off_t)(i + 1)) << zl.l_bs;
char name[256], *p;
uint64_t value;
- if (dnode_read(spa, dnode, off, zap_scratch, bsize))
+ if (dnode_read(spa, dnode, off, zl.l_phys, bsize)) {
+ free(zl.l_phys);
return (EIO);
-
- zl.l_phys = (zap_leaf_phys_t *)zap_scratch;
+ }
for (j = 0; j < ZAP_LEAF_NUMCHUNKS(&zl); j++) {
zap_leaf_chunk_t *zc, *nc;
@@ -2680,11 +2756,14 @@ fzap_list(const spa_t *spa, const dnode_phys_t *dnode,
/* printf("%s 0x%jx\n", name, (uintmax_t)value); */
rc = callback((const char *)name, value);
- if (rc != 0)
+ if (rc != 0) {
+ free(zl.l_phys);
return (rc);
+ }
}
}
+ free(zl.l_phys);
return (0);
}
@@ -2702,17 +2781,24 @@ static int zfs_printf(const char *name, uint64_t value __unused)
static int
zap_list(const spa_t *spa, const dnode_phys_t *dnode)
{
- uint64_t zap_type;
- size_t size = dnode->dn_datablkszsec * 512;
+ zap_phys_t *zap;
+ size_t size = dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT;
+ int rc;
- if (dnode_read(spa, dnode, 0, zap_scratch, size))
- return (EIO);
+ zap = malloc(size);
+ if (zap == NULL)
+ return (ENOMEM);
- zap_type = *(uint64_t *)zap_scratch;
- if (zap_type == ZBT_MICRO)
- return (mzap_list(dnode, zfs_printf));
- else
- return (fzap_list(spa, dnode, zfs_printf));
+ rc = dnode_read(spa, dnode, 0, zap, size);
+ if (rc == 0) {
+ if (zap->zap_block_type == ZBT_MICRO)
+ rc = mzap_list((const mzap_phys_t *)zap, size,
+ zfs_printf);
+ else
+ rc = fzap_list(spa, dnode, zap, zfs_printf);
+ }
+ free(zap);
+ return (rc);
}
static int
@@ -2726,24 +2812,20 @@ objset_get_dnode(const spa_t *spa, const objset_phys_t *os, uint64_t objnum,
dnode, sizeof (dnode_phys_t)));
}
+/*
+ * Lookup a name in a microzap directory.
+ */
static int
-mzap_rlookup(const spa_t *spa __unused, const dnode_phys_t *dnode, char *name,
- uint64_t value)
+mzap_rlookup(const mzap_phys_t *mz, size_t size, char *name, uint64_t value)
{
- const mzap_phys_t *mz;
const mzap_ent_phys_t *mze;
- size_t size;
int chunks, i;
/*
* Microzap objects use exactly one block. Read the whole
* thing.
*/
- size = dnode->dn_datablkszsec * 512;
-
- mz = (const mzap_phys_t *)zap_scratch;
chunks = size / MZAP_ENT_LEN - 1;
-
for (i = 0; i < chunks; i++) {
mze = &mz->mz_chunk[i];
if (value == mze->mze_value) {
@@ -2781,19 +2863,19 @@ fzap_name_copy(const zap_leaf_t *zl, const zap_leaf_chunk_t *zc, char *name)
}
static int
-fzap_rlookup(const spa_t *spa, const dnode_phys_t *dnode, char *name,
- uint64_t value)
+fzap_rlookup(const spa_t *spa, const dnode_phys_t *dnode, zap_phys_t *zh,
+ char *name, uint64_t value)
{
int bsize = dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT;
- zap_phys_t zh = *(zap_phys_t *)zap_scratch;
fat_zap_t z;
- int i, j;
+ uint64_t i;
+ int j, rc;
- if (zh.zap_magic != ZAP_MAGIC)
+ if (zh->zap_magic != ZAP_MAGIC)
return (EIO);
z.zap_block_shift = ilog2(bsize);
- z.zap_phys = (zap_phys_t *)zap_scratch;
+ z.zap_phys = zh;
/*
* This assumes that the leaf blocks start at block 1. The
@@ -2801,13 +2883,16 @@ fzap_rlookup(const spa_t *spa, const dnode_phys_t *dnode, char *name,
*/
zap_leaf_t zl;
zl.l_bs = z.zap_block_shift;
- for (i = 0; i < zh.zap_num_leafs; i++) {
- off_t off = ((off_t)(i + 1)) << zl.l_bs;
+ zl.l_phys = malloc(bsize);
+ if (zl.l_phys == NULL)
+ return (ENOMEM);
- if (dnode_read(spa, dnode, off, zap_scratch, bsize))
- return (EIO);
+ for (i = 0; i < zh->zap_num_leafs; i++) {
+ off_t off = ((off_t)(i + 1)) << zl.l_bs;
- zl.l_phys = (zap_leaf_phys_t *)zap_scratch;
+ rc = dnode_read(spa, dnode, off, zl.l_phys, bsize);
+ if (rc != 0)
+ goto done;
for (j = 0; j < ZAP_LEAF_NUMCHUNKS(&zl); j++) {
zap_leaf_chunk_t *zc;
@@ -2821,31 +2906,39 @@ fzap_rlookup(const spa_t *spa, const dnode_phys_t *dnode, char *name,
if (fzap_leaf_value(&zl, zc) == value) {
fzap_name_copy(&zl, zc, name);
- return (0);
+ goto done;
}
}
}
- return (ENOENT);
+ rc = ENOENT;
+done:
+ free(zl.l_phys);
+ return (rc);
}
static int
zap_rlookup(const spa_t *spa, const dnode_phys_t *dnode, char *name,
uint64_t value)
{
+ zap_phys_t *zap;
+ size_t size = dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT;
int rc;
- uint64_t zap_type;
- size_t size = dnode->dn_datablkszsec * 512;
- rc = dnode_read(spa, dnode, 0, zap_scratch, size);
- if (rc)
- return (rc);
+ zap = malloc(size);
+ if (zap == NULL)
+ return (ENOMEM);
- zap_type = *(uint64_t *)zap_scratch;
- if (zap_type == ZBT_MICRO)
- return (mzap_rlookup(spa, dnode, name, value));
- else
- return (fzap_rlookup(spa, dnode, name, value));
+ rc = dnode_read(spa, dnode, 0, zap, size);
+ if (rc == 0) {
+ if (zap->zap_block_type == ZBT_MICRO)
+ rc = mzap_rlookup((const mzap_phys_t *)zap, size,
+ name, value);
+ else
+ rc = fzap_rlookup(spa, dnode, zap, name, value);
+ }
+ free(zap);
+ return (rc);
}
static int
@@ -2997,10 +3090,12 @@ int
zfs_callback_dataset(const spa_t *spa, uint64_t objnum,
int (*callback)(const char *, uint64_t))
{
- uint64_t dir_obj, child_dir_zapobj, zap_type;
+ uint64_t dir_obj, child_dir_zapobj;
dnode_phys_t child_dir_zap, dir, dataset;
dsl_dataset_phys_t *ds;
dsl_dir_phys_t *dd;
+ zap_phys_t *zap;
+ size_t size;
int err;
err = objset_get_dnode(spa, &spa->spa_mos, objnum, &dataset);
@@ -3026,16 +3121,24 @@ zfs_callback_dataset(const spa_t *spa, uint64_t objnum,
return (err);
}
- err = dnode_read(spa, &child_dir_zap, 0, zap_scratch,
- child_dir_zap.dn_datablkszsec * 512);
- if (err != 0)
- return (err);
+ size = child_dir_zap.dn_datablkszsec << SPA_MINBLOCKSHIFT;
+ zap = malloc(size);
+ if (zap != NULL) {
+ err = dnode_read(spa, &child_dir_zap, 0, zap, size);
+ if (err != 0)
+ goto done;
- zap_type = *(uint64_t *)zap_scratch;
- if (zap_type == ZBT_MICRO)
- return (mzap_list(&child_dir_zap, callback));
- else
- return (fzap_list(spa, &child_dir_zap, callback));
+ if (zap->zap_block_type == ZBT_MICRO)
+ err = mzap_list((const mzap_phys_t *)zap, size,
+ callback);
+ else
+ err = fzap_list(spa, &child_dir_zap, zap, callback);
+ } else {
+ err = ENOMEM;
+ }
+done:
+ free(zap);
+ return (err);
}
/*
@@ -3166,7 +3269,8 @@ static int
check_mos_features(const spa_t *spa)
{
dnode_phys_t dir;
- uint64_t objnum, zap_type;
+ zap_phys_t *zap;
+ uint64_t objnum;
size_t size;
int rc;
@@ -3190,16 +3294,22 @@ check_mos_features(const spa_t *spa)
if (dir.dn_type != DMU_OTN_ZAP_METADATA)
return (EIO);
- size = dir.dn_datablkszsec * 512;
- if (dnode_read(spa, &dir, 0, zap_scratch, size))
+ size = dir.dn_datablkszsec << SPA_MINBLOCKSHIFT;
+ zap = malloc(size);
+ if (zap == NULL)
+ return (ENOMEM);
+
+ if (dnode_read(spa, &dir, 0, zap, size)) {
+ free(zap);
return (EIO);
+ }
- zap_type = *(uint64_t *)zap_scratch;
- if (zap_type == ZBT_MICRO)
- rc = mzap_list(&dir, check_feature);
+ if (zap->zap_block_type == ZBT_MICRO)
+ rc = mzap_list((const mzap_phys_t *)zap, size, check_feature);
else
- rc = fzap_list(spa, &dir, check_feature);
+ rc = fzap_list(spa, &dir, zap, check_feature);
+ free(zap);
return (rc);
}
diff --git a/usr/src/boot/sys/cddl/boot/zfs/zfsimpl.h b/usr/src/boot/sys/cddl/boot/zfs/zfsimpl.h
index c57181b670..4993ca754c 100644
--- a/usr/src/boot/sys/cddl/boot/zfs/zfsimpl.h
+++ b/usr/src/boot/sys/cddl/boot/zfs/zfsimpl.h
@@ -1339,8 +1339,7 @@ typedef struct dsl_dataset_phys {
#define ZAP_HASHBITS 28
#define MZAP_ENT_LEN 64
#define MZAP_NAME_LEN (MZAP_ENT_LEN - 8 - 4 - 2)
-#define MZAP_MAX_BLKSHIFT SPA_MAXBLOCKSHIFT
-#define MZAP_MAX_BLKSZ (1 << MZAP_MAX_BLKSHIFT)
+#define MZAP_MAX_BLKSZ SPA_OLD_MAXBLOCKSIZE
typedef struct mzap_ent_phys {
uint64_t mze_value;
@@ -1352,7 +1351,8 @@ typedef struct mzap_ent_phys {
typedef struct mzap_phys {
uint64_t mz_block_type; /* ZBT_MICRO */
uint64_t mz_salt;
- uint64_t mz_pad[6];
+ uint64_t mz_normflags;
+ uint64_t mz_pad[5];
mzap_ent_phys_t mz_chunk[1];
/* actually variable size depending on block size */
} mzap_phys_t;
@@ -1409,6 +1409,8 @@ typedef struct zap_phys {
uint64_t zap_num_leafs; /* number of leafs */
uint64_t zap_num_entries; /* number of entries */
uint64_t zap_salt; /* salt to stir into hash function */
+ uint64_t zap_normflags; /* flags for u8_textprep_str() */
+ uint64_t zap_flags; /* zap_flags_t */
/*
* This structure is followed by padding, and then the embedded
* pointer table. The embedded pointer table takes up second
@@ -1419,9 +1421,12 @@ typedef struct zap_phys {
typedef struct zap_table_phys zap_table_phys_t;
+struct spa;
typedef struct fat_zap {
int zap_block_shift; /* block size shift */
zap_phys_t *zap_phys;
+ const struct spa *zap_spa;
+ const dnode_phys_t *zap_dnode;
} fat_zap_t;
#define ZAP_LEAF_MAGIC 0x2AB1EAF
diff --git a/usr/src/cmd/dfs.cmds/sharemgr/commands.c b/usr/src/cmd/dfs.cmds/sharemgr/commands.c
index 79be97f149..06b8fb54f0 100644
--- a/usr/src/cmd/dfs.cmds/sharemgr/commands.c
+++ b/usr/src/cmd/dfs.cmds/sharemgr/commands.c
@@ -22,7 +22,11 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ */
+
+/*
* Copyright 2012 Milan Jurik. All rights reserved.
+ * Copyright 2018 Nexenta Systems, Inc.
* Copyright 2019, Joyent, Inc.
*/
@@ -2167,7 +2171,6 @@ static void
show_group(sa_group_t group, int verbose, int properties, char *proto,
char *subgroup)
{
- sa_share_t share;
char *groupname;
char *zfs = NULL;
int iszfs = 0;
@@ -2175,6 +2178,8 @@ show_group(sa_group_t group, int verbose, int properties, char *proto,
groupname = sa_get_group_attr(group, "name");
if (groupname != NULL) {
+ sa_share_t share;
+
if (proto != NULL && !has_protocol(group, proto)) {
sa_free_attr_string(groupname);
return;
@@ -2191,7 +2196,7 @@ show_group(sa_group_t group, int verbose, int properties, char *proto,
iszfs = 1;
sa_free_attr_string(zfs);
}
- share = sa_get_share(group, NULL);
+
if (subgroup == NULL)
(void) printf("%s", groupname);
else
diff --git a/usr/src/cmd/dumpadm/Makefile b/usr/src/cmd/dumpadm/Makefile
index 0734535b29..c6b80188fa 100644
--- a/usr/src/cmd/dumpadm/Makefile
+++ b/usr/src/cmd/dumpadm/Makefile
@@ -20,7 +20,7 @@
#
#
# Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
-# Copyright 2019 Joyent, Inc.
+# Copyright 2020 Joyent, Inc.
#
PROG = dumpadm
@@ -36,12 +36,13 @@ ROOTETCFILES= $(ETCFILES:%=$(ROOTETC)/%)
include ../Makefile.cmd
CFLAGS += $(CCVERBOSE)
+CFLAGS += -I../../lib/libzutil/common
FILEMODE = 0555
ROOTMANIFESTDIR = $(ROOTSVCSYSTEM)
-LDLIBS += -ldiskmgt -lzfs -luuid
+LDLIBS += -ldiskmgt -lzfs -luuid -lzutil
.KEEP_STATE:
diff --git a/usr/src/cmd/dumpadm/dconf.c b/usr/src/cmd/dumpadm/dconf.c
index 5a1da87148..6e549afaa7 100644
--- a/usr/src/cmd/dumpadm/dconf.c
+++ b/usr/src/cmd/dumpadm/dconf.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
- * Copyright 2019 Joyent, Inc.
+ * Copyright 2020 Joyent, Inc.
*/
#include <sys/types.h>
@@ -39,6 +39,7 @@
#include <errno.h>
#include <libdiskmgt.h>
#include <libzfs.h>
+#include <libzutil.h>
#include <uuid/uuid.h>
#include "dconf.h"
diff --git a/usr/src/cmd/fs.d/nfs/Makefile b/usr/src/cmd/fs.d/nfs/Makefile
index 7f9ae26ae2..d18ddec4dd 100644
--- a/usr/src/cmd/fs.d/nfs/Makefile
+++ b/usr/src/cmd/fs.d/nfs/Makefile
@@ -23,6 +23,8 @@
# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
+# Copyright 2018 Nexenta Systems, Inc. All rights reserved.
+#
# cmd/fs.d/nfs/Makefile
#
# cmd/fs.d/nfs is the directory of all nfs specific commands
@@ -33,7 +35,7 @@ include $(SRC)/Makefile.master
SUBDIR1= exportfs nfsd rquotad \
statd nfsstat mountd dfshares \
- nfsfind nfs4cbd share tests
+ nfsfind nfs4cbd share tests dtrace
# These do "make catalog"
SUBDIR2= clear_locks lockd umount showmount \
@@ -52,14 +54,13 @@ all:= TARGET= all
install:= TARGET= install
clean:= TARGET= clean
clobber:= TARGET= clobber
-lint:= TARGET= lint
catalog:= TARGET= catalog
.KEEP_STATE:
.PARALLEL: $(SUBDIRS)
-all install clean clobber lint: $(SUBDIRS)
+all install clean clobber: $(SUBDIRS)
catalog: $(SUBDIR2)
$(RM) $(POFILE)
diff --git a/usr/src/cmd/fs.d/nfs/dtrace/Makefile b/usr/src/cmd/fs.d/nfs/dtrace/Makefile
new file mode 100644
index 0000000000..0882ae8b9d
--- /dev/null
+++ b/usr/src/cmd/fs.d/nfs/dtrace/Makefile
@@ -0,0 +1,37 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2018 Nexenta Systems, Inc. All rights reserved.
+#
+
+SRCS=nfs-trace.d nfs-time.d
+
+include $(SRC)/cmd/Makefile.cmd
+
+ROOTNFSDTRACEDIR = $(ROOTLIB)/nfs/dtrace
+ROOTNFSDTRACEFILE = $(SRCS:%=$(ROOTNFSDTRACEDIR)/%)
+
+$(ROOTNFSDTRACEFILE):= FILEMODE = 0555
+
+$(ROOTNFSDTRACEDIR):
+ $(INS.dir)
+
+$(ROOTNFSDTRACEDIR)/%: %
+ $(INS.file)
+
+all:
+
+clean:
+
+include $(SRC)/cmd/Makefile.targ
+
+install: all $(ROOTNFSDTRACEDIR) .WAIT $(ROOTNFSDTRACEFILE)
diff --git a/usr/src/cmd/fs.d/nfs/dtrace/nfs-time.d b/usr/src/cmd/fs.d/nfs/dtrace/nfs-time.d
new file mode 100755
index 0000000000..7d7c33d153
--- /dev/null
+++ b/usr/src/cmd/fs.d/nfs/dtrace/nfs-time.d
@@ -0,0 +1,78 @@
+#!/usr/sbin/dtrace -s
+
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2018 Nexenta Systems, Inc. All rights reserved.
+ */
+
+/*
+ * Quantize the time spent in each NFSv3 and NFSv4 operation,
+ * optionally for a specified client, share and zone.
+ *
+ * Usage: nfs-time.d [<client ip>|all [<share path>|all] [<zone id>]]]
+ *
+ * example: nfs_time.d 192.168.123.1 /mypool/fs1 0
+ *
+ * It is valid to specify <client ip> or <share path> as "all"
+ * to quantize data for all clients and/or all shares.
+ * Omitting <zone id> will quantize data for all zones.
+ */
+
+#pragma D option flowindent
+#pragma D option defaultargs
+
+dtrace:::BEGIN
+{
+ all_clients = (($$1 == NULL) || ($$1 == "all")) ? 1 : 0;
+ all_shares = (($$2 == NULL) || ($$2 == "all")) ? 1 : 0;
+ all_zones = ($$3 == NULL) ? 1 : 0;
+
+ client = $$1;
+ share = $$2;
+ zoneid = $3;
+
+ printf("%Y - client=%s share=%s zone=%s)\n", walltimestamp,
+ (all_clients) ? "all" : client,
+ (all_shares) ? "all" : share,
+ (all_zones) ? "all" : $$3);
+}
+
+nfsv3:::op-*-start,
+nfsv4:::op-*-start
+{
+ self->ts[probefunc] = timestamp;
+}
+
+nfsv3:::op-*-done,
+nfsv4:::op-*-done
+/ ((all_clients) || (args[0]->ci_remote == client)) &&
+ ((all_shares) || (args[1]->noi_shrpath == share)) &&
+ ((all_zones) || (args[1]->noi_zoneid == zoneid)) /
+{
+ elapsed = (timestamp - self->ts[probefunc]);
+ @q[probefunc]=quantize(elapsed);
+}
+
+tick-5s
+{
+ printa(@q);
+ /*
+ * uncomment "clear" to quantize per 5s interval
+ * rather than cumulative for duration of script.
+ * clear(@q);
+ */
+}
+
+dtrace:::END
+{
+}
diff --git a/usr/src/cmd/fs.d/nfs/dtrace/nfs-trace.d b/usr/src/cmd/fs.d/nfs/dtrace/nfs-trace.d
new file mode 100755
index 0000000000..a89fed3424
--- /dev/null
+++ b/usr/src/cmd/fs.d/nfs/dtrace/nfs-trace.d
@@ -0,0 +1,248 @@
+#!/usr/sbin/dtrace -s
+
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2018 Nexenta Systems, Inc. All rights reserved.
+ */
+
+/*
+ * Print input and output values for each NFSv3 and NFSv4 operation,
+ * optionally for a specified client, share and zone.
+ *
+ * Usage: nfs-trace.d [<client ip>|all [<share path>|all] [<zone id>]]]
+ *
+ * example: nfs_trace.d 192.168.123.1 /mypool/fs1 0
+ *
+ * It is valid to specify <client ip> or <share path> as "all"
+ * to quantize data for all clients and/or all shares.
+ * Omitting <zone id> will quantize data for all zones.
+ */
+
+/*
+ * Unfortunately, trying to write this script using wildcards, for example:
+ * nfsv3:::op-*-start {}
+ * nfsv3:::op-*-done {}
+ * prints the operation-specific args[2] structure as the incorrect type.
+ * Until this is resolved it is necessary to explicitly list each operation.
+ *
+ * See nfs-time.d for an example of using the wildcard format when there are
+ * no operation-specific args (args[2]) being traced.
+ */
+
+#pragma D option flowindent
+#pragma D option defaultargs
+
+dtrace:::BEGIN
+{
+ all_clients = (($$1 == NULL) || ($$1 == "all")) ? 1 : 0;
+ all_shares = (($$2 == NULL) || ($$2 == "all")) ? 1 : 0;
+ all_zones = ($$3 == NULL) ? 1 : 0;
+
+ client = $$1;
+ share = $$2;
+ zoneid = $3;
+
+ printf("%Y - client=%s share=%s zone=%s)\n", walltimestamp,
+ (all_clients) ? "all" : client,
+ (all_shares) ? "all" : share,
+ (all_zones) ? "all" : $$3);
+}
+
+nfsv3:::op-getattr-start,
+nfsv3:::op-setattr-start,
+nfsv3:::op-lookup-start,
+nfsv3:::op-access-start,
+nfsv3:::op-commit-start,
+nfsv3:::op-create-start,
+nfsv3:::op-fsinfo-start,
+nfsv3:::op-fsstat-start,
+nfsv3:::op-link-start,
+nfsv3:::op-mkdir-start,
+nfsv3:::op-mknod-start,
+nfsv3:::op-pathconf-start,
+nfsv3:::op-read-start,
+nfsv3:::op-readdir-start,
+nfsv3:::op-readdirplus-start,
+nfsv3:::op-readlink-start,
+nfsv3:::op-remove-start,
+nfsv3:::op-rename-start,
+nfsv3:::op-rmdir-start,
+nfsv3:::op-symlink-start,
+nfsv3:::op-write-start
+/ ((all_clients) || (args[0]->ci_remote == client)) &&
+ ((all_shares) || (args[1]->noi_shrpath == share)) &&
+ ((all_zones) || (args[1]->noi_zoneid == zoneid)) /
+{
+ printf("\n");
+ print(*args[0]);
+ printf("\n");
+ print(*args[1]);
+ printf("\n");
+ print(*args[2]);
+ printf("\n");
+}
+
+nfsv3:::op-getattr-done,
+nfsv3:::op-setattr-done,
+nfsv3:::op-lookup-done,
+nfsv3:::op-access-done,
+nfsv3:::op-commit-done,
+nfsv3:::op-create-done,
+nfsv3:::op-fsinfo-done,
+nfsv3:::op-fsstat-done,
+nfsv3:::op-link-done,
+nfsv3:::op-mkdir-done,
+nfsv3:::op-mknod-done,
+nfsv3:::op-pathconf-done,
+nfsv3:::op-read-done,
+nfsv3:::op-readdir-done,
+nfsv3:::op-readdirplus-done,
+nfsv3:::op-readlink-done,
+nfsv3:::op-remove-done,
+nfsv3:::op-rename-done,
+nfsv3:::op-rmdir-done,
+nfsv3:::op-symlink-done,
+nfsv3:::op-write-done
+/ ((all_clients) || (args[0]->ci_remote == client)) &&
+ ((all_shares) || (args[1]->noi_shrpath == share)) &&
+ ((all_zones) || (args[1]->noi_zoneid == zoneid)) /
+{
+ /*
+ printf("\n");
+ print(*args[0]);
+ printf("\n");
+ print(*args[1]);
+ */
+ printf("\n");
+ print(*args[2]);
+ printf("\n");
+}
+
+nfsv4:::op-access-start,
+nfsv4:::op-close-start,
+nfsv4:::op-commit-start,
+nfsv4:::op-create-start,
+nfsv4:::op-delegpurge-start,
+nfsv4:::op-delegreturn-start,
+nfsv4:::op-getattr-start,
+nfsv4:::op-link-start,
+nfsv4:::op-lock-start,
+nfsv4:::op-lockt-start,
+nfsv4:::op-locku-start,
+nfsv4:::op-lookup-start,
+nfsv4:::op-nverify-start,
+nfsv4:::op-open-start,
+nfsv4:::op-open-confirm-start,
+nfsv4:::op-open-downgrade-start,
+nfsv4:::op-openattr-start,
+nfsv4:::op-putfh-start,
+nfsv4:::op-read-start,
+nfsv4:::op-readdir-start,
+nfsv4:::op-release-lockowner-start,
+nfsv4:::op-remove-start,
+nfsv4:::op-rename-start,
+nfsv4:::op-renew-start,
+nfsv4:::op-secinfo-start,
+nfsv4:::op-setattr-start,
+nfsv4:::op-setclientid-start,
+nfsv4:::op-setclientid-confirm-start,
+nfsv4:::op-verify-start,
+nfsv4:::op-write-start
+/ ((all_clients) || (args[0]->ci_remote == client)) &&
+ ((all_shares) || (args[1]->noi_shrpath == share)) &&
+ ((all_zones) || (args[1]->noi_zoneid == zoneid)) /
+{
+ printf("\n");
+ print(*args[0]);
+ printf("\n");
+ print(*args[1]);
+ printf("\n");
+ print(*args[2]);
+ printf("\n");
+}
+
+/* These operations do not have args[2] */
+nfsv4:::op-getfh-start,
+nfsv4:::op-lookupp-start,
+nfsv4:::op-putpubfh-start,
+nfsv4:::op-putrootfh-start,
+nfsv4:::op-readlink-start,
+nfsv4:::op-restorefh-start,
+nfsv4:::op-savefh-start
+/ ((all_clients) || (args[0]->ci_remote == client)) &&
+ ((all_shares) || (args[1]->noi_shrpath == share)) &&
+ ((all_zones) || (args[1]->noi_zoneid == zoneid)) /
+{
+ printf("\n");
+ print(*args[0]);
+ printf("\n");
+ print(*args[1]);
+ printf("\n");
+}
+
+
+nfsv4:::op-access-done,
+nfsv4:::op-close-done,
+nfsv4:::op-commit-done,
+nfsv4:::op-create-done,
+nfsv4:::op-delegpurge-done,
+nfsv4:::op-delegreturn-done,
+nfsv4:::op-getattr-done,
+nfsv4:::op-getfh-done,
+nfsv4:::op-link-done,
+nfsv4:::op-lock-done,
+nfsv4:::op-lockt-done,
+nfsv4:::op-locku-done,
+nfsv4:::op-lookup-done,
+nfsv4:::op-lookupp-done,
+nfsv4:::op-nverify-done,
+nfsv4:::op-open-done,
+nfsv4:::op-open-confirm-done,
+nfsv4:::op-open-downgrade-done,
+nfsv4:::op-openattr-done,
+nfsv4:::op-putfh-done,
+nfsv4:::op-putpubfh-done,
+nfsv4:::op-putrootfh-done,
+nfsv4:::op-read-done,
+nfsv4:::op-readdir-done,
+nfsv4:::op-readlink-done,
+nfsv4:::op-release-lockowner-done,
+nfsv4:::op-remove-done,
+nfsv4:::op-rename-done,
+nfsv4:::op-renew-done,
+nfsv4:::op-restorefh-done,
+nfsv4:::op-savefh-done,
+nfsv4:::op-secinfo-done,
+nfsv4:::op-setattr-done,
+nfsv4:::op-setclientid-done,
+nfsv4:::op-setclientid-confirm-done,
+nfsv4:::op-verify-done,
+nfsv4:::op-write-done
+/ ((all_clients) || (args[0]->ci_remote == client)) &&
+ ((all_shares) || (args[1]->noi_shrpath == share)) &&
+ ((all_zones) || (args[1]->noi_zoneid == zoneid)) /
+{
+ /*
+ printf("\n");
+ print(*args[0]);
+ printf("\n");
+ print(*args[1]);
+ */
+ printf("\n");
+ print(*args[2]);
+ printf("\n");
+}
+
+dtrace:::END
+{
+}
diff --git a/usr/src/cmd/fs.d/nfs/svc/nfs-server b/usr/src/cmd/fs.d/nfs/svc/nfs-server
index 5c8c1a67dd..c2cb8069ed 100644
--- a/usr/src/cmd/fs.d/nfs/svc/nfs-server
+++ b/usr/src/cmd/fs.d/nfs/svc/nfs-server
@@ -22,8 +22,8 @@
#
# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
-# Copyright 2015 Nexenta Systems, Inc. All rights reserved.
# Copyright 2016 Hans Rosenfeld <rosenfeld@grumpf.hope-2000.org>
+# Copyright 2018 Nexenta Systems, Inc. All rights reserved.
#
# Start/stop processes required for server NFS
@@ -53,8 +53,7 @@ configure_ipfilter()
#
# Nothing to do if:
- # - service's policy is 'use_global'
- # - ipfilter isn't online
+ # - ipfilter isn't online
# - global policy is 'custom'
#
[ "`get_policy $SMF_FMRI`" = "use_global" ] && return 0
@@ -66,14 +65,6 @@ configure_ipfilter()
case "$1" in
'start')
- # The NFS server is not supported in a local zone
- if smf_is_nonglobalzone; then
- /usr/sbin/svcadm disable -t svc:/network/nfs/server
- echo "The NFS server is not supported in a local zone"
- sleep 5 &
- exit $SMF_EXIT_OK
- fi
-
# Share all file systems enabled for sharing. sharemgr understands
# regular shares and ZFS shares and will handle both. Technically,
# the shares would have been started long before getting here since
diff --git a/usr/src/cmd/fs.d/nfs/svc/nlockmgr.xml b/usr/src/cmd/fs.d/nfs/svc/nlockmgr.xml
index b5165e7ad5..957ff00856 100644
--- a/usr/src/cmd/fs.d/nfs/svc/nlockmgr.xml
+++ b/usr/src/cmd/fs.d/nfs/svc/nlockmgr.xml
@@ -22,6 +22,8 @@
Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
Copyright (c) 2012, 2015 by Delphix. All rights reserved.
+ Copyright 2018 Nexenta Systems, Inc. All rights reserved.
+ Copyright 2019 Nexenta by DDN, Inc. All rights reserved.
NOTE: This service manifest is not editable; its contents will
be overwritten by package or patch operations, including
@@ -65,7 +67,7 @@
grouping='require_all'
restart_on='error'
type='service'>
- <service_fmri value='svc:/system/filesystem/minimal' />
+ <service_fmri value='svc:/system/filesystem/minimal' />
</dependency>
<exec_method
@@ -93,7 +95,7 @@
</property_group>
<instance name='default' enabled='false'>
<property_group name='nfs-props' type='com.oracle.nfs,props'>
- <propval name='grace_period' type='integer' value='90'/>
+ <propval name='grace_period' type='integer' value='60'/>
<propval name='lockd_listen_backlog' type='integer' value='32'/>
<propval name='lockd_retransmit_timeout' type='integer' value='5'/>
<propval name='lockd_servers' type='integer' value='256'/>
diff --git a/usr/src/cmd/fs.d/zfs/fstyp/Makefile b/usr/src/cmd/fs.d/zfs/fstyp/Makefile
index ba28b9e39f..dfcdddd812 100644
--- a/usr/src/cmd/fs.d/zfs/fstyp/Makefile
+++ b/usr/src/cmd/fs.d/zfs/fstyp/Makefile
@@ -22,12 +22,12 @@
# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-# ident "%Z%%M% %I% %E% SMI"
+# Copyright 2020 Joyent, Inc.
#
FSTYP_VERS=1
FSTYPE= zfs
-LIBPROG= fstyp.so.${FSTYP_VERS}
+LIBPROG= fstyp.so.${FSTYP_VERS}
include ../../../../lib/Makefile.lib
include ../../Makefile.fstype
@@ -36,16 +36,17 @@ include ../../Makefile.fstype
MAPFILES =
CFLAGS += $(C_PICFLAGS)
+CFLAGS += -I../../../../lib/libzutil/common
DYNLIB= $(LIBPROG)
-LDLIBS += -lnvpair -lzfs -lc
+LDLIBS += -lnvpair -lc -lzutil
LINTFLAGS += -erroff=E_BAD_PTR_CAST_ALIGN -erroff=E_NAME_DEF_NOT_USED2
LINTFLAGS64 += -erroff=E_BAD_PTR_CAST_ALIGN -erroff=E_NAME_DEF_NOT_USED2
SRCS= ${LIBPROG:%.so.$(FSTYP_VERS)=%.c}
-CPPFLAGS += -DFSTYP_VERS=${FSTYP_VERS} -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64
+CPPFLAGS += -DFSTYP_VERS=${FSTYP_VERS} -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64
#
# Override PMAP dependency
@@ -76,7 +77,7 @@ lint: lint_SRCS
cstyle:
$(CSTYLE) $(SRCS)
-clean:
+clean:
${RM} $(LIBPROG)
clobber: clean
diff --git a/usr/src/cmd/fs.d/zfs/fstyp/fstyp.c b/usr/src/cmd/fs.d/zfs/fstyp/fstyp.c
index 30f86375e7..d49d998404 100644
--- a/usr/src/cmd/fs.d/zfs/fstyp/fstyp.c
+++ b/usr/src/cmd/fs.d/zfs/fstyp/fstyp.c
@@ -21,10 +21,10 @@
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ *
+ * Copyright 2020 Joyent, Inc.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* libfstyp module for zfs
*/
@@ -38,6 +38,7 @@
#include <string.h>
#include <libnvpair.h>
#include <libzfs.h>
+#include <libzutil.h>
#include <libfstyp_module.h>
#include <errno.h>
@@ -89,7 +90,7 @@ fstyp_mod_ident(fstyp_mod_handle_t handle)
uint64_t u64;
char buf[64];
- if (zpool_read_label(h->fd, &h->config) != 0) {
+ if (zpool_read_label(h->fd, &h->config, NULL) != 0) {
return (FSTYP_ERR_NO_MATCH);
}
diff --git a/usr/src/cmd/perl/contrib/Sun/Solaris/Kstat/Kstat.xs b/usr/src/cmd/perl/contrib/Sun/Solaris/Kstat/Kstat.xs
index e93076d9ce..968f0b1e87 100644
--- a/usr/src/cmd/perl/contrib/Sun/Solaris/Kstat/Kstat.xs
+++ b/usr/src/cmd/perl/contrib/Sun/Solaris/Kstat/Kstat.xs
@@ -23,6 +23,7 @@
* Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014 Racktop Systems.
* Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
+ * Copyright 2020 Peter Tribble.
*/
/*
@@ -85,9 +86,6 @@
/* Ultra-specific kstat includes */
#ifdef __sparc
#include <vm/hat_sfmmu.h> /* from /usr/platform/sun4u/include */
-#include <sys/simmstat.h> /* from /usr/platform/sun4u/include */
-#include <sys/sysctrl.h> /* from /usr/platform/sun4u/include */
-#include <sys/fhc.h> /* from /usr/include */
#endif
/*
@@ -548,169 +546,6 @@ save_sfmmu_tsbsize_stat(HV *self, kstat_t *kp, int strip_str)
#endif
/*
- * Definition in /usr/platform/sun4u/include/sys/simmstat.h
- */
-
-#ifdef __sparc
-static void
-save_simmstat(HV *self, kstat_t *kp, int strip_str)
-{
- uchar_t *simmstatp;
- SV *list;
- int i;
-
- /* PERL_ASSERT(kp->ks_ndata == 1); */
- PERL_ASSERT(kp->ks_data_size == sizeof (uchar_t) * SIMM_COUNT);
-
- list = newSVpv("", 0);
- for (i = 0, simmstatp = (uchar_t *)(kp->ks_data);
- i < SIMM_COUNT - 1; i++, simmstatp++) {
- sv_catpvf(list, "%d,", *simmstatp);
- }
- sv_catpvf(list, "%d", *simmstatp);
- hv_store(self, "status", 6, list, 0);
-}
-#endif
-
-/*
- * Used by save_temperature to make CSV lists from arrays of
- * short temperature values
- */
-
-#ifdef __sparc
-static SV *
-short_array_to_SV(short *shortp, int len)
-{
- SV *list;
-
- list = newSVpv("", 0);
- for (; len > 1; len--, shortp++) {
- sv_catpvf(list, "%d,", *shortp);
- }
- sv_catpvf(list, "%d", *shortp);
- return (list);
-}
-
-/*
- * Definition in /usr/platform/sun4u/include/sys/fhc.h
- */
-
-static void
-save_temperature(HV *self, kstat_t *kp, int strip_str)
-{
- struct temp_stats *tempsp;
-
- /* PERL_ASSERT(kp->ks_ndata == 1); */
- PERL_ASSERT(kp->ks_data_size == sizeof (struct temp_stats));
- tempsp = (struct temp_stats *)(kp->ks_data);
-
- SAVE_UINT32(self, tempsp, index);
- hv_store(self, "l1", 2, short_array_to_SV(tempsp->l1, L1_SZ), 0);
- hv_store(self, "l2", 2, short_array_to_SV(tempsp->l2, L2_SZ), 0);
- hv_store(self, "l3", 2, short_array_to_SV(tempsp->l3, L3_SZ), 0);
- hv_store(self, "l4", 2, short_array_to_SV(tempsp->l4, L4_SZ), 0);
- hv_store(self, "l5", 2, short_array_to_SV(tempsp->l5, L5_SZ), 0);
- SAVE_INT32(self, tempsp, max);
- SAVE_INT32(self, tempsp, min);
- SAVE_INT32(self, tempsp, state);
- SAVE_INT32(self, tempsp, temp_cnt);
- SAVE_INT32(self, tempsp, shutdown_cnt);
- SAVE_INT32(self, tempsp, version);
- SAVE_INT32(self, tempsp, trend);
- SAVE_INT32(self, tempsp, override);
-}
-#endif
-
-/*
- * Not actually defined anywhere - just a short. Yuck.
- */
-
-#ifdef __sparc
-static void
-save_temp_over(HV *self, kstat_t *kp, int strip_str)
-{
- short *shortp;
-
- /* PERL_ASSERT(kp->ks_ndata == 1); */
- PERL_ASSERT(kp->ks_data_size == sizeof (short));
-
- shortp = (short *)(kp->ks_data);
- hv_store(self, "override", 8, newSViv(*shortp), 0);
-}
-#endif
-
-/*
- * Defined in /usr/platform/sun4u/include/sys/sysctrl.h
- * (Well, sort of. Actually there's no structure, just a list of #defines
- * enumerating *some* of the array indexes.)
- */
-
-#ifdef __sparc
-static void
-save_ps_shadow(HV *self, kstat_t *kp, int strip_str)
-{
- uchar_t *ucharp;
-
- /* PERL_ASSERT(kp->ks_ndata == 1); */
- PERL_ASSERT(kp->ks_data_size == SYS_PS_COUNT);
-
- ucharp = (uchar_t *)(kp->ks_data);
- hv_store(self, "core_0", 6, newSViv(*ucharp++), 0);
- hv_store(self, "core_1", 6, newSViv(*ucharp++), 0);
- hv_store(self, "core_2", 6, newSViv(*ucharp++), 0);
- hv_store(self, "core_3", 6, newSViv(*ucharp++), 0);
- hv_store(self, "core_4", 6, newSViv(*ucharp++), 0);
- hv_store(self, "core_5", 6, newSViv(*ucharp++), 0);
- hv_store(self, "core_6", 6, newSViv(*ucharp++), 0);
- hv_store(self, "core_7", 6, newSViv(*ucharp++), 0);
- hv_store(self, "pps_0", 5, newSViv(*ucharp++), 0);
- hv_store(self, "clk_33", 6, newSViv(*ucharp++), 0);
- hv_store(self, "clk_50", 6, newSViv(*ucharp++), 0);
- hv_store(self, "v5_p", 4, newSViv(*ucharp++), 0);
- hv_store(self, "v12_p", 5, newSViv(*ucharp++), 0);
- hv_store(self, "v5_aux", 6, newSViv(*ucharp++), 0);
- hv_store(self, "v5_p_pch", 8, newSViv(*ucharp++), 0);
- hv_store(self, "v12_p_pch", 9, newSViv(*ucharp++), 0);
- hv_store(self, "v3_pch", 6, newSViv(*ucharp++), 0);
- hv_store(self, "v5_pch", 6, newSViv(*ucharp++), 0);
- hv_store(self, "p_fan", 5, newSViv(*ucharp++), 0);
-}
-#endif
-
-/*
- * Definition in /usr/platform/sun4u/include/sys/fhc.h
- */
-
-#ifdef __sparc
-static void
-save_fault_list(HV *self, kstat_t *kp, int strip_str)
-{
- struct ft_list *faultp;
- int i;
- char name[KSTAT_STRLEN + 7]; /* room for 999999 faults */
-
- /* PERL_ASSERT(kp->ks_ndata == 1); */
- /* PERL_ASSERT(kp->ks_data_size == sizeof (struct ft_list)); */
-
- for (i = 1, faultp = (struct ft_list *)(kp->ks_data);
- i <= 999999 && i <= kp->ks_data_size / sizeof (struct ft_list);
- i++, faultp++) {
- (void) snprintf(name, sizeof (name), "unit_%d", i);
- hv_store(self, name, strlen(name), newSViv(faultp->unit), 0);
- (void) snprintf(name, sizeof (name), "type_%d", i);
- hv_store(self, name, strlen(name), newSViv(faultp->type), 0);
- (void) snprintf(name, sizeof (name), "fclass_%d", i);
- hv_store(self, name, strlen(name), newSViv(faultp->fclass), 0);
- (void) snprintf(name, sizeof (name), "create_time_%d", i);
- hv_store(self, name, strlen(name),
- NEW_UV(faultp->create_time), 0);
- (void) snprintf(name, sizeof (name), "msg_%d", i);
- hv_store(self, name, strlen(name), newSVpv(faultp->msg, 0), 0);
- }
-}
-#endif
-
-/*
* We need to be able to find the function corresponding to a particular raw
* kstat. To do this we ignore the instance and glue the module and name
* together to form a composite key. We can then use the data in the kstat
@@ -721,7 +556,7 @@ save_fault_list(HV *self, kstat_t *kp, int strip_str)
* Note that some kstats include the instance number as part of the module
* and/or name. This could be construed as a bug. However, to work around this
* we omit any digits from the module and name as we build the table in
- * build_raw_kstat_loopup(), and we remove any digits from the module and name
+ * build_raw_kstat_lookup(), and we remove any digits from the module and name
* when we look up the functions in lookup_raw_kstat_fn()
*/
@@ -747,11 +582,6 @@ build_raw_kstat_lookup()
"unix:sfmmu_global_stat");
SAVE_FNP(raw_kstat_lookup, save_sfmmu_tsbsize_stat,
"unix:sfmmu_tsbsize_stat");
- SAVE_FNP(raw_kstat_lookup, save_simmstat, "unix:simm-status");
- SAVE_FNP(raw_kstat_lookup, save_temperature, "unix:temperature");
- SAVE_FNP(raw_kstat_lookup, save_temp_over, "unix:temperature override");
- SAVE_FNP(raw_kstat_lookup, save_ps_shadow, "unix:ps_shadow");
- SAVE_FNP(raw_kstat_lookup, save_fault_list, "unix:fault_list");
#endif
}
diff --git a/usr/src/cmd/smbsrv/dtrace/smb-trace.d b/usr/src/cmd/smbsrv/dtrace/smb-trace.d
index 32162dca2b..40b3cba219 100644
--- a/usr/src/cmd/smbsrv/dtrace/smb-trace.d
+++ b/usr/src/cmd/smbsrv/dtrace/smb-trace.d
@@ -1,3 +1,4 @@
+#!/usr/sbin/dtrace -s
/*
* This file and its contents are supplied under the terms of the
* Common Development and Distribution License ("CDDL"), version 1.0.
@@ -10,7 +11,7 @@
*/
/*
- * Copyright 2017 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2018 Nexenta Systems, Inc. All rights reserved.
*/
/*
@@ -24,9 +25,38 @@
* args[2] smb_name_args_t
* args[2] smb_open_args_t
* args[2] smb_rw_args_t
+ *
+ * Usage: smb-trace.d [<client ip>|all [<share path>|all] [<zone id>]]]
+ *
+ * example: smb_trace.d 192.168.012.001 mypool_fs1 0
+ *
+ * It is valid to specify <client ip> or <share path> as "all" to
+ * print data for all clients and/or all shares.
+ * Omitting <zone id> will print data for all zones.
*/
+#pragma D option defaultargs
+
+dtrace:::BEGIN
+{
+ all_clients = (($$1 == NULL) || ($$1 == "all")) ? 1 : 0;
+ all_shares = (($$2 == NULL) || ($$2 == "all")) ? 1 : 0;
+ all_zones = ($$3 == NULL) ? 1 : 0;
+
+ client = $$1;
+ share = $$2;
+ zoneid = $3;
+
+ printf("%Y - client=%s share=%s zone=%s)\n", walltimestamp,
+ (all_clients) ? "all" : client,
+ (all_shares) ? "all" : share,
+ (all_zones) ? "all" : $$3);
+}
+
smb:::op-*-start
+/ ((all_clients) || (args[0]->ci_remote == client)) &&
+ ((all_shares) || (args[1]->soi_share == share)) &&
+ ((all_zones) || (args[1]->soi_zoneid == zoneid)) /
{
printf("clnt=%s mid=0x%x uid=0x%x tid=0x%x\n",
args[0]->ci_remote,
@@ -36,9 +66,16 @@ smb:::op-*-start
}
smb:::op-*-done
+/ ((all_clients) || (args[0]->ci_remote == client)) &&
+ ((all_shares) || (args[1]->soi_share == share)) &&
+ ((all_zones) || (args[1]->soi_zoneid == zoneid)) /
{
printf("clnt=%s mid=0x%x status=0x%x\n",
args[0]->ci_remote,
args[1]->soi_mid,
args[1]->soi_status);
}
+
+dtrace:::END
+{
+}
diff --git a/usr/src/cmd/smbsrv/dtrace/smb2-trace.d b/usr/src/cmd/smbsrv/dtrace/smb2-trace.d
index 4e83216785..409d878dea 100644
--- a/usr/src/cmd/smbsrv/dtrace/smb2-trace.d
+++ b/usr/src/cmd/smbsrv/dtrace/smb2-trace.d
@@ -1,3 +1,4 @@
+#!/usr/sbin/dtrace -s
/*
* This file and its contents are supplied under the terms of the
* Common Development and Distribution License ("CDDL"), version 1.0.
@@ -10,7 +11,7 @@
*/
/*
- * Copyright 2017 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2018 Nexenta Systems, Inc. All rights reserved.
*/
/*
@@ -23,9 +24,38 @@
* Some also provide one of: (not used here)
* args[2] smb_open_args_t
* args[2] smb_rw_args_t
+ *
+ * Usage: smb2-trace.d [<client ip>|all [<share path>|all [<zone id>]]]
+ *
+ * Example: smb2-trace.d 192.168.012.001 mypool_fs1 0
+ *
+ * It is valid to specify <client ip> or <share path> as "all" to
+ * print data for all clients and/or all shares.
+ * Omitting <zone id> will print data for all zones.
*/
+#pragma D option defaultargs
+
+dtrace:::BEGIN
+{
+ all_clients = (($$1 == NULL) || ($$1 == "all")) ? 1 : 0;
+ all_shares = (($$2 == NULL) || ($$2 == "all")) ? 1 : 0;
+ all_zones = ($$3 == NULL) ? 1 : 0;
+
+ client = $$1;
+ share = $$2;
+ zoneid = $3;
+
+ printf("%Y - client=%s share=%s zone=%s)\n", walltimestamp,
+ (all_clients) ? "all" : client,
+ (all_shares) ? "all" : share,
+ (all_zones) ? "all" : $$3);
+}
+
smb2:::op-*-start
+/ ((all_clients == 1) || (args[0]->ci_remote == client)) &&
+ ((all_shares == 1) || (args[1]->soi_share == share)) &&
+ ((all_zones == 1) || (args[1]->soi_zoneid == zoneid)) /
{
printf("clnt=%s mid=0x%x uid=0x%x tid=0x%x\n",
args[0]->ci_remote,
@@ -35,9 +65,16 @@ smb2:::op-*-start
}
smb2:::op-*-done
+/ ((all_clients == 1) || (args[0]->ci_remote == client)) &&
+ ((all_shares == 1) || (args[1]->soi_share == share)) &&
+ ((all_zones == 1) || (args[1]->soi_zoneid == zoneid)) /
{
printf("clnt=%s mid=0x%x status=0x%x\n",
args[0]->ci_remote,
args[1]->soi_mid,
args[1]->soi_status);
}
+
+dtrace:::END
+{
+}
diff --git a/usr/src/cmd/stat/kstat/Makefile b/usr/src/cmd/stat/kstat/Makefile
index 668c8ffec8..c315bf7c2c 100644
--- a/usr/src/cmd/stat/kstat/Makefile
+++ b/usr/src/cmd/stat/kstat/Makefile
@@ -22,6 +22,8 @@
# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
+# Copyright 2020 Peter Tribble.
+#
PROG = kstat
OBJS = kstat.o
@@ -37,7 +39,6 @@ CERRWARN += -_gcc=-Wno-switch
CERRWARN += -_gcc=-Wno-parentheses
CPPFLAGS_sparc += -I$(SRC)/uts/sfmmu
-CPPFLAGS_sparc += -I$(SRC)/uts/sun4u/sunfire
CPPFLAGS += $(CPPFLAGS_$(MACH))
FILEMODE= 0555
diff --git a/usr/src/cmd/stat/kstat/kstat.c b/usr/src/cmd/stat/kstat/kstat.c
index 236437face..2276b9762a 100644
--- a/usr/src/cmd/stat/kstat/kstat.c
+++ b/usr/src/cmd/stat/kstat/kstat.c
@@ -24,6 +24,7 @@
* Copyright (c) 2013 David Hoeppner. All rights reserved.
* Copyright 2013 Nexenta Systems, Inc. All rights reserved.
* Copyright 2016 Joyent, Inc.
+ * Copyright 2020 Peter Tribble.
*/
/*
@@ -518,7 +519,7 @@ static kstat_raw_reader_t
lookup_raw_kstat_fn(char *module, char *name)
{
char key[KSTAT_STRLEN * 2];
- register char *f, *t;
+ register char *f, *t;
int n = 0;
for (f = module, t = key; *f != '\0'; f++, t++) {
@@ -1324,171 +1325,6 @@ save_sfmmu_tsbsize_stat(kstat_t *kp, ks_instance_t *ksi)
}
#endif
-#ifdef __sparc
-static void
-save_simmstat(kstat_t *kp, ks_instance_t *ksi)
-{
- uchar_t *simmstat;
- char *simm_buf;
- char *list = NULL;
- int i;
-
- assert(kp->ks_data_size == sizeof (uchar_t) * SIMM_COUNT);
-
- for (i = 0, simmstat = (uchar_t *)(kp->ks_data); i < SIMM_COUNT - 1;
- i++, simmstat++) {
- if (list == NULL) {
- (void) asprintf(&simm_buf, "%d,", *simmstat);
- } else {
- (void) asprintf(&simm_buf, "%s%d,", list, *simmstat);
- free(list);
- }
- list = simm_buf;
- }
-
- (void) asprintf(&simm_buf, "%s%d", list, *simmstat);
- SAVE_STRING_X(ksi, "status", simm_buf);
- free(list);
- free(simm_buf);
-}
-#endif
-
-#ifdef __sparc
-/*
- * Helper function for save_temperature().
- */
-static char *
-short_array_to_string(short *shortp, int len)
-{
- char *list = NULL;
- char *list_buf;
-
- for (; len > 1; len--, shortp++) {
- if (list == NULL) {
- (void) asprintf(&list_buf, "%hd,", *shortp);
- } else {
- (void) asprintf(&list_buf, "%s%hd,", list, *shortp);
- free(list);
- }
- list = list_buf;
- }
-
- (void) asprintf(&list_buf, "%s%hd", list, *shortp);
- free(list);
- return (list_buf);
-}
-
-static void
-save_temperature(kstat_t *kp, ks_instance_t *ksi)
-{
- struct temp_stats *temps = (struct temp_stats *)(kp->ks_data);
- char *buf;
-
- assert(kp->ks_data_size == sizeof (struct temp_stats));
-
- SAVE_UINT32(ksi, temps, index);
-
- buf = short_array_to_string(temps->l1, L1_SZ);
- SAVE_STRING_X(ksi, "l1", buf);
- free(buf);
-
- buf = short_array_to_string(temps->l2, L2_SZ);
- SAVE_STRING_X(ksi, "l2", buf);
- free(buf);
-
- buf = short_array_to_string(temps->l3, L3_SZ);
- SAVE_STRING_X(ksi, "l3", buf);
- free(buf);
-
- buf = short_array_to_string(temps->l4, L4_SZ);
- SAVE_STRING_X(ksi, "l4", buf);
- free(buf);
-
- buf = short_array_to_string(temps->l5, L5_SZ);
- SAVE_STRING_X(ksi, "l5", buf);
- free(buf);
-
- SAVE_INT32(ksi, temps, max);
- SAVE_INT32(ksi, temps, min);
- SAVE_INT32(ksi, temps, state);
- SAVE_INT32(ksi, temps, temp_cnt);
- SAVE_INT32(ksi, temps, shutdown_cnt);
- SAVE_INT32(ksi, temps, version);
- SAVE_INT32(ksi, temps, trend);
- SAVE_INT32(ksi, temps, override);
-}
-#endif
-
-#ifdef __sparc
-static void
-save_temp_over(kstat_t *kp, ks_instance_t *ksi)
-{
- short *sh = (short *)(kp->ks_data);
- char *value;
-
- assert(kp->ks_data_size == sizeof (short));
-
- (void) asprintf(&value, "%hu", *sh);
- SAVE_STRING_X(ksi, "override", value);
- free(value);
-}
-#endif
-
-#ifdef __sparc
-static void
-save_ps_shadow(kstat_t *kp, ks_instance_t *ksi)
-{
- uchar_t *uchar = (uchar_t *)(kp->ks_data);
-
- assert(kp->ks_data_size == SYS_PS_COUNT);
-
- SAVE_CHAR_X(ksi, "core_0", *uchar++);
- SAVE_CHAR_X(ksi, "core_1", *uchar++);
- SAVE_CHAR_X(ksi, "core_2", *uchar++);
- SAVE_CHAR_X(ksi, "core_3", *uchar++);
- SAVE_CHAR_X(ksi, "core_4", *uchar++);
- SAVE_CHAR_X(ksi, "core_5", *uchar++);
- SAVE_CHAR_X(ksi, "core_6", *uchar++);
- SAVE_CHAR_X(ksi, "core_7", *uchar++);
- SAVE_CHAR_X(ksi, "pps_0", *uchar++);
- SAVE_CHAR_X(ksi, "clk_33", *uchar++);
- SAVE_CHAR_X(ksi, "clk_50", *uchar++);
- SAVE_CHAR_X(ksi, "v5_p", *uchar++);
- SAVE_CHAR_X(ksi, "v12_p", *uchar++);
- SAVE_CHAR_X(ksi, "v5_aux", *uchar++);
- SAVE_CHAR_X(ksi, "v5_p_pch", *uchar++);
- SAVE_CHAR_X(ksi, "v12_p_pch", *uchar++);
- SAVE_CHAR_X(ksi, "v3_pch", *uchar++);
- SAVE_CHAR_X(ksi, "v5_pch", *uchar++);
- SAVE_CHAR_X(ksi, "p_fan", *uchar++);
-}
-#endif
-
-#ifdef __sparc
-static void
-save_fault_list(kstat_t *kp, ks_instance_t *ksi)
-{
- struct ft_list *fault;
- char name[KSTAT_STRLEN + 7];
- int i;
-
- for (i = 1, fault = (struct ft_list *)(kp->ks_data);
- i <= 999999 && i <= kp->ks_data_size / sizeof (struct ft_list);
- i++, fault++) {
- (void) snprintf(name, sizeof (name), "unit_%d", i);
- SAVE_INT32_X(ksi, name, fault->unit);
- (void) snprintf(name, sizeof (name), "type_%d", i);
- SAVE_INT32_X(ksi, name, fault->type);
- (void) snprintf(name, sizeof (name), "fclass_%d", i);
- SAVE_INT32_X(ksi, name, fault->fclass);
- (void) snprintf(name, sizeof (name), "create_time_%d", i);
- SAVE_HRTIME_X(ksi, name, fault->create_time);
- (void) snprintf(name, sizeof (name), "msg_%d", i);
- SAVE_STRING_X(ksi, name, fault->msg);
- }
-}
-#endif
-
static void
save_named(kstat_t *kp, ks_instance_t *ksi)
{
diff --git a/usr/src/cmd/stat/kstat/kstat.h b/usr/src/cmd/stat/kstat/kstat.h
index b59263398c..9fd2077dce 100644
--- a/usr/src/cmd/stat/kstat/kstat.h
+++ b/usr/src/cmd/stat/kstat/kstat.h
@@ -22,6 +22,7 @@
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Copyright 2013 David Hoeppner. All rights reserved.
* Copyright 2013 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2020 Peter Tribble.
*/
#ifndef _STAT_KSTAT_H
@@ -41,9 +42,6 @@
#ifdef __sparc
#include <vm/hat_sfmmu.h>
-#include <sys/simmstat.h>
-#include <sys/sysctrl.h>
-#include <sys/fhc.h>
#endif
#define KSTAT_DATA_HRTIME (KSTAT_DATA_STRING + 1)
@@ -85,7 +83,7 @@ typedef union ks_value {
nvpair_insert(I, #N, &v, KSTAT_DATA_UINT32); \
}
-#define SAVE_INT64(I, S, N) \
+#define SAVE_INT64(I, S, N) \
{ \
ks_value_t v; \
v.i64 = S->N; \
@@ -167,7 +165,7 @@ typedef struct ks_instance {
char ks_name[KSTAT_STRLEN];
char ks_module[KSTAT_STRLEN];
char ks_class[KSTAT_STRLEN];
- int ks_instance;
+ int ks_instance;
uchar_t ks_type;
hrtime_t ks_snaptime;
list_t ks_nvlist;
@@ -214,13 +212,6 @@ static void save_nfs(kstat_t *, ks_instance_t *);
#ifdef __sparc
static void save_sfmmu_global_stat(kstat_t *, ks_instance_t *);
static void save_sfmmu_tsbsize_stat(kstat_t *, ks_instance_t *);
-static void save_simmstat(kstat_t *, ks_instance_t *);
-/* Helper function for save_temperature() */
-static char *short_array_to_string(short *, int);
-static void save_temperature(kstat_t *, ks_instance_t *);
-static void save_temp_over(kstat_t *, ks_instance_t *);
-static void save_ps_shadow(kstat_t *, ks_instance_t *);
-static void save_fault_list(kstat_t *, ks_instance_t *);
#endif
/* Named kstat readers */
@@ -246,11 +237,6 @@ static struct {
#ifdef __sparc
{save_sfmmu_global_stat, "unix:sfmmu_global_stat"},
{save_sfmmu_tsbsize_stat, "unix:sfmmu_tsbsize_stat"},
- {save_simmstat, "unix:simm-status"},
- {save_temperature, "unix:temperature"},
- {save_temp_over, "unix:temperature override"},
- {save_ps_shadow, "unix:ps_shadow"},
- {save_fault_list, "unix:fault_list"},
#endif
{NULL, NULL},
};
diff --git a/usr/src/cmd/zdb/Makefile.com b/usr/src/cmd/zdb/Makefile.com
index 1834e9b2f8..42925319f4 100644
--- a/usr/src/cmd/zdb/Makefile.com
+++ b/usr/src/cmd/zdb/Makefile.com
@@ -23,7 +23,7 @@
# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
# Copyright (c) 2012 by Delphix. All rights reserved.
-# Copyright (c) 2018, Joyent, Inc.
+# Copyright 2020 Joyent, Inc.
# Copyright 2017 RackTop Systems.
#
@@ -37,8 +37,9 @@ include ../../Makefile.ctf
INCS += -I../../../lib/libzpool/common
INCS += -I../../../uts/common/fs/zfs
INCS += -I../../../common/zfs
+INCS += -I../../../lib/libzutil/common
-LDLIBS += -lzpool -lumem -lnvpair -lzfs -lavl -lcmdutils -lfakekernel
+LDLIBS += -lzpool -lumem -lnvpair -lzutil -lavl -lfakekernel
CSTD= $(CSTD_GNU99)
C99LMODE= -Xc99=%all
diff --git a/usr/src/cmd/zdb/zdb.c b/usr/src/cmd/zdb/zdb.c
index 03e78ce4ba..8272bbf64f 100644
--- a/usr/src/cmd/zdb/zdb.c
+++ b/usr/src/cmd/zdb/zdb.c
@@ -71,6 +71,9 @@
#undef verify
#include <libzfs.h>
+#include <libnvpair.h>
+#include <libzutil.h>
+
#include "zdb.h"
#define ZDB_COMPRESS_NAME(idx) ((idx) < ZIO_COMPRESS_FUNCTIONS ? \
@@ -101,7 +104,6 @@ typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size);
uint64_t *zopt_object = NULL;
static unsigned zopt_objects = 0;
-libzfs_handle_t *g_zfs;
uint64_t max_inflight = 1000;
static int leaked_objects = 0;
@@ -5884,8 +5886,6 @@ main(int argc, char **argv)
spa_load_verify_dryrun = B_TRUE;
kernel_init(FREAD);
- g_zfs = libzfs_init();
- ASSERT(g_zfs != NULL);
if (dump_all)
verbose = MAX(verbose, 1);
@@ -5964,7 +5964,8 @@ main(int argc, char **argv)
args.path = searchdirs;
args.can_be_active = B_TRUE;
- error = zpool_tryimport(g_zfs, target_pool, &cfg, &args);
+ error = zpool_find_config(NULL, target_pool, &cfg, &args,
+ &libzpool_config_ops);
if (error == 0) {
@@ -6094,7 +6095,6 @@ main(int argc, char **argv)
dump_debug_buffer();
- libzfs_fini(g_zfs);
kernel_fini();
return (error);
diff --git a/usr/src/cmd/zfs/Makefile b/usr/src/cmd/zfs/Makefile
index a65371609c..97642e44f9 100644
--- a/usr/src/cmd/zfs/Makefile
+++ b/usr/src/cmd/zfs/Makefile
@@ -24,7 +24,7 @@
# Copyright 2010 Nexenta Systems, Inc. All rights reserved.
# Copyright (c) 2012, 2015 by Delphix. All rights reserved.
# Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>.
-# Copyright 2019 Joyent, Inc.
+# Copyright 2020 Joyent, Inc.
#
PROG= zfs
@@ -41,12 +41,13 @@ LINKPROGS= mount umount
ROOTETCFSTYPE= $(ROOTETC)/fs/$(FSTYPE)
USRLIBFSTYPE= $(ROOTLIB)/fs/$(FSTYPE)
-LDLIBS += -lzfs_core -lzfs -luutil -lumem -lnvpair -lsec -lidmap
+LDLIBS += -lzfs_core -lzfs -luutil -lumem -lnvpair -lsec -lidmap -lzutil
# cmdutils has list(9F) functions used by the project code.
LDLIBS += -lcmdutils
INCS += -I../../common/zfs
INCS += -I$(SRC)/uts/common/fs/zfs
+INCS += -I../../lib/libzutil/common
CSTD= $(CSTD_GNU99)
C99LMODE= -Xc99=%all
diff --git a/usr/src/cmd/zfs/zfs_main.c b/usr/src/cmd/zfs/zfs_main.c
index ef3f9155f3..ecc049c1fc 100644
--- a/usr/src/cmd/zfs/zfs_main.c
+++ b/usr/src/cmd/zfs/zfs_main.c
@@ -68,6 +68,7 @@
#include <libzfs_core.h>
#include <zfs_prop.h>
#include <zfs_deleg.h>
+#include <libzutil.h>
#include <libuutil.h>
#include <aclutils.h>
#include <directory.h>
diff --git a/usr/src/cmd/zhack/Makefile.com b/usr/src/cmd/zhack/Makefile.com
index 8a87de4ec9..d45962c16d 100644
--- a/usr/src/cmd/zhack/Makefile.com
+++ b/usr/src/cmd/zhack/Makefile.com
@@ -27,6 +27,7 @@
#
# Copyright (c) 2012, 2016 by Delphix. All rights reserved.
# Copyright 2017 RackTop Systems.
+# Copyright 2020 Joyent, Inc.
#
PROG= zhack
@@ -40,8 +41,9 @@ INCS += -I../../../lib/libzpool/common
INCS += -I../../../uts/common/fs/zfs
INCS += -I../../../uts/common/fs/zfs/lua
INCS += -I../../../common/zfs
+INCS += -I../../../lib/libzutil/common
-LDLIBS += -lzpool -lumem -lnvpair -lzfs
+LDLIBS += -lzpool -lumem -lnvpair -lzutil
CSTD= $(CSTD_GNU99)
C99LMODE= -Xc99=%all
diff --git a/usr/src/cmd/zhack/zhack.c b/usr/src/cmd/zhack/zhack.c
index 1f90f97bdf..235b1fd0ca 100644
--- a/usr/src/cmd/zhack/zhack.c
+++ b/usr/src/cmd/zhack/zhack.c
@@ -49,12 +49,11 @@
#include <sys/zfeature.h>
#include <sys/dmu_tx.h>
#undef verify
-#include <libzfs.h>
+#include <libzutil.h>
extern boolean_t zfeature_checks_disable;
const char cmdname[] = "zhack";
-libzfs_handle_t *g_zfs;
static importargs_t g_importargs;
static char *g_pool;
static boolean_t g_readonly;
@@ -128,20 +127,17 @@ zhack_import(char *target, boolean_t readonly)
int error;
kernel_init(readonly ? FREAD : (FREAD | FWRITE));
- g_zfs = libzfs_init();
- ASSERT(g_zfs != NULL);
dmu_objset_register_type(DMU_OST_ZFS, space_delta_cb);
g_readonly = readonly;
- g_importargs.unique = B_TRUE;
g_importargs.can_be_active = readonly;
g_pool = strdup(target);
- error = zpool_tryimport(g_zfs, target, &config, &g_importargs);
+ error = zpool_find_config(NULL, target, &config, &g_importargs,
+ &libzpool_config_ops);
if (error)
- fatal(NULL, FTAG, "cannot import '%s': %s", target,
- libzfs_error_description(g_zfs));
+ fatal(NULL, FTAG, "cannot import '%s'", target);
props = NULL;
if (readonly) {
@@ -528,7 +524,6 @@ main(int argc, char **argv)
"changes may not be committed to disk\n");
}
- libzfs_fini(g_zfs);
kernel_fini();
return (rv);
diff --git a/usr/src/cmd/zinject/Makefile.com b/usr/src/cmd/zinject/Makefile.com
index 8644e8185d..220c877b42 100644
--- a/usr/src/cmd/zinject/Makefile.com
+++ b/usr/src/cmd/zinject/Makefile.com
@@ -24,6 +24,7 @@
#
# Copyright (c) 2016 by Delphix. All rights reserved.
# Copyright 2017 RackTop Systems.
+# Copyright 2020 Joyent, Inc.
#
PROG:sh= cd ..; basename `pwd`
@@ -36,7 +37,7 @@ INCS += -I../../../lib/libzpool/common
INCS += -I../../../uts/common/fs/zfs
INCS += -I../../../uts/common/fs/zfs/lua
-LDLIBS += -lzpool -lzfs -lnvpair
+LDLIBS += -lzfs -lnvpair
CSTD= $(CSTD_GNU99)
C99LMODE= -Xc99=%all
diff --git a/usr/src/cmd/zinject/translate.c b/usr/src/cmd/zinject/translate.c
index 090f2448b0..546009ab88 100644
--- a/usr/src/cmd/zinject/translate.c
+++ b/usr/src/cmd/zinject/translate.c
@@ -25,8 +25,6 @@
#include <libzfs.h>
-#include <sys/zfs_context.h>
-
#include <errno.h>
#include <fcntl.h>
#include <stdarg.h>
@@ -49,9 +47,6 @@
#include "zinject.h"
-extern void kernel_init(int);
-extern void kernel_fini(void);
-
static int debug;
static void
@@ -157,51 +152,32 @@ parse_pathname(const char *inpath, char *dataset, char *relpath,
}
/*
- * Convert from a (dataset, path) pair into a (objset, object) pair. Note that
- * we grab the object number from the inode number, since looking this up via
- * libzpool is a real pain.
+ * Convert from a dataset to an objset id. Note that
+ * we grab the object number from the inode number.
*/
-/* ARGSUSED */
static int
-object_from_path(const char *dataset, const char *path, struct stat64 *statbuf,
- zinject_record_t *record)
+object_from_path(const char *dataset, uint64_t object, zinject_record_t *record)
{
- objset_t *os;
- int err;
-
- /*
- * Before doing any libzpool operations, call sync() to ensure that the
- * on-disk state is consistent with the in-core state.
- */
- sync();
+ zfs_handle_t *zhp;
- err = dmu_objset_own(dataset, DMU_OST_ZFS, B_TRUE, B_FALSE, FTAG, &os);
- if (err != 0) {
- (void) fprintf(stderr, "cannot open dataset '%s': %s\n",
- dataset, strerror(err));
+ if ((zhp = zfs_open(g_zfs, dataset, ZFS_TYPE_DATASET)) == NULL)
return (-1);
- }
- record->zi_objset = dmu_objset_id(os);
- record->zi_object = statbuf->st_ino;
+ record->zi_objset = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
+ record->zi_object = object;
- dmu_objset_disown(os, B_FALSE, FTAG);
+ zfs_close(zhp);
return (0);
}
/*
- * Calculate the real range based on the type, level, and range given.
+ * Initialize the range based on the type, level, and range given.
*/
static int
-calculate_range(const char *dataset, err_type_t type, int level, char *range,
+initialize_range(err_type_t type, int level, char *range,
zinject_record_t *record)
{
- objset_t *os = NULL;
- dnode_t *dn = NULL;
- int err;
- int ret = -1;
-
/*
* Determine the numeric range from the string.
*/
@@ -229,7 +205,7 @@ calculate_range(const char *dataset, err_type_t type, int level, char *range,
(void) fprintf(stderr, "invalid range '%s': must be "
"a numeric range of the form 'start[,end]'\n",
range);
- goto out;
+ return (-1);
}
}
@@ -247,7 +223,7 @@ calculate_range(const char *dataset, err_type_t type, int level, char *range,
if (range != NULL) {
(void) fprintf(stderr, "range cannot be specified when "
"type is 'dnode'\n");
- goto out;
+ return (-1);
}
record->zi_start = record->zi_object * sizeof (dnode_phys_t);
@@ -256,76 +232,9 @@ calculate_range(const char *dataset, err_type_t type, int level, char *range,
break;
}
- /*
- * Get the dnode associated with object, so we can calculate the block
- * size.
- */
- if ((err = dmu_objset_own(dataset, DMU_OST_ANY,
- B_TRUE, B_FALSE, FTAG, &os)) != 0) {
- (void) fprintf(stderr, "cannot open dataset '%s': %s\n",
- dataset, strerror(err));
- goto out;
- }
-
- if (record->zi_object == 0) {
- dn = DMU_META_DNODE(os);
- } else {
- err = dnode_hold(os, record->zi_object, FTAG, &dn);
- if (err != 0) {
- (void) fprintf(stderr, "failed to hold dnode "
- "for object %llu\n",
- (u_longlong_t)record->zi_object);
- goto out;
- }
- }
-
-
- ziprintf("data shift: %d\n", (int)dn->dn_datablkshift);
- ziprintf(" ind shift: %d\n", (int)dn->dn_indblkshift);
-
- /*
- * Translate range into block IDs.
- */
- if (record->zi_start != 0 || record->zi_end != -1ULL) {
- record->zi_start >>= dn->dn_datablkshift;
- record->zi_end >>= dn->dn_datablkshift;
- }
-
- /*
- * Check level, and then translate level 0 blkids into ranges
- * appropriate for level of indirection.
- */
record->zi_level = level;
- if (level > 0) {
- ziprintf("level 0 blkid range: [%llu, %llu]\n",
- record->zi_start, record->zi_end);
-
- if (level >= dn->dn_nlevels) {
- (void) fprintf(stderr, "level %d exceeds max level "
- "of object (%d)\n", level, dn->dn_nlevels - 1);
- goto out;
- }
-
- if (record->zi_start != 0 || record->zi_end != 0) {
- int shift = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
- for (; level > 0; level--) {
- record->zi_start >>= shift;
- record->zi_end >>= shift;
- }
- }
- }
-
- ret = 0;
-out:
- if (dn) {
- if (dn != DMU_META_DNODE(os))
- dnode_rele(dn, FTAG);
- }
- if (os)
- dmu_objset_disown(os, B_FALSE, FTAG);
-
- return (ret);
+ return (0);
}
int
@@ -337,8 +246,6 @@ translate_record(err_type_t type, const char *object, const char *range,
struct stat64 statbuf;
int ret = -1;
- kernel_init(FREAD);
-
debug = (getenv("ZINJECT_DEBUG") != NULL);
ziprintf("translating: %s\n", object);
@@ -388,16 +295,16 @@ translate_record(err_type_t type, const char *object, const char *range,
/*
* Convert (dataset, file) into (objset, object)
*/
- if (object_from_path(dataset, path, &statbuf, record) != 0)
+ if (object_from_path(dataset, statbuf.st_ino, record) != 0)
goto err;
ziprintf("raw objset: %llu\n", record->zi_objset);
ziprintf("raw object: %llu\n", record->zi_object);
/*
- * For the given object, calculate the real (type, level, range)
+ * For the given object, initialize the range in bytes
*/
- if (calculate_range(dataset, type, level, (char *)range, record) != 0)
+ if (initialize_range(type, level, (char *)range, record) != 0)
goto err;
ziprintf(" objset: %llu\n", record->zi_objset);
@@ -419,7 +326,6 @@ translate_record(err_type_t type, const char *object, const char *range,
ret = 0;
err:
- kernel_fini();
return (ret);
}
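
For reference, the translation above now resolves the objset id through libzfs instead of owning the objset via libzpool: open the dataset with zfs_open(), read ZFS_PROP_OBJSETID, and take the object number from the file's inode. A minimal sketch of that pattern (a hypothetical helper, not part of this commit; error reporting and the surrounding zinject plumbing are omitted):

#include <libzfs.h>
#include <sys/stat.h>

/*
 * Map a dataset name and a file inside it to the (objset, object)
 * pair an injection record needs, using only libzfs interfaces.
 */
int
dataset_file_to_ids(libzfs_handle_t *hdl, const char *dataset,
    const char *path, uint64_t *objsetp, uint64_t *objectp)
{
	zfs_handle_t *zhp;
	struct stat64 st;

	if (stat64(path, &st) != 0)
		return (-1);

	if ((zhp = zfs_open(hdl, dataset, ZFS_TYPE_DATASET)) == NULL)
		return (-1);

	/* The objset id is now read as a dataset property. */
	*objsetp = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
	*objectp = st.st_ino;

	zfs_close(zhp);
	return (0);
}
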
diff --git a/usr/src/cmd/zinject/zinject.c b/usr/src/cmd/zinject/zinject.c
index fc836f11e5..16e659ca6f 100644
--- a/usr/src/cmd/zinject/zinject.c
+++ b/usr/src/cmd/zinject/zinject.c
@@ -564,6 +564,7 @@ register_handler(const char *pool, int flags, zinject_record_t *record,
if (ioctl(zfs_fd, ZFS_IOC_INJECT_FAULT, &zc) != 0) {
(void) fprintf(stderr, "failed to add handler: %s\n",
+ errno == EDOM ? "block level exceeds max level of object" :
strerror(errno));
return (1);
}
@@ -886,6 +887,7 @@ main(int argc, char **argv)
break;
case 'r':
range = optarg;
+ flags |= ZINJECT_CALC_RANGE;
break;
case 's':
dur_secs = 1;
diff --git a/usr/src/cmd/zpool/Makefile b/usr/src/cmd/zpool/Makefile
index b849d9c759..55d8abc80f 100644
--- a/usr/src/cmd/zpool/Makefile
+++ b/usr/src/cmd/zpool/Makefile
@@ -22,7 +22,7 @@
# Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
# Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>.
# Copyright (c) 2015 by Delphix. All rights reserved.
-# Copyright 2019 Joyent, Inc.
+# Copyright 2020 Joyent, Inc.
#
PROG= zpool
@@ -40,9 +40,10 @@ STAT_COMMON_OBJS = timestamp.o
STAT_COMMON_SRCS = $(STAT_COMMON_OBJS:%.o=$(STATCOMMONDIR)/%.c)
SRCS += $(STAT_COMMON_SRCS)
-LDLIBS += -lzfs -lnvpair -ldevid -lefi -ldiskmgt -luutil -lumem
+LDLIBS += -lzfs -lnvpair -ldevid -lefi -ldiskmgt -luutil -lumem -lzutil
INCS += -I../../common/zfs -I../../uts/common/fs/zfs -I$(STATCOMMONDIR)
+INCS += -I../../lib/libzutil/common
CSTD= $(CSTD_GNU99)
C99LMODE= -Xc99=%all
diff --git a/usr/src/cmd/zpool/zpool_iter.c b/usr/src/cmd/zpool/zpool_iter.c
index 6e77f85fa3..c05c665ada 100644
--- a/usr/src/cmd/zpool/zpool_iter.c
+++ b/usr/src/cmd/zpool/zpool_iter.c
@@ -34,6 +34,7 @@
#include <strings.h>
#include <libzfs.h>
+#include <libzutil.h>
#include "zpool_util.h"
diff --git a/usr/src/cmd/zpool/zpool_main.c b/usr/src/cmd/zpool/zpool_main.c
index 96ee2f0850..93df0e1772 100644
--- a/usr/src/cmd/zpool/zpool_main.c
+++ b/usr/src/cmd/zpool/zpool_main.c
@@ -59,6 +59,7 @@
#include <sys/debug.h>
#include <libzfs.h>
+#include <libzutil.h>
#include "zpool_util.h"
#include "zfs_comutil.h"
@@ -914,7 +915,7 @@ zpool_do_labelclear(int argc, char **argv)
return (1);
}
- if (zpool_read_label(fd, &config) != 0) {
+ if (zpool_read_label(fd, &config, NULL) != 0) {
(void) fprintf(stderr,
gettext("failed to read label from %s\n"), vdev);
return (1);
@@ -2535,6 +2536,40 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
return (ret);
}
+typedef struct target_exists_args {
+ const char *poolname;
+ uint64_t poolguid;
+} target_exists_args_t;
+
+static int
+name_or_guid_exists(zpool_handle_t *zhp, void *data)
+{
+ target_exists_args_t *args = data;
+ nvlist_t *config = zpool_get_config(zhp, NULL);
+ int found = 0;
+
+ if (config == NULL)
+ return (0);
+
+ if (args->poolname != NULL) {
+ char *pool_name;
+
+ verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
+ &pool_name) == 0);
+ if (strcmp(pool_name, args->poolname) == 0)
+ found = 1;
+ } else {
+ uint64_t pool_guid;
+
+ verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
+ &pool_guid) == 0);
+ if (pool_guid == args->poolguid)
+ found = 1;
+ }
+ zpool_close(zhp);
+
+ return (found);
+}
/*
* zpool checkpoint <pool>
* checkpoint --discard <pool>
@@ -2687,6 +2722,7 @@ zpool_do_import(int argc, char **argv)
boolean_t dryrun = B_FALSE;
boolean_t do_rewind = B_FALSE;
boolean_t xtreme_rewind = B_FALSE;
+ boolean_t pool_exists = B_FALSE;
uint64_t pool_state, txg = -1ULL;
char *cachefile = NULL;
importargs_t idata = { 0 };
@@ -2895,10 +2931,10 @@ zpool_do_import(int argc, char **argv)
/*
* User specified a name or guid. Ensure it's unique.
*/
- idata.unique = B_TRUE;
+ target_exists_args_t search = {searchname, searchguid};
+ pool_exists = zpool_iter(g_zfs, name_or_guid_exists, &search);
}
-
idata.path = searchdirs;
idata.paths = nsearch;
idata.poolname = searchname;
@@ -2906,9 +2942,9 @@ zpool_do_import(int argc, char **argv)
idata.cachefile = cachefile;
idata.policy = policy;
- pools = zpool_search_import(g_zfs, &idata);
+ pools = zpool_search_import(g_zfs, &idata, &libzfs_config_ops);
- if (pools != NULL && idata.exists &&
+ if (pools != NULL && pool_exists &&
(argc == 1 || strcmp(argv[0], argv[1]) == 0)) {
(void) fprintf(stderr, gettext("cannot import '%s': "
"a pool with that name already exists\n"),
@@ -2917,7 +2953,7 @@ zpool_do_import(int argc, char **argv)
"[-t] <pool | id> <newpool>' to give it a new temporary "
"or permanent name\n"));
err = 1;
- } else if (pools == NULL && idata.exists) {
+ } else if (pools == NULL && pool_exists) {
(void) fprintf(stderr, gettext("cannot import '%s': "
"a pool with that name is already created/imported,\n"),
argv[0]);
@@ -3389,7 +3425,7 @@ get_interval_count(int *argcp, char **argv, unsigned long *iv,
/*
* Determine if the last argument is an integer or a pool name
*/
- if (argc > 0 && isdigit(argv[argc - 1][0])) {
+ if (argc > 0 && zfs_isnumber(argv[argc - 1])) {
char *end;
errno = 0;
@@ -3419,7 +3455,7 @@ get_interval_count(int *argcp, char **argv, unsigned long *iv,
* If the last argument is also an integer, then we have both a count
* and an interval.
*/
- if (argc > 0 && isdigit(argv[argc - 1][0])) {
+ if (argc > 0 && zfs_isnumber(argv[argc - 1])) {
char *end;
errno = 0;
diff --git a/usr/src/cmd/zpool/zpool_vdev.c b/usr/src/cmd/zpool/zpool_vdev.c
index 6e6589ab47..652bece3ab 100644
--- a/usr/src/cmd/zpool/zpool_vdev.c
+++ b/usr/src/cmd/zpool/zpool_vdev.c
@@ -69,6 +69,7 @@
#include <libdiskmgt.h>
#include <libintl.h>
#include <libnvpair.h>
+#include <libzutil.h>
#include <limits.h>
#include <sys/spa.h>
#include <stdio.h>
@@ -1124,7 +1125,7 @@ is_spare(nvlist_t *config, const char *path)
if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) != 0 ||
!inuse ||
state != POOL_STATE_SPARE ||
- zpool_read_label(fd, &label) != 0) {
+ zpool_read_label(fd, &label, NULL) != 0) {
free(name);
(void) close(fd);
return (B_FALSE);
diff --git a/usr/src/cmd/ztest/Makefile.com b/usr/src/cmd/ztest/Makefile.com
index fb32fdd938..0db755a6f7 100644
--- a/usr/src/cmd/ztest/Makefile.com
+++ b/usr/src/cmd/ztest/Makefile.com
@@ -22,7 +22,7 @@
# Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2012, 2016 by Delphix. All rights reserved.
# Copyright 2017 RackTop Systems.
-# Copyright 2019 Joyent, Inc.
+# Copyright 2020 Joyent, Inc.
PROG= ztest
OBJS= $(PROG).o
@@ -35,8 +35,9 @@ INCS += -I../../../lib/libzpool/common
INCS += -I../../../uts/common/fs/zfs
INCS += -I../../../uts/common/fs/zfs/lua
INCS += -I../../../common/zfs
+INCS += -I../../../lib/libzutil/common
-LDLIBS += -lumem -lzpool -lcmdutils -lm -lnvpair -lfakekernel -lzfs
+LDLIBS += -lumem -lzpool -lcmdutils -lm -lnvpair -lfakekernel -lzutil
CSTD= $(CSTD_GNU99)
C99LMODE= -Xc99=%all
diff --git a/usr/src/cmd/ztest/ztest.c b/usr/src/cmd/ztest/ztest.c
index 83922cf376..f4f577e4d3 100644
--- a/usr/src/cmd/ztest/ztest.c
+++ b/usr/src/cmd/ztest/ztest.c
@@ -128,7 +128,7 @@
#include <math.h>
#include <sys/fs/zfs.h>
#include <libnvpair.h>
-#include <libzfs.h>
+#include <libzutil.h>
#include <libcmdutils.h>
static int ztest_fd_data = -1;
@@ -6811,7 +6811,6 @@ make_random_props()
static void
ztest_import(ztest_shared_t *zs)
{
- libzfs_handle_t *hdl;
importargs_t args = { 0 };
spa_t *spa;
nvlist_t *cfg = NULL;
@@ -6825,14 +6824,14 @@ ztest_import(ztest_shared_t *zs)
rw_init(&ztest_name_lock, NULL, USYNC_THREAD, NULL);
kernel_init(FREAD | FWRITE);
- hdl = libzfs_init();
searchdirs[0] = ztest_opts.zo_dir;
args.paths = nsearch;
args.path = searchdirs;
args.can_be_active = B_FALSE;
- error = zpool_tryimport(hdl, name, &cfg, &args);
+ error = zpool_find_config(NULL, name, &cfg, &args,
+ &libzpool_config_ops);
if (error)
(void) fatal(0, "No pools found\n");
@@ -6842,7 +6841,6 @@ ztest_import(ztest_shared_t *zs)
1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift;
spa_close(spa, FTAG);
- libzfs_fini(hdl);
kernel_fini();
if (!ztest_opts.zo_mmp_test) {
diff --git a/usr/src/lib/Makefile b/usr/src/lib/Makefile
index 3ea7f02985..5a89beb017 100644
--- a/usr/src/lib/Makefile
+++ b/usr/src/lib/Makefile
@@ -22,7 +22,7 @@
#
# Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2012 by Delphix. All rights reserved.
-# Copyright 2019, Joyent, Inc.
+# Copyright 2020 Joyent, Inc.
# Copyright (c) 2013 Gary Mills
# Copyright 2014 Garrett D'Amore <garrett@damore.org>
# Copyright (c) 2015 Gary Mills
@@ -258,6 +258,7 @@ SUBDIRS += \
libzoneinfo \
libzonestat \
libzpool \
+ libzutil \
madv \
mpapi \
mpss \
@@ -713,14 +714,15 @@ libvolmgt: libadm
libvrrpadm: libdladm libscf
libvscan: libscf libsecdb
libzdoor: libc libzonecfg libcontract
-libzfs: libdevid libgen libuutil libadm libavl libefi libidmap \
- libumem libtsol libzfs_core libcryptoutil pkcs11 libmd libcmdutils
+libzfs: libdevid libgen libuutil libavl libefi libidmap \
+ libumem libtsol libzfs_core libcryptoutil pkcs11 libmd libzutil
libzfs_core: libnvpair
-libzfs_jni: libdiskmgt libzfs
+libzfs_jni: libdiskmgt libzfs libzutil
libzonecfg: libuuid libsysevent libsec libbrand libpool libscf libproc \
libuutil libbsm libsecdb
libzonestat: libcmdutils libumem
-libzpool: libavl libumem libcmdutils libsysevent libfakekernel libzfs
+libzpool: libavl libumem libcmdutils libsysevent libfakekernel libzutil
+libzutil: libadm libavl libdevid libefi
madv: libgen
mpapi: libpthread libdevinfo libsysevent
mpss: libgen
diff --git a/usr/src/lib/brand/ipkg/zone/platform.xml b/usr/src/lib/brand/ipkg/zone/platform.xml
index af417212f0..4075fe10d2 100644
--- a/usr/src/lib/brand/ipkg/zone/platform.xml
+++ b/usr/src/lib/brand/ipkg/zone/platform.xml
@@ -20,8 +20,8 @@
CDDL HEADER END
- Copyright 2011 Nexenta Systems, Inc. All rights reserved.
Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+ Copyright 2018 Nexenta Systems, Inc.
DO NOT EDIT THIS FILE.
-->
@@ -41,6 +41,7 @@
<mount special="mnttab" directory="/etc/mnttab" type="mntfs" />
<mount special="objfs" directory="/system/object" type="objfs" />
<mount special="swap" directory="/etc/svc/volatile" type="tmpfs" />
+ <mount special="sharefs" directory="/etc/dfs/sharetab" type="sharefs" />
<!-- Devices to create under /dev -->
<device match="arp" />
diff --git a/usr/src/lib/libdtrace/common/nfs.d b/usr/src/lib/libdtrace/common/nfs.d
index d14c706aca..7a97cd5e6d 100644
--- a/usr/src/lib/libdtrace/common/nfs.d
+++ b/usr/src/lib/libdtrace/common/nfs.d
@@ -23,6 +23,9 @@
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright 2018 Nexenta Systems, Inc. All rights reserved.
+ */
#pragma D depends_on library ip.d
#pragma D depends_on library net.d
@@ -35,6 +38,8 @@ typedef struct nfsv4opinfo {
	uint64_t noi_xid;	/* unique transaction ID */
cred_t *noi_cred; /* credentials for operation */
string noi_curpath; /* current file handle path (if any) */
+ string noi_shrpath; /* current share path */
+ zoneid_t noi_zoneid; /* zone identifier */
} nfsv4opinfo_t;
typedef struct nfsv4cbinfo {
@@ -101,12 +106,17 @@ translator nfsv4opinfo_t < struct compound_state *P > {
noi_xid = P->req->rq_xprt->xp_xid;
noi_cred = P->basecr;
noi_curpath = (P->vp == NULL) ? "<unknown>" : P->vp->v_path;
+ noi_shrpath = (P->exi == NULL || P->exi->exi_export.ex_path == NULL) ?
+ "<unknown>" : P->exi->exi_export.ex_path;
+ noi_zoneid = (P->exi == NULL) ? -1 : P->exi->exi_zoneid;
};
typedef struct nfsv3opinfo {
	uint64_t noi_xid;	/* unique transaction ID */
cred_t *noi_cred; /* credentials for operation */
string noi_curpath; /* current file handle path (if any) */
+ string noi_shrpath; /* current share path */
+ zoneid_t noi_zoneid; /* zone identifier */
} nfsv3opinfo_t;
typedef struct nfsv3oparg nfsv3oparg_t;
@@ -117,4 +127,9 @@ translator nfsv3opinfo_t < nfsv3oparg_t *P > {
noi_cred = (cred_t *)arg1;
noi_curpath = (arg2 == 0 || ((vnode_t *)arg2)->v_path == NULL) ?
"<unknown>" : ((vnode_t *)arg2)->v_path;
+ noi_shrpath =
+ (arg3 == 0 || ((exportinfo_t *)arg3)->exi_export.ex_path == NULL) ?
+ "<unknown>" : ((exportinfo_t *)arg3)->exi_export.ex_path;
+ noi_zoneid =
+ (arg3 == 0) ? -1 : ((exportinfo_t *)arg3)->exi_zoneid;
};
diff --git a/usr/src/lib/libdtrace/common/smb.d b/usr/src/lib/libdtrace/common/smb.d
index c58cb4bf1c..9b2f8bdc1f 100644
--- a/usr/src/lib/libdtrace/common/smb.d
+++ b/usr/src/lib/libdtrace/common/smb.d
@@ -23,7 +23,7 @@
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*
- * Copyright 2017 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2018 Nexenta Systems, Inc. All rights reserved.
*/
#pragma D depends_on library ip.d
@@ -58,6 +58,7 @@ typedef struct smbopinfo {
uint16_t soi_fid; /* file id */
uint16_t soi_flags2; /* flags2 */
uint8_t soi_flags; /* flags */
+ zoneid_t soi_zoneid; /* zone identifier */
} smbopinfo_t;
#pragma D binding "1.5" translator
@@ -72,6 +73,7 @@ translator smbopinfo_t < struct smb_request *P > {
soi_fid = P->smb_fid;
soi_flags2 = P->smb_flg2;
soi_flags = P->smb_flg;
+ soi_zoneid = P->sr_server->sv_zid;
soi_share = (P->tid_tree == NULL) ? "<NULL>" :
P->tid_tree->t_sharename;
@@ -137,6 +139,7 @@ typedef struct smb2opinfo {
uint32_t soi_tid; /* tree ID */
uint32_t soi_status;
uint32_t soi_flags;
+ zoneid_t soi_zoneid; /* zone identifier */
} smb2opinfo_t;
#pragma D binding "1.5" translator
@@ -149,6 +152,7 @@ translator smb2opinfo_t < struct smb_request *P > {
soi_tid = P->smb_tid;
soi_status = P->smb2_status;
soi_flags = P->smb2_hdr_flags;
+ soi_zoneid = P->sr_server->sv_zid;
soi_share = (P->tid_tree == NULL) ? "<NULL>" :
P->tid_tree->t_sharename;
diff --git a/usr/src/lib/libshare/common/libshare_zfs.c b/usr/src/lib/libshare/common/libshare_zfs.c
index a4fda8c424..0db6a56cd2 100644
--- a/usr/src/lib/libshare/common/libshare_zfs.c
+++ b/usr/src/lib/libshare/common/libshare_zfs.c
@@ -22,10 +22,11 @@
/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
*/
+
/*
- * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2012, 2016 by Delphix. All rights reserved.
* Copyright 2017 RackTop Systems.
+ * Copyright 2019 Nexenta Systems, Inc.
*/
#include <stdio.h>
@@ -33,6 +34,7 @@
#include <string.h>
#include <strings.h>
#include <errno.h>
+#include <zone.h>
#include <libshare.h>
#include "libshare_impl.h"
#include <libintl.h>
@@ -238,6 +240,7 @@ get_legacy_mountpoint(const char *path, char *dataset, size_t dlen,
{
FILE *fp;
struct mnttab entry;
+ int rc = 1;
if ((fp = fopen(MNTTAB, "r")) == NULL) {
return (1);
@@ -256,11 +259,12 @@ get_legacy_mountpoint(const char *path, char *dataset, size_t dlen,
if (dlen > 0)
(void) strlcpy(dataset, entry.mnt_special,
dlen);
+ rc = 0;
break;
}
}
(void) fclose(fp);
- return (1);
+ return (rc);
}
@@ -817,6 +821,13 @@ sa_get_zfs_share_common(sa_handle_t handle, zfs_handle_t *fs_handle, char *path,
if (!zfs_is_mounted(fs_handle, NULL))
return (SA_SYSTEM_ERR);
+ /*
+ * Ignore "zoned" datasets in global zone.
+ */
+ if (getzoneid() == GLOBAL_ZONEID &&
+ zfs_prop_get_int(fs_handle, ZFS_PROP_ZONED))
+ return (SA_SYSTEM_ERR);
+
nfs = nfs_inherited = B_FALSE;
if (zfs_prop_get(fs_handle, ZFS_PROP_SHARENFS, nfsshareopts,
@@ -977,6 +988,29 @@ sa_get_zfs_shares(sa_handle_t handle, char *groupname)
}
/*
+ * Initializes shares for only the dataset specified by fs_handle.
+ * This is used as a performance optimization relative to sa_get_zfs_shares.
+ */
+int
+sa_get_zfs_share(sa_handle_t handle, char *groupname, zfs_handle_t *fs_handle)
+{
+ sa_group_t zfsgroup;
+ libzfs_handle_t *zfs_libhandle;
+ int err;
+
+ if ((err = prep_zfs_handle_and_group(handle, groupname, &zfs_libhandle,
+ &zfsgroup, &err)) != SA_OK) {
+ return (err);
+ }
+ /* Not an error, this could be a legacy condition */
+ if (zfsgroup == NULL)
+ return (SA_OK);
+
+ err = sa_get_zfs_share_common(handle, fs_handle, NULL, zfsgroup);
+ return (err);
+}
+
+/*
* Initializes only the handles specified in the sharearg for use with libshare.
* This is used as a performance optimization relative to sa_get_zfs_shares.
*/
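
For reference, the new sa_get_zfs_share() entry point added above shares a single mounted dataset rather than walking every ZFS filesystem the way sa_get_zfs_shares() does. A hypothetical caller sketch (the "zfs" group name, the helper name, and the extern prototype placement are assumptions, not taken from this commit):

#include <libshare.h>
#include <libzfs.h>

/* prototype as shown in the diff above; header placement is an assumption */
extern int sa_get_zfs_share(sa_handle_t, char *, zfs_handle_t *);

int
share_one_dataset(sa_handle_t sahdl, zfs_handle_t *fs_handle)
{
	char group[] = "zfs";	/* assumed group name for ZFS-managed shares */

	/* Faster than sa_get_zfs_shares(), which walks all mounted datasets. */
	return (sa_get_zfs_share(sahdl, group, fs_handle));
}
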
diff --git a/usr/src/lib/libshare/common/mapfile-vers b/usr/src/lib/libshare/common/mapfile-vers
index c661589eda..48a62e0bfd 100644
--- a/usr/src/lib/libshare/common/mapfile-vers
+++ b/usr/src/lib/libshare/common/mapfile-vers
@@ -21,7 +21,7 @@
#
# Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2016 by Delphix. All rights reserved.
-# Copyright 2018 Nexenta Systems, Inc. All rights reserved.
+# Copyright 2019 Nexenta Systems, Inc. All rights reserved.
#
#
@@ -152,7 +152,7 @@ SYMBOL_VERSION SUNWprivate {
sa_proto_delete_section;
sa_needs_refresh;
sa_get_zfs_handle;
- sa_zfs_process_share;
+ sa_get_zfs_share;
sa_update_sharetab_ts;
sa_zfs_setprop;
local:
diff --git a/usr/src/lib/libshare/nfs/libshare_nfs.c b/usr/src/lib/libshare/nfs/libshare_nfs.c
index d5fc9fbbfc..57118ce038 100644
--- a/usr/src/lib/libshare/nfs/libshare_nfs.c
+++ b/usr/src/lib/libshare/nfs/libshare_nfs.c
@@ -22,19 +22,19 @@
/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
- * Copyright 2016 Nexenta Systems, Inc.
* Copyright (c) 2014, 2016 by Delphix. All rights reserved.
+ * Copyright 2018 Nexenta Systems, Inc.
*/
/*
* NFS specific functions
*/
+
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#include <unistd.h>
-#include <zone.h>
#include <errno.h>
#include <locale.h>
#include <signal.h>
@@ -1906,12 +1906,7 @@ nfs_enable_share(sa_share_t share)
sa_free_attr_string(sectype);
}
}
- /*
- * when we get here, we can do the exportfs system call and
- * initiate things. We probably want to enable the
- * svc:/network/nfs/server service first if it isn't running.
- */
- /* check svc:/network/nfs/server status and start if needed */
+
/* now add the share to the internal tables */
printarg(path, &export);
/*
@@ -1921,52 +1916,17 @@ nfs_enable_share(sa_share_t share)
if (iszfs) {
struct exportfs_args ea;
share_t sh;
- char *str;
- priv_set_t *priv_effective;
- int privileged;
-
- /*
- * If we aren't a privileged user
- * and NFS server service isn't running
- * then print out an error message
- * and return EPERM
- */
- priv_effective = priv_allocset();
- (void) getppriv(PRIV_EFFECTIVE, priv_effective);
-
- privileged = (priv_isfullset(priv_effective) == B_TRUE);
- priv_freeset(priv_effective);
-
- if (!privileged &&
- (str = smf_get_state(NFS_SERVER_SVC)) != NULL) {
- err = 0;
- if (strcmp(str, SCF_STATE_STRING_ONLINE) != 0) {
- (void) printf(dgettext(TEXT_DOMAIN,
- "NFS: Cannot share remote "
- "filesystem: %s\n"), path);
- (void) printf(dgettext(TEXT_DOMAIN,
- "NFS: Service needs to be enabled "
- "by a privileged user\n"));
- err = SA_SYSTEM_ERR;
- errno = EPERM;
- }
- free(str);
- }
-
- if (err == 0) {
- ea.dname = path;
- ea.uex = &export;
+ ea.dname = path;
+ ea.uex = &export;
- (void) sa_sharetab_fill_zfs(share, &sh, "nfs");
- err = sa_share_zfs(share, NULL, path, &sh,
- &ea, ZFS_SHARE_NFS);
- if (err != SA_OK) {
- errno = err;
- err = -1;
- }
- sa_emptyshare(&sh);
+ (void) sa_sharetab_fill_zfs(share, &sh, "nfs");
+ err = sa_share_zfs(share, NULL, path, &sh, &ea, ZFS_SHARE_NFS);
+ if (err != SA_OK) {
+ errno = err;
+ err = -1;
}
+ sa_emptyshare(&sh);
} else {
err = exportfs(path, &export);
}
@@ -1974,20 +1934,7 @@ nfs_enable_share(sa_share_t share)
if (err < 0) {
err = SA_SYSTEM_ERR;
switch (errno) {
- case EREMOTE:
- (void) printf(dgettext(TEXT_DOMAIN,
- "NFS: Cannot share filesystems "
- "in non-global zones: %s\n"), path);
- err = SA_NOT_SUPPORTED;
- break;
case EPERM:
- if (getzoneid() != GLOBAL_ZONEID) {
- (void) printf(dgettext(TEXT_DOMAIN,
- "NFS: Cannot share file systems "
- "in non-global zones: %s\n"), path);
- err = SA_NOT_SUPPORTED;
- break;
- }
err = SA_NO_PERMISSION;
break;
case EEXIST:
@@ -2099,9 +2046,6 @@ nfs_disable_share(sa_share_t share, char *path)
case EPERM:
case EACCES:
ret = SA_NO_PERMISSION;
- if (getzoneid() != GLOBAL_ZONEID) {
- ret = SA_NOT_SUPPORTED;
- }
break;
case EINVAL:
case ENOENT:
diff --git a/usr/src/lib/libshare/smb/libshare_smb.c b/usr/src/lib/libshare/smb/libshare_smb.c
index f567e7818b..da14ea94c3 100644
--- a/usr/src/lib/libshare/smb/libshare_smb.c
+++ b/usr/src/lib/libshare/smb/libshare_smb.c
@@ -391,9 +391,7 @@ smb_enable_share(sa_share_t share)
smb_share_t si;
sa_resource_t resource;
boolean_t iszfs;
- boolean_t privileged;
int err = SA_OK;
- priv_set_t *priv_effective;
boolean_t online;
/*
@@ -405,11 +403,6 @@ smb_enable_share(sa_share_t share)
return (SA_NOT_SUPPORTED);
}
- priv_effective = priv_allocset();
- (void) getppriv(PRIV_EFFECTIVE, priv_effective);
- privileged = (priv_isfullset(priv_effective) == B_TRUE);
- priv_freeset(priv_effective);
-
/* get the path since it is important in several places */
path = sa_get_share_attr(share, "path");
if (path == NULL)
@@ -424,29 +417,7 @@ smb_enable_share(sa_share_t share)
iszfs = sa_path_is_zfs(path);
- if (iszfs) {
-
- if (privileged == B_FALSE && !online) {
-
- if (!online) {
- (void) printf(dgettext(TEXT_DOMAIN,
- "SMB: Cannot share remove "
- "file system: %s\n"), path);
- (void) printf(dgettext(TEXT_DOMAIN,
- "SMB: Service needs to be enabled "
- "by a privileged user\n"));
- err = SA_NO_PERMISSION;
- errno = EPERM;
- }
- if (err) {
- sa_free_attr_string(path);
- return (err);
- }
-
- }
- }
-
- if (privileged == B_TRUE && !online) {
+ if (!online) {
err = smb_enable_service();
if (err != SA_OK) {
(void) printf(dgettext(TEXT_DOMAIN,
diff --git a/usr/src/lib/libzfs/Makefile.com b/usr/src/lib/libzfs/Makefile.com
index ba69fc44ca..189d585456 100644
--- a/usr/src/lib/libzfs/Makefile.com
+++ b/usr/src/lib/libzfs/Makefile.com
@@ -22,7 +22,7 @@
# Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
# Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
# Copyright (c) 2011, 2017 by Delphix. All rights reserved.
-# Copyright 2019 Joyent, Inc.
+# Copyright 2020 Joyent, Inc.
#
LIBRARY= libzfs.a
@@ -69,12 +69,13 @@ INCS += -I$(SRCDIR)
INCS += -I../../../uts/common/fs/zfs
INCS += -I../../../common/zfs
INCS += -I../../libc/inc
+INCS += -I../../libzutil/common
CSTD= $(CSTD_GNU99)
C99LMODE= -Xc99=%all
LDLIBS += -lc -lm -ldevid -lgen -lnvpair -luutil -lavl -lefi \
- -ladm -lidmap -ltsol -lcryptoutil -lpkcs11 -lmd -lumem -lzfs_core \
- -lcmdutils -ldevinfo
+ -lidmap -ltsol -lcryptoutil -lpkcs11 -lmd -lumem -lzfs_core \
+ -ldevinfo -lzutil
CPPFLAGS += $(INCS) -D_LARGEFILE64_SOURCE=1 -D_REENTRANT
$(NOT_RELEASE_BUILD)CPPFLAGS += -DDEBUG
diff --git a/usr/src/lib/libzfs/common/libzfs.h b/usr/src/lib/libzfs/common/libzfs.h
index 483f2c7db6..8d14d37866 100644
--- a/usr/src/lib/libzfs/common/libzfs.h
+++ b/usr/src/lib/libzfs/common/libzfs.h
@@ -23,7 +23,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 Pawel Jakub Dawidek. All rights reserved.
* Copyright (c) 2011, 2017 by Delphix. All rights reserved.
- * Copyright 2019 Joyent, Inc.
+ * Copyright 2020 Joyent, Inc.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
* Copyright 2016 Nexenta Systems, Inc.
@@ -397,7 +397,6 @@ extern zpool_status_t zpool_get_status(zpool_handle_t *, char **,
zpool_errata_t *);
extern zpool_status_t zpool_import_status(nvlist_t *, char **,
zpool_errata_t *);
-extern void zpool_dump_ddt(const ddt_stat_t *dds, const ddt_histogram_t *ddh);
/*
* Statistics and configuration functions.
@@ -420,31 +419,6 @@ extern int zpool_import_props(libzfs_handle_t *, nvlist_t *, const char *,
extern void zpool_print_unsup_feat(nvlist_t *config);
/*
- * Search for pools to import
- */
-
-typedef struct importargs {
- char **path; /* a list of paths to search */
- int paths; /* number of paths to search */
- char *poolname; /* name of a pool to find */
- uint64_t guid; /* guid of a pool to find */
- char *cachefile; /* cachefile to use for import */
- int can_be_active : 1; /* can the pool be active? */
- int unique : 1; /* does 'poolname' already exist? */
- int exists : 1; /* set on return if pool already exists */
- nvlist_t *policy; /* load policy (max txg, rewind, etc.) */
-} importargs_t;
-
-extern nvlist_t *zpool_search_import(libzfs_handle_t *, importargs_t *);
-extern int zpool_tryimport(libzfs_handle_t *hdl, char *target,
- nvlist_t **configp, importargs_t *args);
-
-/* legacy pool search routines */
-extern nvlist_t *zpool_find_import(libzfs_handle_t *, int, char **);
-extern nvlist_t *zpool_find_import_cached(libzfs_handle_t *, const char *,
- char *, uint64_t);
-
-/*
* Miscellaneous pool functions
*/
struct zfs_cmd;
@@ -463,8 +437,6 @@ extern char *zpool_vdev_name(libzfs_handle_t *, zpool_handle_t *, nvlist_t *,
extern int zpool_upgrade(zpool_handle_t *, uint64_t);
extern int zpool_get_history(zpool_handle_t *, nvlist_t **, uint64_t *,
boolean_t *);
-extern int zpool_history_unpack(char *, uint64_t, uint64_t *,
- nvlist_t ***, uint_t *);
extern void zpool_obj_to_path(zpool_handle_t *, uint64_t, uint64_t, char *,
size_t len);
extern int zfs_ioctl(libzfs_handle_t *, int, struct zfs_cmd *);
@@ -795,7 +767,6 @@ extern boolean_t zfs_dataset_exists(libzfs_handle_t *, const char *,
zfs_type_t);
extern int zfs_spa_version(zfs_handle_t *, int *);
extern boolean_t zfs_bookmark_exists(const char *path);
-extern ulong_t get_system_hostid(void);
/*
* Mount support functions.
@@ -839,10 +810,6 @@ extern int zfs_deleg_share_nfs(libzfs_handle_t *, char *, char *, char *,
#define verify(EX) assert(EX)
#endif
-/*
- * Utility function to convert a number to a human-readable form.
- */
-extern void zfs_nicenum(uint64_t, char *, size_t);
extern int zfs_nicestrtonum(libzfs_handle_t *, const char *, uint64_t *);
/*
@@ -854,7 +821,6 @@ extern int zpool_in_use(libzfs_handle_t *, int, pool_state_t *, char **,
/*
* Label manipulation.
*/
-extern int zpool_read_label(int, nvlist_t **);
extern int zpool_clear_label(int);
/* is this zvol valid for use as a dump device? */
diff --git a/usr/src/lib/libzfs/common/libzfs_dataset.c b/usr/src/lib/libzfs/common/libzfs_dataset.c
index b412d2086c..18f43453a2 100644
--- a/usr/src/lib/libzfs/common/libzfs_dataset.c
+++ b/usr/src/lib/libzfs/common/libzfs_dataset.c
@@ -21,14 +21,17 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2019 Joyent, Inc.
+ */
+
+/*
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
* Copyright (c) 2011, 2016 by Delphix. All rights reserved.
* Copyright (c) 2012 DEY Storage Systems, Inc. All rights reserved.
* Copyright (c) 2011-2012 Pawel Jakub Dawidek. All rights reserved.
* Copyright (c) 2013 Martin Matuska. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
- * Copyright 2017 Nexenta Systems, Inc.
+ * Copyright 2018 Nexenta Systems, Inc.
* Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
* Copyright 2017-2018 RackTop Systems.
*/
@@ -36,7 +39,6 @@
#include <ctype.h>
#include <errno.h>
#include <libintl.h>
-#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
@@ -61,6 +63,7 @@
#include <sys/zap.h>
#include <sys/dsl_crypt.h>
#include <libzfs.h>
+#include <libzutil.h>
#include "zfs_namecheck.h"
#include "zfs_prop.h"
@@ -1315,8 +1318,8 @@ badlabel:
* global zone non-global zone
* --------------------------------------------------
* zoned=on mountpoint (no) mountpoint (yes)
- * sharenfs (no) sharenfs (no)
- * sharesmb (no) sharesmb (no)
+ * sharenfs (no) sharenfs (yes)
+ * sharesmb (no) sharesmb (yes)
*
* zoned=off mountpoint (yes) N/A
* sharenfs (yes)
@@ -1331,14 +1334,6 @@ badlabel:
(void) zfs_error(hdl, EZFS_ZONED,
errbuf);
goto error;
- } else if (prop == ZFS_PROP_SHARENFS ||
- prop == ZFS_PROP_SHARESMB) {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "'%s' cannot be set in "
- "a non-global zone"), propname);
- (void) zfs_error(hdl, EZFS_ZONED,
- errbuf);
- goto error;
}
} else if (getzoneid() != GLOBAL_ZONEID) {
/*
diff --git a/usr/src/lib/libzfs/common/libzfs_import.c b/usr/src/lib/libzfs/common/libzfs_import.c
index ce5864a62b..706f08e6ec 100644
--- a/usr/src/lib/libzfs/common/libzfs_import.c
+++ b/usr/src/lib/libzfs/common/libzfs_import.c
@@ -59,264 +59,12 @@
#include <thread_pool.h>
#include <sys/vdev_impl.h>
+#include <libzutil.h>
#include "libzfs.h"
#include "libzfs_impl.h"
/*
- * Intermediate structures used to gather configuration information.
- */
-typedef struct config_entry {
- uint64_t ce_txg;
- nvlist_t *ce_config;
- struct config_entry *ce_next;
-} config_entry_t;
-
-typedef struct vdev_entry {
- uint64_t ve_guid;
- config_entry_t *ve_configs;
- struct vdev_entry *ve_next;
-} vdev_entry_t;
-
-typedef struct pool_entry {
- uint64_t pe_guid;
- vdev_entry_t *pe_vdevs;
- struct pool_entry *pe_next;
-} pool_entry_t;
-
-typedef struct name_entry {
- char *ne_name;
- uint64_t ne_guid;
- struct name_entry *ne_next;
-} name_entry_t;
-
-typedef struct pool_list {
- pool_entry_t *pools;
- name_entry_t *names;
-} pool_list_t;
-
-/*
- * Go through and fix up any path and/or devid information for the given vdev
- * configuration.
- */
-static int
-fix_paths(nvlist_t *nv, name_entry_t *names)
-{
- nvlist_t **child;
- uint_t c, children;
- uint64_t guid;
- name_entry_t *ne, *best;
- char *path, *devid;
- int matched;
-
- if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
- &child, &children) == 0) {
- for (c = 0; c < children; c++)
- if (fix_paths(child[c], names) != 0)
- return (-1);
- return (0);
- }
-
- /*
- * This is a leaf (file or disk) vdev. In either case, go through
- * the name list and see if we find a matching guid. If so, replace
- * the path and see if we can calculate a new devid.
- *
- * There may be multiple names associated with a particular guid, in
- * which case we have overlapping slices or multiple paths to the same
- * disk. If this is the case, then we want to pick the path that is
- * the most similar to the original, where "most similar" is the number
- * of matching characters starting from the end of the path. This will
- * preserve slice numbers even if the disks have been reorganized, and
- * will also catch preferred disk names if multiple paths exist.
- */
- verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0);
- if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
- path = NULL;
-
- matched = 0;
- best = NULL;
- for (ne = names; ne != NULL; ne = ne->ne_next) {
- if (ne->ne_guid == guid) {
- const char *src, *dst;
- int count;
-
- if (path == NULL) {
- best = ne;
- break;
- }
-
- src = ne->ne_name + strlen(ne->ne_name) - 1;
- dst = path + strlen(path) - 1;
- for (count = 0; src >= ne->ne_name && dst >= path;
- src--, dst--, count++)
- if (*src != *dst)
- break;
-
- /*
- * At this point, 'count' is the number of characters
- * matched from the end.
- */
- if (count > matched || best == NULL) {
- best = ne;
- matched = count;
- }
- }
- }
-
- if (best == NULL)
- return (0);
-
- if (nvlist_add_string(nv, ZPOOL_CONFIG_PATH, best->ne_name) != 0)
- return (-1);
-
- if ((devid = devid_str_from_path(best->ne_name)) == NULL) {
- (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
- } else {
- if (nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, devid) != 0) {
- devid_str_free(devid);
- return (-1);
- }
- devid_str_free(devid);
- }
-
- return (0);
-}
-
-/*
- * Add the given configuration to the list of known devices.
- */
-static int
-add_config(libzfs_handle_t *hdl, pool_list_t *pl, const char *path,
- nvlist_t *config)
-{
- uint64_t pool_guid, vdev_guid, top_guid, txg, state;
- pool_entry_t *pe;
- vdev_entry_t *ve;
- config_entry_t *ce;
- name_entry_t *ne;
-
- /*
- * If this is a hot spare not currently in use or level 2 cache
- * device, add it to the list of names to translate, but don't do
- * anything else.
- */
- if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
- &state) == 0 &&
- (state == POOL_STATE_SPARE || state == POOL_STATE_L2CACHE) &&
- nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid) == 0) {
- if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL)
- return (-1);
-
- if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) {
- free(ne);
- return (-1);
- }
-
- ne->ne_guid = vdev_guid;
- ne->ne_next = pl->names;
- pl->names = ne;
-
- return (0);
- }
-
- /*
- * If we have a valid config but cannot read any of these fields, then
- * it means we have a half-initialized label. In vdev_label_init()
- * we write a label with txg == 0 so that we can identify the device
- * in case the user refers to the same disk later on. If we fail to
- * create the pool, we'll be left with a label in this state
- * which should not be considered part of a valid pool.
- */
- if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
- &pool_guid) != 0 ||
- nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID,
- &vdev_guid) != 0 ||
- nvlist_lookup_uint64(config, ZPOOL_CONFIG_TOP_GUID,
- &top_guid) != 0 ||
- nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
- &txg) != 0 || txg == 0) {
- return (0);
- }
-
- /*
- * First, see if we know about this pool. If not, then add it to the
- * list of known pools.
- */
- for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
- if (pe->pe_guid == pool_guid)
- break;
- }
-
- if (pe == NULL) {
- if ((pe = zfs_alloc(hdl, sizeof (pool_entry_t))) == NULL) {
- return (-1);
- }
- pe->pe_guid = pool_guid;
- pe->pe_next = pl->pools;
- pl->pools = pe;
- }
-
- /*
- * Second, see if we know about this toplevel vdev. Add it if its
- * missing.
- */
- for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) {
- if (ve->ve_guid == top_guid)
- break;
- }
-
- if (ve == NULL) {
- if ((ve = zfs_alloc(hdl, sizeof (vdev_entry_t))) == NULL) {
- return (-1);
- }
- ve->ve_guid = top_guid;
- ve->ve_next = pe->pe_vdevs;
- pe->pe_vdevs = ve;
- }
-
- /*
- * Third, see if we have a config with a matching transaction group. If
- * so, then we do nothing. Otherwise, add it to the list of known
- * configs.
- */
- for (ce = ve->ve_configs; ce != NULL; ce = ce->ce_next) {
- if (ce->ce_txg == txg)
- break;
- }
-
- if (ce == NULL) {
- if ((ce = zfs_alloc(hdl, sizeof (config_entry_t))) == NULL) {
- return (-1);
- }
- ce->ce_txg = txg;
- ce->ce_config = fnvlist_dup(config);
- ce->ce_next = ve->ve_configs;
- ve->ve_configs = ce;
- }
-
- /*
- * At this point we've successfully added our config to the list of
- * known configs. The last thing to do is add the vdev guid -> path
- * mappings so that we can fix up the configuration as necessary before
- * doing the import.
- */
- if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL)
- return (-1);
-
- if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) {
- free(ne);
- return (-1);
- }
-
- ne->ne_guid = vdev_guid;
- ne->ne_next = pl->names;
- pl->names = ne;
-
- return (0);
-}
-
-/*
* Returns true if the named pool matches the given GUID.
*/
static int
@@ -347,7 +95,7 @@ static nvlist_t *
refresh_config(libzfs_handle_t *hdl, nvlist_t *config)
{
nvlist_t *nvl;
- zfs_cmd_t zc = { 0 };
+ zfs_cmd_t zc = {"\0"};
int err, dstbuf_size;
if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0)
@@ -360,7 +108,7 @@ refresh_config(libzfs_handle_t *hdl, nvlist_t *config)
return (NULL);
}
- while ((err = ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_TRYIMPORT,
+ while ((err = zfs_ioctl(hdl, ZFS_IOC_POOL_TRYIMPORT,
&zc)) != 0 && errno == ENOMEM) {
if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
zcmd_free_nvlists(&zc);
@@ -382,451 +130,24 @@ refresh_config(libzfs_handle_t *hdl, nvlist_t *config)
return (nvl);
}
-/*
- * Determine if the vdev id is a hole in the namespace.
- */
-boolean_t
-vdev_is_hole(uint64_t *hole_array, uint_t holes, uint_t id)
+static nvlist_t *
+refresh_config_libzfs(void *handle, nvlist_t *tryconfig)
{
- for (int c = 0; c < holes; c++) {
-
- /* Top-level is a hole */
- if (hole_array[c] == id)
- return (B_TRUE);
- }
- return (B_FALSE);
+ return (refresh_config((libzfs_handle_t *)handle, tryconfig));
}
-/*
- * Convert our list of pools into the definitive set of configurations. We
- * start by picking the best config for each toplevel vdev. Once that's done,
- * we assemble the toplevel vdevs into a full config for the pool. We make a
- * pass to fix up any incorrect paths, and then add it to the main list to
- * return to the user.
- */
-static nvlist_t *
-get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok,
- nvlist_t *policy)
+static int
+pool_active_libzfs(void *handle, const char *name, uint64_t guid,
+ boolean_t *isactive)
{
- pool_entry_t *pe;
- vdev_entry_t *ve;
- config_entry_t *ce;
- nvlist_t *ret = NULL, *config = NULL, *tmp = NULL, *nvtop, *nvroot;
- nvlist_t **spares, **l2cache;
- uint_t i, nspares, nl2cache;
- boolean_t config_seen;
- uint64_t best_txg;
- char *name, *hostname = NULL;
- uint64_t guid;
- uint_t children = 0;
- nvlist_t **child = NULL;
- uint_t holes;
- uint64_t *hole_array, max_id;
- uint_t c;
- boolean_t isactive;
- uint64_t hostid;
- nvlist_t *nvl;
- boolean_t found_one = B_FALSE;
- boolean_t valid_top_config = B_FALSE;
-
- if (nvlist_alloc(&ret, 0, 0) != 0)
- goto nomem;
-
- for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
- uint64_t id, max_txg = 0;
-
- if (nvlist_alloc(&config, NV_UNIQUE_NAME, 0) != 0)
- goto nomem;
- config_seen = B_FALSE;
-
- /*
- * Iterate over all toplevel vdevs. Grab the pool configuration
- * from the first one we find, and then go through the rest and
- * add them as necessary to the 'vdevs' member of the config.
- */
- for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) {
-
- /*
- * Determine the best configuration for this vdev by
- * selecting the config with the latest transaction
- * group.
- */
- best_txg = 0;
- for (ce = ve->ve_configs; ce != NULL;
- ce = ce->ce_next) {
-
- if (ce->ce_txg > best_txg) {
- tmp = ce->ce_config;
- best_txg = ce->ce_txg;
- }
- }
-
- /*
- * We rely on the fact that the max txg for the
- * pool will contain the most up-to-date information
- * about the valid top-levels in the vdev namespace.
- */
- if (best_txg > max_txg) {
- (void) nvlist_remove(config,
- ZPOOL_CONFIG_VDEV_CHILDREN,
- DATA_TYPE_UINT64);
- (void) nvlist_remove(config,
- ZPOOL_CONFIG_HOLE_ARRAY,
- DATA_TYPE_UINT64_ARRAY);
-
- max_txg = best_txg;
- hole_array = NULL;
- holes = 0;
- max_id = 0;
- valid_top_config = B_FALSE;
-
- if (nvlist_lookup_uint64(tmp,
- ZPOOL_CONFIG_VDEV_CHILDREN, &max_id) == 0) {
- verify(nvlist_add_uint64(config,
- ZPOOL_CONFIG_VDEV_CHILDREN,
- max_id) == 0);
- valid_top_config = B_TRUE;
- }
-
- if (nvlist_lookup_uint64_array(tmp,
- ZPOOL_CONFIG_HOLE_ARRAY, &hole_array,
- &holes) == 0) {
- verify(nvlist_add_uint64_array(config,
- ZPOOL_CONFIG_HOLE_ARRAY,
- hole_array, holes) == 0);
- }
- }
-
- if (!config_seen) {
- /*
- * Copy the relevant pieces of data to the pool
- * configuration:
- *
- * version
- * pool guid
- * name
- * comment (if available)
- * pool state
- * hostid (if available)
- * hostname (if available)
- */
- uint64_t state, version;
- char *comment = NULL;
-
- version = fnvlist_lookup_uint64(tmp,
- ZPOOL_CONFIG_VERSION);
- fnvlist_add_uint64(config,
- ZPOOL_CONFIG_VERSION, version);
- guid = fnvlist_lookup_uint64(tmp,
- ZPOOL_CONFIG_POOL_GUID);
- fnvlist_add_uint64(config,
- ZPOOL_CONFIG_POOL_GUID, guid);
- name = fnvlist_lookup_string(tmp,
- ZPOOL_CONFIG_POOL_NAME);
- fnvlist_add_string(config,
- ZPOOL_CONFIG_POOL_NAME, name);
-
- if (nvlist_lookup_string(tmp,
- ZPOOL_CONFIG_COMMENT, &comment) == 0)
- fnvlist_add_string(config,
- ZPOOL_CONFIG_COMMENT, comment);
-
- state = fnvlist_lookup_uint64(tmp,
- ZPOOL_CONFIG_POOL_STATE);
- fnvlist_add_uint64(config,
- ZPOOL_CONFIG_POOL_STATE, state);
-
- hostid = 0;
- if (nvlist_lookup_uint64(tmp,
- ZPOOL_CONFIG_HOSTID, &hostid) == 0) {
- fnvlist_add_uint64(config,
- ZPOOL_CONFIG_HOSTID, hostid);
- hostname = fnvlist_lookup_string(tmp,
- ZPOOL_CONFIG_HOSTNAME);
- fnvlist_add_string(config,
- ZPOOL_CONFIG_HOSTNAME, hostname);
- }
-
- config_seen = B_TRUE;
- }
-
- /*
- * Add this top-level vdev to the child array.
- */
- verify(nvlist_lookup_nvlist(tmp,
- ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0);
- verify(nvlist_lookup_uint64(nvtop, ZPOOL_CONFIG_ID,
- &id) == 0);
-
- if (id >= children) {
- nvlist_t **newchild;
-
- newchild = zfs_alloc(hdl, (id + 1) *
- sizeof (nvlist_t *));
- if (newchild == NULL)
- goto nomem;
-
- for (c = 0; c < children; c++)
- newchild[c] = child[c];
-
- free(child);
- child = newchild;
- children = id + 1;
- }
- if (nvlist_dup(nvtop, &child[id], 0) != 0)
- goto nomem;
-
- }
-
- /*
- * If we have information about all the top-levels then
- * clean up the nvlist which we've constructed. This
- * means removing any extraneous devices that are
- * beyond the valid range or adding devices to the end
- * of our array which appear to be missing.
- */
- if (valid_top_config) {
- if (max_id < children) {
- for (c = max_id; c < children; c++)
- nvlist_free(child[c]);
- children = max_id;
- } else if (max_id > children) {
- nvlist_t **newchild;
-
- newchild = zfs_alloc(hdl, (max_id) *
- sizeof (nvlist_t *));
- if (newchild == NULL)
- goto nomem;
-
- for (c = 0; c < children; c++)
- newchild[c] = child[c];
-
- free(child);
- child = newchild;
- children = max_id;
- }
- }
-
- verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
- &guid) == 0);
-
- /*
- * The vdev namespace may contain holes as a result of
- * device removal. We must add them back into the vdev
- * tree before we process any missing devices.
- */
- if (holes > 0) {
- ASSERT(valid_top_config);
-
- for (c = 0; c < children; c++) {
- nvlist_t *holey;
-
- if (child[c] != NULL ||
- !vdev_is_hole(hole_array, holes, c))
- continue;
-
- if (nvlist_alloc(&holey, NV_UNIQUE_NAME,
- 0) != 0)
- goto nomem;
-
- /*
- * Holes in the namespace are treated as
- * "hole" top-level vdevs and have a
- * special flag set on them.
- */
- if (nvlist_add_string(holey,
- ZPOOL_CONFIG_TYPE,
- VDEV_TYPE_HOLE) != 0 ||
- nvlist_add_uint64(holey,
- ZPOOL_CONFIG_ID, c) != 0 ||
- nvlist_add_uint64(holey,
- ZPOOL_CONFIG_GUID, 0ULL) != 0) {
- nvlist_free(holey);
- goto nomem;
- }
- child[c] = holey;
- }
- }
-
- /*
- * Look for any missing top-level vdevs. If this is the case,
- * create a faked up 'missing' vdev as a placeholder. We cannot
- * simply compress the child array, because the kernel performs
- * certain checks to make sure the vdev IDs match their location
- * in the configuration.
- */
- for (c = 0; c < children; c++) {
- if (child[c] == NULL) {
- nvlist_t *missing;
- if (nvlist_alloc(&missing, NV_UNIQUE_NAME,
- 0) != 0)
- goto nomem;
- if (nvlist_add_string(missing,
- ZPOOL_CONFIG_TYPE,
- VDEV_TYPE_MISSING) != 0 ||
- nvlist_add_uint64(missing,
- ZPOOL_CONFIG_ID, c) != 0 ||
- nvlist_add_uint64(missing,
- ZPOOL_CONFIG_GUID, 0ULL) != 0) {
- nvlist_free(missing);
- goto nomem;
- }
- child[c] = missing;
- }
- }
-
- /*
- * Put all of this pool's top-level vdevs into a root vdev.
- */
- if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0)
- goto nomem;
- if (nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
- VDEV_TYPE_ROOT) != 0 ||
- nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) != 0 ||
- nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, guid) != 0 ||
- nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
- child, children) != 0) {
- nvlist_free(nvroot);
- goto nomem;
- }
-
- for (c = 0; c < children; c++)
- nvlist_free(child[c]);
- free(child);
- children = 0;
- child = NULL;
-
- /*
- * Go through and fix up any paths and/or devids based on our
- * known list of vdev GUID -> path mappings.
- */
- if (fix_paths(nvroot, pl->names) != 0) {
- nvlist_free(nvroot);
- goto nomem;
- }
-
- /*
- * Add the root vdev to this pool's configuration.
- */
- if (nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
- nvroot) != 0) {
- nvlist_free(nvroot);
- goto nomem;
- }
- nvlist_free(nvroot);
-
- /*
- * zdb uses this path to report on active pools that were
- * imported or created using -R.
- */
- if (active_ok)
- goto add_pool;
-
- /*
- * Determine if this pool is currently active, in which case we
- * can't actually import it.
- */
- verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
- &name) == 0);
- verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
- &guid) == 0);
-
- if (pool_active(hdl, name, guid, &isactive) != 0)
- goto error;
-
- if (isactive) {
- nvlist_free(config);
- config = NULL;
- continue;
- }
-
- if (policy != NULL) {
- if (nvlist_add_nvlist(config, ZPOOL_LOAD_POLICY,
- policy) != 0)
- goto nomem;
- }
-
- if ((nvl = refresh_config(hdl, config)) == NULL) {
- nvlist_free(config);
- config = NULL;
- continue;
- }
-
- nvlist_free(config);
- config = nvl;
-
- /*
- * Go through and update the paths for spares, now that we have
- * them.
- */
- verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
- &nvroot) == 0);
- if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
- &spares, &nspares) == 0) {
- for (i = 0; i < nspares; i++) {
- if (fix_paths(spares[i], pl->names) != 0)
- goto nomem;
- }
- }
-
- /*
- * Update the paths for l2cache devices.
- */
- if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
- &l2cache, &nl2cache) == 0) {
- for (i = 0; i < nl2cache; i++) {
- if (fix_paths(l2cache[i], pl->names) != 0)
- goto nomem;
- }
- }
-
- /*
- * Restore the original information read from the actual label.
- */
- (void) nvlist_remove(config, ZPOOL_CONFIG_HOSTID,
- DATA_TYPE_UINT64);
- (void) nvlist_remove(config, ZPOOL_CONFIG_HOSTNAME,
- DATA_TYPE_STRING);
- if (hostid != 0) {
- verify(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID,
- hostid) == 0);
- verify(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME,
- hostname) == 0);
- }
-
-add_pool:
- /*
- * Add this pool to the list of configs.
- */
- verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
- &name) == 0);
- if (nvlist_add_nvlist(ret, name, config) != 0)
- goto nomem;
-
- found_one = B_TRUE;
- nvlist_free(config);
- config = NULL;
- }
-
- if (!found_one) {
- nvlist_free(ret);
- ret = NULL;
- }
-
- return (ret);
-
-nomem:
- (void) no_memory(hdl);
-error:
- nvlist_free(config);
- nvlist_free(ret);
- for (c = 0; c < children; c++)
- nvlist_free(child[c]);
- free(child);
-
- return (NULL);
+ return (pool_active((libzfs_handle_t *)handle, name, guid, isactive));
}
+const pool_config_ops_t libzfs_config_ops = {
+ .pco_refresh_config = refresh_config_libzfs,
+ .pco_pool_active = pool_active_libzfs,
+};
+
/*
* Return the offset of the given label.
*/
@@ -839,232 +160,6 @@ label_offset(uint64_t size, int l)
}
/*
- * Given a file descriptor, read the label information and return an nvlist
- * describing the configuration, if there is one.
- * Return 0 on success, or -1 on failure
- */
-int
-zpool_read_label(int fd, nvlist_t **config)
-{
- struct stat64 statbuf;
- int l;
- vdev_label_t *label;
- uint64_t state, txg, size;
-
- *config = NULL;
-
- if (fstat64(fd, &statbuf) == -1)
- return (-1);
- size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);
-
- if ((label = malloc(sizeof (vdev_label_t))) == NULL)
- return (-1);
-
- for (l = 0; l < VDEV_LABELS; l++) {
- if (pread64(fd, label, sizeof (vdev_label_t),
- label_offset(size, l)) != sizeof (vdev_label_t))
- continue;
-
- if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist,
- sizeof (label->vl_vdev_phys.vp_nvlist), config, 0) != 0)
- continue;
-
- if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
- &state) != 0 || state > POOL_STATE_L2CACHE) {
- nvlist_free(*config);
- continue;
- }
-
- if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
- (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
- &txg) != 0 || txg == 0)) {
- nvlist_free(*config);
- continue;
- }
-
- free(label);
- return (0);
- }
-
- free(label);
- *config = NULL;
- errno = ENOENT;
- return (-1);
-}
-
-typedef struct rdsk_node {
- char *rn_name;
- int rn_dfd;
- libzfs_handle_t *rn_hdl;
- nvlist_t *rn_config;
- avl_tree_t *rn_avl;
- avl_node_t rn_node;
- boolean_t rn_nozpool;
-} rdsk_node_t;
-
-static int
-slice_cache_compare(const void *arg1, const void *arg2)
-{
- const char *nm1 = ((rdsk_node_t *)arg1)->rn_name;
- const char *nm2 = ((rdsk_node_t *)arg2)->rn_name;
- char *nm1slice, *nm2slice;
- int rv;
-
- /*
- * slices zero and two are the most likely to provide results,
- * so put those first
- */
- nm1slice = strstr(nm1, "s0");
- nm2slice = strstr(nm2, "s0");
- if (nm1slice && !nm2slice) {
- return (-1);
- }
- if (!nm1slice && nm2slice) {
- return (1);
- }
- nm1slice = strstr(nm1, "s2");
- nm2slice = strstr(nm2, "s2");
- if (nm1slice && !nm2slice) {
- return (-1);
- }
- if (!nm1slice && nm2slice) {
- return (1);
- }
-
- rv = strcmp(nm1, nm2);
- if (rv == 0)
- return (0);
- return (rv > 0 ? 1 : -1);
-}
-
-static void
-check_one_slice(avl_tree_t *r, char *diskname, uint_t partno,
- diskaddr_t size, uint_t blksz)
-{
- rdsk_node_t tmpnode;
- rdsk_node_t *node;
- char sname[MAXNAMELEN];
-
- tmpnode.rn_name = &sname[0];
- (void) snprintf(tmpnode.rn_name, MAXNAMELEN, "%s%u",
- diskname, partno);
- /*
- * protect against division by zero for disk labels that
- * contain a bogus sector size
- */
- if (blksz == 0)
- blksz = DEV_BSIZE;
- /* too small to contain a zpool? */
- if ((size < (SPA_MINDEVSIZE / blksz)) &&
- (node = avl_find(r, &tmpnode, NULL)))
- node->rn_nozpool = B_TRUE;
-}
-
-static void
-nozpool_all_slices(avl_tree_t *r, const char *sname)
-{
- char diskname[MAXNAMELEN];
- char *ptr;
- int i;
-
- (void) strncpy(diskname, sname, MAXNAMELEN);
- if (((ptr = strrchr(diskname, 's')) == NULL) &&
- ((ptr = strrchr(diskname, 'p')) == NULL))
- return;
- ptr[0] = 's';
- ptr[1] = '\0';
- for (i = 0; i < NDKMAP; i++)
- check_one_slice(r, diskname, i, 0, 1);
- ptr[0] = 'p';
- for (i = 0; i <= FD_NUMPART; i++)
- check_one_slice(r, diskname, i, 0, 1);
-}
-
-static void
-check_slices(avl_tree_t *r, int fd, const char *sname)
-{
- struct extvtoc vtoc;
- struct dk_gpt *gpt;
- char diskname[MAXNAMELEN];
- char *ptr;
- int i;
-
- (void) strncpy(diskname, sname, MAXNAMELEN);
- if ((ptr = strrchr(diskname, 's')) == NULL || !isdigit(ptr[1]))
- return;
- ptr[1] = '\0';
-
- if (read_extvtoc(fd, &vtoc) >= 0) {
- for (i = 0; i < NDKMAP; i++)
- check_one_slice(r, diskname, i,
- vtoc.v_part[i].p_size, vtoc.v_sectorsz);
- } else if (efi_alloc_and_read(fd, &gpt) >= 0) {
- /*
- * on x86 we'll still have leftover links that point
- * to slices s[9-15], so use NDKMAP instead
- */
- for (i = 0; i < NDKMAP; i++)
- check_one_slice(r, diskname, i,
- gpt->efi_parts[i].p_size, gpt->efi_lbasize);
- /* nodes p[1-4] are never used with EFI labels */
- ptr[0] = 'p';
- for (i = 1; i <= FD_NUMPART; i++)
- check_one_slice(r, diskname, i, 0, 1);
- efi_free(gpt);
- }
-}
-
-static void
-zpool_open_func(void *arg)
-{
- rdsk_node_t *rn = arg;
- struct stat64 statbuf;
- nvlist_t *config;
- int fd;
-
- if (rn->rn_nozpool)
- return;
- if ((fd = openat64(rn->rn_dfd, rn->rn_name, O_RDONLY)) < 0) {
- /* symlink to a device that's no longer there */
- if (errno == ENOENT)
- nozpool_all_slices(rn->rn_avl, rn->rn_name);
- return;
- }
- /*
- * Ignore failed stats. We only want regular
- * files, character devs and block devs.
- */
- if (fstat64(fd, &statbuf) != 0 ||
- (!S_ISREG(statbuf.st_mode) &&
- !S_ISCHR(statbuf.st_mode) &&
- !S_ISBLK(statbuf.st_mode))) {
- (void) close(fd);
- return;
- }
- /* this file is too small to hold a zpool */
- if (S_ISREG(statbuf.st_mode) &&
- statbuf.st_size < SPA_MINDEVSIZE) {
- (void) close(fd);
- return;
- } else if (!S_ISREG(statbuf.st_mode)) {
- /*
- * Try to read the disk label first so we don't have to
- * open a bunch of minor nodes that can't have a zpool.
- */
- check_slices(rn->rn_avl, fd, rn->rn_name);
- }
-
- if ((zpool_read_label(fd, &config)) != 0 && errno == ENOMEM) {
- (void) close(fd);
- (void) no_memory(rn->rn_hdl);
- return;
- }
- (void) close(fd);
-
- rn->rn_config = config;
-}
-
-/*
* Given a file descriptor, clear (zero) the label information.
*/
int
@@ -1094,433 +189,6 @@ zpool_clear_label(int fd)
return (0);
}
-/*
- * Given a list of directories to search, find all pools stored on disk. This
- * includes partial pools which are not available to import. If no args are
- * given (argc is 0), then the default directory (/dev/dsk) is searched.
- * poolname or guid (but not both) are provided by the caller when trying
- * to import a specific pool.
- */
-static nvlist_t *
-zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg)
-{
- int i, dirs = iarg->paths;
- struct dirent64 *dp;
- char path[MAXPATHLEN];
- char *end, **dir = iarg->path;
- size_t pathleft;
- nvlist_t *ret = NULL;
- static char *default_dir = ZFS_DISK_ROOT;
- pool_list_t pools = { 0 };
- pool_entry_t *pe, *penext;
- vdev_entry_t *ve, *venext;
- config_entry_t *ce, *cenext;
- name_entry_t *ne, *nenext;
- avl_tree_t slice_cache;
- rdsk_node_t *slice;
- void *cookie;
-
- if (dirs == 0) {
- dirs = 1;
- dir = &default_dir;
- }
-
- /*
- * Go through and read the label configuration information from every
- * possible device, organizing the information according to pool GUID
- * and toplevel GUID.
- */
- for (i = 0; i < dirs; i++) {
- tpool_t *t;
- char rdsk[MAXPATHLEN];
- int dfd;
- boolean_t config_failed = B_FALSE;
- DIR *dirp;
-
- /* use realpath to normalize the path */
- if (realpath(dir[i], path) == 0) {
- (void) zfs_error_fmt(hdl, EZFS_BADPATH,
- dgettext(TEXT_DOMAIN, "cannot open '%s'"), dir[i]);
- goto error;
- }
- end = &path[strlen(path)];
- *end++ = '/';
- *end = 0;
- pathleft = &path[sizeof (path)] - end;
-
- /*
- * Using raw devices instead of block devices when we're
- * reading the labels skips a bunch of slow operations during
- * close(2) processing, so we replace /dev/dsk with /dev/rdsk.
- */
- if (strcmp(path, ZFS_DISK_ROOTD) == 0)
- (void) strlcpy(rdsk, ZFS_RDISK_ROOTD, sizeof (rdsk));
- else
- (void) strlcpy(rdsk, path, sizeof (rdsk));
-
- if ((dfd = open64(rdsk, O_RDONLY)) < 0 ||
- (dirp = fdopendir(dfd)) == NULL) {
- if (dfd >= 0)
- (void) close(dfd);
- zfs_error_aux(hdl, strerror(errno));
- (void) zfs_error_fmt(hdl, EZFS_BADPATH,
- dgettext(TEXT_DOMAIN, "cannot open '%s'"),
- rdsk);
- goto error;
- }
-
- avl_create(&slice_cache, slice_cache_compare,
- sizeof (rdsk_node_t), offsetof(rdsk_node_t, rn_node));
- /*
- * This is not MT-safe, but we have no MT consumers of libzfs
- */
- while ((dp = readdir64(dirp)) != NULL) {
- const char *name = dp->d_name;
- if (name[0] == '.' &&
- (name[1] == 0 || (name[1] == '.' && name[2] == 0)))
- continue;
-
- slice = zfs_alloc(hdl, sizeof (rdsk_node_t));
- slice->rn_name = zfs_strdup(hdl, name);
- slice->rn_avl = &slice_cache;
- slice->rn_dfd = dfd;
- slice->rn_hdl = hdl;
- slice->rn_nozpool = B_FALSE;
- avl_add(&slice_cache, slice);
- }
- /*
- * create a thread pool to do all of this in parallel;
- * rn_nozpool is not protected, so this is racy in that
- * multiple tasks could decide that the same slice can
- * not hold a zpool, which is benign. Also choose
- * double the number of processors; we hold a lot of
- * locks in the kernel, so going beyond this doesn't
- * buy us much.
- */
- t = tpool_create(1, 2 * sysconf(_SC_NPROCESSORS_ONLN),
- 0, NULL);
- for (slice = avl_first(&slice_cache); slice;
- (slice = avl_walk(&slice_cache, slice,
- AVL_AFTER)))
- (void) tpool_dispatch(t, zpool_open_func, slice);
- tpool_wait(t);
- tpool_destroy(t);
-
- cookie = NULL;
- while ((slice = avl_destroy_nodes(&slice_cache,
- &cookie)) != NULL) {
- if (slice->rn_config != NULL && !config_failed) {
- nvlist_t *config = slice->rn_config;
- boolean_t matched = B_TRUE;
-
- if (iarg->poolname != NULL) {
- char *pname;
-
- matched = nvlist_lookup_string(config,
- ZPOOL_CONFIG_POOL_NAME,
- &pname) == 0 &&
- strcmp(iarg->poolname, pname) == 0;
- } else if (iarg->guid != 0) {
- uint64_t this_guid;
-
- matched = nvlist_lookup_uint64(config,
- ZPOOL_CONFIG_POOL_GUID,
- &this_guid) == 0 &&
- iarg->guid == this_guid;
- }
- if (matched) {
- /*
- * use the non-raw path for the config
- */
- (void) strlcpy(end, slice->rn_name,
- pathleft);
- if (add_config(hdl, &pools, path,
- config) != 0)
- config_failed = B_TRUE;
- }
- nvlist_free(config);
- }
- free(slice->rn_name);
- free(slice);
- }
- avl_destroy(&slice_cache);
-
- (void) closedir(dirp);
-
- if (config_failed)
- goto error;
- }
-
- ret = get_configs(hdl, &pools, iarg->can_be_active, iarg->policy);
-
-error:
- for (pe = pools.pools; pe != NULL; pe = penext) {
- penext = pe->pe_next;
- for (ve = pe->pe_vdevs; ve != NULL; ve = venext) {
- venext = ve->ve_next;
- for (ce = ve->ve_configs; ce != NULL; ce = cenext) {
- cenext = ce->ce_next;
- nvlist_free(ce->ce_config);
- free(ce);
- }
- free(ve);
- }
- free(pe);
- }
-
- for (ne = pools.names; ne != NULL; ne = nenext) {
- nenext = ne->ne_next;
- free(ne->ne_name);
- free(ne);
- }
-
- return (ret);
-}
-
-nvlist_t *
-zpool_find_import(libzfs_handle_t *hdl, int argc, char **argv)
-{
- importargs_t iarg = { 0 };
-
- iarg.paths = argc;
- iarg.path = argv;
-
- return (zpool_find_import_impl(hdl, &iarg));
-}
-
-/*
- * Given a cache file, return the contents as a list of importable pools.
- * poolname or guid (but not both) are provided by the caller when trying
- * to import a specific pool.
- */
-nvlist_t *
-zpool_find_import_cached(libzfs_handle_t *hdl, const char *cachefile,
- char *poolname, uint64_t guid)
-{
- char *buf;
- int fd;
- struct stat64 statbuf;
- nvlist_t *raw, *src, *dst;
- nvlist_t *pools;
- nvpair_t *elem;
- char *name;
- uint64_t this_guid;
- boolean_t active;
-
- verify(poolname == NULL || guid == 0);
-
- if ((fd = open(cachefile, O_RDONLY)) < 0) {
- zfs_error_aux(hdl, "%s", strerror(errno));
- (void) zfs_error(hdl, EZFS_BADCACHE,
- dgettext(TEXT_DOMAIN, "failed to open cache file"));
- return (NULL);
- }
-
- if (fstat64(fd, &statbuf) != 0) {
- zfs_error_aux(hdl, "%s", strerror(errno));
- (void) close(fd);
- (void) zfs_error(hdl, EZFS_BADCACHE,
- dgettext(TEXT_DOMAIN, "failed to get size of cache file"));
- return (NULL);
- }
-
- if ((buf = zfs_alloc(hdl, statbuf.st_size)) == NULL) {
- (void) close(fd);
- return (NULL);
- }
-
- if (read(fd, buf, statbuf.st_size) != statbuf.st_size) {
- (void) close(fd);
- free(buf);
- (void) zfs_error(hdl, EZFS_BADCACHE,
- dgettext(TEXT_DOMAIN,
- "failed to read cache file contents"));
- return (NULL);
- }
-
- (void) close(fd);
-
- if (nvlist_unpack(buf, statbuf.st_size, &raw, 0) != 0) {
- free(buf);
- (void) zfs_error(hdl, EZFS_BADCACHE,
- dgettext(TEXT_DOMAIN,
- "invalid or corrupt cache file contents"));
- return (NULL);
- }
-
- free(buf);
-
- /*
- * Go through and get the current state of the pools and refresh their
- * state.
- */
- if (nvlist_alloc(&pools, 0, 0) != 0) {
- (void) no_memory(hdl);
- nvlist_free(raw);
- return (NULL);
- }
-
- elem = NULL;
- while ((elem = nvlist_next_nvpair(raw, elem)) != NULL) {
- src = fnvpair_value_nvlist(elem);
-
- name = fnvlist_lookup_string(src, ZPOOL_CONFIG_POOL_NAME);
- if (poolname != NULL && strcmp(poolname, name) != 0)
- continue;
-
- this_guid = fnvlist_lookup_uint64(src, ZPOOL_CONFIG_POOL_GUID);
- if (guid != 0 && guid != this_guid)
- continue;
-
- if (pool_active(hdl, name, this_guid, &active) != 0) {
- nvlist_free(raw);
- nvlist_free(pools);
- return (NULL);
- }
-
- if (active)
- continue;
-
- if (nvlist_add_string(src, ZPOOL_CONFIG_CACHEFILE,
- cachefile) != 0) {
- (void) no_memory(hdl);
- nvlist_free(raw);
- nvlist_free(pools);
- return (NULL);
- }
-
- if ((dst = refresh_config(hdl, src)) == NULL) {
- nvlist_free(raw);
- nvlist_free(pools);
- return (NULL);
- }
-
- if (nvlist_add_nvlist(pools, nvpair_name(elem), dst) != 0) {
- (void) no_memory(hdl);
- nvlist_free(dst);
- nvlist_free(raw);
- nvlist_free(pools);
- return (NULL);
- }
- nvlist_free(dst);
- }
-
- nvlist_free(raw);
- return (pools);
-}
-
-static int
-name_or_guid_exists(zpool_handle_t *zhp, void *data)
-{
- importargs_t *import = data;
- int found = 0;
-
- if (import->poolname != NULL) {
- char *pool_name;
-
- verify(nvlist_lookup_string(zhp->zpool_config,
- ZPOOL_CONFIG_POOL_NAME, &pool_name) == 0);
- if (strcmp(pool_name, import->poolname) == 0)
- found = 1;
- } else {
- uint64_t pool_guid;
-
- verify(nvlist_lookup_uint64(zhp->zpool_config,
- ZPOOL_CONFIG_POOL_GUID, &pool_guid) == 0);
- if (pool_guid == import->guid)
- found = 1;
- }
-
- zpool_close(zhp);
- return (found);
-}
-
-nvlist_t *
-zpool_search_import(libzfs_handle_t *hdl, importargs_t *import)
-{
- nvlist_t *pools = NULL;
-
- verify(import->poolname == NULL || import->guid == 0);
-
- if (import->unique)
- import->exists = zpool_iter(hdl, name_or_guid_exists, import);
-
- if (import->cachefile != NULL)
- pools = zpool_find_import_cached(hdl, import->cachefile,
- import->poolname, import->guid);
- else
- pools = zpool_find_import_impl(hdl, import);
-
- return (pools);
-}
-
-static boolean_t
-pool_match(nvlist_t *cfg, char *tgt)
-{
- uint64_t v, guid = strtoull(tgt, NULL, 0);
- char *s;
-
- if (guid != 0) {
- if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &v) == 0)
- return (v == guid);
- } else {
- if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &s) == 0)
- return (strcmp(s, tgt) == 0);
- }
- return (B_FALSE);
-}
-
-int
-zpool_tryimport(libzfs_handle_t *hdl, char *target, nvlist_t **configp,
- importargs_t *args)
-{
- nvlist_t *pools;
- nvlist_t *match = NULL;
- nvlist_t *config = NULL;
- char *sepp = NULL;
- int count = 0;
- char *targetdup = strdup(target);
-
- *configp = NULL;
-
- if ((sepp = strpbrk(targetdup, "/@")) != NULL) {
- *sepp = '\0';
- }
-
- pools = zpool_search_import(hdl, args);
-
- if (pools != NULL) {
- nvpair_t *elem = NULL;
- while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) {
- VERIFY0(nvpair_value_nvlist(elem, &config));
- if (pool_match(config, targetdup)) {
- count++;
- if (match != NULL) {
- /* multiple matches found */
- continue;
- } else {
- match = config;
- }
- }
- }
- }
-
- if (count == 0) {
- free(targetdup);
- return (ENOENT);
- }
-
- if (count > 1) {
- free(targetdup);
- return (EINVAL);
- }
-
- *configp = match;
- free(targetdup);
-
- return (0);
-}
-
boolean_t
find_guid(nvlist_t *nv, uint64_t guid)
{
@@ -1597,7 +265,7 @@ zpool_in_use(libzfs_handle_t *hdl, int fd, pool_state_t *state, char **namestr,
*inuse = B_FALSE;
- if (zpool_read_label(fd, &config) != 0 && errno == ENOMEM) {
+ if (zpool_read_label(fd, &config, NULL) != 0 && errno == ENOMEM) {
(void) no_memory(hdl);
return (-1);
}
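
The net effect of the libzfs_import.c changes above is that the pool-discovery code now lives in libzutil and reaches back into libzfs only through the libzfs_config_ops vtable defined here (refresh_config_libzfs and pool_active_libzfs). A minimal sketch of how a libzfs consumer might drive the relocated search path; the helper name and the printing loop are illustrative, not part of this merge:

#include <stdio.h>
#include <libzfs.h>
#include <libzutil.h>

/*
 * Illustrative helper (not part of this merge): print the names of the
 * importable pools found under the default device directory.
 */
static void
list_importable_pools(libzfs_handle_t *hdl)
{
	importargs_t iarg = { 0 };
	nvlist_t *pools;
	nvpair_t *elem = NULL;

	/* No paths supplied, so the default /dev/dsk search is used. */
	pools = zpool_search_import(hdl, &iarg, &libzfs_config_ops);
	if (pools == NULL)
		return;

	while ((elem = nvlist_next_nvpair(pools, elem)) != NULL)
		(void) printf("%s\n", nvpair_name(elem));

	nvlist_free(pools);
}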
diff --git a/usr/src/lib/libzfs/common/libzfs_iter.c b/usr/src/lib/libzfs/common/libzfs_iter.c
index 9f36f1234f..9e160087b3 100644
--- a/usr/src/lib/libzfs/common/libzfs_iter.c
+++ b/usr/src/lib/libzfs/common/libzfs_iter.c
@@ -34,6 +34,7 @@
#include <stddef.h>
#include <libintl.h>
#include <libzfs.h>
+#include <libzutil.h>
#include "libzfs_impl.h"
diff --git a/usr/src/lib/libzfs/common/libzfs_mount.c b/usr/src/lib/libzfs/common/libzfs_mount.c
index aea93fb91e..faa750f121 100644
--- a/usr/src/lib/libzfs/common/libzfs_mount.c
+++ b/usr/src/lib/libzfs/common/libzfs_mount.c
@@ -20,9 +20,12 @@
*/
/*
- * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2014, 2017 by Delphix. All rights reserved.
+ */
+
+/*
+ * Copyright 2019 Nexenta Systems, Inc.
+ * Copyright (c) 2014, 2016 by Delphix. All rights reserved.
* Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
* Copyright 2017 Joyent, Inc.
* Copyright 2017 RackTop Systems.
@@ -621,8 +624,7 @@ static char *(*_sa_errorstr)(int);
static int (*_sa_parse_legacy_options)(sa_group_t, char *, char *);
static boolean_t (*_sa_needs_refresh)(sa_handle_t *);
static libzfs_handle_t *(*_sa_get_zfs_handle)(sa_handle_t);
-static int (*_sa_zfs_process_share)(sa_handle_t, sa_group_t, sa_share_t,
- char *, char *, zprop_source_t, char *, char *, char *);
+static int (* _sa_get_zfs_share)(sa_handle_t, char *, zfs_handle_t *);
static void (*_sa_update_sharetab_ts)(sa_handle_t);
/*
@@ -670,9 +672,8 @@ _zfs_init_libshare(void)
dlsym(libshare, "sa_needs_refresh");
_sa_get_zfs_handle = (libzfs_handle_t *(*)(sa_handle_t))
dlsym(libshare, "sa_get_zfs_handle");
- _sa_zfs_process_share = (int (*)(sa_handle_t, sa_group_t,
- sa_share_t, char *, char *, zprop_source_t, char *,
- char *, char *))dlsym(libshare, "sa_zfs_process_share");
+ _sa_get_zfs_share = (int (*)(sa_handle_t, char *,
+ zfs_handle_t *)) dlsym(libshare, "sa_get_zfs_share");
_sa_update_sharetab_ts = (void (*)(sa_handle_t))
dlsym(libshare, "sa_update_sharetab_ts");
if (_sa_init == NULL || _sa_init_arg == NULL ||
@@ -680,7 +681,7 @@ _zfs_init_libshare(void)
_sa_enable_share == NULL || _sa_disable_share == NULL ||
_sa_errorstr == NULL || _sa_parse_legacy_options == NULL ||
_sa_needs_refresh == NULL || _sa_get_zfs_handle == NULL ||
- _sa_zfs_process_share == NULL || _sa_service == NULL ||
+ _sa_get_zfs_share == NULL || _sa_service == NULL ||
_sa_update_sharetab_ts == NULL) {
_sa_init = NULL;
_sa_init_arg = NULL;
@@ -693,7 +694,7 @@ _zfs_init_libshare(void)
(void) dlclose(libshare);
_sa_needs_refresh = NULL;
_sa_get_zfs_handle = NULL;
- _sa_zfs_process_share = NULL;
+ _sa_get_zfs_share = NULL;
_sa_update_sharetab_ts = NULL;
}
}
@@ -880,30 +881,17 @@ zfs_share_proto(zfs_handle_t *zhp, zfs_share_proto_t *proto)
return (-1);
}
- /*
- * If the 'zoned' property is set, then zfs_is_mountable()
- * will have already bailed out if we are in the global zone.
- * But local zones cannot be NFS servers, so we ignore it for
- * local zones as well.
- */
- if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED))
- continue;
-
share = zfs_sa_find_share(hdl->libzfs_sharehdl, mountpoint);
if (share == NULL) {
/*
* This may be a new file system that was just
- * created so isn't in the internal cache
- * (second time through). Rather than
- * reloading the entire configuration, we can
- * assume ZFS has done the checking and it is
- * safe to add this to the internal
- * configuration.
+ * created so isn't in the internal cache.
+ * Rather than reloading the entire configuration,
+ * we can add just this one share to the cache.
*/
- if (_sa_zfs_process_share(hdl->libzfs_sharehdl,
- NULL, NULL, mountpoint,
- proto_table[*curr_proto].p_name, sourcetype,
- shareopts, sourcestr, zhp->zfs_name) != SA_OK) {
+ if ((_sa_get_zfs_share == NULL) ||
+ (_sa_get_zfs_share(hdl->libzfs_sharehdl, "zfs", zhp)
+ != SA_OK)) {
(void) zfs_error_fmt(hdl,
proto_table[*curr_proto].p_share_err,
dgettext(TEXT_DOMAIN, "cannot share '%s'"),
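
The libzfs_mount.c hunks above rebind the libshare hook from sa_zfs_process_share to the simpler sa_get_zfs_share and drop the guard that assumed a local zone can never be an NFS server. A rough sketch of the dlsym(3C) late-binding pattern those hunks rely on, with a hypothetical wrapper name; the real code caches the pointer in _sa_get_zfs_share and clears every hook if any lookup fails:

#include <dlfcn.h>
#include <stddef.h>

typedef int (*sa_get_zfs_share_fn_t)(void *, char *, void *);

/*
 * Hypothetical helper: resolve sa_get_zfs_share from an already opened
 * libshare handle, returning NULL if the symbol is missing.
 */
static sa_get_zfs_share_fn_t
resolve_sa_get_zfs_share(void *libshare)
{
	if (libshare == NULL)
		return (NULL);
	return ((sa_get_zfs_share_fn_t)dlsym(libshare, "sa_get_zfs_share"));
}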
diff --git a/usr/src/lib/libzfs/common/libzfs_pool.c b/usr/src/lib/libzfs/common/libzfs_pool.c
index ae8d607b6c..c5b8b60f0d 100644
--- a/usr/src/lib/libzfs/common/libzfs_pool.c
+++ b/usr/src/lib/libzfs/common/libzfs_pool.c
@@ -43,6 +43,7 @@
#include <sys/vtoc.h>
#include <sys/zfs_ioctl.h>
#include <dlfcn.h>
+#include <libzutil.h>
#include "zfs_namecheck.h"
#include "zfs_prop.h"
@@ -4302,47 +4303,6 @@ get_history(zpool_handle_t *zhp, char *buf, uint64_t *off, uint64_t *len)
}
/*
- * Process the buffer of nvlists, unpacking and storing each nvlist record
- * into 'records'. 'leftover' is set to the number of bytes that weren't
- * processed as there wasn't a complete record.
- */
-int
-zpool_history_unpack(char *buf, uint64_t bytes_read, uint64_t *leftover,
- nvlist_t ***records, uint_t *numrecords)
-{
- uint64_t reclen;
- nvlist_t *nv;
- int i;
-
- while (bytes_read > sizeof (reclen)) {
-
- /* get length of packed record (stored as little endian) */
- for (i = 0, reclen = 0; i < sizeof (reclen); i++)
- reclen += (uint64_t)(((uchar_t *)buf)[i]) << (8*i);
-
- if (bytes_read < sizeof (reclen) + reclen)
- break;
-
- /* unpack record */
- if (nvlist_unpack(buf + sizeof (reclen), reclen, &nv, 0) != 0)
- return (ENOMEM);
- bytes_read -= sizeof (reclen) + reclen;
- buf += sizeof (reclen) + reclen;
-
- /* add record to nvlist array */
- (*numrecords)++;
- if (ISP2(*numrecords + 1)) {
- *records = realloc(*records,
- *numrecords * 2 * sizeof (nvlist_t *));
- }
- (*records)[*numrecords - 1] = nv;
- }
-
- *leftover = bytes_read;
- return (0);
-}
-
-/*
* Retrieve the command history of a pool.
*/
int
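
zpool_history_unpack() moves out of libzfs_pool.c into libzutil unchanged. Each history record in the buffer is framed by a 64-bit length stored little-endian, followed by that many bytes of packed nvlist. A standalone sketch of just the length decoding performed by the loop removed above (the helper name is illustrative):

#include <sys/types.h>

/* Decode the little-endian 64-bit record length at the head of buf. */
static uint64_t
history_record_length(const uchar_t *buf)
{
	uint64_t reclen = 0;

	for (int i = 0; i < sizeof (reclen); i++)
		reclen += (uint64_t)buf[i] << (8 * i);
	return (reclen);
}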
diff --git a/usr/src/lib/libzfs/common/libzfs_sendrecv.c b/usr/src/lib/libzfs/common/libzfs_sendrecv.c
index 174380a777..2bfb49875d 100644
--- a/usr/src/lib/libzfs/common/libzfs_sendrecv.c
+++ b/usr/src/lib/libzfs/common/libzfs_sendrecv.c
@@ -49,6 +49,7 @@
#include <libzfs.h>
#include <libzfs_core.h>
+#include <libzutil.h>
#include "zfs_namecheck.h"
#include "zfs_prop.h"
diff --git a/usr/src/lib/libzfs/common/libzfs_status.c b/usr/src/lib/libzfs/common/libzfs_status.c
index 46ea7f944f..c12736e4de 100644
--- a/usr/src/lib/libzfs/common/libzfs_status.c
+++ b/usr/src/lib/libzfs/common/libzfs_status.c
@@ -42,6 +42,7 @@
*/
#include <libzfs.h>
+#include <libzutil.h>
#include <string.h>
#include <unistd.h>
#include "libzfs_impl.h"
@@ -437,68 +438,3 @@ zpool_import_status(nvlist_t *config, char **msgid, zpool_errata_t *errata)
return (ret);
}
-
-static void
-dump_ddt_stat(const ddt_stat_t *dds, int h)
-{
- char refcnt[6];
- char blocks[6], lsize[6], psize[6], dsize[6];
- char ref_blocks[6], ref_lsize[6], ref_psize[6], ref_dsize[6];
-
- if (dds == NULL || dds->dds_blocks == 0)
- return;
-
- if (h == -1)
- (void) strcpy(refcnt, "Total");
- else
- zfs_nicenum(1ULL << h, refcnt, sizeof (refcnt));
-
- zfs_nicenum(dds->dds_blocks, blocks, sizeof (blocks));
- zfs_nicenum(dds->dds_lsize, lsize, sizeof (lsize));
- zfs_nicenum(dds->dds_psize, psize, sizeof (psize));
- zfs_nicenum(dds->dds_dsize, dsize, sizeof (dsize));
- zfs_nicenum(dds->dds_ref_blocks, ref_blocks, sizeof (ref_blocks));
- zfs_nicenum(dds->dds_ref_lsize, ref_lsize, sizeof (ref_lsize));
- zfs_nicenum(dds->dds_ref_psize, ref_psize, sizeof (ref_psize));
- zfs_nicenum(dds->dds_ref_dsize, ref_dsize, sizeof (ref_dsize));
-
- (void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n",
- refcnt,
- blocks, lsize, psize, dsize,
- ref_blocks, ref_lsize, ref_psize, ref_dsize);
-}
-
-/*
- * Print the DDT histogram and the column totals.
- */
-void
-zpool_dump_ddt(const ddt_stat_t *dds_total, const ddt_histogram_t *ddh)
-{
- int h;
-
- (void) printf("\n");
-
- (void) printf("bucket "
- " allocated "
- " referenced \n");
- (void) printf("______ "
- "______________________________ "
- "______________________________\n");
-
- (void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n",
- "refcnt",
- "blocks", "LSIZE", "PSIZE", "DSIZE",
- "blocks", "LSIZE", "PSIZE", "DSIZE");
-
- (void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n",
- "------",
- "------", "-----", "-----", "-----",
- "------", "-----", "-----", "-----");
-
- for (h = 0; h < 64; h++)
- dump_ddt_stat(&ddh->ddh_stat[h], h);
-
- dump_ddt_stat(dds_total, -1);
-
- (void) printf("\n");
-}
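
The DDT reporting helpers (dump_ddt_stat and zpool_dump_ddt) move to libzutil as well, alongside the zfs_nicenum routines they format with. For context, a hedged sketch of how a caller such as the dedup reporting in zpool(1M) might feed them; the ZPOOL_CONFIG_DDT_* lookups are written from memory and error handling is abbreviated:

#include <libnvpair.h>
#include <libzutil.h>
#include <sys/fs/zfs.h>

/* Assumes config is the pool's current nvlist configuration. */
static void
show_dedup_tables(nvlist_t *config)
{
	ddt_histogram_t *ddh = NULL;
	ddt_stat_t *dds = NULL;
	uint_t c;

	(void) nvlist_lookup_uint64_array(config, ZPOOL_CONFIG_DDT_STATS,
	    (uint64_t **)&dds, &c);
	(void) nvlist_lookup_uint64_array(config, ZPOOL_CONFIG_DDT_HISTOGRAM,
	    (uint64_t **)&ddh, &c);

	if (dds != NULL && ddh != NULL)
		zpool_dump_ddt(dds, ddh);
}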
diff --git a/usr/src/lib/libzfs/common/libzfs_util.c b/usr/src/lib/libzfs/common/libzfs_util.c
index 4d91e7b1ae..31d66e9469 100644
--- a/usr/src/lib/libzfs/common/libzfs_util.c
+++ b/usr/src/lib/libzfs/common/libzfs_util.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2019 Joyent, Inc.
+ * Copyright 2020 Joyent, Inc.
* Copyright (c) 2011, 2017 by Delphix. All rights reserved.
* Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
* Copyright (c) 2017 Datto Inc.
@@ -31,6 +31,7 @@
* Internal utility routines for the ZFS library.
*/
+#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <libintl.h>
@@ -38,8 +39,6 @@
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
-#include <unistd.h>
-#include <ctype.h>
#include <math.h>
#include <sys/filio.h>
#include <sys/mnttab.h>
@@ -54,6 +53,7 @@
#include "zfs_prop.h"
#include "zfs_comutil.h"
#include "zfeature_common.h"
+#include <libzutil.h>
int
libzfs_errno(libzfs_handle_t *hdl)
@@ -633,15 +633,6 @@ zfs_strdup(libzfs_handle_t *hdl, const char *str)
return (ret);
}
-/*
- * Convert a number to an appropriately human-readable output.
- */
-void
-zfs_nicenum(uint64_t num, char *buf, size_t buflen)
-{
- nicenum(num, buf, buflen);
-}
-
void
libzfs_print_on_error(libzfs_handle_t *hdl, boolean_t printerr)
{
@@ -1662,20 +1653,3 @@ zfs_get_hole_count(const char *path, uint64_t *count, uint64_t *bs)
}
return (0);
}
-
-ulong_t
-get_system_hostid(void)
-{
- char *env;
-
- /*
- * Allow the hostid to be subverted for testing.
- */
- env = getenv("ZFS_HOSTID");
- if (env) {
- ulong_t hostid = strtoull(env, NULL, 16);
- return (hostid & 0xFFFFFFFF);
- }
-
- return (gethostid());
-}
diff --git a/usr/src/lib/libzfs/common/mapfile-vers b/usr/src/lib/libzfs/common/mapfile-vers
index c402a25dd6..5ad0f9d4eb 100644
--- a/usr/src/lib/libzfs/common/mapfile-vers
+++ b/usr/src/lib/libzfs/common/mapfile-vers
@@ -24,7 +24,7 @@
# Copyright (c) 2012, Joyent, Inc. All rights reserved.
# Copyright (c) 2011, 2017 by Delphix. All rights reserved.
# Copyright 2016 Nexenta Systems, Inc.
-# Copyright 2019 Joyent, Inc.
+# Copyright 2020 Joyent, Inc.
#
#
@@ -51,8 +51,8 @@ SYMBOL_VERSION SUNWprivate_1.1 {
fletcher_4_byteswap;
fletcher_4_incremental_native;
fletcher_4_incremental_byteswap;
- get_system_hostid;
libzfs_add_handle;
+ libzfs_config_ops;
libzfs_errno;
libzfs_error_action;
libzfs_error_description;
@@ -113,7 +113,6 @@ SYMBOL_VERSION SUNWprivate_1.1 {
zfs_mount;
zfs_name_to_prop;
zfs_name_valid;
- zfs_nicenum;
zfs_nicestrtonum;
zfs_open;
zfs_path_to_zhandle;
@@ -198,14 +197,11 @@ SYMBOL_VERSION SUNWprivate_1.1 {
zpool_destroy;
zpool_disable_datasets;
zpool_discard_checkpoint;
- zpool_dump_ddt;
zpool_enable_datasets;
zpool_expand_proplist;
zpool_explain_recover;
zpool_export;
zpool_export_force;
- zpool_find_import;
- zpool_find_import_cached;
zpool_find_vdev;
zpool_find_vdev_by_physpath;
zpool_fru_set;
@@ -220,7 +216,6 @@ SYMBOL_VERSION SUNWprivate_1.1 {
zpool_get_prop_int;
zpool_get_state;
zpool_get_status;
- zpool_history_unpack;
zpool_import;
zpool_import_props;
zpool_import_status;
@@ -245,18 +240,15 @@ SYMBOL_VERSION SUNWprivate_1.1 {
zpool_prop_to_name;
zpool_prop_unsupported;
zpool_prop_values;
- zpool_read_label;
zpool_refresh_stats;
zpool_reguid;
zpool_reopen;
zpool_scan;
- zpool_search_import;
zpool_set_prop;
zpool_skip_pool;
zpool_state_to_name;
zpool_sync_one;
zpool_trim;
- zpool_tryimport;
zpool_unmount_datasets;
zpool_upgrade;
zpool_vdev_attach;
diff --git a/usr/src/lib/libzfs_jni/Makefile.com b/usr/src/lib/libzfs_jni/Makefile.com
index 7d2d4bca6c..9c3bc1e523 100644
--- a/usr/src/lib/libzfs_jni/Makefile.com
+++ b/usr/src/lib/libzfs_jni/Makefile.com
@@ -24,7 +24,7 @@
#
# Copyright (c) 2015 by Delphix. All rights reserved.
#
-# Copyright (c) 2018, Joyent, Inc.
+# Copyright 2020 Joyent, Inc.
#
LIBRARY= libzfs_jni.a
@@ -43,9 +43,10 @@ include ../../Makefile.lib
LIBS= $(DYNLIB)
INCS += -I$(JAVA_ROOT)/include \
- -I$(JAVA_ROOT)/include/solaris
+ -I$(JAVA_ROOT)/include/solaris \
+ -I../../libzutil/common
-LDLIBS += -lc -lnvpair -ldiskmgt -lzfs
+LDLIBS += -lc -lnvpair -ldiskmgt -lzfs -lzutil
CPPFLAGS += $(INCS)
$(NOT_RELEASE_BUILD) CPPFLAGS += -DDEBUG
CERRWARN += -_gcc=-Wno-switch
diff --git a/usr/src/lib/libzfs_jni/common/libzfs_jni_pool.c b/usr/src/lib/libzfs_jni/common/libzfs_jni_pool.c
index 2eb36c0145..e4fa074f91 100644
--- a/usr/src/lib/libzfs_jni/common/libzfs_jni_pool.c
+++ b/usr/src/lib/libzfs_jni/common/libzfs_jni_pool.c
@@ -21,10 +21,12 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2020 Joyent, Inc.
*/
#include "libzfs_jni_util.h"
#include "libzfs_jni_pool.h"
+#include <libzutil.h>
#include <strings.h>
/*
@@ -753,7 +755,8 @@ create_MirrorVirtualDeviceBean(JNIEnv *env, zpool_handle_t *zhp,
}
static char *
-find_field(const zjni_field_mapping_t *mapping, int value) {
+find_field(const zjni_field_mapping_t *mapping, int value)
+{
int i;
for (i = 0; mapping[i].name != NULL; i++) {
if (value == mapping[i].value) {
@@ -1046,7 +1049,8 @@ zjni_get_VirtualDevices_from_vdev(JNIEnv *env, zpool_handle_t *zhp,
}
int
-zjni_create_add_ImportablePool(nvlist_t *config, void *data) {
+zjni_create_add_ImportablePool(nvlist_t *config, void *data)
+{
JNIEnv *env = ((zjni_ArrayCallbackData_t *)data)->env;
zjni_Collection_t *list = ((zjni_ArrayCallbackData_t *)data)->list;
@@ -1154,7 +1158,14 @@ zjni_pool_status_to_obj(JNIEnv *env, zpool_status_t status)
int
zjni_ipool_iter(int argc, char **argv, zjni_ipool_iter_f func, void *data)
{
- nvlist_t *pools = zpool_find_import(g_zfs, argc, argv);
+ nvlist_t *pools;
+ importargs_t iarg = { 0 };
+
+ iarg.paths = argc;
+ iarg.path = argv;
+ iarg.can_be_active = B_TRUE;
+
+ pools = zpool_search_import(g_zfs, &iarg, &libzfs_config_ops);
if (pools != NULL) {
nvpair_t *elem = NULL;
@@ -1173,21 +1184,25 @@ zjni_ipool_iter(int argc, char **argv, zjni_ipool_iter_f func, void *data)
}
char *
-zjni_vdev_state_to_str(vdev_state_t state) {
+zjni_vdev_state_to_str(vdev_state_t state)
+{
return (find_field(vdev_state_map, state));
}
char *
-zjni_vdev_aux_to_str(vdev_aux_t aux) {
+zjni_vdev_aux_to_str(vdev_aux_t aux)
+{
return (find_field(vdev_aux_map, aux));
}
char *
-zjni_pool_state_to_str(pool_state_t state) {
+zjni_pool_state_to_str(pool_state_t state)
+{
return (find_field(pool_state_map, state));
}
char *
-zjni_pool_status_to_str(zpool_status_t status) {
+zjni_pool_status_to_str(zpool_status_t status)
+{
return (find_field(zpool_status_map, status));
}
diff --git a/usr/src/lib/libzpool/Makefile.com b/usr/src/lib/libzpool/Makefile.com
index 01cd35020f..78c9c0b29a 100644
--- a/usr/src/lib/libzpool/Makefile.com
+++ b/usr/src/lib/libzpool/Makefile.com
@@ -21,7 +21,7 @@
#
# Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2013, 2016 by Delphix. All rights reserved.
-# Copyright 2019, Joyent, Inc.
+# Copyright 2020 Joyent, Inc.
#
LIBRARY= libzpool.a
@@ -56,6 +56,8 @@ INCS += -I../../../uts/common/fs/zfs/lua
INCS += -I../../../common/zfs
INCS += -I../../../common/lz4
INCS += -I../../../common
+INCS += -I../../libzutil/common
+
CLEANFILES += ../common/zfs.h
CLEANFILES += $(EXTPICS)
@@ -70,7 +72,7 @@ C99LMODE= -Xc99=%all
CFLAGS += $(CCGDEBUG) $(CCVERBOSE) $(CNOGLOBAL)
CFLAGS64 += $(CCGDEBUG) $(CCVERBOSE) $(CNOGLOBAL)
LDLIBS += -lcmdutils -lumem -lavl -lnvpair -lz -lc -lsysevent -lmd \
- -lfakekernel -lzfs
+ -lfakekernel -lzutil
NATIVE_LIBS += libz.so
CPPFLAGS.first = -I$(SRC)/lib/libfakekernel/common
CPPFLAGS += $(INCS) -DDEBUG -D_FAKE_KERNEL
diff --git a/usr/src/lib/libzpool/common/kernel.c b/usr/src/lib/libzpool/common/kernel.c
index 95a1c54622..0d43302861 100644
--- a/usr/src/lib/libzpool/common/kernel.c
+++ b/usr/src/lib/libzpool/common/kernel.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright 2020 Joyent, Inc.
* Copyright 2017 RackTop Systems.
*/
@@ -41,7 +41,7 @@
#include <sys/zmod.h>
#include <sys/utsname.h>
#include <sys/systeminfo.h>
-#include <libzfs.h>
+#include <libzutil.h>
#include <sys/crypto/common.h>
#include <sys/crypto/impl.h>
#include <sys/crypto/api.h>
diff --git a/usr/src/lib/libzpool/common/util.c b/usr/src/lib/libzpool/common/util.c
index 38b2e9e458..8525b5f299 100644
--- a/usr/src/lib/libzpool/common/util.c
+++ b/usr/src/lib/libzpool/common/util.c
@@ -34,7 +34,9 @@
#include <sys/spa.h>
#include <sys/fs/zfs.h>
#include <sys/refcount.h>
+#include <sys/zfs_ioctl.h>
#include <dlfcn.h>
+#include <libzutil.h>
extern void nicenum(uint64_t num, char *buf, size_t);
@@ -197,3 +199,56 @@ set_global_var(char *arg)
return (0);
}
+
+static nvlist_t *
+refresh_config(void *unused, nvlist_t *tryconfig)
+{
+ return (spa_tryimport(tryconfig));
+}
+
+static int
+pool_active(void *unused, const char *name, uint64_t guid,
+ boolean_t *isactive)
+{
+ zfs_cmd_t *zcp;
+ nvlist_t *innvl;
+ char *packed = NULL;
+ size_t size = 0;
+ int fd, ret;
+
+ /*
+ * Use ZFS_IOC_POOL_SYNC to confirm if a pool is active
+ */
+
+ fd = open("/dev/zfs", O_RDWR);
+ if (fd < 0)
+ return (-1);
+
+ zcp = umem_zalloc(sizeof (zfs_cmd_t), UMEM_NOFAIL);
+
+ innvl = fnvlist_alloc();
+ fnvlist_add_boolean_value(innvl, "force", B_FALSE);
+
+ (void) strlcpy(zcp->zc_name, name, sizeof (zcp->zc_name));
+ packed = fnvlist_pack(innvl, &size);
+ zcp->zc_nvlist_src = (uint64_t)(uintptr_t)packed;
+ zcp->zc_nvlist_src_size = size;
+
+ ret = ioctl(fd, ZFS_IOC_POOL_SYNC, zcp);
+
+ fnvlist_pack_free(packed, size);
+ free((void *)(uintptr_t)zcp->zc_nvlist_dst);
+ nvlist_free(innvl);
+ umem_free(zcp, sizeof (zfs_cmd_t));
+
+ (void) close(fd);
+
+ *isactive = (ret == 0);
+
+ return (0);
+}
+
+const pool_config_ops_t libzpool_config_ops = {
+ .pco_refresh_config = refresh_config,
+ .pco_pool_active = pool_active,
+};
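
libzpool supplies the matching vtable: its refresh_config() goes straight to spa_tryimport() in the userland SPA, and pool_active() probes the running kernel with ZFS_IOC_POOL_SYNC, treating a successful sync as proof that the pool is imported. A sketch of how a libzpool consumer such as zdb might use these ops to resolve a target name to a config; the pool name is illustrative, and passing a NULL handle relies on these ops ignoring their handle argument:

#include <libzutil.h>

/* Illustrative: locate the configuration for a pool named "tank". */
static nvlist_t *
find_tank_config(void)
{
	importargs_t args = { 0 };
	nvlist_t *cfg = NULL;

	args.can_be_active = B_TRUE;	/* zdb-style: active pools are fine */
	if (zpool_find_config(NULL, "tank", &cfg, &args,
	    &libzpool_config_ops) != 0)
		return (NULL);
	return (cfg);
}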
diff --git a/usr/src/lib/libzutil/Makefile b/usr/src/lib/libzutil/Makefile
new file mode 100644
index 0000000000..745a6eab95
--- /dev/null
+++ b/usr/src/lib/libzutil/Makefile
@@ -0,0 +1,51 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2020 Joyent, Inc.
+#
+
+include ../Makefile.lib
+
+HDRS= libzutil.h
+
+HDRDIR= common
+
+SUBDIRS= $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
+
+all := TARGET= all
+clean := TARGET= clean
+clobber := TARGET= clobber
+install := TARGET= install
+
+MSGFILES = `$(GREP) -l gettext $(HDRDIR)/*.[ch]`
+POFILE = libzutil.po
+
+.KEEP_STATE:
+
+all clean clobber install: $(SUBDIRS)
+
+$(POFILE): pofile_MSGFILES
+
+install_h: $(ROOTHDRS)
+
+check: $(CHECKHDRS)
+
+_msg: $(MSGDOMAINPOFILE)
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
+
+include ../Makefile.targ
+include ../../Makefile.msg.targ
diff --git a/usr/src/lib/libzutil/Makefile.com b/usr/src/lib/libzutil/Makefile.com
new file mode 100644
index 0000000000..9ec9f67dab
--- /dev/null
+++ b/usr/src/lib/libzutil/Makefile.com
@@ -0,0 +1,49 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2020 Joyent, Inc.
+#
+
+LIBRARY= libzutil.a
+VERS= .1
+
+OBJECTS= \
+ zutil_import.o \
+ zutil_nicenum.o \
+ zutil_pool.o
+
+include ../../Makefile.lib
+
+# libzutil must be installed in the root filesystem for mount(1M)
+include ../../Makefile.rootfs
+
+LIBS= $(DYNLIB) $(LINTLIB)
+
+SRCDIR = ../common
+
+INCS += -I$(SRCDIR)
+INCS += -I../../../uts/common/fs/zfs
+INCS += -I../../libc/inc
+
+CSTD= $(CSTD_GNU99)
+C99LMODE= -Xc99=%all
+LDLIBS += -lc -lm -ldevid -lnvpair -ladm -lavl -lefi
+CPPFLAGS += $(INCS) -D_LARGEFILE64_SOURCE=1 -D_REENTRANT
+$(NOT_RELEASE_BUILD)CPPFLAGS += -DDEBUG
+
+SRCS= $(OBJECTS:%.o=$(SRCDIR)/%.c)
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+include ../../Makefile.targ
diff --git a/usr/src/lib/libzutil/amd64/Makefile b/usr/src/lib/libzutil/amd64/Makefile
new file mode 100644
index 0000000000..5a2ea08b45
--- /dev/null
+++ b/usr/src/lib/libzutil/amd64/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2020 Joyent, Inc.
+#
+
+include ../Makefile.com
+include ../../Makefile.lib.64
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64)
diff --git a/usr/src/lib/libzutil/common/libzutil.h b/usr/src/lib/libzutil/common/libzutil.h
new file mode 100644
index 0000000000..37ab9ee125
--- /dev/null
+++ b/usr/src/lib/libzutil/common/libzutil.h
@@ -0,0 +1,122 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018 by Delphix. All rights reserved.
+ * Copyright 2020 Joyent, Inc.
+ */
+
+#ifndef _LIBZUTIL_H
+#define _LIBZUTIL_H
+
+#include <sys/nvpair.h>
+#include <sys/fs/zfs.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Default wait time for a device name to be created.
+ */
+#define DISK_LABEL_WAIT (30 * 1000) /* 30 seconds */
+
+
+/*
+ * Pool Config Operations
+ *
+ * These are specific to the library libzfs or libzpool instance.
+ */
+typedef nvlist_t *refresh_config_func_t(void *, nvlist_t *);
+
+typedef int pool_active_func_t(void *, const char *, uint64_t, boolean_t *);
+
+typedef struct pool_config_ops {
+ refresh_config_func_t *pco_refresh_config;
+ pool_active_func_t *pco_pool_active;
+} pool_config_ops_t;
+
+/*
+ * An instance of pool_config_ops_t is expected in the caller's binary.
+ */
+extern const pool_config_ops_t libzfs_config_ops;
+extern const pool_config_ops_t libzpool_config_ops;
+
+typedef struct importargs {
+ char **path; /* a list of paths to search */
+ int paths; /* number of paths to search */
+ const char *poolname; /* name of a pool to find */
+ uint64_t guid; /* guid of a pool to find */
+ const char *cachefile; /* cachefile to use for import */
+ boolean_t can_be_active; /* can the pool be active? */
+ boolean_t scan; /* prefer scanning to libblkid cache */
+ nvlist_t *policy; /* load policy (max txg, rewind, etc.) */
+} importargs_t;
+
+extern nvlist_t *zpool_search_import(void *, importargs_t *,
+ const pool_config_ops_t *);
+extern int zpool_find_config(void *, const char *, nvlist_t **, importargs_t *,
+ const pool_config_ops_t *);
+
+extern int zpool_read_label(int, nvlist_t **, int *);
+
+extern boolean_t zfs_isnumber(const char *);
+
+/*
+ * Formats for iostat numbers. Examples: "12K", "30ms", "4B", "2321234", "-".
+ *
+ * ZFS_NICENUM_1024: Print kilo, mega, tera, peta, exa..
+ * ZFS_NICENUM_BYTES: Print single bytes ("13B"), kilo, mega, tera...
+ * ZFS_NICENUM_TIME: Print nanosecs, microsecs, millisecs, seconds...
+ * ZFS_NICENUM_RAW: Print the raw number without any formatting
+ * ZFS_NICENUM_RAWTIME: Same as RAW, but print dashes ('-') for zero.
+ */
+enum zfs_nicenum_format {
+ ZFS_NICENUM_1024 = 0,
+ ZFS_NICENUM_BYTES = 1,
+ ZFS_NICENUM_TIME = 2,
+ ZFS_NICENUM_RAW = 3,
+ ZFS_NICENUM_RAWTIME = 4
+};
+
+/*
+ * Convert a number to a human-readable form.
+ */
+extern void zfs_nicebytes(uint64_t, char *, size_t);
+extern void zfs_nicenum(uint64_t, char *, size_t);
+extern void zfs_nicenum_format(uint64_t, char *, size_t,
+ enum zfs_nicenum_format);
+extern void zfs_nicetime(uint64_t, char *, size_t);
+
+#define nicenum(num, buf, size) zfs_nicenum(num, buf, size)
+
+extern void zpool_dump_ddt(const ddt_stat_t *, const ddt_histogram_t *);
+extern int zpool_history_unpack(char *, uint64_t, uint64_t *, nvlist_t ***,
+ uint_t *);
+
+/* Part of SPL in OpenZFS */
+extern ulong_t get_system_hostid(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LIBZUTIL_H */
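
Besides the import plumbing, the new header exports the number-formatting helpers that previously sat behind libzfs. A small sketch of the format variants; the exact strings depend on the rounding rules in zutil_nicenum.c, so the outputs noted in comments are approximate:

#include <stdio.h>
#include <libzutil.h>

int
main(void)
{
	char buf[32];

	zfs_nicenum_format(1536, buf, sizeof (buf), ZFS_NICENUM_1024);
	(void) printf("%s\n", buf);	/* roughly "1.50K" */

	zfs_nicenum_format(1536, buf, sizeof (buf), ZFS_NICENUM_RAW);
	(void) printf("%s\n", buf);	/* "1536", no suffix */

	zfs_nicenum_format(2500000, buf, sizeof (buf), ZFS_NICENUM_TIME);
	(void) printf("%s\n", buf);	/* nanoseconds, so roughly "2ms" */

	return (0);
}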
diff --git a/usr/src/lib/libzutil/common/mapfile-vers b/usr/src/lib/libzutil/common/mapfile-vers
new file mode 100644
index 0000000000..bd73d6c951
--- /dev/null
+++ b/usr/src/lib/libzutil/common/mapfile-vers
@@ -0,0 +1,48 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2020 Joyent, Inc.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+$mapfile_version 2
+
+SYMBOL_VERSION SUNWprivate_1.1 {
+ global:
+ get_system_hostid;
+ zfs_isnumber;
+ zfs_nicebytes;
+ zfs_nicenum;
+ zfs_nicenum_format;
+ zfs_niceraw;
+ zfs_nicetime;
+ zpool_dump_ddt;
+ zpool_find_config;
+ zpool_history_unpack;
+ zpool_read_label;
+ zpool_search_import;
+ local:
+ *;
+};
diff --git a/usr/src/lib/libzutil/common/zutil_import.c b/usr/src/lib/libzutil/common/zutil_import.c
new file mode 100644
index 0000000000..961247c5c0
--- /dev/null
+++ b/usr/src/lib/libzutil/common/zutil_import.c
@@ -0,0 +1,1548 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2017 Nexenta Systems, Inc.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+ * Copyright 2015 RackTop Systems.
+ * Copyright (c) 2016, Intel Corporation.
+ */
+
+/*
+ * Pool import support functions.
+ *
+ * Used by zpool, ztest, zdb, and zhack to locate importable configs. Since
+ * these commands are expected to run in the global zone, we can assume
+ * that the devices are all readable when called.
+ *
+ * To import a pool, we rely on reading the configuration information from the
+ * ZFS label of each device. If we successfully read the label, then we
+ * organize the configuration information in the following hierarchy:
+ *
+ * pool guid -> toplevel vdev guid -> label txg
+ *
+ * Duplicate entries matching this same tuple will be discarded. Once we have
+ * examined every device, we pick the best label txg config for each toplevel
+ * vdev. We then arrange these toplevel vdevs into a complete pool config, and
+ * update any paths that have changed. Finally, we attempt to import the pool
+ * using our derived config, and record the results.
+ */
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <assert.h>
+#include <ctype.h>
+#include <devid.h>
+#include <dirent.h>
+#include <errno.h>
+#include <libintl.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/vtoc.h>
+#include <sys/dktp/fdisk.h>
+#include <sys/efi_partition.h>
+#include <sys/vdev_impl.h>
+#include <sys/fs/zfs.h>
+
+#include <thread_pool.h>
+#include <libzutil.h>
+#include <libnvpair.h>
+
+#include "zutil_import.h"
+
+#ifdef NDEBUG
+#define verify(EX) ((void)(EX))
+#else
+#define verify(EX) assert(EX)
+#endif
+
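+/*
+ * Error reporting is split in two: zutil_error_aux() records a detailed
+ * description in the handle, and zutil_verror() prints "action: error" to
+ * stderr, preferring that recorded description over the generic error string
+ * whenever one is active.
+ */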
+/*PRINTFLIKE2*/
+static void
+zutil_error_aux(libpc_handle_t *hdl, const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+
+ (void) vsnprintf(hdl->lpc_desc, sizeof (hdl->lpc_desc), fmt, ap);
+ hdl->lpc_desc_active = B_TRUE;
+
+ va_end(ap);
+}
+
+static void
+zutil_verror(libpc_handle_t *hdl, const char *error, const char *fmt,
+ va_list ap)
+{
+ char action[1024];
+
+ (void) vsnprintf(action, sizeof (action), fmt, ap);
+
+ if (hdl->lpc_desc_active)
+ hdl->lpc_desc_active = B_FALSE;
+ else
+ hdl->lpc_desc[0] = '\0';
+
+ if (hdl->lpc_printerr) {
+ if (hdl->lpc_desc[0] != '\0')
+ error = hdl->lpc_desc;
+
+ (void) fprintf(stderr, "%s: %s\n", action, error);
+ }
+}
+
+/*PRINTFLIKE3*/
+static int
+zutil_error_fmt(libpc_handle_t *hdl, const char *error, const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+
+ zutil_verror(hdl, error, fmt, ap);
+
+ va_end(ap);
+
+ return (-1);
+}
+
+static int
+zutil_error(libpc_handle_t *hdl, const char *error, const char *msg)
+{
+ return (zutil_error_fmt(hdl, error, "%s", msg));
+}
+
+static int
+zutil_no_memory(libpc_handle_t *hdl)
+{
+ (void) zutil_error(hdl, EZFS_NOMEM, "internal error");
+ exit(1);
+}
+
+void *
+zutil_alloc(libpc_handle_t *hdl, size_t size)
+{
+ void *data;
+
+ if ((data = calloc(1, size)) == NULL)
+ (void) zutil_no_memory(hdl);
+
+ return (data);
+}
+
+char *
+zutil_strdup(libpc_handle_t *hdl, const char *str)
+{
+ char *ret;
+
+ if ((ret = strdup(str)) == NULL)
+ (void) zutil_no_memory(hdl);
+
+ return (ret);
+}
+
+/*
+ * Intermediate structures used to gather configuration information.
+ */
+typedef struct config_entry {
+ uint64_t ce_txg;
+ nvlist_t *ce_config;
+ struct config_entry *ce_next;
+} config_entry_t;
+
+typedef struct vdev_entry {
+ uint64_t ve_guid;
+ config_entry_t *ve_configs;
+ struct vdev_entry *ve_next;
+} vdev_entry_t;
+
+typedef struct pool_entry {
+ uint64_t pe_guid;
+ vdev_entry_t *pe_vdevs;
+ struct pool_entry *pe_next;
+} pool_entry_t;
+
+typedef struct name_entry {
+ char *ne_name;
+ uint64_t ne_guid;
+ struct name_entry *ne_next;
+} name_entry_t;
+
+typedef struct pool_list {
+ pool_entry_t *pools;
+ name_entry_t *names;
+} pool_list_t;
+
+/*
+ * Go through and fix up any path and/or devid information for the given vdev
+ * configuration.
+ */
+static int
+fix_paths(nvlist_t *nv, name_entry_t *names)
+{
+ nvlist_t **child;
+ uint_t c, children;
+ uint64_t guid;
+ name_entry_t *ne, *best;
+ char *path, *devid;
+ int matched;
+
+ if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+ &child, &children) == 0) {
+ for (c = 0; c < children; c++)
+ if (fix_paths(child[c], names) != 0)
+ return (-1);
+ return (0);
+ }
+
+ /*
+ * This is a leaf (file or disk) vdev. In either case, go through
+ * the name list and see if we find a matching guid. If so, replace
+ * the path and see if we can calculate a new devid.
+ *
+ * There may be multiple names associated with a particular guid, in
+ * which case we have overlapping slices or multiple paths to the same
+ * disk. If this is the case, then we want to pick the path that is
+ * the most similar to the original, where "most similar" is the number
+ * of matching characters starting from the end of the path. This will
+ * preserve slice numbers even if the disks have been reorganized, and
+ * will also catch preferred disk names if multiple paths exist.
+ */
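+	/*
+	 * For example (illustrative): if the label's original path ends in
+	 * "c0t1d0s0", the candidate ".../c5t2d0s0" matches four trailing
+	 * characters ("d0s0") while ".../c5t2d0s2" matches none, so the
+	 * first candidate wins and the slice number is preserved.
+	 */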
+ verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0);
+ if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
+ path = NULL;
+
+ matched = 0;
+ best = NULL;
+ for (ne = names; ne != NULL; ne = ne->ne_next) {
+ if (ne->ne_guid == guid) {
+ const char *src, *dst;
+ int count;
+
+ if (path == NULL) {
+ best = ne;
+ break;
+ }
+
+ src = ne->ne_name + strlen(ne->ne_name) - 1;
+ dst = path + strlen(path) - 1;
+ for (count = 0; src >= ne->ne_name && dst >= path;
+ src--, dst--, count++)
+ if (*src != *dst)
+ break;
+
+ /*
+ * At this point, 'count' is the number of characters
+ * matched from the end.
+ */
+ if (count > matched || best == NULL) {
+ best = ne;
+ matched = count;
+ }
+ }
+ }
+
+ if (best == NULL)
+ return (0);
+
+ if (nvlist_add_string(nv, ZPOOL_CONFIG_PATH, best->ne_name) != 0)
+ return (-1);
+
+ if ((devid = devid_str_from_path(best->ne_name)) == NULL) {
+ (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
+ } else {
+ if (nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, devid) != 0) {
+ devid_str_free(devid);
+ return (-1);
+ }
+ devid_str_free(devid);
+ }
+
+ return (0);
+}
+
+/*
+ * Add the given configuration to the list of known devices.
+ */
+static int
+add_config(libpc_handle_t *hdl, pool_list_t *pl, const char *path,
+ int order, int num_labels, nvlist_t *config)
+{
+ uint64_t pool_guid, vdev_guid, top_guid, txg, state;
+ pool_entry_t *pe;
+ vdev_entry_t *ve;
+ config_entry_t *ce;
+ name_entry_t *ne;
+
+ /*
+ * If this is a hot spare not currently in use or level 2 cache
+ * device, add it to the list of names to translate, but don't do
+ * anything else.
+ */
+ if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
+ &state) == 0 &&
+ (state == POOL_STATE_SPARE || state == POOL_STATE_L2CACHE) &&
+ nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid) == 0) {
+ if ((ne = zutil_alloc(hdl, sizeof (name_entry_t))) == NULL)
+ return (-1);
+
+ if ((ne->ne_name = zutil_strdup(hdl, path)) == NULL) {
+ free(ne);
+ return (-1);
+ }
+
+ ne->ne_guid = vdev_guid;
+ ne->ne_next = pl->names;
+ pl->names = ne;
+
+ return (0);
+ }
+
+ /*
+ * If we have a valid config but cannot read any of these fields, then
+ * it means we have a half-initialized label. In vdev_label_init()
+ * we write a label with txg == 0 so that we can identify the device
+ * in case the user refers to the same disk later on. If we fail to
+ * create the pool, we'll be left with a label in this state
+ * which should not be considered part of a valid pool.
+ */
+ if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
+ &pool_guid) != 0 ||
+ nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID,
+ &vdev_guid) != 0 ||
+ nvlist_lookup_uint64(config, ZPOOL_CONFIG_TOP_GUID,
+ &top_guid) != 0 ||
+ nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
+ &txg) != 0 || txg == 0) {
+ return (0);
+ }
+
+ /*
+ * First, see if we know about this pool. If not, then add it to the
+ * list of known pools.
+ */
+ for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
+ if (pe->pe_guid == pool_guid)
+ break;
+ }
+
+ if (pe == NULL) {
+ if ((pe = zutil_alloc(hdl, sizeof (pool_entry_t))) == NULL) {
+ return (-1);
+ }
+ pe->pe_guid = pool_guid;
+ pe->pe_next = pl->pools;
+ pl->pools = pe;
+ }
+
+ /*
+	 * Second, see if we know about this toplevel vdev.  Add it if it's
+	 * missing.
+ */
+ for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) {
+ if (ve->ve_guid == top_guid)
+ break;
+ }
+
+ if (ve == NULL) {
+ if ((ve = zutil_alloc(hdl, sizeof (vdev_entry_t))) == NULL) {
+ return (-1);
+ }
+ ve->ve_guid = top_guid;
+ ve->ve_next = pe->pe_vdevs;
+ pe->pe_vdevs = ve;
+ }
+
+ /*
+ * Third, see if we have a config with a matching transaction group. If
+ * so, then we do nothing. Otherwise, add it to the list of known
+ * configs.
+ */
+ for (ce = ve->ve_configs; ce != NULL; ce = ce->ce_next) {
+ if (ce->ce_txg == txg)
+ break;
+ }
+
+ if (ce == NULL) {
+ if ((ce = zutil_alloc(hdl, sizeof (config_entry_t))) == NULL) {
+ return (-1);
+ }
+ ce->ce_txg = txg;
+ ce->ce_config = fnvlist_dup(config);
+ ce->ce_next = ve->ve_configs;
+ ve->ve_configs = ce;
+ }
+
+ /*
+ * At this point we've successfully added our config to the list of
+ * known configs. The last thing to do is add the vdev guid -> path
+ * mappings so that we can fix up the configuration as necessary before
+ * doing the import.
+ */
+ if ((ne = zutil_alloc(hdl, sizeof (name_entry_t))) == NULL)
+ return (-1);
+
+ if ((ne->ne_name = zutil_strdup(hdl, path)) == NULL) {
+ free(ne);
+ return (-1);
+ }
+
+ ne->ne_guid = vdev_guid;
+ ne->ne_next = pl->names;
+ pl->names = ne;
+
+ return (0);
+}
+
+/*
+ * Determine whether the pool with the given name and GUID is currently
+ * active (imported); the result is returned in *isactive.
+ */
+static int
+zutil_pool_active(libpc_handle_t *hdl, const char *name, uint64_t guid,
+ boolean_t *isactive)
+{
+ ASSERT(hdl->lpc_ops->pco_pool_active != NULL);
+
+ int error = hdl->lpc_ops->pco_pool_active(hdl->lpc_lib_handle, name,
+ guid, isactive);
+
+ return (error);
+}
+
+static nvlist_t *
+zutil_refresh_config(libpc_handle_t *hdl, nvlist_t *tryconfig)
+{
+ ASSERT(hdl->lpc_ops->pco_refresh_config != NULL);
+
+ return (hdl->lpc_ops->pco_refresh_config(hdl->lpc_lib_handle,
+ tryconfig));
+}
+
+/*
+ * Determine if the vdev id is a hole in the namespace.
+ */
+static boolean_t
+vdev_is_hole(uint64_t *hole_array, uint_t holes, uint_t id)
+{
+ for (int c = 0; c < holes; c++) {
+
+ /* Top-level is a hole */
+ if (hole_array[c] == id)
+ return (B_TRUE);
+ }
+ return (B_FALSE);
+}
+
+/*
+ * Convert our list of pools into the definitive set of configurations. We
+ * start by picking the best config for each toplevel vdev. Once that's done,
+ * we assemble the toplevel vdevs into a full config for the pool. We make a
+ * pass to fix up any incorrect paths, and then add it to the main list to
+ * return to the user.
+ */
+static nvlist_t *
+get_configs(libpc_handle_t *hdl, pool_list_t *pl, boolean_t active_ok,
+ nvlist_t *policy)
+{
+ pool_entry_t *pe;
+ vdev_entry_t *ve;
+ config_entry_t *ce;
+ nvlist_t *ret = NULL, *config = NULL, *tmp = NULL, *nvtop, *nvroot;
+ nvlist_t **spares, **l2cache;
+ uint_t i, nspares, nl2cache;
+ boolean_t config_seen;
+ uint64_t best_txg;
+ char *name, *hostname = NULL;
+ uint64_t guid;
+ uint_t children = 0;
+ nvlist_t **child = NULL;
+ uint_t holes;
+ uint64_t *hole_array, max_id;
+ uint_t c;
+ boolean_t isactive;
+ uint64_t hostid;
+ nvlist_t *nvl;
+ boolean_t found_one = B_FALSE;
+ boolean_t valid_top_config = B_FALSE;
+
+ if (nvlist_alloc(&ret, 0, 0) != 0)
+ goto nomem;
+
+ for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
+ uint64_t id, max_txg = 0;
+
+ if (nvlist_alloc(&config, NV_UNIQUE_NAME, 0) != 0)
+ goto nomem;
+ config_seen = B_FALSE;
+
+ /*
+ * Iterate over all toplevel vdevs. Grab the pool configuration
+ * from the first one we find, and then go through the rest and
+ * add them as necessary to the 'vdevs' member of the config.
+ */
+ for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) {
+
+ /*
+ * Determine the best configuration for this vdev by
+ * selecting the config with the latest transaction
+ * group.
+ */
+ best_txg = 0;
+ for (ce = ve->ve_configs; ce != NULL;
+ ce = ce->ce_next) {
+
+ if (ce->ce_txg > best_txg) {
+ tmp = ce->ce_config;
+ best_txg = ce->ce_txg;
+ }
+ }
+
+ /*
+ * We rely on the fact that the max txg for the
+ * pool will contain the most up-to-date information
+ * about the valid top-levels in the vdev namespace.
+ */
+ if (best_txg > max_txg) {
+ (void) nvlist_remove(config,
+ ZPOOL_CONFIG_VDEV_CHILDREN,
+ DATA_TYPE_UINT64);
+ (void) nvlist_remove(config,
+ ZPOOL_CONFIG_HOLE_ARRAY,
+ DATA_TYPE_UINT64_ARRAY);
+
+ max_txg = best_txg;
+ hole_array = NULL;
+ holes = 0;
+ max_id = 0;
+ valid_top_config = B_FALSE;
+
+ if (nvlist_lookup_uint64(tmp,
+ ZPOOL_CONFIG_VDEV_CHILDREN, &max_id) == 0) {
+ verify(nvlist_add_uint64(config,
+ ZPOOL_CONFIG_VDEV_CHILDREN,
+ max_id) == 0);
+ valid_top_config = B_TRUE;
+ }
+
+ if (nvlist_lookup_uint64_array(tmp,
+ ZPOOL_CONFIG_HOLE_ARRAY, &hole_array,
+ &holes) == 0) {
+ verify(nvlist_add_uint64_array(config,
+ ZPOOL_CONFIG_HOLE_ARRAY,
+ hole_array, holes) == 0);
+ }
+ }
+
+ if (!config_seen) {
+ /*
+ * Copy the relevant pieces of data to the pool
+ * configuration:
+ *
+ * version
+ * pool guid
+ * name
+ * comment (if available)
+ * pool state
+ * hostid (if available)
+ * hostname (if available)
+ */
+ uint64_t state, version;
+ char *comment = NULL;
+
+ version = fnvlist_lookup_uint64(tmp,
+ ZPOOL_CONFIG_VERSION);
+ fnvlist_add_uint64(config,
+ ZPOOL_CONFIG_VERSION, version);
+ guid = fnvlist_lookup_uint64(tmp,
+ ZPOOL_CONFIG_POOL_GUID);
+ fnvlist_add_uint64(config,
+ ZPOOL_CONFIG_POOL_GUID, guid);
+ name = fnvlist_lookup_string(tmp,
+ ZPOOL_CONFIG_POOL_NAME);
+ fnvlist_add_string(config,
+ ZPOOL_CONFIG_POOL_NAME, name);
+
+ if (nvlist_lookup_string(tmp,
+ ZPOOL_CONFIG_COMMENT, &comment) == 0)
+ fnvlist_add_string(config,
+ ZPOOL_CONFIG_COMMENT, comment);
+
+ state = fnvlist_lookup_uint64(tmp,
+ ZPOOL_CONFIG_POOL_STATE);
+ fnvlist_add_uint64(config,
+ ZPOOL_CONFIG_POOL_STATE, state);
+
+ hostid = 0;
+ if (nvlist_lookup_uint64(tmp,
+ ZPOOL_CONFIG_HOSTID, &hostid) == 0) {
+ fnvlist_add_uint64(config,
+ ZPOOL_CONFIG_HOSTID, hostid);
+ hostname = fnvlist_lookup_string(tmp,
+ ZPOOL_CONFIG_HOSTNAME);
+ fnvlist_add_string(config,
+ ZPOOL_CONFIG_HOSTNAME, hostname);
+ }
+
+ config_seen = B_TRUE;
+ }
+
+ /*
+ * Add this top-level vdev to the child array.
+ */
+ verify(nvlist_lookup_nvlist(tmp,
+ ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0);
+ verify(nvlist_lookup_uint64(nvtop, ZPOOL_CONFIG_ID,
+ &id) == 0);
+
+ if (id >= children) {
+ nvlist_t **newchild;
+
+ newchild = zutil_alloc(hdl, (id + 1) *
+ sizeof (nvlist_t *));
+ if (newchild == NULL)
+ goto nomem;
+
+ for (c = 0; c < children; c++)
+ newchild[c] = child[c];
+
+ free(child);
+ child = newchild;
+ children = id + 1;
+ }
+ if (nvlist_dup(nvtop, &child[id], 0) != 0)
+ goto nomem;
+
+ }
+
+ /*
+ * If we have information about all the top-levels then
+ * clean up the nvlist which we've constructed. This
+ * means removing any extraneous devices that are
+ * beyond the valid range or adding devices to the end
+ * of our array which appear to be missing.
+ */
+ if (valid_top_config) {
+ if (max_id < children) {
+ for (c = max_id; c < children; c++)
+ nvlist_free(child[c]);
+ children = max_id;
+ } else if (max_id > children) {
+ nvlist_t **newchild;
+
+ newchild = zutil_alloc(hdl, (max_id) *
+ sizeof (nvlist_t *));
+ if (newchild == NULL)
+ goto nomem;
+
+ for (c = 0; c < children; c++)
+ newchild[c] = child[c];
+
+ free(child);
+ child = newchild;
+ children = max_id;
+ }
+ }
+
+ verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
+ &guid) == 0);
+
+ /*
+ * The vdev namespace may contain holes as a result of
+ * device removal. We must add them back into the vdev
+ * tree before we process any missing devices.
+ */
+ if (holes > 0) {
+ ASSERT(valid_top_config);
+
+ for (c = 0; c < children; c++) {
+ nvlist_t *holey;
+
+ if (child[c] != NULL ||
+ !vdev_is_hole(hole_array, holes, c))
+ continue;
+
+ if (nvlist_alloc(&holey, NV_UNIQUE_NAME,
+ 0) != 0)
+ goto nomem;
+
+ /*
+ * Holes in the namespace are treated as
+ * "hole" top-level vdevs and have a
+ * special flag set on them.
+ */
+ if (nvlist_add_string(holey,
+ ZPOOL_CONFIG_TYPE,
+ VDEV_TYPE_HOLE) != 0 ||
+ nvlist_add_uint64(holey,
+ ZPOOL_CONFIG_ID, c) != 0 ||
+ nvlist_add_uint64(holey,
+ ZPOOL_CONFIG_GUID, 0ULL) != 0) {
+ nvlist_free(holey);
+ goto nomem;
+ }
+ child[c] = holey;
+ }
+ }
+
+ /*
+		 * Look for any missing top-level vdevs.  If any are found,
+		 * create a faked-up 'missing' vdev as a placeholder.  We cannot
+ * simply compress the child array, because the kernel performs
+ * certain checks to make sure the vdev IDs match their location
+ * in the configuration.
+ */
+ for (c = 0; c < children; c++) {
+ if (child[c] == NULL) {
+ nvlist_t *missing;
+ if (nvlist_alloc(&missing, NV_UNIQUE_NAME,
+ 0) != 0)
+ goto nomem;
+ if (nvlist_add_string(missing,
+ ZPOOL_CONFIG_TYPE,
+ VDEV_TYPE_MISSING) != 0 ||
+ nvlist_add_uint64(missing,
+ ZPOOL_CONFIG_ID, c) != 0 ||
+ nvlist_add_uint64(missing,
+ ZPOOL_CONFIG_GUID, 0ULL) != 0) {
+ nvlist_free(missing);
+ goto nomem;
+ }
+ child[c] = missing;
+ }
+ }
+
+ /*
+ * Put all of this pool's top-level vdevs into a root vdev.
+ */
+ if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0)
+ goto nomem;
+ if (nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
+ VDEV_TYPE_ROOT) != 0 ||
+ nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) != 0 ||
+ nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, guid) != 0 ||
+ nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
+ child, children) != 0) {
+ nvlist_free(nvroot);
+ goto nomem;
+ }
+
+ for (c = 0; c < children; c++)
+ nvlist_free(child[c]);
+ free(child);
+ children = 0;
+ child = NULL;
+
+ /*
+ * Go through and fix up any paths and/or devids based on our
+ * known list of vdev GUID -> path mappings.
+ */
+ if (fix_paths(nvroot, pl->names) != 0) {
+ nvlist_free(nvroot);
+ goto nomem;
+ }
+
+ /*
+ * Add the root vdev to this pool's configuration.
+ */
+ if (nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+ nvroot) != 0) {
+ nvlist_free(nvroot);
+ goto nomem;
+ }
+ nvlist_free(nvroot);
+
+ /*
+ * zdb uses this path to report on active pools that were
+ * imported or created using -R.
+ */
+ if (active_ok)
+ goto add_pool;
+
+ /*
+ * Determine if this pool is currently active, in which case we
+ * can't actually import it.
+ */
+ verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
+ &name) == 0);
+ verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
+ &guid) == 0);
+
+ if (zutil_pool_active(hdl, name, guid, &isactive) != 0)
+ goto error;
+
+ if (isactive) {
+ nvlist_free(config);
+ config = NULL;
+ continue;
+ }
+
+ if (policy != NULL) {
+ if (nvlist_add_nvlist(config, ZPOOL_LOAD_POLICY,
+ policy) != 0)
+ goto nomem;
+ }
+
+ if ((nvl = zutil_refresh_config(hdl, config)) == NULL) {
+ nvlist_free(config);
+ config = NULL;
+ continue;
+ }
+
+ nvlist_free(config);
+ config = nvl;
+
+ /*
+ * Go through and update the paths for spares, now that we have
+ * them.
+ */
+ verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+ &nvroot) == 0);
+ if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+ &spares, &nspares) == 0) {
+ for (i = 0; i < nspares; i++) {
+ if (fix_paths(spares[i], pl->names) != 0)
+ goto nomem;
+ }
+ }
+
+ /*
+ * Update the paths for l2cache devices.
+ */
+ if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
+ &l2cache, &nl2cache) == 0) {
+ for (i = 0; i < nl2cache; i++) {
+ if (fix_paths(l2cache[i], pl->names) != 0)
+ goto nomem;
+ }
+ }
+
+ /*
+ * Restore the original information read from the actual label.
+ */
+ (void) nvlist_remove(config, ZPOOL_CONFIG_HOSTID,
+ DATA_TYPE_UINT64);
+ (void) nvlist_remove(config, ZPOOL_CONFIG_HOSTNAME,
+ DATA_TYPE_STRING);
+ if (hostid != 0) {
+ verify(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID,
+ hostid) == 0);
+ verify(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME,
+ hostname) == 0);
+ }
+
+add_pool:
+ /*
+ * Add this pool to the list of configs.
+ */
+ verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
+ &name) == 0);
+ if (nvlist_add_nvlist(ret, name, config) != 0)
+ goto nomem;
+
+ found_one = B_TRUE;
+ nvlist_free(config);
+ config = NULL;
+ }
+
+ if (!found_one) {
+ nvlist_free(ret);
+ ret = NULL;
+ }
+
+ return (ret);
+
+nomem:
+ (void) zutil_no_memory(hdl);
+error:
+ nvlist_free(config);
+ nvlist_free(ret);
+ for (c = 0; c < children; c++)
+ nvlist_free(child[c]);
+ free(child);
+
+ return (NULL);
+}
+
+/*
+ * Return the offset of the given label.
+ */
+static uint64_t
+label_offset(uint64_t size, int l)
+{
+ ASSERT(P2PHASE_TYPED(size, sizeof (vdev_label_t), uint64_t) == 0);
+ return (l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ?
+ 0 : size - VDEV_LABELS * sizeof (vdev_label_t)));
+}
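+
+/*
+ * For reference: ZFS keeps VDEV_LABELS (4) copies of the label, two at the
+ * front of the device and two at the end, so for an aligned device size the
+ * offsets above are 0, sizeof (vdev_label_t), size - 2 * sizeof (vdev_label_t)
+ * and size - sizeof (vdev_label_t).
+ */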
+
+/*
+ * Given a file descriptor, read the label information and return an nvlist
+ * describing the configuration, if there is one. The number of valid
+ * labels found will be returned in num_labels when non-NULL.
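+ * Only labels whose pool GUID matches that of the first valid label read
+ * are counted, and the returned config comes from that first label.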
+ */
+int
+zpool_read_label(int fd, nvlist_t **config, int *num_labels)
+{
+ struct stat64 statbuf;
+ int l, count = 0;
+ vdev_label_t *label;
+ nvlist_t *expected_config = NULL;
+ uint64_t expected_guid = 0, size;
+
+ *config = NULL;
+
+ if (fstat64(fd, &statbuf) == -1)
+ return (-1);
+ size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);
+
+ if ((label = malloc(sizeof (vdev_label_t))) == NULL)
+ return (-1);
+
+ for (l = 0; l < VDEV_LABELS; l++) {
+ uint64_t state, guid, txg;
+
+ if (pread64(fd, label, sizeof (vdev_label_t),
+ label_offset(size, l)) != sizeof (vdev_label_t))
+ continue;
+
+ if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist,
+ sizeof (label->vl_vdev_phys.vp_nvlist), config, 0) != 0)
+ continue;
+
+ if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_GUID,
+ &guid) != 0 || guid == 0) {
+ nvlist_free(*config);
+ continue;
+ }
+
+ if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
+ &state) != 0 || state > POOL_STATE_L2CACHE) {
+ nvlist_free(*config);
+ continue;
+ }
+
+ if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
+ (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
+ &txg) != 0 || txg == 0)) {
+ nvlist_free(*config);
+ continue;
+ }
+
+ if (expected_guid) {
+ if (expected_guid == guid)
+ count++;
+
+ nvlist_free(*config);
+ } else {
+ expected_config = *config;
+ expected_guid = guid;
+ count++;
+ }
+ }
+
+ if (num_labels != NULL)
+ *num_labels = count;
+
+ free(label);
+ *config = expected_config;
+
+ if (count == 0) {
+ errno = ENOENT;
+ return (-1);
+ }
+
+ return (0);
+}
+
+static int
+slice_cache_compare(const void *arg1, const void *arg2)
+{
+ const char *nm1 = ((rdsk_node_t *)arg1)->rn_name;
+ const char *nm2 = ((rdsk_node_t *)arg2)->rn_name;
+ char *nm1slice, *nm2slice;
+ int rv;
+
+ /*
+ * slices zero and two are the most likely to provide results,
+ * so put those first
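+	 * (e.g. c0t0d0s0 sorts ahead of c0t0d0s2, which sorts ahead of
+	 * c0t0d0s3)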
+ */
+ nm1slice = strstr(nm1, "s0");
+ nm2slice = strstr(nm2, "s0");
+ if (nm1slice && !nm2slice) {
+ return (-1);
+ }
+ if (!nm1slice && nm2slice) {
+ return (1);
+ }
+ nm1slice = strstr(nm1, "s2");
+ nm2slice = strstr(nm2, "s2");
+ if (nm1slice && !nm2slice) {
+ return (-1);
+ }
+ if (!nm1slice && nm2slice) {
+ return (1);
+ }
+
+ rv = strcmp(nm1, nm2);
+ if (rv == 0)
+ return (0);
+ return (rv > 0 ? 1 : -1);
+}
+
+static void
+check_one_slice(avl_tree_t *r, char *diskname, uint_t partno,
+ diskaddr_t size, uint_t blksz)
+{
+ rdsk_node_t tmpnode;
+ rdsk_node_t *node;
+ char sname[MAXNAMELEN];
+
+ tmpnode.rn_name = &sname[0];
+ (void) snprintf(tmpnode.rn_name, MAXNAMELEN, "%s%u",
+ diskname, partno);
+ /*
+ * protect against division by zero for disk labels that
+ * contain a bogus sector size
+ */
+ if (blksz == 0)
+ blksz = DEV_BSIZE;
+ /* too small to contain a zpool? */
+ if ((size < (SPA_MINDEVSIZE / blksz)) &&
+ (node = avl_find(r, &tmpnode, NULL)))
+ node->rn_nozpool = B_TRUE;
+}
+
+static void
+nozpool_all_slices(avl_tree_t *r, const char *sname)
+{
+ char diskname[MAXNAMELEN];
+ char *ptr;
+ int i;
+
+ (void) strncpy(diskname, sname, MAXNAMELEN);
+ if (((ptr = strrchr(diskname, 's')) == NULL) &&
+ ((ptr = strrchr(diskname, 'p')) == NULL))
+ return;
+ ptr[0] = 's';
+ ptr[1] = '\0';
+ for (i = 0; i < NDKMAP; i++)
+ check_one_slice(r, diskname, i, 0, 1);
+ ptr[0] = 'p';
+ for (i = 0; i <= FD_NUMPART; i++)
+ check_one_slice(r, diskname, i, 0, 1);
+}
+
+static void
+check_slices(avl_tree_t *r, int fd, const char *sname)
+{
+ struct extvtoc vtoc;
+ struct dk_gpt *gpt;
+ char diskname[MAXNAMELEN];
+ char *ptr;
+ int i;
+
+ (void) strncpy(diskname, sname, MAXNAMELEN);
+ if ((ptr = strrchr(diskname, 's')) == NULL || !isdigit(ptr[1]))
+ return;
+ ptr[1] = '\0';
+
+ if (read_extvtoc(fd, &vtoc) >= 0) {
+ for (i = 0; i < NDKMAP; i++)
+ check_one_slice(r, diskname, i,
+ vtoc.v_part[i].p_size, vtoc.v_sectorsz);
+ } else if (efi_alloc_and_read(fd, &gpt) >= 0) {
+ /*
+ * on x86 we'll still have leftover links that point
+ * to slices s[9-15], so use NDKMAP instead
+ */
+ for (i = 0; i < NDKMAP; i++)
+ check_one_slice(r, diskname, i,
+ gpt->efi_parts[i].p_size, gpt->efi_lbasize);
+ /* nodes p[1-4] are never used with EFI labels */
+ ptr[0] = 'p';
+ for (i = 1; i <= FD_NUMPART; i++)
+ check_one_slice(r, diskname, i, 0, 1);
+ efi_free(gpt);
+ }
+}
+
+void
+zpool_open_func(void *arg)
+{
+ rdsk_node_t *rn = arg;
+ struct stat64 statbuf;
+ nvlist_t *config;
+ int error;
+ int num_labels = 0;
+ int fd;
+
+ if (rn->rn_nozpool)
+ return;
+ if ((fd = openat64(rn->rn_dfd, rn->rn_name, O_RDONLY)) < 0) {
+ /* symlink to a device that's no longer there */
+ if (errno == ENOENT)
+ nozpool_all_slices(rn->rn_avl, rn->rn_name);
+ return;
+ }
+ /*
+ * Ignore failed stats. We only want regular
+ * files, character devs and block devs.
+ */
+ if (fstat64(fd, &statbuf) != 0 ||
+ (!S_ISREG(statbuf.st_mode) &&
+ !S_ISCHR(statbuf.st_mode) &&
+ !S_ISBLK(statbuf.st_mode))) {
+ (void) close(fd);
+ return;
+ }
+ /* this file is too small to hold a zpool */
+ if (S_ISREG(statbuf.st_mode) &&
+ statbuf.st_size < SPA_MINDEVSIZE) {
+ (void) close(fd);
+ return;
+ } else if (!S_ISREG(statbuf.st_mode)) {
+ /*
+ * Try to read the disk label first so we don't have to
+ * open a bunch of minor nodes that can't have a zpool.
+ */
+ check_slices(rn->rn_avl, fd, rn->rn_name);
+ }
+
+ error = zpool_read_label(fd, &config, &num_labels);
+ if (error != 0) {
+ (void) close(fd);
+ return;
+ }
+
+ if (num_labels == 0) {
+ (void) close(fd);
+ nvlist_free(config);
+ return;
+ }
+
+ (void) close(fd);
+
+ rn->rn_config = config;
+ rn->rn_num_labels = num_labels;
+}
+
+/*
+ * Given a list of directories to search, find all pools stored on disk. This
+ * includes partial pools which are not available to import. If no directories
+ * are given (iarg->paths is 0), then the default directory (/dev/dsk) is
+ * searched.
+ * poolname or guid (but not both) are provided by the caller when trying
+ * to import a specific pool.
+ */
+static nvlist_t *
+zpool_find_import_impl(libpc_handle_t *hdl, importargs_t *iarg)
+{
+ int i, dirs = iarg->paths;
+ struct dirent64 *dp;
+ char path[MAXPATHLEN];
+ char *end, **dir = iarg->path;
+ size_t pathleft;
+ nvlist_t *ret = NULL;
+ static char *default_dir = ZFS_DISK_ROOT;
+ pool_list_t pools = { 0 };
+ pool_entry_t *pe, *penext;
+ vdev_entry_t *ve, *venext;
+ config_entry_t *ce, *cenext;
+ name_entry_t *ne, *nenext;
+ avl_tree_t slice_cache;
+ rdsk_node_t *slice;
+ void *cookie;
+
+ if (dirs == 0) {
+ dirs = 1;
+ dir = &default_dir;
+ }
+
+ /*
+ * Go through and read the label configuration information from every
+ * possible device, organizing the information according to pool GUID
+ * and toplevel GUID.
+ */
+ for (i = 0; i < dirs; i++) {
+ tpool_t *t;
+ char rdsk[MAXPATHLEN];
+ int dfd;
+ boolean_t config_failed = B_FALSE;
+ DIR *dirp;
+
+ /* use realpath to normalize the path */
+ if (realpath(dir[i], path) == 0) {
+ (void) zutil_error_fmt(hdl, EZFS_BADPATH,
+ dgettext(TEXT_DOMAIN, "cannot open '%s'"), dir[i]);
+ goto error;
+ }
+ end = &path[strlen(path)];
+ *end++ = '/';
+ *end = 0;
+ pathleft = &path[sizeof (path)] - end;
+
+ /*
+ * Using raw devices instead of block devices when we're
+ * reading the labels skips a bunch of slow operations during
+ * close(2) processing, so we replace /dev/dsk with /dev/rdsk.
+ */
+ if (strcmp(path, ZFS_DISK_ROOTD) == 0)
+ (void) strlcpy(rdsk, ZFS_RDISK_ROOTD, sizeof (rdsk));
+ else
+ (void) strlcpy(rdsk, path, sizeof (rdsk));
+
+ if ((dfd = open64(rdsk, O_RDONLY)) < 0 ||
+ (dirp = fdopendir(dfd)) == NULL) {
+ if (dfd >= 0)
+ (void) close(dfd);
+ zutil_error_aux(hdl, strerror(errno));
+ (void) zutil_error_fmt(hdl, EZFS_BADPATH,
+ dgettext(TEXT_DOMAIN, "cannot open '%s'"),
+ rdsk);
+ goto error;
+ }
+
+ avl_create(&slice_cache, slice_cache_compare,
+ sizeof (rdsk_node_t), offsetof(rdsk_node_t, rn_node));
+ /*
+ * This is not MT-safe, but we have no MT consumers of libzutil
+ */
+ while ((dp = readdir64(dirp)) != NULL) {
+ const char *name = dp->d_name;
+ if (name[0] == '.' &&
+ (name[1] == 0 || (name[1] == '.' && name[2] == 0)))
+ continue;
+
+ slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
+ slice->rn_name = zutil_strdup(hdl, name);
+ slice->rn_avl = &slice_cache;
+ slice->rn_dfd = dfd;
+ slice->rn_hdl = hdl;
+ slice->rn_nozpool = B_FALSE;
+ avl_add(&slice_cache, slice);
+ }
+ /*
+ * create a thread pool to do all of this in parallel;
+ * rn_nozpool is not protected, so this is racy in that
+ * multiple tasks could decide that the same slice can
+ * not hold a zpool, which is benign. Also choose
+ * double the number of processors; we hold a lot of
+ * locks in the kernel, so going beyond this doesn't
+ * buy us much.
+ */
+ t = tpool_create(1, 2 * sysconf(_SC_NPROCESSORS_ONLN),
+ 0, NULL);
+ for (slice = avl_first(&slice_cache); slice;
+ (slice = avl_walk(&slice_cache, slice,
+ AVL_AFTER)))
+ (void) tpool_dispatch(t, zpool_open_func, slice);
+ tpool_wait(t);
+ tpool_destroy(t);
+
+ cookie = NULL;
+ while ((slice = avl_destroy_nodes(&slice_cache,
+ &cookie)) != NULL) {
+ if (slice->rn_config != NULL && !config_failed) {
+ nvlist_t *config = slice->rn_config;
+ boolean_t matched = B_TRUE;
+
+ if (iarg->poolname != NULL) {
+ char *pname;
+
+ matched = nvlist_lookup_string(config,
+ ZPOOL_CONFIG_POOL_NAME,
+ &pname) == 0 &&
+ strcmp(iarg->poolname, pname) == 0;
+ } else if (iarg->guid != 0) {
+ uint64_t this_guid;
+
+ matched = nvlist_lookup_uint64(config,
+ ZPOOL_CONFIG_POOL_GUID,
+ &this_guid) == 0 &&
+ iarg->guid == this_guid;
+ }
+ if (matched) {
+ /*
+ * use the non-raw path for the config
+ */
+ (void) strlcpy(end, slice->rn_name,
+ pathleft);
+ (void) add_config(hdl, &pools,
+ path, slice->rn_order,
+ slice->rn_num_labels, config);
+ }
+ nvlist_free(config);
+ }
+ free(slice->rn_name);
+ free(slice);
+ }
+ avl_destroy(&slice_cache);
+
+ (void) closedir(dirp);
+
+ if (config_failed)
+ goto error;
+ }
+
+ ret = get_configs(hdl, &pools, iarg->can_be_active, iarg->policy);
+
+error:
+ for (pe = pools.pools; pe != NULL; pe = penext) {
+ penext = pe->pe_next;
+ for (ve = pe->pe_vdevs; ve != NULL; ve = venext) {
+ venext = ve->ve_next;
+ for (ce = ve->ve_configs; ce != NULL; ce = cenext) {
+ cenext = ce->ce_next;
+ nvlist_free(ce->ce_config);
+ free(ce);
+ }
+ free(ve);
+ }
+ free(pe);
+ }
+
+ for (ne = pools.names; ne != NULL; ne = nenext) {
+ nenext = ne->ne_next;
+ free(ne->ne_name);
+ free(ne);
+ }
+
+ return (ret);
+}
+
+/*
+ * Given a cache file, return the contents as a list of importable pools.
+ * poolname or guid (but not both) are provided by the caller when trying
+ * to import a specific pool.
+ */
+static nvlist_t *
+zpool_find_import_cached(libpc_handle_t *hdl, const char *cachefile,
+ const char *poolname, uint64_t guid)
+{
+ char *buf;
+ int fd;
+ struct stat64 statbuf;
+ nvlist_t *raw, *src, *dst;
+ nvlist_t *pools;
+ nvpair_t *elem;
+ char *name;
+ uint64_t this_guid;
+ boolean_t active;
+
+ verify(poolname == NULL || guid == 0);
+
+ if ((fd = open(cachefile, O_RDONLY)) < 0) {
+ zutil_error_aux(hdl, "%s", strerror(errno));
+ (void) zutil_error(hdl, EZFS_BADCACHE,
+ dgettext(TEXT_DOMAIN, "failed to open cache file"));
+ return (NULL);
+ }
+
+ if (fstat64(fd, &statbuf) != 0) {
+ zutil_error_aux(hdl, "%s", strerror(errno));
+ (void) close(fd);
+ (void) zutil_error(hdl, EZFS_BADCACHE,
+ dgettext(TEXT_DOMAIN, "failed to get size of cache file"));
+ return (NULL);
+ }
+
+ if ((buf = zutil_alloc(hdl, statbuf.st_size)) == NULL) {
+ (void) close(fd);
+ return (NULL);
+ }
+
+ if (read(fd, buf, statbuf.st_size) != statbuf.st_size) {
+ (void) close(fd);
+ free(buf);
+ (void) zutil_error(hdl, EZFS_BADCACHE,
+ dgettext(TEXT_DOMAIN,
+ "failed to read cache file contents"));
+ return (NULL);
+ }
+
+ (void) close(fd);
+
+ if (nvlist_unpack(buf, statbuf.st_size, &raw, 0) != 0) {
+ free(buf);
+ (void) zutil_error(hdl, EZFS_BADCACHE,
+ dgettext(TEXT_DOMAIN,
+ "invalid or corrupt cache file contents"));
+ return (NULL);
+ }
+
+ free(buf);
+
+ /*
+	 * Go through the cached configs and refresh the state of each pool,
+	 * skipping any that are currently active.
+ */
+ if (nvlist_alloc(&pools, 0, 0) != 0) {
+ (void) zutil_no_memory(hdl);
+ nvlist_free(raw);
+ return (NULL);
+ }
+
+ elem = NULL;
+ while ((elem = nvlist_next_nvpair(raw, elem)) != NULL) {
+ src = fnvpair_value_nvlist(elem);
+
+ name = fnvlist_lookup_string(src, ZPOOL_CONFIG_POOL_NAME);
+ if (poolname != NULL && strcmp(poolname, name) != 0)
+ continue;
+
+ this_guid = fnvlist_lookup_uint64(src, ZPOOL_CONFIG_POOL_GUID);
+ if (guid != 0 && guid != this_guid)
+ continue;
+
+ if (zutil_pool_active(hdl, name, this_guid, &active) != 0) {
+ nvlist_free(raw);
+ nvlist_free(pools);
+ return (NULL);
+ }
+
+ if (active)
+ continue;
+
+ if (nvlist_add_string(src, ZPOOL_CONFIG_CACHEFILE,
+ cachefile) != 0) {
+ (void) zutil_no_memory(hdl);
+ nvlist_free(raw);
+ nvlist_free(pools);
+ return (NULL);
+ }
+
+ if ((dst = zutil_refresh_config(hdl, src)) == NULL) {
+ nvlist_free(raw);
+ nvlist_free(pools);
+ return (NULL);
+ }
+
+ if (nvlist_add_nvlist(pools, nvpair_name(elem), dst) != 0) {
+ (void) zutil_no_memory(hdl);
+ nvlist_free(dst);
+ nvlist_free(raw);
+ nvlist_free(pools);
+ return (NULL);
+ }
+ nvlist_free(dst);
+ }
+
+ nvlist_free(raw);
+ return (pools);
+}
+
+nvlist_t *
+zpool_search_import(void *hdl, importargs_t *import,
+ const pool_config_ops_t *pco)
+{
+ libpc_handle_t handle = { 0 };
+ nvlist_t *pools = NULL;
+
+ handle.lpc_lib_handle = hdl;
+ handle.lpc_ops = pco;
+ handle.lpc_printerr = B_TRUE;
+
+ verify(import->poolname == NULL || import->guid == 0);
+
+ if (import->cachefile != NULL)
+ pools = zpool_find_import_cached(&handle, import->cachefile,
+ import->poolname, import->guid);
+ else
+ pools = zpool_find_import_impl(&handle, import);
+
+ if ((pools == NULL || nvlist_empty(pools)) &&
+ handle.lpc_open_access_error && geteuid() != 0) {
+ (void) zutil_error(&handle, EZFS_EACESS, dgettext(TEXT_DOMAIN,
+ "no pools found"));
+ }
+
+ return (pools);
+}
+
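+/*
+ * Match a pool config against a target that is either a pool name or a
+ * numeric GUID (a target whose strtoull() value is nonzero is treated as
+ * a GUID).
+ */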
+static boolean_t
+pool_match(nvlist_t *cfg, char *tgt)
+{
+ uint64_t v, guid = strtoull(tgt, NULL, 0);
+ char *s;
+
+ if (guid != 0) {
+ if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &v) == 0)
+ return (v == guid);
+ } else {
+ if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &s) == 0)
+ return (strcmp(s, tgt) == 0);
+ }
+ return (B_FALSE);
+}
+
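+/*
+ * Locate a single pool whose name or GUID matches 'target'; any "/dataset"
+ * or "@snapshot" suffix is stripped before matching.  Returns 0 with the
+ * matching config in *configp, ENOENT if nothing matches, or EINVAL if
+ * more than one pool matches.
+ */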
+int
+zpool_find_config(void *hdl, const char *target, nvlist_t **configp,
+ importargs_t *args, const pool_config_ops_t *pco)
+{
+ nvlist_t *pools;
+ nvlist_t *match = NULL;
+ nvlist_t *config = NULL;
+ char *sepp = NULL;
+ int count = 0;
+ char *targetdup = strdup(target);
+
+ *configp = NULL;
+
+ if ((sepp = strpbrk(targetdup, "/@")) != NULL) {
+ *sepp = '\0';
+ }
+
+ pools = zpool_search_import(hdl, args, pco);
+
+ if (pools != NULL) {
+ nvpair_t *elem = NULL;
+ while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) {
+ VERIFY0(nvpair_value_nvlist(elem, &config));
+ if (pool_match(config, targetdup)) {
+ count++;
+ if (match != NULL) {
+ /* multiple matches found */
+ continue;
+ } else {
+ match = config;
+ }
+ }
+ }
+ }
+
+ if (count == 0) {
+ free(targetdup);
+ return (ENOENT);
+ }
+
+ if (count > 1) {
+ free(targetdup);
+ return (EINVAL);
+ }
+
+ *configp = match;
+ free(targetdup);
+
+ return (0);
+}
diff --git a/usr/src/lib/libzutil/common/zutil_import.h b/usr/src/lib/libzutil/common/zutil_import.h
new file mode 100644
index 0000000000..a82ca38688
--- /dev/null
+++ b/usr/src/lib/libzutil/common/zutil_import.h
@@ -0,0 +1,76 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+ * Copyright 2015 RackTop Systems.
+ * Copyright (c) 2016, Intel Corporation.
+ */
+#ifndef _LIBZUTIL_ZUTIL_IMPORT_H_
+#define _LIBZUTIL_ZUTIL_IMPORT_H_
+
+#define EZFS_BADCACHE "invalid or missing cache file"
+#define EZFS_BADPATH "must be an absolute path"
+#define EZFS_NOMEM "out of memory"
+#define EZFS_EACESS "some devices require root privileges"
+
+#define IMPORT_ORDER_PREFERRED_1 1
+#define IMPORT_ORDER_PREFERRED_2 2
+#define IMPORT_ORDER_SCAN_OFFSET 10
+#define IMPORT_ORDER_DEFAULT 100
+
+typedef struct libpc_handle {
+ boolean_t lpc_printerr;
+ boolean_t lpc_open_access_error;
+ boolean_t lpc_desc_active;
+ char lpc_desc[1024];
+ const pool_config_ops_t *lpc_ops;
+ void *lpc_lib_handle;
+} libpc_handle_t;
+
+int label_paths(libpc_handle_t *hdl, nvlist_t *label, char **path,
+ char **devid);
+int zpool_find_import_blkid(libpc_handle_t *hdl, pthread_mutex_t *lock,
+ avl_tree_t **slice_cache);
+
+void *zutil_alloc(libpc_handle_t *hdl, size_t size);
+char *zutil_strdup(libpc_handle_t *hdl, const char *str);
+
+typedef struct rdsk_node {
+ char *rn_name; /* Full path to device */
+ int rn_dfd;
+ int rn_order; /* Preferred order (low to high) */
+ int rn_num_labels; /* Number of valid labels */
+ uint64_t rn_vdev_guid; /* Expected vdev guid when set */
+ libpc_handle_t *rn_hdl;
+ nvlist_t *rn_config; /* Label config */
+ avl_tree_t *rn_avl;
+ avl_node_t rn_node;
+ boolean_t rn_nozpool;
+ pthread_mutex_t *rn_lock;
+ boolean_t rn_labelpaths;
+} rdsk_node_t;
+
+void zpool_open_func(void *);
+
+#endif /* _LIBZUTIL_ZUTIL_IMPORT_H_ */
diff --git a/usr/src/lib/libzutil/common/zutil_nicenum.c b/usr/src/lib/libzutil/common/zutil_nicenum.c
new file mode 100644
index 0000000000..e11edd7d71
--- /dev/null
+++ b/usr/src/lib/libzutil/common/zutil_nicenum.c
@@ -0,0 +1,172 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <ctype.h>
+#include <math.h>
+#include <stdio.h>
+#include <libzutil.h>
+
+/*
+ * Return B_TRUE if "str" is a number string, B_FALSE otherwise.
+ * Works for integer and floating point numbers.
+ */
+boolean_t
+zfs_isnumber(const char *str)
+{
+ for (; *str; str++)
+ if (!(isdigit(*str) || (*str == '.')))
+ return (B_FALSE);
+
+ return (B_TRUE);
+}
+
+/*
+ * Convert a number to an appropriately human-readable output.
+ */
+void
+zfs_nicenum_format(uint64_t num, char *buf, size_t buflen,
+ enum zfs_nicenum_format format)
+{
+ uint64_t n = num;
+ int index = 0;
+ const char *u;
+ const char *units[3][7] = {
+ [ZFS_NICENUM_1024] = {"", "K", "M", "G", "T", "P", "E"},
+ [ZFS_NICENUM_BYTES] = {"B", "K", "M", "G", "T", "P", "E"},
+ [ZFS_NICENUM_TIME] = {"ns", "us", "ms", "s", "?", "?", "?"}
+ };
+
+ const int units_len[] = {[ZFS_NICENUM_1024] = 6,
+ [ZFS_NICENUM_BYTES] = 6,
+ [ZFS_NICENUM_TIME] = 4};
+
+ const int k_unit[] = { [ZFS_NICENUM_1024] = 1024,
+ [ZFS_NICENUM_BYTES] = 1024,
+ [ZFS_NICENUM_TIME] = 1000};
+
+ double val;
+
+ if (format == ZFS_NICENUM_RAW) {
+ (void) snprintf(buf, buflen, "%llu", (u_longlong_t)num);
+ return;
+ } else if (format == ZFS_NICENUM_RAWTIME && num > 0) {
+ (void) snprintf(buf, buflen, "%llu", (u_longlong_t)num);
+ return;
+ } else if (format == ZFS_NICENUM_RAWTIME && num == 0) {
+ (void) snprintf(buf, buflen, "%s", "-");
+ return;
+ }
+
+ while (n >= k_unit[format] && index < units_len[format]) {
+ n /= k_unit[format];
+ index++;
+ }
+
+ u = units[format][index];
+
+ /* Don't print zero latencies since they're invalid */
+ if ((format == ZFS_NICENUM_TIME) && (num == 0)) {
+ (void) snprintf(buf, buflen, "-");
+ } else if ((index == 0) || ((num %
+ (uint64_t)powl(k_unit[format], index)) == 0)) {
+ /*
+ * If this is an even multiple of the base, always display
+ * without any decimal precision.
+ */
+ (void) snprintf(buf, buflen, "%llu%s", (u_longlong_t)n, u);
+
+ } else {
+ /*
+ * We want to choose a precision that reflects the best choice
+ * for fitting in 5 characters. This can get rather tricky when
+ * we have numbers that are very close to an order of magnitude.
+ * For example, when displaying 10239 (which is really 9.999K),
+ * we want only a single place of precision for 10.0K. We could
+ * develop some complex heuristics for this, but it's much
+ * easier just to try each combination in turn.
+ */
+ int i;
+ for (i = 2; i >= 0; i--) {
+ val = (double)num /
+ (uint64_t)powl(k_unit[format], index);
+
+ /*
+ * Don't print floating point values for time. Note,
+ * we use floor() instead of round() here, since
+ * round can result in undesirable results. For
+ * example, if "num" is in the range of
+ * 999500-999999, it will print out "1000us". This
+ * doesn't happen if we use floor().
+ */
+ if (format == ZFS_NICENUM_TIME) {
+ if (snprintf(buf, buflen, "%d%s",
+ (unsigned int) floor(val), u) <= 5)
+ break;
+
+ } else {
+ if (snprintf(buf, buflen, "%.*f%s", i,
+ val, u) <= 5)
+ break;
+ }
+ }
+ }
+}
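+
+/*
+ * Illustrative outputs: with ZFS_NICENUM_1024, 1024 formats as "1K" and
+ * 10239 as "10.0K"; with ZFS_NICENUM_BYTES, 1024 formats as "1K"; with
+ * ZFS_NICENUM_TIME, 1500 (ns) formats as "1us" and 0 as "-".
+ */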
+
+/*
+ * Convert a number to an appropriately human-readable output.
+ */
+void
+zfs_nicenum(uint64_t num, char *buf, size_t buflen)
+{
+ zfs_nicenum_format(num, buf, buflen, ZFS_NICENUM_1024);
+}
+
+/*
+ * Convert a time to an appropriately human-readable output.
+ * @num: Time in nanoseconds
+ */
+void
+zfs_nicetime(uint64_t num, char *buf, size_t buflen)
+{
+ zfs_nicenum_format(num, buf, buflen, ZFS_NICENUM_TIME);
+}
+
+/*
+ * Print out a raw number with correct column spacing
+ */
+void
+zfs_niceraw(uint64_t num, char *buf, size_t buflen)
+{
+ zfs_nicenum_format(num, buf, buflen, ZFS_NICENUM_RAW);
+}
+
+/*
+ * Convert a number of bytes to an appropriately human-readable output.
+ */
+void
+zfs_nicebytes(uint64_t num, char *buf, size_t buflen)
+{
+ zfs_nicenum_format(num, buf, buflen, ZFS_NICENUM_BYTES);
+}
diff --git a/usr/src/lib/libzutil/common/zutil_pool.c b/usr/src/lib/libzutil/common/zutil_pool.c
new file mode 100644
index 0000000000..d7074bdc0b
--- /dev/null
+++ b/usr/src/lib/libzutil/common/zutil_pool.c
@@ -0,0 +1,165 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2020 Joyent, Inc.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/nvpair.h>
+#include <sys/fs/zfs.h>
+#include <sys/sysmacros.h>
+#include <unistd.h>
+
+#include <libzutil.h>
+
+static void
+dump_ddt_stat(const ddt_stat_t *dds, int h)
+{
+ char refcnt[6];
+ char blocks[6], lsize[6], psize[6], dsize[6];
+ char ref_blocks[6], ref_lsize[6], ref_psize[6], ref_dsize[6];
+
+ if (dds == NULL || dds->dds_blocks == 0)
+ return;
+
+ if (h == -1)
+ (void) strcpy(refcnt, "Total");
+ else
+ zfs_nicenum(1ULL << h, refcnt, sizeof (refcnt));
+
+ zfs_nicenum(dds->dds_blocks, blocks, sizeof (blocks));
+ zfs_nicebytes(dds->dds_lsize, lsize, sizeof (lsize));
+ zfs_nicebytes(dds->dds_psize, psize, sizeof (psize));
+ zfs_nicebytes(dds->dds_dsize, dsize, sizeof (dsize));
+ zfs_nicenum(dds->dds_ref_blocks, ref_blocks, sizeof (ref_blocks));
+ zfs_nicebytes(dds->dds_ref_lsize, ref_lsize, sizeof (ref_lsize));
+ zfs_nicebytes(dds->dds_ref_psize, ref_psize, sizeof (ref_psize));
+ zfs_nicebytes(dds->dds_ref_dsize, ref_dsize, sizeof (ref_dsize));
+
+ (void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n",
+ refcnt,
+ blocks, lsize, psize, dsize,
+ ref_blocks, ref_lsize, ref_psize, ref_dsize);
+}
+
+/*
+ * Print the DDT histogram and the column totals.
+ */
+void
+zpool_dump_ddt(const ddt_stat_t *dds_total, const ddt_histogram_t *ddh)
+{
+ int h;
+
+ (void) printf("\n");
+
+ (void) printf("bucket "
+ " allocated "
+ " referenced \n");
+ (void) printf("______ "
+ "______________________________ "
+ "______________________________\n");
+
+ (void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n",
+ "refcnt",
+ "blocks", "LSIZE", "PSIZE", "DSIZE",
+ "blocks", "LSIZE", "PSIZE", "DSIZE");
+
+ (void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n",
+ "------",
+ "------", "-----", "-----", "-----",
+ "------", "-----", "-----", "-----");
+
+ for (h = 0; h < 64; h++)
+ dump_ddt_stat(&ddh->ddh_stat[h], h);
+
+ dump_ddt_stat(dds_total, -1);
+
+ (void) printf("\n");
+}
+
+/*
+ * Process the buffer of nvlists, unpacking and storing each nvlist record
+ * into 'records'. 'leftover' is set to the number of bytes that weren't
+ * processed as there wasn't a complete record.
+ */
+int
+zpool_history_unpack(char *buf, uint64_t bytes_read, uint64_t *leftover,
+ nvlist_t ***records, uint_t *numrecords)
+{
+ uint64_t reclen;
+ nvlist_t *nv;
+ int i;
+ void *tmp;
+
+ while (bytes_read > sizeof (reclen)) {
+
+ /* get length of packed record (stored as little endian) */
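+		/*
+		 * e.g. (illustrative) leading bytes 0x10 0x02 0x00 0x00 ...
+		 * decode to reclen 0x210 (528)
+		 */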
+ for (i = 0, reclen = 0; i < sizeof (reclen); i++)
+ reclen += (uint64_t)(((uchar_t *)buf)[i]) << (8*i);
+
+ if (bytes_read < sizeof (reclen) + reclen)
+ break;
+
+ /* unpack record */
+ if (nvlist_unpack(buf + sizeof (reclen), reclen, &nv, 0) != 0)
+ return (ENOMEM);
+ bytes_read -= sizeof (reclen) + reclen;
+ buf += sizeof (reclen) + reclen;
+
+ /* add record to nvlist array */
+ (*numrecords)++;
+ if (ISP2(*numrecords + 1)) {
+ tmp = realloc(*records,
+ *numrecords * 2 * sizeof (nvlist_t *));
+ if (tmp == NULL) {
+ nvlist_free(nv);
+ (*numrecords)--;
+ return (ENOMEM);
+ }
+ *records = tmp;
+ }
+ (*records)[*numrecords - 1] = nv;
+ }
+
+ *leftover = bytes_read;
+ return (0);
+}
+
+ulong_t
+get_system_hostid(void)
+{
+ char *env;
+
+ /*
+ * Allow the hostid to be subverted for testing.
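+	 * For example (illustrative), running with ZFS_HOSTID=deadbeef makes
+	 * this function return 0xdeadbeef.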
+ */
+ env = getenv("ZFS_HOSTID");
+ if (env) {
+ ulong_t hostid = strtoull(env, NULL, 16);
+ return (hostid & 0xFFFFFFFF);
+ }
+
+ return (gethostid());
+}
diff --git a/usr/src/lib/libzutil/i386/Makefile b/usr/src/lib/libzutil/i386/Makefile
new file mode 100644
index 0000000000..ada420dc55
--- /dev/null
+++ b/usr/src/lib/libzutil/i386/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2020 Joyent, Inc.
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/libzutil/inc.flg b/usr/src/lib/libzutil/inc.flg
new file mode 100644
index 0000000000..dfb8227b21
--- /dev/null
+++ b/usr/src/lib/libzutil/inc.flg
@@ -0,0 +1,19 @@
+#!/bin/sh
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2020 Joyent, Inc.
+#
+
+find_files "s.*" usr/src/common/zfs
+find_files "s.*" usr/src/uts/common/fs/zfs/sys
+echo_file usr/src/uts/common/sys/fs/zfs.h
diff --git a/usr/src/lib/libzutil/sparc/Makefile b/usr/src/lib/libzutil/sparc/Makefile
new file mode 100644
index 0000000000..ada420dc55
--- /dev/null
+++ b/usr/src/lib/libzutil/sparc/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2020 Joyent, Inc.
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/libzutil/sparcv9/Makefile b/usr/src/lib/libzutil/sparcv9/Makefile
new file mode 100644
index 0000000000..e2931cd005
--- /dev/null
+++ b/usr/src/lib/libzutil/sparcv9/Makefile
@@ -0,0 +1,21 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2020 Joyent, Inc.
+#
+
+include ../Makefile.com
+include ../../Makefile.lib.64
+
+sparcv9_C_PICFLAGS= $(sparcv9_C_BIGPICFLAGS)
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64)
diff --git a/usr/src/man/man1/compress.1 b/usr/src/man/man1/compress.1
index 40693a0408..e7a9f63519 100644
--- a/usr/src/man/man1/compress.1
+++ b/usr/src/man/man1/compress.1
@@ -44,12 +44,11 @@
.\" Portions Copyright (c) 1992, X/Open Company Limited All Rights Reserved
.\" Copyright (c) 2008 Sun Microsystems, Inc. All Rights Reserved
.\"
-.TH COMPRESS 1 "Mar 13, 2008"
+.TH COMPRESS 1 "Feb 5, 2020"
.SH NAME
compress, uncompress, zcat \- compress, uncompress files or display expanded
files
.SH SYNOPSIS
-.LP
.nf
\fBcompress\fR [\fB-fv/\fR] [\fB-b\fR \fIbits\fR] [\fIfile\fR]...
.fi
@@ -71,14 +70,12 @@ files
.SH DESCRIPTION
.SS "compress"
-.sp
-.LP
The \fBcompress\fR utility attempts to reduce the size of the named files by
using adaptive Lempel-Ziv coding. Except when the output is to the standard
output, each file is replaced by one with the extension \fB\&.Z\fR, while
keeping the same ownership modes, change times and modification times, ACLs,
-and extended attributes. The compress utility also attempt to set the owner and
-group of \fIfile\fR\fB\&.z\fR to the owner and group of file, but does not fail
+and extended attributes. The compress utility also attempts to set the owner and
+group of \fIfile\fR\fB\&.Z\fR to the owner and group of file, but does not fail
if this cannot be done. If appending the \fB\&.Z\fR to the file pathname would
make the pathname exceed \fB1023\fR bytes, the command fails. If no files are
specified, the standard input is compressed to the standard output.
@@ -93,8 +90,6 @@ specified during compression is encoded within the compressed file, along with
a magic number to ensure that neither decompression of random data nor
recompression of compressed data is subsequently allowed.
.SS "uncompress"
-.sp
-.LP
The \fBuncompress\fR utility restores files to their original state after they
have been compressed using the \fBcompress\fR utility. If no files are
specified, the standard input is uncompressed to the standard output.
@@ -104,14 +99,10 @@ This utility supports the uncompressing of any files produced by
\fBcompress\fR. For files produced by \fBcompress\fR on other systems,
\fBuncompress\fR supports 9- to 16-bit compression (see \fB-b\fR).
.SS "zcat"
-.sp
-.LP
The \fBzcat\fR utility writes to standard output the uncompressed form of files
that have been compressed using \fBcompress\fR. It is the equivalent of
-\fBuncompress\fR\fB-c\fR. Input files are not affected.
+\fBuncompress\fR \fB-c\fR. Input files are not affected.
.SH OPTIONS
-.sp
-.LP
The following options are supported:
.sp
.ne 2
@@ -181,8 +172,6 @@ diagnostic is written to \fBstderr\fR, and the final exit status is
.RE
.SH OPERANDS
-.sp
-.LP
The following operand is supported:
.sp
.ne 2
@@ -197,14 +186,10 @@ standard input is used.
.RE
.SH USAGE
-.sp
-.LP
See \fBlargefile\fR(5) for the description of the behavior of \fBcompress\fR,
\fBuncompress\fR, and \fBzcat\fR when encountering files greater than or equal
-to 2 Gbyte ( 2^31 bytes).
+to 2 Gbyte (2^31 bytes).
.SH ENVIRONMENT VARIABLES
-.sp
-.LP
See \fBenviron\fR(5) for descriptions of the following environment variables
that affect the execution of \fBcompress\fR, \fBuncompress\fR, and \fBzcat\fR:
\fBLANG\fR, \fBLC_ALL\fR, \fBLC_COLLATE\fR, \fBLC_CTYPE\fR, \fBLC_MESSAGES\fR,
@@ -220,8 +205,6 @@ in \fBLC_CTYPE\fR determines the locale for interpretation of sequences of
bytes of text data a characters, the behavior of character classes used in the
expression defined for the \fByesexpr\fR. See \fBlocale\fR(5).
.SH EXIT STATUS
-.sp
-.LP
The following error values are returned:
.sp
.ne 2
@@ -261,8 +244,6 @@ An error occurred.
.RE
.SH ATTRIBUTES
-.sp
-.LP
See \fBattributes\fR(5) for descriptions of the following attributes:
.sp
@@ -281,20 +262,17 @@ Standard See \fBstandards\fR(5).
.TE
.SH SEE ALSO
-.sp
-.LP
\fBln\fR(1), \fBpack\fR(1), \fBfgetattr\fR(3C), \fBfsetattr\fR(3C),
\fBattributes\fR(5), \fBenviron\fR(5), \fBlargefile\fR(5), \fBlocale\fR(5),
\fBstandards\fR(5)
.SH DIAGNOSTICS
-.sp
.ne 2
.na
-\fBUsage: \fBcompress [-fv/] [-b\fR \fIbits\fR] [\fIfile\fR\|.\|.\|. ]\fR
+\fBUsage: \fBcompress [-fv/] [-b\fR \fImaxbits\fR\fB]\fR [\fIfile\fR\|.\|.\|. ]\fR
.ad
.br
.na
-\fB\fBcompress c [-fv] [-b\fR \fIbits\fR] [\fIfile\fR\|.\|.\|. ]\fR
+\fB\fBcompress c [-fv] [-b\fR \fImaxbits\fR\fB]\fR [\fIfile\fR]\fR
.ad
.sp .6
.RS 4n
@@ -356,7 +334,7 @@ The file is assumed to be already compressed. Rename the file and try again.
.sp
.ne 2
.na
-\fB\fIfile\fR: already exists; do you wish to overwrite (y or n)?\fR
+\fB\fIfile\fR already exists; do you wish to overwrite (yes or no)?\fR
.ad
.sp .6
.RS 4n
@@ -437,8 +415,6 @@ Extended system attributes could not be copied.
.RE
.SH NOTES
-.sp
-.LP
Although compressed files are compatible between machines with large memory,
\fB-b\fR 12 should be used for file transfer to architectures with a small
process data space (64KB or less).
diff --git a/usr/src/man/man1/pack.1 b/usr/src/man/man1/pack.1
index 08e885a3dd..5dcb68a713 100644
--- a/usr/src/man/man1/pack.1
+++ b/usr/src/man/man1/pack.1
@@ -44,11 +44,10 @@
.\" Portions Copyright (c) 1992, X/Open Company Limited All Rights Reserved
.\" Copyright (c) 1996, Sun Microsystems, Inc. All Rights Reserved
.\"
-.TH PACK 1 "Mar 13, 2008"
+.TH PACK 1 "Feb 5, 2020"
.SH NAME
pack, pcat, unpack \- compress and expand files
.SH SYNOPSIS
-.LP
.nf
\fBpack\fR [\fB-f/\fR] [\fB-\fR] \fIfile\fR...
.fi
@@ -65,8 +64,6 @@ pack, pcat, unpack \- compress and expand files
.SH DESCRIPTION
.SS "pack"
-.sp
-.LP
The \fBpack\fR command attempts to store the specified files in a compressed
form. Wherever possible (and useful), each input file \fBfile\fR is replaced by
a packed file \fBfile\fR\fB\&.z\fR with the same access modes, access and
@@ -155,10 +152,8 @@ an I/O error occurred during processing.
.sp
.LP
The last segment of the file name must be short enough to allow space for the
-appended \fB\&.z\fRextension. Directories cannot be compressed.
+appended \fB\&.z\fR extension. Directories cannot be compressed.
.SS "pcat"
-.sp
-.LP
The \fBpcat\fR command does for packed files what \fBcat\fR(1) does for
ordinary files, except that \fBpcat\fR cannot be used as a filter. The
specified files are unpacked and written to the standard output.
@@ -179,8 +174,6 @@ the file cannot be opened;
the file does not appear to be the output of \fBpack\fR.
.RE
.SS "unpack"
-.sp
-.LP
The \fBunpack\fR command expands files created by \fBpack\fR. For each
\fBfile\fR specified in the command, a search is made for a file called
\fBfile\fR\fB\&.z\fR (or just \fBfile\fR, if \fBfile\fR ends in \fB\&.z\fR). If
@@ -206,8 +199,6 @@ a file with the unpacked name already exists;
the unpacked file cannot be created.
.RE
.SH OPTIONS
-.sp
-.LP
The following options are supported by \fBpack\fR:
.sp
.ne 2
@@ -237,8 +228,6 @@ message is written to \fBstderr\fR, and the final exit status is
.RE
.SH OPERANDS
-.sp
-.LP
The following operands are supported:
.sp
.ne 2
@@ -264,13 +253,10 @@ place of \fBfile\fR causes the internal flag to be set and reset.
.RE
.SH USAGE
-.sp
-.LP
See \fBlargefile\fR(5) for the description of the behavior of \fBpack\fR,
\fBpcat\fR, and \fBunpack\fR when encountering files greater than or equal to 2
-Gbyte ( 2^31 bytes).
+Gbyte (2^31 bytes).
.SH EXAMPLES
-.LP
\fBExample 1 \fRViewing a Packed File
.sp
.LP
@@ -289,7 +275,7 @@ or just:
\fBexample%\fR \fBpcat\fR \fBfile\fR
.LP
-\fBExample 2 \fRMaking and Unpacked Copy:
+\fBExample 2 \fRMaking an Unpacked Copy:
.sp
.LP
To make an unpacked copy, say \fBnnn\fR, of a packed file named \fBfile.z\fR
@@ -300,14 +286,10 @@ To make an unpacked copy, say \fBnnn\fR, of a packed file named \fBfile.z\fR
\fBexample%\fR \fBpcat\fR \fBfile\fR \fB>nnn\fR
.SH ENVIRONMENT VARIABLES
-.sp
-.LP
See \fBenviron\fR(5) for descriptions of the following environment variables
that affect the execution of \fBpack\fR, \fBpcat\fR, and \fBunpack\fR:
\fBLC_CTYPE\fR, \fBLC_MESSAGES\fR, and \fBNLSPATH\fR.
.SH EXIT STATUS
-.sp
-.LP
The following exit values are returned:
.sp
.ne 2
@@ -330,8 +312,6 @@ returned.
.RE
.SH ATTRIBUTES
-.sp
-.LP
See \fBattributes\fR(5) for descriptions of the following attributes:
.sp
@@ -346,7 +326,5 @@ CSI Enabled
.TE
.SH SEE ALSO
-.sp
-.LP
\fBcat\fR(1), \fBcompress\fR(1), \fBzcat\fR(1), \fBfgetattr\fR(3C),
\fBfsetattr\fR(3C), \fBattributes\fR(5), \fBenviron\fR(5), \fBlargefile\fR(5)
diff --git a/usr/src/pkg/manifests/service-file-system-nfs.mf b/usr/src/pkg/manifests/service-file-system-nfs.mf
index 7d3a408bac..c5937467c8 100644
--- a/usr/src/pkg/manifests/service-file-system-nfs.mf
+++ b/usr/src/pkg/manifests/service-file-system-nfs.mf
@@ -21,7 +21,7 @@
#
# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
-# Copyright 2015 Nexenta Systems, Inc. All rights reserved.
+# Copyright 2018 Nexenta Systems, Inc. All rights reserved.
#
set name=pkg.fmri value=pkg:/service/file-system/nfs@$(PKGVERS)
@@ -48,6 +48,7 @@ dir path=usr/lib/fs group=sys
dir path=usr/lib/fs/nfs group=sys
dir path=usr/lib/fs/nfs/$(ARCH64) group=sys
dir path=usr/lib/nfs group=sys
+dir path=usr/lib/nfs/dtrace group=bin
dir path=usr/lib/reparse group=bin
dir path=usr/lib/reparse/$(ARCH64) group=sys
dir path=usr/sbin
@@ -65,6 +66,8 @@ file path=lib/svc/manifest/network/nfs/server.xml group=sys mode=0444
file path=lib/svc/method/nfs-server mode=0555
file path=usr/lib/fs/nfs/$(ARCH64)/libshare_nfs.so.1
file path=usr/lib/fs/nfs/libshare_nfs.so.1
+file path=usr/lib/nfs/dtrace/nfs-time.d mode=0555
+file path=usr/lib/nfs/dtrace/nfs-trace.d mode=0555
file path=usr/lib/nfs/mountd mode=0555
file path=usr/lib/nfs/nfsd mode=0555
file path=usr/lib/nfs/nfslogd mode=0555
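
The manifest hunk above delivers the two new observability scripts,
/usr/lib/nfs/dtrace/nfs-time.d and /usr/lib/nfs/dtrace/nfs-trace.d, alongside
the NFS server binaries. As a rough illustration of the kind of tracing those
scripts enable -- this sketch is not the shipped nfs-time.d or nfs-trace.d, and
it assumes the stock conninfo_t translation (args[0]->ci_remote) provided by
the nfsv3 SDT provider -- a per-client request counter might look like:

    #!/usr/sbin/dtrace -s
    /*
     * Illustration only -- not the nfs-time.d/nfs-trace.d scripts delivered
     * by this manifest.  Counts NFSv3 read and write requests per remote
     * client, keyed on the translated conninfo_t remote address.
     */
    nfsv3:::op-read-start
    {
            @reqs[args[0]->ci_remote, "read"] = count();
    }

    nfsv3:::op-write-start
    {
            @reqs[args[0]->ci_remote, "write"] = count();
    }

The aggregation is printed automatically when the script exits.
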
diff --git a/usr/src/pkg/manifests/system-file-system-zfs.mf b/usr/src/pkg/manifests/system-file-system-zfs.mf
index 3fa15b19f6..69ef355916 100644
--- a/usr/src/pkg/manifests/system-file-system-zfs.mf
+++ b/usr/src/pkg/manifests/system-file-system-zfs.mf
@@ -24,6 +24,7 @@
# Copyright (c) 2012, 2017 by Delphix. All rights reserved.
# Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
# Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
+# Copyright 2020 Joyent, Inc.
#
set name=pkg.fmri value=pkg:/system/file-system/zfs@$(PKGVERS)
@@ -77,8 +78,10 @@ file path=kernel/drv/zfs.conf group=sys
file path=kernel/kmdb/$(ARCH64)/zfs group=sys mode=0555
file path=lib/$(ARCH64)/libzfs.so.1
file path=lib/$(ARCH64)/libzfs_core.so.1
+file path=lib/$(ARCH64)/libzutil.so.1
file path=lib/libzfs.so.1
file path=lib/libzfs_core.so.1
+file path=lib/libzutil.so.1
file path=sbin/zfs mode=0555
file path=sbin/zpool mode=0555
file path=usr/lib/$(ARCH64)/libzfs_jni.so.1
@@ -122,8 +125,10 @@ link path=etc/fs/zfs/mount target=../../../sbin/zfs
link path=etc/fs/zfs/umount target=../../../sbin/zfs
link path=lib/$(ARCH64)/libzfs.so target=libzfs.so.1
link path=lib/$(ARCH64)/libzfs_core.so target=libzfs_core.so.1
+link path=lib/$(ARCH64)/libzutil.so target=libzutil.so.1
link path=lib/libzfs.so target=libzfs.so.1
link path=lib/libzfs_core.so target=libzfs_core.so.1
+link path=lib/libzutil.so target=libzutil.so.1
link path=usr/lib/$(ARCH64)/libzfs.so \
target=../../../lib/$(ARCH64)/libzfs.so.1
link path=usr/lib/$(ARCH64)/libzfs.so.1 \
@@ -134,6 +139,10 @@ link path=usr/lib/$(ARCH64)/libzfs_core.so.1 \
target=../../../lib/$(ARCH64)/libzfs_core.so.1
link path=usr/lib/$(ARCH64)/libzfs_jni.so target=libzfs_jni.so.1
link path=usr/lib/$(ARCH64)/libzpool.so target=libzpool.so.1
+link path=usr/lib/$(ARCH64)/libzutil.so \
+ target=../../../lib/$(ARCH64)/libzutil.so.1
+link path=usr/lib/$(ARCH64)/libzutil.so.1 \
+ target=../../../lib/$(ARCH64)/libzutil.so.1
link path=usr/lib/fs/zfs/mount target=../../../../sbin/zfs
link path=usr/lib/fs/zfs/umount target=../../../../sbin/zfs
link path=usr/lib/libzfs.so target=../../lib/libzfs.so.1
@@ -142,6 +151,8 @@ link path=usr/lib/libzfs_core.so target=../../lib/libzfs_core.so.1
link path=usr/lib/libzfs_core.so.1 target=../../lib/libzfs_core.so.1
link path=usr/lib/libzfs_jni.so target=libzfs_jni.so.1
$(i386_ONLY)link path=usr/lib/libzpool.so target=libzpool.so.1
+link path=usr/lib/libzutil.so target=../../lib/libzutil.so.1
+link path=usr/lib/libzutil.so.1 target=../../lib/libzutil.so.1
link path=usr/sbin/zfs target=../../sbin/zfs
link path=usr/sbin/zpool target=../../sbin/zpool
$(python3_ONLY)depend fmri=system/library/python/zfs$(PYTHON3_PKGVERS) \
diff --git a/usr/src/uts/common/dtrace/sdt_subr.c b/usr/src/uts/common/dtrace/sdt_subr.c
index a3e99544d2..a3ccaa6ff5 100644
--- a/usr/src/uts/common/dtrace/sdt_subr.c
+++ b/usr/src/uts/common/dtrace/sdt_subr.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
- * Copyright 2017 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2018 Nexenta Systems, Inc. All rights reserved.
*/
#include <sys/sdt_impl.h>
@@ -98,6 +98,14 @@ static dtrace_pattr_t iscsi_attr = {
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
};
+static dtrace_pattr_t nfs_attr = {
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
+};
+
static dtrace_pattr_t smb_attr = {
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
@@ -124,8 +132,8 @@ sdt_provider_t sdt_providers[] = {
{ "mib", "__mib_", &stab_attr },
{ "fsinfo", "__fsinfo_", &fsinfo_attr },
{ "iscsi", "__iscsi_", &iscsi_attr },
- { "nfsv3", "__nfsv3_", &stab_attr },
- { "nfsv4", "__nfsv4_", &stab_attr },
+ { "nfsv3", "__nfsv3_", &nfs_attr },
+ { "nfsv4", "__nfsv4_", &nfs_attr },
{ "smb", "__smb_", &smb_attr },
{ "smb2", "__smb2_", &smb_attr },
{ "xpv", "__xpv_", &xpv_attr },
@@ -272,627 +280,490 @@ sdt_argdesc_t sdt_args[] = {
{ "iscsi", "xfer-done", 7, 6, "uint32_t"},
{ "iscsi", "xfer-done", 8, 7, "int"},
- { "nfsv3", "op-getattr-start", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-getattr-start", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-getattr-start", 2, 3, "GETATTR3args *" },
- { "nfsv3", "op-getattr-done", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-getattr-done", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-getattr-done", 2, 3, "GETATTR3res *" },
- { "nfsv3", "op-setattr-start", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-setattr-start", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-setattr-start", 2, 3, "SETATTR3args *" },
- { "nfsv3", "op-setattr-done", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-setattr-done", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-setattr-done", 2, 3, "SETATTR3res *" },
- { "nfsv3", "op-lookup-start", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-lookup-start", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-lookup-start", 2, 3, "LOOKUP3args *" },
- { "nfsv3", "op-lookup-done", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-lookup-done", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-lookup-done", 2, 3, "LOOKUP3res *" },
- { "nfsv3", "op-access-start", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-access-start", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-access-start", 2, 3, "ACCESS3args *" },
- { "nfsv3", "op-access-done", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-access-done", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-access-done", 2, 3, "ACCESS3res *" },
- { "nfsv3", "op-commit-start", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-commit-start", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-commit-start", 2, 3, "COMMIT3args *" },
- { "nfsv3", "op-commit-done", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-commit-done", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-commit-done", 2, 3, "COMMIT3res *" },
- { "nfsv3", "op-create-start", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-create-start", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-create-start", 2, 3, "CREATE3args *" },
- { "nfsv3", "op-create-done", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-create-done", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-create-done", 2, 3, "CREATE3res *" },
- { "nfsv3", "op-fsinfo-start", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-fsinfo-start", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-fsinfo-start", 2, 3, "FSINFO3args *" },
- { "nfsv3", "op-fsinfo-done", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-fsinfo-done", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-fsinfo-done", 2, 3, "FSINFO3res *" },
- { "nfsv3", "op-fsstat-start", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-fsstat-start", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-fsstat-start", 2, 3, "FSSTAT3args *" },
- { "nfsv3", "op-fsstat-done", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-fsstat-done", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-fsstat-done", 2, 3, "FSSTAT3res *" },
- { "nfsv3", "op-link-start", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-link-start", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-link-start", 2, 3, "LINK3args *" },
- { "nfsv3", "op-link-done", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-link-done", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-link-done", 2, 3, "LINK3res *" },
- { "nfsv3", "op-mkdir-start", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-mkdir-start", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-mkdir-start", 2, 3, "MKDIR3args *" },
- { "nfsv3", "op-mkdir-done", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-mkdir-done", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-mkdir-done", 2, 3, "MKDIR3res *" },
- { "nfsv3", "op-mknod-start", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-mknod-start", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-mknod-start", 2, 3, "MKNOD3args *" },
- { "nfsv3", "op-mknod-done", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-mknod-done", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-mknod-done", 2, 3, "MKNOD3res *" },
- { "nfsv3", "op-null-start", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-null-start", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-null-done", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-null-done", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-pathconf-start", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-pathconf-start", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-pathconf-start", 2, 3, "PATHCONF3args *" },
- { "nfsv3", "op-pathconf-done", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-pathconf-done", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-pathconf-done", 2, 3, "PATHCONF3res *" },
- { "nfsv3", "op-read-start", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-read-start", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-read-start", 2, 3, "READ3args *" },
- { "nfsv3", "op-read-done", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-read-done", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-read-done", 2, 3, "READ3res *" },
- { "nfsv3", "op-readdir-start", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-readdir-start", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-readdir-start", 2, 3, "READDIR3args *" },
- { "nfsv3", "op-readdir-done", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-readdir-done", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-readdir-done", 2, 3, "READDIR3res *" },
- { "nfsv3", "op-readdirplus-start", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-readdirplus-start", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-readdirplus-start", 2, 3, "READDIRPLUS3args *" },
- { "nfsv3", "op-readdirplus-done", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-readdirplus-done", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-readdirplus-done", 2, 3, "READDIRPLUS3res *" },
- { "nfsv3", "op-readlink-start", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-readlink-start", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-readlink-start", 2, 3, "READLINK3args *" },
- { "nfsv3", "op-readlink-done", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-readlink-done", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-readlink-done", 2, 3, "READLINK3res *" },
- { "nfsv3", "op-remove-start", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-remove-start", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-remove-start", 2, 3, "REMOVE3args *" },
- { "nfsv3", "op-remove-done", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-remove-done", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-remove-done", 2, 3, "REMOVE3res *" },
- { "nfsv3", "op-rename-start", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-rename-start", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-rename-start", 2, 3, "RENAME3args *" },
- { "nfsv3", "op-rename-done", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-rename-done", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-rename-done", 2, 3, "RENAME3res *" },
- { "nfsv3", "op-rmdir-start", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-rmdir-start", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-rmdir-start", 2, 3, "RMDIR3args *" },
- { "nfsv3", "op-rmdir-done", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-rmdir-done", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-rmdir-done", 2, 3, "RMDIR3res *" },
- { "nfsv3", "op-setattr-start", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-setattr-start", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-setattr-start", 2, 3, "SETATTR3args *" },
- { "nfsv3", "op-setattr-done", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-setattr-done", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-setattr-done", 2, 3, "SETATTR3res *" },
- { "nfsv3", "op-symlink-start", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-symlink-start", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-symlink-start", 2, 3, "SYMLINK3args *" },
- { "nfsv3", "op-symlink-done", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-symlink-done", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-symlink-done", 2, 3, "SYMLINK3res *" },
- { "nfsv3", "op-write-start", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-write-start", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-write-start", 2, 3, "WRITE3args *" },
- { "nfsv3", "op-write-done", 0, 0, "struct svc_req *",
- "conninfo_t *" },
- { "nfsv3", "op-write-done", 1, 1, "nfsv3oparg_t *",
- "nfsv3opinfo_t *" },
- { "nfsv3", "op-write-done", 2, 3, "WRITE3res *" },
+ /* Tables like this get really ugly when line-wrapped. */
+ /* BEGIN CSTYLED */
+ { "nfsv3", "op-getattr-start", 0, 0, "struct svc_req *", "conninfo_t *" },
+ { "nfsv3", "op-getattr-start", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" },
+ { "nfsv3", "op-getattr-start", 2, 4, "GETATTR3args *" },
+
+ { "nfsv3", "op-getattr-done", 0, 0, "struct svc_req *", "conninfo_t *" },
+ { "nfsv3", "op-getattr-done", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" },
+ { "nfsv3", "op-getattr-done", 2, 4, "GETATTR3res *" },
+
+ { "nfsv3", "op-setattr-start", 0, 0, "struct svc_req *", "conninfo_t *" },
+ { "nfsv3", "op-setattr-start", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" },
+ { "nfsv3", "op-setattr-start", 2, 4, "SETATTR3args *" },
+
+ { "nfsv3", "op-setattr-done", 0, 0, "struct svc_req *", "conninfo_t *" },
+ { "nfsv3", "op-setattr-done", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" },
+ { "nfsv3", "op-setattr-done", 2, 4, "SETATTR3res *" },
+
+ { "nfsv3", "op-lookup-start", 0, 0, "struct svc_req *", "conninfo_t *" },
+ { "nfsv3", "op-lookup-start", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" },
+ { "nfsv3", "op-lookup-start", 2, 4, "LOOKUP3args *" },
+
+ { "nfsv3", "op-lookup-done", 0, 0, "struct svc_req *", "conninfo_t *" },
+ { "nfsv3", "op-lookup-done", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" },
+ { "nfsv3", "op-lookup-done", 2, 4, "LOOKUP3res *" },
+
+ { "nfsv3", "op-access-start", 0, 0, "struct svc_req *", "conninfo_t *" },
+ { "nfsv3", "op-access-start", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" },
+ { "nfsv3", "op-access-start", 2, 4, "ACCESS3args *" },
+
+ { "nfsv3", "op-access-done", 0, 0, "struct svc_req *", "conninfo_t *" },
+ { "nfsv3", "op-access-done", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" },
+ { "nfsv3", "op-access-done", 2, 4, "ACCESS3res *" },
+
+ { "nfsv3", "op-commit-start", 0, 0, "struct svc_req *", "conninfo_t *" },
+ { "nfsv3", "op-commit-start", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" },
+ { "nfsv3", "op-commit-start", 2, 4, "COMMIT3args *" },
+
+ { "nfsv3", "op-commit-done", 0, 0, "struct svc_req *", "conninfo_t *" },
+ { "nfsv3", "op-commit-done", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" },
+ { "nfsv3", "op-commit-done", 2, 4, "COMMIT3res *" },
+
+ { "nfsv3", "op-create-start", 0, 0, "struct svc_req *", "conninfo_t *" },
+ { "nfsv3", "op-create-start", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" },
+ { "nfsv3", "op-create-start", 2, 4, "CREATE3args *" },
+
+ { "nfsv3", "op-create-done", 0, 0, "struct svc_req *", "conninfo_t *" },
+ { "nfsv3", "op-create-done", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" },
+ { "nfsv3", "op-create-done", 2, 4, "CREATE3res *" },
+
+ { "nfsv3", "op-fsinfo-start", 0, 0, "struct svc_req *", "conninfo_t *" },
+ { "nfsv3", "op-fsinfo-start", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" },
+ { "nfsv3", "op-fsinfo-start", 2, 4, "FSINFO3args *" },
+
+ { "nfsv3", "op-fsinfo-done", 0, 0, "struct svc_req *", "conninfo_t *" },
+ { "nfsv3", "op-fsinfo-done", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" },
+ { "nfsv3", "op-fsinfo-done", 2, 4, "FSINFO3res *" },
+
+ { "nfsv3", "op-fsstat-start", 0, 0, "struct svc_req *", "conninfo_t *" },
+ { "nfsv3", "op-fsstat-start", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" },
+ { "nfsv3", "op-fsstat-start", 2, 4, "FSSTAT3args *" },
+
+ { "nfsv3", "op-fsstat-done", 0, 0, "struct svc_req *", "conninfo_t *" },
+ { "nfsv3", "op-fsstat-done", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" },
+ { "nfsv3", "op-fsstat-done", 2, 4, "FSSTAT3res *" },
+
+ { "nfsv3", "op-link-start", 0, 0, "struct svc_req *", "conninfo_t *" },
+ { "nfsv3", "op-link-start", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" },
+ { "nfsv3", "op-link-start", 2, 4, "LINK3args *" },
+
+ { "nfsv3", "op-link-done", 0, 0, "struct svc_req *", "conninfo_t *" },
+ { "nfsv3", "op-link-done", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" },
+ { "nfsv3", "op-link-done", 2, 4, "LINK3res *" },
+
+ { "nfsv3", "op-mkdir-start", 0, 0, "struct svc_req *", "conninfo_t *" },
+ { "nfsv3", "op-mkdir-start", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" },
+ { "nfsv3", "op-mkdir-start", 2, 4, "MKDIR3args *" },
+
+ { "nfsv3", "op-mkdir-done", 0, 0, "struct svc_req *", "conninfo_t *" },
+ { "nfsv3", "op-mkdir-done", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" },
+ { "nfsv3", "op-mkdir-done", 2, 4, "MKDIR3res *" },
+
+ { "nfsv3", "op-mknod-start", 0, 0, "struct svc_req *", "conninfo_t *" },
+ { "nfsv3", "op-mknod-start", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" },
+ { "nfsv3", "op-mknod-start", 2, 4, "MKNOD3args *" },
+
+ { "nfsv3", "op-mknod-done", 0, 0, "struct svc_req *", "conninfo_t *" },
+ { "nfsv3", "op-mknod-done", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" },
+ { "nfsv3", "op-mknod-done", 2, 4, "MKNOD3res *" },
+
+ { "nfsv3", "op-null-start", 0, 0, "struct svc_req *", "conninfo_t *" },
+ { "nfsv3", "op-null-start", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" },
+
+ { "nfsv3", "op-null-done", 0, 0, "struct svc_req *", "conninfo_t *" },
+ { "nfsv3", "op-null-done", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" },
+
+ { "nfsv3", "op-pathconf-start", 0, 0, "struct svc_req *", "conninfo_t *" },
+ { "nfsv3", "op-pathconf-start", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" },
+ { "nfsv3", "op-pathconf-start", 2, 4, "PATHCONF3args *" },
+
+ { "nfsv3", "op-pathconf-done", 0, 0, "struct svc_req *", "conninfo_t *" },
+ { "nfsv3", "op-pathconf-done", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" },
+ { "nfsv3", "op-pathconf-done", 2, 4, "PATHCONF3res *" },
+
+ { "nfsv3", "op-read-start", 0, 0, "struct svc_req *", "conninfo_t *" },
+ { "nfsv3", "op-read-start", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" },
+ { "nfsv3", "op-read-start", 2, 4, "READ3args *" },
+
+ { "nfsv3", "op-read-done", 0, 0, "struct svc_req *", "conninfo_t *" },
+ { "nfsv3", "op-read-done", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" },
+ { "nfsv3", "op-read-done", 2, 4, "READ3res *" },
+
+ { "nfsv3", "op-readdir-start", 0, 0, "struct svc_req *", "conninfo_t *" },
+ { "nfsv3", "op-readdir-start", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" },
+ { "nfsv3", "op-readdir-start", 2, 4, "READDIR3args *" },
+
+ { "nfsv3", "op-readdir-done", 0, 0, "struct svc_req *", "conninfo_t *" },
+ { "nfsv3", "op-readdir-done", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" },
+ { "nfsv3", "op-readdir-done", 2, 4, "READDIR3res *" },
+
+ { "nfsv3", "op-readdirplus-start", 0, 0, "struct svc_req *", "conninfo_t *" },
+ { "nfsv3", "op-readdirplus-start", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" },
+ { "nfsv3", "op-readdirplus-start", 2, 4, "READDIRPLUS3args *" },
+
+ { "nfsv3", "op-readdirplus-done", 0, 0, "struct svc_req *", "conninfo_t *" },
+ { "nfsv3", "op-readdirplus-done", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" },
+ { "nfsv3", "op-readdirplus-done", 2, 4, "READDIRPLUS3res *" },
+
+ { "nfsv3", "op-readlink-start", 0, 0, "struct svc_req *", "conninfo_t *" },
+ { "nfsv3", "op-readlink-start", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" },
+ { "nfsv3", "op-readlink-start", 2, 4, "READLINK3args *" },
+
+ { "nfsv3", "op-readlink-done", 0, 0, "struct svc_req *", "conninfo_t *" },
+ { "nfsv3", "op-readlink-done", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" },
+ { "nfsv3", "op-readlink-done", 2, 4, "READLINK3res *" },
+
+ { "nfsv3", "op-remove-start", 0, 0, "struct svc_req *", "conninfo_t *" },
+ { "nfsv3", "op-remove-start", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" },
+ { "nfsv3", "op-remove-start", 2, 4, "REMOVE3args *" },
+
+ { "nfsv3", "op-remove-done", 0, 0, "struct svc_req *", "conninfo_t *" },
+ { "nfsv3", "op-remove-done", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" },
+ { "nfsv3", "op-remove-done", 2, 4, "REMOVE3res *" },
+
+ { "nfsv3", "op-rename-start", 0, 0, "struct svc_req *", "conninfo_t *" },
+ { "nfsv3", "op-rename-start", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" },
+ { "nfsv3", "op-rename-start", 2, 4, "RENAME3args *" },
+
+ { "nfsv3", "op-rename-done", 0, 0, "struct svc_req *", "conninfo_t *" },
+ { "nfsv3", "op-rename-done", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" },
+ { "nfsv3", "op-rename-done", 2, 4, "RENAME3res *" },
+
+ { "nfsv3", "op-rmdir-start", 0, 0, "struct svc_req *", "conninfo_t *" },
+ { "nfsv3", "op-rmdir-start", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" },
+ { "nfsv3", "op-rmdir-start", 2, 4, "RMDIR3args *" },
+
+ { "nfsv3", "op-rmdir-done", 0, 0, "struct svc_req *", "conninfo_t *" },
+ { "nfsv3", "op-rmdir-done", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" },
+ { "nfsv3", "op-rmdir-done", 2, 4, "RMDIR3res *" },
+
+ { "nfsv3", "op-symlink-start", 0, 0, "struct svc_req *", "conninfo_t *" },
+ { "nfsv3", "op-symlink-start", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" },
+ { "nfsv3", "op-symlink-start", 2, 4, "SYMLINK3args *" },
+
+ { "nfsv3", "op-symlink-done", 0, 0, "struct svc_req *", "conninfo_t *" },
+ { "nfsv3", "op-symlink-done", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" },
+ { "nfsv3", "op-symlink-done", 2, 4, "SYMLINK3res *" },
+
+ { "nfsv3", "op-write-start", 0, 0, "struct svc_req *", "conninfo_t *" },
+ { "nfsv3", "op-write-start", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" },
+ { "nfsv3", "op-write-start", 2, 4, "WRITE3args *" },
+
+ { "nfsv3", "op-write-done", 0, 0, "struct svc_req *", "conninfo_t *" },
+ { "nfsv3", "op-write-done", 1, 1, "nfsv3oparg_t *", "nfsv3opinfo_t *" },
+ { "nfsv3", "op-write-done", 2, 4, "WRITE3res *" },
{ "nfsv4", "null-start", 0, 0, "struct svc_req *", "conninfo_t *" },
{ "nfsv4", "null-done", 0, 0, "struct svc_req *", "conninfo_t *" },
- { "nfsv4", "compound-start", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "compound-start", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "compound-start", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "compound-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "compound-start", 2, 1, "COMPOUND4args *" },
- { "nfsv4", "compound-done", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "compound-done", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "compound-done", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "compound-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "compound-done", 2, 1, "COMPOUND4res *" },
- { "nfsv4", "op-access-start", 0, 0, "struct compound_state *",
- "conninfo_t *"},
- { "nfsv4", "op-access-start", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-access-start", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-access-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-access-start", 2, 1, "ACCESS4args *" },
- { "nfsv4", "op-access-done", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-access-done", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-access-done", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-access-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-access-done", 2, 1, "ACCESS4res *" },
- { "nfsv4", "op-close-start", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-close-start", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-close-start", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-close-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-close-start", 2, 1, "CLOSE4args *" },
- { "nfsv4", "op-close-done", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-close-done", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-close-done", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-close-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-close-done", 2, 1, "CLOSE4res *" },
- { "nfsv4", "op-commit-start", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-commit-start", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-commit-start", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-commit-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-commit-start", 2, 1, "COMMIT4args *" },
- { "nfsv4", "op-commit-done", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-commit-done", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-commit-done", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-commit-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-commit-done", 2, 1, "COMMIT4res *" },
- { "nfsv4", "op-create-start", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-create-start", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-create-start", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-create-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-create-start", 2, 1, "CREATE4args *" },
- { "nfsv4", "op-create-done", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-create-done", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-create-done", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-create-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-create-done", 2, 1, "CREATE4res *" },
- { "nfsv4", "op-delegpurge-start", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-delegpurge-start", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-delegpurge-start", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-delegpurge-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-delegpurge-start", 2, 1, "DELEGPURGE4args *" },
- { "nfsv4", "op-delegpurge-done", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-delegpurge-done", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-delegpurge-done", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-delegpurge-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-delegpurge-done", 2, 1, "DELEGPURGE4res *" },
- { "nfsv4", "op-delegreturn-start", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-delegreturn-start", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-delegreturn-start", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-delegreturn-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-delegreturn-start", 2, 1, "DELEGRETURN4args *" },
- { "nfsv4", "op-delegreturn-done", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-delegreturn-done", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-delegreturn-done", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-delegreturn-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-delegreturn-done", 2, 1, "DELEGRETURN4res *" },
- { "nfsv4", "op-getattr-start", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-getattr-start", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-getattr-start", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-getattr-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-getattr-start", 2, 1, "GETATTR4args *" },
- { "nfsv4", "op-getattr-done", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-getattr-done", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-getattr-done", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-getattr-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-getattr-done", 2, 1, "GETATTR4res *" },
- { "nfsv4", "op-getfh-start", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-getfh-start", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
- { "nfsv4", "op-getfh-done", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-getfh-done", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-getfh-start", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-getfh-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-getfh-done", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-getfh-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-getfh-done", 2, 1, "GETFH4res *" },
- { "nfsv4", "op-link-start", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-link-start", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-link-start", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-link-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-link-start", 2, 1, "LINK4args *" },
- { "nfsv4", "op-link-done", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-link-done", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-link-done", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-link-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-link-done", 2, 1, "LINK4res *" },
- { "nfsv4", "op-lock-start", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-lock-start", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-lock-start", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-lock-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-lock-start", 2, 1, "LOCK4args *" },
- { "nfsv4", "op-lock-done", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-lock-done", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-lock-done", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-lock-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-lock-done", 2, 1, "LOCK4res *" },
- { "nfsv4", "op-lockt-start", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-lockt-start", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-lockt-start", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-lockt-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-lockt-start", 2, 1, "LOCKT4args *" },
- { "nfsv4", "op-lockt-done", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-lockt-done", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-lockt-done", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-lockt-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-lockt-done", 2, 1, "LOCKT4res *" },
- { "nfsv4", "op-locku-start", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-locku-start", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-locku-start", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-locku-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-locku-start", 2, 1, "LOCKU4args *" },
- { "nfsv4", "op-locku-done", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-locku-done", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-locku-done", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-locku-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-locku-done", 2, 1, "LOCKU4res *" },
- { "nfsv4", "op-lookup-start", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-lookup-start", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-lookup-start", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-lookup-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-lookup-start", 2, 1, "LOOKUP4args *" },
- { "nfsv4", "op-lookup-done", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-lookup-done", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-lookup-done", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-lookup-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-lookup-done", 2, 1, "LOOKUP4res *" },
- { "nfsv4", "op-lookupp-start", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-lookupp-start", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
- { "nfsv4", "op-lookupp-done", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-lookupp-done", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-lookupp-start", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-lookupp-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-lookupp-done", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-lookupp-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-lookupp-done", 2, 1, "LOOKUPP4res *" },
- { "nfsv4", "op-nverify-start", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-nverify-start", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-nverify-start", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-nverify-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-nverify-start", 2, 1, "NVERIFY4args *" },
- { "nfsv4", "op-nverify-done", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-nverify-done", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-nverify-done", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-nverify-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-nverify-done", 2, 1, "NVERIFY4res *" },
- { "nfsv4", "op-open-start", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-open-start", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-open-start", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-open-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-open-start", 2, 1, "OPEN4args *" },
- { "nfsv4", "op-open-done", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-open-done", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-open-done", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-open-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-open-done", 2, 1, "OPEN4res *" },
- { "nfsv4", "op-open-confirm-start", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-open-confirm-start", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-open-confirm-start", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-open-confirm-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-open-confirm-start", 2, 1, "OPEN_CONFIRM4args *" },
- { "nfsv4", "op-open-confirm-done", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-open-confirm-done", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-open-confirm-done", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-open-confirm-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-open-confirm-done", 2, 1, "OPEN_CONFIRM4res *" },
- { "nfsv4", "op-open-downgrade-start", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-open-downgrade-start", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-open-downgrade-start", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-open-downgrade-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-open-downgrade-start", 2, 1, "OPEN_DOWNGRADE4args *" },
- { "nfsv4", "op-open-downgrade-done", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-open-downgrade-done", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-open-downgrade-done", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-open-downgrade-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-open-downgrade-done", 2, 1, "OPEN_DOWNGRADE4res *" },
- { "nfsv4", "op-openattr-start", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-openattr-start", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-openattr-start", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-openattr-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-openattr-start", 2, 1, "OPENATTR4args *" },
- { "nfsv4", "op-openattr-done", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-openattr-done", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-openattr-done", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-openattr-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-openattr-done", 2, 1, "OPENATTR4res *" },
- { "nfsv4", "op-putfh-start", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-putfh-start", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-putfh-start", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-putfh-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-putfh-start", 2, 1, "PUTFH4args *" },
- { "nfsv4", "op-putfh-done", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-putfh-done", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-putfh-done", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-putfh-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-putfh-done", 2, 1, "PUTFH4res *" },
- { "nfsv4", "op-putpubfh-start", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-putpubfh-start", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
- { "nfsv4", "op-putpubfh-done", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-putpubfh-done", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-putpubfh-start", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-putpubfh-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-putpubfh-done", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-putpubfh-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-putpubfh-done", 2, 1, "PUTPUBFH4res *" },
- { "nfsv4", "op-putrootfh-start", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-putrootfh-start", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
- { "nfsv4", "op-putrootfh-done", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-putrootfh-done", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-putrootfh-start", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-putrootfh-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-putrootfh-done", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-putrootfh-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-putrootfh-done", 2, 1, "PUTROOTFH4res *" },
- { "nfsv4", "op-read-start", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-read-start", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-read-start", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-read-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-read-start", 2, 1, "READ4args *" },
- { "nfsv4", "op-read-done", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-read-done", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-read-done", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-read-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-read-done", 2, 1, "READ4res *" },
- { "nfsv4", "op-readdir-start", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-readdir-start", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-readdir-start", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-readdir-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-readdir-start", 2, 1, "READDIR4args *" },
- { "nfsv4", "op-readdir-done", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-readdir-done", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-readdir-done", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-readdir-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-readdir-done", 2, 1, "READDIR4res *" },
- { "nfsv4", "op-readlink-start", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-readlink-start", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
- { "nfsv4", "op-readlink-done", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-readlink-done", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-readlink-start", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-readlink-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-readlink-done", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-readlink-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-readlink-done", 2, 1, "READLINK4res *" },
- { "nfsv4", "op-release-lockowner-start", 0, 0,
- "struct compound_state *", "conninfo_t *" },
- { "nfsv4", "op-release-lockowner-start", 1, 0,
- "struct compound_state *", "nfsv4opinfo_t *" },
- { "nfsv4", "op-release-lockowner-start", 2, 1,
- "RELEASE_LOCKOWNER4args *" },
- { "nfsv4", "op-release-lockowner-done", 0, 0,
- "struct compound_state *", "conninfo_t *" },
- { "nfsv4", "op-release-lockowner-done", 1, 0,
- "struct compound_state *", "nfsv4opinfo_t *" },
- { "nfsv4", "op-release-lockowner-done", 2, 1,
- "RELEASE_LOCKOWNER4res *" },
- { "nfsv4", "op-remove-start", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-remove-start", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-release-lockowner-start", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-release-lockowner-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
+ { "nfsv4", "op-release-lockowner-start", 2, 1, "RELEASE_LOCKOWNER4args *" },
+
+ { "nfsv4", "op-release-lockowner-done", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-release-lockowner-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
+ { "nfsv4", "op-release-lockowner-done", 2, 1, "RELEASE_LOCKOWNER4res *" },
+
+ { "nfsv4", "op-remove-start", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-remove-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-remove-start", 2, 1, "REMOVE4args *" },
- { "nfsv4", "op-remove-done", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-remove-done", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-remove-done", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-remove-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-remove-done", 2, 1, "REMOVE4res *" },
- { "nfsv4", "op-rename-start", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-rename-start", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-rename-start", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-rename-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-rename-start", 2, 1, "RENAME4args *" },
- { "nfsv4", "op-rename-done", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-rename-done", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-rename-done", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-rename-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-rename-done", 2, 1, "RENAME4res *" },
- { "nfsv4", "op-renew-start", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-renew-start", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-renew-start", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-renew-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-renew-start", 2, 1, "RENEW4args *" },
- { "nfsv4", "op-renew-done", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-renew-done", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-renew-done", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-renew-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-renew-done", 2, 1, "RENEW4res *" },
- { "nfsv4", "op-restorefh-start", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-restorefh-start", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
- { "nfsv4", "op-restorefh-done", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-restorefh-done", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-restorefh-start", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-restorefh-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-restorefh-done", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-restorefh-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-restorefh-done", 2, 1, "RESTOREFH4res *" },
- { "nfsv4", "op-savefh-start", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-savefh-start", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
- { "nfsv4", "op-savefh-done", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-savefh-done", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-savefh-start", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-savefh-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-savefh-done", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-savefh-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-savefh-done", 2, 1, "SAVEFH4res *" },
- { "nfsv4", "op-secinfo-start", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-secinfo-start", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-secinfo-start", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-secinfo-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-secinfo-start", 2, 1, "SECINFO4args *" },
- { "nfsv4", "op-secinfo-done", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-secinfo-done", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-secinfo-done", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-secinfo-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-secinfo-done", 2, 1, "SECINFO4res *" },
- { "nfsv4", "op-setattr-start", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-setattr-start", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-setattr-start", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-setattr-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-setattr-start", 2, 1, "SETATTR4args *" },
- { "nfsv4", "op-setattr-done", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-setattr-done", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-setattr-done", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-setattr-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-setattr-done", 2, 1, "SETATTR4res *" },
- { "nfsv4", "op-setclientid-start", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-setclientid-start", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-setclientid-start", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-setclientid-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-setclientid-start", 2, 1, "SETCLIENTID4args *" },
- { "nfsv4", "op-setclientid-done", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-setclientid-done", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-setclientid-done", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-setclientid-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-setclientid-done", 2, 1, "SETCLIENTID4res *" },
- { "nfsv4", "op-setclientid-confirm-start", 0, 0,
- "struct compound_state *", "conninfo_t *" },
- { "nfsv4", "op-setclientid-confirm-start", 1, 0,
- "struct compound_state *", "nfsv4opinfo_t *" },
- { "nfsv4", "op-setclientid-confirm-start", 2, 1,
- "SETCLIENTID_CONFIRM4args *" },
- { "nfsv4", "op-setclientid-confirm-done", 0, 0,
- "struct compound_state *", "conninfo_t *" },
- { "nfsv4", "op-setclientid-confirm-done", 1, 0,
- "struct compound_state *", "nfsv4opinfo_t *" },
- { "nfsv4", "op-setclientid-confirm-done", 2, 1,
- "SETCLIENTID_CONFIRM4res *" },
- { "nfsv4", "op-verify-start", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-verify-start", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-setclientid-confirm-start", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-setclientid-confirm-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
+ { "nfsv4", "op-setclientid-confirm-start", 2, 1, "SETCLIENTID_CONFIRM4args *" },
+
+ { "nfsv4", "op-setclientid-confirm-done", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-setclientid-confirm-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
+ { "nfsv4", "op-setclientid-confirm-done", 2, 1, "SETCLIENTID_CONFIRM4res *" },
+
+ { "nfsv4", "op-verify-start", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-verify-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-verify-start", 2, 1, "VERIFY4args *" },
- { "nfsv4", "op-verify-done", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-verify-done", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-verify-done", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-verify-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-verify-done", 2, 1, "VERIFY4res *" },
- { "nfsv4", "op-write-start", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-write-start", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-write-start", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-write-start", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-write-start", 2, 1, "WRITE4args *" },
- { "nfsv4", "op-write-done", 0, 0, "struct compound_state *",
- "conninfo_t *" },
- { "nfsv4", "op-write-done", 1, 0, "struct compound_state *",
- "nfsv4opinfo_t *" },
+
+ { "nfsv4", "op-write-done", 0, 0, "struct compound_state *", "conninfo_t *" },
+ { "nfsv4", "op-write-done", 1, 0, "struct compound_state *", "nfsv4opinfo_t *" },
{ "nfsv4", "op-write-done", 2, 1, "WRITE4res *" },
- { "nfsv4", "cb-recall-start", 0, 0, "rfs4_client_t *",
- "conninfo_t *" },
- { "nfsv4", "cb-recall-start", 1, 1, "rfs4_deleg_state_t *",
- "nfsv4cbinfo_t *" },
+
+ { "nfsv4", "cb-recall-start", 0, 0, "rfs4_client_t *", "conninfo_t *" },
+ { "nfsv4", "cb-recall-start", 1, 1, "rfs4_deleg_state_t *", "nfsv4cbinfo_t *" },
{ "nfsv4", "cb-recall-start", 2, 2, "CB_RECALL4args *" },
- { "nfsv4", "cb-recall-done", 0, 0, "rfs4_client_t *",
- "conninfo_t *" },
- { "nfsv4", "cb-recall-done", 1, 1, "rfs4_deleg_state_t *",
- "nfsv4cbinfo_t *" },
+
+ { "nfsv4", "cb-recall-done", 0, 0, "rfs4_client_t *", "conninfo_t *" },
+ { "nfsv4", "cb-recall-done", 1, 1, "rfs4_deleg_state_t *", "nfsv4cbinfo_t *" },
{ "nfsv4", "cb-recall-done", 2, 2, "CB_RECALL4res *" },
- /* Tables like this get really ugly when line-wrapped. */
- /* BEGIN CSTYLED */
{ "smb", "op-Close-start", 0, 0, "smb_request_t *", "conninfo_t *" },
{ "smb", "op-Close-start", 1, 0, "smb_request_t *", "smbopinfo_t *" },
{ "smb", "op-Close-done", 0, 0, "smb_request_t *", "conninfo_t *" },
diff --git a/usr/src/uts/common/fs/nfs/nfs3_srv.c b/usr/src/uts/common/fs/nfs/nfs3_srv.c
index 7f5f4611b3..89d65a4d0f 100644
--- a/usr/src/uts/common/fs/nfs/nfs3_srv.c
+++ b/usr/src/uts/common/fs/nfs/nfs3_srv.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2018 Nexenta Systems, Inc.
* Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
*/
@@ -28,6 +28,7 @@
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
+
#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
@@ -68,13 +69,18 @@
#include <inet/ip6.h>
/*
+ * Zone-global variables of the NFSv3 server
+ */
+typedef struct nfs3_srv {
+ writeverf3 write3verf;
+} nfs3_srv_t;
+
+/*
* These are the interface routines for the server side of the
* Network File System. See the NFS version 3 protocol specification
* for a description of this interface.
*/
-static writeverf3 write3verf;
-
static int sattr3_to_vattr(sattr3 *, struct vattr *);
static int vattr_to_fattr3(struct vattr *, fattr3 *);
static int vattr_to_wcc_attr(struct vattr *, wcc_attr *);
@@ -86,6 +92,15 @@ extern int nfs_loaned_buffers;
u_longlong_t nfs3_srv_caller_id;
+static nfs3_srv_t *
+nfs3_get_srv(void)
+{
+ nfs_globals_t *ng = nfs_srv_getzg();
+ nfs3_srv_t *srv = ng->nfs3_srv;
+ ASSERT(srv != NULL);
+ return (srv);
+}
+
/* ARGSUSED */
void
rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
@@ -97,8 +112,9 @@ rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
vp = nfs3_fhtovp(&args->object, exi);
- DTRACE_NFSV3_4(op__getattr__start, struct svc_req *, req,
- cred_t *, cr, vnode_t *, vp, GETATTR3args *, args);
+ DTRACE_NFSV3_5(op__getattr__start, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+ GETATTR3args *, args);
if (vp == NULL) {
error = ESTALE;
@@ -119,8 +135,9 @@ rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
goto out;
resp->status = NFS3_OK;
- DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
- cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
+ DTRACE_NFSV3_5(op__getattr__done, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+ GETATTR3res *, resp);
VN_RELE(vp);
@@ -134,8 +151,9 @@ out:
} else
resp->status = puterrno3(error);
- DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
- cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
+ DTRACE_NFSV3_5(op__getattr__done, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+ GETATTR3res *, resp);
if (vp != NULL)
VN_RELE(vp);
@@ -168,8 +186,9 @@ rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
vp = nfs3_fhtovp(&args->object, exi);
- DTRACE_NFSV3_4(op__setattr__start, struct svc_req *, req,
- cred_t *, cr, vnode_t *, vp, SETATTR3args *, args);
+ DTRACE_NFSV3_5(op__setattr__start, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+ SETATTR3args *, args);
if (vp == NULL) {
error = ESTALE;
@@ -330,8 +349,9 @@ rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
resp->status = NFS3_OK;
vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);
- DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
- cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);
+ DTRACE_NFSV3_5(op__setattr__done, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+ SETATTR3res *, resp);
VN_RELE(vp);
@@ -344,8 +364,9 @@ out:
} else
resp->status = puterrno3(error);
out1:
- DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
- cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);
+ DTRACE_NFSV3_5(op__setattr__done, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+ SETATTR3res *, resp);
if (vp != NULL) {
if (in_crit)
@@ -390,16 +411,19 @@ rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
* location of the public filehandle.
*/
if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
- dvp = rootdir;
+ ASSERT3U(exi->exi_zoneid, ==, curzone->zone_id);
+ dvp = ZONE_ROOTVP();
VN_HOLD(dvp);
- DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
- cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
+ DTRACE_NFSV3_5(op__lookup__start, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
+ LOOKUP3args *, args);
} else {
dvp = nfs3_fhtovp(&args->what.dir, exi);
- DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
- cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
+ DTRACE_NFSV3_5(op__lookup__start, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
+ LOOKUP3args *, args);
if (dvp == NULL) {
error = ESTALE;
@@ -421,10 +445,11 @@ rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
}
fhp = &args->what.dir;
+ ASSERT3U(curzone->zone_id, ==, exi->exi_zoneid); /* exi is non-NULL */
if (strcmp(args->what.name, "..") == 0 &&
EQFID(&exi->exi_fid, FH3TOFIDP(fhp))) {
if ((exi->exi_export.ex_flags & EX_NOHIDE) &&
- (dvp->v_flag & VROOT)) {
+ ((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp))) {
/*
* special case for ".." and 'nohide'exported root
*/
@@ -455,6 +480,7 @@ rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
publicfh_flag = TRUE;
exi_rele(exi);
+ exi = NULL;
error = rfs_publicfh_mclookup(name, dvp, cr, &vp,
&exi, &sec);
@@ -538,7 +564,6 @@ rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
va.va_mask = AT_ALL;
vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
- exi_rele(exi);
VN_RELE(vp);
resp->status = NFS3_OK;
@@ -553,9 +578,11 @@ rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
if (auth_weak)
resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
- DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
- cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
+ DTRACE_NFSV3_5(op__lookup__done, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
+ LOOKUP3res *, resp);
VN_RELE(dvp);
+ exi_rele(exi);
return;
@@ -566,12 +593,13 @@ out:
} else
resp->status = puterrno3(error);
out1:
+ DTRACE_NFSV3_5(op__lookup__done, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
+ LOOKUP3res *, resp);
+
if (exi != NULL)
exi_rele(exi);
- DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
- cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
-
if (dvp != NULL)
VN_RELE(dvp);
vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
@@ -603,8 +631,9 @@ rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
vp = nfs3_fhtovp(&args->object, exi);
- DTRACE_NFSV3_4(op__access__start, struct svc_req *, req,
- cred_t *, cr, vnode_t *, vp, ACCESS3args *, args);
+ DTRACE_NFSV3_5(op__access__start, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+ ACCESS3args *, args);
if (vp == NULL) {
error = ESTALE;
@@ -714,8 +743,9 @@ rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
resp->status = NFS3_OK;
vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
- DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
- cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
+ DTRACE_NFSV3_5(op__access__done, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+ ACCESS3res *, resp);
VN_RELE(vp);
@@ -727,8 +757,9 @@ out:
resp->status = NFS3ERR_JUKEBOX;
} else
resp->status = puterrno3(error);
- DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
- cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
+ DTRACE_NFSV3_5(op__access__done, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+ ACCESS3res *, resp);
if (vp != NULL)
VN_RELE(vp);
vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
@@ -761,8 +792,9 @@ rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
vp = nfs3_fhtovp(&args->symlink, exi);
- DTRACE_NFSV3_4(op__readlink__start, struct svc_req *, req,
- cred_t *, cr, vnode_t *, vp, READLINK3args *, args);
+ DTRACE_NFSV3_5(op__readlink__start, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+ READLINK3args *, args);
if (vp == NULL) {
error = ESTALE;
@@ -811,10 +843,11 @@ rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
if (is_referral) {
char *s;
size_t strsz;
+ kstat_named_t *stat = exi->exi_ne->ne_globals->svstat[NFS_V3];
/* Get an artificial symlink based on a referral */
s = build_symlink(vp, cr, &strsz);
- global_svstat_ptr[3][NFS_REFERLINKS].value.ui64++;
+ stat[NFS_REFERLINKS].value.ui64++;
DTRACE_PROBE2(nfs3serv__func__referral__reflink,
vnode_t *, vp, char *, s);
if (s == NULL)
@@ -882,8 +915,9 @@ rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
resp->resok.data = name;
- DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
- cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
+ DTRACE_NFSV3_5(op__readlink__done, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+ READLINK3res *, resp);
VN_RELE(vp);
if (name != data)
@@ -898,8 +932,9 @@ out:
} else
resp->status = puterrno3(error);
out1:
- DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
- cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
+ DTRACE_NFSV3_5(op__readlink__done, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+ READLINK3res *, resp);
if (vp != NULL)
VN_RELE(vp);
vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
@@ -949,8 +984,10 @@ rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
vp = nfs3_fhtovp(&args->file, exi);
- DTRACE_NFSV3_4(op__read__start, struct svc_req *, req,
- cred_t *, cr, vnode_t *, vp, READ3args *, args);
+ DTRACE_NFSV3_5(op__read__start, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+ READ3args *, args);
+
if (vp == NULL) {
error = ESTALE;
@@ -1205,8 +1242,9 @@ doio_read:
}
done:
- DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
- cred_t *, cr, vnode_t *, vp, READ3res *, resp);
+ DTRACE_NFSV3_5(op__read__done, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+ READ3res *, resp);
VN_RELE(vp);
@@ -1222,8 +1260,9 @@ out:
} else
resp->status = puterrno3(error);
out1:
- DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
- cred_t *, cr, vnode_t *, vp, READ3res *, resp);
+ DTRACE_NFSV3_5(op__read__done, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+ READ3res *, resp);
if (vp != NULL) {
if (need_rwunlock)
@@ -1268,6 +1307,7 @@ void
rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
struct svc_req *req, cred_t *cr, bool_t ro)
{
+ nfs3_srv_t *ns;
int error;
vnode_t *vp;
struct vattr *bvap = NULL;
@@ -1288,14 +1328,18 @@ rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
vp = nfs3_fhtovp(&args->file, exi);
- DTRACE_NFSV3_4(op__write__start, struct svc_req *, req,
- cred_t *, cr, vnode_t *, vp, WRITE3args *, args);
+ DTRACE_NFSV3_5(op__write__start, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+ WRITE3args *, args);
if (vp == NULL) {
error = ESTALE;
goto err;
}
+ ASSERT3U(curzone->zone_id, ==, exi->exi_zoneid); /* exi is non-NULL. */
+ ns = nfs3_get_srv();
+
if (is_system_labeled()) {
bslabel_t *clabel = req->rq_label;
@@ -1383,7 +1427,7 @@ rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
resp->resok.count = 0;
resp->resok.committed = args->stable;
- resp->resok.verf = write3verf;
+ resp->resok.verf = ns->write3verf;
goto out;
}
@@ -1485,7 +1529,7 @@ rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
resp->resok.count = args->count - uio.uio_resid;
resp->resok.committed = args->stable;
- resp->resok.verf = write3verf;
+ resp->resok.verf = ns->write3verf;
goto out;
err:
@@ -1497,8 +1541,9 @@ err:
err1:
vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
out:
- DTRACE_NFSV3_4(op__write__done, struct svc_req *, req,
- cred_t *, cr, vnode_t *, vp, WRITE3res *, resp);
+ DTRACE_NFSV3_5(op__write__done, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+ WRITE3res *, resp);
if (vp != NULL) {
if (rwlock_ret != -1)
@@ -1543,8 +1588,9 @@ rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
dvp = nfs3_fhtovp(&args->where.dir, exi);
- DTRACE_NFSV3_4(op__create__start, struct svc_req *, req,
- cred_t *, cr, vnode_t *, dvp, CREATE3args *, args);
+ DTRACE_NFSV3_5(op__create__start, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
+ CREATE3args *, args);
if (dvp == NULL) {
error = ESTALE;
@@ -1843,8 +1889,9 @@ tryagain:
vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
- DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
- cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);
+ DTRACE_NFSV3_5(op__create__done, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
+ CREATE3res *, resp);
VN_RELE(dvp);
return;
@@ -1856,8 +1903,9 @@ out:
} else
resp->status = puterrno3(error);
out1:
- DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
- cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);
+ DTRACE_NFSV3_5(op__create__done, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
+ CREATE3res *, resp);
if (name != NULL && name != args->where.name)
kmem_free(name, MAXPATHLEN + 1);
@@ -1900,8 +1948,9 @@ rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
dvp = nfs3_fhtovp(&args->where.dir, exi);
- DTRACE_NFSV3_4(op__mkdir__start, struct svc_req *, req,
- cred_t *, cr, vnode_t *, dvp, MKDIR3args *, args);
+ DTRACE_NFSV3_5(op__mkdir__start, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
+ MKDIR3args *, args);
if (dvp == NULL) {
error = ESTALE;
@@ -2000,8 +2049,9 @@ rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
- DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
- cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
+ DTRACE_NFSV3_5(op__mkdir__done, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
+ MKDIR3res *, resp);
VN_RELE(dvp);
return;
@@ -2013,8 +2063,9 @@ out:
} else
resp->status = puterrno3(error);
out1:
- DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
- cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
+ DTRACE_NFSV3_5(op__mkdir__done, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
+ MKDIR3res *, resp);
if (dvp != NULL)
VN_RELE(dvp);
vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
@@ -2049,8 +2100,9 @@ rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
dvp = nfs3_fhtovp(&args->where.dir, exi);
- DTRACE_NFSV3_4(op__symlink__start, struct svc_req *, req,
- cred_t *, cr, vnode_t *, dvp, SYMLINK3args *, args);
+ DTRACE_NFSV3_5(op__symlink__start, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
+ SYMLINK3args *, args);
if (dvp == NULL) {
error = ESTALE;
@@ -2187,8 +2239,9 @@ out:
if (symdata != NULL && symdata != args->symlink.symlink_data)
kmem_free(symdata, MAXPATHLEN + 1);
- DTRACE_NFSV3_4(op__symlink__done, struct svc_req *, req,
- cred_t *, cr, vnode_t *, dvp, SYMLINK3res *, resp);
+ DTRACE_NFSV3_5(op__symlink__done, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
+ SYMLINK3res *, resp);
if (dvp != NULL)
VN_RELE(dvp);
@@ -2225,8 +2278,9 @@ rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
dvp = nfs3_fhtovp(&args->where.dir, exi);
- DTRACE_NFSV3_4(op__mknod__start, struct svc_req *, req,
- cred_t *, cr, vnode_t *, dvp, MKNOD3args *, args);
+ DTRACE_NFSV3_5(op__mknod__start, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
+ MKNOD3args *, args);
if (dvp == NULL) {
error = ESTALE;
@@ -2372,8 +2426,9 @@ rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
- DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
- cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
+ DTRACE_NFSV3_5(op__mknod__done, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
+ MKNOD3res *, resp);
VN_RELE(dvp);
return;
@@ -2384,8 +2439,9 @@ out:
} else
resp->status = puterrno3(error);
out1:
- DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
- cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
+ DTRACE_NFSV3_5(op__mknod__done, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, dvp, struct exportinfo *, exi,
+ MKNOD3res *, resp);
if (dvp != NULL)
VN_RELE(dvp);
vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
@@ -2417,8 +2473,9 @@ rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
vp = nfs3_fhtovp(&args->object.dir, exi);
- DTRACE_NFSV3_4(op__remove__start, struct svc_req *, req,
- cred_t *, cr, vnode_t *, vp, REMOVE3args *, args);
+ DTRACE_NFSV3_5(op__remove__start, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+ REMOVE3args *, args);
if (vp == NULL) {
error = ESTALE;
@@ -2526,8 +2583,9 @@ err:
err1:
vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
out:
- DTRACE_NFSV3_4(op__remove__done, struct svc_req *, req,
- cred_t *, cr, vnode_t *, vp, REMOVE3res *, resp);
+ DTRACE_NFSV3_5(op__remove__done, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+ REMOVE3res *, resp);
if (name != NULL && name != args->object.name)
kmem_free(name, MAXPATHLEN + 1);
@@ -2561,8 +2619,9 @@ rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
vp = nfs3_fhtovp(&args->object.dir, exi);
- DTRACE_NFSV3_4(op__rmdir__start, struct svc_req *, req,
- cred_t *, cr, vnode_t *, vp, RMDIR3args *, args);
+ DTRACE_NFSV3_5(op__rmdir__start, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+ RMDIR3args *, args);
if (vp == NULL) {
error = ESTALE;
@@ -2618,7 +2677,8 @@ rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
goto err1;
}
- error = VOP_RMDIR(vp, name, rootdir, cr, NULL, 0);
+ ASSERT3U(exi->exi_zoneid, ==, curzone->zone_id);
+ error = VOP_RMDIR(vp, name, ZONE_ROOTVP(), cr, NULL, 0);
if (name != args->object.name)
kmem_free(name, MAXPATHLEN + 1);
@@ -2656,8 +2716,9 @@ err:
err1:
vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
out:
- DTRACE_NFSV3_4(op__rmdir__done, struct svc_req *, req,
- cred_t *, cr, vnode_t *, vp, RMDIR3res *, resp);
+ DTRACE_NFSV3_5(op__rmdir__done, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+ RMDIR3res *, resp);
if (vp != NULL)
VN_RELE(vp);
@@ -2702,8 +2763,9 @@ rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
fvp = nfs3_fhtovp(&args->from.dir, exi);
- DTRACE_NFSV3_4(op__rename__start, struct svc_req *, req,
- cred_t *, cr, vnode_t *, fvp, RENAME3args *, args);
+ DTRACE_NFSV3_5(op__rename__start, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, fvp, struct exportinfo *, exi,
+ RENAME3args *, args);
if (fvp == NULL) {
error = ESTALE;
@@ -2820,10 +2882,10 @@ rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
}
/*
- * Check for renaming over a delegated file. Check rfs4_deleg_policy
+ * Check for renaming over a delegated file. Check nfs4_deleg_policy
* first to avoid VOP_LOOKUP if possible.
*/
- if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
+ if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE &&
VOP_LOOKUP(tvp, toname, &targvp, NULL, 0, NULL, cr,
NULL, NULL, NULL) == 0) {
@@ -2887,8 +2949,9 @@ out:
if (toname != NULL && toname != args->to.name)
kmem_free(toname, MAXPATHLEN + 1);
- DTRACE_NFSV3_4(op__rename__done, struct svc_req *, req,
- cred_t *, cr, vnode_t *, fvp, RENAME3res *, resp);
+ DTRACE_NFSV3_5(op__rename__done, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, fvp, struct exportinfo *, exi,
+ RENAME3res *, resp);
if (fvp != NULL)
VN_RELE(fvp);
if (tvp != NULL)
@@ -2928,8 +2991,9 @@ rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
vp = nfs3_fhtovp(&args->file, exi);
- DTRACE_NFSV3_4(op__link__start, struct svc_req *, req,
- cred_t *, cr, vnode_t *, vp, LINK3args *, args);
+ DTRACE_NFSV3_5(op__link__start, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+ LINK3args *, args);
if (vp == NULL) {
error = ESTALE;
@@ -3041,8 +3105,9 @@ rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
- DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
- cred_t *, cr, vnode_t *, vp, LINK3res *, resp);
+ DTRACE_NFSV3_5(op__link__done, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+ LINK3res *, resp);
VN_RELE(vp);
@@ -3058,8 +3123,9 @@ out1:
if (name != NULL && name != args->link.name)
kmem_free(name, MAXPATHLEN + 1);
- DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
- cred_t *, cr, vnode_t *, vp, LINK3res *, resp);
+ DTRACE_NFSV3_5(op__link__done, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+ LINK3res *, resp);
if (vp != NULL)
VN_RELE(vp);
@@ -3127,8 +3193,9 @@ rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
vp = nfs3_fhtovp(&args->dir, exi);
- DTRACE_NFSV3_4(op__readdir__start, struct svc_req *, req,
- cred_t *, cr, vnode_t *, vp, READDIR3args *, args);
+ DTRACE_NFSV3_5(op__readdir__start, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+ READDIR3args *, args);
if (vp == NULL) {
error = ESTALE;
@@ -3292,8 +3359,9 @@ rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
resp->resok.count = args->count;
resp->resok.freecount = count;
- DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
- cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
+ DTRACE_NFSV3_5(op__readdir__done, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+ READDIR3res *, resp);
VN_RELE(vp);
@@ -3306,8 +3374,11 @@ out:
} else
resp->status = puterrno3(error);
out1:
- DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
- cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
+ vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
+
+ DTRACE_NFSV3_5(op__readdir__done, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+ READDIR3res *, resp);
if (vp != NULL) {
VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
@@ -3398,8 +3469,9 @@ rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
vp = nfs3_fhtovp(&args->dir, exi);
- DTRACE_NFSV3_4(op__readdirplus__start, struct svc_req *, req,
- cred_t *, cr, vnode_t *, vp, READDIRPLUS3args *, args);
+ DTRACE_NFSV3_5(op__readdirplus__start, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+ READDIRPLUS3args *, args);
if (vp == NULL) {
error = ESTALE;
@@ -3681,11 +3753,9 @@ good:
resp->resok.count = args->dircount - ret;
resp->resok.maxcount = args->maxcount;
- DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
- cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
- if (ndata != data)
- kmem_free(data, args->dircount);
-
+ DTRACE_NFSV3_5(op__readdirplus__done, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+ READDIRPLUS3res *, resp);
VN_RELE(vp);
@@ -3699,8 +3769,11 @@ out:
resp->status = puterrno3(error);
}
out1:
- DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
- cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
+ vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
+
+ DTRACE_NFSV3_5(op__readdirplus__done, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+ READDIRPLUS3res *, resp);
if (vp != NULL) {
VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
@@ -3746,8 +3819,9 @@ rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
vp = nfs3_fhtovp(&args->fsroot, exi);
- DTRACE_NFSV3_4(op__fsstat__start, struct svc_req *, req,
- cred_t *, cr, vnode_t *, vp, FSSTAT3args *, args);
+ DTRACE_NFSV3_5(op__fsstat__start, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+ FSSTAT3args *, args);
if (vp == NULL) {
error = ESTALE;
@@ -3797,8 +3871,9 @@ rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
resp->resok.afiles = (size3)sb.f_favail;
resp->resok.invarsec = 0;
- DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
- cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
+ DTRACE_NFSV3_5(op__fsstat__done, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+ FSSTAT3res *, resp);
VN_RELE(vp);
return;
@@ -3810,8 +3885,9 @@ out:
} else
resp->status = puterrno3(error);
out1:
- DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
- cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
+ DTRACE_NFSV3_5(op__fsstat__done, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+ FSSTAT3res *, resp);
if (vp != NULL)
VN_RELE(vp);
@@ -3839,8 +3915,9 @@ rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
vp = nfs3_fhtovp(&args->fsroot, exi);
- DTRACE_NFSV3_4(op__fsinfo__start, struct svc_req *, req,
- cred_t *, cr, vnode_t *, vp, FSINFO3args *, args);
+ DTRACE_NFSV3_5(op__fsinfo__start, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+ FSINFO3args *, args);
if (vp == NULL) {
if (curthread->t_flag & T_WOULDBLOCK) {
@@ -3914,16 +3991,18 @@ rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
- DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
- cred_t *, cr, vnode_t *, vp, FSINFO3res *, resp);
+ DTRACE_NFSV3_5(op__fsinfo__done, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+ FSINFO3res *, resp);
VN_RELE(vp);
return;
out:
- DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
- cred_t *, cr, vnode_t *, NULL, FSINFO3res *, resp);
+ DTRACE_NFSV3_5(op__fsinfo__done, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, NULL, struct exportinfo *, exi,
+ FSINFO3res *, resp);
if (vp != NULL)
VN_RELE(vp);
}
@@ -3949,8 +4028,9 @@ rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
vp = nfs3_fhtovp(&args->object, exi);
- DTRACE_NFSV3_4(op__pathconf__start, struct svc_req *, req,
- cred_t *, cr, vnode_t *, vp, PATHCONF3args *, args);
+ DTRACE_NFSV3_5(op__pathconf__start, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+ PATHCONF3args *, args);
if (vp == NULL) {
error = ESTALE;
@@ -4006,8 +4086,9 @@ rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
resp->resok.info.case_insensitive = FALSE;
resp->resok.info.case_preserving = TRUE;
- DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
- cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
+ DTRACE_NFSV3_5(op__pathconf__done, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+ PATHCONF3res *, resp);
VN_RELE(vp);
return;
@@ -4018,8 +4099,9 @@ out:
} else
resp->status = puterrno3(error);
out1:
- DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
- cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
+ DTRACE_NFSV3_5(op__pathconf__done, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+ PATHCONF3res *, resp);
if (vp != NULL)
VN_RELE(vp);
vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
@@ -4036,6 +4118,7 @@ void
rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
struct svc_req *req, cred_t *cr, bool_t ro)
{
+ nfs3_srv_t *ns;
int error;
vnode_t *vp;
struct vattr *bvap;
@@ -4048,14 +4131,17 @@ rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
vp = nfs3_fhtovp(&args->file, exi);
- DTRACE_NFSV3_4(op__commit__start, struct svc_req *, req,
- cred_t *, cr, vnode_t *, vp, COMMIT3args *, args);
+ DTRACE_NFSV3_5(op__commit__start, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+ COMMIT3args *, args);
if (vp == NULL) {
error = ESTALE;
goto out;
}
+ ASSERT3U(curzone->zone_id, ==, exi->exi_zoneid); /* exi is non-NULL. */
+ ns = nfs3_get_srv();
bva.va_mask = AT_ALL;
error = VOP_GETATTR(vp, &bva, 0, cr, NULL);
@@ -4108,10 +4194,11 @@ rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
resp->status = NFS3_OK;
vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
- resp->resok.verf = write3verf;
+ resp->resok.verf = ns->write3verf;
- DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
- cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
+ DTRACE_NFSV3_5(op__commit__done, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+ COMMIT3res *, resp);
VN_RELE(vp);
@@ -4124,8 +4211,9 @@ out:
} else
resp->status = puterrno3(error);
out1:
- DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
- cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
+ DTRACE_NFSV3_5(op__commit__done, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, vp, struct exportinfo *, exi,
+ COMMIT3res *, resp);
if (vp != NULL)
VN_RELE(vp);
@@ -4203,7 +4291,7 @@ sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
return (0);
}
-static ftype3 vt_to_nf3[] = {
+static const ftype3 vt_to_nf3[] = {
0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
};
@@ -4285,20 +4373,39 @@ vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
static void
vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
{
-
vattr_to_pre_op_attr(bvap, &wccp->before);
vattr_to_post_op_attr(avap, &wccp->after);
}
+static int
+rdma_setup_read_data3(READ3args *args, READ3resok *rok)
+{
+ struct clist *wcl;
+ int wlist_len;
+ count3 count = rok->count;
+
+ wcl = args->wlist;
+ if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE)
+ return (FALSE);
+
+ wcl = args->wlist;
+ rok->wlist_len = wlist_len;
+ rok->wlist = wcl;
+ return (TRUE);
+}
+
void
-rfs3_srvrinit(void)
+rfs3_srv_zone_init(nfs_globals_t *ng)
{
+ nfs3_srv_t *ns;
struct rfs3_verf_overlay {
uint_t id; /* a "unique" identifier */
int ts; /* a unique timestamp */
} *verfp;
timestruc_t now;
+ ns = kmem_zalloc(sizeof (*ns), KM_SLEEP);
+
/*
* The following algorithm attempts to find a unique verifier
* to be used as the write verifier returned from the server
@@ -4322,37 +4429,34 @@ rfs3_srvrinit(void)
* We ASSERT that this constant logic expression is
* always true because in the past, it wasn't.
*/
- ASSERT(sizeof (*verfp) <= sizeof (write3verf));
+ ASSERT(sizeof (*verfp) <= sizeof (ns->write3verf));
#endif
gethrestime(&now);
- verfp = (struct rfs3_verf_overlay *)&write3verf;
+ verfp = (struct rfs3_verf_overlay *)&ns->write3verf;
verfp->ts = (int)now.tv_sec;
verfp->id = zone_get_hostid(NULL);
if (verfp->id == 0)
verfp->id = (uint_t)now.tv_nsec;
- nfs3_srv_caller_id = fs_new_caller_id();
-
+ ng->nfs3_srv = ns;
}
-static int
-rdma_setup_read_data3(READ3args *args, READ3resok *rok)
+void
+rfs3_srv_zone_fini(nfs_globals_t *ng)
{
- struct clist *wcl;
- int wlist_len;
- count3 count = rok->count;
+ nfs3_srv_t *ns = ng->nfs3_srv;
- wcl = args->wlist;
- if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
- return (FALSE);
- }
+ ng->nfs3_srv = NULL;
- wcl = args->wlist;
- rok->wlist_len = wlist_len;
- rok->wlist = wcl;
- return (TRUE);
+ kmem_free(ns, sizeof (*ns));
+}
+
+void
+rfs3_srvrinit(void)
+{
+ nfs3_srv_caller_id = fs_new_caller_id();
}
void
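The nfs3_srv.c hunks above fold the old file-global write3verf into a per-zone
nfs3_srv_t reached through nfs_srv_getzg(), with rfs3_srv_zone_init() and
rfs3_srv_zone_fini() owning its lifetime. Below is a minimal userland sketch of
that per-zone-state pattern, assuming calloc/free in place of
kmem_zalloc/kmem_free; zone_globals_t, zone_nfs3_get() and the other names are
illustrative stand-ins, not symbols from the patch.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Userland stand-ins for the kernel's per-zone NFS globals. */
typedef struct nfs3_state {
	uint64_t write_verf;		/* analogue of write3verf */
} nfs3_state_t;

typedef struct zone_globals {
	int zone_id;
	nfs3_state_t *nfs3;		/* analogue of ng->nfs3_srv */
} zone_globals_t;

/* One slot per "zone"; the kernel hangs this off zone-keyed data instead. */
static zone_globals_t zones[4];

static void
zone_nfs3_init(zone_globals_t *zg, int zone_id)
{
	zg->zone_id = zone_id;
	zg->nfs3 = calloc(1, sizeof (nfs3_state_t));
	assert(zg->nfs3 != NULL);
	/* Seed the verifier from something zone-unique; here just the id. */
	zg->nfs3->write_verf = 0xfeedface00000000ull | (unsigned)zone_id;
}

static void
zone_nfs3_fini(zone_globals_t *zg)
{
	free(zg->nfs3);
	zg->nfs3 = NULL;
}

/* Accessor mirroring nfs3_get_srv(): state must exist while serving. */
static nfs3_state_t *
zone_nfs3_get(zone_globals_t *zg)
{
	assert(zg->nfs3 != NULL);
	return (zg->nfs3);
}

int
main(void)
{
	zone_nfs3_init(&zones[0], 0);
	zone_nfs3_init(&zones[1], 1);
	printf("zone 0 verf %llx\n",
	    (unsigned long long)zone_nfs3_get(&zones[0])->write_verf);
	printf("zone 1 verf %llx\n",
	    (unsigned long long)zone_nfs3_get(&zones[1])->write_verf);
	zone_nfs3_fini(&zones[1]);
	zone_nfs3_fini(&zones[0]);
	return (0);
}

The accessor asserts that the state exists, matching the ASSERT(srv != NULL) in
nfs3_get_srv(): requests are only serviced while the zone's NFS globals are
initialized.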
diff --git a/usr/src/uts/common/fs/nfs/nfs4_callback.c b/usr/src/uts/common/fs/nfs/nfs4_callback.c
index 729c8658fe..0d604fca70 100644
--- a/usr/src/uts/common/fs/nfs/nfs4_callback.c
+++ b/usr/src/uts/common/fs/nfs/nfs4_callback.c
@@ -560,13 +560,17 @@ cb_compound(CB_COMPOUND4args *args, CB_COMPOUND4res *resp, struct svc_req *req,
cs.cont = TRUE;
/*
- * Form a reply tag by copying over the reqeuest tag.
+ * Form a reply tag by copying over the request tag.
*/
resp->tag.utf8string_len = args->tag.utf8string_len;
- resp->tag.utf8string_val = kmem_alloc(resp->tag.utf8string_len,
- KM_SLEEP);
- bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
- args->tag.utf8string_len);
+ if (args->tag.utf8string_len != 0) {
+ resp->tag.utf8string_val =
+ kmem_alloc(resp->tag.utf8string_len, KM_SLEEP);
+ bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
+ args->tag.utf8string_len);
+ } else {
+ resp->tag.utf8string_val = NULL;
+ }
/*
* XXX for now, minorversion should be zero
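The cb_compound() change above copies the COMPOUND reply tag only when the
request tag is non-empty, leaving utf8string_val NULL for a zero-length tag
instead of doing a zero-length kmem_alloc(). A small userland sketch of the
same guard, assuming malloc/memcpy in place of kmem_alloc/bcopy; utf8str_t and
copy_tag() are illustrative names, not from the patch.

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Userland analogue of the utf8string used for COMPOUND tags. */
typedef struct utf8str {
	unsigned len;
	char *val;	/* not NUL-terminated; may be NULL when len == 0 */
} utf8str_t;

/*
 * Copy a request tag into a reply tag: allocate and copy only when the
 * length is non-zero, otherwise leave the value pointer NULL.
 */
static void
copy_tag(const utf8str_t *req, utf8str_t *rep)
{
	rep->len = req->len;
	if (req->len != 0) {
		rep->val = malloc(req->len);
		assert(rep->val != NULL);
		memcpy(rep->val, req->val, req->len);
	} else {
		rep->val = NULL;
	}
}

int
main(void)
{
	utf8str_t empty = { 0, NULL };
	utf8str_t req = { 5, "hello" };
	utf8str_t rep;

	copy_tag(&empty, &rep);
	printf("empty tag -> len %u, val %p\n", rep.len, (void *)rep.val);

	copy_tag(&req, &rep);
	printf("tag -> len %u, first byte '%c'\n", rep.len, rep.val[0]);
	free(rep.val);
	return (0);
}

The guard keeps the allocation and the eventual free symmetric: both happen
only when there is actually tag data to carry.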
diff --git a/usr/src/uts/common/fs/nfs/nfs4_db.c b/usr/src/uts/common/fs/nfs/nfs4_db.c
index fbecb86f64..b97567be70 100644
--- a/usr/src/uts/common/fs/nfs/nfs4_db.c
+++ b/usr/src/uts/common/fs/nfs/nfs4_db.c
@@ -18,10 +18,15 @@
*
* CDDL HEADER END
*/
+
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
*/
+/*
+ * Copyright 2018 Nexenta Systems, Inc.
+ */
+
#include <sys/systm.h>
#include <sys/cmn_err.h>
#include <sys/kmem.h>
@@ -249,6 +254,50 @@ rfs4_database_destroy(rfs4_database_t *db)
kmem_free(db, sizeof (rfs4_database_t));
}
+/*
+ * Used to get the correct kmem_cache database for the state table being
+ * created.
+ * Helper function for rfs4_table_create
+ */
+static kmem_cache_t *
+get_db_mem_cache(char *name)
+{
+ int i;
+
+ for (i = 0; i < RFS4_DB_MEM_CACHE_NUM; i++) {
+ if (strcmp(name, rfs4_db_mem_cache_table[i].r_db_name) == 0)
+ return (rfs4_db_mem_cache_table[i].r_db_mem_cache);
+ }
+ /*
+ * There is no associated kmem cache for this NFS4 server state
+ * table name
+ */
+ return (NULL);
+}
+
+/*
+ * Used to initialize the global NFSv4 server state database.
+ * Helper function for rfs4_state_g_init, called when the module is loaded.
+ */
+kmem_cache_t *
+/* CSTYLED */
+nfs4_init_mem_cache(char *cache_name, uint32_t idxcnt, uint32_t size, uint32_t idx)
+{
+ kmem_cache_t *mem_cache = kmem_cache_create(cache_name,
+ sizeof (rfs4_dbe_t) + idxcnt * sizeof (rfs4_link_t) + size,
+ 0,
+ rfs4_dbe_kmem_constructor,
+ rfs4_dbe_kmem_destructor,
+ NULL,
+ NULL,
+ NULL,
+ 0);
+ (void) strlcpy(rfs4_db_mem_cache_table[idx].r_db_name, cache_name,
+ strlen(cache_name) + 1);
+ rfs4_db_mem_cache_table[idx].r_db_mem_cache = mem_cache;
+ return (mem_cache);
+}
+
rfs4_table_t *
rfs4_table_create(rfs4_database_t *db, char *tabname, time_t max_cache_time,
uint32_t idxcnt, bool_t (*create)(rfs4_entry_t, void *),
@@ -304,15 +353,11 @@ rfs4_table_create(rfs4_database_t *db, char *tabname, time_t max_cache_time,
table->dbt_destroy = destroy;
table->dbt_expiry = expiry;
- table->dbt_mem_cache = kmem_cache_create(cache_name,
- sizeof (rfs4_dbe_t) + idxcnt * sizeof (rfs4_link_t) + size,
- 0,
- rfs4_dbe_kmem_constructor,
- rfs4_dbe_kmem_destructor,
- NULL,
- table,
- NULL,
- 0);
+ /*
+ * get the correct kmem_cache for this table type based on the name.
+ */
+ table->dbt_mem_cache = get_db_mem_cache(cache_name);
+
kmem_free(cache_name, len+13);
table->dbt_debug = db->db_debug_flags;
@@ -364,7 +409,7 @@ rfs4_table_destroy(rfs4_database_t *db, rfs4_table_t *table)
kmem_free(table->dbt_name, strlen(table->dbt_name) + 1);
if (table->dbt_id_space)
id_space_destroy(table->dbt_id_space);
- kmem_cache_destroy(table->dbt_mem_cache);
+ table->dbt_mem_cache = NULL;
kmem_free(table, sizeof (rfs4_table_t));
}
@@ -683,12 +728,14 @@ retry:
boolean_t
rfs4_cpr_callb(void *arg, int code)
{
- rfs4_table_t *table = rfs4_client_tab;
rfs4_bucket_t *buckets, *bp;
rfs4_link_t *l;
rfs4_client_t *cp;
int i;
+ nfs4_srv_t *nsrv4 = nfs4_get_srv();
+ rfs4_table_t *table = nsrv4->rfs4_client_tab;
+
/*
* We get called for Suspend and Resume events.
* For the suspend case we simply don't care! Nor do we care if
@@ -879,6 +926,7 @@ reaper_thread(caddr_t *arg)
table->dbt_db->db_shutdown_count--;
cv_signal(&table->dbt_db->db_shutdown_wait);
mutex_exit(table->dbt_db->db_lock);
+ zthread_exit();
}
static void
@@ -887,7 +935,7 @@ rfs4_start_reaper(rfs4_table_t *table)
if (table->dbt_max_cache_time == 0)
return;
- (void) thread_create(NULL, 0, reaper_thread, table, 0, &p0, TS_RUN,
+ (void) zthread_create(NULL, 0, reaper_thread, table, 0,
minclsyspri);
}
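The nfs4_db.c hunks above stop creating and destroying a kmem cache for every
state table; the caches are created once up front (nfs4_init_mem_cache()) and
rfs4_table_create() then looks the shared cache up by name through
get_db_mem_cache(). A minimal userland sketch of that register/lookup-by-name
pattern, assuming an ordinary static array and strcmp(); the cache names and
the db_cache_* helpers are illustrative, not from the patch.

#include <stdio.h>
#include <string.h>

#define	DB_CACHE_NUM	4

/*
 * Userland analogue of rfs4_db_mem_cache_table: caches are registered once,
 * globally, and state tables later look them up by name instead of creating
 * a fresh cache per table instance.
 */
typedef struct db_cache {
	char name[32];
	void *cache;		/* stand-in for a kmem_cache_t pointer */
} db_cache_t;

static db_cache_t db_cache_table[DB_CACHE_NUM];

/* Mirrors nfs4_init_mem_cache(): record a cache under its name. */
static void
db_cache_register(unsigned idx, const char *name, void *cache)
{
	(void) snprintf(db_cache_table[idx].name,
	    sizeof (db_cache_table[idx].name), "%s", name);
	db_cache_table[idx].cache = cache;
}

/* Mirrors get_db_mem_cache(): find the shared cache for a table name. */
static void *
db_cache_lookup(const char *name)
{
	unsigned i;

	for (i = 0; i < DB_CACHE_NUM; i++) {
		if (strcmp(name, db_cache_table[i].name) == 0)
			return (db_cache_table[i].cache);
	}
	return (NULL);	/* no cache registered under this name */
}

int
main(void)
{
	static int client_cache, state_cache;

	db_cache_register(0, "client_entry_cache", &client_cache);
	db_cache_register(1, "state_entry_cache", &state_cache);

	printf("client_entry_cache -> %p\n",
	    db_cache_lookup("client_entry_cache"));
	printf("unknown_cache -> %p\n", db_cache_lookup("unknown_cache"));
	return (0);
}

A NULL result corresponds to a table name with no pre-created cache, the same
"no associated cache" case for which get_db_mem_cache() returns NULL.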
diff --git a/usr/src/uts/common/fs/nfs/nfs4_dispatch.c b/usr/src/uts/common/fs/nfs/nfs4_dispatch.c
index fbff936e09..1fdfd0f601 100644
--- a/usr/src/uts/common/fs/nfs/nfs4_dispatch.c
+++ b/usr/src/uts/common/fs/nfs/nfs4_dispatch.c
@@ -24,6 +24,10 @@
* Use is subject to license terms.
*/
+/*
+ * Copyright 2018 Nexenta Systems, Inc.
+ */
+
#include <sys/systm.h>
#include <sys/sdt.h>
#include <rpc/types.h>
@@ -39,11 +43,6 @@
#define NFS4_MAX_MINOR_VERSION 0
/*
- * This is the duplicate request cache for NFSv4
- */
-rfs4_drc_t *nfs4_drc = NULL;
-
-/*
* The default size of the duplicate request cache
*/
uint32_t nfs4_drc_max = 8 * 1024;
@@ -94,12 +93,12 @@ rfs4_init_drc(uint32_t drc_size, uint32_t drc_hash_size)
* Destroy a duplicate request cache.
*/
void
-rfs4_fini_drc(rfs4_drc_t *drc)
+rfs4_fini_drc(void)
{
+ nfs4_srv_t *nsrv4 = nfs4_get_srv();
+ rfs4_drc_t *drc = nsrv4->nfs4_drc;
rfs4_dupreq_t *drp, *drp_next;
- ASSERT(drc);
-
/* iterate over the dr_cache and free the enties */
for (drp = list_head(&(drc->dr_cache)); drp != NULL; drp = drp_next) {
@@ -356,25 +355,25 @@ rfs4_find_dr(struct svc_req *req, rfs4_drc_t *drc, rfs4_dupreq_t **dup)
*
* Passed into this function are:-
*
- * disp A pointer to our dispatch table entry
- * req The request to process
- * xprt The server transport handle
- * ap A pointer to the arguments
+ * disp A pointer to our dispatch table entry
+ * req The request to process
+ * xprt The server transport handle
+ * ap A pointer to the arguments
*
*
* When appropriate this function is responsible for inserting
* the reply into the duplicate cache or replaying an existing
* cached reply.
*
- * dr_stat reflects the state of the duplicate request that
- * has been inserted into or retrieved from the cache
+ * dr_stat reflects the state of the duplicate request that
+ * has been inserted into or retrieved from the cache
*
* drp is the duplicate request entry
*
*/
int
-rfs4_dispatch(struct rpcdisp *disp, struct svc_req *req,
- SVCXPRT *xprt, char *ap)
+rfs4_dispatch(struct rpcdisp *disp, struct svc_req *req, SVCXPRT *xprt,
+ char *ap)
{
COMPOUND4res res_buf;
@@ -386,6 +385,8 @@ rfs4_dispatch(struct rpcdisp *disp, struct svc_req *req,
int dr_stat = NFS4_NOT_DUP;
rfs4_dupreq_t *drp = NULL;
int rv;
+ nfs4_srv_t *nsrv4 = nfs4_get_srv();
+ rfs4_drc_t *nfs4_drc = nsrv4->nfs4_drc;
ASSERT(disp);
@@ -544,13 +545,17 @@ rfs4_minorvers_mismatch(struct svc_req *req, SVCXPRT *xprt, void *args)
resp = &res_buf;
/*
- * Form a reply tag by copying over the reqeuest tag.
+ * Form a reply tag by copying over the request tag.
*/
- resp->tag.utf8string_val =
- kmem_alloc(argsp->tag.utf8string_len, KM_SLEEP);
resp->tag.utf8string_len = argsp->tag.utf8string_len;
- bcopy(argsp->tag.utf8string_val, resp->tag.utf8string_val,
- resp->tag.utf8string_len);
+ if (argsp->tag.utf8string_len != 0) {
+ resp->tag.utf8string_val =
+ kmem_alloc(argsp->tag.utf8string_len, KM_SLEEP);
+ bcopy(argsp->tag.utf8string_val, resp->tag.utf8string_val,
+ resp->tag.utf8string_len);
+ } else {
+ resp->tag.utf8string_val = NULL;
+ }
resp->array_len = 0;
resp->array = NULL;
resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
@@ -575,11 +580,15 @@ rfs4_resource_err(struct svc_req *req, COMPOUND4args *argsp)
/*
* Form a reply tag by copying over the request tag.
*/
- rbp->tag.utf8string_val =
- kmem_alloc(argsp->tag.utf8string_len, KM_SLEEP);
rbp->tag.utf8string_len = argsp->tag.utf8string_len;
- bcopy(argsp->tag.utf8string_val, rbp->tag.utf8string_val,
- rbp->tag.utf8string_len);
+ if (argsp->tag.utf8string_len != 0) {
+ rbp->tag.utf8string_val =
+ kmem_alloc(argsp->tag.utf8string_len, KM_SLEEP);
+ bcopy(argsp->tag.utf8string_val, rbp->tag.utf8string_val,
+ rbp->tag.utf8string_len);
+ } else {
+ rbp->tag.utf8string_val = NULL;
+ }
rbp->array_len = 1;
rbp->array = kmem_zalloc(rbp->array_len * sizeof (nfs_resop4),
diff --git a/usr/src/uts/common/fs/nfs/nfs4_srv.c b/usr/src/uts/common/fs/nfs/nfs4_srv.c
index 30c45a71cd..757964eb84 100644
--- a/usr/src/uts/common/fs/nfs/nfs4_srv.c
+++ b/usr/src/uts/common/fs/nfs/nfs4_srv.c
@@ -20,9 +20,7 @@
*/
/*
- * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
*/
/*
@@ -30,6 +28,12 @@
* All Rights Reserved
*/
+/*
+ * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
+ * Copyright 2019 Nexenta Systems, Inc.
+ * Copyright 2019 Nexenta by DDN, Inc.
+ */
+
#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
@@ -66,10 +70,12 @@
#include <rpc/svc.h>
#include <nfs/nfs.h>
+#include <nfs/nfssys.h>
#include <nfs/export.h>
#include <nfs/nfs_cmd.h>
#include <nfs/lm.h>
#include <nfs/nfs4.h>
+#include <nfs/nfs4_drc.h>
#include <sys/strsubr.h>
#include <sys/strsun.h>
@@ -114,8 +120,8 @@ static int rdma_setup_read_data4(READ4args *, READ4res *);
* sizeof nfsstat4 (4 bytes) +
* sizeof verifier4 (8 bytes) +
* sizeof entry4list bool (4 bytes) +
- * sizeof entry4 (36 bytes) +
- * sizeof eof bool (4 bytes)
+ * sizeof entry4 (36 bytes) +
+ * sizeof eof bool (4 bytes)
*
* RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
* VOP_READDIR. Its value is the size of the maximum possible dirent
@@ -147,108 +153,105 @@ static int rdma_setup_read_data4(READ4args *, READ4res *);
#define DIRENT64_TO_DIRCOUNT(dp) \
(3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
-time_t rfs4_start_time; /* Initialized in rfs4_srvrinit */
-static sysid_t lockt_sysid; /* dummy sysid for all LOCKT calls */
+static sysid_t lockt_sysid; /* dummy sysid for all LOCKT calls */
u_longlong_t nfs4_srv_caller_id;
uint_t nfs4_srv_vkey = 0;
-verifier4 Write4verf;
-verifier4 Readdir4verf;
-
void rfs4_init_compound_state(struct compound_state *);
static void nullfree(caddr_t);
static void rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
- struct compound_state *);
+ struct compound_state *);
static void rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
- struct compound_state *);
+ struct compound_state *);
static void rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
- struct compound_state *);
+ struct compound_state *);
static void rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
- struct compound_state *);
+ struct compound_state *);
static void rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
- struct compound_state *);
+ struct compound_state *);
static void rfs4_op_create_free(nfs_resop4 *resop);
static void rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
- struct svc_req *, struct compound_state *);
+ struct svc_req *, struct compound_state *);
static void rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
- struct svc_req *, struct compound_state *);
+ struct svc_req *, struct compound_state *);
static void rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
- struct compound_state *);
+ struct compound_state *);
static void rfs4_op_getattr_free(nfs_resop4 *);
static void rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
- struct compound_state *);
+ struct compound_state *);
static void rfs4_op_getfh_free(nfs_resop4 *);
static void rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
- struct compound_state *);
+ struct compound_state *);
static void rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
- struct compound_state *);
+ struct compound_state *);
static void rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
- struct compound_state *);
+ struct compound_state *);
static void lock_denied_free(nfs_resop4 *);
static void rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
- struct compound_state *);
+ struct compound_state *);
static void rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
- struct compound_state *);
+ struct compound_state *);
static void rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
- struct compound_state *);
+ struct compound_state *);
static void rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
- struct compound_state *);
+ struct compound_state *);
static void rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
- struct svc_req *req, struct compound_state *cs);
+ struct svc_req *req, struct compound_state *cs);
static void rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
- struct compound_state *);
+ struct compound_state *);
static void rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
- struct compound_state *);
+ struct compound_state *);
static void rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
- struct svc_req *, struct compound_state *);
+ struct svc_req *, struct compound_state *);
static void rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
- struct svc_req *, struct compound_state *);
+ struct svc_req *, struct compound_state *);
static void rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
- struct compound_state *);
+ struct compound_state *);
static void rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
- struct compound_state *);
+ struct compound_state *);
static void rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
- struct compound_state *);
+ struct compound_state *);
static void rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
- struct compound_state *);
+ struct compound_state *);
static void rfs4_op_read_free(nfs_resop4 *);
static void rfs4_op_readdir_free(nfs_resop4 *resop);
static void rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
- struct compound_state *);
+ struct compound_state *);
static void rfs4_op_readlink_free(nfs_resop4 *);
static void rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
- struct svc_req *, struct compound_state *);
+ struct svc_req *, struct compound_state *);
static void rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
- struct compound_state *);
+ struct compound_state *);
static void rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
- struct compound_state *);
+ struct compound_state *);
static void rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
- struct compound_state *);
+ struct compound_state *);
static void rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
- struct compound_state *);
+ struct compound_state *);
static void rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
- struct compound_state *);
+ struct compound_state *);
static void rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
- struct compound_state *);
+ struct compound_state *);
static void rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
- struct compound_state *);
+ struct compound_state *);
static void rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
- struct compound_state *);
+ struct compound_state *);
static void rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
- struct svc_req *, struct compound_state *);
+ struct svc_req *, struct compound_state *);
static void rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
- struct svc_req *req, struct compound_state *);
+ struct svc_req *req, struct compound_state *);
static void rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
- struct compound_state *);
+ struct compound_state *);
static void rfs4_op_secinfo_free(nfs_resop4 *);
-static nfsstat4 check_open_access(uint32_t,
- struct compound_state *, struct svc_req *);
-nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
-void rfs4_ss_clid(rfs4_client_t *);
+static nfsstat4 check_open_access(uint32_t, struct compound_state *,
+ struct svc_req *);
+nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
+void rfs4_ss_clid(nfs4_srv_t *, rfs4_client_t *);
+
/*
* translation table for attrs
@@ -262,19 +265,17 @@ struct nfs4_ntov_table {
static void nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
static void nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
- struct nfs4_svgetit_arg *sargp);
+ struct nfs4_svgetit_arg *sargp);
static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
+static void hanfsv4_failover(nfs4_srv_t *);
+
fem_t *deleg_rdops;
fem_t *deleg_wrops;
-rfs4_servinst_t *rfs4_cur_servinst = NULL; /* current server instance */
-kmutex_t rfs4_servinst_lock; /* protects linked list */
-int rfs4_seen_first_compound; /* set first time we see one */
-
/*
* NFS4 op dispatch table
*/
@@ -466,7 +467,7 @@ static char *rfs4_op_string[] = {
};
#endif
-void rfs4_ss_chkclid(rfs4_client_t *);
+void rfs4_ss_chkclid(nfs4_srv_t *, rfs4_client_t *);
extern size_t strlcpy(char *dst, const char *src, size_t dstsize);
@@ -499,13 +500,22 @@ static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
NULL, NULL
};
-int
-rfs4_srvrinit(void)
+nfs4_srv_t *
+nfs4_get_srv(void)
+{
+ nfs_globals_t *ng = nfs_srv_getzg();
+ nfs4_srv_t *srv = ng->nfs4_srv;
+ ASSERT(srv != NULL);
+ return (srv);
+}
+
+void
+rfs4_srv_zone_init(nfs_globals_t *ng)
{
+ nfs4_srv_t *nsrv4;
timespec32_t verf;
- int error;
- extern void rfs4_attr_init();
- extern krwlock_t rfs4_deleg_policy_lock;
+
+ nsrv4 = kmem_zalloc(sizeof (*nsrv4), KM_SLEEP);
/*
* The following algorithm attempts to find a unique verifier
@@ -535,61 +545,113 @@ rfs4_srvrinit(void)
verf.tv_sec = (time_t)tverf.tv_sec;
verf.tv_nsec = tverf.tv_nsec;
}
+ nsrv4->write4verf = *(uint64_t *)&verf;
+
+ /* Used to manage create/destroy of server state */
+ nsrv4->nfs4_server_state = NULL;
+ nsrv4->nfs4_cur_servinst = NULL;
+ nsrv4->nfs4_deleg_policy = SRV_NEVER_DELEGATE;
+ mutex_init(&nsrv4->deleg_lock, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&nsrv4->state_lock, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&nsrv4->servinst_lock, NULL, MUTEX_DEFAULT, NULL);
+ rw_init(&nsrv4->deleg_policy_lock, NULL, RW_DEFAULT, NULL);
+
+ ng->nfs4_srv = nsrv4;
+}
- Write4verf = *(uint64_t *)&verf;
+void
+rfs4_srv_zone_fini(nfs_globals_t *ng)
+{
+ nfs4_srv_t *nsrv4 = ng->nfs4_srv;
- rfs4_attr_init();
- mutex_init(&rfs4_deleg_lock, NULL, MUTEX_DEFAULT, NULL);
+ ng->nfs4_srv = NULL;
- /* Used to manage create/destroy of server state */
- mutex_init(&rfs4_state_lock, NULL, MUTEX_DEFAULT, NULL);
+ mutex_destroy(&nsrv4->deleg_lock);
+ mutex_destroy(&nsrv4->state_lock);
+ mutex_destroy(&nsrv4->servinst_lock);
+ rw_destroy(&nsrv4->deleg_policy_lock);
- /* Used to manage access to server instance linked list */
- mutex_init(&rfs4_servinst_lock, NULL, MUTEX_DEFAULT, NULL);
+ kmem_free(nsrv4, sizeof (*nsrv4));
+}
- /* Used to manage access to rfs4_deleg_policy */
- rw_init(&rfs4_deleg_policy_lock, NULL, RW_DEFAULT, NULL);
+void
+rfs4_srvrinit(void)
+{
+ extern void rfs4_attr_init();
- error = fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops);
- if (error != 0) {
+ rfs4_attr_init();
+
+ if (fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops) != 0) {
rfs4_disable_delegation();
- } else {
- error = fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
- &deleg_wrops);
- if (error != 0) {
- rfs4_disable_delegation();
- fem_free(deleg_rdops);
- }
+ } else if (fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
+ &deleg_wrops) != 0) {
+ rfs4_disable_delegation();
+ fem_free(deleg_rdops);
}
nfs4_srv_caller_id = fs_new_caller_id();
-
lockt_sysid = lm_alloc_sysidt();
-
vsd_create(&nfs4_srv_vkey, NULL);
-
- return (0);
+ rfs4_state_g_init();
}
void
rfs4_srvrfini(void)
{
- extern krwlock_t rfs4_deleg_policy_lock;
-
if (lockt_sysid != LM_NOSYSID) {
lm_free_sysidt(lockt_sysid);
lockt_sysid = LM_NOSYSID;
}
- mutex_destroy(&rfs4_deleg_lock);
- mutex_destroy(&rfs4_state_lock);
- rw_destroy(&rfs4_deleg_policy_lock);
+ rfs4_state_g_fini();
fem_free(deleg_rdops);
fem_free(deleg_wrops);
}
void
+rfs4_do_server_start(int server_upordown,
+ int srv_delegation, int cluster_booted)
+{
+ nfs4_srv_t *nsrv4 = nfs4_get_srv();
+
+ /* Is this a warm start? */
+ if (server_upordown == NFS_SERVER_QUIESCED) {
+ cmn_err(CE_NOTE, "nfs4_srv: "
+ "server was previously quiesced; "
+ "existing NFSv4 state will be re-used");
+
+ /*
+ * HA-NFSv4: this is also the signal
+ * that a Resource Group failover has
+ * occurred.
+ */
+ if (cluster_booted)
+ hanfsv4_failover(nsrv4);
+ } else {
+ /* Cold start */
+ nsrv4->rfs4_start_time = 0;
+ rfs4_state_zone_init(nsrv4);
+ nsrv4->nfs4_drc = rfs4_init_drc(nfs4_drc_max,
+ nfs4_drc_hash);
+
+ /*
+ * The nfsd service was started with the -s option
+ * we need to pull in any state from the paths indicated.
+ */
+ if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
+ /* read in the stable storage state from these paths */
+ rfs4_dss_readstate(nsrv4, rfs4_dss_numnewpaths,
+ rfs4_dss_newpaths);
+ }
+ }
+
+ /* Check if delegation is to be enabled */
+ if (srv_delegation != FALSE)
+ rfs4_set_deleg_policy(nsrv4, SRV_NORMAL_DELEGATE);
+}
+
+void
rfs4_init_compound_state(struct compound_state *cs)
{
bzero(cs, sizeof (*cs));
@@ -652,34 +714,35 @@ rfs4_clnt_in_grace(rfs4_client_t *cp)
* reset all currently active grace periods
*/
void
-rfs4_grace_reset_all(void)
+rfs4_grace_reset_all(nfs4_srv_t *nsrv4)
{
rfs4_servinst_t *sip;
- mutex_enter(&rfs4_servinst_lock);
- for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
+ mutex_enter(&nsrv4->servinst_lock);
+ for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
if (rfs4_servinst_in_grace(sip))
rfs4_grace_start(sip);
- mutex_exit(&rfs4_servinst_lock);
+ mutex_exit(&nsrv4->servinst_lock);
}
/*
* start any new instances' grace periods
*/
void
-rfs4_grace_start_new(void)
+rfs4_grace_start_new(nfs4_srv_t *nsrv4)
{
rfs4_servinst_t *sip;
- mutex_enter(&rfs4_servinst_lock);
- for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
+ mutex_enter(&nsrv4->servinst_lock);
+ for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
if (rfs4_servinst_grace_new(sip))
rfs4_grace_start(sip);
- mutex_exit(&rfs4_servinst_lock);
+ mutex_exit(&nsrv4->servinst_lock);
}
static rfs4_dss_path_t *
-rfs4_dss_newpath(rfs4_servinst_t *sip, char *path, unsigned index)
+rfs4_dss_newpath(nfs4_srv_t *nsrv4, rfs4_servinst_t *sip,
+ char *path, unsigned index)
{
size_t len;
rfs4_dss_path_t *dss_path;
@@ -703,15 +766,15 @@ rfs4_dss_newpath(rfs4_servinst_t *sip, char *path, unsigned index)
* Add to list of served paths.
* No locking required, as we're only ever called at startup.
*/
- if (rfs4_dss_pathlist == NULL) {
+ if (nsrv4->dss_pathlist == NULL) {
/* this is the first dss_path_t */
/* needed for insque/remque */
dss_path->next = dss_path->prev = dss_path;
- rfs4_dss_pathlist = dss_path;
+ nsrv4->dss_pathlist = dss_path;
} else {
- insque(dss_path, rfs4_dss_pathlist);
+ insque(dss_path, nsrv4->dss_pathlist);
}
return (dss_path);
@@ -723,7 +786,8 @@ rfs4_dss_newpath(rfs4_servinst_t *sip, char *path, unsigned index)
* recovery window.
*/
void
-rfs4_servinst_create(int start_grace, int dss_npaths, char **dss_paths)
+rfs4_servinst_create(nfs4_srv_t *nsrv4, int start_grace,
+ int dss_npaths, char **dss_paths)
{
unsigned i;
rfs4_servinst_t *sip;
@@ -754,21 +818,22 @@ rfs4_servinst_create(int start_grace, int dss_npaths, char **dss_paths)
sizeof (rfs4_dss_path_t *), KM_SLEEP);
for (i = 0; i < dss_npaths; i++) {
- sip->dss_paths[i] = rfs4_dss_newpath(sip, dss_paths[i], i);
+ sip->dss_paths[i] =
+ rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i);
}
- mutex_enter(&rfs4_servinst_lock);
- if (rfs4_cur_servinst != NULL) {
+ mutex_enter(&nsrv4->servinst_lock);
+ if (nsrv4->nfs4_cur_servinst != NULL) {
/* add to linked list */
- sip->prev = rfs4_cur_servinst;
- rfs4_cur_servinst->next = sip;
+ sip->prev = nsrv4->nfs4_cur_servinst;
+ nsrv4->nfs4_cur_servinst->next = sip;
}
if (start_grace)
rfs4_grace_start(sip);
/* make the new instance "current" */
- rfs4_cur_servinst = sip;
+ nsrv4->nfs4_cur_servinst = sip;
- mutex_exit(&rfs4_servinst_lock);
+ mutex_exit(&nsrv4->servinst_lock);
}
/*
@@ -776,31 +841,47 @@ rfs4_servinst_create(int start_grace, int dss_npaths, char **dss_paths)
* all instances directly.
*/
void
-rfs4_servinst_destroy_all(void)
+rfs4_servinst_destroy_all(nfs4_srv_t *nsrv4)
{
rfs4_servinst_t *sip, *prev, *current;
#ifdef DEBUG
int n = 0;
#endif
- mutex_enter(&rfs4_servinst_lock);
- ASSERT(rfs4_cur_servinst != NULL);
- current = rfs4_cur_servinst;
- rfs4_cur_servinst = NULL;
+ mutex_enter(&nsrv4->servinst_lock);
+ ASSERT(nsrv4->nfs4_cur_servinst != NULL);
+ current = nsrv4->nfs4_cur_servinst;
+ nsrv4->nfs4_cur_servinst = NULL;
for (sip = current; sip != NULL; sip = prev) {
prev = sip->prev;
rw_destroy(&sip->rwlock);
if (sip->oldstate)
kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
- if (sip->dss_paths)
+ if (sip->dss_paths) {
+ int i = sip->dss_npaths;
+
+ while (i > 0) {
+ i--;
+ if (sip->dss_paths[i] != NULL) {
+ char *path = sip->dss_paths[i]->path;
+
+ if (path != NULL) {
+ kmem_free(path,
+ strlen(path) + 1);
+ }
+ kmem_free(sip->dss_paths[i],
+ sizeof (rfs4_dss_path_t));
+ }
+ }
kmem_free(sip->dss_paths,
sip->dss_npaths * sizeof (rfs4_dss_path_t *));
+ }
kmem_free(sip, sizeof (rfs4_servinst_t));
#ifdef DEBUG
n++;
#endif
}
- mutex_exit(&rfs4_servinst_lock);
+ mutex_exit(&nsrv4->servinst_lock);
}
/*
@@ -808,7 +889,8 @@ rfs4_servinst_destroy_all(void)
* Should be called with cp->rc_dbe held.
*/
void
-rfs4_servinst_assign(rfs4_client_t *cp, rfs4_servinst_t *sip)
+rfs4_servinst_assign(nfs4_srv_t *nsrv4, rfs4_client_t *cp,
+ rfs4_servinst_t *sip)
{
ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
@@ -816,9 +898,9 @@ rfs4_servinst_assign(rfs4_client_t *cp, rfs4_servinst_t *sip)
* The lock ensures that if the current instance is in the process
* of changing, we will see the new one.
*/
- mutex_enter(&rfs4_servinst_lock);
+ mutex_enter(&nsrv4->servinst_lock);
cp->rc_server_instance = sip;
- mutex_exit(&rfs4_servinst_lock);
+ mutex_exit(&nsrv4->servinst_lock);
}
rfs4_servinst_t *
@@ -871,7 +953,7 @@ do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
{
int error, different_export = 0;
vnode_t *dvp, *vp;
- struct exportinfo *exi = NULL;
+ struct exportinfo *exi;
fid_t fid;
uint_t count, i;
secinfo4 *resok_val;
@@ -879,8 +961,11 @@ do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
seconfig_t *si;
bool_t did_traverse = FALSE;
int dotdot, walk;
+ nfs_export_t *ne = nfs_get_export();
dvp = cs->vp;
+ exi = cs->exi;
+ ASSERT(exi != NULL);
dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
/*
@@ -888,25 +973,27 @@ do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
* root of a filesystem, or above an export point.
*/
if (dotdot) {
+ vnode_t *zone_rootvp = ne->exi_root->exi_vp;
+ ASSERT3U(exi->exi_zoneid, ==, ne->exi_root->exi_zoneid);
/*
* If dotdotting at the root of a filesystem, then
* need to traverse back to the mounted-on filesystem
* and do the dotdot lookup there.
*/
- if (cs->vp->v_flag & VROOT) {
+ if ((dvp->v_flag & VROOT) || VN_CMP(dvp, zone_rootvp)) {
/*
* If at the system root, then can
* go up no further.
*/
- if (VN_CMP(dvp, rootdir))
+ if (VN_CMP(dvp, zone_rootvp))
return (puterrno4(ENOENT));
/*
* Traverse back to the mounted-on filesystem
*/
- dvp = untraverse(cs->vp);
+ dvp = untraverse(dvp, zone_rootvp);
/*
* Set the different_export flag so we remember
@@ -920,7 +1007,7 @@ do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
* If dotdotting above an export point then set
* the different_export to get new export info.
*/
- different_export = nfs_exported(cs->exi, cs->vp);
+ different_export = nfs_exported(exi, dvp);
}
}
@@ -939,9 +1026,9 @@ do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
* check whether this vnode is visible.
*/
if (!different_export &&
- (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
+ (PSEUDO(exi) || !is_exported_sec(cs->nfsflavor, exi) ||
cs->access & CS_ACCESS_LIMITED)) {
- if (! nfs_visible(cs->exi, vp, &different_export)) {
+ if (! nfs_visible(exi, vp, &different_export)) {
VN_RELE(vp);
return (puterrno4(ENOENT));
}
@@ -983,6 +1070,7 @@ do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
return (puterrno4(error));
}
+ /* We'll need to reassign "exi". */
if (dotdot)
exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
else
@@ -1003,8 +1091,6 @@ do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
return (puterrno4(EACCES));
}
}
- } else {
- exi = cs->exi;
}
ASSERT(exi != NULL);
@@ -1017,7 +1103,7 @@ do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
* For a real export node, return the flavor that the client
* has access with.
*/
- ASSERT(RW_LOCK_HELD(&exported_lock));
+ ASSERT(RW_LOCK_HELD(&ne->exported_lock));
if (PSEUDO(exi)) {
count = exi->exi_export.ex_seccnt; /* total sec count */
resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
@@ -1380,6 +1466,7 @@ rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
vnode_t *vp = cs->vp;
cred_t *cr = cs->cr;
vattr_t va;
+ nfs4_srv_t *nsrv4;
DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
COMMIT4args *, args);
@@ -1436,8 +1523,9 @@ rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
goto out;
}
+ nsrv4 = nfs4_get_srv();
*cs->statusp = resp->status = NFS4_OK;
- resp->writeverf = Write4verf;
+ resp->writeverf = nsrv4->write4verf;
out:
DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
COMMIT4res *, resp);
@@ -2633,25 +2721,28 @@ do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
* export point.
*/
if (dotdot) {
+ vnode_t *zone_rootvp;
+ ASSERT(cs->exi != NULL);
+ zone_rootvp = cs->exi->exi_ne->exi_root->exi_vp;
/*
* If dotdotting at the root of a filesystem, then
* need to traverse back to the mounted-on filesystem
* and do the dotdot lookup there.
*/
- if (cs->vp->v_flag & VROOT) {
+ if ((cs->vp->v_flag & VROOT) || VN_CMP(cs->vp, zone_rootvp)) {
/*
* If at the system root, then can
* go up no further.
*/
- if (VN_CMP(cs->vp, rootdir))
+ if (VN_CMP(cs->vp, zone_rootvp))
return (puterrno4(ENOENT));
/*
* Traverse back to the mounted-on filesystem
*/
- cs->vp = untraverse(cs->vp);
+ cs->vp = untraverse(cs->vp, zone_rootvp);
/*
* Set the different_export flag so we remember
@@ -3409,6 +3500,7 @@ rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
vnode_t *vp;
struct exportinfo *exi, *sav_exi;
nfs_fh4_fmt_t *fh_fmtp;
+ nfs_export_t *ne = nfs_get_export();
DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
@@ -3422,19 +3514,19 @@ rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
cs->cr = crdup(cs->basecr);
- vp = exi_public->exi_vp;
+ vp = ne->exi_public->exi_vp;
if (vp == NULL) {
*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
goto out;
}
- error = makefh4(&cs->fh, vp, exi_public);
+ error = makefh4(&cs->fh, vp, ne->exi_public);
if (error != 0) {
*cs->statusp = resp->status = puterrno4(error);
goto out;
}
sav_exi = cs->exi;
- if (exi_public == exi_root) {
+ if (ne->exi_public == ne->exi_root) {
/*
* No filesystem is actually shared public, so we default
* to exi_root. In this case, we must check whether root
@@ -3449,12 +3541,12 @@ rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
*/
exi = checkexport4(&fh_fmtp->fh4_fsid,
(fid_t *)&fh_fmtp->fh4_xlen, NULL);
- cs->exi = ((exi != NULL) ? exi : exi_public);
+ cs->exi = ((exi != NULL) ? exi : ne->exi_public);
} else {
/*
* it's a properly shared filesystem
*/
- cs->exi = exi_public;
+ cs->exi = ne->exi_public;
}
if (is_system_labeled()) {
@@ -3596,7 +3688,7 @@ rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
*/
bzero(&fid, sizeof (fid));
fid.fid_len = MAXFIDSZ;
- error = vop_fid_pseudo(rootdir, &fid);
+ error = vop_fid_pseudo(ZONE_ROOTVP(), &fid);
if (error != 0) {
*cs->statusp = resp->status = puterrno4(error);
goto out;
@@ -3610,7 +3702,7 @@ rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
* one or more exports further down in the server's
* file tree.
*/
- exi = checkexport4(&rootdir->v_vfsp->vfs_fsid, &fid, NULL);
+ exi = checkexport4(&ZONE_ROOTVP()->v_vfsp->vfs_fsid, &fid, NULL);
if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
NFS4_DEBUG(rfs4_debug,
(CE_WARN, "rfs4_op_putrootfh: export check failure"));
@@ -3622,7 +3714,7 @@ rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
* Now make a filehandle based on the root
* export and root vnode.
*/
- error = makefh4(&cs->fh, rootdir, exi);
+ error = makefh4(&cs->fh, ZONE_ROOTVP(), exi);
if (error != 0) {
*cs->statusp = resp->status = puterrno4(error);
goto out;
@@ -3631,11 +3723,11 @@ rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
sav_exi = cs->exi;
cs->exi = exi;
- VN_HOLD(rootdir);
- cs->vp = rootdir;
+ VN_HOLD(ZONE_ROOTVP());
+ cs->vp = ZONE_ROOTVP();
if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
- VN_RELE(rootdir);
+ VN_RELE(cs->vp);
cs->vp = NULL;
cs->exi = sav_exi;
goto out;
@@ -3721,10 +3813,12 @@ rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
if (is_referral) {
char *s;
size_t strsz;
+ kstat_named_t *stat =
+ cs->exi->exi_ne->ne_globals->svstat[NFS_V4];
/* Get an artificial symlink based on a referral */
s = build_symlink(vp, cs->cr, &strsz);
- global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++;
+ stat[NFS_REFERLINKS].value.ui64++;
DTRACE_PROBE2(nfs4serv__func__referral__reflink,
vnode_t *, vp, char *, s);
if (s == NULL)
@@ -4171,7 +4265,7 @@ rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
* NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
* transmit over the wire.
*/
- if ((error = VOP_RMDIR(dvp, name, rootdir, cs->cr,
+ if ((error = VOP_RMDIR(dvp, name, ZONE_ROOTVP(), cs->cr,
NULL, 0)) == EEXIST)
error = ENOTEMPTY;
}
@@ -4283,7 +4377,7 @@ rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
int error;
vnode_t *odvp;
vnode_t *ndvp;
- vnode_t *srcvp, *targvp;
+ vnode_t *srcvp, *targvp, *tvp;
struct vattr obdva, oidva, oadva;
struct vattr nbdva, nidva, nadva;
char *onm, *nnm;
@@ -4291,6 +4385,7 @@ rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
rfs4_file_t *fp, *sfp;
int in_crit_src, in_crit_targ;
int fp_rele_grant_hold, sfp_rele_grant_hold;
+ int unlinked;
bslabel_t *clabel;
struct sockaddr *ca;
char *converted_onm = NULL;
@@ -4301,9 +4396,10 @@ rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
RENAME4args *, args);
fp = sfp = NULL;
- srcvp = targvp = NULL;
+ srcvp = targvp = tvp = NULL;
in_crit_src = in_crit_targ = 0;
fp_rele_grant_hold = sfp_rele_grant_hold = 0;
+ unlinked = 0;
/* CURRENT_FH: target directory */
ndvp = cs->vp;
@@ -4476,7 +4572,6 @@ rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
}
fp_rele_grant_hold = 1;
-
/* Check for NBMAND lock on both source and target */
if (nbl_need_check(srcvp)) {
nbl_start_crit(srcvp, RW_READER);
@@ -4511,11 +4606,15 @@ rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
- if ((error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm,
- cs->cr, NULL, 0)) == 0 && fp != NULL) {
- struct vattr va;
- vnode_t *tvp;
+ error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm, cs->cr,
+ NULL, 0);
+ /*
+	 * If the target existed and was unlinked by VOP_RENAME, its state
+	 * will need to be closed. To avoid deadlock, rfs4_close_all_state is
+	 * done after any necessary nbl_end_crit on srcvp and targvp.
+ */
+ if (error == 0 && fp != NULL) {
rfs4_dbe_lock(fp->rf_dbe);
tvp = fp->rf_vp;
if (tvp)
@@ -4523,17 +4622,23 @@ rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
rfs4_dbe_unlock(fp->rf_dbe);
if (tvp) {
+ struct vattr va;
va.va_mask = AT_NLINK;
+
if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
va.va_nlink == 0) {
- /* The file is gone and so should the state */
- if (in_crit_targ) {
- nbl_end_crit(targvp);
- in_crit_targ = 0;
+ unlinked = 1;
+
+ /* DEBUG data */
+ if ((srcvp == targvp) || (tvp != targvp)) {
+ cmn_err(CE_WARN, "rfs4_op_rename: "
+ "srcvp %p, targvp: %p, tvp: %p",
+ (void *)srcvp, (void *)targvp,
+ (void *)tvp);
}
- rfs4_close_all_state(fp);
+ } else {
+ VN_RELE(tvp);
}
- VN_RELE(tvp);
}
}
if (error == 0)
@@ -4548,6 +4653,21 @@ rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
if (targvp)
VN_RELE(targvp);
+ if (unlinked) {
+ ASSERT(fp != NULL);
+ ASSERT(tvp != NULL);
+
+ /* DEBUG data */
+ if (RW_READ_HELD(&tvp->v_nbllock)) {
+ cmn_err(CE_WARN, "rfs4_op_rename: "
+ "RW_READ_HELD(%p)", (void *)tvp);
+ }
+
+ /* The file is gone and so should the state */
+ rfs4_close_all_state(fp);
+ VN_RELE(tvp);
+ }
+
if (sfp) {
rfs4_clear_dont_grant(sfp);
rfs4_file_rele(sfp);
@@ -5484,6 +5604,7 @@ rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
nfsstat4 stat;
int in_crit = 0;
caller_context_t ct;
+ nfs4_srv_t *nsrv4;
DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
WRITE4args *, args);
@@ -5554,11 +5675,12 @@ rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
goto out;
}
+ nsrv4 = nfs4_get_srv();
if (args->data_len == 0) {
*cs->statusp = resp->status = NFS4_OK;
resp->count = 0;
resp->committed = args->stable;
- resp->writeverf = Write4verf;
+ resp->writeverf = nsrv4->write4verf;
goto out;
}
@@ -5654,7 +5776,7 @@ rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
else
resp->committed = FILE_SYNC4;
- resp->writeverf = Write4verf;
+ resp->writeverf = nsrv4->write4verf;
out:
if (in_crit)
@@ -5674,18 +5796,24 @@ rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
{
uint_t i;
struct compound_state cs;
+ nfs4_srv_t *nsrv4;
+ nfs_export_t *ne = nfs_get_export();
if (rv != NULL)
*rv = 0;
rfs4_init_compound_state(&cs);
/*
- * Form a reply tag by copying over the reqeuest tag.
+ * Form a reply tag by copying over the request tag.
*/
- resp->tag.utf8string_val =
- kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
resp->tag.utf8string_len = args->tag.utf8string_len;
- bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
- resp->tag.utf8string_len);
+ if (args->tag.utf8string_len != 0) {
+ resp->tag.utf8string_val =
+ kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
+ bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
+ resp->tag.utf8string_len);
+ } else {
+ resp->tag.utf8string_val = NULL;
+ }
cs.statusp = &resp->status;
cs.req = req;
@@ -5731,6 +5859,7 @@ rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
KM_SLEEP);
cs.basecr = cr;
+ nsrv4 = nfs4_get_srv();
DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
COMPOUND4args *, args);
@@ -5745,26 +5874,27 @@ rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
* ops along with unexport. This lock will be removed as
* part of the NFSv4 phase 2 namespace redesign work.
*/
- rw_enter(&exported_lock, RW_READER);
+ rw_enter(&ne->exported_lock, RW_READER);
/*
* If this is the first compound we've seen, we need to start all
* new instances' grace periods.
*/
- if (rfs4_seen_first_compound == 0) {
- rfs4_grace_start_new();
+ if (nsrv4->seen_first_compound == 0) {
+ rfs4_grace_start_new(nsrv4);
/*
* This must be set after rfs4_grace_start_new(), otherwise
* another thread could proceed past here before the former
* is finished.
*/
- rfs4_seen_first_compound = 1;
+ nsrv4->seen_first_compound = 1;
}
for (i = 0; i < args->array_len && cs.cont; i++) {
nfs_argop4 *argop;
nfs_resop4 *resop;
uint_t op;
+ kstat_named_t *stat = ne->ne_globals->rfsproccnt[NFS_V4];
argop = &args->array[i];
resop = &resp->array[i];
@@ -5776,7 +5906,7 @@ rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
* Count the individual ops here; NULL and COMPOUND
* are counted in common_dispatch()
*/
- rfsproccnt_v4_ptr[op].value.ui64++;
+ stat[op].value.ui64++;
NFS4_DEBUG(rfs4_debug > 1,
(CE_NOTE, "Executing %s", rfs4_op_string[op]));
@@ -5793,7 +5923,7 @@ rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
* day when XDR code doesn't verify v4 opcodes.
*/
op = OP_ILLEGAL;
- rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;
+ stat[OP_ILLEGAL_IDX].value.ui64++;
rfs4_op_illegal(argop, resop, req, &cs);
cs.cont = FALSE;
@@ -5816,15 +5946,22 @@ rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
}
}
- rw_exit(&exported_lock);
-
- DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
- COMPOUND4res *, resp);
+ rw_exit(&ne->exported_lock);
+ /*
+	 * Clear exportinfo and vnode fields from compound_state before DTrace
+	 * probe, to avoid tracing residual values for path and share path.
+ */
if (cs.vp)
VN_RELE(cs.vp);
if (cs.saved_vp)
VN_RELE(cs.saved_vp);
+ cs.exi = cs.saved_exi = NULL;
+ cs.vp = cs.saved_vp = NULL;
+
+ DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
+ COMPOUND4res *, resp);
+
if (cs.saved_fh.nfs_fh4_val)
kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
@@ -6528,25 +6665,27 @@ rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
if (trunc) {
int in_crit = 0;
rfs4_file_t *fp;
+ nfs4_srv_t *nsrv4;
bool_t create = FALSE;
/*
* We are writing over an existing file.
* Check to see if we need to recall a delegation.
*/
- rfs4_hold_deleg_policy();
+ nsrv4 = nfs4_get_srv();
+ rfs4_hold_deleg_policy(nsrv4);
if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
if (rfs4_check_delegated_byfp(FWRITE, fp,
(reqsize == 0), FALSE, FALSE, &clientid)) {
rfs4_file_rele(fp);
- rfs4_rele_deleg_policy();
+ rfs4_rele_deleg_policy(nsrv4);
VN_RELE(vp);
*attrset = 0;
return (NFS4ERR_DELAY);
}
rfs4_file_rele(fp);
}
- rfs4_rele_deleg_policy();
+ rfs4_rele_deleg_policy(nsrv4);
if (nbl_need_check(vp)) {
in_crit = 1;
@@ -8104,11 +8243,13 @@ rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
SETCLIENTID_CONFIRM4res *res =
&resop->nfs_resop4_u.opsetclientid_confirm;
rfs4_client_t *cp, *cptoclose = NULL;
+ nfs4_srv_t *nsrv4;
DTRACE_NFSV4_2(op__setclientid__confirm__start,
struct compound_state *, cs,
SETCLIENTID_CONFIRM4args *, args);
+ nsrv4 = nfs4_get_srv();
*cs->statusp = res->status = NFS4_OK;
cp = rfs4_findclient_by_id(args->clientid, TRUE);
@@ -8144,14 +8285,14 @@ rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
* Update the client's associated server instance, if it's changed
* since the client was created.
*/
- if (rfs4_servinst(cp) != rfs4_cur_servinst)
- rfs4_servinst_assign(cp, rfs4_cur_servinst);
+ if (rfs4_servinst(cp) != nsrv4->nfs4_cur_servinst)
+ rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
/*
* Record clientid in stable storage.
* Must be done after server instance has been assigned.
*/
- rfs4_ss_clid(cp);
+ rfs4_ss_clid(nsrv4, cp);
rfs4_dbe_unlock(cp->rc_dbe);
@@ -8166,7 +8307,7 @@ rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
/*
* Check to see if client can perform reclaims
*/
- rfs4_ss_chkclid(cp);
+ rfs4_ss_chkclid(nsrv4, cp);
rfs4_client_rele(cp);
@@ -9811,3 +9952,167 @@ client_is_downrev(struct svc_req *req)
rfs4_dbe_rele(ci->ri_dbe);
return (is_downrev);
}
+
+/*
+ * Do the main work of handling HA-NFSv4 Resource Group failover on
+ * Sun Cluster.
+ * We need to detect whether any RG admin paths have been added or removed,
+ * and adjust resources accordingly.
+ * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
+ * order to scale, the list and array of paths need to be held in more
+ * suitable data structures.
+ */
+static void
+hanfsv4_failover(nfs4_srv_t *nsrv4)
+{
+ int i, start_grace, numadded_paths = 0;
+ char **added_paths = NULL;
+ rfs4_dss_path_t *dss_path;
+
+ /*
+ * Note: currently, dss_pathlist cannot be NULL, since
+ * it will always include an entry for NFS4_DSS_VAR_DIR. If we
+ * make the latter dynamically specified too, the following will
+ * need to be adjusted.
+ */
+
+ /*
+ * First, look for removed paths: RGs that have been failed-over
+ * away from this node.
+ * Walk the "currently-serving" dss_pathlist and, for each
+ * path, check if it is on the "passed-in" rfs4_dss_newpaths array
+ * from nfsd. If not, that RG path has been removed.
+ *
+ * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
+ * any duplicates.
+ */
+ dss_path = nsrv4->dss_pathlist;
+ do {
+ int found = 0;
+ char *path = dss_path->path;
+
+ /* used only for non-HA so may not be removed */
+ if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
+ dss_path = dss_path->next;
+ continue;
+ }
+
+ for (i = 0; i < rfs4_dss_numnewpaths; i++) {
+ int cmpret;
+ char *newpath = rfs4_dss_newpaths[i];
+
+ /*
+ * Since nfsd has sorted rfs4_dss_newpaths for us,
+ * once the return from strcmp is negative we know
+ * we've passed the point where "path" should be,
+ * and can stop searching: "path" has been removed.
+ */
+ cmpret = strcmp(path, newpath);
+ if (cmpret < 0)
+ break;
+ if (cmpret == 0) {
+ found = 1;
+ break;
+ }
+ }
+
+ if (found == 0) {
+ unsigned index = dss_path->index;
+ rfs4_servinst_t *sip = dss_path->sip;
+ rfs4_dss_path_t *path_next = dss_path->next;
+
+ /*
+ * This path has been removed.
+ * We must clear out the servinst reference to
+ * it, since it's now owned by another
+ * node: we should not attempt to touch it.
+ */
+ ASSERT(dss_path == sip->dss_paths[index]);
+ sip->dss_paths[index] = NULL;
+
+ /* remove from "currently-serving" list, and destroy */
+ remque(dss_path);
+ /* allow for NUL */
+ kmem_free(dss_path->path, strlen(dss_path->path) + 1);
+ kmem_free(dss_path, sizeof (rfs4_dss_path_t));
+
+ dss_path = path_next;
+ } else {
+ /* path was found; not removed */
+ dss_path = dss_path->next;
+ }
+ } while (dss_path != nsrv4->dss_pathlist);
+
+ /*
+ * Now, look for added paths: RGs that have been failed-over
+ * to this node.
+ * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
+ * for each path, check if it is on the "currently-serving"
+ * dss_pathlist. If not, that RG path has been added.
+ *
+ * Note: we don't do duplicate detection here; nfsd does that for us.
+ *
+ * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
+ * an upper bound for the size needed for added_paths[numadded_paths].
+ */
+
+ /* probably more space than we need, but guaranteed to be enough */
+ if (rfs4_dss_numnewpaths > 0) {
+ size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
+ added_paths = kmem_zalloc(sz, KM_SLEEP);
+ }
+
+ /* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
+ for (i = 0; i < rfs4_dss_numnewpaths; i++) {
+ int found = 0;
+ char *newpath = rfs4_dss_newpaths[i];
+
+ dss_path = nsrv4->dss_pathlist;
+ do {
+ char *path = dss_path->path;
+
+ /* used only for non-HA */
+ if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
+ dss_path = dss_path->next;
+ continue;
+ }
+
+ if (strncmp(path, newpath, strlen(path)) == 0) {
+ found = 1;
+ break;
+ }
+
+ dss_path = dss_path->next;
+ } while (dss_path != nsrv4->dss_pathlist);
+
+ if (found == 0) {
+ added_paths[numadded_paths] = newpath;
+ numadded_paths++;
+ }
+ }
+
+ /* did we find any added paths? */
+ if (numadded_paths > 0) {
+
+ /* create a new server instance, and start its grace period */
+ start_grace = 1;
+ /* CSTYLED */
+ rfs4_servinst_create(nsrv4, start_grace, numadded_paths, added_paths);
+
+ /* read in the stable storage state from these paths */
+ rfs4_dss_readstate(nsrv4, numadded_paths, added_paths);
+
+ /*
+ * Multiple failovers during a grace period will cause
+ * clients of the same resource group to be partitioned
+ * into different server instances, with different
+ * grace periods. Since clients of the same resource
+ * group must be subject to the same grace period,
+ * we need to reset all currently active grace periods.
+ */
+ rfs4_grace_reset_all(nsrv4);
+ }
+
+ if (rfs4_dss_numnewpaths > 0)
+ kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
+}
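
The removed-path pass in hanfsv4_failover() leans on nfsd handing in rfs4_dss_newpaths already sorted and de-duplicated: the inner walk over that array can stop as soon as strcmp() goes negative, because the currently-served path can no longer appear later in the sorted list. A stand-alone C sketch of just that comparison logic follows (illustrative only; the function name and sample paths are hypothetical and not part of this merge):

/*
 * path_still_served() mirrors the sorted-array early exit above:
 * it returns 1 if "path" is still present in the sorted list handed
 * in by nfsd, and 0 if it has been removed (failed over elsewhere).
 */
#include <stdio.h>
#include <string.h>

static int
path_still_served(const char *path, const char **newpaths, int numnew)
{
	int i;

	for (i = 0; i < numnew; i++) {
		int cmp = strcmp(path, newpaths[i]);

		if (cmp < 0)
			break;		/* past its sort position: removed */
		if (cmp == 0)
			return (1);	/* still served by this node */
	}
	return (0);
}

int
main(void)
{
	/* hypothetical RG admin paths, sorted as nfsd would pass them */
	const char *newpaths[] = { "/dss/rg1", "/dss/rg3" };

	printf("%d\n", path_still_served("/dss/rg2", newpaths, 2));	/* 0 */
	printf("%d\n", path_still_served("/dss/rg3", newpaths, 2));	/* 1 */
	return (0);
}
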
diff --git a/usr/src/uts/common/fs/nfs/nfs4_srv_attr.c b/usr/src/uts/common/fs/nfs/nfs4_srv_attr.c
index 7240faa356..a9ee217a8b 100644
--- a/usr/src/uts/common/fs/nfs/nfs4_srv_attr.c
+++ b/usr/src/uts/common/fs/nfs/nfs4_srv_attr.c
@@ -18,12 +18,14 @@
*
* CDDL HEADER END
*/
+
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+
/*
- * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2018 Nexenta Systems, Inc.
*/
#include <sys/systm.h>
@@ -133,6 +135,11 @@ rfs4_attr_init()
struct statvfs64 sb;
rfs4_init_compound_state(&cs);
+ /*
+ * This is global state checking, called once. We might be in
+ * non-global-zone context here (say a modload happens from a zone
+ * process) so in this case, we want the global-zone root vnode.
+ */
cs.vp = rootvp;
cs.fh.nfs_fh4_val = NULL;
cs.cr = kcred;
@@ -1301,22 +1308,29 @@ rfs4_get_mntdfileid(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sarg)
vp = sarg->cs->vp;
sarg->mntdfid_set = FALSE;
- /* VROOT object, must untraverse */
- if (vp->v_flag & VROOT) {
+ /*
+ * VROOT object or zone's root, must untraverse.
+ *
+ * NOTE: Not doing reality checks on curzone vs. compound
+ * state vnode because it will mismatch once at initialization
+ * if a non-global-zone triggers the module load, BUT in that case
+ * the vp is literally "/" which has VROOT set.
+ */
+ if ((vp->v_flag & VROOT) || VN_IS_CURZONEROOT(vp)) {
/* extra hold for vp since untraverse might rele */
VN_HOLD(vp);
- stubvp = untraverse(vp);
+ stubvp = untraverse(vp, ZONE_ROOTVP());
/*
- * If vp/stubvp are same, we must be at system
+ * If vp/stubvp are same, we must be at system-or-zone
* root because untraverse returned same vp
* for a VROOT object. sarg->vap was setup
* before we got here, so there's no need to do
* another getattr -- just use the one in sarg.
*/
if (VN_CMP(vp, stubvp)) {
- ASSERT(VN_CMP(vp, rootdir));
+ ASSERT(VN_IS_CURZONEROOT(vp));
vap = sarg->vap;
} else {
va.va_mask = AT_NODEID;
@@ -1375,10 +1389,10 @@ rfs4_fattr4_mounted_on_fileid(nfs4_attr_cmd_t cmd,
break; /* this attr is supported */
case NFS4ATTR_GETIT:
case NFS4ATTR_VERIT:
- if (! sarg->mntdfid_set)
+ if (!sarg->mntdfid_set)
error = rfs4_get_mntdfileid(cmd, sarg);
- if (! error && sarg->mntdfid_set) {
+ if (!error && sarg->mntdfid_set) {
if (cmd == NFS4ATTR_GETIT)
na->mounted_on_fileid = sarg->mounted_on_fileid;
else
@@ -1595,6 +1609,10 @@ rfs4_fattr4_fs_locations(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sarg,
break; /* this attr is supported */
case NFS4ATTR_GETIT:
+ {
+ kstat_named_t *stat =
+ sarg->cs->exi->exi_ne->ne_globals->svstat[NFS_V4];
+
fsl = fetch_referral(sarg->cs->vp, sarg->cs->cr);
if (fsl == NULL)
(void) memset(&(na->fs_locations), 0,
@@ -1603,9 +1621,9 @@ rfs4_fattr4_fs_locations(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sarg,
na->fs_locations = *fsl;
kmem_free(fsl, sizeof (fs_locations4));
}
- global_svstat_ptr[4][NFS_REFERRALS].value.ui64++;
+ stat[NFS_REFERRALS].value.ui64++;
break;
-
+ }
case NFS4ATTR_FREEIT:
if (sarg->op == NFS4ATTR_SETIT || sarg->op == NFS4ATTR_VERIT)
error = EINVAL;
diff --git a/usr/src/uts/common/fs/nfs/nfs4_srv_deleg.c b/usr/src/uts/common/fs/nfs/nfs4_srv_deleg.c
index bb3f1bdd95..ea50f18d77 100644
--- a/usr/src/uts/common/fs/nfs/nfs4_srv_deleg.c
+++ b/usr/src/uts/common/fs/nfs/nfs4_srv_deleg.c
@@ -22,7 +22,10 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
+ */
+
+/*
+ * Copyright 2018 Nexenta Systems, Inc.
*/
#include <sys/systm.h>
@@ -48,10 +51,7 @@
#define MAX_READ_DELEGATIONS 5
-krwlock_t rfs4_deleg_policy_lock;
-srv_deleg_policy_t rfs4_deleg_policy = SRV_NEVER_DELEGATE;
static int rfs4_deleg_wlp = 5;
-kmutex_t rfs4_deleg_lock;
static int rfs4_deleg_disabled;
static int rfs4_max_setup_cb_tries = 5;
@@ -138,23 +138,30 @@ uaddr2sockaddr(int af, char *ua, void *ap, in_port_t *pp)
* value of "new_policy"
*/
void
-rfs4_set_deleg_policy(srv_deleg_policy_t new_policy)
+rfs4_set_deleg_policy(nfs4_srv_t *nsrv4, srv_deleg_policy_t new_policy)
{
- rw_enter(&rfs4_deleg_policy_lock, RW_WRITER);
- rfs4_deleg_policy = new_policy;
- rw_exit(&rfs4_deleg_policy_lock);
+ rw_enter(&nsrv4->deleg_policy_lock, RW_WRITER);
+ nsrv4->nfs4_deleg_policy = new_policy;
+ rw_exit(&nsrv4->deleg_policy_lock);
}
void
-rfs4_hold_deleg_policy(void)
+rfs4_hold_deleg_policy(nfs4_srv_t *nsrv4)
{
- rw_enter(&rfs4_deleg_policy_lock, RW_READER);
+ rw_enter(&nsrv4->deleg_policy_lock, RW_READER);
}
void
-rfs4_rele_deleg_policy(void)
+rfs4_rele_deleg_policy(nfs4_srv_t *nsrv4)
{
- rw_exit(&rfs4_deleg_policy_lock);
+ rw_exit(&nsrv4->deleg_policy_lock);
+}
+
+srv_deleg_policy_t
+nfs4_get_deleg_policy()
+{
+ nfs4_srv_t *nsrv4 = nfs4_get_srv();
+ return (nsrv4->nfs4_deleg_policy);
}
@@ -210,7 +217,7 @@ rfs4_do_cb_null(rfs4_client_t *cp)
if (cbp->cb_nullcaller == TRUE) {
mutex_exit(cbp->cb_lock);
rfs4_client_rele(cp);
- return;
+ zthread_exit();
}
/* Mark the cbinfo as having a thread in the NULL callback */
@@ -278,7 +285,7 @@ retry:
cbp->cb_nullcaller = FALSE;
mutex_exit(cbp->cb_lock);
rfs4_client_rele(cp);
- return;
+ zthread_exit();
}
/* mark rfs4_client_t as CALLBACK NULL in progress */
@@ -320,8 +327,8 @@ retry:
cv_broadcast(cbp->cb_cv); /* start up the other threads */
cbp->cb_nullcaller = FALSE;
mutex_exit(cbp->cb_lock);
-
rfs4_client_rele(cp);
+ zthread_exit();
}
/*
@@ -687,7 +694,7 @@ rfs4_deleg_cb_check(rfs4_client_t *cp)
rfs4_dbe_hold(cp->rc_dbe); /* hold the client struct for thread */
- (void) thread_create(NULL, 0, rfs4_do_cb_null, cp, 0, &p0, TS_RUN,
+ (void) zthread_create(NULL, 0, rfs4_do_cb_null, cp, 0,
minclsyspri);
}
@@ -948,8 +955,8 @@ do_recall(struct recall_arg *arg)
mutex_destroy(&cpr_lock);
rfs4_deleg_state_rele(dsp); /* release the hold for this thread */
-
kmem_free(arg, sizeof (struct recall_arg));
+ zthread_exit();
}
struct master_recall_args {
@@ -977,7 +984,7 @@ do_recall_file(struct master_recall_args *map)
rfs4_dbe_rele_nolock(fp->rf_dbe);
rfs4_dbe_unlock(fp->rf_dbe);
kmem_free(map, sizeof (struct master_recall_args));
- return;
+ zthread_exit();
}
mutex_exit(fp->rf_dinfo.rd_recall_lock);
@@ -1010,7 +1017,7 @@ do_recall_file(struct master_recall_args *map)
recall_count++;
- (void) thread_create(NULL, 0, do_recall, arg, 0, &p0, TS_RUN,
+ (void) zthread_create(NULL, 0, do_recall, arg, 0,
minclsyspri);
}
@@ -1035,6 +1042,7 @@ do_recall_file(struct master_recall_args *map)
mutex_enter(&cpr_lock);
CALLB_CPR_EXIT(&cpr_info);
mutex_destroy(&cpr_lock);
+ zthread_exit();
}
static void
@@ -1070,7 +1078,7 @@ rfs4_recall_file(rfs4_file_t *fp,
args->recall = recall;
args->trunc = trunc;
- (void) thread_create(NULL, 0, do_recall_file, args, 0, &p0, TS_RUN,
+ (void) zthread_create(NULL, 0, do_recall_file, args, 0,
minclsyspri);
}
@@ -1206,12 +1214,12 @@ rfs4_check_delegation(rfs4_state_t *sp, rfs4_file_t *fp)
* determine the actual delegation type to return.
*/
static open_delegation_type4
-rfs4_delegation_policy(open_delegation_type4 dtype,
+rfs4_delegation_policy(nfs4_srv_t *nsrv4, open_delegation_type4 dtype,
rfs4_dinfo_t *dinfo, clientid4 cid)
{
time_t elapsed;
- if (rfs4_deleg_policy != SRV_NORMAL_DELEGATE)
+ if (nsrv4->nfs4_deleg_policy != SRV_NORMAL_DELEGATE)
return (OPEN_DELEGATE_NONE);
/*
@@ -1254,6 +1262,7 @@ rfs4_delegation_policy(open_delegation_type4 dtype,
rfs4_deleg_state_t *
rfs4_grant_delegation(delegreq_t dreq, rfs4_state_t *sp, int *recall)
{
+ nfs4_srv_t *nsrv4;
rfs4_file_t *fp = sp->rs_finfo;
open_delegation_type4 dtype;
int no_delegation;
@@ -1261,14 +1270,18 @@ rfs4_grant_delegation(delegreq_t dreq, rfs4_state_t *sp, int *recall)
ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
ASSERT(rfs4_dbe_islocked(fp->rf_dbe));
+ nsrv4 = nfs4_get_srv();
+
/* Is the server even providing delegations? */
- if (rfs4_deleg_policy == SRV_NEVER_DELEGATE || dreq == DELEG_NONE)
+ if (nsrv4->nfs4_deleg_policy == SRV_NEVER_DELEGATE ||
+ dreq == DELEG_NONE) {
return (NULL);
+ }
/* Check to see if delegations have been temporarily disabled */
- mutex_enter(&rfs4_deleg_lock);
+ mutex_enter(&nsrv4->deleg_lock);
no_delegation = rfs4_deleg_disabled;
- mutex_exit(&rfs4_deleg_lock);
+ mutex_exit(&nsrv4->deleg_lock);
if (no_delegation)
return (NULL);
@@ -1349,7 +1362,7 @@ rfs4_grant_delegation(delegreq_t dreq, rfs4_state_t *sp, int *recall)
* Based on policy and the history of the file get the
* actual delegation.
*/
- dtype = rfs4_delegation_policy(dtype, &fp->rf_dinfo,
+ dtype = rfs4_delegation_policy(nsrv4, dtype, &fp->rf_dinfo,
sp->rs_owner->ro_client->rc_clientid);
if (dtype == OPEN_DELEGATE_NONE)
@@ -1438,8 +1451,10 @@ rfs4_check_delegated_byfp(int mode, rfs4_file_t *fp,
{
rfs4_deleg_state_t *dsp;
+ nfs4_srv_t *nsrv4 = nfs4_get_srv();
+
/* Is delegation enabled? */
- if (rfs4_deleg_policy == SRV_NEVER_DELEGATE)
+ if (nsrv4->nfs4_deleg_policy == SRV_NEVER_DELEGATE)
return (FALSE);
/* do we have a delegation on this file? */
@@ -1504,14 +1519,16 @@ rfs4_check_delegated_byfp(int mode, rfs4_file_t *fp,
bool_t
rfs4_check_delegated(int mode, vnode_t *vp, bool_t trunc)
{
+ nfs4_srv_t *nsrv4;
rfs4_file_t *fp;
bool_t create = FALSE;
bool_t rc = FALSE;
- rfs4_hold_deleg_policy();
+ nsrv4 = nfs4_get_srv();
+ rfs4_hold_deleg_policy(nsrv4);
/* Is delegation enabled? */
- if (rfs4_deleg_policy != SRV_NEVER_DELEGATE) {
+ if (nsrv4->nfs4_deleg_policy != SRV_NEVER_DELEGATE) {
fp = rfs4_findfile(vp, NULL, &create);
if (fp != NULL) {
if (rfs4_check_delegated_byfp(mode, fp, trunc,
@@ -1521,7 +1538,7 @@ rfs4_check_delegated(int mode, vnode_t *vp, bool_t trunc)
rfs4_file_rele(fp);
}
}
- rfs4_rele_deleg_policy();
+ rfs4_rele_deleg_policy(nsrv4);
return (rc);
}
@@ -1533,7 +1550,9 @@ rfs4_check_delegated(int mode, vnode_t *vp, bool_t trunc)
void
rfs4_clear_dont_grant(rfs4_file_t *fp)
{
- if (rfs4_deleg_policy == SRV_NEVER_DELEGATE)
+ nfs4_srv_t *nsrv4 = nfs4_get_srv();
+
+ if (nsrv4->nfs4_deleg_policy == SRV_NEVER_DELEGATE)
return;
rfs4_dbe_lock(fp->rf_dbe);
ASSERT(fp->rf_dinfo.rd_hold_grant > 0);
@@ -1869,18 +1888,24 @@ rfs4_is_deleg(rfs4_state_t *sp)
void
rfs4_disable_delegation(void)
{
- mutex_enter(&rfs4_deleg_lock);
+ nfs4_srv_t *nsrv4;
+
+ nsrv4 = nfs4_get_srv();
+ mutex_enter(&nsrv4->deleg_lock);
rfs4_deleg_disabled++;
- mutex_exit(&rfs4_deleg_lock);
+ mutex_exit(&nsrv4->deleg_lock);
}
void
rfs4_enable_delegation(void)
{
- mutex_enter(&rfs4_deleg_lock);
+ nfs4_srv_t *nsrv4;
+
+ nsrv4 = nfs4_get_srv();
+ mutex_enter(&nsrv4->deleg_lock);
ASSERT(rfs4_deleg_disabled > 0);
rfs4_deleg_disabled--;
- mutex_exit(&rfs4_deleg_lock);
+ mutex_exit(&nsrv4->deleg_lock);
}
void
diff --git a/usr/src/uts/common/fs/nfs/nfs4_srv_ns.c b/usr/src/uts/common/fs/nfs/nfs4_srv_ns.c
index 4ad799be46..920ebeca53 100644
--- a/usr/src/uts/common/fs/nfs/nfs4_srv_ns.c
+++ b/usr/src/uts/common/fs/nfs/nfs4_srv_ns.c
@@ -20,8 +20,11 @@
*/
/*
- * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+/*
+ * Copyright 2018 Nexenta Systems, Inc.
* Copyright (c) 2015, Joyent, Inc.
*/
@@ -139,12 +142,12 @@ nfs4_vget_pseudo(struct exportinfo *exi, vnode_t **vpp, fid_t *fidp)
*
* A visible list has a per-file-system scope. Any exportinfo
* struct (real or pseudo) can have a visible list as long as
- * a) its export root is VROOT
+ * a) its export root is VROOT, or is the zone's root for in-zone NFS service
* b) a descendant of the export root is shared
*/
struct exportinfo *
-pseudo_exportfs(vnode_t *vp, fid_t *fid, struct exp_visible *vis_head,
- struct exportdata *exdata)
+pseudo_exportfs(nfs_export_t *ne, vnode_t *vp, fid_t *fid,
+ struct exp_visible *vis_head, struct exportdata *exdata)
{
struct exportinfo *exi;
struct exportdata *kex;
@@ -152,7 +155,7 @@ pseudo_exportfs(vnode_t *vp, fid_t *fid, struct exp_visible *vis_head,
int vpathlen;
int i;
- ASSERT(RW_WRITE_HELD(&exported_lock));
+ ASSERT(RW_WRITE_HELD(&ne->exported_lock));
fsid = vp->v_vfsp->vfs_fsid;
exi = kmem_zalloc(sizeof (*exi), KM_SLEEP);
@@ -162,6 +165,7 @@ pseudo_exportfs(vnode_t *vp, fid_t *fid, struct exp_visible *vis_head,
VN_HOLD(exi->exi_vp);
exi->exi_visible = vis_head;
exi->exi_count = 1;
+ exi->exi_zoneid = ne->ne_globals->nfs_zoneid;
exi->exi_volatile_dev = (vfssw[vp->v_vfsp->vfs_fstype].vsw_flag &
VSW_VOLATILEDEV) ? 1 : 0;
mutex_init(&exi->exi_lock, NULL, MUTEX_DEFAULT, NULL);
@@ -205,7 +209,15 @@ pseudo_exportfs(vnode_t *vp, fid_t *fid, struct exp_visible *vis_head,
/*
* Insert the new entry at the front of the export list
*/
- export_link(exi);
+ export_link(ne, exi);
+
+ /*
+ * Initialize exi_id and exi_kstats
+ */
+ mutex_enter(&nfs_exi_id_lock);
+ exi->exi_id = exi_id_get_next();
+ avl_add(&exi_id_tree, exi);
+ mutex_exit(&nfs_exi_id_lock);
return (exi);
}
@@ -281,14 +293,14 @@ tree_prepend_node(treenode_t *n, exp_visible_t *v, exportinfo_t *e)
* they should be already freed.
*/
static void
-tree_remove_node(treenode_t *node)
+tree_remove_node(nfs_export_t *ne, treenode_t *node)
{
treenode_t *parent = node->tree_parent;
treenode_t *s; /* s for sibling */
if (parent == NULL) {
kmem_free(node, sizeof (*node));
- ns_root = NULL;
+ ne->ns_root = NULL;
return;
}
/* This node is first child */
@@ -437,6 +449,7 @@ more_visible(struct exportinfo *exi, treenode_t *tree_head)
struct exp_visible *vp1, *vp2, *vis_head, *tail, *next;
int found;
treenode_t *child, *curr, *connect_point;
+ nfs_export_t *ne = nfs_get_export();
vis_head = tree_head->tree_vis;
connect_point = exi->exi_tree;
@@ -450,7 +463,7 @@ more_visible(struct exportinfo *exi, treenode_t *tree_head)
exi->exi_visible = vis_head;
/* Update the change timestamp */
- tree_update_change(connect_point, &vis_head->vis_change);
+ tree_update_change(ne, connect_point, &vis_head->vis_change);
return;
}
@@ -510,7 +523,7 @@ more_visible(struct exportinfo *exi, treenode_t *tree_head)
tree_add_child(connect_point, curr);
/* Update the change timestamp */
- tree_update_change(connect_point,
+ tree_update_change(ne, connect_point,
&curr->tree_vis->vis_change);
connect_point = NULL;
@@ -627,8 +640,11 @@ treeclimb_export(struct exportinfo *exip)
struct vattr va;
treenode_t *tree_head = NULL;
timespec_t now;
+ nfs_export_t *ne;
- ASSERT(RW_WRITE_HELD(&exported_lock));
+ ne = exip->exi_ne;
+ ASSERT3P(ne, ==, nfs_get_export()); /* curzone reality check */
+ ASSERT(RW_WRITE_HELD(&ne->exported_lock));
gethrestime(&now);
@@ -644,11 +660,14 @@ treeclimb_export(struct exportinfo *exip)
if (error)
break;
+ /* XXX KEBE ASKS DO WE NEED THIS?!? */
+ ASSERT3U(exip->exi_zoneid, ==, curzone->zone_id);
/*
- * The root of the file system needs special handling
+ * The root of the file system, or the zone's root for
+	 * in-zone NFS service, needs special handling
*/
- if (vp->v_flag & VROOT) {
- if (! exportdir) {
+ if (vp->v_flag & VROOT || vp == EXI_TO_ZONEROOTVP(exip)) {
+ if (!exportdir) {
struct exportinfo *exi;
/*
@@ -677,23 +696,23 @@ treeclimb_export(struct exportinfo *exip)
* this as a pseudo export so that an NFS v4
* client can do lookups in it.
*/
- new_exi = pseudo_exportfs(vp, &fid, vis_head,
- NULL);
+ new_exi = pseudo_exportfs(ne, vp, &fid,
+ vis_head, NULL);
vis_head = NULL;
}
- if (VN_CMP(vp, rootdir)) {
+ if (VN_IS_CURZONEROOT(vp)) {
/* at system root */
/*
* If sharing "/", new_exi is shared exportinfo
* (exip). Otherwise, new_exi is exportinfo
* created by pseudo_exportfs() above.
*/
- ns_root = tree_prepend_node(tree_head, NULL,
+ ne->ns_root = tree_prepend_node(tree_head, NULL,
new_exi);
/* Update the change timestamp */
- tree_update_change(ns_root, &now);
+ tree_update_change(ne, ne->ns_root, &now);
break;
}
@@ -702,7 +721,7 @@ treeclimb_export(struct exportinfo *exip)
* Traverse across the mountpoint and continue the
* climb on the mounted-on filesystem.
*/
- vp = untraverse(vp);
+ vp = untraverse(vp, ne->exi_root->exi_vp);
exportdir = 0;
continue;
}
@@ -788,7 +807,10 @@ treeclimb_export(struct exportinfo *exip)
exportinfo_t *e = tree_head->tree_exi;
/* exip will be freed in exportfs() */
if (e && e != exip) {
- export_unlink(e);
+ mutex_enter(&nfs_exi_id_lock);
+ avl_remove(&exi_id_tree, e);
+ mutex_exit(&nfs_exi_id_lock);
+ export_unlink(ne, e);
exi_rele(e);
}
tree_head = tree_head->tree_child_first;
@@ -809,17 +831,30 @@ treeclimb_export(struct exportinfo *exip)
* node was a leaf node.
* Deleting of nodes will finish when we reach a node which
* has children or is a real export, then we might still need
- * to continue releasing visibles, until we reach VROOT node.
+ * to continue releasing visibles, until we reach VROOT or zone's root node.
*/
void
-treeclimb_unexport(struct exportinfo *exip)
+treeclimb_unexport(nfs_export_t *ne, struct exportinfo *exip)
{
treenode_t *tnode, *old_nd;
treenode_t *connect_point = NULL;
- ASSERT(RW_WRITE_HELD(&exported_lock));
+ ASSERT(RW_WRITE_HELD(&ne->exported_lock));
+ ASSERT(curzone->zone_id == exip->exi_zoneid ||
+ curzone->zone_id == global_zone->zone_id);
+ /*
+ * exi_tree can be null for the zone root
+ * which means we're already at the "top"
+ * and there's nothing more to "climb".
+ */
tnode = exip->exi_tree;
+ if (tnode == NULL) {
+ /* Should only happen for... */
+ ASSERT(exip == ne->exi_root);
+ return;
+ }
+
/*
* The unshared exportinfo was unlinked in unexport().
* Zeroing tree_exi ensures that we will skip it.
@@ -831,7 +866,10 @@ treeclimb_unexport(struct exportinfo *exip)
while (tnode != NULL) {
- /* Stop at VROOT node which is exported or has child */
+ /*
+	 * Stop at a VROOT (or zone root) node which is exported or has a
+	 * child.
+ */
if (TREE_ROOT(tnode) &&
(TREE_EXPORTED(tnode) || tnode->tree_child_first != NULL))
break;
@@ -839,8 +877,12 @@ treeclimb_unexport(struct exportinfo *exip)
/* Release pseudo export if it has no child */
if (TREE_ROOT(tnode) && !TREE_EXPORTED(tnode) &&
tnode->tree_child_first == NULL) {
- export_unlink(tnode->tree_exi);
+ mutex_enter(&nfs_exi_id_lock);
+ avl_remove(&exi_id_tree, tnode->tree_exi);
+ mutex_exit(&nfs_exi_id_lock);
+ export_unlink(ne, tnode->tree_exi);
exi_rele(tnode->tree_exi);
+ tnode->tree_exi = NULL;
}
/* Release visible in parent's exportinfo */
@@ -854,14 +896,14 @@ treeclimb_unexport(struct exportinfo *exip)
/* Remove itself, if this is a leaf and non-exported node */
if (old_nd->tree_child_first == NULL &&
!TREE_EXPORTED(old_nd)) {
- tree_remove_node(old_nd);
+ tree_remove_node(ne, old_nd);
connect_point = tnode;
}
}
/* Update the change timestamp */
if (connect_point != NULL)
- tree_update_change(connect_point, NULL);
+ tree_update_change(ne, connect_point, NULL);
}
/*
@@ -870,13 +912,13 @@ treeclimb_unexport(struct exportinfo *exip)
* vnode.
*/
vnode_t *
-untraverse(vnode_t *vp)
+untraverse(vnode_t *vp, vnode_t *zone_rootvp)
{
vnode_t *tvp, *nextvp;
tvp = vp;
for (;;) {
- if (! (tvp->v_flag & VROOT))
+ if (!(tvp->v_flag & VROOT) && !VN_CMP(tvp, zone_rootvp))
break;
/* lock vfs to prevent unmount of this vfs */
@@ -907,7 +949,7 @@ untraverse(vnode_t *vp)
/*
* Given an exportinfo, climb up to find the exportinfo for the VROOT
- * of the filesystem.
+ * (or zone root) of the filesystem.
*
* e.g. /
* |
@@ -924,7 +966,7 @@ untraverse(vnode_t *vp)
*
* If d is shared, then c will be put into a's visible list.
* Note: visible list is per filesystem and is attached to the
- * VROOT exportinfo.
+ * VROOT exportinfo. Returned exi does NOT have a new hold.
*/
struct exportinfo *
get_root_export(struct exportinfo *exip)
@@ -956,12 +998,15 @@ has_visible(struct exportinfo *exi, vnode_t *vp)
vp_is_exported = VN_CMP(vp, exi->exi_vp);
/*
- * An exported root vnode has a sub-dir shared if it has a visible list.
- * i.e. if it does not have a visible list, then there is no node in
- * this filesystem leads to any other shared node.
+ * An exported root vnode has a sub-dir shared if it has a visible
+	 * list; i.e. if it does not have a visible list, then no node in
+	 * this filesystem leads to any other shared node.
*/
- if (vp_is_exported && (vp->v_flag & VROOT))
+ ASSERT3P(curzone->zone_id, ==, exi->exi_zoneid);
+ if (vp_is_exported &&
+ ((vp->v_flag & VROOT) || VN_IS_CURZONEROOT(vp))) {
return (exi->exi_visible ? 1 : 0);
+ }
/*
* Only the exportinfo of a fs root node may have a visible list.
@@ -1034,7 +1079,7 @@ nfs_visible(struct exportinfo *exi, vnode_t *vp, int *expseudo)
* Only a PSEUDO node has a visible list or an exported VROOT
* node may have a visible list.
*/
- if (! PSEUDO(exi))
+ if (!PSEUDO(exi))
exi = get_root_export(exi);
/* Get the fid of the vnode */
@@ -1142,7 +1187,7 @@ nfs_visible_inode(struct exportinfo *exi, ino64_t ino,
* Only a PSEUDO node has a visible list or an exported VROOT
* node may have a visible list.
*/
- if (! PSEUDO(exi))
+ if (!PSEUDO(exi))
exi = get_root_export(exi);
for (*visp = exi->exi_visible; *visp != NULL; *visp = (*visp)->vis_next)
@@ -1154,14 +1199,6 @@ nfs_visible_inode(struct exportinfo *exi, ino64_t ino,
}
/*
- * The change attribute value of the root of nfs pseudo namespace.
- *
- * The ns_root_change is protected by exported_lock because all of the treenode
- * operations are protected by exported_lock too.
- */
-static timespec_t ns_root_change;
-
-/*
* Get the change attribute from visible and returns TRUE.
* If the change value is not available returns FALSE.
*/
@@ -1171,6 +1208,7 @@ nfs_visible_change(struct exportinfo *exi, vnode_t *vp, timespec_t *change)
struct exp_visible *visp;
fid_t fid;
treenode_t *node;
+ nfs_export_t *ne = nfs_get_export();
/*
* First check to see if vp is export root.
@@ -1215,14 +1253,13 @@ nfs_visible_change(struct exportinfo *exi, vnode_t *vp, timespec_t *change)
exproot:
/* The VROOT export have its visible available through treenode */
node = exi->exi_tree;
- if (node != ns_root) {
+ if (node != ne->ns_root) {
ASSERT(node->tree_vis != NULL);
*change = node->tree_vis->vis_change;
} else {
ASSERT(node->tree_vis == NULL);
- *change = ns_root_change;
+ *change = ne->ns_root_change;
}
-
return (TRUE);
}
@@ -1234,15 +1271,15 @@ exproot:
* If the change value is not supplied, the current time is used.
*/
void
-tree_update_change(treenode_t *tnode, timespec_t *change)
+tree_update_change(nfs_export_t *ne, treenode_t *tnode, timespec_t *change)
{
timespec_t *vis_change;
ASSERT(tnode != NULL);
- ASSERT((tnode != ns_root && tnode->tree_vis != NULL) ||
- (tnode == ns_root && tnode->tree_vis == NULL));
+ ASSERT((tnode != ne->ns_root && tnode->tree_vis != NULL) ||
+ (tnode == ne->ns_root && tnode->tree_vis == NULL));
- vis_change = tnode == ns_root ? &ns_root_change
+ vis_change = tnode == ne->ns_root ? &ne->ns_root_change
: &tnode->tree_vis->vis_change;
if (change != NULL)
diff --git a/usr/src/uts/common/fs/nfs/nfs4_state.c b/usr/src/uts/common/fs/nfs/nfs4_state.c
index 47941454bc..0c1efb26df 100644
--- a/usr/src/uts/common/fs/nfs/nfs4_state.c
+++ b/usr/src/uts/common/fs/nfs/nfs4_state.c
@@ -18,9 +18,14 @@
*
* CDDL HEADER END
*/
+
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
+ */
+
+/*
+ * Copyright 2018 Nexenta Systems, Inc.
+ * Copyright 2019 Nexenta by DDN, Inc.
*/
#include <sys/systm.h>
@@ -41,7 +46,6 @@
extern u_longlong_t nfs4_srv_caller_id;
-extern time_t rfs4_start_time;
extern uint_t nfs4_srv_vkey;
stateid4 special0 = {
@@ -72,7 +76,8 @@ int rfs4_debug;
static uint32_t rfs4_database_debug = 0x00;
-static void rfs4_ss_clid_write(rfs4_client_t *cp, char *leaf);
+/* CSTYLED */
+static void rfs4_ss_clid_write(nfs4_srv_t *nsrv4, rfs4_client_t *cp, char *leaf);
static void rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dir, char *leaf);
static void rfs4_dss_clear_oldstate(rfs4_servinst_t *sip);
static void rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip);
@@ -121,11 +126,6 @@ rfs4_sw_exit(rfs4_state_wait_t *swp)
mutex_exit(swp->sw_cv_lock);
}
-/*
- * CPR callback id -- not related to v4 callbacks
- */
-static callb_id_t cpr_id = 0;
-
static void
deep_lock_copy(LOCK4res *dres, LOCK4res *sres)
{
@@ -138,6 +138,11 @@ deep_lock_copy(LOCK4res *dres, LOCK4res *sres)
}
}
+/*
+ * CPR callback id -- not related to v4 callbacks
+ */
+static callb_id_t cpr_id = 0;
+
static void
deep_lock_free(LOCK4res *res)
{
@@ -273,41 +278,6 @@ rfs4_copy_reply(nfs_resop4 *dst, nfs_resop4 *src)
#define ADDRHASH(key) ((unsigned long)(key) >> 3)
-/* Used to serialize create/destroy of rfs4_server_state database */
-kmutex_t rfs4_state_lock;
-static rfs4_database_t *rfs4_server_state = NULL;
-
-/* Used to serialize lookups of clientids */
-static krwlock_t rfs4_findclient_lock;
-
-/*
- * For now this "table" is exposed so that the CPR callback
- * function can tromp through it..
- */
-rfs4_table_t *rfs4_client_tab;
-
-static rfs4_index_t *rfs4_clientid_idx;
-static rfs4_index_t *rfs4_nfsclnt_idx;
-static rfs4_table_t *rfs4_clntip_tab;
-static rfs4_index_t *rfs4_clntip_idx;
-static rfs4_table_t *rfs4_openowner_tab;
-static rfs4_index_t *rfs4_openowner_idx;
-static rfs4_table_t *rfs4_state_tab;
-static rfs4_index_t *rfs4_state_idx;
-static rfs4_index_t *rfs4_state_owner_file_idx;
-static rfs4_index_t *rfs4_state_file_idx;
-static rfs4_table_t *rfs4_lo_state_tab;
-static rfs4_index_t *rfs4_lo_state_idx;
-static rfs4_index_t *rfs4_lo_state_owner_idx;
-static rfs4_table_t *rfs4_lockowner_tab;
-static rfs4_index_t *rfs4_lockowner_idx;
-static rfs4_index_t *rfs4_lockowner_pid_idx;
-static rfs4_table_t *rfs4_file_tab;
-static rfs4_index_t *rfs4_file_idx;
-static rfs4_table_t *rfs4_deleg_state_tab;
-static rfs4_index_t *rfs4_deleg_idx;
-static rfs4_index_t *rfs4_deleg_state_idx;
-
#define MAXTABSZ 1024*1024
/* The values below are rfs4_lease_time units */
@@ -330,16 +300,25 @@ static rfs4_index_t *rfs4_deleg_state_idx;
#define DELEG_STATE_CACHE_TIME 1
#endif
+/*
+ * NFSv4 server state databases
+ *
+ * Initialized when the module is loaded and used by the NFSv4 state tables.
+ * These kmem_cache databases are global; the tables that make use of them
+ * are per zone.
+ */
+kmem_cache_t *rfs4_client_mem_cache;
+kmem_cache_t *rfs4_clntIP_mem_cache;
+kmem_cache_t *rfs4_openown_mem_cache;
+kmem_cache_t *rfs4_openstID_mem_cache;
+kmem_cache_t *rfs4_lockstID_mem_cache;
+kmem_cache_t *rfs4_lockown_mem_cache;
+kmem_cache_t *rfs4_file_mem_cache;
+kmem_cache_t *rfs4_delegstID_mem_cache;
-static time_t rfs4_client_cache_time = 0;
-static time_t rfs4_clntip_cache_time = 0;
-static time_t rfs4_openowner_cache_time = 0;
-static time_t rfs4_state_cache_time = 0;
-static time_t rfs4_lo_state_cache_time = 0;
-static time_t rfs4_lockowner_cache_time = 0;
-static time_t rfs4_file_cache_time = 0;
-static time_t rfs4_deleg_state_cache_time = 0;
-
+/*
+ * NFSv4 state table functions
+ */
static bool_t rfs4_client_create(rfs4_entry_t, void *);
static void rfs4_dss_remove_cpleaf(rfs4_client_t *);
static void rfs4_dss_remove_leaf(rfs4_servinst_t *, char *, char *);
@@ -424,7 +403,7 @@ static rfs4_ss_pn_t *
rfs4_ss_pnalloc(char *dir, char *leaf)
{
rfs4_ss_pn_t *ss_pn;
- int dir_len, leaf_len;
+ int dir_len, leaf_len;
/*
	 * validate we have a reasonable path
@@ -705,29 +684,29 @@ out:
}
static void
-rfs4_ss_init(void)
+rfs4_ss_init(nfs4_srv_t *nsrv4)
{
int npaths = 1;
char *default_dss_path = NFS4_DSS_VAR_DIR;
/* read the default stable storage state */
- rfs4_dss_readstate(npaths, &default_dss_path);
+ rfs4_dss_readstate(nsrv4, npaths, &default_dss_path);
rfs4_ss_enabled = 1;
}
static void
-rfs4_ss_fini(void)
+rfs4_ss_fini(nfs4_srv_t *nsrv4)
{
rfs4_servinst_t *sip;
- mutex_enter(&rfs4_servinst_lock);
- sip = rfs4_cur_servinst;
+ mutex_enter(&nsrv4->servinst_lock);
+ sip = nsrv4->nfs4_cur_servinst;
while (sip != NULL) {
rfs4_dss_clear_oldstate(sip);
sip = sip->next;
}
- mutex_exit(&rfs4_servinst_lock);
+ mutex_exit(&nsrv4->servinst_lock);
}
/*
@@ -771,7 +750,7 @@ rfs4_dss_clear_oldstate(rfs4_servinst_t *sip)
* Form the state and oldstate paths, and read in the stable storage files.
*/
void
-rfs4_dss_readstate(int npaths, char **paths)
+rfs4_dss_readstate(nfs4_srv_t *nsrv4, int npaths, char **paths)
{
int i;
char *state, *oldstate;
@@ -795,8 +774,10 @@ rfs4_dss_readstate(int npaths, char **paths)
* and move the latter's contents to old state
* directory.
*/
- rfs4_ss_oldstate(rfs4_cur_servinst->oldstate, oldstate, NULL);
- rfs4_ss_oldstate(rfs4_cur_servinst->oldstate, state, oldstate);
+ /* CSTYLED */
+ rfs4_ss_oldstate(nsrv4->nfs4_cur_servinst->oldstate, oldstate, NULL);
+ /* CSTYLED */
+ rfs4_ss_oldstate(nsrv4->nfs4_cur_servinst->oldstate, state, oldstate);
}
kmem_free(state, MAXPATHLEN);
@@ -809,7 +790,7 @@ rfs4_dss_readstate(int npaths, char **paths)
* granted permission to perform reclaims.
*/
void
-rfs4_ss_chkclid(rfs4_client_t *cp)
+rfs4_ss_chkclid(nfs4_srv_t *nsrv4, rfs4_client_t *cp)
{
rfs4_servinst_t *sip;
@@ -830,15 +811,15 @@ rfs4_ss_chkclid(rfs4_client_t *cp)
* Start at the current instance, and walk the list backwards
* to the first.
*/
- mutex_enter(&rfs4_servinst_lock);
- for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) {
+ mutex_enter(&nsrv4->servinst_lock);
+ for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
rfs4_ss_chkclid_sip(cp, sip);
/* if the above check found this client, we're done */
if (cp->rc_can_reclaim)
break;
}
- mutex_exit(&rfs4_servinst_lock);
+ mutex_exit(&nsrv4->servinst_lock);
}
static void
@@ -888,7 +869,7 @@ rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip)
* the server-generated short-hand clientid.
*/
void
-rfs4_ss_clid(rfs4_client_t *cp)
+rfs4_ss_clid(nfs4_srv_t *nsrv4, rfs4_client_t *cp)
{
const char *kinet_ntop6(uchar_t *, char *, size_t);
char leaf[MAXNAMELEN], buf[INET6_ADDRSTRLEN];
@@ -920,7 +901,7 @@ rfs4_ss_clid(rfs4_client_t *cp)
(void) snprintf(leaf, MAXNAMELEN, "%s-%llx", buf,
(longlong_t)cp->rc_clientid);
- rfs4_ss_clid_write(cp, leaf);
+ rfs4_ss_clid_write(nsrv4, cp, leaf);
}
/*
@@ -929,7 +910,7 @@ rfs4_ss_clid(rfs4_client_t *cp)
* multiple directories.
*/
static void
-rfs4_ss_clid_write(rfs4_client_t *cp, char *leaf)
+rfs4_ss_clid_write(nfs4_srv_t *nsrv4, rfs4_client_t *cp, char *leaf)
{
rfs4_servinst_t *sip;
@@ -943,8 +924,8 @@ rfs4_ss_clid_write(rfs4_client_t *cp, char *leaf)
* to all instances' paths. Start at the current instance, and
* walk the list backwards to the first.
*/
- mutex_enter(&rfs4_servinst_lock);
- for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) {
+ mutex_enter(&nsrv4->servinst_lock);
+ for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
int i, npaths = sip->dss_npaths;
/* write the leaf file to all DSS paths */
@@ -958,7 +939,7 @@ rfs4_ss_clid_write(rfs4_client_t *cp, char *leaf)
rfs4_ss_clid_write_one(cp, dss_path->path, leaf);
}
}
- mutex_exit(&rfs4_servinst_lock);
+ mutex_exit(&nsrv4->servinst_lock);
}
/*
@@ -1151,34 +1132,107 @@ rfs4_client_scrub(rfs4_entry_t ent, void *arg)
void
rfs4_clear_client_state(struct nfs4clrst_args *clr)
{
- (void) rfs4_dbe_walk(rfs4_client_tab, rfs4_client_scrub, clr);
+ nfs4_srv_t *nsrv4;
+ nsrv4 = nfs4_get_srv();
+ (void) rfs4_dbe_walk(nsrv4->rfs4_client_tab, rfs4_client_scrub, clr);
}
/*
* Used to initialize the NFSv4 server's state or database. All of
- * the tables are created and timers are set. Only called when NFSv4
- * service is provided.
+ * the tables are created and timers are set.
*/
void
-rfs4_state_init()
+rfs4_state_g_init()
{
- int start_grace;
extern boolean_t rfs4_cpr_callb(void *, int);
- char *dss_path = NFS4_DSS_VAR_DIR;
- time_t start_time;
+ /*
+ * Add a CPR callback so that we can update client
+ * access times to extend the lease after a suspend
+ * and resume (using the same class as rpcmod/connmgr)
+ */
+ cpr_id = callb_add(rfs4_cpr_callb, 0, CB_CL_CPR_RPC, "rfs4");
- mutex_enter(&rfs4_state_lock);
+ /*
+ * NFSv4 server state databases
+ *
+ * Initialized when the module is loaded and used by NFSv4 state
+	 * tables. These kmem_cache free pools are used globally; the NFSv4
+	 * state tables that make use of these kmem_cache free pools are per
+ * zone.
+ *
+	 * Initialize the global kmem_cache free pools which will be used by
+ * the NFSv4 state tables.
+ */
+ /* CSTYLED */
+ rfs4_client_mem_cache = nfs4_init_mem_cache("Client_entry_cache", 2, sizeof (rfs4_client_t), 0);
+ /* CSTYLED */
+ rfs4_clntIP_mem_cache = nfs4_init_mem_cache("ClntIP_entry_cache", 1, sizeof (rfs4_clntip_t), 1);
+ /* CSTYLED */
+ rfs4_openown_mem_cache = nfs4_init_mem_cache("OpenOwner_entry_cache", 1, sizeof (rfs4_openowner_t), 2);
+ /* CSTYLED */
+ rfs4_openstID_mem_cache = nfs4_init_mem_cache("OpenStateID_entry_cache", 3, sizeof (rfs4_state_t), 3);
+ /* CSTYLED */
+ rfs4_lockstID_mem_cache = nfs4_init_mem_cache("LockStateID_entry_cache", 3, sizeof (rfs4_lo_state_t), 4);
+ /* CSTYLED */
+ rfs4_lockown_mem_cache = nfs4_init_mem_cache("Lockowner_entry_cache", 2, sizeof (rfs4_lockowner_t), 5);
+ /* CSTYLED */
+ rfs4_file_mem_cache = nfs4_init_mem_cache("File_entry_cache", 1, sizeof (rfs4_file_t), 6);
+ /* CSTYLED */
+ rfs4_delegstID_mem_cache = nfs4_init_mem_cache("DelegStateID_entry_cache", 2, sizeof (rfs4_deleg_state_t), 7);
+ rfs4_client_clrst = rfs4_clear_client_state;
+}
+
+
+/*
+ * Used at server shutdown to cleanup all of the NFSv4 server's structures
+ * and other state.
+ */
+void
+rfs4_state_g_fini()
+{
+ int i;
/*
- * If the server state database has already been initialized,
- * skip it
+ * Cleanup the CPR callback.
*/
- if (rfs4_server_state != NULL) {
- mutex_exit(&rfs4_state_lock);
- return;
+ if (cpr_id)
+ (void) callb_delete(cpr_id);
+
+ rfs4_client_clrst = NULL;
+
+ /* free the NFSv4 state databases */
+ for (i = 0; i < RFS4_DB_MEM_CACHE_NUM; i++) {
+ kmem_cache_destroy(rfs4_db_mem_cache_table[i].r_db_mem_cache);
+ rfs4_db_mem_cache_table[i].r_db_mem_cache = NULL;
}
- rw_init(&rfs4_findclient_lock, NULL, RW_DEFAULT, NULL);
+ rfs4_client_mem_cache = NULL;
+ rfs4_clntIP_mem_cache = NULL;
+ rfs4_openown_mem_cache = NULL;
+ rfs4_openstID_mem_cache = NULL;
+ rfs4_lockstID_mem_cache = NULL;
+ rfs4_lockown_mem_cache = NULL;
+ rfs4_file_mem_cache = NULL;
+ rfs4_delegstID_mem_cache = NULL;
+
+ /* DSS: distributed stable storage */
+ nvlist_free(rfs4_dss_oldpaths);
+ nvlist_free(rfs4_dss_paths);
+ rfs4_dss_paths = rfs4_dss_oldpaths = NULL;
+}
+
+/*
+ * Used to initialize the per zone NFSv4 server's state
+ */
+void
+rfs4_state_zone_init(nfs4_srv_t *nsrv4)
+{
+ time_t start_time;
+ int start_grace;
+ char *dss_path = NFS4_DSS_VAR_DIR;
+
+ /* DSS: distributed stable storage: initialise served paths list */
+ nsrv4->dss_pathlist = NULL;
/*
* Set the boot time. If the server
@@ -1187,13 +1241,10 @@ rfs4_state_init()
* regardless. A small window but it exists...
*/
start_time = gethrestime_sec();
- if (rfs4_start_time < start_time)
- rfs4_start_time = start_time;
+ if (nsrv4->rfs4_start_time < start_time)
+ nsrv4->rfs4_start_time = start_time;
else
- rfs4_start_time++;
-
- /* DSS: distributed stable storage: initialise served paths list */
- rfs4_dss_pathlist = NULL;
+ nsrv4->rfs4_start_time++;
/*
* Create the first server instance, or a new one if the server has
@@ -1202,42 +1253,67 @@ rfs4_state_init()
* clients' recovery window.
*/
start_grace = 0;
- rfs4_servinst_create(start_grace, 1, &dss_path);
+ if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
+ int i;
+ char **dss_allpaths = NULL;
+ dss_allpaths = kmem_alloc(sizeof (char *) *
+ (rfs4_dss_numnewpaths + 1), KM_SLEEP);
+ /*
+ * Add the default path into the list of paths for saving
+ * state information.
+ */
+ dss_allpaths[0] = dss_path;
+ for (i = 0; i < rfs4_dss_numnewpaths; i++) {
+ dss_allpaths[i + 1] = rfs4_dss_newpaths[i];
+ }
+ rfs4_servinst_create(nsrv4, start_grace,
+ (rfs4_dss_numnewpaths + 1), dss_allpaths);
+ kmem_free(dss_allpaths,
+ (sizeof (char *) * (rfs4_dss_numnewpaths + 1)));
+ } else {
+ rfs4_servinst_create(nsrv4, start_grace, 1, &dss_path);
+ }
/* reset the "first NFSv4 request" status */
- rfs4_seen_first_compound = 0;
+ nsrv4->seen_first_compound = 0;
+
+ mutex_enter(&nsrv4->state_lock);
/*
- * Add a CPR callback so that we can update client
- * access times to extend the lease after a suspend
- * and resume (using the same class as rpcmod/connmgr)
+ * If the server state database has already been initialized,
+ * skip it
*/
- cpr_id = callb_add(rfs4_cpr_callb, 0, CB_CL_CPR_RPC, "rfs4");
+ if (nsrv4->nfs4_server_state != NULL) {
+ mutex_exit(&nsrv4->state_lock);
+ return;
+ }
+
+ rw_init(&nsrv4->rfs4_findclient_lock, NULL, RW_DEFAULT, NULL);
/* set the various cache timers for table creation */
- if (rfs4_client_cache_time == 0)
- rfs4_client_cache_time = CLIENT_CACHE_TIME;
- if (rfs4_openowner_cache_time == 0)
- rfs4_openowner_cache_time = OPENOWNER_CACHE_TIME;
- if (rfs4_state_cache_time == 0)
- rfs4_state_cache_time = STATE_CACHE_TIME;
- if (rfs4_lo_state_cache_time == 0)
- rfs4_lo_state_cache_time = LO_STATE_CACHE_TIME;
- if (rfs4_lockowner_cache_time == 0)
- rfs4_lockowner_cache_time = LOCKOWNER_CACHE_TIME;
- if (rfs4_file_cache_time == 0)
- rfs4_file_cache_time = FILE_CACHE_TIME;
- if (rfs4_deleg_state_cache_time == 0)
- rfs4_deleg_state_cache_time = DELEG_STATE_CACHE_TIME;
+ if (nsrv4->rfs4_client_cache_time == 0)
+ nsrv4->rfs4_client_cache_time = CLIENT_CACHE_TIME;
+ if (nsrv4->rfs4_openowner_cache_time == 0)
+ nsrv4->rfs4_openowner_cache_time = OPENOWNER_CACHE_TIME;
+ if (nsrv4->rfs4_state_cache_time == 0)
+ nsrv4->rfs4_state_cache_time = STATE_CACHE_TIME;
+ if (nsrv4->rfs4_lo_state_cache_time == 0)
+ nsrv4->rfs4_lo_state_cache_time = LO_STATE_CACHE_TIME;
+ if (nsrv4->rfs4_lockowner_cache_time == 0)
+ nsrv4->rfs4_lockowner_cache_time = LOCKOWNER_CACHE_TIME;
+ if (nsrv4->rfs4_file_cache_time == 0)
+ nsrv4->rfs4_file_cache_time = FILE_CACHE_TIME;
+ if (nsrv4->rfs4_deleg_state_cache_time == 0)
+ nsrv4->rfs4_deleg_state_cache_time = DELEG_STATE_CACHE_TIME;
/* Create the overall database to hold all server state */
- rfs4_server_state = rfs4_database_create(rfs4_database_debug);
+ nsrv4->nfs4_server_state = rfs4_database_create(rfs4_database_debug);
/* Now create the individual tables */
- rfs4_client_cache_time *= rfs4_lease_time;
- rfs4_client_tab = rfs4_table_create(rfs4_server_state,
+ nsrv4->rfs4_client_cache_time *= rfs4_lease_time;
+ nsrv4->rfs4_client_tab = rfs4_table_create(nsrv4->nfs4_server_state,
"Client",
- rfs4_client_cache_time,
+ nsrv4->rfs4_client_cache_time,
2,
rfs4_client_create,
rfs4_client_destroy,
@@ -1245,19 +1321,19 @@ rfs4_state_init()
sizeof (rfs4_client_t),
TABSIZE,
MAXTABSZ/8, 100);
- rfs4_nfsclnt_idx = rfs4_index_create(rfs4_client_tab,
+ nsrv4->rfs4_nfsclnt_idx = rfs4_index_create(nsrv4->rfs4_client_tab,
"nfs_client_id4", nfsclnt_hash,
nfsclnt_compare, nfsclnt_mkkey,
TRUE);
- rfs4_clientid_idx = rfs4_index_create(rfs4_client_tab,
+ nsrv4->rfs4_clientid_idx = rfs4_index_create(nsrv4->rfs4_client_tab,
"client_id", clientid_hash,
clientid_compare, clientid_mkkey,
FALSE);
- rfs4_clntip_cache_time = 86400 * 365; /* about a year */
- rfs4_clntip_tab = rfs4_table_create(rfs4_server_state,
+ nsrv4->rfs4_clntip_cache_time = 86400 * 365; /* about a year */
+ nsrv4->rfs4_clntip_tab = rfs4_table_create(nsrv4->nfs4_server_state,
"ClntIP",
- rfs4_clntip_cache_time,
+ nsrv4->rfs4_clntip_cache_time,
1,
rfs4_clntip_create,
rfs4_clntip_destroy,
@@ -1265,15 +1341,15 @@ rfs4_state_init()
sizeof (rfs4_clntip_t),
TABSIZE,
MAXTABSZ, 100);
- rfs4_clntip_idx = rfs4_index_create(rfs4_clntip_tab,
+ nsrv4->rfs4_clntip_idx = rfs4_index_create(nsrv4->rfs4_clntip_tab,
"client_ip", clntip_hash,
clntip_compare, clntip_mkkey,
TRUE);
- rfs4_openowner_cache_time *= rfs4_lease_time;
- rfs4_openowner_tab = rfs4_table_create(rfs4_server_state,
+ nsrv4->rfs4_openowner_cache_time *= rfs4_lease_time;
+ nsrv4->rfs4_openowner_tab = rfs4_table_create(nsrv4->nfs4_server_state,
"OpenOwner",
- rfs4_openowner_cache_time,
+ nsrv4->rfs4_openowner_cache_time,
1,
rfs4_openowner_create,
rfs4_openowner_destroy,
@@ -1281,15 +1357,15 @@ rfs4_state_init()
sizeof (rfs4_openowner_t),
TABSIZE,
MAXTABSZ, 100);
- rfs4_openowner_idx = rfs4_index_create(rfs4_openowner_tab,
+ nsrv4->rfs4_openowner_idx = rfs4_index_create(nsrv4->rfs4_openowner_tab,
"open_owner4", openowner_hash,
openowner_compare,
openowner_mkkey, TRUE);
- rfs4_state_cache_time *= rfs4_lease_time;
- rfs4_state_tab = rfs4_table_create(rfs4_server_state,
+ nsrv4->rfs4_state_cache_time *= rfs4_lease_time;
+ nsrv4->rfs4_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
"OpenStateID",
- rfs4_state_cache_time,
+ nsrv4->rfs4_state_cache_time,
3,
rfs4_state_create,
rfs4_state_destroy,
@@ -1298,25 +1374,26 @@ rfs4_state_init()
TABSIZE,
MAXTABSZ, 100);
- rfs4_state_owner_file_idx = rfs4_index_create(rfs4_state_tab,
+ /* CSTYLED */
+ nsrv4->rfs4_state_owner_file_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
"Openowner-File",
state_owner_file_hash,
state_owner_file_compare,
state_owner_file_mkkey, TRUE);
- rfs4_state_idx = rfs4_index_create(rfs4_state_tab,
+ nsrv4->rfs4_state_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
"State-id", state_hash,
state_compare, state_mkkey, FALSE);
- rfs4_state_file_idx = rfs4_index_create(rfs4_state_tab,
+ nsrv4->rfs4_state_file_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
"File", state_file_hash,
state_file_compare, state_file_mkkey,
FALSE);
- rfs4_lo_state_cache_time *= rfs4_lease_time;
- rfs4_lo_state_tab = rfs4_table_create(rfs4_server_state,
+ nsrv4->rfs4_lo_state_cache_time *= rfs4_lease_time;
+ nsrv4->rfs4_lo_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
"LockStateID",
- rfs4_lo_state_cache_time,
+ nsrv4->rfs4_lo_state_cache_time,
2,
rfs4_lo_state_create,
rfs4_lo_state_destroy,
@@ -1325,22 +1402,23 @@ rfs4_state_init()
TABSIZE,
MAXTABSZ, 100);
- rfs4_lo_state_owner_idx = rfs4_index_create(rfs4_lo_state_tab,
+ /* CSTYLED */
+ nsrv4->rfs4_lo_state_owner_idx = rfs4_index_create(nsrv4->rfs4_lo_state_tab,
"lockownerxstate",
lo_state_lo_hash,
lo_state_lo_compare,
lo_state_lo_mkkey, TRUE);
- rfs4_lo_state_idx = rfs4_index_create(rfs4_lo_state_tab,
+ nsrv4->rfs4_lo_state_idx = rfs4_index_create(nsrv4->rfs4_lo_state_tab,
"State-id",
lo_state_hash, lo_state_compare,
lo_state_mkkey, FALSE);
- rfs4_lockowner_cache_time *= rfs4_lease_time;
+ nsrv4->rfs4_lockowner_cache_time *= rfs4_lease_time;
- rfs4_lockowner_tab = rfs4_table_create(rfs4_server_state,
+ nsrv4->rfs4_lockowner_tab = rfs4_table_create(nsrv4->nfs4_server_state,
"Lockowner",
- rfs4_lockowner_cache_time,
+ nsrv4->rfs4_lockowner_cache_time,
2,
rfs4_lockowner_create,
rfs4_lockowner_destroy,
@@ -1349,20 +1427,21 @@ rfs4_state_init()
TABSIZE,
MAXTABSZ, 100);
- rfs4_lockowner_idx = rfs4_index_create(rfs4_lockowner_tab,
+ nsrv4->rfs4_lockowner_idx = rfs4_index_create(nsrv4->rfs4_lockowner_tab,
"lock_owner4", lockowner_hash,
lockowner_compare,
lockowner_mkkey, TRUE);
- rfs4_lockowner_pid_idx = rfs4_index_create(rfs4_lockowner_tab,
+ /* CSTYLED */
+ nsrv4->rfs4_lockowner_pid_idx = rfs4_index_create(nsrv4->rfs4_lockowner_tab,
"pid", pid_hash,
pid_compare, pid_mkkey,
FALSE);
- rfs4_file_cache_time *= rfs4_lease_time;
- rfs4_file_tab = rfs4_table_create(rfs4_server_state,
+ nsrv4->rfs4_file_cache_time *= rfs4_lease_time;
+ nsrv4->rfs4_file_tab = rfs4_table_create(nsrv4->nfs4_server_state,
"File",
- rfs4_file_cache_time,
+ nsrv4->rfs4_file_cache_time,
1,
rfs4_file_create,
rfs4_file_destroy,
@@ -1371,14 +1450,15 @@ rfs4_state_init()
TABSIZE,
MAXTABSZ, -1);
- rfs4_file_idx = rfs4_index_create(rfs4_file_tab,
+ nsrv4->rfs4_file_idx = rfs4_index_create(nsrv4->rfs4_file_tab,
"Filehandle", file_hash,
file_compare, file_mkkey, TRUE);
- rfs4_deleg_state_cache_time *= rfs4_lease_time;
- rfs4_deleg_state_tab = rfs4_table_create(rfs4_server_state,
+ nsrv4->rfs4_deleg_state_cache_time *= rfs4_lease_time;
+ /* CSTYLED */
+ nsrv4->rfs4_deleg_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
"DelegStateID",
- rfs4_deleg_state_cache_time,
+ nsrv4->rfs4_deleg_state_cache_time,
2,
rfs4_deleg_state_create,
rfs4_deleg_state_destroy,
@@ -1386,87 +1466,89 @@ rfs4_state_init()
sizeof (rfs4_deleg_state_t),
TABSIZE,
MAXTABSZ, 100);
- rfs4_deleg_idx = rfs4_index_create(rfs4_deleg_state_tab,
+ nsrv4->rfs4_deleg_idx = rfs4_index_create(nsrv4->rfs4_deleg_state_tab,
"DelegByFileClient",
deleg_hash,
deleg_compare,
deleg_mkkey, TRUE);
- rfs4_deleg_state_idx = rfs4_index_create(rfs4_deleg_state_tab,
+ /* CSTYLED */
+ nsrv4->rfs4_deleg_state_idx = rfs4_index_create(nsrv4->rfs4_deleg_state_tab,
"DelegState",
deleg_state_hash,
deleg_state_compare,
deleg_state_mkkey, FALSE);
+ mutex_exit(&nsrv4->state_lock);
+
/*
* Init the stable storage.
*/
- rfs4_ss_init();
-
- rfs4_client_clrst = rfs4_clear_client_state;
-
- mutex_exit(&rfs4_state_lock);
+ rfs4_ss_init(nsrv4);
}
-
/*
- * Used at server shutdown to cleanup all of the NFSv4 server's structures
- * and other state.
+ * Used at server shutdown to cleanup all of NFSv4 server's zone structures
+ * and state.
*/
void
-rfs4_state_fini()
+rfs4_state_zone_fini()
{
rfs4_database_t *dbp;
+ nfs4_srv_t *nsrv4;
+ nsrv4 = nfs4_get_srv();
+
+ rfs4_set_deleg_policy(nsrv4, SRV_NEVER_DELEGATE);
+
+ /*
+ * Clean up any dangling stable storage structures BEFORE calling
+ * rfs4_servinst_destroy_all() so there are no dangling structures
+ * (i.e. the srvinsts are all cleared of danglers BEFORE they get
+ * freed).
+ */
+ rfs4_ss_fini(nsrv4);
- mutex_enter(&rfs4_state_lock);
+ mutex_enter(&nsrv4->state_lock);
- if (rfs4_server_state == NULL) {
- mutex_exit(&rfs4_state_lock);
+ if (nsrv4->nfs4_server_state == NULL) {
+ mutex_exit(&nsrv4->state_lock);
return;
}
- rfs4_client_clrst = NULL;
+ /* destroy server instances and current instance ptr */
+ rfs4_servinst_destroy_all(nsrv4);
- rfs4_set_deleg_policy(SRV_NEVER_DELEGATE);
- dbp = rfs4_server_state;
- rfs4_server_state = NULL;
+ /* reset the "first NFSv4 request" status */
+ nsrv4->seen_first_compound = 0;
- /*
- * Cleanup the CPR callback.
- */
- if (cpr_id)
- (void) callb_delete(cpr_id);
+ dbp = nsrv4->nfs4_server_state;
+ nsrv4->nfs4_server_state = NULL;
- rw_destroy(&rfs4_findclient_lock);
+ rw_destroy(&nsrv4->rfs4_findclient_lock);
/* First stop all of the reaper threads in the database */
rfs4_database_shutdown(dbp);
- /* clean up any dangling stable storage structures */
- rfs4_ss_fini();
- /* Now actually destroy/release the database and its tables */
+
+ /*
+ * WARNING: There may be consumers of the rfs4 database still
+ * active as we destroy these. IF that's the case, consider putting
+ * some of their _zone_fini()-like functions into the zsd key as
+ * ~~SHUTDOWN~~ functions instead of ~~DESTROY~~ functions. We can
+ * maintain some ordering guarantees better that way.
+ */
+ /* Now destroy/release the database tables */
rfs4_database_destroy(dbp);
/* Reset the cache timers for next time */
- rfs4_client_cache_time = 0;
- rfs4_openowner_cache_time = 0;
- rfs4_state_cache_time = 0;
- rfs4_lo_state_cache_time = 0;
- rfs4_lockowner_cache_time = 0;
- rfs4_file_cache_time = 0;
- rfs4_deleg_state_cache_time = 0;
-
- mutex_exit(&rfs4_state_lock);
-
- /* destroy server instances and current instance ptr */
- rfs4_servinst_destroy_all();
-
- /* reset the "first NFSv4 request" status */
- rfs4_seen_first_compound = 0;
+ nsrv4->rfs4_client_cache_time = 0;
+ nsrv4->rfs4_openowner_cache_time = 0;
+ nsrv4->rfs4_state_cache_time = 0;
+ nsrv4->rfs4_lo_state_cache_time = 0;
+ nsrv4->rfs4_lockowner_cache_time = 0;
+ nsrv4->rfs4_file_cache_time = 0;
+ nsrv4->rfs4_deleg_state_cache_time = 0;
- /* DSS: distributed stable storage */
- nvlist_free(rfs4_dss_oldpaths);
- nvlist_free(rfs4_dss_paths);
- rfs4_dss_paths = rfs4_dss_oldpaths = NULL;
+ mutex_exit(&nsrv4->state_lock);
}
typedef union {
@@ -1581,6 +1663,7 @@ rfs4_client_expiry(rfs4_entry_t u_entry)
static void
rfs4_dss_remove_cpleaf(rfs4_client_t *cp)
{
+ nfs4_srv_t *nsrv4;
rfs4_servinst_t *sip;
char *leaf = cp->rc_ss_pn->leaf;
@@ -1590,12 +1673,13 @@ rfs4_dss_remove_cpleaf(rfs4_client_t *cp)
* from all server instances.
*/
- mutex_enter(&rfs4_servinst_lock);
- for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) {
+ nsrv4 = nfs4_get_srv();
+ mutex_enter(&nsrv4->servinst_lock);
+ for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
/* remove the leaf file associated with this server instance */
rfs4_dss_remove_leaf(sip, NFS4_DSS_STATE_LEAF, leaf);
}
- mutex_exit(&rfs4_servinst_lock);
+ mutex_exit(&nsrv4->servinst_lock);
}
static void
@@ -1663,10 +1747,13 @@ rfs4_client_create(rfs4_entry_t u_entry, void *arg)
struct sockaddr *ca;
cid *cidp;
scid_confirm_verf *scvp;
+ nfs4_srv_t *nsrv4;
+
+ nsrv4 = nfs4_get_srv();
/* Get a clientid to give to the client */
cidp = (cid *)&cp->rc_clientid;
- cidp->impl_id.start_time = rfs4_start_time;
+ cidp->impl_id.start_time = nsrv4->rfs4_start_time;
cidp->impl_id.c_id = (uint32_t)rfs4_dbe_getid(cp->rc_dbe);
/* If we are booted as a cluster node, embed our nodeid */
@@ -1724,7 +1811,7 @@ rfs4_client_create(rfs4_entry_t u_entry, void *arg)
* rfs4_servinst_assign(). In this case it's not strictly necessary.
*/
rfs4_dbe_hold(cp->rc_dbe);
- rfs4_servinst_assign(cp, rfs4_cur_servinst);
+ rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
rfs4_dbe_rele(cp->rc_dbe);
return (TRUE);
@@ -1755,22 +1842,24 @@ rfs4_client_t *
rfs4_findclient(nfs_client_id4 *client, bool_t *create, rfs4_client_t *oldcp)
{
rfs4_client_t *cp;
+ nfs4_srv_t *nsrv4;
+ nsrv4 = nfs4_get_srv();
if (oldcp) {
- rw_enter(&rfs4_findclient_lock, RW_WRITER);
+ rw_enter(&nsrv4->rfs4_findclient_lock, RW_WRITER);
rfs4_dbe_hide(oldcp->rc_dbe);
} else {
- rw_enter(&rfs4_findclient_lock, RW_READER);
+ rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
}
- cp = (rfs4_client_t *)rfs4_dbsearch(rfs4_nfsclnt_idx, client,
+ cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_nfsclnt_idx, client,
create, (void *)client, RFS4_DBS_VALID);
if (oldcp)
rfs4_dbe_unhide(oldcp->rc_dbe);
- rw_exit(&rfs4_findclient_lock);
+ rw_exit(&nsrv4->rfs4_findclient_lock);
return (cp);
}
@@ -1781,17 +1870,18 @@ rfs4_findclient_by_id(clientid4 clientid, bool_t find_unconfirmed)
rfs4_client_t *cp;
bool_t create = FALSE;
cid *cidp = (cid *)&clientid;
+ nfs4_srv_t *nsrv4 = nfs4_get_srv();
/* If we're a cluster and the nodeid isn't right, short-circuit */
if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp))
return (NULL);
- rw_enter(&rfs4_findclient_lock, RW_READER);
+ rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
- cp = (rfs4_client_t *)rfs4_dbsearch(rfs4_clientid_idx, &clientid,
+ cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx, &clientid,
&create, NULL, RFS4_DBS_VALID);
- rw_exit(&rfs4_findclient_lock);
+ rw_exit(&nsrv4->rfs4_findclient_lock);
if (cp && cp->rc_need_confirm && find_unconfirmed == FALSE) {
rfs4_client_rele(cp);
@@ -1899,13 +1989,16 @@ rfs4_clntip_t *
rfs4_find_clntip(struct sockaddr *addr, bool_t *create)
{
rfs4_clntip_t *cp;
+ nfs4_srv_t *nsrv4;
- rw_enter(&rfs4_findclient_lock, RW_READER);
+ nsrv4 = nfs4_get_srv();
- cp = (rfs4_clntip_t *)rfs4_dbsearch(rfs4_clntip_idx, addr,
+ rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
+
+ cp = (rfs4_clntip_t *)rfs4_dbsearch(nsrv4->rfs4_clntip_idx, addr,
create, addr, RFS4_DBS_VALID);
- rw_exit(&rfs4_findclient_lock);
+ rw_exit(&nsrv4->rfs4_findclient_lock);
return (cp);
}
@@ -1915,19 +2008,20 @@ rfs4_invalidate_clntip(struct sockaddr *addr)
{
rfs4_clntip_t *cp;
bool_t create = FALSE;
+ nfs4_srv_t *nsrv4 = nfs4_get_srv();
- rw_enter(&rfs4_findclient_lock, RW_READER);
+ rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
- cp = (rfs4_clntip_t *)rfs4_dbsearch(rfs4_clntip_idx, addr,
+ cp = (rfs4_clntip_t *)rfs4_dbsearch(nsrv4->rfs4_clntip_idx, addr,
&create, NULL, RFS4_DBS_VALID);
if (cp == NULL) {
- rw_exit(&rfs4_findclient_lock);
+ rw_exit(&nsrv4->rfs4_findclient_lock);
return;
}
rfs4_dbe_invalidate(cp->ri_dbe);
rfs4_dbe_rele(cp->ri_dbe);
- rw_exit(&rfs4_findclient_lock);
+ rw_exit(&nsrv4->rfs4_findclient_lock);
}
bool_t
@@ -2075,14 +2169,15 @@ rfs4_openowner_create(rfs4_entry_t u_entry, void *arg)
seqid4 seqid = argp->ro_open_seqid;
rfs4_client_t *cp;
bool_t create = FALSE;
+ nfs4_srv_t *nsrv4 = nfs4_get_srv();
- rw_enter(&rfs4_findclient_lock, RW_READER);
+ rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
- cp = (rfs4_client_t *)rfs4_dbsearch(rfs4_clientid_idx,
+ cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx,
&openowner->clientid,
&create, NULL, RFS4_DBS_VALID);
- rw_exit(&rfs4_findclient_lock);
+ rw_exit(&nsrv4->rfs4_findclient_lock);
if (cp == NULL)
return (FALSE);
@@ -2124,10 +2219,12 @@ rfs4_findopenowner(open_owner4 *openowner, bool_t *create, seqid4 seqid)
{
rfs4_openowner_t *oo;
rfs4_openowner_t arg;
+ nfs4_srv_t *nsrv4 = nfs4_get_srv();
arg.ro_owner = *openowner;
arg.ro_open_seqid = seqid;
- oo = (rfs4_openowner_t *)rfs4_dbsearch(rfs4_openowner_idx, openowner,
+ /* CSTYLED */
+ oo = (rfs4_openowner_t *)rfs4_dbsearch(nsrv4->rfs4_openowner_idx, openowner,
create, &arg, RFS4_DBS_VALID);
return (oo);
@@ -2270,14 +2367,15 @@ rfs4_lockowner_create(rfs4_entry_t u_entry, void *arg)
lock_owner4 *lockowner = (lock_owner4 *)arg;
rfs4_client_t *cp;
bool_t create = FALSE;
+ nfs4_srv_t *nsrv4 = nfs4_get_srv();
- rw_enter(&rfs4_findclient_lock, RW_READER);
+ rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
- cp = (rfs4_client_t *)rfs4_dbsearch(rfs4_clientid_idx,
+ cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx,
&lockowner->clientid,
&create, NULL, RFS4_DBS_VALID);
- rw_exit(&rfs4_findclient_lock);
+ rw_exit(&nsrv4->rfs4_findclient_lock);
if (cp == NULL)
return (FALSE);
@@ -2298,8 +2396,10 @@ rfs4_lockowner_t *
rfs4_findlockowner(lock_owner4 *lockowner, bool_t *create)
{
rfs4_lockowner_t *lo;
+ nfs4_srv_t *nsrv4 = nfs4_get_srv();
- lo = (rfs4_lockowner_t *)rfs4_dbsearch(rfs4_lockowner_idx, lockowner,
+ /* CSTYLED */
+ lo = (rfs4_lockowner_t *)rfs4_dbsearch(nsrv4->rfs4_lockowner_idx, lockowner,
create, lockowner, RFS4_DBS_VALID);
return (lo);
@@ -2310,8 +2410,9 @@ rfs4_findlockowner_by_pid(pid_t pid)
{
rfs4_lockowner_t *lo;
bool_t create = FALSE;
+ nfs4_srv_t *nsrv4 = nfs4_get_srv();
- lo = (rfs4_lockowner_t *)rfs4_dbsearch(rfs4_lockowner_pid_idx,
+ lo = (rfs4_lockowner_t *)rfs4_dbsearch(nsrv4->rfs4_lockowner_pid_idx,
(void *)(uintptr_t)pid, &create, NULL, RFS4_DBS_VALID);
return (lo);
@@ -2422,12 +2523,14 @@ rfs4_findfile(vnode_t *vp, nfs_fh4 *fh, bool_t *create)
{
rfs4_file_t *fp;
rfs4_fcreate_arg arg;
+ nfs4_srv_t *nsrv4 = nfs4_get_srv();
arg.vp = vp;
arg.fh = fh;
if (*create == TRUE)
- fp = (rfs4_file_t *)rfs4_dbsearch(rfs4_file_idx, vp, create,
+ /* CSTYLED */
+ fp = (rfs4_file_t *)rfs4_dbsearch(nsrv4->rfs4_file_idx, vp, create,
&arg, RFS4_DBS_VALID);
else {
mutex_enter(&vp->v_vsd_lock);
@@ -2462,6 +2565,7 @@ rfs4_findfile_withlock(vnode_t *vp, nfs_fh4 *fh, bool_t *create)
rfs4_file_t *fp;
rfs4_fcreate_arg arg;
bool_t screate = *create;
+ nfs4_srv_t *nsrv4 = nfs4_get_srv();
if (screate == FALSE) {
mutex_enter(&vp->v_vsd_lock);
@@ -2492,8 +2596,8 @@ retry:
arg.vp = vp;
arg.fh = fh;
- fp = (rfs4_file_t *)rfs4_dbsearch(rfs4_file_idx, vp, create,
- &arg, RFS4_DBS_VALID);
+ fp = (rfs4_file_t *)rfs4_dbsearch(nsrv4->rfs4_file_idx, vp,
+ create, &arg, RFS4_DBS_VALID);
if (fp != NULL) {
rw_enter(&fp->rf_file_rwlock, RW_WRITER);
if (fp->rf_vp == NULL) {
@@ -2648,8 +2752,9 @@ rfs4_findlo_state(stateid_t *id, bool_t lock_fp)
{
rfs4_lo_state_t *lsp;
bool_t create = FALSE;
+ nfs4_srv_t *nsrv4 = nfs4_get_srv();
- lsp = (rfs4_lo_state_t *)rfs4_dbsearch(rfs4_lo_state_idx, id,
+ lsp = (rfs4_lo_state_t *)rfs4_dbsearch(nsrv4->rfs4_lo_state_idx, id,
&create, NULL, RFS4_DBS_VALID);
if (lock_fp == TRUE && lsp != NULL)
rw_enter(&lsp->rls_state->rs_finfo->rf_file_rwlock, RW_READER);
@@ -2688,12 +2793,13 @@ rfs4_findlo_state_by_owner(rfs4_lockowner_t *lo, rfs4_state_t *sp,
{
rfs4_lo_state_t *lsp;
rfs4_lo_state_t arg;
+ nfs4_srv_t *nsrv4 = nfs4_get_srv();
arg.rls_locker = lo;
arg.rls_state = sp;
- lsp = (rfs4_lo_state_t *)rfs4_dbsearch(rfs4_lo_state_owner_idx, &arg,
- create, &arg, RFS4_DBS_VALID);
+ lsp = (rfs4_lo_state_t *)rfs4_dbsearch(nsrv4->rfs4_lo_state_owner_idx,
+ &arg, create, &arg, RFS4_DBS_VALID);
return (lsp);
}
@@ -2702,8 +2808,11 @@ static stateid_t
get_stateid(id_t eid)
{
stateid_t id;
+ nfs4_srv_t *nsrv4;
+
+ nsrv4 = nfs4_get_srv();
- id.bits.boottime = rfs4_start_time;
+ id.bits.boottime = nsrv4->rfs4_start_time;
id.bits.ident = eid;
id.bits.chgseq = 0;
id.bits.type = 0;
@@ -2959,11 +3068,12 @@ rfs4_deleg_state_t *
rfs4_finddeleg(rfs4_state_t *sp, bool_t *create)
{
rfs4_deleg_state_t ds, *dsp;
+ nfs4_srv_t *nsrv4 = nfs4_get_srv();
ds.rds_client = sp->rs_owner->ro_client;
ds.rds_finfo = sp->rs_finfo;
- dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(rfs4_deleg_idx, &ds,
+ dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(nsrv4->rfs4_deleg_idx, &ds,
create, &ds, RFS4_DBS_VALID);
return (dsp);
@@ -2974,9 +3084,10 @@ rfs4_finddelegstate(stateid_t *id)
{
rfs4_deleg_state_t *dsp;
bool_t create = FALSE;
+ nfs4_srv_t *nsrv4 = nfs4_get_srv();
- dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(rfs4_deleg_state_idx, id,
- &create, NULL, RFS4_DBS_VALID);
+ dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(nsrv4->rfs4_deleg_state_idx,
+ id, &create, NULL, RFS4_DBS_VALID);
return (dsp);
}
@@ -3091,16 +3202,17 @@ state_file_mkkey(rfs4_entry_t u_entry)
rfs4_state_t *
rfs4_findstate_by_owner_file(rfs4_openowner_t *oo, rfs4_file_t *fp,
- bool_t *create)
+ bool_t *create)
{
rfs4_state_t *sp;
rfs4_state_t key;
+ nfs4_srv_t *nsrv4 = nfs4_get_srv();
key.rs_owner = oo;
key.rs_finfo = fp;
- sp = (rfs4_state_t *)rfs4_dbsearch(rfs4_state_owner_file_idx, &key,
- create, &key, RFS4_DBS_VALID);
+ sp = (rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_owner_file_idx,
+ &key, create, &key, RFS4_DBS_VALID);
return (sp);
}
@@ -3110,8 +3222,9 @@ static rfs4_state_t *
rfs4_findstate_by_file(rfs4_file_t *fp)
{
bool_t create = FALSE;
+ nfs4_srv_t *nsrv4 = nfs4_get_srv();
- return ((rfs4_state_t *)rfs4_dbsearch(rfs4_state_file_idx, fp,
+ return ((rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_file_idx, fp,
&create, fp, RFS4_DBS_VALID));
}
@@ -3162,8 +3275,9 @@ rfs4_findstate(stateid_t *id, rfs4_dbsearch_type_t find_invalid, bool_t lock_fp)
{
rfs4_state_t *sp;
bool_t create = FALSE;
+ nfs4_srv_t *nsrv4 = nfs4_get_srv();
- sp = (rfs4_state_t *)rfs4_dbsearch(rfs4_state_idx, id,
+ sp = (rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_idx, id,
&create, NULL, find_invalid);
if (lock_fp == TRUE && sp != NULL)
rw_enter(&sp->rs_finfo->rf_file_rwlock, RW_READER);
@@ -3231,6 +3345,9 @@ nfsstat4
rfs4_check_clientid(clientid4 *cp, int setclid_confirm)
{
cid *cidp = (cid *) cp;
+ nfs4_srv_t *nsrv4;
+
+ nsrv4 = nfs4_get_srv();
/*
* If we are booted as a cluster node, check the embedded nodeid.
@@ -3245,7 +3362,8 @@ rfs4_check_clientid(clientid4 *cp, int setclid_confirm)
* by the client (via the clientid) and this is NOT a
* setclientid_confirm then return EXPIRED.
*/
- if (!setclid_confirm && cidp->impl_id.start_time == rfs4_start_time)
+ if (!setclid_confirm &&
+ cidp->impl_id.start_time == nsrv4->rfs4_start_time)
return (NFS4ERR_EXPIRED);
return (NFS4ERR_STALE_CLIENTID);
@@ -3259,6 +3377,10 @@ rfs4_check_clientid(clientid4 *cp, int setclid_confirm)
static nfsstat4
what_stateid_error(stateid_t *id, stateid_type_t type)
{
+ nfs4_srv_t *nsrv4;
+
+ nsrv4 = nfs4_get_srv();
+
/* If we are booted as a cluster node, was stateid locally generated? */
if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
return (NFS4ERR_STALE_STATEID);
@@ -3268,7 +3390,7 @@ what_stateid_error(stateid_t *id, stateid_type_t type)
return (NFS4ERR_BAD_STATEID);
/* From a different server instantiation, return STALE */
- if (id->bits.boottime != rfs4_start_time)
+ if (id->bits.boottime != nsrv4->rfs4_start_time)
return (NFS4ERR_STALE_STATEID);
/*
@@ -3283,7 +3405,7 @@ what_stateid_error(stateid_t *id, stateid_type_t type)
* that has been revoked, the server should return BAD_STATEID
* instead of the more common EXPIRED error.
*/
- if (id->bits.boottime == rfs4_start_time) {
+ if (id->bits.boottime == nsrv4->rfs4_start_time) {
if (type == DELEGID)
return (NFS4ERR_BAD_STATEID);
else
@@ -3785,7 +3907,7 @@ rfs4_close_all_state(rfs4_file_t *fp)
#ifdef DEBUG
/* only applies when server is handing out delegations */
- if (rfs4_deleg_policy != SRV_NEVER_DELEGATE)
+ if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE)
ASSERT(fp->rf_dinfo.rd_hold_grant > 0);
#endif
@@ -3991,21 +4113,34 @@ rfs4_file_walk_callout(rfs4_entry_t u_entry, void *e)
* state in the server that refers to objects residing underneath this
* particular export. The ordering of the release is important.
* Lock_owner, then state and then file.
+ *
+ * NFS zones note: nfs_export.c:unexport() calls this from a
+ * thread in the global zone for NGZ data structures, so we
+ * CANNOT use zone_getspecific anywhere in this code path.
*/
void
-rfs4_clean_state_exi(struct exportinfo *exi)
+rfs4_clean_state_exi(nfs_export_t *ne, struct exportinfo *exi)
{
- mutex_enter(&rfs4_state_lock);
+ nfs_globals_t *ng;
+ nfs4_srv_t *nsrv4;
+
+ ng = ne->ne_globals;
+ ASSERT(ng->nfs_zoneid == exi->exi_zoneid);
+ nsrv4 = ng->nfs4_srv;
+
+ mutex_enter(&nsrv4->state_lock);
- if (rfs4_server_state == NULL) {
- mutex_exit(&rfs4_state_lock);
+ if (nsrv4->nfs4_server_state == NULL) {
+ mutex_exit(&nsrv4->state_lock);
return;
}
- rfs4_dbe_walk(rfs4_lo_state_tab, rfs4_lo_state_walk_callout, exi);
- rfs4_dbe_walk(rfs4_state_tab, rfs4_state_walk_callout, exi);
- rfs4_dbe_walk(rfs4_deleg_state_tab, rfs4_deleg_state_walk_callout, exi);
- rfs4_dbe_walk(rfs4_file_tab, rfs4_file_walk_callout, exi);
+ rfs4_dbe_walk(nsrv4->rfs4_lo_state_tab,
+ rfs4_lo_state_walk_callout, exi);
+ rfs4_dbe_walk(nsrv4->rfs4_state_tab, rfs4_state_walk_callout, exi);
+ rfs4_dbe_walk(nsrv4->rfs4_deleg_state_tab,
+ rfs4_deleg_state_walk_callout, exi);
+ rfs4_dbe_walk(nsrv4->rfs4_file_tab, rfs4_file_walk_callout, exi);
- mutex_exit(&rfs4_state_lock);
+ mutex_exit(&nsrv4->state_lock);
}
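
The nfs4_state.c changes above split the old rfs4_state_init()/rfs4_state_fini() pair into two layers: rfs4_state_g_init()/rfs4_state_g_fini() run once per module load and own only the global kmem caches and the CPR callback, while rfs4_state_zone_init()/rfs4_state_zone_fini() build and tear down the per-zone tables, indices and server instances hanging off an nfs4_srv_t. Call sites then fetch the per-zone handle with nfs4_get_srv(). That helper lives in a part of the merge not shown here; a plausible minimal shape, mirroring the nfsauth_get_zg() helper added in nfs_auth.c below (nfs_srv_getzg() and the nfs4_srv member do appear in these hunks, but the exact body is a sketch, not the committed code):

nfs4_srv_t *
nfs4_get_srv(void)
{
	/* this zone's NFS server globals, then the v4 state hanging off them */
	nfs_globals_t *ng = nfs_srv_getzg();
	nfs4_srv_t *srv = ng->nfs4_srv;

	ASSERT(srv != NULL);
	return (srv);
}
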
diff --git a/usr/src/uts/common/fs/nfs/nfs_auth.c b/usr/src/uts/common/fs/nfs/nfs_auth.c
index 0ceadca711..ee53038e7d 100644
--- a/usr/src/uts/common/fs/nfs/nfs_auth.c
+++ b/usr/src/uts/common/fs/nfs/nfs_auth.c
@@ -20,11 +20,11 @@
*/
/*
- * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2015 Joyent, Inc. All rights reserved.
* Copyright (c) 2015 by Delphix. All rights reserved.
* Copyright (c) 2015 Joyent, Inc. All rights reserved.
+ * Copyright 2018 Nexenta Systems, Inc. All rights reserved.
*/
#include <sys/param.h>
@@ -54,10 +54,12 @@
static struct kmem_cache *exi_cache_handle;
static void exi_cache_reclaim(void *);
+static void exi_cache_reclaim_zone(nfs_globals_t *);
static void exi_cache_trim(struct exportinfo *exi);
extern pri_t minclsyspri;
+/* NFS auth cache statistics */
volatile uint_t nfsauth_cache_hit;
volatile uint_t nfsauth_cache_miss;
volatile uint_t nfsauth_cache_refresh;
@@ -121,9 +123,8 @@ typedef struct refreshq_auth_node {
} refreshq_auth_node_t;
/*
- * Used to manipulate things on the refreshq_queue.
- * Note that the refresh thread will effectively
- * pop a node off of the queue, at which point it
+ * Used to manipulate things on the refreshq_queue. Note that the refresh
+ * thread will effectively pop a node off of the queue, at which point it
* will no longer need to hold the mutex.
*/
static kmutex_t refreshq_lock;
@@ -131,102 +132,130 @@ static list_t refreshq_queue;
static kcondvar_t refreshq_cv;
/*
- * If there is ever a problem with loading the
- * module, then nfsauth_fini() needs to be called
- * to remove state. In that event, since the
- * refreshq thread has been started, they need to
- * work together to get rid of state.
+ * If there is ever a problem with loading the module, then nfsauth_fini()
+ * needs to be called to remove state. In that event, since the refreshq
+ * thread has been started, they need to work together to get rid of state.
*/
typedef enum nfsauth_refreshq_thread_state {
REFRESHQ_THREAD_RUNNING,
REFRESHQ_THREAD_FINI_REQ,
- REFRESHQ_THREAD_HALTED
+ REFRESHQ_THREAD_HALTED,
+ REFRESHQ_THREAD_NEED_CREATE
} nfsauth_refreshq_thread_state_t;
-nfsauth_refreshq_thread_state_t
-refreshq_thread_state = REFRESHQ_THREAD_HALTED;
+typedef struct nfsauth_globals {
+ kmutex_t mountd_lock;
+ door_handle_t mountd_dh;
+
+ /*
+ * Used to manipulate things on the refreshq_queue. Note that the
+ * refresh thread will effectively pop a node off of the queue,
+ * at which point it will no longer need to hold the mutex.
+ */
+ kmutex_t refreshq_lock;
+ list_t refreshq_queue;
+ kcondvar_t refreshq_cv;
+
+ /*
+ * A list_t would be overkill. These are auth_cache entries which are
+ * no longer linked to an exi. It should be the case that all of their
+ * states are NFS_AUTH_INVALID, i.e., the only way to be put on this
+ * list is iff their state indicated that they had been placed on the
+ * refreshq_queue.
+ *
+ * Note that while there is no link from the exi or back to the exi,
+ * the exi can not go away until these entries are harvested.
+ */
+ struct auth_cache *refreshq_dead_entries;
+ nfsauth_refreshq_thread_state_t refreshq_thread_state;
+
+} nfsauth_globals_t;
static void nfsauth_free_node(struct auth_cache *);
-static void nfsauth_refresh_thread(void);
+static void nfsauth_refresh_thread(nfsauth_globals_t *);
static int nfsauth_cache_compar(const void *, const void *);
-/*
- * mountd is a server-side only daemon. This will need to be
- * revisited if the NFS server is ever made zones-aware.
- */
-kmutex_t mountd_lock;
-door_handle_t mountd_dh;
+static nfsauth_globals_t *
+nfsauth_get_zg(void)
+{
+ nfs_globals_t *ng = nfs_srv_getzg();
+ nfsauth_globals_t *nag = ng->nfs_auth;
+ ASSERT(nag != NULL);
+ return (nag);
+}
void
mountd_args(uint_t did)
{
- mutex_enter(&mountd_lock);
- if (mountd_dh != NULL)
- door_ki_rele(mountd_dh);
- mountd_dh = door_ki_lookup(did);
- mutex_exit(&mountd_lock);
+ nfsauth_globals_t *nag;
+
+ nag = nfsauth_get_zg();
+ mutex_enter(&nag->mountd_lock);
+ if (nag->mountd_dh != NULL)
+ door_ki_rele(nag->mountd_dh);
+ nag->mountd_dh = door_ki_lookup(did);
+ mutex_exit(&nag->mountd_lock);
}
void
nfsauth_init(void)
{
- /*
- * mountd can be restarted by smf(5). We need to make sure
- * the updated door handle will safely make it to mountd_dh
- */
- mutex_init(&mountd_lock, NULL, MUTEX_DEFAULT, NULL);
+ exi_cache_handle = kmem_cache_create("exi_cache_handle",
+ sizeof (struct auth_cache), 0, NULL, NULL,
+ exi_cache_reclaim, NULL, NULL, 0);
+}
- mutex_init(&refreshq_lock, NULL, MUTEX_DEFAULT, NULL);
- list_create(&refreshq_queue, sizeof (refreshq_exi_node_t),
- offsetof(refreshq_exi_node_t, ren_node));
+void
+nfsauth_fini(void)
+{
+ kmem_cache_destroy(exi_cache_handle);
+}
- cv_init(&refreshq_cv, NULL, CV_DEFAULT, NULL);
+void
+nfsauth_zone_init(nfs_globals_t *ng)
+{
+ nfsauth_globals_t *nag;
+
+ nag = kmem_zalloc(sizeof (*nag), KM_SLEEP);
/*
- * Allocate nfsauth cache handle
+ * mountd can be restarted by smf(5). We need to make sure
+ * the updated door handle will safely make it to mountd_dh.
*/
- exi_cache_handle = kmem_cache_create("exi_cache_handle",
- sizeof (struct auth_cache), 0, NULL, NULL,
- exi_cache_reclaim, NULL, NULL, 0);
+ mutex_init(&nag->mountd_lock, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&nag->refreshq_lock, NULL, MUTEX_DEFAULT, NULL);
+ list_create(&nag->refreshq_queue, sizeof (refreshq_exi_node_t),
+ offsetof(refreshq_exi_node_t, ren_node));
+ cv_init(&nag->refreshq_cv, NULL, CV_DEFAULT, NULL);
+ nag->refreshq_thread_state = REFRESHQ_THREAD_NEED_CREATE;
- refreshq_thread_state = REFRESHQ_THREAD_RUNNING;
- (void) zthread_create(NULL, 0, nfsauth_refresh_thread,
- NULL, 0, minclsyspri);
+ ng->nfs_auth = nag;
}
-/*
- * Finalization routine for nfsauth. It is important to call this routine
- * before destroying the exported_lock.
- */
void
-nfsauth_fini(void)
+nfsauth_zone_shutdown(nfs_globals_t *ng)
{
refreshq_exi_node_t *ren;
+ nfsauth_globals_t *nag = ng->nfs_auth;
- /*
- * Prevent the nfsauth_refresh_thread from getting new
- * work.
- */
- mutex_enter(&refreshq_lock);
- if (refreshq_thread_state != REFRESHQ_THREAD_HALTED) {
- refreshq_thread_state = REFRESHQ_THREAD_FINI_REQ;
- cv_broadcast(&refreshq_cv);
+ /* Prevent the nfsauth_refresh_thread from getting new work */
+ mutex_enter(&nag->refreshq_lock);
+ if (nag->refreshq_thread_state == REFRESHQ_THREAD_RUNNING) {
+ nag->refreshq_thread_state = REFRESHQ_THREAD_FINI_REQ;
+ cv_broadcast(&nag->refreshq_cv);
- /*
- * Also, wait for nfsauth_refresh_thread() to exit.
- */
- while (refreshq_thread_state != REFRESHQ_THREAD_HALTED) {
- cv_wait(&refreshq_cv, &refreshq_lock);
- }
+ /* Wait for nfsauth_refresh_thread() to exit */
+ while (nag->refreshq_thread_state != REFRESHQ_THREAD_HALTED)
+ cv_wait(&nag->refreshq_cv, &nag->refreshq_lock);
}
- mutex_exit(&refreshq_lock);
+ mutex_exit(&nag->refreshq_lock);
/*
* Walk the exi_list and in turn, walk the auth_lists and free all
* lists. In addition, free INVALID auth_cache entries.
*/
- while ((ren = list_remove_head(&refreshq_queue))) {
+ while ((ren = list_remove_head(&nag->refreshq_queue))) {
refreshq_auth_node_t *ran;
while ((ran = list_remove_head(&ren->ren_authlist)) != NULL) {
@@ -234,24 +263,30 @@ nfsauth_fini(void)
if (p->auth_state == NFS_AUTH_INVALID)
nfsauth_free_node(p);
strfree(ran->ran_netid);
- kmem_free(ran, sizeof (refreshq_auth_node_t));
+ kmem_free(ran, sizeof (*ran));
}
list_destroy(&ren->ren_authlist);
exi_rele(ren->ren_exi);
- kmem_free(ren, sizeof (refreshq_exi_node_t));
+ kmem_free(ren, sizeof (*ren));
}
- list_destroy(&refreshq_queue);
-
- cv_destroy(&refreshq_cv);
- mutex_destroy(&refreshq_lock);
-
- mutex_destroy(&mountd_lock);
+}
- /*
- * Deallocate nfsauth cache handle
- */
- kmem_cache_destroy(exi_cache_handle);
+void
+nfsauth_zone_fini(nfs_globals_t *ng)
+{
+ nfsauth_globals_t *nag = ng->nfs_auth;
+
+ ng->nfs_auth = NULL;
+
+ list_destroy(&nag->refreshq_queue);
+ cv_destroy(&nag->refreshq_cv);
+ mutex_destroy(&nag->refreshq_lock);
+ mutex_destroy(&nag->mountd_lock);
+ /* Extra cleanup. */
+ if (nag->mountd_dh != NULL)
+ door_ki_rele(nag->mountd_dh);
+ kmem_free(nag, sizeof (*nag));
}
/*
@@ -343,9 +378,10 @@ sys_log(const char *msg)
* Callup to the mountd to get access information in the kernel.
*/
static bool_t
-nfsauth_retrieve(struct exportinfo *exi, char *req_netid, int flavor,
- struct netbuf *addr, int *access, cred_t *clnt_cred, uid_t *srv_uid,
- gid_t *srv_gid, uint_t *srv_gids_cnt, gid_t **srv_gids)
+nfsauth_retrieve(nfsauth_globals_t *nag, struct exportinfo *exi,
+ char *req_netid, int flavor, struct netbuf *addr, int *access,
+ cred_t *clnt_cred, uid_t *srv_uid, gid_t *srv_gid, uint_t *srv_gids_cnt,
+ gid_t **srv_gids)
{
varg_t varg = {0};
nfsauth_res_t res = {0};
@@ -418,11 +454,11 @@ nfsauth_retrieve(struct exportinfo *exi, char *req_netid, int flavor,
da.rsize = 1;
retry:
- mutex_enter(&mountd_lock);
- dh = mountd_dh;
+ mutex_enter(&nag->mountd_lock);
+ dh = nag->mountd_dh;
if (dh != NULL)
door_ki_hold(dh);
- mutex_exit(&mountd_lock);
+ mutex_exit(&nag->mountd_lock);
if (dh == NULL) {
/*
@@ -492,12 +528,12 @@ retry:
* chance to restart mountd(1m)
* and establish a new door handle.
*/
- mutex_enter(&mountd_lock);
- if (dh == mountd_dh) {
- door_ki_rele(mountd_dh);
- mountd_dh = NULL;
+ mutex_enter(&nag->mountd_lock);
+ if (dh == nag->mountd_dh) {
+ door_ki_rele(nag->mountd_dh);
+ nag->mountd_dh = NULL;
}
- mutex_exit(&mountd_lock);
+ mutex_exit(&nag->mountd_lock);
delay(hz);
goto retry;
}
@@ -594,7 +630,7 @@ fail:
}
static void
-nfsauth_refresh_thread(void)
+nfsauth_refresh_thread(nfsauth_globals_t *nag)
{
refreshq_exi_node_t *ren;
refreshq_auth_node_t *ran;
@@ -606,25 +642,25 @@ nfsauth_refresh_thread(void)
callb_cpr_t cprinfo;
- CALLB_CPR_INIT(&cprinfo, &refreshq_lock, callb_generic_cpr,
+ CALLB_CPR_INIT(&cprinfo, &nag->refreshq_lock, callb_generic_cpr,
"nfsauth_refresh");
for (;;) {
- mutex_enter(&refreshq_lock);
- if (refreshq_thread_state != REFRESHQ_THREAD_RUNNING) {
+ mutex_enter(&nag->refreshq_lock);
+ if (nag->refreshq_thread_state != REFRESHQ_THREAD_RUNNING) {
/* Keep the hold on the lock! */
break;
}
- ren = list_remove_head(&refreshq_queue);
+ ren = list_remove_head(&nag->refreshq_queue);
if (ren == NULL) {
CALLB_CPR_SAFE_BEGIN(&cprinfo);
- cv_wait(&refreshq_cv, &refreshq_lock);
- CALLB_CPR_SAFE_END(&cprinfo, &refreshq_lock);
- mutex_exit(&refreshq_lock);
+ cv_wait(&nag->refreshq_cv, &nag->refreshq_lock);
+ CALLB_CPR_SAFE_END(&cprinfo, &nag->refreshq_lock);
+ mutex_exit(&nag->refreshq_lock);
continue;
}
- mutex_exit(&refreshq_lock);
+ mutex_exit(&nag->refreshq_lock);
exi = ren->ren_exi;
ASSERT(exi != NULL);
@@ -671,7 +707,8 @@ nfsauth_refresh_thread(void)
* shutdown.
*/
if (p->auth_state == NFS_AUTH_INVALID ||
- refreshq_thread_state != REFRESHQ_THREAD_RUNNING) {
+ nag->refreshq_thread_state !=
+ REFRESHQ_THREAD_RUNNING) {
mutex_exit(&p->auth_lock);
if (p->auth_state == NFS_AUTH_INVALID)
@@ -706,7 +743,7 @@ nfsauth_refresh_thread(void)
* of the request which triggered the
* refresh attempt.
*/
- retrieval = nfsauth_retrieve(exi, netid,
+ retrieval = nfsauth_retrieve(nag, exi, netid,
p->auth_flavor, &p->auth_clnt->authc_addr, &access,
p->auth_clnt_cred, &uid, &gid, &ngids, &gids);
@@ -753,9 +790,10 @@ nfsauth_refresh_thread(void)
kmem_free(ren, sizeof (refreshq_exi_node_t));
}
- refreshq_thread_state = REFRESHQ_THREAD_HALTED;
- cv_broadcast(&refreshq_cv);
+ nag->refreshq_thread_state = REFRESHQ_THREAD_HALTED;
+ cv_broadcast(&nag->refreshq_cv);
CALLB_CPR_EXIT(&cprinfo);
+ DTRACE_PROBE(nfsauth__nfsauth__refresh__thread__exit);
zthread_exit();
}
@@ -827,6 +865,7 @@ static int
nfsauth_cache_get(struct exportinfo *exi, struct svc_req *req, int flavor,
cred_t *cr, uid_t *uid, gid_t *gid, uint_t *ngids, gid_t **gids)
{
+ nfsauth_globals_t *nag;
struct netbuf *taddrmask;
struct netbuf addr; /* temporary copy of client's address */
const struct netbuf *claddr;
@@ -846,6 +885,9 @@ nfsauth_cache_get(struct exportinfo *exi, struct svc_req *req, int flavor,
ASSERT(cr != NULL);
+ ASSERT3P(curzone->zone_id, ==, exi->exi_zoneid);
+ nag = nfsauth_get_zg();
+
/*
* Now check whether this client already
* has an entry for this flavor in the cache
@@ -858,8 +900,12 @@ nfsauth_cache_get(struct exportinfo *exi, struct svc_req *req, int flavor,
claddr = svc_getrpccaller(req->rq_xprt);
addr = *claddr;
- addr.buf = kmem_alloc(addr.maxlen, KM_SLEEP);
- bcopy(claddr->buf, addr.buf, claddr->len);
+ if (claddr->len != 0) {
+ addr.buf = kmem_alloc(addr.maxlen, KM_SLEEP);
+ bcopy(claddr->buf, addr.buf, claddr->len);
+ } else {
+ addr.buf = NULL;
+ }
SVC_GETADDRMASK(req->rq_xprt, SVC_TATTR_ADDRMASK, (void **)&taddrmask);
ASSERT(taddrmask != NULL);
@@ -1005,8 +1051,9 @@ nfsauth_cache_get(struct exportinfo *exi, struct svc_req *req, int flavor,
atomic_inc_uint(&nfsauth_cache_miss);
- res = nfsauth_retrieve(exi, svc_getnetid(req->rq_xprt), flavor,
- &addr, &access, cr, &tmpuid, &tmpgid, &tmpngids, &tmpgids);
+ res = nfsauth_retrieve(nag, exi, svc_getnetid(req->rq_xprt),
+ flavor, &addr, &access, cr, &tmpuid, &tmpgid, &tmpngids,
+ &tmpgids);
p->auth_access = access;
p->auth_time = p->auth_freshness = gethrestime_sec();
@@ -1091,21 +1138,33 @@ nfsauth_cache_get(struct exportinfo *exi, struct svc_req *req, int flavor,
ran->ran_auth = p;
ran->ran_netid = strdup(svc_getnetid(req->rq_xprt));
- mutex_enter(&refreshq_lock);
+ mutex_enter(&nag->refreshq_lock);
+
+ if (nag->refreshq_thread_state ==
+ REFRESHQ_THREAD_NEED_CREATE) {
+ /* Launch nfsauth refresh thread */
+ nag->refreshq_thread_state =
+ REFRESHQ_THREAD_RUNNING;
+ (void) zthread_create(NULL, 0,
+ nfsauth_refresh_thread, nag, 0,
+ minclsyspri);
+ }
+
/*
- * We should not add a work queue
- * item if the thread is not
- * accepting them.
+ * We should not add a work queue item if the thread
+ * is not accepting them.
*/
- if (refreshq_thread_state == REFRESHQ_THREAD_RUNNING) {
+ if (nag->refreshq_thread_state ==
+ REFRESHQ_THREAD_RUNNING) {
refreshq_exi_node_t *ren;
/*
* Is there an existing exi_list?
*/
- for (ren = list_head(&refreshq_queue);
+ for (ren = list_head(&nag->refreshq_queue);
ren != NULL;
- ren = list_next(&refreshq_queue, ren)) {
+ ren = list_next(&nag->refreshq_queue,
+ ren)) {
if (ren->ren_exi == exi) {
list_insert_tail(
&ren->ren_authlist, ran);
@@ -1128,16 +1187,17 @@ nfsauth_cache_get(struct exportinfo *exi, struct svc_req *req, int flavor,
list_insert_tail(&ren->ren_authlist,
ran);
- list_insert_tail(&refreshq_queue, ren);
+ list_insert_tail(&nag->refreshq_queue,
+ ren);
}
- cv_broadcast(&refreshq_cv);
+ cv_broadcast(&nag->refreshq_cv);
} else {
strfree(ran->ran_netid);
kmem_free(ran, sizeof (refreshq_auth_node_t));
}
- mutex_exit(&refreshq_lock);
+ mutex_exit(&nag->refreshq_lock);
} else {
mutex_exit(&p->auth_lock);
}
@@ -1163,8 +1223,8 @@ retrieve:
atomic_inc_uint(&nfsauth_cache_miss);
- if (nfsauth_retrieve(exi, svc_getnetid(req->rq_xprt), flavor, &addr,
- &access, cr, &tmpuid, &tmpgid, &tmpngids, &tmpgids)) {
+ if (nfsauth_retrieve(nag, exi, svc_getnetid(req->rq_xprt), flavor,
+ &addr, &access, cr, &tmpuid, &tmpgid, &tmpngids, &tmpgids)) {
if (uid != NULL)
*uid = tmpuid;
if (gid != NULL)
@@ -1411,32 +1471,55 @@ nfsauth_cache_free(struct exportinfo *exi)
}
/*
- * Called by the kernel memory allocator when
- * memory is low. Free unused cache entries.
- * If that's not enough, the VM system will
- * call again for some more.
+ * Called by the kernel memory allocator when memory is low.
+ * Free unused cache entries. If that's not enough, the VM system
+ * will call again for some more.
+ *
+ * This needs to operate on all zones, so we take a reader lock
+ * on the list of zones and walk the list. This is OK here
+ * because exi_cache_trim doesn't block or cause new objects
+ * to be allocated (basically just frees lots of stuff).
+ * Use care if nfssrv_globals_rwl is taken as reader in any
+ * other cases because it will block nfs_server_zone_init
+ * and nfs_server_zone_fini, which enter as writer.
*/
/*ARGSUSED*/
void
exi_cache_reclaim(void *cdrarg)
{
+ nfs_globals_t *ng;
+
+ rw_enter(&nfssrv_globals_rwl, RW_READER);
+
+ ng = list_head(&nfssrv_globals_list);
+ while (ng != NULL) {
+ exi_cache_reclaim_zone(ng);
+ ng = list_next(&nfssrv_globals_list, ng);
+ }
+
+ rw_exit(&nfssrv_globals_rwl);
+}
+
+static void
+exi_cache_reclaim_zone(nfs_globals_t *ng)
+{
int i;
struct exportinfo *exi;
+ nfs_export_t *ne = ng->nfs_export;
- rw_enter(&exported_lock, RW_READER);
+ rw_enter(&ne->exported_lock, RW_READER);
for (i = 0; i < EXPTABLESIZE; i++) {
- for (exi = exptable[i]; exi; exi = exi->fid_hash.next) {
+ for (exi = ne->exptable[i]; exi; exi = exi->fid_hash.next)
exi_cache_trim(exi);
- }
}
- rw_exit(&exported_lock);
+ rw_exit(&ne->exported_lock);
atomic_inc_uint(&nfsauth_cache_reclaim);
}
-void
+static void
exi_cache_trim(struct exportinfo *exi)
{
struct auth_cache_clnt *c;
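
Two details of the nfs_auth.c rework deserve a note. First, the refresh worker is no longer started unconditionally from nfsauth_init(); each zone creates its own thread lazily, under refreshq_lock, the first time a cache entry actually needs a deferred refresh (REFRESHQ_THREAD_NEED_CREATE -> REFRESHQ_THREAD_RUNNING). Condensed from the nfsauth_cache_get() hunk above, with the queue bookkeeping elided, the pattern is roughly:

	mutex_enter(&nag->refreshq_lock);
	if (nag->refreshq_thread_state == REFRESHQ_THREAD_NEED_CREATE) {
		/* first deferred refresh in this zone: start the worker */
		nag->refreshq_thread_state = REFRESHQ_THREAD_RUNNING;
		(void) zthread_create(NULL, 0, nfsauth_refresh_thread,
		    nag, 0, minclsyspri);
	}
	if (nag->refreshq_thread_state == REFRESHQ_THREAD_RUNNING) {
		/* ... append the refreshq_auth_node_t and cv_broadcast() ... */
	} else {
		/* shutting down: free the node instead of queueing it */
	}
	mutex_exit(&nag->refreshq_lock);

Second, exi_cache_reclaim() is a kmem reclaim callback with no zone context of its own, so it takes nfssrv_globals_rwl as reader and walks every registered nfs_globals_t, trimming each zone's export table in turn; per the comment in that hunk, other readers of that lock must take care because nfs_server_zone_init()/nfs_server_zone_fini() acquire it as writers.
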
diff --git a/usr/src/uts/common/fs/nfs/nfs_client.c b/usr/src/uts/common/fs/nfs/nfs_client.c
index b034aa4a77..c88cff3739 100644
--- a/usr/src/uts/common/fs/nfs/nfs_client.c
+++ b/usr/src/uts/common/fs/nfs/nfs_client.c
@@ -18,13 +18,20 @@
*
* CDDL HEADER END
*/
+
/*
* Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved.
- *
- * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
+ */
+
+/*
+ * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
* All rights reserved.
*/
+/*
+ * Copyright 2018 Nexenta Systems, Inc.
+ */
+
#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
@@ -60,6 +67,7 @@
#include <nfs/nfs.h>
#include <nfs/nfs_clnt.h>
+#include <nfs/nfs_cmd.h>
#include <nfs/rnode.h>
#include <nfs/nfs_acl.h>
@@ -2796,7 +2804,7 @@ nfs_mi_zonelist_remove(mntinfo_t *mi)
* NFS Client initialization routine. This routine should only be called
* once. It performs the following tasks:
* - Initalize all global locks
- * - Call sub-initialization routines (localize access to variables)
+ * - Call sub-initialization routines (localize access to variables)
*/
int
nfs_clntinit(void)
@@ -2827,6 +2835,8 @@ nfs_clntinit(void)
nfs4_clnt_init();
+ nfscmd_init();
+
#ifdef DEBUG
nfs_clntup = B_TRUE;
#endif
@@ -2846,6 +2856,7 @@ nfs_clntfini(void)
nfs_subrfini();
nfs_vfsfini();
nfs4_clnt_fini();
+ nfscmd_fini();
}
/*
@@ -3346,7 +3357,7 @@ nfs_free_delmapcall(nfs_delmapcall_t *delmap_call)
* Returns:
* 0 if the caller wasn't found
* 1 if the caller was found, removed and freed. *errp is set to what
- * the result of the delmap was.
+ * the result of the delmap was.
*/
int
nfs_find_and_delete_delmapcall(rnode_t *rp, int *errp)
diff --git a/usr/src/uts/common/fs/nfs/nfs_cmd.c b/usr/src/uts/common/fs/nfs/nfs_cmd.c
index 343bbd491a..40775bb231 100644
--- a/usr/src/uts/common/fs/nfs/nfs_cmd.c
+++ b/usr/src/uts/common/fs/nfs/nfs_cmd.c
@@ -18,11 +18,16 @@
*
* CDDL HEADER END
*/
+
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright 2018 Nexenta Systems, Inc.
+ */
+
#include <sys/param.h>
#include <sys/types.h>
#include <sys/pathname.h>
@@ -45,32 +50,65 @@
#endif
#define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
-kmutex_t nfscmd_lock;
-door_handle_t nfscmd_dh;
+typedef struct nfscmd_globals {
+ kmutex_t nfscmd_lock;
+ door_handle_t nfscmd_dh;
+} nfscmd_globals_t;
+
+static zone_key_t nfscmd_zone_key;
static struct charset_cache *nfscmd_charmap(exportinfo_t *exi,
struct sockaddr *sp);
-
+static void *nfscmd_zone_init(zoneid_t);
+static void nfscmd_zone_fini(zoneid_t, void *);
void
nfscmd_args(uint_t did)
{
- mutex_enter(&nfscmd_lock);
- if (nfscmd_dh)
- door_ki_rele(nfscmd_dh);
- nfscmd_dh = door_ki_lookup(did);
- mutex_exit(&nfscmd_lock);
+ nfscmd_globals_t *ncg = zone_getspecific(nfscmd_zone_key, curzone);
+
+ mutex_enter(&ncg->nfscmd_lock);
+ if (ncg->nfscmd_dh != NULL)
+ door_ki_rele(ncg->nfscmd_dh);
+ ncg->nfscmd_dh = door_ki_lookup(did);
+ mutex_exit(&ncg->nfscmd_lock);
}
void
nfscmd_init(void)
{
- mutex_init(&nfscmd_lock, NULL, MUTEX_DEFAULT, NULL);
+ zone_key_create(&nfscmd_zone_key, nfscmd_zone_init,
+ NULL, nfscmd_zone_fini);
}
void
nfscmd_fini(void)
{
+ (void) zone_key_delete(nfscmd_zone_key);
+}
+
+/*ARGSUSED*/
+static void *
+nfscmd_zone_init(zoneid_t zoneid)
+{
+ nfscmd_globals_t *ncg;
+
+ ncg = kmem_zalloc(sizeof (*ncg), KM_SLEEP);
+ mutex_init(&ncg->nfscmd_lock, NULL, MUTEX_DEFAULT, NULL);
+
+ return (ncg);
+}
+
+/*ARGSUSED*/
+static void
+nfscmd_zone_fini(zoneid_t zoneid, void *data)
+{
+ nfscmd_globals_t *ncg = data;
+
+ mutex_destroy(&ncg->nfscmd_lock);
+ if (ncg->nfscmd_dh)
+ door_ki_rele(ncg->nfscmd_dh);
+ kmem_free(ncg, sizeof (*ncg));
}
/*
@@ -88,13 +126,14 @@ nfscmd_send(nfscmd_arg_t *arg, nfscmd_res_t *res)
door_info_t di;
int ntries = 0;
int last = 0;
+ nfscmd_globals_t *ncg = zone_getspecific(nfscmd_zone_key, curzone);
retry:
- mutex_enter(&nfscmd_lock);
- dh = nfscmd_dh;
+ mutex_enter(&ncg->nfscmd_lock);
+ dh = ncg->nfscmd_dh;
if (dh != NULL)
door_ki_hold(dh);
- mutex_exit(&nfscmd_lock);
+ mutex_exit(&ncg->nfscmd_lock);
if (dh == NULL) {
/*
@@ -141,10 +180,10 @@ retry:
* chance to restart mountd(1m)
* and establish a new door handle.
*/
- mutex_enter(&nfscmd_lock);
- if (dh == nfscmd_dh)
- nfscmd_dh = NULL;
- mutex_exit(&nfscmd_lock);
+ mutex_enter(&ncg->nfscmd_lock);
+ if (dh == ncg->nfscmd_dh)
+ ncg->nfscmd_dh = NULL;
+ mutex_exit(&ncg->nfscmd_lock);
door_ki_rele(dh);
delay(hz);
goto retry;
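
nfs_cmd.c is the smallest example of the zone-specific-data (ZSD) idiom the rest of this merge leans on: register a key once at module init, let the per-zone create/destroy callbacks allocate and free that zone's globals, and fetch them with zone_getspecific() on each call. A stripped-down sketch of the lifecycle, using the same kernel interfaces as the hunk above but with hypothetical names (my_zone_key, my_globals_t and the my_* functions are illustrative only, not part of this commit):

#include <sys/types.h>
#include <sys/kmem.h>
#include <sys/mutex.h>
#include <sys/zone.h>

typedef struct my_globals {
	kmutex_t	mg_lock;	/* protects this zone's state */
	uint_t		mg_calls;
} my_globals_t;

static zone_key_t my_zone_key;		/* hypothetical */

/*ARGSUSED*/
static void *
my_zone_init(zoneid_t zoneid)
{
	/* runs for every existing zone and for each zone booted later */
	my_globals_t *mg = kmem_zalloc(sizeof (*mg), KM_SLEEP);

	mutex_init(&mg->mg_lock, NULL, MUTEX_DEFAULT, NULL);
	return (mg);			/* becomes this zone's ZSD value */
}

/*ARGSUSED*/
static void
my_zone_fini(zoneid_t zoneid, void *data)
{
	my_globals_t *mg = data;

	mutex_destroy(&mg->mg_lock);
	kmem_free(mg, sizeof (*mg));
}

void
my_mod_init(void)
{
	zone_key_create(&my_zone_key, my_zone_init, NULL, my_zone_fini);
}

void
my_mod_fini(void)
{
	(void) zone_key_delete(my_zone_key);
}

void
my_do_work(void)
{
	/* per-call lookup; curzone is the calling thread's zone */
	my_globals_t *mg = zone_getspecific(my_zone_key, curzone);

	mutex_enter(&mg->mg_lock);
	mg->mg_calls++;
	mutex_exit(&mg->mg_lock);
}

Using ZSD rather than hand-rolled per-zone lists gets the setup/teardown ordering largely for free: the create callback has run before threads in the zone can reach my_do_work(), and the destroy callback runs only as the zone is torn down, which is the guarantee nfscmd_args() and nfscmd_send() rely on above.
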
diff --git a/usr/src/uts/common/fs/nfs/nfs_export.c b/usr/src/uts/common/fs/nfs/nfs_export.c
index 200ef6668d..080dfe1adf 100644
--- a/usr/src/uts/common/fs/nfs/nfs_export.c
+++ b/usr/src/uts/common/fs/nfs/nfs_export.c
@@ -20,15 +20,17 @@
*/
/*
- * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
- * Copyright 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T.
+ * Copyright 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T.
* All rights reserved.
*/
+/*
+ * Copyright 2018 Nexenta Systems, Inc.
+ */
#include <sys/types.h>
#include <sys/param.h>
@@ -65,14 +67,25 @@
#include <nfs/nfs_log.h>
#include <nfs/lm.h>
#include <sys/sunddi.h>
-#include <sys/pkp_hash.h>
-
-treenode_t *ns_root;
-struct exportinfo *exptable_path_hash[PKP_HASH_SIZE];
-struct exportinfo *exptable[EXPTABLESIZE];
+/*
+ * exi_id support
+ *
+ * exi_id_next The next exi_id available.
+ * exi_id_overflow The exi_id_next already overflowed, so we should
+ * thoroughly check for duplicates.
+ * exi_id_tree AVL tree indexed by exi_id.
+ * nfs_exi_id_lock Lock to protect the export ID list
+ *
+ * All exi_id_next, exi_id_overflow, and exi_id_tree are protected by
+ * nfs_exi_id_lock.
+ */
+static int exi_id_next;
+static bool_t exi_id_overflow;
+avl_tree_t exi_id_tree;
+kmutex_t nfs_exi_id_lock;
-static int unexport(exportinfo_t *);
+static int unexport(nfs_export_t *, exportinfo_t *);
static void exportfree(exportinfo_t *);
static int loadindex(exportdata_t *);
@@ -80,31 +93,18 @@ extern void nfsauth_cache_free(exportinfo_t *);
extern int sec_svc_loadrootnames(int, int, caddr_t **, model_t);
extern void sec_svc_freerootnames(int, int, caddr_t *);
-static int build_seclist_nodups(exportdata_t *, secinfo_t *, int);
-static void srv_secinfo_add(secinfo_t **, int *, secinfo_t *, int, int);
-static void srv_secinfo_remove(secinfo_t **, int *, secinfo_t *, int);
-static void srv_secinfo_treeclimb(exportinfo_t *, secinfo_t *, int, bool_t);
+static int build_seclist_nodups(exportdata_t *, secinfo_t *, int);
+static void srv_secinfo_add(secinfo_t **, int *, secinfo_t *, int, int);
+static void srv_secinfo_remove(secinfo_t **, int *, secinfo_t *, int);
+static void srv_secinfo_treeclimb(nfs_export_t *, exportinfo_t *,
+ secinfo_t *, int, bool_t);
#ifdef VOLATILE_FH_TEST
static struct ex_vol_rename *find_volrnm_fh(exportinfo_t *, nfs_fh4 *);
static uint32_t find_volrnm_fh_id(exportinfo_t *, nfs_fh4 *);
-static void free_volrnm_list(exportinfo_t *);
+static void free_volrnm_list(exportinfo_t *);
#endif /* VOLATILE_FH_TEST */
-/*
- * exported_lock Read/Write lock that protects the exportinfo list.
- * This lock must be held when searching or modifiying
- * the exportinfo list.
- */
-krwlock_t exported_lock;
-
-/*
- * "public" and default (root) location for public filehandle
- */
-struct exportinfo *exi_public, *exi_root;
-
-fid_t exi_rootfid; /* for checking the default public file handle */
-
fhandle_t nullfh2; /* for comparing V2 filehandles */
/*
@@ -117,6 +117,15 @@ fhandle_t nullfh2; /* for comparing V2 filehandles */
#define exptablehash(fsid, fid) (nfs_fhhash((fsid), (fid)) & (EXPTABLESIZE - 1))
+extern nfs_export_t *
+nfs_get_export(void)
+{
+ nfs_globals_t *ng = nfs_srv_getzg();
+ nfs_export_t *ne = ng->nfs_export;
+ ASSERT(ne != NULL);
+ return (ne);
+}
+
static uint8_t
xor_hash(uint8_t *data, int len)
{
@@ -693,7 +702,8 @@ vis2exi(treenode_t *tnode)
}
}
- ASSERT(exi_ret); /* Every visible should have its home exportinfo */
+ /* Every visible should have its home exportinfo */
+ ASSERT(exi_ret != NULL);
return (exi_ret);
}
@@ -702,14 +712,25 @@ vis2exi(treenode_t *tnode)
* Add or remove the newly exported or unexported security flavors of the
* given exportinfo from its ancestors upto the system root.
*/
-void
-srv_secinfo_treeclimb(exportinfo_t *exip, secinfo_t *sec, int seccnt,
- bool_t isadd)
+static void
+srv_secinfo_treeclimb(nfs_export_t *ne, exportinfo_t *exip, secinfo_t *sec,
+ int seccnt, bool_t isadd)
{
- treenode_t *tnode = exip->exi_tree;
+ treenode_t *tnode;
- ASSERT(RW_WRITE_HELD(&exported_lock));
- ASSERT(tnode != NULL);
+ ASSERT(RW_WRITE_HELD(&ne->exported_lock));
+
+ /*
+ * exi_tree can be null for the zone root
+ * which means we're already at the "top"
+ * and there's nothing more to "climb".
+ */
+ tnode = exip->exi_tree;
+ if (tnode == NULL) {
+ /* Should only happen for... */
+ ASSERT(exip == ne->exi_root);
+ return;
+ }
if (seccnt == 0)
return;
@@ -722,6 +743,7 @@ srv_secinfo_treeclimb(exportinfo_t *exip, secinfo_t *sec, int seccnt,
* transferred from the PSEUDO export in exportfs()
*/
if (isadd && !(exip->exi_vp->v_flag & VROOT) &&
+ !VN_CMP(exip->exi_vp, EXI_TO_ZONEROOTVP(exip)) &&
tnode->tree_vis->vis_seccnt > 0) {
srv_secinfo_add(&exip->exi_export.ex_secinfo,
&exip->exi_export.ex_seccnt, tnode->tree_vis->vis_secinfo,
@@ -782,108 +804,302 @@ srv_secinfo_treeclimb(exportinfo_t *exip, secinfo_t *sec, int seccnt,
*(bucket) = (exi);
void
-export_link(exportinfo_t *exi)
+export_link(nfs_export_t *ne, exportinfo_t *exi)
{
exportinfo_t **bckt;
- bckt = &exptable[exptablehash(&exi->exi_fsid, &exi->exi_fid)];
+ ASSERT(RW_WRITE_HELD(&ne->exported_lock));
+
+ bckt = &ne->exptable[exptablehash(&exi->exi_fsid, &exi->exi_fid)];
exp_hash_link(exi, fid_hash, bckt);
- bckt = &exptable_path_hash[pkp_tab_hash(exi->exi_export.ex_path,
+ bckt = &ne->exptable_path_hash[pkp_tab_hash(exi->exi_export.ex_path,
strlen(exi->exi_export.ex_path))];
exp_hash_link(exi, path_hash, bckt);
+ exi->exi_ne = ne;
}
/*
- * Initialization routine for export routines. Should only be called once.
+ * Helper functions for exi_id handling
*/
+static int
+exi_id_compar(const void *v1, const void *v2)
+{
+ const struct exportinfo *e1 = v1;
+ const struct exportinfo *e2 = v2;
+
+ if (e1->exi_id < e2->exi_id)
+ return (-1);
+ if (e1->exi_id > e2->exi_id)
+ return (1);
+
+ return (0);
+}
+
int
-nfs_exportinit(void)
+exi_id_get_next()
+{
+ struct exportinfo e;
+ int ret = exi_id_next;
+
+ ASSERT(MUTEX_HELD(&nfs_exi_id_lock));
+
+ do {
+ exi_id_next++;
+ if (exi_id_next == 0)
+ exi_id_overflow = TRUE;
+
+ if (!exi_id_overflow)
+ break;
+
+ if (exi_id_next == ret)
+ cmn_err(CE_PANIC, "exi_id exhausted");
+
+ e.exi_id = exi_id_next;
+ } while (avl_find(&exi_id_tree, &e, NULL) != NULL);
+
+ return (ret);
+}
+
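exi_id_get_next() above hands out the current counter value and then advances it; once the counter wraps past zero it starts probing exi_id_tree for a free ID and panics only if the search comes all the way back around. A minimal user-space model of that wrap-around search, assuming a hypothetical id_in_use() predicate in place of the kernel's avl_find() on exi_id_tree:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Hypothetical stand-in for avl_find() on exi_id_tree; this model
 * simply pretends no IDs are currently in use.
 */
static bool
id_in_use(int32_t id)
{
    (void) id;
    return (false);
}

/*
 * Model of exi_id_get_next(): hand out *nextp, then advance it.
 * After the counter wraps, skip IDs still in use; if the search
 * comes back around to the value being handed out, give up.
 */
static int32_t
id_get_next(int32_t *nextp, bool *overflowp)
{
    int32_t ret = *nextp;

    do {
        (*nextp)++;
        if (*nextp == 0)
            *overflowp = true;
        if (!*overflowp)
            break;
        if (*nextp == ret) {
            fprintf(stderr, "id space exhausted\n");
            abort();
        }
    } while (id_in_use(*nextp));

    return (ret);
}

int
main(void)
{
    int32_t next = 0;
    bool overflow = false;

    for (int i = 0; i < 3; i++)
        printf("allocated id %d\n", id_get_next(&next, &overflow));
    return (0);
}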
+/*
+ * Get the root file handle for this zone.
+ * Called when nfs_svc() starts
+ */
+int
+nfs_export_get_rootfh(nfs_globals_t *g)
+{
+ nfs_export_t *ne = g->nfs_export;
+ int err;
+
+ ne->exi_rootfid.fid_len = MAXFIDSZ;
+ err = vop_fid_pseudo(ne->exi_root->exi_vp, &ne->exi_rootfid);
+ if (err != 0) {
+ ne->exi_rootfid.fid_len = 0;
+ return (err);
+ }
+
+ /* Setup the fhandle template exi_fh */
+ ne->exi_root->exi_fh.fh_fsid = rootdir->v_vfsp->vfs_fsid;
+ ne->exi_root->exi_fh.fh_xlen = ne->exi_rootfid.fid_len;
+ bcopy(ne->exi_rootfid.fid_data, ne->exi_root->exi_fh.fh_xdata,
+ ne->exi_rootfid.fid_len);
+ ne->exi_root->exi_fh.fh_len = sizeof (ne->exi_root->exi_fh.fh_data);
+
+ return (0);
+}
+
+void
+nfs_export_zone_init(nfs_globals_t *ng)
{
- int error;
int i;
+ nfs_export_t *ne;
+ zone_t *zone;
+
+ ne = kmem_zalloc(sizeof (*ne), KM_SLEEP);
- rw_init(&exported_lock, NULL, RW_DEFAULT, NULL);
+ rw_init(&ne->exported_lock, NULL, RW_DEFAULT, NULL);
+
+ ne->ne_globals = ng; /* "up" pointer */
/*
* Allocate the place holder for the public file handle, which
* is all zeroes. It is initially set to the root filesystem.
*/
- exi_root = kmem_zalloc(sizeof (*exi_root), KM_SLEEP);
- exi_public = exi_root;
+ ne->exi_root = kmem_zalloc(sizeof (*ne->exi_root), KM_SLEEP);
+ ne->exi_public = ne->exi_root;
- exi_root->exi_export.ex_flags = EX_PUBLIC;
- exi_root->exi_export.ex_pathlen = 1; /* length of "/" */
- exi_root->exi_export.ex_path =
- kmem_alloc(exi_root->exi_export.ex_pathlen + 1, KM_SLEEP);
- exi_root->exi_export.ex_path[0] = '/';
- exi_root->exi_export.ex_path[1] = '\0';
+ ne->exi_root->exi_export.ex_flags = EX_PUBLIC;
+ ne->exi_root->exi_export.ex_pathlen = 1; /* length of "/" */
+ ne->exi_root->exi_export.ex_path =
+ kmem_alloc(ne->exi_root->exi_export.ex_pathlen + 1, KM_SLEEP);
+ ne->exi_root->exi_export.ex_path[0] = '/';
+ ne->exi_root->exi_export.ex_path[1] = '\0';
- exi_root->exi_count = 1;
- mutex_init(&exi_root->exi_lock, NULL, MUTEX_DEFAULT, NULL);
+ ne->exi_root->exi_count = 1;
+ mutex_init(&ne->exi_root->exi_lock, NULL, MUTEX_DEFAULT, NULL);
- exi_root->exi_vp = rootdir;
- exi_rootfid.fid_len = MAXFIDSZ;
- error = vop_fid_pseudo(exi_root->exi_vp, &exi_rootfid);
- if (error) {
- mutex_destroy(&exi_root->exi_lock);
- kmem_free(exi_root, sizeof (*exi_root));
- return (error);
- }
+ /*
+ * Because we cannot:
+ * ASSERT(curzone->zone_id == ng->nfs_zoneid);
+ * We grab the zone pointer explicitly (like netstacks do) and
+ * set the rootvp here.
+ *
+ * Subsequent exportinfo_t's that get export_link()ed to "ne" also
+ * will backpoint to "ne" such that exi->exi_ne->exi_root->exi_vp
+ * will get the zone's rootvp for a given exportinfo_t.
+ */
+ zone = zone_find_by_id_nolock(ng->nfs_zoneid);
+ ne->exi_root->exi_vp = zone->zone_rootvp;
+ ne->exi_root->exi_zoneid = ng->nfs_zoneid;
/*
- * Initialize auth cache and auth cache lock
+ * Fill in ne->exi_rootfid later, in nfs_export_get_rootfh()
+ * because we can't correctly return errors here.
*/
+
+ /* Initialize auth cache and auth cache lock */
for (i = 0; i < AUTH_TABLESIZE; i++) {
- exi_root->exi_cache[i] = kmem_alloc(sizeof (avl_tree_t),
+ ne->exi_root->exi_cache[i] = kmem_alloc(sizeof (avl_tree_t),
KM_SLEEP);
- avl_create(exi_root->exi_cache[i], nfsauth_cache_clnt_compar,
- sizeof (struct auth_cache_clnt),
+ avl_create(ne->exi_root->exi_cache[i],
+ nfsauth_cache_clnt_compar, sizeof (struct auth_cache_clnt),
offsetof(struct auth_cache_clnt, authc_link));
}
- rw_init(&exi_root->exi_cache_lock, NULL, RW_DEFAULT, NULL);
+ rw_init(&ne->exi_root->exi_cache_lock, NULL, RW_DEFAULT, NULL);
- /* setup the fhandle template */
- exi_root->exi_fh.fh_fsid = rootdir->v_vfsp->vfs_fsid;
- exi_root->exi_fh.fh_xlen = exi_rootfid.fid_len;
- bcopy(exi_rootfid.fid_data, exi_root->exi_fh.fh_xdata,
- exi_rootfid.fid_len);
- exi_root->exi_fh.fh_len = sizeof (exi_root->exi_fh.fh_data);
+ /* setup exi_fh later, in nfs_export_get_rootfh() */
- /*
- * Publish the exportinfo in the hash table
- */
- export_link(exi_root);
+ rw_enter(&ne->exported_lock, RW_WRITER);
- nfslog_init();
- ns_root = NULL;
+ /* Publish the exportinfo in the hash table */
+ export_link(ne, ne->exi_root);
- return (0);
+ /* Initialize exi_id and exi_kstats */
+ mutex_enter(&nfs_exi_id_lock);
+ ne->exi_root->exi_id = exi_id_get_next();
+ avl_add(&exi_id_tree, ne->exi_root);
+ mutex_exit(&nfs_exi_id_lock);
+
+ rw_exit(&ne->exported_lock);
+ ne->ns_root = NULL;
+
+ ng->nfs_export = ne;
}
/*
- * Finalization routine for export routines. Called to cleanup previously
- * initialization work when the NFS server module could not be loaded correctly.
+ * During zone shutdown, remove exports
*/
void
-nfs_exportfini(void)
+nfs_export_zone_shutdown(nfs_globals_t *ng)
+{
+ nfs_export_t *ne = ng->nfs_export;
+ struct exportinfo *exi, *nexi;
+ int i, errors;
+
+ rw_enter(&ne->exported_lock, RW_READER);
+
+ errors = 0;
+ for (i = 0; i < EXPTABLESIZE; i++) {
+
+ exi = ne->exptable[i];
+ if (exi != NULL)
+ exi_hold(exi);
+
+ while (exi != NULL) {
+
+ /*
+ * Get and hold the next export before
+ * dropping the rwlock to unexport this one.
+ */
+ nexi = exi->fid_hash.next;
+ if (nexi != NULL)
+ exi_hold(nexi);
+
+ rw_exit(&ne->exported_lock);
+
+ /*
+ * Skip ne->exi_root which gets special
+ * create/destroy handling.
+ */
+ if (exi != ne->exi_root &&
+ unexport(ne, exi) != 0)
+ errors++;
+ exi_rele(exi);
+
+ rw_enter(&ne->exported_lock, RW_READER);
+ exi = nexi;
+ }
+ }
+ if (errors > 0) {
+ cmn_err(CE_NOTE, "NFS: failed un-exports in zone %d",
+ (int)ng->nfs_zoneid);
+ }
+
+ rw_exit(&ne->exported_lock);
+}
+
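The shutdown walk above cannot call unexport() with exported_lock held (unexport() re-takes it as a writer), so it takes a hold on the next bucket entry before dropping the lock, works on the current entry unlocked, and then re-acquires the lock to advance. A small user-space sketch of that hold-next-then-drop-the-lock idiom, using a hypothetical refcounted node list in place of the exportinfo hash buckets:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

/* Hypothetical refcounted node, standing in for an exportinfo_t. */
struct node {
    struct node *next;
    atomic_int refcnt;
    int id;
};

static pthread_rwlock_t list_lock = PTHREAD_RWLOCK_INITIALIZER;

static void
node_hold(struct node *n)
{
    atomic_fetch_add(&n->refcnt, 1);
}

static void
node_rele(struct node *n)
{
    atomic_fetch_sub(&n->refcnt, 1);
}

/* Stand-in for unexport(); may take list_lock itself, so call it unlocked. */
static void
process(struct node *n)
{
    printf("processing node %d\n", n->id);
}

/*
 * Walk the list the way nfs_export_zone_shutdown() walks a hash bucket:
 * hold the next node before dropping the lock, so the resume point cannot
 * disappear while the current node is processed without the lock held.
 */
static void
walk_and_process(struct node *head)
{
    struct node *n = head, *next;

    pthread_rwlock_rdlock(&list_lock);
    if (n != NULL)
        node_hold(n);
    while (n != NULL) {
        next = n->next;
        if (next != NULL)
            node_hold(next);
        pthread_rwlock_unlock(&list_lock);

        process(n);
        node_rele(n);

        pthread_rwlock_rdlock(&list_lock);
        n = next;
    }
    pthread_rwlock_unlock(&list_lock);
}

int
main(void)
{
    struct node c = { NULL, 0, 3 }, b = { &c, 0, 2 }, a = { &b, 0, 1 };

    walk_and_process(&a);
    return (0);
}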
+void
+nfs_export_zone_fini(nfs_globals_t *ng)
{
int i;
+ nfs_export_t *ne = ng->nfs_export;
+ struct exportinfo *exi;
+
+ ng->nfs_export = NULL;
+
+ rw_enter(&ne->exported_lock, RW_WRITER);
+
+ mutex_enter(&nfs_exi_id_lock);
+ avl_remove(&exi_id_tree, ne->exi_root);
+ mutex_exit(&nfs_exi_id_lock);
+
+ export_unlink(ne, ne->exi_root);
+
+ rw_exit(&ne->exported_lock);
+
+ /* Deallocate the place holder for the public file handle */
+ srv_secinfo_list_free(ne->exi_root->exi_export.ex_secinfo,
+ ne->exi_root->exi_export.ex_seccnt);
+ mutex_destroy(&ne->exi_root->exi_lock);
+
+ rw_destroy(&ne->exi_root->exi_cache_lock);
+ for (i = 0; i < AUTH_TABLESIZE; i++) {
+ avl_destroy(ne->exi_root->exi_cache[i]);
+ kmem_free(ne->exi_root->exi_cache[i], sizeof (avl_tree_t));
+ }
+
+ kmem_free(ne->exi_root->exi_export.ex_path,
+ ne->exi_root->exi_export.ex_pathlen + 1);
+ kmem_free(ne->exi_root, sizeof (*ne->exi_root));
/*
- * Deallocate the place holder for the public file handle.
+ * The shutdown hook should have left the exi_id_tree
+ * with nothing belonging to this zone.
*/
- srv_secinfo_list_free(exi_root->exi_export.ex_secinfo,
- exi_root->exi_export.ex_seccnt);
- mutex_destroy(&exi_root->exi_lock);
- rw_destroy(&exi_root->exi_cache_lock);
- for (i = 0; i < AUTH_TABLESIZE; i++) {
- avl_destroy(exi_root->exi_cache[i]);
- kmem_free(exi_root->exi_cache[i], sizeof (avl_tree_t));
+ mutex_enter(&nfs_exi_id_lock);
+ i = 0;
+ exi = avl_first(&exi_id_tree);
+ while (exi != NULL) {
+ if (exi->exi_zoneid == ng->nfs_zoneid)
+ i++;
+ exi = AVL_NEXT(&exi_id_tree, exi);
}
- kmem_free(exi_root, sizeof (*exi_root));
+ mutex_exit(&nfs_exi_id_lock);
+ if (i > 0) {
+ cmn_err(CE_NOTE,
+ "NFS: zone %d has %d export IDs left after shutdown",
+ (int)ng->nfs_zoneid, i);
+ }
+ rw_destroy(&ne->exported_lock);
+ kmem_free(ne, sizeof (*ne));
+}
+
+/*
+ * Initialization routine for export routines.
+ * Should only be called once.
+ */
+void
+nfs_exportinit(void)
+{
+ mutex_init(&nfs_exi_id_lock, NULL, MUTEX_DEFAULT, NULL);
+
+ /* exi_id handling initialization */
+ exi_id_next = 0;
+ exi_id_overflow = FALSE;
+ avl_create(&exi_id_tree, exi_id_compar, sizeof (struct exportinfo),
+ offsetof(struct exportinfo, exi_id_link));
+
+ nfslog_init();
+}
- rw_destroy(&exported_lock);
+/*
+ * Finalization routine for export routines.
+ */
+void
+nfs_exportfini(void)
+{
+ avl_destroy(&exi_id_tree);
+ mutex_destroy(&nfs_exi_id_lock);
}
/*
@@ -922,6 +1138,7 @@ rfs_gsscallback(struct svc_req *req, gss_cred_id_t deleg, void *gss_context,
int i, j;
rpc_gss_rawcred_t *raw_cred;
struct exportinfo *exi;
+ nfs_export_t *ne = nfs_get_export();
/*
* We don't deal with delegated credentials.
@@ -932,9 +1149,10 @@ rfs_gsscallback(struct svc_req *req, gss_cred_id_t deleg, void *gss_context,
raw_cred = lock->raw_cred;
*cookie = NULL;
- rw_enter(&exported_lock, RW_READER);
+ rw_enter(&ne->exported_lock, RW_READER);
+
for (i = 0; i < EXPTABLESIZE; i++) {
- exi = exptable[i];
+ exi = ne->exptable[i];
while (exi) {
if (exi->exi_export.ex_seccnt > 0) {
struct secinfo *secp;
@@ -974,7 +1192,7 @@ rfs_gsscallback(struct svc_req *req, gss_cred_id_t deleg, void *gss_context,
}
}
done:
- rw_exit(&exported_lock);
+ rw_exit(&ne->exported_lock);
/*
* If no nfs pseudo number mapping can be found in the export
@@ -1041,6 +1259,7 @@ exportfs(struct exportfs_args *args, model_t model, cred_t *cr)
int oldcnt;
int i;
struct pathname lookpn;
+ nfs_export_t *ne = nfs_get_export();
STRUCT_SET_HANDLE(uap, model, args);
@@ -1049,25 +1268,25 @@ exportfs(struct exportfs_args *args, model_t model, cred_t *cr)
return (error);
/* Walk the export list looking for that pathname */
- rw_enter(&exported_lock, RW_READER);
+ rw_enter(&ne->exported_lock, RW_READER);
DTRACE_PROBE(nfss__i__exported_lock1_start);
- for (ex1 = exptable_path_hash[pkp_tab_hash(lookpn.pn_path,
+ for (ex1 = ne->exptable_path_hash[pkp_tab_hash(lookpn.pn_path,
strlen(lookpn.pn_path))]; ex1; ex1 = ex1->path_hash.next) {
- if (ex1 != exi_root && 0 ==
+ if (ex1 != ne->exi_root && 0 ==
strcmp(ex1->exi_export.ex_path, lookpn.pn_path)) {
exi_hold(ex1);
break;
}
}
DTRACE_PROBE(nfss__i__exported_lock1_stop);
- rw_exit(&exported_lock);
+ rw_exit(&ne->exported_lock);
/* Is this an unshare? */
if (STRUCT_FGETP(uap, uex) == NULL) {
pn_free(&lookpn);
if (ex1 == NULL)
return (EINVAL);
- error = unexport(ex1);
+ error = unexport(ne, ex1);
exi_rele(ex1);
return (error);
}
@@ -1163,15 +1382,15 @@ exportfs(struct exportfs_args *args, model_t model, cred_t *cr)
* Do not allow re-sharing a shared vnode under a different path
* PSEUDO export has ex_path fabricated, e.g. "/tmp (pseudo)", skip it.
*/
- rw_enter(&exported_lock, RW_READER);
+ rw_enter(&ne->exported_lock, RW_READER);
DTRACE_PROBE(nfss__i__exported_lock2_start);
- for (ex2 = exptable[exptablehash(&fsid, &fid)]; ex2;
+ for (ex2 = ne->exptable[exptablehash(&fsid, &fid)]; ex2;
ex2 = ex2->fid_hash.next) {
- if (ex2 != exi_root && !PSEUDO(ex2) &&
+ if (ex2 != ne->exi_root && !PSEUDO(ex2) &&
VN_CMP(ex2->exi_vp, vp) &&
strcmp(ex2->exi_export.ex_path, lookpn.pn_path) != 0) {
DTRACE_PROBE(nfss__i__exported_lock2_stop);
- rw_exit(&exported_lock);
+ rw_exit(&ne->exported_lock);
VN_RELE(vp);
if (dvp != NULL)
VN_RELE(dvp);
@@ -1180,7 +1399,7 @@ exportfs(struct exportfs_args *args, model_t model, cred_t *cr)
}
}
DTRACE_PROBE(nfss__i__exported_lock2_stop);
- rw_exit(&exported_lock);
+ rw_exit(&ne->exported_lock);
pn_free(&lookpn);
exi = kmem_zalloc(sizeof (*exi), KM_SLEEP);
@@ -1188,6 +1407,8 @@ exportfs(struct exportfs_args *args, model_t model, cred_t *cr)
exi->exi_fid = fid;
exi->exi_vp = vp;
exi->exi_count = 1;
+ exi->exi_zoneid = crgetzoneid(cr);
+ ASSERT3U(exi->exi_zoneid, ==, curzone->zone_id);
exi->exi_volatile_dev = (vfssw[vp->v_vfsp->vfs_fstype].vsw_flag &
VSW_VOLATILEDEV) ? 1 : 0;
mutex_init(&exi->exi_lock, NULL, MUTEX_DEFAULT, NULL);
@@ -1461,10 +1682,10 @@ exportfs(struct exportfs_args *args, model_t model, cred_t *cr)
/*
* Insert the new entry at the front of the export list
*/
- rw_enter(&exported_lock, RW_WRITER);
+ rw_enter(&ne->exported_lock, RW_WRITER);
DTRACE_PROBE(nfss__i__exported_lock3_start);
- export_link(exi);
+ export_link(ne, exi);
/*
* Check the rest of the list for an old entry for the fs.
@@ -1472,8 +1693,11 @@ exportfs(struct exportfs_args *args, model_t model, cred_t *cr)
* only reference and then free it.
*/
for (ex = exi->fid_hash.next; ex != NULL; ex = ex->fid_hash.next) {
- if (ex != exi_root && VN_CMP(ex->exi_vp, vp)) {
- export_unlink(ex);
+ if (ex != ne->exi_root && VN_CMP(ex->exi_vp, vp)) {
+ mutex_enter(&nfs_exi_id_lock);
+ avl_remove(&exi_id_tree, ex);
+ mutex_exit(&nfs_exi_id_lock);
+ export_unlink(ne, ex);
break;
}
}
@@ -1482,8 +1706,8 @@ exportfs(struct exportfs_args *args, model_t model, cred_t *cr)
* If the public filehandle is pointing at the
* old entry, then point it back at the root.
*/
- if (ex != NULL && ex == exi_public)
- exi_public = exi_root;
+ if (ex != NULL && ex == ne->exi_public)
+ ne->exi_public = ne->exi_root;
/*
* If the public flag is on, make the global exi_public
@@ -1491,7 +1715,7 @@ exportfs(struct exportfs_args *args, model_t model, cred_t *cr)
* we can distinguish it from the place holder export.
*/
if (kex->ex_flags & EX_PUBLIC) {
- exi_public = exi;
+ ne->exi_public = exi;
kex->ex_flags &= ~EX_PUBLIC;
}
@@ -1523,7 +1747,7 @@ exportfs(struct exportfs_args *args, model_t model, cred_t *cr)
exi->exi_tree->tree_exi = exi;
/* Update the change timestamp */
- tree_update_change(exi->exi_tree, NULL);
+ tree_update_change(ne, exi->exi_tree, NULL);
}
/*
@@ -1533,7 +1757,7 @@ exportfs(struct exportfs_args *args, model_t model, cred_t *cr)
*/
newcnt = build_seclist_nodups(&exi->exi_export, newsec, FALSE);
- srv_secinfo_treeclimb(exi, newsec, newcnt, TRUE);
+ srv_secinfo_treeclimb(ne, exi, newsec, newcnt, TRUE);
/*
* If re-sharing an old export entry, update the secinfo data
@@ -1558,7 +1782,7 @@ exportfs(struct exportfs_args *args, model_t model, cred_t *cr)
* Remove old flavor refs last.
*/
srv_secinfo_exp2exp(&exi->exi_export, oldsec, oldcnt);
- srv_secinfo_treeclimb(ex, oldsec, oldcnt, FALSE);
+ srv_secinfo_treeclimb(ne, ex, oldsec, oldcnt, FALSE);
}
}
@@ -1571,10 +1795,24 @@ exportfs(struct exportfs_args *args, model_t model, cred_t *cr)
ex->exi_visible = NULL;
}
+ /*
+ * Initialize exi_id and exi_kstats
+ */
+ if (ex != NULL) {
+ exi->exi_id = ex->exi_id;
+ } else {
+ mutex_enter(&nfs_exi_id_lock);
+ exi->exi_id = exi_id_get_next();
+ mutex_exit(&nfs_exi_id_lock);
+ }
+ mutex_enter(&nfs_exi_id_lock);
+ avl_add(&exi_id_tree, exi);
+ mutex_exit(&nfs_exi_id_lock);
+
DTRACE_PROBE(nfss__i__exported_lock3_stop);
- rw_exit(&exported_lock);
+ rw_exit(&ne->exported_lock);
- if (exi_public == exi || kex->ex_flags & EX_LOG) {
+ if (ne->exi_public == exi || kex->ex_flags & EX_LOG) {
/*
* Log share operation to this buffer only.
*/
@@ -1588,9 +1826,9 @@ exportfs(struct exportfs_args *args, model_t model, cred_t *cr)
out7:
/* Unlink the new export in exptable. */
- export_unlink(exi);
+ export_unlink(ne, exi);
DTRACE_PROBE(nfss__i__exported_lock3_stop);
- rw_exit(&exported_lock);
+ rw_exit(&ne->exported_lock);
out6:
if (kex->ex_flags & EX_INDEX)
kmem_free(kex->ex_index, strlen(kex->ex_index) + 1);
@@ -1634,40 +1872,44 @@ out1:
* Remove the exportinfo from the export list
*/
void
-export_unlink(struct exportinfo *exi)
+export_unlink(nfs_export_t *ne, struct exportinfo *exi)
{
- ASSERT(RW_WRITE_HELD(&exported_lock));
+ ASSERT(RW_WRITE_HELD(&ne->exported_lock));
exp_hash_unlink(exi, fid_hash);
exp_hash_unlink(exi, path_hash);
+ ASSERT3P(exi->exi_ne, ==, ne);
+ exi->exi_ne = NULL;
}
/*
* Unexport an exported filesystem
*/
static int
-unexport(struct exportinfo *exi)
+unexport(nfs_export_t *ne, struct exportinfo *exi)
{
struct secinfo cursec[MAX_FLAVORS];
int curcnt;
- rw_enter(&exported_lock, RW_WRITER);
+ rw_enter(&ne->exported_lock, RW_WRITER);
/* Check if exi is still linked in the export table */
if (!EXP_LINKED(exi) || PSEUDO(exi)) {
- rw_exit(&exported_lock);
+ rw_exit(&ne->exported_lock);
return (EINVAL);
}
- export_unlink(exi);
+ mutex_enter(&nfs_exi_id_lock);
+ avl_remove(&exi_id_tree, exi);
+ mutex_exit(&nfs_exi_id_lock);
+ export_unlink(ne, exi);
/*
* Remove security flavors before treeclimb_unexport() is called
* because srv_secinfo_treeclimb needs the namespace tree
*/
curcnt = build_seclist_nodups(&exi->exi_export, cursec, TRUE);
-
- srv_secinfo_treeclimb(exi, cursec, curcnt, FALSE);
+ srv_secinfo_treeclimb(ne, exi, cursec, curcnt, FALSE);
/*
* If there's a visible list, then need to leave
@@ -1677,7 +1919,7 @@ unexport(struct exportinfo *exi)
if (exi->exi_visible != NULL) {
struct exportinfo *newexi;
- newexi = pseudo_exportfs(exi->exi_vp, &exi->exi_fid,
+ newexi = pseudo_exportfs(ne, exi->exi_vp, &exi->exi_fid,
exi->exi_visible, &exi->exi_export);
exi->exi_visible = NULL;
@@ -1686,12 +1928,12 @@ unexport(struct exportinfo *exi)
newexi->exi_tree->tree_exi = newexi;
/* Update the change timestamp */
- tree_update_change(exi->exi_tree, NULL);
+ tree_update_change(ne, exi->exi_tree, NULL);
} else {
- treeclimb_unexport(exi);
+ treeclimb_unexport(ne, exi);
}
- rw_exit(&exported_lock);
+ rw_exit(&ne->exported_lock);
/*
* Need to call into the NFSv4 server and release all data
@@ -1699,7 +1941,7 @@ unexport(struct exportinfo *exi)
* the v4 server may be holding file locks or vnodes under
* this export.
*/
- rfs4_clean_state_exi(exi);
+ rfs4_clean_state_exi(ne, exi);
/*
* Notify the lock manager that the filesystem is being
@@ -1711,15 +1953,19 @@ unexport(struct exportinfo *exi)
* If this was a public export, restore
* the public filehandle to the root.
*/
- if (exi == exi_public) {
- exi_public = exi_root;
- nfslog_share_record(exi_public, CRED());
+ /*
+ * XXX KEBE ASKS --> Should CRED() instead be
+ * exi->exi_zone->zone_kcred?
+ */
+ if (exi == ne->exi_public) {
+ ne->exi_public = ne->exi_root;
+
+ nfslog_share_record(ne->exi_public, CRED());
}
- if (exi->exi_export.ex_flags & EX_LOG) {
+ if (exi->exi_export.ex_flags & EX_LOG)
nfslog_unshare_record(exi, CRED());
- }
exi_rele(exi);
return (0);
@@ -1946,7 +2192,8 @@ nfs_vptoexi(vnode_t *dvp, vnode_t *vp, cred_t *cr, int *walk,
* If we're at the root of this filesystem, then
* it's time to stop (with failure).
*/
- if (vp->v_flag & VROOT) {
+ ASSERT3P(vp->v_vfsp->vfs_zone, ==, curzone);
+ if ((vp->v_flag & VROOT) || VN_IS_CURZONEROOT(vp)) {
error = EINVAL;
break;
}
@@ -2446,9 +2693,10 @@ struct exportinfo *
checkexport(fsid_t *fsid, fid_t *fid)
{
struct exportinfo *exi;
+ nfs_export_t *ne = nfs_get_export();
- rw_enter(&exported_lock, RW_READER);
- for (exi = exptable[exptablehash(fsid, fid)];
+ rw_enter(&ne->exported_lock, RW_READER);
+ for (exi = ne->exptable[exptablehash(fsid, fid)];
exi != NULL;
exi = exi->fid_hash.next) {
if (exportmatch(exi, fsid, fid)) {
@@ -2459,15 +2707,15 @@ checkexport(fsid_t *fsid, fid_t *fid)
* handle.
*/
if (exi->exi_export.ex_flags & EX_PUBLIC) {
- exi = exi_public;
+ exi = ne->exi_public;
}
exi_hold(exi);
- rw_exit(&exported_lock);
+ rw_exit(&ne->exported_lock);
return (exi);
}
}
- rw_exit(&exported_lock);
+ rw_exit(&ne->exported_lock);
return (NULL);
}
@@ -2483,10 +2731,11 @@ struct exportinfo *
checkexport4(fsid_t *fsid, fid_t *fid, vnode_t *vp)
{
struct exportinfo *exi;
+ nfs_export_t *ne = nfs_get_export();
- ASSERT(RW_LOCK_HELD(&exported_lock));
+ ASSERT(RW_LOCK_HELD(&ne->exported_lock));
- for (exi = exptable[exptablehash(fsid, fid)];
+ for (exi = ne->exptable[exptablehash(fsid, fid)];
exi != NULL;
exi = exi->fid_hash.next) {
if (exportmatch(exi, fsid, fid)) {
@@ -2497,7 +2746,7 @@ checkexport4(fsid_t *fsid, fid_t *fid, vnode_t *vp)
* handle.
*/
if (exi->exi_export.ex_flags & EX_PUBLIC) {
- exi = exi_public;
+ exi = ne->exi_public;
}
/*
diff --git a/usr/src/uts/common/fs/nfs/nfs_log.c b/usr/src/uts/common/fs/nfs/nfs_log.c
index 7cf0fe24e9..a314f4319a 100644
--- a/usr/src/uts/common/fs/nfs/nfs_log.c
+++ b/usr/src/uts/common/fs/nfs/nfs_log.c
@@ -18,10 +18,15 @@
*
* CDDL HEADER END
*/
+
/*
* Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
*/
+/*
+ * Copyright 2018 Nexenta Systems, Inc.
+ */
+
#include <sys/cred.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
@@ -43,8 +48,6 @@
#define NUM_RECORDS_TO_WRITE 256
#define NUM_BYTES_TO_WRITE 65536
-extern krwlock_t exported_lock;
-
static int nfslog_num_records_to_write = NUM_RECORDS_TO_WRITE;
static int nfslog_num_bytes_to_write = NUM_BYTES_TO_WRITE;
@@ -595,11 +598,8 @@ log_file_rele(struct log_file *lfp)
*/
/* ARGSUSED */
void *
-nfslog_record_alloc(
- struct exportinfo *exi,
- int alloc_indx,
- void **cookie,
- int flags)
+nfslog_record_alloc(struct exportinfo *exi, int alloc_indx, void **cookie,
+ int flags)
{
struct lr_alloc *lrp;
@@ -652,7 +652,7 @@ nfslog_record_alloc(
*/
void
nfslog_record_put(void *cookie, size_t size, bool_t sync,
- unsigned int which_buffers)
+ unsigned int which_buffers)
{
struct lr_alloc *lrp = (struct lr_alloc *)cookie;
struct log_buffer *lbp = lrp->lb;
@@ -768,8 +768,8 @@ nfslog_records_flush_to_disk_nolock(struct log_buffer *lbp)
* them to the end of the log file.
*/
static int
-nfslog_write_logrecords(struct log_file *lfp,
- struct lr_alloc *lrp_writers, int num_recs)
+nfslog_write_logrecords(struct log_file *lfp, struct lr_alloc *lrp_writers,
+ int num_recs)
{
struct uio uio;
struct iovec *iovp;
@@ -1161,8 +1161,8 @@ nfsl_flush(struct nfsl_flush_args *args, model_t model)
/*
* Do the work asynchronously
*/
- (void) thread_create(NULL, 0, nfslog_do_flush,
- tparams, 0, &p0, TS_RUN, minclsyspri);
+ (void) zthread_create(NULL, 0, nfslog_do_flush,
+ tparams, 0, minclsyspri);
}
return (error);
@@ -1249,8 +1249,7 @@ out:
*/
kmem_free(args->buff, args->buff_len);
kmem_free(tparams, sizeof (*tparams));
- thread_exit();
- /* NOTREACHED */
+ zthread_exit();
}
tparams->tp_error = error;
@@ -1529,6 +1528,7 @@ static int nfslog_dispatch_table_arglen = sizeof (nfslog_dispatch_table) /
*/
struct exportinfo *
nfslog_get_exi(
+ nfs_export_t *ne,
struct exportinfo *exi,
struct svc_req *req,
caddr_t res,
@@ -1560,7 +1560,7 @@ nfslog_get_exi(
return (exi);
}
- if (exi != exi_public)
+ if (exi != ne->exi_public)
return (NULL);
/*
@@ -1625,8 +1625,8 @@ static long long rfslog_records_ignored = 0;
*/
void
nfslog_write_record(struct exportinfo *exi, struct svc_req *req,
- caddr_t args, caddr_t res, cred_t *cr, struct netbuf *pnb,
- unsigned int record_id, unsigned int which_buffers)
+ caddr_t args, caddr_t res, cred_t *cr, struct netbuf *pnb,
+ unsigned int record_id, unsigned int which_buffers)
{
struct nfslog_prog_disp *progtable; /* prog struct */
struct nfslog_vers_disp *verstable; /* version struct */
@@ -1764,17 +1764,17 @@ nfslog_write_record(struct exportinfo *exi, struct svc_req *req,
static char *
get_publicfh_path(int *alloc_length)
{
- extern struct exportinfo *exi_public;
char *pubpath;
+ nfs_export_t *ne = nfs_get_export();
- rw_enter(&exported_lock, RW_READER);
+ rw_enter(&ne->exported_lock, RW_READER);
- *alloc_length = exi_public->exi_export.ex_pathlen + 1;
+ *alloc_length = ne->exi_public->exi_export.ex_pathlen + 1;
pubpath = kmem_alloc(*alloc_length, KM_SLEEP);
- (void) strcpy(pubpath, exi_public->exi_export.ex_path);
+ (void) strcpy(pubpath, ne->exi_public->exi_export.ex_path);
- rw_exit(&exported_lock);
+ rw_exit(&ne->exported_lock);
return (pubpath);
}
@@ -1870,11 +1870,8 @@ nfslog_unshare_record(struct exportinfo *exi, cred_t *cr)
void
-nfslog_getfh(struct exportinfo *exi,
- fhandle *fh,
- char *fname,
- enum uio_seg seg,
- cred_t *cr)
+nfslog_getfh(struct exportinfo *exi, fhandle *fh, char *fname, enum uio_seg seg,
+ cred_t *cr)
{
struct svc_req req;
int res = 0;
diff --git a/usr/src/uts/common/fs/nfs/nfs_server.c b/usr/src/uts/common/fs/nfs/nfs_server.c
index c6ae29d220..5b7658d048 100644
--- a/usr/src/uts/common/fs/nfs/nfs_server.c
+++ b/usr/src/uts/common/fs/nfs/nfs_server.c
@@ -22,8 +22,8 @@
* Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 Bayard G. Bell. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
- * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2017 Joyent Inc
+ * Copyright 2019 Nexenta by DDN, Inc.
*/
/*
@@ -83,7 +83,6 @@
#include <nfs/nfs_clnt.h>
#include <nfs/nfs_acl.h>
#include <nfs/nfs_log.h>
-#include <nfs/nfs_cmd.h>
#include <nfs/lm.h>
#include <nfs/nfs_dispatch.h>
#include <nfs/nfs4_drc.h>
@@ -109,6 +108,10 @@ static struct modlinkage modlinkage = {
MODREV_1, (void *)&modlmisc, NULL
};
+zone_key_t nfssrv_zone_key;
+list_t nfssrv_globals_list;
+krwlock_t nfssrv_globals_rwl;
+
kmem_cache_t *nfs_xuio_cache;
int nfs_loaned_buffers = 0;
@@ -117,10 +120,7 @@ _init(void)
{
int status;
- if ((status = nfs_srvinit()) != 0) {
- cmn_err(CE_WARN, "_init: nfs_srvinit failed");
- return (status);
- }
+ nfs_srvinit();
status = mod_install((struct modlinkage *)&modlinkage);
if (status != 0) {
@@ -177,27 +177,26 @@ _info(struct modinfo *modinfop)
* supports RPC_PUBLICFH_OK, and if the filesystem is explicitly exported
* public (i.e., not the placeholder).
*/
-#define PUBLICFH_CHECK(disp, exi, fsid, xfid) \
+#define PUBLICFH_CHECK(ne, disp, exi, fsid, xfid) \
((disp->dis_flags & RPC_PUBLICFH_OK) && \
((exi->exi_export.ex_flags & EX_PUBLIC) || \
- (exi == exi_public && exportmatch(exi_root, \
+ (exi == ne->exi_public && exportmatch(ne->exi_root, \
fsid, xfid))))
static void nfs_srv_shutdown_all(int);
-static void rfs4_server_start(int);
+static void rfs4_server_start(nfs_globals_t *, int);
static void nullfree(void);
static void rfs_dispatch(struct svc_req *, SVCXPRT *);
static void acl_dispatch(struct svc_req *, SVCXPRT *);
-static void common_dispatch(struct svc_req *, SVCXPRT *,
- rpcvers_t, rpcvers_t, char *,
- struct rpc_disptable *);
-static void hanfsv4_failover(void);
static int checkauth(struct exportinfo *, struct svc_req *, cred_t *, int,
bool_t, bool_t *);
static char *client_name(struct svc_req *req);
static char *client_addr(struct svc_req *req, char *buf);
extern int sec_svc_getcred(struct svc_req *, cred_t *cr, char **, int *);
extern bool_t sec_svc_inrootlist(int, caddr_t, int, caddr_t *);
+static void *nfs_server_zone_init(zoneid_t);
+static void nfs_server_zone_fini(zoneid_t, void *);
+static void nfs_server_zone_shutdown(zoneid_t, void *);
#define NFSLOG_COPY_NETBUF(exi, xprt, nb) { \
(nb)->maxlen = (xprt)->xp_rtaddr.maxlen; \
@@ -248,24 +247,6 @@ static SVC_CALLOUT __nfs_sc_rdma[] = {
static SVC_CALLOUT_TABLE nfs_sct_rdma = {
sizeof (__nfs_sc_rdma) / sizeof (__nfs_sc_rdma[0]), FALSE, __nfs_sc_rdma
};
-rpcvers_t nfs_versmin = NFS_VERSMIN_DEFAULT;
-rpcvers_t nfs_versmax = NFS_VERSMAX_DEFAULT;
-
-/*
- * Used to track the state of the server so that initialization
- * can be done properly.
- */
-typedef enum {
- NFS_SERVER_STOPPED, /* server state destroyed */
- NFS_SERVER_STOPPING, /* server state being destroyed */
- NFS_SERVER_RUNNING,
- NFS_SERVER_QUIESCED, /* server state preserved */
- NFS_SERVER_OFFLINE /* server pool offline */
-} nfs_server_running_t;
-
-static nfs_server_running_t nfs_server_upordown;
-static kmutex_t nfs_server_upordown_lock;
-static kcondvar_t nfs_server_upordown_cv;
/*
* DSS: distributed stable storage
@@ -277,10 +258,24 @@ int rfs4_dispatch(struct rpcdisp *, struct svc_req *, SVCXPRT *, char *);
bool_t rfs4_minorvers_mismatch(struct svc_req *, SVCXPRT *, void *);
/*
- * RDMA wait variables.
+ * Stash NFS zone globals in TSD to avoid some lock contention
+ * from frequent zone_getspecific calls.
*/
-static kcondvar_t rdma_wait_cv;
-static kmutex_t rdma_wait_mutex;
+static uint_t nfs_server_tsd_key;
+
+nfs_globals_t *
+nfs_srv_getzg(void)
+{
+ nfs_globals_t *ng;
+
+ ng = tsd_get(nfs_server_tsd_key);
+ if (ng == NULL) {
+ ng = zone_getspecific(nfssrv_zone_key, curzone);
+ (void) tsd_set(nfs_server_tsd_key, ng);
+ }
+
+ return (ng);
+}
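nfs_srv_getzg() above memoizes the zone_getspecific() result in thread-specific data so the dispatch path avoids repeated zone lookups. A rough user-space analogue of that memoization pattern, using POSIX TSD (pthread_key_create/pthread_getspecific) and a hypothetical lookup_globals() standing in for zone_getspecific():

#include <pthread.h>
#include <stdio.h>

struct globals {
    int dummy;
};

static pthread_key_t tsd_key;
static pthread_once_t key_once = PTHREAD_ONCE_INIT;
static struct globals the_globals = { 42 };

static void
make_key(void)
{
    (void) pthread_key_create(&tsd_key, NULL);
}

/* Stand-in for zone_getspecific(): the "slow" authoritative lookup. */
static struct globals *
lookup_globals(void)
{
    return (&the_globals);
}

/*
 * Mirror of nfs_srv_getzg(): consult the per-thread cache first and
 * fall back to the authoritative lookup, caching the result.
 */
static struct globals *
get_globals(void)
{
    struct globals *g;

    (void) pthread_once(&key_once, make_key);
    g = pthread_getspecific(tsd_key);
    if (g == NULL) {
        g = lookup_globals();
        (void) pthread_setspecific(tsd_key, g);
    }
    return (g);
}

int
main(void)
{
    printf("dummy = %d\n", get_globals()->dummy);
    return (0);
}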
/*
* Will be called at the point the server pool is being unregistered
@@ -291,11 +286,15 @@ static kmutex_t rdma_wait_mutex;
void
nfs_srv_offline(void)
{
- mutex_enter(&nfs_server_upordown_lock);
- if (nfs_server_upordown == NFS_SERVER_RUNNING) {
- nfs_server_upordown = NFS_SERVER_OFFLINE;
+ nfs_globals_t *ng;
+
+ ng = nfs_srv_getzg();
+
+ mutex_enter(&ng->nfs_server_upordown_lock);
+ if (ng->nfs_server_upordown == NFS_SERVER_RUNNING) {
+ ng->nfs_server_upordown = NFS_SERVER_OFFLINE;
}
- mutex_exit(&nfs_server_upordown_lock);
+ mutex_exit(&ng->nfs_server_upordown_lock);
}
/*
@@ -324,15 +323,18 @@ nfs_srv_quiesce_all(void)
}
static void
-nfs_srv_shutdown_all(int quiesce) {
- mutex_enter(&nfs_server_upordown_lock);
+nfs_srv_shutdown_all(int quiesce)
+{
+ nfs_globals_t *ng = nfs_srv_getzg();
+
+ mutex_enter(&ng->nfs_server_upordown_lock);
if (quiesce) {
- if (nfs_server_upordown == NFS_SERVER_RUNNING ||
- nfs_server_upordown == NFS_SERVER_OFFLINE) {
- nfs_server_upordown = NFS_SERVER_QUIESCED;
- cv_signal(&nfs_server_upordown_cv);
+ if (ng->nfs_server_upordown == NFS_SERVER_RUNNING ||
+ ng->nfs_server_upordown == NFS_SERVER_OFFLINE) {
+ ng->nfs_server_upordown = NFS_SERVER_QUIESCED;
+ cv_signal(&ng->nfs_server_upordown_cv);
- /* reset DSS state, for subsequent warm restart */
+ /* reset DSS state */
rfs4_dss_numnewpaths = 0;
rfs4_dss_newpaths = NULL;
@@ -340,22 +342,27 @@ nfs_srv_shutdown_all(int quiesce) {
"NFSv4 state has been preserved");
}
} else {
- if (nfs_server_upordown == NFS_SERVER_OFFLINE) {
- nfs_server_upordown = NFS_SERVER_STOPPING;
- mutex_exit(&nfs_server_upordown_lock);
- rfs4_state_fini();
- rfs4_fini_drc(nfs4_drc);
- mutex_enter(&nfs_server_upordown_lock);
- nfs_server_upordown = NFS_SERVER_STOPPED;
- cv_signal(&nfs_server_upordown_cv);
+ if (ng->nfs_server_upordown == NFS_SERVER_OFFLINE) {
+ ng->nfs_server_upordown = NFS_SERVER_STOPPING;
+ mutex_exit(&ng->nfs_server_upordown_lock);
+ rfs4_state_zone_fini();
+ rfs4_fini_drc();
+ mutex_enter(&ng->nfs_server_upordown_lock);
+ ng->nfs_server_upordown = NFS_SERVER_STOPPED;
+
+ /* reset DSS state */
+ rfs4_dss_numnewpaths = 0;
+ rfs4_dss_newpaths = NULL;
+
+ cv_signal(&ng->nfs_server_upordown_cv);
}
}
- mutex_exit(&nfs_server_upordown_lock);
+ mutex_exit(&ng->nfs_server_upordown_lock);
}
static int
nfs_srv_set_sc_versions(struct file *fp, SVC_CALLOUT_TABLE **sctpp,
- rpcvers_t versmin, rpcvers_t versmax)
+ rpcvers_t versmin, rpcvers_t versmax)
{
struct strioctl strioc;
struct T_info_ack tinfo;
@@ -418,6 +425,7 @@ nfs_srv_set_sc_versions(struct file *fp, SVC_CALLOUT_TABLE **sctpp,
int
nfs_svc(struct nfs_svc_args *arg, model_t model)
{
+ nfs_globals_t *ng;
file_t *fp;
SVCMASTERXPRT *xprt;
int error;
@@ -432,6 +440,7 @@ nfs_svc(struct nfs_svc_args *arg, model_t model)
model = model; /* STRUCT macros don't always refer to it */
#endif
+ ng = nfs_srv_getzg();
STRUCT_SET_HANDLE(uap, model, arg);
/* Check privileges in nfssys() */
@@ -439,6 +448,10 @@ nfs_svc(struct nfs_svc_args *arg, model_t model)
if ((fp = getf(STRUCT_FGET(uap, fd))) == NULL)
return (EBADF);
+ /* Setup global file handle in nfs_export */
+ if ((error = nfs_export_get_rootfh(ng)) != 0)
+ return (error);
+
/*
* Set read buffer size to rsize
* and add room for RPC headers.
@@ -465,27 +478,27 @@ nfs_svc(struct nfs_svc_args *arg, model_t model)
return (error);
}
- nfs_versmin = STRUCT_FGET(uap, versmin);
- nfs_versmax = STRUCT_FGET(uap, versmax);
+ ng->nfs_versmin = STRUCT_FGET(uap, versmin);
+ ng->nfs_versmax = STRUCT_FGET(uap, versmax);
/* Double check the vers min/max ranges */
- if ((nfs_versmin > nfs_versmax) ||
- (nfs_versmin < NFS_VERSMIN) ||
- (nfs_versmax > NFS_VERSMAX)) {
- nfs_versmin = NFS_VERSMIN_DEFAULT;
- nfs_versmax = NFS_VERSMAX_DEFAULT;
+ if ((ng->nfs_versmin > ng->nfs_versmax) ||
+ (ng->nfs_versmin < NFS_VERSMIN) ||
+ (ng->nfs_versmax > NFS_VERSMAX)) {
+ ng->nfs_versmin = NFS_VERSMIN_DEFAULT;
+ ng->nfs_versmax = NFS_VERSMAX_DEFAULT;
}
- if (error =
- nfs_srv_set_sc_versions(fp, &sctp, nfs_versmin, nfs_versmax)) {
+ if (error = nfs_srv_set_sc_versions(fp, &sctp, ng->nfs_versmin,
+ ng->nfs_versmax)) {
releasef(STRUCT_FGET(uap, fd));
kmem_free(addrmask.buf, addrmask.maxlen);
return (error);
}
/* Initialize nfsv4 server */
- if (nfs_versmax == (rpcvers_t)NFS_V4)
- rfs4_server_start(STRUCT_FGET(uap, delegation));
+ if (ng->nfs_versmax == (rpcvers_t)NFS_V4)
+ rfs4_server_start(ng, STRUCT_FGET(uap, delegation));
/* Create a transport handle. */
error = svc_tli_kcreate(fp, readsize, buf, &addrmask, &xprt,
@@ -504,59 +517,36 @@ nfs_svc(struct nfs_svc_args *arg, model_t model)
}
static void
-rfs4_server_start(int nfs4_srv_delegation)
+rfs4_server_start(nfs_globals_t *ng, int nfs4_srv_delegation)
{
/*
* Determine if the server has previously been "started" and
* if not, do the per instance initialization
*/
- mutex_enter(&nfs_server_upordown_lock);
+ mutex_enter(&ng->nfs_server_upordown_lock);
- if (nfs_server_upordown != NFS_SERVER_RUNNING) {
+ if (ng->nfs_server_upordown != NFS_SERVER_RUNNING) {
/* Do we need to stop and wait on the previous server? */
- while (nfs_server_upordown == NFS_SERVER_STOPPING ||
- nfs_server_upordown == NFS_SERVER_OFFLINE)
- cv_wait(&nfs_server_upordown_cv,
- &nfs_server_upordown_lock);
+ while (ng->nfs_server_upordown == NFS_SERVER_STOPPING ||
+ ng->nfs_server_upordown == NFS_SERVER_OFFLINE)
+ cv_wait(&ng->nfs_server_upordown_cv,
+ &ng->nfs_server_upordown_lock);
- if (nfs_server_upordown != NFS_SERVER_RUNNING) {
+ if (ng->nfs_server_upordown != NFS_SERVER_RUNNING) {
(void) svc_pool_control(NFS_SVCPOOL_ID,
SVCPSET_UNREGISTER_PROC, (void *)&nfs_srv_offline);
(void) svc_pool_control(NFS_SVCPOOL_ID,
SVCPSET_SHUTDOWN_PROC, (void *)&nfs_srv_stop_all);
- /* is this an nfsd warm start? */
- if (nfs_server_upordown == NFS_SERVER_QUIESCED) {
- cmn_err(CE_NOTE, "nfs_server: "
- "server was previously quiesced; "
- "existing NFSv4 state will be re-used");
-
- /*
- * HA-NFSv4: this is also the signal
- * that a Resource Group failover has
- * occurred.
- */
- if (cluster_bootflags & CLUSTER_BOOTED)
- hanfsv4_failover();
- } else {
- /* cold start */
- rfs4_state_init();
- nfs4_drc = rfs4_init_drc(nfs4_drc_max,
- nfs4_drc_hash);
- }
-
- /*
- * Check to see if delegation is to be
- * enabled at the server
- */
- if (nfs4_srv_delegation != FALSE)
- rfs4_set_deleg_policy(SRV_NORMAL_DELEGATE);
+ rfs4_do_server_start(ng->nfs_server_upordown,
+ nfs4_srv_delegation,
+ cluster_bootflags & CLUSTER_BOOTED);
- nfs_server_upordown = NFS_SERVER_RUNNING;
+ ng->nfs_server_upordown = NFS_SERVER_RUNNING;
}
- cv_signal(&nfs_server_upordown_cv);
+ cv_signal(&ng->nfs_server_upordown_cv);
}
- mutex_exit(&nfs_server_upordown_lock);
+ mutex_exit(&ng->nfs_server_upordown_lock);
}
/*
@@ -566,6 +556,7 @@ rfs4_server_start(int nfs4_srv_delegation)
int
rdma_start(struct rdma_svc_args *rsa)
{
+ nfs_globals_t *ng;
int error;
rdma_xprt_group_t started_rdma_xprts;
rdma_stat stat;
@@ -578,8 +569,10 @@ rdma_start(struct rdma_svc_args *rsa)
rsa->nfs_versmin = NFS_VERSMIN_DEFAULT;
rsa->nfs_versmax = NFS_VERSMAX_DEFAULT;
}
- nfs_versmin = rsa->nfs_versmin;
- nfs_versmax = rsa->nfs_versmax;
+
+ ng = nfs_srv_getzg();
+ ng->nfs_versmin = rsa->nfs_versmin;
+ ng->nfs_versmax = rsa->nfs_versmax;
/* Set the versions in the callout table */
__nfs_sc_rdma[0].sc_versmin = rsa->nfs_versmin;
@@ -593,7 +586,7 @@ rdma_start(struct rdma_svc_args *rsa)
/* Initialize nfsv4 server */
if (rsa->nfs_versmax == (rpcvers_t)NFS_V4)
- rfs4_server_start(rsa->delegation);
+ rfs4_server_start(ng, rsa->delegation);
started_rdma_xprts.rtg_count = 0;
started_rdma_xprts.rtg_listhead = NULL;
@@ -610,7 +603,7 @@ restart:
/*
* wait till either interrupted by a signal on
* nfs service stop/restart or signalled by a
- * rdma plugin attach/detatch.
+ * rdma attach/detach.
*/
stat = rdma_kwait();
@@ -661,10 +654,10 @@ void
rpc_null_v3(caddr_t *argp, caddr_t *resp, struct exportinfo *exi,
struct svc_req *req, cred_t *cr, bool_t ro)
{
- DTRACE_NFSV3_3(op__null__start, struct svc_req *, req,
- cred_t *, cr, vnode_t *, NULL);
- DTRACE_NFSV3_3(op__null__done, struct svc_req *, req,
- cred_t *, cr, vnode_t *, NULL);
+ DTRACE_NFSV3_4(op__null__start, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, NULL, struct exportinfo *, exi);
+ DTRACE_NFSV3_4(op__null__done, struct svc_req *, req,
+ cred_t *, cr, vnode_t *, NULL, struct exportinfo *, exi);
}
/* ARGSUSED */
@@ -1342,13 +1335,13 @@ union rfs_res {
static struct rpc_disptable rfs_disptable[] = {
{sizeof (rfsdisptab_v2) / sizeof (rfsdisptab_v2[0]),
rfscallnames_v2,
- &rfsproccnt_v2_ptr, rfsdisptab_v2},
+ rfsdisptab_v2},
{sizeof (rfsdisptab_v3) / sizeof (rfsdisptab_v3[0]),
rfscallnames_v3,
- &rfsproccnt_v3_ptr, rfsdisptab_v3},
+ rfsdisptab_v3},
{sizeof (rfsdisptab_v4) / sizeof (rfsdisptab_v4[0]),
rfscallnames_v4,
- &rfsproccnt_v4_ptr, rfsdisptab_v4},
+ rfsdisptab_v4},
};
/*
@@ -1367,7 +1360,6 @@ static int cred_hits = 0;
static int cred_misses = 0;
#endif
-
#ifdef DEBUG
/*
* Debug code to allow disabling of rfs_dispatch() use of
@@ -1471,11 +1463,9 @@ auth_tooweak(struct svc_req *req, char *res)
return (FALSE);
}
-
static void
common_dispatch(struct svc_req *req, SVCXPRT *xprt, rpcvers_t min_vers,
- rpcvers_t max_vers, char *pgmname,
- struct rpc_disptable *disptable)
+ rpcvers_t max_vers, char *pgmname, struct rpc_disptable *disptable)
{
int which;
rpcvers_t vers;
@@ -1508,9 +1498,18 @@ common_dispatch(struct svc_req *req, SVCXPRT *xprt, rpcvers_t min_vers,
char **procnames;
char cbuf[INET6_ADDRSTRLEN]; /* to hold both IPv4 and IPv6 addr */
bool_t ro = FALSE;
+ nfs_globals_t *ng = nfs_srv_getzg();
+ nfs_export_t *ne = ng->nfs_export;
+ kstat_named_t *svstat, *procstat;
+
+ ASSERT(req->rq_prog == NFS_PROGRAM || req->rq_prog == NFS_ACL_PROGRAM);
vers = req->rq_vers;
+ svstat = ng->svstat[req->rq_vers];
+ procstat = (req->rq_prog == NFS_PROGRAM) ?
+ ng->rfsproccnt[vers] : ng->aclproccnt[vers];
+
if (vers < min_vers || vers > max_vers) {
svcerr_progvers(req->rq_xprt, min_vers, max_vers);
error++;
@@ -1526,7 +1525,7 @@ common_dispatch(struct svc_req *req, SVCXPRT *xprt, rpcvers_t min_vers,
goto done;
}
- (*(disptable[(int)vers].dis_proccntp))[which].value.ui64++;
+ procstat[which].value.ui64++;
disp = &disptable[(int)vers].dis_table[which];
procnames = disptable[(int)vers].dis_procnames;
@@ -1632,13 +1631,15 @@ common_dispatch(struct svc_req *req, SVCXPRT *xprt, rpcvers_t min_vers,
cr = xprt->xp_cred;
ASSERT(cr != NULL);
#ifdef DEBUG
- if (crgetref(cr) != 1) {
- crfree(cr);
- cr = crget();
- xprt->xp_cred = cr;
- cred_misses++;
- } else
- cred_hits++;
+ {
+ if (crgetref(cr) != 1) {
+ crfree(cr);
+ cr = crget();
+ xprt->xp_cred = cr;
+ cred_misses++;
+ } else
+ cred_hits++;
+ }
#else
if (crgetref(cr) != 1) {
crfree(cr);
@@ -1650,7 +1651,7 @@ common_dispatch(struct svc_req *req, SVCXPRT *xprt, rpcvers_t min_vers,
exi = checkexport(fsid, xfid);
if (exi != NULL) {
- publicfh_ok = PUBLICFH_CHECK(disp, exi, fsid, xfid);
+ publicfh_ok = PUBLICFH_CHECK(ne, disp, exi, fsid, xfid);
/*
* Don't allow non-V4 clients access
@@ -1763,7 +1764,7 @@ common_dispatch(struct svc_req *req, SVCXPRT *xprt, rpcvers_t min_vers,
* file system.
*/
if (nfslog_buffer_list != NULL) {
- nfslog_exi = nfslog_get_exi(exi, req, res, &nfslog_rec_id);
+ nfslog_exi = nfslog_get_exi(ne, exi, req, res, &nfslog_rec_id);
/*
* Is logging enabled?
*/
@@ -1846,9 +1847,8 @@ done:
if (exi != NULL)
exi_rele(exi);
- global_svstat_ptr[req->rq_vers][NFS_BADCALLS].value.ui64 += error;
-
- global_svstat_ptr[req->rq_vers][NFS_CALLS].value.ui64++;
+ svstat[NFS_BADCALLS].value.ui64 += error;
+ svstat[NFS_CALLS].value.ui64++;
}
static void
@@ -1971,10 +1971,10 @@ static struct rpcdisp acldisptab_v3[] = {
static struct rpc_disptable acl_disptable[] = {
{sizeof (acldisptab_v2) / sizeof (acldisptab_v2[0]),
aclcallnames_v2,
- &aclproccnt_v2_ptr, acldisptab_v2},
+ acldisptab_v2},
{sizeof (acldisptab_v3) / sizeof (acldisptab_v3[0]),
aclcallnames_v3,
- &aclproccnt_v3_ptr, acldisptab_v3},
+ acldisptab_v3},
};
static void
@@ -2568,34 +2568,29 @@ client_addr(struct svc_req *req, char *buf)
* - Initialize all locks
* - initialize the version 3 write verifier
*/
-int
+void
nfs_srvinit(void)
{
- int error;
- if (getzoneid() != GLOBAL_ZONEID)
- return (EACCES);
+ /* Truly global stuff in this module (not per zone) */
+ rw_init(&nfssrv_globals_rwl, NULL, RW_DEFAULT, NULL);
+ list_create(&nfssrv_globals_list, sizeof (nfs_globals_t),
+ offsetof(nfs_globals_t, nfs_g_link));
+ tsd_create(&nfs_server_tsd_key, NULL);
- error = nfs_exportinit();
- if (error != 0)
- return (error);
- error = rfs4_srvrinit();
- if (error != 0) {
- nfs_exportfini();
- return (error);
- }
+ /* The order here is important */
+ nfs_exportinit();
rfs_srvrinit();
rfs3_srvrinit();
+ rfs4_srvrinit();
nfsauth_init();
- /* Init the stuff to control start/stop */
- nfs_server_upordown = NFS_SERVER_STOPPED;
- mutex_init(&nfs_server_upordown_lock, NULL, MUTEX_DEFAULT, NULL);
- cv_init(&nfs_server_upordown_cv, NULL, CV_DEFAULT, NULL);
- mutex_init(&rdma_wait_mutex, NULL, MUTEX_DEFAULT, NULL);
- cv_init(&rdma_wait_cv, NULL, CV_DEFAULT, NULL);
-
- return (0);
+ /*
+ * NFS server zone-specific global variables
+ * Note the zone_init is called for the GZ here.
+ */
+ zone_key_create(&nfssrv_zone_key, nfs_server_zone_init,
+ nfs_server_zone_shutdown, nfs_server_zone_fini);
}
/*
@@ -2606,21 +2601,126 @@ nfs_srvinit(void)
void
nfs_srvfini(void)
{
+
+ /*
+ * NFS server zone-specific global variables
+ * Note the zone_fini is called for the GZ here.
+ */
+ (void) zone_key_delete(nfssrv_zone_key);
+
+ /* The order here is important (reverse of init) */
nfsauth_fini();
+ rfs4_srvrfini();
rfs3_srvrfini();
rfs_srvrfini();
nfs_exportfini();
- mutex_destroy(&nfs_server_upordown_lock);
- cv_destroy(&nfs_server_upordown_cv);
- mutex_destroy(&rdma_wait_mutex);
- cv_destroy(&rdma_wait_cv);
+ /* Truly global stuff in this module (not per zone) */
+ tsd_destroy(&nfs_server_tsd_key);
+ list_destroy(&nfssrv_globals_list);
+ rw_destroy(&nfssrv_globals_rwl);
}
/*
- * Set up an iovec array of up to cnt pointers.
+ * Zone init, shutdown, fini functions for the NFS server
+ *
+ * This design is careful to create the entire hierarchy of
+ * NFS server "globals" (including those created by various
+ * per-module *_zone_init functions, etc.) so that all these
+ * objects have exactly the same lifetime.
+ *
+ * These objects are also kept on a list for two reasons:
+ * 1: It makes finding these in mdb _much_ easier.
+ * 2: It allows operating across all zone globals for
+ * functions like nfs_auth.c:exi_cache_reclaim
*/
+static void *
+nfs_server_zone_init(zoneid_t zoneid)
+{
+ nfs_globals_t *ng;
+
+ ng = kmem_zalloc(sizeof (*ng), KM_SLEEP);
+
+ ng->nfs_versmin = NFS_VERSMIN_DEFAULT;
+ ng->nfs_versmax = NFS_VERSMAX_DEFAULT;
+
+ /* Init the stuff to control start/stop */
+ ng->nfs_server_upordown = NFS_SERVER_STOPPED;
+ mutex_init(&ng->nfs_server_upordown_lock, NULL, MUTEX_DEFAULT, NULL);
+ cv_init(&ng->nfs_server_upordown_cv, NULL, CV_DEFAULT, NULL);
+ mutex_init(&ng->rdma_wait_mutex, NULL, MUTEX_DEFAULT, NULL);
+ cv_init(&ng->rdma_wait_cv, NULL, CV_DEFAULT, NULL);
+
+ ng->nfs_zoneid = zoneid;
+
+ /*
+ * Order here is important.
+ * export init must precede srv init calls.
+ */
+ nfs_export_zone_init(ng);
+ rfs_stat_zone_init(ng);
+ rfs_srv_zone_init(ng);
+ rfs3_srv_zone_init(ng);
+ rfs4_srv_zone_init(ng);
+ nfsauth_zone_init(ng);
+
+ rw_enter(&nfssrv_globals_rwl, RW_WRITER);
+ list_insert_tail(&nfssrv_globals_list, ng);
+ rw_exit(&nfssrv_globals_rwl);
+
+ return (ng);
+}
+
+/* ARGSUSED */
+static void
+nfs_server_zone_shutdown(zoneid_t zoneid, void *data)
+{
+ nfs_globals_t *ng;
+
+ ng = (nfs_globals_t *)data;
+
+ /*
+ * Order is like _fini, but only
+ * some modules need this hook.
+ */
+ nfsauth_zone_shutdown(ng);
+ nfs_export_zone_shutdown(ng);
+}
+
+/* ARGSUSED */
+static void
+nfs_server_zone_fini(zoneid_t zoneid, void *data)
+{
+ nfs_globals_t *ng;
+
+ ng = (nfs_globals_t *)data;
+ rw_enter(&nfssrv_globals_rwl, RW_WRITER);
+ list_remove(&nfssrv_globals_list, ng);
+ rw_exit(&nfssrv_globals_rwl);
+
+ /*
+ * Order here is important.
+ * reverse order from init
+ */
+ nfsauth_zone_fini(ng);
+ rfs4_srv_zone_fini(ng);
+ rfs3_srv_zone_fini(ng);
+ rfs_srv_zone_fini(ng);
+ rfs_stat_zone_fini(ng);
+ nfs_export_zone_fini(ng);
+
+ mutex_destroy(&ng->nfs_server_upordown_lock);
+ cv_destroy(&ng->nfs_server_upordown_cv);
+ mutex_destroy(&ng->rdma_wait_mutex);
+ cv_destroy(&ng->rdma_wait_cv);
+
+ kmem_free(ng, sizeof (*ng));
+}
+
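nfs_server_zone_init/shutdown/fini above are registered through zone_key_create(), and the comments stress that the per-module init calls run in one order and the fini calls run in the reverse order. A small user-space sketch of that symmetric setup/teardown, with a hypothetical table of module hooks driven forward for init and backward for fini:

#include <stdio.h>
#include <stddef.h>

struct module_hooks {
    const char *name;
    void (*init)(void);
    void (*fini)(void);
};

static void export_init(void) { printf("export init\n"); }
static void export_fini(void) { printf("export fini\n"); }
static void stat_init(void)   { printf("stat init\n"); }
static void stat_fini(void)   { printf("stat fini\n"); }
static void srv_init(void)    { printf("srv init\n"); }
static void srv_fini(void)    { printf("srv fini\n"); }

/* Hypothetical hook table; export comes first, as in nfs_server_zone_init(). */
static const struct module_hooks hooks[] = {
    { "export", export_init, export_fini },
    { "stat",   stat_init,   stat_fini },
    { "srv",    srv_init,    srv_fini },
};
#define NHOOKS (sizeof (hooks) / sizeof (hooks[0]))

static void
zone_init_all(void)
{
    for (size_t i = 0; i < NHOOKS; i++)
        hooks[i].init();
}

static void
zone_fini_all(void)
{
    /* Tear down in the reverse of the init order. */
    for (size_t i = NHOOKS; i > 0; i--)
        hooks[i - 1].fini();
}

int
main(void)
{
    zone_init_all();
    zone_fini_all();
    return (0);
}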
+/*
+ * Set up an iovec array of up to cnt pointers.
+ */
void
mblk_to_iov(mblk_t *m, int cnt, struct iovec *iovp)
{
@@ -2858,11 +2958,13 @@ rfs_publicfh_mclookup(char *p, vnode_t *dvp, cred_t *cr, vnode_t **vpp,
/* Release the reference on the old exi value */
ASSERT(*exi != NULL);
exi_rele(*exi);
+ *exi = NULL;
if (error = nfs_check_vpexi(mc_dvp, *vpp, kcred, exi)) {
VN_RELE(*vpp);
goto publicfh_done;
}
+ /* Have a new *exi */
}
}
@@ -2889,6 +2991,8 @@ rfs_pathname(
struct pathname pn;
int error;
+ ASSERT3U(crgetzoneid(cr), ==, curzone->zone_id);
+
/*
* If pathname starts with '/', then set startdvp to root.
*/
@@ -2896,7 +3000,7 @@ rfs_pathname(
while (*path == '/')
path++;
- startdvp = rootdir;
+ startdvp = ZONE_ROOTVP();
}
error = pn_get_buf(path, UIO_SYSSPACE, &pn, namebuf, sizeof (namebuf));
@@ -2919,7 +3023,7 @@ rfs_pathname(
}
VN_HOLD(startdvp);
error = lookuppnvp(&pn, NULL, NO_FOLLOW, dirvpp, compvpp,
- rootdir, startdvp, cr);
+ ZONE_ROOTVP(), startdvp, cr);
}
if (error == ENAMETOOLONG) {
/*
@@ -2936,7 +3040,7 @@ rfs_pathname(
}
VN_HOLD(startdvp);
error = lookuppnvp(&pn, NULL, NO_FOLLOW, dirvpp, compvpp,
- rootdir, startdvp, cr);
+ ZONE_ROOTVP(), startdvp, cr);
pn_free(&pn);
}
@@ -3041,168 +3145,6 @@ nfs_check_vpexi(vnode_t *mc_dvp, vnode_t *vp, cred_t *cr,
}
/*
- * Do the main work of handling HA-NFSv4 Resource Group failover on
- * Sun Cluster.
- * We need to detect whether any RG admin paths have been added or removed,
- * and adjust resources accordingly.
- * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
- * order to scale, the list and array of paths need to be held in more
- * suitable data structures.
- */
-static void
-hanfsv4_failover(void)
-{
- int i, start_grace, numadded_paths = 0;
- char **added_paths = NULL;
- rfs4_dss_path_t *dss_path;
-
- /*
- * Note: currently, rfs4_dss_pathlist cannot be NULL, since
- * it will always include an entry for NFS4_DSS_VAR_DIR. If we
- * make the latter dynamically specified too, the following will
- * need to be adjusted.
- */
-
- /*
- * First, look for removed paths: RGs that have been failed-over
- * away from this node.
- * Walk the "currently-serving" rfs4_dss_pathlist and, for each
- * path, check if it is on the "passed-in" rfs4_dss_newpaths array
- * from nfsd. If not, that RG path has been removed.
- *
- * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
- * any duplicates.
- */
- dss_path = rfs4_dss_pathlist;
- do {
- int found = 0;
- char *path = dss_path->path;
-
- /* used only for non-HA so may not be removed */
- if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
- dss_path = dss_path->next;
- continue;
- }
-
- for (i = 0; i < rfs4_dss_numnewpaths; i++) {
- int cmpret;
- char *newpath = rfs4_dss_newpaths[i];
-
- /*
- * Since nfsd has sorted rfs4_dss_newpaths for us,
- * once the return from strcmp is negative we know
- * we've passed the point where "path" should be,
- * and can stop searching: "path" has been removed.
- */
- cmpret = strcmp(path, newpath);
- if (cmpret < 0)
- break;
- if (cmpret == 0) {
- found = 1;
- break;
- }
- }
-
- if (found == 0) {
- unsigned index = dss_path->index;
- rfs4_servinst_t *sip = dss_path->sip;
- rfs4_dss_path_t *path_next = dss_path->next;
-
- /*
- * This path has been removed.
- * We must clear out the servinst reference to
- * it, since it's now owned by another
- * node: we should not attempt to touch it.
- */
- ASSERT(dss_path == sip->dss_paths[index]);
- sip->dss_paths[index] = NULL;
-
- /* remove from "currently-serving" list, and destroy */
- remque(dss_path);
- /* allow for NUL */
- kmem_free(dss_path->path, strlen(dss_path->path) + 1);
- kmem_free(dss_path, sizeof (rfs4_dss_path_t));
-
- dss_path = path_next;
- } else {
- /* path was found; not removed */
- dss_path = dss_path->next;
- }
- } while (dss_path != rfs4_dss_pathlist);
-
- /*
- * Now, look for added paths: RGs that have been failed-over
- * to this node.
- * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
- * for each path, check if it is on the "currently-serving"
- * rfs4_dss_pathlist. If not, that RG path has been added.
- *
- * Note: we don't do duplicate detection here; nfsd does that for us.
- *
- * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
- * an upper bound for the size needed for added_paths[numadded_paths].
- */
-
- /* probably more space than we need, but guaranteed to be enough */
- if (rfs4_dss_numnewpaths > 0) {
- size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
- added_paths = kmem_zalloc(sz, KM_SLEEP);
- }
-
- /* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
- for (i = 0; i < rfs4_dss_numnewpaths; i++) {
- int found = 0;
- char *newpath = rfs4_dss_newpaths[i];
-
- dss_path = rfs4_dss_pathlist;
- do {
- char *path = dss_path->path;
-
- /* used only for non-HA */
- if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
- dss_path = dss_path->next;
- continue;
- }
-
- if (strncmp(path, newpath, strlen(path)) == 0) {
- found = 1;
- break;
- }
-
- dss_path = dss_path->next;
- } while (dss_path != rfs4_dss_pathlist);
-
- if (found == 0) {
- added_paths[numadded_paths] = newpath;
- numadded_paths++;
- }
- }
-
- /* did we find any added paths? */
- if (numadded_paths > 0) {
- /* create a new server instance, and start its grace period */
- start_grace = 1;
- rfs4_servinst_create(start_grace, numadded_paths, added_paths);
-
- /* read in the stable storage state from these paths */
- rfs4_dss_readstate(numadded_paths, added_paths);
-
- /*
- * Multiple failovers during a grace period will cause
- * clients of the same resource group to be partitioned
- * into different server instances, with different
- * grace periods. Since clients of the same resource
- * group must be subject to the same grace period,
- * we need to reset all currently active grace periods.
- */
- rfs4_grace_reset_all();
- }
-
- if (rfs4_dss_numnewpaths > 0)
- kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
-}
-
-/*
* Used by NFSv3 and NFSv4 server to query label of
* a pathname component during lookup/access ops.
*/
diff --git a/usr/src/uts/common/fs/nfs/nfs_srv.c b/usr/src/uts/common/fs/nfs/nfs_srv.c
index 2535ab3219..b320f599df 100644
--- a/usr/src/uts/common/fs/nfs/nfs_srv.c
+++ b/usr/src/uts/common/fs/nfs/nfs_srv.c
@@ -30,6 +30,11 @@
* All rights reserved.
*/
+/*
+ * Copyright 2018 Nexenta Systems, Inc.
+ * Copyright (c) 2016 by Delphix. All rights reserved.
+ */
+
#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
@@ -70,6 +75,21 @@
#include <sys/strsubr.h>
+struct rfs_async_write_list;
+
+/*
+ * Zone globals of the NFSv2 server
+ */
+typedef struct nfs_srv {
+ kmutex_t async_write_lock;
+ struct rfs_async_write_list *async_write_head;
+
+ /*
+ * enables write clustering if == 1
+ */
+ int write_async;
+} nfs_srv_t;
+
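The async_write_head list declared in nfs_srv_t is used further down in rfs_write() to cluster concurrent WRITE requests to the same file handle: under async_write_lock the list is searched for an entry whose handle matches (bcmp), and a new entry is appended at the tail when none is found. A minimal user-space model of that lookup-or-append step, with a hypothetical fixed-size key in place of fhandle_t:

#include <stdlib.h>
#include <string.h>
#include <stdio.h>

#define KEYSZ 32    /* stand-in for sizeof (fhandle_t) */

struct cluster {
    struct cluster *next;
    unsigned char key[KEYSZ];
};

static struct cluster *cluster_head;    /* protected by a mutex in the kernel */

/*
 * Find the cluster for this key, or append a new one at the tail,
 * mirroring the list handling in rfs_write().
 */
static struct cluster *
cluster_find_or_add(const unsigned char key[KEYSZ])
{
    struct cluster *cp, *last = NULL;

    for (cp = cluster_head; cp != NULL; cp = cp->next) {
        if (memcmp(key, cp->key, KEYSZ) == 0)
            return (cp);
        last = cp;
    }

    cp = calloc(1, sizeof (*cp));
    if (cp == NULL)
        return (NULL);
    memcpy(cp->key, key, KEYSZ);
    if (last == NULL)
        cluster_head = cp;
    else
        last->next = cp;
    return (cp);
}

int
main(void)
{
    unsigned char k1[KEYSZ] = { 1 }, k2[KEYSZ] = { 2 };

    printf("%s\n", cluster_find_or_add(k1) == cluster_find_or_add(k1) ?
        "same cluster" : "different clusters");
    (void) cluster_find_or_add(k2);
    return (0);
}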
/*
* These are the interface routines for the server side of the
* Network File System. See the NFS version 2 protocol specification
@@ -80,6 +100,7 @@ static int sattr_to_vattr(struct nfssattr *, struct vattr *);
static void acl_perm(struct vnode *, struct exportinfo *, struct vattr *,
cred_t *);
+
/*
* Some "over the wire" UNIX file types. These are encoded
* into the mode. This needs to be fixed in the next rev.
@@ -91,6 +112,15 @@ static void acl_perm(struct vnode *, struct exportinfo *, struct vattr *,
u_longlong_t nfs2_srv_caller_id;
+static nfs_srv_t *
+nfs_get_srv(void)
+{
+ nfs_globals_t *ng = nfs_srv_getzg();
+ nfs_srv_t *srv = ng->nfs_srv;
+ ASSERT(srv != NULL);
+ return (srv);
+}
+
/*
* Get file attributes.
* Returns the current attributes of the file with the given fhandle.
@@ -386,17 +416,20 @@ rfs_climb_crossmnt(vnode_t **dvpp, struct exportinfo **exip, cred_t *cr)
{
struct exportinfo *exi;
vnode_t *dvp = *dvpp;
+ vnode_t *zone_rootvp;
- ASSERT(dvp->v_flag & VROOT);
+ zone_rootvp = (*exip)->exi_ne->exi_root->exi_vp;
+ ASSERT((dvp->v_flag & VROOT) || VN_CMP(zone_rootvp, dvp));
VN_HOLD(dvp);
- dvp = untraverse(dvp);
+ dvp = untraverse(dvp, zone_rootvp);
exi = nfs_vptoexi(NULL, dvp, cr, NULL, NULL, FALSE);
if (exi == NULL) {
VN_RELE(dvp);
return (-1);
}
+ ASSERT3U(exi->exi_zoneid, ==, (*exip)->exi_zoneid);
exi_rele(*exip);
*exip = exi;
VN_RELE(*dvpp);
@@ -446,7 +479,7 @@ rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr,
* location of the public filehandle.
*/
if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
- dvp = rootdir;
+ dvp = ZONE_ROOTVP();
VN_HOLD(dvp);
} else {
dvp = nfs_fhtovp(fhp, exi);
@@ -457,6 +490,7 @@ rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr,
}
exi_hold(exi);
+ ASSERT3U(exi->exi_zoneid, ==, curzone->zone_id);
/*
* Not allow lookup beyond root.
@@ -466,7 +500,7 @@ rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr,
if (strcmp(da->da_name, "..") == 0 &&
EQFID(&exi->exi_fid, (fid_t *)&fhp->fh_len)) {
if ((exi->exi_export.ex_flags & EX_NOHIDE) &&
- (dvp->v_flag & VROOT)) {
+ ((dvp->v_flag & VROOT) || VN_IS_CURZONEROOT(dvp))) {
/*
* special case for ".." and 'nohide'exported root
*/
@@ -502,6 +536,7 @@ rfs_lookup(struct nfsdiropargs *da, struct nfsdiropres *dr,
publicfh_flag = TRUE;
exi_rele(exi);
+ exi = NULL;
error = rfs_publicfh_mclookup(name, dvp, cr, &vp, &exi,
&sec);
@@ -635,10 +670,12 @@ rfs_readlink(fhandle_t *fhp, struct nfsrdlnres *rl, struct exportinfo *exi,
if (is_referral) {
char *s;
size_t strsz;
+ kstat_named_t *stat =
+ exi->exi_ne->ne_globals->svstat[NFS_VERSION];
/* Get an artificial symlink based on a referral */
s = build_symlink(vp, cr, &strsz);
- global_svstat_ptr[2][NFS_REFERLINKS].value.ui64++;
+ stat[NFS_REFERLINKS].value.ui64++;
DTRACE_PROBE2(nfs2serv__func__referral__reflink,
vnode_t *, vp, char *, s);
if (s == NULL)
@@ -775,6 +812,8 @@ rfs_read(struct nfsreadargs *ra, struct nfsrdresult *rr,
/* check if a monitor detected a delegation conflict */
if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
+ if (in_crit)
+ nbl_end_crit(vp);
VN_RELE(vp);
/* mark as wouldblock so response is dropped */
curthread->t_flag |= T_WOULDBLOCK;
@@ -1100,10 +1139,7 @@ rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns,
/* check if a monitor detected a delegation conflict */
if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
- VN_RELE(vp);
- /* mark as wouldblock so response is dropped */
- curthread->t_flag |= T_WOULDBLOCK;
- return;
+ goto out;
}
if (wa->wa_data || wa->wa_rlist) {
@@ -1143,6 +1179,7 @@ rfs_write_sync(struct nfswriteargs *wa, struct nfsattrstat *ns,
error = VOP_WRITE(vp, &uio, FSYNC, cr, &ct);
curthread->t_cred = savecred;
} else {
+
iovcnt = 0;
for (m = wa->wa_mblk; m != NULL; m = m->b_cont)
iovcnt++;
@@ -1286,8 +1323,11 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
cred_t *savecred;
int in_crit = 0;
caller_context_t ct;
+ nfs_srv_t *nsrv;
- if (!rfs_write_async) {
+ ASSERT(exi == NULL || exi->exi_zoneid == curzone->zone_id);
+ nsrv = nfs_get_srv();
+ if (!nsrv->write_async) {
rfs_write_sync(wa, ns, exi, req, cr, ro);
return;
}
@@ -1312,8 +1352,8 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
* Look to see if there is already a cluster started
* for this file.
*/
- mutex_enter(&rfs_async_write_lock);
- for (lp = rfs_async_write_head; lp != NULL; lp = lp->next) {
+ mutex_enter(&nsrv->async_write_lock);
+ for (lp = nsrv->async_write_head; lp != NULL; lp = lp->next) {
if (bcmp(&wa->wa_fhandle, lp->fhp,
sizeof (fhandle_t)) == 0)
break;
@@ -1339,8 +1379,8 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
else
trp->list = nrp;
while (nrp->ns->ns_status == RFSWRITE_INITVAL)
- cv_wait(&lp->cv, &rfs_async_write_lock);
- mutex_exit(&rfs_async_write_lock);
+ cv_wait(&lp->cv, &nsrv->async_write_lock);
+ mutex_exit(&nsrv->async_write_lock);
return;
}
@@ -1357,15 +1397,15 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
nlp->list = nrp;
nlp->next = NULL;
- if (rfs_async_write_head == NULL) {
- rfs_async_write_head = nlp;
+ if (nsrv->async_write_head == NULL) {
+ nsrv->async_write_head = nlp;
} else {
- lp = rfs_async_write_head;
+ lp = nsrv->async_write_head;
while (lp->next != NULL)
lp = lp->next;
lp->next = nlp;
}
- mutex_exit(&rfs_async_write_lock);
+ mutex_exit(&nsrv->async_write_lock);
/*
* Convert the file handle common to all of the requests
@@ -1373,11 +1413,11 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
*/
vp = nfs_fhtovp(&wa->wa_fhandle, exi);
if (vp == NULL) {
- mutex_enter(&rfs_async_write_lock);
- if (rfs_async_write_head == nlp)
- rfs_async_write_head = nlp->next;
+ mutex_enter(&nsrv->async_write_lock);
+ if (nsrv->async_write_head == nlp)
+ nsrv->async_write_head = nlp->next;
else {
- lp = rfs_async_write_head;
+ lp = nsrv->async_write_head;
while (lp->next != nlp)
lp = lp->next;
lp->next = nlp->next;
@@ -1388,7 +1428,7 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
rp->thread->t_flag |= t_flag;
}
cv_broadcast(&nlp->cv);
- mutex_exit(&rfs_async_write_lock);
+ mutex_exit(&nsrv->async_write_lock);
return;
}
@@ -1399,11 +1439,11 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
*/
if (vp->v_type != VREG) {
VN_RELE(vp);
- mutex_enter(&rfs_async_write_lock);
- if (rfs_async_write_head == nlp)
- rfs_async_write_head = nlp->next;
+ mutex_enter(&nsrv->async_write_lock);
+ if (nsrv->async_write_head == nlp)
+ nsrv->async_write_head = nlp->next;
else {
- lp = rfs_async_write_head;
+ lp = nsrv->async_write_head;
while (lp->next != nlp)
lp = lp->next;
lp->next = nlp->next;
@@ -1414,7 +1454,7 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
rp->thread->t_flag |= t_flag;
}
cv_broadcast(&nlp->cv);
- mutex_exit(&rfs_async_write_lock);
+ mutex_exit(&nsrv->async_write_lock);
return;
}
@@ -1446,11 +1486,11 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
VN_RELE(vp);
/* mark as wouldblock so response is dropped */
curthread->t_flag |= T_WOULDBLOCK;
- mutex_enter(&rfs_async_write_lock);
- if (rfs_async_write_head == nlp)
- rfs_async_write_head = nlp->next;
+ mutex_enter(&nsrv->async_write_lock);
+ if (nsrv->async_write_head == nlp)
+ nsrv->async_write_head = nlp->next;
else {
- lp = rfs_async_write_head;
+ lp = nsrv->async_write_head;
while (lp->next != nlp)
lp = lp->next;
lp->next = nlp->next;
@@ -1462,7 +1502,7 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
}
}
cv_broadcast(&nlp->cv);
- mutex_exit(&rfs_async_write_lock);
+ mutex_exit(&nsrv->async_write_lock);
return;
}
@@ -1484,16 +1524,16 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
* will allow more requests to be clustered in this
* second cluster.
*/
- mutex_enter(&rfs_async_write_lock);
- if (rfs_async_write_head == nlp)
- rfs_async_write_head = nlp->next;
+ mutex_enter(&nsrv->async_write_lock);
+ if (nsrv->async_write_head == nlp)
+ nsrv->async_write_head = nlp->next;
else {
- lp = rfs_async_write_head;
+ lp = nsrv->async_write_head;
while (lp->next != nlp)
lp = lp->next;
lp->next = nlp->next;
}
- mutex_exit(&rfs_async_write_lock);
+ mutex_exit(&nsrv->async_write_lock);
/*
* Step through the list of requests in this cluster.
@@ -1738,7 +1778,7 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
VN_RELE(vp);
t_flag = curthread->t_flag & T_WOULDBLOCK;
- mutex_enter(&rfs_async_write_lock);
+ mutex_enter(&nsrv->async_write_lock);
for (rp = nlp->list; rp != NULL; rp = rp->list) {
if (rp->ns->ns_status == RFSWRITE_INITVAL) {
rp->ns->ns_status = puterrno(error);
@@ -1746,7 +1786,7 @@ rfs_write(struct nfswriteargs *wa, struct nfsattrstat *ns,
}
}
cv_broadcast(&nlp->cv);
- mutex_exit(&rfs_async_write_lock);
+ mutex_exit(&nsrv->async_write_lock);
}
@@ -2211,7 +2251,7 @@ rfs_rename(struct nfsrnmargs *args, enum nfsstat *status,
/* Check for delegation on the file being renamed over, if it exists */
- if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
+ if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE &&
VOP_LOOKUP(tovp, args->rna_to.da_name, &targvp, NULL, 0, NULL, cr,
NULL, NULL, NULL) == 0) {
@@ -2578,7 +2618,7 @@ rfs_rmdir(struct nfsdiropargs *da, enum nfsstat *status,
* supplying a vnode known to exist and illegal to
* remove.
*/
- error = VOP_RMDIR(vp, da->da_name, rootdir, cr, NULL, 0);
+ error = VOP_RMDIR(vp, da->da_name, ZONE_ROOTVP(), cr, NULL, 0);
/*
* Force modified data and metadata out to stable storage.
@@ -2853,7 +2893,7 @@ sattr_to_vattr(struct nfssattr *sa, struct vattr *vap)
return (0);
}
-static enum nfsftype vt_to_nf[] = {
+static const enum nfsftype vt_to_nf[] = {
0, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, 0, 0, 0, NFSOC, 0
};
@@ -3072,14 +3112,38 @@ acl_perm(struct vnode *vp, struct exportinfo *exi, struct vattr *va, cred_t *cr)
void
rfs_srvrinit(void)
{
- mutex_init(&rfs_async_write_lock, NULL, MUTEX_DEFAULT, NULL);
nfs2_srv_caller_id = fs_new_caller_id();
}
void
rfs_srvrfini(void)
{
- mutex_destroy(&rfs_async_write_lock);
+}
+
+/* ARGSUSED */
+void
+rfs_srv_zone_init(nfs_globals_t *ng)
+{
+ nfs_srv_t *ns;
+
+ ns = kmem_zalloc(sizeof (*ns), KM_SLEEP);
+
+ mutex_init(&ns->async_write_lock, NULL, MUTEX_DEFAULT, NULL);
+ ns->write_async = 1;
+
+ ng->nfs_srv = ns;
+}
+
+/* ARGSUSED */
+void
+rfs_srv_zone_fini(nfs_globals_t *ng)
+{
+ nfs_srv_t *ns = ng->nfs_srv;
+
+ ng->nfs_srv = NULL;
+
+ mutex_destroy(&ns->async_write_lock);
+ kmem_free(ns, sizeof (*ns));
}
static int
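Note on the nfs_srv.c hunks above: the file-global async-write state (rfs_async_write_lock, rfs_write_async and the rfs_async_write_head cluster list) becomes a per-zone nfs_srv_t hung off nfs_globals_t, allocated in rfs_srv_zone_init() and torn down in rfs_srv_zone_fini(). A minimal sketch of how server code reaches that state after the change, assuming nfs_get_srv() (used in rfs_write() above) resolves the current zone's nfs_srv_t:

/* Illustrative sketch only; not part of the patch. */
static int
example_zone_uses_async_writes(void)
{
	nfs_srv_t *nsrv = nfs_get_srv();	/* current zone's NFS server state */
	int busy;

	mutex_enter(&nsrv->async_write_lock);
	busy = (nsrv->async_write_head != NULL);	/* per-zone cluster list */
	mutex_exit(&nsrv->async_write_lock);

	return (nsrv->write_async && busy);
}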
diff --git a/usr/src/uts/common/fs/nfs/nfs_stats.c b/usr/src/uts/common/fs/nfs/nfs_stats.c
index baaf47a82a..97f820d756 100644
--- a/usr/src/uts/common/fs/nfs/nfs_stats.c
+++ b/usr/src/uts/common/fs/nfs/nfs_stats.c
@@ -45,8 +45,8 @@ zone_key_t nfsstat_zone_key;
*/
static kstat_named_t *
nfsstat_zone_init_common(zoneid_t zoneid, const char *module, int vers,
- const char *name, const kstat_named_t *template,
- size_t template_size)
+ const char *name, const kstat_named_t *template,
+ size_t template_size)
{
kstat_t *ksp;
kstat_named_t *ks_data;
@@ -68,7 +68,7 @@ nfsstat_zone_init_common(zoneid_t zoneid, const char *module, int vers,
*/
static void
nfsstat_zone_fini_common(zoneid_t zoneid, const char *module, int vers,
- const char *name)
+ const char *name)
{
kstat_delete_byname_zone(module, vers, name, zoneid);
}
@@ -89,38 +89,22 @@ static const kstat_named_t svstat_tmpl[] = {
{ "referlinks", KSTAT_DATA_UINT64 },
};
-/* Points to the global zone server kstat data for all nfs versions */
-kstat_named_t *global_svstat_ptr[NFS_VERSMAX + 1];
-
static void
nfsstat_zone_init_server(zoneid_t zoneid, kstat_named_t *svstatp[])
{
int vers;
- /*
- * first two indexes of these arrays are not used, so initialize
- * to NULL
- */
- svstatp[0] = NULL;
- svstatp[1] = NULL;
- global_svstat_ptr[0] = NULL;
- global_svstat_ptr[0] = NULL;
-
for (vers = NFS_VERSION; vers <= NFS_V4; vers++) {
svstatp[vers] = nfsstat_zone_init_common(zoneid, "nfs", vers,
"nfs_server", svstat_tmpl, sizeof (svstat_tmpl));
- if (zoneid == GLOBAL_ZONEID)
- global_svstat_ptr[vers] = svstatp[vers];
}
}
static void
-nfsstat_zone_fini_server(zoneid_t zoneid, kstat_named_t **svstatp)
+nfsstat_zone_fini_server(zoneid_t zoneid, kstat_named_t *svstatp[])
{
int vers;
for (vers = NFS_VERSION; vers <= NFS_V4; vers++) {
- if (zoneid == GLOBAL_ZONEID)
- global_svstat_ptr[vers] = NULL;
nfsstat_zone_fini_common(zoneid, "nfs", vers, "nfs_server");
kmem_free(svstatp[vers], sizeof (svstat_tmpl));
}
@@ -188,29 +172,6 @@ static const kstat_named_t rfsproccnt_v2_tmpl[] = {
{ "statfs", KSTAT_DATA_UINT64 }
};
-kstat_named_t *rfsproccnt_v2_ptr;
-
-static void
-nfsstat_zone_init_rfsproc_v2(zoneid_t zoneid, struct nfs_version_stats *statsp)
-{
- kstat_named_t *ks_data;
-
- ks_data = nfsstat_zone_init_common(zoneid, "nfs", 0, "rfsproccnt_v2",
- rfsproccnt_v2_tmpl, sizeof (rfsproccnt_v2_tmpl));
- statsp->rfsproccnt_ptr = ks_data;
- if (zoneid == GLOBAL_ZONEID)
- rfsproccnt_v2_ptr = ks_data;
-}
-
-static void
-nfsstat_zone_fini_rfsproc_v2(zoneid_t zoneid, struct nfs_version_stats *statsp)
-{
- if (zoneid == GLOBAL_ZONEID)
- rfsproccnt_v2_ptr = NULL;
- nfsstat_zone_fini_common(zoneid, "nfs", 0, "rfsproccnt_v2");
- kmem_free(statsp->rfsproccnt_ptr, sizeof (rfsproccnt_v2_tmpl));
-}
-
/*
* NFSv2 client ACL stats
*/
@@ -249,30 +210,6 @@ static const kstat_named_t aclproccnt_v2_tmpl[] = {
{ "getxattrdir", KSTAT_DATA_UINT64 }
};
-kstat_named_t *aclproccnt_v2_ptr;
-
-static void
-nfsstat_zone_init_aclproc_v2(zoneid_t zoneid, struct nfs_version_stats *statsp)
-{
- kstat_named_t *ks_data;
-
- ks_data = nfsstat_zone_init_common(zoneid, "nfs_acl", 0,
- "aclproccnt_v2", aclproccnt_v2_tmpl,
- sizeof (aclproccnt_v2_tmpl));
- statsp->aclproccnt_ptr = ks_data;
- if (zoneid == GLOBAL_ZONEID)
- aclproccnt_v2_ptr = ks_data;
-}
-
-static void
-nfsstat_zone_fini_aclproc_v2(zoneid_t zoneid, struct nfs_version_stats *statsp)
-{
- if (zoneid == GLOBAL_ZONEID)
- aclproccnt_v2_ptr = NULL;
- nfsstat_zone_fini_common(zoneid, "nfs_acl", 0, "aclproccnt_v2");
- kmem_free(statsp->aclproccnt_ptr, sizeof (aclproccnt_v2_tmpl));
-}
-
/*
* NFSv3 client stats
*/
@@ -343,29 +280,6 @@ static const kstat_named_t rfsproccnt_v3_tmpl[] = {
{ "commit", KSTAT_DATA_UINT64 }
};
-kstat_named_t *rfsproccnt_v3_ptr;
-
-static void
-nfsstat_zone_init_rfsproc_v3(zoneid_t zoneid, struct nfs_version_stats *statsp)
-{
- kstat_named_t *ks_data;
-
- ks_data = nfsstat_zone_init_common(zoneid, "nfs", 0, "rfsproccnt_v3",
- rfsproccnt_v3_tmpl, sizeof (rfsproccnt_v3_tmpl));
- statsp->rfsproccnt_ptr = ks_data;
- if (zoneid == GLOBAL_ZONEID)
- rfsproccnt_v3_ptr = ks_data;
-}
-
-static void
-nfsstat_zone_fini_rfsproc_v3(zoneid_t zoneid, struct nfs_version_stats *statsp)
-{
- if (zoneid == GLOBAL_ZONEID)
- rfsproccnt_v3_ptr = NULL;
- nfsstat_zone_fini_common(zoneid, "nfs", 0, "rfsproccnt_v3");
- kmem_free(statsp->rfsproccnt_ptr, sizeof (rfsproccnt_v3_tmpl));
-}
-
/*
* NFSv3 client ACL stats
*/
@@ -400,30 +314,6 @@ static const kstat_named_t aclproccnt_v3_tmpl[] = {
{ "getxattrdir", KSTAT_DATA_UINT64 }
};
-kstat_named_t *aclproccnt_v3_ptr;
-
-static void
-nfsstat_zone_init_aclproc_v3(zoneid_t zoneid, struct nfs_version_stats *statsp)
-{
- kstat_named_t *ks_data;
-
- ks_data = nfsstat_zone_init_common(zoneid, "nfs_acl", 0,
- "aclproccnt_v3", aclproccnt_v3_tmpl,
- sizeof (aclproccnt_v3_tmpl));
- statsp->aclproccnt_ptr = ks_data;
- if (zoneid == GLOBAL_ZONEID)
- aclproccnt_v3_ptr = ks_data;
-}
-
-static void
-nfsstat_zone_fini_aclproc_v3(zoneid_t zoneid, struct nfs_version_stats *statsp)
-{
- if (zoneid == GLOBAL_ZONEID)
- aclproccnt_v3_ptr = NULL;
- nfsstat_zone_fini_common(zoneid, "nfs_acl", 0, "aclproccnt_v3");
- kmem_free(statsp->aclproccnt_ptr, sizeof (aclproccnt_v3_tmpl));
-}
-
/*
* NFSv4 client stats
*/
@@ -530,29 +420,6 @@ static const kstat_named_t rfsproccnt_v4_tmpl[] = {
{ "illegal", KSTAT_DATA_UINT64 },
};
-kstat_named_t *rfsproccnt_v4_ptr;
-
-static void
-nfsstat_zone_init_rfsproc_v4(zoneid_t zoneid, struct nfs_version_stats *statsp)
-{
- kstat_named_t *ks_data;
-
- ks_data = nfsstat_zone_init_common(zoneid, "nfs", 0, "rfsproccnt_v4",
- rfsproccnt_v4_tmpl, sizeof (rfsproccnt_v4_tmpl));
- statsp->rfsproccnt_ptr = ks_data;
- if (zoneid == GLOBAL_ZONEID)
- rfsproccnt_v4_ptr = ks_data;
-}
-
-static void
-nfsstat_zone_fini_rfsproc_v4(zoneid_t zoneid, struct nfs_version_stats *statsp)
-{
- if (zoneid == GLOBAL_ZONEID)
- rfsproccnt_v4_ptr = NULL;
- nfsstat_zone_fini_common(zoneid, "nfs", 0, "rfsproccnt_v4");
- kmem_free(statsp->rfsproccnt_ptr, sizeof (rfsproccnt_v4_tmpl));
-}
-
/*
* NFSv4 client ACL stats
*/
@@ -577,39 +444,6 @@ nfsstat_zone_fini_aclreq_v4(zoneid_t zoneid, struct nfs_version_stats *statsp)
}
/*
- * NFSv4 server ACL stats
- */
-static const kstat_named_t aclproccnt_v4_tmpl[] = {
- { "null", KSTAT_DATA_UINT64 },
- { "getacl", KSTAT_DATA_UINT64 },
- { "setacl", KSTAT_DATA_UINT64 }
-};
-
-kstat_named_t *aclproccnt_v4_ptr;
-
-static void
-nfsstat_zone_init_aclproc_v4(zoneid_t zoneid, struct nfs_version_stats *statsp)
-{
- kstat_named_t *ks_data;
-
- ks_data = nfsstat_zone_init_common(zoneid, "nfs_acl", 0,
- "aclproccnt_v4", aclproccnt_v4_tmpl,
- sizeof (aclproccnt_v4_tmpl));
- statsp->aclproccnt_ptr = ks_data;
- if (zoneid == GLOBAL_ZONEID)
- aclproccnt_v4_ptr = ks_data;
-}
-
-static void
-nfsstat_zone_fini_aclproc_v4(zoneid_t zoneid, struct nfs_version_stats *statsp)
-{
- if (zoneid == GLOBAL_ZONEID)
- aclproccnt_v4_ptr = NULL;
- nfsstat_zone_fini_common(zoneid, "nfs_acl", 0, "aclproccnt_v4");
- kmem_free(statsp->aclproccnt_ptr, sizeof (aclproccnt_v4_tmpl));
-}
-
-/*
* Zone initializer callback to setup the kstats.
*/
void *
@@ -620,31 +454,20 @@ nfsstat_zone_init(zoneid_t zoneid)
nfs_stats_ptr = kmem_zalloc(sizeof (*nfs_stats_ptr), KM_SLEEP);
/*
- * Initialize all versions of the nfs_server
- */
- nfsstat_zone_init_server(zoneid, nfs_stats_ptr->nfs_stats_svstat_ptr);
-
- /*
* Initialize v2 stats
*/
nfsstat_zone_init_rfsreq_v2(zoneid, &nfs_stats_ptr->nfs_stats_v2);
- nfsstat_zone_init_rfsproc_v2(zoneid, &nfs_stats_ptr->nfs_stats_v2);
nfsstat_zone_init_aclreq_v2(zoneid, &nfs_stats_ptr->nfs_stats_v2);
- nfsstat_zone_init_aclproc_v2(zoneid, &nfs_stats_ptr->nfs_stats_v2);
/*
* Initialize v3 stats
*/
nfsstat_zone_init_rfsreq_v3(zoneid, &nfs_stats_ptr->nfs_stats_v3);
- nfsstat_zone_init_rfsproc_v3(zoneid, &nfs_stats_ptr->nfs_stats_v3);
nfsstat_zone_init_aclreq_v3(zoneid, &nfs_stats_ptr->nfs_stats_v3);
- nfsstat_zone_init_aclproc_v3(zoneid, &nfs_stats_ptr->nfs_stats_v3);
/*
* Initialize v4 stats
*/
nfsstat_zone_init_rfsreq_v4(zoneid, &nfs_stats_ptr->nfs_stats_v4);
- nfsstat_zone_init_rfsproc_v4(zoneid, &nfs_stats_ptr->nfs_stats_v4);
nfsstat_zone_init_aclreq_v4(zoneid, &nfs_stats_ptr->nfs_stats_v4);
- nfsstat_zone_init_aclproc_v4(zoneid, &nfs_stats_ptr->nfs_stats_v4);
return (nfs_stats_ptr);
}
@@ -658,31 +481,74 @@ nfsstat_zone_fini(zoneid_t zoneid, void *data)
struct nfs_stats *nfs_stats_ptr = data;
/*
- * Free nfs:0:nfs_server stats
- */
- nfsstat_zone_fini_server(zoneid, nfs_stats_ptr->nfs_stats_svstat_ptr);
-
- /*
* Free v2 stats
*/
nfsstat_zone_fini_rfsreq_v2(zoneid, &nfs_stats_ptr->nfs_stats_v2);
- nfsstat_zone_fini_rfsproc_v2(zoneid, &nfs_stats_ptr->nfs_stats_v2);
nfsstat_zone_fini_aclreq_v2(zoneid, &nfs_stats_ptr->nfs_stats_v2);
- nfsstat_zone_fini_aclproc_v2(zoneid, &nfs_stats_ptr->nfs_stats_v2);
/*
* Free v3 stats
*/
nfsstat_zone_fini_rfsreq_v3(zoneid, &nfs_stats_ptr->nfs_stats_v3);
- nfsstat_zone_fini_rfsproc_v3(zoneid, &nfs_stats_ptr->nfs_stats_v3);
nfsstat_zone_fini_aclreq_v3(zoneid, &nfs_stats_ptr->nfs_stats_v3);
- nfsstat_zone_fini_aclproc_v3(zoneid, &nfs_stats_ptr->nfs_stats_v3);
/*
* Free v4 stats
*/
nfsstat_zone_fini_rfsreq_v4(zoneid, &nfs_stats_ptr->nfs_stats_v4);
- nfsstat_zone_fini_rfsproc_v4(zoneid, &nfs_stats_ptr->nfs_stats_v4);
nfsstat_zone_fini_aclreq_v4(zoneid, &nfs_stats_ptr->nfs_stats_v4);
- nfsstat_zone_fini_aclproc_v4(zoneid, &nfs_stats_ptr->nfs_stats_v4);
kmem_free(nfs_stats_ptr, sizeof (*nfs_stats_ptr));
}
+
+void
+rfs_stat_zone_init(nfs_globals_t *ng)
+{
+ zoneid_t zoneid = ng->nfs_zoneid;
+
+ /* Initialize all versions of the nfs_server */
+ nfsstat_zone_init_server(zoneid, ng->svstat);
+
+ /* NFS proc */
+ ng->rfsproccnt[NFS_V2] = nfsstat_zone_init_common(zoneid, "nfs", 0,
+ "rfsproccnt_v2", rfsproccnt_v2_tmpl, sizeof (rfsproccnt_v2_tmpl));
+
+ ng->rfsproccnt[NFS_V3] = nfsstat_zone_init_common(zoneid, "nfs", 0,
+ "rfsproccnt_v3", rfsproccnt_v3_tmpl, sizeof (rfsproccnt_v3_tmpl));
+
+ ng->rfsproccnt[NFS_V4] = nfsstat_zone_init_common(zoneid, "nfs", 0,
+ "rfsproccnt_v4", rfsproccnt_v4_tmpl, sizeof (rfsproccnt_v4_tmpl));
+
+ /* ACL proc */
+ ng->aclproccnt[NFS_V2] = nfsstat_zone_init_common(zoneid, "nfs_acl", 0,
+ "aclproccnt_v2", aclproccnt_v2_tmpl, sizeof (aclproccnt_v2_tmpl));
+
+ ng->aclproccnt[NFS_V3] = nfsstat_zone_init_common(zoneid, "nfs_acl", 0,
+ "aclproccnt_v3", aclproccnt_v3_tmpl, sizeof (aclproccnt_v3_tmpl));
+
+}
+
+void
+rfs_stat_zone_fini(nfs_globals_t *ng)
+{
+ zoneid_t zoneid = ng->nfs_zoneid;
+
+ /* Free nfs:x:nfs_server stats */
+ nfsstat_zone_fini_server(zoneid, ng->svstat);
+
+ /* NFS */
+ nfsstat_zone_fini_common(zoneid, "nfs", 0, "rfsproccnt_v2");
+ kmem_free(ng->rfsproccnt[NFS_V2], sizeof (rfsproccnt_v2_tmpl));
+
+ nfsstat_zone_fini_common(zoneid, "nfs", 0, "rfsproccnt_v3");
+ kmem_free(ng->rfsproccnt[NFS_V3], sizeof (rfsproccnt_v3_tmpl));
+
+ nfsstat_zone_fini_common(zoneid, "nfs", 0, "rfsproccnt_v4");
+ kmem_free(ng->rfsproccnt[NFS_V4], sizeof (rfsproccnt_v4_tmpl));
+
+ /* ACL */
+ nfsstat_zone_fini_common(zoneid, "nfs_acl", 0, "aclproccnt_v2");
+ kmem_free(ng->aclproccnt[NFS_V2], sizeof (aclproccnt_v2_tmpl));
+
+ nfsstat_zone_fini_common(zoneid, "nfs_acl", 0, "aclproccnt_v3");
+ kmem_free(ng->aclproccnt[NFS_V3], sizeof (aclproccnt_v3_tmpl));
+
+}
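Note on the nfs_stats.c hunks above: the per-version nfs_server, rfsproccnt and aclproccnt kstats now live in nfs_globals_t and are created and destroyed per zone by rfs_stat_zone_init()/rfs_stat_zone_fini(), replacing the old global_svstat_ptr and *_ptr globals. A minimal sketch of how a counter is bumped after the change, mirroring the svstat use in rfs_readlink() earlier in this diff:

/* Illustrative sketch only; not part of the patch. */
static void
example_count_referral(nfs_globals_t *ng)
{
	kstat_named_t *stat = ng->svstat[NFS_VERSION];

	stat[NFS_REFERLINKS].value.ui64++;
}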
diff --git a/usr/src/uts/common/fs/nfs/nfs_sys.c b/usr/src/uts/common/fs/nfs/nfs_sys.c
index b4fc9884b1..f3ad8b482e 100644
--- a/usr/src/uts/common/fs/nfs/nfs_sys.c
+++ b/usr/src/uts/common/fs/nfs/nfs_sys.c
@@ -18,15 +18,23 @@
*
* CDDL HEADER END
*/
+
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright 2017 Joyent, Inc.
*
+ */
+
+/*
* Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
* All rights reserved.
*/
+/*
+ * Copyright 2018 Nexenta Systems, Inc.
+ */
+
#include <sys/types.h>
#include <rpc/types.h>
#include <sys/systm.h>
@@ -81,8 +89,6 @@ nfs_export(void *arg)
{
STRUCT_DECL(exportfs_args, ea);
- if (!INGLOBALZONE(curproc))
- return (set_errno(EPERM));
STRUCT_INIT(ea, get_udatamodel());
if (copyin(arg, STRUCT_BUF(ea), STRUCT_SIZE(ea)))
return (set_errno(EFAULT));
@@ -112,9 +118,6 @@ nfssys(enum nfssys_op opcode, void *arg)
break;
}
- if (!INGLOBALZONE(curproc))
- return (set_errno(EPERM));
-
STRUCT_INIT(u_clr, get_udatamodel());
if (copyin(arg, STRUCT_BUF(u_clr), STRUCT_SIZE(u_clr)))
@@ -165,8 +168,6 @@ nfssys(enum nfssys_op opcode, void *arg)
struct rdma_svc_args rsa;
char netstore[20] = "tcp";
- if (!INGLOBALZONE(curproc))
- return (set_errno(EPERM));
if (get_udatamodel() != DATAMODEL_NATIVE) {
STRUCT_DECL(rdma_svc_args, ursa);
@@ -190,9 +191,6 @@ nfssys(enum nfssys_op opcode, void *arg)
case NFS_SVC: { /* NFS server daemon */
STRUCT_DECL(nfs_svc_args, nsa);
-
- if (!INGLOBALZONE(curproc))
- return (set_errno(EPERM));
STRUCT_INIT(nsa, get_udatamodel());
if (copyin(arg, STRUCT_BUF(nsa), STRUCT_SIZE(nsa)))
@@ -210,8 +208,6 @@ nfssys(enum nfssys_op opcode, void *arg)
case NFS_GETFH: { /* get a file handle */
STRUCT_DECL(nfs_getfh_args, nga);
- if (!INGLOBALZONE(curproc))
- return (set_errno(EPERM));
STRUCT_INIT(nga, get_udatamodel());
if (copyin(arg, STRUCT_BUF(nga), STRUCT_SIZE(nga)))
return (set_errno(EFAULT));
diff --git a/usr/src/uts/common/fs/proc/prsubr.c b/usr/src/uts/common/fs/proc/prsubr.c
index 27c38da40e..0a9589a373 100644
--- a/usr/src/uts/common/fs/proc/prsubr.c
+++ b/usr/src/uts/common/fs/proc/prsubr.c
@@ -2549,24 +2549,16 @@ static size_t
prfdinfopath(proc_t *p, vnode_t *vp, list_t *data, cred_t *cred)
{
char *pathname;
- vnode_t *vrootp;
size_t pathlen;
size_t sz = 0;
pathlen = MAXPATHLEN + 1;
pathname = kmem_alloc(pathlen, KM_SLEEP);
- mutex_enter(&p->p_lock);
- if ((vrootp = PTOU(p)->u_rdir) == NULL)
- vrootp = rootdir;
- VN_HOLD(vrootp);
- mutex_exit(&p->p_lock);
-
- if (vnodetopath(vrootp, vp, pathname, pathlen, cred) == 0) {
+ if (vnodetopath(NULL, vp, pathname, pathlen, cred) == 0) {
sz += prfdinfomisc(data, PR_PATHNAME,
pathname, strlen(pathname) + 1);
}
- VN_RELE(vrootp);
kmem_free(pathname, pathlen);
return (sz);
@@ -2825,7 +2817,7 @@ prgetfdinfosize(proc_t *p, vnode_t *vp, cred_t *cred)
int
prgetfdinfo(proc_t *p, vnode_t *vp, prfdinfo_t *fdinfo, cred_t *cred,
- list_t *data)
+ cred_t *file_cred, list_t *data)
{
vattr_t vattr;
int error;
@@ -2852,9 +2844,20 @@ prgetfdinfo(proc_t *p, vnode_t *vp, prfdinfo_t *fdinfo, cred_t *cred,
VOP_SEEK(vp, 0, (offset_t *)&fdinfo->pr_offset, NULL) != 0)
fdinfo->pr_offset = -1;
- /* Attributes */
+ /*
+ * Attributes
+ *
+ * We have two cred_t structures available here.
+ * 'cred' is the caller's credential, and 'file_cred' is the credential
+ * for the file being inspected.
+ *
+ * When looking up the file attributes, file_cred is used in order
+ * that the correct ownership is set for doors and FIFOs. Since the
+ * caller has permission to read the fdinfo file in proc, this does
+ * not expose any additional information.
+ */
vattr.va_mask = AT_STAT;
- if (VOP_GETATTR(vp, &vattr, 0, cred, NULL) == 0) {
+ if (VOP_GETATTR(vp, &vattr, 0, file_cred, NULL) == 0) {
fdinfo->pr_major = getmajor(vattr.va_fsid);
fdinfo->pr_minor = getminor(vattr.va_fsid);
fdinfo->pr_rmajor = getmajor(vattr.va_rdev);
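Note on the prsubr.c hunks above: fdinfo path resolution now calls vnodetopath() with a NULL root vnode, so the path is resolved relative to the current (reading) process rather than the target's u_rdir, and prgetfdinfo() takes two credentials: 'cred' is the reader of the fdinfo file, while 'file_cred' is the credential the file was opened with and is used only for VOP_GETATTR() so doors and FIFOs report the right ownership. A sketch of the resulting call contract (simplified from the caller in prvnops.c, which takes file_cred from fp->f_cred):

/* Illustrative sketch only; not part of the patch. */
cred_t *file_cred = fp->f_cred;

crhold(file_cred);
error = prgetfdinfo(p, vp, fdinfo, cr, file_cred, &data);
crfree(file_cred);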
diff --git a/usr/src/uts/common/fs/proc/prvnops.c b/usr/src/uts/common/fs/proc/prvnops.c
index 458b1628ca..d096168b48 100644
--- a/usr/src/uts/common/fs/proc/prvnops.c
+++ b/usr/src/uts/common/fs/proc/prvnops.c
@@ -668,7 +668,7 @@ static int (*pr_read_function[PR_NFILES])() = {
/* ARGSUSED */
static int
-pr_read_inval(prnode_t *pnp, uio_t *uiop)
+pr_read_inval(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
/*
* No read() on any /proc directory, use getdents(2) instead.
@@ -801,7 +801,7 @@ pr_read_as(prnode_t *pnp, uio_t *uiop)
}
static int
-pr_read_status(prnode_t *pnp, uio_t *uiop)
+pr_read_status(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
pstatus_t *sp;
int error;
@@ -823,7 +823,7 @@ pr_read_status(prnode_t *pnp, uio_t *uiop)
}
static int
-pr_read_lstatus(prnode_t *pnp, uio_t *uiop)
+pr_read_lstatus(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
proc_t *p;
kthread_t *t;
@@ -869,7 +869,7 @@ pr_read_lstatus(prnode_t *pnp, uio_t *uiop)
}
static int
-pr_read_psinfo(prnode_t *pnp, uio_t *uiop)
+pr_read_psinfo(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
psinfo_t psinfo;
proc_t *p;
@@ -896,7 +896,7 @@ pr_read_psinfo(prnode_t *pnp, uio_t *uiop)
}
static int
-pr_read_fdinfo(prnode_t *pnp, uio_t *uiop)
+pr_read_fdinfo(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
prfdinfo_t *fdinfo;
list_t data;
@@ -904,7 +904,7 @@ pr_read_fdinfo(prnode_t *pnp, uio_t *uiop)
vnode_t *vp;
uint_t fd;
file_t *fp;
- cred_t *cred;
+ cred_t *file_cred;
short ufp_flag;
int error = 0;
@@ -959,8 +959,8 @@ pr_read_fdinfo(prnode_t *pnp, uio_t *uiop)
if ((fdinfo->pr_fileflags & (FSEARCH | FEXEC)) == 0)
fdinfo->pr_fileflags += FOPEN;
fdinfo->pr_offset = fp->f_offset;
- cred = fp->f_cred;
- crhold(cred);
+ file_cred = fp->f_cred;
+ crhold(file_cred);
/*
* Information from the vnode (rather than the file_t) is retrieved
* later, in prgetfdinfo() - for example sock_getfasync()
@@ -969,9 +969,9 @@ pr_read_fdinfo(prnode_t *pnp, uio_t *uiop)
prunlock(pnp);
- error = prgetfdinfo(p, vp, fdinfo, cred, &data);
+ error = prgetfdinfo(p, vp, fdinfo, cr, file_cred, &data);
- crfree(cred);
+ crfree(file_cred);
VN_RELE(vp);
@@ -985,7 +985,7 @@ out:
}
static int
-pr_read_lpsinfo(prnode_t *pnp, uio_t *uiop)
+pr_read_lpsinfo(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
proc_t *p;
kthread_t *t;
@@ -1099,28 +1099,28 @@ readmap_common:
}
static int
-pr_read_map(prnode_t *pnp, uio_t *uiop)
+pr_read_map(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
ASSERT(pnp->pr_type == PR_MAP);
return (pr_read_map_common(pnp, uiop, pnp->pr_type));
}
static int
-pr_read_rmap(prnode_t *pnp, uio_t *uiop)
+pr_read_rmap(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
ASSERT(pnp->pr_type == PR_RMAP);
return (pr_read_map_common(pnp, uiop, pnp->pr_type));
}
static int
-pr_read_xmap(prnode_t *pnp, uio_t *uiop)
+pr_read_xmap(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
ASSERT(pnp->pr_type == PR_XMAP);
return (pr_read_map_common(pnp, uiop, pnp->pr_type));
}
static int
-pr_read_cred(prnode_t *pnp, uio_t *uiop)
+pr_read_cred(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
proc_t *p;
prcred_t *pcrp;
@@ -1155,7 +1155,7 @@ out:
}
static int
-pr_read_priv(prnode_t *pnp, uio_t *uiop)
+pr_read_priv(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
proc_t *p;
size_t psize = prgetprivsize();
@@ -1179,7 +1179,7 @@ out:
}
static int
-pr_read_sigact(prnode_t *pnp, uio_t *uiop)
+pr_read_sigact(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
proc_t *p;
@@ -1218,7 +1218,7 @@ out:
}
static int
-pr_read_auxv(prnode_t *pnp, uio_t *uiop)
+pr_read_auxv(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
auxv_t auxv[__KERN_NAUXV_IMPL];
proc_t *p;
@@ -1253,7 +1253,7 @@ pr_read_auxv(prnode_t *pnp, uio_t *uiop)
* For now let's just have a ldt of size 0 for 64-bit processes.
*/
static int
-pr_read_ldt(prnode_t *pnp, uio_t *uiop)
+pr_read_ldt(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
proc_t *p;
struct ssd *ssd;
@@ -1289,7 +1289,7 @@ pr_read_ldt(prnode_t *pnp, uio_t *uiop)
#endif /* __x86 */
static int
-pr_read_usage(prnode_t *pnp, uio_t *uiop)
+pr_read_usage(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
prhusage_t *pup;
prusage_t *upup;
@@ -1378,7 +1378,7 @@ out:
}
static int
-pr_read_lusage(prnode_t *pnp, uio_t *uiop)
+pr_read_lusage(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
int nlwp;
prhusage_t *pup;
@@ -1489,7 +1489,7 @@ pr_read_lusage(prnode_t *pnp, uio_t *uiop)
}
static int
-pr_read_pagedata(prnode_t *pnp, uio_t *uiop)
+pr_read_pagedata(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
proc_t *p;
int error;
@@ -1514,7 +1514,7 @@ pr_read_pagedata(prnode_t *pnp, uio_t *uiop)
}
static int
-pr_read_opagedata(prnode_t *pnp, uio_t *uiop)
+pr_read_opagedata(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
proc_t *p;
struct as *as;
@@ -1541,7 +1541,7 @@ pr_read_opagedata(prnode_t *pnp, uio_t *uiop)
}
static int
-pr_read_watch(prnode_t *pnp, uio_t *uiop)
+pr_read_watch(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
proc_t *p;
int error;
@@ -1587,7 +1587,7 @@ pr_read_watch(prnode_t *pnp, uio_t *uiop)
}
static int
-pr_read_lwpstatus(prnode_t *pnp, uio_t *uiop)
+pr_read_lwpstatus(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
lwpstatus_t *sp;
int error;
@@ -1618,7 +1618,7 @@ out:
}
static int
-pr_read_lwpsinfo(prnode_t *pnp, uio_t *uiop)
+pr_read_lwpsinfo(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
lwpsinfo_t lwpsinfo;
proc_t *p;
@@ -1665,7 +1665,7 @@ pr_read_lwpsinfo(prnode_t *pnp, uio_t *uiop)
}
static int
-pr_read_lwpusage(prnode_t *pnp, uio_t *uiop)
+pr_read_lwpusage(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
prhusage_t *pup;
prusage_t *upup;
@@ -1716,7 +1716,7 @@ out:
}
static int
-pr_read_lwpname(prnode_t *pnp, uio_t *uiop)
+pr_read_lwpname(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
char lwpname[THREAD_NAME_MAX];
kthread_t *t;
@@ -1744,7 +1744,7 @@ pr_read_lwpname(prnode_t *pnp, uio_t *uiop)
/* ARGSUSED */
static int
-pr_read_xregs(prnode_t *pnp, uio_t *uiop)
+pr_read_xregs(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
#if defined(__sparc)
proc_t *p;
@@ -1785,7 +1785,7 @@ out:
}
static int
-pr_read_spymaster(prnode_t *pnp, uio_t *uiop)
+pr_read_spymaster(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
psinfo_t psinfo;
int error;
@@ -1815,7 +1815,7 @@ pr_read_spymaster(prnode_t *pnp, uio_t *uiop)
}
static int
-pr_read_secflags(prnode_t *pnp, uio_t *uiop)
+pr_read_secflags(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
prsecflags_t ret;
int error;
@@ -1836,7 +1836,7 @@ pr_read_secflags(prnode_t *pnp, uio_t *uiop)
#if defined(__sparc)
static int
-pr_read_gwindows(prnode_t *pnp, uio_t *uiop)
+pr_read_gwindows(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
proc_t *p;
kthread_t *t;
@@ -1880,7 +1880,7 @@ out:
/* ARGSUSED */
static int
-pr_read_asrs(prnode_t *pnp, uio_t *uiop)
+pr_read_asrs(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
int error;
@@ -1917,7 +1917,7 @@ pr_read_asrs(prnode_t *pnp, uio_t *uiop)
#endif /* __sparc */
static int
-pr_read_piddir(prnode_t *pnp, uio_t *uiop)
+pr_read_piddir(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
ASSERT(pnp->pr_type == PR_PIDDIR);
ASSERT(pnp->pr_pidfile != NULL);
@@ -1930,7 +1930,7 @@ pr_read_piddir(prnode_t *pnp, uio_t *uiop)
}
static int
-pr_read_pidfile(prnode_t *pnp, uio_t *uiop)
+pr_read_pidfile(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
int error;
@@ -2040,7 +2040,7 @@ static int (*pr_read_function_32[PR_NFILES])() = {
};
static int
-pr_read_status_32(prnode_t *pnp, uio_t *uiop)
+pr_read_status_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
pstatus32_t *sp;
proc_t *p;
@@ -2074,7 +2074,7 @@ pr_read_status_32(prnode_t *pnp, uio_t *uiop)
}
static int
-pr_read_lstatus_32(prnode_t *pnp, uio_t *uiop)
+pr_read_lstatus_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
proc_t *p;
kthread_t *t;
@@ -2128,7 +2128,7 @@ pr_read_lstatus_32(prnode_t *pnp, uio_t *uiop)
}
static int
-pr_read_psinfo_32(prnode_t *pnp, uio_t *uiop)
+pr_read_psinfo_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
psinfo32_t psinfo;
proc_t *p;
@@ -2155,7 +2155,7 @@ pr_read_psinfo_32(prnode_t *pnp, uio_t *uiop)
}
static int
-pr_read_lpsinfo_32(prnode_t *pnp, uio_t *uiop)
+pr_read_lpsinfo_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
proc_t *p;
kthread_t *t;
@@ -2271,28 +2271,28 @@ readmap32_common:
}
static int
-pr_read_map_32(prnode_t *pnp, uio_t *uiop)
+pr_read_map_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
ASSERT(pnp->pr_type == PR_MAP);
return (pr_read_map_common_32(pnp, uiop, pnp->pr_type));
}
static int
-pr_read_rmap_32(prnode_t *pnp, uio_t *uiop)
+pr_read_rmap_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
ASSERT(pnp->pr_type == PR_RMAP);
return (pr_read_map_common_32(pnp, uiop, pnp->pr_type));
}
static int
-pr_read_xmap_32(prnode_t *pnp, uio_t *uiop)
+pr_read_xmap_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
ASSERT(pnp->pr_type == PR_XMAP);
return (pr_read_map_common_32(pnp, uiop, pnp->pr_type));
}
static int
-pr_read_sigact_32(prnode_t *pnp, uio_t *uiop)
+pr_read_sigact_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
proc_t *p;
@@ -2336,7 +2336,7 @@ out:
}
static int
-pr_read_auxv_32(prnode_t *pnp, uio_t *uiop)
+pr_read_auxv_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
auxv32_t auxv[__KERN_NAUXV_IMPL];
proc_t *p;
@@ -2371,7 +2371,7 @@ pr_read_auxv_32(prnode_t *pnp, uio_t *uiop)
}
static int
-pr_read_usage_32(prnode_t *pnp, uio_t *uiop)
+pr_read_usage_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
prhusage_t *pup;
prusage32_t *upup;
@@ -2460,7 +2460,7 @@ out:
}
static int
-pr_read_lusage_32(prnode_t *pnp, uio_t *uiop)
+pr_read_lusage_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
int nlwp;
prhusage_t *pup;
@@ -2572,7 +2572,7 @@ pr_read_lusage_32(prnode_t *pnp, uio_t *uiop)
}
static int
-pr_read_pagedata_32(prnode_t *pnp, uio_t *uiop)
+pr_read_pagedata_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
proc_t *p;
int error;
@@ -2602,7 +2602,7 @@ pr_read_pagedata_32(prnode_t *pnp, uio_t *uiop)
}
static int
-pr_read_opagedata_32(prnode_t *pnp, uio_t *uiop)
+pr_read_opagedata_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
proc_t *p;
struct as *as;
@@ -2635,7 +2635,7 @@ pr_read_opagedata_32(prnode_t *pnp, uio_t *uiop)
}
static int
-pr_read_watch_32(prnode_t *pnp, uio_t *uiop)
+pr_read_watch_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
proc_t *p;
int error;
@@ -2685,7 +2685,7 @@ pr_read_watch_32(prnode_t *pnp, uio_t *uiop)
}
static int
-pr_read_lwpstatus_32(prnode_t *pnp, uio_t *uiop)
+pr_read_lwpstatus_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
lwpstatus32_t *sp;
proc_t *p;
@@ -2728,7 +2728,7 @@ out:
}
static int
-pr_read_lwpsinfo_32(prnode_t *pnp, uio_t *uiop)
+pr_read_lwpsinfo_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
lwpsinfo32_t lwpsinfo;
proc_t *p;
@@ -2773,7 +2773,7 @@ pr_read_lwpsinfo_32(prnode_t *pnp, uio_t *uiop)
}
static int
-pr_read_lwpusage_32(prnode_t *pnp, uio_t *uiop)
+pr_read_lwpusage_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
prhusage_t *pup;
prusage32_t *upup;
@@ -2824,7 +2824,7 @@ out:
}
static int
-pr_read_spymaster_32(prnode_t *pnp, uio_t *uiop)
+pr_read_spymaster_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
psinfo32_t psinfo;
int error;
@@ -2855,7 +2855,7 @@ pr_read_spymaster_32(prnode_t *pnp, uio_t *uiop)
#if defined(__sparc)
static int
-pr_read_gwindows_32(prnode_t *pnp, uio_t *uiop)
+pr_read_gwindows_32(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
proc_t *p;
kthread_t *t;
@@ -2921,11 +2921,11 @@ prread(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr, caller_context_t *ct)
* data. An ILP32 process will see ILP32 data.
*/
if (curproc->p_model == DATAMODEL_LP64)
- return (pr_read_function[pnp->pr_type](pnp, uiop));
+ return (pr_read_function[pnp->pr_type](pnp, uiop, cr));
else
- return (pr_read_function_32[pnp->pr_type](pnp, uiop));
+ return (pr_read_function_32[pnp->pr_type](pnp, uiop, cr));
#else
- return (pr_read_function[pnp->pr_type](pnp, uiop));
+ return (pr_read_function[pnp->pr_type](pnp, uiop, cr));
#endif
}
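Note on the prvnops.c hunks above: the open credential is threaded through every pr_read_* handler so pr_read_fdinfo() can hand both the reader's cred and the file's cred to prgetfdinfo(); since the pr_read_function tables are declared unprototyped, only the handlers and the dispatcher change shape. A sketch of the dispatch after the change:

/* Illustrative sketch only; not part of the patch. */
static int
example_prread(prnode_t *pnp, uio_t *uiop, cred_t *cr)
{
	return (pr_read_function[pnp->pr_type](pnp, uiop, cr));
}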
diff --git a/usr/src/uts/common/fs/sharefs/sharefs_vfsops.c b/usr/src/uts/common/fs/sharefs/sharefs_vfsops.c
index 1fa1617ec8..1065d86719 100644
--- a/usr/src/uts/common/fs/sharefs/sharefs_vfsops.c
+++ b/usr/src/uts/common/fs/sharefs/sharefs_vfsops.c
@@ -23,6 +23,10 @@
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
*/
+/*
+ * Copyright 2018 Nexenta Systems, Inc.
+ */
+
#include <sys/atomic.h>
#include <sys/cmn_err.h>
#include <sys/errno.h>
@@ -243,16 +247,6 @@ sharefs_unmount(vfs_t *vfsp, int flag, struct cred *cr)
return (EBUSY);
/*
- * Only allow an unmount iff there are no entries in memory.
- */
- rw_enter(&sharetab_lock, RW_READER);
- if (sharetab_size != 0) {
- rw_exit(&sharetab_lock);
- return (EBUSY);
- }
- rw_exit(&sharetab_lock);
-
- /*
* Release the last hold on the root vnode
*/
VN_RELE(data->sharefs_vfs_root);
diff --git a/usr/src/uts/common/fs/sharefs/sharefs_vnops.c b/usr/src/uts/common/fs/sharefs/sharefs_vnops.c
index 2ca3f293a5..8e5a9a2cc7 100644
--- a/usr/src/uts/common/fs/sharefs/sharefs_vnops.c
+++ b/usr/src/uts/common/fs/sharefs/sharefs_vnops.c
@@ -24,6 +24,10 @@
* Use is subject to license terms.
*/
+/*
+ * Copyright 2018 Nexenta Systems, Inc.
+ */
+
#include <fs/fs_subr.h>
#include <sys/errno.h>
@@ -45,7 +49,7 @@
* the shares enumerated.
*/
static int
-sharefs_snap_create(shnode_t *sft)
+sharefs_snap_create(sharetab_globals_t *sg, shnode_t *sft)
{
sharetab_t *sht;
share_t *sh;
@@ -53,16 +57,16 @@ sharefs_snap_create(shnode_t *sft)
int iCount = 0;
char *buf;
- rw_enter(&sharefs_lock, RW_WRITER);
- rw_enter(&sharetab_lock, RW_READER);
+ rw_enter(&sg->sharefs_lock, RW_WRITER);
+ rw_enter(&sg->sharetab_lock, RW_READER);
if (sft->sharefs_snap) {
/*
* Nothing has changed, so no need to grab a new copy!
*/
- if (sft->sharefs_generation == sharetab_generation) {
- rw_exit(&sharetab_lock);
- rw_exit(&sharefs_lock);
+ if (sft->sharefs_generation == sg->sharetab_generation) {
+ rw_exit(&sg->sharetab_lock);
+ rw_exit(&sg->sharefs_lock);
return (0);
}
@@ -71,12 +75,12 @@ sharefs_snap_create(shnode_t *sft)
sft->sharefs_snap = NULL;
}
- sft->sharefs_size = sharetab_size;
- sft->sharefs_count = sharetab_count;
+ sft->sharefs_size = sg->sharetab_size;
+ sft->sharefs_count = sg->sharetab_count;
if (sft->sharefs_size == 0) {
- rw_exit(&sharetab_lock);
- rw_exit(&sharefs_lock);
+ rw_exit(&sg->sharetab_lock);
+ rw_exit(&sg->sharefs_lock);
return (0);
}
@@ -87,7 +91,7 @@ sharefs_snap_create(shnode_t *sft)
/*
* Walk the Sharetab, dumping each entry.
*/
- for (sht = sharefs_sharetab; sht != NULL; sht = sht->s_next) {
+ for (sht = sg->sharefs_sharetab; sht != NULL; sht = sht->s_next) {
int i;
for (i = 0; i < SHARETAB_HASHES; i++) {
@@ -132,14 +136,14 @@ sharefs_snap_create(shnode_t *sft)
* We want to record the generation number and
* mtime inside this snapshot.
*/
- gethrestime(&sharetab_snap_time);
- sft->sharefs_snap_time = sharetab_snap_time;
- sft->sharefs_generation = sharetab_generation;
+ gethrestime(&sg->sharetab_snap_time);
+ sft->sharefs_snap_time = sg->sharetab_snap_time;
+ sft->sharefs_generation = sg->sharetab_generation;
ASSERT(iCount == sft->sharefs_count);
- rw_exit(&sharetab_lock);
- rw_exit(&sharefs_lock);
+ rw_exit(&sg->sharetab_lock);
+ rw_exit(&sg->sharefs_lock);
return (0);
error_fault:
@@ -148,8 +152,8 @@ error_fault:
sft->sharefs_size = 0;
sft->sharefs_count = 0;
sft->sharefs_snap = NULL;
- rw_exit(&sharetab_lock);
- rw_exit(&sharefs_lock);
+ rw_exit(&sg->sharetab_lock);
+ rw_exit(&sg->sharefs_lock);
return (EFAULT);
}
@@ -161,13 +165,14 @@ sharefs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
{
timestruc_t now;
shnode_t *sft = VTOSH(vp);
+ sharetab_globals_t *sg = sharetab_get_globals(vp->v_vfsp->vfs_zone);
vap->va_type = VREG;
vap->va_mode = S_IRUSR | S_IRGRP | S_IROTH;
vap->va_nodeid = SHAREFS_INO_FILE;
vap->va_nlink = 1;
- rw_enter(&sharefs_lock, RW_READER);
+ rw_enter(&sg->sharefs_lock, RW_READER);
/*
* If we get asked about a snapped vnode, then
@@ -177,15 +182,15 @@ sharefs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
* sharetab.
*/
if (sft->sharefs_real_vp) {
- rw_enter(&sharetab_lock, RW_READER);
- vap->va_size = sharetab_size;
- vap->va_mtime = sharetab_mtime;
- rw_exit(&sharetab_lock);
+ rw_enter(&sg->sharetab_lock, RW_READER);
+ vap->va_size = sg->sharetab_size;
+ vap->va_mtime = sg->sharetab_mtime;
+ rw_exit(&sg->sharetab_lock);
} else {
vap->va_size = sft->sharefs_size;
vap->va_mtime = sft->sharefs_snap_time;
}
- rw_exit(&sharefs_lock);
+ rw_exit(&sg->sharefs_lock);
gethrestime(&now);
vap->va_atime = vap->va_ctime = now;
@@ -259,7 +264,8 @@ sharefs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
* are dumping an extremely huge sharetab, we make a copy
* of it here and use it to dump instead.
*/
- error = sharefs_snap_create(sft);
+ error = sharefs_snap_create(sharetab_get_globals(vp->v_vfsp->vfs_zone),
+ sft);
return (error);
}
@@ -270,11 +276,12 @@ sharefs_close(vnode_t *vp, int flag, int count,
offset_t off, cred_t *cr, caller_context_t *ct)
{
shnode_t *sft = VTOSH(vp);
+ sharetab_globals_t *sg = sharetab_get_globals(vp->v_vfsp->vfs_zone);
if (count > 1)
return (0);
- rw_enter(&sharefs_lock, RW_WRITER);
+ rw_enter(&sg->sharefs_lock, RW_WRITER);
if (vp->v_count == 1) {
if (sft->sharefs_snap != NULL) {
kmem_free(sft->sharefs_snap, sft->sharefs_size + 1);
@@ -284,7 +291,7 @@ sharefs_close(vnode_t *vp, int flag, int count,
}
}
atomic_dec_32(&sft->sharefs_refs);
- rw_exit(&sharefs_lock);
+ rw_exit(&sg->sharefs_lock);
return (0);
}
@@ -292,30 +299,31 @@ sharefs_close(vnode_t *vp, int flag, int count,
/* ARGSUSED */
static int
sharefs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr,
- caller_context_t *ct)
+ caller_context_t *ct)
{
shnode_t *sft = VTOSH(vp);
off_t off = uio->uio_offset;
size_t len = uio->uio_resid;
int error = 0;
+ sharetab_globals_t *sg = sharetab_get_globals(vp->v_vfsp->vfs_zone);
- rw_enter(&sharefs_lock, RW_READER);
+ rw_enter(&sg->sharefs_lock, RW_READER);
/*
* First check to see if we need to grab a new snapshot.
*/
if (off == (off_t)0) {
- rw_exit(&sharefs_lock);
- error = sharefs_snap_create(sft);
+ rw_exit(&sg->sharefs_lock);
+ error = sharefs_snap_create(sg, sft);
if (error) {
return (EFAULT);
}
- rw_enter(&sharefs_lock, RW_READER);
+ rw_enter(&sg->sharefs_lock, RW_READER);
}
/* LINTED */
if (len <= 0 || off >= sft->sharefs_size) {
- rw_exit(&sharefs_lock);
+ rw_exit(&sg->sharefs_lock);
return (error);
}
@@ -323,7 +331,7 @@ sharefs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr,
len = sft->sharefs_size - off;
if (off < 0 || len > sft->sharefs_size) {
- rw_exit(&sharefs_lock);
+ rw_exit(&sg->sharefs_lock);
return (EFAULT);
}
@@ -332,7 +340,7 @@ sharefs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr,
len, UIO_READ, uio);
}
- rw_exit(&sharefs_lock);
+ rw_exit(&sg->sharefs_lock);
return (error);
}
@@ -342,16 +350,17 @@ sharefs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *tx)
{
gfs_file_t *fp = vp->v_data;
shnode_t *sft;
+ sharetab_globals_t *sg = sharetab_get_globals(vp->v_vfsp->vfs_zone);
sft = (shnode_t *)gfs_file_inactive(vp);
if (sft) {
- rw_enter(&sharefs_lock, RW_WRITER);
+ rw_enter(&sg->sharefs_lock, RW_WRITER);
if (sft->sharefs_snap != NULL) {
kmem_free(sft->sharefs_snap, sft->sharefs_size + 1);
}
kmem_free(sft, fp->gfs_size);
- rw_exit(&sharefs_lock);
+ rw_exit(&sg->sharefs_lock);
}
}
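Note on the sharefs_vnops.c hunks above: every vnode operation now begins by resolving the per-zone sharetab state from the vfs's zone instead of touching file-level globals, and all locking goes through that structure. The common pattern, as a sketch:

/* Illustrative sketch only; not part of the patch. */
sharetab_globals_t *sg = sharetab_get_globals(vp->v_vfsp->vfs_zone);

rw_enter(&sg->sharefs_lock, RW_READER);
/* ... consume sg->sharetab_size, sg->sharetab_mtime, the snapshot ... */
rw_exit(&sg->sharefs_lock);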
diff --git a/usr/src/uts/common/fs/sharefs/sharetab.c b/usr/src/uts/common/fs/sharefs/sharetab.c
index 0f8543641c..d66c1aa85f 100644
--- a/usr/src/uts/common/fs/sharefs/sharetab.c
+++ b/usr/src/uts/common/fs/sharefs/sharetab.c
@@ -23,6 +23,11 @@
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
*/
+/*
+ * Copyright 2018 Nexenta Systems, Inc.
+ * Copyright 2020 Joyent, Inc.
+ */
+
#include <sys/types.h>
#include <sys/types32.h>
#include <sys/param.h>
@@ -47,15 +52,13 @@
*/
#define SHARETAB_COPYIN(field) \
if (copyinstr(STRUCT_FGETP(u_sh, sh_##field), \
- buf, \
- bufsz + 1, /* Add one for extra NUL */ \
- &len)) { \
+ buf, \
+ bufsz + 1, /* Add one for extra NUL */ \
+ &len)) { \
error = EFAULT; \
goto cleanup; \
} \
- /* \
- * Need to remove 1 because copyinstr() counts the NUL. \
- */ \
+ /* Need to remove 1 because copyinstr() counts the NUL */ \
len--; \
sh->sh_##field = kmem_alloc(len + 1, KM_SLEEP); \
bcopy(buf, sh->sh_##field, len); \
@@ -64,24 +67,13 @@
sh->sh_size += shl.shl_##field; /* Debug counting */
#define SHARETAB_DELETE_FIELD(field) \
- if (sh->sh_##field) { \
+ if (sh->sh_##field != NULL) { \
kmem_free(sh->sh_##field, \
- shl ? shl->shl_##field + 1 : \
- strlen(sh->sh_##field) + 1); \
+ shl ? shl->shl_##field + 1 : \
+ strlen(sh->sh_##field) + 1); \
}
-sharetab_t *sharefs_sharetab = NULL; /* The incore sharetab. */
-size_t sharetab_size;
-uint_t sharetab_count;
-
-krwlock_t sharetab_lock; /* lock to protect the cached sharetab */
-
-krwlock_t sharefs_lock; /* lock to protect the vnode ops */
-
-timestruc_t sharetab_mtime;
-timestruc_t sharetab_snap_time;
-
-uint_t sharetab_generation; /* Only increments and wraps! */
+static zone_key_t sharetab_zone_key;
/*
* Take care of cleaning up a share.
@@ -91,7 +83,7 @@ uint_t sharetab_generation; /* Only increments and wraps! */
static void
sharefree(share_t *sh, sharefs_lens_t *shl)
{
- if (!sh)
+ if (sh == NULL)
return;
SHARETAB_DELETE_FIELD(path);
@@ -100,7 +92,7 @@ sharefree(share_t *sh, sharefs_lens_t *shl)
SHARETAB_DELETE_FIELD(opts);
SHARETAB_DELETE_FIELD(descr);
- kmem_free(sh, sizeof (share_t));
+ kmem_free(sh, sizeof (*sh));
}
/*
@@ -108,7 +100,7 @@ sharefree(share_t *sh, sharefs_lens_t *shl)
* cleaning up the memory associated with the share argument.
*/
static int
-sharefs_remove(share_t *sh, sharefs_lens_t *shl)
+sharefs_remove(sharetab_globals_t *sg, share_t *sh, sharefs_lens_t *shl)
{
int iHash;
sharetab_t *sht;
@@ -118,23 +110,22 @@ sharefs_remove(share_t *sh, sharefs_lens_t *shl)
if (!sh)
return (ENOENT);
- rw_enter(&sharetab_lock, RW_WRITER);
- for (sht = sharefs_sharetab; sht != NULL; sht = sht->s_next) {
- if (strcmp(sh->sh_fstype, sht->s_fstype) == 0) {
+ rw_enter(&sg->sharetab_lock, RW_WRITER);
+ for (sht = sg->sharefs_sharetab; sht != NULL; sht = sht->s_next) {
+ if (strcmp(sh->sh_fstype, sht->s_fstype) == 0)
break;
- }
}
/*
* There does not exist a fstype in memory which
* matches the share passed in.
*/
- if (!sht) {
- rw_exit(&sharetab_lock);
+ if (sht == NULL) {
+ rw_exit(&sg->sharetab_lock);
return (ENOENT);
}
- iPath = shl ? shl->shl_path : strlen(sh->sh_path);
+ iPath = shl != NULL ? shl->shl_path : strlen(sh->sh_path);
iHash = pkp_tab_hash(sh->sh_path, strlen(sh->sh_path));
/*
@@ -147,22 +138,21 @@ sharefs_remove(share_t *sh, sharefs_lens_t *shl)
*/
if (strcmp(sh->sh_path, s->sh_path) == 0 &&
strlen(s->sh_path) == iPath) {
- if (p) {
+ if (p != NULL)
p->sh_next = s->sh_next;
- } else {
+ else
sht->s_buckets[iHash].ssh_sh = s->sh_next;
- }
ASSERT(sht->s_buckets[iHash].ssh_count != 0);
atomic_dec_32(&sht->s_buckets[iHash].ssh_count);
atomic_dec_32(&sht->s_count);
- atomic_dec_32(&sharetab_count);
+ atomic_dec_32(&sg->sharetab_count);
- ASSERT(sharetab_size >= s->sh_size);
- sharetab_size -= s->sh_size;
+ ASSERT(sg->sharetab_size >= s->sh_size);
+ sg->sharetab_size -= s->sh_size;
- gethrestime(&sharetab_mtime);
- atomic_inc_32(&sharetab_generation);
+ gethrestime(&sg->sharetab_mtime);
+ atomic_inc_32(&sg->sharetab_generation);
break;
}
@@ -170,18 +160,15 @@ sharefs_remove(share_t *sh, sharefs_lens_t *shl)
p = s;
}
- rw_exit(&sharetab_lock);
+ rw_exit(&sg->sharetab_lock);
- if (!s) {
+ if (s == NULL)
return (ENOENT);
- }
s->sh_next = NULL;
sharefree(s, NULL);
- /*
- * We need to free the share for the caller.
- */
+ /* We need to free the share for the caller */
sharefree(sh, shl);
return (0);
@@ -191,7 +178,7 @@ sharefs_remove(share_t *sh, sharefs_lens_t *shl)
* The caller must have allocated memory for us to use.
*/
static int
-sharefs_add(share_t *sh, sharefs_lens_t *shl)
+sharefs_add(sharetab_globals_t *sg, share_t *sh, sharefs_lens_t *shl)
{
int iHash;
sharetab_t *sht;
@@ -199,41 +186,31 @@ sharefs_add(share_t *sh, sharefs_lens_t *shl)
int iPath;
int n;
- if (!sh) {
+ if (sh == NULL)
return (ENOENT);
- }
- /*
- * We need to find the hash buckets for the fstype.
- */
- rw_enter(&sharetab_lock, RW_WRITER);
- for (sht = sharefs_sharetab; sht != NULL; sht = sht->s_next) {
- if (strcmp(sh->sh_fstype, sht->s_fstype) == 0) {
+ /* We need to find the hash buckets for the fstype */
+ rw_enter(&sg->sharetab_lock, RW_WRITER);
+ for (sht = sg->sharefs_sharetab; sht != NULL; sht = sht->s_next) {
+ if (strcmp(sh->sh_fstype, sht->s_fstype) == 0)
break;
- }
}
- /*
- * Did not exist, so allocate one and add it to the
- * sharetab.
- */
- if (!sht) {
+ /* Did not exist, so allocate one and add it to the sharetab */
+ if (sht == NULL) {
sht = kmem_zalloc(sizeof (*sht), KM_SLEEP);
n = strlen(sh->sh_fstype);
sht->s_fstype = kmem_zalloc(n + 1, KM_SLEEP);
(void) strncpy(sht->s_fstype, sh->sh_fstype, n);
- sht->s_next = sharefs_sharetab;
- sharefs_sharetab = sht;
+ sht->s_next = sg->sharefs_sharetab;
+ sg->sharefs_sharetab = sht;
}
- /*
- * Now we need to find where we have to add the entry.
- */
+ /* Now we need to find where we have to add the entry */
+ iPath = shl != NULL ? shl->shl_path : strlen(sh->sh_path);
iHash = pkp_tab_hash(sh->sh_path, strlen(sh->sh_path));
- iPath = shl ? shl->shl_path : strlen(sh->sh_path);
-
if (shl) {
sh->sh_size = shl->shl_path + shl->shl_res +
shl->shl_fstype + shl->shl_opts + shl->shl_descr;
@@ -243,15 +220,10 @@ sharefs_add(share_t *sh, sharefs_lens_t *shl)
strlen(sh->sh_opts) + strlen(sh->sh_descr);
}
- /*
- * We need to account for field seperators and
- * the EOL.
- */
+ /* We need to account for field separators and the EOL */
sh->sh_size += 5;
- /*
- * Now walk down the hash table and add the new entry!
- */
+ /* Now walk down the hash table and add the new entry */
for (p = NULL, s = sht->s_buckets[iHash].ssh_sh;
s != NULL; s = s->sh_next) {
/*
@@ -263,28 +235,25 @@ sharefs_add(share_t *sh, sharefs_lens_t *shl)
*/
if (strcmp(sh->sh_path, s->sh_path) == 0 &&
strlen(s->sh_path) == iPath) {
- if (p) {
+ if (p != NULL)
p->sh_next = sh;
- } else {
+ else
sht->s_buckets[iHash].ssh_sh = sh;
- }
sh->sh_next = s->sh_next;
- ASSERT(sharetab_size >= s->sh_size);
- sharetab_size -= s->sh_size;
- sharetab_size += sh->sh_size;
+ ASSERT(sg->sharetab_size >= s->sh_size);
+ sg->sharetab_size -= s->sh_size;
+ sg->sharetab_size += sh->sh_size;
- /*
- * Get rid of the old node.
- */
+ /* Get rid of the old node */
sharefree(s, NULL);
- gethrestime(&sharetab_mtime);
- atomic_inc_32(&sharetab_generation);
+ gethrestime(&sg->sharetab_mtime);
+ atomic_inc_32(&sg->sharetab_generation);
ASSERT(sht->s_buckets[iHash].ssh_count != 0);
- rw_exit(&sharetab_lock);
+ rw_exit(&sg->sharetab_lock);
return (0);
}
@@ -300,29 +269,82 @@ sharefs_add(share_t *sh, sharefs_lens_t *shl)
sht->s_buckets[iHash].ssh_sh = sh;
atomic_inc_32(&sht->s_buckets[iHash].ssh_count);
atomic_inc_32(&sht->s_count);
- atomic_inc_32(&sharetab_count);
- sharetab_size += sh->sh_size;
+ atomic_inc_32(&sg->sharetab_count);
+ sg->sharetab_size += sh->sh_size;
- gethrestime(&sharetab_mtime);
- atomic_inc_32(&sharetab_generation);
+ gethrestime(&sg->sharetab_mtime);
+ atomic_inc_32(&sg->sharetab_generation);
- rw_exit(&sharetab_lock);
+ rw_exit(&sg->sharetab_lock);
return (0);
}
+/* ARGSUSED */
+static void *
+sharetab_zone_init(zoneid_t zoneid)
+{
+ sharetab_globals_t *sg;
+
+ sg = kmem_zalloc(sizeof (*sg), KM_SLEEP);
+
+ rw_init(&sg->sharetab_lock, NULL, RW_DEFAULT, NULL);
+ rw_init(&sg->sharefs_lock, NULL, RW_DEFAULT, NULL);
+
+ sg->sharetab_size = 0;
+ sg->sharetab_count = 0;
+ sg->sharetab_generation = 1;
+
+ gethrestime(&sg->sharetab_mtime);
+ gethrestime(&sg->sharetab_snap_time);
+
+ return (sg);
+}
+
+/* ARGSUSED */
+static void
+sharetab_zone_fini(zoneid_t zoneid, void *data)
+{
+ sharetab_globals_t *sg = data;
+
+ rw_destroy(&sg->sharefs_lock);
+ rw_destroy(&sg->sharetab_lock);
+
+ /* ALL of the allocated things must be cleaned before we free sg. */
+ while (sg->sharefs_sharetab != NULL) {
+ int i;
+ sharetab_t *freeing = sg->sharefs_sharetab;
+
+ sg->sharefs_sharetab = freeing->s_next;
+ kmem_free(freeing->s_fstype, strlen(freeing->s_fstype) + 1);
+ for (i = 0; i < PKP_HASH_SIZE; i++) {
+ sharefs_hash_head_t *bucket;
+
+ bucket = &(freeing->s_buckets[i]);
+ while (bucket->ssh_sh != NULL) {
+ share_t *share = bucket->ssh_sh;
+
+ bucket->ssh_sh = share->sh_next;
+ sharefree(share, NULL);
+ }
+ }
+ kmem_free(freeing, sizeof (*freeing));
+ }
+
+ kmem_free(sg, sizeof (*sg));
+}
+
void
sharefs_sharetab_init(void)
{
- rw_init(&sharetab_lock, NULL, RW_DEFAULT, NULL);
- rw_init(&sharefs_lock, NULL, RW_DEFAULT, NULL);
-
- sharetab_size = 0;
- sharetab_count = 0;
- sharetab_generation = 1;
+ zone_key_create(&sharetab_zone_key, sharetab_zone_init,
+ NULL, sharetab_zone_fini);
+}
- gethrestime(&sharetab_mtime);
- gethrestime(&sharetab_snap_time);
+sharetab_globals_t *
+sharetab_get_globals(zone_t *zone)
+{
+ return (zone_getspecific(sharetab_zone_key, zone));
}
int
@@ -332,12 +354,10 @@ sharefs_impl(enum sharefs_sys_op opcode, share_t *sh_in, uint32_t iMaxLen)
size_t len;
size_t bufsz;
share_t *sh;
-
sharefs_lens_t shl;
-
model_t model;
-
char *buf = NULL;
+ sharetab_globals_t *sg = sharetab_get_globals(curzone);
STRUCT_DECL(share, u_sh);
@@ -347,20 +367,20 @@ sharefs_impl(enum sharefs_sys_op opcode, share_t *sh_in, uint32_t iMaxLen)
* Before we do anything, lets make sure we have
* a sharetab in memory if we need one.
*/
- rw_enter(&sharetab_lock, RW_READER);
+ rw_enter(&sg->sharetab_lock, RW_READER);
switch (opcode) {
- case (SHAREFS_REMOVE) :
- case (SHAREFS_REPLACE) :
- if (!sharefs_sharetab) {
- rw_exit(&sharetab_lock);
+ case SHAREFS_REMOVE:
+ case SHAREFS_REPLACE:
+ if (!sg->sharefs_sharetab) {
+ rw_exit(&sg->sharetab_lock);
return (set_errno(ENOENT));
}
break;
- case (SHAREFS_ADD) :
- default :
+ case SHAREFS_ADD:
+ default:
break;
}
- rw_exit(&sharetab_lock);
+ rw_exit(&sg->sharetab_lock);
model = get_udatamodel();
@@ -368,49 +388,37 @@ sharefs_impl(enum sharefs_sys_op opcode, share_t *sh_in, uint32_t iMaxLen)
* Initialize the data pointers.
*/
STRUCT_INIT(u_sh, model);
- if (copyin(sh_in, STRUCT_BUF(u_sh), STRUCT_SIZE(u_sh))) {
+ if (copyin(sh_in, STRUCT_BUF(u_sh), STRUCT_SIZE(u_sh)))
return (set_errno(EFAULT));
- }
- /*
- * Get the share.
- */
+ /* Get the share */
sh = kmem_zalloc(sizeof (share_t), KM_SLEEP);
- /*
- * Get some storage for copying in the strings.
- */
+ /* Get some storage for copying in the strings */
buf = kmem_zalloc(bufsz + 1, KM_SLEEP);
bzero(&shl, sizeof (sharefs_lens_t));
- /*
- * Only grab these two until we know what we want.
- */
+ /* Only grab these two until we know what we want */
SHARETAB_COPYIN(path);
SHARETAB_COPYIN(fstype);
switch (opcode) {
- case (SHAREFS_ADD) :
- case (SHAREFS_REPLACE) :
+ case SHAREFS_ADD:
+ case SHAREFS_REPLACE:
SHARETAB_COPYIN(res);
SHARETAB_COPYIN(opts);
SHARETAB_COPYIN(descr);
-
- error = sharefs_add(sh, &shl);
+ error = sharefs_add(sg, sh, &shl);
break;
-
- case (SHAREFS_REMOVE) :
-
- error = sharefs_remove(sh, &shl);
+ case SHAREFS_REMOVE:
+ error = sharefs_remove(sg, sh, &shl);
break;
-
default:
error = EINVAL;
break;
}
cleanup:
-
/*
* If there is no error, then we have stashed the structure
* away in the sharetab hash table or have deleted it.
@@ -418,22 +426,38 @@ cleanup:
* Either way, the only reason to blow away the data is if
* there was an error.
*/
- if (error != 0) {
+ if (error != 0)
sharefree(sh, &shl);
- }
- if (buf) {
+ if (buf != NULL)
kmem_free(buf, bufsz + 1);
- }
- return ((error != 0) ? set_errno(error) : 0);
+ return (error != 0 ? set_errno(error) : 0);
}
int
sharefs(enum sharefs_sys_op opcode, share_t *sh_in, uint32_t iMaxLen)
{
- if (secpolicy_sys_config(CRED(), B_FALSE) != 0)
- return (set_errno(EPERM));
+ /*
+ * If we're in the global zone PRIV_SYS_CONFIG gives us the
+ * privileges needed to act on sharetab. However if we're in
+ * a non-global zone PRIV_SYS_CONFIG is not allowed. To work
+ * around this issue PRIV_SYS_NFS is used in this case.
+ *
+ * TODO: This basically overloads the definition/use of
+ * PRIV_SYS_NFS to work around the limitation of PRIV_SYS_CONFIG
+ * in a zone. Solaris 11 solved this by implementing a PRIV_SYS_SHARE
+ * we should do the same and replace the use of PRIV_SYS_NFS here and
+ * in zfs_secpolicy_share.
+ */
+ if (INGLOBALZONE(curproc)) {
+ if (secpolicy_sys_config(CRED(), B_FALSE) != 0)
+ return (set_errno(EPERM));
+ } else {
+ /* behave like zfs_secpolicy_share() */
+ if (secpolicy_nfs(CRED()) != 0)
+ return (set_errno(EPERM));
+ }
return (sharefs_impl(opcode, sh_in, iMaxLen));
}
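Note on the sharetab.c hunks above: the file-scope globals become a zone-keyed sharetab_globals_t. sharefs_sharetab_init() registers the key once, sharetab_zone_init() allocates and seeds the per-zone state when a zone boots, sharetab_zone_fini() frees every cached share when it halts, and sharetab_get_globals() is the lookup used everywhere else. A condensed sketch of that lifecycle (the key name below is illustrative; in the patch it is the static sharetab_zone_key):

/* Illustrative sketch only; not part of the patch. */
static zone_key_t example_key;

void
example_module_init(void)
{
	/* per-zone init and fini callbacks; no shutdown callback needed */
	zone_key_create(&example_key, sharetab_zone_init, NULL,
	    sharetab_zone_fini);
}

static sharetab_globals_t *
example_lookup(zone_t *zone)
{
	return (zone_getspecific(example_key, zone));
}

The privilege check in sharefs() is relaxed in the same file: a non-global zone may now act on its sharetab with PRIV_SYS_NFS, matching zfs_secpolicy_share().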
diff --git a/usr/src/uts/common/fs/sockfs/nl7curi.c b/usr/src/uts/common/fs/sockfs/nl7curi.c
index a8d46d0fe0..0c8276d227 100644
--- a/usr/src/uts/common/fs/sockfs/nl7curi.c
+++ b/usr/src/uts/common/fs/sockfs/nl7curi.c
@@ -1140,10 +1140,6 @@ nl7c_data(struct sonode *so, uio_t *uio)
alloc = kmem_alloc(sz, KM_SLEEP);
URI_RD_ADD(uri, rdp, sz, -1);
- if (rdp == NULL) {
- error = ENOMEM;
- goto fail;
- }
if (uri->hash != URI_TEMP && uri->count > nca_max_cache_size) {
uri_delete(uri);
@@ -1376,10 +1372,6 @@ nl7c_sendfilev(struct sonode *so, u_offset_t *fileoff, sendfilevec_t *sfvp,
fp = NULL;
}
URI_RD_ADD(uri, rdp, cnt, -1);
- if (rdp == NULL) {
- error = ENOMEM;
- goto fail;
- }
data = alloc;
alloc = NULL;
rdp->data.kmem = data;
@@ -1405,10 +1397,6 @@ nl7c_sendfilev(struct sonode *so, u_offset_t *fileoff, sendfilevec_t *sfvp,
if (len > cnt) {
/* More file data so add it */
URI_RD_ADD(uri, rdp, len - cnt, off);
- if (rdp == NULL) {
- error = ENOMEM;
- goto fail;
- }
rdp->data.vnode = vp;
/* Send vnode data out the connection */
diff --git a/usr/src/uts/common/fs/sockfs/sockfilter.c b/usr/src/uts/common/fs/sockfs/sockfilter.c
index 7dca6ae6fc..ea161e30ae 100644
--- a/usr/src/uts/common/fs/sockfs/sockfilter.c
+++ b/usr/src/uts/common/fs/sockfs/sockfilter.c
@@ -85,13 +85,13 @@
* sof_module_list -> sof_module_t -> ... -> sof_module_t
*/
-static list_t sof_entry_list; /* list of configured filters */
+static list_t sof_entry_list; /* list of configured filters */
static list_t sof_module_list; /* list of loaded filter modules */
static kmutex_t sof_module_lock; /* protect the module list */
static sof_kstat_t sof_stat;
-static kstat_t *sof_stat_ksp;
+static kstat_t *sof_stat_ksp;
#ifdef DEBUG
static int socket_filter_debug = 0;
@@ -117,15 +117,15 @@ kmutex_t sof_close_deferred_lock;
static void sof_close_deferred(void *);
static void sof_module_rele(sof_module_t *);
-static sof_module_t *sof_module_hold_by_name(const char *, const char *);
+static sof_module_t *sof_module_hold_by_name(const char *, const char *);
static int sof_entry_load_module(sof_entry_t *);
-static void sof_entry_hold(sof_entry_t *);
-static void sof_entry_rele(sof_entry_t *);
-static int sof_entry_kstat_create(sof_entry_t *);
-static void sof_entry_kstat_destroy(sof_entry_t *);
+static void sof_entry_hold(sof_entry_t *);
+static void sof_entry_rele(sof_entry_t *);
+static int sof_entry_kstat_create(sof_entry_t *);
+static void sof_entry_kstat_destroy(sof_entry_t *);
-static sof_instance_t *sof_instance_create(sof_entry_t *, struct sonode *);
+static sof_instance_t *sof_instance_create(sof_entry_t *, struct sonode *);
static void sof_instance_destroy(sof_instance_t *);
static int
@@ -1093,9 +1093,10 @@ sof_entry_proc_sockparams(sof_entry_t *ent, struct sockparams *sp)
fil != NULL;
fil = list_next(&sp->sp_auto_filters, fil)) {
if (strncmp(ent->sofe_hintarg,
- fil->spf_filter->sofe_name,
- SOF_MAXNAMELEN) == 0)
- break;
+ fil->spf_filter->sofe_name, SOF_MAXNAMELEN)
+ == 0) {
+ break;
+ }
}
if (fil != NULL) {
diff --git a/usr/src/uts/common/fs/sockfs/socktpi.c b/usr/src/uts/common/fs/sockfs/socktpi.c
index cbb8eccd23..b8d83105e8 100644
--- a/usr/src/uts/common/fs/sockfs/socktpi.c
+++ b/usr/src/uts/common/fs/sockfs/socktpi.c
@@ -4811,8 +4811,8 @@ sodgram_direct(struct sonode *so, struct sockaddr *name,
if (auditing)
audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0);
- udp_wput(udp_wq, mp);
- return (0);
+ /* Always returns 0... */
+ return (udp_wput(udp_wq, mp));
}
ASSERT(mpdata == NULL);
@@ -4874,8 +4874,8 @@ sostream_direct(struct sonode *so, struct uio *uiop, mblk_t *mp, cred_t *cr)
}
mp = newmp;
}
- tcp_wput(tcp_wq, mp);
- return (0);
+ /* Always returns 0... */
+ return (tcp_wput(tcp_wq, mp));
}
/* Fallback to strwrite() to do proper error handling */
@@ -4925,7 +4925,7 @@ sostream_direct(struct sonode *so, struct uio *uiop, mblk_t *mp, cred_t *cr)
}
mp = newmp;
}
- tcp_wput(tcp_wq, mp);
+ (void) tcp_wput(tcp_wq, mp); /* Always returns 0 anyway. */
wflag |= NOINTR;
diff --git a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h
index 2ad4e77398..9947bedf54 100644
--- a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h
@@ -385,6 +385,7 @@ typedef struct zinject_record {
#define ZINJECT_NULL 0x1
#define ZINJECT_FLUSH_ARC 0x2
#define ZINJECT_UNLOAD_SPA 0x4
+#define ZINJECT_CALC_RANGE 0x8
#define ZI_NO_DVA (-1)
diff --git a/usr/src/uts/common/fs/zfs/zfs_ioctl.c b/usr/src/uts/common/fs/zfs/zfs_ioctl.c
index b24d83496c..2b4c1d55e7 100644
--- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c
+++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c
@@ -21,10 +21,13 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+/*
* Copyright (c) 2011-2012 Pawel Jakub Dawidek. All rights reserved.
* Portions Copyright 2011 Martin Matuska
* Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
- * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2018 Nexenta Systems, Inc. All rights reserved.
* Copyright 2019 Joyent, Inc.
* Copyright (c) 2011, 2017 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
@@ -788,9 +791,6 @@ zfs_secpolicy_deleg_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
int
zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
{
- if (!INGLOBALZONE(curproc))
- return (SET_ERROR(EPERM));
-
if (secpolicy_nfs(cr) == 0) {
return (0);
} else {
@@ -801,9 +801,6 @@ zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
int
zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
{
- if (!INGLOBALZONE(curproc))
- return (SET_ERROR(EPERM));
-
if (secpolicy_smb(cr) == 0) {
return (0);
} else {
diff --git a/usr/src/uts/common/fs/zfs/zio_inject.c b/usr/src/uts/common/fs/zfs/zio_inject.c
index f13fb18c16..a65721d175 100644
--- a/usr/src/uts/common/fs/zfs/zio_inject.c
+++ b/usr/src/uts/common/fs/zfs/zio_inject.c
@@ -45,9 +45,10 @@
#include <sys/zfs_ioctl.h>
#include <sys/vdev_impl.h>
#include <sys/dmu_objset.h>
+#include <sys/dsl_dataset.h>
#include <sys/fs/zfs.h>
-uint32_t zio_injection_enabled;
+uint32_t zio_injection_enabled = 0;
/*
* Data describing each zinject handler registered on the system, and
@@ -623,6 +624,63 @@ zio_handle_io_delay(zio_t *zio)
return (min_target);
}
+static int
+zio_calculate_range(const char *pool, zinject_record_t *record)
+{
+ dsl_pool_t *dp;
+ dsl_dataset_t *ds;
+ objset_t *os = NULL;
+ dnode_t *dn = NULL;
+ int error;
+
+ /*
+ * Obtain the dnode for the object, using the pool name and the objset and object IDs
+ */
+ error = dsl_pool_hold(pool, FTAG, &dp);
+ if (error)
+ return (error);
+
+ error = dsl_dataset_hold_obj(dp, record->zi_objset, FTAG, &ds);
+ dsl_pool_rele(dp, FTAG);
+ if (error)
+ return (error);
+
+ error = dmu_objset_from_ds(ds, &os);
+ dsl_dataset_rele(ds, FTAG);
+ if (error)
+ return (error);
+
+ error = dnode_hold(os, record->zi_object, FTAG, &dn);
+ if (error)
+ return (error);
+
+ /*
+ * Translate the range into block IDs
+ */
+ if (record->zi_start != 0 || record->zi_end != -1ULL) {
+ record->zi_start >>= dn->dn_datablkshift;
+ record->zi_end >>= dn->dn_datablkshift;
+ }
+ if (record->zi_level > 0) {
+ if (record->zi_level >= dn->dn_nlevels) {
+ dnode_rele(dn, FTAG);
+ return (SET_ERROR(EDOM));
+ }
+
+ if (record->zi_start != 0 || record->zi_end != 0) {
+ int shift = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
+
+ for (int level = record->zi_level; level > 0; level--) {
+ record->zi_start >>= shift;
+ record->zi_end >>= shift;
+ }
+ }
+ }
+
+ dnode_rele(dn, FTAG);
+ return (0);
+}
+
/*
* Create a new handler for the given record. We add it to the list, adding
* a reference to the spa_t in the process. We increment zio_injection_enabled,
@@ -662,6 +720,15 @@ zio_inject_fault(char *name, int flags, int *id, zinject_record_t *record)
return (SET_ERROR(EINVAL));
}
+ /*
+ * If the supplied range was given in bytes, translate it to block IDs
+ */
+ if (flags & ZINJECT_CALC_RANGE) {
+ error = zio_calculate_range(name, record);
+ if (error != 0)
+ return (error);
+ }
+
if (!(flags & ZINJECT_NULL)) {
/*
* spa_inject_ref() will add an injection reference, which will
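
The ZINJECT_CALC_RANGE path above reduces to a series of right shifts: a byte offset becomes an L0 block id by shifting out dn_datablkshift, and each indirect level shifts out another (dn_indblkshift - SPA_BLKPTRSHIFT) bits, i.e. divides by the number of block pointers per indirect block. A minimal standalone sketch of that arithmetic, with made-up shift values standing in for the ones read from the held dnode:

#include <stdint.h>
#include <stdio.h>

#define SPA_BLKPTRSHIFT	7	/* 128-byte block pointers, as in ZFS */

int
main(void)
{
	/* Hypothetical dnode geometry: 128K data blocks, 128K indirects. */
	int datablkshift = 17;
	int indblkshift = 17;
	uint64_t start = 1048576;	/* byte offset 1 MiB */
	uint64_t end = 4194304;		/* byte offset 4 MiB */
	int level = 1;

	/* Level-0 block ids: divide byte offsets by the data block size. */
	start >>= datablkshift;
	end >>= datablkshift;

	/* Each indirect level divides by block pointers per indirect block. */
	int shift = indblkshift - SPA_BLKPTRSHIFT;
	for (int l = level; l > 0; l--) {
		start >>= shift;
		end >>= shift;
	}

	(void) printf("L%d blkid range: %llu..%llu\n", level,
	    (unsigned long long)start, (unsigned long long)end);
	return (0);
}

With 128K blocks, byte offsets 1 MiB..4 MiB map to L0 blkids 8..32 and collapse to a single L1 blkid of 0.
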
diff --git a/usr/src/uts/common/klm/klmmod.c b/usr/src/uts/common/klm/klmmod.c
index 58e0f2d874..02a0b49e66 100644
--- a/usr/src/uts/common/klm/klmmod.c
+++ b/usr/src/uts/common/klm/klmmod.c
@@ -93,6 +93,7 @@ lm_zone_init(zoneid_t zoneid)
g->lockd_pid = 0;
g->run_status = NLM_ST_DOWN;
+ g->nlm_zoneid = zoneid;
nlm_globals_register(g);
return (g);
@@ -104,6 +105,8 @@ lm_zone_fini(zoneid_t zoneid, void *data)
{
struct nlm_globals *g = data;
+ nlm_globals_unregister(g);
+
ASSERT(avl_is_empty(&g->nlm_hosts_tree));
avl_destroy(&g->nlm_hosts_tree);
mod_hash_destroy_idhash(g->nlm_hosts_hash);
@@ -114,7 +117,6 @@ lm_zone_fini(zoneid_t zoneid, void *data)
cv_destroy(&g->nlm_gc_finish_cv);
mutex_destroy(&g->clean_lock);
- nlm_globals_unregister(g);
kmem_free(g, sizeof (*g));
}
diff --git a/usr/src/uts/common/klm/nlm_impl.c b/usr/src/uts/common/klm/nlm_impl.c
index e787f70ebd..cbba11f6ed 100644
--- a/usr/src/uts/common/klm/nlm_impl.c
+++ b/usr/src/uts/common/klm/nlm_impl.c
@@ -849,7 +849,7 @@ nlm_nsm_init(struct nlm_nsm *nsm, struct knetconfig *knc, struct netbuf *nb)
* statd using the status monitor protocol.
*/
error = clnt_tli_kcreate(&nsm->ns_knc, &nsm->ns_addr, SM_PROG, SM_VERS,
- 0, NLM_RPC_RETRIES, kcred, &nsm->ns_handle);
+ 0, NLM_RPC_RETRIES, zone_kcred(), &nsm->ns_handle);
if (error != 0)
goto error;
@@ -858,7 +858,8 @@ nlm_nsm_init(struct nlm_nsm *nsm, struct knetconfig *knc, struct netbuf *nb)
* local statd using the address registration protocol.
*/
error = clnt_tli_kcreate(&nsm->ns_knc, &nsm->ns_addr, NSM_ADDR_PROGRAM,
- NSM_ADDR_V1, 0, NLM_RPC_RETRIES, kcred, &nsm->ns_addr_handle);
+ NSM_ADDR_V1, 0, NLM_RPC_RETRIES, zone_kcred(),
+ &nsm->ns_addr_handle);
if (error != 0)
goto error;
@@ -867,8 +868,11 @@ nlm_nsm_init(struct nlm_nsm *nsm, struct knetconfig *knc, struct netbuf *nb)
error:
kmem_free(nsm->ns_addr.buf, nsm->ns_addr.maxlen);
- if (nsm->ns_handle)
+ if (nsm->ns_handle) {
+ ASSERT(nsm->ns_handle->cl_auth != NULL);
+ auth_destroy(nsm->ns_handle->cl_auth);
CLNT_DESTROY(nsm->ns_handle);
+ }
return (error);
}
@@ -877,8 +881,12 @@ static void
nlm_nsm_fini(struct nlm_nsm *nsm)
{
kmem_free(nsm->ns_addr.buf, nsm->ns_addr.maxlen);
+ if (nsm->ns_addr_handle->cl_auth != NULL)
+ auth_destroy(nsm->ns_addr_handle->cl_auth);
CLNT_DESTROY(nsm->ns_addr_handle);
nsm->ns_addr_handle = NULL;
+ if (nsm->ns_handle->cl_auth != NULL)
+ auth_destroy(nsm->ns_handle->cl_auth);
CLNT_DESTROY(nsm->ns_handle);
nsm->ns_handle = NULL;
sema_destroy(&nsm->ns_sem);
@@ -2589,14 +2597,17 @@ nlm_vp_active(const vnode_t *vp)
* on them.
*/
void
-nlm_unexport(struct exportinfo *exi)
+nlm_zone_unexport(struct nlm_globals *g, struct exportinfo *exi)
{
- struct nlm_globals *g;
struct nlm_host *hostp;
- g = zone_getspecific(nlm_zone_key, curzone);
-
mutex_enter(&g->lock);
+ if (g->run_status != NLM_ST_UP) {
+ /* nothing to do */
+ mutex_exit(&g->lock);
+ return;
+ }
+
hostp = avl_first(&g->nlm_hosts_tree);
while (hostp != NULL) {
struct nlm_vhold *nvp;
@@ -2644,6 +2655,28 @@ nlm_unexport(struct exportinfo *exi)
mutex_exit(&g->lock);
}
+void
+nlm_unexport(struct exportinfo *exi)
+{
+ struct nlm_globals *g;
+
+ rw_enter(&lm_lck, RW_READER);
+ TAILQ_FOREACH(g, &nlm_zones_list, nlm_link) {
+ if (g->nlm_zoneid == exi->exi_zoneid) {
+ /*
+ * NOTE: If we want to drop lm_lck before
+ * calling nlm_zone_unexport(), we should break,
+ * and have a post-rw_exit() snippet like:
+ * if (g != NULL)
+ * nlm_zone_unexport(g, exi);
+ */
+ nlm_zone_unexport(g, exi);
+ break; /* Only going to match once! */
+ }
+ }
+ rw_exit(&lm_lck);
+}
+
/*
* Allocate new unique sysid.
* In case of failure (no available sysids)
@@ -2812,7 +2845,7 @@ void
nlm_nsm_clnt_init(CLIENT *clnt, struct nlm_nsm *nsm)
{
(void) clnt_tli_kinit(clnt, &nsm->ns_knc, &nsm->ns_addr, 0,
- NLM_RPC_RETRIES, kcred);
+ NLM_RPC_RETRIES, zone_kcred());
}
void
diff --git a/usr/src/uts/common/klm/nlm_impl.h b/usr/src/uts/common/klm/nlm_impl.h
index 2ac711f3c7..84c6d47075 100644
--- a/usr/src/uts/common/klm/nlm_impl.h
+++ b/usr/src/uts/common/klm/nlm_impl.h
@@ -460,7 +460,7 @@ struct nlm_globals {
int cn_idle_tmo; /* (z) */
int grace_period; /* (z) */
int retrans_tmo; /* (z) */
- boolean_t nlm_v4_only; /* (z) */
+ zoneid_t nlm_zoneid; /* (c) */
kmutex_t clean_lock; /* (c) */
TAILQ_ENTRY(nlm_globals) nlm_link; /* (g) */
};
diff --git a/usr/src/uts/common/nfs/export.h b/usr/src/uts/common/nfs/export.h
index 60c6320db7..41bd4ab74f 100644
--- a/usr/src/uts/common/nfs/export.h
+++ b/usr/src/uts/common/nfs/export.h
@@ -23,6 +23,7 @@
* Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2016 Nexenta Systems, Inc. All rights reserved.
* Copyright 2016 Jason King.
+ * Copyright 2018 Nexenta Systems, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -37,6 +38,11 @@
#include <nfs/nfs4.h>
#include <sys/kiconv.h>
#include <sys/avl.h>
+#include <sys/zone.h>
+
+#ifdef _KERNEL
+#include <sys/pkp_hash.h> /* for PKP_HASH_SIZE */
+#endif /* _KERNEL */
#ifdef __cplusplus
extern "C" {
@@ -467,19 +473,25 @@ typedef struct treenode {
} treenode_t;
/*
- * TREE_ROOT checks if the node corresponds to a filesystem root
+ * Now that we have links to chase, we can get the zone rootvp just from
+ * an export. No current-zone-context needed.
+ */
+#define EXI_TO_ZONEROOTVP(exi) ((exi)->exi_ne->exi_root->exi_vp)
+
+/*
+ * TREE_ROOT checks if the node corresponds to a filesystem root or
+ * the zone's root directory.
* TREE_EXPORTED checks if the node is explicitly shared
*/
#define TREE_ROOT(t) \
- ((t)->tree_exi && (t)->tree_exi->exi_vp->v_flag & VROOT)
+ ((t)->tree_exi != NULL && \
+ (((t)->tree_exi->exi_vp->v_flag & VROOT) || \
+ VN_CMP(EXI_TO_ZONEROOTVP((t)->tree_exi), (t)->tree_exi->exi_vp)))
#define TREE_EXPORTED(t) \
((t)->tree_exi && !PSEUDO((t)->tree_exi))
-/* Root of nfs pseudo namespace */
-extern treenode_t *ns_root;
-
#define EXPTABLESIZE 256
struct exp_hash {
@@ -517,6 +529,7 @@ struct exportinfo {
krwlock_t exi_cache_lock;
kmutex_t exi_lock;
uint_t exi_count;
+ zoneid_t exi_zoneid;
vnode_t *exi_vp;
vnode_t *exi_dvp;
avl_tree_t *exi_cache[AUTH_TABLESIZE];
@@ -525,11 +538,20 @@ struct exportinfo {
struct charset_cache *exi_charset;
unsigned exi_volatile_dev:1;
unsigned exi_moved:1;
+ int exi_id;
+ avl_node_t exi_id_link;
+ /*
+ * Soft-reference/backpointer to zone's nfs_export_t.
+ * This allows us access to the zone's rootvp (stored in
+ * exi_ne->exi_root->exi_vp) even if the current thread isn't in
+ * same-zone context.
+ */
+ struct nfs_export *exi_ne;
#ifdef VOLATILE_FH_TEST
uint32_t exi_volatile_id;
struct ex_vol_rename *exi_vol_rename;
kmutex_t exi_vol_rename_lock;
-#endif /* VOLATILE_FH_TEST */
+#endif /* VOLATILE_FH_TEST -- keep last! */
};
typedef struct exportinfo exportinfo_t;
@@ -608,8 +630,12 @@ extern int nfsauth4_secinfo_access(struct exportinfo *,
struct svc_req *, int, int, cred_t *);
extern int nfsauth_cache_clnt_compar(const void *, const void *);
extern int nfs_fhbcmp(char *, char *, int);
-extern int nfs_exportinit(void);
+extern void nfs_exportinit(void);
extern void nfs_exportfini(void);
+extern void nfs_export_zone_init(nfs_globals_t *);
+extern void nfs_export_zone_fini(nfs_globals_t *);
+extern void nfs_export_zone_shutdown(nfs_globals_t *);
+extern int nfs_export_get_rootfh(nfs_globals_t *);
extern int chk_clnt_sec(struct exportinfo *, struct svc_req *);
extern int makefh(fhandle_t *, struct vnode *, struct exportinfo *);
extern int makefh_ol(fhandle_t *, struct exportinfo *, uint_t);
@@ -625,32 +651,61 @@ extern struct exportinfo *nfs_vptoexi(vnode_t *, vnode_t *, cred_t *, int *,
int *, bool_t);
extern int nfs_check_vpexi(vnode_t *, vnode_t *, cred_t *,
struct exportinfo **);
-extern void export_link(struct exportinfo *);
-extern void export_unlink(struct exportinfo *);
-extern vnode_t *untraverse(vnode_t *);
+extern vnode_t *untraverse(vnode_t *, vnode_t *);
extern int vn_is_nfs_reparse(vnode_t *, cred_t *);
extern int client_is_downrev(struct svc_req *);
extern char *build_symlink(vnode_t *, cred_t *, size_t *);
+extern fhandle_t nullfh2; /* for comparing V2 filehandles */
+
+typedef struct nfs_export {
+ /* Root of nfs pseudo namespace */
+ treenode_t *ns_root;
+
+ nfs_globals_t *ne_globals; /* "up" pointer */
+
+ struct exportinfo *exptable_path_hash[PKP_HASH_SIZE];
+ struct exportinfo *exptable[EXPTABLESIZE];
+
+ /*
+ * Read/Write lock that protects the exportinfo list. This lock
+ * must be held when searching or modifying the exportinfo list.
+ */
+ krwlock_t exported_lock;
+
+ /* "public" and default (root) location for public filehandle */
+ struct exportinfo *exi_public;
+ struct exportinfo *exi_root;
+ /* For checking default public file handle */
+ fid_t exi_rootfid;
+ /* For comparing V2 filehandles */
+ fhandle_t nullfh2;
+
+ /* The change attribute value of the root of nfs pseudo namespace */
+ timespec_t ns_root_change;
+} nfs_export_t;
+
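
With the pseudo-namespace root, the export hash tables, and their lock now carried per zone, callers are expected to fetch the zone's nfs_export_t (via nfs_get_export(), declared further down in this header) and take exported_lock before touching either table. A hedged sketch of that access pattern; walk_zone_exports() and its visit_export callback are hypothetical, and hash-chain traversal is omitted:

/*
 * Sketch only (assumes the kernel headers of this tree): iterate the
 * current zone's export table under the per-zone read/write lock.
 */
static void
walk_zone_exports(void (*visit_export)(struct exportinfo *))
{
	nfs_export_t *ne = nfs_get_export();	/* current zone's exports */

	rw_enter(&ne->exported_lock, RW_READER);
	for (int i = 0; i < EXPTABLESIZE; i++) {
		struct exportinfo *exi = ne->exptable[i];
		/* Only bucket heads are visited in this sketch. */
		if (exi != NULL)
			visit_export(exi);
	}
	rw_exit(&ne->exported_lock);
}
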
/*
* Functions that handle the NFSv4 server namespace
*/
extern exportinfo_t *vis2exi(treenode_t *);
extern int treeclimb_export(struct exportinfo *);
-extern void treeclimb_unexport(struct exportinfo *);
+extern void treeclimb_unexport(nfs_export_t *, struct exportinfo *);
extern int nfs_visible(struct exportinfo *, vnode_t *, int *);
extern int nfs_visible_inode(struct exportinfo *, ino64_t,
- struct exp_visible **);
+ struct exp_visible **);
extern int has_visible(struct exportinfo *, vnode_t *);
extern void free_visible(struct exp_visible *);
extern int nfs_exported(struct exportinfo *, vnode_t *);
-extern struct exportinfo *pseudo_exportfs(vnode_t *, fid_t *,
- struct exp_visible *, struct exportdata *);
+extern struct exportinfo *pseudo_exportfs(nfs_export_t *, vnode_t *, fid_t *,
+ struct exp_visible *, struct exportdata *);
extern int vop_fid_pseudo(vnode_t *, fid_t *);
extern int nfs4_vget_pseudo(struct exportinfo *, vnode_t **, fid_t *);
extern bool_t nfs_visible_change(struct exportinfo *, vnode_t *,
- timespec_t *);
-extern void tree_update_change(treenode_t *, timespec_t *);
+ timespec_t *);
+extern void tree_update_change(nfs_export_t *, treenode_t *, timespec_t *);
+extern void rfs4_clean_state_exi(nfs_export_t *, struct exportinfo *);
+
/*
* Functions that handle the NFSv4 server namespace security flavors
* information.
@@ -658,13 +713,16 @@ extern void tree_update_change(treenode_t *, timespec_t *);
extern void srv_secinfo_exp2pseu(struct exportdata *, struct exportdata *);
extern void srv_secinfo_list_free(struct secinfo *, int);
+extern nfs_export_t *nfs_get_export();
+extern void export_link(nfs_export_t *, struct exportinfo *);
+extern void export_unlink(nfs_export_t *, struct exportinfo *);
+
/*
- * "public" and default (root) location for public filehandle
+ * exi_id support
*/
-extern struct exportinfo *exi_public, *exi_root;
-extern fhandle_t nullfh2; /* for comparing V2 filehandles */
-extern krwlock_t exported_lock;
-extern struct exportinfo *exptable[];
+extern kmutex_t nfs_exi_id_lock;
+extern avl_tree_t exi_id_tree;
+extern int exi_id_get_next(void);
/*
* Two macros for identifying public filehandles.
diff --git a/usr/src/uts/common/nfs/nfs.h b/usr/src/uts/common/nfs/nfs.h
index e26025b003..6f76fc93ea 100644
--- a/usr/src/uts/common/nfs/nfs.h
+++ b/usr/src/uts/common/nfs/nfs.h
@@ -20,19 +20,20 @@
*/
/*
- * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
*/
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
+/*
+ * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright 2019 Nexenta by DDN, Inc. All rights reserved.
+ */
+
#ifndef _NFS_NFS_H
#define _NFS_NFS_H
-/* nfs.h 2.38 88/08/19 SMI */
-
#include <sys/isa_defs.h>
#include <sys/vfs.h>
#include <sys/stream.h>
@@ -72,8 +73,56 @@ extern "C" {
#define NFS_VERSMIN_DEFAULT ((rpcvers_t)2)
#define NFS_VERSMAX_DEFAULT ((rpcvers_t)4)
-extern rpcvers_t nfs_versmin;
-extern rpcvers_t nfs_versmax;
+/*
+ * Used to track the state of the server so that initialization
+ * can be done properly.
+ */
+typedef enum {
+ NFS_SERVER_STOPPED, /* server state destroyed */
+ NFS_SERVER_STOPPING, /* server state being destroyed */
+ NFS_SERVER_RUNNING,
+ NFS_SERVER_QUIESCED, /* server state preserved */
+ NFS_SERVER_OFFLINE /* server pool offline */
+} nfs_server_running_t;
+
+/* Forward declarations for nfs_globals */
+struct nfs_export;
+struct nfs_srv;
+struct nfs3_srv;
+struct nfs4_srv;
+struct nfsauth_globals;
+
+/*
+ * Zone globals variables of NFS server
+ */
+typedef struct nfs_globals {
+ list_node_t nfs_g_link; /* all globals list */
+
+ rpcvers_t nfs_versmin;
+ rpcvers_t nfs_versmax;
+
+ /* NFS server locks and state */
+ nfs_server_running_t nfs_server_upordown;
+ kmutex_t nfs_server_upordown_lock;
+ kcondvar_t nfs_server_upordown_cv;
+
+ /* RDMA wait variables */
+ kcondvar_t rdma_wait_cv;
+ kmutex_t rdma_wait_mutex;
+
+ zoneid_t nfs_zoneid;
+ /* Per-zone data structures private to each module */
+ struct nfs_export *nfs_export; /* nfs_export.c */
+ struct nfs_srv *nfs_srv; /* nfs_srv.c */
+ struct nfs3_srv *nfs3_srv; /* nfs3_srv.c */
+ struct nfs4_srv *nfs4_srv; /* nfs4_srv.c */
+ struct nfsauth_globals *nfs_auth; /* nfs_auth.c */
+
+ /* statistics: nfs_stat.c, etc. */
+ kstat_named_t *svstat[NFS_VERSMAX + 1];
+ kstat_named_t *rfsproccnt[NFS_VERSMAX + 1];
+ kstat_named_t *aclproccnt[NFS_VERSMAX + 1];
+} nfs_globals_t;
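
Everything that used to be a file-scope global in the NFS server now hangs off this per-zone structure, reached through zone-specific data. A minimal sketch of the lookup, assuming the nfssrv_zone_key declared later in this header has been installed with zone_key_create() at module load; the real accessor is nfs_srv_getzg(), whose implementation is not part of this header, and the function name below is hypothetical:

/*
 * Sketch only: fetch the current zone's NFS server globals via ZSD.
 * Assumes nfssrv_zone_key has been created with zone_key_create().
 */
static nfs_globals_t *
my_nfs_srv_getzg(void)
{
	nfs_globals_t *ng;

	ng = zone_getspecific(nfssrv_zone_key, curzone);
	ASSERT(ng != NULL);
	return (ng);
}
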
/*
* Default delegation setting for the server ==> "on"
@@ -872,6 +921,8 @@ extern void rfs_statfs(fhandle_t *, struct nfsstatfs *, struct exportinfo *,
extern void *rfs_statfs_getfh(fhandle_t *);
extern void rfs_srvrinit(void);
extern void rfs_srvrfini(void);
+extern void rfs_srv_zone_init(nfs_globals_t *);
+extern void rfs_srv_zone_fini(nfs_globals_t *);
/*
* flags to define path types during Multi Component Lookups
@@ -884,6 +935,8 @@ extern void rfs_srvrfini(void);
/* index for svstat_ptr */
enum nfs_svccounts {NFS_CALLS, NFS_BADCALLS, NFS_REFERRALS, NFS_REFERLINKS};
+#define NFS_V2 NFS_VERSION
+
/* function defs for NFS kernel */
extern int nfs_waitfor_purge_complete(vnode_t *);
extern int nfs_validate_caches(vnode_t *, cred_t *);
@@ -904,7 +957,7 @@ extern int nfs_async_stop_sig(struct vfs *);
extern int nfs_clntinit(void);
extern void nfs_clntfini(void);
extern int nfstsize(void);
-extern int nfs_srvinit(void);
+extern void nfs_srvinit(void);
extern void nfs_srvfini(void);
extern int vattr_to_sattr(struct vattr *, struct nfssattr *);
extern void setdiropargs(struct nfsdiropargs *, char *, vnode_t *);
@@ -942,10 +995,14 @@ extern int nfsauth_access(struct exportinfo *, struct svc_req *, cred_t *,
uid_t *, gid_t *, uint_t *, gid_t **);
extern void nfsauth_init(void);
extern void nfsauth_fini(void);
+extern void nfsauth_zone_init(nfs_globals_t *);
+extern void nfsauth_zone_fini(nfs_globals_t *);
+extern void nfsauth_zone_shutdown(nfs_globals_t *);
extern int nfs_setopts(vnode_t *, model_t, struct nfs_args *);
extern int nfs_mount_label_policy(vfs_t *, struct netbuf *,
struct knetconfig *, cred_t *);
extern boolean_t nfs_has_ctty(void);
+extern nfs_globals_t *nfs_srv_getzg(void);
extern void nfs_srv_stop_all(void);
extern void nfs_srv_quiesce_all(void);
extern int rfs4_dss_setpaths(char *, size_t);
@@ -957,9 +1014,12 @@ extern nvlist_t *rfs4_dss_paths, *rfs4_dss_oldpaths;
extern kstat_named_t *global_svstat_ptr[];
+extern zone_key_t nfssrv_zone_key;
+extern list_t nfssrv_globals_list;
+extern krwlock_t nfssrv_globals_rwl;
+
extern krwlock_t rroklock;
extern vtype_t nf_to_vt[];
-extern kstat_named_t *rfsproccnt_v2_ptr;
extern kmutex_t nfs_minor_lock;
extern int nfs_major;
extern int nfs_minor;
@@ -975,16 +1035,13 @@ extern int (*nfs_srv_dss_func)(char *, size_t);
*/
struct nfs_version_stats {
kstat_named_t *aclreqcnt_ptr; /* nfs_acl:0:aclreqcnt_v? */
- kstat_named_t *aclproccnt_ptr; /* nfs_acl:0:aclproccnt_v? */
kstat_named_t *rfsreqcnt_ptr; /* nfs:0:rfsreqcnt_v? */
- kstat_named_t *rfsproccnt_ptr; /* nfs:0:rfsproccnt_v? */
};
/*
* A bit of asymmetry: nfs:0:nfs_client isn't part of this structure.
*/
struct nfs_stats {
- kstat_named_t *nfs_stats_svstat_ptr[NFS_VERSMAX + 1];
struct nfs_version_stats nfs_stats_v2;
struct nfs_version_stats nfs_stats_v3;
struct nfs_version_stats nfs_stats_v4;
@@ -1001,6 +1058,9 @@ extern zone_key_t nfsstat_zone_key;
extern void *nfsstat_zone_init(zoneid_t);
extern void nfsstat_zone_fini(zoneid_t, void *);
+extern void rfs_stat_zone_init(nfs_globals_t *);
+extern void rfs_stat_zone_fini(nfs_globals_t *);
+
#endif /* _KERNEL */
/*
@@ -2248,6 +2308,8 @@ extern void rfs3_commit(COMMIT3args *, COMMIT3res *, struct exportinfo *,
extern void *rfs3_commit_getfh(COMMIT3args *);
extern void rfs3_srvrinit(void);
extern void rfs3_srvrfini(void);
+extern void rfs3_srv_zone_init(nfs_globals_t *);
+extern void rfs3_srv_zone_fini(nfs_globals_t *);
extern int nfs3_validate_caches(vnode_t *, cred_t *);
extern void nfs3_cache_post_op_attr(vnode_t *, post_op_attr *, hrtime_t,
@@ -2282,7 +2344,6 @@ extern int rfs_cross_mnt(vnode_t **, struct exportinfo **);
extern int rfs_climb_crossmnt(vnode_t **, struct exportinfo **, cred_t *);
extern vtype_t nf3_to_vt[];
-extern kstat_named_t *rfsproccnt_v3_ptr;
extern vfsops_t *nfs3_vfsops;
extern struct vnodeops *nfs3_vnodeops;
extern const struct fs_operation_def nfs3_vnodeops_template[];
@@ -2312,11 +2373,11 @@ extern bool_t rfs4_check_delegated(int mode, vnode_t *, bool_t trunc);
* if no delegation is present.
*/
extern int rfs4_delegated_getattr(vnode_t *, vattr_t *, int, cred_t *);
-extern void rfs4_hold_deleg_policy(void);
-extern void rfs4_rele_deleg_policy(void);
extern int do_xattr_exists_check(vnode_t *, ulong_t *, cred_t *);
+extern int protect_zfs_mntpt(vnode_t *);
+
extern ts_label_t *nfs_getflabel(vnode_t *, struct exportinfo *);
extern boolean_t do_rfs_label_check(bslabel_t *, vnode_t *, int,
struct exportinfo *);
diff --git a/usr/src/uts/common/nfs/nfs4.h b/usr/src/uts/common/nfs/nfs4.h
index 2dee6d22f4..ce09473f95 100644
--- a/usr/src/uts/common/nfs/nfs4.h
+++ b/usr/src/uts/common/nfs/nfs4.h
@@ -20,12 +20,13 @@
*/
/*
- * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
*/
/*
- * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright 2018 Nexenta Systems, Inc.
+ * Copyright 2019 Nexenta by DDN, Inc.
*/
#ifndef _NFS4_H
@@ -39,6 +40,7 @@
#ifdef _KERNEL
#include <nfs/nfs4_kprot.h>
+#include <nfs/nfs4_drc.h>
#include <sys/nvpair.h>
#else
#include <rpcsvc/nfs4_prot.h>
@@ -119,7 +121,21 @@ typedef struct { /* opaque entry type for later use */
rfs4_dbe_t *dbe;
} *rfs4_entry_t;
-extern rfs4_table_t *rfs4_client_tab;
+/*
+ * NFSv4 server state databases
+ *
+ * Initialized when the module is loaded and used by NFSv4 state tables.
+ * These kmem_cache free pools are used globally; the NFSv4 state tables
+ * that make use of them are per zone.
+ */
+extern kmem_cache_t *rfs4_client_mem_cache;
+extern kmem_cache_t *rfs4_clntIP_mem_cache;
+extern kmem_cache_t *rfs4_openown_mem_cache;
+extern kmem_cache_t *rfs4_openstID_mem_cache;
+extern kmem_cache_t *rfs4_lockstID_mem_cache;
+extern kmem_cache_t *rfs4_lockown_mem_cache;
+extern kmem_cache_t *rfs4_file_mem_cache;
+extern kmem_cache_t *rfs4_delegstID_mem_cache;
/* database, table, index creation entry points */
extern rfs4_database_t *rfs4_database_create(uint32_t);
@@ -128,6 +144,8 @@ extern void rfs4_database_destroy(rfs4_database_t *);
extern void rfs4_database_destroy(rfs4_database_t *);
+extern kmem_cache_t *nfs4_init_mem_cache(char *, uint32_t, uint32_t,
+ uint32_t);
extern rfs4_table_t *rfs4_table_create(rfs4_database_t *, char *,
time_t, uint32_t,
bool_t (*create)(rfs4_entry_t, void *),
@@ -369,12 +387,6 @@ typedef struct rfs4_dss_path {
char **rfs4_dss_newpaths;
uint_t rfs4_dss_numnewpaths;
-/*
- * Circular doubly-linked list of paths for currently-served RGs.
- * No locking required: only changed on warmstart. Managed with insque/remque.
- */
-rfs4_dss_path_t *rfs4_dss_pathlist;
-
/* nvlists of all DSS paths: current, and before last warmstart */
nvlist_t *rfs4_dss_paths, *rfs4_dss_oldpaths;
@@ -740,26 +752,8 @@ typedef struct rfs4_file {
krwlock_t rf_file_rwlock;
} rfs4_file_t;
-extern int rfs4_seen_first_compound; /* set first time we see one */
-
-extern rfs4_servinst_t *rfs4_cur_servinst; /* current server instance */
-extern kmutex_t rfs4_servinst_lock; /* protects linked list */
-extern void rfs4_servinst_create(int, int, char **);
-extern void rfs4_servinst_destroy_all(void);
-extern void rfs4_servinst_assign(rfs4_client_t *,
- rfs4_servinst_t *);
-extern rfs4_servinst_t *rfs4_servinst(rfs4_client_t *);
-extern int rfs4_clnt_in_grace(rfs4_client_t *);
-extern int rfs4_servinst_in_grace(rfs4_servinst_t *);
-extern int rfs4_servinst_grace_new(rfs4_servinst_t *);
-extern void rfs4_grace_start(rfs4_servinst_t *);
-extern void rfs4_grace_start_new(void);
-extern void rfs4_grace_reset_all(void);
-extern void rfs4_ss_oldstate(rfs4_oldstate_t *, char *, char *);
-extern void rfs4_dss_readstate(int, char **);
-
/*
- * rfs4_deleg_policy is used to signify the server's global delegation
+ * nfs4_deleg_policy is used to signify the server's global delegation
* policy. The default is to NEVER delegate files and the
* administrator must configure the server to enable delegations.
*
@@ -771,8 +765,6 @@ typedef enum {
SRV_NORMAL_DELEGATE = 1
} srv_deleg_policy_t;
-extern srv_deleg_policy_t rfs4_deleg_policy;
-extern kmutex_t rfs4_deleg_lock;
extern void rfs4_disable_delegation(void), rfs4_enable_delegation(void);
/*
@@ -790,11 +782,124 @@ typedef enum {
#define NFS4_DELEG4TYPE2REQTYPE(x) (delegreq_t)(x)
/*
+ * Zone global variables of NFSv4 server
+ */
+typedef struct nfs4_srv {
+ /* Unique write verifier */
+ verifier4 write4verf;
+ /* Delegation lock */
+ kmutex_t deleg_lock;
+ /* Used to serialize create/destroy of nfs4_server_state database */
+ kmutex_t state_lock;
+ rfs4_database_t *nfs4_server_state;
+ /* Used to manage access to server instance linked list */
+ kmutex_t servinst_lock;
+ rfs4_servinst_t *nfs4_cur_servinst;
+ /* Used to manage access to nfs4_deleg_policy */
+ krwlock_t deleg_policy_lock;
+ srv_deleg_policy_t nfs4_deleg_policy;
+ /* Set first time we see one */
+ int seen_first_compound;
+ /*
+ * Circular doubly-linked list of paths for currently-served RGs.
+ * No locking required: only changed on server start.
+ * Managed with insque/remque.
+ */
+ rfs4_dss_path_t *dss_pathlist;
+ /* Duplicate request cache */
+ rfs4_drc_t *nfs4_drc;
+ /* nfsv4 server start time */
+ time_t rfs4_start_time;
+ /* Used to serialize lookups of clientids */
+ krwlock_t rfs4_findclient_lock;
+
+ /* NFSv4 server state client tables */
+ /* table expiry times */
+ time_t rfs4_client_cache_time;
+ time_t rfs4_openowner_cache_time;
+ time_t rfs4_state_cache_time;
+ time_t rfs4_lo_state_cache_time;
+ time_t rfs4_lockowner_cache_time;
+ time_t rfs4_file_cache_time;
+ time_t rfs4_deleg_state_cache_time;
+ time_t rfs4_clntip_cache_time;
+ /* tables and indexes */
+ /* client table */
+ rfs4_table_t *rfs4_client_tab;
+ rfs4_index_t *rfs4_clientid_idx;
+ rfs4_index_t *rfs4_nfsclnt_idx;
+ /* client IP table */
+ rfs4_table_t *rfs4_clntip_tab;
+ rfs4_index_t *rfs4_clntip_idx;
+ /* Open Owner table */
+ rfs4_table_t *rfs4_openowner_tab;
+ rfs4_index_t *rfs4_openowner_idx;
+ /* Open State ID table */
+ rfs4_table_t *rfs4_state_tab;
+ rfs4_index_t *rfs4_state_idx;
+ rfs4_index_t *rfs4_state_owner_file_idx;
+ rfs4_index_t *rfs4_state_file_idx;
+ /* Lock State ID table */
+ rfs4_table_t *rfs4_lo_state_tab;
+ rfs4_index_t *rfs4_lo_state_idx;
+ rfs4_index_t *rfs4_lo_state_owner_idx;
+ /* Lock owner table */
+ rfs4_table_t *rfs4_lockowner_tab;
+ rfs4_index_t *rfs4_lockowner_idx;
+ rfs4_index_t *rfs4_lockowner_pid_idx;
+ /* File table */
+ rfs4_table_t *rfs4_file_tab;
+ rfs4_index_t *rfs4_file_idx;
+ /* Deleg State table */
+ rfs4_table_t *rfs4_deleg_state_tab;
+ rfs4_index_t *rfs4_deleg_idx;
+ rfs4_index_t *rfs4_deleg_state_idx;
+
+ /* client stable storage */
+ int rfs4_ss_enabled;
+} nfs4_srv_t;
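
The delegation policy moves from file-scope globals into this per-zone structure, guarded by deleg_policy_lock and reached through the zone-aware accessors declared below. A hedged sketch of how a caller might test it; zone_may_delegate() is hypothetical and assumes the hold/rele routines take deleg_policy_lock, as their pre-zone counterparts did for the old global lock:

/*
 * Sketch only: decide whether this zone's server may hand out
 * delegations. Assumes nfs4_get_srv() returns the current zone's
 * nfs4_srv_t (declared later in this header).
 */
static boolean_t
zone_may_delegate(void)
{
	nfs4_srv_t *nsrv4 = nfs4_get_srv();
	boolean_t ok;

	rfs4_hold_deleg_policy(nsrv4);
	ok = (nsrv4->nfs4_deleg_policy == SRV_NORMAL_DELEGATE);
	rfs4_rele_deleg_policy(nsrv4);
	return (ok);
}
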
+
+/*
+ * max length of the NFSv4 server database name
+ */
+#define RFS4_MAX_MEM_CACHE_NAME 48
+
+/*
+ * global NFSv4 server kmem caches
+ * r_db_name - The name of the state database and the table that will use it
+ * These tables are defined in nfs4_srv_t
+ * r_db_mem_cache - The kmem cache associated with the state database name
+ */
+typedef struct rfs4_db_mem_cache {
+ char r_db_name[RFS4_MAX_MEM_CACHE_NAME];
+ kmem_cache_t *r_db_mem_cache;
+} rfs4_db_mem_cache_t;
+
+#define RFS4_DB_MEM_CACHE_NUM 8
+
+rfs4_db_mem_cache_t rfs4_db_mem_cache_table[RFS4_DB_MEM_CACHE_NUM];
+
+
+extern srv_deleg_policy_t nfs4_get_deleg_policy();
+
+extern void rfs4_servinst_create(nfs4_srv_t *, int, int, char **);
+extern void rfs4_servinst_destroy_all(nfs4_srv_t *);
+extern void rfs4_servinst_assign(nfs4_srv_t *, rfs4_client_t *,
+ rfs4_servinst_t *);
+extern rfs4_servinst_t *rfs4_servinst(rfs4_client_t *);
+extern int rfs4_clnt_in_grace(rfs4_client_t *);
+extern int rfs4_servinst_in_grace(rfs4_servinst_t *);
+extern int rfs4_servinst_grace_new(rfs4_servinst_t *);
+extern void rfs4_grace_start(rfs4_servinst_t *);
+extern void rfs4_grace_start_new(nfs4_srv_t *);
+extern void rfs4_grace_reset_all(nfs4_srv_t *);
+extern void rfs4_ss_oldstate(rfs4_oldstate_t *, char *, char *);
+extern void rfs4_dss_readstate(nfs4_srv_t *, int, char **);
+
+/*
* Various interfaces to manipulate the state structures introduced
* above
*/
-extern kmutex_t rfs4_state_lock;
-extern void rfs4_clean_state_exi(struct exportinfo *exi);
extern void rfs4_free_reply(nfs_resop4 *);
extern void rfs4_copy_reply(nfs_resop4 *, nfs_resop4 *);
@@ -946,7 +1051,10 @@ extern fem_t *deleg_wrops;
extern int rfs4_share(rfs4_state_t *, uint32_t, uint32_t);
extern int rfs4_unshare(rfs4_state_t *);
-extern void rfs4_set_deleg_policy(srv_deleg_policy_t);
+extern void rfs4_set_deleg_policy(nfs4_srv_t *, srv_deleg_policy_t);
+extern void rfs4_hold_deleg_policy(nfs4_srv_t *);
+extern void rfs4_rele_deleg_policy(nfs4_srv_t *);
+
#ifdef DEBUG
#define NFS4_DEBUG(var, args) if (var) cmn_err args
@@ -1348,7 +1456,6 @@ extern vtype_t nf4_to_vt[];
extern struct nfs4_ntov_map nfs4_ntov_map[];
extern uint_t nfs4_ntov_map_size;
-extern kstat_named_t *rfsproccnt_v4_ptr;
extern struct vfsops *nfs4_vfsops;
extern struct vnodeops *nfs4_vnodeops;
extern const struct fs_operation_def nfs4_vnodeops_template[];
@@ -1377,15 +1484,21 @@ extern stateid4 clnt_special1;
* The NFS Version 4 service procedures.
*/
+extern void rfs4_do_server_start(int, int, int);
extern void rfs4_compound(COMPOUND4args *, COMPOUND4res *,
struct exportinfo *, struct svc_req *, cred_t *, int *);
extern void rfs4_compound_free(COMPOUND4res *);
extern void rfs4_compound_flagproc(COMPOUND4args *, int *);
-extern int rfs4_srvrinit(void);
+extern void rfs4_srvrinit(void);
extern void rfs4_srvrfini(void);
-extern void rfs4_state_init(void);
-extern void rfs4_state_fini(void);
+extern void rfs4_srv_zone_init(nfs_globals_t *);
+extern void rfs4_srv_zone_fini(nfs_globals_t *);
+extern void rfs4_state_g_init(void);
+extern void rfs4_state_zone_init(nfs4_srv_t *);
+extern void rfs4_state_g_fini(void);
+extern void rfs4_state_zone_fini(void);
+extern nfs4_srv_t *nfs4_get_srv(void);
#endif
#ifdef __cplusplus
diff --git a/usr/src/uts/common/nfs/nfs4_drc.h b/usr/src/uts/common/nfs/nfs4_drc.h
index a77fb60818..f42867d197 100644
--- a/usr/src/uts/common/nfs/nfs4_drc.h
+++ b/usr/src/uts/common/nfs/nfs4_drc.h
@@ -18,16 +18,19 @@
*
* CDDL HEADER END
*/
+
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright 2018 Nexenta Systems, Inc.
+ */
+
#ifndef _NFS4_DRC_H
#define _NFS4_DRC_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -36,26 +39,26 @@ extern "C" {
* NFSv4 Duplicate Request cache.
*/
typedef struct rfs4_drc {
- kmutex_t lock;
+ kmutex_t lock;
uint32_t dr_hash;
- uint32_t max_size;
- uint32_t in_use;
+ uint32_t max_size;
+ uint32_t in_use;
list_t dr_cache;
- list_t *dr_buckets;
+ list_t *dr_buckets;
} rfs4_drc_t;
/*
* NFSv4 Duplicate request cache entry.
*/
typedef struct rfs4_dupreq {
- list_node_t dr_bkt_next;
+ list_node_t dr_bkt_next;
list_node_t dr_next;
list_t *dr_bkt;
rfs4_drc_t *drc;
int dr_state;
uint32_t dr_xid;
struct netbuf dr_addr;
- COMPOUND4res dr_res;
+ COMPOUND4res dr_res;
} rfs4_dupreq_t;
/*
@@ -67,15 +70,14 @@ typedef struct rfs4_dupreq {
#define NFS4_DUP_PENDING 2
#define NFS4_DUP_FREE 3
-#define NFS4_DUP_REPLAY 4
+#define NFS4_DUP_REPLAY 4
#define NFS4_DUP_INUSE 5
-extern rfs4_drc_t *nfs4_drc;
extern uint32_t nfs4_drc_max;
extern uint32_t nfs4_drc_hash;
rfs4_drc_t *rfs4_init_drc(uint32_t, uint32_t);
-void rfs4_fini_drc(rfs4_drc_t *);
+void rfs4_fini_drc(void);
void rfs4_dr_chstate(rfs4_dupreq_t *, int);
rfs4_dupreq_t *rfs4_alloc_dr(rfs4_drc_t *);
int rfs4_find_dr(struct svc_req *, rfs4_drc_t *, rfs4_dupreq_t **);
diff --git a/usr/src/uts/common/nfs/nfs_acl.h b/usr/src/uts/common/nfs/nfs_acl.h
index a9dd2e3635..f3ab476467 100644
--- a/usr/src/uts/common/nfs/nfs_acl.h
+++ b/usr/src/uts/common/nfs/nfs_acl.h
@@ -379,8 +379,6 @@ extern void nfs_acl_free(vsecattr_t *);
#ifdef _KERNEL
/* server and client data structures */
-extern kstat_named_t *aclproccnt_v2_ptr;
-extern kstat_named_t *aclproccnt_v3_ptr;
extern char *aclnames_v2[];
extern uchar_t acl_call_type_v2[];
diff --git a/usr/src/uts/common/nfs/nfs_cmd.h b/usr/src/uts/common/nfs/nfs_cmd.h
index fe1dda528f..feb2e5a9fc 100644
--- a/usr/src/uts/common/nfs/nfs_cmd.h
+++ b/usr/src/uts/common/nfs/nfs_cmd.h
@@ -18,6 +18,7 @@
*
* CDDL HEADER END
*/
+
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
@@ -26,6 +27,10 @@
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
+/*
+ * Copyright 2018 Nexenta Systems, Inc.
+ */
+
#ifndef _NFS_CMD_H
#define _NFS_CMD_H
@@ -80,6 +85,7 @@ extern struct charset_cache *nfscmd_findmap(struct exportinfo *,
struct sockaddr *);
extern char *nfscmd_convname(struct sockaddr *, struct exportinfo *,
char *, int, size_t);
+
extern char *nfscmd_convdirent(struct sockaddr *, struct exportinfo *, char *,
size_t, enum nfsstat3 *);
extern size_t nfscmd_convdirplus(struct sockaddr *, struct exportinfo *, char *,
@@ -87,6 +93,9 @@ extern size_t nfscmd_convdirplus(struct sockaddr *, struct exportinfo *, char *,
extern size_t nfscmd_countents(char *, size_t);
extern size_t nfscmd_dropped_entrysize(struct dirent64 *, size_t, size_t);
+extern void nfscmd_init(void);
+extern void nfscmd_fini(void);
+
#endif
#ifdef __cplusplus
diff --git a/usr/src/uts/common/nfs/nfs_dispatch.h b/usr/src/uts/common/nfs/nfs_dispatch.h
index 16475fea47..d1757fdb28 100644
--- a/usr/src/uts/common/nfs/nfs_dispatch.h
+++ b/usr/src/uts/common/nfs/nfs_dispatch.h
@@ -73,7 +73,6 @@ typedef struct rpcdisp {
typedef struct rpc_disptable {
int dis_nprocs;
char **dis_procnames;
- kstat_named_t **dis_proccntp;
struct rpcdisp *dis_table;
} rpc_disptable_t;
diff --git a/usr/src/uts/common/nfs/nfs_log.h b/usr/src/uts/common/nfs/nfs_log.h
index ff0f38a455..2bb90b37af 100644
--- a/usr/src/uts/common/nfs/nfs_log.h
+++ b/usr/src/uts/common/nfs/nfs_log.h
@@ -19,16 +19,19 @@
*
* CDDL HEADER END
*/
+
/*
* Copyright 2004 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright 2018 Nexenta Systems, Inc.
+ */
+
#ifndef _NFS_LOG_H
#define _NFS_LOG_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -654,7 +657,7 @@ extern void nfslog_dprint(const int, const char *fmt, ...)
extern void *nfslog_record_alloc(struct exportinfo *, int,
void **, int);
extern void nfslog_record_free(void *, void *, size_t);
-extern struct exportinfo *nfslog_get_exi(struct exportinfo *,
+extern struct exportinfo *nfslog_get_exi(nfs_export_t *, struct exportinfo *,
struct svc_req *, caddr_t, unsigned int *);
extern void nfslog_write_record(struct exportinfo *, struct svc_req *,
caddr_t, caddr_t, cred_t *, struct netbuf *, unsigned int,
diff --git a/usr/src/uts/common/sharefs/sharefs.h b/usr/src/uts/common/sharefs/sharefs.h
index 3587504c5e..d222227cb8 100644
--- a/usr/src/uts/common/sharefs/sharefs.h
+++ b/usr/src/uts/common/sharefs/sharefs.h
@@ -24,11 +24,13 @@
* Use is subject to license terms.
*/
+/*
+ * Copyright 2018 Nexenta Systems, Inc.
+ */
+
#ifndef _SHAREFS_SHAREFS_H
#define _SHAREFS_SHAREFS_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* This header provides service for the sharefs module.
*/
@@ -67,24 +69,27 @@ typedef struct sharefs_vfs {
#define SHAREFS_NAME_MAX MAXNAMELEN
-/*
- * The lock ordering whenever sharefs_lock and sharetab_lock both
- * need to be held is: sharefs_lock and then sharetab_lock.
- */
-extern krwlock_t sharefs_lock; /* lock for the vnode ops */
-extern sharetab_t *sharefs_sharetab; /* The sharetab. */
+typedef struct sharetab_globals {
+ /*
+ * The lock ordering whenever sharefs_lock and sharetab_lock both
+ * need to be held is: sharefs_lock and then sharetab_lock.
+ */
+ krwlock_t sharefs_lock; /* lock for the vnode ops */
+ sharetab_t *sharefs_sharetab; /* The sharetab. */
-extern uint_t sharetab_count; /* How many shares? */
-extern krwlock_t sharetab_lock; /* lock for the cached sharetab */
-extern size_t sharetab_size; /* How big is the sharetab file? */
+ uint_t sharetab_count; /* How many shares? */
+ krwlock_t sharetab_lock; /* lock for the cached sharetab */
+ size_t sharetab_size; /* How big is the sharetab file? */
-extern timestruc_t sharetab_mtime; /* Last mod to sharetab */
-extern timestruc_t sharetab_snap_time; /* Last snap */
-extern uint_t sharetab_generation; /* Which copy is it? */
+ timestruc_t sharetab_mtime; /* Last mod to sharetab */
+ timestruc_t sharetab_snap_time; /* Last snap */
+ uint_t sharetab_generation; /* Which copy is it? */
+} sharetab_globals_t;
#define SHAREFS_INO_FILE 0x80
extern vnode_t *sharefs_create_root_file(vfs_t *);
+extern sharetab_globals_t *sharetab_get_globals(zone_t *zone);
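
The sharetab state follows the same pattern: the old file-scope globals become fields of sharetab_globals_t, reached through sharetab_get_globals(). A minimal sketch that observes the lock ordering documented above (sharefs_lock before sharetab_lock); zone_share_count() is hypothetical:

/*
 * Sketch only: read a zone's share count. Takes sharefs_lock before
 * sharetab_lock, per the ordering comment in sharetab_globals_t.
 */
static uint_t
zone_share_count(zone_t *zone)
{
	sharetab_globals_t *sg = sharetab_get_globals(zone);
	uint_t count;

	rw_enter(&sg->sharefs_lock, RW_READER);
	rw_enter(&sg->sharetab_lock, RW_READER);
	count = sg->sharetab_count;
	rw_exit(&sg->sharetab_lock);
	rw_exit(&sg->sharefs_lock);
	return (count);
}
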
/*
* Sharetab file
diff --git a/usr/src/uts/common/sys/prsystm.h b/usr/src/uts/common/sys/prsystm.h
index 48da69dd16..ec95a0409d 100644
--- a/usr/src/uts/common/sys/prsystm.h
+++ b/usr/src/uts/common/sys/prsystm.h
@@ -86,7 +86,7 @@ extern void prgetsecflags(proc_t *, struct prsecflags *);
extern uint_t prnsegs(struct as *, int);
extern u_offset_t prgetfdinfosize(proc_t *, vnode_t *, cred_t *);
extern int prgetfdinfo(proc_t *, vnode_t *, struct prfdinfo *, cred_t *,
- list_t *);
+ cred_t *, list_t *);
extern void prexit(proc_t *);
extern void prfree(proc_t *);
extern void prlwpexit(kthread_t *);
diff --git a/usr/src/uts/common/sys/sdt.h b/usr/src/uts/common/sys/sdt.h
index 1535734a5c..d0f423ca8b 100644
--- a/usr/src/uts/common/sys/sdt.h
+++ b/usr/src/uts/common/sys/sdt.h
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
- * Copyright 2017 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2018 Nexenta Systems, Inc. All rights reserved.
*/
#ifndef _SYS_SDT_H
@@ -231,11 +231,17 @@ extern "C" {
type3, arg3) \
DTRACE_PROBE3(__nfsv3_##name, type1, arg1, type2, arg2, \
type3, arg3);
+
#define DTRACE_NFSV3_4(name, type1, arg1, type2, arg2, \
type3, arg3, type4, arg4) \
DTRACE_PROBE4(__nfsv3_##name, type1, arg1, type2, arg2, \
type3, arg3, type4, arg4);
+#define DTRACE_NFSV3_5(name, type1, arg1, type2, arg2, \
+ type3, arg3, type4, arg4, type5, arg5) \
+ DTRACE_PROBE5(__nfsv3_##name, type1, arg1, type2, arg2, \
+ type3, arg3, type4, arg4, type5, arg5);
+
#define DTRACE_NFSV4_1(name, type1, arg1) \
DTRACE_PROBE1(__nfsv4_##name, type1, arg1);
diff --git a/usr/src/uts/common/sys/zone.h b/usr/src/uts/common/sys/zone.h
index 976841fae0..203326d79d 100644
--- a/usr/src/uts/common/sys/zone.h
+++ b/usr/src/uts/common/sys/zone.h
@@ -18,11 +18,12 @@
*
* CDDL HEADER END
*/
+
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2018 Joyent, Inc.
- * Copyright 2019 Nexenta Systems, Inc. All rights reserved.
* Copyright 2014 Igor Kozhukhov <ikozhukhov@gmail.com>.
+ * Copyright 2019 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2020 Joyent, Inc.
*/
#ifndef _SYS_ZONE_H
@@ -789,7 +790,6 @@ typedef enum zone_pageout_op {
*/
#define ZONE_PS_INVAL PS_MYID
-
extern zone_t zone0;
extern zone_t *global_zone;
extern uint_t maxzones;
@@ -932,6 +932,26 @@ struct zsd_entry {
#define ZONE_SPECIALPID(x) ((x) == 0 || (x) == 1)
/*
+ * A root vnode of the current zone.
+ *
+ * NOTE: It may be necessary NOT to use this macro at initialization time
+ * for file sharing, where a non-global zone (NGZ) loads a file-sharing
+ * kernel module that performs ZSD initialization. During regular use of
+ * this macro, one should ASSERT() that curzone matches the active ZSD (an
+ * assertion that is not always true at ZSD initialization time).
+ */
+#define ZONE_ROOTVP() (curzone->zone_rootvp)
+
+/*
+ * Since a zone's root isn't necessarily an actual filesystem boundary
+ * (i.e. VROOT may not be set on zone->zone_rootvp), we must not assume it is.
+ * This macro checks whether a vnode is the current zone's rootvp.
+ * NOTE: Using the VN_ prefix, even though it's defined here in zone.h.
+ * NOTE2: See above warning about ZONE_ROOTVP().
+ */
+#define VN_IS_CURZONEROOT(vp) (VN_CMP(vp, ZONE_ROOTVP()))
+
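
This is essentially the check that TREE_ROOT in export.h gains above: a vnode counts as a root either because VROOT is set or because it is the current zone's root. A tiny hedged sketch of that usage; vnode_is_root_like() is hypothetical:

/*
 * Sketch only: treat a vnode as a "root" if it is a filesystem root
 * (VROOT) or the current zone's root directory.
 */
static boolean_t
vnode_is_root_like(vnode_t *vp)
{
	return ((vp->v_flag & VROOT) != 0 || VN_IS_CURZONEROOT(vp));
}
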
+/*
* Zone-safe version of thread_create() to be used when the caller wants to
* create a kernel thread to run within the current zone's context.
*/
diff --git a/usr/src/uts/intel/sockfs/Makefile b/usr/src/uts/intel/sockfs/Makefile
index 7ca35b6e67..d3b079b652 100644
--- a/usr/src/uts/intel/sockfs/Makefile
+++ b/usr/src/uts/intel/sockfs/Makefile
@@ -43,7 +43,6 @@ UTSBASE = ../..
#
MODULE = sockfs
OBJECTS = $(SOCK_OBJS:%=$(OBJS_DIR)/%)
-LINTS = $(SOCK_OBJS:%.o=$(LINTS_DIR)/%.ln)
ROOTMODULE = $(ROOT_FS_DIR)/$(MODULE)
#
@@ -55,7 +54,6 @@ include $(UTSBASE)/intel/Makefile.intel
# Define targets
#
ALL_TARGET = $(BINARY)
-LINT_TARGET = $(MODULE).lint
INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
#
@@ -73,19 +71,6 @@ SRCDIR = $(UTSBASE)/common/fs/sockfs
TOKGEN = $(SRCDIR)/nl7ctokgen
DERIVED_FILES = nl7ctokgen.h
CFLAGS += -I.
-LINTFLAGS += -I.
-
-#
-# For now, disable these lint checks; maintainers should endeavor
-# to investigate and remove these for maximum lint coverage.
-# Please do not carry these forward to new Makefiles.
-#
-LINTTAGS += -erroff=E_SUSPICIOUS_COMPARISON
-LINTTAGS += -erroff=E_SUPPRESSION_DIRECTIVE_UNUSED
-LINTTAGS += -erroff=E_STATIC_UNUSED
-LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN
-LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW
-LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV
CERRWARN += -_gcc=-Wno-unused-label
CERRWARN += -_gcc=-Wno-unused-value
@@ -94,9 +79,6 @@ CERRWARN += -_gcc=-Wno-unused-function
CERRWARN += -_gcc=-Wno-parentheses
CERRWARN += $(CNOWARN_UNINIT)
-# needs work
-SMATCH=off
-
#
# Default build targets.
#
@@ -112,12 +94,6 @@ clean: $(CLEAN_DEPS)
clobber: $(CLOBBER_DEPS)
$(RM) $(DERIVED_FILES)
-lint: $(LINT_DEPS)
-
-modlintlib: $(MODLINTLIB_DEPS)
-
-clean.lint: $(CLEAN_LINT_DEPS)
-
install: $(INSTALL_DEPS)
#
diff --git a/usr/src/uts/sparc/sockfs/Makefile b/usr/src/uts/sparc/sockfs/Makefile
index d2d43d640a..20c15c173c 100644
--- a/usr/src/uts/sparc/sockfs/Makefile
+++ b/usr/src/uts/sparc/sockfs/Makefile
@@ -42,7 +42,6 @@ UTSBASE = ../..
#
MODULE = sockfs
OBJECTS = $(SOCK_OBJS:%=$(OBJS_DIR)/%)
-LINTS = $(SOCK_OBJS:%.o=$(LINTS_DIR)/%.ln)
ROOTMODULE = $(ROOT_FS_DIR)/$(MODULE)
#
@@ -54,7 +53,6 @@ include $(UTSBASE)/sparc/Makefile.sparc
# Define targets
#
ALL_TARGET = $(BINARY)
-LINT_TARGET = $(MODULE).lint
INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
#
@@ -73,19 +71,6 @@ SRCDIR = $(UTSBASE)/common/fs/sockfs
TOKGEN = $(SRCDIR)/nl7ctokgen
DERIVED_FILES = nl7ctokgen.h
CFLAGS += -I.
-LINTFLAGS += -I.
-
-#
-# For now, disable these lint checks; maintainers should endeavor
-# to investigate and remove these for maximum lint coverage.
-# Please do not carry these forward to new Makefiles.
-#
-LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN
-LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW
-LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV
-LINTTAGS += -erroff=E_STATIC_UNUSED
-LINTTAGS += -erroff=E_SUSPICIOUS_COMPARISON
-LINTTAGS += -erroff=E_SUPPRESSION_DIRECTIVE_UNUSED
CERRWARN += -_gcc=-Wno-unused-label
CERRWARN += -_gcc=-Wno-unused-value
@@ -109,12 +94,6 @@ clean: $(CLEAN_DEPS)
clobber: $(CLOBBER_DEPS)
$(RM) $(DERIVED_FILES)
-lint: $(LINT_DEPS)
-
-modlintlib: $(MODLINTLIB_DEPS)
-
-clean.lint: $(CLEAN_LINT_DEPS)
-
install: $(INSTALL_DEPS)
#