author     Jerry Jelinek <jerry.jelinek@joyent.com>    2019-08-19 12:06:05 +0000
committer  Jerry Jelinek <jerry.jelinek@joyent.com>    2019-08-19 12:06:05 +0000
commit     289a9bb49771505b864985403334d2f94f0ca3ec (patch)
tree       2853dbf40fb16b4ea3df020177473835c0641dcb
parent     fb22979c02ec1ab84832084bea882640c366be5b (diff)
parent     2052a1fb16201e50b4c3a91ebcbeeccbc8276644 (diff)
download   illumos-joyent-289a9bb49771505b864985403334d2f94f0ca3ec.tar.gz
[illumos-gate merge]
commit 2052a1fb16201e50b4c3a91ebcbeeccbc8276644
    11568 loader: pxe.c missing initializer
commit 8d94f651a44d41a7147253bb5dad1a53941e8f50
    11031 SMB3 persistent handles
commit 2f57b5e005e6dce9d124b3dbd5fdcad1cc0372d2
    11532 Makefile.master: add gcc9 support flags
commit f8296c60994fb27105f37ac6f75661e4a6bdbab7
    11329 improved Virtio framework
    10012 vioblk should not accept an all-zero serial number
    7366 vioif happily creates rx descriptors until it consumes all memory

Conflicts:
    usr/src/uts/common/io/vioif/vioif.c
-rw-r--r--  exception_lists/packaging | 1
-rw-r--r--  usr/src/Makefile.master | 13
-rw-r--r--  usr/src/boot/sys/boot/i386/libi386/pxe.c | 25
-rw-r--r--  usr/src/cmd/mdb/common/modules/smbsrv/smbsrv.c | 34
-rw-r--r--  usr/src/cmd/smbsrv/Makefile | 2
-rw-r--r--  usr/src/cmd/smbsrv/fksmbd/fksmbd_shr.c | 4
-rw-r--r--  usr/src/cmd/smbsrv/nvlprint/Makefile | 37
-rw-r--r--  usr/src/cmd/smbsrv/nvlprint/nvlprint.c | 88
-rw-r--r--  usr/src/lib/libfakekernel/common/clock.c | 47
-rw-r--r--  usr/src/lib/libfakekernel/common/kmisc.c | 3
-rw-r--r--  usr/src/lib/libfakekernel/common/mapfile-vers | 3
-rw-r--r--  usr/src/lib/libshare/smb/libshare_smb.c | 4
-rw-r--r--  usr/src/lib/smbsrv/libfksmbsrv/Makefile.com | 3
-rw-r--r--  usr/src/lib/smbsrv/libfksmbsrv/common/fksmb_cred.c | 10
-rw-r--r--  usr/src/lib/smbsrv/libfksmbsrv/common/fksmb_init.c | 5
-rw-r--r--  usr/src/lib/smbsrv/libmlsvc/common/smb_share.c | 12
-rwxr-xr-x  usr/src/tools/quick/make-smbsrv | 1
-rw-r--r--  usr/src/uts/common/Makefile.files | 3
-rw-r--r--  usr/src/uts/common/fs/smbsrv/smb2_close.c | 4
-rw-r--r--  usr/src/uts/common/fs/smbsrv/smb2_create.c | 36
-rw-r--r--  usr/src/uts/common/fs/smbsrv/smb2_dispatch.c | 23
-rw-r--r--  usr/src/uts/common/fs/smbsrv/smb2_durable.c | 1241
-rw-r--r--  usr/src/uts/common/fs/smbsrv/smb2_lease.c | 3
-rw-r--r--  usr/src/uts/common/fs/smbsrv/smb2_lock.c | 4
-rw-r--r--  usr/src/uts/common/fs/smbsrv/smb2_negotiate.c | 32
-rw-r--r--  usr/src/uts/common/fs/smbsrv/smb2_tree_connect.c | 6
-rw-r--r--  usr/src/uts/common/fs/smbsrv/smb_common_open.c | 77
-rw-r--r--  usr/src/uts/common/fs/smbsrv/smb_cred.c | 18
-rw-r--r--  usr/src/uts/common/fs/smbsrv/smb_fsops.c | 38
-rw-r--r--  usr/src/uts/common/fs/smbsrv/smb_init.c | 9
-rw-r--r--  usr/src/uts/common/fs/smbsrv/smb_kshare.c | 102
-rw-r--r--  usr/src/uts/common/fs/smbsrv/smb_node.c | 12
-rw-r--r--  usr/src/uts/common/fs/smbsrv/smb_ofile.c | 185
-rw-r--r--  usr/src/uts/common/fs/smbsrv/smb_pathname.c | 30
-rw-r--r--  usr/src/uts/common/fs/smbsrv/smb_server.c | 108
-rw-r--r--  usr/src/uts/common/fs/smbsrv/smb_session.c | 234
-rw-r--r--  usr/src/uts/common/fs/smbsrv/smb_srv_oplock.c | 13
-rw-r--r--  usr/src/uts/common/fs/smbsrv/smb_tree.c | 243
-rw-r--r--  usr/src/uts/common/fs/smbsrv/smb_user.c | 4
-rw-r--r--  usr/src/uts/common/fs/smbsrv/smb_vfs.c | 164
-rw-r--r--  usr/src/uts/common/fs/smbsrv/smb_vops.c | 10
-rw-r--r--  usr/src/uts/common/io/vioblk/vioblk.c | 1560
-rw-r--r--  usr/src/uts/common/io/vioblk/vioblk.h | 212
-rw-r--r--  usr/src/uts/common/io/vioif/vioif.c | 2423
-rw-r--r--  usr/src/uts/common/io/vioif/vioif.h | 432
-rw-r--r--  usr/src/uts/common/io/virtio/virtio.c | 1364
-rw-r--r--  usr/src/uts/common/io/virtio/virtio.h | 342
-rw-r--r--  usr/src/uts/common/io/virtio/virtio_dma.c | 295
-rw-r--r--  usr/src/uts/common/io/virtio/virtio_impl.h | 368
-rw-r--r--  usr/src/uts/common/io/virtio/virtio_main.c | 1730
-rw-r--r--  usr/src/uts/common/io/virtio/virtioreg.h | 178
-rw-r--r--  usr/src/uts/common/io/virtio/virtiovar.h | 211
-rw-r--r--  usr/src/uts/common/smbsrv/smb2_kproto.h | 12
-rw-r--r--  usr/src/uts/common/smbsrv/smb_kproto.h | 27
-rw-r--r--  usr/src/uts/common/smbsrv/smb_ktypes.h | 53
-rw-r--r--  usr/src/uts/common/smbsrv/smb_share.h | 5
-rw-r--r--  usr/src/uts/intel/vioblk/Makefile | 80
-rw-r--r--  usr/src/uts/intel/vioif/Makefile | 56
-rw-r--r--  usr/src/uts/intel/virtio/Makefile | 81
59 files changed, 7523 insertions, 4797 deletions
diff --git a/exception_lists/packaging b/exception_lists/packaging
index ce7ebe91a1..c6cb2ccf99 100644
--- a/exception_lists/packaging
+++ b/exception_lists/packaging
@@ -600,6 +600,7 @@ usr/lib/smbsrv/libfksmbsrv.so.1
usr/lib/smbsrv/libmlsvc.so
usr/lib/smbsrv/libsmb.so
usr/lib/smbsrv/libsmbns.so
+usr/lib/smbsrv/nvlprint
usr/lib/smbsrv/test-msgbuf
usr/lib/smbsrv/testoplock
#
diff --git a/usr/src/Makefile.master b/usr/src/Makefile.master
index da8d14c660..e751a9f79f 100644
--- a/usr/src/Makefile.master
+++ b/usr/src/Makefile.master
@@ -372,8 +372,10 @@ CCNOAUTOINLINE= \
-_gcc=-fno-ipa-cp \
-_gcc7=-fno-ipa-icf \
-_gcc8=-fno-ipa-icf \
+ -_gcc9=-fno-ipa-icf \
-_gcc7=-fno-clone-functions \
- -_gcc8=-fno-clone-functions
+ -_gcc8=-fno-clone-functions \
+ -_gcc9=-fno-clone-functions
# GCC may put functions in different named sub-sections of .text based on
# their presumed calling frequency. At least in the kernel, where we actually
@@ -383,7 +385,8 @@ CCNOAUTOINLINE= \
# but the application of this may move into usr/src/uts/ in future.
CCNOREORDER= \
-_gcc7=-fno-reorder-functions \
- -_gcc8=-fno-reorder-functions
+ -_gcc8=-fno-reorder-functions \
+ -_gcc9=-fno-reorder-functions
#
# gcc has a rather aggressive optimization on by default that infers loop
@@ -394,7 +397,8 @@ CCNOREORDER= \
#
CCNOAGGRESSIVELOOPS= \
-_gcc7=-fno-aggressive-loop-optimizations \
- -_gcc8=-fno-aggressive-loop-optimizations
+ -_gcc8=-fno-aggressive-loop-optimizations \
+ -_gcc9=-fno-aggressive-loop-optimizations
# One optimization the compiler might perform is to turn this:
# #pragma weak foo
@@ -472,7 +476,8 @@ CERRWARN += -_gcc=-Wno-array-bounds
# gcc4 lacks -Wno-maybe-uninitialized
CNOWARN_UNINIT = -_gcc4=-Wno-uninitialized \
-_gcc7=-Wno-maybe-uninitialized \
- -_gcc8=-Wno-maybe-uninitialized
+ -_gcc8=-Wno-maybe-uninitialized \
+ -_gcc9=-Wno-maybe-uninitialized
CERRWARN += -_smatch=-p=illumos_user
include $(SRC)/Makefile.smatch
diff --git a/usr/src/boot/sys/boot/i386/libi386/pxe.c b/usr/src/boot/sys/boot/i386/libi386/pxe.c
index 693596559d..821d0f627d 100644
--- a/usr/src/boot/sys/boot/i386/libi386/pxe.c
+++ b/usr/src/boot/sys/boot/i386/libi386/pxe.c
@@ -76,16 +76,21 @@ static ssize_t pxe_netif_put(struct iodesc *desc, void *pkt, size_t len);
static void pxe_netif_end(struct netif *nif);
extern struct netif_stats pxe_st[];
-extern u_int16_t __bangpxeseg;
-extern u_int16_t __bangpxeoff;
+extern uint16_t __bangpxeseg;
+extern uint16_t __bangpxeoff;
extern void __bangpxeentry(void);
-extern u_int16_t __pxenvseg;
-extern u_int16_t __pxenvoff;
+extern uint16_t __pxenvseg;
+extern uint16_t __pxenvoff;
extern void __pxenventry(void);
struct netif_dif pxe_ifs[] = {
-/* dif_unit dif_nsel dif_stats dif_private */
- {0, 1, &pxe_st[0], 0}
+ {
+ .dif_unit = 0,
+ .dif_nsel = 1,
+ .dif_stats = &pxe_st[0],
+ .dif_private = NULL,
+ .dif_used = 0
+ }
};
struct netif_stats pxe_st[nitems(pxe_ifs)];
@@ -218,7 +223,7 @@ pxe_init(void)
pxenv_p->RMEntry.segment, pxenv_p->RMEntry.offset);
}
- gci_p = bio_alloc(sizeof(*gci_p));
+ gci_p = bio_alloc(sizeof (*gci_p));
if (gci_p == NULL) {
pxe_p = NULL;
return (0);
@@ -269,7 +274,7 @@ pxe_cleanup(void)
if (pxe_call == NULL)
return;
- undi_shutdown_p = bio_alloc(sizeof(*undi_shutdown_p));
+ undi_shutdown_p = bio_alloc(sizeof (*undi_shutdown_p));
if (undi_shutdown_p != NULL) {
bzero(undi_shutdown_p, sizeof (*undi_shutdown_p));
pxe_call(PXENV_UNDI_SHUTDOWN, undi_shutdown_p);
@@ -282,7 +287,7 @@ pxe_cleanup(void)
bio_free(undi_shutdown_p, sizeof (*undi_shutdown_p));
}
- unload_stack_p = bio_alloc(sizeof(*unload_stack_p));
+ unload_stack_p = bio_alloc(sizeof (*unload_stack_p));
if (unload_stack_p != NULL) {
bzero(unload_stack_p, sizeof (*unload_stack_p));
pxe_call(PXENV_UNLOAD_STACK, unload_stack_p);
@@ -423,7 +428,7 @@ pxe_netif_init(struct iodesc *desc, void *machdep_hint)
else
desc->xid = 0;
- bio_free(undi_info_p, sizeof(*undi_info_p));
+ bio_free(undi_info_p, sizeof (*undi_info_p));
undi_open_p = bio_alloc(sizeof (*undi_open_p));
if (undi_open_p == NULL)
return;
diff --git a/usr/src/cmd/mdb/common/modules/smbsrv/smbsrv.c b/usr/src/cmd/mdb/common/modules/smbsrv/smbsrv.c
index b54549eebb..4195a62149 100644
--- a/usr/src/cmd/mdb/common/modules/smbsrv/smbsrv.c
+++ b/usr/src/cmd/mdb/common/modules/smbsrv/smbsrv.c
@@ -1623,6 +1623,9 @@ tree_flag_bits[] = {
{ "FORCE_L2_OPLOCK",
SMB_TREE_FORCE_L2_OPLOCK,
SMB_TREE_FORCE_L2_OPLOCK },
+ { "CA",
+ SMB_TREE_CA,
+ SMB_TREE_CA },
{ NULL, 0, 0 }
};
@@ -2334,17 +2337,26 @@ smb_kshare_walk_step(mdb_walk_state_t *wsp)
* *****************************************************************************
*/
+typedef struct mdb_smb_vfs {
+ list_node_t sv_lnd;
+ uint32_t sv_magic;
+ uint32_t sv_refcnt;
+ vfs_t *sv_vfsp;
+ vnode_t *sv_rootvp;
+} mdb_smb_vfs_t;
+
struct smb_vfs_cb_args {
uint_t opts;
vnode_t vn;
char path[MAXPATHLEN];
};
+/*ARGSUSED*/
static int
smb_vfs_cb(uintptr_t addr, const void *data, void *varg)
{
struct smb_vfs_cb_args *args = varg;
- const smb_vfs_t *sf = data;
+ mdb_smb_vfs_t sf;
if (args->opts & SMB_OPT_VERBOSE) {
mdb_arg_t argv;
@@ -2363,16 +2375,21 @@ smb_vfs_cb(uintptr_t addr, const void *data, void *varg)
*
* Get the vnode v_path string if we can.
*/
+ if (mdb_ctf_vread(&sf, SMBSRV_SCOPE "smb_vfs_t",
+ "mdb_smb_vfs_t", addr, 0) < 0) {
+ mdb_warn("failed to read struct smb_vfs at %p", addr);
+ return (DCMD_ERR);
+ }
strcpy(args->path, "?");
if (mdb_vread(&args->vn, sizeof (args->vn),
- (uintptr_t)sf->sv_rootvp) == sizeof (args->vn))
+ (uintptr_t)sf.sv_rootvp) == sizeof (args->vn))
(void) mdb_readstr(args->path, sizeof (args->path),
(uintptr_t)args->vn.v_path);
mdb_printf("%-?p ", addr);
- mdb_printf("%-10d ", sf->sv_refcnt);
- mdb_printf("%-?p ", sf->sv_vfsp);
- mdb_printf("%-?p ", sf->sv_rootvp);
+ mdb_printf("%-10d ", sf.sv_refcnt);
+ mdb_printf("%-?p ", sf.sv_vfsp);
+ mdb_printf("%-?p ", sf.sv_rootvp);
mdb_printf("%-s\n", args->path);
return (WALK_NEXT);
@@ -2442,7 +2459,12 @@ smb_vfs_walk_init(mdb_walk_state_t *wsp)
* OFFSETOF(smb_server_t, sv_export.e_vfs_list.ll_list);
*/
GET_OFFSET(sv_exp_off, smb_server_t, sv_export);
- GET_OFFSET(ex_vfs_off, smb_export_t, e_vfs_list);
+ /* GET_OFFSET(ex_vfs_off, smb_export_t, e_vfs_list); */
+ ex_vfs_off = mdb_ctf_offsetof_by_name("smb_export_t", "e_vfs_list");
+ if (ex_vfs_off < 0) {
+ mdb_warn("cannot lookup: smb_export_t .e_vfs_list");
+ return (WALK_ERR);
+ }
GET_OFFSET(ll_off, smb_llist_t, ll_list);
wsp->walk_addr += (sv_exp_off + ex_vfs_off + ll_off);
diff --git a/usr/src/cmd/smbsrv/Makefile b/usr/src/cmd/smbsrv/Makefile
index 8e7699c252..85d9ec05f1 100644
--- a/usr/src/cmd/smbsrv/Makefile
+++ b/usr/src/cmd/smbsrv/Makefile
@@ -26,7 +26,7 @@
#
SUBDIRS = smbadm smbd smbstat dtrace fksmbd bind-helper \
- test-msgbuf testoplock
+ nvlprint testoplock test-msgbuf
MSGSUBDIRS = smbadm smbstat
include ../Makefile.cmd
diff --git a/usr/src/cmd/smbsrv/fksmbd/fksmbd_shr.c b/usr/src/cmd/smbsrv/fksmbd/fksmbd_shr.c
index 23038f1641..20f1f146b0 100644
--- a/usr/src/cmd/smbsrv/fksmbd/fksmbd_shr.c
+++ b/usr/src/cmd/smbsrv/fksmbd/fksmbd_shr.c
@@ -10,7 +10,7 @@
*/
/*
- * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2018 Nexenta Systems, Inc. All rights reserved.
*/
/*
@@ -115,6 +115,8 @@ smb_shr_load(void *args)
*/
new_share("test", "/var/smb/test", "fksmbd test share",
SMB_SHRF_GUEST_OK);
+ new_share("testca", "/var/smb/test", "fksmbd test CA share",
+ SMB_SHRF_CA);
/* Allow creating lots of shares for testing. */
shr_file = getenv("FKSMBD_SHARE_FILE");
diff --git a/usr/src/cmd/smbsrv/nvlprint/Makefile b/usr/src/cmd/smbsrv/nvlprint/Makefile
new file mode 100644
index 0000000000..6e107f4219
--- /dev/null
+++ b/usr/src/cmd/smbsrv/nvlprint/Makefile
@@ -0,0 +1,37 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2017 Nexenta Systems, Inc. All rights reserved.
+#
+
+
+PROG= nvlprint
+
+include ../../Makefile.cmd
+ROOTCMDDIR= $(ROOT)/usr/lib/smbsrv
+
+CFLAGS += $(CCVERBOSE)
+
+CPPFLAGS += -D_FILE_OFFSET_BITS=64
+LDLIBS += -lnvpair
+
+.KEEP_STATE:
+
+all: $(PROG)
+
+install: all $(ROOTCMD)
+
+clean:
+
+lint:
+
+include ../../Makefile.targ
diff --git a/usr/src/cmd/smbsrv/nvlprint/nvlprint.c b/usr/src/cmd/smbsrv/nvlprint/nvlprint.c
new file mode 100644
index 0000000000..939cedd933
--- /dev/null
+++ b/usr/src/cmd/smbsrv/nvlprint/nvlprint.c
@@ -0,0 +1,88 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2017 Nexenta Systems, Inc. All rights reserved.
+ */
+
+/*
+ * Print a packed nvlist from a file.
+ */
+
+#include <stdio.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include "libnvpair.h"
+
+char buf[65536];
+
+void
+dumpit(FILE *fp)
+{
+ struct stat st;
+ size_t flen;
+ int rlen;
+ nvlist_t *nvl = NULL;
+ int err;
+
+ if (fstat(fileno(fp), &st) < 0) {
+ perror("fstat");
+ return;
+ }
+ flen = (size_t)st.st_size;
+ if (flen > sizeof (buf)) {
+ (void) printf("File too large\n");
+ return;
+ }
+ rlen = fread(buf, 1, flen, fp);
+ if (rlen <= 0) {
+ perror("fread");
+ return;
+ }
+ if (rlen != flen) {
+ (void) printf("Short read %d %d \n", rlen, flen);
+ return;
+ }
+
+ err = nvlist_unpack(buf, flen, &nvl, 0);
+ if (err != 0) {
+ (void) printf("nvlist_unpack, err=%d\n", err);
+ return;
+ }
+
+ nvlist_print(stdout, nvl);
+ nvlist_free(nvl);
+}
+
+int
+main(int argc, char **argv)
+{
+ FILE *fp;
+ int i;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "usage: %s {filename} [filename2...]\n",
+ argv[0]);
+ return (1);
+ }
+ for (i = 1; i < argc; i++) {
+ fp = fopen(argv[i], "r");
+ if (fp == NULL) {
+ perror(argv[i]);
+ return (1);
+ }
+ (void) printf("%s:\n", argv[i]);
+ dumpit(fp);
+ (void) fclose(fp);
+ }
+ return (0);
+}
diff --git a/usr/src/lib/libfakekernel/common/clock.c b/usr/src/lib/libfakekernel/common/clock.c
index 2bee02af2e..deacbd4705 100644
--- a/usr/src/lib/libfakekernel/common/clock.c
+++ b/usr/src/lib/libfakekernel/common/clock.c
@@ -10,7 +10,7 @@
*/
/*
- * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2017 Nexenta Systems, Inc. All rights reserved.
*/
@@ -83,3 +83,48 @@ void
scalehrtime(hrtime_t *t)
{
}
+
+/*
+ * These functions are blatantly stolen from the kernel.
+ * See the dissertation in the comments preceding the
+ * hrt2ts() and ts2hrt() functions in:
+ * uts/common/os/timers.c
+ */
+void
+hrt2ts(hrtime_t hrt, timespec_t *tsp)
+{
+ uint32_t sec, nsec, tmp;
+
+ tmp = (uint32_t)(hrt >> 30);
+ sec = tmp - (tmp >> 2);
+ sec = tmp - (sec >> 5);
+ sec = tmp + (sec >> 1);
+ sec = tmp - (sec >> 6) + 7;
+ sec = tmp - (sec >> 3);
+ sec = tmp + (sec >> 1);
+ sec = tmp + (sec >> 3);
+ sec = tmp + (sec >> 4);
+ tmp = (sec << 7) - sec - sec - sec;
+ tmp = (tmp << 7) - tmp - tmp - tmp;
+ tmp = (tmp << 7) - tmp - tmp - tmp;
+ nsec = (uint32_t)hrt - (tmp << 9);
+ while (nsec >= NANOSEC) {
+ nsec -= NANOSEC;
+ sec++;
+ }
+ tsp->tv_sec = (time_t)sec;
+ tsp->tv_nsec = nsec;
+}
+
+hrtime_t
+ts2hrt(const timestruc_t *tsp)
+{
+ hrtime_t hrt;
+
+ hrt = tsp->tv_sec;
+ hrt = (hrt << 7) - hrt - hrt - hrt;
+ hrt = (hrt << 7) - hrt - hrt - hrt;
+ hrt = (hrt << 7) - hrt - hrt - hrt;
+ hrt = (hrt << 9) + tsp->tv_nsec;
+ return (hrt);
+}
diff --git a/usr/src/lib/libfakekernel/common/kmisc.c b/usr/src/lib/libfakekernel/common/kmisc.c
index 15730d6539..70f303e035 100644
--- a/usr/src/lib/libfakekernel/common/kmisc.c
+++ b/usr/src/lib/libfakekernel/common/kmisc.c
@@ -10,7 +10,7 @@
*/
/*
- * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2017 Nexenta Systems, Inc. All rights reserved.
* Copyright 2017 RackTop Systems.
*/
@@ -95,6 +95,7 @@ highbit64(uint64_t i)
int
ddi_strtoul(const char *str, char **endp, int base, unsigned long *res)
{
+ errno = 0;
*res = strtoul(str, endp, base);
if (*res == 0)
return (errno);
diff --git a/usr/src/lib/libfakekernel/common/mapfile-vers b/usr/src/lib/libfakekernel/common/mapfile-vers
index 3950ccd4b5..731f6801a5 100644
--- a/usr/src/lib/libfakekernel/common/mapfile-vers
+++ b/usr/src/lib/libfakekernel/common/mapfile-vers
@@ -99,7 +99,7 @@ SYMBOL_VERSION SUNWprivate_1.1 {
highbit;
highbit64;
-
+ hrt2ts;
hz;
issig;
@@ -233,6 +233,7 @@ SYMBOL_VERSION SUNWprivate_1.1 {
tick_per_msec;
timeout;
+ ts2hrt;
tsignal;
uiomove;
uioskip;
diff --git a/usr/src/lib/libshare/smb/libshare_smb.c b/usr/src/lib/libshare/smb/libshare_smb.c
index e15bb26d9a..f567e7818b 100644
--- a/usr/src/lib/libshare/smb/libshare_smb.c
+++ b/usr/src/lib/libshare/smb/libshare_smb.c
@@ -179,6 +179,7 @@ struct option_defs optdefs[] = {
{ SHOPT_GUEST, OPT_TYPE_BOOLEAN },
{ SHOPT_DFSROOT, OPT_TYPE_BOOLEAN },
{ SHOPT_DESCRIPTION, OPT_TYPE_STRING },
+ { SHOPT_CA, OPT_TYPE_BOOLEAN },
{ SHOPT_FSO, OPT_TYPE_BOOLEAN },
{ SHOPT_QUOTAS, OPT_TYPE_BOOLEAN },
{ SHOPT_ENCRYPT, OPT_TYPE_STRING },
@@ -2195,6 +2196,9 @@ smb_build_shareinfo(sa_share_t share, sa_resource_t resource, smb_share_t *si)
if (smb_saprop_getbool(opts, SHOPT_DFSROOT, B_FALSE))
si->shr_flags |= SMB_SHRF_DFSROOT;
+ if (smb_saprop_getbool(opts, SHOPT_CA, B_FALSE))
+ si->shr_flags |= SMB_SHRF_CA;
+
if (smb_saprop_getbool(opts, SHOPT_FSO, B_FALSE))
si->shr_flags |= SMB_SHRF_FSO;
diff --git a/usr/src/lib/smbsrv/libfksmbsrv/Makefile.com b/usr/src/lib/smbsrv/libfksmbsrv/Makefile.com
index 507122dadd..7f29003239 100644
--- a/usr/src/lib/smbsrv/libfksmbsrv/Makefile.com
+++ b/usr/src/lib/smbsrv/libfksmbsrv/Makefile.com
@@ -119,7 +119,6 @@ OBJS_FS_SMBSRV = \
smb_tree_connect.o \
smb_unlock_byte_range.o \
smb_user.o \
- smb_vfs.o \
smb_vops.o \
smb_vss.o \
smb_write.o \
@@ -210,8 +209,10 @@ STRIP_STABS = :
# Note: need our sys includes _before_ ENVCPPFLAGS, proto etc.
+# Also, like Makefile.uts, reset CPPFLAGS
CPPFLAGS.first += -I../../../libfakekernel/common
CPPFLAGS.first += -I../common
+CPPFLAGS = $(CPPFLAGS.first)
INCS += -I$(SRC)/uts/common
INCS += -I$(SRC)/common/smbsrv
diff --git a/usr/src/lib/smbsrv/libfksmbsrv/common/fksmb_cred.c b/usr/src/lib/smbsrv/libfksmbsrv/common/fksmb_cred.c
index 7b2bb93581..030c9c6244 100644
--- a/usr/src/lib/smbsrv/libfksmbsrv/common/fksmb_cred.c
+++ b/usr/src/lib/smbsrv/libfksmbsrv/common/fksmb_cred.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2017 Nexenta Systems, Inc. All rights reserved.
*/
#include <sys/types.h>
@@ -53,6 +53,14 @@ smb_cred_create(smb_token_t *token)
return (cr);
}
+cred_t *
+smb_kcred_create(void)
+{
+ cred_t *cr;
+ cr = CRED();
+ return (cr);
+}
+
void
smb_user_setcred(smb_user_t *user, cred_t *cr, uint32_t privileges)
{
diff --git a/usr/src/lib/smbsrv/libfksmbsrv/common/fksmb_init.c b/usr/src/lib/smbsrv/libfksmbsrv/common/fksmb_init.c
index 4f0d6bf299..dc9eff1b44 100644
--- a/usr/src/lib/smbsrv/libfksmbsrv/common/fksmb_init.c
+++ b/usr/src/lib/smbsrv/libfksmbsrv/common/fksmb_init.c
@@ -141,9 +141,12 @@ fksmbsrv_drv_open(void)
int
fksmbsrv_drv_close(void)
{
+ smb_server_t *sv;
int rc;
- rc = smb_server_delete();
+ rc = smb_server_lookup(&sv);
+ if (rc == 0)
+ rc = smb_server_delete(sv);
if (g_init_done != 0) {
smb_server_g_fini();
diff --git a/usr/src/lib/smbsrv/libmlsvc/common/smb_share.c b/usr/src/lib/smbsrv/libmlsvc/common/smb_share.c
index ccd5b75c12..8a354a7da0 100644
--- a/usr/src/lib/smbsrv/libmlsvc/common/smb_share.c
+++ b/usr/src/lib/smbsrv/libmlsvc/common/smb_share.c
@@ -770,6 +770,10 @@ smb_shr_modify(smb_share_t *new_si)
si->shr_flags &= ~SMB_SHRF_DFSROOT;
si->shr_flags |= flag;
+ flag = (new_si->shr_flags & SMB_SHRF_CA);
+ si->shr_flags &= ~SMB_SHRF_CA;
+ si->shr_flags |= flag;
+
flag = (new_si->shr_flags & SMB_SHRF_FSO);
si->shr_flags &= ~SMB_SHRF_FSO;
si->shr_flags |= flag;
@@ -1822,6 +1826,12 @@ smb_shr_sa_get(sa_share_t share, sa_resource_t resource, smb_share_t *si)
free(val);
}
+ val = smb_shr_sa_getprop(opts, SHOPT_CA);
+ if (val != NULL) {
+ smb_shr_sa_setflag(val, si, SMB_SHRF_CA);
+ free(val);
+ }
+
val = smb_shr_sa_getprop(opts, SHOPT_FSO);
if (val != NULL) {
smb_shr_sa_setflag(val, si, SMB_SHRF_FSO);
@@ -2611,6 +2621,8 @@ smb_shr_encode(smb_share_t *si, nvlist_t **nvlist)
rc |= nvlist_add_string(smb, SHOPT_GUEST, "true");
if ((si->shr_flags & SMB_SHRF_DFSROOT) != 0)
rc |= nvlist_add_string(smb, SHOPT_DFSROOT, "true");
+ if ((si->shr_flags & SMB_SHRF_CA) != 0)
+ rc |= nvlist_add_string(smb, SHOPT_CA, "true");
if ((si->shr_flags & SMB_SHRF_FSO) != 0)
rc |= nvlist_add_string(smb, SHOPT_FSO, "true");
if ((si->shr_flags & SMB_SHRF_QUOTAS) != 0)
diff --git a/usr/src/tools/quick/make-smbsrv b/usr/src/tools/quick/make-smbsrv
index 9e2381288d..0aabee3812 100755
--- a/usr/src/tools/quick/make-smbsrv
+++ b/usr/src/tools/quick/make-smbsrv
@@ -278,6 +278,7 @@ usr/lib/libmlrpc.so.2
usr/lib/smbsrv/libmlsvc.so.1
usr/lib/smbsrv/libsmb.so.1
usr/lib/smbsrv/libsmbns.so.1
+usr/lib/smbsrv/nvlprint
usr/lib/smbsrv/smbd
usr/sbin/smbadm
usr/sbin/smbstat
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files
index 9d63669f58..0b4426db3a 100644
--- a/usr/src/uts/common/Makefile.files
+++ b/usr/src/uts/common/Makefile.files
@@ -1231,7 +1231,6 @@ SMBSRV_OBJS += $(SMBSRV_SHARED_OBJS) \
smb_tree_connect.o \
smb_unlock_byte_range.o \
smb_user.o \
- smb_vfs.o \
smb_vops.o \
smb_vss.o \
smb_write.o \
@@ -2097,7 +2096,7 @@ NXGE_HCALL_OBJS = \
#
# Virtio core
-VIRTIO_OBJS = virtio.o
+VIRTIO_OBJS = virtio_main.o virtio_dma.o
# Virtio block driver
VIOBLK_OBJS = vioblk.o
diff --git a/usr/src/uts/common/fs/smbsrv/smb2_close.c b/usr/src/uts/common/fs/smbsrv/smb2_close.c
index e019a3c3da..bbb000f329 100644
--- a/usr/src/uts/common/fs/smbsrv/smb2_close.c
+++ b/usr/src/uts/common/fs/smbsrv/smb2_close.c
@@ -10,7 +10,7 @@
*/
/*
- * Copyright 2017 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2018 Nexenta Systems, Inc. All rights reserved.
*/
/*
@@ -71,6 +71,8 @@ smb2_close(smb_request_t *sr)
}
}
+ if (of->dh_persist)
+ smb2_dh_setdoc_persistent(of);
smb_ofile_close(of, 0);
errout:
diff --git a/usr/src/uts/common/fs/smbsrv/smb2_create.c b/usr/src/uts/common/fs/smbsrv/smb2_create.c
index 6aab3c5127..582efbae28 100644
--- a/usr/src/uts/common/fs/smbsrv/smb2_create.c
+++ b/usr/src/uts/common/fs/smbsrv/smb2_create.c
@@ -10,7 +10,7 @@
*/
/*
- * Copyright 2017 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2018 Nexenta Systems, Inc. All rights reserved.
*/
/*
@@ -280,7 +280,6 @@ smb2_create(smb_request_t *sr)
* many create context types are ignored too.
*/
op->dh_vers = SMB2_NOT_DURABLE;
- op->dh_v2_flags = 0;
if ((cctx.cc_in_flags &
(CCTX_DH_RECONNECT|CCTX_DH_RECONNECT_V2)) != 0) {
@@ -388,6 +387,9 @@ smb2_create(smb_request_t *sr)
cctx.cc_in_flags &= ~CCTX_REQUEST_LEASE;
}
+ if ((sr->tid_tree->t_flags & SMB_TREE_CA) == 0)
+ op->dh_v2_flags &= ~DH_PERSISTENT;
+
if ((cctx.cc_in_flags &
(CCTX_DH_REQUEST|CCTX_DH_REQUEST_V2)) != 0) {
if ((cctx.cc_in_flags & CCTX_DH_REQUEST_V2) != 0)
@@ -441,15 +443,19 @@ smb2_create(smb_request_t *sr)
* non-durable handles in case we get the ioctl
* to set "resiliency" on this handle.
*/
- if (of->f_ftype == SMB_FTYPE_DISK)
- smb_ofile_set_persistid(of);
+ if (of->f_ftype == SMB_FTYPE_DISK) {
+ if ((op->dh_v2_flags & DH_PERSISTENT) != 0)
+ smb_ofile_set_persistid_ph(of);
+ else
+ smb_ofile_set_persistid_dh(of);
+ }
/*
* [MS-SMB2] 3.3.5.9.8
* Handling the SMB2_CREATE_REQUEST_LEASE Create Context
*/
if ((cctx.cc_in_flags & CCTX_REQUEST_LEASE) != 0) {
- status = smb2_lease_create(sr);
+ status = smb2_lease_create(sr, sr->session->clnt_uuid);
if (status != NT_STATUS_SUCCESS) {
if (op->action_taken == SMB_OACT_CREATED) {
smb_ofile_set_delete_on_close(sr, of);
@@ -479,7 +485,8 @@ smb2_create(smb_request_t *sr)
if ((cctx.cc_in_flags &
(CCTX_DH_REQUEST|CCTX_DH_REQUEST_V2)) != 0 &&
smb_node_is_file(of->f_node) &&
- ((op->op_oplock_level == SMB2_OPLOCK_LEVEL_BATCH) ||
+ ((op->dh_v2_flags & DH_PERSISTENT) != 0 ||
+ (op->op_oplock_level == SMB2_OPLOCK_LEVEL_BATCH) ||
(op->op_oplock_level == SMB2_OPLOCK_LEVEL_LEASE &&
(op->lease_state & OPLOCK_LEVEL_CACHE_HANDLE) != 0))) {
/*
@@ -489,8 +496,13 @@ smb2_create(smb_request_t *sr)
(void) memcpy(of->dh_create_guid,
op->create_guid, UUID_LEN);
- /* no persistent handles yet */
- of->dh_persist = B_FALSE;
+ if ((op->dh_v2_flags & DH_PERSISTENT) != 0) {
+ if (smb2_dh_make_persistent(sr, of) == 0) {
+ of->dh_persist = B_TRUE;
+ } else {
+ op->dh_v2_flags = 0;
+ }
+ }
}
if (op->dh_vers != SMB2_NOT_DURABLE) {
uint32_t msto;
@@ -503,8 +515,11 @@ smb2_create(smb_request_t *sr)
* the default timeout (in mSec.)
*/
msto = op->dh_timeout;
- if (msto == 0)
- msto = smb2_dh_def_timeout;
+ if (msto == 0) {
+ msto = (of->dh_persist) ?
+ smb2_persist_timeout :
+ smb2_dh_def_timeout;
+ }
if (msto > smb2_dh_max_timeout)
msto = smb2_dh_max_timeout;
op->dh_timeout = msto;
@@ -512,6 +527,7 @@ smb2_create(smb_request_t *sr)
}
} else {
op->dh_vers = SMB2_NOT_DURABLE;
+ op->dh_v2_flags = 0;
}
/*
diff --git a/usr/src/uts/common/fs/smbsrv/smb2_dispatch.c b/usr/src/uts/common/fs/smbsrv/smb2_dispatch.c
index b592dc4c5f..88c4b6d600 100644
--- a/usr/src/uts/common/fs/smbsrv/smb2_dispatch.c
+++ b/usr/src/uts/common/fs/smbsrv/smb2_dispatch.c
@@ -979,6 +979,16 @@ cmd_done:
*/
if (!sr->smb2_async && sr->smb2_next_command != 0)
goto cmd_start;
+
+ /*
+ * If we have a durable handle, and this operation updated
+ * the nvlist, write it out (before smb2_send_reply).
+ */
+ if (sr->dh_nvl_dirty) {
+ sr->dh_nvl_dirty = B_FALSE;
+ smb2_dh_update_nvfile(sr);
+ }
+
smb2_send_reply(sr);
if (sr->smb2_async && sr->smb2_next_command != 0) {
MBC_FLUSH(&sr->reply); /* New reply buffer. */
@@ -990,6 +1000,9 @@ cleanup:
if (disconnect)
smb_session_disconnect(session);
+ /*
+ * Do "postwork" for oplock (and maybe other things)
+ */
if (sr->sr_postwork != NULL)
smb2sr_run_postwork(sr);
@@ -1728,6 +1741,16 @@ smb2sr_run_postwork(smb_request_t *top_sr)
default:
ASSERT(0);
}
+
+ /*
+ * If we have a durable handle, and this operation
+ * updated the nvlist, write it out.
+ */
+ if (post_sr->dh_nvl_dirty) {
+ post_sr->dh_nvl_dirty = B_FALSE;
+ smb2_dh_update_nvfile(post_sr);
+ }
+
post_sr->sr_state = SMB_REQ_STATE_COMPLETED;
smb_request_free(post_sr);
}
diff --git a/usr/src/uts/common/fs/smbsrv/smb2_durable.c b/usr/src/uts/common/fs/smbsrv/smb2_durable.c
index 9ba3dd9c07..7b65924ca4 100644
--- a/usr/src/uts/common/fs/smbsrv/smb2_durable.c
+++ b/usr/src/uts/common/fs/smbsrv/smb2_durable.c
@@ -21,6 +21,7 @@
#include <sys/cmn_err.h>
#include <sys/fcntl.h>
#include <sys/nbmlock.h>
+#include <sys/sid.h>
#include <smbsrv/string.h>
#include <smbsrv/smb_kproto.h>
#include <smbsrv/smb_fsops.h>
@@ -53,6 +54,48 @@ uint32_t smb2_dh_max_timeout = 300 * MILLISEC; /* mSec. */
uint32_t smb2_res_def_timeout = 120 * MILLISEC; /* mSec. */
uint32_t smb2_res_max_timeout = 300 * MILLISEC; /* mSec. */
+uint32_t smb2_persist_timeout = 300 * MILLISEC; /* mSec. */
+
+/* Max. size of the file used to store a CA handle. */
+static uint32_t smb2_dh_max_cah_size = 64 * 1024;
+static uint32_t smb2_ca_info_version = 1;
+
+/*
+ * Want this to have invariant layout on disk, where the
+ * last two uint32_t values are stored as a uint64_t
+ */
+struct nvlk {
+ uint64_t lk_start;
+ uint64_t lk_len;
+ /* (lk_pid << 32) | lk_type */
+#ifdef _BIG_ENDIAN
+ uint32_t lk_pid, lk_type;
+#else
+ uint32_t lk_type, lk_pid;
+#endif
+};
+
+static void smb2_dh_import_share(void *);
+static smb_ofile_t *smb2_dh_import_handle(smb_request_t *, smb_node_t *,
+ uint64_t);
+static int smb2_dh_read_nvlist(smb_request_t *, smb_node_t *, struct nvlist **);
+static int smb2_dh_import_cred(smb_ofile_t *, char *);
+
+#define DH_SN_SIZE 24 /* size of DH stream name buffers */
+/*
+ * Build the stream name used to store a CA handle.
+ * i.e. ":0123456789abcdef:$CA"
+ * Note: smb_fsop_create adds the SUNWsmb prefix,
+ * so we compose the name without the prefix.
+ */
+static inline void
+smb2_dh_make_stream_name(char *buf, size_t buflen, uint64_t id)
+{
+ ASSERT(buflen >= DH_SN_SIZE);
+ (void) snprintf(buf, buflen,
+ ":%016" PRIx64 ":$CA", id);
+}
+
/*
* smb_dh_should_save
*
@@ -80,6 +123,11 @@ uint32_t smb2_res_max_timeout = 300 * MILLISEC; /* mSec. */
* Open.OplockState == Held, and Open.IsDurable is TRUE.
*
* - Open.IsPersistent is TRUE.
+ *
+ * We also deal with some special cases for shutdown of the
+ * server, session, user, tree (in that order). Other than
+ * the cases above, shutdown (or forced termination) should
+ * destroy durable handles.
*/
boolean_t
smb_dh_should_save(smb_ofile_t *of)
@@ -87,12 +135,49 @@ smb_dh_should_save(smb_ofile_t *of)
ASSERT(MUTEX_HELD(&of->f_mutex));
ASSERT(of->dh_vers != SMB2_NOT_DURABLE);
- if (of->f_user->preserve_opens == SMB2_DH_PRESERVE_NONE)
+ /* SMB service shutting down, destroy DH */
+ if (of->f_server->sv_state == SMB_SERVER_STATE_STOPPING)
return (B_FALSE);
- if (of->f_user->preserve_opens == SMB2_DH_PRESERVE_ALL)
+ /*
+ * SMB Session (connection) going away (server up).
+ * If server initiated disconnect, destroy DH
+ * If client initiated disconnect, save all DH.
+ */
+ if (of->f_session->s_state == SMB_SESSION_STATE_TERMINATED)
+ return (B_FALSE);
+ if (of->f_session->s_state == SMB_SESSION_STATE_DISCONNECTED)
return (B_TRUE);
+ /*
+ * SMB User logoff, session still "up".
+ * Action depends on why/how this logoff happened,
+ * determined based on user->preserve_opens
+ */
+ if (of->f_user->u_state == SMB_USER_STATE_LOGGING_OFF) {
+ switch (of->f_user->preserve_opens) {
+ case SMB2_DH_PRESERVE_NONE:
+ /* Server-initiated */
+ return (B_FALSE);
+ case SMB2_DH_PRESERVE_SOME:
+ /* Previous session logoff. */
+ goto preserve_some;
+ case SMB2_DH_PRESERVE_ALL:
+ /* Protocol logoff request */
+ return (B_TRUE);
+ }
+ }
+
+ /*
+ * SMB tree disconnecting (user still logged on)
+ * i.e. when kshare export forces disconnection.
+ */
+ if (of->f_tree->t_state == SMB_TREE_STATE_DISCONNECTING)
+ return (B_FALSE);
+
+preserve_some:
+ /* preserve_opens == SMB2_DH_PRESERVE_SOME */
+
switch (of->dh_vers) {
case SMB2_RESILIENT:
return (B_TRUE);
@@ -116,6 +201,1063 @@ smb_dh_should_save(smb_ofile_t *of)
}
/*
+ * Is this stream name a CA handle? i.e.
+ * ":0123456789abcdef:$CA"
+ */
+static boolean_t
+smb2_dh_match_ca_name(const char *name, uint64_t *idp)
+{
+ static const char suffix[] = ":$CA";
+ u_longlong_t ull;
+ const char *p = name;
+ char *p2 = NULL;
+ int len, rc;
+
+ if (*p++ != ':')
+ return (B_FALSE);
+
+ rc = ddi_strtoull(p, &p2, 16, &ull);
+ if (rc != 0 || p2 != (p + 16))
+ return (B_FALSE);
+ p += 16;
+
+ len = sizeof (suffix) - 1;
+ if (strncmp(p, suffix, len) != 0)
+ return (B_FALSE);
+ p += len;
+
+ if (*p != '\0')
+ return (B_FALSE);
+
+ *idp = (uint64_t)ull;
+ return (B_TRUE);
+}
+
+/*
+ * smb2_dh_new_ca_share
+ *
+ * Called when a new share has ca=true. Find or create the CA dir,
+ * and start a thread to import persistent handles.
+ */
+int
+smb2_dh_new_ca_share(smb_server_t *sv, smb_kshare_t *shr)
+{
+ smb_kshare_t *shr2;
+ smb_request_t *sr;
+
+ ASSERT(STYPE_ISDSK(shr->shr_type));
+
+ /*
+ * Need to lookup the kshare again, to get a hold.
+ * Add a function to just get the hold?
+ */
+ shr2 = smb_kshare_lookup(sv, shr->shr_name);
+ if (shr2 != shr)
+ return (EINVAL);
+
+ sr = smb_request_alloc(sv->sv_session, 0);
+ if (sr == NULL) {
+ /* shutting down? */
+ smb_kshare_release(sv, shr);
+ return (EINTR);
+ }
+ sr->sr_state = SMB_REQ_STATE_SUBMITTED;
+
+ /*
+ * Mark this share as "busy importing persistent handles"
+ * so we can hold off tree connect until that's done.
+ * Will clear and wakeup below.
+ */
+ mutex_enter(&shr->shr_mutex);
+ shr->shr_import_busy = sr;
+ mutex_exit(&shr->shr_mutex);
+
+ /*
+ * Start a taskq job to import any CA handles.
+ * The hold on the kshare is given to this job,
+ * which releases it when it's done.
+ */
+ sr->arg.tcon.si = shr; /* hold from above */
+ (void) taskq_dispatch(
+ sv->sv_worker_pool,
+ smb2_dh_import_share, sr, TQ_SLEEP);
+
+ return (0);
+}
+
+int smb2_dh_import_delay = 0;
+
+static void
+smb2_dh_import_share(void *arg)
+{
+ smb_request_t *sr = arg;
+ smb_kshare_t *shr = sr->arg.tcon.si;
+ smb_node_t *snode;
+ cred_t *kcr = zone_kcred();
+ smb_streaminfo_t *str_info = NULL;
+ uint64_t id;
+ smb_node_t *str_node;
+ smb_odir_t *od = NULL;
+ smb_ofile_t *of;
+ int rc;
+ boolean_t eof;
+
+ sr->sr_state = SMB_REQ_STATE_ACTIVE;
+
+ if (smb2_dh_import_delay > 0)
+ delay(SEC_TO_TICK(smb2_dh_import_delay));
+
+ /*
+ * Borrow the server's "root" user.
+ *
+ * This takes the place of smb_session_lookup_ssnid()
+ * that would happen in smb2_dispatch for a normal SR.
+ * As usual, this hold is released in smb_request_free.
+ */
+ sr->uid_user = sr->sr_server->sv_rootuser;
+ smb_user_hold_internal(sr->uid_user);
+ sr->user_cr = sr->uid_user->u_cred;
+
+ /*
+ * Create a temporary tree connect
+ */
+ sr->arg.tcon.path = shr->shr_name;
+ sr->tid_tree = smb_tree_alloc(sr, shr, shr->shr_root_node,
+ ACE_ALL_PERMS, 0);
+ if (sr->tid_tree == NULL) {
+ cmn_err(CE_NOTE, "smb2_dh_import_share: "
+ "failed connect share <%s>", shr->shr_name);
+ goto out;
+ }
+ snode = sr->tid_tree->t_snode;
+
+ /*
+ * Get the buffers we'll use to read CA handle data.
+ * Stash in sr_request_buf for smb2_dh_import_handle().
+ * Also a buffer for the stream name info.
+ */
+ sr->sr_req_length = smb2_dh_max_cah_size;
+ sr->sr_request_buf = kmem_alloc(sr->sr_req_length, KM_SLEEP);
+ str_info = kmem_alloc(sizeof (smb_streaminfo_t), KM_SLEEP);
+
+ /*
+ * Open the ext. attr dir under the share root and
+ * import CA handles for this share.
+ */
+ if (smb_odir_openat(sr, snode, &od) != 0) {
+ cmn_err(CE_NOTE, "Share [%s] CA import, no xattr dir?",
+ shr->shr_name);
+ goto out;
+ }
+
+ eof = B_FALSE;
+ do {
+ /*
+ * If the kshare gets unshared before we finish,
+ * bail out so we don't hold things up.
+ */
+ if (shr->shr_flags & SMB_SHRF_REMOVED)
+ break;
+
+ /*
+ * Read a stream name and info
+ */
+ rc = smb_odir_read_streaminfo(sr, od, str_info, &eof);
+ if ((rc != 0) || (eof))
+ break;
+
+ /*
+ * Skip anything not a CA handle.
+ */
+ if (!smb2_dh_match_ca_name(str_info->si_name, &id)) {
+ continue;
+ }
+
+ /*
+ * Lookup stream node and import
+ */
+ str_node = NULL;
+ rc = smb_fsop_lookup_name(sr, kcr, SMB_CASE_SENSITIVE,
+ snode, snode, str_info->si_name, &str_node);
+ if (rc != 0) {
+ cmn_err(CE_NOTE, "Share [%s] CA import, "
+ "lookup <%s> failed rc=%d",
+ shr->shr_name, str_info->si_name, rc);
+ continue;
+ }
+ of = smb2_dh_import_handle(sr, str_node, id);
+ smb_node_release(str_node);
+ if (of != NULL) {
+ smb_ofile_release(of);
+ of = NULL;
+ }
+ sr->fid_ofile = NULL;
+
+ } while (!eof);
+
+out:
+ if (od != NULL) {
+ smb_odir_close(od);
+ smb_odir_release(od);
+ }
+
+ if (str_info != NULL)
+ kmem_free(str_info, sizeof (smb_streaminfo_t));
+ /* Let smb_request_free clean up sr->sr_request_buf */
+
+ /*
+ * We did a (temporary, internal) tree connect above,
+ * which we need to undo before we return. Note that
+ * smb_request_free will do the final release of
+ * sr->tid_tree, sr->uid_user
+ */
+ if (sr->tid_tree != NULL)
+ smb_tree_disconnect(sr->tid_tree, B_FALSE);
+
+ /*
+ * Wake up any waiting tree connect(s).
+ * See smb_tree_connect_disk().
+ */
+ mutex_enter(&shr->shr_mutex);
+ shr->shr_import_busy = NULL;
+ cv_broadcast(&shr->shr_cv);
+ mutex_exit(&shr->shr_mutex);
+
+ smb_kshare_release(sr->sr_server, shr);
+ smb_request_free(sr);
+}
+
+/*
+ * This returns the new ofile mostly for dtrace.
+ */
+static smb_ofile_t *
+smb2_dh_import_handle(smb_request_t *sr, smb_node_t *str_node,
+ uint64_t persist_id)
+{
+ uint8_t client_uuid[UUID_LEN];
+ smb_tree_t *tree = sr->tid_tree;
+ smb_arg_open_t *op = &sr->arg.open;
+ smb_pathname_t *pn = &op->fqi.fq_path;
+ cred_t *kcr = zone_kcred();
+ struct nvlist *nvl = NULL;
+ char *sidstr = NULL;
+ smb_ofile_t *of = NULL;
+ smb_attr_t *pa;
+ boolean_t did_open = B_FALSE;
+ boolean_t have_lease = B_FALSE;
+ hrtime_t hrt;
+ uint64_t *u64p;
+ uint64_t u64;
+ uint32_t u32;
+ uint32_t status;
+ char *s;
+ uint8_t *u8p;
+ uint_t alen;
+ int rc;
+
+ /*
+ * While we're called with arg.tcon, we now want to use
+ * smb_arg_open for the rest of import, so clear it.
+ */
+ bzero(op, sizeof (*op));
+ op->create_disposition = FILE_OPEN;
+
+ /*
+ * Read and unpack the NVL
+ */
+ rc = smb2_dh_read_nvlist(sr, str_node, &nvl);
+ if (rc != 0)
+ return (NULL);
+
+ /*
+ * Known CA info version?
+ */
+ u32 = 0;
+ rc = nvlist_lookup_uint32(nvl, "info_version", &u32);
+ if (rc != 0 || u32 != smb2_ca_info_version) {
+ cmn_err(CE_NOTE, "CA import (%s/%s) bad vers=%d",
+ tree->t_resource, str_node->od_name, u32);
+ goto errout;
+ }
+
+ /*
+ * The persist ID in the nvlist should match the one
+ * encoded in the file name. (not enforced)
+ */
+ u64 = 0;
+ rc = nvlist_lookup_uint64(nvl, "file_persistid", &u64);
+ if (rc != 0 || u64 != persist_id) {
+ cmn_err(CE_WARN, "CA import (%s/%s) bad id=%016" PRIx64,
+ tree->t_resource, str_node->od_name, u64);
+ /* goto errout? (allow) */
+ }
+
+ /*
+ * Does it belong in the share being imported?
+ */
+ s = NULL;
+ rc = nvlist_lookup_string(nvl, "share_name", &s);
+ if (rc != 0) {
+ cmn_err(CE_NOTE, "CA import (%s/%s) no share_name",
+ tree->t_resource, str_node->od_name);
+ goto errout;
+ }
+ if (smb_strcasecmp(s, tree->t_sharename, 0) != 0) {
+ /* Normal (not an error) */
+#ifdef DEBUG
+ cmn_err(CE_NOTE, "CA import (%s/%s) other share",
+ tree->t_resource, str_node->od_name);
+#endif
+ goto errout;
+ }
+
+ /*
+ * Get the path name (for lookup)
+ */
+ rc = nvlist_lookup_string(nvl, "path_name", &pn->pn_path);
+ if (rc != 0) {
+ cmn_err(CE_NOTE, "CA import (%s/%s) no path_name",
+ tree->t_resource, str_node->od_name);
+ goto errout;
+ }
+
+ /*
+ * owner sid
+ */
+ rc = nvlist_lookup_string(nvl, "owner_sid", &sidstr);
+ if (rc != 0) {
+ cmn_err(CE_NOTE, "CA import (%s/%s) no owner_sid",
+ tree->t_resource, str_node->od_name);
+ goto errout;
+ }
+
+ /*
+ * granted access
+ */
+ rc = nvlist_lookup_uint32(nvl,
+ "granted_access", &op->desired_access);
+ if (rc != 0) {
+ cmn_err(CE_NOTE, "CA import (%s/%s) no granted_access",
+ tree->t_resource, str_node->od_name);
+ goto errout;
+ }
+
+ /*
+ * share access
+ */
+ rc = nvlist_lookup_uint32(nvl,
+ "share_access", &op->share_access);
+ if (rc != 0) {
+ cmn_err(CE_NOTE, "CA import (%s/%s) no share_access",
+ tree->t_resource, str_node->od_name);
+ goto errout;
+ }
+
+ /*
+ * create options
+ */
+ rc = nvlist_lookup_uint32(nvl,
+ "create_options", &op->create_options);
+ if (rc != 0) {
+ cmn_err(CE_NOTE, "CA import (%s/%s) no create_options",
+ tree->t_resource, str_node->od_name);
+ goto errout;
+ }
+
+ /*
+ * create guid (client-assigned)
+ */
+ alen = UUID_LEN;
+ u8p = NULL;
+ rc = nvlist_lookup_uint8_array(nvl, "file_guid", &u8p, &alen);
+ if (rc != 0 || alen != UUID_LEN) {
+ cmn_err(CE_NOTE, "CA import (%s/%s) bad file_guid",
+ tree->t_resource, str_node->od_name);
+ goto errout;
+ }
+ bcopy(u8p, op->create_guid, UUID_LEN);
+
+ /*
+ * client uuid (identifies the client)
+ */
+ alen = UUID_LEN;
+ u8p = NULL;
+ rc = nvlist_lookup_uint8_array(nvl, "client_uuid", &u8p, &alen);
+ if (rc != 0 || alen != UUID_LEN) {
+ cmn_err(CE_NOTE, "CA import (%s/%s) no client_uuid",
+ tree->t_resource, str_node->od_name);
+ goto errout;
+ }
+ bcopy(u8p, client_uuid, UUID_LEN);
+
+ /*
+ * Lease key (optional)
+ */
+ alen = SMB_LEASE_KEY_SZ;
+ u8p = NULL;
+ rc = nvlist_lookup_uint8_array(nvl, "lease_uuid", &u8p, &alen);
+ if (rc == 0) {
+ bcopy(u8p, op->lease_key, UUID_LEN);
+ (void) nvlist_lookup_uint32(nvl,
+ "lease_state", &op->lease_state);
+ (void) nvlist_lookup_uint16(nvl,
+ "lease_epoch", &op->lease_epoch);
+ (void) nvlist_lookup_uint16(nvl,
+ "lease_version", &op->lease_version);
+ have_lease = B_TRUE;
+ } else {
+ (void) nvlist_lookup_uint32(nvl,
+ "oplock_state", &op->op_oplock_state);
+ }
+
+ /*
+ * Done getting what we need from the NV list.
+ * (re)open the file
+ */
+ status = smb_common_open(sr);
+ if (status != 0) {
+ cmn_err(CE_NOTE, "CA import (%s/%s) open failed 0x%x",
+ tree->t_resource, str_node->od_name, status);
+ (void) smb_node_set_delete_on_close(str_node, kcr, 0);
+ goto errout;
+ }
+ of = sr->fid_ofile;
+ did_open = B_TRUE;
+
+ /*
+ * Now restore the rest of the SMB2 level state.
+ * See smb2_create after smb_common_open
+ */
+
+ /*
+ * Setup of->f_cr with owner SID
+ */
+ rc = smb2_dh_import_cred(of, sidstr);
+ if (rc != 0) {
+ cmn_err(CE_NOTE, "CA import (%s/%s) import cred failed",
+ tree->t_resource, str_node->od_name);
+ goto errout;
+ }
+
+ /*
+ * Use the persist ID we previously assigned.
+ * Like smb_ofile_set_persistid_ph()
+ */
+ rc = smb_ofile_insert_persistid(of, persist_id);
+ if (rc != 0) {
+ cmn_err(CE_NOTE, "CA import (%s/%s) "
+ "insert_persistid rc=%d",
+ tree->t_resource, str_node->od_name, rc);
+ goto errout;
+ }
+
+ /*
+ * Like smb2_lease_create()
+ *
+ * Lease state is stored in each persistent handle, but
+ * only one handle has the state we want. As we import
+ * each handle, "upgrade" the lease if the handle we're
+ * importing has a "better" lease state (higher epoch or
+ * more cache rights). After all handles are imported,
+ * that will get the lease to the right state.
+ */
+ if (have_lease) {
+ smb_lease_t *ls;
+ status = smb2_lease_create(sr, client_uuid);
+ if (status != 0) {
+ cmn_err(CE_NOTE, "CA import (%s/%s) get lease 0x%x",
+ tree->t_resource, str_node->od_name, status);
+ goto errout;
+ }
+ ls = of->f_lease;
+
+ /* Use most current "epoch". */
+ mutex_enter(&ls->ls_mutex);
+ if (ls->ls_epoch < op->lease_epoch)
+ ls->ls_epoch = op->lease_epoch;
+ mutex_exit(&ls->ls_mutex);
+
+ /*
+ * Get the lease (and oplock)
+ * uses op->lease_state
+ */
+ op->op_oplock_level = SMB2_OPLOCK_LEVEL_LEASE;
+ smb2_lease_acquire(sr);
+
+ } else {
+ /*
+ * No lease; maybe get an oplock
+ * uses: op->op_oplock_level
+ */
+ if (op->op_oplock_state & OPLOCK_LEVEL_BATCH) {
+ op->op_oplock_level = SMB2_OPLOCK_LEVEL_BATCH;
+ } else if (op->op_oplock_state & OPLOCK_LEVEL_ONE) {
+ op->op_oplock_level = SMB2_OPLOCK_LEVEL_EXCLUSIVE;
+ } else if (op->op_oplock_state & OPLOCK_LEVEL_TWO) {
+ op->op_oplock_level = SMB2_OPLOCK_LEVEL_II;
+ } else {
+ op->op_oplock_level = SMB2_OPLOCK_LEVEL_NONE;
+ }
+ smb2_oplock_acquire(sr);
+ }
+
+ /*
+ * Byte range locks
+ */
+ alen = 0;
+ u64p = NULL;
+ if (nvlist_lookup_uint64_array(nvl, "locks", &u64p, &alen) == 0) {
+ uint_t i;
+ uint_t nlocks = alen / 3;
+ struct nvlk *nlp;
+
+ nlp = (struct nvlk *)u64p;
+ for (i = 0; i < nlocks; i++) {
+ status = smb_lock_range(
+ sr,
+ nlp->lk_start,
+ nlp->lk_len,
+ nlp->lk_pid,
+ nlp->lk_type,
+ 0);
+ if (status != 0) {
+ cmn_err(CE_NOTE, "CA import (%s/%s) "
+ "get lock %d failed 0x%x",
+ tree->t_resource,
+ str_node->od_name,
+ i, status);
+ }
+ nlp++;
+ }
+ }
+ alen = SMB_OFILE_LSEQ_MAX;
+ u8p = NULL;
+ if (nvlist_lookup_uint8_array(nvl, "lockseq", &u8p, &alen) == 0) {
+ if (alen != SMB_OFILE_LSEQ_MAX) {
+ cmn_err(CE_NOTE, "CA import (%s/%s) "
+ "get lockseq bad len=%d",
+ tree->t_resource,
+ str_node->od_name,
+ alen);
+ } else {
+ mutex_enter(&of->f_mutex);
+ bcopy(u8p, of->f_lock_seq, alen);
+ mutex_exit(&of->f_mutex);
+ }
+ }
+
+ /*
+ * Optional "sticky" times (set pending attributes)
+ */
+ mutex_enter(&of->f_mutex);
+ pa = &of->f_pending_attr;
+ if (nvlist_lookup_hrtime(nvl, "atime", &hrt) == 0) {
+ hrt2ts(hrt, &pa->sa_vattr.va_atime);
+ pa->sa_mask |= SMB_AT_ATIME;
+ }
+ if (nvlist_lookup_hrtime(nvl, "mtime", &hrt) == 0) {
+ hrt2ts(hrt, &pa->sa_vattr.va_mtime);
+ pa->sa_mask |= SMB_AT_MTIME;
+ }
+ if (nvlist_lookup_hrtime(nvl, "ctime", &hrt) == 0) {
+ hrt2ts(hrt, &pa->sa_vattr.va_ctime);
+ pa->sa_mask |= SMB_AT_CTIME;
+ }
+ mutex_exit(&of->f_mutex);
+
+ /*
+ * Make durable and persistent.
+ * See smb2_dh_make_persistent()
+ */
+ of->dh_vers = SMB2_DURABLE_V2;
+ bcopy(op->create_guid, of->dh_create_guid, UUID_LEN);
+ of->dh_persist = B_TRUE;
+ of->dh_nvfile = str_node;
+ smb_node_ref(str_node);
+ of->dh_nvlist = nvl;
+ nvl = NULL;
+
+ /*
+ * Now make it state orphaned...
+ * See smb_ofile_drop(), then
+ * smb_ofile_save_dh()
+ */
+ mutex_enter(&of->f_mutex);
+ of->f_state = SMB_OFILE_STATE_SAVE_DH;
+ of->dh_timeout_offset = MSEC2NSEC(smb2_persist_timeout);
+ mutex_exit(&of->f_mutex);
+
+ /*
+ * Finished!
+ */
+ return (of);
+
+errout:
+ if (did_open) {
+ smb_ofile_close(of, 0);
+ smb_ofile_release(of);
+ } else {
+ ASSERT(of == NULL);
+ }
+
+ if (nvl != NULL)
+ nvlist_free(nvl);
+
+ return (NULL);
+}
+
+static int
+smb2_dh_read_nvlist(smb_request_t *sr, smb_node_t *node,
+ struct nvlist **nvlpp)
+{
+ smb_attr_t attr;
+ iovec_t iov;
+ uio_t uio;
+ smb_kshare_t *shr = sr->arg.tcon.si;
+ cred_t *kcr = zone_kcred();
+ size_t flen;
+ int rc;
+
+ bzero(&attr, sizeof (attr));
+ attr.sa_mask = SMB_AT_SIZE;
+ rc = smb_node_getattr(NULL, node, kcr, NULL, &attr);
+ if (rc != 0) {
+ cmn_err(CE_NOTE, "CA import (%s/%s) getattr rc=%d",
+ shr->shr_path, node->od_name, rc);
+ return (rc);
+ }
+
+ if (attr.sa_vattr.va_size < 4 ||
+ attr.sa_vattr.va_size > sr->sr_req_length) {
+ cmn_err(CE_NOTE, "CA import (%s/%s) bad size=%" PRIu64,
+ shr->shr_path, node->od_name,
+ (uint64_t)attr.sa_vattr.va_size);
+ return (EINVAL);
+ }
+ flen = (size_t)attr.sa_vattr.va_size;
+
+ bzero(&uio, sizeof (uio));
+ iov.iov_base = sr->sr_request_buf;
+ iov.iov_len = flen;
+ uio.uio_iov = &iov;
+ uio.uio_iovcnt = 1;
+ uio.uio_resid = flen;
+ uio.uio_segflg = UIO_SYSSPACE;
+ uio.uio_extflg = UIO_COPY_DEFAULT;
+ rc = smb_fsop_read(sr, kcr, node, NULL, &uio);
+ if (rc != 0) {
+ cmn_err(CE_NOTE, "CA import (%s/%s) read, rc=%d",
+ shr->shr_path, node->od_name, rc);
+ return (rc);
+ }
+ if (uio.uio_resid != 0) {
+ cmn_err(CE_NOTE, "CA import (%s/%s) short read",
+ shr->shr_path, node->od_name);
+ return (EIO);
+ }
+
+ rc = nvlist_unpack(sr->sr_request_buf, flen, nvlpp, KM_SLEEP);
+ if (rc != 0) {
+ cmn_err(CE_NOTE, "CA import (%s/%s) unpack, rc=%d",
+ shr->shr_path, node->od_name, rc);
+ return (rc);
+ }
+
+ return (0);
+}
+
+/*
+ * Setup a vestigial credential in of->f_cr just good enough for
+ * smb_is_same_user to determine if the caller owned this ofile.
+ * At reconnect, of->f_cr will be replaced with the caller's.
+ */
+static int
+smb2_dh_import_cred(smb_ofile_t *of, char *sidstr)
+{
+#ifdef _FAKE_KERNEL
+ _NOTE(ARGUNUSED(sidstr))
+ /* fksmbd doesn't have real credentials. */
+ of->f_cr = CRED();
+ crhold(of->f_cr);
+#else
+ char tmpstr[SMB_SID_STRSZ];
+ ksid_t ksid;
+ cred_t *cr, *oldcr;
+ int rc;
+
+ (void) strlcpy(tmpstr, sidstr, sizeof (tmpstr));
+ bzero(&ksid, sizeof (ksid));
+
+ rc = smb_sid_splitstr(tmpstr, &ksid.ks_rid);
+ if (rc != 0)
+ return (rc);
+ cr = crget();
+
+ ksid.ks_domain = ksid_lookupdomain(tmpstr);
+ crsetsid(cr, &ksid, KSID_USER);
+ ksiddomain_hold(ksid.ks_domain);
+ crsetsid(cr, &ksid, KSID_OWNER);
+
+ /*
+ * Just to avoid leaving the KSID_GROUP slot NULL,
+ * put the "everyone" SID there (S-1-1-0).
+ */
+ ksid.ks_domain = ksid_lookupdomain("S-1-1");
+ ksid.ks_rid = 0;
+ crsetsid(cr, &ksid, KSID_GROUP);
+
+ oldcr = of->f_cr;
+ of->f_cr = cr;
+ if (oldcr != NULL)
+ crfree(oldcr);
+#endif
+
+ return (0);
+}
+
+/*
+ * Set Delete-on-Close (DoC) on the persistent state file so it will be
+ * removed when the last ref. goes away (in smb2_dh_close_persistent).
+ *
+ * This is called in just two places:
+ * (1) SMB2_close request -- client tells us to destroy the handle.
+ * (2) smb2_dh_expire -- client has forgotten about this handle.
+ * All other (server-initiated) close calls should leave these
+ * persistent state files in the file system.
+ */
+void
+smb2_dh_setdoc_persistent(smb_ofile_t *of)
+{
+ smb_node_t *strnode;
+ uint32_t status;
+
+ mutex_enter(&of->dh_nvlock);
+ if ((strnode = of->dh_nvfile) != NULL)
+ smb_node_ref(strnode);
+ mutex_exit(&of->dh_nvlock);
+
+ if (strnode != NULL) {
+ status = smb_node_set_delete_on_close(strnode,
+ zone_kcred(), SMB_CASE_SENSITIVE);
+ if (status != 0) {
+ cmn_err(CE_WARN, "Can't set DoC on CA file: %s",
+ strnode->od_name);
+ DTRACE_PROBE1(rm__ca__err, smb_ofile_t *, of);
+ }
+ smb_node_release(strnode);
+ }
+}
+
+/*
+ * During ofile close, free the persistent handle state nvlist and
+ * drop our reference to the state file node (which may unlink it
+ * if smb2_dh_setdoc_persistent was called).
+ */
+void
+smb2_dh_close_persistent(smb_ofile_t *of)
+{
+ smb_node_t *strnode;
+ struct nvlist *nvl;
+
+ /*
+ * Clear out nvlist and stream linkage
+ */
+ mutex_enter(&of->dh_nvlock);
+ strnode = of->dh_nvfile;
+ of->dh_nvfile = NULL;
+ nvl = of->dh_nvlist;
+ of->dh_nvlist = NULL;
+ mutex_exit(&of->dh_nvlock);
+
+ if (nvl != NULL)
+ nvlist_free(nvl);
+
+ if (strnode != NULL)
+ smb_node_release(strnode);
+}
+
+/*
+ * Make this durable handle persistent.
+ * If we succeed, set of->dh_persist = TRUE.
+ */
+int
+smb2_dh_make_persistent(smb_request_t *sr, smb_ofile_t *of)
+{
+ char fname[DH_SN_SIZE];
+ char sidstr[SMB_SID_STRSZ];
+ smb_attr_t attr;
+ smb_arg_open_t *op = &sr->arg.open;
+ cred_t *kcr = zone_kcred();
+ smb_node_t *dnode = of->f_tree->t_snode;
+ smb_node_t *fnode = NULL;
+ ksid_t *ksid;
+ int rc;
+
+ ASSERT(of->dh_nvfile == NULL);
+
+ /*
+ * Create the persistent handle nvlist file.
+ * It's a named stream in the share root.
+ */
+ smb2_dh_make_stream_name(fname, sizeof (fname), of->f_persistid);
+
+ bzero(&attr, sizeof (attr));
+ attr.sa_mask = SMB_AT_TYPE | SMB_AT_MODE | SMB_AT_SIZE;
+ attr.sa_vattr.va_type = VREG;
+ attr.sa_vattr.va_mode = 0640;
+ attr.sa_vattr.va_size = 4;
+ rc = smb_fsop_create(sr, kcr, dnode, fname, &attr, &fnode);
+ if (rc != 0)
+ return (rc);
+
+ mutex_enter(&of->dh_nvlock);
+
+ /* fnode is held. rele in smb2_dh_close_persistent */
+ of->dh_nvfile = fnode;
+ (void) nvlist_alloc(&of->dh_nvlist, NV_UNIQUE_NAME, KM_SLEEP);
+
+ /*
+ * Want the ksid as a string
+ */
+ ksid = crgetsid(of->f_user->u_cred, KSID_USER);
+ (void) snprintf(sidstr, sizeof (sidstr), "%s-%u",
+ ksid->ks_domain->kd_name, ksid->ks_rid);
+
+ /*
+ * Fill in the fixed parts of the nvlist
+ */
+ (void) nvlist_add_uint32(of->dh_nvlist,
+ "info_version", smb2_ca_info_version);
+ (void) nvlist_add_string(of->dh_nvlist,
+ "owner_sid", sidstr);
+ (void) nvlist_add_string(of->dh_nvlist,
+ "share_name", of->f_tree->t_sharename);
+ (void) nvlist_add_uint64(of->dh_nvlist,
+ "file_persistid", of->f_persistid);
+ (void) nvlist_add_uint8_array(of->dh_nvlist,
+ "file_guid", of->dh_create_guid, UUID_LEN);
+ (void) nvlist_add_string(of->dh_nvlist,
+ "client_ipaddr", sr->session->ip_addr_str);
+ (void) nvlist_add_uint8_array(of->dh_nvlist,
+ "client_uuid", sr->session->clnt_uuid, UUID_LEN);
+ (void) nvlist_add_string(of->dh_nvlist,
+ "path_name", op->fqi.fq_path.pn_path);
+ (void) nvlist_add_uint32(of->dh_nvlist,
+ "granted_access", of->f_granted_access);
+ (void) nvlist_add_uint32(of->dh_nvlist,
+ "share_access", of->f_share_access);
+ (void) nvlist_add_uint32(of->dh_nvlist,
+ "create_options", of->f_create_options);
+ if (of->f_lease != NULL) {
+ smb_lease_t *ls = of->f_lease;
+ (void) nvlist_add_uint8_array(of->dh_nvlist,
+ "lease_uuid", ls->ls_key, 16);
+ (void) nvlist_add_uint32(of->dh_nvlist,
+ "lease_state", ls->ls_state);
+ (void) nvlist_add_uint16(of->dh_nvlist,
+ "lease_epoch", ls->ls_epoch);
+ (void) nvlist_add_uint16(of->dh_nvlist,
+ "lease_version", ls->ls_version);
+ } else {
+ (void) nvlist_add_uint32(of->dh_nvlist,
+ "oplock_state", of->f_oplock.og_state);
+ }
+ mutex_exit(&of->dh_nvlock);
+
+ smb2_dh_update_locks(sr, of);
+
+ /* Tell sr update nvlist file */
+ sr->dh_nvl_dirty = B_TRUE;
+
+ return (0);
+}
+
+void
+smb2_dh_update_nvfile(smb_request_t *sr)
+{
+ smb_attr_t attr;
+ iovec_t iov;
+ uio_t uio;
+ smb_ofile_t *of = sr->fid_ofile;
+ cred_t *kcr = zone_kcred();
+ char *buf = NULL;
+ size_t buflen = 0;
+ uint32_t wcnt;
+ int rc;
+
+ if (of == NULL || of->dh_persist == B_FALSE)
+ return;
+
+ mutex_enter(&of->dh_nvlock);
+ if (of->dh_nvlist == NULL || of->dh_nvfile == NULL) {
+ mutex_exit(&of->dh_nvlock);
+ return;
+ }
+
+ rc = nvlist_size(of->dh_nvlist, &buflen, NV_ENCODE_XDR);
+ if (rc != 0)
+ goto out;
+ buf = kmem_zalloc(buflen, KM_SLEEP);
+
+ rc = nvlist_pack(of->dh_nvlist, &buf, &buflen,
+ NV_ENCODE_XDR, KM_SLEEP);
+ if (rc != 0)
+ goto out;
+
+ bzero(&attr, sizeof (attr));
+ attr.sa_mask = SMB_AT_SIZE;
+ attr.sa_vattr.va_size = buflen;
+ rc = smb_node_setattr(sr, of->dh_nvfile, kcr, NULL, &attr);
+ if (rc != 0)
+ goto out;
+
+ bzero(&uio, sizeof (uio));
+ iov.iov_base = (void *) buf;
+ iov.iov_len = buflen;
+ uio.uio_iov = &iov;
+ uio.uio_iovcnt = 1;
+ uio.uio_resid = buflen;
+ uio.uio_segflg = UIO_SYSSPACE;
+ uio.uio_extflg = UIO_COPY_DEFAULT;
+ rc = smb_fsop_write(sr, kcr, of->dh_nvfile,
+ NULL, &uio, &wcnt, 0);
+ if (rc == 0 && wcnt != buflen)
+ rc = EIO;
+
+out:
+ mutex_exit(&of->dh_nvlock);
+
+ if (rc != 0) {
+ cmn_err(CE_WARN,
+ "clnt(%s) failed to update persistent handle, rc=%d",
+ sr->session->ip_addr_str, rc);
+ }
+
+ if (buf != NULL) {
+ kmem_free(buf, buflen);
+ }
+}
+
+/*
+ * Called after f_oplock (and lease) changes
+ * If lease, update: lease_state, lease_epoch
+ * else (oplock) update: oplock_state
+ */
+void
+smb2_dh_update_oplock(smb_request_t *sr, smb_ofile_t *of)
+{
+ smb_lease_t *ls;
+
+ mutex_enter(&of->dh_nvlock);
+ if (of->dh_nvlist == NULL) {
+ mutex_exit(&of->dh_nvlock);
+ return;
+ }
+
+ if (of->f_lease != NULL) {
+ ls = of->f_lease;
+ (void) nvlist_add_uint32(of->dh_nvlist,
+ "lease_state", ls->ls_state);
+ (void) nvlist_add_uint16(of->dh_nvlist,
+ "lease_epoch", ls->ls_epoch);
+ } else {
+ (void) nvlist_add_uint32(of->dh_nvlist,
+ "oplock_state", of->f_oplock.og_state);
+ }
+ mutex_exit(&of->dh_nvlock);
+
+ sr->dh_nvl_dirty = B_TRUE;
+}
+
+/*
+ * Save locks from this ofile as an array of uint64_t, where the
+ * elements are triplets: (start, length, (pid << 32) | type)
+ * Note pid should always be zero for SMB2, so we could use
+ * that 32-bit spot for something else if needed.
+ */
+void
+smb2_dh_update_locks(smb_request_t *sr, smb_ofile_t *of)
+{
+ uint8_t lseq[SMB_OFILE_LSEQ_MAX];
+ smb_node_t *node = of->f_node;
+ smb_llist_t *llist = &node->n_lock_list;
+ size_t vec_sz; // storage size
+ uint_t my_cnt = 0;
+ uint64_t *vec = NULL;
+ struct nvlk *nlp;
+ smb_lock_t *lock;
+
+ smb_llist_enter(llist, RW_READER);
+ vec_sz = (llist->ll_count + 1) * sizeof (struct nvlk);
+ vec = kmem_alloc(vec_sz, KM_SLEEP);
+ nlp = (struct nvlk *)vec;
+ for (lock = smb_llist_head(llist);
+ lock != NULL;
+ lock = smb_llist_next(llist, lock)) {
+ if (lock->l_file != of)
+ continue;
+ nlp->lk_start = lock->l_start;
+ nlp->lk_len = lock->l_length;
+ nlp->lk_pid = lock->l_pid;
+ nlp->lk_type = lock->l_type;
+ nlp++;
+ my_cnt++;
+ }
+ smb_llist_exit(llist);
+
+ mutex_enter(&of->f_mutex);
+ bcopy(of->f_lock_seq, lseq, sizeof (lseq));
+ mutex_exit(&of->f_mutex);
+
+ mutex_enter(&of->dh_nvlock);
+ if (of->dh_nvlist != NULL) {
+
+ (void) nvlist_add_uint64_array(of->dh_nvlist,
+ "locks", vec, my_cnt * 3);
+
+ (void) nvlist_add_uint8_array(of->dh_nvlist,
+ "lockseq", lseq, sizeof (lseq));
+ }
+ mutex_exit(&of->dh_nvlock);
+
+ kmem_free(vec, vec_sz);
+
+ sr->dh_nvl_dirty = B_TRUE;
+}
+
+/*
+ * Save "sticky" times
+ */
+void
+smb2_dh_update_times(smb_request_t *sr, smb_ofile_t *of, smb_attr_t *attr)
+{
+ hrtime_t t;
+
+ mutex_enter(&of->dh_nvlock);
+ if (of->dh_nvlist == NULL) {
+ mutex_exit(&of->dh_nvlock);
+ return;
+ }
+
+ if (attr->sa_mask & SMB_AT_ATIME) {
+ t = ts2hrt(&attr->sa_vattr.va_atime);
+ (void) nvlist_add_hrtime(of->dh_nvlist, "atime", t);
+ }
+ if (attr->sa_mask & SMB_AT_MTIME) {
+ t = ts2hrt(&attr->sa_vattr.va_mtime);
+ (void) nvlist_add_hrtime(of->dh_nvlist, "mtime", t);
+ }
+ if (attr->sa_mask & SMB_AT_CTIME) {
+ t = ts2hrt(&attr->sa_vattr.va_ctime);
+ (void) nvlist_add_hrtime(of->dh_nvlist, "ctime", t);
+ }
+ mutex_exit(&of->dh_nvlock);
+
+ sr->dh_nvl_dirty = B_TRUE;
+}
+
+
+/*
* Requirements for ofile found during reconnect (MS-SMB2 3.3.5.9.7):
* - security descriptor must match provided descriptor
*
@@ -332,6 +1474,8 @@ smb2_dh_expire(void *arg)
{
smb_ofile_t *of = (smb_ofile_t *)arg;
+ if (of->dh_persist)
+ smb2_dh_setdoc_persistent(of);
smb_ofile_close(of, 0);
smb_ofile_release(of);
}
@@ -383,9 +1527,96 @@ smb2_durable_timers(smb_server_t *sv)
}
/*
+ * This is called when we're about to add a new open to some node.
+ * If we still have orphaned durable handles on this node, let's
+ * assume the client has lost interest in those and close them;
+ * otherwise we might conflict with our own orphaned handles.
+ *
+ * We need this because we import persistent handles "speculatively"
+ * during share import (before the client ever asks for reconnect).
+ * That allows us to avoid any need for a "create blackout" (or
+ * "grace period") because the imported handles prevent unwanted
+ * conflicting opens from other clients. However, if some client
+ * "forgets" about a persistent handle (*cough* Hyper-V) and tries
+ * a new (conflicting) open instead of a reconnect, that might
+ * fail unless we expire our orphaned durable handles first.
+ *
+ * Logic similar to smb_node_open_check()
+ */
+void
+smb2_dh_close_my_orphans(smb_request_t *sr, smb_ofile_t *new_of)
+{
+ smb_node_t *node = new_of->f_node;
+ smb_ofile_t *of;
+
+ SMB_NODE_VALID(node);
+
+ smb_llist_enter(&node->n_ofile_list, RW_READER);
+ for (of = smb_llist_head(&node->n_ofile_list);
+ of != NULL;
+ of = smb_llist_next(&node->n_ofile_list, of)) {
+
+ /* Same client? */
+ if (of->f_lease != NULL &&
+ bcmp(sr->session->clnt_uuid,
+ of->f_lease->ls_clnt, 16) != 0)
+ continue;
+
+ if (!smb_is_same_user(sr->user_cr, of->f_cr))
+ continue;
+
+ mutex_enter(&of->f_mutex);
+ if (of->f_state == SMB_OFILE_STATE_ORPHANED) {
+ of->f_state = SMB_OFILE_STATE_EXPIRED;
+ /* inline smb_ofile_hold_internal() */
+ of->f_refcnt++;
+ smb_llist_post(&node->n_ofile_list,
+ of, smb2_dh_expire);
+ }
+ mutex_exit(&of->f_mutex);
+ }
+
+ smb_llist_exit(&node->n_ofile_list);
+}
+
+/*
+ * Called for each orphaned DH during shutdown.
+ * Clean out any in-memory state, but leave any
+ * on-disk persistent handle state in place.
+ */
+static void
+smb2_dh_cleanup(void *arg)
+{
+ smb_ofile_t *of = (smb_ofile_t *)arg;
+ smb_node_t *strnode;
+ struct nvlist *nvl;
+
+ /*
+ * Intentionally skip smb2_dh_close_persistent by
+ * clearing dh_nvfile before smb_ofile_close().
+ */
+ mutex_enter(&of->dh_nvlock);
+ strnode = of->dh_nvfile;
+ of->dh_nvfile = NULL;
+ nvl = of->dh_nvlist;
+ of->dh_nvlist = NULL;
+ mutex_exit(&of->dh_nvlock);
+
+ if (nvl != NULL)
+ nvlist_free(nvl);
+
+ if (strnode != NULL)
+ smb_node_release(strnode);
+
+ smb_ofile_close(of, 0);
+ smb_ofile_release(of);
+}
+
+/*
* Clean out durable handles during shutdown.
- * Like, smb2_durable_timers but expire all,
- * and make sure the hash buckets are empty.
+ *
+ * Like smb2_durable_timers, but clean up only the in-memory state
+ * and leave any persistent state in place for later reconnect.
*/
void
smb2_dh_shutdown(smb_server_t *sv)
@@ -410,7 +1641,7 @@ smb2_dh_shutdown(smb_server_t *sv)
of->f_state = SMB_OFILE_STATE_EXPIRED;
/* inline smb_ofile_hold_internal() */
of->f_refcnt++;
- smb_llist_post(bucket, of, smb2_dh_expire);
+ smb_llist_post(bucket, of, smb2_dh_cleanup);
break;
default:
break;
diff --git a/usr/src/uts/common/fs/smbsrv/smb2_lease.c b/usr/src/uts/common/fs/smbsrv/smb2_lease.c
index d2bf4805b3..95d7d9c7f1 100644
--- a/usr/src/uts/common/fs/smbsrv/smb2_lease.c
+++ b/usr/src/uts/common/fs/smbsrv/smb2_lease.c
@@ -122,11 +122,10 @@ smb_hash_uuid(const uint8_t *uuid)
* Handling the SMB2_CREATE_REQUEST_LEASE Create Context
*/
uint32_t
-smb2_lease_create(smb_request_t *sr)
+smb2_lease_create(smb_request_t *sr, uint8_t *clnt)
{
smb_arg_open_t *op = &sr->arg.open;
uint8_t *key = op->lease_key;
- uint8_t *clnt = sr->session->clnt_uuid;
smb_ofile_t *of = sr->fid_ofile;
smb_hash_t *ht = sr->sr_server->sv_lease_ht;
smb_llist_t *bucket;
diff --git a/usr/src/uts/common/fs/smbsrv/smb2_lock.c b/usr/src/uts/common/fs/smbsrv/smb2_lock.c
index c6e8236cce..cc05f96e75 100644
--- a/usr/src/uts/common/fs/smbsrv/smb2_lock.c
+++ b/usr/src/uts/common/fs/smbsrv/smb2_lock.c
@@ -142,6 +142,10 @@ smb2_lock(smb_request_t *sr)
status = smb2_locks(sr);
}
+ if (sr->fid_ofile->dh_persist) {
+ smb2_dh_update_locks(sr, sr->fid_ofile);
+ }
+
errout:
sr->smb2_status = status;
DTRACE_SMB2_DONE(op__Lock, smb_request_t *, sr);
diff --git a/usr/src/uts/common/fs/smbsrv/smb2_negotiate.c b/usr/src/uts/common/fs/smbsrv/smb2_negotiate.c
index cbdd5f9fb5..5bc7b01260 100644
--- a/usr/src/uts/common/fs/smbsrv/smb2_negotiate.c
+++ b/usr/src/uts/common/fs/smbsrv/smb2_negotiate.c
@@ -26,8 +26,12 @@ uint32_t smb2srv_capabilities =
SMB2_CAP_DFS |
SMB2_CAP_LEASING |
SMB2_CAP_LARGE_MTU |
+ SMB2_CAP_PERSISTENT_HANDLES |
SMB2_CAP_ENCRYPTION;
+/* These are the only capabilities defined for SMB2.X */
+#define SMB_2X_CAPS (SMB2_CAP_DFS | SMB2_CAP_LEASING | SMB2_CAP_LARGE_MTU)
+
/*
* These are not intended as customer tunables, but dev. & test folks
* might want to adjust them (with caution).
@@ -350,16 +354,26 @@ smb2_negotiate_common(smb_request_t *sr, uint16_t version)
/*
* [MS-SMB2] 3.3.5.4 Receiving an SMB2 NEGOTIATE Request
*
- * Only set CAP_ENCRYPTION if this is 3.0 or 3.0.2 and
- * the client has it set.
+ * The SMB2.x capabilities are returned without regard for
+ * what capabilities the client provided in the request.
+ * The SMB3.x capabilities returned are the traditional
+ * logical AND of server and client capabilities.
+ *
+ * One additional check: If KCF is missing something we
+ * require for encryption, turn off that capability.
*/
-
- if (s->dialect < SMB_VERS_3_0 ||
- !SMB3_CLIENT_ENCRYPTS(sr) ||
- smb3_encrypt_init_mech(s) != 0)
- s->srv_cap = smb2srv_capabilities & ~SMB2_CAP_ENCRYPTION;
- else
- s->srv_cap = smb2srv_capabilities;
+ if (s->dialect < SMB_VERS_3_0) {
+ /* SMB 2.x */
+ s->srv_cap = smb2srv_capabilities & SMB_2X_CAPS;
+ } else {
+ /* SMB 3.0 or later */
+ s->srv_cap = smb2srv_capabilities &
+ (SMB_2X_CAPS | s->capabilities);
+ if ((s->srv_cap & SMB2_CAP_ENCRYPTION) != 0 &&
+ smb3_encrypt_init_mech(s) != 0) {
+ s->srv_cap &= ~SMB2_CAP_ENCRYPTION;
+ }
+ }
/*
* See notes above smb2_max_rwsize, smb2_old_rwsize
diff --git a/usr/src/uts/common/fs/smbsrv/smb2_tree_connect.c b/usr/src/uts/common/fs/smbsrv/smb2_tree_connect.c
index e11a8855f7..34a74f564b 100644
--- a/usr/src/uts/common/fs/smbsrv/smb2_tree_connect.c
+++ b/usr/src/uts/common/fs/smbsrv/smb2_tree_connect.c
@@ -19,6 +19,8 @@
#include <smbsrv/smb2_kproto.h>
+#define SMB2_SHARE_CAP_CA SMB2_SHARE_CAP_CONTINUOUS_AVAILABILITY
+
smb_sdrc_t
smb2_tree_connect(smb_request_t *sr)
{
@@ -114,6 +116,10 @@ smb2_tree_connect(smb_request_t *sr)
ShareFlags = 0;
Capabilities = 0;
+ if ((tree->t_flags & SMB_TREE_DFSROOT) != 0)
+ Capabilities |= SMB2_SHARE_CAP_DFS;
+ if ((tree->t_flags & SMB_TREE_CA) != 0)
+ Capabilities |= SMB2_SHARE_CAP_CA;
/*
* SMB2 Tree Connect reply
diff --git a/usr/src/uts/common/fs/smbsrv/smb_common_open.c b/usr/src/uts/common/fs/smbsrv/smb_common_open.c
index 161f2790f6..0ef06a3c3e 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_common_open.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_common_open.c
@@ -40,9 +40,6 @@
int smb_session_ofile_max = 32768;
-static volatile uint32_t smb_fids = 0;
-#define SMB_UNIQ_FID() atomic_inc_32_nv(&smb_fids)
-
extern uint32_t smb_is_executable(char *);
static void smb_delete_new_object(smb_request_t *);
static int smb_set_open_attributes(smb_request_t *, smb_ofile_t *);
@@ -280,6 +277,7 @@ smb_common_open(smb_request_t *sr)
boolean_t fnode_shrlk = B_FALSE;
boolean_t did_open = B_FALSE;
boolean_t did_break_handle = B_FALSE;
+ boolean_t did_cleanup_orphans = B_FALSE;
/* Get out now if we've been cancelled. */
mutex_enter(&sr->sr_mutex);
@@ -350,10 +348,9 @@ smb_common_open(smb_request_t *sr)
/*
* Most of IPC open is handled in smb_opipe_open()
*/
- uniq_fid = SMB_UNIQ_FID();
op->create_options = 0;
of = smb_ofile_alloc(sr, op, NULL, SMB_FTYPE_MESG_PIPE,
- tree_fid, uniq_fid);
+ tree_fid);
tree_fid = 0; // given to the ofile
status = smb_opipe_open(sr, of);
smb_threshold_exit(&sv->sv_opipe_ct);
@@ -450,13 +447,6 @@ smb_common_open(smb_request_t *sr)
goto errout;
}
- /*
- * The uniq_fid is a CIFS-server-wide unique identifier for an ofile
- * which is used to uniquely identify open instances for the
- * VFS share reservation and POSIX locks.
- */
- uniq_fid = SMB_UNIQ_FID();
-
if (last_comp_found) {
smb_node_unlock(dnode);
@@ -584,10 +574,14 @@ smb_common_open(smb_request_t *sr)
* affect the sharing checks, and may delete the file due to
* DELETE_ON_CLOSE. This may block, so set the file opening
* count before oplock stuff.
+ *
+ * Need the "proposed" ofile (and its TargetOplockKey) for
+ * correct oplock break semantics.
*/
of = smb_ofile_alloc(sr, op, fnode, SMB_FTYPE_DISK,
- tree_fid, uniq_fid);
+ tree_fid);
tree_fid = 0; // given to the ofile
+ uniq_fid = of->f_uniqid;
smb_node_inc_opening_count(fnode);
opening_incr = B_TRUE;
@@ -683,6 +677,22 @@ smb_common_open(smb_request_t *sr)
}
/*
+ * If we still have orphaned durable handles on this file,
+ * let's assume the client has lost interest in those and
+ * close them so they don't cause sharing violations.
+ * See longer comment at smb2_dh_close_my_orphans().
+ */
+ if (status == NT_STATUS_SHARING_VIOLATION &&
+ sr->session->dialect >= SMB_VERS_2_BASE &&
+ did_cleanup_orphans == B_FALSE) {
+
+ did_cleanup_orphans = B_TRUE;
+ smb2_dh_close_my_orphans(sr, of);
+
+ goto shrlock_again;
+ }
+
+ /*
* SMB1 expects a 1 sec. delay before returning a
* sharing violation error. If breaking oplocks
* above took less than a sec, wait some more.
@@ -904,27 +914,17 @@ create:
goto errout;
}
+ /* Create done. */
smb_node_unlock(dnode);
dnode_wlock = B_FALSE;
created = B_TRUE;
op->action_taken = SMB_OACT_CREATED;
+ /* Note: hold from create */
fnode = op->fqi.fq_fnode;
fnode_held = B_TRUE;
- smb_node_inc_opening_count(fnode);
- opening_incr = B_TRUE;
-
- smb_node_wrlock(fnode);
- fnode_wlock = B_TRUE;
-
- status = smb_fsop_shrlock(sr->user_cr, fnode, uniq_fid,
- op->desired_access, op->share_access);
- if (status != 0)
- goto errout;
- fnode_shrlk = B_TRUE;
-
if (max_requested) {
smb_fsop_eaccess(sr, sr->user_cr, fnode, &max_allowed);
op->desired_access |= max_allowed;
@@ -937,6 +937,27 @@ create:
*/
op->desired_access |= (READ_CONTROL | FILE_READ_ATTRIBUTES);
+ /* Allocate the ofile and fill in most of it. */
+ of = smb_ofile_alloc(sr, op, fnode, SMB_FTYPE_DISK,
+ tree_fid);
+ tree_fid = 0; // given to the ofile
+ uniq_fid = of->f_uniqid;
+
+ smb_node_inc_opening_count(fnode);
+ opening_incr = B_TRUE;
+
+ /*
+ * Share access checks...
+ */
+ smb_node_wrlock(fnode);
+ fnode_wlock = B_TRUE;
+
+ status = smb_fsop_shrlock(sr->user_cr, fnode, uniq_fid,
+ op->desired_access, op->share_access);
+ if (status != 0)
+ goto errout;
+ fnode_shrlk = B_TRUE;
+
/*
* MS-FSA 2.1.5.1.1
* If the Oplock member of the DirectoryStream in
@@ -951,9 +972,6 @@ create:
*
* The break never blocks, so ignore the return.
*/
- of = smb_ofile_alloc(sr, op, fnode, SMB_FTYPE_DISK,
- tree_fid, uniq_fid);
- tree_fid = 0; // given to the ofile
(void) smb_oplock_break_PARENT(dnode, of);
}
@@ -1052,8 +1070,9 @@ create:
errout:
if (did_open) {
smb_ofile_close(of, 0);
- /* Don't also ofile_free */
+ /* rele via sr->fid_ofile */
} else if (of != NULL) {
+ /* No other refs possible */
smb_ofile_free(of);
}
diff --git a/usr/src/uts/common/fs/smbsrv/smb_cred.c b/usr/src/uts/common/fs/smbsrv/smb_cred.c
index f47f5e72a5..8431db4653 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_cred.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_cred.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2017 Nexenta Systems, Inc. All rights reserved.
*/
/*
@@ -172,3 +172,19 @@ smb_cred_set_sidlist(smb_ids_t *token_grps)
return (lp);
}
+
+/*
+ * Special variant of smb_cred_create() used when we need an
+ * SMB kcred (e.g. DH import). The returned cred must be
+ * from crget() so it can be passed to smb_user_setcred().
+ */
+cred_t *
+smb_kcred_create(void)
+{
+ cred_t *cr;
+
+ cr = crget();
+ ASSERT(cr != NULL);
+
+ return (cr);
+}
diff --git a/usr/src/uts/common/fs/smbsrv/smb_fsops.c b/usr/src/uts/common/fs/smbsrv/smb_fsops.c
index 6aa4074221..1b7c3a9fa9 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_fsops.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_fsops.c
@@ -365,6 +365,9 @@ smb_fsop_create(smb_request_t *sr, cred_t *cr, smb_node_t *dnode,
* because we want to set the UID and GID on the named
* stream in this case for consistency with the (unnamed
* stream) file (see comments for smb_vop_setattr()).
+ *
+ * Note that some stream "types" are "restricted" and only
+ * internal callers (cr == kcred) can create those.
*/
static int
smb_fsop_create_stream(smb_request_t *sr, cred_t *cr,
@@ -379,6 +382,9 @@ smb_fsop_create_stream(smb_request_t *sr, cred_t *cr,
int rc = 0;
boolean_t fcreate = B_FALSE;
+ if (cr != kcr && smb_strname_restricted(sname))
+ return (EACCES);
+
/* Look up / create the unnamed stream, fname */
rc = smb_fsop_lookup(sr, cr, flags | SMB_FOLLOW_LINKS,
sr->tid_tree->t_snode, dnode, fname, &fnode);
@@ -663,6 +669,9 @@ smb_fsop_mkdir(
* It is assumed that a reference exists on snode coming into this routine.
*
* A null smb_request might be passed to this function.
+ *
+ * Note that some stream "types" are "restricted" and only
+ * internal callers (cr == kcred) can remove those.
*/
int
smb_fsop_remove(
@@ -698,6 +707,11 @@ smb_fsop_remove(
sname = kmem_alloc(MAXNAMELEN, KM_SLEEP);
if (dnode->flags & NODE_XATTR_DIR) {
+ if (cr != zone_kcred() && smb_strname_restricted(name)) {
+ rc = EACCES;
+ goto out;
+ }
+
fnode = dnode->n_dnode;
rc = smb_vop_stream_remove(fnode->vp, name, flags, cr);
@@ -709,6 +723,11 @@ smb_fsop_remove(
} else if (smb_is_stream_name(name)) {
smb_stream_parse_name(name, fname, sname);
+ if (cr != zone_kcred() && smb_strname_restricted(sname)) {
+ rc = EACCES;
+ goto out;
+ }
+
/*
* Look up the unnamed stream (i.e. fname).
* Unmangle processing will be done on fname
@@ -719,9 +738,7 @@ smb_fsop_remove(
sr->tid_tree->t_snode, dnode, fname, &fnode);
if (rc != 0) {
- kmem_free(fname, MAXNAMELEN);
- kmem_free(sname, MAXNAMELEN);
- return (rc);
+ goto out;
}
/*
@@ -744,9 +761,7 @@ smb_fsop_remove(
if (rc == ENOENT) {
if (!SMB_TREE_SUPPORTS_SHORTNAMES(sr) ||
!smb_maybe_mangled(name)) {
- kmem_free(fname, MAXNAMELEN);
- kmem_free(sname, MAXNAMELEN);
- return (rc);
+ goto out;
}
longname = kmem_alloc(MAXNAMELEN, KM_SLEEP);
@@ -776,6 +791,7 @@ smb_fsop_remove(
}
}
+out:
kmem_free(fname, MAXNAMELEN);
kmem_free(sname, MAXNAMELEN);
@@ -1609,6 +1625,9 @@ smb_fsop_statfs(
* check is performed on the named stream in case it has been
* quarantined. kcred is used to avoid issues with the permissions
* set on the extended attribute file representing the named stream.
+ *
+ * Note that some stream "types" are "restricted" and only
+ * internal callers (cr == kcred) can access those.
*/
int
smb_fsop_access(smb_request_t *sr, cred_t *cr, smb_node_t *snode,
@@ -1639,9 +1658,14 @@ smb_fsop_access(smb_request_t *sr, cred_t *cr, smb_node_t *snode,
unnamed_node = SMB_IS_STREAM(snode);
if (unnamed_node) {
+ cred_t *kcr = zone_kcred();
+
ASSERT(unnamed_node->n_magic == SMB_NODE_MAGIC);
ASSERT(unnamed_node->n_state != SMB_NODE_STATE_DESTROYING);
+ if (cr != kcr && smb_strname_restricted(snode->od_name))
+ return (NT_STATUS_ACCESS_DENIED);
+
/*
* Perform VREAD access check on the named stream in case it
* is quarantined. kcred is passed to smb_vop_access so it
@@ -1649,7 +1673,7 @@ smb_fsop_access(smb_request_t *sr, cred_t *cr, smb_node_t *snode,
*/
if (faccess & (FILE_READ_DATA | FILE_EXECUTE)) {
error = smb_vop_access(snode->vp, VREAD,
- 0, NULL, zone_kcred());
+ 0, NULL, kcr);
if (error)
return (NT_STATUS_ACCESS_DENIED);
}
diff --git a/usr/src/uts/common/fs/smbsrv/smb_init.c b/usr/src/uts/common/fs/smbsrv/smb_init.c
index 88d804723e..f7e1739367 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_init.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_init.c
@@ -247,7 +247,14 @@ smb_drv_open(dev_t *devp, int flag, int otyp, cred_t *cr)
static int
smb_drv_close(dev_t dev, int flag, int otyp, cred_t *credp)
{
- return (smb_server_delete());
+ smb_server_t *sv;
+ int rc;
+
+ rc = smb_server_lookup(&sv);
+ if (rc == 0)
+ rc = smb_server_delete(sv);
+
+ return (rc);
}
/* ARGSUSED */
diff --git a/usr/src/uts/common/fs/smbsrv/smb_kshare.c b/usr/src/uts/common/fs/smbsrv/smb_kshare.c
index a43c4af02a..5c5458bca5 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_kshare.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_kshare.c
@@ -26,8 +26,9 @@
*/
#include <smbsrv/smb_door.h>
-#include <smbsrv/smb_kproto.h>
#include <smbsrv/smb_ktypes.h>
+#include <smbsrv/smb2_kproto.h>
+#include <smbsrv/smb_kstat.h>
typedef struct smb_unshare {
list_node_t us_lnd;
@@ -36,7 +37,6 @@ typedef struct smb_unshare {
static kmem_cache_t *smb_kshare_cache_share;
static kmem_cache_t *smb_kshare_cache_unexport;
-kmem_cache_t *smb_kshare_cache_vfs;
static int smb_kshare_cmp(const void *, const void *);
static void smb_kshare_hold(const void *);
@@ -294,7 +294,6 @@ smb_export_stop(smb_server_t *sv)
mutex_exit(&sv->sv_export.e_mutex);
smb_avl_destroy(&sv->sv_export.e_share_avl);
- smb_vfs_rele_all(&sv->sv_export);
}
void
@@ -305,18 +304,12 @@ smb_kshare_g_init(void)
smb_kshare_cache_unexport = kmem_cache_create("smb_unexport_cache",
sizeof (smb_unshare_t), 8, NULL, NULL, NULL, NULL, NULL, 0);
-
- smb_kshare_cache_vfs = kmem_cache_create("smb_vfs_cache",
- sizeof (smb_vfs_t), 8, NULL, NULL, NULL, NULL, NULL, 0);
}
void
smb_kshare_init(smb_server_t *sv)
{
- smb_llist_constructor(&sv->sv_export.e_vfs_list, sizeof (smb_vfs_t),
- offsetof(smb_vfs_t, sv_lnd));
-
smb_slist_constructor(&sv->sv_export.e_unexport_list,
sizeof (smb_unshare_t), offsetof(smb_unshare_t, us_lnd));
}
@@ -348,10 +341,6 @@ smb_kshare_fini(smb_server_t *sv)
kmem_cache_free(smb_kshare_cache_unexport, ux);
}
smb_slist_destructor(&sv->sv_export.e_unexport_list);
-
- smb_vfs_rele_all(&sv->sv_export);
-
- smb_llist_destructor(&sv->sv_export.e_vfs_list);
}
void
@@ -359,7 +348,6 @@ smb_kshare_g_fini(void)
{
kmem_cache_destroy(smb_kshare_cache_unexport);
kmem_cache_destroy(smb_kshare_cache_share);
- kmem_cache_destroy(smb_kshare_cache_vfs);
}
@@ -684,10 +672,8 @@ smb_kshare_release(smb_server_t *sv, smb_kshare_t *shr)
/*
* Add the given share in the specified server.
- * If the share is a disk share, smb_vfs_hold() is
- * invoked to ensure that there is a hold on the
- * corresponding file system before the share is
- * added to shares AVL.
+ * If the share is a disk share, look up the share path
+ * and hold the smb_node_t for the share root.
*
* If the share is an Autohome share and it is
* already in the AVL only a reference count for
@@ -698,7 +684,7 @@ smb_kshare_export(smb_server_t *sv, smb_kshare_t *shr)
{
smb_avl_t *share_avl;
smb_kshare_t *auto_shr;
- vnode_t *vp;
+ smb_node_t *snode = NULL;
int rc = 0;
share_avl = &sv->sv_export.e_share_avl;
@@ -713,36 +699,53 @@ smb_kshare_export(smb_server_t *sv, smb_kshare_t *shr)
}
if ((auto_shr = smb_avl_lookup(share_avl, shr)) != NULL) {
- if ((auto_shr->shr_flags & SMB_SHRF_AUTOHOME) == 0) {
- smb_avl_release(share_avl, auto_shr);
- return (EEXIST);
+ rc = EEXIST;
+ if ((auto_shr->shr_flags & SMB_SHRF_AUTOHOME) != 0) {
+ mutex_enter(&auto_shr->shr_mutex);
+ auto_shr->shr_autocnt++;
+ mutex_exit(&auto_shr->shr_mutex);
+ rc = 0;
}
-
- mutex_enter(&auto_shr->shr_mutex);
- auto_shr->shr_autocnt++;
- mutex_exit(&auto_shr->shr_mutex);
smb_avl_release(share_avl, auto_shr);
- return (0);
+ return (rc);
}
- if ((rc = smb_server_sharevp(sv, shr->shr_path, &vp)) != 0) {
- cmn_err(CE_WARN, "export[%s(%s)]: failed obtaining vnode (%d)",
+ /*
+ * Get the root smb_node_t for this share, held.
+ * This hold is normally released during AVL destroy,
+ * via the element destructor: smb_kshare_destroy
+ */
+ rc = smb_server_share_lookup(sv, shr->shr_path, &snode);
+ if (rc != 0) {
+ cmn_err(CE_WARN, "export[%s(%s)]: lookup failed (%d)",
shr->shr_name, shr->shr_path, rc);
return (rc);
}
- if ((rc = smb_vfs_hold(&sv->sv_export, vp->v_vfsp)) == 0) {
- if ((rc = smb_avl_add(share_avl, shr)) != 0) {
- cmn_err(CE_WARN, "export[%s]: failed caching (%d)",
- shr->shr_name, rc);
- smb_vfs_rele(&sv->sv_export, vp->v_vfsp);
+ shr->shr_root_node = snode;
+ if ((rc = smb_avl_add(share_avl, shr)) != 0) {
+ cmn_err(CE_WARN, "export[%s]: failed caching (%d)",
+ shr->shr_name, rc);
+ shr->shr_root_node = NULL;
+ smb_node_release(snode);
+ return (rc);
+ }
+
+ /*
+ * For CA shares, find or create the CA handle dir,
+ * and (if restarted) import persistent handles.
+ */
+ if ((shr->shr_flags & SMB_SHRF_CA) != 0) {
+ rc = smb2_dh_new_ca_share(sv, shr);
+ if (rc != 0) {
+ /* Just make it a non-CA share. */
+ mutex_enter(&shr->shr_mutex);
+ shr->shr_flags &= ~SMB_SHRF_CA;
+ mutex_exit(&shr->shr_mutex);
+ rc = 0;
}
- } else {
- cmn_err(CE_WARN, "export[%s(%s)]: failed holding VFS (%d)",
- shr->shr_name, shr->shr_path, rc);
}
- VN_RELE(vp);
return (rc);
}
@@ -764,8 +767,6 @@ smb_kshare_unexport(smb_server_t *sv, const char *shrname)
smb_avl_t *share_avl;
smb_kshare_t key;
smb_kshare_t *shr;
- vnode_t *vp;
- int rc;
boolean_t auto_unexport;
share_avl = &sv->sv_export.e_share_avl;
@@ -785,19 +786,12 @@ smb_kshare_unexport(smb_server_t *sv, const char *shrname)
}
}
- if (STYPE_ISDSK(shr->shr_type)) {
- if ((rc = smb_server_sharevp(sv, shr->shr_path, &vp)) != 0) {
- smb_avl_release(share_avl, shr);
- cmn_err(CE_WARN, "unexport[%s]: failed obtaining vnode"
- " (%d)", shrname, rc);
- return (rc);
- }
+ smb_avl_remove(share_avl, shr);
- smb_vfs_rele(&sv->sv_export, vp->v_vfsp);
- VN_RELE(vp);
- }
+ mutex_enter(&shr->shr_mutex);
+ shr->shr_flags |= SMB_SHRF_REMOVED;
+ mutex_exit(&shr->shr_mutex);
- smb_avl_remove(share_avl, shr);
smb_avl_release(share_avl, shr);
return (0);
@@ -892,6 +886,7 @@ smb_kshare_decode(nvlist_t *share)
SMB_SHRF_DFSROOT);
tmp.shr_flags |= smb_kshare_decode_bool(smb, SHOPT_QUOTAS,
SMB_SHRF_QUOTAS);
+ tmp.shr_flags |= smb_kshare_decode_bool(smb, SHOPT_CA, SMB_SHRF_CA);
tmp.shr_flags |= smb_kshare_decode_bool(smb, SHOPT_FSO, SMB_SHRF_FSO);
tmp.shr_flags |= smb_kshare_decode_bool(smb, SHOPT_AUTOHOME,
SMB_SHRF_AUTOHOME);
@@ -1041,6 +1036,11 @@ smb_kshare_destroy(void *p)
ASSERT(shr);
ASSERT(shr->shr_magic == SMB_SHARE_MAGIC);
+ if (shr->shr_ca_dir != NULL)
+ smb_node_release(shr->shr_ca_dir);
+ if (shr->shr_root_node)
+ smb_node_release(shr->shr_root_node);
+
smb_mem_free(shr->shr_name);
smb_mem_free(shr->shr_path);
smb_mem_free(shr->shr_cmnt);
diff --git a/usr/src/uts/common/fs/smbsrv/smb_node.c b/usr/src/uts/common/fs/smbsrv/smb_node.c
index 63756f9037..3e9933d51a 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_node.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_node.c
@@ -88,7 +88,7 @@
* course the state of the node should be tested/updated under the
* protection of the mutex).
*/
-#include <smbsrv/smb_kproto.h>
+#include <smbsrv/smb2_kproto.h>
#include <smbsrv/smb_fsops.h>
#include <smbsrv/smb_kstat.h>
#include <sys/ddi.h>
@@ -1574,10 +1574,20 @@ smb_node_setattr(smb_request_t *sr, smb_node_t *node,
attr->sa_crtime;
mutex_exit(&of->f_mutex);
+
/*
* The f_pending_attr times are reapplied in
* smb_ofile_close().
*/
+
+ /*
+ * If this change is coming directly from a client
+ * (sr != NULL) and it's a persistent handle, save
+ * the "sticky times" in the handle.
+ */
+ if (sr != NULL && of->dh_persist) {
+ smb2_dh_update_times(sr, of, attr);
+ }
}
if ((attr->sa_mask & SMB_AT_ALLOCSZ) != 0) {
diff --git a/usr/src/uts/common/fs/smbsrv/smb_ofile.c b/usr/src/uts/common/fs/smbsrv/smb_ofile.c
index 0142bf9164..531ca314fb 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_ofile.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_ofile.c
@@ -280,11 +280,7 @@
#include <smbsrv/smb2_kproto.h>
#include <smbsrv/smb_fsops.h>
#include <sys/time.h>
-
-/* XXX: May need to actually assign GUIDs for these. */
-/* Don't leak object addresses */
-#define SMB_OFILE_PERSISTID(of) \
- ((uintptr_t)&smb_cache_ofile ^ (uintptr_t)(of))
+#include <sys/random.h>
static boolean_t smb_ofile_is_open_locked(smb_ofile_t *);
static void smb_ofile_delete(void *arg);
@@ -296,6 +292,14 @@ static int smb_ofile_netinfo_init(smb_ofile_t *, smb_netfileinfo_t *);
static void smb_ofile_netinfo_fini(smb_netfileinfo_t *);
/*
+ * The uniq_fid is a CIFS-server-wide unique identifier for an ofile
+ * which is used to uniquely identify open instances for the
+ * VFS share reservation and POSIX locks.
+ */
+static volatile uint32_t smb_fids = 0;
+#define SMB_UNIQ_FID() atomic_inc_32_nv(&smb_fids)
+
+/*
* smb_ofile_alloc
 * Allocate an ofile and fill in its "up" pointers, but
* do NOT link it into the tree's list of ofiles or the
@@ -304,6 +308,9 @@ static void smb_ofile_netinfo_fini(smb_netfileinfo_t *);
*
* If we don't get as far as smb_ofile_open with this OF,
* call smb_ofile_free() to free this object.
+ *
+ * Note: The following sr members may be null during
+ * persistent handle import: session, uid_user, tid_tree
*/
smb_ofile_t *
smb_ofile_alloc(
@@ -311,10 +318,10 @@ smb_ofile_alloc(
smb_arg_open_t *op,
smb_node_t *node, /* optional (may be NULL) */
uint16_t ftype,
- uint16_t tree_fid,
- uint32_t uniqid)
+ uint16_t tree_fid)
{
- smb_tree_t *tree = sr->tid_tree;
+ smb_user_t *user = sr->uid_user; /* optional */
+ smb_tree_t *tree = sr->tid_tree; /* optional */
smb_ofile_t *of;
of = kmem_cache_alloc(smb_cache_ofile, KM_SLEEP);
@@ -324,22 +331,28 @@ smb_ofile_alloc(
mutex_init(&of->f_mutex, NULL, MUTEX_DEFAULT, NULL);
list_create(&of->f_notify.nc_waiters, sizeof (smb_request_t),
offsetof(smb_request_t, sr_waiters));
+ mutex_init(&of->dh_nvlock, NULL, MUTEX_DEFAULT, NULL);
of->f_state = SMB_OFILE_STATE_ALLOC;
of->f_refcnt = 1;
of->f_ftype = ftype;
of->f_fid = tree_fid;
/* of->f_persistid see smb2_create */
- of->f_uniqid = uniqid;
+ of->f_uniqid = SMB_UNIQ_FID();
of->f_opened_by_pid = sr->smb_pid;
of->f_granted_access = op->desired_access;
of->f_share_access = op->share_access;
of->f_create_options = op->create_options;
- of->f_cr = (op->create_options & FILE_OPEN_FOR_BACKUP_INTENT) ?
- smb_user_getprivcred(sr->uid_user) : sr->uid_user->u_cred;
- crhold(of->f_cr);
- of->f_server = tree->t_server;
- of->f_session = tree->t_session;
+ if (user != NULL) {
+ if ((op->create_options & FILE_OPEN_FOR_BACKUP_INTENT) != 0)
+ of->f_cr = smb_user_getprivcred(user);
+ else
+ of->f_cr = user->u_cred;
+ crhold(of->f_cr);
+ }
+ of->f_server = sr->sr_server;
+ of->f_session = sr->session; /* may be NULL */
+
(void) memset(of->f_lock_seq, -1, SMB_OFILE_LSEQ_MAX);
of->f_mode = smb_fsop_amask_to_omode(of->f_granted_access);
@@ -361,11 +374,15 @@ smb_ofile_alloc(
* held by our caller, until smb_ofile_open puts this
* ofile on the node ofile list with smb_node_add_ofile.
*/
- smb_user_hold_internal(sr->uid_user);
- smb_tree_hold_internal(tree);
- of->f_user = sr->uid_user;
- of->f_tree = tree;
- of->f_node = node;
+ if (user != NULL) {
+ smb_user_hold_internal(user);
+ of->f_user = user;
+ }
+ if (tree != NULL) {
+ smb_tree_hold_internal(tree);
+ of->f_tree = tree;
+ }
+ of->f_node = node; /* may be NULL */
return (of);
}
@@ -448,6 +465,9 @@ smb_ofile_close(smb_ofile_t *of, int32_t mtime_sec)
return;
}
+ /*
+ * Only one thread here (the one that set f_state to closing)
+ */
switch (of->f_ftype) {
case SMB_FTYPE_BYTE_PIPE:
case SMB_FTYPE_MESG_PIPE:
@@ -456,6 +476,8 @@ smb_ofile_close(smb_ofile_t *of, int32_t mtime_sec)
break;
case SMB_FTYPE_DISK:
+ if (of->dh_persist)
+ smb2_dh_close_persistent(of);
if (of->f_persistid != 0)
smb_ofile_del_persistid(of);
if (of->f_lease != NULL)
@@ -961,6 +983,9 @@ smb_ofile_lookup_by_persistid(smb_request_t *sr, uint64_t persistid)
smb_ofile_t *of;
uint_t idx;
+ if (persistid == 0)
+ return (NULL);
+
hash = sr->sr_server->sv_persistid_ht;
idx = smb_hash_uint64(hash, persistid);
bucket = &hash->buckets[idx];
@@ -981,28 +1006,132 @@ smb_ofile_lookup_by_persistid(smb_request_t *sr, uint64_t persistid)
}
/*
- * Create a (unique) persistent ID for a new ofile,
- * and add this ofile to the persistid hash table.
+ * Create a (unique) durable/persistent ID for a new ofile,
+ * and add this ofile to the persistid hash table. This ID
+ * is referred to as the persistent ID in the protocol spec,
+ * so that's what we call it too, though the persistence may
+ * vary. "Durable" handles are persistent across reconnects
+ * but not server reboots. Persistent handles are persistent
+ * across server reboots too.
+ *
+ * Note that persistent IDs need to be unique for the lifetime of
+ * any given ofile. For normal (non-persistent) ofiles we can just
+ * use a persistent ID derived from the ofile memory address, as
+ * these don't ever live beyond the current OS boot lifetime.
+ *
+ * Persistent handles are re-imported after server restart, and
+ * generally have a different memory address after import than
+ * they had in the previous OS boot lifetime, so for these we
+ * use a randomly assigned value that won't conflict with any
+ * non-persistent (durable) handles. Ensuring that a randomly
+ * generated ID is unique requires a search of the ofiles in one
+ * hash bucket, which we'd rather avoid for non-persistent opens.
+ *
+ * The solution used here is to divide the persistent ID space
+ * in half (odd and even values) where durable opens use an ID
+ * derived from the ofile address (which is always even), and
+ * persistent opens use an ID generated randomly (always odd).
+ *
+ * smb_ofile_set_persistid_dh() sets a durable handle ID and
+ * smb_ofile_set_persistid_ph() sets a persistent handle ID.
*/
void
-smb_ofile_set_persistid(smb_ofile_t *of)
+smb_ofile_set_persistid_dh(smb_ofile_t *of)
{
smb_hash_t *hash = of->f_server->sv_persistid_ht;
smb_bucket_t *bucket;
smb_llist_t *ll;
+ uint64_t persistid;
uint_t idx;
- of->f_persistid = SMB_OFILE_PERSISTID(of);
+ persistid = (uintptr_t)of;
+ /* Avoid showing object addresses */
+ persistid ^= ((uintptr_t)&smb_cache_ofile);
+ /* make sure it's even */
+ persistid &= ~((uint64_t)1);
- idx = smb_hash_uint64(hash, of->f_persistid);
+ idx = smb_hash_uint64(hash, persistid);
bucket = &hash->buckets[idx];
ll = &bucket->b_list;
smb_llist_enter(ll, RW_WRITER);
- smb_llist_insert_tail(ll, of);
+ if (of->f_persistid == 0) {
+ of->f_persistid = persistid;
+ smb_llist_insert_tail(ll, of);
+ }
smb_llist_exit(ll);
}
void
+smb_ofile_set_persistid_ph(smb_ofile_t *of)
+{
+ uint64_t persistid;
+ int rc;
+
+top:
+ (void) random_get_pseudo_bytes((uint8_t *)&persistid,
+ sizeof (persistid));
+ if (persistid == 0) {
+ cmn_err(CE_NOTE, "random gave all zeros!");
+ goto top;
+ }
+ /* make sure it's odd */
+ persistid |= (uint64_t)1;
+
+ /*
+ * Try inserting with this persistent ID.
+ */
+ rc = smb_ofile_insert_persistid(of, persistid);
+ if (rc == EEXIST)
+ goto top;
+ if (rc != 0) {
+ cmn_err(CE_NOTE, "set persistid rc=%d", rc);
+ }
+}
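
A small user-space sketch of the even/odd persistid convention just shown; the XOR mask and the use of rand() are stand-ins for the kernel's smb_cache_ofile address and random_get_pseudo_bytes(), so treat this as an assumption-laden illustration rather than the committed code.

/* Even IDs: derived from an object address. Odd IDs: randomly generated. */
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

static uint64_t
durable_id(const void *obj, uintptr_t mask)
{
        uint64_t id = (uintptr_t)obj;

        id ^= mask;                     /* avoid exposing raw addresses */
        id &= ~(uint64_t)1;             /* durable handles: always even */
        return (id);
}

static uint64_t
persistent_id(void)
{
        /* rand() is a stand-in; a zero result would be retried, as above. */
        uint64_t id = ((uint64_t)rand() << 32) | (uint64_t)rand();

        id |= 1;                        /* persistent handles: always odd */
        return (id);
}

int
main(void)
{
        int dummy;

        (void) printf("durable    id 0x%016llx\n",
            (unsigned long long)durable_id(&dummy, 0xdeadbeefULL));
        (void) printf("persistent id 0x%016llx\n",
            (unsigned long long)persistent_id());
        return (0);
}
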
+
+/*
+ * Insert an ofile into the persistid hash table.
+ * If the persistent ID is in use, error.
+ */
+int
+smb_ofile_insert_persistid(smb_ofile_t *new_of, uint64_t persistid)
+{
+ smb_hash_t *hash = new_of->f_server->sv_persistid_ht;
+ smb_bucket_t *bucket;
+ smb_llist_t *ll;
+ smb_ofile_t *of;
+ uint_t idx;
+
+ ASSERT(persistid != 0);
+
+ /*
+ * Look to see if this key already exists.
+ */
+ idx = smb_hash_uint64(hash, persistid);
+ bucket = &hash->buckets[idx];
+ ll = &bucket->b_list;
+
+ smb_llist_enter(ll, RW_WRITER);
+ of = smb_llist_head(ll);
+ while (of != NULL) {
+ if (of->f_persistid == persistid) {
+ /* already in use */
+ smb_llist_exit(ll);
+ return (EEXIST);
+ }
+ of = smb_llist_next(ll, of);
+ }
+
+ /* Not found, so OK to insert. */
+ if (new_of->f_persistid == 0) {
+ new_of->f_persistid = persistid;
+ smb_llist_insert_tail(ll, new_of);
+ }
+ smb_llist_exit(ll);
+
+ return (0);
+}
+
+void
smb_ofile_del_persistid(smb_ofile_t *of)
{
smb_hash_t *hash = of->f_server->sv_persistid_ht;
@@ -1014,7 +1143,10 @@ smb_ofile_del_persistid(smb_ofile_t *of)
bucket = &hash->buckets[idx];
ll = &bucket->b_list;
smb_llist_enter(ll, RW_WRITER);
- smb_llist_remove(ll, of);
+ if (of->f_persistid != 0) {
+ smb_llist_remove(ll, of);
+ of->f_persistid = 0;
+ }
smb_llist_exit(ll);
}
@@ -1390,6 +1522,7 @@ smb_ofile_free(smb_ofile_t *of)
of->f_magic = (uint32_t)~SMB_OFILE_MAGIC;
list_destroy(&of->f_notify.nc_waiters);
+ mutex_destroy(&of->dh_nvlock);
mutex_destroy(&of->f_mutex);
kmem_cache_free(smb_cache_ofile, of);
}
diff --git a/usr/src/uts/common/fs/smbsrv/smb_pathname.c b/usr/src/uts/common/fs/smbsrv/smb_pathname.c
index a8f5ae3aa4..fbf003c7c0 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_pathname.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_pathname.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2017 Nexenta Systems, Inc. All rights reserved.
*/
#include <smbsrv/smb_kproto.h>
@@ -154,7 +154,7 @@ smb_pathname_reduce(
pathname_t ppn;
char *usepath;
int lookup_flags = FOLLOW;
- int trailing_slash = 0;
+ int trailing_slash = 0;
int err = 0;
int len;
smb_node_t *vss_cur_node;
@@ -423,6 +423,10 @@ smb_pathname(smb_request_t *sr, char *path, int flags,
if ((err = pn_set(&pn, namep)) != 0)
break;
+ /* We want the DOS attributes. */
+ bzero(&attr, sizeof (attr));
+ attr.sa_mask = SMB_AT_DOSATTR;
+
local_flags = flags & FIGNORECASE;
err = smb_pathname_lookup(&pn, &rpn, local_flags,
&vp, rootvp, dnode->vp, &attr, cred);
@@ -1066,6 +1070,27 @@ smb_is_stream_name(char *path)
}
/*
+ * Is this stream node a "restricted" type?
+ */
+boolean_t
+smb_strname_restricted(char *strname)
+{
+ char *stype;
+
+ stype = strrchr(strname, ':');
+ if (stype == NULL)
+ return (B_FALSE);
+
+ /*
+ * Only ":$CA" is restricted (for now).
+ */
+ if (strcmp(stype, ":$CA") == 0)
+ return (B_TRUE);
+
+ return (B_FALSE);
+}
+
+/*
* smb_validate_stream_name
*
 * B_FALSE will be returned, and the error status set in the sr, if:
@@ -1079,6 +1104,7 @@ boolean_t
smb_validate_stream_name(smb_request_t *sr, smb_pathname_t *pn)
{
static char *strmtype[] = {
+ "$CA",
"$DATA",
"$INDEX_ALLOCATION"
};
diff --git a/usr/src/uts/common/fs/smbsrv/smb_server.c b/usr/src/uts/common/fs/smbsrv/smb_server.c
index 42b6f8defa..6b2390d633 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_server.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_server.c
@@ -20,8 +20,8 @@
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2017 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2017 by Delphix. All rights reserved.
+ * Copyright 2018 Nexenta Systems, Inc. All rights reserved.
*/
/*
@@ -229,12 +229,12 @@ static void smb_server_fsop_stop(smb_server_t *);
static void smb_event_cancel(smb_server_t *, uint32_t);
static uint32_t smb_event_alloc_txid(void);
-static void smb_server_disconnect_share(smb_llist_t *, const char *);
-static void smb_server_enum_users(smb_llist_t *, smb_svcenum_t *);
-static void smb_server_enum_trees(smb_llist_t *, smb_svcenum_t *);
-static int smb_server_session_disconnect(smb_llist_t *, const char *,
+static void smb_server_disconnect_share(smb_server_t *, const char *);
+static void smb_server_enum_users(smb_server_t *, smb_svcenum_t *);
+static void smb_server_enum_trees(smb_server_t *, smb_svcenum_t *);
+static int smb_server_session_disconnect(smb_server_t *, const char *,
const char *);
-static int smb_server_fclose(smb_llist_t *, uint32_t);
+static int smb_server_fclose(smb_server_t *, uint32_t);
static int smb_server_kstat_update(kstat_t *, int);
static int smb_server_legacy_kstat_update(kstat_t *, int);
static void smb_server_listener_init(smb_server_t *, smb_listener_daemon_t *,
@@ -473,14 +473,8 @@ smb_server_create(void)
* activity associated that server has ceased before destroying it.
*/
int
-smb_server_delete(void)
+smb_server_delete(smb_server_t *sv)
{
- smb_server_t *sv;
- int rc;
-
- rc = smb_server_lookup(&sv);
- if (rc != 0)
- return (rc);
mutex_enter(&sv->sv_mutex);
switch (sv->sv_state) {
@@ -608,6 +602,7 @@ smb_server_start(smb_ioc_start_t *ioc)
int rc = 0;
int family;
smb_server_t *sv;
+ cred_t *ucr;
rc = smb_server_lookup(&sv);
if (rc)
@@ -620,6 +615,31 @@ smb_server_start(smb_ioc_start_t *ioc)
if ((rc = smb_server_fsop_start(sv)) != 0)
break;
+ /*
+ * Note: smb_kshare_start needs sv_session.
+ */
+ sv->sv_session = smb_session_create(NULL, 0, sv, 0);
+ if (sv->sv_session == NULL) {
+ rc = ENOMEM;
+ break;
+ }
+
+ /*
+ * Create a logon on the server session,
+ * used when importing CA shares.
+ */
+ sv->sv_rootuser = smb_user_new(sv->sv_session);
+ ucr = smb_kcred_create();
+ rc = smb_user_logon(sv->sv_rootuser, ucr, "", "root",
+ SMB_USER_FLAG_ADMIN, 0, 0);
+ crfree(ucr);
+ ucr = NULL;
+ if (rc != 0) {
+ cmn_err(CE_NOTE, "smb_server_start: "
+ "failed to create root user");
+ break;
+ }
+
if ((rc = smb_kshare_start(sv)) != 0)
break;
@@ -637,9 +657,8 @@ smb_server_start(smb_ioc_start_t *ioc)
sv->sv_cfg.skc_maxconnections, INT_MAX,
curzone->zone_zsched, TASKQ_DYNAMIC);
- sv->sv_session = smb_session_create(NULL, 0, sv, 0);
-
- if (sv->sv_worker_pool == NULL || sv->sv_session == NULL) {
+ if (sv->sv_worker_pool == NULL ||
+ sv->sv_receiver_pool == NULL) {
rc = ENOMEM;
break;
}
@@ -904,11 +923,11 @@ smb_server_enum(smb_ioc_svcenum_t *ioc)
switch (svcenum->se_type) {
case SMB_SVCENUM_TYPE_USER:
- smb_server_enum_users(&sv->sv_session_list, svcenum);
+ smb_server_enum_users(sv, svcenum);
break;
case SMB_SVCENUM_TYPE_TREE:
case SMB_SVCENUM_TYPE_FILE:
- smb_server_enum_trees(&sv->sv_session_list, svcenum);
+ smb_server_enum_trees(sv, svcenum);
break;
default:
rc = EINVAL;
@@ -924,7 +943,6 @@ smb_server_enum(smb_ioc_svcenum_t *ioc)
int
smb_server_session_close(smb_ioc_session_t *ioc)
{
- smb_llist_t *ll;
smb_server_t *sv;
int cnt;
int rc;
@@ -932,8 +950,7 @@ smb_server_session_close(smb_ioc_session_t *ioc)
if ((rc = smb_server_lookup(&sv)) != 0)
return (rc);
- ll = &sv->sv_session_list;
- cnt = smb_server_session_disconnect(ll, ioc->client, ioc->username);
+ cnt = smb_server_session_disconnect(sv, ioc->client, ioc->username);
smb_server_release(sv);
@@ -949,15 +966,13 @@ int
smb_server_file_close(smb_ioc_fileid_t *ioc)
{
uint32_t uniqid = ioc->uniqid;
- smb_llist_t *ll;
smb_server_t *sv;
int rc;
if ((rc = smb_server_lookup(&sv)) != 0)
return (rc);
- ll = &sv->sv_session_list;
- rc = smb_server_fclose(ll, uniqid);
+ rc = smb_server_fclose(sv, uniqid);
smb_server_release(sv);
return (rc);
@@ -978,17 +993,16 @@ smb_server_get_session_count(smb_server_t *sv)
}
/*
- * Gets the vnode of the specified share path.
- *
- * A hold on the returned vnode pointer is taken so the caller
- * must call VN_RELE.
+ * Gets the smb_node of the specified share path.
+ * Node is returned held (caller must rele.)
*/
int
-smb_server_sharevp(smb_server_t *sv, const char *shr_path, vnode_t **vp)
+smb_server_share_lookup(smb_server_t *sv, const char *shr_path,
+ smb_node_t **nodepp)
{
smb_request_t *sr;
smb_node_t *fnode = NULL;
- smb_node_t *dnode;
+ smb_node_t *dnode = NULL;
char last_comp[MAXNAMELEN];
int rc = 0;
@@ -1025,10 +1039,7 @@ smb_server_sharevp(smb_server_t *sv, const char *shr_path, vnode_t **vp)
ASSERT(fnode->vp && fnode->vp->v_vfsp);
- VN_HOLD(fnode->vp);
- *vp = fnode->vp;
-
- smb_node_release(fnode);
+ *nodepp = fnode;
return (0);
}
@@ -1070,7 +1081,6 @@ int
smb_server_unshare(const char *sharename)
{
smb_server_t *sv;
- smb_llist_t *ll;
int rc;
if ((rc = smb_server_lookup(&sv)))
@@ -1088,8 +1098,7 @@ smb_server_unshare(const char *sharename)
}
mutex_exit(&sv->sv_mutex);
- ll = &sv->sv_session_list;
- smb_server_disconnect_share(ll, sharename);
+ smb_server_disconnect_share(sv, sharename);
smb_server_release(sv);
return (0);
@@ -1100,10 +1109,12 @@ smb_server_unshare(const char *sharename)
* Typically called when a share has been removed.
*/
static void
-smb_server_disconnect_share(smb_llist_t *ll, const char *sharename)
+smb_server_disconnect_share(smb_server_t *sv, const char *sharename)
{
+ smb_llist_t *ll;
smb_session_t *session;
+ ll = &sv->sv_session_list;
smb_llist_enter(ll, RW_READER);
session = smb_llist_head(ll);
@@ -1514,9 +1525,17 @@ smb_server_shutdown(smb_server_t *sv)
* normal sessions, this happens in smb_session_cancel,
* but that's not called for the server session.
*/
+ if (sv->sv_rootuser != NULL) {
+ smb_user_logoff(sv->sv_rootuser);
+ smb_user_release(sv->sv_rootuser);
+ sv->sv_rootuser = NULL;
+ }
if (sv->sv_session != NULL) {
smb_slist_wait_for_empty(&sv->sv_session->s_req_list);
+ /* Just in case import left users and trees */
+ smb_session_logoff(sv->sv_session);
+
smb_session_delete(sv->sv_session);
sv->sv_session = NULL;
}
@@ -1817,8 +1836,9 @@ smb_server_release(smb_server_t *sv)
* Enumerate the users associated with a session list.
*/
static void
-smb_server_enum_users(smb_llist_t *ll, smb_svcenum_t *svcenum)
+smb_server_enum_users(smb_server_t *sv, smb_svcenum_t *svcenum)
{
+ smb_llist_t *ll = &sv->sv_session_list;
smb_session_t *sn;
smb_llist_t *ulist;
smb_user_t *user;
@@ -1859,8 +1879,9 @@ smb_server_enum_users(smb_llist_t *ll, smb_svcenum_t *svcenum)
* Enumerate the trees/files associated with a session list.
*/
static void
-smb_server_enum_trees(smb_llist_t *ll, smb_svcenum_t *svcenum)
+smb_server_enum_trees(smb_server_t *sv, smb_svcenum_t *svcenum)
{
+ smb_llist_t *ll = &sv->sv_session_list;
smb_session_t *sn;
smb_llist_t *tlist;
smb_tree_t *tree;
@@ -1902,9 +1923,10 @@ smb_server_enum_trees(smb_llist_t *ll, smb_svcenum_t *svcenum)
* Empty strings are treated as wildcards.
*/
static int
-smb_server_session_disconnect(smb_llist_t *ll,
+smb_server_session_disconnect(smb_server_t *sv,
const char *client, const char *name)
{
+ smb_llist_t *ll = &sv->sv_session_list;
smb_session_t *sn;
smb_llist_t *ulist;
smb_user_t *user;
@@ -1949,13 +1971,15 @@ smb_server_session_disconnect(smb_llist_t *ll,
* Close a file by its unique id.
*/
static int
-smb_server_fclose(smb_llist_t *ll, uint32_t uniqid)
+smb_server_fclose(smb_server_t *sv, uint32_t uniqid)
{
+ smb_llist_t *ll;
smb_session_t *sn;
smb_llist_t *tlist;
smb_tree_t *tree;
int rc = ENOENT;
+ ll = &sv->sv_session_list;
smb_llist_enter(ll, RW_READER);
sn = smb_llist_head(ll);
diff --git a/usr/src/uts/common/fs/smbsrv/smb_session.c b/usr/src/uts/common/fs/smbsrv/smb_session.c
index 205c21179b..2878df28e7 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_session.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_session.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2018 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2019 Nexenta Systems, Inc. All rights reserved.
*/
#include <sys/atomic.h>
@@ -72,8 +72,6 @@ static int smb_session_reader(smb_session_t *);
static int smb_session_xprt_puthdr(smb_session_t *,
uint8_t msg_type, uint32_t msg_len,
uint8_t *dst, size_t dstlen);
-static smb_tree_t *smb_session_get_tree(smb_session_t *, smb_tree_t *);
-static void smb_session_logoff(smb_session_t *);
static void smb_session_disconnect_trees(smb_session_t *);
static void smb_request_init_command_mbuf(smb_request_t *sr);
static void smb_session_genkey(smb_session_t *);
@@ -752,7 +750,22 @@ smb_session_create(ksocket_t new_so, uint16_t port, smb_server_t *sv,
smb_rwx_init(&session->s_lock);
- if (new_so != NULL) {
+ session->s_srqueue = &sv->sv_srqueue;
+ smb_server_get_cfg(sv, &session->s_cfg);
+
+ if (new_so == NULL) {
+ /*
+ * This call is creating the special "server" session,
+ * used for kshare export, oplock breaks, CA import.
+ * CA import creates temporary trees on this session
+ * and those should never get map/unmap up-calls, so
+ * force the map/unmap flags to zero on this session.
+ * Set a "modern" dialect for CA import too, so
+ * pathname parse doesn't do OS/2 stuff, etc.
+ */
+ session->s_cfg.skc_execflags = 0;
+ session->dialect = session->s_cfg.skc_max_protocol;
+ } else {
if (family == AF_INET) {
slen = sizeof (sin);
(void) ksocket_getsockname(new_so,
@@ -794,8 +807,6 @@ smb_session_create(ksocket_t new_so, uint16_t port, smb_server_t *sv,
else
smb_server_inc_tcp_sess(sv);
}
- smb_server_get_cfg(sv, &session->s_cfg);
- session->s_srqueue = &sv->sv_srqueue;
/*
* The initial new request handler is special,
@@ -1006,117 +1017,35 @@ smb_session_lookup_tree(
}
/*
- * Find the first connected tree that matches the specified sharename.
- * If the specified tree is NULL the search starts from the beginning of
- * the user's tree list. If a tree is provided the search starts just
- * after that tree.
- */
-smb_tree_t *
-smb_session_lookup_share(
- smb_session_t *session,
- const char *sharename,
- smb_tree_t *tree)
-{
- SMB_SESSION_VALID(session);
- ASSERT(sharename);
-
- smb_llist_enter(&session->s_tree_list, RW_READER);
-
- if (tree) {
- ASSERT3U(tree->t_magic, ==, SMB_TREE_MAGIC);
- ASSERT(tree->t_session == session);
- tree = smb_llist_next(&session->s_tree_list, tree);
- } else {
- tree = smb_llist_head(&session->s_tree_list);
- }
-
- while (tree) {
- ASSERT3U(tree->t_magic, ==, SMB_TREE_MAGIC);
- ASSERT(tree->t_session == session);
- if (smb_strcasecmp(tree->t_sharename, sharename, 0) == 0) {
- if (smb_tree_hold(tree)) {
- smb_llist_exit(&session->s_tree_list);
- return (tree);
- }
- }
- tree = smb_llist_next(&session->s_tree_list, tree);
- }
-
- smb_llist_exit(&session->s_tree_list);
- return (NULL);
-}
-
-/*
- * Find the first connected tree that matches the specified volume name.
- * If the specified tree is NULL the search starts from the beginning of
- * the user's tree list. If a tree is provided the search starts just
- * after that tree.
- */
-smb_tree_t *
-smb_session_lookup_volume(
- smb_session_t *session,
- const char *name,
- smb_tree_t *tree)
-{
- SMB_SESSION_VALID(session);
- ASSERT(name);
-
- smb_llist_enter(&session->s_tree_list, RW_READER);
-
- if (tree) {
- ASSERT3U(tree->t_magic, ==, SMB_TREE_MAGIC);
- ASSERT(tree->t_session == session);
- tree = smb_llist_next(&session->s_tree_list, tree);
- } else {
- tree = smb_llist_head(&session->s_tree_list);
- }
-
- while (tree) {
- ASSERT3U(tree->t_magic, ==, SMB_TREE_MAGIC);
- ASSERT(tree->t_session == session);
-
- if (smb_strcasecmp(tree->t_volume, name, 0) == 0) {
- if (smb_tree_hold(tree)) {
- smb_llist_exit(&session->s_tree_list);
- return (tree);
- }
- }
-
- tree = smb_llist_next(&session->s_tree_list, tree);
- }
-
- smb_llist_exit(&session->s_tree_list);
- return (NULL);
-}
-
-/*
* Disconnect all trees that match the specified client process-id.
+ * Used by the SMB1 "process exit" request.
*/
void
smb_session_close_pid(
smb_session_t *session,
uint32_t pid)
{
+ smb_llist_t *tree_list = &session->s_tree_list;
smb_tree_t *tree;
- SMB_SESSION_VALID(session);
+ smb_llist_enter(tree_list, RW_READER);
- tree = smb_session_get_tree(session, NULL);
+ tree = smb_llist_head(tree_list);
while (tree) {
- smb_tree_t *next;
- ASSERT3U(tree->t_magic, ==, SMB_TREE_MAGIC);
- ASSERT(tree->t_session == session);
- smb_tree_close_pid(tree, pid);
- next = smb_session_get_tree(session, tree);
- smb_tree_release(tree);
- tree = next;
+ if (smb_tree_hold(tree)) {
+ smb_tree_close_pid(tree, pid);
+ smb_tree_release(tree);
+ }
+ tree = smb_llist_next(tree_list, tree);
}
+
+ smb_llist_exit(tree_list);
}
static void
-smb_session_tree_dtor(void *t)
+smb_session_tree_dtor(void *arg)
{
- smb_tree_t *tree = (smb_tree_t *)t;
+ smb_tree_t *tree = arg;
smb_tree_disconnect(tree, B_TRUE);
/* release the ref acquired during the traversal loop */
@@ -1167,84 +1096,76 @@ static void
smb_session_disconnect_trees(
smb_session_t *session)
{
- smb_tree_t *tree, *next_tree;
+ smb_llist_t *tree_list = &session->s_tree_list;
+ smb_tree_t *tree;
- SMB_SESSION_VALID(session);
+ smb_llist_enter(tree_list, RW_READER);
- tree = smb_session_get_tree(session, NULL);
+ tree = smb_llist_head(tree_list);
while (tree) {
- ASSERT3U(tree->t_magic, ==, SMB_TREE_MAGIC);
- ASSERT(tree->t_session == session);
- smb_tree_disconnect(tree, B_TRUE);
- next_tree = smb_session_get_tree(session, tree);
- smb_tree_release(tree);
- tree = next_tree;
+ if (smb_tree_hold(tree)) {
+ smb_llist_post(tree_list, tree,
+ smb_session_tree_dtor);
+ }
+ tree = smb_llist_next(tree_list, tree);
}
+
+ /* drop the lock and flush the dtor queue */
+ smb_llist_exit(tree_list);
}
/*
- * Disconnect all trees that match the specified share name.
+ * Variant of smb_session_tree_dtor that also
+ * cancels requests using this tree.
*/
-void
-smb_session_disconnect_share(
- smb_session_t *session,
- const char *sharename)
+static void
+smb_session_tree_kill(void *arg)
{
- smb_tree_t *tree;
- smb_tree_t *next;
+ smb_tree_t *tree = arg;
- SMB_SESSION_VALID(session);
+ SMB_TREE_VALID(tree);
- tree = smb_session_lookup_share(session, sharename, NULL);
- while (tree) {
- ASSERT3U(tree->t_magic, ==, SMB_TREE_MAGIC);
- ASSERT(tree->t_session == session);
- smb_tree_disconnect(tree, B_TRUE);
- smb_session_cancel_requests(session, tree, NULL);
- next = smb_session_lookup_share(session, sharename, tree);
- smb_tree_release(tree);
- tree = next;
- }
+ smb_tree_disconnect(tree, B_TRUE);
+ smb_session_cancel_requests(tree->t_session, tree, NULL);
+
+ /* release the ref acquired during the traversal loop */
+ smb_tree_release(tree);
}
/*
- * Get the next connected tree in the list. A reference is taken on
- * the tree, which can be released later with smb_tree_release().
- *
- * If the specified tree is NULL the search starts from the beginning of
- * the tree list. If a tree is provided the search starts just after
- * that tree.
- *
- * Returns NULL if there are no connected trees in the list.
+ * Disconnect all trees that match the specified share name,
+ * and kill requests using those trees.
*/
-static smb_tree_t *
-smb_session_get_tree(
+void
+smb_session_disconnect_share(
smb_session_t *session,
- smb_tree_t *tree)
+ const char *sharename)
{
- smb_llist_t *tree_list;
+ smb_llist_t *ll;
+ smb_tree_t *tree;
SMB_SESSION_VALID(session);
- tree_list = &session->s_tree_list;
- smb_llist_enter(tree_list, RW_READER);
+ ll = &session->s_tree_list;
+ smb_llist_enter(ll, RW_READER);
- if (tree) {
- ASSERT3U(tree->t_magic, ==, SMB_TREE_MAGIC);
- tree = smb_llist_next(tree_list, tree);
- } else {
- tree = smb_llist_head(tree_list);
- }
+ for (tree = smb_llist_head(ll);
+ tree != NULL;
+ tree = smb_llist_next(ll, tree)) {
- while (tree) {
- if (smb_tree_hold(tree))
- break;
+ SMB_TREE_VALID(tree);
+ ASSERT(tree->t_session == session);
- tree = smb_llist_next(tree_list, tree);
+ if (smb_strcasecmp(tree->t_sharename, sharename, 0) != 0)
+ continue;
+
+ if (smb_tree_hold(tree)) {
+ smb_llist_post(ll, tree,
+ smb_session_tree_kill);
+ }
}
- smb_llist_exit(tree_list);
- return (tree);
+ smb_llist_exit(ll);
}
/*
@@ -1255,7 +1176,7 @@ smb_session_get_tree(
* disconnect (SMB_SESSION_STATE_DISCONNECTED).
* If client-initiated, save durable handles.
*/
-static void
+void
smb_session_logoff(smb_session_t *session)
{
smb_llist_t *ulist;
@@ -1279,9 +1200,6 @@ top:
// smb_user_hold_internal(user);
user->u_refcnt++;
mutex_exit(&user->u_mutex);
- if (user->u_session->s_state ==
- SMB_SESSION_STATE_DISCONNECTED)
- user->preserve_opens = SMB2_DH_PRESERVE_ALL;
smb_user_logoff(user);
smb_user_release(user);
break;
diff --git a/usr/src/uts/common/fs/smbsrv/smb_srv_oplock.c b/usr/src/uts/common/fs/smbsrv/smb_srv_oplock.c
index 86ce24c0b0..7c4be2f56e 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_srv_oplock.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_srv_oplock.c
@@ -346,6 +346,11 @@ smb_oplock_async_break(void *arg)
break;
}
+ if (sr->dh_nvl_dirty) {
+ sr->dh_nvl_dirty = B_FALSE;
+ smb2_dh_update_nvfile(sr);
+ }
+
sr->sr_state = SMB_REQ_STATE_COMPLETED;
smb_request_free(sr);
}
@@ -444,6 +449,10 @@ smb_oplock_send_brk(smb_request_t *sr)
if (lease != NULL)
lease->ls_state = NewLevel & CACHE_RWH;
ofile->f_oplock.og_state = NewLevel;
+
+ if (ofile->dh_persist) {
+ smb2_dh_update_oplock(sr, ofile);
+ }
}
/*
@@ -583,6 +592,10 @@ smb_oplock_send_brk(smb_request_t *sr)
if (lease != NULL) {
lease->ls_state = NewLevel & CACHE_RWH;
}
+
+ if (ofile->dh_persist) {
+ smb2_dh_update_oplock(sr, ofile);
+ }
}
/*
diff --git a/usr/src/uts/common/fs/smbsrv/smb_tree.c b/usr/src/uts/common/fs/smbsrv/smb_tree.c
index 5020dec794..aedacf2123 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_tree.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_tree.c
@@ -184,8 +184,6 @@ uint32_t smb_tree_connect_core(smb_request_t *);
uint32_t smb_tree_connect_disk(smb_request_t *, smb_arg_tcon_t *);
uint32_t smb_tree_connect_printq(smb_request_t *, smb_arg_tcon_t *);
uint32_t smb_tree_connect_ipc(smb_request_t *, smb_arg_tcon_t *);
-static smb_tree_t *smb_tree_alloc(smb_request_t *, const smb_kshare_t *,
- smb_node_t *, uint32_t, uint32_t);
static void smb_tree_dealloc(void *);
static boolean_t smb_tree_is_connected_locked(smb_tree_t *);
static char *smb_tree_get_sharename(char *);
@@ -193,9 +191,7 @@ static int smb_tree_getattr(const smb_kshare_t *, smb_node_t *, smb_tree_t *);
static void smb_tree_get_volname(vfs_t *, smb_tree_t *);
static void smb_tree_get_flags(const smb_kshare_t *, vfs_t *, smb_tree_t *);
static void smb_tree_log(smb_request_t *, const char *, const char *, ...);
-static void smb_tree_close_odirs(smb_tree_t *, uint16_t);
-static smb_ofile_t *smb_tree_get_ofile(smb_tree_t *, smb_ofile_t *);
-static smb_odir_t *smb_tree_get_odir(smb_tree_t *, smb_odir_t *);
+static void smb_tree_close_odirs(smb_tree_t *, uint32_t);
static void smb_tree_set_execinfo(smb_tree_t *, smb_shr_execinfo_t *, int);
static int smb_tree_enum_private(smb_tree_t *, smb_svcenum_t *);
static int smb_tree_netinfo_encode(smb_tree_t *, uint8_t *, size_t, uint32_t *);
@@ -303,10 +299,13 @@ out:
/*
* Disconnect a tree.
+ *
+ * The "do_exec" arg is obsolete and ignored.
*/
void
smb_tree_disconnect(smb_tree_t *tree, boolean_t do_exec)
{
+ _NOTE(ARGUNUSED(do_exec))
smb_shr_execinfo_t execinfo;
ASSERT(tree->t_magic == SMB_TREE_MAGIC);
@@ -314,34 +313,27 @@ smb_tree_disconnect(smb_tree_t *tree, boolean_t do_exec)
mutex_enter(&tree->t_mutex);
ASSERT(tree->t_refcnt);
- if (smb_tree_is_connected_locked(tree)) {
- /*
- * Indicate that the disconnect process has started.
- */
- tree->t_state = SMB_TREE_STATE_DISCONNECTING;
+ if (!smb_tree_is_connected_locked(tree)) {
mutex_exit(&tree->t_mutex);
-
- if (do_exec) {
- /*
- * The files opened under this tree are closed.
- */
- smb_ofile_close_all(tree, 0);
- /*
- * The directories opened under this tree are closed.
- */
- smb_tree_close_odirs(tree, 0);
- }
-
- mutex_enter(&tree->t_mutex);
- tree->t_state = SMB_TREE_STATE_DISCONNECTED;
- smb_server_dec_trees(tree->t_server);
+ return;
}
+ /*
+ * Indicate that the disconnect process has started.
+ */
+ tree->t_state = SMB_TREE_STATE_DISCONNECTING;
mutex_exit(&tree->t_mutex);
- if (do_exec && (tree->t_state == SMB_TREE_STATE_DISCONNECTED) &&
- (tree->t_execflags & SMB_EXEC_UNMAP)) {
+ /*
+ * The files opened under this tree are closed.
+ */
+ smb_ofile_close_all(tree, 0);
+ /*
+ * The directories opened under this tree are closed.
+ */
+ smb_tree_close_odirs(tree, 0);
+ if ((tree->t_execflags & SMB_EXEC_UNMAP) != 0) {
smb_tree_set_execinfo(tree, &execinfo, SMB_EXEC_UNMAP);
(void) smb_kshare_exec(tree->t_server, &execinfo);
}
@@ -408,7 +400,7 @@ smb_tree_release(
tree->t_refcnt--;
switch (tree->t_state) {
- case SMB_TREE_STATE_DISCONNECTED:
+ case SMB_TREE_STATE_DISCONNECTING:
if (tree->t_refcnt == 0) {
smb_session_t *ssn = tree->t_session;
tree->t_state = SMB_TREE_STATE_DISCONNECTED;
@@ -417,7 +409,6 @@ smb_tree_release(
}
break;
case SMB_TREE_STATE_CONNECTED:
- case SMB_TREE_STATE_DISCONNECTING:
break;
default:
ASSERT(0);
@@ -463,31 +454,29 @@ smb_tree_has_feature(smb_tree_t *tree, uint32_t flags)
int
smb_tree_enum(smb_tree_t *tree, smb_svcenum_t *svcenum)
{
+ smb_llist_t *of_list;
smb_ofile_t *of;
- smb_ofile_t *next;
int rc = 0;
- ASSERT(tree);
- ASSERT(tree->t_magic == SMB_TREE_MAGIC);
-
if (svcenum->se_type == SMB_SVCENUM_TYPE_TREE)
return (smb_tree_enum_private(tree, svcenum));
- of = smb_tree_get_ofile(tree, NULL);
- while (of) {
- ASSERT(of->f_tree == tree);
+ of_list = &tree->t_ofile_list;
+ smb_llist_enter(of_list, RW_READER);
- rc = smb_ofile_enum(of, svcenum);
- if (rc != 0) {
+ of = smb_llist_head(of_list);
+ while (of) {
+ if (smb_ofile_hold(of)) {
+ rc = smb_ofile_enum(of, svcenum);
smb_ofile_release(of);
- break;
}
-
- next = smb_tree_get_ofile(tree, of);
- smb_ofile_release(of);
- of = next;
+ if (rc != 0)
+ break;
+ of = smb_llist_next(of_list, of);
}
+ smb_llist_exit(of_list);
+
return (rc);
}
@@ -662,6 +651,9 @@ smb_tree_chkaccess(smb_request_t *sr, smb_kshare_t *shr, vnode_t *vp)
return (access);
}
+/* How long should tree connect wait for DH import to complete? */
+int smb_tcon_import_wait = 20; /* sec. */
+
/*
* Connect a share for use with files and directories.
*/
@@ -671,16 +663,14 @@ smb_tree_connect_disk(smb_request_t *sr, smb_arg_tcon_t *tcon)
char *sharename = tcon->path;
const char *any = "?????";
smb_user_t *user = sr->uid_user;
- smb_node_t *dnode = NULL;
smb_node_t *snode = NULL;
smb_kshare_t *si = tcon->si;
char *service = tcon->service;
- char last_component[MAXNAMELEN];
smb_tree_t *tree;
- cred_t *kcr;
int rc;
uint32_t access;
smb_shr_execinfo_t execinfo;
+ clock_t time;
ASSERT(user);
ASSERT(user->u_cred);
@@ -694,34 +684,34 @@ smb_tree_connect_disk(smb_request_t *sr, smb_arg_tcon_t *tcon)
/*
* Check that the shared directory exists.
- * Client might not have access to the path _leading_ to the share,
- * so we use "kcred" to get to the share root.
*/
- kcr = zone_kcred();
- rc = smb_pathname_reduce(sr, kcr, si->shr_path, 0, 0, &dnode,
- last_component);
- if (rc == 0) {
- rc = smb_fsop_lookup(sr, kcr, SMB_FOLLOW_LINKS,
- sr->sr_server->si_root_smb_node, dnode, last_component,
- &snode);
-
- smb_node_release(dnode);
- }
-
- if (rc) {
- if (snode)
- smb_node_release(snode);
-
+ snode = si->shr_root_node;
+ if (snode == NULL) {
smb_tree_log(sr, sharename, "bad path: %s", si->shr_path);
return (NT_STATUS_BAD_NETWORK_NAME);
}
if ((access = smb_tree_chkaccess(sr, si, snode->vp)) == 0) {
- smb_node_release(snode);
return (NT_STATUS_ACCESS_DENIED);
}
/*
+ * Wait for DH import of persistent handles to finish.
+ * If we time out, it's not clear what status to return,
+ * but as the share is not really available yet, let's
+ * return the status for "no such share".
+ */
+ time = SEC_TO_TICK(smb_tcon_import_wait) + ddi_get_lbolt();
+ mutex_enter(&si->shr_mutex);
+ while (si->shr_import_busy != NULL) {
+ if (cv_timedwait(&si->shr_cv, &si->shr_mutex, time) < 0) {
+ mutex_exit(&si->shr_mutex);
+ return (NT_STATUS_BAD_NETWORK_NAME);
+ }
+ }
+ mutex_exit(&si->shr_mutex);
+
+ /*
* Set up the OptionalSupport for this share.
*/
tcon->optional_support = SMB_SUPPORT_SEARCH_BITS;
@@ -758,8 +748,6 @@ smb_tree_connect_disk(smb_request_t *sr, smb_arg_tcon_t *tcon)
tree = smb_tree_alloc(sr, si, snode, access, sr->sr_cfg->skc_execflags);
- smb_node_release(snode);
-
if (tree == NULL)
return (NT_STATUS_INSUFF_SERVER_RESOURCES);
@@ -769,7 +757,17 @@ smb_tree_connect_disk(smb_request_t *sr, smb_arg_tcon_t *tcon)
rc = smb_kshare_exec(tree->t_server, &execinfo);
if ((rc != 0) && (tree->t_execflags & SMB_EXEC_TERM)) {
- smb_tree_disconnect(tree, B_FALSE);
+ /*
+ * Inline parts of smb_tree_disconnect().
+ * Not using smb_tree_disconnect() for cleanup
+ * here because we don't want an exec up-call,
+ * and there can't be any opens, as we never
+ * returned this TID to the client.
+ */
+ mutex_enter(&tree->t_mutex);
+ tree->t_state = SMB_TREE_STATE_DISCONNECTING;
+ mutex_exit(&tree->t_mutex);
+
smb_tree_release(tree);
return (NT_STATUS_ACCESS_DENIED);
}
@@ -901,7 +899,7 @@ smb_tree_connect_ipc(smb_request_t *sr, smb_arg_tcon_t *tcon)
/*
* Allocate a tree.
*/
-static smb_tree_t *
+smb_tree_t *
smb_tree_alloc(smb_request_t *sr, const smb_kshare_t *si,
smb_node_t *snode, uint32_t access, uint32_t execflags)
{
@@ -1001,6 +999,8 @@ smb_tree_dealloc(void *arg)
ASSERT(tree->t_state == SMB_TREE_STATE_DISCONNECTED);
ASSERT(tree->t_refcnt == 0);
+ smb_server_dec_trees(tree->t_server);
+
session = tree->t_session;
smb_llist_enter(&session->s_tree_list, RW_WRITER);
smb_llist_remove(&session->s_tree_list, tree);
@@ -1199,6 +1199,9 @@ smb_tree_get_flags(const smb_kshare_t *si, vfs_t *vfsp, smb_tree_t *tree)
if (si->shr_flags & SMB_SHRF_ABE)
flags |= SMB_TREE_ABE;
+ if (si->shr_flags & SMB_SHRF_CA)
+ flags |= SMB_TREE_CA;
+
if (si->shr_flags & SMB_SHRF_FSO)
flags |= SMB_TREE_FORCE_L2_OPLOCK;
@@ -1361,83 +1364,6 @@ smb_tree_is_connected(smb_tree_t *tree)
}
/*
- * Get the next open ofile in the list. A reference is taken on
- * the ofile, which can be released later with smb_ofile_release().
- *
- * If the specified ofile is NULL, search from the beginning of the
- * list. Otherwise, the search starts just after that ofile.
- *
- * Returns NULL if there are no open files in the list.
- */
-static smb_ofile_t *
-smb_tree_get_ofile(smb_tree_t *tree, smb_ofile_t *of)
-{
- smb_llist_t *ofile_list;
-
- ASSERT(tree);
- ASSERT(tree->t_magic == SMB_TREE_MAGIC);
-
- ofile_list = &tree->t_ofile_list;
- smb_llist_enter(ofile_list, RW_READER);
-
- if (of) {
- ASSERT(of->f_magic == SMB_OFILE_MAGIC);
- of = smb_llist_next(ofile_list, of);
- } else {
- of = smb_llist_head(ofile_list);
- }
-
- while (of) {
- if (smb_ofile_hold(of))
- break;
-
- of = smb_llist_next(ofile_list, of);
- }
-
- smb_llist_exit(ofile_list);
- return (of);
-}
-
-/*
- * smb_tree_get_odir
- *
- * Find the next odir in the tree's list of odirs, and obtain a
- * hold on it.
- * If the specified odir is NULL the search starts at the beginning
- * of the tree's odir list, otherwise the search starts after the
- * specified odir.
- */
-static smb_odir_t *
-smb_tree_get_odir(smb_tree_t *tree, smb_odir_t *od)
-{
- smb_llist_t *od_list;
-
- ASSERT(tree);
- ASSERT(tree->t_magic == SMB_TREE_MAGIC);
-
- od_list = &tree->t_odir_list;
- smb_llist_enter(od_list, RW_READER);
-
- if (od) {
- ASSERT(od->d_magic == SMB_ODIR_MAGIC);
- od = smb_llist_next(od_list, od);
- } else {
- od = smb_llist_head(od_list);
- }
-
- while (od) {
- ASSERT(od->d_magic == SMB_ODIR_MAGIC);
-
- if (smb_odir_hold(od))
- break;
- od = smb_llist_next(od_list, od);
- }
-
- smb_llist_exit(od_list);
- return (od);
-}
-
-/*
* smb_tree_close_odirs
*
* Close all open odirs in the tree's list which were opened by
@@ -1445,25 +1371,34 @@ smb_tree_get_odir(smb_tree_t *tree, smb_odir_t *od)
* If pid is zero, close all open odirs in the tree's list.
*/
static void
-smb_tree_close_odirs(smb_tree_t *tree, uint16_t pid)
+smb_tree_close_odirs(smb_tree_t *tree, uint32_t pid)
{
- smb_odir_t *od, *next_od;
+ smb_llist_t *od_list;
+ smb_odir_t *od;
ASSERT(tree);
ASSERT(tree->t_magic == SMB_TREE_MAGIC);
- od = smb_tree_get_odir(tree, NULL);
- while (od) {
+ od_list = &tree->t_odir_list;
+ smb_llist_enter(od_list, RW_READER);
+
+ for (od = smb_llist_head(od_list);
+ od != NULL;
+ od = smb_llist_next(od_list, od)) {
+
ASSERT(od->d_magic == SMB_ODIR_MAGIC);
ASSERT(od->d_tree == tree);
- next_od = smb_tree_get_odir(tree, od);
- if ((pid == 0) || (od->d_opened_by_pid == pid))
- smb_odir_close(od);
- smb_odir_release(od);
+ if (pid != 0 && od->d_opened_by_pid != pid)
+ continue;
- od = next_od;
+ if (smb_odir_hold(od)) {
+ smb_odir_close(od);
+ smb_odir_release(od);
+ }
}
+
+ smb_llist_exit(od_list);
}
static void
diff --git a/usr/src/uts/common/fs/smbsrv/smb_user.c b/usr/src/uts/common/fs/smbsrv/smb_user.c
index 0bfceb4ff4..74bb502c56 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_user.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_user.c
@@ -303,7 +303,6 @@ smb_user_logon(
* we always have an auth. socket to close.
*/
authsock = user->u_authsock;
- ASSERT(authsock != NULL);
user->u_authsock = NULL;
tmo = user->u_auth_tmo;
user->u_auth_tmo = NULL;
@@ -325,7 +324,8 @@ smb_user_logon(
(void) untimeout(tmo);
/* This close can block, so not under the mutex. */
- smb_authsock_close(user, authsock);
+ if (authsock != NULL)
+ smb_authsock_close(user, authsock);
return (0);
}
diff --git a/usr/src/uts/common/fs/smbsrv/smb_vfs.c b/usr/src/uts/common/fs/smbsrv/smb_vfs.c
deleted file mode 100644
index ae631e4ffa..0000000000
--- a/usr/src/uts/common/fs/smbsrv/smb_vfs.c
+++ /dev/null
@@ -1,164 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
- */
-
-#include <sys/vfs.h>
-#include <smbsrv/smb_ktypes.h>
-#include <smbsrv/smb_kproto.h>
-
-static smb_vfs_t *smb_vfs_find(smb_export_t *, vfs_t *);
-static void smb_vfs_destroy(smb_vfs_t *);
-
-/*
- * If a hold on the specified VFS has already been taken
- * then only increment the reference count of the corresponding
- * smb_vfs_t structure. If no smb_vfs_t structure has been created
- * yet for the specified VFS then create one and take a hold on
- * the VFS.
- */
-int
-smb_vfs_hold(smb_export_t *se, vfs_t *vfsp)
-{
- smb_vfs_t *smb_vfs;
- vnode_t *rootvp;
- int rc;
-
- if (se == NULL || vfsp == NULL)
- return (EINVAL);
-
- smb_llist_enter(&se->e_vfs_list, RW_WRITER);
-
- if ((smb_vfs = smb_vfs_find(se, vfsp)) != NULL) {
- smb_vfs->sv_refcnt++;
- DTRACE_PROBE1(smb_vfs_hold_hit, smb_vfs_t *, smb_vfs);
- smb_llist_exit(&se->e_vfs_list);
- return (0);
- }
-
- if ((rc = VFS_ROOT(vfsp, &rootvp)) != 0) {
- smb_llist_exit(&se->e_vfs_list);
- return (rc);
- }
-
- smb_vfs = kmem_cache_alloc(smb_kshare_cache_vfs, KM_SLEEP);
-
- bzero(smb_vfs, sizeof (smb_vfs_t));
-
- smb_vfs->sv_magic = SMB_VFS_MAGIC;
- smb_vfs->sv_refcnt = 1;
- smb_vfs->sv_vfsp = vfsp;
- /*
- * We have a hold on the root vnode of the file system
- * from the VFS_ROOT call above.
- */
- smb_vfs->sv_rootvp = rootvp;
-
- smb_llist_insert_head(&se->e_vfs_list, smb_vfs);
- DTRACE_PROBE1(smb_vfs_hold_miss, smb_vfs_t *, smb_vfs);
- smb_llist_exit(&se->e_vfs_list);
-
- return (0);
-}
-
-/*
- * smb_vfs_rele
- *
- * Decrements the reference count of the fs passed in. If the reference count
- * drops to zero the smb_vfs_t structure associated with the fs is freed.
- */
-void
-smb_vfs_rele(smb_export_t *se, vfs_t *vfsp)
-{
- smb_vfs_t *smb_vfs;
-
- ASSERT(vfsp);
-
- smb_llist_enter(&se->e_vfs_list, RW_WRITER);
- smb_vfs = smb_vfs_find(se, vfsp);
- DTRACE_PROBE1(smb_vfs_release, smb_vfs_t *, smb_vfs);
- if (smb_vfs) {
- ASSERT(smb_vfs->sv_refcnt);
- if (--smb_vfs->sv_refcnt == 0) {
- smb_llist_remove(&se->e_vfs_list, smb_vfs);
- smb_llist_exit(&se->e_vfs_list);
- smb_vfs_destroy(smb_vfs);
- return;
- }
- }
- smb_llist_exit(&se->e_vfs_list);
-}
-
-/*
- * smb_vfs_rele_all()
- *
- * Release all holds on root vnodes of file systems which were taken
- * due to the existence of at least one enabled share on the file system.
- * Called at driver close time.
- */
-void
-smb_vfs_rele_all(smb_export_t *se)
-{
- smb_vfs_t *smb_vfs;
-
- smb_llist_enter(&se->e_vfs_list, RW_WRITER);
- while ((smb_vfs = smb_llist_head(&se->e_vfs_list)) != NULL) {
-
- ASSERT(smb_vfs->sv_magic == SMB_VFS_MAGIC);
- DTRACE_PROBE1(smb_vfs_rele_all_hit, smb_vfs_t *, smb_vfs);
- smb_llist_remove(&se->e_vfs_list, smb_vfs);
- smb_vfs_destroy(smb_vfs);
- }
- smb_llist_exit(&se->e_vfs_list);
-}
-
-/*
- * Goes through the list of smb_vfs_t structure and returns the one matching
- * the vnode passed in. If no match is found a NULL pointer is returned.
- *
- * The list of smb_vfs_t structures has to have been entered prior calling
- * this function.
- */
-static smb_vfs_t *
-smb_vfs_find(smb_export_t *se, vfs_t *vfsp)
-{
- smb_vfs_t *smb_vfs;
-
- smb_vfs = smb_llist_head(&se->e_vfs_list);
- while (smb_vfs) {
- ASSERT(smb_vfs->sv_magic == SMB_VFS_MAGIC);
- if (smb_vfs->sv_vfsp == vfsp)
- return (smb_vfs);
- smb_vfs = smb_llist_next(&se->e_vfs_list, smb_vfs);
- }
-
- return (NULL);
-}
-
-static void
-smb_vfs_destroy(smb_vfs_t *smb_vfs)
-{
- VN_RELE(smb_vfs->sv_rootvp);
- smb_vfs->sv_magic = (uint32_t)~SMB_VFS_MAGIC;
- kmem_cache_free(smb_kshare_cache_vfs, smb_vfs);
-}
diff --git a/usr/src/uts/common/fs/smbsrv/smb_vops.c b/usr/src/uts/common/fs/smbsrv/smb_vops.c
index d2f0fd7085..4b0f99839f 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_vops.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_vops.c
@@ -608,8 +608,14 @@ smb_vop_lookup(
char *np = name;
char namebuf[MAXNAMELEN];
- if (*name == '\0')
- return (EINVAL);
+ if (*name == '\0') {
+ /*
+ * This happens creating named streams at the share root.
+ */
+ VN_HOLD(dvp);
+ *vpp = dvp;
+ return (0);
+ }
ASSERT(vpp);
*vpp = NULL;
diff --git a/usr/src/uts/common/io/vioblk/vioblk.c b/usr/src/uts/common/io/vioblk/vioblk.c
index 074d886857..8801a0e760 100644
--- a/usr/src/uts/common/io/vioblk/vioblk.c
+++ b/usr/src/uts/common/io/vioblk/vioblk.c
@@ -22,9 +22,50 @@
/*
* Copyright (c) 2015, Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2012, Alexey Zaytsev <alexey.zaytsev@gmail.com>
- * Copyright 2017, Joyent Inc.
+ * Copyright 2019 Joyent Inc.
*/
+/*
+ * VIRTIO BLOCK DRIVER
+ *
+ * This driver provides support for Virtio Block devices. Each driver instance
+ * attaches to a single underlying block device.
+ *
+ * REQUEST CHAIN LAYOUT
+ *
+ * Every request chain sent to the I/O queue has the following structure. Each
+ * box in the diagram represents a descriptor entry (i.e., a DMA cookie) within
+ * the chain:
+ *
+ * +-0-----------------------------------------+
+ * | struct virtio_blk_hdr |-----------------------\
+ * | (written by driver, read by device) | |
+ * +-1-----------------------------------------+ |
+ * | optional data payload |--\ |
+ * | (written by driver for write requests, | | |
+ * | or by device for read requests) | | |
+ * +-2-----------------------------------------+ | |
+ * | ,~` : |-cookies loaned |
+ * |/ : ,~`| | from blkdev |
+ * : / | | |
+ * +-(N - 1)-----------------------------------+ | |
+ * | ... end of data payload. | | |
+ * | | | |
+ * | |--/ |
+ * +-N-----------------------------------------+ |
+ * | status byte | |
+ * | (written by device, read by driver) |--------------------\ |
+ * +-------------------------------------------+ | |
+ * | |
+ * The memory for the header and status bytes (i.e., 0 and N above) | |
+ * is allocated as a single chunk by vioblk_alloc_reqs(): | |
+ * | |
+ * +-------------------------------------------+ | |
+ * | struct virtio_blk_hdr |<----------------------/
+ * +-------------------------------------------+ |
+ * | status byte |<-------------------/
+ * +-------------------------------------------+
+ */
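A condensed sketch of how the chain above is assembled, pieced together from the virtio_chain_append() calls that appear later in this change (error handling omitted; vic, vbr, xfer and dir are as in the request functions below):

/* Descriptor 0: request header, read by the device. */
(void) virtio_chain_append(vic, virtio_dma_cookie_pa(vbr->vbr_dma, 0),
    sizeof (struct vioblk_req_hdr), VIRTIO_DIR_DEVICE_READS);

/* Descriptors 1 .. N-1: data payload cookies loaned from blkdev. */
for (uint_t n = 0; n < xfer->x_ndmac; n++) {
	ddi_dma_cookie_t dmac;

	if (n == 0)
		dmac = xfer->x_dmac;
	else
		ddi_dma_nextcookie(xfer->x_dmah, &dmac);

	(void) virtio_chain_append(vic, dmac.dmac_laddress,
	    dmac.dmac_size, dir);
}

/* Descriptor N: status byte, written by the device. */
(void) virtio_chain_append(vic,
    virtio_dma_cookie_pa(vbr->vbr_dma, 0) + sizeof (struct vioblk_req_hdr),
    sizeof (uint8_t), VIRTIO_DIR_DEVICE_WRITES);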
#include <sys/modctl.h>
#include <sys/blkdev.h>
@@ -43,402 +84,429 @@
#include <sys/debug.h>
#include <sys/pci.h>
#include <sys/containerof.h>
-#include "virtiovar.h"
-#include "virtioreg.h"
-
-/* Feature bits */
-#define VIRTIO_BLK_F_BARRIER (1<<0)
-#define VIRTIO_BLK_F_SIZE_MAX (1<<1)
-#define VIRTIO_BLK_F_SEG_MAX (1<<2)
-#define VIRTIO_BLK_F_GEOMETRY (1<<4)
-#define VIRTIO_BLK_F_RO (1<<5)
-#define VIRTIO_BLK_F_BLK_SIZE (1<<6)
-#define VIRTIO_BLK_F_SCSI (1<<7)
-#define VIRTIO_BLK_F_FLUSH (1<<9)
-#define VIRTIO_BLK_F_TOPOLOGY (1<<10)
-
-/* Configuration registers */
-#define VIRTIO_BLK_CONFIG_CAPACITY 0 /* 64bit */
-#define VIRTIO_BLK_CONFIG_SIZE_MAX 8 /* 32bit */
-#define VIRTIO_BLK_CONFIG_SEG_MAX 12 /* 32bit */
-#define VIRTIO_BLK_CONFIG_GEOMETRY_C 16 /* 16bit */
-#define VIRTIO_BLK_CONFIG_GEOMETRY_H 18 /* 8bit */
-#define VIRTIO_BLK_CONFIG_GEOMETRY_S 19 /* 8bit */
-#define VIRTIO_BLK_CONFIG_BLK_SIZE 20 /* 32bit */
-#define VIRTIO_BLK_CONFIG_TOPO_PBEXP 24 /* 8bit */
-#define VIRTIO_BLK_CONFIG_TOPO_ALIGN 25 /* 8bit */
-#define VIRTIO_BLK_CONFIG_TOPO_MIN_SZ 26 /* 16bit */
-#define VIRTIO_BLK_CONFIG_TOPO_OPT_SZ 28 /* 32bit */
-
-/* Command */
-#define VIRTIO_BLK_T_IN 0
-#define VIRTIO_BLK_T_OUT 1
-#define VIRTIO_BLK_T_SCSI_CMD 2
-#define VIRTIO_BLK_T_SCSI_CMD_OUT 3
-#define VIRTIO_BLK_T_FLUSH 4
-#define VIRTIO_BLK_T_FLUSH_OUT 5
-#define VIRTIO_BLK_T_GET_ID 8
-#define VIRTIO_BLK_T_BARRIER 0x80000000
-
-#define VIRTIO_BLK_ID_BYTES 20 /* devid */
-
-/* Statuses */
-#define VIRTIO_BLK_S_OK 0
-#define VIRTIO_BLK_S_IOERR 1
-#define VIRTIO_BLK_S_UNSUPP 2
-
-#define DEF_MAXINDIRECT (128)
-#define DEF_MAXSECTOR (4096)
-
-#define VIOBLK_POISON 0xdead0001dead0001
+#include <sys/ctype.h>
+#include <sys/sysmacros.h>
-/*
- * Static Variables.
- */
-static char vioblk_ident[] = "VirtIO block driver";
+#include "virtio.h"
+#include "vioblk.h"
-/* Request header structure */
-struct vioblk_req_hdr {
- uint32_t type; /* VIRTIO_BLK_T_* */
- uint32_t ioprio;
- uint64_t sector;
-};
-struct vioblk_req {
- struct vioblk_req_hdr hdr;
- uint8_t status;
- uint8_t unused[3];
- unsigned int ndmac;
- ddi_dma_handle_t dmah;
- ddi_dma_handle_t bd_dmah;
- ddi_dma_cookie_t dmac;
- bd_xfer_t *xfer;
-};
+static void vioblk_get_id(vioblk_t *);
+uint_t vioblk_int_handler(caddr_t, caddr_t);
+static uint_t vioblk_poll(vioblk_t *);
+static int vioblk_quiesce(dev_info_t *);
+static int vioblk_attach(dev_info_t *, ddi_attach_cmd_t);
+static int vioblk_detach(dev_info_t *, ddi_detach_cmd_t);
-struct vioblk_stats {
- struct kstat_named sts_rw_outofmemory;
- struct kstat_named sts_rw_badoffset;
- struct kstat_named sts_rw_queuemax;
- struct kstat_named sts_rw_cookiesmax;
- struct kstat_named sts_rw_cacheflush;
- struct kstat_named sts_intr_queuemax;
- struct kstat_named sts_intr_total;
- struct kstat_named sts_io_errors;
- struct kstat_named sts_unsupp_errors;
- struct kstat_named sts_nxio_errors;
+
+static struct dev_ops vioblk_dev_ops = {
+ .devo_rev = DEVO_REV,
+ .devo_refcnt = 0,
+
+ .devo_attach = vioblk_attach,
+ .devo_detach = vioblk_detach,
+ .devo_quiesce = vioblk_quiesce,
+
+ .devo_getinfo = ddi_no_info,
+ .devo_identify = nulldev,
+ .devo_probe = nulldev,
+ .devo_reset = nodev,
+ .devo_cb_ops = NULL,
+ .devo_bus_ops = NULL,
+ .devo_power = NULL,
};
-struct vioblk_lstats {
- uint64_t rw_cacheflush;
- uint64_t intr_total;
- unsigned int rw_cookiesmax;
- unsigned int intr_queuemax;
- unsigned int io_errors;
- unsigned int unsupp_errors;
- unsigned int nxio_errors;
+static struct modldrv vioblk_modldrv = {
+ .drv_modops = &mod_driverops,
+ .drv_linkinfo = "VIRTIO block driver",
+ .drv_dev_ops = &vioblk_dev_ops
};
-struct vioblk_softc {
- dev_info_t *sc_dev; /* mirrors virtio_softc->sc_dev */
- struct virtio_softc sc_virtio;
- struct virtqueue *sc_vq;
- bd_handle_t bd_h;
- struct vioblk_req *sc_reqs;
- struct vioblk_stats *ks_data;
- kstat_t *sc_intrstat;
- uint64_t sc_capacity;
- uint64_t sc_nblks;
- struct vioblk_lstats sc_stats;
- short sc_blkflags;
- boolean_t sc_in_poll_mode;
- boolean_t sc_readonly;
- int sc_blk_size;
- int sc_pblk_size;
- int sc_seg_max;
- int sc_seg_size_max;
- kmutex_t lock_devid;
- kcondvar_t cv_devid;
- char devid[VIRTIO_BLK_ID_BYTES + 1];
+static struct modlinkage vioblk_modlinkage = {
+ .ml_rev = MODREV_1,
+ .ml_linkage = { &vioblk_modldrv, NULL }
};
-static int vioblk_get_id(struct vioblk_softc *sc);
-
-static int vioblk_read(void *arg, bd_xfer_t *xfer);
-static int vioblk_write(void *arg, bd_xfer_t *xfer);
-static int vioblk_flush(void *arg, bd_xfer_t *xfer);
-static void vioblk_driveinfo(void *arg, bd_drive_t *drive);
-static int vioblk_mediainfo(void *arg, bd_media_t *media);
-static int vioblk_devid_init(void *, dev_info_t *, ddi_devid_t *);
-uint_t vioblk_int_handler(caddr_t arg1, caddr_t arg2);
-
-static bd_ops_t vioblk_ops = {
- BD_OPS_VERSION_0,
- vioblk_driveinfo,
- vioblk_mediainfo,
- vioblk_devid_init,
- vioblk_flush,
- vioblk_read,
- vioblk_write,
+/*
+ * DMA attribute template for header and status blocks. We also make a
+ * per-instance copy of this template with negotiated sizes from the device for
+ * blkdev.
+ */
+static const ddi_dma_attr_t vioblk_dma_attr = {
+ .dma_attr_version = DMA_ATTR_V0,
+ .dma_attr_addr_lo = 0x0000000000000000,
+ .dma_attr_addr_hi = 0xFFFFFFFFFFFFFFFF,
+ .dma_attr_count_max = 0x00000000FFFFFFFF,
+ .dma_attr_align = 1,
+ .dma_attr_burstsizes = 1,
+ .dma_attr_minxfer = 1,
+ .dma_attr_maxxfer = 0x00000000FFFFFFFF,
+ .dma_attr_seg = 0x00000000FFFFFFFF,
+ .dma_attr_sgllen = 1,
+ .dma_attr_granular = 1,
+ .dma_attr_flags = 0
};
-static int vioblk_quiesce(dev_info_t *);
-static int vioblk_attach(dev_info_t *, ddi_attach_cmd_t);
-static int vioblk_detach(dev_info_t *, ddi_detach_cmd_t);
-static struct dev_ops vioblk_dev_ops = {
- DEVO_REV,
- 0,
- ddi_no_info,
- nulldev, /* identify */
- nulldev, /* probe */
- vioblk_attach, /* attach */
- vioblk_detach, /* detach */
- nodev, /* reset */
- NULL, /* cb_ops */
- NULL, /* bus_ops */
- NULL, /* power */
- vioblk_quiesce /* quiesce */
-};
+static vioblk_req_t *
+vioblk_req_alloc(vioblk_t *vib)
+{
+ vioblk_req_t *vbr;
+ VERIFY(MUTEX_HELD(&vib->vib_mutex));
+ if ((vbr = list_remove_head(&vib->vib_reqs)) == NULL) {
+ return (NULL);
+ }
+ vib->vib_nreqs_alloc++;
-/* Standard Module linkage initialization for a Streams driver */
-extern struct mod_ops mod_driverops;
+ VERIFY0(vbr->vbr_status);
+ vbr->vbr_status |= VIOBLK_REQSTAT_ALLOCATED;
-static struct modldrv modldrv = {
- &mod_driverops, /* Type of module. This one is a driver */
- vioblk_ident, /* short description */
- &vioblk_dev_ops /* driver specific ops */
-};
+ VERIFY3P(vbr->vbr_xfer, ==, NULL);
+ VERIFY3S(vbr->vbr_error, ==, 0);
-static struct modlinkage modlinkage = {
- MODREV_1,
- {
- (void *)&modldrv,
- NULL,
- },
-};
+ return (vbr);
+}
-ddi_device_acc_attr_t vioblk_attr = {
- DDI_DEVICE_ATTR_V0,
- DDI_NEVERSWAP_ACC, /* virtio is always native byte order */
- DDI_STORECACHING_OK_ACC,
- DDI_DEFAULT_ACC
-};
+static void
+vioblk_req_free(vioblk_t *vib, vioblk_req_t *vbr)
+{
+ VERIFY(MUTEX_HELD(&vib->vib_mutex));
-/* DMA attr for the header/status blocks. */
-static ddi_dma_attr_t vioblk_req_dma_attr = {
- DMA_ATTR_V0, /* dma_attr version */
- 0, /* dma_attr_addr_lo */
- 0xFFFFFFFFFFFFFFFFull, /* dma_attr_addr_hi */
- 0x00000000FFFFFFFFull, /* dma_attr_count_max */
- 1, /* dma_attr_align */
- 1, /* dma_attr_burstsizes */
- 1, /* dma_attr_minxfer */
- 0xFFFFFFFFull, /* dma_attr_maxxfer */
- 0xFFFFFFFFFFFFFFFFull, /* dma_attr_seg */
- 1, /* dma_attr_sgllen */
- 1, /* dma_attr_granular */
- 0, /* dma_attr_flags */
-};
+ /*
+ * Check that this request was allocated, then zero the status field to
+ * clear all status bits.
+ */
+ VERIFY(vbr->vbr_status & VIOBLK_REQSTAT_ALLOCATED);
+ vbr->vbr_status = 0;
-/* DMA attr for the data blocks. */
-static ddi_dma_attr_t vioblk_bd_dma_attr = {
- DMA_ATTR_V0, /* dma_attr version */
- 0, /* dma_attr_addr_lo */
- 0xFFFFFFFFFFFFFFFFull, /* dma_attr_addr_hi */
- 0x00000000FFFFFFFFull, /* dma_attr_count_max */
- 1, /* dma_attr_align */
- 1, /* dma_attr_burstsizes */
- 1, /* dma_attr_minxfer */
- 0, /* dma_attr_maxxfer, set in attach */
- 0xFFFFFFFFFFFFFFFFull, /* dma_attr_seg */
- 0, /* dma_attr_sgllen, set in attach */
- 1, /* dma_attr_granular */
- 0, /* dma_attr_flags */
-};
+ vbr->vbr_xfer = NULL;
+ vbr->vbr_error = 0;
+ vbr->vbr_type = 0;
-static int
-vioblk_rw(struct vioblk_softc *sc, bd_xfer_t *xfer, int type,
- uint32_t len)
+ list_insert_head(&vib->vib_reqs, vbr);
+
+ VERIFY3U(vib->vib_nreqs_alloc, >, 0);
+ vib->vib_nreqs_alloc--;
+}
+
+static void
+vioblk_complete(vioblk_t *vib, vioblk_req_t *vbr)
{
- struct vioblk_req *req;
- struct vq_entry *ve_hdr;
- int total_cookies, write;
+ VERIFY(MUTEX_HELD(&vib->vib_mutex));
- write = (type == VIRTIO_BLK_T_OUT ||
- type == VIRTIO_BLK_T_FLUSH_OUT) ? 1 : 0;
- total_cookies = 2;
+ VERIFY(!(vbr->vbr_status & VIOBLK_REQSTAT_COMPLETE));
+ vbr->vbr_status |= VIOBLK_REQSTAT_COMPLETE;
- if ((xfer->x_blkno + xfer->x_nblks) > sc->sc_nblks) {
- sc->ks_data->sts_rw_badoffset.value.ui64++;
- return (EINVAL);
+ if (vbr->vbr_type == VIRTIO_BLK_T_FLUSH) {
+ vib->vib_stats->vbs_rw_cacheflush.value.ui64++;
}
- /* allocate top entry */
- ve_hdr = vq_alloc_entry(sc->sc_vq);
- if (!ve_hdr) {
- sc->ks_data->sts_rw_outofmemory.value.ui64++;
- return (ENOMEM);
+ if (vbr->vbr_xfer != NULL) {
+ /*
+ * This is a blkdev framework request.
+ */
+ mutex_exit(&vib->vib_mutex);
+ bd_xfer_done(vbr->vbr_xfer, vbr->vbr_error);
+ mutex_enter(&vib->vib_mutex);
+ vbr->vbr_xfer = NULL;
}
+}
- /* getting request */
- req = &sc->sc_reqs[ve_hdr->qe_index];
- req->hdr.type = type;
- req->hdr.ioprio = 0;
- req->hdr.sector = xfer->x_blkno;
- req->xfer = xfer;
-
- /* Header */
- virtio_ve_add_indirect_buf(ve_hdr, req->dmac.dmac_laddress,
- sizeof (struct vioblk_req_hdr), B_TRUE);
-
- /* Payload */
- if (len > 0) {
- virtio_ve_add_cookie(ve_hdr, xfer->x_dmah, xfer->x_dmac,
- xfer->x_ndmac, write ? B_TRUE : B_FALSE);
- total_cookies += xfer->x_ndmac;
+static virtio_chain_t *
+vioblk_common_start(vioblk_t *vib, int type, uint64_t sector,
+ boolean_t polled)
+{
+ vioblk_req_t *vbr = NULL;
+ virtio_chain_t *vic = NULL;
+
+ if ((vbr = vioblk_req_alloc(vib)) == NULL) {
+ vib->vib_stats->vbs_rw_outofmemory.value.ui64++;
+ return (NULL);
+ }
+ vbr->vbr_type = type;
+
+ if (polled) {
+ /*
+ * Mark this command as polled so that we can wait on it
+ * ourselves.
+ */
+ vbr->vbr_status |= VIOBLK_REQSTAT_POLLED;
}
- /* Status */
- virtio_ve_add_indirect_buf(ve_hdr,
- req->dmac.dmac_laddress + sizeof (struct vioblk_req_hdr),
- sizeof (uint8_t), B_FALSE);
+ if ((vic = virtio_chain_alloc(vib->vib_vq, KM_NOSLEEP)) == NULL) {
+ vib->vib_stats->vbs_rw_outofmemory.value.ui64++;
+ goto fail;
+ }
- /* sending the whole chain to the device */
- virtio_push_chain(ve_hdr, B_TRUE);
+ struct vioblk_req_hdr vbh;
+ vbh.vbh_type = type;
+ vbh.vbh_ioprio = 0;
+ vbh.vbh_sector = sector;
+ bcopy(&vbh, virtio_dma_va(vbr->vbr_dma, 0), sizeof (vbh));
- if (sc->sc_stats.rw_cookiesmax < total_cookies)
- sc->sc_stats.rw_cookiesmax = total_cookies;
+ virtio_chain_data_set(vic, vbr);
- return (DDI_SUCCESS);
+ /*
+ * Put the header in the first descriptor. See the block comment at
+ * the top of the file for more details on the chain layout.
+ */
+ if (virtio_chain_append(vic, virtio_dma_cookie_pa(vbr->vbr_dma, 0),
+ sizeof (struct vioblk_req_hdr), VIRTIO_DIR_DEVICE_READS) !=
+ DDI_SUCCESS) {
+ goto fail;
+ }
+
+ return (vic);
+
+fail:
+ vbr->vbr_xfer = NULL;
+ vioblk_req_free(vib, vbr);
+ if (vic != NULL) {
+ virtio_chain_free(vic);
+ }
+ return (NULL);
}
-/*
- * Now in polling mode. Interrupts are off, so we
- * 1) poll for the already queued requests to complete.
- * 2) push our request.
- * 3) wait for our request to complete.
- */
static int
-vioblk_rw_poll(struct vioblk_softc *sc, bd_xfer_t *xfer,
- int type, uint32_t len)
+vioblk_common_submit(vioblk_t *vib, virtio_chain_t *vic)
{
- clock_t tmout;
- int ret;
+ int r;
+ vioblk_req_t *vbr = virtio_chain_data(vic);
- ASSERT(xfer->x_flags & BD_XFER_POLL);
+ VERIFY(MUTEX_HELD(&vib->vib_mutex));
- /* Prevent a hard hang. */
- tmout = drv_usectohz(30000000);
-
- /* Poll for an empty queue */
- while (vq_num_used(sc->sc_vq)) {
- /* Check if any pending requests completed. */
- ret = vioblk_int_handler((caddr_t)&sc->sc_virtio, NULL);
- if (ret != DDI_INTR_CLAIMED) {
- drv_usecwait(10);
- tmout -= 10;
- return (ETIMEDOUT);
- }
+ /*
+ * The device will write the status byte into this last descriptor.
+ * See the block comment at the top of the file for more details on the
+ * chain layout.
+ */
+ if (virtio_chain_append(vic, virtio_dma_cookie_pa(vbr->vbr_dma, 0) +
+ sizeof (struct vioblk_req_hdr), sizeof (uint8_t),
+ VIRTIO_DIR_DEVICE_WRITES) != DDI_SUCCESS) {
+ r = ENOMEM;
+ goto out;
}
- ret = vioblk_rw(sc, xfer, type, len);
- if (ret)
- return (ret);
+ virtio_dma_sync(vbr->vbr_dma, DDI_DMA_SYNC_FORDEV);
+ virtio_chain_submit(vic, B_TRUE);
+
+ if (!(vbr->vbr_status & VIOBLK_REQSTAT_POLLED)) {
+ /*
+ * This is not a polled request. Our request will be freed and
+ * the caller notified later in vioblk_poll().
+ */
+ return (0);
+ }
- tmout = drv_usectohz(30000000);
- /* Poll for an empty queue again. */
- while (vq_num_used(sc->sc_vq)) {
- /* Check if any pending requests completed. */
- ret = vioblk_int_handler((caddr_t)&sc->sc_virtio, NULL);
- if (ret != DDI_INTR_CLAIMED) {
+ /*
+ * This is a polled request. We need to block here and wait for the
+ * device to complete request processing.
+ */
+ while (!(vbr->vbr_status & VIOBLK_REQSTAT_POLL_COMPLETE)) {
+ if (ddi_in_panic()) {
+ /*
+ * When panicking, interrupts are disabled. We must
+ * poll the queue manually.
+ */
drv_usecwait(10);
- tmout -= 10;
- return (ETIMEDOUT);
+ (void) vioblk_poll(vib);
+ continue;
}
+
+ /*
+ * When not panicking, the device will interrupt on command
+ * completion and vioblk_poll() will be called to wake us up.
+ */
+ cv_wait(&vib->vib_cv, &vib->vib_mutex);
}
- return (DDI_SUCCESS);
+ vioblk_complete(vib, vbr);
+ r = vbr->vbr_error;
+
+out:
+ vioblk_req_free(vib, vbr);
+ virtio_chain_free(vic);
+ return (r);
}
static int
-vioblk_read(void *arg, bd_xfer_t *xfer)
+vioblk_internal(vioblk_t *vib, int type, virtio_dma_t *dma,
+ uint64_t sector, virtio_direction_t dir)
{
- int ret;
- struct vioblk_softc *sc = (void *)arg;
+ virtio_chain_t *vic;
+ vioblk_req_t *vbr;
+ int r;
- if (xfer->x_flags & BD_XFER_POLL) {
- if (!sc->sc_in_poll_mode) {
- virtio_stop_vq_intr(sc->sc_vq);
- sc->sc_in_poll_mode = 1;
- }
+ VERIFY(MUTEX_HELD(&vib->vib_mutex));
- ret = vioblk_rw_poll(sc, xfer, VIRTIO_BLK_T_IN,
- xfer->x_nblks * DEV_BSIZE);
- } else {
- if (sc->sc_in_poll_mode) {
- virtio_start_vq_intr(sc->sc_vq);
- sc->sc_in_poll_mode = 0;
- }
+ /*
+ * Allocate a polled request.
+ */
+ if ((vic = vioblk_common_start(vib, type, sector, B_TRUE)) == NULL) {
+ return (ENOMEM);
+ }
+ vbr = virtio_chain_data(vic);
- ret = vioblk_rw(sc, xfer, VIRTIO_BLK_T_IN,
- xfer->x_nblks * DEV_BSIZE);
+ /*
+ * If there is a request payload, it goes between the header and the
+ * status byte. See the block comment at the top of the file for more
+ * detail on the chain layout.
+ */
+ if (dma != NULL) {
+ for (uint_t n = 0; n < virtio_dma_ncookies(dma); n++) {
+ if (virtio_chain_append(vic,
+ virtio_dma_cookie_pa(dma, n),
+ virtio_dma_cookie_size(dma, n), dir) !=
+ DDI_SUCCESS) {
+ r = ENOMEM;
+ goto out;
+ }
+ }
}
- return (ret);
+ return (vioblk_common_submit(vib, vic));
+
+out:
+ vioblk_req_free(vib, vbr);
+ virtio_chain_free(vic);
+ return (r);
}
static int
-vioblk_write(void *arg, bd_xfer_t *xfer)
+vioblk_request(vioblk_t *vib, bd_xfer_t *xfer, int type)
{
- int ret;
- struct vioblk_softc *sc = (void *)arg;
+ virtio_chain_t *vic = NULL;
+ vioblk_req_t *vbr = NULL;
+ uint_t total_cookies = 2;
+ boolean_t polled = (xfer->x_flags & BD_XFER_POLL) != 0;
+ int r;
- if (xfer->x_flags & BD_XFER_POLL) {
- if (!sc->sc_in_poll_mode) {
- virtio_stop_vq_intr(sc->sc_vq);
- sc->sc_in_poll_mode = 1;
- }
+ VERIFY(MUTEX_HELD(&vib->vib_mutex));
+
+ /*
+ * Ensure that this request falls within the advertised size of the
+ * block device. Be careful to avoid overflow.
+ */
+ if (xfer->x_nblks > SIZE_MAX - xfer->x_blkno ||
+ (xfer->x_blkno + xfer->x_nblks) > vib->vib_nblks) {
+ vib->vib_stats->vbs_rw_badoffset.value.ui64++;
+ return (EINVAL);
+ }
- ret = vioblk_rw_poll(sc, xfer, VIRTIO_BLK_T_OUT,
- xfer->x_nblks * DEV_BSIZE);
- } else {
- if (sc->sc_in_poll_mode) {
- virtio_start_vq_intr(sc->sc_vq);
- sc->sc_in_poll_mode = 0;
+ if ((vic = vioblk_common_start(vib, type, xfer->x_blkno, polled)) ==
+ NULL) {
+ return (ENOMEM);
+ }
+ vbr = virtio_chain_data(vic);
+ vbr->vbr_xfer = xfer;
+
+ /*
+ * If there is a request payload, it goes between the header and the
+ * status byte. See the block comment at the top of the file for more
+ * detail on the chain layout.
+ */
+ if ((type == VIRTIO_BLK_T_IN || type == VIRTIO_BLK_T_OUT) &&
+ xfer->x_nblks > 0) {
+ virtio_direction_t dir = (type == VIRTIO_BLK_T_OUT) ?
+ VIRTIO_DIR_DEVICE_READS : VIRTIO_DIR_DEVICE_WRITES;
+
+ for (uint_t n = 0; n < xfer->x_ndmac; n++) {
+ ddi_dma_cookie_t dmac;
+
+ if (n == 0) {
+ /*
+ * The first cookie is in the blkdev request.
+ */
+ dmac = xfer->x_dmac;
+ } else {
+ ddi_dma_nextcookie(xfer->x_dmah, &dmac);
+ }
+
+ if (virtio_chain_append(vic, dmac.dmac_laddress,
+ dmac.dmac_size, dir) != DDI_SUCCESS) {
+ r = ENOMEM;
+ goto fail;
+ }
}
- ret = vioblk_rw(sc, xfer, VIRTIO_BLK_T_OUT,
- xfer->x_nblks * DEV_BSIZE);
+ total_cookies += xfer->x_ndmac;
+
+ } else if (xfer->x_nblks > 0) {
+ dev_err(vib->vib_dip, CE_PANIC,
+ "request of type %d had payload length of %lu blocks", type,
+ xfer->x_nblks);
+ }
+
+ if (vib->vib_stats->vbs_rw_cookiesmax.value.ui32 < total_cookies) {
+ vib->vib_stats->vbs_rw_cookiesmax.value.ui32 = total_cookies;
}
- return (ret);
+
+ return (vioblk_common_submit(vib, vic));
+
+fail:
+ vbr->vbr_xfer = NULL;
+ vioblk_req_free(vib, vbr);
+ virtio_chain_free(vic);
+ return (r);
}
static int
-vioblk_flush(void *arg, bd_xfer_t *xfer)
+vioblk_bd_read(void *arg, bd_xfer_t *xfer)
{
- int ret;
- struct vioblk_softc *sc = (void *)arg;
+ vioblk_t *vib = arg;
+ int r;
+
+ mutex_enter(&vib->vib_mutex);
+ r = vioblk_request(vib, xfer, VIRTIO_BLK_T_IN);
+ mutex_exit(&vib->vib_mutex);
- ASSERT((xfer->x_flags & BD_XFER_POLL) == 0);
+ return (r);
+}
- ret = vioblk_rw(sc, xfer, VIRTIO_BLK_T_FLUSH_OUT,
- xfer->x_nblks * DEV_BSIZE);
+static int
+vioblk_bd_write(void *arg, bd_xfer_t *xfer)
+{
+ vioblk_t *vib = arg;
+ int r;
- if (!ret)
- sc->sc_stats.rw_cacheflush++;
+ mutex_enter(&vib->vib_mutex);
+ r = vioblk_request(vib, xfer, VIRTIO_BLK_T_OUT);
+ mutex_exit(&vib->vib_mutex);
- return (ret);
+ return (r);
}
+static int
+vioblk_bd_flush(void *arg, bd_xfer_t *xfer)
+{
+ vioblk_t *vib = arg;
+ int r;
+
+ mutex_enter(&vib->vib_mutex);
+ if (!virtio_feature_present(vib->vib_virtio, VIRTIO_BLK_F_FLUSH)) {
+ /*
+ * We don't really expect to get here, because if we did not
+ * negotiate the flush feature we would not have installed this
+ * function in the blkdev ops vector.
+ */
+ mutex_exit(&vib->vib_mutex);
+ return (ENOTSUP);
+ }
+
+ r = vioblk_request(vib, xfer, VIRTIO_BLK_T_FLUSH);
+ mutex_exit(&vib->vib_mutex);
+
+ return (r);
+}
static void
-vioblk_driveinfo(void *arg, bd_drive_t *drive)
+vioblk_bd_driveinfo(void *arg, bd_drive_t *drive)
{
- struct vioblk_softc *sc = (void *)arg;
+ vioblk_t *vib = arg;
- drive->d_qsize = sc->sc_vq->vq_num;
+ drive->d_qsize = vib->vib_reqs_capacity;
drive->d_removable = B_FALSE;
drive->d_hotpluggable = B_TRUE;
drive->d_target = 0;
@@ -450,8 +518,7 @@ vioblk_driveinfo(void *arg, bd_drive_t *drive)
drive->d_product = "Block Device";
drive->d_product_len = strlen(drive->d_product);
- (void) vioblk_get_id(sc);
- drive->d_serial = sc->devid;
+ drive->d_serial = vib->vib_devid;
drive->d_serial_len = strlen(drive->d_serial);
drive->d_revision = "0000";
@@ -459,618 +526,501 @@ vioblk_driveinfo(void *arg, bd_drive_t *drive)
}
static int
-vioblk_mediainfo(void *arg, bd_media_t *media)
+vioblk_bd_mediainfo(void *arg, bd_media_t *media)
{
- struct vioblk_softc *sc = (void *)arg;
+ vioblk_t *vib = (void *)arg;
- media->m_nblks = sc->sc_nblks;
- media->m_blksize = sc->sc_blk_size;
- media->m_readonly = sc->sc_readonly;
- media->m_pblksize = sc->sc_pblk_size;
+ /*
+ * The device protocol is specified in terms of 512 byte logical
+ * blocks, regardless of the recommended I/O size which might be
+ * larger.
+ */
+ media->m_nblks = vib->vib_nblks;
+ media->m_blksize = DEV_BSIZE;
+
+ media->m_readonly = vib->vib_readonly;
+ media->m_pblksize = vib->vib_pblk_size;
return (0);
}
-static int
-vioblk_get_id(struct vioblk_softc *sc)
+static void
+vioblk_get_id(vioblk_t *vib)
{
- clock_t deadline;
- int ret;
- bd_xfer_t xfer;
-
- deadline = ddi_get_lbolt() + (clock_t)drv_usectohz(3 * 1000000);
- (void) memset(&xfer, 0, sizeof (bd_xfer_t));
- xfer.x_nblks = 1;
-
- ret = ddi_dma_alloc_handle(sc->sc_dev, &vioblk_bd_dma_attr,
- DDI_DMA_SLEEP, NULL, &xfer.x_dmah);
- if (ret != DDI_SUCCESS)
- goto out_alloc;
-
- ret = ddi_dma_addr_bind_handle(xfer.x_dmah, NULL, (caddr_t)&sc->devid,
- VIRTIO_BLK_ID_BYTES, DDI_DMA_READ | DDI_DMA_CONSISTENT,
- DDI_DMA_SLEEP, NULL, &xfer.x_dmac, &xfer.x_ndmac);
- if (ret != DDI_DMA_MAPPED) {
- ret = DDI_FAILURE;
- goto out_map;
- }
+ virtio_dma_t *dma;
+ int r;
- mutex_enter(&sc->lock_devid);
-
- ret = vioblk_rw(sc, &xfer, VIRTIO_BLK_T_GET_ID,
- VIRTIO_BLK_ID_BYTES);
- if (ret) {
- mutex_exit(&sc->lock_devid);
- goto out_rw;
+ if ((dma = virtio_dma_alloc(vib->vib_virtio, VIRTIO_BLK_ID_BYTES,
+ &vioblk_dma_attr, DDI_DMA_CONSISTENT | DDI_DMA_READ,
+ KM_SLEEP)) == NULL) {
+ return;
}
- /* wait for reply */
- ret = cv_timedwait(&sc->cv_devid, &sc->lock_devid, deadline);
- mutex_exit(&sc->lock_devid);
-
- (void) ddi_dma_unbind_handle(xfer.x_dmah);
- ddi_dma_free_handle(&xfer.x_dmah);
+ mutex_enter(&vib->vib_mutex);
+ if ((r = vioblk_internal(vib, VIRTIO_BLK_T_GET_ID, dma, 0,
+ VIRTIO_DIR_DEVICE_WRITES)) == 0) {
+ const char *b = virtio_dma_va(dma, 0);
+ uint_t pos = 0;
- /* timeout */
- if (ret < 0) {
- dev_err(sc->sc_dev, CE_WARN,
- "Cannot get devid from the device");
- return (DDI_FAILURE);
- }
-
- return (0);
+ /*
+ * Save the entire response for debugging purposes.
+ */
+ bcopy(virtio_dma_va(dma, 0), vib->vib_rawid,
+ VIRTIO_BLK_ID_BYTES);
-out_rw:
- (void) ddi_dma_unbind_handle(xfer.x_dmah);
-out_map:
- ddi_dma_free_handle(&xfer.x_dmah);
-out_alloc:
- return (ret);
-}
+ /*
+ * Process the returned ID.
+ */
+ bzero(vib->vib_devid, sizeof (vib->vib_devid));
+ for (uint_t n = 0; n < VIRTIO_BLK_ID_BYTES; n++) {
+ if (isalnum(b[n]) || b[n] == '-' || b[n] == '_') {
+ /*
+ * Accept a subset of printable ASCII
+ * characters.
+ */
+ vib->vib_devid[pos++] = b[n];
+ } else {
+ /*
+ * Stop processing at the first sign of
+ * trouble.
+ */
+ break;
+ }
+ }
-static int
-vioblk_devid_init(void *arg, dev_info_t *devinfo, ddi_devid_t *devid)
-{
- struct vioblk_softc *sc = (void *)arg;
- int ret;
-
- ret = vioblk_get_id(sc);
- if (ret != DDI_SUCCESS)
- return (ret);
-
- ret = ddi_devid_init(devinfo, DEVID_ATA_SERIAL,
- VIRTIO_BLK_ID_BYTES, sc->devid, devid);
- if (ret != DDI_SUCCESS) {
- dev_err(devinfo, CE_WARN, "Cannot build devid from the device");
- return (ret);
+ vib->vib_devid_fetched = B_TRUE;
}
+ mutex_exit(&vib->vib_mutex);
- dev_debug(sc->sc_dev, CE_NOTE,
- "devid %x%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x",
- sc->devid[0], sc->devid[1], sc->devid[2], sc->devid[3],
- sc->devid[4], sc->devid[5], sc->devid[6], sc->devid[7],
- sc->devid[8], sc->devid[9], sc->devid[10], sc->devid[11],
- sc->devid[12], sc->devid[13], sc->devid[14], sc->devid[15],
- sc->devid[16], sc->devid[17], sc->devid[18], sc->devid[19]);
-
- return (0);
-}
-
-static void
-vioblk_show_features(struct vioblk_softc *sc, const char *prefix,
- uint32_t features)
-{
- char buf[512];
- char *bufp = buf;
- char *bufend = buf + sizeof (buf);
-
- /* LINTED E_PTRDIFF_OVERFLOW */
- bufp += snprintf(bufp, bufend - bufp, prefix);
-
- /* LINTED E_PTRDIFF_OVERFLOW */
- bufp += virtio_show_features(features, bufp, bufend - bufp);
-
-
- /* LINTED E_PTRDIFF_OVERFLOW */
- bufp += snprintf(bufp, bufend - bufp, "Vioblk ( ");
-
- if (features & VIRTIO_BLK_F_BARRIER)
- /* LINTED E_PTRDIFF_OVERFLOW */
- bufp += snprintf(bufp, bufend - bufp, "BARRIER ");
- if (features & VIRTIO_BLK_F_SIZE_MAX)
- /* LINTED E_PTRDIFF_OVERFLOW */
- bufp += snprintf(bufp, bufend - bufp, "SIZE_MAX ");
- if (features & VIRTIO_BLK_F_SEG_MAX)
- /* LINTED E_PTRDIFF_OVERFLOW */
- bufp += snprintf(bufp, bufend - bufp, "SEG_MAX ");
- if (features & VIRTIO_BLK_F_GEOMETRY)
- /* LINTED E_PTRDIFF_OVERFLOW */
- bufp += snprintf(bufp, bufend - bufp, "GEOMETRY ");
- if (features & VIRTIO_BLK_F_RO)
- /* LINTED E_PTRDIFF_OVERFLOW */
- bufp += snprintf(bufp, bufend - bufp, "RO ");
- if (features & VIRTIO_BLK_F_BLK_SIZE)
- /* LINTED E_PTRDIFF_OVERFLOW */
- bufp += snprintf(bufp, bufend - bufp, "BLK_SIZE ");
- if (features & VIRTIO_BLK_F_SCSI)
- /* LINTED E_PTRDIFF_OVERFLOW */
- bufp += snprintf(bufp, bufend - bufp, "SCSI ");
- if (features & VIRTIO_BLK_F_FLUSH)
- /* LINTED E_PTRDIFF_OVERFLOW */
- bufp += snprintf(bufp, bufend - bufp, "FLUSH ");
- if (features & VIRTIO_BLK_F_TOPOLOGY)
- /* LINTED E_PTRDIFF_OVERFLOW */
- bufp += snprintf(bufp, bufend - bufp, "TOPOLOGY ");
-
- /* LINTED E_PTRDIFF_OVERFLOW */
- bufp += snprintf(bufp, bufend - bufp, ")");
- *bufp = '\0';
-
- dev_debug(sc->sc_dev, CE_NOTE, "%s", buf);
+ virtio_dma_free(dma);
}
static int
-vioblk_dev_features(struct vioblk_softc *sc)
+vioblk_bd_devid(void *arg, dev_info_t *dip, ddi_devid_t *devid)
{
- uint32_t host_features;
-
- host_features = virtio_negotiate_features(&sc->sc_virtio,
- VIRTIO_BLK_F_RO |
- VIRTIO_BLK_F_GEOMETRY |
- VIRTIO_BLK_F_BLK_SIZE |
- VIRTIO_BLK_F_FLUSH |
- VIRTIO_BLK_F_TOPOLOGY |
- VIRTIO_BLK_F_SEG_MAX |
- VIRTIO_BLK_F_SIZE_MAX |
- VIRTIO_F_RING_INDIRECT_DESC);
-
- vioblk_show_features(sc, "Host features: ", host_features);
- vioblk_show_features(sc, "Negotiated features: ",
- sc->sc_virtio.sc_features);
-
- if (!(sc->sc_virtio.sc_features & VIRTIO_F_RING_INDIRECT_DESC)) {
- dev_err(sc->sc_dev, CE_NOTE,
- "Host does not support RING_INDIRECT_DESC, bye.");
+ vioblk_t *vib = arg;
+ size_t len;
+
+ if ((len = strlen(vib->vib_devid)) == 0) {
+ /*
+ * The device has no ID.
+ */
return (DDI_FAILURE);
}
- return (DDI_SUCCESS);
+ return (ddi_devid_init(dip, DEVID_ATA_SERIAL, len, vib->vib_devid,
+ devid));
}
-/* ARGSUSED */
-uint_t
-vioblk_int_handler(caddr_t arg1, caddr_t arg2)
+/*
+ * As the device completes processing of a request, it returns the chain for
+ * that request to our I/O queue. This routine is called in two contexts:
+ * - from the interrupt handler, in response to notification from the device
+ * - synchronously in line with request processing when panicking
+ */
+static uint_t
+vioblk_poll(vioblk_t *vib)
{
- struct virtio_softc *vsc = (void *)arg1;
- struct vioblk_softc *sc = __containerof(vsc,
- struct vioblk_softc, sc_virtio);
- struct vq_entry *ve;
- uint32_t len;
- int i = 0, error;
-
- while ((ve = virtio_pull_chain(sc->sc_vq, &len))) {
- struct vioblk_req *req = &sc->sc_reqs[ve->qe_index];
- bd_xfer_t *xfer = req->xfer;
- uint8_t status = req->status;
- uint32_t type = req->hdr.type;
-
- if (req->xfer == (void *)VIOBLK_POISON) {
- dev_err(sc->sc_dev, CE_WARN, "Poisoned descriptor!");
- virtio_free_chain(ve);
- return (DDI_INTR_CLAIMED);
- }
+ virtio_chain_t *vic;
+ uint_t count = 0;
+ boolean_t wakeup = B_FALSE;
+
+ VERIFY(MUTEX_HELD(&vib->vib_mutex));
- req->xfer = (void *) VIOBLK_POISON;
+ while ((vic = virtio_queue_poll(vib->vib_vq)) != NULL) {
+ vioblk_req_t *vbr = virtio_chain_data(vic);
+ uint8_t status;
- /* Note: blkdev tears down the payload mapping for us. */
- virtio_free_chain(ve);
+ virtio_dma_sync(vbr->vbr_dma, DDI_DMA_SYNC_FORCPU);
+
+ bcopy(virtio_dma_va(vbr->vbr_dma,
+ sizeof (struct vioblk_req_hdr)), &status, sizeof (status));
- /* returning payload back to blkdev */
switch (status) {
- case VIRTIO_BLK_S_OK:
- error = 0;
- break;
- case VIRTIO_BLK_S_IOERR:
- error = EIO;
- sc->sc_stats.io_errors++;
- break;
- case VIRTIO_BLK_S_UNSUPP:
- sc->sc_stats.unsupp_errors++;
- error = ENOTTY;
- break;
- default:
- sc->sc_stats.nxio_errors++;
- error = ENXIO;
- break;
+ case VIRTIO_BLK_S_OK:
+ vbr->vbr_error = 0;
+ break;
+ case VIRTIO_BLK_S_IOERR:
+ vbr->vbr_error = EIO;
+ vib->vib_stats->vbs_io_errors.value.ui64++;
+ break;
+ case VIRTIO_BLK_S_UNSUPP:
+ vbr->vbr_error = ENOTTY;
+ vib->vib_stats->vbs_unsupp_errors.value.ui64++;
+ break;
+ default:
+ vbr->vbr_error = ENXIO;
+ vib->vib_stats->vbs_nxio_errors.value.ui64++;
+ break;
+ }
+
+ count++;
+
+ if (vbr->vbr_status & VIOBLK_REQSTAT_POLLED) {
+ /*
+ * This request must not be freed as it is being held
+ * by a call to vioblk_common_submit().
+ */
+ VERIFY(!(vbr->vbr_status &
+ VIOBLK_REQSTAT_POLL_COMPLETE));
+ vbr->vbr_status |= VIOBLK_REQSTAT_POLL_COMPLETE;
+ wakeup = B_TRUE;
+ continue;
}
- if (type == VIRTIO_BLK_T_GET_ID) {
- /* notify devid_init */
- mutex_enter(&sc->lock_devid);
- cv_broadcast(&sc->cv_devid);
- mutex_exit(&sc->lock_devid);
- } else
- bd_xfer_done(xfer, error);
+ vioblk_complete(vib, vbr);
- i++;
+ vioblk_req_free(vib, vbr);
+ virtio_chain_free(vic);
}
- /* update stats */
- if (sc->sc_stats.intr_queuemax < i)
- sc->sc_stats.intr_queuemax = i;
- sc->sc_stats.intr_total++;
+ if (wakeup) {
+ /*
+ * Signal anybody waiting for polled command completion.
+ */
+ cv_broadcast(&vib->vib_cv);
+ }
- return (DDI_INTR_CLAIMED);
+ return (count);
}
-/* ARGSUSED */
uint_t
-vioblk_config_handler(caddr_t arg1, caddr_t arg2)
-{
- return (DDI_INTR_CLAIMED);
-}
-
-static int
-vioblk_register_ints(struct vioblk_softc *sc)
+vioblk_int_handler(caddr_t arg0, caddr_t arg1)
{
- int ret;
+ vioblk_t *vib = (vioblk_t *)arg0;
+ uint_t count;
- struct virtio_int_handler vioblk_conf_h = {
- vioblk_config_handler
- };
-
- struct virtio_int_handler vioblk_vq_h[] = {
- { vioblk_int_handler },
- { NULL },
- };
+ mutex_enter(&vib->vib_mutex);
+ if ((count = vioblk_poll(vib)) >
+ vib->vib_stats->vbs_intr_queuemax.value.ui32) {
+ vib->vib_stats->vbs_intr_queuemax.value.ui32 = count;
+ }
- ret = virtio_register_ints(&sc->sc_virtio,
- &vioblk_conf_h, vioblk_vq_h);
+ vib->vib_stats->vbs_intr_total.value.ui64++;
+ mutex_exit(&vib->vib_mutex);
- return (ret);
+ return (DDI_INTR_CLAIMED);
}
static void
-vioblk_free_reqs(struct vioblk_softc *sc)
+vioblk_free_reqs(vioblk_t *vib)
{
- int i, qsize;
+ VERIFY3U(vib->vib_nreqs_alloc, ==, 0);
- qsize = sc->sc_vq->vq_num;
+ for (uint_t i = 0; i < vib->vib_reqs_capacity; i++) {
+ struct vioblk_req *vbr = &vib->vib_reqs_mem[i];
- for (i = 0; i < qsize; i++) {
- struct vioblk_req *req = &sc->sc_reqs[i];
+ VERIFY(list_link_active(&vbr->vbr_link));
+ list_remove(&vib->vib_reqs, vbr);
- if (req->ndmac)
- (void) ddi_dma_unbind_handle(req->dmah);
+ VERIFY0(vbr->vbr_status);
- if (req->dmah)
- ddi_dma_free_handle(&req->dmah);
+ if (vbr->vbr_dma != NULL) {
+ virtio_dma_free(vbr->vbr_dma);
+ vbr->vbr_dma = NULL;
+ }
}
+ VERIFY(list_is_empty(&vib->vib_reqs));
- kmem_free(sc->sc_reqs, sizeof (struct vioblk_req) * qsize);
+ if (vib->vib_reqs_mem != NULL) {
+ kmem_free(vib->vib_reqs_mem,
+ sizeof (struct vioblk_req) * vib->vib_reqs_capacity);
+ vib->vib_reqs_mem = NULL;
+ vib->vib_reqs_capacity = 0;
+ }
}
static int
-vioblk_alloc_reqs(struct vioblk_softc *sc)
+vioblk_alloc_reqs(vioblk_t *vib)
{
- int i, qsize;
- int ret;
-
- qsize = sc->sc_vq->vq_num;
-
- sc->sc_reqs = kmem_zalloc(sizeof (struct vioblk_req) * qsize, KM_SLEEP);
-
- for (i = 0; i < qsize; i++) {
- struct vioblk_req *req = &sc->sc_reqs[i];
-
- ret = ddi_dma_alloc_handle(sc->sc_dev, &vioblk_req_dma_attr,
- DDI_DMA_SLEEP, NULL, &req->dmah);
- if (ret != DDI_SUCCESS) {
-
- dev_err(sc->sc_dev, CE_WARN,
- "Can't allocate dma handle for req "
- "buffer %d", i);
- goto exit;
- }
+ vib->vib_reqs_capacity = MIN(virtio_queue_size(vib->vib_vq),
+ VIRTIO_BLK_REQ_BUFS);
+ vib->vib_reqs_mem = kmem_zalloc(
+ sizeof (struct vioblk_req) * vib->vib_reqs_capacity, KM_SLEEP);
+ vib->vib_nreqs_alloc = 0;
+
+ for (uint_t i = 0; i < vib->vib_reqs_capacity; i++) {
+ list_insert_tail(&vib->vib_reqs, &vib->vib_reqs_mem[i]);
+ }
- ret = ddi_dma_addr_bind_handle(req->dmah, NULL,
- (caddr_t)&req->hdr,
+ for (vioblk_req_t *vbr = list_head(&vib->vib_reqs); vbr != NULL;
+ vbr = list_next(&vib->vib_reqs, vbr)) {
+ if ((vbr->vbr_dma = virtio_dma_alloc(vib->vib_virtio,
sizeof (struct vioblk_req_hdr) + sizeof (uint8_t),
- DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_SLEEP,
- NULL, &req->dmac, &req->ndmac);
- if (ret != DDI_DMA_MAPPED) {
- dev_err(sc->sc_dev, CE_WARN,
- "Can't bind req buffer %d", i);
- goto exit;
+ &vioblk_dma_attr, DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
+ KM_SLEEP)) == NULL) {
+ goto fail;
}
}
return (0);
-exit:
- vioblk_free_reqs(sc);
+fail:
+ vioblk_free_reqs(vib);
return (ENOMEM);
}
-
-static int
-vioblk_ksupdate(kstat_t *ksp, int rw)
-{
- struct vioblk_softc *sc = ksp->ks_private;
-
- if (rw == KSTAT_WRITE)
- return (EACCES);
-
- sc->ks_data->sts_rw_cookiesmax.value.ui32 = sc->sc_stats.rw_cookiesmax;
- sc->ks_data->sts_intr_queuemax.value.ui32 = sc->sc_stats.intr_queuemax;
- sc->ks_data->sts_unsupp_errors.value.ui32 = sc->sc_stats.unsupp_errors;
- sc->ks_data->sts_nxio_errors.value.ui32 = sc->sc_stats.nxio_errors;
- sc->ks_data->sts_io_errors.value.ui32 = sc->sc_stats.io_errors;
- sc->ks_data->sts_rw_cacheflush.value.ui64 = sc->sc_stats.rw_cacheflush;
- sc->ks_data->sts_intr_total.value.ui64 = sc->sc_stats.intr_total;
-
-
- return (0);
-}
-
static int
-vioblk_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd)
+vioblk_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
- int ret = DDI_SUCCESS;
- int instance;
- struct vioblk_softc *sc;
- struct virtio_softc *vsc;
- struct vioblk_stats *ks_data;
+ int instance = ddi_get_instance(dip);
+ vioblk_t *vib;
+ virtio_t *vio;
+ boolean_t did_mutex = B_FALSE;
- instance = ddi_get_instance(devinfo);
-
- switch (cmd) {
- case DDI_ATTACH:
- break;
-
- case DDI_RESUME:
- case DDI_PM_RESUME:
- dev_err(devinfo, CE_WARN, "resume not supported yet");
+ if (cmd != DDI_ATTACH) {
return (DDI_FAILURE);
+ }
- default:
- dev_err(devinfo, CE_WARN, "cmd 0x%x not recognized", cmd);
+ if ((vio = virtio_init(dip, VIRTIO_BLK_WANTED_FEATURES, B_TRUE)) ==
+ NULL) {
+ dev_err(dip, CE_WARN, "failed to start Virtio init");
return (DDI_FAILURE);
}
- sc = kmem_zalloc(sizeof (struct vioblk_softc), KM_SLEEP);
- ddi_set_driver_private(devinfo, sc);
-
- vsc = &sc->sc_virtio;
+ vib = kmem_zalloc(sizeof (*vib), KM_SLEEP);
+ vib->vib_dip = dip;
+ vib->vib_virtio = vio;
+ ddi_set_driver_private(dip, vib);
+ list_create(&vib->vib_reqs, sizeof (vioblk_req_t),
+ offsetof(vioblk_req_t, vbr_link));
- /* Duplicate for faster access / less typing */
- sc->sc_dev = devinfo;
- vsc->sc_dev = devinfo;
+ /*
+ * Determine how many scatter-gather entries we can use in a single
+ * request.
+ */
+ vib->vib_seg_max = VIRTIO_BLK_DEFAULT_MAX_SEG;
+ if (virtio_feature_present(vio, VIRTIO_BLK_F_SEG_MAX)) {
+ vib->vib_seg_max = virtio_dev_get32(vio,
+ VIRTIO_BLK_CONFIG_SEG_MAX);
- cv_init(&sc->cv_devid, NULL, CV_DRIVER, NULL);
- mutex_init(&sc->lock_devid, NULL, MUTEX_DRIVER, NULL);
+ if (vib->vib_seg_max == 0 || vib->vib_seg_max == PCI_EINVAL32) {
+ /*
+ * We need to be able to use at least one data segment,
+ * so we'll assume that this device is just poorly
+ * implemented and try for one.
+ */
+ vib->vib_seg_max = 1;
+ }
+ }
/*
- * Initialize interrupt kstat. This should not normally fail, since
- * we don't use a persistent stat. We do it this way to avoid having
- * to test for it at run time on the hot path.
+ * When allocating the request queue, we include two additional
+ * descriptors (beyond those required for request data) to account for
+ * the header and the status byte.
*/
- sc->sc_intrstat = kstat_create("vioblk", instance,
- "intrs", "controller", KSTAT_TYPE_NAMED,
+ if ((vib->vib_vq = virtio_queue_alloc(vio, VIRTIO_BLK_VIRTQ_IO, "io",
+ vioblk_int_handler, vib, B_FALSE, vib->vib_seg_max + 2)) == NULL) {
+ goto fail;
+ }
+
+ if (virtio_init_complete(vio, 0) != DDI_SUCCESS) {
+ dev_err(dip, CE_WARN, "failed to complete Virtio init");
+ goto fail;
+ }
+
+ cv_init(&vib->vib_cv, NULL, CV_DRIVER, NULL);
+ mutex_init(&vib->vib_mutex, NULL, MUTEX_DRIVER, virtio_intr_pri(vio));
+ did_mutex = B_TRUE;
+
+ if ((vib->vib_kstat = kstat_create("vioblk", instance,
+ "statistics", "controller", KSTAT_TYPE_NAMED,
sizeof (struct vioblk_stats) / sizeof (kstat_named_t),
- KSTAT_FLAG_PERSISTENT);
- if (sc->sc_intrstat == NULL) {
- dev_err(devinfo, CE_WARN, "kstat_create failed");
- goto exit_intrstat;
+ KSTAT_FLAG_PERSISTENT)) == NULL) {
+ dev_err(dip, CE_WARN, "kstat_create failed");
+ goto fail;
}
- ks_data = (struct vioblk_stats *)sc->sc_intrstat->ks_data;
- kstat_named_init(&ks_data->sts_rw_outofmemory,
+ vib->vib_stats = (vioblk_stats_t *)vib->vib_kstat->ks_data;
+ kstat_named_init(&vib->vib_stats->vbs_rw_outofmemory,
"total_rw_outofmemory", KSTAT_DATA_UINT64);
- kstat_named_init(&ks_data->sts_rw_badoffset,
+ kstat_named_init(&vib->vib_stats->vbs_rw_badoffset,
"total_rw_badoffset", KSTAT_DATA_UINT64);
- kstat_named_init(&ks_data->sts_intr_total,
+ kstat_named_init(&vib->vib_stats->vbs_intr_total,
"total_intr", KSTAT_DATA_UINT64);
- kstat_named_init(&ks_data->sts_io_errors,
- "total_io_errors", KSTAT_DATA_UINT32);
- kstat_named_init(&ks_data->sts_unsupp_errors,
- "total_unsupp_errors", KSTAT_DATA_UINT32);
- kstat_named_init(&ks_data->sts_nxio_errors,
- "total_nxio_errors", KSTAT_DATA_UINT32);
- kstat_named_init(&ks_data->sts_rw_cacheflush,
+ kstat_named_init(&vib->vib_stats->vbs_io_errors,
+ "total_io_errors", KSTAT_DATA_UINT64);
+ kstat_named_init(&vib->vib_stats->vbs_unsupp_errors,
+ "total_unsupp_errors", KSTAT_DATA_UINT64);
+ kstat_named_init(&vib->vib_stats->vbs_nxio_errors,
+ "total_nxio_errors", KSTAT_DATA_UINT64);
+ kstat_named_init(&vib->vib_stats->vbs_rw_cacheflush,
"total_rw_cacheflush", KSTAT_DATA_UINT64);
- kstat_named_init(&ks_data->sts_rw_cookiesmax,
+ kstat_named_init(&vib->vib_stats->vbs_rw_cookiesmax,
"max_rw_cookies", KSTAT_DATA_UINT32);
- kstat_named_init(&ks_data->sts_intr_queuemax,
+ kstat_named_init(&vib->vib_stats->vbs_intr_queuemax,
"max_intr_queue", KSTAT_DATA_UINT32);
- sc->ks_data = ks_data;
- sc->sc_intrstat->ks_private = sc;
- sc->sc_intrstat->ks_update = vioblk_ksupdate;
- kstat_install(sc->sc_intrstat);
-
- /* map BAR0 */
- ret = ddi_regs_map_setup(devinfo, 1,
- (caddr_t *)&sc->sc_virtio.sc_io_addr,
- 0, 0, &vioblk_attr, &sc->sc_virtio.sc_ioh);
- if (ret != DDI_SUCCESS) {
- dev_err(devinfo, CE_WARN, "unable to map bar0: [%d]", ret);
- goto exit_map;
+ kstat_install(vib->vib_kstat);
+
+ vib->vib_readonly = virtio_feature_present(vio, VIRTIO_BLK_F_RO);
+ if ((vib->vib_nblks = virtio_dev_get64(vio,
+ VIRTIO_BLK_CONFIG_CAPACITY)) == UINT64_MAX) {
+ dev_err(dip, CE_WARN, "invalid capacity");
+ goto fail;
}
- virtio_device_reset(&sc->sc_virtio);
- virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
- virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER);
+ /*
+ * Determine the optimal logical block size recommended by the device.
+ * This size is advisory; the protocol always deals in 512 byte blocks.
+ */
+ vib->vib_blk_size = DEV_BSIZE;
+ if (virtio_feature_present(vio, VIRTIO_BLK_F_BLK_SIZE)) {
+ uint32_t v = virtio_dev_get32(vio, VIRTIO_BLK_CONFIG_BLK_SIZE);
- if (vioblk_register_ints(sc)) {
- dev_err(devinfo, CE_WARN, "Unable to add interrupt");
- goto exit_int;
+ if (v != 0 && v != PCI_EINVAL32) {
+ vib->vib_blk_size = v;
+ }
}
- ret = vioblk_dev_features(sc);
- if (ret)
- goto exit_features;
+ /*
+ * The device may also provide an advisory physical block size.
+ */
+ vib->vib_pblk_size = vib->vib_blk_size;
+ if (virtio_feature_present(vio, VIRTIO_BLK_F_TOPOLOGY)) {
+ uint8_t v = virtio_dev_get8(vio, VIRTIO_BLK_CONFIG_TOPO_PBEXP);
- if (sc->sc_virtio.sc_features & VIRTIO_BLK_F_RO)
- sc->sc_readonly = B_TRUE;
- else
- sc->sc_readonly = B_FALSE;
+ if (v != PCI_EINVAL8) {
+ vib->vib_pblk_size <<= v;
+ }
+ }
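For example, assuming the default 512-byte logical block size, a device reporting a physical block exponent (VIRTIO_BLK_CONFIG_TOPO_PBEXP) of 3 yields an advertised physical block size of 512 << 3 = 4096 bytes.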
- sc->sc_capacity = virtio_read_device_config_8(&sc->sc_virtio,
- VIRTIO_BLK_CONFIG_CAPACITY);
- sc->sc_nblks = sc->sc_capacity;
+ /*
+ * The maximum size for a cookie in a request.
+ */
+ vib->vib_seg_size_max = VIRTIO_BLK_DEFAULT_MAX_SIZE;
+ if (virtio_feature_present(vio, VIRTIO_BLK_F_SIZE_MAX)) {
+ uint32_t v = virtio_dev_get32(vio, VIRTIO_BLK_CONFIG_SIZE_MAX);
- sc->sc_blk_size = DEV_BSIZE;
- if (sc->sc_virtio.sc_features & VIRTIO_BLK_F_BLK_SIZE) {
- sc->sc_blk_size = virtio_read_device_config_4(&sc->sc_virtio,
- VIRTIO_BLK_CONFIG_BLK_SIZE);
+ if (v != 0 && v != PCI_EINVAL32) {
+ vib->vib_seg_size_max = v;
+ }
}
- sc->sc_pblk_size = sc->sc_blk_size;
- if (sc->sc_virtio.sc_features & VIRTIO_BLK_F_TOPOLOGY) {
- sc->sc_pblk_size <<= virtio_read_device_config_1(&sc->sc_virtio,
- VIRTIO_BLK_CONFIG_TOPO_PBEXP);
+ /*
+ * Set up the DMA attributes for blkdev to use for request data. The
+ * specification is not extremely clear about whether DMA-related
+ * parameters include or exclude the header and status descriptors.
+ * For now, we assume they cover only the request data and not the
+ * headers.
+ */
+ vib->vib_bd_dma_attr = vioblk_dma_attr;
+ vib->vib_bd_dma_attr.dma_attr_sgllen = vib->vib_seg_max;
+ vib->vib_bd_dma_attr.dma_attr_count_max = vib->vib_seg_size_max;
+ vib->vib_bd_dma_attr.dma_attr_maxxfer = vib->vib_seg_max *
+ vib->vib_seg_size_max;
+
+ if (vioblk_alloc_reqs(vib) != 0) {
+ goto fail;
}
- /* Flushing is not supported. */
- if (!(sc->sc_virtio.sc_features & VIRTIO_BLK_F_FLUSH)) {
- vioblk_ops.o_sync_cache = NULL;
+ /*
+ * The blkdev framework does not provide a way to specify that the
+ * device does not support write cache flushing, except by omitting the
+ * "o_sync_cache" member from the ops vector. As "bd_alloc_handle()"
+ * makes a copy of the ops vector, we can safely assemble one on the
+ * stack based on negotiated features.
+ */
+ bd_ops_t vioblk_bd_ops = {
+ .o_version = BD_OPS_VERSION_0,
+ .o_drive_info = vioblk_bd_driveinfo,
+ .o_media_info = vioblk_bd_mediainfo,
+ .o_devid_init = vioblk_bd_devid,
+ .o_sync_cache = vioblk_bd_flush,
+ .o_read = vioblk_bd_read,
+ .o_write = vioblk_bd_write,
+ };
+ if (!virtio_feature_present(vio, VIRTIO_BLK_F_FLUSH)) {
+ vioblk_bd_ops.o_sync_cache = NULL;
}
- sc->sc_seg_max = DEF_MAXINDIRECT;
- /* The max number of segments (cookies) in a request */
- if (sc->sc_virtio.sc_features & VIRTIO_BLK_F_SEG_MAX) {
- sc->sc_seg_max = virtio_read_device_config_4(&sc->sc_virtio,
- VIRTIO_BLK_CONFIG_SEG_MAX);
-
- /* That's what Linux does. */
- if (!sc->sc_seg_max)
- sc->sc_seg_max = 1;
+ vib->vib_bd_h = bd_alloc_handle(vib, &vioblk_bd_ops,
+ &vib->vib_bd_dma_attr, KM_SLEEP);
- /*
- * SEG_MAX corresponds to the number of _data_
- * blocks in a request
- */
- sc->sc_seg_max += 2;
+ /*
+ * Enable interrupts now so that we can request the device identity.
+ */
+ if (virtio_interrupts_enable(vio) != DDI_SUCCESS) {
+ goto fail;
}
- /* 2 descriptors taken for header/status */
- vioblk_bd_dma_attr.dma_attr_sgllen = sc->sc_seg_max - 2;
+ vioblk_get_id(vib);
- /* The maximum size for a cookie in a request. */
- sc->sc_seg_size_max = DEF_MAXSECTOR;
- if (sc->sc_virtio.sc_features & VIRTIO_BLK_F_SIZE_MAX) {
- sc->sc_seg_size_max = virtio_read_device_config_4(
- &sc->sc_virtio, VIRTIO_BLK_CONFIG_SIZE_MAX);
+ if (bd_attach_handle(dip, vib->vib_bd_h) != DDI_SUCCESS) {
+ dev_err(dip, CE_WARN, "Failed to attach blkdev");
+ goto fail;
}
- /* The maximum request size */
- vioblk_bd_dma_attr.dma_attr_maxxfer =
- vioblk_bd_dma_attr.dma_attr_sgllen * sc->sc_seg_size_max;
-
- dev_debug(devinfo, CE_NOTE,
- "nblks=%" PRIu64 " blksize=%d (%d) num_seg=%d, "
- "seg_size=%d, maxxfer=%" PRIu64,
- sc->sc_nblks, sc->sc_blk_size, sc->sc_pblk_size,
- vioblk_bd_dma_attr.dma_attr_sgllen,
- sc->sc_seg_size_max,
- vioblk_bd_dma_attr.dma_attr_maxxfer);
-
+ return (DDI_SUCCESS);
- sc->sc_vq = virtio_alloc_vq(&sc->sc_virtio, 0, 0,
- sc->sc_seg_max, "I/O request");
- if (sc->sc_vq == NULL) {
- goto exit_alloc1;
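+
+ /*
+ * Failure path: unwind only the resources that were successfully set
+ * up above before returning.
+ */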
+fail:
+ if (vib->vib_bd_h != NULL) {
+ (void) bd_detach_handle(vib->vib_bd_h);
+ bd_free_handle(vib->vib_bd_h);
}
-
- ret = vioblk_alloc_reqs(sc);
- if (ret) {
- goto exit_alloc2;
+ if (vio != NULL) {
+ (void) virtio_fini(vio, B_TRUE);
}
-
- sc->bd_h = bd_alloc_handle(sc, &vioblk_ops, &vioblk_bd_dma_attr,
- KM_SLEEP);
-
-
- virtio_set_status(&sc->sc_virtio,
- VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
- virtio_start_vq_intr(sc->sc_vq);
-
- ret = virtio_enable_ints(&sc->sc_virtio);
- if (ret)
- goto exit_enable_ints;
-
- ret = bd_attach_handle(devinfo, sc->bd_h);
- if (ret != DDI_SUCCESS) {
- dev_err(devinfo, CE_WARN, "Failed to attach blkdev");
- goto exit_attach_bd;
+ if (did_mutex) {
+ mutex_destroy(&vib->vib_mutex);
+ cv_destroy(&vib->vib_cv);
}
-
- return (DDI_SUCCESS);
-
-exit_attach_bd:
- /*
- * There is no virtio_disable_ints(), it's done in virtio_release_ints.
- * If they ever get split, don't forget to add a call here.
- */
-exit_enable_ints:
- virtio_stop_vq_intr(sc->sc_vq);
- bd_free_handle(sc->bd_h);
- vioblk_free_reqs(sc);
-exit_alloc2:
- virtio_free_vq(sc->sc_vq);
-exit_alloc1:
-exit_features:
- virtio_release_ints(&sc->sc_virtio);
-exit_int:
- virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
- ddi_regs_map_free(&sc->sc_virtio.sc_ioh);
-exit_map:
- kstat_delete(sc->sc_intrstat);
-exit_intrstat:
- mutex_destroy(&sc->lock_devid);
- cv_destroy(&sc->cv_devid);
- kmem_free(sc, sizeof (struct vioblk_softc));
+ if (vib->vib_kstat != NULL) {
+ kstat_delete(vib->vib_kstat);
+ }
+ vioblk_free_reqs(vib);
+ kmem_free(vib, sizeof (*vib));
return (DDI_FAILURE);
}
static int
-vioblk_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd)
+vioblk_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
- struct vioblk_softc *sc = ddi_get_driver_private(devinfo);
+ vioblk_t *vib = ddi_get_driver_private(dip);
- switch (cmd) {
- case DDI_DETACH:
- break;
+ if (cmd != DDI_DETACH) {
+ return (DDI_FAILURE);
+ }
- case DDI_PM_SUSPEND:
- cmn_err(CE_WARN, "suspend not supported yet");
+ mutex_enter(&vib->vib_mutex);
+ if (vib->vib_nreqs_alloc > 0) {
+ /*
+ * Cannot detach while there are still outstanding requests.
+ */
+ mutex_exit(&vib->vib_mutex);
return (DDI_FAILURE);
+ }
- default:
- cmn_err(CE_WARN, "cmd 0x%x unrecognized", cmd);
+ if (bd_detach_handle(vib->vib_bd_h) != DDI_SUCCESS) {
+ mutex_exit(&vib->vib_mutex);
return (DDI_FAILURE);
}
- (void) bd_detach_handle(sc->bd_h);
- virtio_stop_vq_intr(sc->sc_vq);
- virtio_release_ints(&sc->sc_virtio);
- vioblk_free_reqs(sc);
- virtio_free_vq(sc->sc_vq);
- virtio_device_reset(&sc->sc_virtio);
- ddi_regs_map_free(&sc->sc_virtio.sc_ioh);
- kstat_delete(sc->sc_intrstat);
- kmem_free(sc, sizeof (struct vioblk_softc));
+ /*
+ * Tear down the Virtio framework before freeing the rest of the
+ * resources. This will ensure the interrupt handlers are no longer
+ * running.
+ */
+ virtio_fini(vib->vib_virtio, B_FALSE);
+
+ vioblk_free_reqs(vib);
+ kstat_delete(vib->vib_kstat);
+
+ mutex_exit(&vib->vib_mutex);
+ mutex_destroy(&vib->vib_mutex);
+
+ kmem_free(vib, sizeof (*vib));
return (DDI_SUCCESS);
}
static int
-vioblk_quiesce(dev_info_t *devinfo)
+vioblk_quiesce(dev_info_t *dip)
{
- struct vioblk_softc *sc = ddi_get_driver_private(devinfo);
+ vioblk_t *vib;
- virtio_stop_vq_intr(sc->sc_vq);
- virtio_device_reset(&sc->sc_virtio);
+ if ((vib = ddi_get_driver_private(dip)) == NULL) {
+ return (DDI_FAILURE);
+ }
- return (DDI_SUCCESS);
+ return (virtio_quiesce(vib->vib_virtio));
}
int
@@ -1080,7 +1030,7 @@ _init(void)
bd_mod_init(&vioblk_dev_ops);
- if ((rv = mod_install(&modlinkage)) != 0) {
+ if ((rv = mod_install(&vioblk_modlinkage)) != 0) {
bd_mod_fini(&vioblk_dev_ops);
}
@@ -1092,7 +1042,7 @@ _fini(void)
{
int rv;
- if ((rv = mod_remove(&modlinkage)) == 0) {
+ if ((rv = mod_remove(&vioblk_modlinkage)) == 0) {
bd_mod_fini(&vioblk_dev_ops);
}
@@ -1102,5 +1052,5 @@ _fini(void)
int
_info(struct modinfo *modinfop)
{
- return (mod_info(&modlinkage, modinfop));
+ return (mod_info(&vioblk_modlinkage, modinfop));
}
diff --git a/usr/src/uts/common/io/vioblk/vioblk.h b/usr/src/uts/common/io/vioblk/vioblk.h
new file mode 100644
index 0000000000..e08fc31e8f
--- /dev/null
+++ b/usr/src/uts/common/io/vioblk/vioblk.h
@@ -0,0 +1,212 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2019 Joyent, Inc.
+ */
+
+/*
+ * VIRTIO BLOCK DRIVER
+ */
+
+#ifndef _VIOBLK_H
+#define _VIOBLK_H
+
+#include "virtio.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * VIRTIO BLOCK CONFIGURATION REGISTERS
+ *
+ * These are offsets into the device-specific configuration space available
+ * through the virtio_dev_*() family of functions.
+ */
+#define VIRTIO_BLK_CONFIG_CAPACITY 0x00 /* 64 R */
+#define VIRTIO_BLK_CONFIG_SIZE_MAX 0x08 /* 32 R */
+#define VIRTIO_BLK_CONFIG_SEG_MAX 0x0C /* 32 R */
+#define VIRTIO_BLK_CONFIG_GEOMETRY_C 0x10 /* 16 R */
+#define VIRTIO_BLK_CONFIG_GEOMETRY_H 0x12 /* 8 R */
+#define VIRTIO_BLK_CONFIG_GEOMETRY_S 0x13 /* 8 R */
+#define VIRTIO_BLK_CONFIG_BLK_SIZE 0x14 /* 32 R */
+#define VIRTIO_BLK_CONFIG_TOPO_PBEXP 0x18 /* 8 R */
+#define VIRTIO_BLK_CONFIG_TOPO_ALIGN 0x19 /* 8 R */
+#define VIRTIO_BLK_CONFIG_TOPO_MIN_SZ 0x1A /* 16 R */
+#define VIRTIO_BLK_CONFIG_TOPO_OPT_SZ 0x1C /* 32 R */
+
+/*
+ * VIRTIO BLOCK VIRTQUEUES
+ *
+ * Virtio block devices have just one queue which is used to make the various
+ * supported I/O requests.
+ */
+#define VIRTIO_BLK_VIRTQ_IO 0
+
+/*
+ * VIRTIO BLOCK FEATURE BITS
+ */
+#define VIRTIO_BLK_F_BARRIER (1ULL << 0)
+#define VIRTIO_BLK_F_SIZE_MAX (1ULL << 1)
+#define VIRTIO_BLK_F_SEG_MAX (1ULL << 2)
+#define VIRTIO_BLK_F_GEOMETRY (1ULL << 4)
+#define VIRTIO_BLK_F_RO (1ULL << 5)
+#define VIRTIO_BLK_F_BLK_SIZE (1ULL << 6)
+#define VIRTIO_BLK_F_SCSI (1ULL << 7)
+#define VIRTIO_BLK_F_FLUSH (1ULL << 9)
+#define VIRTIO_BLK_F_TOPOLOGY (1ULL << 10)
+
+/*
+ * These features are supported by the driver and we will request them from the
+ * device.
+ */
+#define VIRTIO_BLK_WANTED_FEATURES (VIRTIO_BLK_F_RO | \
+ VIRTIO_BLK_F_BLK_SIZE | \
+ VIRTIO_BLK_F_FLUSH | \
+ VIRTIO_BLK_F_TOPOLOGY | \
+ VIRTIO_BLK_F_SEG_MAX | \
+ VIRTIO_BLK_F_SIZE_MAX)
+
+/*
+ * VIRTIO BLOCK REQUEST HEADER
+ *
+ * This structure appears at the start of each I/O request buffer. Note that
+ * neither the data payload nor the status byte appears in this structure, as
+ * both are handled in separate descriptor entries.
+ */
+struct vioblk_req_hdr {
+ uint32_t vbh_type;
+ uint32_t vbh_ioprio;
+ uint64_t vbh_sector;
+} __packed;
+
+/*
+ * VIRTIO BLOCK REQUEST HEADER: COMMANDS (vbh_type)
+ *
+ * Each of these is a command type, except for BARRIER which is logically
+ * OR-ed with one of the other types.
+ */
+#define VIRTIO_BLK_T_IN 0
+#define VIRTIO_BLK_T_OUT 1
+#define VIRTIO_BLK_T_SCSI_CMD 2
+#define VIRTIO_BLK_T_SCSI_CMD_OUT 3
+#define VIRTIO_BLK_T_FLUSH 4
+#define VIRTIO_BLK_T_FLUSH_OUT 5
+#define VIRTIO_BLK_T_GET_ID 8
+#define VIRTIO_BLK_T_BARRIER 0x80000000
+
+/*
+ * The GET_ID command type does not appear in the specification, but
+ * implementations in the wild use a 20-byte buffer into which the device will
+ * write an ASCII string. The string should not be assumed to be
+ * NUL-terminated.
+ */
+#define VIRTIO_BLK_ID_BYTES 20
+
+/*
+ * VIRTIO BLOCK REQUEST HEADER: STATUS CODES
+ *
+ * These are returned in the writeable status byte descriptor included at the
+ * end of each request passed to the device.
+ */
+#define VIRTIO_BLK_S_OK 0
+#define VIRTIO_BLK_S_IOERR 1
+#define VIRTIO_BLK_S_UNSUPP 2
+
+/*
+ * DRIVER PARAMETERS
+ */
+
+/*
+ * In the event that the device does not negotiate DMA parameters, we have to
+ * make a best guess.
+ */
+#define VIRTIO_BLK_DEFAULT_MAX_SEG 128
+#define VIRTIO_BLK_DEFAULT_MAX_SIZE 4096
+
+/*
+ * We allocate a fixed number of request buffers in advance and place them in a
+ * per-instance free list.
+ */
+#define VIRTIO_BLK_REQ_BUFS 256
+
+/*
+ * TYPE DEFINITIONS
+ */
+
+typedef enum vioblk_req_status {
+ VIOBLK_REQSTAT_ALLOCATED = (0x1 << 0),
+ VIOBLK_REQSTAT_INFLIGHT = (0x1 << 1),
+ VIOBLK_REQSTAT_COMPLETE = (0x1 << 2),
+ VIOBLK_REQSTAT_POLLED = (0x1 << 3),
+ VIOBLK_REQSTAT_POLL_COMPLETE = (0x1 << 4),
+} vioblk_req_status_t;
+
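+/*
+ * Per-request tracking object.  A fixed number of these are allocated when
+ * the driver attaches (see VIRTIO_BLK_REQ_BUFS) and kept on the per-instance
+ * free list, "vib_reqs".
+ */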
+typedef struct vioblk_req {
+ vioblk_req_status_t vbr_status;
+ uint64_t vbr_seqno;
+ int vbr_type;
+ int vbr_error;
+ virtio_dma_t *vbr_dma;
+ bd_xfer_t *vbr_xfer;
+ list_node_t vbr_link;
+} vioblk_req_t;
+
+typedef struct vioblk_stats {
+ struct kstat_named vbs_rw_outofmemory;
+ struct kstat_named vbs_rw_badoffset;
+ struct kstat_named vbs_rw_queuemax;
+ struct kstat_named vbs_rw_cookiesmax;
+ struct kstat_named vbs_rw_cacheflush;
+ struct kstat_named vbs_intr_queuemax;
+ struct kstat_named vbs_intr_total;
+ struct kstat_named vbs_io_errors;
+ struct kstat_named vbs_unsupp_errors;
+ struct kstat_named vbs_nxio_errors;
+} vioblk_stats_t;
+
+typedef struct vioblk {
+ dev_info_t *vib_dip;
+ virtio_t *vib_virtio;
+ virtio_queue_t *vib_vq;
+
+ kmutex_t vib_mutex;
+ kcondvar_t vib_cv;
+
+ bd_handle_t vib_bd_h;
+ ddi_dma_attr_t vib_bd_dma_attr;
+
+ list_t vib_reqs;
+ uint_t vib_nreqs_alloc;
+ uint_t vib_reqs_capacity;
+ vioblk_req_t *vib_reqs_mem;
+
+ kstat_t *vib_kstat;
+ vioblk_stats_t *vib_stats;
+
+ uint64_t vib_nblks;
+ boolean_t vib_readonly;
+ uint_t vib_blk_size;
+ uint_t vib_pblk_size;
+ uint_t vib_seg_max;
+ uint_t vib_seg_size_max;
+
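+ /*
+ * Device identity string obtained via VIRTIO_BLK_T_GET_ID (see
+ * vioblk_get_id() in vioblk.c).
+ */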
+ boolean_t vib_devid_fetched;
+ char vib_devid[VIRTIO_BLK_ID_BYTES + 1];
+ uint8_t vib_rawid[VIRTIO_BLK_ID_BYTES];
+} vioblk_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _VIOBLK_H */
diff --git a/usr/src/uts/common/io/vioif/vioif.c b/usr/src/uts/common/io/vioif/vioif.c
index ec6684f040..201e84e11b 100644
--- a/usr/src/uts/common/io/vioif/vioif.c
+++ b/usr/src/uts/common/io/vioif/vioif.c
@@ -41,6 +41,10 @@
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+/*
+ * VIRTIO NETWORK DRIVER
+ */
+
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h>
@@ -57,6 +61,7 @@
#include <sys/pci.h>
#include <sys/ethernet.h>
#include <sys/vlan.h>
+#include <sys/sysmacros.h>
#include <sys/dlpi.h>
#include <sys/taskq.h>
@@ -72,805 +77,625 @@
#include <sys/mac_provider.h>
#include <sys/mac_ether.h>
-#include "virtiovar.h"
-#include "virtioreg.h"
-
-/* Configuration registers */
-#define VIRTIO_NET_CONFIG_MAC 0 /* 8bit x 6byte */
-#define VIRTIO_NET_CONFIG_STATUS 6 /* 16bit */
-
-/* Feature bits */
-#define VIRTIO_NET_F_CSUM (1 << 0) /* Host handles pkts w/ partial csum */
-#define VIRTIO_NET_F_GUEST_CSUM (1 << 1) /* Guest handles pkts w/ part csum */
-#define VIRTIO_NET_F_MAC (1 << 5) /* Host has given MAC address. */
-#define VIRTIO_NET_F_GSO (1 << 6) /* Host handles pkts w/ any GSO type */
-#define VIRTIO_NET_F_GUEST_TSO4 (1 << 7) /* Guest can handle TSOv4 in. */
-#define VIRTIO_NET_F_GUEST_TSO6 (1 << 8) /* Guest can handle TSOv6 in. */
-#define VIRTIO_NET_F_GUEST_ECN (1 << 9) /* Guest can handle TSO[6] w/ ECN in */
-#define VIRTIO_NET_F_GUEST_UFO (1 << 10) /* Guest can handle UFO in. */
-#define VIRTIO_NET_F_HOST_TSO4 (1 << 11) /* Host can handle TSOv4 in. */
-#define VIRTIO_NET_F_HOST_TSO6 (1 << 12) /* Host can handle TSOv6 in. */
-#define VIRTIO_NET_F_HOST_ECN (1 << 13) /* Host can handle TSO[6] w/ ECN in */
-#define VIRTIO_NET_F_HOST_UFO (1 << 14) /* Host can handle UFO in. */
-#define VIRTIO_NET_F_MRG_RXBUF (1 << 15) /* Host can merge receive buffers. */
-#define VIRTIO_NET_F_STATUS (1 << 16) /* Config.status available */
-#define VIRTIO_NET_F_CTRL_VQ (1 << 17) /* Control channel available */
-#define VIRTIO_NET_F_CTRL_RX (1 << 18) /* Control channel RX mode support */
-#define VIRTIO_NET_F_CTRL_VLAN (1 << 19) /* Control channel VLAN filtering */
-#define VIRTIO_NET_F_CTRL_RX_EXTRA (1 << 20) /* Extra RX mode control support */
-
-#define VIRTIO_NET_FEATURE_BITS \
- "\020" \
- "\1CSUM" \
- "\2GUEST_CSUM" \
- "\6MAC" \
- "\7GSO" \
- "\10GUEST_TSO4" \
- "\11GUEST_TSO6" \
- "\12GUEST_ECN" \
- "\13GUEST_UFO" \
- "\14HOST_TSO4" \
- "\15HOST_TSO6" \
- "\16HOST_ECN" \
- "\17HOST_UFO" \
- "\20MRG_RXBUF" \
- "\21STATUS" \
- "\22CTRL_VQ" \
- "\23CTRL_RX" \
- "\24CTRL_VLAN" \
- "\25CTRL_RX_EXTRA"
-
-/* Status */
-#define VIRTIO_NET_S_LINK_UP 1
-
-#pragma pack(1)
-/* Packet header structure */
-struct virtio_net_hdr {
- uint8_t flags;
- uint8_t gso_type;
- uint16_t hdr_len;
- uint16_t gso_size;
- uint16_t csum_start;
- uint16_t csum_offset;
-};
-#pragma pack()
+#include "virtio.h"
+#include "vioif.h"
-#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* flags */
-#define VIRTIO_NET_HDR_GSO_NONE 0 /* gso_type */
-#define VIRTIO_NET_HDR_GSO_TCPV4 1 /* gso_type */
-#define VIRTIO_NET_HDR_GSO_UDP 3 /* gso_type */
-#define VIRTIO_NET_HDR_GSO_TCPV6 4 /* gso_type */
-#define VIRTIO_NET_HDR_GSO_ECN 0x80 /* gso_type, |'ed */
-
-/* Control virtqueue */
-#pragma pack(1)
-struct virtio_net_ctrl_cmd {
- uint8_t class;
- uint8_t command;
+static int vioif_quiesce(dev_info_t *);
+static int vioif_attach(dev_info_t *, ddi_attach_cmd_t);
+static int vioif_detach(dev_info_t *, ddi_detach_cmd_t);
+static boolean_t vioif_has_feature(vioif_t *, uint32_t);
+static void vioif_reclaim_restart(vioif_t *);
+static int vioif_m_stat(void *, uint_t, uint64_t *);
+static void vioif_m_stop(void *);
+static int vioif_m_start(void *);
+static int vioif_m_multicst(void *, boolean_t, const uint8_t *);
+static int vioif_m_setpromisc(void *, boolean_t);
+static int vioif_m_unicst(void *, const uint8_t *);
+static mblk_t *vioif_m_tx(void *, mblk_t *);
+static int vioif_m_setprop(void *, const char *, mac_prop_id_t, uint_t,
+ const void *);
+static int vioif_m_getprop(void *, const char *, mac_prop_id_t, uint_t, void *);
+static void vioif_m_propinfo(void *, const char *, mac_prop_id_t,
+ mac_prop_info_handle_t);
+static boolean_t vioif_m_getcapab(void *, mac_capab_t, void *);
+static uint_t vioif_add_rx(vioif_t *);
+
+
+static struct cb_ops vioif_cb_ops = {
+ .cb_rev = CB_REV,
+ .cb_flag = D_MP | D_NEW,
+
+ .cb_open = nulldev,
+ .cb_close = nulldev,
+ .cb_strategy = nodev,
+ .cb_print = nodev,
+ .cb_dump = nodev,
+ .cb_read = nodev,
+ .cb_write = nodev,
+ .cb_ioctl = nodev,
+ .cb_devmap = nodev,
+ .cb_mmap = nodev,
+ .cb_segmap = nodev,
+ .cb_chpoll = nochpoll,
+ .cb_prop_op = ddi_prop_op,
+ .cb_str = NULL,
+ .cb_aread = nodev,
+ .cb_awrite = nodev,
};
-#pragma pack()
-
-#define VIRTIO_NET_CTRL_RX 0
-#define VIRTIO_NET_CTRL_RX_PROMISC 0
-#define VIRTIO_NET_CTRL_RX_ALLMULTI 1
-#define VIRTIO_NET_CTRL_MAC 1
-#define VIRTIO_NET_CTRL_MAC_TABLE_SET 0
+static struct dev_ops vioif_dev_ops = {
+ .devo_rev = DEVO_REV,
+ .devo_refcnt = 0,
-#define VIRTIO_NET_CTRL_VLAN 2
-#define VIRTIO_NET_CTRL_VLAN_ADD 0
-#define VIRTIO_NET_CTRL_VLAN_DEL 1
+ .devo_attach = vioif_attach,
+ .devo_detach = vioif_detach,
+ .devo_quiesce = vioif_quiesce,
-#pragma pack(1)
-struct virtio_net_ctrl_status {
- uint8_t ack;
-};
+ .devo_cb_ops = &vioif_cb_ops,
-struct virtio_net_ctrl_rx {
- uint8_t onoff;
+ .devo_getinfo = NULL,
+ .devo_identify = nulldev,
+ .devo_probe = nulldev,
+ .devo_reset = nodev,
+ .devo_bus_ops = NULL,
+ .devo_power = NULL,
};
-struct virtio_net_ctrl_mac_tbl {
- uint32_t nentries;
- uint8_t macs[][ETHERADDRL];
+static struct modldrv vioif_modldrv = {
+ .drv_modops = &mod_driverops,
+ .drv_linkinfo = "VIRTIO network driver",
+ .drv_dev_ops = &vioif_dev_ops
};
-struct virtio_net_ctrl_vlan {
- uint16_t id;
-};
-#pragma pack()
-
-static int vioif_quiesce(dev_info_t *);
-static int vioif_attach(dev_info_t *, ddi_attach_cmd_t);
-static int vioif_detach(dev_info_t *, ddi_detach_cmd_t);
-
-DDI_DEFINE_STREAM_OPS(vioif_ops,
- nulldev, /* identify */
- nulldev, /* probe */
- vioif_attach, /* attach */
- vioif_detach, /* detach */
- nodev, /* reset */
- NULL, /* cb_ops */
- D_MP, /* bus_ops */
- NULL, /* power */
- vioif_quiesce /* quiesce */);
-
-static char vioif_ident[] = "VirtIO ethernet driver";
-
-/* Standard Module linkage initialization for a Streams driver */
-extern struct mod_ops mod_driverops;
-
-static struct modldrv modldrv = {
- &mod_driverops, /* Type of module. This one is a driver */
- vioif_ident, /* short description */
- &vioif_ops /* driver specific ops */
+static struct modlinkage vioif_modlinkage = {
+ .ml_rev = MODREV_1,
+ .ml_linkage = { &vioif_modldrv, NULL }
};
-static struct modlinkage modlinkage = {
- MODREV_1,
- {
- (void *)&modldrv,
- NULL,
- },
+static mac_callbacks_t vioif_mac_callbacks = {
+ .mc_getstat = vioif_m_stat,
+ .mc_start = vioif_m_start,
+ .mc_stop = vioif_m_stop,
+ .mc_setpromisc = vioif_m_setpromisc,
+ .mc_multicst = vioif_m_multicst,
+ .mc_unicst = vioif_m_unicst,
+ .mc_tx = vioif_m_tx,
+
+ .mc_callbacks = (MC_GETCAPAB | MC_SETPROP |
+ MC_GETPROP | MC_PROPINFO),
+ .mc_getcapab = vioif_m_getcapab,
+ .mc_setprop = vioif_m_setprop,
+ .mc_getprop = vioif_m_getprop,
+ .mc_propinfo = vioif_m_propinfo,
};
-/* Interval for the periodic TX reclaim */
-uint_t vioif_reclaim_ms = 200;
-
-ddi_device_acc_attr_t vioif_attr = {
- DDI_DEVICE_ATTR_V0,
- DDI_NEVERSWAP_ACC, /* virtio is always native byte order */
- DDI_STORECACHING_OK_ACC,
- DDI_DEFAULT_ACC
+static const uchar_t vioif_broadcast[ETHERADDRL] = {
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
};
/*
- * A mapping represents a binding for a single buffer that is contiguous in the
- * virtual address space.
+ * Interval for the periodic TX reclaim.
*/
-struct vioif_buf_mapping {
- caddr_t vbm_buf;
- ddi_dma_handle_t vbm_dmah;
- ddi_acc_handle_t vbm_acch;
- ddi_dma_cookie_t vbm_dmac;
- unsigned int vbm_ncookies;
-};
+uint_t vioif_reclaim_ms = 200;
/*
- * Rx buffers can be loaned upstream, so the code has
- * to allocate them dynamically.
+ * DMA attribute template for transmit and receive buffers. The SGL entry
+ * count will be modified before using the template. Note that these
+ * allocations are aligned so that VIOIF_HEADER_SKIP places the IP header in
+ * received frames at the correct offset for the networking stack.
*/
-struct vioif_rx_buf {
- struct vioif_softc *rb_sc;
- frtn_t rb_frtn;
-
- struct vioif_buf_mapping rb_mapping;
+ddi_dma_attr_t vioif_dma_attr_bufs = {
+ .dma_attr_version = DMA_ATTR_V0,
+ .dma_attr_addr_lo = 0x0000000000000000,
+ .dma_attr_addr_hi = 0xFFFFFFFFFFFFFFFF,
+ .dma_attr_count_max = 0x00000000FFFFFFFF,
+ .dma_attr_align = VIOIF_HEADER_ALIGN,
+ .dma_attr_burstsizes = 1,
+ .dma_attr_minxfer = 1,
+ .dma_attr_maxxfer = 0x00000000FFFFFFFF,
+ .dma_attr_seg = 0x00000000FFFFFFFF,
+ .dma_attr_sgllen = 0,
+ .dma_attr_granular = 1,
+ .dma_attr_flags = 0
};
/*
- * Tx buffers have two mapping types. One, "inline", is pre-allocated and is
- * used to hold the virtio_net_header. Small packets also get copied there, as
- * it's faster then mapping them. Bigger packets get mapped using the "external"
- * mapping array. An array is used, because a packet may consist of muptiple
- * fragments, so each fragment gets bound to an entry. According to my
- * observations, the number of fragments does not exceed 2, but just in case,
- * a bigger, up to VIOIF_INDIRECT_MAX - 1 array is allocated. To save resources,
- * the dma handles are allocated lazily in the tx path.
+ * DMA attributes for mapping larger transmit buffers from the networking
+ * stack. The requirements are quite loose, but note that the SGL entry length
+ * field is 32-bit.
*/
-struct vioif_tx_buf {
- mblk_t *tb_mp;
-
- /* inline buffer */
- struct vioif_buf_mapping tb_inline_mapping;
-
- /* External buffers */
- struct vioif_buf_mapping *tb_external_mapping;
- unsigned int tb_external_num;
+ddi_dma_attr_t vioif_dma_attr_external = {
+ .dma_attr_version = DMA_ATTR_V0,
+ .dma_attr_addr_lo = 0x0000000000000000,
+ .dma_attr_addr_hi = 0xFFFFFFFFFFFFFFFF,
+ .dma_attr_count_max = 0x00000000FFFFFFFF,
+ .dma_attr_align = 1,
+ .dma_attr_burstsizes = 1,
+ .dma_attr_minxfer = 1,
+ .dma_attr_maxxfer = 0x00000000FFFFFFFF,
+ .dma_attr_seg = 0x00000000FFFFFFFF,
+ .dma_attr_sgllen = VIOIF_MAX_SEGS - 1,
+ .dma_attr_granular = 1,
+ .dma_attr_flags = 0
};
-struct vioif_softc {
- dev_info_t *sc_dev; /* mirrors virtio_softc->sc_dev */
- struct virtio_softc sc_virtio;
-
- mac_handle_t sc_mac_handle;
- mac_register_t *sc_macp;
-
- struct virtqueue *sc_rx_vq;
- struct virtqueue *sc_tx_vq;
- struct virtqueue *sc_ctrl_vq;
-
- /* TX virtqueue management resources */
- kmutex_t sc_tx_lock;
- boolean_t sc_tx_corked;
- boolean_t sc_tx_drain;
- timeout_id_t sc_tx_reclaim_tid;
-
- /* Feature bits. */
- unsigned int sc_rx_csum:1;
- unsigned int sc_tx_csum:1;
- unsigned int sc_tx_tso4:1;
-
- /*
- * For debugging, it is useful to know whether the MAC address we
- * are using came from the host (via VIRTIO_NET_CONFIG_MAC) or
- * was otherwise generated or set from within the guest.
- */
- unsigned int sc_mac_from_host:1;
-
- int sc_mtu;
- uint8_t sc_mac[ETHERADDRL];
- /*
- * For rx buffers, we keep a pointer array, because the buffers
- * can be loaned upstream, and we have to repopulate the array with
- * new members.
- */
- struct vioif_rx_buf **sc_rxbufs;
-
- /*
- * For tx, we just allocate an array of buffers. The packet can
- * either be copied into the inline buffer, or the external mapping
- * could be used to map the packet
- */
- struct vioif_tx_buf *sc_txbufs;
-
- kstat_t *sc_intrstat;
- /*
- * We "loan" rx buffers upstream and reuse them after they are
- * freed. This lets us avoid allocations in the hot path.
- */
- kmem_cache_t *sc_rxbuf_cache;
- ulong_t sc_rxloan;
-
- /* Copying small packets turns out to be faster then mapping them. */
- unsigned long sc_rxcopy_thresh;
- unsigned long sc_txcopy_thresh;
-
- /*
- * Statistics visible through mac:
- */
- uint64_t sc_ipackets;
- uint64_t sc_opackets;
- uint64_t sc_rbytes;
- uint64_t sc_obytes;
- uint64_t sc_brdcstxmt;
- uint64_t sc_brdcstrcv;
- uint64_t sc_multixmt;
- uint64_t sc_multircv;
- uint64_t sc_norecvbuf;
- uint64_t sc_notxbuf;
- uint64_t sc_ierrors;
- uint64_t sc_oerrors;
-
- /*
- * Internal debugging statistics:
- */
- uint64_t sc_rxfail_dma_handle;
- uint64_t sc_rxfail_dma_buffer;
- uint64_t sc_rxfail_dma_bind;
- uint64_t sc_rxfail_chain_undersize;
- uint64_t sc_rxfail_no_descriptors;
- uint64_t sc_txfail_dma_handle;
- uint64_t sc_txfail_dma_bind;
- uint64_t sc_txfail_indirect_limit;
-};
-
-#define ETHER_HEADER_LEN sizeof (struct ether_header)
-
-/* MTU + the ethernet header. */
-#define MAX_PAYLOAD 65535
-#define MAX_MTU (MAX_PAYLOAD - ETHER_HEADER_LEN)
-#define DEFAULT_MTU ETHERMTU
/*
- * Yeah, we spend 8M per device. Turns out, there is no point
- * being smart and using merged rx buffers (VIRTIO_NET_F_MRG_RXBUF),
- * because vhost does not support them, and we expect to be used with
- * vhost in production environment.
+ * VIRTIO NET MAC PROPERTIES
*/
-/* The buffer keeps both the packet data and the virtio_net_header. */
-#define VIOIF_RX_SIZE (MAX_PAYLOAD + sizeof (struct virtio_net_hdr))
+#define VIOIF_MACPROP_TXCOPY_THRESH "_txcopy_thresh"
+#define VIOIF_MACPROP_TXCOPY_THRESH_DEF 300
+#define VIOIF_MACPROP_TXCOPY_THRESH_MAX 640
-/*
- * We win a bit on header alignment, but the host wins a lot
- * more on moving aligned buffers. Might need more thought.
- */
-#define VIOIF_IP_ALIGN 0
-
-/* Maximum number of indirect descriptors, somewhat arbitrary. */
-#define VIOIF_INDIRECT_MAX 128
-
-/*
- * We pre-allocate a reasonably large buffer to copy small packets
- * there. Bigger packets are mapped, packets with multiple
- * cookies are mapped as indirect buffers.
- */
-#define VIOIF_TX_INLINE_SIZE 2048
-
-/* Native queue size for all queues */
-#define VIOIF_RX_QLEN 0
-#define VIOIF_TX_QLEN 0
-#define VIOIF_CTRL_QLEN 0
-
-static uchar_t vioif_broadcast[ETHERADDRL] = {
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
-};
-
-#define VIOIF_TX_THRESH_MAX 640
-#define VIOIF_RX_THRESH_MAX 640
-
-#define CACHE_NAME_SIZE 32
-
-static char vioif_txcopy_thresh[] =
- "vioif_txcopy_thresh";
-static char vioif_rxcopy_thresh[] =
- "vioif_rxcopy_thresh";
+#define VIOIF_MACPROP_RXCOPY_THRESH "_rxcopy_thresh"
+#define VIOIF_MACPROP_RXCOPY_THRESH_DEF 300
+#define VIOIF_MACPROP_RXCOPY_THRESH_MAX 640
static char *vioif_priv_props[] = {
- vioif_txcopy_thresh,
- vioif_rxcopy_thresh,
+ VIOIF_MACPROP_TXCOPY_THRESH,
+ VIOIF_MACPROP_RXCOPY_THRESH,
NULL
};
-static void vioif_reclaim_restart(struct vioif_softc *);
-/* Add up to ddi? */
-static ddi_dma_cookie_t *
-vioif_dma_curr_cookie(ddi_dma_handle_t dmah)
+static vioif_txbuf_t *
+vioif_txbuf_alloc(vioif_t *vif)
{
- ddi_dma_impl_t *dmah_impl = (void *) dmah;
- ASSERT(dmah_impl->dmai_cookie);
- return (dmah_impl->dmai_cookie);
-}
+ vioif_txbuf_t *tb;
-static void
-vioif_dma_reset_cookie(ddi_dma_handle_t dmah, ddi_dma_cookie_t *dmac)
-{
- ddi_dma_impl_t *dmah_impl = (void *) dmah;
- dmah_impl->dmai_cookie = dmac;
-}
+ VERIFY(MUTEX_HELD(&vif->vif_mutex));
-static link_state_t
-vioif_link_state(struct vioif_softc *sc)
-{
- if (sc->sc_virtio.sc_features & VIRTIO_NET_F_STATUS) {
- if (virtio_read_device_config_2(&sc->sc_virtio,
- VIRTIO_NET_CONFIG_STATUS) & VIRTIO_NET_S_LINK_UP) {
- return (LINK_STATE_UP);
- } else {
- return (LINK_STATE_DOWN);
- }
+ if ((tb = list_remove_head(&vif->vif_txbufs)) != NULL) {
+ vif->vif_ntxbufs_alloc++;
}
- return (LINK_STATE_UP);
+ return (tb);
}
-static ddi_dma_attr_t vioif_inline_buf_dma_attr = {
- DMA_ATTR_V0, /* Version number */
- 0, /* low address */
- 0xFFFFFFFFFFFFFFFF, /* high address */
- 0xFFFFFFFF, /* counter register max */
- 1, /* page alignment */
- 1, /* burst sizes: 1 - 32 */
- 1, /* minimum transfer size */
- 0xFFFFFFFF, /* max transfer size */
- 0xFFFFFFFFFFFFFFF, /* address register max */
- 1, /* scatter-gather capacity */
- 1, /* device operates on bytes */
- 0, /* attr flag: set to 0 */
-};
-
-static ddi_dma_attr_t vioif_mapped_buf_dma_attr = {
- DMA_ATTR_V0, /* Version number */
- 0, /* low address */
- 0xFFFFFFFFFFFFFFFF, /* high address */
- 0xFFFFFFFF, /* counter register max */
- 1, /* page alignment */
- 1, /* burst sizes: 1 - 32 */
- 1, /* minimum transfer size */
- 0xFFFFFFFF, /* max transfer size */
- 0xFFFFFFFFFFFFFFF, /* address register max */
-
- /* One entry is used for the virtio_net_hdr on the tx path */
- VIOIF_INDIRECT_MAX - 1, /* scatter-gather capacity */
- 1, /* device operates on bytes */
- 0, /* attr flag: set to 0 */
-};
-
-static ddi_device_acc_attr_t vioif_bufattr = {
- DDI_DEVICE_ATTR_V0,
- DDI_NEVERSWAP_ACC,
- DDI_STORECACHING_OK_ACC,
- DDI_DEFAULT_ACC
-};
-
static void
-vioif_rx_free(caddr_t free_arg)
+vioif_txbuf_free(vioif_t *vif, vioif_txbuf_t *tb)
{
- struct vioif_rx_buf *buf = (void *) free_arg;
- struct vioif_softc *sc = buf->rb_sc;
+ VERIFY(MUTEX_HELD(&vif->vif_mutex));
+
+ VERIFY3U(vif->vif_ntxbufs_alloc, >, 0);
+ vif->vif_ntxbufs_alloc--;
- kmem_cache_free(sc->sc_rxbuf_cache, buf);
- atomic_dec_ulong(&sc->sc_rxloan);
+ virtio_chain_clear(tb->tb_chain);
+ list_insert_head(&vif->vif_txbufs, tb);
}
-static int
-vioif_rx_construct(void *buffer, void *user_arg, int kmflags)
+static vioif_rxbuf_t *
+vioif_rxbuf_alloc(vioif_t *vif)
{
- _NOTE(ARGUNUSED(kmflags));
- struct vioif_softc *sc = user_arg;
- struct vioif_rx_buf *buf = buffer;
- size_t len;
+ vioif_rxbuf_t *rb;
- if (ddi_dma_alloc_handle(sc->sc_dev, &vioif_mapped_buf_dma_attr,
- DDI_DMA_SLEEP, NULL, &buf->rb_mapping.vbm_dmah)) {
- sc->sc_rxfail_dma_handle++;
- goto exit_handle;
- }
-
- if (ddi_dma_mem_alloc(buf->rb_mapping.vbm_dmah,
- VIOIF_RX_SIZE + sizeof (struct virtio_net_hdr),
- &vioif_bufattr, DDI_DMA_STREAMING, DDI_DMA_SLEEP,
- NULL, &buf->rb_mapping.vbm_buf, &len, &buf->rb_mapping.vbm_acch)) {
- sc->sc_rxfail_dma_buffer++;
- goto exit_alloc;
- }
- ASSERT(len >= VIOIF_RX_SIZE);
+ VERIFY(MUTEX_HELD(&vif->vif_mutex));
- if (ddi_dma_addr_bind_handle(buf->rb_mapping.vbm_dmah, NULL,
- buf->rb_mapping.vbm_buf, len, DDI_DMA_READ | DDI_DMA_STREAMING,
- DDI_DMA_SLEEP, NULL, &buf->rb_mapping.vbm_dmac,
- &buf->rb_mapping.vbm_ncookies)) {
- sc->sc_rxfail_dma_bind++;
- goto exit_bind;
+ if ((rb = list_remove_head(&vif->vif_rxbufs)) != NULL) {
+ vif->vif_nrxbufs_alloc++;
}
- ASSERT(buf->rb_mapping.vbm_ncookies <= VIOIF_INDIRECT_MAX);
+ return (rb);
+}
- buf->rb_sc = sc;
- buf->rb_frtn.free_arg = (void *) buf;
- buf->rb_frtn.free_func = vioif_rx_free;
+static void
+vioif_rxbuf_free(vioif_t *vif, vioif_rxbuf_t *rb)
+{
+ VERIFY(MUTEX_HELD(&vif->vif_mutex));
- return (0);
-exit_bind:
- ddi_dma_mem_free(&buf->rb_mapping.vbm_acch);
-exit_alloc:
- ddi_dma_free_handle(&buf->rb_mapping.vbm_dmah);
-exit_handle:
+ VERIFY3U(vif->vif_nrxbufs_alloc, >, 0);
+ vif->vif_nrxbufs_alloc--;
- return (ENOMEM);
+ virtio_chain_clear(rb->rb_chain);
+ list_insert_head(&vif->vif_rxbufs, rb);
}
static void
-vioif_rx_destruct(void *buffer, void *user_arg)
+vioif_rx_free_callback(caddr_t free_arg)
{
- _NOTE(ARGUNUSED(user_arg));
- struct vioif_rx_buf *buf = buffer;
+ vioif_rxbuf_t *rb = (vioif_rxbuf_t *)free_arg;
+ vioif_t *vif = rb->rb_vioif;
+
+ mutex_enter(&vif->vif_mutex);
+
+ /*
+ * Return this receive buffer to the free list.
+ */
+ vioif_rxbuf_free(vif, rb);
+
+ VERIFY3U(vif->vif_nrxbufs_onloan, >, 0);
+ vif->vif_nrxbufs_onloan--;
- ASSERT(buf->rb_mapping.vbm_acch);
- ASSERT(buf->rb_mapping.vbm_acch);
+ /*
+ * Attempt to replenish the receive queue with at least the buffer we
+ * just freed. There is no great way to deal with failure here, but
+ * because we only loan out at most half of the buffers, some should
+ * always remain available even if this attempt fails.
+ */
+ (void) vioif_add_rx(vif);
- (void) ddi_dma_unbind_handle(buf->rb_mapping.vbm_dmah);
- ddi_dma_mem_free(&buf->rb_mapping.vbm_acch);
- ddi_dma_free_handle(&buf->rb_mapping.vbm_dmah);
+ mutex_exit(&vif->vif_mutex);
}
static void
-vioif_free_mems(struct vioif_softc *sc)
+vioif_free_bufs(vioif_t *vif)
{
- int i;
+ VERIFY(MUTEX_HELD(&vif->vif_mutex));
- for (i = 0; i < sc->sc_tx_vq->vq_num; i++) {
- struct vioif_tx_buf *buf = &sc->sc_txbufs[i];
- int j;
+ VERIFY3U(vif->vif_ntxbufs_alloc, ==, 0);
+ for (uint_t i = 0; i < vif->vif_txbufs_capacity; i++) {
+ vioif_txbuf_t *tb = &vif->vif_txbufs_mem[i];
- /* Tear down the internal mapping. */
+ /*
+ * Ensure that this txbuf is now in the free list:
+ */
+ VERIFY(list_link_active(&tb->tb_link));
+ list_remove(&vif->vif_txbufs, tb);
- ASSERT(buf->tb_inline_mapping.vbm_acch);
- ASSERT(buf->tb_inline_mapping.vbm_dmah);
+ /*
+ * We should not have an mblk chain at this point.
+ */
+ VERIFY3P(tb->tb_mp, ==, NULL);
- (void) ddi_dma_unbind_handle(buf->tb_inline_mapping.vbm_dmah);
- ddi_dma_mem_free(&buf->tb_inline_mapping.vbm_acch);
- ddi_dma_free_handle(&buf->tb_inline_mapping.vbm_dmah);
+ if (tb->tb_dma != NULL) {
+ virtio_dma_free(tb->tb_dma);
+ tb->tb_dma = NULL;
+ }
- /* We should not see any in-flight buffers at this point. */
- ASSERT(!buf->tb_mp);
+ if (tb->tb_chain != NULL) {
+ virtio_chain_free(tb->tb_chain);
+ tb->tb_chain = NULL;
+ }
+
+ if (tb->tb_dmaext != NULL) {
+ for (uint_t j = 0; j < tb->tb_dmaext_capacity; j++) {
+ if (tb->tb_dmaext[j] != NULL) {
+ virtio_dma_free(
+ tb->tb_dmaext[j]);
+ tb->tb_dmaext[j] = NULL;
+ }
+ }
- /* Free all the dma hdnales we allocated lazily. */
- for (j = 0; buf->tb_external_mapping[j].vbm_dmah; j++)
- ddi_dma_free_handle(
- &buf->tb_external_mapping[j].vbm_dmah);
- /* Free the external mapping array. */
- kmem_free(buf->tb_external_mapping,
- sizeof (struct vioif_tx_buf) * VIOIF_INDIRECT_MAX - 1);
+ kmem_free(tb->tb_dmaext,
+ sizeof (virtio_dma_t *) * tb->tb_dmaext_capacity);
+ tb->tb_dmaext = NULL;
+ tb->tb_dmaext_capacity = 0;
+ }
+ }
+ VERIFY(list_is_empty(&vif->vif_txbufs));
+ if (vif->vif_txbufs_mem != NULL) {
+ kmem_free(vif->vif_txbufs_mem,
+ sizeof (vioif_txbuf_t) * vif->vif_txbufs_capacity);
+ vif->vif_txbufs_mem = NULL;
+ vif->vif_txbufs_capacity = 0;
}
- kmem_free(sc->sc_txbufs, sizeof (struct vioif_tx_buf) *
- sc->sc_tx_vq->vq_num);
+ VERIFY3U(vif->vif_nrxbufs_alloc, ==, 0);
+ for (uint_t i = 0; i < vif->vif_rxbufs_capacity; i++) {
+ vioif_rxbuf_t *rb = &vif->vif_rxbufs_mem[i];
- for (i = 0; i < sc->sc_rx_vq->vq_num; i++) {
- struct vioif_rx_buf *buf = sc->sc_rxbufs[i];
+ /*
+ * Ensure that this rxbuf is now in the free list:
+ */
+ VERIFY(list_link_active(&rb->rb_link));
+ list_remove(&vif->vif_rxbufs, rb);
- if (buf)
- kmem_cache_free(sc->sc_rxbuf_cache, buf);
+ if (rb->rb_dma != NULL) {
+ virtio_dma_free(rb->rb_dma);
+ rb->rb_dma = NULL;
+ }
+
+ if (rb->rb_chain != NULL) {
+ virtio_chain_free(rb->rb_chain);
+ rb->rb_chain = NULL;
+ }
+ }
+ VERIFY(list_is_empty(&vif->vif_rxbufs));
+ if (vif->vif_rxbufs_mem != NULL) {
+ kmem_free(vif->vif_rxbufs_mem,
+ sizeof (vioif_rxbuf_t) * vif->vif_rxbufs_capacity);
+ vif->vif_rxbufs_mem = NULL;
+ vif->vif_rxbufs_capacity = 0;
}
- kmem_free(sc->sc_rxbufs, sizeof (struct vioif_rx_buf *) *
- sc->sc_rx_vq->vq_num);
}
static int
-vioif_alloc_mems(struct vioif_softc *sc)
+vioif_alloc_bufs(vioif_t *vif)
{
- int i, txqsize, rxqsize;
- size_t len;
- unsigned int nsegments;
+ VERIFY(MUTEX_HELD(&vif->vif_mutex));
- txqsize = sc->sc_tx_vq->vq_num;
- rxqsize = sc->sc_rx_vq->vq_num;
+ /*
+ * Allocate a contiguous chunk of memory for each of the transmit and
+ * receive buffer tracking object arrays. If the ring is unusually
+ * small, we'll reduce our target buffer count accordingly.
+ */
+ vif->vif_txbufs_capacity = MIN(VIRTIO_NET_TX_BUFS,
+ virtio_queue_size(vif->vif_tx_vq));
+ vif->vif_txbufs_mem = kmem_zalloc(
+ sizeof (vioif_txbuf_t) * vif->vif_txbufs_capacity, KM_SLEEP);
+ list_create(&vif->vif_txbufs, sizeof (vioif_txbuf_t),
+ offsetof(vioif_txbuf_t, tb_link));
+
+ vif->vif_rxbufs_capacity = MIN(VIRTIO_NET_RX_BUFS,
+ virtio_queue_size(vif->vif_rx_vq));
+ vif->vif_rxbufs_mem = kmem_zalloc(
+ sizeof (vioif_rxbuf_t) * vif->vif_rxbufs_capacity, KM_SLEEP);
+ list_create(&vif->vif_rxbufs, sizeof (vioif_rxbuf_t),
+ offsetof(vioif_rxbuf_t, rb_link));
- sc->sc_txbufs = kmem_zalloc(sizeof (struct vioif_tx_buf) * txqsize,
- KM_SLEEP);
- if (sc->sc_txbufs == NULL) {
- dev_err(sc->sc_dev, CE_WARN,
- "Failed to allocate the tx buffers array");
- goto exit_txalloc;
- }
+ /*
+ * Do not loan more than half of our allocated receive buffers into
+ * the networking stack.
+ */
+ vif->vif_nrxbufs_onloan_max = vif->vif_rxbufs_capacity / 2;
/*
- * We don't allocate the rx vioif_bufs, just the pointers, as
- * rx vioif_bufs can be loaned upstream, and we don't know the
- * total number we need.
+ * Put everything in the free list straight away in order to simplify
+ * the use of vioif_free_bufs() for cleanup on allocation failure.
*/
- sc->sc_rxbufs = kmem_zalloc(sizeof (struct vioif_rx_buf *) * rxqsize,
- KM_SLEEP);
- if (sc->sc_rxbufs == NULL) {
- dev_err(sc->sc_dev, CE_WARN,
- "Failed to allocate the rx buffers pointer array");
- goto exit_rxalloc;
+ for (uint_t i = 0; i < vif->vif_txbufs_capacity; i++) {
+ list_insert_tail(&vif->vif_txbufs, &vif->vif_txbufs_mem[i]);
+ }
+ for (uint_t i = 0; i < vif->vif_rxbufs_capacity; i++) {
+ list_insert_tail(&vif->vif_rxbufs, &vif->vif_rxbufs_mem[i]);
}
- for (i = 0; i < txqsize; i++) {
- struct vioif_tx_buf *buf = &sc->sc_txbufs[i];
-
- /* Allocate and bind an inline mapping. */
-
- if (ddi_dma_alloc_handle(sc->sc_dev,
- &vioif_inline_buf_dma_attr,
- DDI_DMA_SLEEP, NULL, &buf->tb_inline_mapping.vbm_dmah)) {
+ /*
+ * Start from the DMA attribute template common to both transmit and
+ * receive buffers. The SGL entry count will be modified for each
+ * buffer type.
+ */
+ ddi_dma_attr_t attr = vioif_dma_attr_bufs;
- dev_err(sc->sc_dev, CE_WARN,
- "Can't allocate dma handle for tx buffer %d", i);
- goto exit_tx;
+ /*
+ * The transmit inline buffer is small (less than a page), so it's
+ * reasonable to request a single cookie.
+ */
+ attr.dma_attr_sgllen = 1;
+
+ for (vioif_txbuf_t *tb = list_head(&vif->vif_txbufs); tb != NULL;
+ tb = list_next(&vif->vif_txbufs, tb)) {
+ if ((tb->tb_dma = virtio_dma_alloc(vif->vif_virtio,
+ VIOIF_TX_INLINE_SIZE, &attr,
+ DDI_DMA_STREAMING | DDI_DMA_WRITE, KM_SLEEP)) == NULL) {
+ goto fail;
}
+ VERIFY3U(virtio_dma_ncookies(tb->tb_dma), ==, 1);
- if (ddi_dma_mem_alloc(buf->tb_inline_mapping.vbm_dmah,
- VIOIF_TX_INLINE_SIZE, &vioif_bufattr, DDI_DMA_STREAMING,
- DDI_DMA_SLEEP, NULL, &buf->tb_inline_mapping.vbm_buf,
- &len, &buf->tb_inline_mapping.vbm_acch)) {
-
- dev_err(sc->sc_dev, CE_WARN,
- "Can't allocate tx buffer %d", i);
- goto exit_tx;
+ if ((tb->tb_chain = virtio_chain_alloc(vif->vif_tx_vq,
+ KM_SLEEP)) == NULL) {
+ goto fail;
}
- ASSERT(len >= VIOIF_TX_INLINE_SIZE);
+ virtio_chain_data_set(tb->tb_chain, tb);
- if (ddi_dma_addr_bind_handle(buf->tb_inline_mapping.vbm_dmah,
- NULL, buf->tb_inline_mapping.vbm_buf, len,
- DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_SLEEP, NULL,
- &buf->tb_inline_mapping.vbm_dmac, &nsegments)) {
+ tb->tb_dmaext_capacity = VIOIF_MAX_SEGS - 1;
+ tb->tb_dmaext = kmem_zalloc(
+ sizeof (virtio_dma_t *) * tb->tb_dmaext_capacity,
+ KM_SLEEP);
+ }
- dev_err(sc->sc_dev, CE_WARN,
- "Can't bind tx buffer %d", i);
- goto exit_tx;
+ /*
+ * The receive buffers are larger, and we can tolerate a large number
+ * of segments. Adjust the SGL entry count, setting aside one segment
+ * for the virtio net header.
+ */
+ attr.dma_attr_sgllen = VIOIF_MAX_SEGS - 1;
+
+ for (vioif_rxbuf_t *rb = list_head(&vif->vif_rxbufs); rb != NULL;
+ rb = list_next(&vif->vif_rxbufs, rb)) {
+ if ((rb->rb_dma = virtio_dma_alloc(vif->vif_virtio,
+ VIOIF_RX_BUF_SIZE, &attr, DDI_DMA_STREAMING | DDI_DMA_READ,
+ KM_SLEEP)) == NULL) {
+ goto fail;
}
- /* We asked for a single segment */
- ASSERT(nsegments == 1);
+ if ((rb->rb_chain = virtio_chain_alloc(vif->vif_rx_vq,
+ KM_SLEEP)) == NULL) {
+ goto fail;
+ }
+ virtio_chain_data_set(rb->rb_chain, rb);
/*
- * We allow up to VIOIF_INDIRECT_MAX - 1 external mappings.
- * In reality, I don't expect more then 2-3 used, but who
- * knows.
+ * Ensure that the first cookie is sufficient to cover the
+ * header skip region plus one byte.
*/
- buf->tb_external_mapping = kmem_zalloc(
- sizeof (struct vioif_tx_buf) * VIOIF_INDIRECT_MAX - 1,
- KM_SLEEP);
+ VERIFY3U(virtio_dma_cookie_size(rb->rb_dma, 0), >=,
+ VIOIF_HEADER_SKIP + 1);
/*
- * The external mapping's dma handles are allocate lazily,
- * as we don't expect most of them to be used..
+ * Ensure that the frame data begins at a location with a
+ * correctly aligned IP header.
*/
- }
-
- return (0);
-
-exit_tx:
- for (i = 0; i < txqsize; i++) {
- struct vioif_tx_buf *buf = &sc->sc_txbufs[i];
+ VERIFY3U((uintptr_t)virtio_dma_va(rb->rb_dma,
+ VIOIF_HEADER_SKIP) % 4, ==, 2);
- if (buf->tb_inline_mapping.vbm_dmah)
- (void) ddi_dma_unbind_handle(
- buf->tb_inline_mapping.vbm_dmah);
-
- if (buf->tb_inline_mapping.vbm_acch)
- ddi_dma_mem_free(
- &buf->tb_inline_mapping.vbm_acch);
-
- if (buf->tb_inline_mapping.vbm_dmah)
- ddi_dma_free_handle(
- &buf->tb_inline_mapping.vbm_dmah);
-
- if (buf->tb_external_mapping)
- kmem_free(buf->tb_external_mapping,
- sizeof (struct vioif_tx_buf) *
- VIOIF_INDIRECT_MAX - 1);
+ rb->rb_vioif = vif;
+ rb->rb_frtn.free_func = vioif_rx_free_callback;
+ rb->rb_frtn.free_arg = (caddr_t)rb;
}
- kmem_free(sc->sc_rxbufs, sizeof (struct vioif_rx_buf) * rxqsize);
+ return (0);
-exit_rxalloc:
- kmem_free(sc->sc_txbufs, sizeof (struct vioif_tx_buf) * txqsize);
-exit_txalloc:
+fail:
+ vioif_free_bufs(vif);
return (ENOMEM);
}
-/* ARGSUSED */
static int
-vioif_multicst(void *arg, boolean_t add, const uint8_t *macaddr)
+vioif_m_multicst(void *arg, boolean_t add, const uint8_t *mcst_addr)
{
- return (DDI_SUCCESS);
+ /*
+ * Even though we currently do not have support for programming
+ * multicast filters, or even enabling promiscuous mode, we return
+ * success here to avoid the networking stack falling back to link
+ * layer broadcast for multicast traffic. Some hypervisors already
+ * pass received multicast frames onto the guest, so at least on those
+ * systems multicast will work as expected anyway.
+ */
+ return (0);
}
-/* ARGSUSED */
static int
-vioif_promisc(void *arg, boolean_t on)
+vioif_m_setpromisc(void *arg, boolean_t on)
{
- return (DDI_SUCCESS);
+ /*
+ * Even though we cannot currently enable promiscuous mode, we return
+ * success here to allow tools like snoop(1M) to continue to function.
+ */
+ return (0);
}
-/* ARGSUSED */
static int
-vioif_unicst(void *arg, const uint8_t *macaddr)
+vioif_m_unicst(void *arg, const uint8_t *mac)
{
- return (DDI_FAILURE);
+ return (ENOTSUP);
}
static uint_t
-vioif_add_rx(struct vioif_softc *sc, int kmflag)
+vioif_add_rx(vioif_t *vif)
{
- uint_t num_added = 0;
- struct vq_entry *ve;
-
- while ((ve = vq_alloc_entry(sc->sc_rx_vq)) != NULL) {
- struct vioif_rx_buf *buf = sc->sc_rxbufs[ve->qe_index];
-
- if (buf == NULL) {
- /* First run, allocate the buffer. */
- buf = kmem_cache_alloc(sc->sc_rxbuf_cache, kmflag);
- sc->sc_rxbufs[ve->qe_index] = buf;
- }
-
- /* Still nothing? Bye. */
- if (buf == NULL) {
- sc->sc_norecvbuf++;
- vq_free_entry(sc->sc_rx_vq, ve);
- break;
- }
-
- ASSERT(buf->rb_mapping.vbm_ncookies >= 1);
+ VERIFY(MUTEX_HELD(&vif->vif_mutex));
+ if (vif->vif_runstate != VIOIF_RUNSTATE_RUNNING) {
/*
- * For an unknown reason, the virtio_net_hdr must be placed
- * as a separate virtio queue entry.
+ * If the NIC is not running, do not give the device any
+ * receive buffers.
*/
- virtio_ve_add_indirect_buf(ve,
- buf->rb_mapping.vbm_dmac.dmac_laddress,
- sizeof (struct virtio_net_hdr), B_FALSE);
+ return (0);
+ }
- /* Add the rest of the first cookie. */
- virtio_ve_add_indirect_buf(ve,
- buf->rb_mapping.vbm_dmac.dmac_laddress +
- sizeof (struct virtio_net_hdr),
- buf->rb_mapping.vbm_dmac.dmac_size -
- sizeof (struct virtio_net_hdr), B_FALSE);
+ uint_t num_added = 0;
+ vioif_rxbuf_t *rb;
+ while ((rb = vioif_rxbuf_alloc(vif)) != NULL) {
/*
- * If the buffer consists of a single cookie (unlikely for a
- * 64-k buffer), we are done. Otherwise, add the rest of the
- * cookies using indirect entries.
+ * For legacy devices, and those that have not negotiated
+ * VIRTIO_F_ANY_LAYOUT, the virtio net header must appear in a
+ * separate descriptor entry from the rest of the buffer.
*/
- if (buf->rb_mapping.vbm_ncookies > 1) {
- ddi_dma_cookie_t *first_extra_dmac;
- ddi_dma_cookie_t dmac;
- first_extra_dmac =
- vioif_dma_curr_cookie(buf->rb_mapping.vbm_dmah);
-
- ddi_dma_nextcookie(buf->rb_mapping.vbm_dmah, &dmac);
- virtio_ve_add_cookie(ve, buf->rb_mapping.vbm_dmah,
- dmac, buf->rb_mapping.vbm_ncookies - 1, B_FALSE);
- vioif_dma_reset_cookie(buf->rb_mapping.vbm_dmah,
- first_extra_dmac);
+ if (virtio_chain_append(rb->rb_chain,
+ virtio_dma_cookie_pa(rb->rb_dma, 0),
+ sizeof (struct virtio_net_hdr),
+ VIRTIO_DIR_DEVICE_WRITES) != DDI_SUCCESS) {
+ goto fail;
}
- virtio_push_chain(ve, B_FALSE);
- num_added++;
- }
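+ /*
+ * Add the receive buffer memory to the chain one cookie at a time,
+ * skipping past the virtio net header region (already added as its
+ * own descriptor above) in the first cookie.
+ */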
+ for (uint_t n = 0; n < virtio_dma_ncookies(rb->rb_dma); n++) {
+ uint64_t pa = virtio_dma_cookie_pa(rb->rb_dma, n);
+ size_t sz = virtio_dma_cookie_size(rb->rb_dma, n);
- return (num_added);
-}
+ if (n == 0) {
+ pa += VIOIF_HEADER_SKIP;
+ VERIFY3U(sz, >, VIOIF_HEADER_SKIP);
+ sz -= VIOIF_HEADER_SKIP;
+ }
-static uint_t
-vioif_populate_rx(struct vioif_softc *sc, int kmflag)
-{
- uint_t num_added = vioif_add_rx(sc, kmflag);
+ if (virtio_chain_append(rb->rb_chain, pa, sz,
+ VIRTIO_DIR_DEVICE_WRITES) != DDI_SUCCESS) {
+ goto fail;
+ }
+ }
+
+ virtio_chain_submit(rb->rb_chain, B_FALSE);
+ num_added++;
+ continue;
- if (num_added > 0)
- virtio_sync_vq(sc->sc_rx_vq);
+fail:
+ vioif_rxbuf_free(vif, rb);
+ vif->vif_norecvbuf++;
+ break;
+ }
+
+ if (num_added > 0) {
+ virtio_queue_flush(vif->vif_rx_vq);
+ }
return (num_added);
}
static uint_t
-vioif_process_rx(struct vioif_softc *sc)
+vioif_process_rx(vioif_t *vif)
{
- struct vq_entry *ve;
- struct vioif_rx_buf *buf;
+ virtio_chain_t *vic;
mblk_t *mphead = NULL, *lastmp = NULL, *mp;
- uint32_t len;
uint_t num_processed = 0;
- while ((ve = virtio_pull_chain(sc->sc_rx_vq, &len))) {
+ VERIFY(MUTEX_HELD(&vif->vif_mutex));
- buf = sc->sc_rxbufs[ve->qe_index];
- ASSERT(buf);
+ while ((vic = virtio_queue_poll(vif->vif_rx_vq)) != NULL) {
+ /*
+ * We have to use the chain received length here, as the device
+ * does not tell us the received frame length any other way.
+ * In a limited survey of hypervisors, virtio network devices
+ * appear to provide the right value here.
+ */
+ size_t len = virtio_chain_received_length(vic);
+ vioif_rxbuf_t *rb = virtio_chain_data(vic);
- if (len < sizeof (struct virtio_net_hdr)) {
- sc->sc_rxfail_chain_undersize++;
- sc->sc_ierrors++;
- virtio_free_chain(ve);
+ virtio_dma_sync(rb->rb_dma, DDI_DMA_SYNC_FORCPU);
+
+ /*
+ * If the NIC is not running, discard any received frames.
+ */
+ if (vif->vif_runstate != VIOIF_RUNSTATE_RUNNING) {
+ vioif_rxbuf_free(vif, rb);
continue;
}
+ if (len < sizeof (struct virtio_net_hdr)) {
+ vif->vif_rxfail_chain_undersize++;
+ vif->vif_ierrors++;
+ vioif_rxbuf_free(vif, rb);
+ continue;
+ }
len -= sizeof (struct virtio_net_hdr);
+
/*
* We copy small packets that happen to fit into a single
* cookie and reuse the buffers. For bigger ones, we loan
* the buffers upstream.
*/
- if (len < sc->sc_rxcopy_thresh) {
- mp = allocb(len, 0);
- if (mp == NULL) {
- sc->sc_norecvbuf++;
- sc->sc_ierrors++;
-
- virtio_free_chain(ve);
- break;
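+ /*
+ * In addition to the copy threshold, fall back to copying when the
+ * maximum number of loaned receive buffers is already outstanding.
+ */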
+ if (len < vif->vif_rxcopy_thresh ||
+ vif->vif_nrxbufs_onloan >= vif->vif_nrxbufs_onloan_max) {
+ mutex_exit(&vif->vif_mutex);
+ if ((mp = allocb(len, 0)) == NULL) {
+ mutex_enter(&vif->vif_mutex);
+ vif->vif_norecvbuf++;
+ vif->vif_ierrors++;
+
+ vioif_rxbuf_free(vif, rb);
+ continue;
}
- bcopy((char *)buf->rb_mapping.vbm_buf +
- sizeof (struct virtio_net_hdr), mp->b_rptr, len);
+ bcopy(virtio_dma_va(rb->rb_dma, VIOIF_HEADER_SKIP),
+ mp->b_rptr, len);
mp->b_wptr = mp->b_rptr + len;
+ /*
+ * As the packet contents were copied rather than
+ * loaned, we can return the receive buffer resources
+ * to the free list.
+ */
+ mutex_enter(&vif->vif_mutex);
+ vioif_rxbuf_free(vif, rb);
+
} else {
- mp = desballoc((unsigned char *)
- buf->rb_mapping.vbm_buf +
- sizeof (struct virtio_net_hdr) +
- VIOIF_IP_ALIGN, len, 0, &buf->rb_frtn);
- if (mp == NULL) {
- sc->sc_norecvbuf++;
- sc->sc_ierrors++;
-
- virtio_free_chain(ve);
- break;
+ mutex_exit(&vif->vif_mutex);
+ if ((mp = desballoc(virtio_dma_va(rb->rb_dma,
+ VIOIF_HEADER_SKIP), len, 0,
+ &rb->rb_frtn)) == NULL) {
+ mutex_enter(&vif->vif_mutex);
+ vif->vif_norecvbuf++;
+ vif->vif_ierrors++;
+
+ vioif_rxbuf_free(vif, rb);
+ continue;
}
mp->b_wptr = mp->b_rptr + len;
- atomic_inc_ulong(&sc->sc_rxloan);
- /*
- * Buffer loaned, we will have to allocate a new one
- * for this slot.
- */
- sc->sc_rxbufs[ve->qe_index] = NULL;
+ mutex_enter(&vif->vif_mutex);
+ vif->vif_nrxbufs_onloan++;
}
/*
@@ -879,15 +704,13 @@ vioif_process_rx(struct vioif_softc *sc)
*/
if (mp->b_rptr[0] & 0x1) {
if (bcmp(mp->b_rptr, vioif_broadcast, ETHERADDRL) != 0)
- sc->sc_multircv++;
+ vif->vif_multircv++;
else
- sc->sc_brdcstrcv++;
+ vif->vif_brdcstrcv++;
}
- sc->sc_rbytes += len;
- sc->sc_ipackets++;
-
- virtio_free_chain(ve);
+ vif->vif_rbytes += len;
+ vif->vif_ipackets++;
if (lastmp == NULL) {
mphead = mp;
@@ -899,42 +722,56 @@ vioif_process_rx(struct vioif_softc *sc)
}
if (mphead != NULL) {
- mac_rx(sc->sc_mac_handle, NULL, mphead);
+ if (vif->vif_runstate == VIOIF_RUNSTATE_RUNNING) {
+ mutex_exit(&vif->vif_mutex);
+ mac_rx(vif->vif_mac_handle, NULL, mphead);
+ mutex_enter(&vif->vif_mutex);
+ } else {
+ /*
+ * The NIC was disabled part way through our execution,
+ * so free the messages we allocated.
+ */
+ freemsgchain(mphead);
+ }
}
return (num_processed);
}
static uint_t
-vioif_reclaim_used_tx(struct vioif_softc *sc)
+vioif_reclaim_used_tx(vioif_t *vif)
{
- struct vq_entry *ve;
- uint32_t len;
+ virtio_chain_t *vic;
uint_t num_reclaimed = 0;
- while ((ve = virtio_pull_chain(sc->sc_tx_vq, &len))) {
- struct vioif_tx_buf *buf;
- mblk_t *mp;
+ VERIFY(MUTEX_NOT_HELD(&vif->vif_mutex));
- /* We don't chain descriptors for tx, so don't expect any. */
- ASSERT(ve->qe_next == NULL);
+ while ((vic = virtio_queue_poll(vif->vif_tx_vq)) != NULL) {
+ vioif_txbuf_t *tb = virtio_chain_data(vic);
- buf = &sc->sc_txbufs[ve->qe_index];
- mp = buf->tb_mp;
- buf->tb_mp = NULL;
+ if (tb->tb_mp != NULL) {
+ /*
+ * Unbind the external mapping.
+ */
+ for (uint_t i = 0; i < tb->tb_dmaext_capacity; i++) {
+ if (tb->tb_dmaext[i] == NULL) {
+ continue;
+ }
- if (mp != NULL) {
- for (uint_t i = 0; i < buf->tb_external_num; i++) {
- (void) ddi_dma_unbind_handle(
- buf->tb_external_mapping[i].vbm_dmah);
+ virtio_dma_unbind(tb->tb_dmaext[i]);
}
+
+ freemsg(tb->tb_mp);
+ tb->tb_mp = NULL;
}
- virtio_free_chain(ve);
+ /*
+ * Return this transmit buffer to the free list for reuse.
+ */
+ mutex_enter(&vif->vif_mutex);
+ vioif_txbuf_free(vif, tb);
+ mutex_exit(&vif->vif_mutex);
- /* External mapping used, mp was not freed in vioif_send() */
- if (mp != NULL)
- freemsg(mp);
num_reclaimed++;
}
@@ -942,24 +779,24 @@ vioif_reclaim_used_tx(struct vioif_softc *sc)
if (num_reclaimed > 0) {
boolean_t do_update = B_FALSE;
- mutex_enter(&sc->sc_tx_lock);
- if (sc->sc_tx_corked) {
+ mutex_enter(&vif->vif_mutex);
+ vif->vif_stat_tx_reclaim += num_reclaimed;
+ if (vif->vif_tx_corked) {
/*
* TX was corked on a lack of available descriptors.
* That dire state has passed so the TX interrupt can
* be disabled and MAC can be notified that
* transmission is possible again.
*/
- sc->sc_tx_corked = B_FALSE;
- virtio_stop_vq_intr(sc->sc_tx_vq);
+ vif->vif_tx_corked = B_FALSE;
+ virtio_queue_no_interrupt(vif->vif_tx_vq, B_TRUE);
do_update = B_TRUE;
}
- mutex_exit(&sc->sc_tx_lock);
- /* Notify MAC outside the above lock */
if (do_update) {
- mac_tx_update(sc->sc_mac_handle);
+ mac_tx_update(vif->vif_mac_handle);
}
+ mutex_exit(&vif->vif_mutex);
}
return (num_reclaimed);
@@ -968,208 +805,196 @@ vioif_reclaim_used_tx(struct vioif_softc *sc)
static void
vioif_reclaim_periodic(void *arg)
{
- struct vioif_softc *sc = arg;
+ vioif_t *vif = arg;
uint_t num_reclaimed;
- num_reclaimed = vioif_reclaim_used_tx(sc);
+ num_reclaimed = vioif_reclaim_used_tx(vif);
- mutex_enter(&sc->sc_tx_lock);
- sc->sc_tx_reclaim_tid = 0;
+ mutex_enter(&vif->vif_mutex);
+ vif->vif_tx_reclaim_tid = 0;
/*
* If used descriptors were reclaimed or TX descriptors appear to be
* outstanding, the ring is considered active and periodic reclamation
* is necessary for now.
*/
- if (num_reclaimed != 0 || vq_num_used(sc->sc_tx_vq) != 0) {
+ if (num_reclaimed != 0 || virtio_queue_nactive(vif->vif_tx_vq) != 0) {
/* Do not reschedule if the ring is being drained. */
- if (!sc->sc_tx_drain) {
- vioif_reclaim_restart(sc);
+ if (!vif->vif_tx_drain) {
+ vioif_reclaim_restart(vif);
}
}
- mutex_exit(&sc->sc_tx_lock);
+ mutex_exit(&vif->vif_mutex);
}
static void
-vioif_reclaim_restart(struct vioif_softc *sc)
+vioif_reclaim_restart(vioif_t *vif)
{
- ASSERT(MUTEX_HELD(&sc->sc_tx_lock));
- ASSERT(!sc->sc_tx_drain);
+ VERIFY(MUTEX_HELD(&vif->vif_mutex));
+ VERIFY(!vif->vif_tx_drain);
- if (sc->sc_tx_reclaim_tid == 0) {
- sc->sc_tx_reclaim_tid = timeout(vioif_reclaim_periodic, sc,
+ if (vif->vif_tx_reclaim_tid == 0) {
+ vif->vif_tx_reclaim_tid = timeout(vioif_reclaim_periodic, vif,
MSEC_TO_TICK_ROUNDUP(vioif_reclaim_ms));
}
}
static void
-vioif_tx_drain(struct vioif_softc *sc)
+vioif_tx_drain(vioif_t *vif)
{
- mutex_enter(&sc->sc_tx_lock);
- sc->sc_tx_drain = B_TRUE;
+ VERIFY(MUTEX_HELD(&vif->vif_mutex));
+ VERIFY3S(vif->vif_runstate, ==, VIOIF_RUNSTATE_STOPPING);
+
+ vif->vif_tx_drain = B_TRUE;
/* Put a stop to the periodic reclaim if it is running */
- if (sc->sc_tx_reclaim_tid != 0) {
- timeout_id_t tid = sc->sc_tx_reclaim_tid;
+ if (vif->vif_tx_reclaim_tid != 0) {
+ timeout_id_t tid = vif->vif_tx_reclaim_tid;
/*
- * With sc_tx_drain set, there is no risk that a racing
+ * With vif_tx_drain set, there is no risk that a racing
* vioif_reclaim_periodic() call will reschedule itself.
*
* Being part of the mc_stop hook also guarantees that
- * vioif_tx() will not be called to restart it.
+ * vioif_m_tx() will not be called to restart it.
*/
- sc->sc_tx_reclaim_tid = 0;
- mutex_exit(&sc->sc_tx_lock);
+ vif->vif_tx_reclaim_tid = 0;
+ mutex_exit(&vif->vif_mutex);
(void) untimeout(tid);
- mutex_enter(&sc->sc_tx_lock);
+ mutex_enter(&vif->vif_mutex);
}
- virtio_stop_vq_intr(sc->sc_tx_vq);
- mutex_exit(&sc->sc_tx_lock);
+ virtio_queue_no_interrupt(vif->vif_tx_vq, B_TRUE);
/*
* Wait for all of the TX descriptors to be processed by the host so
* they can be reclaimed.
*/
- while (vq_num_used(sc->sc_tx_vq) != 0) {
- (void) vioif_reclaim_used_tx(sc);
+ while (vif->vif_ntxbufs_alloc > 0) {
+ mutex_exit(&vif->vif_mutex);
+ (void) vioif_reclaim_used_tx(vif);
delay(5);
+ mutex_enter(&vif->vif_mutex);
}
-
- VERIFY(!sc->sc_tx_corked);
- VERIFY3U(sc->sc_tx_reclaim_tid, ==, 0);
- VERIFY3U(vq_num_used(sc->sc_tx_vq), ==, 0);
+ VERIFY(!vif->vif_tx_corked);
+ VERIFY3U(vif->vif_tx_reclaim_tid, ==, 0);
+ VERIFY3U(virtio_queue_nactive(vif->vif_tx_vq), ==, 0);
}
-/* sc will be used to update stat counters. */
-/* ARGSUSED */
-static inline void
-vioif_tx_inline(struct vioif_softc *sc, struct vq_entry *ve, mblk_t *mp,
- size_t msg_size)
+static int
+vioif_tx_inline(vioif_t *vif, vioif_txbuf_t *tb, mblk_t *mp, size_t msg_size)
{
- struct vioif_tx_buf *buf;
- buf = &sc->sc_txbufs[ve->qe_index];
-
- ASSERT(buf);
+ VERIFY(MUTEX_NOT_HELD(&vif->vif_mutex));
- /* Frees mp */
- mcopymsg(mp, buf->tb_inline_mapping.vbm_buf +
- sizeof (struct virtio_net_hdr));
+ VERIFY3U(msg_size, <=, virtio_dma_size(tb->tb_dma) - VIOIF_HEADER_SKIP);
- virtio_ve_add_indirect_buf(ve,
- buf->tb_inline_mapping.vbm_dmac.dmac_laddress +
- sizeof (struct virtio_net_hdr), msg_size, B_TRUE);
-}
+ /*
+ * Copy the message into the inline buffer and then free the message.
+ */
+ mcopymsg(mp, virtio_dma_va(tb->tb_dma, VIOIF_HEADER_SKIP));
-static inline int
-vioif_tx_lazy_handle_alloc(struct vioif_softc *sc, struct vioif_tx_buf *buf,
- int i)
-{
- int ret = DDI_SUCCESS;
-
- if (!buf->tb_external_mapping[i].vbm_dmah) {
- ret = ddi_dma_alloc_handle(sc->sc_dev,
- &vioif_mapped_buf_dma_attr, DDI_DMA_SLEEP, NULL,
- &buf->tb_external_mapping[i].vbm_dmah);
- if (ret != DDI_SUCCESS) {
- sc->sc_txfail_dma_handle++;
- }
+ if (virtio_chain_append(tb->tb_chain,
+ virtio_dma_cookie_pa(tb->tb_dma, 0) + VIOIF_HEADER_SKIP,
+ msg_size, VIRTIO_DIR_DEVICE_READS) != DDI_SUCCESS) {
+ return (DDI_FAILURE);
}
- return (ret);
+ return (DDI_SUCCESS);
}
-static inline int
-vioif_tx_external(struct vioif_softc *sc, struct vq_entry *ve, mblk_t *mp,
- size_t msg_size)
+static int
+vioif_tx_external(vioif_t *vif, vioif_txbuf_t *tb, mblk_t *mp, size_t msg_size)
{
- _NOTE(ARGUNUSED(msg_size));
+ VERIFY(MUTEX_NOT_HELD(&vif->vif_mutex));
- struct vioif_tx_buf *buf;
- mblk_t *nmp;
- int i, j;
- int ret = DDI_SUCCESS;
+ mblk_t *nmp = mp;
+ tb->tb_ndmaext = 0;
- buf = &sc->sc_txbufs[ve->qe_index];
-
- ASSERT(buf);
-
- buf->tb_external_num = 0;
- i = 0;
- nmp = mp;
-
- while (nmp) {
+ while (nmp != NULL) {
size_t len;
- ddi_dma_cookie_t dmac;
- unsigned int ncookies;
- len = MBLKL(nmp);
- /*
- * For some reason, the network stack can
- * actually send us zero-length fragments.
- */
- if (len == 0) {
+ if ((len = MBLKL(nmp)) == 0) {
+ /*
+ * Skip any zero-length entries in the chain.
+ */
nmp = nmp->b_cont;
continue;
}
- ret = vioif_tx_lazy_handle_alloc(sc, buf, i);
- if (ret != DDI_SUCCESS) {
- sc->sc_notxbuf++;
- sc->sc_oerrors++;
- goto exit_lazy_alloc;
- }
- ret = ddi_dma_addr_bind_handle(
- buf->tb_external_mapping[i].vbm_dmah, NULL,
- (caddr_t)nmp->b_rptr, len,
- DDI_DMA_WRITE | DDI_DMA_STREAMING,
- DDI_DMA_SLEEP, NULL, &dmac, &ncookies);
-
- if (ret != DDI_SUCCESS) {
- sc->sc_txfail_dma_bind++;
- sc->sc_oerrors++;
- goto exit_bind;
+ if (tb->tb_ndmaext >= tb->tb_dmaext_capacity) {
+ mutex_enter(&vif->vif_mutex);
+ vif->vif_txfail_indirect_limit++;
+ vif->vif_notxbuf++;
+ mutex_exit(&vif->vif_mutex);
+ goto fail;
}
- /* Check if we still fit into the indirect table. */
- if (virtio_ve_indirect_available(ve) < ncookies) {
- sc->sc_txfail_indirect_limit++;
- sc->sc_notxbuf++;
- sc->sc_oerrors++;
-
- ret = DDI_FAILURE;
- goto exit_limit;
+ if (tb->tb_dmaext[tb->tb_ndmaext] == NULL) {
+ /*
+ * Allocate a DMA handle for this slot.
+ */
+ if ((tb->tb_dmaext[tb->tb_ndmaext] =
+ virtio_dma_alloc_nomem(vif->vif_virtio,
+ &vioif_dma_attr_external, KM_SLEEP)) == NULL) {
+ mutex_enter(&vif->vif_mutex);
+ vif->vif_notxbuf++;
+ mutex_exit(&vif->vif_mutex);
+ goto fail;
+ }
+ }
+ virtio_dma_t *extdma = tb->tb_dmaext[tb->tb_ndmaext++];
+
+ if (virtio_dma_bind(extdma, nmp->b_rptr, len,
+ DDI_DMA_WRITE | DDI_DMA_STREAMING, KM_SLEEP) !=
+ DDI_SUCCESS) {
+ mutex_enter(&vif->vif_mutex);
+ vif->vif_txfail_dma_bind++;
+ mutex_exit(&vif->vif_mutex);
+ goto fail;
}
- virtio_ve_add_cookie(ve, buf->tb_external_mapping[i].vbm_dmah,
- dmac, ncookies, B_TRUE);
+ for (uint_t n = 0; n < virtio_dma_ncookies(extdma); n++) {
+ uint64_t pa = virtio_dma_cookie_pa(extdma, n);
+ size_t sz = virtio_dma_cookie_size(extdma, n);
+
+ if (virtio_chain_append(tb->tb_chain, pa, sz,
+ VIRTIO_DIR_DEVICE_READS) != DDI_SUCCESS) {
+ mutex_enter(&vif->vif_mutex);
+ vif->vif_txfail_indirect_limit++;
+ vif->vif_notxbuf++;
+ mutex_exit(&vif->vif_mutex);
+ goto fail;
+ }
+ }
nmp = nmp->b_cont;
- i++;
}
- buf->tb_external_num = i;
- /* Save the mp to free it when the packet is sent. */
- buf->tb_mp = mp;
+ /*
+	 * We need to keep the message around until the buffer has been
+	 * reclaimed from the device; only then is it freed.
+ */
+ tb->tb_mp = mp;
return (DDI_SUCCESS);
-exit_limit:
-exit_bind:
-exit_lazy_alloc:
-
- for (j = 0; j < i; j++) {
- (void) ddi_dma_unbind_handle(
- buf->tb_external_mapping[j].vbm_dmah);
+fail:
+ for (uint_t n = 0; n < tb->tb_ndmaext; n++) {
+ if (tb->tb_dmaext[n] != NULL) {
+ virtio_dma_unbind(tb->tb_dmaext[n]);
+ }
}
+ tb->tb_ndmaext = 0;
- return (ret);
+ freemsg(mp);
+
+ return (DDI_FAILURE);
}
static boolean_t
-vioif_send(struct vioif_softc *sc, mblk_t *mp)
+vioif_send(vioif_t *vif, mblk_t *mp)
{
- struct vq_entry *ve;
- struct vioif_tx_buf *buf;
- struct virtio_net_hdr *net_header = NULL;
+ VERIFY(MUTEX_NOT_HELD(&vif->vif_mutex));
+
+ vioif_txbuf_t *tb = NULL;
+ struct virtio_net_hdr *vnh = NULL;
size_t msg_size = 0;
uint32_t csum_start;
uint32_t csum_stuff;
@@ -1179,133 +1004,159 @@ vioif_send(struct vioif_softc *sc, mblk_t *mp)
mblk_t *nmp;
int ret;
boolean_t lso_required = B_FALSE;
+ struct ether_header *ether = (void *)mp->b_rptr;
for (nmp = mp; nmp; nmp = nmp->b_cont)
msg_size += MBLKL(nmp);
- if (sc->sc_tx_tso4) {
+ if (vif->vif_tx_tso4) {
mac_lso_get(mp, &lso_mss, &lso_flags);
- lso_required = (lso_flags & HW_LSO);
+ lso_required = (lso_flags & HW_LSO) != 0;
}
- ve = vq_alloc_entry(sc->sc_tx_vq);
-
- if (ve == NULL) {
- sc->sc_notxbuf++;
- /* Out of free descriptors - try later. */
- return (B_FALSE);
+ mutex_enter(&vif->vif_mutex);
+ if ((tb = vioif_txbuf_alloc(vif)) == NULL) {
+ vif->vif_notxbuf++;
+ goto fail;
}
- buf = &sc->sc_txbufs[ve->qe_index];
+ mutex_exit(&vif->vif_mutex);
- /* Use the inline buffer of the first entry for the virtio_net_hdr. */
- (void) memset(buf->tb_inline_mapping.vbm_buf, 0,
- sizeof (struct virtio_net_hdr));
+ /*
+ * Use the inline buffer for the virtio net header. Zero the portion
+ * of our DMA allocation prior to the packet data.
+ */
+ vnh = virtio_dma_va(tb->tb_dma, 0);
+ bzero(vnh, VIOIF_HEADER_SKIP);
- net_header = (struct virtio_net_hdr *)buf->tb_inline_mapping.vbm_buf;
+ /*
+ * For legacy devices, and those that have not negotiated
+ * VIRTIO_F_ANY_LAYOUT, the virtio net header must appear in a separate
+ * descriptor entry to the rest of the buffer.
+ */
+ if (virtio_chain_append(tb->tb_chain,
+ virtio_dma_cookie_pa(tb->tb_dma, 0), sizeof (struct virtio_net_hdr),
+ VIRTIO_DIR_DEVICE_READS) != DDI_SUCCESS) {
+ mutex_enter(&vif->vif_mutex);
+ vif->vif_notxbuf++;
+ goto fail;
+ }
- mac_hcksum_get(mp, &csum_start, &csum_stuff, NULL,
- NULL, &csum_flags);
+ mac_hcksum_get(mp, &csum_start, &csum_stuff, NULL, NULL, &csum_flags);
- /* They want us to do the TCP/UDP csum calculation. */
+ /*
+ * They want us to do the TCP/UDP csum calculation.
+ */
if (csum_flags & HCK_PARTIALCKSUM) {
- struct ether_header *eth_header;
int eth_hsize;
- /* Did we ask for it? */
- ASSERT(sc->sc_tx_csum);
+ /*
+ * Did we ask for it?
+ */
+ ASSERT(vif->vif_tx_csum);
- /* We only asked for partial csum packets. */
+ /*
+ * We only asked for partial csum packets.
+ */
ASSERT(!(csum_flags & HCK_IPV4_HDRCKSUM));
ASSERT(!(csum_flags & HCK_FULLCKSUM));
- eth_header = (void *) mp->b_rptr;
- if (eth_header->ether_type == htons(ETHERTYPE_VLAN)) {
+ if (ether->ether_type == htons(ETHERTYPE_VLAN)) {
eth_hsize = sizeof (struct ether_vlan_header);
} else {
eth_hsize = sizeof (struct ether_header);
}
- net_header->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
- net_header->csum_start = eth_hsize + csum_start;
- net_header->csum_offset = csum_stuff - csum_start;
+
+ vnh->vnh_flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+ vnh->vnh_csum_start = eth_hsize + csum_start;
+ vnh->vnh_csum_offset = csum_stuff - csum_start;
}
- /* setup LSO fields if required */
+ /*
+ * Setup LSO fields if required.
+ */
if (lso_required) {
- net_header->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
- net_header->gso_size = (uint16_t)lso_mss;
+ vnh->vnh_gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
+ vnh->vnh_gso_size = (uint16_t)lso_mss;
}
- virtio_ve_add_indirect_buf(ve,
- buf->tb_inline_mapping.vbm_dmac.dmac_laddress,
- sizeof (struct virtio_net_hdr), B_TRUE);
-
- /* meanwhile update the statistic */
- if (mp->b_rptr[0] & 0x1) {
- if (bcmp(mp->b_rptr, vioif_broadcast, ETHERADDRL) != 0)
- sc->sc_multixmt++;
- else
- sc->sc_brdcstxmt++;
+ /*
+ * The device does not maintain its own statistics about broadcast or
+ * multicast packets, so we have to check the destination address
+ * ourselves.
+ */
+ if ((ether->ether_dhost.ether_addr_octet[0] & 0x01) != 0) {
+ mutex_enter(&vif->vif_mutex);
+ if (ether_cmp(&ether->ether_dhost, vioif_broadcast) == 0) {
+ vif->vif_brdcstxmt++;
+ } else {
+ vif->vif_multixmt++;
+ }
+ mutex_exit(&vif->vif_mutex);
}
/*
- * We copy small packets into the inline buffer. The bigger ones
- * get mapped using the mapped buffer.
+ * For small packets, copy into the preallocated inline buffer rather
+ * than incur the overhead of mapping. Note that both of these
+ * functions ensure that "mp" is freed before returning.
*/
- if (msg_size < sc->sc_txcopy_thresh) {
- vioif_tx_inline(sc, ve, mp, msg_size);
+ if (msg_size < vif->vif_txcopy_thresh) {
+ ret = vioif_tx_inline(vif, tb, mp, msg_size);
} else {
- /* statistic gets updated by vioif_tx_external when fail */
- ret = vioif_tx_external(sc, ve, mp, msg_size);
- if (ret != DDI_SUCCESS)
- goto exit_tx_external;
+ ret = vioif_tx_external(vif, tb, mp, msg_size);
}
+ mp = NULL;
- virtio_push_chain(ve, B_TRUE);
-
- sc->sc_opackets++;
- sc->sc_obytes += msg_size;
+ mutex_enter(&vif->vif_mutex);
- return (B_TRUE);
+ if (ret != DDI_SUCCESS) {
+ goto fail;
+ }
-exit_tx_external:
+ vif->vif_opackets++;
+ vif->vif_obytes += msg_size;
+ mutex_exit(&vif->vif_mutex);
- vq_free_entry(sc->sc_tx_vq, ve);
- /*
- * vioif_tx_external can fail when the buffer does not fit into the
- * indirect descriptor table. Free the mp. I don't expect this ever
- * to happen.
- */
- freemsg(mp);
+ virtio_dma_sync(tb->tb_dma, DDI_DMA_SYNC_FORDEV);
+ virtio_chain_submit(tb->tb_chain, B_TRUE);
return (B_TRUE);
+
+fail:
+ vif->vif_oerrors++;
+ if (tb != NULL) {
+ vioif_txbuf_free(vif, tb);
+ }
+ mutex_exit(&vif->vif_mutex);
+
+ return (mp == NULL);
}
static mblk_t *
-vioif_tx(void *arg, mblk_t *mp)
+vioif_m_tx(void *arg, mblk_t *mp)
{
- struct vioif_softc *sc = arg;
+ vioif_t *vif = arg;
mblk_t *nmp;
/*
* Prior to attempting to send any more frames, do a reclaim to pick up
* any descriptors which have been processed by the host.
*/
- if (vq_num_used(sc->sc_tx_vq) != 0) {
- (void) vioif_reclaim_used_tx(sc);
+ if (virtio_queue_nactive(vif->vif_tx_vq) != 0) {
+ (void) vioif_reclaim_used_tx(vif);
}
while (mp != NULL) {
nmp = mp->b_next;
mp->b_next = NULL;
- if (!vioif_send(sc, mp)) {
+ if (!vioif_send(vif, mp)) {
/*
* If there are no descriptors available, try to
* reclaim some, allowing a retry of the send if some
* are found.
*/
mp->b_next = nmp;
- if (vioif_reclaim_used_tx(sc) != 0) {
+ if (vioif_reclaim_used_tx(vif) != 0) {
continue;
}
@@ -1315,106 +1166,116 @@ vioif_tx(void *arg, mblk_t *mp)
* can begin again. For safety, make sure the periodic
* reclaim is running as well.
*/
- mutex_enter(&sc->sc_tx_lock);
- sc->sc_tx_corked = B_TRUE;
- virtio_start_vq_intr(sc->sc_tx_vq);
- vioif_reclaim_restart(sc);
- mutex_exit(&sc->sc_tx_lock);
+ mutex_enter(&vif->vif_mutex);
+ vif->vif_tx_corked = B_TRUE;
+ virtio_queue_no_interrupt(vif->vif_tx_vq, B_FALSE);
+ vioif_reclaim_restart(vif);
+ mutex_exit(&vif->vif_mutex);
return (mp);
}
mp = nmp;
}
/* Ensure the periodic reclaim has been started. */
- mutex_enter(&sc->sc_tx_lock);
- vioif_reclaim_restart(sc);
- mutex_exit(&sc->sc_tx_lock);
+ mutex_enter(&vif->vif_mutex);
+ vioif_reclaim_restart(vif);
+ mutex_exit(&vif->vif_mutex);
return (NULL);
}
static int
-vioif_start(void *arg)
+vioif_m_start(void *arg)
{
- struct vioif_softc *sc = arg;
- struct vq_entry *ve;
- uint32_t len;
+ vioif_t *vif = arg;
+
+ mutex_enter(&vif->vif_mutex);
+
+ VERIFY3S(vif->vif_runstate, ==, VIOIF_RUNSTATE_STOPPED);
+ vif->vif_runstate = VIOIF_RUNSTATE_RUNNING;
- mac_link_update(sc->sc_mac_handle, vioif_link_state(sc));
+ mac_link_update(vif->vif_mac_handle, LINK_STATE_UP);
- virtio_start_vq_intr(sc->sc_rx_vq);
+ virtio_queue_no_interrupt(vif->vif_rx_vq, B_FALSE);
/*
* Starting interrupts on the TX virtqueue is unnecessary at this time.
	 * Descriptor reclamation is handled during transmit, via a periodic
* timer, and when resources are tight, via the then-enabled interrupt.
*/
- sc->sc_tx_drain = B_FALSE;
+ vif->vif_tx_drain = B_FALSE;
/*
- * Clear any data that arrived early on the receive queue and populate
- * it with free buffers that the device can use moving forward.
+ * Add as many receive buffers as we can to the receive queue. If we
+ * cannot add any, it may be because we have stopped and started again
+ * and the descriptors are all in the queue already.
*/
- while ((ve = virtio_pull_chain(sc->sc_rx_vq, &len)) != NULL) {
- virtio_free_chain(ve);
- }
- (void) vioif_populate_rx(sc, KM_SLEEP);
+ (void) vioif_add_rx(vif);
+ mutex_exit(&vif->vif_mutex);
return (DDI_SUCCESS);
}
static void
-vioif_stop(void *arg)
+vioif_m_stop(void *arg)
{
- struct vioif_softc *sc = arg;
+ vioif_t *vif = arg;
+
+ mutex_enter(&vif->vif_mutex);
+
+ VERIFY3S(vif->vif_runstate, ==, VIOIF_RUNSTATE_RUNNING);
+ vif->vif_runstate = VIOIF_RUNSTATE_STOPPING;
/* Ensure all TX descriptors have been processed and reclaimed */
- vioif_tx_drain(sc);
+ vioif_tx_drain(vif);
- virtio_stop_vq_intr(sc->sc_rx_vq);
+ virtio_queue_no_interrupt(vif->vif_rx_vq, B_TRUE);
+
+ vif->vif_runstate = VIOIF_RUNSTATE_STOPPED;
+ mutex_exit(&vif->vif_mutex);
}
static int
-vioif_stat(void *arg, uint_t stat, uint64_t *val)
+vioif_m_stat(void *arg, uint_t stat, uint64_t *val)
{
- struct vioif_softc *sc = arg;
+ vioif_t *vif = arg;
switch (stat) {
case MAC_STAT_IERRORS:
- *val = sc->sc_ierrors;
+ *val = vif->vif_ierrors;
break;
case MAC_STAT_OERRORS:
- *val = sc->sc_oerrors;
+ *val = vif->vif_oerrors;
break;
case MAC_STAT_MULTIRCV:
- *val = sc->sc_multircv;
+ *val = vif->vif_multircv;
break;
case MAC_STAT_BRDCSTRCV:
- *val = sc->sc_brdcstrcv;
+ *val = vif->vif_brdcstrcv;
break;
case MAC_STAT_MULTIXMT:
- *val = sc->sc_multixmt;
+ *val = vif->vif_multixmt;
break;
case MAC_STAT_BRDCSTXMT:
- *val = sc->sc_brdcstxmt;
+ *val = vif->vif_brdcstxmt;
break;
case MAC_STAT_IPACKETS:
- *val = sc->sc_ipackets;
+ *val = vif->vif_ipackets;
break;
case MAC_STAT_RBYTES:
- *val = sc->sc_rbytes;
+ *val = vif->vif_rbytes;
break;
case MAC_STAT_OPACKETS:
- *val = sc->sc_opackets;
+ *val = vif->vif_opackets;
break;
case MAC_STAT_OBYTES:
- *val = sc->sc_obytes;
+ *val = vif->vif_obytes;
break;
case MAC_STAT_NORCVBUF:
- *val = sc->sc_norecvbuf;
+ *val = vif->vif_norecvbuf;
break;
case MAC_STAT_NOXMTBUF:
- *val = sc->sc_notxbuf;
+ *val = vif->vif_notxbuf;
break;
case MAC_STAT_IFSPEED:
/* always 1 Gbit */
@@ -1433,651 +1294,490 @@ vioif_stat(void *arg, uint_t stat, uint64_t *val)
}
static int
-vioif_set_prop_private(struct vioif_softc *sc, const char *pr_name,
+vioif_m_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
uint_t pr_valsize, const void *pr_val)
{
- _NOTE(ARGUNUSED(pr_valsize));
-
- long result;
+ vioif_t *vif = arg;
- if (strcmp(pr_name, vioif_txcopy_thresh) == 0) {
+ switch (pr_num) {
+ case MAC_PROP_MTU: {
+ int r;
+ uint32_t mtu;
+ if (pr_valsize < sizeof (mtu)) {
+ return (EOVERFLOW);
+ }
+ bcopy(pr_val, &mtu, sizeof (mtu));
- if (pr_val == NULL)
+ if (mtu < ETHERMIN || mtu > vif->vif_mtu_max) {
return (EINVAL);
+ }
- (void) ddi_strtol(pr_val, (char **)NULL, 0, &result);
+ mutex_enter(&vif->vif_mutex);
+ if ((r = mac_maxsdu_update(vif->vif_mac_handle, mtu)) == 0) {
+ vif->vif_mtu = mtu;
+ }
+ mutex_exit(&vif->vif_mutex);
- if (result < 0 || result > VIOIF_TX_THRESH_MAX)
- return (EINVAL);
- sc->sc_txcopy_thresh = result;
+ return (r);
}
- if (strcmp(pr_name, vioif_rxcopy_thresh) == 0) {
- if (pr_val == NULL)
- return (EINVAL);
-
- (void) ddi_strtol(pr_val, (char **)NULL, 0, &result);
+ case MAC_PROP_PRIVATE: {
+ long max, result;
+ uint_t *resp;
+ char *endptr;
+
+ if (strcmp(pr_name, VIOIF_MACPROP_TXCOPY_THRESH) == 0) {
+ max = VIOIF_MACPROP_TXCOPY_THRESH_MAX;
+ resp = &vif->vif_txcopy_thresh;
+ } else if (strcmp(pr_name, VIOIF_MACPROP_RXCOPY_THRESH) == 0) {
+ max = VIOIF_MACPROP_RXCOPY_THRESH_MAX;
+ resp = &vif->vif_rxcopy_thresh;
+ } else {
+ return (ENOTSUP);
+ }
- if (result < 0 || result > VIOIF_RX_THRESH_MAX)
+ if (pr_val == NULL) {
return (EINVAL);
- sc->sc_rxcopy_thresh = result;
- }
- return (0);
-}
-
-static int
-vioif_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
- uint_t pr_valsize, const void *pr_val)
-{
- struct vioif_softc *sc = arg;
- const uint32_t *new_mtu;
- int err;
-
- switch (pr_num) {
- case MAC_PROP_MTU:
- new_mtu = pr_val;
+ }
- if (*new_mtu > MAX_MTU) {
+ if (ddi_strtol(pr_val, &endptr, 10, &result) != 0 ||
+ *endptr != '\0' || result < 0 || result > max) {
return (EINVAL);
}
- err = mac_maxsdu_update(sc->sc_mac_handle, *new_mtu);
- if (err) {
- return (err);
- }
- break;
- case MAC_PROP_PRIVATE:
- err = vioif_set_prop_private(sc, pr_name,
- pr_valsize, pr_val);
- if (err)
- return (err);
- break;
+ mutex_enter(&vif->vif_mutex);
+ *resp = result;
+ mutex_exit(&vif->vif_mutex);
+
+ return (0);
+ }
+
default:
return (ENOTSUP);
}
-
- return (0);
}
static int
-vioif_get_prop_private(struct vioif_softc *sc, const char *pr_name,
+vioif_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
uint_t pr_valsize, void *pr_val)
{
- int err = ENOTSUP;
- int value;
+ vioif_t *vif = arg;
- if (strcmp(pr_name, vioif_txcopy_thresh) == 0) {
+ switch (pr_num) {
+ case MAC_PROP_PRIVATE: {
+ uint_t value;
- value = sc->sc_txcopy_thresh;
- err = 0;
- goto done;
- }
- if (strcmp(pr_name, vioif_rxcopy_thresh) == 0) {
+ if (strcmp(pr_name, VIOIF_MACPROP_TXCOPY_THRESH) == 0) {
+ value = vif->vif_txcopy_thresh;
+ } else if (strcmp(pr_name, VIOIF_MACPROP_RXCOPY_THRESH) == 0) {
+ value = vif->vif_rxcopy_thresh;
+ } else {
+ return (ENOTSUP);
+ }
- value = sc->sc_rxcopy_thresh;
- err = 0;
- goto done;
- }
-done:
- if (err == 0) {
- (void) snprintf(pr_val, pr_valsize, "%d", value);
- }
- return (err);
-}
+ if (snprintf(pr_val, pr_valsize, "%u", value) >= pr_valsize) {
+ return (EOVERFLOW);
+ }
-static int
-vioif_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
- uint_t pr_valsize, void *pr_val)
-{
- struct vioif_softc *sc = arg;
- int err = ENOTSUP;
+ return (0);
+ }
- switch (pr_num) {
- case MAC_PROP_PRIVATE:
- err = vioif_get_prop_private(sc, pr_name,
- pr_valsize, pr_val);
- break;
default:
- break;
+ return (ENOTSUP);
}
- return (err);
}
static void
-vioif_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
+vioif_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
mac_prop_info_handle_t prh)
{
- struct vioif_softc *sc = arg;
+ vioif_t *vif = arg;
char valstr[64];
int value;
switch (pr_num) {
case MAC_PROP_MTU:
- mac_prop_info_set_range_uint32(prh, ETHERMIN, MAX_MTU);
- break;
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_RW);
+ mac_prop_info_set_range_uint32(prh, ETHERMIN, vif->vif_mtu_max);
+ return;
case MAC_PROP_PRIVATE:
- bzero(valstr, sizeof (valstr));
- if (strcmp(pr_name, vioif_txcopy_thresh) == 0) {
- value = sc->sc_txcopy_thresh;
- } else if (strcmp(pr_name, vioif_rxcopy_thresh) == 0) {
- value = sc->sc_rxcopy_thresh;
+ if (strcmp(pr_name, VIOIF_MACPROP_TXCOPY_THRESH) == 0) {
+ value = VIOIF_MACPROP_TXCOPY_THRESH_DEF;
+ } else if (strcmp(pr_name, VIOIF_MACPROP_RXCOPY_THRESH) == 0) {
+ value = VIOIF_MACPROP_RXCOPY_THRESH_DEF;
} else {
+ /*
+ * We do not recognise this private property name.
+ */
return;
}
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_RW);
(void) snprintf(valstr, sizeof (valstr), "%d", value);
- break;
+ mac_prop_info_set_default_str(prh, valstr);
+ return;
default:
- break;
+ return;
}
}
static boolean_t
-vioif_getcapab(void *arg, mac_capab_t cap, void *cap_data)
+vioif_m_getcapab(void *arg, mac_capab_t cap, void *cap_data)
{
- struct vioif_softc *sc = arg;
+ vioif_t *vif = arg;
switch (cap) {
- case MAC_CAPAB_HCKSUM:
- if (sc->sc_tx_csum) {
- uint32_t *txflags = cap_data;
-
- *txflags = HCKSUM_INET_PARTIAL;
- return (B_TRUE);
+ case MAC_CAPAB_HCKSUM: {
+ if (!vif->vif_tx_csum) {
+ return (B_FALSE);
}
- return (B_FALSE);
- case MAC_CAPAB_LSO:
- if (sc->sc_tx_tso4) {
- mac_capab_lso_t *cap_lso = cap_data;
- cap_lso->lso_flags = LSO_TX_BASIC_TCP_IPV4;
- cap_lso->lso_basic_tcp_ipv4.lso_max = MAX_MTU;
- return (B_TRUE);
- }
- return (B_FALSE);
- default:
- break;
+ *(uint32_t *)cap_data = HCKSUM_INET_PARTIAL;
+
+ return (B_TRUE);
}
- return (B_FALSE);
-}
-static mac_callbacks_t vioif_m_callbacks = {
- .mc_callbacks = (MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO),
- .mc_getstat = vioif_stat,
- .mc_start = vioif_start,
- .mc_stop = vioif_stop,
- .mc_setpromisc = vioif_promisc,
- .mc_multicst = vioif_multicst,
- .mc_unicst = vioif_unicst,
- .mc_tx = vioif_tx,
- /* Optional callbacks */
- .mc_reserved = NULL, /* reserved */
- .mc_ioctl = NULL, /* mc_ioctl */
- .mc_getcapab = vioif_getcapab, /* mc_getcapab */
- .mc_open = NULL, /* mc_open */
- .mc_close = NULL, /* mc_close */
- .mc_setprop = vioif_setprop,
- .mc_getprop = vioif_getprop,
- .mc_propinfo = vioif_propinfo,
-};
+ case MAC_CAPAB_LSO: {
+ if (!vif->vif_tx_tso4) {
+ return (B_FALSE);
+ }
-static void
-vioif_show_features(struct vioif_softc *sc, const char *prefix,
- uint32_t features)
-{
- char buf[512];
- char *bufp = buf;
- char *bufend = buf + sizeof (buf);
-
- /* LINTED E_PTRDIFF_OVERFLOW */
- bufp += snprintf(bufp, bufend - bufp, prefix);
- /* LINTED E_PTRDIFF_OVERFLOW */
- bufp += virtio_show_features(features, bufp, bufend - bufp);
- *bufp = '\0';
-
- /* Using '!' to only CE_NOTE this to the system log. */
- dev_err(sc->sc_dev, CE_NOTE, "!%s Vioif (%b)", buf, features,
- VIRTIO_NET_FEATURE_BITS);
-}
+ mac_capab_lso_t *lso = cap_data;
+ lso->lso_flags = LSO_TX_BASIC_TCP_IPV4;
+ lso->lso_basic_tcp_ipv4.lso_max = VIOIF_RX_DATA_SIZE;
-/*
- * Find out which features are supported by the device and
- * choose which ones we wish to use.
- */
-static int
-vioif_dev_features(struct vioif_softc *sc)
-{
- uint32_t host_features;
-
- host_features = virtio_negotiate_features(&sc->sc_virtio,
- VIRTIO_NET_F_CSUM |
- VIRTIO_NET_F_HOST_TSO4 |
- VIRTIO_NET_F_HOST_ECN |
- VIRTIO_NET_F_MAC |
- VIRTIO_NET_F_STATUS |
- VIRTIO_F_RING_INDIRECT_DESC);
-
- vioif_show_features(sc, "Host features: ", host_features);
- vioif_show_features(sc, "Negotiated features: ",
- sc->sc_virtio.sc_features);
-
- if (!(sc->sc_virtio.sc_features & VIRTIO_F_RING_INDIRECT_DESC)) {
- dev_err(sc->sc_dev, CE_WARN,
- "Host does not support RING_INDIRECT_DESC. Cannot attach.");
- return (DDI_FAILURE);
+ return (B_TRUE);
}
- return (DDI_SUCCESS);
+ default:
+ return (B_FALSE);
+ }
}
static boolean_t
-vioif_has_feature(struct vioif_softc *sc, uint32_t feature)
+vioif_has_feature(vioif_t *vif, uint32_t feature)
{
- return (virtio_has_feature(&sc->sc_virtio, feature));
+ return (virtio_feature_present(vif->vif_virtio, feature));
}
+/*
+ * Read the primary MAC address from the device if one is provided. If not,
+ * generate a random locally administered MAC address and write it back to the
+ * device.
+ */
static void
-vioif_set_mac(struct vioif_softc *sc)
+vioif_get_mac(vioif_t *vif)
{
- int i;
-
- for (i = 0; i < ETHERADDRL; i++) {
- virtio_write_device_config_1(&sc->sc_virtio,
- VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
- }
- sc->sc_mac_from_host = 0;
-}
+ VERIFY(MUTEX_HELD(&vif->vif_mutex));
-/* Get the mac address out of the hardware, or make up one. */
-static void
-vioif_get_mac(struct vioif_softc *sc)
-{
- int i;
- if (sc->sc_virtio.sc_features & VIRTIO_NET_F_MAC) {
- for (i = 0; i < ETHERADDRL; i++) {
- sc->sc_mac[i] = virtio_read_device_config_1(
- &sc->sc_virtio,
+ if (vioif_has_feature(vif, VIRTIO_NET_F_MAC)) {
+ for (uint_t i = 0; i < ETHERADDRL; i++) {
+ vif->vif_mac[i] = virtio_dev_get8(vif->vif_virtio,
VIRTIO_NET_CONFIG_MAC + i);
}
- sc->sc_mac_from_host = 1;
- } else {
- /* Get a few random bytes */
- (void) random_get_pseudo_bytes(sc->sc_mac, ETHERADDRL);
- /* Make sure it's a unicast MAC */
- sc->sc_mac[0] &= ~1;
- /* Set the "locally administered" bit */
- sc->sc_mac[1] |= 2;
+ vif->vif_mac_from_host = 1;
- vioif_set_mac(sc);
+ return;
+ }
- dev_err(sc->sc_dev, CE_NOTE,
- "!Generated a random MAC address: %s",
- ether_sprintf((struct ether_addr *)sc->sc_mac));
+ /* Get a few random bytes */
+ (void) random_get_pseudo_bytes(vif->vif_mac, ETHERADDRL);
+ /* Make sure it's a unicast MAC */
+ vif->vif_mac[0] &= ~1;
+ /* Set the "locally administered" bit */
+ vif->vif_mac[1] |= 2;
+
+ /*
+ * Write the random MAC address back to the device.
+ */
+ for (uint_t i = 0; i < ETHERADDRL; i++) {
+ virtio_dev_put8(vif->vif_virtio, VIRTIO_NET_CONFIG_MAC + i,
+ vif->vif_mac[i]);
}
+ vif->vif_mac_from_host = 0;
+
+ dev_err(vif->vif_dip, CE_NOTE, "!Generated a random MAC address: "
+ "%02x:%02x:%02x:%02x:%02x:%02x",
+ (uint_t)vif->vif_mac[0], (uint_t)vif->vif_mac[1],
+ (uint_t)vif->vif_mac[2], (uint_t)vif->vif_mac[3],
+ (uint_t)vif->vif_mac[4], (uint_t)vif->vif_mac[5]);
}
/*
* Virtqueue interrupt handlers
*/
-/* ARGSUSED */
static uint_t
-vioif_rx_handler(caddr_t arg1, caddr_t arg2)
+vioif_rx_handler(caddr_t arg0, caddr_t arg1)
{
- struct virtio_softc *vsc = (void *) arg1;
- struct vioif_softc *sc = __containerof(vsc,
- struct vioif_softc, sc_virtio);
+ vioif_t *vif = (vioif_t *)arg0;
+
+ mutex_enter(&vif->vif_mutex);
+ (void) vioif_process_rx(vif);
/*
- * The return values of these functions are not needed but they make
- * debugging interrupts simpler because you can use them to detect when
- * stuff was processed and repopulated in this handler.
+ * Attempt to replenish the receive queue. If we cannot add any
+ * descriptors here, it may be because all of the recently received
+ * packets were loaned up to the networking stack.
*/
- (void) vioif_process_rx(sc);
- (void) vioif_populate_rx(sc, KM_NOSLEEP);
+ (void) vioif_add_rx(vif);
+ mutex_exit(&vif->vif_mutex);
return (DDI_INTR_CLAIMED);
}
-/* ARGSUSED */
static uint_t
-vioif_tx_handler(caddr_t arg1, caddr_t arg2)
+vioif_tx_handler(caddr_t arg0, caddr_t arg1)
{
- struct virtio_softc *vsc = (void *)arg1;
- struct vioif_softc *sc = __containerof(vsc,
- struct vioif_softc, sc_virtio);
+ vioif_t *vif = (vioif_t *)arg0;
/*
* The TX interrupt could race with other reclamation activity, so
* interpreting the return value is unimportant.
*/
- (void) vioif_reclaim_used_tx(sc);
+ (void) vioif_reclaim_used_tx(vif);
return (DDI_INTR_CLAIMED);
}
-static int
-vioif_register_ints(struct vioif_softc *sc)
-{
- int ret;
-
- struct virtio_int_handler vioif_vq_h[] = {
- { vioif_rx_handler },
- { vioif_tx_handler },
- { NULL }
- };
-
- ret = virtio_register_ints(&sc->sc_virtio, NULL, vioif_vq_h);
-
- return (ret);
-}
-
-
static void
-vioif_check_features(struct vioif_softc *sc)
+vioif_check_features(vioif_t *vif)
{
- if (vioif_has_feature(sc, VIRTIO_NET_F_CSUM)) {
- /* The GSO/GRO featured depend on CSUM, check them here. */
- sc->sc_tx_csum = 1;
- sc->sc_rx_csum = 1;
+ VERIFY(MUTEX_HELD(&vif->vif_mutex));
- if (!vioif_has_feature(sc, VIRTIO_NET_F_GUEST_CSUM)) {
- sc->sc_rx_csum = 0;
- }
- dev_err(sc->sc_dev, CE_NOTE, "!Csum enabled.");
+ vif->vif_tx_csum = 0;
+ vif->vif_tx_tso4 = 0;
- if (vioif_has_feature(sc, VIRTIO_NET_F_HOST_TSO4)) {
+ if (vioif_has_feature(vif, VIRTIO_NET_F_CSUM)) {
+ /*
+ * The host will accept packets with partial checksums from us.
+ */
+ vif->vif_tx_csum = 1;
- sc->sc_tx_tso4 = 1;
- /*
- * We don't seem to have a way to ask the system
- * not to send us LSO packets with Explicit
- * Congestion Notification bit set, so we require
- * the device to support it in order to do
- * LSO.
- */
- if (!vioif_has_feature(sc, VIRTIO_NET_F_HOST_ECN)) {
- dev_err(sc->sc_dev, CE_NOTE,
- "!TSO4 supported, but not ECN. "
- "Not using LSO.");
- sc->sc_tx_tso4 = 0;
- } else {
- dev_err(sc->sc_dev, CE_NOTE, "!LSO enabled");
- }
+ /*
+ * The legacy GSO feature represents the combination of
+ * HOST_TSO4, HOST_TSO6, and HOST_ECN.
+ */
+ boolean_t gso = vioif_has_feature(vif, VIRTIO_NET_F_GSO);
+ boolean_t tso4 = vioif_has_feature(vif, VIRTIO_NET_F_HOST_TSO4);
+ boolean_t ecn = vioif_has_feature(vif, VIRTIO_NET_F_HOST_ECN);
+
+ /*
+ * Explicit congestion notification (ECN) is configured
+ * globally; see "tcp_ecn_permitted". As we cannot currently
+ * request that the stack disable ECN on a per interface basis,
+ * we require the device to support the combination of
+ * segmentation offload and ECN support.
+ */
+ if (gso || (tso4 && ecn)) {
+ vif->vif_tx_tso4 = 1;
}
}
}
static int
-vioif_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd)
+vioif_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
- int ret, instance;
- struct vioif_softc *sc;
- struct virtio_softc *vsc;
- mac_register_t *macp;
- char cache_name[CACHE_NAME_SIZE];
-
- instance = ddi_get_instance(devinfo);
-
- switch (cmd) {
- case DDI_ATTACH:
- break;
-
- case DDI_RESUME:
- case DDI_PM_RESUME:
- /* We do not support suspend/resume for vioif. */
- goto exit;
+ int ret;
+ vioif_t *vif;
+ virtio_t *vio;
+ mac_register_t *macp = NULL;
- default:
- goto exit;
+ if (cmd != DDI_ATTACH) {
+ return (DDI_FAILURE);
}
- sc = kmem_zalloc(sizeof (struct vioif_softc), KM_SLEEP);
- ddi_set_driver_private(devinfo, sc);
-
- vsc = &sc->sc_virtio;
+ if ((vio = virtio_init(dip, VIRTIO_NET_WANTED_FEATURES, B_TRUE)) ==
+ NULL) {
+ return (DDI_FAILURE);
+ }
- /* Duplicate for less typing */
- sc->sc_dev = devinfo;
- vsc->sc_dev = devinfo;
+ vif = kmem_zalloc(sizeof (*vif), KM_SLEEP);
+ vif->vif_dip = dip;
+ vif->vif_virtio = vio;
+ vif->vif_runstate = VIOIF_RUNSTATE_STOPPED;
+ ddi_set_driver_private(dip, vif);
+
+ if ((vif->vif_rx_vq = virtio_queue_alloc(vio, VIRTIO_NET_VIRTQ_RX,
+ "rx", vioif_rx_handler, vif, B_FALSE, VIOIF_MAX_SEGS)) == NULL ||
+ (vif->vif_tx_vq = virtio_queue_alloc(vio, VIRTIO_NET_VIRTQ_TX,
+ "tx", vioif_tx_handler, vif, B_FALSE, VIOIF_MAX_SEGS)) == NULL) {
+ goto fail;
+ }
- /*
- * Initialize interrupt kstat.
- */
- sc->sc_intrstat = kstat_create("vioif", instance, "intr", "controller",
- KSTAT_TYPE_INTR, 1, 0);
- if (sc->sc_intrstat == NULL) {
- dev_err(devinfo, CE_WARN, "kstat_create failed");
- goto exit_intrstat;
- }
- kstat_install(sc->sc_intrstat);
-
- /* map BAR 0 */
- ret = ddi_regs_map_setup(devinfo, 1,
- (caddr_t *)&sc->sc_virtio.sc_io_addr,
- 0, 0, &vioif_attr, &sc->sc_virtio.sc_ioh);
- if (ret != DDI_SUCCESS) {
- dev_err(devinfo, CE_WARN, "unable to map bar 0: %d", ret);
- goto exit_map;
+ if (virtio_init_complete(vio, 0) != DDI_SUCCESS) {
+ dev_err(dip, CE_WARN, "failed to complete Virtio init");
+ goto fail;
}
- virtio_device_reset(&sc->sc_virtio);
- virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
- virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER);
+ virtio_queue_no_interrupt(vif->vif_rx_vq, B_TRUE);
+ virtio_queue_no_interrupt(vif->vif_tx_vq, B_TRUE);
- ret = vioif_dev_features(sc);
- if (ret)
- goto exit_features;
+ mutex_init(&vif->vif_mutex, NULL, MUTEX_DRIVER, virtio_intr_pri(vio));
+ mutex_enter(&vif->vif_mutex);
- vsc->sc_nvqs = vioif_has_feature(sc, VIRTIO_NET_F_CTRL_VQ) ? 3 : 2;
+ vioif_get_mac(vif);
- (void) snprintf(cache_name, CACHE_NAME_SIZE, "vioif%d_rx", instance);
- sc->sc_rxbuf_cache = kmem_cache_create(cache_name,
- sizeof (struct vioif_rx_buf), 0, vioif_rx_construct,
- vioif_rx_destruct, NULL, sc, NULL, KM_SLEEP);
- if (sc->sc_rxbuf_cache == NULL) {
- dev_err(sc->sc_dev, CE_WARN, "Can't allocate the buffer cache");
- goto exit_cache;
- }
+ vif->vif_rxcopy_thresh = VIOIF_MACPROP_RXCOPY_THRESH_DEF;
+ vif->vif_txcopy_thresh = VIOIF_MACPROP_TXCOPY_THRESH_DEF;
- ret = vioif_register_ints(sc);
- if (ret) {
- dev_err(sc->sc_dev, CE_WARN,
- "Failed to allocate interrupt(s)!");
- goto exit_ints;
+ if (vioif_has_feature(vif, VIRTIO_NET_F_MTU)) {
+ vif->vif_mtu_max = virtio_dev_get16(vio, VIRTIO_NET_CONFIG_MTU);
+ } else {
+ vif->vif_mtu_max = ETHERMTU;
}
- /*
- * Register layout determined, can now access the
- * device-specific bits
- */
- vioif_get_mac(sc);
-
- sc->sc_rx_vq = virtio_alloc_vq(&sc->sc_virtio, 0,
- VIOIF_RX_QLEN, VIOIF_INDIRECT_MAX, "rx");
- if (!sc->sc_rx_vq)
- goto exit_alloc1;
- virtio_stop_vq_intr(sc->sc_rx_vq);
-
- sc->sc_tx_vq = virtio_alloc_vq(&sc->sc_virtio, 1,
- VIOIF_TX_QLEN, VIOIF_INDIRECT_MAX, "tx");
- if (!sc->sc_tx_vq)
- goto exit_alloc2;
- virtio_stop_vq_intr(sc->sc_tx_vq);
-
- mutex_init(&sc->sc_tx_lock, NULL, MUTEX_DRIVER,
- DDI_INTR_PRI(sc->sc_virtio.sc_intr_prio));
-
- if (vioif_has_feature(sc, VIRTIO_NET_F_CTRL_VQ)) {
- sc->sc_ctrl_vq = virtio_alloc_vq(&sc->sc_virtio, 2,
- VIOIF_CTRL_QLEN, 0, "ctrl");
- if (!sc->sc_ctrl_vq) {
- goto exit_alloc3;
- }
- virtio_stop_vq_intr(sc->sc_ctrl_vq);
+ vif->vif_mtu = ETHERMTU;
+ if (vif->vif_mtu > vif->vif_mtu_max) {
+ vif->vif_mtu = vif->vif_mtu_max;
}
- virtio_set_status(&sc->sc_virtio,
- VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
-
- sc->sc_rxloan = 0;
+ vioif_check_features(vif);
- /* set some reasonable-small default values */
- sc->sc_rxcopy_thresh = 300;
- sc->sc_txcopy_thresh = 300;
- sc->sc_mtu = ETHERMTU;
+ if (vioif_alloc_bufs(vif) != 0) {
+ mutex_exit(&vif->vif_mutex);
+ dev_err(dip, CE_WARN, "failed to allocate memory");
+ goto fail;
+ }
- vioif_check_features(sc);
+ mutex_exit(&vif->vif_mutex);
- if (vioif_alloc_mems(sc) != 0)
- goto exit_alloc_mems;
+ if (virtio_interrupts_enable(vio) != DDI_SUCCESS) {
+ dev_err(dip, CE_WARN, "failed to enable interrupts");
+ goto fail;
+ }
if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
- dev_err(devinfo, CE_WARN, "Failed to allocate a mac_register");
- goto exit_macalloc;
+ dev_err(dip, CE_WARN, "failed to allocate a mac_register");
+ goto fail;
}
macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
- macp->m_driver = sc;
- macp->m_dip = devinfo;
- macp->m_src_addr = sc->sc_mac;
- macp->m_callbacks = &vioif_m_callbacks;
+ macp->m_driver = vif;
+ macp->m_dip = dip;
+ macp->m_src_addr = vif->vif_mac;
+ macp->m_callbacks = &vioif_mac_callbacks;
macp->m_min_sdu = 0;
- macp->m_max_sdu = sc->sc_mtu;
+ macp->m_max_sdu = vif->vif_mtu;
macp->m_margin = VLAN_TAGSZ;
macp->m_priv_props = vioif_priv_props;
- sc->sc_macp = macp;
-
- /* Pre-fill the rx ring. */
- (void) vioif_populate_rx(sc, KM_SLEEP);
-
- ret = mac_register(macp, &sc->sc_mac_handle);
- if (ret != 0) {
- dev_err(devinfo, CE_WARN, "vioif_attach: "
- "mac_register() failed, ret=%d", ret);
- goto exit_register;
+ if ((ret = mac_register(macp, &vif->vif_mac_handle)) != 0) {
+ dev_err(dip, CE_WARN, "mac_register() failed (%d)", ret);
+ goto fail;
}
+ mac_free(macp);
- ret = virtio_enable_ints(&sc->sc_virtio);
- if (ret) {
- dev_err(devinfo, CE_WARN, "Failed to enable interrupts");
- goto exit_enable_ints;
- }
+ mac_link_update(vif->vif_mac_handle, LINK_STATE_UP);
- mac_link_update(sc->sc_mac_handle, LINK_STATE_UP);
return (DDI_SUCCESS);
-exit_enable_ints:
- (void) mac_unregister(sc->sc_mac_handle);
-exit_register:
- mac_free(macp);
-exit_macalloc:
- vioif_free_mems(sc);
-exit_alloc_mems:
- virtio_release_ints(&sc->sc_virtio);
- if (sc->sc_ctrl_vq)
- virtio_free_vq(sc->sc_ctrl_vq);
-exit_alloc3:
- virtio_free_vq(sc->sc_tx_vq);
-exit_alloc2:
- virtio_free_vq(sc->sc_rx_vq);
-exit_alloc1:
-exit_ints:
- kmem_cache_destroy(sc->sc_rxbuf_cache);
-exit_cache:
-exit_features:
- virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
- ddi_regs_map_free(&sc->sc_virtio.sc_ioh);
-exit_intrstat:
-exit_map:
- kstat_delete(sc->sc_intrstat);
- kmem_free(sc, sizeof (struct vioif_softc));
-exit:
+fail:
+ vioif_free_bufs(vif);
+ if (macp != NULL) {
+ mac_free(macp);
+ }
+ (void) virtio_fini(vio, B_TRUE);
+ kmem_free(vif, sizeof (*vif));
return (DDI_FAILURE);
}
static int
-vioif_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd)
+vioif_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
- struct vioif_softc *sc;
+ int r;
+ vioif_t *vif;
- if ((sc = ddi_get_driver_private(devinfo)) == NULL)
+ if (cmd != DDI_DETACH) {
return (DDI_FAILURE);
+ }
- switch (cmd) {
- case DDI_DETACH:
- break;
+ if ((vif = ddi_get_driver_private(dip)) == NULL) {
+ return (DDI_FAILURE);
+ }
- case DDI_PM_SUSPEND:
- /* We do not support suspend/resume for vioif. */
+ mutex_enter(&vif->vif_mutex);
+ if (vif->vif_runstate != VIOIF_RUNSTATE_STOPPED) {
+ dev_err(dip, CE_WARN, "!NIC still running, cannot detach");
+ mutex_exit(&vif->vif_mutex);
return (DDI_FAILURE);
+ }
- default:
+ /*
+ * There should be no outstanding transmit buffers once the NIC is
+ * completely stopped.
+ */
+ VERIFY3U(vif->vif_ntxbufs_alloc, ==, 0);
+
+ /*
+ * Though we cannot claw back all of the receive buffers until we reset
+ * the device, we must ensure all those loaned to MAC have been
+ * returned before calling mac_unregister().
+ */
+ if (vif->vif_nrxbufs_onloan > 0) {
+ dev_err(dip, CE_WARN, "!%u receive buffers still loaned, "
+ "cannot detach", vif->vif_nrxbufs_onloan);
+ mutex_exit(&vif->vif_mutex);
return (DDI_FAILURE);
}
- if (sc->sc_rxloan > 0) {
- dev_err(devinfo, CE_WARN, "!Some rx buffers are still upstream,"
- " not detaching.");
+ if ((r = mac_unregister(vif->vif_mac_handle)) != 0) {
+ dev_err(dip, CE_WARN, "!MAC unregister failed (%d)", r);
return (DDI_FAILURE);
}
+ mac_free(vif->vif_macp);
- virtio_stop_vq_intr(sc->sc_rx_vq);
- virtio_stop_vq_intr(sc->sc_tx_vq);
+ /*
+ * Shut down the device so that we can recover any previously
+ * submitted receive buffers.
+ */
+ virtio_shutdown(vif->vif_virtio);
+ for (;;) {
+ virtio_chain_t *vic;
- virtio_release_ints(&sc->sc_virtio);
+ if ((vic = virtio_queue_evacuate(vif->vif_rx_vq)) == NULL) {
+ break;
+ }
- if (mac_unregister(sc->sc_mac_handle)) {
- return (DDI_FAILURE);
+ vioif_rxbuf_t *rb = virtio_chain_data(vic);
+ vioif_rxbuf_free(vif, rb);
}
- mac_free(sc->sc_macp);
-
- vioif_free_mems(sc);
- virtio_free_vq(sc->sc_rx_vq);
- virtio_free_vq(sc->sc_tx_vq);
+ (void) virtio_fini(vif->vif_virtio, B_FALSE);
- virtio_device_reset(&sc->sc_virtio);
+ vioif_free_bufs(vif);
- ddi_regs_map_free(&sc->sc_virtio.sc_ioh);
+ mutex_exit(&vif->vif_mutex);
+ mutex_destroy(&vif->vif_mutex);
- kmem_cache_destroy(sc->sc_rxbuf_cache);
- kstat_delete(sc->sc_intrstat);
- kmem_free(sc, sizeof (struct vioif_softc));
+ kmem_free(vif, sizeof (*vif));
return (DDI_SUCCESS);
}
static int
-vioif_quiesce(dev_info_t *devinfo)
+vioif_quiesce(dev_info_t *dip)
{
- struct vioif_softc *sc;
+ vioif_t *vif;
- if ((sc = ddi_get_driver_private(devinfo)) == NULL)
+ if ((vif = ddi_get_driver_private(dip)) == NULL)
return (DDI_FAILURE);
- virtio_stop_vq_intr(sc->sc_rx_vq);
- virtio_stop_vq_intr(sc->sc_tx_vq);
- virtio_device_reset(&sc->sc_virtio);
-
- return (DDI_SUCCESS);
+ return (virtio_quiesce(vif->vif_virtio));
}
int
_init(void)
{
- int ret = 0;
+ int ret;
- mac_init_ops(&vioif_ops, "vioif");
+ mac_init_ops(&vioif_dev_ops, "vioif");
- ret = mod_install(&modlinkage);
- if (ret != DDI_SUCCESS) {
- mac_fini_ops(&vioif_ops);
- return (ret);
+ if ((ret = mod_install(&vioif_modlinkage)) != DDI_SUCCESS) {
+ mac_fini_ops(&vioif_dev_ops);
}
- return (0);
+ return (ret);
}
int
@@ -2085,16 +1785,15 @@ _fini(void)
{
int ret;
- ret = mod_remove(&modlinkage);
- if (ret == DDI_SUCCESS) {
- mac_fini_ops(&vioif_ops);
+ if ((ret = mod_remove(&vioif_modlinkage)) == DDI_SUCCESS) {
+ mac_fini_ops(&vioif_dev_ops);
}
return (ret);
}
int
-_info(struct modinfo *pModinfo)
+_info(struct modinfo *modinfop)
{
- return (mod_info(&modlinkage, pModinfo));
+ return (mod_info(&vioif_modlinkage, modinfop));
}
diff --git a/usr/src/uts/common/io/vioif/vioif.h b/usr/src/uts/common/io/vioif/vioif.h
new file mode 100644
index 0000000000..51dbc1acd4
--- /dev/null
+++ b/usr/src/uts/common/io/vioif/vioif.h
@@ -0,0 +1,432 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2019 Joyent, Inc.
+ */
+
+/*
+ * VIRTIO NETWORK DRIVER
+ */
+
+#ifndef _VIOIF_H
+#define _VIOIF_H
+
+#include "virtio.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * VIRTIO NETWORK CONFIGURATION REGISTERS
+ *
+ * These are offsets into the device-specific configuration space available
+ * through the virtio_dev_*() family of functions.
+ */
+#define VIRTIO_NET_CONFIG_MAC 0x00 /* 48 R/W */
+#define VIRTIO_NET_CONFIG_STATUS 0x06 /* 16 R */
+#define VIRTIO_NET_CONFIG_MAX_VQ_PAIRS 0x08 /* 16 R */
+#define VIRTIO_NET_CONFIG_MTU 0x0A /* 16 R */
+
+/*
+ * VIRTIO NETWORK VIRTQUEUES
+ *
+ * Note that the control queue is only present if VIRTIO_NET_F_CTRL_VQ is
+ * negotiated with the device.
+ */
+#define VIRTIO_NET_VIRTQ_RX 0
+#define VIRTIO_NET_VIRTQ_TX 1
+#define VIRTIO_NET_VIRTQ_CONTROL 2
+
+/*
+ * VIRTIO NETWORK FEATURE BITS
+ */
+
+/*
+ * CSUM, GUEST_CSUM:
+ * Partial checksum support. These features signal that the device will
+ * accept packets with partial checksums (CSUM), and that the driver will
+ * accept packets with partial checksums (GUEST_CSUM). These features
+ * combine the use of the VIRTIO_NET_HDR_F_NEEDS_CSUM flag, and the
+ * "csum_start" and "csum_offset" fields, in the virtio net header.
+ */
+#define VIRTIO_NET_F_CSUM (1ULL << 0)
+#define VIRTIO_NET_F_GUEST_CSUM (1ULL << 1)
+
+/*
+ * MTU:
+ * The device offers a maximum MTU value at VIRTIO_NET_CONFIG_MTU. If
+ * this is not negotiated, we allow the largest possible MTU that our
+ * buffer allocations support in case jumbo frames are tacitly supported
+ * by the device. The default MTU is always 1500.
+ */
+#define VIRTIO_NET_F_MTU (1ULL << 3)
+
+/*
+ * MAC:
+ * The device has an assigned primary MAC address. If this feature bit is
+ * not set, the driver must provide a locally assigned MAC address. See
+ * IEEE 802, "48-bit universal LAN MAC addresses" for more details on
+ * assignment.
+ */
+#define VIRTIO_NET_F_MAC (1ULL << 5)
+
+/*
+ * GUEST_TSO4, GUEST_TSO6, GUEST_UFO:
+ * Inbound segmentation offload support. These features depend on having
+ * VIRTIO_NET_F_GUEST_CSUM and signal that the driver can accept large
+ * combined TCP (v4 or v6) packets, or reassembled UDP fragments.
+ */
+#define VIRTIO_NET_F_GUEST_TSO4 (1ULL << 7)
+#define VIRTIO_NET_F_GUEST_TSO6 (1ULL << 8)
+#define VIRTIO_NET_F_GUEST_UFO (1ULL << 10)
+
+/*
+ * GUEST_ECN:
+ * Depends on either VIRTIO_NET_F_GUEST_TSO4 or VIRTIO_NET_F_GUEST_TSO6.
+ * This feature means the driver will look for the VIRTIO_NET_HDR_GSO_ECN
+ * bit in the "gso_type" of the virtio net header. This bit tells the
+ * driver that the Explicit Congestion Notification (ECN) bit was set in
+ * the original TCP packets.
+ */
+#define VIRTIO_NET_F_GUEST_ECN (1ULL << 9)
+
+/*
+ * HOST_TSO4, HOST_TSO6, HOST_UFO:
+ * Outbound segmentation offload support. These features depend on having
+ * VIRTIO_NET_F_CSUM and signal that the device will accept large combined
+ * TCP (v4 or v6) packets that require segmentation offload, or large
+ * combined UDP packets that require fragmentation offload.
+ */
+#define VIRTIO_NET_F_HOST_TSO4 (1ULL << 11)
+#define VIRTIO_NET_F_HOST_TSO6 (1ULL << 12)
+#define VIRTIO_NET_F_HOST_UFO (1ULL << 14)
+
+/*
+ * HOST_ECN:
+ * Depends on either VIRTIO_NET_F_HOST_TSO4 or VIRTIO_NET_F_HOST_TSO6.
+ *	This feature means the device will accept packets that both require
+ * segmentation offload and have the Explicit Congestion Notification
+ * (ECN) bit set. If this feature is not present, the device must not
+ * send large segments that require ECN to be set.
+ */
+#define VIRTIO_NET_F_HOST_ECN (1ULL << 13)
+
+/*
+ * GSO:
+ * The GSO feature is, in theory, the combination of HOST_TSO4, HOST_TSO6,
+ * and HOST_ECN. This is only useful for legacy devices; newer devices
+ * should be using the more specific bits above.
+ */
+#define VIRTIO_NET_F_GSO (1ULL << 6)
+
+/*
+ * MRG_RXBUF:
+ * This feature allows the receipt of large packets without needing to
+ * allocate large buffers. The "virtio_net_hdr" will include an extra
+ * value: the number of buffers to gang together.
+ */
+#define VIRTIO_NET_F_MRG_RXBUF (1ULL << 15)
+
+/*
+ * STATUS:
+ * The VIRTIO_NET_CONFIG_STATUS configuration register is available, which
+ * allows the driver to read the link state from the device.
+ */
+#define VIRTIO_NET_F_STATUS (1ULL << 16)
+
+/*
+ * CTRL_VQ, CTRL_RX, CTRL_VLAN:
+ * These features signal that the device exposes the control queue
+ * (VIRTIO_NET_VIRTQ_CONTROL), in the case of CTRL_VQ; and that the
+ * control queue supports extra commands (CTRL_RX, CTRL_VLAN).
+ */
+#define VIRTIO_NET_F_CTRL_VQ (1ULL << 17)
+#define VIRTIO_NET_F_CTRL_RX (1ULL << 18)
+#define VIRTIO_NET_F_CTRL_VLAN (1ULL << 19)
+#define VIRTIO_NET_F_CTRL_RX_EXTRA (1ULL << 20)
+
+/*
+ * These features are supported by the driver and we will request them from the
+ * device. Note that we do not currently request GUEST_CSUM, as the driver
+ * does not presently support receiving frames with any offload features from
+ * the device.
+ */
+#define VIRTIO_NET_WANTED_FEATURES (VIRTIO_NET_F_CSUM | \
+ VIRTIO_NET_F_GSO | \
+ VIRTIO_NET_F_HOST_TSO4 | \
+ VIRTIO_NET_F_HOST_ECN | \
+ VIRTIO_NET_F_MAC | \
+ VIRTIO_NET_F_MTU)
+
+/*
+ * VIRTIO NETWORK HEADER
+ *
+ * This structure appears at the start of each transmit or receive packet
+ * buffer.
+ */
+struct virtio_net_hdr {
+ uint8_t vnh_flags;
+ uint8_t vnh_gso_type;
+ uint16_t vnh_hdr_len;
+ uint16_t vnh_gso_size;
+ uint16_t vnh_csum_start;
+ uint16_t vnh_csum_offset;
+} __packed;
+
+/*
+ * VIRTIO NETWORK HEADER: FLAGS (vnh_flags)
+ */
+#define VIRTIO_NET_HDR_F_NEEDS_CSUM 0x01
+
+/*
+ * VIRTIO NETWORK HEADER: OFFLOAD OPTIONS (vnh_gso_type)
+ *
+ * Each of these is an offload type, except for the ECN value which is
+ * logically OR-ed with one of the other types.
+ */
+#define VIRTIO_NET_HDR_GSO_NONE 0
+#define VIRTIO_NET_HDR_GSO_TCPV4 1
+#define VIRTIO_NET_HDR_GSO_UDP 3
+#define VIRTIO_NET_HDR_GSO_TCPV6 4
+#define VIRTIO_NET_HDR_GSO_ECN 0x80
+
+
+/*
+ * DRIVER PARAMETERS
+ */
+
+/*
+ * At attach, we allocate a fixed pool of buffers for receipt and transmission
+ * of frames. The maximum number of buffers of each type that we will allocate
+ * is specified here. If the ring size is smaller than this number, we will
+ * use the ring size instead.
+ */
+#define VIRTIO_NET_TX_BUFS 256
+#define VIRTIO_NET_RX_BUFS 256
+
+/*
+ * The virtio net header and the first buffer segment share the same DMA
+ * allocation. We round up the virtio header size to a multiple of 4 and add 2
+ * bytes so that the IP header, which starts immediately after the 14 or 18
+ * byte Ethernet header, is then correctly aligned:
+ *
+ * 0 10 16 18 32/36
+ * | virtio_net_hdr | %4==0 | +2 | Ethernet header (14/18 bytes) | IPv4 ...
+ *
+ * Note that for this to work correctly, the DMA allocation must also be 4 byte
+ * aligned.
+ */
+#define VIOIF_HEADER_ALIGN 4
+#define VIOIF_HEADER_SKIP (P2ROUNDUP( \
+ sizeof (struct virtio_net_hdr), \
+ VIOIF_HEADER_ALIGN) + 2)
+
+/*
+ * Given we are not negotiating VIRTIO_NET_F_MRG_RXBUF, the specification says
+ * we must be able to accept a 1514 byte packet, or if any segmentation offload
+ * features have been negotiated a 65550 byte packet. To keep things simple,
+ * we'll assume segmentation offload is possible in most cases. In addition to
+ * the packet payload, we need to account for the Ethernet header and the
+ * virtio_net_hdr.
+ */
+#define VIOIF_RX_DATA_SIZE 65550
+#define VIOIF_RX_BUF_SIZE (VIOIF_RX_DATA_SIZE + \
+ sizeof (struct ether_header) + \
+ VIOIF_HEADER_SKIP)
+
+/*
+ * If we assume that a large allocation will probably have mostly 4K page sized
+ * cookies, 64 segments allows us 256KB for a single frame. We're in control
+ * of the allocation we use for receive buffers, so this value only has an
+ * impact on the length of chain we're able to create for external transmit
+ * buffer mappings.
+ */
+#define VIOIF_MAX_SEGS 64
+
+/*
+ * We pre-allocate a reasonably large buffer into which small packets
+ * are copied.  Bigger packets are mapped directly, and packets with
+ * multiple cookies are mapped as indirect buffers.
+ */
+#define VIOIF_TX_INLINE_SIZE (2 * 1024)
+
+
+/*
+ * TYPE DEFINITIONS
+ */
+
+typedef struct vioif vioif_t;
+
+/*
+ * Receive buffers are allocated in advance as a combination of DMA memory and
+ * a descriptor chain. Receive buffers can be loaned to the networking stack
+ * to avoid copying, and this object contains the free routine to pass to
+ * desballoc().
+ *
+ * When receive buffers are not in use, they are linked into the per-instance
+ * free list, "vif_rxbufs" via "rb_link". Under normal conditions, we expect
+ * the free list to be empty much of the time; most buffers will be in the ring
+ * or on loan.
+ */
+typedef struct vioif_rxbuf {
+ vioif_t *rb_vioif;
+ frtn_t rb_frtn;
+
+ virtio_dma_t *rb_dma;
+ virtio_chain_t *rb_chain;
+
+ list_node_t rb_link;
+} vioif_rxbuf_t;
+
+/*
+ * Transmit buffers are also allocated in advance. DMA memory is allocated for
+ * the virtio net header, and to hold small packets. Larger packets are mapped
+ * from storage loaned to the driver by the network stack.
+ *
+ * When transmit buffers are not in use, they are linked into the per-instance
+ * free list, "vif_txbufs" via "tb_link".
+ */
+typedef struct vioif_txbuf {
+ mblk_t *tb_mp;
+
+ /*
+ * Inline buffer space (VIOIF_TX_INLINE_SIZE) for storage of the virtio
+ * net header, and to hold copied (rather than mapped) packet data.
+ */
+ virtio_dma_t *tb_dma;
+ virtio_chain_t *tb_chain;
+
+ /*
+ * External buffer mapping. The capacity is fixed at allocation time,
+ * and "tb_ndmaext" tracks the current number of mappings.
+ */
+ virtio_dma_t **tb_dmaext;
+ uint_t tb_dmaext_capacity;
+ uint_t tb_ndmaext;
+
+ list_node_t tb_link;
+} vioif_txbuf_t;
+
+typedef enum vioif_runstate {
+ VIOIF_RUNSTATE_STOPPED = 1,
+ VIOIF_RUNSTATE_STOPPING,
+ VIOIF_RUNSTATE_RUNNING
+} vioif_runstate_t;
+
+/*
+ * Per-instance driver object.
+ */
+struct vioif {
+ dev_info_t *vif_dip;
+ virtio_t *vif_virtio;
+
+ kmutex_t vif_mutex;
+
+ /*
+ * The NIC is considered RUNNING between the mc_start(9E) and
+ * mc_stop(9E) calls. Otherwise it is STOPPING (while draining
+ * resources) then STOPPED. When not RUNNING, we will drop incoming
+ * frames and refuse to insert more receive buffers into the receive
+ * queue.
+ */
+ vioif_runstate_t vif_runstate;
+
+ mac_handle_t vif_mac_handle;
+ mac_register_t *vif_macp;
+
+ virtio_queue_t *vif_rx_vq;
+ virtio_queue_t *vif_tx_vq;
+
+ /* TX virtqueue management resources */
+ boolean_t vif_tx_corked;
+ boolean_t vif_tx_drain;
+ timeout_id_t vif_tx_reclaim_tid;
+
+ /*
+ * Configured offload features:
+ */
+ unsigned int vif_tx_csum:1;
+ unsigned int vif_tx_tso4:1;
+
+ /*
+ * For debugging, it is useful to know whether the MAC address we
+ * are using came from the host (via VIRTIO_NET_CONFIG_MAC) or
+ * was otherwise generated or set from within the guest.
+ */
+ unsigned int vif_mac_from_host:1;
+
+ uint_t vif_mtu;
+ uint_t vif_mtu_max;
+ uint8_t vif_mac[ETHERADDRL];
+
+ /*
+ * Receive buffer free list and accounting:
+ */
+ list_t vif_rxbufs;
+ uint_t vif_nrxbufs_alloc;
+ uint_t vif_nrxbufs_onloan;
+ uint_t vif_nrxbufs_onloan_max;
+ uint_t vif_rxbufs_capacity;
+ vioif_rxbuf_t *vif_rxbufs_mem;
+
+ /*
+ * Transmit buffer free list and accounting:
+ */
+ list_t vif_txbufs;
+ uint_t vif_ntxbufs_alloc;
+ uint_t vif_txbufs_capacity;
+ vioif_txbuf_t *vif_txbufs_mem;
+
+ /*
+ * These copy size thresholds are exposed as private MAC properties so
+ * that they can be tuned without rebooting.
+ */
+ uint_t vif_rxcopy_thresh;
+ uint_t vif_txcopy_thresh;
+
+ /*
+ * Statistics visible through mac:
+ */
+ uint64_t vif_ipackets;
+ uint64_t vif_opackets;
+ uint64_t vif_rbytes;
+ uint64_t vif_obytes;
+ uint64_t vif_brdcstxmt;
+ uint64_t vif_brdcstrcv;
+ uint64_t vif_multixmt;
+ uint64_t vif_multircv;
+ uint64_t vif_norecvbuf;
+ uint64_t vif_notxbuf;
+ uint64_t vif_ierrors;
+ uint64_t vif_oerrors;
+
+ /*
+ * Internal debugging statistics:
+ */
+ uint64_t vif_rxfail_dma_handle;
+ uint64_t vif_rxfail_dma_buffer;
+ uint64_t vif_rxfail_dma_bind;
+ uint64_t vif_rxfail_chain_undersize;
+ uint64_t vif_rxfail_no_descriptors;
+ uint64_t vif_txfail_dma_handle;
+ uint64_t vif_txfail_dma_bind;
+ uint64_t vif_txfail_indirect_limit;
+
+ uint64_t vif_stat_tx_reclaim;
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _VIOIF_H */
diff --git a/usr/src/uts/common/io/virtio/virtio.c b/usr/src/uts/common/io/virtio/virtio.c
deleted file mode 100644
index 19a66b8f38..0000000000
--- a/usr/src/uts/common/io/virtio/virtio.c
+++ /dev/null
@@ -1,1364 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
- * Copyright 2012 Alexey Zaytsev <alexey.zaytsev@gmail.com>
- * Copyright (c) 2016 by Delphix. All rights reserved.
- * Copyright 2017 Joyent, Inc.
- */
-
-/* Based on the NetBSD virtio driver by Minoura Makoto. */
-/*
- * Copyright (c) 2010 Minoura Makoto.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#include <sys/conf.h>
-#include <sys/kmem.h>
-#include <sys/debug.h>
-#include <sys/modctl.h>
-#include <sys/autoconf.h>
-#include <sys/ddi_impldefs.h>
-#include <sys/ddi.h>
-#include <sys/sunddi.h>
-#include <sys/sunndi.h>
-#include <sys/avintr.h>
-#include <sys/spl.h>
-#include <sys/promif.h>
-#include <sys/list.h>
-#include <sys/bootconf.h>
-#include <sys/bootsvcs.h>
-#include <sys/sysmacros.h>
-#include <sys/pci.h>
-
-#include "virtiovar.h"
-#include "virtioreg.h"
-
-#define NDEVNAMES (sizeof (virtio_device_name) / sizeof (char *))
-#define MINSEG_INDIRECT 2 /* use indirect if nsegs >= this value */
-#define VIRTQUEUE_ALIGN(n) (((n)+(VIRTIO_PAGE_SIZE-1)) & \
- ~(VIRTIO_PAGE_SIZE-1))
-
-void
-virtio_set_status(struct virtio_softc *sc, unsigned int status)
-{
- int old = 0;
-
- if (status != 0) {
- old = ddi_get8(sc->sc_ioh, (uint8_t *)(sc->sc_io_addr +
- VIRTIO_CONFIG_DEVICE_STATUS));
- }
-
- ddi_put8(sc->sc_ioh, (uint8_t *)(sc->sc_io_addr +
- VIRTIO_CONFIG_DEVICE_STATUS), status | old);
-}
-
-/*
- * Negotiate features, save the result in sc->sc_features
- */
-uint32_t
-virtio_negotiate_features(struct virtio_softc *sc, uint32_t guest_features)
-{
- uint32_t host_features;
- uint32_t features;
-
- host_features = ddi_get32(sc->sc_ioh,
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- (uint32_t *)(sc->sc_io_addr + VIRTIO_CONFIG_DEVICE_FEATURES));
-
- dev_debug(sc->sc_dev, CE_NOTE, "host features: %x, guest features: %x",
- host_features, guest_features);
-
- features = host_features & guest_features;
- ddi_put32(sc->sc_ioh,
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- (uint32_t *)(sc->sc_io_addr + VIRTIO_CONFIG_GUEST_FEATURES),
- features);
-
- sc->sc_features = features;
-
- return (host_features);
-}
-
-size_t
-virtio_show_features(uint32_t features, char *buf, size_t len)
-{
- char *orig_buf = buf;
- char *bufend = buf + len;
-
- /* LINTED E_PTRDIFF_OVERFLOW */
- buf += snprintf(buf, bufend - buf, "Generic ( ");
- if (features & VIRTIO_F_RING_INDIRECT_DESC)
- /* LINTED E_PTRDIFF_OVERFLOW */
- buf += snprintf(buf, bufend - buf, "INDIRECT_DESC ");
-
- /* LINTED E_PTRDIFF_OVERFLOW */
- buf += snprintf(buf, bufend - buf, ") ");
-
- /* LINTED E_PTRDIFF_OVERFLOW */
- return (buf - orig_buf);
-}
-
-boolean_t
-virtio_has_feature(struct virtio_softc *sc, uint32_t feature)
-{
- return (sc->sc_features & feature);
-}
-
-/*
- * Device configuration registers.
- */
-uint8_t
-virtio_read_device_config_1(struct virtio_softc *sc, unsigned int index)
-{
- ASSERT(sc->sc_config_offset);
- return ddi_get8(sc->sc_ioh,
- (uint8_t *)(sc->sc_io_addr + sc->sc_config_offset + index));
-}
-
-uint16_t
-virtio_read_device_config_2(struct virtio_softc *sc, unsigned int index)
-{
- ASSERT(sc->sc_config_offset);
- return ddi_get16(sc->sc_ioh,
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- (uint16_t *)(sc->sc_io_addr + sc->sc_config_offset + index));
-}
-
-uint32_t
-virtio_read_device_config_4(struct virtio_softc *sc, unsigned int index)
-{
- ASSERT(sc->sc_config_offset);
- return ddi_get32(sc->sc_ioh,
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- (uint32_t *)(sc->sc_io_addr + sc->sc_config_offset + index));
-}
-
-uint64_t
-virtio_read_device_config_8(struct virtio_softc *sc, unsigned int index)
-{
- uint64_t r;
-
- ASSERT(sc->sc_config_offset);
- r = ddi_get32(sc->sc_ioh,
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- (uint32_t *)(sc->sc_io_addr + sc->sc_config_offset +
- index + sizeof (uint32_t)));
-
- r <<= 32;
-
- r += ddi_get32(sc->sc_ioh,
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- (uint32_t *)(sc->sc_io_addr + sc->sc_config_offset + index));
- return (r);
-}
-
-void
-virtio_write_device_config_1(struct virtio_softc *sc, unsigned int index,
- uint8_t value)
-{
- ASSERT(sc->sc_config_offset);
- ddi_put8(sc->sc_ioh,
- (uint8_t *)(sc->sc_io_addr + sc->sc_config_offset + index), value);
-}
-
-void
-virtio_write_device_config_2(struct virtio_softc *sc, unsigned int index,
- uint16_t value)
-{
- ASSERT(sc->sc_config_offset);
- ddi_put16(sc->sc_ioh,
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- (uint16_t *)(sc->sc_io_addr + sc->sc_config_offset + index), value);
-}
-
-void
-virtio_write_device_config_4(struct virtio_softc *sc, unsigned int index,
- uint32_t value)
-{
- ASSERT(sc->sc_config_offset);
- ddi_put32(sc->sc_ioh,
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- (uint32_t *)(sc->sc_io_addr + sc->sc_config_offset + index), value);
-}
-
-void
-virtio_write_device_config_8(struct virtio_softc *sc, unsigned int index,
- uint64_t value)
-{
- ASSERT(sc->sc_config_offset);
- ddi_put32(sc->sc_ioh,
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- (uint32_t *)(sc->sc_io_addr + sc->sc_config_offset + index),
- value & 0xFFFFFFFF);
- ddi_put32(sc->sc_ioh,
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- (uint32_t *)(sc->sc_io_addr + sc->sc_config_offset +
- index + sizeof (uint32_t)), value >> 32);
-}
-
-/*
- * Start/stop vq interrupt. No guarantee.
- */
-void
-virtio_stop_vq_intr(struct virtqueue *vq)
-{
- vq->vq_avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
-}
-
-void
-virtio_start_vq_intr(struct virtqueue *vq)
-{
- vq->vq_avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
-}
-
-static ddi_dma_attr_t virtio_vq_dma_attr = {
- DMA_ATTR_V0, /* Version number */
- 0, /* low address */
- 0x00000FFFFFFFFFFF, /* high address. Has to fit into 32 bits */
- /* after page-shifting */
- 0xFFFFFFFF, /* counter register max */
- VIRTIO_PAGE_SIZE, /* page alignment required */
- 0x3F, /* burst sizes: 1 - 32 */
- 0x1, /* minimum transfer size */
- 0xFFFFFFFF, /* max transfer size */
- 0xFFFFFFFF, /* address register max */
- 1, /* no scatter-gather */
- 1, /* device operates on bytes */
- 0, /* attr flag: set to 0 */
-};
-
-static ddi_dma_attr_t virtio_vq_indirect_dma_attr = {
- DMA_ATTR_V0, /* Version number */
- 0, /* low address */
- 0xFFFFFFFFFFFFFFFF, /* high address */
- 0xFFFFFFFF, /* counter register max */
- 1, /* No specific alignment */
- 0x3F, /* burst sizes: 1 - 32 */
- 0x1, /* minimum transfer size */
- 0xFFFFFFFF, /* max transfer size */
- 0xFFFFFFFF, /* address register max */
- 1, /* no scatter-gather */
- 1, /* device operates on bytes */
- 0, /* attr flag: set to 0 */
-};
-
-/* Same for direct and indirect descriptors. */
-static ddi_device_acc_attr_t virtio_vq_devattr = {
- DDI_DEVICE_ATTR_V0,
- DDI_NEVERSWAP_ACC,
- DDI_STORECACHING_OK_ACC,
- DDI_DEFAULT_ACC
-};
-
-static void
-virtio_free_indirect(struct vq_entry *entry)
-{
-
- (void) ddi_dma_unbind_handle(entry->qe_indirect_dma_handle);
- ddi_dma_mem_free(&entry->qe_indirect_dma_acch);
- ddi_dma_free_handle(&entry->qe_indirect_dma_handle);
-
- entry->qe_indirect_descs = NULL;
-}
-
-
-static int
-virtio_alloc_indirect(struct virtio_softc *sc, struct vq_entry *entry)
-{
- int allocsize, num;
- size_t len;
- unsigned int ncookies;
- int ret;
-
- num = entry->qe_queue->vq_indirect_num;
- ASSERT(num > 1);
-
- allocsize = sizeof (struct vring_desc) * num;
-
- ret = ddi_dma_alloc_handle(sc->sc_dev, &virtio_vq_indirect_dma_attr,
- DDI_DMA_SLEEP, NULL, &entry->qe_indirect_dma_handle);
- if (ret != DDI_SUCCESS) {
- dev_err(sc->sc_dev, CE_WARN,
- "Failed to allocate dma handle for indirect descriptors, "
- "entry %d, vq %d", entry->qe_index,
- entry->qe_queue->vq_index);
- goto out_alloc_handle;
- }
-
- ret = ddi_dma_mem_alloc(entry->qe_indirect_dma_handle, allocsize,
- &virtio_vq_devattr, DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL,
- (caddr_t *)&entry->qe_indirect_descs, &len,
- &entry->qe_indirect_dma_acch);
- if (ret != DDI_SUCCESS) {
- dev_err(sc->sc_dev, CE_WARN,
- "Failed to allocate dma memory for indirect descriptors, "
- "entry %d, vq %d,", entry->qe_index,
- entry->qe_queue->vq_index);
- goto out_alloc;
- }
-
- (void) memset(entry->qe_indirect_descs, 0xff, allocsize);
-
- ret = ddi_dma_addr_bind_handle(entry->qe_indirect_dma_handle, NULL,
- (caddr_t)entry->qe_indirect_descs, len,
- DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL,
- &entry->qe_indirect_dma_cookie, &ncookies);
- if (ret != DDI_DMA_MAPPED) {
- dev_err(sc->sc_dev, CE_WARN,
- "Failed to bind dma memory for indirect descriptors, "
- "entry %d, vq %d", entry->qe_index,
- entry->qe_queue->vq_index);
- goto out_bind;
- }
-
- /* We asked for a single segment */
- ASSERT(ncookies == 1);
-
- return (0);
-
-out_bind:
- ddi_dma_mem_free(&entry->qe_indirect_dma_acch);
-out_alloc:
- ddi_dma_free_handle(&entry->qe_indirect_dma_handle);
-out_alloc_handle:
-
- return (ret);
-}
-
-/*
- * Initialize the vq structure.
- */
-static int
-virtio_init_vq(struct virtio_softc *sc, struct virtqueue *vq)
-{
- int ret;
- uint16_t i;
- int vq_size = vq->vq_num;
- int indirect_num = vq->vq_indirect_num;
-
- /* free slot management */
- list_create(&vq->vq_freelist, sizeof (struct vq_entry),
- offsetof(struct vq_entry, qe_list));
-
- for (i = 0; i < vq_size; i++) {
- struct vq_entry *entry = &vq->vq_entries[i];
- list_insert_tail(&vq->vq_freelist, entry);
- entry->qe_index = i;
- entry->qe_desc = &vq->vq_descs[i];
- entry->qe_queue = vq;
-
- if (indirect_num) {
- ret = virtio_alloc_indirect(sc, entry);
- if (ret)
- goto out_indirect;
- }
- }
-
- mutex_init(&vq->vq_freelist_lock, "virtio-freelist", MUTEX_DRIVER,
- DDI_INTR_PRI(sc->sc_intr_prio));
- mutex_init(&vq->vq_avail_lock, "virtio-avail", MUTEX_DRIVER,
- DDI_INTR_PRI(sc->sc_intr_prio));
- mutex_init(&vq->vq_used_lock, "virtio-used", MUTEX_DRIVER,
- DDI_INTR_PRI(sc->sc_intr_prio));
-
- return (0);
-
-out_indirect:
- for (i = 0; i < vq_size; i++) {
- struct vq_entry *entry = &vq->vq_entries[i];
- if (entry->qe_indirect_descs)
- virtio_free_indirect(entry);
- }
-
- return (ret);
-}
-
-/*
- * Allocate/free a vq.
- */
-struct virtqueue *
-virtio_alloc_vq(struct virtio_softc *sc, unsigned int index, unsigned int size,
- unsigned int indirect_num, const char *name)
-{
- int vq_size, allocsize1, allocsize2, allocsize = 0;
- int ret;
- unsigned int ncookies;
- size_t len;
- struct virtqueue *vq;
-
- ddi_put16(sc->sc_ioh,
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- (uint16_t *)(sc->sc_io_addr + VIRTIO_CONFIG_QUEUE_SELECT), index);
- vq_size = ddi_get16(sc->sc_ioh,
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- (uint16_t *)(sc->sc_io_addr + VIRTIO_CONFIG_QUEUE_SIZE));
- if (vq_size == 0) {
- dev_err(sc->sc_dev, CE_WARN,
- "virtqueue dest not exist, index %d for %s\n", index, name);
- goto out;
- }
-
- vq = kmem_zalloc(sizeof (struct virtqueue), KM_SLEEP);
-
- /* size 0 => use native vq size, good for receive queues. */
- if (size)
- vq_size = MIN(vq_size, size);
-
- /* allocsize1: descriptor table + avail ring + pad */
- allocsize1 = VIRTQUEUE_ALIGN(sizeof (struct vring_desc) * vq_size +
- sizeof (struct vring_avail) + sizeof (uint16_t) * vq_size);
- /* allocsize2: used ring + pad */
- allocsize2 = VIRTQUEUE_ALIGN(sizeof (struct vring_used) +
- sizeof (struct vring_used_elem) * vq_size);
-
- allocsize = allocsize1 + allocsize2;
-
- ret = ddi_dma_alloc_handle(sc->sc_dev, &virtio_vq_dma_attr,
- DDI_DMA_SLEEP, NULL, &vq->vq_dma_handle);
- if (ret != DDI_SUCCESS) {
- dev_err(sc->sc_dev, CE_WARN,
- "Failed to allocate dma handle for vq %d", index);
- goto out_alloc_handle;
- }
-
- ret = ddi_dma_mem_alloc(vq->vq_dma_handle, allocsize,
- &virtio_vq_devattr, DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL,
- (caddr_t *)&vq->vq_vaddr, &len, &vq->vq_dma_acch);
- if (ret != DDI_SUCCESS) {
- dev_err(sc->sc_dev, CE_WARN,
- "Failed to allocate dma memory for vq %d", index);
- goto out_alloc;
- }
-
- ret = ddi_dma_addr_bind_handle(vq->vq_dma_handle, NULL,
- (caddr_t)vq->vq_vaddr, len, DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
- DDI_DMA_SLEEP, NULL, &vq->vq_dma_cookie, &ncookies);
- if (ret != DDI_DMA_MAPPED) {
- dev_err(sc->sc_dev, CE_WARN,
- "Failed to bind dma memory for vq %d", index);
- goto out_bind;
- }
-
- /* We asked for a single segment */
- ASSERT(ncookies == 1);
- /* and page-aligned buffers. */
- ASSERT(vq->vq_dma_cookie.dmac_laddress % VIRTIO_PAGE_SIZE == 0);
-
- (void) memset(vq->vq_vaddr, 0, allocsize);
-
- /* Make sure all zeros hit the buffer before we point the host to it */
- membar_producer();
-
- /* set the vq address */
- ddi_put32(sc->sc_ioh,
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- (uint32_t *)(sc->sc_io_addr + VIRTIO_CONFIG_QUEUE_ADDRESS),
- (vq->vq_dma_cookie.dmac_laddress / VIRTIO_PAGE_SIZE));
-
- /* remember addresses and offsets for later use */
- vq->vq_owner = sc;
- vq->vq_num = vq_size;
- vq->vq_index = index;
- vq->vq_descs = vq->vq_vaddr;
- vq->vq_availoffset = sizeof (struct vring_desc)*vq_size;
- vq->vq_avail = (void *)(((char *)vq->vq_descs) + vq->vq_availoffset);
- vq->vq_usedoffset = allocsize1;
- vq->vq_used = (void *)(((char *)vq->vq_descs) + vq->vq_usedoffset);
-
- ASSERT(indirect_num == 0 ||
- virtio_has_feature(sc, VIRTIO_F_RING_INDIRECT_DESC));
- vq->vq_indirect_num = indirect_num;
-
- /* free slot management */
- vq->vq_entries = kmem_zalloc(sizeof (struct vq_entry) * vq_size,
- KM_SLEEP);
-
- ret = virtio_init_vq(sc, vq);
- if (ret)
- goto out_init;
-
- dev_debug(sc->sc_dev, CE_NOTE,
- "Allocated %d entries for vq %d:%s (%d indirect descs)",
- vq_size, index, name, indirect_num * vq_size);
-
- return (vq);
-
-out_init:
- kmem_free(vq->vq_entries, sizeof (struct vq_entry) * vq_size);
- (void) ddi_dma_unbind_handle(vq->vq_dma_handle);
-out_bind:
- ddi_dma_mem_free(&vq->vq_dma_acch);
-out_alloc:
- ddi_dma_free_handle(&vq->vq_dma_handle);
-out_alloc_handle:
- kmem_free(vq, sizeof (struct virtqueue));
-out:
- return (NULL);
-}
-
-void
-virtio_free_vq(struct virtqueue *vq)
-{
- struct virtio_softc *sc = vq->vq_owner;
- int i;
-
- /* tell device that there's no virtqueue any longer */
- ddi_put16(sc->sc_ioh,
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- (uint16_t *)(sc->sc_io_addr + VIRTIO_CONFIG_QUEUE_SELECT),
- vq->vq_index);
- ddi_put32(sc->sc_ioh,
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- (uint32_t *)(sc->sc_io_addr + VIRTIO_CONFIG_QUEUE_ADDRESS), 0);
-
- /* Free the indirect descriptors, if any. */
- for (i = 0; i < vq->vq_num; i++) {
- struct vq_entry *entry = &vq->vq_entries[i];
- if (entry->qe_indirect_descs)
- virtio_free_indirect(entry);
- }
-
- kmem_free(vq->vq_entries, sizeof (struct vq_entry) * vq->vq_num);
-
- (void) ddi_dma_unbind_handle(vq->vq_dma_handle);
- ddi_dma_mem_free(&vq->vq_dma_acch);
- ddi_dma_free_handle(&vq->vq_dma_handle);
-
- mutex_destroy(&vq->vq_used_lock);
- mutex_destroy(&vq->vq_avail_lock);
- mutex_destroy(&vq->vq_freelist_lock);
-
- kmem_free(vq, sizeof (struct virtqueue));
-}
-
-/*
- * Free descriptor management.
- */
-struct vq_entry *
-vq_alloc_entry(struct virtqueue *vq)
-{
- struct vq_entry *qe;
-
- mutex_enter(&vq->vq_freelist_lock);
- if (list_is_empty(&vq->vq_freelist)) {
- mutex_exit(&vq->vq_freelist_lock);
- return (NULL);
- }
- qe = list_remove_head(&vq->vq_freelist);
-
- ASSERT(vq->vq_used_entries >= 0);
- vq->vq_used_entries++;
-
- mutex_exit(&vq->vq_freelist_lock);
-
- qe->qe_next = NULL;
- qe->qe_indirect_next = 0;
- (void) memset(qe->qe_desc, 0, sizeof (struct vring_desc));
-
- return (qe);
-}
-
-void
-vq_free_entry(struct virtqueue *vq, struct vq_entry *qe)
-{
- mutex_enter(&vq->vq_freelist_lock);
-
- list_insert_head(&vq->vq_freelist, qe);
- vq->vq_used_entries--;
- ASSERT(vq->vq_used_entries >= 0);
- mutex_exit(&vq->vq_freelist_lock);
-}
-
-/*
- * We (intentionally) don't have a global vq mutex, so you are
- * responsible for external locking to avoid allocating/freeing any
- * entries before using the returned value. Have fun.
- */
-uint_t
-vq_num_used(struct virtqueue *vq)
-{
- /* vq->vq_freelist_lock would not help here. */
- return (vq->vq_used_entries);
-}
-
-static inline void
-virtio_ve_set_desc(struct vring_desc *desc, uint64_t paddr, uint32_t len,
- boolean_t write)
-{
- desc->addr = paddr;
- desc->len = len;
- desc->next = 0;
- desc->flags = 0;
-
- /* 'write' - from the driver's point of view */
- if (!write)
- desc->flags = VRING_DESC_F_WRITE;
-}
-
-void
-virtio_ve_set(struct vq_entry *qe, uint64_t paddr, uint32_t len,
- boolean_t write)
-{
- virtio_ve_set_desc(qe->qe_desc, paddr, len, write);
-}
-
-unsigned int
-virtio_ve_indirect_available(struct vq_entry *qe)
-{
- return (qe->qe_queue->vq_indirect_num - qe->qe_indirect_next);
-}
-
-void
-virtio_ve_add_indirect_buf(struct vq_entry *qe, uint64_t paddr, uint32_t len,
- boolean_t write)
-{
- struct vring_desc *indirect_desc;
-
- ASSERT(qe->qe_queue->vq_indirect_num);
- ASSERT(qe->qe_indirect_next < qe->qe_queue->vq_indirect_num);
-
- indirect_desc = &qe->qe_indirect_descs[qe->qe_indirect_next];
- virtio_ve_set_desc(indirect_desc, paddr, len, write);
- qe->qe_indirect_next++;
-}
-
-void
-virtio_ve_add_cookie(struct vq_entry *qe, ddi_dma_handle_t dma_handle,
- ddi_dma_cookie_t dma_cookie, unsigned int ncookies, boolean_t write)
-{
- int i;
-
- for (i = 0; i < ncookies; i++) {
- virtio_ve_add_indirect_buf(qe, dma_cookie.dmac_laddress,
- dma_cookie.dmac_size, write);
- ddi_dma_nextcookie(dma_handle, &dma_cookie);
- }
-}
-
-void
-virtio_sync_vq(struct virtqueue *vq)
-{
- struct virtio_softc *vsc = vq->vq_owner;
-
- /* Make sure the avail ring update hit the buffer */
- membar_producer();
-
- vq->vq_avail->idx = vq->vq_avail_idx;
-
- /* Make sure the avail idx update hits the buffer */
- membar_producer();
-
- /* Make sure we see the flags update */
- membar_consumer();
-
- if (!(vq->vq_used->flags & VRING_USED_F_NO_NOTIFY)) {
- ddi_put16(vsc->sc_ioh,
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- (uint16_t *)(vsc->sc_io_addr +
- VIRTIO_CONFIG_QUEUE_NOTIFY),
- vq->vq_index);
- }
-}
-
-void
-virtio_push_chain(struct vq_entry *qe, boolean_t sync)
-{
- struct virtqueue *vq = qe->qe_queue;
- struct vq_entry *head = qe;
- struct vring_desc *desc;
- int idx;
-
- ASSERT(qe);
-
- /*
- * Bind the descs together, paddr and len should be already
- * set with virtio_ve_set
- */
- do {
- /* Bind the indirect descriptors */
- if (qe->qe_indirect_next > 1) {
- uint16_t i = 0;
-
- /*
- * Set the pointer/flags to the
- * first indirect descriptor
- */
- virtio_ve_set_desc(qe->qe_desc,
- qe->qe_indirect_dma_cookie.dmac_laddress,
- sizeof (struct vring_desc) * qe->qe_indirect_next,
- B_FALSE);
- qe->qe_desc->flags |= VRING_DESC_F_INDIRECT;
-
- /* For all but the last one, add the next index/flag */
- do {
- desc = &qe->qe_indirect_descs[i];
- i++;
-
- desc->flags |= VRING_DESC_F_NEXT;
- desc->next = i;
- } while (i < qe->qe_indirect_next - 1);
-
- }
-
- if (qe->qe_next) {
- qe->qe_desc->flags |= VRING_DESC_F_NEXT;
- qe->qe_desc->next = qe->qe_next->qe_index;
- }
-
- qe = qe->qe_next;
- } while (qe);
-
- mutex_enter(&vq->vq_avail_lock);
- idx = vq->vq_avail_idx;
- vq->vq_avail_idx++;
-
- /* Make sure the bits hit the descriptor(s) */
- membar_producer();
- vq->vq_avail->ring[idx % vq->vq_num] = head->qe_index;
-
- /* Notify the device, if needed. */
- if (sync)
- virtio_sync_vq(vq);
-
- mutex_exit(&vq->vq_avail_lock);
-}
-
-/*
- * Get a chain of descriptors from the used ring, if one is available.
- */
-struct vq_entry *
-virtio_pull_chain(struct virtqueue *vq, uint32_t *len)
-{
- struct vq_entry *head;
- int slot;
- int usedidx;
-
- mutex_enter(&vq->vq_used_lock);
-
- /* No used entries? Bye. */
- if (vq->vq_used_idx == vq->vq_used->idx) {
- mutex_exit(&vq->vq_used_lock);
- return (NULL);
- }
-
- usedidx = vq->vq_used_idx;
- vq->vq_used_idx++;
- mutex_exit(&vq->vq_used_lock);
-
- usedidx %= vq->vq_num;
-
- /* Make sure we do the next step _after_ checking the idx. */
- membar_consumer();
-
- slot = vq->vq_used->ring[usedidx].id;
- *len = vq->vq_used->ring[usedidx].len;
-
- head = &vq->vq_entries[slot];
-
- return (head);
-}
-
-void
-virtio_free_chain(struct vq_entry *qe)
-{
- struct vq_entry *tmp;
- struct virtqueue *vq = qe->qe_queue;
-
- ASSERT(qe);
-
- do {
- ASSERT(qe->qe_queue == vq);
- tmp = qe->qe_next;
- vq_free_entry(vq, qe);
- qe = tmp;
- } while (tmp != NULL);
-}
-
-void
-virtio_ventry_stick(struct vq_entry *first, struct vq_entry *second)
-{
- first->qe_next = second;
-}
-
-static int
-virtio_register_msi(struct virtio_softc *sc,
- struct virtio_int_handler *config_handler,
- struct virtio_int_handler vq_handlers[], int intr_types)
-{
- int count, actual;
- int int_type;
- int i;
- int handler_count;
- int ret;
-
- /* If both MSI and MSI-x are reported, prefer MSI-x. */
- int_type = DDI_INTR_TYPE_MSI;
- if (intr_types & DDI_INTR_TYPE_MSIX)
- int_type = DDI_INTR_TYPE_MSIX;
-
- /* Walk the handler table to get the number of handlers. */
- for (handler_count = 0;
- vq_handlers && vq_handlers[handler_count].vh_func;
- handler_count++)
- ;
-
- /* +1 if there is a config change handler. */
- if (config_handler != NULL)
- handler_count++;
-
- /* Number of MSIs supported by the device. */
- ret = ddi_intr_get_nintrs(sc->sc_dev, int_type, &count);
- if (ret != DDI_SUCCESS) {
- dev_err(sc->sc_dev, CE_WARN, "ddi_intr_get_nintrs failed");
- return (ret);
- }
-
- /*
- * Those who try to register more handlers than the device
- * supports shall suffer.
- */
- ASSERT(handler_count <= count);
-
- sc->sc_intr_htable = kmem_zalloc(sizeof (ddi_intr_handle_t) *
- handler_count, KM_SLEEP);
-
- ret = ddi_intr_alloc(sc->sc_dev, sc->sc_intr_htable, int_type, 0,
- handler_count, &actual, DDI_INTR_ALLOC_NORMAL);
- if (ret != DDI_SUCCESS) {
- dev_err(sc->sc_dev, CE_WARN, "Failed to allocate MSI: %d", ret);
- goto out_msi_alloc;
- }
-
- if (actual != handler_count) {
- dev_err(sc->sc_dev, CE_WARN,
- "Not enough MSI available: need %d, available %d",
- handler_count, actual);
- goto out_msi_available;
- }
-
- sc->sc_intr_num = handler_count;
- sc->sc_intr_config = B_FALSE;
- if (config_handler != NULL) {
- sc->sc_intr_config = B_TRUE;
- }
-
- /* Assume they are all the same priority */
- ret = ddi_intr_get_pri(sc->sc_intr_htable[0], &sc->sc_intr_prio);
- if (ret != DDI_SUCCESS) {
- dev_err(sc->sc_dev, CE_WARN, "ddi_intr_get_pri failed");
- goto out_msi_prio;
- }
-
- /* Add the vq handlers */
- for (i = 0; vq_handlers[i].vh_func; i++) {
- ret = ddi_intr_add_handler(sc->sc_intr_htable[i],
- vq_handlers[i].vh_func, sc, vq_handlers[i].vh_priv);
- if (ret != DDI_SUCCESS) {
- dev_err(sc->sc_dev, CE_WARN,
- "ddi_intr_add_handler failed");
- /* Remove the handlers that succeeded. */
- while (--i >= 0) {
- (void) ddi_intr_remove_handler(
- sc->sc_intr_htable[i]);
- }
- goto out_add_handlers;
- }
- }
-
- /* Don't forget the config handler */
- if (config_handler != NULL) {
- ret = ddi_intr_add_handler(sc->sc_intr_htable[i],
- config_handler->vh_func, sc, config_handler->vh_priv);
- if (ret != DDI_SUCCESS) {
- dev_err(sc->sc_dev, CE_WARN,
- "ddi_intr_add_handler failed");
- /* Remove the handlers that succeeded. */
- while (--i >= 0) {
- (void) ddi_intr_remove_handler(
- sc->sc_intr_htable[i]);
- }
- goto out_add_handlers;
- }
- }
-
- ret = ddi_intr_get_cap(sc->sc_intr_htable[0], &sc->sc_intr_cap);
- if (ret == DDI_SUCCESS) {
- sc->sc_int_type = int_type;
- return (DDI_SUCCESS);
- }
-
-out_add_handlers:
-out_msi_prio:
-out_msi_available:
- for (i = 0; i < actual; i++)
- (void) ddi_intr_free(sc->sc_intr_htable[i]);
-out_msi_alloc:
- kmem_free(sc->sc_intr_htable,
- sizeof (ddi_intr_handle_t) * handler_count);
-
- return (ret);
-}
-
-struct virtio_handler_container {
- int nhandlers;
- struct virtio_int_handler config_handler;
- struct virtio_int_handler vq_handlers[];
-};
-
-uint_t
-virtio_intx_dispatch(caddr_t arg1, caddr_t arg2)
-{
- struct virtio_softc *sc = (void *)arg1;
- struct virtio_handler_container *vhc = (void *)arg2;
- uint8_t isr_status;
- int i;
-
- isr_status = ddi_get8(sc->sc_ioh, (uint8_t *)(sc->sc_io_addr +
- VIRTIO_CONFIG_ISR_STATUS));
-
- if (!isr_status)
- return (DDI_INTR_UNCLAIMED);
-
- if ((isr_status & VIRTIO_CONFIG_ISR_CONFIG_CHANGE) &&
- vhc->config_handler.vh_func) {
- vhc->config_handler.vh_func((void *)sc,
- vhc->config_handler.vh_priv);
- }
-
- /* Notify all handlers */
- for (i = 0; i < vhc->nhandlers; i++) {
- vhc->vq_handlers[i].vh_func((void *)sc,
- vhc->vq_handlers[i].vh_priv);
- }
-
- return (DDI_INTR_CLAIMED);
-}
-
-/*
- * config_handler and vq_handlers may be allocated on stack.
- * Take precautions not to lose them.
- */
-static int
-virtio_register_intx(struct virtio_softc *sc,
- struct virtio_int_handler *config_handler,
- struct virtio_int_handler vq_handlers[])
-{
- int vq_handler_count;
- int actual;
- struct virtio_handler_container *vhc;
- size_t vhc_sz;
- int ret = DDI_FAILURE;
-
- /* Walk the handler table to get the number of handlers. */
- for (vq_handler_count = 0;
- vq_handlers && vq_handlers[vq_handler_count].vh_func;
- vq_handler_count++)
- ;
-
- vhc_sz = sizeof (struct virtio_handler_container) +
- sizeof (struct virtio_int_handler) * vq_handler_count;
- vhc = kmem_zalloc(vhc_sz, KM_SLEEP);
-
- vhc->nhandlers = vq_handler_count;
- (void) memcpy(vhc->vq_handlers, vq_handlers,
- sizeof (struct virtio_int_handler) * vq_handler_count);
-
- if (config_handler != NULL) {
- (void) memcpy(&vhc->config_handler, config_handler,
- sizeof (struct virtio_int_handler));
- }
-
- /* Just a single entry for a single interrupt. */
- sc->sc_intr_htable = kmem_zalloc(sizeof (ddi_intr_handle_t), KM_SLEEP);
-
- ret = ddi_intr_alloc(sc->sc_dev, sc->sc_intr_htable,
- DDI_INTR_TYPE_FIXED, 0, 1, &actual, DDI_INTR_ALLOC_NORMAL);
- if (ret != DDI_SUCCESS) {
- dev_err(sc->sc_dev, CE_WARN,
- "Failed to allocate a fixed interrupt: %d", ret);
- goto out_int_alloc;
- }
-
- ASSERT(actual == 1);
- sc->sc_intr_num = 1;
-
- ret = ddi_intr_get_pri(sc->sc_intr_htable[0], &sc->sc_intr_prio);
- if (ret != DDI_SUCCESS) {
- dev_err(sc->sc_dev, CE_WARN, "ddi_intr_get_pri failed");
- goto out_prio;
- }
-
- ret = ddi_intr_add_handler(sc->sc_intr_htable[0],
- virtio_intx_dispatch, sc, vhc);
- if (ret != DDI_SUCCESS) {
- dev_err(sc->sc_dev, CE_WARN, "ddi_intr_add_handler failed");
- goto out_add_handlers;
- }
-
- sc->sc_int_type = DDI_INTR_TYPE_FIXED;
-
- return (DDI_SUCCESS);
-
-out_add_handlers:
-out_prio:
- (void) ddi_intr_free(sc->sc_intr_htable[0]);
-out_int_alloc:
- kmem_free(sc->sc_intr_htable, sizeof (ddi_intr_handle_t));
- kmem_free(vhc, vhc_sz);
- return (ret);
-}
-
-/*
- * We find out if we support MSI during this, and the register layout
- * depends on the MSI (doh). Don't access the device-specific bits in
- * BAR 0 before calling it!
- */
-int
-virtio_register_ints(struct virtio_softc *sc,
- struct virtio_int_handler *config_handler,
- struct virtio_int_handler vq_handlers[])
-{
- int ret;
- int intr_types;
-
- /* Default offset until MSI-X is enabled, if ever. */
- sc->sc_config_offset = VIRTIO_CONFIG_DEVICE_CONFIG_NOMSIX;
-
- /* Determine which types of interrupts are supported */
- ret = ddi_intr_get_supported_types(sc->sc_dev, &intr_types);
- if (ret != DDI_SUCCESS) {
- dev_err(sc->sc_dev, CE_WARN, "Can't get supported int types");
- goto out_inttype;
- }
-
- /* If we have msi, let's use them. */
- if (intr_types & (DDI_INTR_TYPE_MSIX | DDI_INTR_TYPE_MSI)) {
- ret = virtio_register_msi(sc, config_handler,
- vq_handlers, intr_types);
- if (!ret)
- return (0);
- }
-
- /* Fall back to old-fashioned interrupts. */
- if (intr_types & DDI_INTR_TYPE_FIXED) {
- dev_debug(sc->sc_dev, CE_WARN,
- "Using legacy interrupts");
-
- return (virtio_register_intx(sc, config_handler, vq_handlers));
- }
-
- dev_err(sc->sc_dev, CE_WARN,
- "MSI failed and fixed interrupts not supported. Giving up.");
- ret = DDI_FAILURE;
-
-out_inttype:
- return (ret);
-}
-
-static int
-virtio_enable_msi(struct virtio_softc *sc)
-{
- int ret, i;
- int vq_handler_count = sc->sc_intr_num;
-
- /* Number of handlers, not counting the config. */
- if (sc->sc_intr_config)
- vq_handler_count--;
-
- /* Enable the interrupts. Either the whole block, or one by one. */
- if (sc->sc_intr_cap & DDI_INTR_FLAG_BLOCK) {
- ret = ddi_intr_block_enable(sc->sc_intr_htable,
- sc->sc_intr_num);
- if (ret != DDI_SUCCESS) {
- dev_err(sc->sc_dev, CE_WARN,
- "Failed to enable MSI, falling back to INTx");
- goto out_enable;
- }
- } else {
- for (i = 0; i < sc->sc_intr_num; i++) {
- ret = ddi_intr_enable(sc->sc_intr_htable[i]);
- if (ret != DDI_SUCCESS) {
- dev_err(sc->sc_dev, CE_WARN,
- "Failed to enable MSI %d, "
- "falling back to INTx", i);
-
- while (--i >= 0) {
- (void) ddi_intr_disable(
- sc->sc_intr_htable[i]);
- }
- goto out_enable;
- }
- }
- }
-
- /* Bind the allocated MSI to the queues and config */
- for (i = 0; i < vq_handler_count; i++) {
- int check;
-
- ddi_put16(sc->sc_ioh,
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- (uint16_t *)(sc->sc_io_addr +
- VIRTIO_CONFIG_QUEUE_SELECT), i);
-
- ddi_put16(sc->sc_ioh,
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- (uint16_t *)(sc->sc_io_addr +
- VIRTIO_CONFIG_QUEUE_VECTOR), i);
-
- check = ddi_get16(sc->sc_ioh,
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- (uint16_t *)(sc->sc_io_addr +
- VIRTIO_CONFIG_QUEUE_VECTOR));
- if (check != i) {
- dev_err(sc->sc_dev, CE_WARN, "Failed to bind handler "
- "for VQ %d, MSI %d. Check = %x", i, i, check);
- ret = ENODEV;
- goto out_bind;
- }
- }
-
- if (sc->sc_intr_config) {
- int check;
-
- ddi_put16(sc->sc_ioh,
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- (uint16_t *)(sc->sc_io_addr +
- VIRTIO_CONFIG_CONFIG_VECTOR), i);
-
- check = ddi_get16(sc->sc_ioh,
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- (uint16_t *)(sc->sc_io_addr +
- VIRTIO_CONFIG_CONFIG_VECTOR));
- if (check != i) {
- dev_err(sc->sc_dev, CE_WARN, "Failed to bind handler "
- "for Config updates, MSI %d", i);
- ret = ENODEV;
- goto out_bind;
- }
- }
-
- /* Configuration offset depends on whether MSI-X is used. */
- if (sc->sc_int_type == DDI_INTR_TYPE_MSIX)
- sc->sc_config_offset = VIRTIO_CONFIG_DEVICE_CONFIG_MSIX;
- else
- ASSERT(sc->sc_int_type == DDI_INTR_TYPE_MSI);
-
- return (DDI_SUCCESS);
-
-out_bind:
- /* Unbind the vqs */
- for (i = 0; i < vq_handler_count - 1; i++) {
- ddi_put16(sc->sc_ioh,
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- (uint16_t *)(sc->sc_io_addr +
- VIRTIO_CONFIG_QUEUE_SELECT), i);
-
- ddi_put16(sc->sc_ioh,
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- (uint16_t *)(sc->sc_io_addr +
- VIRTIO_CONFIG_QUEUE_VECTOR),
- VIRTIO_MSI_NO_VECTOR);
- }
- /* And the config */
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- ddi_put16(sc->sc_ioh, (uint16_t *)(sc->sc_io_addr +
- VIRTIO_CONFIG_CONFIG_VECTOR), VIRTIO_MSI_NO_VECTOR);
-
- /* Disable the interrupts. Either the whole block, or one by one. */
- if (sc->sc_intr_cap & DDI_INTR_FLAG_BLOCK) {
- ret = ddi_intr_block_disable(sc->sc_intr_htable,
- sc->sc_intr_num);
- if (ret != DDI_SUCCESS) {
- dev_err(sc->sc_dev, CE_WARN,
- "Failed to disable MSIs, won't be able to "
- "reuse next time");
- }
- } else {
- for (i = 0; i < sc->sc_intr_num; i++) {
- ret = ddi_intr_disable(sc->sc_intr_htable[i]);
- if (ret != DDI_SUCCESS) {
- dev_err(sc->sc_dev, CE_WARN,
- "Failed to disable interrupt %d, "
- "won't be able to reuse", i);
- }
- }
- }
-
- ret = DDI_FAILURE;
-
-out_enable:
- return (ret);
-}
-
-static int
-virtio_enable_intx(struct virtio_softc *sc)
-{
- int ret;
-
- ret = ddi_intr_enable(sc->sc_intr_htable[0]);
- if (ret != DDI_SUCCESS) {
- dev_err(sc->sc_dev, CE_WARN,
- "Failed to enable interrupt: %d", ret);
- }
-
- return (ret);
-}
-
-/*
- * We can't enable/disable individual handlers in the INTx case so do
- * the whole bunch even in the msi case.
- */
-int
-virtio_enable_ints(struct virtio_softc *sc)
-{
-
- ASSERT(sc->sc_config_offset == VIRTIO_CONFIG_DEVICE_CONFIG_NOMSIX);
-
- /* See if we are using MSI. */
- if (sc->sc_int_type == DDI_INTR_TYPE_MSIX ||
- sc->sc_int_type == DDI_INTR_TYPE_MSI)
- return (virtio_enable_msi(sc));
-
- ASSERT(sc->sc_int_type == DDI_INTR_TYPE_FIXED);
- return (virtio_enable_intx(sc));
-}
-
-void
-virtio_release_ints(struct virtio_softc *sc)
-{
- int i;
- int ret;
-
- /* We were running with MSI, unbind them. */
- if (sc->sc_int_type == DDI_INTR_TYPE_MSIX ||
- sc->sc_int_type == DDI_INTR_TYPE_MSI) {
- /* Unbind all vqs */
- for (i = 0; i < sc->sc_nvqs; i++) {
- ddi_put16(sc->sc_ioh,
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- (uint16_t *)(sc->sc_io_addr +
- VIRTIO_CONFIG_QUEUE_SELECT), i);
-
- ddi_put16(sc->sc_ioh,
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- (uint16_t *)(sc->sc_io_addr +
- VIRTIO_CONFIG_QUEUE_VECTOR),
- VIRTIO_MSI_NO_VECTOR);
- }
- /* And the config */
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- ddi_put16(sc->sc_ioh, (uint16_t *)(sc->sc_io_addr +
- VIRTIO_CONFIG_CONFIG_VECTOR),
- VIRTIO_MSI_NO_VECTOR);
-
- }
-
- /* Disable the interrupts. Either the whole block, or one by one. */
- if (sc->sc_intr_cap & DDI_INTR_FLAG_BLOCK) {
- ret = ddi_intr_block_disable(sc->sc_intr_htable,
- sc->sc_intr_num);
- if (ret != DDI_SUCCESS) {
- dev_err(sc->sc_dev, CE_WARN,
- "Failed to disable MSIs, won't be able to "
- "reuse next time");
- }
- } else {
- for (i = 0; i < sc->sc_intr_num; i++) {
- ret = ddi_intr_disable(sc->sc_intr_htable[i]);
- if (ret != DDI_SUCCESS) {
- dev_err(sc->sc_dev, CE_WARN,
- "Failed to disable interrupt %d, "
- "won't be able to reuse", i);
- }
- }
- }
-
-
- for (i = 0; i < sc->sc_intr_num; i++) {
- (void) ddi_intr_remove_handler(sc->sc_intr_htable[i]);
- }
-
- for (i = 0; i < sc->sc_intr_num; i++)
- (void) ddi_intr_free(sc->sc_intr_htable[i]);
-
- kmem_free(sc->sc_intr_htable, sizeof (ddi_intr_handle_t) *
- sc->sc_intr_num);
-
- /* After disabling interrupts, the config offset is non-MSI-X. */
- sc->sc_config_offset = VIRTIO_CONFIG_DEVICE_CONFIG_NOMSIX;
-}
-
-/*
- * Module linkage information for the kernel.
- */
-static struct modlmisc modlmisc = {
- &mod_miscops, /* Type of module */
- "VirtIO common library module",
-};
-
-static struct modlinkage modlinkage = {
- MODREV_1,
- {
- (void *)&modlmisc,
- NULL
- }
-};
-
-int
-_init(void)
-{
- return (mod_install(&modlinkage));
-}
-
-int
-_fini(void)
-{
- return (mod_remove(&modlinkage));
-}
-
-int
-_info(struct modinfo *modinfop)
-{
- return (mod_info(&modlinkage, modinfop));
-}
diff --git a/usr/src/uts/common/io/virtio/virtio.h b/usr/src/uts/common/io/virtio/virtio.h
new file mode 100644
index 0000000000..420f9ccfed
--- /dev/null
+++ b/usr/src/uts/common/io/virtio/virtio.h
@@ -0,0 +1,342 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2019 Joyent, Inc.
+ */
+
+#ifndef _VIRTIO_H
+#define _VIRTIO_H
+
+/*
+ * VIRTIO FRAMEWORK
+ *
+ * This framework handles the initialisation and operation common to all Virtio
+ * device types; e.g., Virtio Block (vioblk), Virtio Network (vioif), etc. The
+ * framework presently provides for what is now described as a "legacy" driver
+ * in the current issue of the "Virtual I/O Device (VIRTIO) Version 1.1"
+ * specification. Though several new specifications have been released, legacy
+ * devices are still the most widely available on current hypervisor platforms.
+ * Legacy devices make use of the native byte order of the host system.
+ *
+ * FRAMEWORK INITIALISATION: STARTING
+ *
+ * Client drivers will, in their attach(9E) routine, make an early call to
+ * virtio_init(). This causes the framework to allocate some base resources
+ * and begin initialising the device. This routine confirms that the device
+ * will operate in the supported legacy mode as per the specification. A
+ * failure here means that we cannot presently support this device.
+ *
+ * Once virtio_init() returns, the initialisation phase has begun and the
+ * driver can examine negotiated features and set up virtqueues. The
+ * initialisation phase ends when the driver calls either
+ * virtio_init_complete() or virtio_fini().
+ *
+ * FRAMEWORK INITIALISATION: FEATURE NEGOTIATION
+ *
+ * The virtio_init() call accepts a bitmask of desired features that the driver
+ * supports. The framework will negotiate the common set of features supported
+ * by both the driver and the device. The presence of any individual feature
+ * can be tested after the initialisation phase has begun using
+ * virtio_feature_present().
+ *
+ * The framework will additionally negotiate some set of features that are not
+ * specific to a device type on behalf of the client driver; e.g., support for
+ * indirect descriptors.
+ *
+ * Some features allow the driver to read additional configuration values from
+ * the device-specific regions of the device register space. These can be
+ * accessed via the virtio_dev_get*() and virtio_dev_put*() family of
+ * functions.
+ *
+ * FRAMEWORK INITIALISATION: VIRTQUEUE CONFIGURATION
+ *
+ * During the initialisation phase, the client driver may configure some number
+ * of virtqueues with virtio_queue_alloc(). Once initialisation has been
+ * completed, no further queues can be configured without destroying the
+ * framework object and beginning again from scratch.
+ *
+ * When configuring a queue, the driver must know the queue index number. This
+ * generally comes from the section of the specification describing the
+ * specific device type; e.g., Virtio Network devices have a receive queue at
+ * index 0, and a transmit queue at index 1. The name given to the queue is
+ * informational and has no impact on device operation.
+ *
+ * Most queues will require an interrupt handler function. When a queue
+ * notification interrupt is received, the provided handler will be called with
+ * two arguments: first, the provided user data argument; and second, a pointer
+ * to the "virtio_t" object for this instance.
+ *
+ * A maximum segment count must be selected for each queue. This count is the
+ * upper bound on the number of scatter-gather cookies that will be accepted,
+ * and applies to both direct and indirect descriptor based queues. This cap
+ * is usually either negotiated with the device, or determined structurally
+ * based on the shape of the buffers required for device operation.
+ *
+ * FRAMEWORK INITIALISATION: FINISHING
+ *
+ * Once queue configuration has been completed, the client driver calls
+ * virtio_init_complete() to finalise resource allocation and set the device to
+ * the running state (DRIVER_OK). The framework will allocate any interrupts
+ * needed for queue notifications at this time.
+ *
+ * If the client driver cannot complete initialisation, the instance may
+ * instead be torn down with virtio_fini(). Signalling failure to this routine
+ * will report failure to the device instead of resetting it, which may be
+ * reported by the hypervisor as a fault.
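+ *
+ * As a rough sketch of the flow described above (the "mydrv" names, feature
+ * masks, and segment counts are illustrative assumptions, not part of the
+ * framework; the boolean arguments to virtio_init() and virtio_queue_alloc()
+ * and the final argument to virtio_init_complete() are likewise shown with
+ * assumed values), an attach(9E) routine might look like:
+ *
+ *     virtio_t *vio;
+ *     virtio_queue_t *viq;
+ *
+ *     /* Begin initialisation, offering the features this driver knows. */
+ *     if ((vio = virtio_init(dip, MYDRV_WANTED_FEATURES, B_TRUE)) == NULL)
+ *             return (DDI_FAILURE);
+ *
+ *     /* Examine the negotiated feature set. */
+ *     if (virtio_feature_present(vio, MYDRV_F_FANCY))
+ *             mydrv->md_fancy = B_TRUE;
+ *
+ *     /* Configure the queues required for this device type. */
+ *     viq = virtio_queue_alloc(vio, 0, "requests", mydrv_q_handler, mydrv,
+ *         B_FALSE, MYDRV_MAX_SEGS);
+ *
+ *     /* Finish initialisation and move the device to the running state. */
+ *     if (viq == NULL || virtio_init_complete(vio, 0) != DDI_SUCCESS) {
+ *             virtio_fini(vio, B_TRUE);
+ *             return (DDI_FAILURE);
+ *     }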
+ *
+ * DESCRIPTOR CHAINS
+ *
+ * Most devices accept I/O requests from the driver through at least one queue.
+ * Some devices are operated by submission of synchronous requests. The device
+ * is expected to process the request and return some kind of status; e.g., a
+ * block device accepts write requests from the file system and signals when
+ * they have completed or failed.
+ *
+ * Other devices operate by asynchronous delivery of I/O requests to the
+ * driver; e.g., a network device may receive incoming frames at any time.
+ * Inbound asynchronous delivery is usually achieved by populating a queue with
+ * a series of memory buffers where the incoming data will be written by the
+ * device at some later time.
+ *
+ * Whether for inbound or outbound transfers, buffers are inserted into the
+ * ring through chains of one or more descriptors. Each descriptor has a
+ * transfer direction (to or from the device), and a physical address and
+ * length (i.e., a DMA cookie). The framework automatically manages the slight
+ * differences in operation between direct and indirect descriptor usage on
+ * behalf of the client driver.
+ *
+ * A chain of descriptors is allocated by calling virtio_chain_alloc() against
+ * a particular queue. This function accepts a kmem flag as per
+ * kmem_alloc(9F). A client driver specific void pointer may be attached to
+ * the chain with virtio_chain_data_set() and read back later with
+ * virtio_chain_data(); e.g., after it is returned by a call to
+ * virtio_queue_poll().
+ *
+ * Cookies are added to a chain by calling virtio_chain_append() with the
+ * appropriate physical address and transfer direction. This function may fail
+ * if the chain is already using the maximum number of cookies for this queue.
+ * Client drivers are responsible for appropriate use of virtio_dma_sync()
+ * or ddi_dma_sync(9F) on any memory appended to a descriptor chain prior to
+ * chain submission.
+ *
+ * Once fully constructed and synced, a chain can be submitted to the device by
+ * calling virtio_chain_submit(). The caller may choose to flush the queue
+ * contents to the device on each submission, or to batch notifications until
+ * later to amortise the notification cost over more requests. If batching
+ * notifications, outstanding submissions can be flushed with a call to
+ * virtio_queue_flush(). Note that the framework will insert an appropriate
+ * memory barrier to ensure writes by the driver complete before making the
+ * submitted descriptor visible to the device.
+ *
+ * A chain may be reset for reuse with new cookies by calling
+ * virtio_chain_clear(). The chain may be freed completely by calling
+ * virtio_chain_free().
+ *
+ * When a descriptor chain is returned to the driver by the device, it may
+ * include a received data length value. This value can be accessed via
+ * virtio_chain_received_length(). There is some suggestion in more recent
+ * Virtio specifications that, depending on the device type and the
+ * hypervisor, this value may not always be accurate or useful.
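+ *
+ * To sketch the submission path, assuming "viq" is a queue allocated earlier,
+ * "mr" is a hypothetical driver request structure, and virtio_chain_append()
+ * follows the usual DDI_SUCCESS return convention:
+ *
+ *     virtio_chain_t *vic;
+ *
+ *     if ((vic = virtio_chain_alloc(viq, KM_NOSLEEP)) == NULL)
+ *             return (ENOMEM);
+ *     virtio_chain_data_set(vic, mr);
+ *
+ *     /* The device reads the request header and writes the data buffer. */
+ *     if (virtio_chain_append(vic, mr->mr_hdr_pa, mr->mr_hdr_len,
+ *         VIRTIO_DIR_DEVICE_READS) != DDI_SUCCESS ||
+ *         virtio_chain_append(vic, mr->mr_buf_pa, mr->mr_buf_len,
+ *         VIRTIO_DIR_DEVICE_WRITES) != DDI_SUCCESS) {
+ *             virtio_chain_free(vic);
+ *             return (ENOMEM);
+ *     }
+ *
+ *     /* Sync driver-written memory, then submit and notify the device. */
+ *     virtio_dma_sync(mr->mr_dma, DDI_DMA_SYNC_FORDEV);
+ *     virtio_chain_submit(vic, B_TRUE);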
+ *
+ * VIRTQUEUE OPERATION
+ *
+ * The queue size (i.e., the number of direct descriptor entries) can be
+ * found with virtio_queue_size(). This value is static over the lifetime
+ * of the queue.
+ *
+ * The number of descriptor chains presently submitted to the device and not
+ * yet returned can be obtained via virtio_queue_nactive().
+ *
+ * Over time the device will return descriptor chains to the driver in response
+ * to device activity. Any newly returned chains may be retrieved by the
+ * driver by calling virtio_queue_poll(). See the DESCRIPTOR CHAINS section
+ * for more detail about managing descriptor chain objects. Note that the
+ * framework will insert an appropriate memory barrier to ensure that writes by
+ * the host are complete before returning the chain to the client driver.
+ *
+ * The NO_INTERRUPT flag on a queue may be set or cleared with
+ * virtio_queue_no_interrupt(). Note that this flag is purely advisory, and
+ * may not actually stop interrupts from the device in a timely fashion.
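+ *
+ * A completion path for the polling described above might, as a sketch using
+ * the same assumed "mydrv" names, look like:
+ *
+ *     virtio_chain_t *vic;
+ *
+ *     while ((vic = virtio_queue_poll(viq)) != NULL) {
+ *             mydrv_req_t *mr = virtio_chain_data(vic);
+ *             size_t rlen = virtio_chain_received_length(vic);
+ *
+ *             virtio_dma_sync(mr->mr_dma, DDI_DMA_SYNC_FORCPU);
+ *             mydrv_req_complete(mr, rlen);
+ *             virtio_chain_free(vic);
+ *     }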
+ *
+ * INTERRUPT MANAGEMENT
+ *
+ * A mutex used within an interrupt handler must be initialised with the
+ * correct interrupt priority. After the initialisation phase is complete, the
+ * client should use virtio_intr_pri() to get a value suitable to pass to
+ * mutex_init(9F).
+ *
+ * When the driver is ready to receive notifications from the device, the
+ * virtio_interrupts_enable() routine may be called. Interrupts may be
+ * disabled again by calling virtio_interrupts_disable(). Interrupt resources
+ * will be deallocated as part of a subsequent call to virtio_fini().
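+ *
+ * For example (a sketch only; the mutex and soft state names are assumed), a
+ * driver lock shared with a queue handler could be initialised and
+ * notifications enabled as follows:
+ *
+ *     mutex_init(&mydrv->md_mutex, NULL, MUTEX_DRIVER,
+ *         virtio_intr_pri(vio));
+ *
+ *     if (virtio_interrupts_enable(vio) != DDI_SUCCESS) {
+ *             virtio_fini(vio, B_TRUE);
+ *             return (DDI_FAILURE);
+ *     }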
+ *
+ * DMA MEMORY MANAGEMENT: ALLOCATION AND FREE
+ *
+ * Client drivers may allocate memory suitable for communication with the
+ * device by using virtio_dma_alloc(). This function accepts an allocation
+ * size, a DMA attribute template, a set of DMA flags, and a kmem flag.
+ * A "virtio_dma_t" object is returned to track and manage the allocation.
+ *
+ * The DMA flags value will be a combination of direction flags (e.g.,
+ * DDI_DMA_READ or DDI_DMA_WRITE) and mapping flags (e.g., DDI_DMA_CONSISTENT
+ * or DDI_DMA_STREAMING). The kmem flag is either KM_SLEEP or KM_NOSLEEP,
+ * as described in kmem_alloc(9F).
+ *
+ * Memory that is no longer required can be freed using virtio_dma_free().
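+ *
+ * A minimal sketch, assuming a driver-supplied DMA attribute template and
+ * buffer size (both names are illustrative):
+ *
+ *     virtio_dma_t *vd;
+ *
+ *     if ((vd = virtio_dma_alloc(vio, MYDRV_BUFSZ, &mydrv_dma_attr,
+ *         DDI_DMA_READ | DDI_DMA_STREAMING, KM_SLEEP)) == NULL)
+ *             return (ENOMEM);
+ *
+ *     ... use the buffer ...
+ *
+ *     virtio_dma_free(vd);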
+ *
+ * DMA MEMORY MANAGEMENT: BINDING WITHOUT ALLOCATION
+ *
+ * If another subsystem has loaned memory to your client driver, you may need
+ * to allocate and bind a handle without additional backing memory. The
+ * virtio_dma_alloc_nomem() function can be used for this purpose, returning a
+ * "virtio_dma_t" object.
+ *
+ * Once allocated, an arbitrary kernel memory location can be bound for DMA
+ * with virtio_dma_bind(). The binding can be subsequently undone with
+ * virtio_dma_unbind(), allowing the "virtio_dma_t" object to be reused for
+ * another binding.
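+ *
+ * A sketch of binding loaned memory (the loaned buffer, its length, and the
+ * attribute template are assumptions):
+ *
+ *     virtio_dma_t *vd;
+ *
+ *     if ((vd = virtio_dma_alloc_nomem(vio, &mydrv_dma_attr,
+ *         KM_SLEEP)) == NULL)
+ *             return (ENOMEM);
+ *
+ *     if (virtio_dma_bind(vd, loaned_va, loaned_len,
+ *         DDI_DMA_WRITE | DDI_DMA_CONSISTENT, KM_SLEEP) != DDI_SUCCESS) {
+ *             virtio_dma_free(vd);
+ *             return (EIO);
+ *     }
+ *
+ *     ... use the binding ...
+ *
+ *     virtio_dma_unbind(vd);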
+ *
+ * DMA MEMORY MANAGEMENT: VIRTUAL AND PHYSICAL ADDRESSES
+ *
+ * The total size of a mapping (with or without its own backing memory) can be
+ * found with virtio_dma_size(). A void pointer to a kernel virtual address
+ * within the buffer can be obtained via virtio_dma_va(); this function accepts
+ * a linear offset into the VA range and performs bounds checking.
+ *
+ * The number of physical memory addresses (DMA cookies) can be found with
+ * virtio_dma_ncookies(). The physical address and length of each cookie can
+ * be found with virtio_dma_cookie_pa() and virtio_dma_cookie_size(); these
+ * functions are keyed on the zero-indexed cookie number.
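+ *
+ * For instance, every cookie of a binding might be appended to a descriptor
+ * chain as follows (a sketch; error handling elided):
+ *
+ *     for (uint_t n = 0; n < virtio_dma_ncookies(vd); n++) {
+ *             (void) virtio_chain_append(vic, virtio_dma_cookie_pa(vd, n),
+ *                 virtio_dma_cookie_size(vd, n), VIRTIO_DIR_DEVICE_WRITES);
+ *     }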
+ *
+ * DMA MEMORY MANAGEMENT: SYNCHRONISATION
+ *
+ * When passing memory to the device, or reading memory returned from the
+ * device, DMA synchronisation must be performed in case it is required by the
+ * underlying platform. A convenience wrapper exists: virtio_dma_sync(). This
+ * routine synchronises the entire binding and accepts the same synchronisation
+ * type values as ddi_dma_sync(9F).
+ *
+ * QUIESCE
+ *
+ * As quiesce(9E) merely requires that the device come to a complete stop, most
+ * client drivers will be able to call virtio_quiesce() without additional
+ * actions. This will reset the device, immediately halting all queue
+ * activity, and return a value suitable for returning from the client driver
+ * quiesce(9E) entrypoint. This routine must only be called from quiesce
+ * context as it performs no synchronisation with other threads.
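+ *
+ * For most client drivers the quiesce(9E) entrypoint could thus be little
+ * more than the following sketch (soft state retrieval is an assumption):
+ *
+ *     static int
+ *     mydrv_quiesce(dev_info_t *dip)
+ *     {
+ *             mydrv_t *mydrv = ddi_get_driver_private(dip);
+ *
+ *             return (virtio_quiesce(mydrv->md_virtio));
+ *     }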
+ *
+ * DETACH
+ *
+ * Some devices are effectively long-polled; that is, the driver submits some
+ * number of descriptor chains that the device does not return until some
+ * asynchronous event occurs, such as the receipt of an incoming packet or a
+ * device hot plug event. When detaching the device, the return of these
+ * outstanding buffers must be arranged. Some device types may have task
+ * management commands that can force the orderly return of these chains, but
+ * the only way to do so uniformly is to reset the device and claw back the
+ * memory.
+ *
+ * If the client driver has outstanding descriptors and needs a hard stop on
+ * device activity it can call virtio_shutdown(). This routine will bring
+ * queue processing to an orderly stop and then reset the device, causing it to
+ * cease use of any DMA resources. Once this function returns, the driver may
+ * call virtio_queue_evacuate() on each queue to retrieve any previously
+ * submitted chains.
+ *
+ * To tear down resources (e.g., interrupts and allocated memory) the client
+ * driver must finally call virtio_fini(). If virtio_shutdown() was not
+ * needed, this routine will also reset the device.
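+ *
+ * A detach(9E) sequence for such a device might then be sketched as:
+ *
+ *     virtio_chain_t *vic;
+ *
+ *     virtio_shutdown(vio);
+ *     while ((vic = virtio_queue_evacuate(viq)) != NULL) {
+ *             mydrv_req_free(virtio_chain_data(vic));
+ *             virtio_chain_free(vic);
+ *     }
+ *
+ *     virtio_fini(vio, B_FALSE);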
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct virtio virtio_t;
+typedef struct virtio_queue virtio_queue_t;
+typedef struct virtio_chain virtio_chain_t;
+typedef struct virtio_dma virtio_dma_t;
+
+typedef enum virtio_direction {
+ /*
+ * In the base specification, a descriptor is either set up to be
+ * written by the device or to be read by the device, but not both.
+ */
+ VIRTIO_DIR_DEVICE_WRITES = 1,
+ VIRTIO_DIR_DEVICE_READS
+} virtio_direction_t;
+
+void virtio_fini(virtio_t *, boolean_t);
+virtio_t *virtio_init(dev_info_t *, uint64_t, boolean_t);
+int virtio_init_complete(virtio_t *, int);
+int virtio_quiesce(virtio_t *);
+void virtio_shutdown(virtio_t *);
+
+void *virtio_intr_pri(virtio_t *);
+
+void virtio_device_reset(virtio_t *);
+
+uint8_t virtio_dev_get8(virtio_t *, uintptr_t);
+uint16_t virtio_dev_get16(virtio_t *, uintptr_t);
+uint32_t virtio_dev_get32(virtio_t *, uintptr_t);
+uint64_t virtio_dev_get64(virtio_t *, uintptr_t);
+
+void virtio_dev_put8(virtio_t *, uintptr_t, uint8_t);
+void virtio_dev_put16(virtio_t *, uintptr_t, uint16_t);
+void virtio_dev_put32(virtio_t *, uintptr_t, uint32_t);
+
+boolean_t virtio_feature_present(virtio_t *, uint64_t);
+
+virtio_queue_t *virtio_queue_alloc(virtio_t *, uint16_t, const char *,
+ ddi_intr_handler_t *, void *, boolean_t, uint_t);
+
+virtio_chain_t *virtio_queue_poll(virtio_queue_t *);
+virtio_chain_t *virtio_queue_evacuate(virtio_queue_t *);
+void virtio_queue_flush(virtio_queue_t *);
+void virtio_queue_no_interrupt(virtio_queue_t *, boolean_t);
+uint_t virtio_queue_nactive(virtio_queue_t *);
+uint_t virtio_queue_size(virtio_queue_t *);
+
+virtio_chain_t *virtio_chain_alloc(virtio_queue_t *, int);
+void virtio_chain_clear(virtio_chain_t *);
+void virtio_chain_free(virtio_chain_t *);
+int virtio_chain_append(virtio_chain_t *, uint64_t, size_t, virtio_direction_t);
+
+void *virtio_chain_data(virtio_chain_t *);
+void virtio_chain_data_set(virtio_chain_t *, void *);
+
+void virtio_chain_submit(virtio_chain_t *, boolean_t);
+size_t virtio_chain_received_length(virtio_chain_t *);
+
+int virtio_interrupts_enable(virtio_t *);
+void virtio_interrupts_disable(virtio_t *);
+
+virtio_dma_t *virtio_dma_alloc(virtio_t *, size_t, const ddi_dma_attr_t *, int,
+ int);
+virtio_dma_t *virtio_dma_alloc_nomem(virtio_t *, const ddi_dma_attr_t *, int);
+void virtio_dma_free(virtio_dma_t *);
+int virtio_dma_bind(virtio_dma_t *, void *, size_t, int, int);
+void virtio_dma_unbind(virtio_dma_t *);
+void virtio_dma_sync(virtio_dma_t *, int);
+
+void *virtio_dma_va(virtio_dma_t *, size_t);
+size_t virtio_dma_size(virtio_dma_t *);
+uint_t virtio_dma_ncookies(virtio_dma_t *);
+uint64_t virtio_dma_cookie_pa(virtio_dma_t *, uint_t);
+size_t virtio_dma_cookie_size(virtio_dma_t *, uint_t);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _VIRTIO_H */
diff --git a/usr/src/uts/common/io/virtio/virtio_dma.c b/usr/src/uts/common/io/virtio/virtio_dma.c
new file mode 100644
index 0000000000..81972b5402
--- /dev/null
+++ b/usr/src/uts/common/io/virtio/virtio_dma.c
@@ -0,0 +1,295 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2019 Joyent, Inc.
+ */
+
+/*
+ * VIRTIO FRAMEWORK: DMA ROUTINES
+ *
+ * For design and usage documentation, see the comments in "virtio.h".
+ */
+
+#include <sys/conf.h>
+#include <sys/kmem.h>
+#include <sys/debug.h>
+#include <sys/modctl.h>
+#include <sys/autoconf.h>
+#include <sys/ddi_impldefs.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/sunndi.h>
+#include <sys/avintr.h>
+#include <sys/spl.h>
+#include <sys/promif.h>
+#include <sys/list.h>
+#include <sys/bootconf.h>
+#include <sys/bootsvcs.h>
+#include <sys/sysmacros.h>
+#include <sys/pci.h>
+
+#include "virtio.h"
+#include "virtio_impl.h"
+
+
+
+void
+virtio_dma_sync(virtio_dma_t *vidma, int flag)
+{
+ VERIFY0(ddi_dma_sync(vidma->vidma_dma_handle, 0, 0, flag));
+}
+
+uint_t
+virtio_dma_ncookies(virtio_dma_t *vidma)
+{
+ return (vidma->vidma_dma_ncookies);
+}
+
+size_t
+virtio_dma_size(virtio_dma_t *vidma)
+{
+ return (vidma->vidma_size);
+}
+
+void *
+virtio_dma_va(virtio_dma_t *vidma, size_t offset)
+{
+ VERIFY3U(offset, <, vidma->vidma_size);
+
+ return (vidma->vidma_va + offset);
+}
+
+uint64_t
+virtio_dma_cookie_pa(virtio_dma_t *vidma, uint_t cookie)
+{
+ VERIFY3U(cookie, <, vidma->vidma_dma_ncookies);
+
+ return (vidma->vidma_dma_cookies[cookie].dmac_laddress);
+}
+
+size_t
+virtio_dma_cookie_size(virtio_dma_t *vidma, uint_t cookie)
+{
+ VERIFY3U(cookie, <, vidma->vidma_dma_ncookies);
+
+ return (vidma->vidma_dma_cookies[cookie].dmac_size);
+}
+
+int
+virtio_dma_init_handle(virtio_t *vio, virtio_dma_t *vidma,
+ const ddi_dma_attr_t *attr, int kmflags)
+{
+ int r;
+ dev_info_t *dip = vio->vio_dip;
+
+ VERIFY(kmflags == KM_SLEEP || kmflags == KM_NOSLEEP);
+ int (*dma_wait)(caddr_t) = (kmflags == KM_SLEEP) ? DDI_DMA_SLEEP :
+ DDI_DMA_DONTWAIT;
+
+ vidma->vidma_virtio = vio;
+
+ /*
+ * Ensure we don't try to allocate a second time using the same
+ * tracking object.
+ */
+ VERIFY0(vidma->vidma_level);
+
+ if ((r = ddi_dma_alloc_handle(dip, (ddi_dma_attr_t *)attr, dma_wait,
+ NULL, &vidma->vidma_dma_handle)) != DDI_SUCCESS) {
+ dev_err(dip, CE_WARN, "DMA handle allocation failed (%x)", r);
+ goto fail;
+ }
+ vidma->vidma_level |= VIRTIO_DMALEVEL_HANDLE_ALLOC;
+
+ return (DDI_SUCCESS);
+
+fail:
+ virtio_dma_fini(vidma);
+ return (DDI_FAILURE);
+}
+
+int
+virtio_dma_init(virtio_t *vio, virtio_dma_t *vidma, size_t sz,
+ const ddi_dma_attr_t *attr, int dmaflags, int kmflags)
+{
+ int r;
+ dev_info_t *dip = vio->vio_dip;
+ caddr_t va = NULL;
+
+ VERIFY(kmflags == KM_SLEEP || kmflags == KM_NOSLEEP);
+ int (*dma_wait)(caddr_t) = (kmflags == KM_SLEEP) ? DDI_DMA_SLEEP :
+ DDI_DMA_DONTWAIT;
+
+ if (virtio_dma_init_handle(vio, vidma, attr, kmflags) !=
+ DDI_SUCCESS) {
+ goto fail;
+ }
+
+ if ((r = ddi_dma_mem_alloc(vidma->vidma_dma_handle, sz,
+ &virtio_acc_attr,
+ dmaflags & (DDI_DMA_STREAMING | DDI_DMA_CONSISTENT),
+ dma_wait, NULL, &va, &vidma->vidma_real_size,
+ &vidma->vidma_acc_handle)) != DDI_SUCCESS) {
+ dev_err(dip, CE_WARN, "DMA memory allocation failed (%x)", r);
+ goto fail;
+ }
+ vidma->vidma_level |= VIRTIO_DMALEVEL_MEMORY_ALLOC;
+
+ /*
+ * Zero the memory to avoid accidental exposure of arbitrary kernel
+ * memory.
+ */
+ bzero(va, vidma->vidma_real_size);
+
+ if (virtio_dma_bind(vidma, va, sz, dmaflags, kmflags) != DDI_SUCCESS) {
+ goto fail;
+ }
+
+ return (DDI_SUCCESS);
+
+fail:
+ virtio_dma_fini(vidma);
+ return (DDI_FAILURE);
+}
+
+int
+virtio_dma_bind(virtio_dma_t *vidma, void *va, size_t sz, int dmaflags,
+ int kmflags)
+{
+ int r;
+ dev_info_t *dip = vidma->vidma_virtio->vio_dip;
+ ddi_dma_cookie_t dmac;
+
+ VERIFY(kmflags == KM_SLEEP || kmflags == KM_NOSLEEP);
+ int (*dma_wait)(caddr_t) = (kmflags == KM_SLEEP) ? DDI_DMA_SLEEP :
+ DDI_DMA_DONTWAIT;
+
+ VERIFY(vidma->vidma_level & VIRTIO_DMALEVEL_HANDLE_ALLOC);
+ VERIFY(!(vidma->vidma_level & VIRTIO_DMALEVEL_HANDLE_BOUND));
+
+ vidma->vidma_va = va;
+ vidma->vidma_size = sz;
+
+ if ((r = ddi_dma_addr_bind_handle(vidma->vidma_dma_handle, NULL,
+ vidma->vidma_va, vidma->vidma_size, dmaflags, dma_wait, NULL,
+ &dmac, &vidma->vidma_dma_ncookies)) != DDI_DMA_MAPPED) {
+ VERIFY3S(r, !=, DDI_DMA_PARTIAL_MAP);
+ dev_err(dip, CE_WARN, "DMA handle bind failed (%x)", r);
+ goto fail;
+ }
+ vidma->vidma_level |= VIRTIO_DMALEVEL_HANDLE_BOUND;
+
+ if ((vidma->vidma_dma_cookies = kmem_alloc(
+ vidma->vidma_dma_ncookies * sizeof (ddi_dma_cookie_t),
+ kmflags)) == NULL) {
+ dev_err(dip, CE_WARN, "DMA cookie array allocation failure");
+ goto fail;
+ }
+ vidma->vidma_level |= VIRTIO_DMALEVEL_COOKIE_ARRAY;
+
+ vidma->vidma_dma_cookies[0] = dmac;
+ for (uint_t n = 1; n < vidma->vidma_dma_ncookies; n++) {
+ ddi_dma_nextcookie(vidma->vidma_dma_handle,
+ &vidma->vidma_dma_cookies[n]);
+ }
+
+ return (DDI_SUCCESS);
+
+fail:
+ virtio_dma_unbind(vidma);
+ return (DDI_FAILURE);
+}
+
+virtio_dma_t *
+virtio_dma_alloc(virtio_t *vio, size_t sz, const ddi_dma_attr_t *attr,
+ int dmaflags, int kmflags)
+{
+ virtio_dma_t *vidma;
+
+ if ((vidma = kmem_zalloc(sizeof (*vidma), kmflags)) == NULL) {
+ return (NULL);
+ }
+
+ if (virtio_dma_init(vio, vidma, sz, attr, dmaflags, kmflags) !=
+ DDI_SUCCESS) {
+ kmem_free(vidma, sizeof (*vidma));
+ return (NULL);
+ }
+
+ return (vidma);
+}
+
+virtio_dma_t *
+virtio_dma_alloc_nomem(virtio_t *vio, const ddi_dma_attr_t *attr, int kmflags)
+{
+ virtio_dma_t *vidma;
+
+ if ((vidma = kmem_zalloc(sizeof (*vidma), kmflags)) == NULL) {
+ return (NULL);
+ }
+
+ if (virtio_dma_init_handle(vio, vidma, attr, kmflags) != DDI_SUCCESS) {
+ kmem_free(vidma, sizeof (*vidma));
+ return (NULL);
+ }
+
+ return (vidma);
+}
+
+void
+virtio_dma_fini(virtio_dma_t *vidma)
+{
+ virtio_dma_unbind(vidma);
+
+ if (vidma->vidma_level & VIRTIO_DMALEVEL_MEMORY_ALLOC) {
+ ddi_dma_mem_free(&vidma->vidma_acc_handle);
+
+ vidma->vidma_level &= ~VIRTIO_DMALEVEL_MEMORY_ALLOC;
+ }
+
+ if (vidma->vidma_level & VIRTIO_DMALEVEL_HANDLE_ALLOC) {
+ ddi_dma_free_handle(&vidma->vidma_dma_handle);
+
+ vidma->vidma_level &= ~VIRTIO_DMALEVEL_HANDLE_ALLOC;
+ }
+
+ VERIFY0(vidma->vidma_level);
+ bzero(vidma, sizeof (*vidma));
+}
+
+void
+virtio_dma_unbind(virtio_dma_t *vidma)
+{
+ if (vidma->vidma_level & VIRTIO_DMALEVEL_COOKIE_ARRAY) {
+ kmem_free(vidma->vidma_dma_cookies,
+ vidma->vidma_dma_ncookies * sizeof (ddi_dma_cookie_t));
+
+ vidma->vidma_level &= ~VIRTIO_DMALEVEL_COOKIE_ARRAY;
+ }
+
+ if (vidma->vidma_level & VIRTIO_DMALEVEL_HANDLE_BOUND) {
+ VERIFY3U(ddi_dma_unbind_handle(vidma->vidma_dma_handle), ==,
+ DDI_SUCCESS);
+
+ vidma->vidma_level &= ~VIRTIO_DMALEVEL_HANDLE_BOUND;
+ }
+
+ vidma->vidma_va = 0;
+ vidma->vidma_size = 0;
+}
+
+void
+virtio_dma_free(virtio_dma_t *vidma)
+{
+ virtio_dma_fini(vidma);
+ kmem_free(vidma, sizeof (*vidma));
+}
diff --git a/usr/src/uts/common/io/virtio/virtio_impl.h b/usr/src/uts/common/io/virtio/virtio_impl.h
new file mode 100644
index 0000000000..518667c7f4
--- /dev/null
+++ b/usr/src/uts/common/io/virtio/virtio_impl.h
@@ -0,0 +1,368 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2019 Joyent, Inc.
+ */
+
+#ifndef _VIRTIO_IMPL_H
+#define _VIRTIO_IMPL_H
+
+/*
+ * VIRTIO FRAMEWORK: FRAMEWORK-PRIVATE DEFINITIONS
+ *
+ * For design and usage documentation, see the comments in "virtio.h".
+ *
+ * NOTE: Client drivers should not use definitions from this file.
+ */
+
+#include <sys/types.h>
+#include <sys/dditypes.h>
+#include <sys/list.h>
+#include <sys/ccompile.h>
+
+#include "virtio.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern ddi_device_acc_attr_t virtio_acc_attr;
+extern ddi_dma_attr_t virtio_dma_attr;
+
+typedef struct virtio_vq_desc virtio_vq_desc_t;
+typedef struct virtio_vq_driver virtio_vq_driver_t;
+typedef struct virtio_vq_device virtio_vq_device_t;
+typedef struct virtio_vq_elem virtio_vq_elem_t;
+
+int virtio_dma_init(virtio_t *, virtio_dma_t *, size_t, const ddi_dma_attr_t *,
+ int, int);
+void virtio_dma_fini(virtio_dma_t *);
+
+
+
+typedef enum virtio_dma_level {
+ VIRTIO_DMALEVEL_HANDLE_ALLOC = (1ULL << 0),
+ VIRTIO_DMALEVEL_MEMORY_ALLOC = (1ULL << 1),
+ VIRTIO_DMALEVEL_HANDLE_BOUND = (1ULL << 2),
+ VIRTIO_DMALEVEL_COOKIE_ARRAY = (1ULL << 3),
+} virtio_dma_level_t;
+
+struct virtio_dma {
+ virtio_dma_level_t vidma_level;
+ virtio_t *vidma_virtio;
+ caddr_t vidma_va;
+ size_t vidma_size;
+ size_t vidma_real_size;
+ ddi_dma_handle_t vidma_dma_handle;
+ ddi_acc_handle_t vidma_acc_handle;
+ uint_t vidma_dma_ncookies;
+ ddi_dma_cookie_t *vidma_dma_cookies;
+};
+
+typedef enum virtio_initlevel {
+ VIRTIO_INITLEVEL_REGS = (1ULL << 0),
+ VIRTIO_INITLEVEL_PROVIDER = (1ULL << 1),
+ VIRTIO_INITLEVEL_INT_ALLOC = (1ULL << 2),
+ VIRTIO_INITLEVEL_INT_ADDED = (1ULL << 3),
+ VIRTIO_INITLEVEL_INT_ENABLED = (1ULL << 4),
+ VIRTIO_INITLEVEL_SHUTDOWN = (1ULL << 5),
+} virtio_initlevel_t;
+
+struct virtio {
+ dev_info_t *vio_dip;
+
+ kmutex_t vio_mutex;
+
+ virtio_initlevel_t vio_initlevel;
+
+ list_t vio_queues;
+
+ ddi_acc_handle_t vio_barh;
+ caddr_t vio_bar;
+ uint_t vio_config_offset;
+
+ uint32_t vio_features;
+ uint32_t vio_features_device;
+
+ ddi_intr_handle_t *vio_interrupts;
+ int vio_ninterrupts;
+ int vio_interrupt_type;
+ int vio_interrupt_cap;
+ uint_t vio_interrupt_priority;
+};
+
+struct virtio_queue {
+ virtio_t *viq_virtio;
+ kmutex_t viq_mutex;
+ const char *viq_name;
+ list_node_t viq_link;
+
+ boolean_t viq_shutdown;
+ boolean_t viq_indirect;
+ uint_t viq_max_segs;
+
+ /*
+ * Each Virtio device type has some set of queues for data transfer to
+ * and from the host. This index is described in the specification for
+ * the particular device and queue type, and written to QUEUE_SELECT to
+ * allow interaction with the queue. For example, a network device has
+ * at least a receive queue with index 0, and a transmit queue with
+ * index 1.
+ */
+ uint16_t viq_index;
+
+ /*
+ * For legacy Virtio devices, the size and shape of the queue is
+ * determined entirely by the number of queue entries.
+ */
+ uint16_t viq_size;
+ id_space_t *viq_descmap;
+
+ /*
+ * The memory shared between the device and the driver is allocated as
+ * a large physically contiguous chunk. Access to this area is
+ * through three pointers to packed structures.
+ */
+ virtio_dma_t viq_dma;
+ virtio_vq_desc_t *viq_dma_descs;
+ virtio_vq_driver_t *viq_dma_driver;
+ virtio_vq_device_t *viq_dma_device;
+
+ uint16_t viq_device_index;
+ uint16_t viq_driver_index;
+
+ /*
+ * Interrupt handler function, or NULL if not provided.
+ */
+ ddi_intr_handler_t *viq_func;
+ void *viq_funcarg;
+ boolean_t viq_handler_added;
+ uint_t viq_handler_index;
+
+ /*
+ * When a chain is submitted to the queue, it is also stored in this
+ * AVL tree keyed by the index of the first descriptor in the chain.
+ */
+ avl_tree_t viq_inflight;
+};
+
+struct virtio_chain {
+ virtio_queue_t *vic_vq;
+ avl_node_t vic_node;
+
+ void *vic_data;
+
+ uint16_t vic_head;
+ uint32_t vic_received_length;
+
+ virtio_dma_t vic_indirect_dma;
+ uint_t vic_indirect_capacity;
+ uint_t vic_indirect_used;
+
+ uint_t vic_direct_capacity;
+ uint_t vic_direct_used;
+ uint16_t vic_direct[];
+};
+
+/*
+ * PACKED STRUCTS FOR DEVICE ACCESS
+ */
+
+struct virtio_vq_desc {
+ /*
+ * Buffer physical address and length.
+ */
+ uint64_t vqd_addr;
+ uint32_t vqd_len;
+
+ /*
+ * Flags. Use with the VIRTQ_DESC_F_* family of constants. See below.
+ */
+ uint16_t vqd_flags;
+
+ /*
+ * If VIRTQ_DESC_F_NEXT is set in flags, this refers to the next
+ * descriptor in the chain by table index.
+ */
+ uint16_t vqd_next;
+} __packed;
+
+/*
+ * VIRTIO DESCRIPTOR FLAGS (vqd_flags)
+ */
+
+/*
+ * NEXT:
+ * Signals that this descriptor (direct or indirect) is part of a chain.
+ * If populated, "vqd_next" names the next descriptor in the chain by its
+ * table index.
+ */
+#define VIRTQ_DESC_F_NEXT (1 << 0)
+
+/*
+ * WRITE:
+ * Determines whether this buffer is to be written by the device (WRITE is
+ * set) or by the driver (WRITE is not set).
+ */
+#define VIRTQ_DESC_F_WRITE (1 << 1)
+
+/*
+ * INDIRECT:
+ * This bit signals that a direct descriptor refers to an indirect
+ * descriptor list, rather than directly to a buffer. This bit may only
+ * be used in a direct descriptor; indirect descriptors are not allowed to
+ * refer to additional layers of indirect tables. If this bit is set,
+ * NEXT must be clear; indirect descriptors may not be chained.
+ */
+#define VIRTQ_DESC_F_INDIRECT (1 << 2)
+
+/*
+ * This structure is variously known as the "available" or "avail" ring, or the
+ * driver-owned portion of the queue structure. It is used by the driver to
+ * submit descriptor chains to the device.
+ */
+struct virtio_vq_driver {
+ uint16_t vqdr_flags;
+ uint16_t vqdr_index;
+ uint16_t vqdr_ring[];
+} __packed;
+
+#define VIRTQ_AVAIL_F_NO_INTERRUPT (1 << 0)
+
+/*
+ * We use the sizeof operator on this packed struct to calculate the offset of
+ * subsequent structs. Ensure the compiler is not adding any padding to the
+ * end of the struct.
+ */
+CTASSERT(sizeof (virtio_vq_driver_t) ==
+ offsetof(virtio_vq_driver_t, vqdr_ring));
+
+struct virtio_vq_elem {
+ /*
+ * The device returns chains of descriptors by specifying the table
+ * index of the first descriptor in the chain.
+ */
+ uint32_t vqe_start;
+ uint32_t vqe_len;
+} __packed;
+
+/*
+ * This structure is variously known as the "used" ring, or the device-owned
+ * portion of the queue structure. It is used by the device to return
+ * completed descriptor chains to the driver.
+ */
+struct virtio_vq_device {
+ uint16_t vqde_flags;
+ uint16_t vqde_index;
+ virtio_vq_elem_t vqde_ring[];
+} __packed;
+
+#define VIRTQ_USED_F_NO_NOTIFY (1 << 0)
+
+/*
+ * BASIC CONFIGURATION
+ *
+ * Legacy devices expose both their generic and their device-specific
+ * configuration through PCI BAR0. This is the second entry in the register
+ * address space set for these devices.
+ */
+#define VIRTIO_LEGACY_PCI_BAR0 1
+
+/*
+ * These are offsets into the base configuration space available through the
+ * virtio_get*() and virtio_put*() family of functions. These offsets are for
+ * what the specification describes as the "legacy" mode of device operation.
+ */
+#define VIRTIO_LEGACY_FEATURES_DEVICE 0x00 /* 32 R */
+#define VIRTIO_LEGACY_FEATURES_DRIVER 0x04 /* 32 R/W */
+#define VIRTIO_LEGACY_QUEUE_ADDRESS 0x08 /* 32 R/W */
+#define VIRTIO_LEGACY_QUEUE_SIZE 0x0C /* 16 R */
+#define VIRTIO_LEGACY_QUEUE_SELECT 0x0E /* 16 R/W */
+#define VIRTIO_LEGACY_QUEUE_NOTIFY 0x10 /* 16 R/W */
+#define VIRTIO_LEGACY_DEVICE_STATUS 0x12 /* 8 R/W */
+#define VIRTIO_LEGACY_ISR_STATUS 0x13 /* 8 R */
+
+#define VIRTIO_LEGACY_MSIX_CONFIG 0x14 /* 16 R/W */
+#define VIRTIO_LEGACY_MSIX_QUEUE 0x16 /* 16 R/W */
+
+#define VIRTIO_LEGACY_CFG_OFFSET (VIRTIO_LEGACY_ISR_STATUS + 1)
+#define VIRTIO_LEGACY_CFG_OFFSET_MSIX (VIRTIO_LEGACY_MSIX_QUEUE + 2)
+
+#define VIRTIO_LEGACY_MSI_NO_VECTOR 0xFFFF
+
+/*
+ * Bits in the Device Status byte (VIRTIO_LEGACY_DEVICE_STATUS):
+ */
+#define VIRTIO_STATUS_RESET 0
+#define VIRTIO_STATUS_ACKNOWLEDGE (1 << 0)
+#define VIRTIO_STATUS_DRIVER (1 << 1)
+#define VIRTIO_STATUS_DRIVER_OK (1 << 2)
+#define VIRTIO_STATUS_FAILED (1 << 7)
+
+/*
+ * Bits in the Interrupt Service Routine Status byte
+ * (VIRTIO_LEGACY_ISR_STATUS):
+ */
+#define VIRTIO_ISR_CHECK_QUEUES (1 << 0)
+#define VIRTIO_ISR_CHECK_CONFIG (1 << 1)
+
+/*
+ * Bits in the Features fields (VIRTIO_LEGACY_FEATURES_DEVICE,
+ * VIRTIO_LEGACY_FEATURES_DRIVER):
+ */
+#define VIRTIO_F_RING_INDIRECT_DESC (1ULL << 28)
+
+/*
+ * For devices operating in the legacy mode, virtqueues must be aligned on a
+ * "page size" of 4096 bytes; this is also called the "Queue Align" value in
+ * newer versions of the specification.
+ */
+#define VIRTIO_PAGE_SHIFT 12
+#define VIRTIO_PAGE_SIZE (1 << VIRTIO_PAGE_SHIFT)
+CTASSERT(VIRTIO_PAGE_SIZE == 4096);
+CTASSERT(ISP2(VIRTIO_PAGE_SIZE));
+
+/*
+ * DMA SYNCHRONISATION WRAPPERS
+ */
+
+/*
+ * Synchronise the driver-owned portion of the queue so that the device can see
+ * our writes. This covers the memory accessed via the "viq_dma_descs" and
+ * "viq_dma_device" members.
+ */
+#define VIRTQ_DMA_SYNC_FORDEV(viq) VERIFY0(ddi_dma_sync( \
+ (viq)->viq_dma.vidma_dma_handle, \
+ 0, \
+ (uintptr_t)(viq)->viq_dma_device - \
+ (uintptr_t)(viq)->viq_dma_descs, \
+ DDI_DMA_SYNC_FORDEV))
+
+/*
+ * Synchronise the device-owned portion of the queue so that we can see any
+ * writes from the device. This covers the memory accessed via the
+ * "viq_dma_device" member.
+ */
+#define VIRTQ_DMA_SYNC_FORKERNEL(viq) VERIFY0(ddi_dma_sync( \
+ (viq)->viq_dma.vidma_dma_handle, \
+ (uintptr_t)(viq)->viq_dma_device - \
+ (uintptr_t)(viq)->viq_dma_descs, \
+    (viq)->viq_dma.vidma_size - \
+    ((uintptr_t)(viq)->viq_dma_device - \
+    (uintptr_t)(viq)->viq_dma_descs), \
+ DDI_DMA_SYNC_FORKERNEL))
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _VIRTIO_IMPL_H */
diff --git a/usr/src/uts/common/io/virtio/virtio_main.c b/usr/src/uts/common/io/virtio/virtio_main.c
new file mode 100644
index 0000000000..be92dacfba
--- /dev/null
+++ b/usr/src/uts/common/io/virtio/virtio_main.c
@@ -0,0 +1,1730 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2019 Joyent, Inc.
+ */
+
+/*
+ * VIRTIO FRAMEWORK
+ *
+ * For design and usage documentation, see the comments in "virtio.h".
+ */
+
+#include <sys/conf.h>
+#include <sys/kmem.h>
+#include <sys/debug.h>
+#include <sys/modctl.h>
+#include <sys/autoconf.h>
+#include <sys/ddi_impldefs.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/sunndi.h>
+#include <sys/avintr.h>
+#include <sys/spl.h>
+#include <sys/promif.h>
+#include <sys/list.h>
+#include <sys/bootconf.h>
+#include <sys/bootsvcs.h>
+#include <sys/sysmacros.h>
+#include <sys/pci.h>
+
+#include "virtio.h"
+#include "virtio_impl.h"
+
+
+/*
+ * Linkage structures
+ */
+static struct modlmisc virtio_modlmisc = {
+ .misc_modops = &mod_miscops,
+ .misc_linkinfo = "VIRTIO common routines",
+};
+
+static struct modlinkage virtio_modlinkage = {
+ .ml_rev = MODREV_1,
+ .ml_linkage = { &virtio_modlmisc, NULL }
+};
+
+int
+_init(void)
+{
+ return (mod_install(&virtio_modlinkage));
+}
+
+int
+_fini(void)
+{
+ return (mod_remove(&virtio_modlinkage));
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&virtio_modlinkage, modinfop));
+}
+
+
+
+static void virtio_set_status(virtio_t *, uint8_t);
+static int virtio_chain_append_impl(virtio_chain_t *, uint64_t, size_t,
+ uint16_t);
+static int virtio_interrupts_setup(virtio_t *, int);
+static void virtio_interrupts_teardown(virtio_t *);
+static void virtio_interrupts_disable_locked(virtio_t *);
+static void virtio_queue_free(virtio_queue_t *);
+
+/*
+ * We use the same device access attributes for BAR mapping and access to the
+ * virtqueue memory.
+ */
+ddi_device_acc_attr_t virtio_acc_attr = {
+ .devacc_attr_version = DDI_DEVICE_ATTR_V1,
+ .devacc_attr_endian_flags = DDI_NEVERSWAP_ACC,
+ .devacc_attr_dataorder = DDI_STORECACHING_OK_ACC,
+ .devacc_attr_access = DDI_DEFAULT_ACC
+};
+
+
+/*
+ * DMA attributes for the memory given to the device for queue management.
+ */
+ddi_dma_attr_t virtio_dma_attr_queue = {
+ .dma_attr_version = DMA_ATTR_V0,
+ .dma_attr_addr_lo = 0x0000000000000000,
+ /*
+ * Queue memory is aligned on VIRTIO_PAGE_SIZE with the address shifted
+ * down by VIRTIO_PAGE_SHIFT before being passed to the device in a
+ * 32-bit register.
+ */
+ .dma_attr_addr_hi = 0x00000FFFFFFFF000,
+ .dma_attr_count_max = 0x00000000FFFFFFFF,
+ .dma_attr_align = VIRTIO_PAGE_SIZE,
+ .dma_attr_burstsizes = 1,
+ .dma_attr_minxfer = 1,
+ .dma_attr_maxxfer = 0x00000000FFFFFFFF,
+ .dma_attr_seg = 0x00000000FFFFFFFF,
+ .dma_attr_sgllen = 1,
+ .dma_attr_granular = 1,
+ .dma_attr_flags = 0
+};
+
+/*
+ * DMA attributes for the allocation of indirect descriptor lists. The
+ * indirect list is referenced by a regular descriptor entry: the physical
+ * address field is 64 bits wide, but the length field is only 32 bits. Each
+ * descriptor is 16 bytes long.
+ */
+ddi_dma_attr_t virtio_dma_attr_indirect = {
+ .dma_attr_version = DMA_ATTR_V0,
+ .dma_attr_addr_lo = 0x0000000000000000,
+ .dma_attr_addr_hi = 0xFFFFFFFFFFFFFFFF,
+ .dma_attr_count_max = 0x00000000FFFFFFFF,
+ .dma_attr_align = sizeof (struct virtio_vq_desc),
+ .dma_attr_burstsizes = 1,
+ .dma_attr_minxfer = 1,
+ .dma_attr_maxxfer = 0x00000000FFFFFFFF,
+ .dma_attr_seg = 0x00000000FFFFFFFF,
+ .dma_attr_sgllen = 1,
+ .dma_attr_granular = 1,
+ .dma_attr_flags = 0
+};
+
+
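+/*
+ * LEGACY BAR REGISTER ACCESS
+ *
+ * These routines read and write the legacy BAR at a raw byte offset. They
+ * are used directly by the framework, and by the virtio_dev_*() wrappers
+ * further below for access to the device-specific configuration region.
+ */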
+uint8_t
+virtio_get8(virtio_t *vio, uintptr_t offset)
+{
+ return (ddi_get8(vio->vio_barh, (uint8_t *)(vio->vio_bar + offset)));
+}
+
+uint16_t
+virtio_get16(virtio_t *vio, uintptr_t offset)
+{
+ return (ddi_get16(vio->vio_barh, (uint16_t *)(vio->vio_bar + offset)));
+}
+
+uint32_t
+virtio_get32(virtio_t *vio, uintptr_t offset)
+{
+ return (ddi_get32(vio->vio_barh, (uint32_t *)(vio->vio_bar + offset)));
+}
+
+void
+virtio_put8(virtio_t *vio, uintptr_t offset, uint8_t value)
+{
+ ddi_put8(vio->vio_barh, (uint8_t *)(vio->vio_bar + offset), value);
+}
+
+void
+virtio_put16(virtio_t *vio, uintptr_t offset, uint16_t value)
+{
+ ddi_put16(vio->vio_barh, (uint16_t *)(vio->vio_bar + offset), value);
+}
+
+void
+virtio_put32(virtio_t *vio, uintptr_t offset, uint32_t value)
+{
+ ddi_put32(vio->vio_barh, (uint32_t *)(vio->vio_bar + offset), value);
+}
+
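+/*
+ * Tear down all framework state for a device: interrupt handlers, queues,
+ * and the BAR mapping. If "failed" is set, the FAILED status bit is left
+ * set for the host instead of resetting the device.
+ */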
+void
+virtio_fini(virtio_t *vio, boolean_t failed)
+{
+ mutex_enter(&vio->vio_mutex);
+
+ virtio_interrupts_teardown(vio);
+
+ virtio_queue_t *viq;
+ while ((viq = list_remove_head(&vio->vio_queues)) != NULL) {
+ virtio_queue_free(viq);
+ }
+ list_destroy(&vio->vio_queues);
+
+ if (failed) {
+ /*
+ * Signal to the host that device setup failed.
+ */
+ virtio_set_status(vio, VIRTIO_STATUS_FAILED);
+ } else {
+ virtio_device_reset(vio);
+ }
+
+ /*
+ * We don't need to do anything for the provider initlevel, as it
+ * merely records the fact that virtio_init_complete() was called.
+ */
+ vio->vio_initlevel &= ~VIRTIO_INITLEVEL_PROVIDER;
+
+ if (vio->vio_initlevel & VIRTIO_INITLEVEL_REGS) {
+ /*
+ * Unmap PCI BAR0.
+ */
+ ddi_regs_map_free(&vio->vio_barh);
+
+ vio->vio_initlevel &= ~VIRTIO_INITLEVEL_REGS;
+ }
+
+ /*
+ * Ensure we have torn down everything we set up.
+ */
+ VERIFY0(vio->vio_initlevel);
+
+ mutex_exit(&vio->vio_mutex);
+ mutex_destroy(&vio->vio_mutex);
+
+ kmem_free(vio, sizeof (*vio));
+}
+
+/*
+ * Early device initialisation for legacy (pre-1.0 specification) virtio
+ * devices.
+ */
+virtio_t *
+virtio_init(dev_info_t *dip, uint64_t driver_features, boolean_t allow_indirect)
+{
+ int r;
+
+ /*
+ * First, confirm that this is a legacy device.
+ */
+ ddi_acc_handle_t pci;
+ if (pci_config_setup(dip, &pci) != DDI_SUCCESS) {
+ dev_err(dip, CE_WARN, "pci_config_setup failed");
+ return (NULL);
+ }
+
+ uint8_t revid;
+ if ((revid = pci_config_get8(pci, PCI_CONF_REVID)) == PCI_EINVAL8) {
+ dev_err(dip, CE_WARN, "could not read config space");
+ pci_config_teardown(&pci);
+ return (NULL);
+ }
+
+ pci_config_teardown(&pci);
+
+ /*
+ * The legacy specification requires that the device advertise as PCI
+ * Revision 0.
+ */
+ if (revid != 0) {
+ dev_err(dip, CE_WARN, "PCI Revision %u incorrect for "
+ "legacy virtio device", (uint_t)revid);
+ return (NULL);
+ }
+
+ virtio_t *vio = kmem_zalloc(sizeof (*vio), KM_SLEEP);
+ vio->vio_dip = dip;
+
+ /*
+ * Map PCI BAR0 for legacy device access.
+ */
+ if ((r = ddi_regs_map_setup(dip, VIRTIO_LEGACY_PCI_BAR0,
+ (caddr_t *)&vio->vio_bar, 0, 0, &virtio_acc_attr,
+ &vio->vio_barh)) != DDI_SUCCESS) {
+ dev_err(dip, CE_WARN, "ddi_regs_map_setup failure (%d)", r);
+ kmem_free(vio, sizeof (*vio));
+ return (NULL);
+ }
+ vio->vio_initlevel |= VIRTIO_INITLEVEL_REGS;
+
+ /*
+ * We initialise the mutex without an interrupt priority to ease the
+ * implementation of some of the configuration space access routines.
+ * Drivers using the virtio framework MUST make a call to
+ * "virtio_init_complete()" prior to spawning other threads or enabling
+ * interrupt handlers, at which time we will destroy and reinitialise
+ * the mutex for use in our interrupt handlers.
+ */
+ mutex_init(&vio->vio_mutex, NULL, MUTEX_DRIVER, NULL);
+
+ list_create(&vio->vio_queues, sizeof (virtio_queue_t),
+ offsetof(virtio_queue_t, viq_link));
+
+ /*
+ * Legacy virtio devices require a few common steps before we can
+ * negotiate device features.
+ */
+ virtio_device_reset(vio);
+ virtio_set_status(vio, VIRTIO_STATUS_ACKNOWLEDGE);
+ virtio_set_status(vio, VIRTIO_STATUS_DRIVER);
+
+ /*
+ * Negotiate features with the device. Record the original supported
+ * feature set for debugging purposes.
+ */
+ vio->vio_features_device = virtio_get32(vio,
+ VIRTIO_LEGACY_FEATURES_DEVICE);
+ if (allow_indirect) {
+ driver_features |= VIRTIO_F_RING_INDIRECT_DESC;
+ }
+ vio->vio_features = vio->vio_features_device & driver_features;
+ virtio_put32(vio, VIRTIO_LEGACY_FEATURES_DRIVER, vio->vio_features);
+
+ /*
+ * The device-specific configuration begins at an offset into the BAR
+ * that depends on whether we have enabled MSI-X interrupts or not.
+ * Start out with the offset for pre-MSI-X operation so that we can
+ * read device configuration space prior to configuring interrupts.
+ */
+ vio->vio_config_offset = VIRTIO_LEGACY_CFG_OFFSET;
+
+ return (vio);
+}
+
+/*
+ * This function must be called by the driver once it has completed early setup
+ * calls.
+ */
+int
+virtio_init_complete(virtio_t *vio, int allowed_interrupt_types)
+{
+ VERIFY(!(vio->vio_initlevel & VIRTIO_INITLEVEL_PROVIDER));
+ vio->vio_initlevel |= VIRTIO_INITLEVEL_PROVIDER;
+
+ if (!list_is_empty(&vio->vio_queues)) {
+ /*
+ * Set up interrupts for the queues that have been registered.
+ */
+ if (virtio_interrupts_setup(vio, allowed_interrupt_types) !=
+ DDI_SUCCESS) {
+ return (DDI_FAILURE);
+ }
+ }
+
+ /*
+	 * Now that we know the interrupt priority, we can reinitialise the mutexes.
+ */
+ mutex_destroy(&vio->vio_mutex);
+ mutex_init(&vio->vio_mutex, NULL, MUTEX_DRIVER, virtio_intr_pri(vio));
+ for (virtio_queue_t *viq = list_head(&vio->vio_queues); viq != NULL;
+ viq = list_next(&vio->vio_queues, viq)) {
+ mutex_destroy(&viq->viq_mutex);
+ mutex_init(&viq->viq_mutex, NULL, MUTEX_DRIVER,
+ virtio_intr_pri(vio));
+ }
+
+ virtio_set_status(vio, VIRTIO_STATUS_DRIVER_OK);
+
+ return (DDI_SUCCESS);
+}
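+
+/*
+ * An illustrative sketch only (see "virtio.h" for the authoritative usage
+ * documentation): a client driver's attach path is expected to look roughly
+ * like the following, using the routines in this file. The MYDEV_* names,
+ * handler, and arguments below are hypothetical and error handling is
+ * elided.
+ *
+ *	vio = virtio_init(dip, MYDEV_WANTED_FEATURES, B_TRUE);
+ *	vq = virtio_queue_alloc(vio, 0, "rx", mydev_rx_handler, mydev,
+ *	    B_FALSE, MYDEV_MAX_SEGS);
+ *	(void) virtio_init_complete(vio, 0);
+ *	(void) virtio_interrupts_enable(vio);
+ */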
+
+boolean_t
+virtio_feature_present(virtio_t *vio, uint64_t feature_mask)
+{
+ return ((vio->vio_features & feature_mask) != 0);
+}
+
+void *
+virtio_intr_pri(virtio_t *vio)
+{
+ VERIFY(vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ADDED);
+
+ return (DDI_INTR_PRI(vio->vio_interrupt_priority));
+}
+
+/*
+ * Enable a bit in the device status register. Each bit signals a level of
+ * guest readiness to the host. Use the VIRTIO_STATUS_* constants for
+ * "status". To zero the status field use virtio_device_reset().
+ */
+static void
+virtio_set_status(virtio_t *vio, uint8_t status)
+{
+ VERIFY3U(status, !=, 0);
+
+ mutex_enter(&vio->vio_mutex);
+
+ uint8_t old = virtio_get8(vio, VIRTIO_LEGACY_DEVICE_STATUS);
+ virtio_put8(vio, VIRTIO_LEGACY_DEVICE_STATUS, status | old);
+
+ mutex_exit(&vio->vio_mutex);
+}
+
+static void
+virtio_device_reset_locked(virtio_t *vio)
+{
+ virtio_put8(vio, VIRTIO_LEGACY_DEVICE_STATUS, VIRTIO_STATUS_RESET);
+}
+
+void
+virtio_device_reset(virtio_t *vio)
+{
+ mutex_enter(&vio->vio_mutex);
+ virtio_device_reset_locked(vio);
+ mutex_exit(&vio->vio_mutex);
+}
+
+/*
+ * Some queues are effectively long-polled; the driver submits a series of
+ * buffers and the device only returns them when there is data available.
+ * During detach, we need to coordinate the return of these buffers. Calling
+ * "virtio_shutdown()" will reset the device, then allow the removal of all
+ * buffers that were in flight at the time of shutdown via
+ * "virtio_queue_evacuate()".
+ */
+void
+virtio_shutdown(virtio_t *vio)
+{
+ mutex_enter(&vio->vio_mutex);
+ if (vio->vio_initlevel & VIRTIO_INITLEVEL_SHUTDOWN) {
+ /*
+ * Shutdown has been performed already.
+ */
+ mutex_exit(&vio->vio_mutex);
+ return;
+ }
+
+ /*
+ * First, mark all of the queues as shutdown. This will prevent any
+ * further activity.
+ */
+ for (virtio_queue_t *viq = list_head(&vio->vio_queues); viq != NULL;
+ viq = list_next(&vio->vio_queues, viq)) {
+ mutex_enter(&viq->viq_mutex);
+ viq->viq_shutdown = B_TRUE;
+ mutex_exit(&viq->viq_mutex);
+ }
+
+ /*
+ * Now, reset the device. This removes any queue configuration on the
+ * device side.
+ */
+ virtio_device_reset_locked(vio);
+ vio->vio_initlevel |= VIRTIO_INITLEVEL_SHUTDOWN;
+ mutex_exit(&vio->vio_mutex);
+}
+
+/*
+ * Common implementation of quiesce(9E) for simple Virtio-based devices.
+ */
+int
+virtio_quiesce(virtio_t *vio)
+{
+ if (vio->vio_initlevel & VIRTIO_INITLEVEL_SHUTDOWN) {
+ /*
+ * Device has already been reset.
+ */
+ return (DDI_SUCCESS);
+ }
+
+ /*
+ * When we reset the device, it should immediately stop using any DMA
+ * memory we've previously passed to it. All queue configuration is
+ * discarded. This is good enough for quiesce(9E).
+ */
+ virtio_device_reset_locked(vio);
+
+ return (DDI_SUCCESS);
+}
+
+/*
+ * DEVICE-SPECIFIC REGISTER ACCESS
+ *
+ * Note that these functions take the mutex to avoid racing with interrupt
+ * enable/disable, when the device-specific offset can potentially change.
+ */
+
+uint8_t
+virtio_dev_get8(virtio_t *vio, uintptr_t offset)
+{
+ mutex_enter(&vio->vio_mutex);
+ uint8_t r = virtio_get8(vio, vio->vio_config_offset + offset);
+ mutex_exit(&vio->vio_mutex);
+
+ return (r);
+}
+
+uint16_t
+virtio_dev_get16(virtio_t *vio, uintptr_t offset)
+{
+ mutex_enter(&vio->vio_mutex);
+ uint16_t r = virtio_get16(vio, vio->vio_config_offset + offset);
+ mutex_exit(&vio->vio_mutex);
+
+ return (r);
+}
+
+uint32_t
+virtio_dev_get32(virtio_t *vio, uintptr_t offset)
+{
+ mutex_enter(&vio->vio_mutex);
+ uint32_t r = virtio_get32(vio, vio->vio_config_offset + offset);
+ mutex_exit(&vio->vio_mutex);
+
+ return (r);
+}
+
+uint64_t
+virtio_dev_get64(virtio_t *vio, uintptr_t offset)
+{
+ mutex_enter(&vio->vio_mutex);
+ /*
+ * On at least some systems, a 64-bit read or write to this BAR is not
+ * possible. For legacy devices, there is no generation number to use
+ * to determine if configuration may have changed half-way through a
+ * read. We need to continue to read both halves of the value until we
+ * read the same value at least twice.
+ */
+ uintptr_t o_lo = vio->vio_config_offset + offset;
+ uintptr_t o_hi = o_lo + 4;
+
+ uint64_t val = virtio_get32(vio, o_lo) |
+ ((uint64_t)virtio_get32(vio, o_hi) << 32);
+
+ for (;;) {
+ uint64_t tval = virtio_get32(vio, o_lo) |
+ ((uint64_t)virtio_get32(vio, o_hi) << 32);
+
+ if (tval == val) {
+ break;
+ }
+
+ val = tval;
+ }
+
+ mutex_exit(&vio->vio_mutex);
+ return (val);
+}
+
+void
+virtio_dev_put8(virtio_t *vio, uintptr_t offset, uint8_t value)
+{
+ mutex_enter(&vio->vio_mutex);
+ virtio_put8(vio, vio->vio_config_offset + offset, value);
+ mutex_exit(&vio->vio_mutex);
+}
+
+void
+virtio_dev_put16(virtio_t *vio, uintptr_t offset, uint16_t value)
+{
+ mutex_enter(&vio->vio_mutex);
+ virtio_put16(vio, vio->vio_config_offset + offset, value);
+ mutex_exit(&vio->vio_mutex);
+}
+
+void
+virtio_dev_put32(virtio_t *vio, uintptr_t offset, uint32_t value)
+{
+ mutex_enter(&vio->vio_mutex);
+ virtio_put32(vio, vio->vio_config_offset + offset, value);
+ mutex_exit(&vio->vio_mutex);
+}
+
+/*
+ * VIRTQUEUE MANAGEMENT
+ */
+
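+/*
+ * AVL comparator ordering in-flight chains by the index of their first
+ * (head) descriptor.
+ */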
+static int
+virtio_inflight_compar(const void *lp, const void *rp)
+{
+ const virtio_chain_t *l = lp;
+ const virtio_chain_t *r = rp;
+
+ if (l->vic_head < r->vic_head) {
+ return (-1);
+ } else if (l->vic_head > r->vic_head) {
+ return (1);
+ } else {
+ return (0);
+ }
+}
+
+virtio_queue_t *
+virtio_queue_alloc(virtio_t *vio, uint16_t qidx, const char *name,
+ ddi_intr_handler_t *func, void *funcarg, boolean_t force_direct,
+ uint_t max_segs)
+{
+ uint16_t qsz;
+ char space_name[256];
+
+ if (max_segs < 1) {
+ /*
+ * Every descriptor, direct or indirect, needs to refer to at
+ * least one buffer.
+ */
+ dev_err(vio->vio_dip, CE_WARN, "queue \"%s\" (%u) "
+ "segment count must be at least 1", name, (uint_t)qidx);
+ return (NULL);
+ }
+
+ mutex_enter(&vio->vio_mutex);
+
+ if (vio->vio_initlevel & VIRTIO_INITLEVEL_PROVIDER) {
+ /*
+ * Cannot configure any more queues once initial setup is
+ * complete and interrupts have been allocated.
+ */
+ dev_err(vio->vio_dip, CE_WARN, "queue \"%s\" (%u) "
+ "alloc after init complete", name, (uint_t)qidx);
+ mutex_exit(&vio->vio_mutex);
+ return (NULL);
+ }
+
+ /*
+ * There is no way to negotiate a different queue size for legacy
+ * devices. We must read and use the native queue size of the device.
+ */
+ virtio_put16(vio, VIRTIO_LEGACY_QUEUE_SELECT, qidx);
+ if ((qsz = virtio_get16(vio, VIRTIO_LEGACY_QUEUE_SIZE)) == 0) {
+ /*
+ * A size of zero means the device does not have a queue with
+ * this index.
+ */
+ dev_err(vio->vio_dip, CE_WARN, "queue \"%s\" (%u) "
+ "does not exist on device", name, (uint_t)qidx);
+ mutex_exit(&vio->vio_mutex);
+ return (NULL);
+ }
+
+ mutex_exit(&vio->vio_mutex);
+
+ virtio_queue_t *viq = kmem_zalloc(sizeof (*viq), KM_SLEEP);
+ viq->viq_virtio = vio;
+ viq->viq_name = name;
+ viq->viq_index = qidx;
+ viq->viq_size = qsz;
+ viq->viq_func = func;
+ viq->viq_funcarg = funcarg;
+ viq->viq_max_segs = max_segs;
+ avl_create(&viq->viq_inflight, virtio_inflight_compar,
+ sizeof (virtio_chain_t), offsetof(virtio_chain_t, vic_node));
+
+ /*
+ * Allocate the mutex without an interrupt priority for now, as we do
+ * with "vio_mutex". We'll reinitialise it in
+ * "virtio_init_complete()".
+ */
+ mutex_init(&viq->viq_mutex, NULL, MUTEX_DRIVER, NULL);
+
+ if (virtio_feature_present(vio, VIRTIO_F_RING_INDIRECT_DESC) &&
+ !force_direct) {
+ /*
+ * If we were able to negotiate the indirect descriptor
+ * feature, and the caller has not explicitly forced the use of
+ * direct descriptors, we'll allocate indirect descriptor lists
+ * for each chain.
+ */
+ viq->viq_indirect = B_TRUE;
+ }
+
+ /*
+ * Track descriptor usage in an identifier space.
+ */
+ (void) snprintf(space_name, sizeof (space_name), "%s%d_vq_%s",
+ ddi_get_name(vio->vio_dip), ddi_get_instance(vio->vio_dip), name);
+ if ((viq->viq_descmap = id_space_create(space_name, 0, qsz)) == NULL) {
+ dev_err(vio->vio_dip, CE_WARN, "could not allocate descriptor "
+ "ID space");
+ virtio_queue_free(viq);
+ return (NULL);
+ }
+
+ /*
+ * For legacy devices, memory for the queue has a strict layout
+ * determined by the queue size.
+ */
+ size_t sz_descs = sizeof (virtio_vq_desc_t) * qsz;
+ size_t sz_driver = P2ROUNDUP_TYPED(sz_descs +
+ sizeof (virtio_vq_driver_t) +
+ sizeof (uint16_t) * qsz,
+ VIRTIO_PAGE_SIZE, size_t);
+ size_t sz_device = P2ROUNDUP_TYPED(sizeof (virtio_vq_device_t) +
+ sizeof (virtio_vq_elem_t) * qsz,
+ VIRTIO_PAGE_SIZE, size_t);
+
+ if (virtio_dma_init(vio, &viq->viq_dma, sz_driver + sz_device,
+ &virtio_dma_attr_queue, DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
+ KM_SLEEP) != DDI_SUCCESS) {
+ dev_err(vio->vio_dip, CE_WARN, "could not allocate queue "
+ "DMA memory");
+ virtio_queue_free(viq);
+ return (NULL);
+ }
+
+ /*
+ * NOTE: The viq_dma_* members below are used by
+ * VIRTQ_DMA_SYNC_FORDEV() and VIRTQ_DMA_SYNC_FORKERNEL() to calculate
+ * offsets into the DMA allocation for partial synchronisation. If the
+ * ordering of, or relationship between, these pointers changes, the
+ * macros must be kept in sync.
+ */
+ viq->viq_dma_descs = virtio_dma_va(&viq->viq_dma, 0);
+ viq->viq_dma_driver = virtio_dma_va(&viq->viq_dma, sz_descs);
+ viq->viq_dma_device = virtio_dma_va(&viq->viq_dma, sz_driver);
+
+ /*
+ * Install in the per-device list of queues.
+ */
+ mutex_enter(&vio->vio_mutex);
+ for (virtio_queue_t *chkvq = list_head(&vio->vio_queues); chkvq != NULL;
+ chkvq = list_next(&vio->vio_queues, chkvq)) {
+ if (chkvq->viq_index == qidx) {
+ dev_err(vio->vio_dip, CE_WARN, "attempt to register "
+ "queue \"%s\" with same index (%d) as queue \"%s\"",
+ name, qidx, chkvq->viq_name);
+ mutex_exit(&vio->vio_mutex);
+ virtio_queue_free(viq);
+ return (NULL);
+ }
+ }
+ list_insert_tail(&vio->vio_queues, viq);
+
+ /*
+ * Ensure the zeroing of the queue memory is visible to the host before
+ * we inform the device of the queue address.
+ */
+ membar_producer();
+ VIRTQ_DMA_SYNC_FORDEV(viq);
+
+ virtio_put16(vio, VIRTIO_LEGACY_QUEUE_SELECT, qidx);
+ virtio_put32(vio, VIRTIO_LEGACY_QUEUE_ADDRESS,
+ virtio_dma_cookie_pa(&viq->viq_dma, 0) >> VIRTIO_PAGE_SHIFT);
+
+ mutex_exit(&vio->vio_mutex);
+ return (viq);
+}
+
+static void
+virtio_queue_free(virtio_queue_t *viq)
+{
+ virtio_t *vio = viq->viq_virtio;
+
+ /*
+ * We are going to destroy the queue mutex. Make sure we've already
+ * removed the interrupt handlers.
+ */
+ VERIFY(!(vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ADDED));
+
+ mutex_enter(&viq->viq_mutex);
+
+ /*
+ * If the device has not already been reset as part of a shutdown,
+ * detach the queue from the device now.
+ */
+ if (!viq->viq_shutdown) {
+ virtio_put16(vio, VIRTIO_LEGACY_QUEUE_SELECT, viq->viq_index);
+ virtio_put32(vio, VIRTIO_LEGACY_QUEUE_ADDRESS, 0);
+ }
+
+ virtio_dma_fini(&viq->viq_dma);
+
+ VERIFY(avl_is_empty(&viq->viq_inflight));
+ avl_destroy(&viq->viq_inflight);
+ if (viq->viq_descmap != NULL) {
+ id_space_destroy(viq->viq_descmap);
+ }
+
+ mutex_exit(&viq->viq_mutex);
+ mutex_destroy(&viq->viq_mutex);
+
+ kmem_free(viq, sizeof (*viq));
+}
+
+void
+virtio_queue_no_interrupt(virtio_queue_t *viq, boolean_t stop_interrupts)
+{
+ mutex_enter(&viq->viq_mutex);
+
+ if (stop_interrupts) {
+ viq->viq_dma_driver->vqdr_flags |= VIRTQ_AVAIL_F_NO_INTERRUPT;
+ } else {
+ viq->viq_dma_driver->vqdr_flags &= ~VIRTQ_AVAIL_F_NO_INTERRUPT;
+ }
+ VIRTQ_DMA_SYNC_FORDEV(viq);
+
+ mutex_exit(&viq->viq_mutex);
+}
+
+static virtio_chain_t *
+virtio_queue_complete(virtio_queue_t *viq, uint_t index)
+{
+ VERIFY(MUTEX_HELD(&viq->viq_mutex));
+
+ virtio_chain_t *vic;
+
+ virtio_chain_t search;
+ bzero(&search, sizeof (search));
+ search.vic_head = index;
+
+ if ((vic = avl_find(&viq->viq_inflight, &search, NULL)) == NULL) {
+ return (NULL);
+ }
+ avl_remove(&viq->viq_inflight, vic);
+
+ return (vic);
+}
+
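+/*
+ * Accessors reporting the fixed size of the queue (in descriptor entries)
+ * and the number of chains currently in flight on the device.
+ */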
+uint_t
+virtio_queue_size(virtio_queue_t *viq)
+{
+ return (viq->viq_size);
+}
+
+uint_t
+virtio_queue_nactive(virtio_queue_t *viq)
+{
+ mutex_enter(&viq->viq_mutex);
+ uint_t r = avl_numnodes(&viq->viq_inflight);
+ mutex_exit(&viq->viq_mutex);
+
+ return (r);
+}
+
+virtio_chain_t *
+virtio_queue_poll(virtio_queue_t *viq)
+{
+ mutex_enter(&viq->viq_mutex);
+ if (viq->viq_shutdown) {
+ /*
+ * The device has been reset by virtio_shutdown(), and queue
+ * processing has been halted. Any previously submitted chains
+ * will be evacuated using virtio_queue_evacuate().
+ */
+ mutex_exit(&viq->viq_mutex);
+ return (NULL);
+ }
+
+ VIRTQ_DMA_SYNC_FORKERNEL(viq);
+ if (viq->viq_device_index == viq->viq_dma_device->vqde_index) {
+ /*
+ * If the device index has not changed since the last poll,
+ * there are no new chains to process.
+ */
+ mutex_exit(&viq->viq_mutex);
+ return (NULL);
+ }
+
+ /*
+ * We need to ensure that all reads from the descriptor (vqde_ring[])
+	 * and any memory referenced by the descriptor occur after we have read
+ * the descriptor index value above (vqde_index).
+ */
+ membar_consumer();
+
+ uint16_t index = (viq->viq_device_index++) % viq->viq_size;
+ uint16_t start = viq->viq_dma_device->vqde_ring[index].vqe_start;
+ uint32_t len = viq->viq_dma_device->vqde_ring[index].vqe_len;
+
+ virtio_chain_t *vic;
+ if ((vic = virtio_queue_complete(viq, start)) == NULL) {
+ /*
+ * We could not locate a chain for this descriptor index, which
+ * suggests that something has gone horribly wrong.
+ */
+ dev_err(viq->viq_virtio->vio_dip, CE_PANIC,
+ "queue \"%s\" ring entry %u (descriptor %u) has no chain",
+ viq->viq_name, (uint16_t)index, (uint16_t)start);
+ }
+
+ vic->vic_received_length = len;
+
+ mutex_exit(&viq->viq_mutex);
+
+ return (vic);
+}
+
+/*
+ * After a call to "virtio_shutdown()", the driver must retrieve any previously
+ * submitted chains and free any associated resources.
+ */
+virtio_chain_t *
+virtio_queue_evacuate(virtio_queue_t *viq)
+{
+ virtio_t *vio = viq->viq_virtio;
+
+ mutex_enter(&vio->vio_mutex);
+ if (!(vio->vio_initlevel & VIRTIO_INITLEVEL_SHUTDOWN)) {
+ dev_err(vio->vio_dip, CE_PANIC,
+ "virtio_queue_evacuate() without virtio_shutdown()");
+ }
+ mutex_exit(&vio->vio_mutex);
+
+ mutex_enter(&viq->viq_mutex);
+ VERIFY(viq->viq_shutdown);
+
+ virtio_chain_t *vic = avl_first(&viq->viq_inflight);
+ if (vic != NULL) {
+ avl_remove(&viq->viq_inflight, vic);
+ }
+
+ mutex_exit(&viq->viq_mutex);
+
+ return (vic);
+}
+
+/*
+ * VIRTQUEUE DESCRIPTOR CHAIN MANAGEMENT
+ */
+
+/*
+ * When the device returns a descriptor chain to the driver, it may provide the
+ * length in bytes of data written into the chain. Client drivers should use
+ * this value with care; the specification suggests some device implementations
+ * have not always provided a useful or correct value.
+ */
+size_t
+virtio_chain_received_length(virtio_chain_t *vic)
+{
+ return (vic->vic_received_length);
+}
+
+/*
+ * Allocate a descriptor chain for use with this queue. The "kmflags" value
+ * may be KM_SLEEP or KM_NOSLEEP as per kmem_alloc(9F).
+ */
+virtio_chain_t *
+virtio_chain_alloc(virtio_queue_t *viq, int kmflags)
+{
+ virtio_t *vio = viq->viq_virtio;
+ virtio_chain_t *vic;
+ uint_t cap;
+
+ /*
+ * Direct descriptors are known by their index in the descriptor table
+ * for the queue. We use the variable-length array member at the end
+ * of the chain tracking object to hold the list of direct descriptors
+ * assigned to this chain.
+ */
+ if (viq->viq_indirect) {
+ /*
+ * When using indirect descriptors we still need one direct
+ * descriptor entry to hold the physical address and length of
+ * the indirect descriptor table.
+ */
+ cap = 1;
+ } else {
+ /*
+ * For direct descriptors we need to be able to track a
+ * descriptor for each possible segment in a single chain.
+ */
+ cap = viq->viq_max_segs;
+ }
+
+ size_t vicsz = sizeof (*vic) + sizeof (uint16_t) * cap;
+ if ((vic = kmem_zalloc(vicsz, kmflags)) == NULL) {
+ return (NULL);
+ }
+ vic->vic_vq = viq;
+ vic->vic_direct_capacity = cap;
+
+ if (viq->viq_indirect) {
+ /*
+ * Allocate an indirect descriptor list with the appropriate
+ * number of entries.
+ */
+ if (virtio_dma_init(vio, &vic->vic_indirect_dma,
+ sizeof (virtio_vq_desc_t) * viq->viq_max_segs,
+ &virtio_dma_attr_indirect,
+ DDI_DMA_CONSISTENT | DDI_DMA_WRITE,
+ kmflags) != DDI_SUCCESS) {
+ goto fail;
+ }
+
+ /*
+ * Allocate a single descriptor to hold the indirect list.
+ * Leave the length as zero for now; it will be set to include
+ * any occupied entries at push time.
+ */
+ mutex_enter(&viq->viq_mutex);
+ if (virtio_chain_append_impl(vic,
+ virtio_dma_cookie_pa(&vic->vic_indirect_dma, 0), 0,
+ VIRTQ_DESC_F_INDIRECT) != DDI_SUCCESS) {
+ mutex_exit(&viq->viq_mutex);
+ goto fail;
+ }
+ mutex_exit(&viq->viq_mutex);
+ VERIFY3U(vic->vic_direct_used, ==, 1);
+
+ /*
+ * Don't set the indirect capacity until after we've installed
+ * the direct descriptor which points at the indirect list, or
+ * virtio_chain_append_impl() will be confused.
+ */
+ vic->vic_indirect_capacity = viq->viq_max_segs;
+ }
+
+ return (vic);
+
+fail:
+ virtio_dma_fini(&vic->vic_indirect_dma);
+ kmem_free(vic, vicsz);
+ return (NULL);
+}
+
+void *
+virtio_chain_data(virtio_chain_t *vic)
+{
+ return (vic->vic_data);
+}
+
+void
+virtio_chain_data_set(virtio_chain_t *vic, void *data)
+{
+ vic->vic_data = data;
+}
+
+void
+virtio_chain_clear(virtio_chain_t *vic)
+{
+ if (vic->vic_indirect_capacity != 0) {
+ /*
+ * There should only be one direct descriptor, which points at
+ * our indirect descriptor list. We don't want to clear it
+ * here.
+ */
+ VERIFY3U(vic->vic_direct_capacity, ==, 1);
+
+ if (vic->vic_indirect_used > 0) {
+ /*
+ * Clear out the indirect descriptor table.
+ */
+ vic->vic_indirect_used = 0;
+ bzero(virtio_dma_va(&vic->vic_indirect_dma, 0),
+ virtio_dma_size(&vic->vic_indirect_dma));
+ }
+
+ } else if (vic->vic_direct_capacity > 0) {
+ /*
+ * Release any descriptors that were assigned to us previously.
+ */
+ for (uint_t i = 0; i < vic->vic_direct_used; i++) {
+ id_free(vic->vic_vq->viq_descmap, vic->vic_direct[i]);
+ vic->vic_direct[i] = 0;
+ }
+ vic->vic_direct_used = 0;
+ }
+}
+
+void
+virtio_chain_free(virtio_chain_t *vic)
+{
+ /*
+ * First ensure that we have released any descriptors used by this
+ * chain.
+ */
+ virtio_chain_clear(vic);
+
+ if (vic->vic_indirect_capacity > 0) {
+ /*
+ * Release the direct descriptor that points to our indirect
+ * descriptor list.
+ */
+ VERIFY3U(vic->vic_direct_capacity, ==, 1);
+ id_free(vic->vic_vq->viq_descmap, vic->vic_direct[0]);
+
+ virtio_dma_fini(&vic->vic_indirect_dma);
+ }
+
+ size_t vicsz = sizeof (*vic) +
+ vic->vic_direct_capacity * sizeof (uint16_t);
+
+ kmem_free(vic, vicsz);
+}
+
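+/*
+ * Allocate a free descriptor table index from the queue identifier space.
+ * Returns ENOMEM if every descriptor in the table is currently in use.
+ */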
+static inline int
+virtio_queue_descmap_alloc(virtio_queue_t *viq, uint_t *indexp)
+{
+ id_t index;
+
+ if ((index = id_alloc_nosleep(viq->viq_descmap)) == -1) {
+ return (ENOMEM);
+ }
+
+ VERIFY3S(index, >=, 0);
+ VERIFY3S(index, <=, viq->viq_size);
+
+ *indexp = (uint_t)index;
+ return (0);
+}
+
+static int
+virtio_chain_append_impl(virtio_chain_t *vic, uint64_t pa, size_t len,
+ uint16_t flags)
+{
+ virtio_queue_t *viq = vic->vic_vq;
+ virtio_vq_desc_t *vqd;
+ uint_t index;
+
+ /*
+ * We're modifying the queue-wide descriptor list so make sure we have
+ * the appropriate lock.
+ */
+ VERIFY(MUTEX_HELD(&viq->viq_mutex));
+
+ if (vic->vic_indirect_capacity != 0) {
+ /*
+ * Use indirect descriptors.
+ */
+ if (vic->vic_indirect_used >= vic->vic_indirect_capacity) {
+ return (DDI_FAILURE);
+ }
+
+ vqd = virtio_dma_va(&vic->vic_indirect_dma, 0);
+
+ if ((index = vic->vic_indirect_used++) > 0) {
+ /*
+ * Chain the current last indirect descriptor to the
+ * new one.
+ */
+ vqd[index - 1].vqd_flags |= VIRTQ_DESC_F_NEXT;
+ vqd[index - 1].vqd_next = index;
+ }
+
+ } else {
+ /*
+ * Use direct descriptors.
+ */
+ if (vic->vic_direct_used >= vic->vic_direct_capacity) {
+ return (DDI_FAILURE);
+ }
+
+ if (virtio_queue_descmap_alloc(viq, &index) != 0) {
+ return (DDI_FAILURE);
+ }
+
+ vqd = virtio_dma_va(&viq->viq_dma, 0);
+
+ if (vic->vic_direct_used > 0) {
+ /*
+			 * This is not the first entry. Chain the previously
+			 * appended descriptor to this new one.
+ */
+ uint16_t p = vic->vic_direct[vic->vic_direct_used - 1];
+
+ vqd[p].vqd_flags |= VIRTQ_DESC_F_NEXT;
+ vqd[p].vqd_next = index;
+ }
+ vic->vic_direct[vic->vic_direct_used++] = index;
+ }
+
+ vqd[index].vqd_addr = pa;
+ vqd[index].vqd_len = len;
+ vqd[index].vqd_flags = flags;
+ vqd[index].vqd_next = 0;
+
+ return (DDI_SUCCESS);
+}
+
+int
+virtio_chain_append(virtio_chain_t *vic, uint64_t pa, size_t len,
+ virtio_direction_t dir)
+{
+ virtio_queue_t *viq = vic->vic_vq;
+ uint16_t flags = 0;
+
+ switch (dir) {
+ case VIRTIO_DIR_DEVICE_WRITES:
+ flags |= VIRTQ_DESC_F_WRITE;
+ break;
+
+ case VIRTIO_DIR_DEVICE_READS:
+ break;
+
+ default:
+ panic("unknown direction value %u", dir);
+ }
+
+ mutex_enter(&viq->viq_mutex);
+ int r = virtio_chain_append_impl(vic, pa, len, flags);
+ mutex_exit(&viq->viq_mutex);
+
+ return (r);
+}
+
+static void
+virtio_queue_flush_locked(virtio_queue_t *viq)
+{
+ VERIFY(MUTEX_HELD(&viq->viq_mutex));
+
+ /*
+	 * Make sure any writes we have just made to the descriptors and to the
+	 * driver ring entries (vqdr_ring[]) are visible to the device before we
+	 * update the ring pointer (vqdr_index).
+ */
+ membar_producer();
+ viq->viq_dma_driver->vqdr_index = viq->viq_driver_index;
+ VIRTQ_DMA_SYNC_FORDEV(viq);
+
+ /*
+ * Determine whether the device expects us to notify it of new
+ * descriptors.
+ */
+ VIRTQ_DMA_SYNC_FORKERNEL(viq);
+ if (!(viq->viq_dma_device->vqde_flags & VIRTQ_USED_F_NO_NOTIFY)) {
+ virtio_put16(viq->viq_virtio, VIRTIO_LEGACY_QUEUE_NOTIFY,
+ viq->viq_index);
+ }
+}
+
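+/*
+ * Make previously submitted chains visible to the device, notifying it if it
+ * has not suppressed notifications for this queue.
+ */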
+void
+virtio_queue_flush(virtio_queue_t *viq)
+{
+ mutex_enter(&viq->viq_mutex);
+ virtio_queue_flush_locked(viq);
+ mutex_exit(&viq->viq_mutex);
+}
+
+void
+virtio_chain_submit(virtio_chain_t *vic, boolean_t flush)
+{
+ virtio_queue_t *viq = vic->vic_vq;
+
+ mutex_enter(&viq->viq_mutex);
+
+ if (vic->vic_indirect_capacity != 0) {
+ virtio_vq_desc_t *vqd = virtio_dma_va(&viq->viq_dma, 0);
+
+ VERIFY3U(vic->vic_direct_used, ==, 1);
+
+ /*
+ * This is an indirect descriptor queue. The length in bytes
+ * of the descriptor must extend to cover the populated
+ * indirect descriptor entries.
+ */
+ vqd[vic->vic_direct[0]].vqd_len =
+ sizeof (virtio_vq_desc_t) * vic->vic_indirect_used;
+
+ virtio_dma_sync(&vic->vic_indirect_dma, DDI_DMA_SYNC_FORDEV);
+ }
+
+ /*
+ * Populate the next available slot in the driver-owned ring for this
+	 * chain. The updated value of viq_driver_index will not be visible to
+	 * the device until a subsequent queue flush.
+ */
+ uint16_t index = (viq->viq_driver_index++) % viq->viq_size;
+ viq->viq_dma_driver->vqdr_ring[index] = vic->vic_direct[0];
+
+ vic->vic_head = vic->vic_direct[0];
+ avl_add(&viq->viq_inflight, vic);
+
+ if (flush) {
+ virtio_queue_flush_locked(vic->vic_vq);
+ }
+
+ mutex_exit(&viq->viq_mutex);
+}
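+
+/*
+ * An illustrative sketch only: a caller might populate and submit a chain
+ * along these lines, using the routines above. The buffer names are
+ * hypothetical and error handling is elided.
+ *
+ *	vic = virtio_chain_alloc(vq, KM_SLEEP);
+ *	(void) virtio_chain_append(vic, buf_pa, buf_len,
+ *	    VIRTIO_DIR_DEVICE_WRITES);
+ *	virtio_chain_data_set(vic, mybuf);
+ *	virtio_chain_submit(vic, B_TRUE);
+ *
+ * A completed chain is later retrieved with virtio_queue_poll(), and the
+ * number of bytes written by the device is available from
+ * virtio_chain_received_length().
+ */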
+
+/*
+ * INTERRUPTS MANAGEMENT
+ */
+
+static const char *
+virtio_interrupt_type_name(int type)
+{
+ switch (type) {
+ case DDI_INTR_TYPE_MSIX:
+ return ("MSI-X");
+ case DDI_INTR_TYPE_MSI:
+ return ("MSI");
+ case DDI_INTR_TYPE_FIXED:
+ return ("fixed");
+ default:
+ return ("?");
+ }
+}
+
+static int
+virtio_interrupts_alloc(virtio_t *vio, int type, int nrequired)
+{
+ dev_info_t *dip = vio->vio_dip;
+ int nintrs = 0;
+ int navail = 0;
+
+ VERIFY(MUTEX_HELD(&vio->vio_mutex));
+ VERIFY(!(vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ALLOC));
+
+ if (ddi_intr_get_nintrs(dip, type, &nintrs) != DDI_SUCCESS) {
+ dev_err(dip, CE_WARN, "could not count %s interrupts",
+ virtio_interrupt_type_name(type));
+ return (DDI_FAILURE);
+ }
+ if (nintrs < 1) {
+ dev_err(dip, CE_WARN, "no %s interrupts supported",
+ virtio_interrupt_type_name(type));
+ return (DDI_FAILURE);
+ }
+
+ if (ddi_intr_get_navail(dip, type, &navail) != DDI_SUCCESS) {
+ dev_err(dip, CE_WARN, "could not count available %s interrupts",
+ virtio_interrupt_type_name(type));
+ return (DDI_FAILURE);
+ }
+ if (navail < nrequired) {
+ dev_err(dip, CE_WARN, "need %d %s interrupts, but only %d "
+ "available", nrequired, virtio_interrupt_type_name(type),
+ navail);
+ return (DDI_FAILURE);
+ }
+
+ VERIFY3P(vio->vio_interrupts, ==, NULL);
+ vio->vio_interrupts = kmem_zalloc(
+ sizeof (ddi_intr_handle_t) * nrequired, KM_SLEEP);
+
+ int r;
+ if ((r = ddi_intr_alloc(dip, vio->vio_interrupts, type, 0, nrequired,
+ &vio->vio_ninterrupts, DDI_INTR_ALLOC_STRICT)) != DDI_SUCCESS) {
+ dev_err(dip, CE_WARN, "%s interrupt allocation failure (%d)",
+ virtio_interrupt_type_name(type), r);
+ kmem_free(vio->vio_interrupts,
+ sizeof (ddi_intr_handle_t) * nrequired);
+ vio->vio_interrupts = NULL;
+ return (DDI_FAILURE);
+ }
+
+ vio->vio_initlevel |= VIRTIO_INITLEVEL_INT_ALLOC;
+ vio->vio_interrupt_type = type;
+ return (DDI_SUCCESS);
+}
+
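+/*
+ * Shared handler used with fixed interrupts to multiplex the per-queue
+ * handlers provided by the client driver.
+ */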
+static uint_t
+virtio_shared_isr(caddr_t arg0, caddr_t arg1)
+{
+ virtio_t *vio = (virtio_t *)arg0;
+ uint_t r = DDI_INTR_UNCLAIMED;
+ uint8_t isr;
+
+ mutex_enter(&vio->vio_mutex);
+
+ /*
+ * Check the ISR status to see if the interrupt applies to us. Reading
+ * this field resets it to zero.
+ */
+ isr = virtio_get8(vio, VIRTIO_LEGACY_ISR_STATUS);
+ if ((isr & VIRTIO_ISR_CHECK_QUEUES) == 0) {
+ goto done;
+ }
+
+ for (virtio_queue_t *viq = list_head(&vio->vio_queues); viq != NULL;
+ viq = list_next(&vio->vio_queues, viq)) {
+ if (viq->viq_func != NULL) {
+ mutex_exit(&vio->vio_mutex);
+ if (viq->viq_func(viq->viq_funcarg, arg0) ==
+ DDI_INTR_CLAIMED) {
+ r = DDI_INTR_CLAIMED;
+ }
+ mutex_enter(&vio->vio_mutex);
+
+ if (vio->vio_initlevel & VIRTIO_INITLEVEL_SHUTDOWN) {
+ /*
+ * The device was shut down while in a queue
+ * handler routine.
+ */
+ goto done;
+ }
+ }
+ }
+
+done:
+ mutex_exit(&vio->vio_mutex);
+ return (r);
+}
+
+static int
+virtio_interrupts_setup(virtio_t *vio, int allow_types)
+{
+ dev_info_t *dip = vio->vio_dip;
+ int types;
+ int count = 0;
+
+ mutex_enter(&vio->vio_mutex);
+
+ /*
+ * Determine the number of interrupts we'd like based on the number of
+ * virtqueues.
+ */
+ for (virtio_queue_t *viq = list_head(&vio->vio_queues); viq != NULL;
+ viq = list_next(&vio->vio_queues, viq)) {
+ if (viq->viq_func != NULL) {
+ count++;
+ }
+ }
+
+ if (ddi_intr_get_supported_types(dip, &types) != DDI_SUCCESS) {
+ dev_err(dip, CE_WARN, "could not get supported interrupts");
+ mutex_exit(&vio->vio_mutex);
+ return (DDI_FAILURE);
+ }
+
+ if (allow_types != 0) {
+ /*
+ * Restrict the possible interrupt types at the request of the
+ * driver.
+ */
+ types &= allow_types;
+ }
+
+ /*
+ * Try each potential interrupt type in descending order of preference.
+ * Note that the specification does not appear to allow for the use of
+ * classical MSI, so we are limited to either MSI-X or fixed
+ * interrupts.
+ */
+ if (types & DDI_INTR_TYPE_MSIX) {
+ if (virtio_interrupts_alloc(vio, DDI_INTR_TYPE_MSIX,
+ count) == DDI_SUCCESS) {
+ goto add_handlers;
+ }
+ }
+ if (types & DDI_INTR_TYPE_FIXED) {
+ /*
+ * If fixed interrupts are all that are available, we'll just
+ * ask for one.
+ */
+ if (virtio_interrupts_alloc(vio, DDI_INTR_TYPE_FIXED, 1) ==
+ DDI_SUCCESS) {
+ goto add_handlers;
+ }
+ }
+
+ dev_err(dip, CE_WARN, "interrupt allocation failed");
+ mutex_exit(&vio->vio_mutex);
+ return (DDI_FAILURE);
+
+add_handlers:
+ /*
+ * Ensure that we have not been given any high-level interrupts as our
+ * interrupt handlers do not support them.
+ */
+ for (int i = 0; i < vio->vio_ninterrupts; i++) {
+ uint_t ipri;
+
+ if (ddi_intr_get_pri(vio->vio_interrupts[i], &ipri) !=
+ DDI_SUCCESS) {
+ dev_err(dip, CE_WARN, "could not determine interrupt "
+ "priority");
+ goto fail;
+ }
+
+ if (ipri >= ddi_intr_get_hilevel_pri()) {
+ dev_err(dip, CE_WARN, "high level interrupts not "
+ "supported");
+ goto fail;
+ }
+
+ /*
+ * Record the highest priority we've been allocated to use for
+ * mutex initialisation.
+ */
+ if (i == 0 || ipri > vio->vio_interrupt_priority) {
+ vio->vio_interrupt_priority = ipri;
+ }
+ }
+
+ /*
+ * Get the interrupt capabilities from the first handle to determine
+ * whether we need to use ddi_intr_block_enable(9F).
+ */
+ if (ddi_intr_get_cap(vio->vio_interrupts[0],
+ &vio->vio_interrupt_cap) != DDI_SUCCESS) {
+ dev_err(dip, CE_WARN, "failed to get interrupt capabilities");
+ goto fail;
+ }
+
+ if (vio->vio_interrupt_type == DDI_INTR_TYPE_FIXED) {
+ VERIFY3S(vio->vio_ninterrupts, ==, 1);
+ /*
+ * For fixed interrupts, we need to use our shared handler to
+ * multiplex the per-queue handlers provided by the driver.
+ */
+ if (ddi_intr_add_handler(vio->vio_interrupts[0],
+ virtio_shared_isr, (caddr_t)vio, NULL) != DDI_SUCCESS) {
+ dev_err(dip, CE_WARN, "adding shared %s interrupt "
+ "handler failed", virtio_interrupt_type_name(
+ vio->vio_interrupt_type));
+ goto fail;
+ }
+
+ goto done;
+ }
+
+ VERIFY3S(vio->vio_ninterrupts, ==, count);
+
+ uint_t n = 0;
+ for (virtio_queue_t *viq = list_head(&vio->vio_queues); viq != NULL;
+ viq = list_next(&vio->vio_queues, viq)) {
+ if (viq->viq_func == NULL) {
+ continue;
+ }
+
+ if (ddi_intr_add_handler(vio->vio_interrupts[n],
+ viq->viq_func, (caddr_t)viq->viq_funcarg,
+ (caddr_t)vio) != DDI_SUCCESS) {
+ dev_err(dip, CE_WARN, "adding interrupt %u (%s) failed",
+ n, viq->viq_name);
+ goto fail;
+ }
+
+ viq->viq_handler_index = n;
+ viq->viq_handler_added = B_TRUE;
+ n++;
+ }
+
+done:
+ vio->vio_initlevel |= VIRTIO_INITLEVEL_INT_ADDED;
+ mutex_exit(&vio->vio_mutex);
+ return (DDI_SUCCESS);
+
+fail:
+ virtio_interrupts_teardown(vio);
+ mutex_exit(&vio->vio_mutex);
+ return (DDI_FAILURE);
+}
+
+static void
+virtio_interrupts_teardown(virtio_t *vio)
+{
+ VERIFY(MUTEX_HELD(&vio->vio_mutex));
+
+ virtio_interrupts_disable_locked(vio);
+
+ if (vio->vio_interrupt_type == DDI_INTR_TYPE_FIXED) {
+ /*
+ * Remove the multiplexing interrupt handler.
+ */
+ if (vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ADDED) {
+ int r;
+
+ VERIFY3S(vio->vio_ninterrupts, ==, 1);
+
+ if ((r = ddi_intr_remove_handler(
+ vio->vio_interrupts[0])) != DDI_SUCCESS) {
+ dev_err(vio->vio_dip, CE_WARN, "removing "
+ "shared interrupt handler failed (%d)", r);
+ }
+ }
+ } else {
+ for (virtio_queue_t *viq = list_head(&vio->vio_queues);
+ viq != NULL; viq = list_next(&vio->vio_queues, viq)) {
+ int r;
+
+ if (!viq->viq_handler_added) {
+ continue;
+ }
+
+ if ((r = ddi_intr_remove_handler(
+ vio->vio_interrupts[viq->viq_handler_index])) !=
+ DDI_SUCCESS) {
+ dev_err(vio->vio_dip, CE_WARN, "removing "
+ "interrupt handler (%s) failed (%d)",
+ viq->viq_name, r);
+ }
+
+ viq->viq_handler_added = B_FALSE;
+ }
+ }
+ vio->vio_initlevel &= ~VIRTIO_INITLEVEL_INT_ADDED;
+
+ if (vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ALLOC) {
+ for (int i = 0; i < vio->vio_ninterrupts; i++) {
+ int r;
+
+ if ((r = ddi_intr_free(vio->vio_interrupts[i])) !=
+ DDI_SUCCESS) {
+ dev_err(vio->vio_dip, CE_WARN, "freeing "
+ "interrupt %u failed (%d)", i, r);
+ }
+ }
+ kmem_free(vio->vio_interrupts,
+ sizeof (ddi_intr_handle_t) * vio->vio_ninterrupts);
+ vio->vio_interrupts = NULL;
+ vio->vio_ninterrupts = 0;
+ vio->vio_interrupt_type = 0;
+ vio->vio_interrupt_cap = 0;
+ vio->vio_interrupt_priority = 0;
+
+ vio->vio_initlevel &= ~VIRTIO_INITLEVEL_INT_ALLOC;
+ }
+}
+
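+/*
+ * Undo interrupt enablement: detach queues from their MSI-X vectors, disable
+ * the allocated interrupts, and restore the pre-MSI-X configuration offset.
+ */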
+static void
+virtio_interrupts_unwind(virtio_t *vio)
+{
+ VERIFY(MUTEX_HELD(&vio->vio_mutex));
+
+ if (vio->vio_interrupt_type == DDI_INTR_TYPE_MSIX) {
+ for (virtio_queue_t *viq = list_head(&vio->vio_queues);
+ viq != NULL; viq = list_next(&vio->vio_queues, viq)) {
+ if (!viq->viq_handler_added) {
+ continue;
+ }
+
+ virtio_put16(vio, VIRTIO_LEGACY_QUEUE_SELECT,
+ viq->viq_index);
+ virtio_put16(vio, VIRTIO_LEGACY_MSIX_QUEUE,
+ VIRTIO_LEGACY_MSI_NO_VECTOR);
+ }
+ }
+
+ if (vio->vio_interrupt_cap & DDI_INTR_FLAG_BLOCK) {
+ (void) ddi_intr_block_disable(vio->vio_interrupts,
+ vio->vio_ninterrupts);
+ } else {
+ for (int i = 0; i < vio->vio_ninterrupts; i++) {
+ (void) ddi_intr_disable(vio->vio_interrupts[i]);
+ }
+ }
+
+ /*
+ * Disabling the interrupts makes the MSI-X fields disappear from the
+ * BAR once more.
+ */
+ vio->vio_config_offset = VIRTIO_LEGACY_CFG_OFFSET;
+}
+
+int
+virtio_interrupts_enable(virtio_t *vio)
+{
+ mutex_enter(&vio->vio_mutex);
+ if (vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ENABLED) {
+ mutex_exit(&vio->vio_mutex);
+ return (DDI_SUCCESS);
+ }
+
+ int r = DDI_SUCCESS;
+ if (vio->vio_interrupt_cap & DDI_INTR_FLAG_BLOCK) {
+ r = ddi_intr_block_enable(vio->vio_interrupts,
+ vio->vio_ninterrupts);
+ } else {
+ for (int i = 0; i < vio->vio_ninterrupts; i++) {
+ if ((r = ddi_intr_enable(vio->vio_interrupts[i])) !=
+ DDI_SUCCESS) {
+ /*
+ * Disable the interrupts we have enabled so
+ * far.
+ */
+ for (i--; i >= 0; i--) {
+ (void) ddi_intr_disable(
+ vio->vio_interrupts[i]);
+ }
+ break;
+ }
+ }
+ }
+
+ if (r != DDI_SUCCESS) {
+ mutex_exit(&vio->vio_mutex);
+ return (r);
+ }
+
+ if (vio->vio_interrupt_type == DDI_INTR_TYPE_MSIX) {
+ /*
+ * When asked to enable the interrupts, the system enables
+ * MSI-X in the PCI configuration for the device. While
+ * enabled, the extra MSI-X configuration table fields appear
+ * between the general and the device-specific regions of the
+ * BAR.
+ */
+ vio->vio_config_offset = VIRTIO_LEGACY_CFG_OFFSET_MSIX;
+
+ for (virtio_queue_t *viq = list_head(&vio->vio_queues);
+ viq != NULL; viq = list_next(&vio->vio_queues, viq)) {
+ if (!viq->viq_handler_added) {
+ continue;
+ }
+
+ uint16_t qi = viq->viq_index;
+ uint16_t msi = viq->viq_handler_index;
+
+ /*
+ * Route interrupts for this queue to the assigned
+ * MSI-X vector number.
+ */
+ virtio_put16(vio, VIRTIO_LEGACY_QUEUE_SELECT, qi);
+ virtio_put16(vio, VIRTIO_LEGACY_MSIX_QUEUE, msi);
+
+ /*
+ * The device may not actually accept the vector number
+ * we're attempting to program. We need to confirm
+ * that configuration was successful by re-reading the
+ * configuration we just wrote.
+ */
+ if (virtio_get16(vio, VIRTIO_LEGACY_MSIX_QUEUE) !=
+ msi) {
+ dev_err(vio->vio_dip, CE_WARN,
+ "failed to configure MSI-X vector %u for "
+ "queue \"%s\" (#%u)", (uint_t)msi,
+ viq->viq_name, (uint_t)qi);
+
+ virtio_interrupts_unwind(vio);
+ mutex_exit(&vio->vio_mutex);
+ return (DDI_FAILURE);
+ }
+ }
+ }
+
+ vio->vio_initlevel |= VIRTIO_INITLEVEL_INT_ENABLED;
+
+ mutex_exit(&vio->vio_mutex);
+ return (DDI_SUCCESS);
+}
+
+static void
+virtio_interrupts_disable_locked(virtio_t *vio)
+{
+ VERIFY(MUTEX_HELD(&vio->vio_mutex));
+
+ if (!(vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ENABLED)) {
+ return;
+ }
+
+ virtio_interrupts_unwind(vio);
+
+ vio->vio_initlevel &= ~VIRTIO_INITLEVEL_INT_ENABLED;
+}
+
+void
+virtio_interrupts_disable(virtio_t *vio)
+{
+ mutex_enter(&vio->vio_mutex);
+ virtio_interrupts_disable_locked(vio);
+ mutex_exit(&vio->vio_mutex);
+}
diff --git a/usr/src/uts/common/io/virtio/virtioreg.h b/usr/src/uts/common/io/virtio/virtioreg.h
deleted file mode 100644
index 19579e96bc..0000000000
--- a/usr/src/uts/common/io/virtio/virtioreg.h
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- * Copyright (c) 2010 Minoura Makoto.
- * Copyright (c) 2012 Nexenta Systems, Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- * Part of the file derived from `Virtio PCI Card Specification v0.8.6 DRAFT'
- * Appendix A.
- */
-
-/*
- * An interface for efficient virtio implementation.
- *
- * This header is BSD licensed so anyone can use the definitions
- * to implement compatible drivers/servers.
- *
- * Copyright 2007, 2009, IBM Corporation
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of IBM nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * ``AS IS'' ANDANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-
-#ifndef __VIRTIOREG_H__
-#define __VIRTIOREG_H__
-
-#include <sys/types.h>
-
-#define PCI_VENDOR_QUMRANET 0x1af4
-#define PCI_DEV_VIRTIO_MIN 0x1000
-#define PCI_DEV_VIRTIO_MAX 0x103f
-#define VIRTIO_PCI_ABI_VERSION 0
-
-/* Virtio product id (subsystem) */
-#define PCI_PRODUCT_VIRTIO_NETWORK 1
-#define PCI_PRODUCT_VIRTIO_BLOCK 2
-#define PCI_PRODUCT_VIRTIO_CONSOLE 3
-#define PCI_PRODUCT_VIRTIO_ENTROPY 4
-#define PCI_PRODUCT_VIRTIO_BALLOON 5
-#define PCI_PRODUCT_VIRTIO_9P 9
-
-/* Virtio header */
-#define VIRTIO_CONFIG_DEVICE_FEATURES 0 /* 32bit */
-#define VIRTIO_CONFIG_GUEST_FEATURES 4 /* 32bit */
-
-#define VIRTIO_F_NOTIFY_ON_EMPTY (1<<24)
-#define VIRTIO_F_RING_INDIRECT_DESC (1<<28)
-#define VIRTIO_F_BAD_FEATURE (1<<30)
-
-#define VIRTIO_CONFIG_QUEUE_ADDRESS 8 /* 32bit */
-#define VIRTIO_CONFIG_QUEUE_SIZE 12 /* 16bit */
-#define VIRTIO_CONFIG_QUEUE_SELECT 14 /* 16bit */
-#define VIRTIO_CONFIG_QUEUE_NOTIFY 16 /* 16bit */
-#define VIRTIO_CONFIG_DEVICE_STATUS 18 /* 8bit */
-
-#define VIRTIO_CONFIG_DEVICE_STATUS_RESET 0
-#define VIRTIO_CONFIG_DEVICE_STATUS_ACK 1
-#define VIRTIO_CONFIG_DEVICE_STATUS_DRIVER 2
-#define VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK 4
-#define VIRTIO_CONFIG_DEVICE_STATUS_FAILED 128
-
-#define VIRTIO_CONFIG_ISR_STATUS 19 /* 8bit */
-#define VIRTIO_CONFIG_ISR_CONFIG_CHANGE 2
-
-#define VIRTIO_CONFIG_CONFIG_VECTOR 20 /* 16bit, optional */
-#define VIRTIO_CONFIG_QUEUE_VECTOR 22
-
-#define VIRTIO_CONFIG_DEVICE_CONFIG_NOMSIX 20
-#define VIRTIO_CONFIG_DEVICE_CONFIG_MSIX 24
-
-#define VIRTIO_MSI_NO_VECTOR 0xffff
-
-/* Virtqueue */
-/* This marks a buffer as continuing via the next field. */
-#define VRING_DESC_F_NEXT 1
-/*
- * This marks a buffer as write-only, from the devices's perspective.
- * (otherwise read-only).
- */
-#define VRING_DESC_F_WRITE 2
-/* This means the buffer contains a list of buffer descriptors. */
-#define VRING_DESC_F_INDIRECT 4
-
-/*
- * The Host uses this in used->flags to advise the Guest: don't kick me
- * when you add a buffer. It's unreliable, so it's simply an
- * optimization. Guest will still kick if it's out of buffers.
- */
-#define VRING_USED_F_NO_NOTIFY 1
-/*
- * The Guest uses this in avail->flags to advise the Host: don't
- * interrupt me when you consume a buffer. It's unreliable, so it's
- * simply an optimization.
- */
-#define VRING_AVAIL_F_NO_INTERRUPT 1
-
-/*
- * Virtio ring descriptors: 16 bytes.
- * These can chain together via "next".
- */
-struct vring_desc {
- /* Address (guest-physical). */
- uint64_t addr;
- /* Length. */
- uint32_t len;
- /* The flags as indicated above. */
- uint16_t flags;
- /* We chain unused descriptors via this, too */
- uint16_t next;
-} __attribute__((packed));
-
-struct vring_avail {
- uint16_t flags;
- uint16_t idx;
- uint16_t ring[];
-} __attribute__((packed));
-
-/* u32 is used here for ids for padding reasons. */
-struct vring_used_elem {
- /* Index of start of used descriptor chain. */
- uint32_t id;
- /* Total length of the descriptor chain which was written to. */
- uint32_t len;
-} __attribute__((packed));
-
-struct vring_used {
- uint16_t flags;
- uint16_t idx;
- struct vring_used_elem ring[];
-} __attribute__((packed));
-
-
-/* This has nothing to do with the system page size; the name is just confusing. */
-#define VIRTIO_PAGE_SIZE (4096)
-
-#endif /* __VIRTIOREG_H__ */
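For context, the offsets and bits removed above describe the legacy ("ABI version 0") virtio PCI register layout. Below is a minimal sketch, not part of this commit, of the init handshake a legacy driver performed against those offsets; it assumes `ioh' and `ioaddr' come from ddi_regs_map_setup() on the device's I/O BAR, and the function name is illustrative only.

#include <sys/types.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>

/* VIRTIO_CONFIG_* below are the definitions from the removed virtioreg.h. */
static void
legacy_virtio_handshake(ddi_acc_handle_t ioh, caddr_t ioaddr)
{
	uint32_t host_features;

	/* Reset the device, then acknowledge it and announce a driver. */
	ddi_put8(ioh, (uint8_t *)(ioaddr + VIRTIO_CONFIG_DEVICE_STATUS),
	    VIRTIO_CONFIG_DEVICE_STATUS_RESET);
	ddi_put8(ioh, (uint8_t *)(ioaddr + VIRTIO_CONFIG_DEVICE_STATUS),
	    VIRTIO_CONFIG_DEVICE_STATUS_ACK);
	ddi_put8(ioh, (uint8_t *)(ioaddr + VIRTIO_CONFIG_DEVICE_STATUS),
	    VIRTIO_CONFIG_DEVICE_STATUS_ACK |
	    VIRTIO_CONFIG_DEVICE_STATUS_DRIVER);

	/* Read the host feature bits; write back the subset we accept. */
	host_features = ddi_get32(ioh,
	    (uint32_t *)(ioaddr + VIRTIO_CONFIG_DEVICE_FEATURES));
	ddi_put32(ioh,
	    (uint32_t *)(ioaddr + VIRTIO_CONFIG_GUEST_FEATURES),
	    host_features & VIRTIO_F_RING_INDIRECT_DESC);

	/* Queue setup (QUEUE_SELECT/QUEUE_SIZE/QUEUE_ADDRESS) would go here. */

	/* Finally, tell the device the driver is ready. */
	ddi_put8(ioh, (uint8_t *)(ioaddr + VIRTIO_CONFIG_DEVICE_STATUS),
	    VIRTIO_CONFIG_DEVICE_STATUS_ACK |
	    VIRTIO_CONFIG_DEVICE_STATUS_DRIVER |
	    VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
}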
diff --git a/usr/src/uts/common/io/virtio/virtiovar.h b/usr/src/uts/common/io/virtio/virtiovar.h
deleted file mode 100644
index 17aebe3864..0000000000
--- a/usr/src/uts/common/io/virtio/virtiovar.h
+++ /dev/null
@@ -1,211 +0,0 @@
-/*
- * Copyright (c) 2010 Minoura Makoto.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- * Part of the file derived from `Virtio PCI Card Specification v0.8.6 DRAFT'
- * Appendix A.
- */
-
-/*
- * An interface for efficient virtio implementation.
- *
- * This header is BSD licensed so anyone can use the definitions
- * to implement compatible drivers/servers.
- *
- * Copyright 2007, 2009, IBM Corporation
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of IBM nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-/*
- * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
- */
-
-#ifndef __VIRTIOVAR_H__
-#define __VIRTIOVAR_H__
-
-#include <sys/types.h>
-#include <sys/dditypes.h>
-#include <sys/cmn_err.h>
-#include <sys/list.h>
-
-#ifdef DEBUG
-#define dev_debug(dip, fmt, arg...) \
- dev_err(dip, fmt, ##arg)
-#else
-#define dev_debug(dip, fmt, arg...)
-#endif
-
-struct vq_entry {
- list_node_t qe_list;
- struct virtqueue *qe_queue;
- uint16_t qe_index; /* index in vq_desc array */
-	/* the following are used only when this is the `head' entry */
- struct vq_entry *qe_next;
- struct vring_desc *qe_desc;
- ddi_dma_cookie_t qe_indirect_dma_cookie;
- ddi_dma_handle_t qe_indirect_dma_handle;
- ddi_acc_handle_t qe_indirect_dma_acch;
- struct vring_desc *qe_indirect_descs;
- unsigned int qe_indirect_next;
-};
-
-struct virtqueue {
- struct virtio_softc *vq_owner;
- unsigned int vq_num; /* queue size (# of entries) */
- unsigned int vq_indirect_num;
- int vq_index; /* queue number (0, 1, ...) */
-
- /* vring pointers (KVA) */
- struct vring_desc *vq_descs;
- struct vring_avail *vq_avail;
- struct vring_used *vq_used;
-
- /* virtqueue allocation info */
- void *vq_vaddr;
- int vq_availoffset;
- int vq_usedoffset;
- ddi_dma_cookie_t vq_dma_cookie;
- ddi_dma_handle_t vq_dma_handle;
- ddi_acc_handle_t vq_dma_acch;
-
- int vq_maxsegsize;
-
- /* free entry management */
- struct vq_entry *vq_entries;
- list_t vq_freelist;
- kmutex_t vq_freelist_lock;
- int vq_used_entries;
-
- /* enqueue/dequeue status */
- uint16_t vq_avail_idx;
- kmutex_t vq_avail_lock;
- uint16_t vq_used_idx;
- kmutex_t vq_used_lock;
-};
-
-struct virtio_softc {
- dev_info_t *sc_dev;
-
- uint_t sc_intr_prio;
-
- ddi_acc_handle_t sc_ioh;
- caddr_t sc_io_addr;
- int sc_config_offset;
-
- uint32_t sc_features;
-
- int sc_nvqs; /* set by the user */
-
- ddi_intr_handle_t *sc_intr_htable;
- int sc_intr_num;
- boolean_t sc_intr_config;
- int sc_intr_cap;
- int sc_int_type;
-};
-
-struct virtio_int_handler {
- ddi_intr_handler_t *vh_func;
- void *vh_priv;
-};
-
-/* public interface */
-uint32_t virtio_negotiate_features(struct virtio_softc *, uint32_t);
-size_t virtio_show_features(uint32_t features, char *buffer, size_t len);
-boolean_t virtio_has_feature(struct virtio_softc *sc, uint32_t feature);
-void virtio_set_status(struct virtio_softc *sc, unsigned int);
-#define virtio_device_reset(sc) virtio_set_status((sc), 0)
-
-uint8_t virtio_read_device_config_1(struct virtio_softc *sc,
- unsigned int index);
-uint16_t virtio_read_device_config_2(struct virtio_softc *sc,
- unsigned int index);
-uint32_t virtio_read_device_config_4(struct virtio_softc *sc,
- unsigned int index);
-uint64_t virtio_read_device_config_8(struct virtio_softc *sc,
- unsigned int index);
-void virtio_write_device_config_1(struct virtio_softc *sc,
- unsigned int index, uint8_t value);
-void virtio_write_device_config_2(struct virtio_softc *sc,
- unsigned int index, uint16_t value);
-void virtio_write_device_config_4(struct virtio_softc *sc,
- unsigned int index, uint32_t value);
-void virtio_write_device_config_8(struct virtio_softc *sc,
- unsigned int index, uint64_t value);
-
-struct virtqueue *virtio_alloc_vq(struct virtio_softc *sc,
- unsigned int index, unsigned int size,
- unsigned int indirect_num, const char *name);
-void virtio_free_vq(struct virtqueue *);
-void virtio_reset(struct virtio_softc *);
-struct vq_entry *vq_alloc_entry(struct virtqueue *vq);
-void vq_free_entry(struct virtqueue *vq, struct vq_entry *qe);
-uint_t vq_num_used(struct virtqueue *vq);
-unsigned int virtio_ve_indirect_available(struct vq_entry *qe);
-
-void virtio_stop_vq_intr(struct virtqueue *);
-void virtio_start_vq_intr(struct virtqueue *);
-
-void virtio_ve_add_cookie(struct vq_entry *qe, ddi_dma_handle_t dma_handle,
- ddi_dma_cookie_t dma_cookie, unsigned int ncookies, boolean_t write);
-void virtio_ve_add_indirect_buf(struct vq_entry *qe, uint64_t paddr,
- uint32_t len, boolean_t write);
-void virtio_ve_set(struct vq_entry *qe, uint64_t paddr, uint32_t len,
- boolean_t write);
-
-void virtio_push_chain(struct vq_entry *qe, boolean_t sync);
-struct vq_entry *virtio_pull_chain(struct virtqueue *vq, uint32_t *len);
-void virtio_free_chain(struct vq_entry *ve);
-void virtio_sync_vq(struct virtqueue *vq);
-
-int virtio_register_ints(struct virtio_softc *sc,
- struct virtio_int_handler *config_handler,
- struct virtio_int_handler vq_handlers[]);
-void virtio_release_ints(struct virtio_softc *sc);
-int virtio_enable_ints(struct virtio_softc *sc);
-
-#endif /* __VIRTIOVAR_H__ */
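For context, the prototypes removed above were the public interface of the legacy virtio framework that this merge replaces with the new virtio.c/virtio.h added elsewhere in the commit. A rough sketch of how a legacy consumer (e.g. the old vioblk) drove that interface follows; it assumes `sc' is an attached virtio_softc, `qsize' matches the device's queue size, and `paddr'/`len' describe an already DMA-mapped buffer. The function and variable names in the sketch are illustrative and do not appear in the source.

#include <sys/types.h>
#include <sys/errno.h>
/* plus the declarations from the virtioreg.h/virtiovar.h removed above */

static int
legacy_submit_one(struct virtio_softc *sc, unsigned int qsize,
    uint64_t paddr, uint32_t len)
{
	struct virtqueue *vq;
	struct vq_entry *ve;
	uint32_t done_len;

	/* Set up queue index 0, without indirect descriptors. */
	vq = virtio_alloc_vq(sc, 0, qsize, 0, "request queue");
	if (vq == NULL)
		return (ENOMEM);

	/* Take a free descriptor and point it at the buffer. */
	ve = vq_alloc_entry(vq);
	if (ve == NULL) {
		virtio_free_vq(vq);
		return (EBUSY);
	}
	virtio_ve_set(ve, paddr, len, B_TRUE);	/* device writes the buffer */

	/* Post the chain on the avail ring; sync == B_TRUE kicks the host. */
	virtio_push_chain(ve, B_TRUE);

	/* Reap the completion; a real driver did this from its interrupt. */
	while ((ve = virtio_pull_chain(vq, &done_len)) == NULL)
		continue;
	virtio_free_chain(ve);

	virtio_free_vq(vq);
	return (0);
}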
diff --git a/usr/src/uts/common/smbsrv/smb2_kproto.h b/usr/src/uts/common/smbsrv/smb2_kproto.h
index 97b13af868..ed553bedcd 100644
--- a/usr/src/uts/common/smbsrv/smb2_kproto.h
+++ b/usr/src/uts/common/smbsrv/smb2_kproto.h
@@ -32,6 +32,7 @@ extern uint32_t smb2_dh_def_timeout;
extern uint32_t smb2_dh_max_timeout;
extern uint32_t smb2_res_def_timeout;
extern uint32_t smb2_res_max_timeout;
+extern uint32_t smb2_persist_timeout;
extern int smb2_enable_dh;
#define SMB3_CLIENT_ENCRYPTS(sr) \
@@ -131,7 +132,7 @@ uint32_t smb2_setinfo_quota(smb_request_t *, smb_setinfo_t *);
void smb2_oplock_acquire(smb_request_t *sr);
void smb2_oplock_reconnect(smb_request_t *sr);
void smb2_lease_acquire(smb_request_t *sr);
-uint32_t smb2_lease_create(smb_request_t *sr);
+uint32_t smb2_lease_create(smb_request_t *sr, uint8_t *);
void smb2_lease_rele(smb_lease_t *);
void smb2_lease_init(void);
void smb2_lease_fini(void);
@@ -142,6 +143,15 @@ void smb2_durable_timers(smb_server_t *);
uint32_t smb2_dh_reconnect(smb_request_t *);
boolean_t smb_dh_should_save(smb_ofile_t *);
extern void smb2_dh_shutdown(smb_server_t *);
+int smb2_dh_new_ca_share(smb_server_t *, smb_kshare_t *);
+void smb2_dh_close_persistent(smb_ofile_t *);
+void smb2_dh_close_my_orphans(smb_request_t *, smb_ofile_t *);
+int smb2_dh_make_persistent(smb_request_t *, smb_ofile_t *);
+void smb2_dh_setdoc_persistent(smb_ofile_t *);
+void smb2_dh_update_nvfile(smb_request_t *);
+void smb2_dh_update_oplock(smb_request_t *, smb_ofile_t *);
+void smb2_dh_update_locks(smb_request_t *, smb_ofile_t *);
+void smb2_dh_update_times(smb_request_t *, smb_ofile_t *, smb_attr_t *);
#ifdef __cplusplus
}
diff --git a/usr/src/uts/common/smbsrv/smb_kproto.h b/usr/src/uts/common/smbsrv/smb_kproto.h
index d18ff80d5e..751f047e0c 100644
--- a/usr/src/uts/common/smbsrv/smb_kproto.h
+++ b/usr/src/uts/common/smbsrv/smb_kproto.h
@@ -338,6 +338,8 @@ boolean_t smb_validate_dirname(smb_request_t *, smb_pathname_t *);
boolean_t smb_validate_object_name(smb_request_t *, smb_pathname_t *);
boolean_t smb_validate_stream_name(smb_request_t *, smb_pathname_t *);
boolean_t smb_is_stream_name(char *);
+boolean_t smb_strname_restricted(char *);
+
void smb_stream_parse_name(char *, char *, char *);
@@ -438,7 +440,7 @@ int smb_server_get_count(void);
int smb_server_g_init(void);
void smb_server_g_fini(void);
int smb_server_create(void);
-int smb_server_delete(void);
+int smb_server_delete(smb_server_t *);
int smb_server_configure(smb_ioc_cfg_t *);
int smb_server_start(smb_ioc_start_t *);
int smb_server_stop(void);
@@ -451,7 +453,7 @@ int smb_server_numopen(smb_ioc_opennum_t *);
int smb_server_enum(smb_ioc_svcenum_t *);
int smb_server_session_close(smb_ioc_session_t *);
int smb_server_file_close(smb_ioc_fileid_t *);
-int smb_server_sharevp(smb_server_t *, const char *, vnode_t **);
+int smb_server_share_lookup(smb_server_t *, const char *, smb_node_t **);
int smb_server_unshare(const char *);
void smb_server_logoff_ssnid(smb_request_t *, uint64_t);
@@ -553,14 +555,6 @@ int smb_pathname(smb_request_t *, char *, int, smb_node_t *,
smb_node_t *, smb_node_t **, smb_node_t **, cred_t *);
/*
- * smb_vfs functions
- */
-
-int smb_vfs_hold(smb_export_t *, vfs_t *);
-void smb_vfs_rele(smb_export_t *, vfs_t *);
-void smb_vfs_rele_all(smb_export_t *);
-
-/*
* smb_notify.c
*/
uint32_t smb_notify_act1(smb_request_t *, uint32_t, uint32_t);
@@ -633,6 +627,7 @@ smb_tree_t *smb_session_lookup_volume(smb_session_t *, const char *,
void smb_session_close_pid(smb_session_t *, uint32_t);
void smb_session_disconnect_owned_trees(smb_session_t *, smb_user_t *);
void smb_session_disconnect_share(smb_session_t *, const char *);
+void smb_session_logoff(smb_session_t *);
void smb_session_getclient(smb_session_t *, char *, size_t);
boolean_t smb_session_isclient(smb_session_t *, const char *);
void smb_session_correct_keep_alive_values(smb_llist_t *, uint32_t);
@@ -654,7 +649,7 @@ smb_ofile_t *smb_ofile_lookup_by_uniqid(smb_tree_t *, uint32_t);
smb_ofile_t *smb_ofile_lookup_by_persistid(smb_request_t *, uint64_t);
boolean_t smb_ofile_disallow_fclose(smb_ofile_t *);
smb_ofile_t *smb_ofile_alloc(smb_request_t *, smb_arg_open_t *, smb_node_t *,
- uint16_t, uint16_t, uint32_t);
+ uint16_t, uint16_t);
void smb_ofile_open(smb_request_t *, smb_arg_open_t *, smb_ofile_t *);
void smb_ofile_close(smb_ofile_t *, int32_t);
void smb_ofile_free(smb_ofile_t *);
@@ -678,7 +673,9 @@ void smb_delayed_write_timer(smb_llist_t *);
void smb_ofile_set_quota_resume(smb_ofile_t *, char *);
void smb_ofile_get_quota_resume(smb_ofile_t *, char *, int);
void smb_ofile_del_persistid(smb_ofile_t *);
-void smb_ofile_set_persistid(smb_ofile_t *);
+void smb_ofile_set_persistid_dh(smb_ofile_t *);
+void smb_ofile_set_persistid_ph(smb_ofile_t *);
+int smb_ofile_insert_persistid(smb_ofile_t *, uint64_t);
#define SMB_OFILE_GET_SESSION(of) ((of)->f_session)
#define SMB_OFILE_GET_TREE(of) ((of)->f_tree)
@@ -734,6 +731,7 @@ void smb_user_netinfo_fini(smb_netuserinfo_t *);
int smb_user_netinfo_encode(smb_user_t *, uint8_t *, size_t, uint32_t *);
smb_token_t *smb_get_token(smb_session_t *, smb_logon_t *);
cred_t *smb_cred_create(smb_token_t *);
+cred_t *smb_kcred_create(void);
void smb_user_setcred(smb_user_t *, cred_t *, uint32_t);
boolean_t smb_is_same_user(cred_t *, cred_t *);
@@ -741,6 +739,7 @@ boolean_t smb_is_same_user(cred_t *, cred_t *);
* SMB tree functions (file smb_tree.c)
*/
uint32_t smb_tree_connect(smb_request_t *);
+uint32_t smb_tree_connect_disk(smb_request_t *, smb_arg_tcon_t *);
void smb_tree_disconnect(smb_tree_t *, boolean_t);
void smb_tree_close_pid(smb_tree_t *, uint32_t);
boolean_t smb_tree_has_feature(smb_tree_t *, uint_t);
@@ -751,6 +750,8 @@ void smb_tree_hold_internal(smb_tree_t *);
void smb_tree_release(smb_tree_t *);
smb_odir_t *smb_tree_lookup_odir(smb_request_t *, uint16_t);
boolean_t smb_tree_is_connected(smb_tree_t *);
+smb_tree_t *smb_tree_alloc(smb_request_t *, const smb_kshare_t *,
+ smb_node_t *, uint32_t, uint32_t);
smb_xa_t *smb_xa_create(smb_session_t *session, smb_request_t *sr,
uint32_t total_parameter_count, uint32_t total_data_count,
@@ -937,7 +938,7 @@ void smb_threshold_exit(smb_cmd_threshold_t *);
void smb_threshold_wake_all(smb_cmd_threshold_t *);
/* SMB hash function prototypes */
-smb_hash_t *smb_hash_create(size_t, size_t, uint32_t num_buckets);
+smb_hash_t *smb_hash_create(size_t, size_t, uint32_t);
void smb_hash_destroy(smb_hash_t *);
uint_t smb_hash_uint64(smb_hash_t *, uint64_t);
diff --git a/usr/src/uts/common/smbsrv/smb_ktypes.h b/usr/src/uts/common/smbsrv/smb_ktypes.h
index 09e52b70f7..1f8ce704fb 100644
--- a/usr/src/uts/common/smbsrv/smb_ktypes.h
+++ b/usr/src/uts/common/smbsrv/smb_ktypes.h
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2017 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2018 Nexenta Systems, Inc. All rights reserved.
*/
/*
@@ -61,6 +61,7 @@ extern "C" {
struct __door_handle; /* <sys/door.h> */
struct edirent; /* <sys/extdirent.h> */
+struct nvlist;
struct smb_disp_entry;
struct smb_request;
@@ -476,7 +477,6 @@ typedef struct {
typedef struct smb_export {
kmutex_t e_mutex;
boolean_t e_ready;
- smb_llist_t e_vfs_list;
smb_avl_t e_share_avl;
smb_slist_t e_unexport_list;
smb_thread_t e_unexport_thread;
@@ -629,16 +629,6 @@ typedef struct smb_lease {
uint8_t ls_clnt[SMB_LEASE_KEY_SZ];
} smb_lease_t;
-#define SMB_VFS_MAGIC 0x534D4256 /* 'SMBV' */
-
-typedef struct smb_vfs {
- list_node_t sv_lnd;
- uint32_t sv_magic;
- uint32_t sv_refcnt;
- vfs_t *sv_vfsp;
- vnode_t *sv_rootvp;
-} smb_vfs_t;
-
#define SMB_NODE_MAGIC 0x4E4F4445 /* 'NODE' */
#define SMB_NODE_VALID(p) ASSERT((p)->n_magic == SMB_NODE_MAGIC)
@@ -703,6 +693,9 @@ typedef struct smb_node {
typedef struct smb_kshare {
uint32_t shr_magic;
+ avl_node_t shr_link;
+ kmutex_t shr_mutex;
+ kcondvar_t shr_cv;
char *shr_name;
char *shr_path;
char *shr_cmnt;
@@ -717,8 +710,9 @@ typedef struct smb_kshare {
char *shr_access_none;
char *shr_access_ro;
char *shr_access_rw;
- avl_node_t shr_link;
- kmutex_t shr_mutex;
+ smb_node_t *shr_root_node;
+ smb_node_t *shr_ca_dir;
+ void *shr_import_busy;
smb_cfg_val_t shr_encrypt; /* Share.EncryptData */
} smb_kshare_t;
@@ -984,7 +978,7 @@ typedef struct smb_session {
unsigned char MAC_key[44];
char ip_addr_str[INET6_ADDRSTRLEN];
uint8_t clnt_uuid[16];
- char workstation[SMB_PI_MAX_HOST];
+ char workstation[SMB_PI_MAX_HOST];
} smb_session_t;
/*
@@ -1100,6 +1094,7 @@ typedef struct smb_user {
#define SMB_TREE_SPARSE 0x00040000
#define SMB_TREE_TRAVERSE_MOUNTS 0x00080000
#define SMB_TREE_FORCE_L2_OPLOCK 0x00100000
+#define SMB_TREE_CA 0x00200000
/* Note: SMB_TREE_... in the mdb module too. */
/*
@@ -1166,15 +1161,15 @@ typedef struct smb_tree {
(((sr) && (sr)->tid_tree) ? \
(((sr)->tid_tree->t_access) & (acemask)) : 0)))
-#define SMB_TREE_SUPPORTS_CATIA(sr) \
+#define SMB_TREE_SUPPORTS_CATIA(sr) \
(((sr) && (sr)->tid_tree) ? \
smb_tree_has_feature((sr)->tid_tree, SMB_TREE_CATIA) : 0)
-#define SMB_TREE_SUPPORTS_ABE(sr) \
+#define SMB_TREE_SUPPORTS_ABE(sr) \
(((sr) && (sr)->tid_tree) ? \
smb_tree_has_feature((sr)->tid_tree, SMB_TREE_ABE) : 0)
-#define SMB_TREE_IS_DFSROOT(sr) \
+#define SMB_TREE_IS_DFSROOT(sr) \
(((sr) && (sr)->tid_tree) ? \
smb_tree_has_feature((sr)->tid_tree, SMB_TREE_DFSROOT) : 0)
@@ -1202,7 +1197,7 @@ typedef struct smb_tree {
(SMB_TREE_IS_READONLY((sr)) || \
smb_node_file_is_readonly((node)))
-#define SMB_ODIR_MAGIC 0x4F444952 /* 'ODIR' */
+#define SMB_ODIR_MAGIC 0x4F444952 /* 'ODIR' */
#define SMB_ODIR_VALID(p) \
ASSERT((p != NULL) && ((p)->d_magic == SMB_ODIR_MAGIC))
@@ -1332,7 +1327,7 @@ typedef struct smb_opipe {
#define SMB_OFLAGS_SET_DELETE_ON_CLOSE 0x0004
#define SMB_OFLAGS_LLF_POS_VALID 0x0008
-#define SMB_OFILE_MAGIC 0x4F464C45 /* 'OFLE' */
+#define SMB_OFILE_MAGIC 0x4F464C45 /* 'OFLE' */
#define SMB_OFILE_VALID(p) \
ASSERT((p != NULL) && ((p)->f_magic == SMB_OFILE_MAGIC))
@@ -1416,6 +1411,10 @@ typedef struct smb_ofile {
hrtime_t dh_timeout_offset; /* time offset for timeout */
hrtime_t dh_expire_time; /* time the handle expires */
boolean_t dh_persist;
+ kmutex_t dh_nvlock;
+ struct nvlist *dh_nvlist;
+ smb_node_t *dh_nvfile;
+
uint8_t dh_create_guid[16];
char f_quota_resume[SMB_SID_STRSZ];
uint8_t f_lock_seq[SMB_OFILE_LSEQ_MAX];
@@ -1441,7 +1440,7 @@ typedef struct smb_streaminfo {
char si_name[MAXPATHLEN];
} smb_streaminfo_t;
-#define SMB_LOCK_MAGIC 0x4C4F434B /* 'LOCK' */
+#define SMB_LOCK_MAGIC 0x4C4F434B /* 'LOCK' */
typedef struct smb_lock {
list_node_t l_lnd;
@@ -1472,7 +1471,7 @@ typedef struct smb_lock {
typedef struct vardata_block {
uint8_t vdb_tag;
uint32_t vdb_len;
- struct uio vdb_uio;
+ struct uio vdb_uio;
struct iovec vdb_iovec[MAX_IOVEC];
} smb_vdb_t;
@@ -1760,7 +1759,7 @@ typedef struct smb_arg_olbrk {
*
*/
-#define SMB_REQ_MAGIC 0x534D4252 /* 'SMBR' */
+#define SMB_REQ_MAGIC 0x534D4252 /* 'SMBR' */
#define SMB_REQ_VALID(p) ASSERT((p)->sr_magic == SMB_REQ_MAGIC)
typedef enum smb_req_state {
@@ -1810,7 +1809,7 @@ typedef struct smb_request {
list_t sr_storage;
struct smb_xa *r_xa;
int andx_prev_wct;
- int cur_reply_offset;
+ int cur_reply_offset;
int orig_request_hdr;
unsigned int reply_seqnum; /* reply sequence number */
unsigned char first_smb_com; /* command code */
@@ -1868,6 +1867,7 @@ typedef struct smb_request {
uint8_t nonce[16];
boolean_t encrypted;
+ boolean_t dh_nvl_dirty;
boolean_t smb2_async;
uint64_t smb2_async_id;
@@ -2068,7 +2068,7 @@ typedef enum smb_server_state {
typedef struct {
/* protected by sv_mutex */
kcondvar_t sp_cv;
- uint32_t sp_cnt;
+ uint32_t sp_cnt;
smb_llist_t sp_list;
smb_llist_t sp_fidlist;
} smb_spool_t;
@@ -2094,11 +2094,12 @@ typedef struct smb_server {
krwlock_t sv_cfg_lock;
smb_kmod_cfg_t sv_cfg;
smb_session_t *sv_session;
+ smb_user_t *sv_rootuser;
smb_llist_t sv_session_list;
smb_hash_t *sv_persistid_ht;
smb_hash_t *sv_lease_ht;
- struct smb_export sv_export;
+ smb_export_t sv_export;
struct __door_handle *sv_lmshrd;
/* Internal door for up-calls to smbd */
diff --git a/usr/src/uts/common/smbsrv/smb_share.h b/usr/src/uts/common/smbsrv/smb_share.h
index 7c2219caad..090de59105 100644
--- a/usr/src/uts/common/smbsrv/smb_share.h
+++ b/usr/src/uts/common/smbsrv/smb_share.h
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2017 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2018 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2016 by Delphix. All rights reserved.
*/
@@ -92,6 +92,7 @@ extern "C" {
#define SHOPT_AD_CONTAINER "ad-container"
#define SHOPT_ABE "abe"
#define SHOPT_NAME "name"
+#define SHOPT_CA "ca"
#define SHOPT_CSC "csc"
#define SHOPT_CATIA "catia"
#define SHOPT_GUEST "guestok"
@@ -185,6 +186,7 @@ extern "C" {
#define SMB_SHRF_QUOTAS 0x1000 /* Enable SMB Quotas */
#define SMB_SHRF_FSO 0x2000 /* Force Shared Oplocks */
+#define SMB_SHRF_CA 0x4000 /* Continuous Availability */
/*
* Runtime flags
@@ -193,6 +195,7 @@ extern "C" {
#define SMB_SHRF_TRANS 0x10000000
#define SMB_SHRF_PERM 0x20000000
#define SMB_SHRF_AUTOHOME 0x40000000
+#define SMB_SHRF_REMOVED 0x80000000 /* unshared */
#define SMB_SHARE_PRINT "print$"
#define SMB_SHARE_PRINT_LEN 6
diff --git a/usr/src/uts/intel/vioblk/Makefile b/usr/src/uts/intel/vioblk/Makefile
index 5e5783fca6..ace9b626d0 100644
--- a/usr/src/uts/intel/vioblk/Makefile
+++ b/usr/src/uts/intel/vioblk/Makefile
@@ -1,90 +1,68 @@
#
-# CDDL HEADER START
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
#
+
#
# Copyright 2012 Nexenta Systems, Inc. All rights reserved.
+# Copyright 2019 Joyent, Inc.
#
#
-# Path to the base of the uts directory tree (usually /usr/src/uts).
+# Path to the base of the uts directory tree (usually /usr/src/uts).
#
-UTSBASE = ../..
+UTSBASE = ../..
#
-# Define the module and object file sets.
+# Define the module and object file sets.
#
-MODULE = vioblk
-OBJECTS = $(VIOBLK_OBJS:%=$(OBJS_DIR)/%)
-LINTS = $(VIOBLK_OBJS:%.o=$(LINTS_DIR)/%.ln)
-ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
+MODULE = vioblk
+OBJECTS = $(VIOBLK_OBJS:%=$(OBJS_DIR)/%)
+ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
#
-# Include common rules.
+# Include common rules.
#
include $(UTSBASE)/intel/Makefile.intel
#
-# Define targets
+# Define targets
#
-ALL_TARGET = $(BINARY)
-LINT_TARGET = $(MODULE).lint
-INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+ALL_TARGET = $(BINARY)
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
#
-# Overrides
+# Overrides
#
-
-INC_PATH += -I$(UTSBASE)/common/io/virtio
-
-#
-# lint pass one enforcement
-#
-CFLAGS += $(CCVERBOSE)
+INC_PATH += -I$(UTSBASE)/common/io/virtio
#
# Driver depends on virtio and blkdev
#
-LDFLAGS += -dy -N misc/virtio -N drv/blkdev
+LDFLAGS += -dy -N misc/virtio -N drv/blkdev
#
-# Default build targets.
+# Default build targets.
#
.KEEP_STATE:
-def: $(DEF_DEPS)
-
-all: $(ALL_DEPS)
-
-clean: $(CLEAN_DEPS)
-
-clobber: $(CLOBBER_DEPS)
+def: $(DEF_DEPS)
-lint: $(LINT_DEPS)
+all: $(ALL_DEPS)
-modlintlib: $(MODLINTLIB_DEPS)
+clean: $(CLEAN_DEPS)
-clean.lint: $(CLEAN_LINT_DEPS)
+clobber: $(CLOBBER_DEPS)
-install: $(INSTALL_DEPS)
+install: $(INSTALL_DEPS)
#
-# Include common targets.
+# Include common targets.
#
include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/vioif/Makefile b/usr/src/uts/intel/vioif/Makefile
index ba87d97c61..a2dc4a337b 100644
--- a/usr/src/uts/intel/vioif/Makefile
+++ b/usr/src/uts/intel/vioif/Makefile
@@ -11,70 +11,58 @@
#
# Copyright 2013 Nexenta Inc. All rights reserved.
+# Copyright 2019 Joyent, Inc.
#
#
-# Path to the base of the uts directory tree (usually /usr/src/uts).
+# Path to the base of the uts directory tree (usually /usr/src/uts).
#
-UTSBASE = ../..
+UTSBASE = ../..
#
-# Define the module and object file sets.
+# Define the module and object file sets.
#
-MODULE = vioif
-OBJECTS = $(VIOIF_OBJS:%=$(OBJS_DIR)/%)
-LINTS = $(VIOIF_OBJS:%.o=$(LINTS_DIR)/%.ln)
-ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
+MODULE = vioif
+OBJECTS = $(VIOIF_OBJS:%=$(OBJS_DIR)/%)
+ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
#
-# Include common rules.
+# Include common rules.
#
include $(UTSBASE)/intel/Makefile.intel
#
-# Define targets
+# Define targets
#
-ALL_TARGET = $(BINARY)
-LINT_TARGET = $(MODULE).lint
-INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+ALL_TARGET = $(BINARY)
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
#
-# Overrides
+# Overrides
#
+INC_PATH += -I$(UTSBASE)/common/io/virtio
-INC_PATH += -I$(UTSBASE)/common/io/virtio
-
-#
-# lint pass one enforcement
-#
-CFLAGS += $(CCVERBOSE)
#
-# Driver depends on virtio and blkdev
+# Driver depends on virtio and mac
#
-LDFLAGS += -dy -N misc/virtio -N misc/mac
+LDFLAGS += -dy -N misc/virtio -N misc/mac
#
-# Default build targets.
+# Default build targets.
#
.KEEP_STATE:
-def: $(DEF_DEPS)
-
-all: $(ALL_DEPS)
-
-clean: $(CLEAN_DEPS)
-
-clobber: $(CLOBBER_DEPS)
+def: $(DEF_DEPS)
-lint: $(LINT_DEPS)
+all: $(ALL_DEPS)
-modlintlib: $(MODLINTLIB_DEPS)
+clean: $(CLEAN_DEPS)
-clean.lint: $(CLEAN_LINT_DEPS)
+clobber: $(CLOBBER_DEPS)
-install: $(INSTALL_DEPS)
+install: $(INSTALL_DEPS)
#
-# Include common targets.
+# Include common targets.
#
include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/virtio/Makefile b/usr/src/uts/intel/virtio/Makefile
index 1f6548a135..c5a0d05b6a 100644
--- a/usr/src/uts/intel/virtio/Makefile
+++ b/usr/src/uts/intel/virtio/Makefile
@@ -1,90 +1,63 @@
#
-# CDDL HEADER START
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
#
#
# Copyright (c) 2012 Nexenta Systems, Inc. All rights reserved.
-# Copyright (c) 2018, Joyent, Inc.
+# Copyright 2019 Joyent, Inc.
#
#
-# Path to the base of the uts directory tree (usually /usr/src/uts).
+# Path to the base of the uts directory tree (usually /usr/src/uts).
#
-UTSBASE = ../..
+UTSBASE = ../..
#
-# Define the module and object file sets.
+# Define the module and object file sets.
#
-MODULE = virtio
-OBJECTS = $(VIRTIO_OBJS:%=$(OBJS_DIR)/%)
-LINTS = $(VIRTIO_OBJS:%.o=$(LINTS_DIR)/%.ln)
-ROOTMODULE = $(ROOT_MISC_DIR)/$(MODULE)
+MODULE = virtio
+OBJECTS = $(VIRTIO_OBJS:%=$(OBJS_DIR)/%)
+ROOTMODULE = $(ROOT_MISC_DIR)/$(MODULE)
#
-# Include common rules.
+# Include common rules.
#
include $(UTSBASE)/intel/Makefile.intel
#
-# Define targets
+# Define targets
#
-ALL_TARGET = $(BINARY)
-LINT_TARGET = $(MODULE).lint
-INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+ALL_TARGET = $(BINARY)
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
#
-# Overrides
+# Overrides
#
-
-INC_PATH += -I$(UTSBASE)/common/io/virtio
+INC_PATH += -I$(UTSBASE)/common/io/virtio
#
-# lint pass one enforcement
-#
-CFLAGS += $(CCVERBOSE)
-
-# needs work
-SMOFF += all_func_returns
-
-#
-# Default build targets.
+# Default build targets.
#
.KEEP_STATE:
-def: $(DEF_DEPS)
-
-all: $(ALL_DEPS)
-
-clean: $(CLEAN_DEPS)
-
-clobber: $(CLOBBER_DEPS)
+def: $(DEF_DEPS)
-lint: $(LINT_DEPS)
+all: $(ALL_DEPS)
-modlintlib: $(MODLINTLIB_DEPS)
+clean: $(CLEAN_DEPS)
-clean.lint: $(CLEAN_LINT_DEPS)
+clobber: $(CLOBBER_DEPS)
-install: $(INSTALL_DEPS)
+install: $(INSTALL_DEPS)
#
-# Include common targets.
+# Include common targets.
#
include $(UTSBASE)/intel/Makefile.targ