summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--usr/src/pkgdefs/Makefile1
-rw-r--r--usr/src/pkgdefs/SUNWdcopy/Makefile (renamed from deleted_files/usr/src/pkgdefs/SUNWdcopy/Makefile)0
-rw-r--r--usr/src/pkgdefs/SUNWdcopy/pkginfo.tmpl (renamed from deleted_files/usr/src/pkgdefs/SUNWdcopy/pkginfo.tmpl)0
-rw-r--r--usr/src/pkgdefs/SUNWdcopy/postinstall.tmpl (renamed from deleted_files/usr/src/pkgdefs/SUNWdcopy/postinstall.tmpl)0
-rw-r--r--usr/src/pkgdefs/SUNWdcopy/preremove.tmpl (renamed from deleted_files/usr/src/pkgdefs/SUNWdcopy/preremove.tmpl)0
-rw-r--r--usr/src/pkgdefs/SUNWdcopy/prototype_com (renamed from deleted_files/usr/src/pkgdefs/SUNWdcopy/prototype_com)0
-rw-r--r--usr/src/pkgdefs/SUNWdcopy/prototype_i386 (renamed from deleted_files/usr/src/pkgdefs/SUNWdcopy/prototype_i386)0
-rw-r--r--usr/src/pkgdefs/SUNWhea/prototype_com1
-rw-r--r--usr/src/uts/common/fs/sockfs/socksctp.c6
-rwxr-xr-xusr/src/uts/common/fs/sockfs/socksdp.c6
-rw-r--r--usr/src/uts/common/fs/sockfs/sockstr.c170
-rw-r--r--usr/src/uts/common/fs/sockfs/socksubr.c25
-rw-r--r--usr/src/uts/common/fs/sockfs/socktpi.c127
-rw-r--r--usr/src/uts/common/fs/sockfs/sockvnops.c10
-rw-r--r--usr/src/uts/common/inet/tcp.h10
-rw-r--r--usr/src/uts/common/inet/tcp/tcp.c536
-rw-r--r--usr/src/uts/common/inet/tcp/tcp6ddi.c4
-rw-r--r--usr/src/uts/common/inet/tcp/tcp_fusion.c14
-rw-r--r--usr/src/uts/common/inet/tcp/tcpddi.c4
-rw-r--r--usr/src/uts/common/io/dcopy.c (renamed from deleted_files/usr/src/uts/common/io/dcopy.c)6
-rw-r--r--usr/src/uts/common/io/stream.c50
-rw-r--r--usr/src/uts/common/os/move.c400
-rw-r--r--usr/src/uts/common/os/streamio.c176
-rw-r--r--usr/src/uts/common/os/strsubr.c11
-rw-r--r--usr/src/uts/common/sys/Makefile1
-rw-r--r--usr/src/uts/common/sys/conf.h5
-rw-r--r--usr/src/uts/common/sys/dcopy.h (renamed from deleted_files/usr/src/uts/common/sys/dcopy.h)4
-rw-r--r--usr/src/uts/common/sys/dcopy_device.h (renamed from deleted_files/usr/src/uts/common/sys/dcopy_device.h)0
-rw-r--r--usr/src/uts/common/sys/socketvar.h9
-rw-r--r--usr/src/uts/common/sys/sodirect.h (renamed from deleted_files/usr/src/uts/common/sys/sodirect.h)0
-rw-r--r--usr/src/uts/common/sys/stream.h3
-rw-r--r--usr/src/uts/common/sys/strsubr.h8
-rw-r--r--usr/src/uts/common/sys/uio.h69
-rw-r--r--usr/src/uts/i86pc/Makefile.files1
-rw-r--r--usr/src/uts/i86pc/Makefile.i86pc.shared1
-rw-r--r--usr/src/uts/i86pc/Makefile.rules7
-rw-r--r--usr/src/uts/i86pc/io/ioat/ioat.c (renamed from deleted_files/usr/src/uts/i86pc/io/ioat/ioat.c)0
-rw-r--r--usr/src/uts/i86pc/io/ioat/ioat.conf (renamed from deleted_files/usr/src/uts/i86pc/io/ioat/ioat.conf)0
-rw-r--r--usr/src/uts/i86pc/io/ioat/ioat_chan.c (renamed from deleted_files/usr/src/uts/i86pc/io/ioat/ioat_chan.c)0
-rw-r--r--usr/src/uts/i86pc/io/ioat/ioat_ioctl.c (renamed from deleted_files/usr/src/uts/i86pc/io/ioat/ioat_ioctl.c)0
-rw-r--r--usr/src/uts/i86pc/io/ioat/ioat_rs.c (renamed from deleted_files/usr/src/uts/i86pc/io/ioat/ioat_rs.c)0
-rw-r--r--usr/src/uts/i86pc/ioat/Makefile (renamed from deleted_files/usr/src/uts/i86pc/ioat/Makefile)0
-rw-r--r--usr/src/uts/i86pc/sys/ioat.h (renamed from deleted_files/usr/src/uts/i86pc/sys/ioat.h)0
-rw-r--r--usr/src/uts/i86xpv/Makefile.files5
-rw-r--r--usr/src/uts/i86xpv/Makefile.i86xpv.shared1
-rw-r--r--usr/src/uts/i86xpv/Makefile.rules7
-rw-r--r--usr/src/uts/i86xpv/ioat/Makefile (renamed from deleted_files/usr/src/uts/i86xpv/ioat/Makefile)0
-rw-r--r--usr/src/uts/intel/Makefile.files1
-rw-r--r--usr/src/uts/intel/Makefile.intel.shared1
-rw-r--r--usr/src/uts/intel/dcopy/Makefile (renamed from deleted_files/usr/src/uts/intel/dcopy/Makefile)0
-rw-r--r--usr/src/uts/intel/ia32/ml/modstubs.s16
-rw-r--r--usr/src/uts/sparc/ml/modstubs.s16
52 files changed, 1588 insertions, 124 deletions
diff --git a/usr/src/pkgdefs/Makefile b/usr/src/pkgdefs/Makefile
index b0e0d4b520..067ddb9ae0 100644
--- a/usr/src/pkgdefs/Makefile
+++ b/usr/src/pkgdefs/Makefile
@@ -125,6 +125,7 @@ i386_SUBDIRS= \
SUNWgrub \
SUNWgrubS \
SUNWhxge \
+ SUNWdcopy \
SUNWipw \
SUNWiwi \
SUNWiwk \
diff --git a/deleted_files/usr/src/pkgdefs/SUNWdcopy/Makefile b/usr/src/pkgdefs/SUNWdcopy/Makefile
index 3431d26eb9..3431d26eb9 100644
--- a/deleted_files/usr/src/pkgdefs/SUNWdcopy/Makefile
+++ b/usr/src/pkgdefs/SUNWdcopy/Makefile
diff --git a/deleted_files/usr/src/pkgdefs/SUNWdcopy/pkginfo.tmpl b/usr/src/pkgdefs/SUNWdcopy/pkginfo.tmpl
index 3b9f1d87d6..3b9f1d87d6 100644
--- a/deleted_files/usr/src/pkgdefs/SUNWdcopy/pkginfo.tmpl
+++ b/usr/src/pkgdefs/SUNWdcopy/pkginfo.tmpl
diff --git a/deleted_files/usr/src/pkgdefs/SUNWdcopy/postinstall.tmpl b/usr/src/pkgdefs/SUNWdcopy/postinstall.tmpl
index cdb1f395bf..cdb1f395bf 100644
--- a/deleted_files/usr/src/pkgdefs/SUNWdcopy/postinstall.tmpl
+++ b/usr/src/pkgdefs/SUNWdcopy/postinstall.tmpl
diff --git a/deleted_files/usr/src/pkgdefs/SUNWdcopy/preremove.tmpl b/usr/src/pkgdefs/SUNWdcopy/preremove.tmpl
index 2526218df9..2526218df9 100644
--- a/deleted_files/usr/src/pkgdefs/SUNWdcopy/preremove.tmpl
+++ b/usr/src/pkgdefs/SUNWdcopy/preremove.tmpl
diff --git a/deleted_files/usr/src/pkgdefs/SUNWdcopy/prototype_com b/usr/src/pkgdefs/SUNWdcopy/prototype_com
index 34626771bc..34626771bc 100644
--- a/deleted_files/usr/src/pkgdefs/SUNWdcopy/prototype_com
+++ b/usr/src/pkgdefs/SUNWdcopy/prototype_com
diff --git a/deleted_files/usr/src/pkgdefs/SUNWdcopy/prototype_i386 b/usr/src/pkgdefs/SUNWdcopy/prototype_i386
index 77bcc81a7e..77bcc81a7e 100644
--- a/deleted_files/usr/src/pkgdefs/SUNWdcopy/prototype_i386
+++ b/usr/src/pkgdefs/SUNWdcopy/prototype_i386
diff --git a/usr/src/pkgdefs/SUNWhea/prototype_com b/usr/src/pkgdefs/SUNWhea/prototype_com
index d41415c209..bf0e84c13b 100644
--- a/usr/src/pkgdefs/SUNWhea/prototype_com
+++ b/usr/src/pkgdefs/SUNWhea/prototype_com
@@ -1218,6 +1218,7 @@ f none usr/include/sys/socket.h 644 root bin
f none usr/include/sys/socket_impl.h 644 root bin
f none usr/include/sys/socketvar.h 644 root bin
f none usr/include/sys/sockio.h 644 root bin
+f none usr/include/sys/sodirect.h 644 root bin
f none usr/include/sys/sservice.h 644 root bin
f none usr/include/sys/squeue.h 644 root bin
f none usr/include/sys/squeue_impl.h 644 root bin
diff --git a/usr/src/uts/common/fs/sockfs/socksctp.c b/usr/src/uts/common/fs/sockfs/socksctp.c
index 5478bbfda0..8f9ca22255 100644
--- a/usr/src/uts/common/fs/sockfs/socksctp.c
+++ b/usr/src/uts/common/fs/sockfs/socksctp.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -171,6 +171,8 @@ sosctp_sock_constructor(void *buf, void *cdrarg, int kmflags)
so->so_nl7c_uri = NULL;
so->so_nl7c_rcv_mp = NULL;
+ so->so_direct = NULL;
+
vp = vn_alloc(kmflags);
if (vp == NULL) {
return (-1);
@@ -204,6 +206,8 @@ sosctp_sock_destructor(void *buf, void *cdrarg)
struct sonode *so = &ss->ss_so;
struct vnode *vp = SOTOV(so);
+ ASSERT(so->so_direct == NULL);
+
ASSERT(so->so_nl7c_flags == 0);
ASSERT(so->so_nl7c_uri == NULL);
ASSERT(so->so_nl7c_rcv_mp == NULL);
diff --git a/usr/src/uts/common/fs/sockfs/socksdp.c b/usr/src/uts/common/fs/sockfs/socksdp.c
index 09ab4d0b49..b8482b90b1 100755
--- a/usr/src/uts/common/fs/sockfs/socksdp.c
+++ b/usr/src/uts/common/fs/sockfs/socksdp.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -133,6 +133,8 @@ sosdp_sock_constructor(void *buf, void *cdrarg, int kmflags)
so->so_nl7c_uri = NULL;
so->so_nl7c_rcv_mp = NULL;
+ so->so_direct = NULL;
+
vp = vn_alloc(kmflags);
if (vp == NULL) {
return (-1);
@@ -159,6 +161,8 @@ sosdp_sock_destructor(void *buf, void *cdrarg)
struct sonode *so = &ss->ss_so;
struct vnode *vp = SOTOV(so);
+ ASSERT(so->so_direct == NULL);
+
ASSERT(so->so_nl7c_flags == 0);
ASSERT(so->so_nl7c_uri == NULL);
ASSERT(so->so_nl7c_rcv_mp == NULL);
diff --git a/usr/src/uts/common/fs/sockfs/sockstr.c b/usr/src/uts/common/fs/sockfs/sockstr.c
index eb540644be..1e3d0aaa5d 100644
--- a/usr/src/uts/common/fs/sockfs/sockstr.c
+++ b/usr/src/uts/common/fs/sockfs/sockstr.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -69,6 +69,8 @@
#include <c2/audit.h>
+#include <sys/dcopy.h>
+
int so_default_version = SOV_SOCKSTREAM;
#ifdef DEBUG
@@ -119,6 +121,26 @@ static mblk_t *strsock_misc(vnode_t *vp, mblk_t *mp,
static int tlitosyserr(int terr);
/*
+ * Sodirect kmem_cache and put/wakeup functions.
+ */
+struct kmem_cache *socktpi_sod_cache;
+static int sodput(sodirect_t *, mblk_t *);
+static void sodwakeup(sodirect_t *);
+
+/*
+ * One-time sockfs STREAMS-side initialization, invoked from sockinit()
+ * while sockfs is being loaded.  Creates the kmem cache from which every
+ * sodirect_t is allocated.  Always returns 0.
+ */
+int
+sostr_init()
+{
+	struct kmem_cache *sod_cache;
+
+	/* No constructor/destructor/reclaim callbacks, default alignment */
+	sod_cache = kmem_cache_create("socktpi_sod_cache",
+	    sizeof (sodirect_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
+	socktpi_sod_cache = sod_cache;
+
+	return (0);
+}
+
+/*
* Convert a socket to a stream. Invoked when the illusory sockmod
* is popped from the stream.
* Change the stream head back to default operation without losing
@@ -468,6 +490,34 @@ so_strinit(struct sonode *so, struct sonode *tso)
stp->sd_qn_minpsz = 0;
mutex_exit(&stp->sd_lock);
+ /*
+ * If sodirect capable allocate and initialize sodirect_t.
+ * Note, SS_SODIRECT is set in socktpi_open().
+ */
+ if (so->so_state & SS_SODIRECT) {
+ sodirect_t *sodp;
+
+ ASSERT(so->so_direct == NULL);
+
+ sodp = kmem_cache_alloc(socktpi_sod_cache, KM_SLEEP);
+ sodp->sod_state = SOD_ENABLED | SOD_WAKE_NOT;
+ sodp->sod_want = 0;
+ sodp->sod_q = RD(stp->sd_wrq);
+ sodp->sod_enqueue = sodput;
+ sodp->sod_wakeup = sodwakeup;
+ sodp->sod_uioafh = NULL;
+ sodp->sod_uioaft = NULL;
+ sodp->sod_lock = &stp->sd_lock;
+ /*
+ * Remainder of the sod_uioa members are left uninitialized
+ * but will be initialized later by uioainit() before uioa
+ * is enabled.
+ */
+ sodp->sod_uioa.uioa_state = UIOA_ALLOC;
+ so->so_direct = sodp;
+ stp->sd_sodirect = sodp;
+ }
+
return (0);
}
@@ -2872,3 +2922,121 @@ tlitosyserr(int terr)
else
return (tli_errs[terr]);
}
+
+/*
+ * Sockfs sodirect STREAMS read put procedure. Called from a sodirect enabled
+ * transport driver/module with an mblk_t chain.
+ *
+ * Note, we in-line putq() for the fast-path cases where q is empty, or where
+ * q_last and bp are both of type M_DATA. In all other cases we call putq().
+ *
+ * The caller must hold sodp->sod_lock on entry; it is still held on return.
+ *
+ * On success a zero will be returned, else an errno will be returned. If
+ * the stream head is marked STREOF the mblk_t chain is freed and zero is
+ * returned.
+ */
+int
+sodput(sodirect_t *sodp, mblk_t *bp)
+{
+	queue_t		*q = sodp->sod_q;
+	struct stdata	*stp = (struct stdata *)q->q_ptr;
+	mblk_t		*nbp;
+	int		ret;
+	mblk_t		*last = q->q_last;
+	int		bytecnt = 0;
+	int		mblkcnt = 0;
+
+
+	ASSERT(MUTEX_HELD(sodp->sod_lock));
+
+	/*
+	 * sd_flag is a bit mask, so STREOF must be tested as a bit; an
+	 * equality compare misses EOF whenever any other flag is also set.
+	 */
+	if (stp->sd_flag & STREOF) {
+		ret = 0;
+		goto error;
+	}
+
+	if (q->q_first == NULL) {
+		/* Q empty, really fast fast-path */
+		bp->b_prev = NULL;
+		bp->b_next = NULL;
+		q->q_first = bp;
+		q->q_last = bp;
+
+	} else if (last->b_datap->db_type == M_DATA &&
+	    bp->b_datap->db_type == M_DATA) {
+		/*
+		 * Last mblk_t chain and bp are both type M_DATA so
+		 * in-line putq() here, if the DBLK_UIOA state match
+		 * add bp to the end of the current last chain, else
+		 * start a new last chain with bp.
+		 */
+		if ((last->b_datap->db_flags & DBLK_UIOA) ==
+		    (bp->b_datap->db_flags & DBLK_UIOA)) {
+			/* Added to end */
+			while ((nbp = last->b_cont) != NULL)
+				last = nbp;
+			last->b_cont = bp;
+		} else {
+			/* New last */
+			last->b_next = bp;
+			bp->b_next = NULL;
+			bp->b_prev = last;
+			q->q_last = bp;
+		}
+	} else {
+		/*
+		 * Can't use q_last so just call putq(), which does its
+		 * own queue accounting.
+		 */
+		(void) putq(q, bp);
+		return (0);
+	}
+
+	/* Count bytes and mblk_t's of the chain just linked in */
+	do {
+		bytecnt += MBLKL(bp);
+		mblkcnt++;
+	} while ((bp = bp->b_cont) != NULL);
+	q->q_count += bytecnt;
+	q->q_mblkcnt += mblkcnt;
+
+	/* Check for QFULL */
+	if (q->q_count >= q->q_hiwat + sodp->sod_want ||
+	    q->q_mblkcnt >= q->q_hiwat) {
+		q->q_flag |= QFULL;
+	}
+
+	return (0);
+
+error:
+	/* Free every message on the b_next list, unlinking each first */
+	do {
+		if ((nbp = bp->b_next) != NULL)
+			bp->b_next = NULL;
+		freemsg(bp);
+	} while ((bp = nbp) != NULL);
+
+	return (ret);
+}
+
+/*
+ * Sockfs sodirect read-side wakeup, invoked by a sodirect enabled transport
+ * driver/module once read-side data has been made available.
+ *
+ * Entered with sodp->sod_lock held; the lock is always dropped before
+ * returning, so this must be the caller's final sodirect_t operation if
+ * it needs atomic access to *sodp.
+ */
+void
+sodwakeup(sodirect_t *sodp)
+{
+	queue_t		*rq = sodp->sod_q;
+	struct stdata	*sth = (struct stdata *)rq->q_ptr;
+	int		do_poll = 0;
+
+	ASSERT(MUTEX_HELD(sodp->sod_lock));
+
+	/* Wake any thread sleeping in a read on this stream head */
+	if (sth->sd_flag & RSLEEP) {
+		sth->sd_flag &= ~RSLEEP;
+		cv_broadcast(&rq->q_wait);
+	}
+
+	/* Note (and consume) a pending poll request while still locked */
+	if (sth->sd_rput_opt & SR_POLLIN) {
+		sth->sd_rput_opt &= ~SR_POLLIN;
+		do_poll = 1;
+	}
+
+	/* The lock is dropped before pollwakeup() is called */
+	mutex_exit(sodp->sod_lock);
+
+	if (do_poll)
+		pollwakeup(&sth->sd_pollist, POLLIN | POLLRDNORM);
+}
diff --git a/usr/src/uts/common/fs/sockfs/socksubr.c b/usr/src/uts/common/fs/sockfs/socksubr.c
index 9a6e9147e3..c857c34225 100644
--- a/usr/src/uts/common/fs/sockfs/socksubr.c
+++ b/usr/src/uts/common/fs/sockfs/socksubr.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -44,6 +44,7 @@
#include <sys/file.h>
#include <sys/open.h>
#include <sys/user.h>
+#include <sys/uio.h>
#include <sys/termios.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
@@ -90,6 +91,7 @@
#define SO_LOCK_WAKEUP_TIME 3000 /* Wakeup time in milliseconds */
static struct kmem_cache *socktpi_cache, *socktpi_unix_cache;
+struct kmem_cache *socktpi_sod_cache;
dev_t sockdev; /* For fsid in getattr */
@@ -105,6 +107,8 @@ extern void sendfile_init();
extern void nl7c_init(void);
+extern int sostr_init();
+
#define ADRSTRLEN (2 * sizeof (void *) + 1)
/*
* kernel structure for passing the sockinfo data back up to the user.
@@ -523,6 +527,15 @@ sockfree(struct sonode *so)
so->so_nl7c_flags = 0;
}
+ if (so->so_direct != NULL) {
+ sodirect_t *sodp = so->so_direct;
+
+ ASSERT(sodp->sod_uioafh == NULL);
+
+ so->so_direct = NULL;
+ kmem_cache_free(socktpi_sod_cache, sodp);
+ }
+
ASSERT(so->so_ux_bound_vp == NULL);
if ((mp = so->so_unbind_mp) != NULL) {
freemsg(mp);
@@ -567,6 +580,8 @@ socktpi_constructor(void *buf, void *cdrarg, int kmflags)
struct sonode *so = buf;
struct vnode *vp;
+ so->so_direct = NULL;
+
so->so_nl7c_flags = 0;
so->so_nl7c_uri = NULL;
so->so_nl7c_rcv_mp = NULL;
@@ -606,6 +621,8 @@ socktpi_destructor(void *buf, void *cdrarg)
struct sonode *so = buf;
struct vnode *vp = SOTOV(so);
+ ASSERT(so->so_direct == NULL);
+
ASSERT(so->so_nl7c_flags == 0);
ASSERT(so->so_nl7c_uri == NULL);
ASSERT(so->so_nl7c_rcv_mp == NULL);
@@ -713,6 +730,12 @@ sockinit(int fstype, char *name)
goto failure;
}
+ error = sostr_init();
+ if (error != 0) {
+ err_str = NULL;
+ goto failure;
+ }
+
/*
* Create sonode caches. We create a special one for AF_UNIX so
* that we can track them for netstat(1m).
diff --git a/usr/src/uts/common/fs/sockfs/socktpi.c b/usr/src/uts/common/fs/sockfs/socktpi.c
index d6f9ebb57f..e632e234e2 100644
--- a/usr/src/uts/common/fs/sockfs/socktpi.c
+++ b/usr/src/uts/common/fs/sockfs/socktpi.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -58,6 +58,7 @@
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sockio.h>
+#include <sys/sodirect.h>
#include <netinet/in.h>
#include <sys/un.h>
#include <sys/strsun.h>
@@ -186,6 +187,9 @@ extern mblk_t *strsock_kssl_output(vnode_t *, mblk_t *, strwakeup_t *,
static int sotpi_unbind(struct sonode *, int);
+extern int sodput(sodirect_t *, mblk_t *);
+extern void sodwakeup(sodirect_t *);
+
/* TPI sockfs sonode operations */
static int sotpi_accept(struct sonode *, int, struct sonode **);
static int sotpi_bind(struct sonode *, struct sockaddr *, socklen_t,
@@ -2910,11 +2914,13 @@ sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop)
t_uscalar_t namelen;
int so_state = so->so_state; /* Snapshot */
ssize_t saved_resid;
- int error;
rval_t rval;
int flags;
clock_t timout;
int first;
+ int error = 0;
+ struct uio *suiop = NULL;
+ sodirect_t *sodp = so->so_direct;
flags = msg->msg_flags;
msg->msg_flags = 0;
@@ -3062,6 +3068,53 @@ sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop)
opflag = pflag;
first = 1;
+ if (uiop->uio_resid >= uioasync.mincnt &&
+ sodp != NULL && (sodp->sod_state & SOD_ENABLED) &&
+ uioasync.enabled && !(flags & MSG_PEEK) &&
+ !(so_state & SS_CANTRCVMORE)) {
+ /*
+ * Big enough I/O for uioa min setup and an sodirect socket
+ * and sodirect enabled and uioa enabled and I/O will be done
+ * and not EOF so initialize the sodirect_t uioa_t with "uiop".
+ */
+ mutex_enter(sodp->sod_lock);
+ if (!uioainit(uiop, &sodp->sod_uioa)) {
+ /*
+ * Successful uioainit() so the uio_t part of the
+ * uioa_t will be used for all uio_t work to follow,
+ * we save the original "uiop" in "suiop".
+ */
+ suiop = uiop;
+ uiop = (uio_t *)&sodp->sod_uioa;
+ /*
+ * Before returning to the caller the passed in uio_t
+ * "uiop" will be updated via a call to uioafini()
+ * below.
+ *
+ * Note, the uioa.uioa_state isn't set to UIOA_ENABLED
+ * here as first we have to uioamove() any currently
+ * queued M_DATA mblk_t(s) so it will be done in
+ * kstrgetmsg().
+ */
+ }
+ /*
+ * In either uioainit() success or not case note the number
+ * of uio bytes the caller wants for sod framework and/or
+ * transport (e.g. TCP) strategy.
+ */
+ sodp->sod_want = uiop->uio_resid;
+ mutex_exit(sodp->sod_lock);
+ } else if (sodp != NULL && (sodp->sod_state & SOD_ENABLED)) {
+ /*
+ * No uioa but still using sodirect so note the number of
+ * uio bytes the caller wants for sodirect framework and/or
+ * transport (e.g. TCP) strategy.
+ *
+ * Note, sod_lock not held, only writer is in this function
+ * and only one thread at a time so not needed just to init.
+ */
+ sodp->sod_want = uiop->uio_resid;
+ }
retry:
saved_resid = uiop->uio_resid;
pri = 0;
@@ -3091,10 +3144,7 @@ retry:
eprintsoline(so, error);
break;
}
- mutex_enter(&so->so_lock);
- so_unlock_read(so); /* Clear SOREADLOCKED */
- mutex_exit(&so->so_lock);
- return (error);
+ goto out;
}
/*
* For datagrams the MOREDATA flag is used to set MSG_TRUNC.
@@ -3137,9 +3187,7 @@ retry:
pflag = opflag | MSG_NOMARK;
goto retry;
}
- so_unlock_read(so); /* Clear SOREADLOCKED */
- mutex_exit(&so->so_lock);
- return (0);
+ goto out_locked;
}
/* strsock_proto has already verified length and alignment */
@@ -3179,9 +3227,7 @@ retry:
pflag = opflag | MSG_NOMARK;
goto retry;
}
- so_unlock_read(so); /* Clear SOREADLOCKED */
- mutex_exit(&so->so_lock);
- return (0);
+ goto out_locked;
}
case T_UNITDATA_IND: {
void *addr;
@@ -3207,7 +3253,7 @@ retry:
freemsg(mp);
error = EPROTO;
eprintsoline(so, error);
- goto err;
+ goto out;
}
if (so->so_family == AF_UNIX) {
/*
@@ -3236,7 +3282,7 @@ retry:
freemsg(mp);
error = EPROTO;
eprintsoline(so, error);
- goto err;
+ goto out;
}
if (so->so_family == AF_UNIX)
so_getopt_srcaddr(opt, optlen, &addr, &addrlen);
@@ -3283,17 +3329,14 @@ retry:
msg->msg_namelen);
kmem_free(control, controllen);
eprintsoline(so, error);
- goto err;
+ goto out;
}
msg->msg_control = control;
msg->msg_controllen = controllen;
}
freemsg(mp);
- mutex_enter(&so->so_lock);
- so_unlock_read(so); /* Clear SOREADLOCKED */
- mutex_exit(&so->so_lock);
- return (0);
+ goto out;
}
case T_OPTDATA_IND: {
struct T_optdata_req *tdr;
@@ -3322,7 +3365,7 @@ retry:
freemsg(mp);
error = EPROTO;
eprintsoline(so, error);
- goto err;
+ goto out;
}
ncontrollen = so_cmsglen(mp, opt, optlen,
@@ -3350,7 +3393,7 @@ retry:
freemsg(mp);
kmem_free(control, controllen);
eprintsoline(so, error);
- goto err;
+ goto out;
}
msg->msg_control = control;
msg->msg_controllen = controllen;
@@ -3382,9 +3425,7 @@ retry:
pflag = opflag | MSG_NOMARK;
goto retry;
}
- so_unlock_read(so); /* Clear SOREADLOCKED */
- mutex_exit(&so->so_lock);
- return (0);
+ goto out_locked;
}
case T_EXDATA_IND: {
dprintso(so, 1,
@@ -3441,10 +3482,7 @@ retry:
eprintsoline(so, error);
}
#endif /* SOCK_DEBUG */
- mutex_enter(&so->so_lock);
- so_unlock_read(so); /* Clear SOREADLOCKED */
- mutex_exit(&so->so_lock);
- return (error);
+ goto out;
}
ASSERT(mp);
tpr = (union T_primitives *)mp->b_rptr;
@@ -3490,11 +3528,40 @@ retry:
freemsg(mp);
error = EPROTO;
eprintsoline(so, error);
- goto err;
+ goto out;
}
/* NOTREACHED */
-err:
+out:
mutex_enter(&so->so_lock);
+out_locked:
+ if (sodp != NULL) {
+ /* Finish any sodirect and uioa processing */
+ mutex_enter(sodp->sod_lock);
+ if (suiop != NULL) {
+ /* Finish any uioa_t processing */
+ int ret;
+
+ ASSERT(uiop == (uio_t *)&sodp->sod_uioa);
+ ret = uioafini(suiop, (uioa_t *)uiop);
+ if (error == 0 && ret != 0) {
+ /* If no error yet, set it */
+ error = ret;
+ }
+ if ((mp = sodp->sod_uioafh) != NULL) {
+ sodp->sod_uioafh = NULL;
+ sodp->sod_uioaft = NULL;
+ freemsg(mp);
+ }
+ }
+ if (!(sodp->sod_state & SOD_WAKE_NOT)) {
+ /* Awoke */
+ sodp->sod_state &= SOD_WAKE_CLR;
+ sodp->sod_state |= SOD_WAKE_NOT;
+ }
+ /* Last, clear sod_want value */
+ sodp->sod_want = 0;
+ mutex_exit(sodp->sod_lock);
+ }
so_unlock_read(so); /* Clear SOREADLOCKED */
mutex_exit(&so->so_lock);
return (error);
diff --git a/usr/src/uts/common/fs/sockfs/sockvnops.c b/usr/src/uts/common/fs/sockfs/sockvnops.c
index 6c122c679d..c85a76d6e6 100644
--- a/usr/src/uts/common/fs/sockfs/sockvnops.c
+++ b/usr/src/uts/common/fs/sockfs/sockvnops.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -239,6 +239,10 @@ socktpi_open(struct vnode **vpp, int flag, struct cred *cr,
* udp case, when some other module is autopushed
* above it, or for some reasons the expected module
* isn't purely D_MP (which is the main requirement).
+ *
+ * Else, SS_DIRECT is valid. If the read-side Q has
+ * _QSODIRECT set and uioasync is enabled then
+ * set SS_SODIRECT to enable sodirect.
*/
if (!socktpi_direct || !(tq->q_flag & _QDIRECT) ||
!(_OTHERQ(tq)->q_flag & _QDIRECT)) {
@@ -255,6 +259,10 @@ socktpi_open(struct vnode **vpp, int flag, struct cred *cr,
return (error);
}
}
+ } else if ((_OTHERQ(tq)->q_flag & _QSODIRECT) &&
+ uioasync.enabled) {
+ /* Enable sodirect */
+ so->so_state |= SS_SODIRECT;
}
}
} else {
diff --git a/usr/src/uts/common/inet/tcp.h b/usr/src/uts/common/inet/tcp.h
index aa5ba3a075..26e1b12f4e 100644
--- a/usr/src/uts/common/inet/tcp.h
+++ b/usr/src/uts/common/inet/tcp.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1990 Mentat Inc. */
@@ -37,6 +37,7 @@ extern "C" {
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <sys/socket.h>
+#include <sys/sodirect.h>
#include <sys/multidata.h>
#include <sys/md5.h>
#include <inet/common.h>
@@ -598,6 +599,13 @@ typedef struct tcp_s {
*/
boolean_t tcp_flow_stopped;
+ /*
+ * tcp_sodirect is used by tcp on the receive side to push mblk_t(s)
+ * directly to sockfs. Also, to schedule asynchronous copyout directly
+ * to a pending user-land uio buffer.
+ */
+ sodirect_t *tcp_sodirect;
+
#ifdef DEBUG
pc_t tcmp_stk[15];
#endif
diff --git a/usr/src/uts/common/inet/tcp/tcp.c b/usr/src/uts/common/inet/tcp/tcp.c
index 470f6cad1d..766a7db59e 100644
--- a/usr/src/uts/common/inet/tcp/tcp.c
+++ b/usr/src/uts/common/inet/tcp/tcp.c
@@ -66,6 +66,8 @@ const char tcp_version[] = "%Z%%M% %I% %E% SMI";
#include <sys/isa_defs.h>
#include <sys/md5.h>
#include <sys/random.h>
+#include <sys/sodirect.h>
+#include <sys/uio.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <netinet/ip6.h>
@@ -216,6 +218,23 @@ const char tcp_version[] = "%Z%%M% %I% %E% SMI";
* behaviour. Once tcp_issocket is unset, its never set for the
* life of that connection.
*
+ * In support of on-board asynchronous DMA hardware (e.g. Intel I/OAT)
+ * two consolidation private KAPIs are used to enqueue M_DATA mblk_t's
+ * directly to the socket (sodirect) and start an asynchronous copyout
+ * to a user-land receive-side buffer (uioa) when a blocking socket read
+ * (e.g. read, recv, ...) is pending.
+ *
+ * This is accomplished when tcp_issocket is set and tcp_sodirect is not
+ * NULL so points to an sodirect_t and if marked enabled then we enqueue
+ * all mblk_t's directly to the socket.
+ *
+ * Further, if the sodirect_t sod_uioa is also marked enabled (due to a
+ * blocking socket read, e.g. user-land read, recv, ...) then an asynchronous
+ * copyout will be started directly to the user-land uio buffer. Also, as we
+ * have a pending read, TCP's push logic can take into account the number of
+ * bytes to be received and only awake the blocked read()er when the uioa_t
+ * byte count has been satisfied.
+ *
* IPsec notes :
*
* Since a packet is always executed on the correct TCP perimeter
@@ -246,6 +265,37 @@ squeue_func_t tcp_squeue_close_proc;
squeue_func_t tcp_squeue_wput_proc;
/*
+ * Macros for sodirect:
+ *
+ * SOD_PTR_ENTER(tcp, sodp) - for the tcp_t pointer "tcp" set the
+ * sodirect_t pointer "sodp" to the socket/tcp shared sodirect_t
+ * if it exists and is enabled, else to NULL. Note, in the current
+ * sodirect implementation the sod_lock must not be held across any
+ * STREAMS call (e.g. putnext) else a "recursive mutex_enter" PANIC
+ * will result as sod_lock is the streamhead stdata.sd_lock.
+ *
+ * SOD_NOT_ENABLED(tcp) - return true if not a sodirect tcp_t or the
+ * sodirect_t isn't enabled, useful for ASSERT()ing that a receive
+ * side tcp code path dealing with a tcp_rcv_list or putnext() isn't
+ * being used when sodirect code paths should be.
+ */
+
+/*
+ * SOD_PTR_ENTER() expands to a single statement (do { } while (0)) so it is
+ * safe under an unbraced if/else.  On completion "sodp" is either NULL, with
+ * no lock held, or points at the enabled sodirect_t with its sod_lock held;
+ * in the latter case the caller is responsible for the mutex_exit().
+ */
+#define	SOD_PTR_ENTER(tcp, sodp)					\
+	do {								\
+		(sodp) = (tcp)->tcp_sodirect;				\
+									\
+		if ((sodp) != NULL) {					\
+			mutex_enter((sodp)->sod_lock);			\
+			if (!((sodp)->sod_state & SOD_ENABLED)) {	\
+				mutex_exit((sodp)->sod_lock);		\
+				(sodp) = NULL;				\
+			}						\
+		}							\
+	} while (0)
+
+/* Unlocked check: true if "tcp" has no sodirect_t or it isn't enabled. */
+#define	SOD_NOT_ENABLED(tcp)						\
+	((tcp)->tcp_sodirect == NULL ||					\
+	!((tcp)->tcp_sodirect->sod_state & SOD_ENABLED))
+
+/*
* This controls how tiny a write must be before we try to copy it
* into the the mblk on the tail of the transmit queue. Not much
* speedup is observed for values larger than sixteen. Zero will
@@ -3808,6 +3858,7 @@ tcp_clean_death(tcp_t *tcp, int err, uint8_t tag)
mblk_t *mp;
queue_t *q;
tcp_stack_t *tcps = tcp->tcp_tcps;
+ sodirect_t *sodp;
TCP_CLD_STAT(tag);
@@ -3872,6 +3923,13 @@ tcp_clean_death(tcp_t *tcp, int err, uint8_t tag)
return (-1);
}
+ /* If sodirect, not anymore */
+ SOD_PTR_ENTER(tcp, sodp);
+ if (sodp != NULL) {
+ tcp->tcp_sodirect = NULL;
+ mutex_exit(sodp->sod_lock);
+ }
+
q = tcp->tcp_rq;
/* Trash all inbound data */
@@ -4236,6 +4294,11 @@ tcp_close_output(void *arg, mblk_t *mp, void *arg2)
*/
/* FALLTHRU */
default:
+ if (tcp->tcp_sodirect != NULL) {
+ /* Ok, no more sodirect */
+ tcp->tcp_sodirect = NULL;
+ }
+
if (tcp->tcp_fused)
tcp_unfuse(tcp);
@@ -6381,6 +6444,15 @@ tcp_connect(tcp_t *tcp, mblk_t *mp)
*(uint16_t *)tcp->tcp_tcph->th_lport = tcp->tcp_lport;
}
+ if (tcp->tcp_issocket) {
+ /*
+ * TCP is _D_SODIRECT and sockfs is directly above so save
+ * the shared sonode sodirect_t pointer (if any) to enable
+ * TCP sodirect.
+ */
+ tcp->tcp_sodirect = SOD_QTOSODP(tcp->tcp_rq);
+ }
+
switch (tcp->tcp_state) {
case TCPS_IDLE:
/*
@@ -8190,6 +8262,9 @@ tcp_reinit_values(tcp)
ASSERT(!tcp->tcp_kssl_pending);
PRESERVE(tcp->tcp_kssl_ent);
+ /* Sodirect */
+ tcp->tcp_sodirect = NULL;
+
tcp->tcp_closemp_used = B_FALSE;
#ifdef DEBUG
@@ -8282,6 +8357,9 @@ tcp_init_values(tcp_t *tcp)
tcp->tcp_fuse_rcv_unread_hiwater = 0;
tcp->tcp_fuse_rcv_unread_cnt = 0;
+ /* Sodirect */
+ tcp->tcp_sodirect = NULL;
+
/* Initialize the header template */
if (tcp->tcp_ipversion == IPV4_VERSION) {
err = tcp_header_init_ipv4(tcp);
@@ -11680,6 +11758,9 @@ tcp_rcv_drain(queue_t *q, tcp_t *tcp)
if (tcp->tcp_listener != NULL)
return (ret);
+ /* Can't be sodirect enabled */
+ ASSERT(SOD_NOT_ENABLED(tcp));
+
/*
* Handle two cases here: we are currently fused or we were
* previously fused and have some urgent data to be delivered
@@ -11779,6 +11860,216 @@ tcp_rcv_enqueue(tcp_t *tcp, mblk_t *mp, uint_t seg_len)
}
/*
+ * The tcp_rcv_sod_XXX() functions enqueue data directly to the socket
+ * above, in addition when uioa is enabled schedule an asynchronous uio
+ * prior to enqueuing. They implement the combined semantics of the
+ * tcp_rcv_XXX() functions, tcp_rcv_list push logic, and STREAMS putnext()
+ * canputnext(), i.e. flow-control with backenable.
+ *
+ * tcp_rcv_sod_wakeup() is called where tcp_rcv_drain() would be called in the
+ * non sodirect connection but as there are no tcp_rcv_list mblk_t's we deal
+ * with the rcv_wnd and push timer and call the sodirect wakeup function.
+ *
+ * Must be called with sodp->sod_lock held and will return with the lock
+ * released.
+ */
+/*
+ * Returns TH_ACK_NEEDED when a window update should be sent immediately,
+ * else 0.  Entered with sodp->sod_lock held; the lock is released on
+ * every path before returning.
+ */
+static uint_t
+tcp_rcv_sod_wakeup(tcp_t *tcp, sodirect_t *sodp)
+{
+ queue_t *q = tcp->tcp_rq;
+ uint_t thwin;
+ tcp_stack_t *tcps = tcp->tcp_tcps;
+ uint_t ret = 0;
+
+ /* Can't be an eager connection */
+ ASSERT(tcp->tcp_listener == NULL);
+
+ /* Caller must have lock held */
+ ASSERT(MUTEX_HELD(sodp->sod_lock));
+
+ /* Sodirect mode so must not be a tcp_rcv_list */
+ ASSERT(tcp->tcp_rcv_list == NULL);
+
+ if (SOD_QFULL(sodp)) {
+ /* Q is full, mark Q for need backenable */
+ SOD_QSETBE(sodp);
+ }
+ /* Last advertised rwnd, i.e. rwnd last sent in a packet */
+ thwin = ((uint_t)BE16_TO_U16(tcp->tcp_tcph->th_win))
+ << tcp->tcp_rcv_ws;
+ /* This is peer's calculated send window (our available rwnd). */
+ thwin -= tcp->tcp_rnxt - tcp->tcp_rack;
+ /*
+ * Increase the receive window to max. But we need to do receiver
+ * SWS avoidance. This means that we need to check the increase
+ * of receive window is at least 1 MSS.
+ */
+ if (!SOD_QFULL(sodp) && (q->q_hiwat - thwin >= tcp->tcp_mss)) {
+ /*
+ * If the window that the other side knows is less than max
+ * deferred acks segments, send an update immediately.
+ */
+ if (thwin < tcp->tcp_rack_cur_max * tcp->tcp_mss) {
+ BUMP_MIB(&tcps->tcps_mib, tcpOutWinUpdate);
+ ret = TH_ACK_NEEDED;
+ }
+ tcp->tcp_rwnd = q->q_hiwat;
+ }
+
+ if (!SOD_QEMPTY(sodp)) {
+ /* Wakeup to socket */
+ sodp->sod_state &= SOD_WAKE_CLR;
+ sodp->sod_state |= SOD_WAKE_DONE;
+ (sodp->sod_wakeup)(sodp);
+ /* wakeup() does the mutex_exit() */
+ } else {
+ /* Q is empty, no need to wake */
+ sodp->sod_state &= SOD_WAKE_CLR;
+ sodp->sod_state |= SOD_WAKE_NOT;
+ mutex_exit(sodp->sod_lock);
+ }
+
+ /* sod_lock is no longer held from here on. */
+
+ /* No need for the push timer now. */
+ if (tcp->tcp_push_tid != 0) {
+ (void) TCP_TIMER_CANCEL(tcp, tcp->tcp_push_tid);
+ tcp->tcp_push_tid = 0;
+ }
+
+ return (ret);
+}
+
+/*
+ * Called where tcp_rcv_enqueue()/putnext(RD(q)) would be. For M_DATA
+ * mblk_t's if uioa enabled then start a uioa asynchronous copy directly
+ * to the user-land buffer and flag the mblk_t as such.
+ *
+ * Also, handle tcp_rwnd.
+ *
+ * Must be called with sodp->sod_lock held; the lock is still held on
+ * return. "seg_len" must equal msgdsize(mp).
+ *
+ * Returns TH_ACK_NEEDED if the peer's usable send window has dropped
+ * below one MSS (so an ACK should go out now), else 0. Whether the
+ * socket needs a wakeup is reported via SOD_WAKE_NEED in sod_state.
+ */
+uint_t
+tcp_rcv_sod_enqueue(tcp_t *tcp, sodirect_t *sodp, mblk_t *mp, uint_t seg_len)
+{
+	uioa_t		*uioap = &sodp->sod_uioa;
+	boolean_t	qfull;
+	uint_t		thwin;
+
+	/* Can't be an eager connection */
+	ASSERT(tcp->tcp_listener == NULL);
+
+	/* Caller must have lock held */
+	ASSERT(MUTEX_HELD(sodp->sod_lock));
+
+	/* Sodirect mode so must not be a tcp_rcv_list */
+	ASSERT(tcp->tcp_rcv_list == NULL);
+
+	/* Passed in segment length must be equal to mblk_t chain data size */
+	ASSERT(seg_len == msgdsize(mp));
+
+	if (DB_TYPE(mp) != M_DATA) {
+		/* Only process M_DATA mblk_t's */
+		goto enq;
+	}
+	if (uioap->uioa_state & UIOA_ENABLED) {
+		/* Uioa is enabled */
+		mblk_t *mp1 = mp;
+
+		if (seg_len > uioap->uio_resid) {
+			/*
+			 * There isn't enough uio space for the mblk_t chain
+			 * so disable uioa such that this and any additional
+			 * mblk_t data is handled by the socket and schedule
+			 * the socket for wakeup to finish this uioa.
+			 */
+			uioap->uioa_state &= UIOA_CLR;
+			uioap->uioa_state |= UIOA_FINI;
+			if (sodp->sod_state & SOD_WAKE_NOT) {
+				sodp->sod_state &= SOD_WAKE_CLR;
+				sodp->sod_state |= SOD_WAKE_NEED;
+			}
+			goto enq;
+		}
+		do {
+			uint32_t len = MBLKL(mp1);
+
+			if (!uioamove(mp1->b_rptr, len, UIO_READ, uioap)) {
+				/* Scheduled, mark dblk_t as such */
+				DB_FLAGS(mp1) |= DBLK_UIOA;
+			} else {
+				/* Error, turn off async processing */
+				uioap->uioa_state &= UIOA_CLR;
+				uioap->uioa_state |= UIOA_FINI;
+				break;
+			}
+		} while ((mp1 = mp1->b_cont) != NULL);
+
+		if (mp1 != NULL || uioap->uio_resid == 0) {
+			/*
+			 * Not all mblk_t(s) uioamoved (error) or all uio
+			 * space has been consumed so schedule the socket
+			 * for wakeup to finish this uio.
+			 */
+			sodp->sod_state &= SOD_WAKE_CLR;
+			sodp->sod_state |= SOD_WAKE_NEED;
+		}
+	} else if (uioap->uioa_state & UIOA_FINI) {
+		/*
+		 * Post UIOA_ENABLED waiting for socket to finish processing
+		 * so just enqueue and update tcp_rwnd.
+		 */
+		if (SOD_QFULL(sodp))
+			tcp->tcp_rwnd -= seg_len;
+	} else if (sodp->sod_want > 0) {
+		/*
+		 * Uioa isn't enabled but sodirect has a pending read().
+		 */
+		if (SOD_QCNT(sodp) + seg_len >= sodp->sod_want) {
+			if (sodp->sod_state & SOD_WAKE_NOT) {
+				/* Schedule socket for wakeup */
+				sodp->sod_state &= SOD_WAKE_CLR;
+				sodp->sod_state |= SOD_WAKE_NEED;
+			}
+			tcp->tcp_rwnd -= seg_len;
+		}
+	} else if (SOD_QCNT(sodp) + seg_len >= tcp->tcp_rq->q_hiwat >> 3) {
+		/*
+		 * No pending sodirect read() so use the default
+		 * TCP push logic to guess that a push is needed.
+		 */
+		if (sodp->sod_state & SOD_WAKE_NOT) {
+			/* Schedule socket for wakeup */
+			sodp->sod_state &= SOD_WAKE_CLR;
+			sodp->sod_state |= SOD_WAKE_NEED;
+		}
+		tcp->tcp_rwnd -= seg_len;
+	} else {
+		/* Just update tcp_rwnd */
+		tcp->tcp_rwnd -= seg_len;
+	}
+enq:
+	/* Sample QFULL before enqueuing so a not-full -> full edge is seen */
+	qfull = SOD_QFULL(sodp);
+
+	/* Return value is deliberately ignored here */
+	(void) (sodp->sod_enqueue)(sodp, mp);
+
+	if (! qfull && SOD_QFULL(sodp)) {
+		/* Wasn't QFULL, now QFULL, need back-enable */
+		SOD_QSETBE(sodp);
+	}
+
+	/*
+	 * Check to see if remote avail swnd < mss due to delayed ACK,
+	 * first get advertised rwnd. The th_win header field is stored
+	 * right-shifted by tcp_rcv_ws, so scale it back up to bytes
+	 * before comparing against tcp_mss (as tcp_rcv_sod_wakeup() does).
+	 */
+	thwin = ((uint_t)BE16_TO_U16(tcp->tcp_tcph->th_win))
+	    << tcp->tcp_rcv_ws;
+	/* Minus delayed ACK count */
+	thwin -= tcp->tcp_rnxt - tcp->tcp_rack;
+	if (thwin < tcp->tcp_mss) {
+		/* Remote avail swnd < mss, need ACK now */
+		return (TH_ACK_NEEDED);
+	}
+
+	return (0);
+}
+
+/*
* DEFAULT TCP ENTRY POINT via squeue on READ side.
*
* This is the default entry function into TCP on the read side. TCP is
@@ -14976,13 +15267,39 @@ est:
tcp_rcv_enqueue(tcp, mp, seg_len);
}
} else {
+ sodirect_t *sodp = tcp->tcp_sodirect;
+
+ /*
+ * If an sodirect connection and an enabled sodirect_t then
+ * sodp will be set to point to the tcp_t/sonode_t shared
+ * sodirect_t and the sodirect_t's lock will be held.
+ */
+ if (sodp != NULL) {
+ mutex_enter(sodp->sod_lock);
+ if (!(sodp->sod_state & SOD_ENABLED)) {
+ mutex_exit(sodp->sod_lock);
+ sodp = NULL;
+ } else if (tcp->tcp_kssl_ctx != NULL &&
+ DB_TYPE(mp) == M_DATA) {
+ mutex_exit(sodp->sod_lock);
+ sodp = NULL;
+ }
+ }
if (mp->b_datap->db_type != M_DATA ||
(flags & TH_MARKNEXT_NEEDED)) {
- if (tcp->tcp_rcv_list != NULL) {
+ if (sodp != NULL) {
+ if (!SOD_QEMPTY(sodp) &&
+ (sodp->sod_state & SOD_WAKE_NOT)) {
+ flags |= tcp_rcv_sod_wakeup(tcp, sodp);
+ /* sod_wakeup() did the mutex_exit() */
+ mutex_enter(sodp->sod_lock);
+ }
+ } else if (tcp->tcp_rcv_list != NULL) {
flags |= tcp_rcv_drain(tcp->tcp_rq, tcp);
}
ASSERT(tcp->tcp_rcv_list == NULL ||
tcp->tcp_fused_sigurg);
+
if (flags & TH_MARKNEXT_NEEDED) {
#ifdef DEBUG
(void) strlog(TCP_MOD_ID, 0, 1, SL_TRACE,
@@ -15001,10 +15318,42 @@ est:
mblk_t *, mp);
tcp_kssl_input(tcp, mp);
} else {
+ if (sodp) {
+ /*
+ * Done with sodirect, use putnext
+ * to push this non M_DATA headed
+ * mblk_t chain.
+ */
+ mutex_exit(sodp->sod_lock);
+ }
putnext(tcp->tcp_rq, mp);
if (!canputnext(tcp->tcp_rq))
tcp->tcp_rwnd -= seg_len;
}
+ } else if ((tcp->tcp_kssl_ctx != NULL) &&
+ (DB_TYPE(mp) == M_DATA)) {
+ /* Do SSL processing first */
+ DTRACE_PROBE1(kssl_mblk__ksslinput_data2,
+ mblk_t *, mp);
+ tcp_kssl_input(tcp, mp);
+ } else if (sodp != NULL) {
+ /*
+ * Sodirect so all mblk_t's are queued on the
+ * socket directly, check for wakeup of blocked
+ * reader (if any), and last if flow-controlled.
+ */
+ flags |= tcp_rcv_sod_enqueue(tcp, sodp, mp, seg_len);
+ if ((sodp->sod_state & SOD_WAKE_NEED) ||
+ (flags & (TH_PUSH|TH_FIN))) {
+ flags |= tcp_rcv_sod_wakeup(tcp, sodp);
+ /* sod_wakeup() did the mutex_exit() */
+ } else {
+ if (SOD_QFULL(sodp)) {
+ /* Q is full, need backenable */
+ SOD_QSETBE(sodp);
+ }
+ mutex_exit(sodp->sod_lock);
+ }
} else if ((flags & (TH_PUSH|TH_FIN)) ||
tcp->tcp_rcv_cnt + seg_len >= tcp->tcp_rq->q_hiwat >> 3) {
if (tcp->tcp_rcv_list != NULL) {
@@ -15024,41 +15373,33 @@ est:
tcp_rcv_enqueue(tcp, mp, seg_len);
flags |= tcp_rcv_drain(tcp->tcp_rq, tcp);
} else {
- /* Does this need SSL processing first? */
- if ((tcp->tcp_kssl_ctx != NULL) &&
- (DB_TYPE(mp) == M_DATA)) {
- DTRACE_PROBE1(
- kssl_mblk__ksslinput_data2,
- mblk_t *, mp);
- tcp_kssl_input(tcp, mp);
- } else {
- putnext(tcp->tcp_rq, mp);
- if (!canputnext(tcp->tcp_rq))
- tcp->tcp_rwnd -= seg_len;
- }
+ putnext(tcp->tcp_rq, mp);
+ if (!canputnext(tcp->tcp_rq))
+ tcp->tcp_rwnd -= seg_len;
}
} else {
/*
* Enqueue all packets when processing an mblk
* from the co queue and also enqueue normal packets.
- * For packets which belong to SSL stream do SSL
- * processing first.
*/
- if ((tcp->tcp_kssl_ctx != NULL) &&
- (DB_TYPE(mp) == M_DATA)) {
- DTRACE_PROBE1(kssl_mblk__tcpksslin3,
- mblk_t *, mp);
- tcp_kssl_input(tcp, mp);
- } else {
- tcp_rcv_enqueue(tcp, mp, seg_len);
- }
+ tcp_rcv_enqueue(tcp, mp, seg_len);
}
/*
* Make sure the timer is running if we have data waiting
* for a push bit. This provides resiliency against
* implementations that do not correctly generate push bits.
+ *
+ * Note, for sodirect if Q isn't empty and there's not a
+ * pending wakeup then we need a timer. Also note that sodp
+ * is assumed to be still valid after exit()ing the sod_lock
+ * above and while the SOD state can change it can only change
+ * such that the Q is empty now even though data was added
+ * above.
*/
- if (tcp->tcp_rcv_list != NULL && tcp->tcp_push_tid == 0) {
+ if (((sodp != NULL && !SOD_QEMPTY(sodp) &&
+ (sodp->sod_state & SOD_WAKE_NOT)) ||
+ (sodp == NULL && tcp->tcp_rcv_list != NULL)) &&
+ tcp->tcp_push_tid == 0) {
/*
* The connection may be closed at this point, so don't
* do anything for a detached tcp.
@@ -15070,6 +15411,7 @@ est:
tcps->tcps_push_timer_interval));
}
}
+
xmit_check:
/* Is there anything left to do? */
ASSERT(!(flags & TH_MARKNEXT_NEEDED));
@@ -15145,13 +15487,26 @@ ack_check:
/*
* Send up any queued data and then send the mark message
*/
- if (tcp->tcp_rcv_list != NULL) {
- flags |= tcp_rcv_drain(tcp->tcp_rq, tcp);
- }
- ASSERT(tcp->tcp_rcv_list == NULL || tcp->tcp_fused_sigurg);
+ sodirect_t *sodp;
+
+ SOD_PTR_ENTER(tcp, sodp);
mp1 = tcp->tcp_urp_mark_mp;
tcp->tcp_urp_mark_mp = NULL;
+ if (sodp != NULL) {
+
+ ASSERT(tcp->tcp_rcv_list == NULL);
+
+ flags |= tcp_rcv_sod_wakeup(tcp, sodp);
+ /* sod_wakeup() does the mutex_exit() */
+ } else if (tcp->tcp_rcv_list != NULL) {
+ flags |= tcp_rcv_drain(tcp->tcp_rq, tcp);
+
+ ASSERT(tcp->tcp_rcv_list == NULL ||
+ tcp->tcp_fused_sigurg);
+
+ }
+ putnext(tcp->tcp_rq, mp1);
#ifdef DEBUG
(void) strlog(TCP_MOD_ID, 0, 1, SL_TRACE,
"tcp_rput: sending zero-length %s %s",
@@ -15159,7 +15514,6 @@ ack_check:
"MSGNOTMARKNEXT"),
tcp_display(tcp, NULL, DISP_PORT_ONLY));
#endif /* DEBUG */
- putnext(tcp->tcp_rq, mp1);
flags &= ~TH_SEND_URP_MARK;
}
if (flags & TH_ACK_NEEDED) {
@@ -15197,14 +15551,32 @@ ack_check:
* In the eager case tcp_rsrv will do this when run
* after tcp_accept is done.
*/
+ sodirect_t *sodp;
+
ASSERT(tcp->tcp_listener == NULL);
- if (tcp->tcp_rcv_list != NULL) {
+
+ SOD_PTR_ENTER(tcp, sodp);
+ if (sodp != NULL) {
+ /* No more sodirect */
+ tcp->tcp_sodirect = NULL;
+ if (!SOD_QEMPTY(sodp)) {
+ /* Mblk(s) to process, notify */
+ flags |= tcp_rcv_sod_wakeup(tcp, sodp);
+ /* sod_wakeup() does the mutex_exit() */
+ } else {
+ /* Nothing to process */
+ mutex_exit(sodp->sod_lock);
+ }
+ } else if (tcp->tcp_rcv_list != NULL) {
/*
* Push any mblk(s) enqueued from co processing.
*/
flags |= tcp_rcv_drain(tcp->tcp_rq, tcp);
+
+ ASSERT(tcp->tcp_rcv_list == NULL ||
+ tcp->tcp_fused_sigurg);
}
- ASSERT(tcp->tcp_rcv_list == NULL || tcp->tcp_fused_sigurg);
+
if ((mp1 = mi_tpi_ordrel_ind()) != NULL) {
tcp->tcp_ordrel_done = B_TRUE;
putnext(tcp->tcp_rq, mp1);
@@ -15974,6 +16346,8 @@ tcp_rsrv_input(void *arg, mblk_t *mp, void *arg2)
queue_t *q = tcp->tcp_rq;
uint_t thwin;
tcp_stack_t *tcps = tcp->tcp_tcps;
+ sodirect_t *sodp;
+ boolean_t fc;
freeb(mp);
@@ -16024,7 +16398,27 @@ tcp_rsrv_input(void *arg, mblk_t *mp, void *arg2)
return;
}
- if (canputnext(q)) {
+ SOD_PTR_ENTER(tcp, sodp);
+ if (sodp != NULL) {
+ /* An sodirect connection */
+ if (SOD_QFULL(sodp)) {
+ /* Flow-controlled, need another back-enable */
+ fc = B_TRUE;
+ SOD_QSETBE(sodp);
+ } else {
+ /* Not flow-controlled */
+ fc = B_FALSE;
+ }
+ mutex_exit(sodp->sod_lock);
+ } else if (canputnext(q)) {
+ /* STREAMS, not flow-controlled */
+ fc = B_FALSE;
+ } else {
+ /* STREAMS, flow-controlled */
+ fc = B_TRUE;
+ }
+ if (!fc) {
+ /* Not flow-controlled, open rwnd */
tcp->tcp_rwnd = q->q_hiwat;
thwin = ((uint_t)BE16_TO_U16(tcp->tcp_tcph->th_win))
<< tcp->tcp_rcv_ws;
@@ -16043,13 +16437,32 @@ tcp_rsrv_input(void *arg, mblk_t *mp, void *arg2)
BUMP_MIB(&tcps->tcps_mib, tcpOutWinUpdate);
}
}
+
/* Handle a failure to allocate a T_ORDREL_IND here */
if (tcp->tcp_fin_rcvd && !tcp->tcp_ordrel_done) {
ASSERT(tcp->tcp_listener == NULL);
- if (tcp->tcp_rcv_list != NULL) {
- (void) tcp_rcv_drain(q, tcp);
+
+ SOD_PTR_ENTER(tcp, sodp);
+ if (sodp != NULL) {
+ /* No more sodirect */
+ tcp->tcp_sodirect = NULL;
+ if (!SOD_QEMPTY(sodp)) {
+ /* Notify mblk(s) to process */
+ (void) tcp_rcv_sod_wakeup(tcp, sodp);
+ /* sod_wakeup() does the mutex_exit() */
+ } else {
+ /* Nothing to process */
+ mutex_exit(sodp->sod_lock);
+ }
+ } else if (tcp->tcp_rcv_list != NULL) {
+ /*
+ * Push any mblk(s) enqueued from co processing.
+ */
+ (void) tcp_rcv_drain(tcp->tcp_rq, tcp);
+ ASSERT(tcp->tcp_rcv_list == NULL ||
+ tcp->tcp_fused_sigurg);
}
- ASSERT(tcp->tcp_rcv_list == NULL || tcp->tcp_fused_sigurg);
+
mp = mi_tpi_ordrel_ind();
if (mp) {
tcp->tcp_ordrel_done = B_TRUE;
@@ -18097,6 +18510,8 @@ tcp_accept_finish(void *arg, mblk_t *mp, void *arg2)
*/
if (tcp->tcp_rcv_list != NULL) {
/* We drain directly in case of fused tcp loopback */
+ sodirect_t *sodp;
+
if (!tcp->tcp_fused && canputnext(q)) {
tcp->tcp_rwnd = q->q_hiwat;
thwin = ((uint_t)BE16_TO_U16(tcp->tcp_tcph->th_win))
@@ -18112,7 +18527,26 @@ tcp_accept_finish(void *arg, mblk_t *mp, void *arg2)
}
}
- (void) tcp_rcv_drain(q, tcp);
+
+ SOD_PTR_ENTER(tcp, sodp);
+ if (sodp != NULL) {
+ /* Sodirect, move from rcv_list */
+ ASSERT(!tcp->tcp_fused);
+ while ((mp = tcp->tcp_rcv_list) != NULL) {
+ tcp->tcp_rcv_list = mp->b_next;
+ mp->b_next = NULL;
+ (void) tcp_rcv_sod_enqueue(tcp, sodp, mp,
+ msgdsize(mp));
+ }
+ tcp->tcp_rcv_last_head = NULL;
+ tcp->tcp_rcv_last_tail = NULL;
+ tcp->tcp_rcv_cnt = 0;
+ (void) tcp_rcv_sod_wakeup(tcp, sodp);
+ /* sod_wakeup() did the mutex_exit() */
+ } else {
+ /* Not sodirect, drain */
+ (void) tcp_rcv_drain(q, tcp);
+ }
/*
* For fused tcp loopback, back-enable peer endpoint
@@ -18304,6 +18738,21 @@ tcp_wput_accept(queue_t *q, mblk_t *mp)
listener = eager->tcp_listener;
eager->tcp_issocket = B_TRUE;
+ /*
+ * TCP is _D_SODIRECT and sockfs is directly above so
+ * save shared sodirect_t pointer (if any).
+ *
+ * If tcp_fused and sodirect enabled disable it.
+ */
+ eager->tcp_sodirect = SOD_QTOSODP(eager->tcp_rq);
+ if (eager->tcp_fused && eager->tcp_sodirect != NULL) {
+ /* Fused, disable sodirect */
+ mutex_enter(eager->tcp_sodirect->sod_lock);
+ SOD_DISABLE(eager->tcp_sodirect);
+ mutex_exit(eager->tcp_sodirect->sod_lock);
+ eager->tcp_sodirect = NULL;
+ }
+
econnp->conn_zoneid = listener->tcp_connp->conn_zoneid;
econnp->conn_allzones = listener->tcp_connp->conn_allzones;
ASSERT(econnp->conn_netstack ==
@@ -22140,6 +22589,7 @@ tcp_wput_ioctl(void *arg, mblk_t *mp, void *arg2)
tcp_fuse_disable_pair(tcp, B_FALSE);
}
tcp->tcp_issocket = B_FALSE;
+ tcp->tcp_sodirect = NULL;
TCP_STAT(tcps, tcp_sock_fallback);
DB_TYPE(mp) = M_IOCACK;
@@ -23420,6 +23870,8 @@ tcp_push_timer(void *arg)
conn_t *connp = (conn_t *)arg;
tcp_t *tcp = connp->conn_tcp;
tcp_stack_t *tcps = tcp->tcp_tcps;
+ uint_t flags = 0; /* stays 0 (no ACK) when neither sodirect nor tcp_rcv_list has data */
+ sodirect_t *sodp;
TCP_DBGSTAT(tcps, tcp_push_timer_cnt);
@@ -23431,9 +23883,17 @@ tcp_push_timer(void *arg)
*/
TCP_FUSE_SYNCSTR_PLUG_DRAIN(tcp);
tcp->tcp_push_tid = 0;
- if ((tcp->tcp_rcv_list != NULL) &&
- (tcp_rcv_drain(tcp->tcp_rq, tcp) == TH_ACK_NEEDED))
+
+ SOD_PTR_ENTER(tcp, sodp);
+ if (sodp != NULL) {
+ flags = tcp_rcv_sod_wakeup(tcp, sodp);
+ /* sod_wakeup() does the mutex_exit() */
+ } else if (tcp->tcp_rcv_list != NULL) {
+ flags = tcp_rcv_drain(tcp->tcp_rq, tcp);
+ }
+ if (flags == TH_ACK_NEEDED)
tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt, tcp->tcp_rnxt, TH_ACK);
+
TCP_FUSE_SYNCSTR_UNPLUG_DRAIN(tcp);
}
diff --git a/usr/src/uts/common/inet/tcp/tcp6ddi.c b/usr/src/uts/common/inet/tcp/tcp6ddi.c
index e724bdd022..1eda50d9a6 100644
--- a/usr/src/uts/common/inet/tcp/tcp6ddi.c
+++ b/usr/src/uts/common/inet/tcp/tcp6ddi.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -40,7 +40,7 @@
* for TCP Fusion (loopback); this is why we don't define
* D_SYNCSTR here.
*/
-#define INET_DEVMTFLAGS (D_MP|_D_DIRECT)
+#define INET_DEVMTFLAGS (D_MP|_D_DIRECT|_D_SODIRECT)
#include "../inetddi.c"
diff --git a/usr/src/uts/common/inet/tcp/tcp_fusion.c b/usr/src/uts/common/inet/tcp/tcp_fusion.c
index 2503a13e29..5e2a8b23cb 100644
--- a/usr/src/uts/common/inet/tcp/tcp_fusion.c
+++ b/usr/src/uts/common/inet/tcp/tcp_fusion.c
@@ -287,6 +287,20 @@ tcp_fuse(tcp_t *tcp, uchar_t *iphdr, tcph_t *tcph)
if ((mp = allocb(sizeof (*stropt), BPRI_HI)) == NULL)
goto failed;
+ /* If either tcp or peer_tcp sodirect enabled then disable */
+ if (tcp->tcp_sodirect != NULL) {
+ mutex_enter(tcp->tcp_sodirect->sod_lock);
+ SOD_DISABLE(tcp->tcp_sodirect);
+ mutex_exit(tcp->tcp_sodirect->sod_lock);
+ tcp->tcp_sodirect = NULL;
+ }
+ if (peer_tcp->tcp_sodirect != NULL) {
+ mutex_enter(peer_tcp->tcp_sodirect->sod_lock);
+ SOD_DISABLE(peer_tcp->tcp_sodirect);
+ mutex_exit(peer_tcp->tcp_sodirect->sod_lock);
+ peer_tcp->tcp_sodirect = NULL;
+ }
+
/* Fuse both endpoints */
peer_tcp->tcp_loopback_peer = tcp;
tcp->tcp_loopback_peer = peer_tcp;
diff --git a/usr/src/uts/common/inet/tcp/tcpddi.c b/usr/src/uts/common/inet/tcp/tcpddi.c
index 436786b846..dc08ad23c4 100644
--- a/usr/src/uts/common/inet/tcp/tcpddi.c
+++ b/usr/src/uts/common/inet/tcp/tcpddi.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1990 Mentat Inc. */
@@ -44,7 +44,7 @@
* for TCP Fusion (loopback); this is why we don't define
* D_SYNCSTR here.
*/
-#define INET_DEVMTFLAGS (D_MP|_D_DIRECT)
+#define INET_DEVMTFLAGS (D_MP|_D_DIRECT|_D_SODIRECT)
#include "../inetddi.c"
diff --git a/deleted_files/usr/src/uts/common/io/dcopy.c b/usr/src/uts/common/io/dcopy.c
index 2dc5a311bc..02163c7e9e 100644
--- a/deleted_files/usr/src/uts/common/io/dcopy.c
+++ b/usr/src/uts/common/io/dcopy.c
@@ -689,6 +689,10 @@ dcopy_device_register(void *device_private, dcopy_device_info_t *info,
mutex_exit(&dcopy_statep->d_globalchan_list.dl_mutex);
*handle = device;
+
+ /* last call-back into kernel for dcopy KAPI enabled */
+ uioa_dcopy_enable();
+
return (DCOPY_SUCCESS);
registerfail_alloc:
@@ -723,6 +727,8 @@ dcopy_device_unregister(dcopy_device_handle_t *handle)
dcopy_device_handle_t device;
boolean_t device_busy;
+ /* first call-back into kernel for dcopy KAPI disable */
+ uioa_dcopy_disable();
device = *handle;
device_busy = B_FALSE;
diff --git a/usr/src/uts/common/io/stream.c b/usr/src/uts/common/io/stream.c
index 28a9a4928f..90fbf3cbf1 100644
--- a/usr/src/uts/common/io/stream.c
+++ b/usr/src/uts/common/io/stream.c
@@ -23,7 +23,7 @@
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -318,8 +318,8 @@ streams_msg_init(void)
int offset;
mblk_cache = kmem_cache_create("streams_mblk",
- sizeof (mblk_t), 32, NULL, NULL, NULL, NULL, NULL,
- mblk_kmem_flags);
+ sizeof (mblk_t), 32, NULL, NULL, NULL, NULL, NULL,
+ mblk_kmem_flags);
for (sizep = dblk_sizes; (size = *sizep) != 0; sizep++) {
@@ -330,7 +330,7 @@ streams_msg_init(void)
*/
tot_size = size + sizeof (dblk_t);
ASSERT((offset + sizeof (dblk_t) + sizeof (kmem_slab_t))
- < PAGESIZE);
+ < PAGESIZE);
ASSERT((tot_size & (DBLK_CACHE_ALIGN - 1)) == 0);
} else {
@@ -346,9 +346,9 @@ streams_msg_init(void)
(void) sprintf(name, "streams_dblk_%ld", size);
cp = kmem_cache_create(name, tot_size,
- DBLK_CACHE_ALIGN, dblk_constructor,
- dblk_destructor, NULL,
- (void *)(size), NULL, dblk_kmem_flags);
+ DBLK_CACHE_ALIGN, dblk_constructor,
+ dblk_destructor, NULL,
+ (void *)(size), NULL, dblk_kmem_flags);
while (lastsize <= size) {
dblk_cache[(lastsize - 1) >> DBLK_SIZE_SHIFT] = cp;
@@ -357,13 +357,13 @@ streams_msg_init(void)
}
dblk_esb_cache = kmem_cache_create("streams_dblk_esb",
- sizeof (dblk_t), DBLK_CACHE_ALIGN,
- dblk_esb_constructor, dblk_destructor, NULL,
- (void *) sizeof (dblk_t), NULL, dblk_kmem_flags);
+ sizeof (dblk_t), DBLK_CACHE_ALIGN,
+ dblk_esb_constructor, dblk_destructor, NULL,
+ (void *) sizeof (dblk_t), NULL, dblk_kmem_flags);
fthdr_cache = kmem_cache_create("streams_fthdr",
- sizeof (fthdr_t), 32, NULL, NULL, NULL, NULL, NULL, 0);
+ sizeof (fthdr_t), 32, NULL, NULL, NULL, NULL, NULL, 0);
ftblk_cache = kmem_cache_create("streams_ftblk",
- sizeof (ftblk_t), 32, NULL, NULL, NULL, NULL, NULL, 0);
+ sizeof (ftblk_t), 32, NULL, NULL, NULL, NULL, NULL, 0);
/* Initialize Multidata caches */
mmd_init();
@@ -545,8 +545,8 @@ dblk_lastfree(mblk_t *mp, dblk_t *dbp)
dbp->db_struioflag = 0;
dbp->db_struioun.cksum.flags = 0;
- /* and the COOKED flag */
- dbp->db_flags &= ~DBLK_COOKED;
+ /* and the COOKED and/or UIOA flag(s) */
+ dbp->db_flags &= ~(DBLK_COOKED | DBLK_UIOA);
kmem_cache_free(dbp->db_cache, dbp);
}
@@ -739,7 +739,7 @@ desballoc(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
*/
if (!str_ftnever) {
mp = gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
- frp, dblk_lastfree_desb, KM_NOSLEEP);
+ frp, dblk_lastfree_desb, KM_NOSLEEP);
if (mp != NULL)
STR_FTALLOC(&DB_FTHDR(mp), FTEV_DESBALLOC, size);
@@ -857,7 +857,7 @@ bcache_create(char *name, size_t size, uint_t align)
(void) sprintf(buffer, "%s_dblk_cache", name);
bcp->dblk_cache = kmem_cache_create(buffer, sizeof (dblk_t),
DBLK_CACHE_ALIGN, bcache_dblk_constructor, bcache_dblk_destructor,
- NULL, (void *)bcp, NULL, 0);
+ NULL, (void *)bcp, NULL, 0);
return (bcp);
}
@@ -1584,7 +1584,7 @@ adjmsg(mblk_t *mp, ssize_t len)
*/
if ((save_bp != mp) &&
- (save_bp->b_wptr == save_bp->b_rptr)) {
+ (save_bp->b_wptr == save_bp->b_rptr)) {
bcont = save_bp->b_cont;
freeb(save_bp);
prev_bp->b_cont = bcont;
@@ -2129,8 +2129,8 @@ flushband(queue_t *q, unsigned char pri, int flag)
nmp = mp->b_next;
mp->b_next = mp->b_prev = NULL;
if ((mp->b_band == 0) &&
- ((flag == FLUSHALL) ||
- datamsg(mp->b_datap->db_type)))
+ ((flag == FLUSHALL) ||
+ datamsg(mp->b_datap->db_type)))
freemsg(mp);
else
(void) putq(q, mp);
@@ -2242,7 +2242,7 @@ bcanput(queue_t *q, unsigned char pri)
q->q_flag |= QWANTW;
mutex_exit(QLOCK(q));
TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT,
- "bcanput:%p %X %d", q, pri, 0);
+ "bcanput:%p %X %d", q, pri, 0);
return (0);
}
} else { /* pri != 0 */
@@ -2252,7 +2252,7 @@ bcanput(queue_t *q, unsigned char pri)
*/
mutex_exit(QLOCK(q));
TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT,
- "bcanput:%p %X %d", q, pri, 1);
+ "bcanput:%p %X %d", q, pri, 1);
return (1);
}
qbp = q->q_bandp;
@@ -2262,13 +2262,13 @@ bcanput(queue_t *q, unsigned char pri)
qbp->qb_flag |= QB_WANTW;
mutex_exit(QLOCK(q));
TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT,
- "bcanput:%p %X %d", q, pri, 0);
+ "bcanput:%p %X %d", q, pri, 0);
return (0);
}
}
mutex_exit(QLOCK(q));
TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT,
- "bcanput:%p %X %d", q, pri, 1);
+ "bcanput:%p %X %d", q, pri, 1);
return (1);
}
@@ -2847,7 +2847,7 @@ putnextctl1(queue_t *q, int type, int param)
mblk_t *bp;
if ((datamsg(type) && (type != M_DELAY)) ||
- ((bp = allocb_tryhard(1)) == NULL))
+ ((bp = allocb_tryhard(1)) == NULL))
return (0);
bp->b_datap->db_type = (unsigned char)type;
@@ -2864,7 +2864,7 @@ putnextctl(queue_t *q, int type)
mblk_t *bp;
if ((datamsg(type) && (type != M_DELAY)) ||
- ((bp = allocb_tryhard(0)) == NULL))
+ ((bp = allocb_tryhard(0)) == NULL))
return (0);
bp->b_datap->db_type = (unsigned char)type;
diff --git a/usr/src/uts/common/os/move.c b/usr/src/uts/common/os/move.c
index d5c63b167e..163a4cc2e5 100644
--- a/usr/src/uts/common/os/move.c
+++ b/usr/src/uts/common/os/move.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -45,6 +44,16 @@
#include <sys/systm.h>
#include <sys/uio.h>
#include <sys/errno.h>
+#include <sys/vmsystm.h>
+#include <sys/cmn_err.h>
+#include <vm/as.h>
+#include <vm/page.h>
+
+#include <sys/dcopy.h>
+
+int64_t uioa_maxpoll = -1; /* <0 = noblock, 0 = block, >0 = block after */
+#define UIO_DCOPY_CHANNEL 0
+#define UIO_DCOPY_CMD 1
/*
* Move "n" bytes at byte address "p"; "rw" indicates the direction
@@ -277,3 +286,386 @@ uiodup(uio_t *suio, uio_t *duio, iovec_t *diov, int diov_cnt)
duio->uio_iov = diov;
return (0);
}
+
+/*
+ * Shadow state for checking if a platform has hardware asynchronous
+ * copy capability and minimum copy size, e.g. Intel's I/OAT DMA engine.
+ *
+ * Dcopy does a call-back to uioa_dcopy_enable() when a dma device calls
+ * into dcopy to register and uioa_dcopy_disable() when the device calls
+ * into dcopy to unregister.
+ */
+uioasync_t uioasync = {B_FALSE, 1024};
+
+/*
+ * Call-back from dcopy, made once a DMA device has registered: mark the
+ * asynchronous uio (uioa) facility as available by setting the shadow
+ * state flag.
+ */
+void
+uioa_dcopy_enable(void)
+{
+	uioasync.enabled = B_TRUE;
+}
+
+/*
+ * Call-back from dcopy, made when a DMA device starts to unregister: mark
+ * the asynchronous uio (uioa) facility as unavailable by clearing the
+ * shadow state flag.
+ */
+void
+uioa_dcopy_disable(void)
+{
+	uioasync.enabled = B_FALSE;
+}
+
+/*
+ * Schedule an asynchronous move of "n" bytes starting at kernel address "p".
+ * "rw" indicates the direction of the move; I/O parameters and async state
+ * are provided in "uioa", which is updated to reflect the data that has
+ * been scheduled to move.
+ *
+ * Only copies from the kernel to user-land (UIO_READ into a UIO_USERSPACE
+ * uio) are supported.  The copy is split so that no single dcopy command
+ * crosses a source page, a destination page or an iovec_t boundary.
+ *
+ * Returns:
+ *	0	all requested commands were posted (or nothing was left to do)
+ *	ENXIO	the uioa_t is not in the UIOA_ENABLED state
+ *	ENOTSUP	unsupported direction or address space
+ *	EIO	a dcopy command could not be allocated or posted; the uioa_t
+ *		reflects only the commands posted before the failure
+ *
+ * Note, while the uioasync APIs are general purpose in design
+ * the current implementation is Intel I/OAT specific.
+ */
+int
+uioamove(void *p, size_t n, enum uio_rw rw, uioa_t *uioa)
+{
+	/*
+	 * Offsets and counts are size_t so that a large "n" or iov_len is
+	 * never truncated (an int count could become 0 or negative, which
+	 * would stall or corrupt the copy loop below).
+	 */
+	size_t		soff, doff;
+	size_t		cnt;
+	uint64_t	pa;
+	iovec_t		*iov;
+	dcopy_handle_t	channel;
+	dcopy_cmd_t	cmd;
+	int		ret;
+	int		dcopy_flags;
+
+	if (!(uioa->uioa_state & UIOA_ENABLED)) {
+		/* The uioa_t isn't enabled */
+		return (ENXIO);
+	}
+
+	if (uioa->uio_segflg != UIO_USERSPACE || rw != UIO_READ) {
+		/* Only support to user-land from kernel */
+		return (ENOTSUP);
+	}
+
+	channel = uioa->uioa_hwst[UIO_DCOPY_CHANNEL];
+	cmd = uioa->uioa_hwst[UIO_DCOPY_CMD];
+	dcopy_flags = DCOPY_NOSLEEP;
+
+	/*
+	 * While there are both source bytes and destination space.
+	 */
+	while (n > 0 && uioa->uio_resid > 0) {
+		iov = uioa->uio_iov;
+		if (iov->iov_len == 0) {
+			/*
+			 * Current iovec_t is exhausted, step to the next one
+			 * together with its locked-page bookkeeping.
+			 */
+			uioa->uio_iov++;
+			uioa->uio_iovcnt--;
+			uioa->uioa_lcur++;
+			uioa->uioa_lppp = uioa->uioa_lcur->uioa_ppp;
+			continue;
+		}
+		/*
+		 * While source bytes schedule an async
+		 * dma for destination page by page.
+		 */
+		while (n > 0) {
+			/* Addr offset in page src/dst */
+			soff = (uintptr_t)p & PAGEOFFSET;
+			doff = (uintptr_t)iov->iov_base & PAGEOFFSET;
+			/* Min copy count src and dst and page sized */
+			cnt = MIN(n, iov->iov_len);
+			cnt = MIN(cnt, PAGESIZE - soff);
+			cnt = MIN(cnt, PAGESIZE - doff);
+			/* XXX if next page(s) contiguous could use multipage */
+
+			/*
+			 * if we have an old command, we want to link all
+			 * other commands to the next command we alloced so
+			 * we only need to track the last command but can
+			 * still free them all.
+			 */
+			if (cmd != NULL) {
+				dcopy_flags |= DCOPY_ALLOC_LINK;
+			}
+			ret = dcopy_cmd_alloc(channel, dcopy_flags, &cmd);
+			if (ret != DCOPY_SUCCESS) {
+				/* Error of some sort */
+				return (EIO);
+			}
+			/* Remember the newest command for uioafini() */
+			uioa->uioa_hwst[UIO_DCOPY_CMD] = cmd;
+
+			ASSERT(cmd->dp_version == DCOPY_CMD_V0);
+			if (uioa_maxpoll >= 0) {
+				/* Blocking (>0 may be) used in uioafini() */
+				cmd->dp_flags = DCOPY_CMD_INTR;
+			} else {
+				/* Non blocking uioafini() so no intr */
+				cmd->dp_flags = DCOPY_CMD_NOFLAGS;
+			}
+			cmd->dp_cmd = DCOPY_CMD_COPY;
+
+			/* Source: physical address of the kernel buffer */
+			pa = ptob((uint64_t)hat_getpfnum(kas.a_hat, p));
+			cmd->dp.copy.cc_source = pa + soff;
+
+			/* Destination: current locked user page */
+			if (uioa->uioa_lcur->uioa_pfncnt == 0) {
+				/* Have a (page_t **) */
+				pa = ptob((uint64_t)(
+				    *(page_t **)uioa->uioa_lppp)->p_pagenum);
+			} else {
+				/* Have a (pfn_t *) */
+				pa = ptob((uint64_t)(
+				    *(pfn_t *)uioa->uioa_lppp));
+			}
+			cmd->dp.copy.cc_dest = pa + doff;
+			cmd->dp.copy.cc_size = cnt;
+			ret = dcopy_cmd_post(cmd);
+			if (ret != DCOPY_SUCCESS) {
+				/* Error of some sort */
+				return (EIO);
+			}
+
+			/* At least one command is outstanding, must poll */
+			if (!(uioa->uioa_state & UIOA_POLL))
+				uioa->uioa_state |= UIOA_POLL;
+
+			/* Update iov, uio, and local pointers/counters */
+			iov->iov_base += cnt;
+			iov->iov_len -= cnt;
+			uioa->uio_resid -= cnt;
+			uioa->uio_loffset += cnt;
+			p = (caddr_t)p + cnt;
+			n -= cnt;
+
+			/* End of iovec? */
+			if (iov->iov_len == 0) {
+				/* Yup, next iovec */
+				break;
+			}
+
+			/* Next dst addr page? */
+			if (doff + cnt == PAGESIZE) {
+				/* Yup, next page_t */
+				uioa->uioa_lppp++;
+			}
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Initialize a uioa_t for a given uio_t for the current user context:
+ * allocate a dcopy channel, copy the common uio_t to the uioa_t, then walk
+ * the shared iovec_t array and lock down the user-land page(s) backing each
+ * iovec_t, recording either the page_t list returned by as_pagelock() or a
+ * locally built pfn_t list for use by uioamove().
+ *
+ * Returns:
+ *	0	success, the uioa_t is in the UIOA_INIT state
+ *	EINVAL	the uioa_t is not in the UIOA_ALLOC state
+ *	ENODEV	dcopy has no resources; uioasync is also disabled
+ *	EIO	dcopy channel allocation failed for another reason
+ *	E2BIG	more than UIOA_IOV_MAX iovec_t(s)
+ *	ENOMEM	no memory for a pfn_t list
+ *	other	error returned by as_pagelock()
+ *
+ * On any failure after the channel has been allocated, everything acquired
+ * here (locked pages, pfn_t lists, the dcopy channel) is released and the
+ * uioa_t is returned to the UIOA_ALLOC state.
+ */
+int
+uioainit(uio_t *uiop, uioa_t *uioap)
+{
+	caddr_t		addr;
+	page_t		**pages;
+	size_t		off;
+	size_t		len;
+	proc_t		*procp = ttoproc(curthread);
+	struct as	*as = procp->p_as;
+	iovec_t		*iov = uiop->uio_iov;
+	int32_t		iovcnt = uiop->uio_iovcnt;
+	uioa_page_t	*locked = uioap->uioa_locked;
+	dcopy_handle_t	channel;
+	int		error;
+
+	if (!(uioap->uioa_state & UIOA_ALLOC)) {
+		/* Can only init() a freshly allocated uioa_t */
+		return (EINVAL);
+	}
+
+	error = dcopy_alloc(DCOPY_NOSLEEP, &channel);
+	if (error == DCOPY_NORESOURCES) {
+		/* Turn off uioa */
+		uioasync.enabled = B_FALSE;
+		return (ENODEV);
+	}
+	if (error != DCOPY_SUCCESS) {
+		/* Alloc failed */
+		return (EIO);
+	}
+
+	uioap->uioa_hwst[UIO_DCOPY_CHANNEL] = channel;
+	uioap->uioa_hwst[UIO_DCOPY_CMD] = NULL;
+
+	/* Indicate uioa_t (will be) initialized */
+	uioap->uioa_state = UIOA_INIT;
+
+	/* uio_t/uioa_t uio_t common struct copy */
+	*((uio_t *)uioap) = *uiop;
+
+	if (iovcnt > UIOA_IOV_MAX) {
+		/*
+		 * Too big; go through cleanup so the channel is released
+		 * and the uioa_t isn't left looking initialized.
+		 */
+		error = E2BIG;
+		goto cleanup;
+	}
+	uioap->uio_iov = iov;
+	uioap->uio_iovcnt = iovcnt;
+
+	/* Mark the uioap as such */
+	uioap->uio_extflg |= UIO_ASYNC;
+
+	/*
+	 * For each iovec_t, lock-down the page(s) backing the iovec_t
+	 * and save the page_t list for phys addr use in uioamove().
+	 */
+	while (iovcnt > 0) {
+		addr = iov->iov_base;
+		off = (uintptr_t)addr & PAGEOFFSET;
+		addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
+		/* Span in bytes measured from the start of the first page */
+		len = iov->iov_len + off;
+
+		/* Lock down page(s) for the iov span */
+		if ((error = as_pagelock(as, &pages,
+		    iov->iov_base, iov->iov_len, S_WRITE)) != 0) {
+			/* Error */
+			goto cleanup;
+		}
+
+		if (pages == NULL) {
+			/*
+			 * Need page_t list, really only need
+			 * a pfn list so build one.
+			 */
+			pfn_t	*pfnp;
+			/*
+			 * Number of pages touched by the span: round the
+			 * page-relative length up to whole pages so that a
+			 * partial last page is counted and an exactly page
+			 * terminated span isn't over counted.
+			 */
+			int	pcnt = (int)((len + PAGEOFFSET) >> PAGESHIFT);
+
+			/*
+			 * Element size is kept at pointer size so that it
+			 * matches the size passed to kmem_free() for this
+			 * list in uioafini() and in cleanup below.
+			 */
+			if ((pfnp = kmem_alloc(pcnt * sizeof (pfnp),
+			    KM_NOSLEEP)) == NULL) {
+				/*
+				 * This iovec_t is locked but not yet
+				 * recorded in uioa_locked[], so cleanup
+				 * won't see it; unlock it here.
+				 */
+				as_pageunlock(as, NULL, iov->iov_base,
+				    iov->iov_len, S_WRITE);
+				error = ENOMEM;
+				goto cleanup;
+			}
+			locked->uioa_ppp = (void **)pfnp;
+			locked->uioa_pfncnt = pcnt;
+			AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
+			while (pcnt-- > 0) {
+				*pfnp++ = hat_getpfnum(as->a_hat, addr);
+				addr += PAGESIZE;
+			}
+			AS_LOCK_EXIT(as, &as->a_lock);
+		} else {
+			/* Have a page_t list, save it */
+			locked->uioa_ppp = (void **)pages;
+			locked->uioa_pfncnt = 0;
+		}
+		/* Save for as_pageunlock() in uioafini() */
+		locked->uioa_base = iov->iov_base;
+		locked->uioa_len = iov->iov_len;
+		locked++;
+
+		/* Next iovec_t */
+		iov++;
+		iovcnt--;
+	}
+	/* Initialize current pointer into uioa_locked[] and its uioa_ppp */
+	uioap->uioa_lcur = uioap->uioa_locked;
+	uioap->uioa_lppp = uioap->uioa_lcur->uioa_ppp;
+	return (0);
+
+cleanup:
+	/* Unlock any previously locked page_t(s) */
+	while (locked > uioap->uioa_locked) {
+		locked--;
+		if (locked->uioa_pfncnt == 0) {
+			/* as_pagelock() returned a (page_t **) */
+			pages = (page_t **)locked->uioa_ppp;
+		} else {
+			/* Our pfn_t array, not a page_t list; free it */
+			pages = NULL;
+			kmem_free(locked->uioa_ppp, locked->uioa_pfncnt *
+			    sizeof (pfn_t *));
+		}
+		as_pageunlock(as, pages, locked->uioa_base, locked->uioa_len,
+		    S_WRITE);
+	}
+
+	/* Give back the dcopy channel allocated above */
+	dcopy_free(&channel);
+	uioap->uioa_hwst[UIO_DCOPY_CMD] = NULL;
+	uioap->uioa_hwst[UIO_DCOPY_CHANNEL] = NULL;
+
+	/* Last indicate uioa_t still in alloc state */
+	uioap->uioa_state = UIOA_ALLOC;
+
+	return (error);
+}
+
+/*
+ * Finish processing of a uioa_t: wait for any outstanding dcopy command,
+ * release the dcopy command and channel, unlock the user pages locked by
+ * uioainit(), copy the uio_t state back to the caller's "uiop" and return
+ * the uioa_t to the UIOA_ALLOC state.
+ *
+ * Returns 0 on success, EINVAL if the uioa_t is neither UIOA_ENABLED nor
+ * UIOA_FINI, or EIO if waiting for the last command did not end with
+ * DCOPY_COMPLETED (teardown is still carried out in that case).
+ */
+int
+uioafini(uio_t *uiop, uioa_t *uioap)
+{
+	struct as	*as = ttoproc(curthread)->p_as;
+	uioa_page_t	*lp = uioap->uioa_locked;
+	int32_t		remaining = uiop->uio_iovcnt;
+	dcopy_handle_t	channel;
+	dcopy_cmd_t	cmd;
+	int		ret = 0;
+
+	ASSERT(uioap->uio_extflg & UIO_ASYNC);
+
+	/* Must be an active uioa_t */
+	if ((uioap->uioa_state & (UIOA_ENABLED|UIOA_FINI)) == 0)
+		return (EINVAL);
+
+	channel = uioap->uioa_hwst[UIO_DCOPY_CHANNEL];
+	cmd = uioap->uioa_hwst[UIO_DCOPY_CMD];
+
+	/* XXX - cmd is sometimes NULL here, reason not yet understood */
+	if (cmd != NULL) {
+		if (uioap->uioa_state & UIOA_POLL) {
+			/*
+			 * Wait for the last dcopy to finish.  Depending on
+			 * uioa_maxpoll: never block (<0), always block (0),
+			 * or switch to blocking once the poll count reaches
+			 * uioa_maxpoll (>0).
+			 */
+			int64_t	attempts = 1;
+			int	how = DCOPY_POLL_NOFLAGS;
+
+			for (;;) {
+				if (uioa_maxpoll == 0 ||
+				    (uioa_maxpoll > 0 &&
+				    attempts >= uioa_maxpoll)) {
+					how = DCOPY_POLL_BLOCK;
+				} else {
+					attempts++;
+				}
+				ret = dcopy_cmd_poll(cmd, how);
+				if (ret != DCOPY_PENDING)
+					break;
+			}
+
+			/* Map the dcopy status onto an errno */
+			ret = (ret == DCOPY_COMPLETED) ? 0 : EIO;
+		}
+		dcopy_cmd_free(&cmd);
+	}
+
+	dcopy_free(&channel);
+
+	/* Unlock all page(s), one uioa_locked[] entry per iovec_t */
+	for (; remaining > 0; remaining--, lp++) {
+		page_t	**pages = NULL;
+
+		if (lp->uioa_pfncnt != 0) {
+			/* Our own pfn_t array: free it, unlock by range */
+			kmem_free(lp->uioa_ppp, lp->uioa_pfncnt *
+			    sizeof (pfn_t *));
+		} else {
+			/* (page_t **) handed back by as_pagelock() */
+			pages = (page_t **)lp->uioa_ppp;
+		}
+		as_pageunlock(as, pages, lp->uioa_base, lp->uioa_len,
+		    S_WRITE);
+	}
+
+	/* uioa_t->uio_t common struct copy */
+	*uiop = *((uio_t *)uioap);
+
+	/*
+	 * Finally put the uioa_t back into the alloc state.  Only the state
+	 * and the hardware handles are reset here, every other member is
+	 * set up again by the next uioainit().
+	 */
+	uioap->uioa_state = UIOA_ALLOC;
+	uioap->uioa_hwst[UIO_DCOPY_CMD] = NULL;
+	uioap->uioa_hwst[UIO_DCOPY_CHANNEL] = NULL;
+
+	return (ret);
+}
diff --git a/usr/src/uts/common/os/streamio.c b/usr/src/uts/common/os/streamio.c
index 3fcbf8634b..a1cada7964 100644
--- a/usr/src/uts/common/os/streamio.c
+++ b/usr/src/uts/common/os/streamio.c
@@ -144,6 +144,7 @@ static void putback(struct stdata *, queue_t *, mblk_t *, int);
static void strcleanall(struct vnode *);
static int strwsrv(queue_t *);
static int strdocmd(struct stdata *, struct strcmd *, cred_t *);
+static void struioainit(queue_t *, sodirect_t *, uio_t *);
/*
* qinit and module_info structures for stream head read and write queues
@@ -189,6 +190,11 @@ static boolean_t msghasdata(mblk_t *bp);
* mirror this.
* 4. ioctl monitor: sd_lock is gotten to ensure that only one
* thread is doing an ioctl at a time.
+ *
+ * Note, for sodirect case 3. is extended to (*sodirect_t.sod_enqueue)()
+ * call-back from below, further the sodirect support is for code paths
+ * called via kstgetmsg(), all other code paths ASSERT() that sodirect
+ * uioa generated mblk_t's (i.e. DBLK_UIOA) aren't processed.
*/
static int
@@ -397,6 +403,7 @@ ckreturn:
stp->sd_qn_minpsz = 0;
stp->sd_qn_maxpsz = INFPSZ - 1; /* used to check for initialization */
stp->sd_maxblk = INFPSZ;
+ stp->sd_sodirect = NULL;
qp->q_ptr = _WR(qp)->q_ptr = stp;
STREAM(qp) = STREAM(_WR(qp)) = stp;
vp->v_stream = stp;
@@ -970,11 +977,14 @@ strcleanall(struct vnode *vp)
* It is the callers responsibility to call qbackenable after
* it is finished with the message. The caller should not call
* qbackenable until after any putback calls to avoid spurious backenabling.
+ *
+ * Also, handle uioa initialization and process any DBLK_UIOA flagged messages.
*/
mblk_t *
strget(struct stdata *stp, queue_t *q, struct uio *uiop, int first,
int *errorp)
{
+ sodirect_t *sodp = stp->sd_sodirect;
mblk_t *bp;
int error;
@@ -1063,7 +1073,67 @@ strget(struct stdata *stp, queue_t *q, struct uio *uiop, int first,
}
*errorp = 0;
ASSERT(MUTEX_HELD(&stp->sd_lock));
- return (getq_noenab(q));
+ if (sodp != NULL && (sodp->sod_state & SOD_ENABLED) &&
+ (sodp->sod_uioa.uioa_state & UIOA_INIT)) {
+ /*
+ * First kstrgetmsg() call for a uioa_t, so any already
+ * queued mblk_t's must be consumed before uioa from
+ * below can occur.
+ */
+ sodp->sod_uioa.uioa_state &= UIOA_CLR;
+ sodp->sod_uioa.uioa_state |= UIOA_ENABLED;
+ if (q->q_first != NULL) {
+ struioainit(q, sodp, uiop);
+ }
+ }
+
+ bp = getq_noenab(q);
+
+ if (bp != NULL && (bp->b_datap->db_flags & DBLK_UIOA)) {
+ /*
+ * A uioa flagged mblk_t chain, already uio processed,
+ * add it to the sodirect uioa pending free list.
+ *
+ * Note, a b_cont chain headed by a DBLK_UIOA enabled
+ * mblk_t must have all mblk_t(s) DBLK_UIOA enabled.
+ */
+ mblk_t *bpt = sodp->sod_uioaft;
+
+ ASSERT(sodp != NULL);
+
+ /*
+ * Add first mblk_t of "bp" chain to current sodirect uioa
+ * free list tail mblk_t, if any, else empty list so new head.
+ */
+ if (bpt == NULL)
+ sodp->sod_uioafh = bp;
+ else
+ bpt->b_cont = bp;
+
+ /*
+ * Walk mblk_t "bp" chain to find tail and adjust rptr of
+ * each to reflect that uioamove() has consumed all data.
+ */
+ bpt = bp;
+ for (;;) {
+ bpt->b_rptr = bpt->b_wptr;
+ if (bpt->b_cont == NULL)
+ break;
+ bpt = bpt->b_cont;
+
+ ASSERT(bpt->b_datap->db_flags & DBLK_UIOA);
+ }
+ /* New sodirect uioa free list tail */
+ sodp->sod_uioaft = bpt;
+
+ /* Only 1 strget() with data returned per uioa_t */
+ if (sodp->sod_uioa.uioa_state & UIOA_ENABLED) {
+ sodp->sod_uioa.uioa_state &= UIOA_CLR;
+ sodp->sod_uioa.uioa_state |= UIOA_FINI;
+ }
+ }
+
+ return (bp);
}
/*
@@ -1083,6 +1153,8 @@ struiocopyout(mblk_t *bp, struct uio *uiop, int *errorp)
ASSERT(bp->b_wptr >= bp->b_rptr);
do {
+ ASSERT(!(bp->b_datap->db_flags & DBLK_UIOA));
+
if ((n = MIN(uiop->uio_resid, MBLKL(bp))) != 0) {
ASSERT(n > 0);
@@ -1229,8 +1301,10 @@ strread(struct vnode *vp, struct uio *uiop, cred_t *crp)
}
first = 0;
}
+
ASSERT(MUTEX_HELD(&stp->sd_lock));
ASSERT(bp);
+ ASSERT(!(bp->b_datap->db_flags & DBLK_UIOA));
pri = bp->b_band;
/*
* Extract any mark information. If the message is not
@@ -6650,6 +6724,7 @@ strgetmsg(
bp = strget(stp, q, uiop, first, &error);
ASSERT(MUTEX_HELD(&stp->sd_lock));
if (bp != NULL) {
+ ASSERT(!(bp->b_datap->db_flags & DBLK_UIOA));
if (bp->b_datap->db_type == M_SIG) {
strsignal_nolock(stp, *bp->b_rptr,
(int32_t)bp->b_band);
@@ -7288,7 +7363,7 @@ retry:
"kstrgetmsg calls strwaitq:%p, %p",
vp, uiop);
if (((error = strwaitq(stp, waitflag, (ssize_t)0,
- fmode, timout, &done)) != 0) || done) {
+ fmode, timout, &done))) != 0 || done) {
TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_DONE,
"kstrgetmsg error or done:%p, %p",
vp, uiop);
@@ -7360,6 +7435,8 @@ retry:
* there is indeed a shortage of memory. dupmsg() may fail
* if db_ref in any of the messages reaches its limit.
*/
+
+ ASSERT(!(bp->b_datap->db_flags & DBLK_UIOA));
if ((nbp = dupmsg(bp)) == NULL && (nbp = copymsg(bp)) == NULL) {
/*
* Restore the state of the stream head since we
@@ -7418,6 +7495,7 @@ retry:
}
}
+ ASSERT(!(bp->b_datap->db_flags & DBLK_UIOA));
bp = (stp->sd_rputdatafunc)(stp->sd_vnode, bp,
NULL, NULL, NULL, NULL);
@@ -7468,6 +7546,8 @@ retry:
*/
if (uiop == NULL) {
/* Append data to tail of mctlp */
+
+ ASSERT(bp == NULL || !(bp->b_datap->db_flags & DBLK_UIOA));
if (mctlp != NULL) {
mblk_t **mpp = mctlp;
@@ -7476,6 +7556,14 @@ retry:
*mpp = bp;
bp = NULL;
}
+ } else if (bp && (bp->b_datap->db_flags & DBLK_UIOA)) {
+ /*
+ * A uioa mblk_t chain, as uio processing has already
+ * been done we simply skip over processing.
+ */
+ bp = NULL;
+ pr = 0;
+
} else if (uiop->uio_resid >= 0 && bp) {
size_t oldresid = uiop->uio_resid;
@@ -7564,6 +7652,8 @@ retry:
* again since the flush logic in strrput_nondata()
* may have cleared it while we had sd_lock dropped.
*/
+
+ ASSERT(!(savemp->b_datap->db_flags & DBLK_UIOA));
if (type >= QPCTL) {
ASSERT(type == M_PCPROTO);
if (queclass(savemp) < QPCTL)
@@ -8635,3 +8725,85 @@ msghasdata(mblk_t *bp)
}
return (B_FALSE);
}
+
+/*
+ * Called on the first strget() of a sodirect/uioa enabled streamhead,
+ * if any mblk_t(s) are enqueued they must first be uioamove()d before uioa
+ * can be enabled for the underlying transport's use.  Each mblk_t
+ * scheduled is flagged DBLK_UIOA; on the first which can't be (not M_DATA,
+ * more data than uio_resid, uioamove() error) the uioa is marked UIOA_FINI
+ * and the walk stops.
+ */
+static void
+struioainit(queue_t *q, sodirect_t *sodp, uio_t *uiop)
+{
+	uioa_t	*uioap = (uioa_t *)uiop;
+	mblk_t	*bp = q->q_first;
+	mblk_t	*lbp = NULL;
+	mblk_t	*nbp, *wbp;
+	int	len;
+
+	ASSERT(MUTEX_HELD(sodp->sod_lock));
+	ASSERT(&sodp->sod_uioa == uioap);
+
+	/*
+	 * Walk the b_next/b_prev doubly linked list of b_cont chain(s)
+	 * and schedule any M_DATA mblk_t's for uio asynchronous move.
+	 */
+	do {
+		/* Next mblk_t chain */
+		nbp = bp->b_next;
+		/* Walk the chain */
+		wbp = bp;
+		do {
+			if (wbp->b_datap->db_type != M_DATA) {
+				/* Not M_DATA, no more uioa */
+				goto nouioa;
+			}
+			if ((len = wbp->b_wptr - wbp->b_rptr) > 0) {
+				/* Have a M_DATA mblk_t with data */
+				if (len > uioap->uio_resid) {
+					/* Not enough uio space */
+					goto nouioa;
+				}
+				if (uioamove(wbp->b_rptr, len,
+				    UIO_READ, uioap) != 0) {
+					/* Error of some sort, no more uioa */
+					goto nouioa;
+				}
+				/* Scheduled, mark dblk_t as such */
+				wbp->b_datap->db_flags |= DBLK_UIOA;
+			}
+			/* Save last wbp processed */
+			lbp = wbp;
+		} while ((wbp = wbp->b_cont) != NULL);
+	} while ((bp = nbp) != NULL);
+
+	return;
+
+nouioa:
+	/* No more uioa */
+	uioap->uioa_state &= UIOA_CLR;
+	uioap->uioa_state |= UIOA_FINI;
+
+	/*
+	 * If we stopped part way down a b_cont chain (wbp != bp) then lbp is
+	 * the mblk_t just before wbp, so split the chain in 2 so that all the
+	 * uioamove()ed mblk_t(s) are in the current chain and the rest are in
+	 * the following new chain.  If we stopped at the head of a chain
+	 * (wbp == bp) there is nothing to split, and linking bp in after
+	 * itself would corrupt the queue's b_next/b_prev list.
+	 */
+	if (lbp != NULL && wbp != bp) {
+		/* New end of current chain */
+		lbp->b_cont = NULL;
+
+		/* Insert new chain wbp after bp */
+		if ((wbp->b_next = nbp) != NULL)
+			nbp->b_prev = wbp;
+		else
+			q->q_last = wbp;
+		wbp->b_prev = bp;
+		bp->b_next = wbp;
+	}
+}
diff --git a/usr/src/uts/common/os/strsubr.c b/usr/src/uts/common/os/strsubr.c
index 650a4cfaf9..a7750e2ec3 100644
--- a/usr/src/uts/common/os/strsubr.c
+++ b/usr/src/uts/common/os/strsubr.c
@@ -2467,13 +2467,18 @@ devflg_to_qflag(struct streamtab *stp, uint32_t devflag, uint32_t *qflagp,
/*
* Private flag used by a transport module to indicate
* to sockfs that it supports direct-access mode without
- * having to go through STREAMS.
+ * having to go through STREAMS or the transport can use
+ * sodirect_t sharing to bypass STREAMS for receive-side
+ * M_DATA processing.
*/
- if (devflag & _D_DIRECT) {
+ if (devflag & (_D_DIRECT|_D_SODIRECT)) {
/* Reject unless the module is fully-MT (no perimeter) */
if ((qflag & QMT_TYPEMASK) != QMTSAFE)
goto bad;
- qflag |= _QDIRECT;
+ if (devflag & _D_DIRECT)
+ qflag |= _QDIRECT;
+ if (devflag & _D_SODIRECT)
+ qflag |= _QSODIRECT;
}
*qflagp = qflag;
diff --git a/usr/src/uts/common/sys/Makefile b/usr/src/uts/common/sys/Makefile
index 37b908076b..728860594a 100644
--- a/usr/src/uts/common/sys/Makefile
+++ b/usr/src/uts/common/sys/Makefile
@@ -487,6 +487,7 @@ CHKHDRS= \
socket_impl.h \
socketvar.h \
sockio.h \
+ sodirect.h \
squeue.h \
squeue_impl.h \
srn.h \
diff --git a/usr/src/uts/common/sys/conf.h b/usr/src/uts/common/sys/conf.h
index 3f6300e581..435cffb35f 100644
--- a/usr/src/uts/common/sys/conf.h
+++ b/usr/src/uts/common/sys/conf.h
@@ -22,7 +22,7 @@
/* All Rights Reserved */
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -221,6 +221,9 @@ extern int cdev_prop_op(dev_t, dev_info_t *, ddi_prop_op_t,
#define D_OPEN_RETURNS_EINTR 0x100000 /* EINTR expected from open(9E) */
+#define _D_SODIRECT 0x200000 /* Private flag for transport modules used */
+ /* to enable _QSODIRECT for a STREAMS Q */
+
#endif /* !defined(_XPG4_2) || defined(__EXTENSIONS__) */
#ifdef __cplusplus
diff --git a/deleted_files/usr/src/uts/common/sys/dcopy.h b/usr/src/uts/common/sys/dcopy.h
index e700ed9cf6..09e72e84e0 100644
--- a/deleted_files/usr/src/uts/common/sys/dcopy.h
+++ b/usr/src/uts/common/sys/dcopy.h
@@ -39,6 +39,10 @@ extern "C" {
* *** This interface is for private use by the IP stack only ***
*/
+/* Private dcopy/uioa interface for dcopy to enable/disable dcopy KAPI */
+extern void uioa_dcopy_enable(void);
+extern void uioa_dcopy_disable(void);
+
/* Function return status */
#define DCOPY_FAILURE (-1)
#define DCOPY_SUCCESS (0)
diff --git a/deleted_files/usr/src/uts/common/sys/dcopy_device.h b/usr/src/uts/common/sys/dcopy_device.h
index 25e95b2aa8..25e95b2aa8 100644
--- a/deleted_files/usr/src/uts/common/sys/dcopy_device.h
+++ b/usr/src/uts/common/sys/dcopy_device.h
diff --git a/usr/src/uts/common/sys/socketvar.h b/usr/src/uts/common/sys/socketvar.h
index 0680546ade..178a8a2905 100644
--- a/usr/src/uts/common/sys/socketvar.h
+++ b/usr/src/uts/common/sys/socketvar.h
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -50,14 +50,13 @@
#include <sys/file.h>
#include <sys/param.h>
#include <sys/zone.h>
+#include <sys/sodirect.h>
#include <inet/kssl/ksslapi.h>
#ifdef __cplusplus
extern "C" {
#endif
-
-
/*
* Internal representation used for addresses.
*/
@@ -333,6 +332,9 @@ struct sonode {
kssl_endpt_type_t so_kssl_type; /* is proxy/is proxied/none */
kssl_ent_t so_kssl_ent; /* SSL config entry */
kssl_ctx_t so_kssl_ctx; /* SSL session context */
+
+ /* != NULL for sodirect_t enabled socket */
+ sodirect_t *so_direct;
};
/* flags */
@@ -375,6 +377,7 @@ struct sonode {
#define SS_MOREDATA 0x00100000 /* NCAfs: NCA has more data */
#define SS_DIRECT 0x00200000 /* transport is directly below */
+#define SS_SODIRECT 0x00400000 /* transport supports sodirect */
#define SS_LADDR_VALID 0x01000000 /* so_laddr valid for user */
#define SS_FADDR_VALID 0x02000000 /* so_faddr valid for user */
diff --git a/deleted_files/usr/src/uts/common/sys/sodirect.h b/usr/src/uts/common/sys/sodirect.h
index 49609bc5af..49609bc5af 100644
--- a/deleted_files/usr/src/uts/common/sys/sodirect.h
+++ b/usr/src/uts/common/sys/sodirect.h
diff --git a/usr/src/uts/common/sys/stream.h b/usr/src/uts/common/sys/stream.h
index 3eca2fefdf..0da91f7d38 100644
--- a/usr/src/uts/common/sys/stream.h
+++ b/usr/src/uts/common/sys/stream.h
@@ -190,6 +190,8 @@ typedef struct queue {
#define _QASSOCIATED 0x10000000 /* queue is associated with a device */
#define _QDIRECT 0x20000000 /* Private; transport module uses */
/* direct interface to/from sockfs */
+#define _QSODIRECT 0x40000000 /* Private, transport module shares */
+ /* an sodirect_t with sockfs */
/* queue sqflags (protected by SQLOCK). */
#define Q_SQQUEUED 0x01 /* Queue is in the syncq list */
@@ -400,6 +402,7 @@ typedef struct bcache {
*/
#define DBLK_REFMIN 0x01 /* min refcnt stored in low bit */
#define DBLK_COOKED 0x02 /* message has been processed once */
+#define DBLK_UIOA 0x04 /* uioamove() is pending */
/*
* db_struioflag values:
diff --git a/usr/src/uts/common/sys/strsubr.h b/usr/src/uts/common/sys/strsubr.h
index df489c3dff..71c26a3212 100644
--- a/usr/src/uts/common/sys/strsubr.h
+++ b/usr/src/uts/common/sys/strsubr.h
@@ -46,6 +46,7 @@
#include <sys/proc.h>
#include <sys/netstack.h>
#include <sys/modhash.h>
+#include <sys/sodirect.h>
#ifdef __cplusplus
extern "C" {
@@ -94,9 +95,8 @@ extern "C" {
* sd_mark
* sd_closetime
* sd_wakeq
- * sd_uiordq
- * sd_uiowrq
* sd_maxblk
+ * sd_sodirect
*
* The following fields are modified only by the allocator, which
* has exclusive access to them at that time:
@@ -245,6 +245,10 @@ typedef struct stdata {
uint_t sd_copyflag; /* copy-related flags */
zoneid_t sd_anchorzone; /* Allow removal from same zone only */
struct msgb *sd_cmdblk; /* reply from _I_CMD */
+ /*
+ * Support for socket direct.
+ */
+ sodirect_t *sd_sodirect; /* pointer to shared sodirect_t */
} stdata_t;
/*
diff --git a/usr/src/uts/common/sys/uio.h b/usr/src/uts/common/sys/uio.h
index 3e9e4a5eda..4f0aff49f6 100644
--- a/usr/src/uts/common/sys/uio.h
+++ b/usr/src/uts/common/sys/uio.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -101,6 +100,49 @@ typedef struct uio {
ssize_t uio_resid; /* residual count */
} uio_t;
+/*
+ * Extended uio_t uioa_t used for asynchronous uio.
+ *
+ * Note: UIOA_IOV_MAX is defined and used as it is in "fs/vncalls.c"
+ * as there isn't a formal definition of IOV_MAX for the kernel.
+ */
+#define UIOA_IOV_MAX 16
+
+typedef struct uioa_page_s { /* locked uio_iov state */
+ int uioa_pfncnt; /* count of pfn_t(s) in *uioa_ppp */
+ void **uioa_ppp; /* page_t or pfn_t array */
+ caddr_t uioa_base; /* address base */
+ size_t uioa_len; /* span length */
+} uioa_page_t;
+
+typedef struct uioa_s {
+ iovec_t *uio_iov; /* pointer to array of iovecs */
+ int uio_iovcnt; /* number of iovecs */
+ lloff_t _uio_offset; /* file offset */
+ uio_seg_t uio_segflg; /* address space (kernel or user) */
+ uint16_t uio_fmode; /* file mode flags */
+ uint16_t uio_extflg; /* extended flags */
+ lloff_t _uio_limit; /* u-limit (maximum byte offset) */
+ ssize_t uio_resid; /* residual count */
+ /*
+ * uioa extended members (NOTE(review): those above presumably mirror uio_t).
+ */
+ uint32_t uioa_state; /* state of asynch i/o */
+ uioa_page_t *uioa_lcur; /* pointer into uioa_locked[] */
+ void **uioa_lppp; /* pointer into lcur->uioa_ppp[] */
+ void *uioa_hwst[4]; /* opaque hardware state */
+ uioa_page_t uioa_locked[UIOA_IOV_MAX]; /* Per iov locked pages */
+} uioa_t;
+
+#define UIOA_ALLOC 0x0001 /* allocated but not yet initialized */
+#define UIOA_INIT 0x0002 /* initialized but not yet enabled */
+#define UIOA_ENABLED 0x0004 /* enabled, asynch i/o active */
+#define UIOA_FINI 0x0008 /* finished waiting for uioafini() */
+
+#define UIOA_CLR (~0x000F) /* clear mutually exclusive bits */
+
+#define UIOA_POLL 0x0010 /* need dcopy_poll() */
+
#define uio_loffset _uio_offset._f
#if !defined(_LP64)
#define uio_offset _uio_offset._p._l
@@ -127,10 +169,24 @@ typedef enum uio_rw { UIO_READ, UIO_WRITE } uio_rw_t;
* access, ie, access bypassing caches, should be used. Filesystems that
* don't initialize this field could experience suboptimal performance due to
* the random data the field contains.
+ *
+ * NOTE: This flag is also used by uioasync callers to pass an extended
+ * uio_t (uioa_t), to uioasync enabled consumers. Unlike above all
+ * consumers of a uioa_t require the uio_extflg to be initialized.
*/
#define UIO_COPY_DEFAULT 0x0000 /* no special options to copy */
#define UIO_COPY_CACHED 0x0001 /* copy should not bypass caches */
+#define UIO_ASYNC 0x0002 /* uio_t is really a uioa_t */
+
+/*
+ * Global uioasync capability shadow state.
+ */
+typedef struct uioasync_s {
+ boolean_t enabled; /* Is uioasync enabled? */
+ size_t mincnt; /* Min byte count for uioasync use -- TODO confirm */
+} uioasync_t;
+
#endif /* !defined(_XPG4_2) || defined(__EXTENSIONS__) */
#if defined(_KERNEL)
@@ -141,6 +197,11 @@ int uwritec(struct uio *);
void uioskip(uio_t *, size_t);
int uiodup(uio_t *, uio_t *, iovec_t *, int);
+int uioamove(void *, size_t, enum uio_rw, uioa_t *);
+int uioainit(uio_t *, uioa_t *);
+int uioafini(uio_t *, uioa_t *);
+extern uioasync_t uioasync;
+
#else /* defined(_KERNEL) */
#if defined(__STDC__)
diff --git a/usr/src/uts/i86pc/Makefile.files b/usr/src/uts/i86pc/Makefile.files
index 5ae521c687..0e30822c0d 100644
--- a/usr/src/uts/i86pc/Makefile.files
+++ b/usr/src/uts/i86pc/Makefile.files
@@ -161,6 +161,7 @@ DBOOT_OBJS += \
#
GFX_PRIVATE_OBJS += gfx_private.o gfxp_pci.o gfxp_segmap.o \
gfxp_devmap.o gfxp_vgatext.o gfxp_vm.o vgasubr.o
+IOAT_OBJS += ioat.o ioat_rs.o ioat_ioctl.o ioat_chan.o
ISANEXUS_OBJS += isa.o dma_engine.o i8237A.o
PCI_E_MISC_OBJS += pcie.o pcie_fault.o
PCI_E_NEXUS_OBJS += npe.o npe_misc.o
diff --git a/usr/src/uts/i86pc/Makefile.i86pc.shared b/usr/src/uts/i86pc/Makefile.i86pc.shared
index 1e1c6abe1d..4d188a4dfd 100644
--- a/usr/src/uts/i86pc/Makefile.i86pc.shared
+++ b/usr/src/uts/i86pc/Makefile.i86pc.shared
@@ -257,6 +257,7 @@ DRV_KMODS += xsvc
DRV_KMODS += mc-amd
DRV_KMODS += tzmon
DRV_KMODS += acpi_drv
+DRV_KMODS += ioat
DRV_KMODS += cpudrv
diff --git a/usr/src/uts/i86pc/Makefile.rules b/usr/src/uts/i86pc/Makefile.rules
index 78d3832d9b..9195b8ffb7 100644
--- a/usr/src/uts/i86pc/Makefile.rules
+++ b/usr/src/uts/i86pc/Makefile.rules
@@ -73,6 +73,10 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/i86pc/io/acpi_drv/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(UTSBASE)/i86pc/io/ioat/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(UTSBASE)/i86pc/io/mc/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
@@ -259,6 +263,9 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/i86pc/io/%.c
$(LINTS_DIR)/%.ln: $(UTSBASE)/i86pc/io/acpi_drv/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
+$(LINTS_DIR)/%.ln: $(UTSBASE)/i86pc/io/ioat/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
$(LINTS_DIR)/%.ln: $(UTSBASE)/i86pc/io/mc/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
diff --git a/deleted_files/usr/src/uts/i86pc/io/ioat/ioat.c b/usr/src/uts/i86pc/io/ioat/ioat.c
index 7bf8a559c1..7bf8a559c1 100644
--- a/deleted_files/usr/src/uts/i86pc/io/ioat/ioat.c
+++ b/usr/src/uts/i86pc/io/ioat/ioat.c
diff --git a/deleted_files/usr/src/uts/i86pc/io/ioat/ioat.conf b/usr/src/uts/i86pc/io/ioat/ioat.conf
index 49d948eddb..49d948eddb 100644
--- a/deleted_files/usr/src/uts/i86pc/io/ioat/ioat.conf
+++ b/usr/src/uts/i86pc/io/ioat/ioat.conf
diff --git a/deleted_files/usr/src/uts/i86pc/io/ioat/ioat_chan.c b/usr/src/uts/i86pc/io/ioat/ioat_chan.c
index 8615f9a7ad..8615f9a7ad 100644
--- a/deleted_files/usr/src/uts/i86pc/io/ioat/ioat_chan.c
+++ b/usr/src/uts/i86pc/io/ioat/ioat_chan.c
diff --git a/deleted_files/usr/src/uts/i86pc/io/ioat/ioat_ioctl.c b/usr/src/uts/i86pc/io/ioat/ioat_ioctl.c
index 70640dac4f..70640dac4f 100644
--- a/deleted_files/usr/src/uts/i86pc/io/ioat/ioat_ioctl.c
+++ b/usr/src/uts/i86pc/io/ioat/ioat_ioctl.c
diff --git a/deleted_files/usr/src/uts/i86pc/io/ioat/ioat_rs.c b/usr/src/uts/i86pc/io/ioat/ioat_rs.c
index 6d12798fda..6d12798fda 100644
--- a/deleted_files/usr/src/uts/i86pc/io/ioat/ioat_rs.c
+++ b/usr/src/uts/i86pc/io/ioat/ioat_rs.c
diff --git a/deleted_files/usr/src/uts/i86pc/ioat/Makefile b/usr/src/uts/i86pc/ioat/Makefile
index 2dcd6e898e..2dcd6e898e 100644
--- a/deleted_files/usr/src/uts/i86pc/ioat/Makefile
+++ b/usr/src/uts/i86pc/ioat/Makefile
diff --git a/deleted_files/usr/src/uts/i86pc/sys/ioat.h b/usr/src/uts/i86pc/sys/ioat.h
index 1e32b54ebd..1e32b54ebd 100644
--- a/deleted_files/usr/src/uts/i86pc/sys/ioat.h
+++ b/usr/src/uts/i86pc/sys/ioat.h
diff --git a/usr/src/uts/i86xpv/Makefile.files b/usr/src/uts/i86xpv/Makefile.files
index 7c2ce261fd..88392b1855 100644
--- a/usr/src/uts/i86xpv/Makefile.files
+++ b/usr/src/uts/i86xpv/Makefile.files
@@ -179,12 +179,13 @@ DBOOT_OBJS += \
#
# driver & misc modules
#
-ISANEXUS_OBJS += isa.o dma_engine.o i8237A.o
-DOMCAPS_OBJS += domcaps.o
BALLOON_OBJS += balloon_drv.o
+DOMCAPS_OBJS += domcaps.o
EVTCHN_OBJS += evtchn_dev.o
GFX_PRIVATE_OBJS += gfx_private.o gfxp_pci.o gfxp_segmap.o \
gfxp_devmap.o gfxp_vgatext.o gfxp_vm.o vgasubr.o
+IOAT_OBJS += ioat.o ioat_rs.o ioat_ioctl.o ioat_chan.o
+ISANEXUS_OBJS += isa.o dma_engine.o i8237A.o
PCI_E_MISC_OBJS += pcie.o pcie_fault.o
PCI_E_NEXUS_OBJS += npe.o npe_misc.o
PCI_E_NEXUS_OBJS += pci_common.o pci_kstats.o pci_tools.o
diff --git a/usr/src/uts/i86xpv/Makefile.i86xpv.shared b/usr/src/uts/i86xpv/Makefile.i86xpv.shared
index 0461221216..6c41c1141d 100644
--- a/usr/src/uts/i86xpv/Makefile.i86xpv.shared
+++ b/usr/src/uts/i86xpv/Makefile.i86xpv.shared
@@ -240,6 +240,7 @@ MACH_NOT_YET_KMODS = $(AUTOCONF_OBJS)
#
DRV_KMODS += rootnex
+DRV_KMODS += ioat
DRV_KMODS += isa
DRV_KMODS += pci
DRV_KMODS += npe
diff --git a/usr/src/uts/i86xpv/Makefile.rules b/usr/src/uts/i86xpv/Makefile.rules
index cd22ca6278..8da47ca9d1 100644
--- a/usr/src/uts/i86xpv/Makefile.rules
+++ b/usr/src/uts/i86xpv/Makefile.rules
@@ -57,6 +57,10 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/i86xpv/io/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(UTSBASE)/i86pc/io/ioat/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(UTSBASE)/i86pc/io/pci/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
@@ -215,6 +219,9 @@ DBOOT_DEFS += -D__xpv
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/cpr/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
+$(LINTS_DIR)/%.ln: $(UTSBASE)/i86pc/io/ioat/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
$(LINTS_DIR)/%.ln: $(UTSBASE)/i86pc/io/pci/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
diff --git a/deleted_files/usr/src/uts/i86xpv/ioat/Makefile b/usr/src/uts/i86xpv/ioat/Makefile
index 54354aedc7..54354aedc7 100644
--- a/deleted_files/usr/src/uts/i86xpv/ioat/Makefile
+++ b/usr/src/uts/i86xpv/ioat/Makefile
diff --git a/usr/src/uts/intel/Makefile.files b/usr/src/uts/intel/Makefile.files
index 9a756bd90d..a1a4a1d66e 100644
--- a/usr/src/uts/intel/Makefile.files
+++ b/usr/src/uts/intel/Makefile.files
@@ -138,6 +138,7 @@ CMDK_OBJS += cmdk.o
CMLB_OBJS += cmlb.o
CPUNEX_OBJS += cpunex.o
DADK_OBJS += dadk.o
+DCOPY_OBJS += dcopy.o
DNET_OBJS += dnet.o mii.o
FD_OBJS += fd.o
GDA_OBJS += gda.o
diff --git a/usr/src/uts/intel/Makefile.intel.shared b/usr/src/uts/intel/Makefile.intel.shared
index e8eef62150..476bd301e1 100644
--- a/usr/src/uts/intel/Makefile.intel.shared
+++ b/usr/src/uts/intel/Makefile.intel.shared
@@ -528,6 +528,7 @@ MISC_KMODS += cmlb
MISC_KMODS += consconfig
MISC_KMODS += ctf
MISC_KMODS += dadk
+MISC_KMODS += dcopy
MISC_KMODS += dls
MISC_KMODS += drm
MISC_KMODS += fssnap_if
diff --git a/deleted_files/usr/src/uts/intel/dcopy/Makefile b/usr/src/uts/intel/dcopy/Makefile
index e321465ec1..e321465ec1 100644
--- a/deleted_files/usr/src/uts/intel/dcopy/Makefile
+++ b/usr/src/uts/intel/dcopy/Makefile
diff --git a/usr/src/uts/intel/ia32/ml/modstubs.s b/usr/src/uts/intel/ia32/ml/modstubs.s
index fd7a606594..5ae7072e82 100644
--- a/usr/src/uts/intel/ia32/ml/modstubs.s
+++ b/usr/src/uts/intel/ia32/ml/modstubs.s
@@ -1313,6 +1313,22 @@ fcnname/**/_info: \
END_MODULE(kssl);
#endif
+/*
+ * Stubs for dcopy, for Intel IOAT KAPIs
+ */
+#ifndef DCOPY_MODULE
+ MODULE(dcopy,misc);
+ NO_UNLOAD_STUB(dcopy, dcopy_query, nomod_minus_one);
+ NO_UNLOAD_STUB(dcopy, dcopy_query_channel, nomod_minus_one);
+ NO_UNLOAD_STUB(dcopy, dcopy_alloc, nomod_minus_one);
+ NO_UNLOAD_STUB(dcopy, dcopy_free, nomod_minus_one);
+ NO_UNLOAD_STUB(dcopy, dcopy_cmd_alloc, nomod_minus_one);
+ NO_UNLOAD_STUB(dcopy, dcopy_cmd_free, nomod_void);
+ NO_UNLOAD_STUB(dcopy, dcopy_cmd_post, nomod_minus_one);
+ NO_UNLOAD_STUB(dcopy, dcopy_cmd_poll, nomod_minus_one);
+ END_MODULE(dcopy);
+#endif
+
/ this is just a marker for the area of text that contains stubs
ENTRY_NP(stubs_end)
diff --git a/usr/src/uts/sparc/ml/modstubs.s b/usr/src/uts/sparc/ml/modstubs.s
index 8e4e06a008..b1936c4172 100644
--- a/usr/src/uts/sparc/ml/modstubs.s
+++ b/usr/src/uts/sparc/ml/modstubs.s
@@ -1265,6 +1265,22 @@ stubs_base:
END_MODULE(kssl);
#endif
+/*
+ * Stubs for dcopy, for Intel IOAT KAPIs
+ */
+#ifndef DCOPY_MODULE
+ MODULE(dcopy,misc);
+ NO_UNLOAD_STUB(dcopy, dcopy_query, nomod_minus_one);
+ NO_UNLOAD_STUB(dcopy, dcopy_query_channel, nomod_minus_one);
+ NO_UNLOAD_STUB(dcopy, dcopy_alloc, nomod_minus_one);
+ NO_UNLOAD_STUB(dcopy, dcopy_free, nomod_minus_one);
+ NO_UNLOAD_STUB(dcopy, dcopy_cmd_alloc, nomod_minus_one);
+ NO_UNLOAD_STUB(dcopy, dcopy_cmd_free, nomod_void);
+ NO_UNLOAD_STUB(dcopy, dcopy_cmd_post, nomod_minus_one);
+ NO_UNLOAD_STUB(dcopy, dcopy_cmd_poll, nomod_minus_one);
+ END_MODULE(dcopy);
+#endif
+
! this is just a marker for the area of text that contains stubs
.seg ".text"
.global stubs_end