summary | refs | log | tree | commit | diff
path: root/usr/src
diff options
context:
space:
mode:
author: lucy wang - Sun Microsystems - Beijing China <xiuyan.wang@Sun.COM> 2009-03-26 09:54:44 +0800
committer: lucy wang - Sun Microsystems - Beijing China <xiuyan.wang@Sun.COM> 2009-03-26 09:54:44 +0800
commit: 94e7edb1e6b194fffd0e7901c0b32e9fc836bc5b (patch)
tree: 578105e1f80b27ca4200d731633225fbdbe1acf8 /usr/src
parent: 09ce0d4acf1a79c720d7e54b60e87cbfa0f1b2d6 (diff)
download: illumos-joyent-94e7edb1e6b194fffd0e7901c0b32e9fc836bc5b.tar.gz
6812228 No need to do software checksum for multiple-mblk packets in ip_tcp_input()
6694625 Performance falls off the cliff with large IO sizes
PSARC/2009/039 prefetch interfaces
6812282 PSARC 2009/039 prefetch interfaces
Contributed by Paul Durrant (pdurrant@solarflare.com)
Diffstat (limited to 'usr/src')
-rw-r--r-- usr/src/uts/common/inet/ip/ip.c 12
-rw-r--r-- usr/src/uts/common/os/streamio.c 12
-rw-r--r-- usr/src/uts/common/sys/systm.h 5
-rw-r--r-- usr/src/uts/common/syscall/poll.c 6
-rw-r--r-- usr/src/uts/i86pc/ml/amd64.il 30
-rw-r--r-- usr/src/uts/i86pc/ml/ia32.il 38
-rw-r--r-- usr/src/uts/i86xpv/ml/amd64.il 30
-rw-r--r-- usr/src/uts/i86xpv/ml/ia32.il 36
-rw-r--r-- usr/src/uts/intel/amd64/ml/amd64.il 30
-rw-r--r-- usr/src/uts/intel/asm/cpu.h 76
-rw-r--r-- usr/src/uts/intel/ia32/ml/ia32.il 39
-rw-r--r-- usr/src/uts/sparc/asm/cpu.h 96
-rw-r--r-- usr/src/uts/sparc/ml/sparc.il 16
-rw-r--r-- usr/src/uts/sparc/sys/cpu.h 25
14 files changed, 346 insertions, 105 deletions
diff --git a/usr/src/uts/common/inet/ip/ip.c b/usr/src/uts/common/inet/ip/ip.c
index 3e39cfb50a..6807eb080f 100644
--- a/usr/src/uts/common/inet/ip/ip.c
+++ b/usr/src/uts/common/inet/ip/ip.c
@@ -12988,12 +12988,8 @@ ip_tcp_input(mblk_t *mp, ipha_t *ipha, ill_t *recv_ill, boolean_t mctl_present,
/* multiple mblks of tcp data? */
if ((mp1 = mp->b_cont) != NULL) {
- /* more then two? */
- if (mp1->b_cont != NULL) {
- IP_STAT(ipst, ip_multipkttcp);
- goto multipkttcp;
- }
- len += mp1->b_wptr - mp1->b_rptr;
+ IP_STAT(ipst, ip_multipkttcp);
+ len += msgdsize(mp1);
}
up = (uint16_t *)(rptr + IP_SIMPLE_HDR_LENGTH + TCP_PORTS_OFFSET);
@@ -13301,10 +13297,8 @@ tcpoptions:
}
/* Get the total packet length in len, including headers. */
- if (mp->b_cont) {
-multipkttcp:
+ if (mp->b_cont)
len = msgdsize(mp);
- }
/*
* Check the TCP checksum by pulling together the pseudo-
diff --git a/usr/src/uts/common/os/streamio.c b/usr/src/uts/common/os/streamio.c
index e4744daa35..7d4d23cfee 100644
--- a/usr/src/uts/common/os/streamio.c
+++ b/usr/src/uts/common/os/streamio.c
@@ -7499,11 +7499,21 @@ retry:
* If a streams message is likely to consist
* of many small mblks, it is pulled up into
* one continuous chunk of memory.
+ * The size of the first mblk may be bogus because
+ * successive read() calls on the socket reduce
+ * the size of this mblk until it is exhausted
+ * and then the code walks on to the next. Thus
+ * the size of the mblk may not be the original size
+ * that was passed up, it's simply a remainder
+ * and hence can be very small without any
+ * implication that the packet is badly fragmented.
+ * So the size of the possible second mblk is
+ * used to spot a badly fragmented packet.
* see longer comment at top of page
* by mblk_pull_len declaration.
*/
- if (MBLKL(bp) < mblk_pull_len) {
+ if (bp->b_cont != NULL && MBLKL(bp->b_cont) < mblk_pull_len) {
(void) pullupmsg(bp, -1);
}
diff --git a/usr/src/uts/common/sys/systm.h b/usr/src/uts/common/sys/systm.h
index c7421047cf..6857f5f057 100644
--- a/usr/src/uts/common/sys/systm.h
+++ b/usr/src/uts/common/sys/systm.h
@@ -247,7 +247,10 @@ extern void longjmp(label_t *)
#pragma unknown_control_flow(setjmp)
#endif
-void prefetch64(caddr_t);
+void prefetch_read_once(void *);
+void prefetch_write_once(void *);
+void prefetch_read_many(void *);
+void prefetch_write_many(void *);
caddr_t caller(void);
caddr_t callee(void);
int getpcstack(pc_t *, int);
diff --git a/usr/src/uts/common/syscall/poll.c b/usr/src/uts/common/syscall/poll.c
index 26dff3c39b..262830dd1f 100644
--- a/usr/src/uts/common/syscall/poll.c
+++ b/usr/src/uts/common/syscall/poll.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -997,8 +997,8 @@ pcacheset_cmp(pollfd_t *current, pollfd_t *cached, pollfd_t *newlist, int n)
for (ix = 0; ix < n; ix++) {
/* Prefetch 64 bytes worth of 8-byte elements */
if ((ix & 0x7) == 0) {
- prefetch64((caddr_t)&current[ix + 8]);
- prefetch64((caddr_t)&cached[ix + 8]);
+ prefetch_write_many((caddr_t)&current[ix + 8]);
+ prefetch_write_many((caddr_t)&cached[ix + 8]);
}
if (current[ix].fd == cached[ix].fd) {
/*
diff --git a/usr/src/uts/i86pc/ml/amd64.il b/usr/src/uts/i86pc/ml/amd64.il
index 9639af663c..8b694c4915 100644
--- a/usr/src/uts/i86pc/ml/amd64.il
+++ b/usr/src/uts/i86pc/ml/amd64.il
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -178,11 +178,27 @@
bsrw %di, %ax
.end
-/
-/ prefetch 64 bytes
-/
- .inline prefetch64,8
- prefetcht0 (%rdi)
- prefetcht0 32(%rdi)
+/*
+ * prefetch 64 bytes
+ */
+
+ .inline prefetch_read_many,8
+ prefetcht0 (%rdi)
+ prefetcht0 32(%rdi)
+ .end
+
+ .inline prefetch_read_once,8
+ prefetchnta (%rdi)
+ prefetchnta 32(%rdi)
+ .end
+
+ .inline prefetch_write_many,8
+ prefetcht0 (%rdi)
+ prefetcht0 32(%rdi)
+ .end
+
+ .inline prefetch_write_once,8
+ prefetcht0 (%rdi)
+ prefetcht0 32(%rdi)
.end
diff --git a/usr/src/uts/i86pc/ml/ia32.il b/usr/src/uts/i86pc/ml/ia32.il
index e137ca6c1d..d1b10939dc 100644
--- a/usr/src/uts/i86pc/ml/ia32.il
+++ b/usr/src/uts/i86pc/ml/ia32.il
@@ -177,14 +177,34 @@
bsrw %cx, %ax
.end
-/
-/ prefetch 64 bytes
-/ prefetch is an SSE extension which is not supported on older 32-bit processors
-/ so define this as a no-op for now
-/
- .inline prefetch64,4
-/ movl (%esp), %eax
-/ prefetcht0 (%eax)
-/ prefetcht0 32(%eax)
+/*
+ * prefetch 64 bytes
+ *
+ * prefetch is an SSE extension which is not supported on older 32-bit processors
+ * so define this as a no-op for now
+ */
+
+ .inline prefetch_read_many, 4
+/ movl (%esp), %eax
+/ prefetcht0 (%eax)
+/ prefetcht0 32(%eax)
+ .end
+
+ .inline prefetch_read_once, 4
+/ movl (%esp), %eax
+/ prefetchnta (%eax)
+/ prefetchnta 32(%eax)
+ .end
+
+ .inline prefetch_write_many, 4
+/ movl (%esp), %eax
+/ prefetcht0 (%eax)
+/ prefetcht0 32(%eax)
+ .end
+
+ .inline prefetch_write_once, 4
+/ movl (%esp), %eax
+/ prefetcht0 (%eax)
+/ prefetcht0 32(%eax)
.end
diff --git a/usr/src/uts/i86xpv/ml/amd64.il b/usr/src/uts/i86xpv/ml/amd64.il
index bca65ed47f..4874a7b264 100644
--- a/usr/src/uts/i86xpv/ml/amd64.il
+++ b/usr/src/uts/i86xpv/ml/amd64.il
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -87,11 +87,27 @@
orq %rdx, %rax
.end
-/
-/ prefetch 64 bytes
-/
- .inline prefetch64,8
- prefetcht0 (%rdi)
- prefetcht0 32(%rdi)
+/*
+ * prefetch 64 bytes
+ */
+
+ .inline prefetch_read_many,8
+ prefetcht0 (%rdi)
+ prefetcht0 32(%rdi)
+ .end
+
+ .inline prefetch_read_once,8
+ prefetchnta (%rdi)
+ prefetchnta 32(%rdi)
+ .end
+
+ .inline prefetch_write_many,8
+ prefetcht0 (%rdi)
+ prefetcht0 32(%rdi)
+ .end
+
+ .inline prefetch_write_once,8
+ prefetcht0 (%rdi)
+ prefetcht0 32(%rdi)
.end
diff --git a/usr/src/uts/i86xpv/ml/ia32.il b/usr/src/uts/i86xpv/ml/ia32.il
index 8228f3423c..fc41a63bec 100644
--- a/usr/src/uts/i86xpv/ml/ia32.il
+++ b/usr/src/uts/i86xpv/ml/ia32.il
@@ -100,13 +100,33 @@
rdtsc
.end
-/
-/ prefetch 64 bytes
-/ prefetch is an SSE extension which is not supported on older 32-bit processors
-/ so define this as a no-op for now
-/
- .inline prefetch64, 4
+/*
+ * prefetch 64 bytes
+ *
+ * prefetch is an SSE extension which is not supported on older 32-bit processors
+ * so define this as a no-op for now
+ */
+
+ .inline prefetch_read_many, 4
+/ movl (%esp), %eax
+/ prefetcht0 (%eax)
+/ prefetcht0 32(%eax)
+ .end
+
+ .inline prefetch_read_once, 4
+/ movl (%esp), %eax
+/ prefetchnta (%eax)
+/ prefetchnta 32(%eax)
+ .end
+
+ .inline prefetch_write_many, 4
+/ movl (%esp), %eax
+/ prefetcht0 (%eax)
+/ prefetcht0 32(%eax)
+ .end
+
+ .inline prefetch_write_once, 4
/ movl (%esp), %eax
-/ prefetcht0 (%eax)
-/ prefetcht0 32(%eax)
+/ prefetcht0 (%eax)
+/ prefetcht0 32(%eax)
.end
diff --git a/usr/src/uts/intel/amd64/ml/amd64.il b/usr/src/uts/intel/amd64/ml/amd64.il
index 297fa134f2..2afcf93bed 100644
--- a/usr/src/uts/intel/amd64/ml/amd64.il
+++ b/usr/src/uts/intel/amd64/ml/amd64.il
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -206,10 +206,26 @@
swapgs
.end
-/
-/ prefetch 64 bytes
-/
- .inline prefetch64,8
- prefetcht0 (%rdi)
- prefetcht0 32(%rdi)
+/*
+ * prefetch 64 bytes
+ */
+
+ .inline prefetch_read_many,8
+ prefetcht0 (%rdi)
+ prefetcht0 32(%rdi)
+ .end
+
+ .inline prefetch_read_once,8
+ prefetchnta (%rdi)
+ prefetchnta 32(%rdi)
+ .end
+
+ .inline prefetch_write_many,8
+ prefetcht0 (%rdi)
+ prefetcht0 32(%rdi)
+ .end
+
+ .inline prefetch_write_once,8
+ prefetcht0 (%rdi)
+ prefetcht0 32(%rdi)
.end
diff --git a/usr/src/uts/intel/asm/cpu.h b/usr/src/uts/intel/asm/cpu.h
index 937c3c4103..c887111058 100644
--- a/usr/src/uts/intel/asm/cpu.h
+++ b/usr/src/uts/intel/asm/cpu.h
@@ -40,6 +40,57 @@ extern __inline__ void ht_pause(void)
"pause");
}
+/*
+ * prefetch 64 bytes
+ *
+ * prefetch is an SSE extension which is not supported on
+ * older 32-bit processors, so define this as a no-op for now
+ */
+
+extern __inline__ void prefetch_read_many(void *addr)
+{
+#if defined(__amd64)
+ __asm__(
+ "prefetcht0 (%0);"
+ "prefetcht0 32(%0);"
+ : /* no output */
+ : "r" (addr));
+#endif /* __amd64 */
+}
+
+extern __inline__ void prefetch_read_once(void *addr)
+{
+#if defined(__amd64)
+ __asm__(
+ "prefetchnta (%0);"
+ "prefetchnta 32(%0);"
+ : /* no output */
+ : "r" (addr));
+#endif /* __amd64 */
+}
+
+extern __inline__ void prefetch_write_many(void *addr)
+{
+#if defined(__amd64)
+ __asm__(
+ "prefetcht0 (%0);"
+ "prefetcht0 32(%0);"
+ : /* no output */
+ : "r" (addr));
+#endif /* __amd64 */
+}
+
+extern __inline__ void prefetch_write_once(void *addr)
+{
+#if defined(__amd64)
+ __asm__(
+ "prefetcht0 (%0);"
+ "prefetcht0 32(%0);"
+ : /* no output */
+ : "r" (addr));
+#endif /* __amd64 */
+}
+
#if !defined(__xpv)
extern __inline__ void cli(void)
@@ -112,31 +163,6 @@ extern __inline__ void __swapgs(void)
#endif /* !__lint && __GNUC__ */
-#if !defined(__lint) && defined(__GNUC__)
-
-#if defined(__i386) || defined(__amd64)
-
-/*
- * prefetch 64 bytes
- * prefetch is an SSE extension which is not supported on
- * older 32-bit processors, so define this as a no-op for now
- */
-
-extern __inline__ void prefetch64(caddr_t addr)
-{
-#if defined(__amd64)
- __asm__ __volatile__(
- "prefetcht0 (%0);"
- "prefetcht0 32(%0)"
- : /* no output */
- : "r" (addr));
-#endif /* __amd64 */
-}
-
-#endif /* __i386 || __amd64 */
-
-#endif /* !__lint && __GNUC__ */
-
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/uts/intel/ia32/ml/ia32.il b/usr/src/uts/intel/ia32/ml/ia32.il
index 4b0b785784..9d763e0ab3 100644
--- a/usr/src/uts/intel/ia32/ml/ia32.il
+++ b/usr/src/uts/intel/ia32/ml/ia32.il
@@ -185,13 +185,34 @@
nop / so we're using "F3 90" opcode directly
.end
-/
-/ prefetch 64 bytes
-/ prefetch is an SSE extension which is not supported on older 32-bit processors
-/ so define this as a no-op for now
-/
- .inline prefetch64,4
-/ movl (%esp), %eax
-/ prefetcht0 (%eax)
-/ prefetcht0 32(%eax)
+/*
+ * prefetch 64 bytes
+ *
+ * prefetch is an SSE extension which is not supported on older 32-bit processors
+ * so define this as a no-op for now
+ */
+
+ .inline prefetch_read_many, 4
+/ movl (%esp), %eax
+/ prefetcht0 (%eax)
+/ prefetcht0 32(%eax)
.end
+
+ .inline prefetch_read_once, 4
+/ movl (%esp), %eax
+/ prefetchnta (%eax)
+/ prefetchnta 32(%eax)
+ .end
+
+ .inline prefetch_write_many, 4
+/ movl (%esp), %eax
+/ prefetcht0 (%eax)
+/ prefetcht0 32(%eax)
+ .end
+
+ .inline prefetch_write_once, 4
+/ movl (%esp), %eax
+/ prefetcht0 (%eax)
+/ prefetcht0 32(%eax)
+ .end
+
diff --git a/usr/src/uts/sparc/asm/cpu.h b/usr/src/uts/sparc/asm/cpu.h
new file mode 100644
index 0000000000..12523a475d
--- /dev/null
+++ b/usr/src/uts/sparc/asm/cpu.h
@@ -0,0 +1,96 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _ASM_CPU_H
+#define _ASM_CPU_H
+
+#include <sys/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if !defined(__lint) && defined(__GNUC__)
+
+extern __inline__ void
+prefetch_read_many(void *addr)
+{
+#if defined(__sparcv9)
+ __asm__ __volatile__(
+ "prefetch [%0],#n_reads\n\t"
+ : "=r" (addr)
+ : "0" (addr));
+#else
+#error "port me"
+#endif
+}
+
+extern __inline__ void
+prefetch_read_once(void *addr)
+{
+#if defined(__sparcv9)
+ __asm__ __volatile__(
+ "prefetch [%0],#one_read\n\t"
+ : "=r" (addr)
+ : "0" (addr));
+#else
+#error "port me"
+#endif
+}
+
+extern __inline__ void
+prefetch_write_many(void *addr)
+{
+#if defined(__sparcv9)
+ __asm__ __volatile__(
+ "prefetch [%0],#n_writes\n\t"
+ : "=r" (addr)
+ : "0" (addr));
+#else
+#error "port me"
+#endif
+}
+
+extern __inline__ void
+prefetch_write_once(void *addr)
+{
+#if defined(__sparcv9)
+ __asm__ __volatile__(
+ "prefetch [%0],#one_write\n\t"
+ : "=r" (addr)
+ : "0" (addr));
+#else
+#error "port me"
+#endif
+}
+
+#endif /* !__lint && __GNUC__ */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ASM_CPU_H */
diff --git a/usr/src/uts/sparc/ml/sparc.il b/usr/src/uts/sparc/ml/sparc.il
index 4b8a186588..8337c30728 100644
--- a/usr/src/uts/sparc/ml/sparc.il
+++ b/usr/src/uts/sparc/ml/sparc.il
@@ -1,5 +1,5 @@
!
-! Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+! Copyright 2009 Sun Microsystems, Inc. All rights reserved.
! Use is subject to license terms.
!
! CDDL HEADER START
@@ -53,6 +53,18 @@
! prefetch 64 bytes into L2-cache
- .inline prefetch64,8
+ .inline prefetch_read_many,8
+ prefetch [%o0], #n_reads
+ .end
+
+ .inline prefetch_read_once,8
+ prefetch [%o0], #one_read
+ .end
+
+ .inline prefetch_write_many,8
prefetch [%o0], #n_writes
.end
+
+ .inline prefetch_write_once,8
+ prefetch [%o0], #one_write
+ .end
diff --git a/usr/src/uts/sparc/sys/cpu.h b/usr/src/uts/sparc/sys/cpu.h
index adf3d98c42..126b4826b8 100644
--- a/usr/src/uts/sparc/sys/cpu.h
+++ b/usr/src/uts/sparc/sys/cpu.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -31,10 +31,17 @@
*/
#include <sys/bustypes.h>
+#if defined(_KERNEL)
+#if defined(__GNUC__) && defined(_ASM_INLINES)
+#include <asm/cpu.h>
+#endif
+#endif /* _KERNEL */
+
#ifdef __cplusplus
extern "C" {
#endif
+
/*
* Global kernel variables of interest
*/
@@ -70,22 +77,6 @@ extern int vac;
extern void cpu_smt_pause();
#define SMT_PAUSE() { if (&cpu_smt_pause) cpu_smt_pause(); }
-/*
- * used to preload L2 cache
- */
-#if !defined(__lint) && defined(__GNUC__)
-
-extern __inline__ void
-prefetch64(caddr_t addr)
-{
- __asm__ __volatile__(
- "prefetch [%0], #n_writes\n\t"
- : "=r" (addr)
- : "0" (addr));
-}
-
-#endif /* !__lint && __GNUC__ */
-
#endif /* defined(_KERNEL) && !defined(_ASM) */
#ifdef __cplusplus