diff options
author | lucy wang - Sun Microsystems - Beijing China <xiuyan.wang@Sun.COM> | 2009-03-26 09:54:44 +0800 |
---|---|---|
committer | lucy wang - Sun Microsystems - Beijing China <xiuyan.wang@Sun.COM> | 2009-03-26 09:54:44 +0800 |
commit | 94e7edb1e6b194fffd0e7901c0b32e9fc836bc5b (patch) | |
tree | 578105e1f80b27ca4200d731633225fbdbe1acf8 /usr/src | |
parent | 09ce0d4acf1a79c720d7e54b60e87cbfa0f1b2d6 (diff) | |
download | illumos-joyent-94e7edb1e6b194fffd0e7901c0b32e9fc836bc5b.tar.gz |
6812228 No need to do software checksum for multiple-mblk packets in ip_tcp_input()
6694625 Performance falls off the cliff with large IO sizes
PSARC/2009/039 prefetch interfaces
6812282 PSARC 2009/039 prefetch interfaces
Contributed by Paul Durrant (pdurrant@solarflare.com)
Diffstat (limited to 'usr/src')
-rw-r--r-- | usr/src/uts/common/inet/ip/ip.c | 12 | ||||
-rw-r--r-- | usr/src/uts/common/os/streamio.c | 12 | ||||
-rw-r--r-- | usr/src/uts/common/sys/systm.h | 5 | ||||
-rw-r--r-- | usr/src/uts/common/syscall/poll.c | 6 | ||||
-rw-r--r-- | usr/src/uts/i86pc/ml/amd64.il | 30 | ||||
-rw-r--r-- | usr/src/uts/i86pc/ml/ia32.il | 38 | ||||
-rw-r--r-- | usr/src/uts/i86xpv/ml/amd64.il | 30 | ||||
-rw-r--r-- | usr/src/uts/i86xpv/ml/ia32.il | 36 | ||||
-rw-r--r-- | usr/src/uts/intel/amd64/ml/amd64.il | 30 | ||||
-rw-r--r-- | usr/src/uts/intel/asm/cpu.h | 76 | ||||
-rw-r--r-- | usr/src/uts/intel/ia32/ml/ia32.il | 39 | ||||
-rw-r--r-- | usr/src/uts/sparc/asm/cpu.h | 96 | ||||
-rw-r--r-- | usr/src/uts/sparc/ml/sparc.il | 16 | ||||
-rw-r--r-- | usr/src/uts/sparc/sys/cpu.h | 25 |
14 files changed, 346 insertions, 105 deletions
diff --git a/usr/src/uts/common/inet/ip/ip.c b/usr/src/uts/common/inet/ip/ip.c index 3e39cfb50a..6807eb080f 100644 --- a/usr/src/uts/common/inet/ip/ip.c +++ b/usr/src/uts/common/inet/ip/ip.c @@ -12988,12 +12988,8 @@ ip_tcp_input(mblk_t *mp, ipha_t *ipha, ill_t *recv_ill, boolean_t mctl_present, /* multiple mblks of tcp data? */ if ((mp1 = mp->b_cont) != NULL) { - /* more then two? */ - if (mp1->b_cont != NULL) { - IP_STAT(ipst, ip_multipkttcp); - goto multipkttcp; - } - len += mp1->b_wptr - mp1->b_rptr; + IP_STAT(ipst, ip_multipkttcp); + len += msgdsize(mp1); } up = (uint16_t *)(rptr + IP_SIMPLE_HDR_LENGTH + TCP_PORTS_OFFSET); @@ -13301,10 +13297,8 @@ tcpoptions: } /* Get the total packet length in len, including headers. */ - if (mp->b_cont) { -multipkttcp: + if (mp->b_cont) len = msgdsize(mp); - } /* * Check the TCP checksum by pulling together the pseudo- diff --git a/usr/src/uts/common/os/streamio.c b/usr/src/uts/common/os/streamio.c index e4744daa35..7d4d23cfee 100644 --- a/usr/src/uts/common/os/streamio.c +++ b/usr/src/uts/common/os/streamio.c @@ -7499,11 +7499,21 @@ retry: * If a streams message is likely to consist * of many small mblks, it is pulled up into * one continuous chunk of memory. + * The size of the first mblk may be bogus because + * successive read() calls on the socket reduce + * the size of this mblk until it is exhausted + * and then the code walks on to the next. Thus + * the size of the mblk may not be the original size + * that was passed up, it's simply a remainder + * and hence can be very small without any + * implication that the packet is badly fragmented. + * So the size of the possible second mblk is + * used to spot a badly fragmented packet. * see longer comment at top of page * by mblk_pull_len declaration. 
*/ - if (MBLKL(bp) < mblk_pull_len) { + if (bp->b_cont != NULL && MBLKL(bp->b_cont) < mblk_pull_len) { (void) pullupmsg(bp, -1); } diff --git a/usr/src/uts/common/sys/systm.h b/usr/src/uts/common/sys/systm.h index c7421047cf..6857f5f057 100644 --- a/usr/src/uts/common/sys/systm.h +++ b/usr/src/uts/common/sys/systm.h @@ -247,7 +247,10 @@ extern void longjmp(label_t *) #pragma unknown_control_flow(setjmp) #endif -void prefetch64(caddr_t); +void prefetch_read_once(void *); +void prefetch_write_once(void *); +void prefetch_read_many(void *); +void prefetch_write_many(void *); caddr_t caller(void); caddr_t callee(void); int getpcstack(pc_t *, int); diff --git a/usr/src/uts/common/syscall/poll.c b/usr/src/uts/common/syscall/poll.c index 26dff3c39b..262830dd1f 100644 --- a/usr/src/uts/common/syscall/poll.c +++ b/usr/src/uts/common/syscall/poll.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -997,8 +997,8 @@ pcacheset_cmp(pollfd_t *current, pollfd_t *cached, pollfd_t *newlist, int n) for (ix = 0; ix < n; ix++) { /* Prefetch 64 bytes worth of 8-byte elements */ if ((ix & 0x7) == 0) { - prefetch64((caddr_t)&current[ix + 8]); - prefetch64((caddr_t)&cached[ix + 8]); + prefetch_write_many((caddr_t)&current[ix + 8]); + prefetch_write_many((caddr_t)&cached[ix + 8]); } if (current[ix].fd == cached[ix].fd) { /* diff --git a/usr/src/uts/i86pc/ml/amd64.il b/usr/src/uts/i86pc/ml/amd64.il index 9639af663c..8b694c4915 100644 --- a/usr/src/uts/i86pc/ml/amd64.il +++ b/usr/src/uts/i86pc/ml/amd64.il @@ -20,7 +20,7 @@ */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -178,11 +178,27 @@ bsrw %di, %ax .end -/ -/ prefetch 64 bytes -/ - .inline prefetch64,8 - prefetcht0 (%rdi) - prefetcht0 32(%rdi) +/* + * prefetch 64 bytes + */ + + .inline prefetch_read_many,8 + prefetcht0 (%rdi) + prefetcht0 32(%rdi) + .end + + .inline prefetch_read_once,8 + prefetchnta (%rdi) + prefetchnta 32(%rdi) + .end + + .inline prefetch_write_many,8 + prefetcht0 (%rdi) + prefetcht0 32(%rdi) + .end + + .inline prefetch_write_once,8 + prefetcht0 (%rdi) + prefetcht0 32(%rdi) .end diff --git a/usr/src/uts/i86pc/ml/ia32.il b/usr/src/uts/i86pc/ml/ia32.il index e137ca6c1d..d1b10939dc 100644 --- a/usr/src/uts/i86pc/ml/ia32.il +++ b/usr/src/uts/i86pc/ml/ia32.il @@ -177,14 +177,34 @@ bsrw %cx, %ax .end -/ -/ prefetch 64 bytes -/ prefetch is an SSE extension which is not supported on older 32-bit processors -/ so define this as a no-op for now -/ - .inline prefetch64,4 -/ movl (%esp), %eax -/ prefetcht0 (%eax) -/ prefetcht0 32(%eax) +/* + * prefetch 64 bytes + * + * prefetch is an SSE extension which is not supported on older 32-bit processors + * so define this as a no-op for now + */ + + .inline prefetch_read_many, 4 +/ movl (%esp), %eax +/ prefetcht0 (%eax) +/ prefetcht0 32(%eax) + .end + + .inline prefetch_read_once, 4 +/ movl (%esp), %eax +/ prefetchnta (%eax) +/ prefetchnta 32(%eax) + .end + + .inline prefetch_write_many, 4 +/ movl (%esp), %eax +/ prefetcht0 (%eax) +/ prefetcht0 32(%eax) + .end + + .inline prefetch_write_once, 4 +/ movl (%esp), %eax +/ prefetcht0 (%eax) +/ prefetcht0 32(%eax) .end diff --git a/usr/src/uts/i86xpv/ml/amd64.il b/usr/src/uts/i86xpv/ml/amd64.il index bca65ed47f..4874a7b264 100644 --- a/usr/src/uts/i86xpv/ml/amd64.il +++ b/usr/src/uts/i86xpv/ml/amd64.il @@ -20,7 +20,7 @@ */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -87,11 +87,27 @@ orq %rdx, %rax .end -/ -/ prefetch 64 bytes -/ - .inline prefetch64,8 - prefetcht0 (%rdi) - prefetcht0 32(%rdi) +/* + * prefetch 64 bytes + */ + + .inline prefetch_read_many,8 + prefetcht0 (%rdi) + prefetcht0 32(%rdi) + .end + + .inline prefetch_read_once,8 + prefetchnta (%rdi) + prefetchnta 32(%rdi) + .end + + .inline prefetch_write_many,8 + prefetcht0 (%rdi) + prefetcht0 32(%rdi) + .end + + .inline prefetch_write_once,8 + prefetcht0 (%rdi) + prefetcht0 32(%rdi) .end diff --git a/usr/src/uts/i86xpv/ml/ia32.il b/usr/src/uts/i86xpv/ml/ia32.il index 8228f3423c..fc41a63bec 100644 --- a/usr/src/uts/i86xpv/ml/ia32.il +++ b/usr/src/uts/i86xpv/ml/ia32.il @@ -100,13 +100,33 @@ rdtsc .end -/ -/ prefetch 64 bytes -/ prefetch is an SSE extension which is not supported on older 32-bit processors -/ so define this as a no-op for now -/ - .inline prefetch64, 4 +/* + * prefetch 64 bytes + * + * prefetch is an SSE extension which is not supported on older 32-bit processors + * so define this as a no-op for now + */ + + .inline prefetch_read_many, 4 +/ movl (%esp), %eax +/ prefetcht0 (%eax) +/ prefetcht0 32(%eax) + .end + + .inline prefetch_read_once, 4 +/ movl (%esp), %eax +/ prefetchnta (%eax) +/ prefetchnta 32(%eax) + .end + + .inline prefetch_write_many, 4 +/ movl (%esp), %eax +/ prefetcht0 (%eax) +/ prefetcht0 32(%eax) + .end + + .inline prefetch_write_once, 4 / movl (%esp), %eax -/ prefetcht0 (%eax) -/ prefetcht0 32(%eax) +/ prefetcht0 (%eax) +/ prefetcht0 32(%eax) .end diff --git a/usr/src/uts/intel/amd64/ml/amd64.il b/usr/src/uts/intel/amd64/ml/amd64.il index 297fa134f2..2afcf93bed 100644 --- a/usr/src/uts/intel/amd64/ml/amd64.il +++ b/usr/src/uts/intel/amd64/ml/amd64.il @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -206,10 +206,26 @@ swapgs .end -/ -/ prefetch 64 bytes -/ - .inline prefetch64,8 - prefetcht0 (%rdi) - prefetcht0 32(%rdi) +/* + * prefetch 64 bytes + */ + + .inline prefetch_read_many,8 + prefetcht0 (%rdi) + prefetcht0 32(%rdi) + .end + + .inline prefetch_read_once,8 + prefetchnta (%rdi) + prefetchnta 32(%rdi) + .end + + .inline prefetch_write_many,8 + prefetcht0 (%rdi) + prefetcht0 32(%rdi) + .end + + .inline prefetch_write_once,8 + prefetcht0 (%rdi) + prefetcht0 32(%rdi) .end diff --git a/usr/src/uts/intel/asm/cpu.h b/usr/src/uts/intel/asm/cpu.h index 937c3c4103..c887111058 100644 --- a/usr/src/uts/intel/asm/cpu.h +++ b/usr/src/uts/intel/asm/cpu.h @@ -40,6 +40,57 @@ extern __inline__ void ht_pause(void) "pause"); } +/* + * prefetch 64 bytes + * + * prefetch is an SSE extension which is not supported on + * older 32-bit processors, so define this as a no-op for now + */ + +extern __inline__ void prefetch_read_many(void *addr) +{ +#if defined(__amd64) + __asm__( + "prefetcht0 (%0);" + "prefetcht0 32(%0);" + : /* no output */ + : "r" (addr)); +#endif /* __amd64 */ +} + +extern __inline__ void prefetch_read_once(void *addr) +{ +#if defined(__amd64) + __asm__( + "prefetchnta (%0);" + "prefetchnta 32(%0);" + : /* no output */ + : "r" (addr)); +#endif /* __amd64 */ +} + +extern __inline__ void prefetch_write_many(void *addr) +{ +#if defined(__amd64) + __asm__( + "prefetcht0 (%0);" + "prefetcht0 32(%0);" + : /* no output */ + : "r" (addr)); +#endif /* __amd64 */ +} + +extern __inline__ void prefetch_write_once(void *addr) +{ +#if defined(__amd64) + __asm__( + "prefetcht0 (%0);" + "prefetcht0 32(%0);" + : /* no output */ + : "r" (addr)); +#endif /* __amd64 */ +} + #if !defined(__xpv) extern __inline__ void cli(void) @@ -112,31 +163,6 @@ extern __inline__ void __swapgs(void) #endif /* !__lint && __GNUC__ */ -#if !defined(__lint) && defined(__GNUC__) - -#if defined(__i386) || defined(__amd64) - -/* - * prefetch 64 bytes - * prefetch is an SSE extension which is not 
supported on - * older 32-bit processors, so define this as a no-op for now - */ - -extern __inline__ void prefetch64(caddr_t addr) -{ -#if defined(__amd64) - __asm__ __volatile__( - "prefetcht0 (%0);" - "prefetcht0 32(%0)" - : /* no output */ - : "r" (addr)); -#endif /* __amd64 */ -} - -#endif /* __i386 || __amd64 */ - -#endif /* !__lint && __GNUC__ */ - #ifdef __cplusplus } #endif diff --git a/usr/src/uts/intel/ia32/ml/ia32.il b/usr/src/uts/intel/ia32/ml/ia32.il index 4b0b785784..9d763e0ab3 100644 --- a/usr/src/uts/intel/ia32/ml/ia32.il +++ b/usr/src/uts/intel/ia32/ml/ia32.il @@ -185,13 +185,34 @@ nop / so we're using "F3 90" opcode directly .end -/ -/ prefetch 64 bytes -/ prefetch is an SSE extension which is not supported on older 32-bit processors -/ so define this as a no-op for now -/ - .inline prefetch64,4 -/ movl (%esp), %eax -/ prefetcht0 (%eax) -/ prefetcht0 32(%eax) +/* + * prefetch 64 bytes + * + * prefetch is an SSE extension which is not supported on older 32-bit processors + * so define this as a no-op for now + */ + + .inline prefetch_read_many, 4 +/ movl (%esp), %eax +/ prefetcht0 (%eax) +/ prefetcht0 32(%eax) .end + + .inline prefetch_read_once, 4 +/ movl (%esp), %eax +/ prefetchnta (%eax) +/ prefetchnta 32(%eax) + .end + + .inline prefetch_write_many, 4 +/ movl (%esp), %eax +/ prefetcht0 (%eax) +/ prefetcht0 32(%eax) + .end + + .inline prefetch_write_once, 4 +/ movl (%esp), %eax +/ prefetcht0 (%eax) +/ prefetcht0 32(%eax) + .end + diff --git a/usr/src/uts/sparc/asm/cpu.h b/usr/src/uts/sparc/asm/cpu.h new file mode 100644 index 0000000000..12523a475d --- /dev/null +++ b/usr/src/uts/sparc/asm/cpu.h @@ -0,0 +1,96 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _ASM_CPU_H +#define _ASM_CPU_H + +#include <sys/types.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#if !defined(__lint) && defined(__GNUC__) + +extern __inline__ void +prefetch_read_many(void *addr) +{ +#if defined(__sparcv9) + __asm__ __volatile__( + "prefetch [%0],#n_reads\n\t" + : "=r" (addr) + : "0" (addr)); +#else +#error "port me" +#endif +} + +extern __inline__ void +prefetch_read_once(void *addr) +{ +#if defined(__sparcv9) + __asm__ __volatile__( + "prefetch [%0],#one_read\n\t" + : "=r" (addr) + : "0" (addr)); +#else +#error "port me" +#endif +} + +extern __inline__ void +prefetch_write_many(void *addr) +{ +#if defined(__sparcv9) + __asm__ __volatile__( + "prefetch [%0],#n_writes\n\t" + : "=r" (addr) + : "0" (addr)); +#else +#error "port me" +#endif +} + +extern __inline__ void +prefetch_write_once(void *addr) +{ +#if defined(__sparcv9) + __asm__ __volatile__( + "prefetch [%0],#one_write\n\t" + : "=r" (addr) + : "0" (addr)); +#else +#error "port me" +#endif +} + +#endif /* !__lint && __GNUC__ */ + +#ifdef __cplusplus +} +#endif + +#endif /* _ASM_CPU_H */ diff --git a/usr/src/uts/sparc/ml/sparc.il b/usr/src/uts/sparc/ml/sparc.il index 4b8a186588..8337c30728 100644 --- a/usr/src/uts/sparc/ml/sparc.il +++ 
b/usr/src/uts/sparc/ml/sparc.il @@ -1,5 +1,5 @@ ! -! Copyright 2008 Sun Microsystems, Inc. All rights reserved. +! Copyright 2009 Sun Microsystems, Inc. All rights reserved. ! Use is subject to license terms. ! ! CDDL HEADER START @@ -53,6 +53,18 @@ ! prefetch 64 bytes into L2-cache - .inline prefetch64,8 + .inline prefetch_read_many,8 + prefetch [%o0], #n_reads + .end + + .inline prefetch_read_once,8 + prefetch [%o0], #one_read + .end + + .inline prefetch_write_many,8 prefetch [%o0], #n_writes .end + + .inline prefetch_write_once,8 + prefetch [%o0], #one_write + .end diff --git a/usr/src/uts/sparc/sys/cpu.h b/usr/src/uts/sparc/sys/cpu.h index adf3d98c42..126b4826b8 100644 --- a/usr/src/uts/sparc/sys/cpu.h +++ b/usr/src/uts/sparc/sys/cpu.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -31,10 +31,17 @@ */ #include <sys/bustypes.h> +#if defined(_KERNEL) +#if defined(__GNUC__) && defined(_ASM_INLINES) +#include <asm/cpu.h> +#endif +#endif /* _KERNEL */ + #ifdef __cplusplus extern "C" { #endif + /* * Global kernel variables of interest */ @@ -70,22 +77,6 @@ extern int vac; extern void cpu_smt_pause(); #define SMT_PAUSE() { if (&cpu_smt_pause) cpu_smt_pause(); } -/* - * used to preload L2 cache - */ -#if !defined(__lint) && defined(__GNUC__) - -extern __inline__ void -prefetch64(caddr_t addr) -{ - __asm__ __volatile__( - "prefetch [%0], #n_writes\n\t" - : "=r" (addr) - : "0" (addr)); -} - -#endif /* !__lint && __GNUC__ */ - #endif /* defined(_KERNEL) && !defined(_ASM) */ #ifdef __cplusplus |