diff options
Diffstat (limited to 'usr/src/lib/libsum/common/sum-att.c')
-rw-r--r-- | usr/src/lib/libsum/common/sum-att.c | 66 |
1 files changed, 65 insertions, 1 deletions
diff --git a/usr/src/lib/libsum/common/sum-att.c b/usr/src/lib/libsum/common/sum-att.c index ef09d0c2e9..27b66c7aa3 100644 --- a/usr/src/lib/libsum/common/sum-att.c +++ b/usr/src/lib/libsum/common/sum-att.c @@ -1,7 +1,7 @@ /*********************************************************************** * * * This software is part of the ast package * -* Copyright (c) 1996-2008 AT&T Intellectual Property * +* Copyright (c) 1996-2009 AT&T Intellectual Property * * and is licensed under the * * Common Public License, Version 1.0 * * by AT&T Intellectual Property * @@ -35,6 +35,69 @@ #define att_data long_data #define att_scale 512 +#if defined(__SUNPRO_C) || defined(__GNUC__) + +#if defined(__SUNPRO_C) +# include <sun_prefetch.h> +# define sum_prefetch(addr) sun_prefetch_read_many((void *)(addr)) +#elif defined(__GNUC__) +# define sum_prefetch(addr) __builtin_prefetch((addr), 0, 3) +#else +# error Unknown compiler +#endif + +#define CBLOCK_SIZE (64) +#pragma unroll(16) + +/* Inmos transputer would love this algorithm */ +static int +att_block(register Sum_t* p, const void* s, size_t n) +{ + register uint32_t c = ((Integral_t*)p)->sum; + register const unsigned char* b = (const unsigned char*)s; + register const unsigned char* e = b + n; + register uint32_t s0, s1, s2, s3, s4, s5, s6, s7; + register unsigned int i; + + s0=s1=s2=s3=s4=s5=s6=s7=0U; + + sum_prefetch((void *)b); + + while (n > CBLOCK_SIZE) + { + sum_prefetch((b+CBLOCK_SIZE)); + + /* Compiler will unroll for() loops per #pragma unroll */ + for (i=0 ; i < (CBLOCK_SIZE/8) ; i++) + { + /* + * use s0-s7 to decouple calculations (this improves pipelining) + * because each operation is completely independent from it's + * siblings + */ + s0+=b[0]; + s1+=b[1]; + s2+=b[2]; + s3+=b[3]; + s4+=b[4]; + s5+=b[5]; + s6+=b[6]; + s7+=b[7]; + + b+=8; + n-=8; + } + } + + c+=s0+s1+s2+s3+s4+s5+s6+s7; + + while (b < e) + c += *b++; + ((Integral_t*)p)->sum = c; + return 0; +} + +#else static int att_block(register Sum_t* p, const void* s, size_t n) { @@ -47,6 +110,7 @@ att_block(register Sum_t* p, const void* s, size_t n) ((Integral_t*)p)->sum = c; return 0; } +#endif /* defined(__SUNPRO_C) || defined(__GNUC__) */ static int att_done(Sum_t* p) |