summaryrefslogtreecommitdiff
path: root/usr/src/lib/libsum/common/sum-att.c
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/lib/libsum/common/sum-att.c')
-rw-r--r--usr/src/lib/libsum/common/sum-att.c66
1 files changed, 65 insertions, 1 deletions
diff --git a/usr/src/lib/libsum/common/sum-att.c b/usr/src/lib/libsum/common/sum-att.c
index ef09d0c2e9..27b66c7aa3 100644
--- a/usr/src/lib/libsum/common/sum-att.c
+++ b/usr/src/lib/libsum/common/sum-att.c
@@ -1,7 +1,7 @@
/***********************************************************************
* *
* This software is part of the ast package *
-* Copyright (c) 1996-2008 AT&T Intellectual Property *
+* Copyright (c) 1996-2009 AT&T Intellectual Property *
* and is licensed under the *
* Common Public License, Version 1.0 *
* by AT&T Intellectual Property *
@@ -35,6 +35,69 @@
#define att_data long_data
#define att_scale 512
+#if defined(__SUNPRO_C) || defined(__GNUC__)
+
+#if defined(__SUNPRO_C)
+# include <sun_prefetch.h>
+# define sum_prefetch(addr) sun_prefetch_read_many((void *)(addr))
+#elif defined(__GNUC__)
+# define sum_prefetch(addr) __builtin_prefetch((addr), 0, 3)
+#else
+# error Unknown compiler
+#endif
+
+#define CBLOCK_SIZE (64)
+#pragma unroll(16)
+
+/* Inmos transputer would love this algorithm */
+static int
+att_block(register Sum_t* p, const void* s, size_t n)
+{
+ register uint32_t c = ((Integral_t*)p)->sum;
+ register const unsigned char* b = (const unsigned char*)s;
+ register const unsigned char* e = b + n;
+ register uint32_t s0, s1, s2, s3, s4, s5, s6, s7;
+ register unsigned int i;
+
+ s0=s1=s2=s3=s4=s5=s6=s7=0U;
+
+ sum_prefetch((void *)b);
+
+ while (n > CBLOCK_SIZE)
+ {
+ sum_prefetch((b+CBLOCK_SIZE));
+
+ /* Compiler will unroll for() loops per #pragma unroll */
+ for (i=0 ; i < (CBLOCK_SIZE/8) ; i++)
+ {
+ /*
+ * use s0-s7 to decouple calculations (this improves pipelining)
+ * because each operation is completely independent from it's
+ * siblings
+ */
+ s0+=b[0];
+ s1+=b[1];
+ s2+=b[2];
+ s3+=b[3];
+ s4+=b[4];
+ s5+=b[5];
+ s6+=b[6];
+ s7+=b[7];
+
+ b+=8;
+ n-=8;
+ }
+ }
+
+ c+=s0+s1+s2+s3+s4+s5+s6+s7;
+
+ while (b < e)
+ c += *b++;
+ ((Integral_t*)p)->sum = c;
+ return 0;
+}
+
+#else
static int
att_block(register Sum_t* p, const void* s, size_t n)
{
@@ -47,6 +110,7 @@ att_block(register Sum_t* p, const void* s, size_t n)
((Integral_t*)p)->sum = c;
return 0;
}
+#endif /* defined(__SUNPRO_C) || defined(__GNUC__) */
static int
att_done(Sum_t* p)