path: root/usr/src/uts/intel/ia32/ml/sseblk.s
Diffstat (limited to 'usr/src/uts/intel/ia32/ml/sseblk.s')
-rw-r--r--   usr/src/uts/intel/ia32/ml/sseblk.s   250
1 file changed, 9 insertions(+), 241 deletions(-)
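
The change below removes the 32-bit (__i386) and lint (__lint) variants of the SSE block routines, leaving only the amd64 code paths; with the 32-bit side gone, the operand-width ADD/SUB macros are replaced by plain addq/subq. For orientation, here is a hedged C sketch of the checks hwblkclr() makes before entering its non-temporal store loop, reconstructed from the 32-bit path deleted below (the amd64 path keeps the same bzero() fallback, visible in its "jmp bzero" tail). The function name hwblkclr_sketch, the local BLOCKSIZE definitions, and the userland headers are illustrative stand-ins, not part of the source file.

#include <stddef.h>
#include <stdint.h>
#include <strings.h>				/* bzero() */

#define	BLOCKSHIFT	6			/* mirrors sseblk.s: log2(BLOCKSIZE) */
#define	BLOCKSIZE	(1 << BLOCKSHIFT)	/* 64 bytes zeroed per loop pass */
#define	BLOCKMASK	(BLOCKSIZE - 1)

/* Illustrative stand-in for the entry checks of the real hwblkclr(). */
static void
hwblkclr_sketch(void *addr, size_t size)
{
	if (((uintptr_t)addr & BLOCKMASK) != 0 ||	/* dst not block aligned */
	    size < BLOCKSIZE ||				/* smaller than one block */
	    (size & BLOCKMASK) != 0) {			/* not a block multiple */
		bzero(addr, size);			/* same fallback as .dobzero */
		return;
	}
	/* otherwise: size >> BLOCKSHIFT iterations of four movntdq stores */
}
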
diff --git a/usr/src/uts/intel/ia32/ml/sseblk.s b/usr/src/uts/intel/ia32/ml/sseblk.s
index 092b3e52fd..836b6b6c97 100644
--- a/usr/src/uts/intel/ia32/ml/sseblk.s
+++ b/usr/src/uts/intel/ia32/ml/sseblk.s
@@ -23,25 +23,21 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
+/*
+ * Copyright 2019 Joyent, Inc.
+ */
#include <sys/asm_linkage.h>
#include <sys/regset.h>
#include <sys/privregs.h>
-#if defined(__lint)
-#include <sys/types.h>
-#include <sys/archsystm.h>
-#else
#include "assym.h"
-#endif
/*
* Do block operations using Streaming SIMD extensions
*/
#if defined(DEBUG)
-#if defined(__amd64)
#define ASSERT_KPREEMPT_DISABLED(t, r32, msg) \
movq %gs:CPU_THREAD, t; \
movsbl T_PREEMPT(t), r32; \
@@ -53,18 +49,6 @@
xorl %eax, %eax; \
call panic; \
5:
-#elif defined(__i386)
-#define ASSERT_KPREEMPT_DISABLED(t, r32, msg) \
- movl %gs:CPU_THREAD, t; \
- movsbl T_PREEMPT(t), r32; \
- testl r32, r32; \
- jne 5f; \
- pushl %ebp; \
- movl %esp, %ebp; \
- pushl $msg; \
- call panic; \
-5:
-#endif /* __i386 */
#else /* DEBUG */
#define ASSERT_KPREEMPT_DISABLED(t, r32, msg)
#endif /* DEBUG */
@@ -77,23 +61,6 @@
#error "mucked up constants"
#endif
-#if defined(__lint)
-
-/*ARGSUSED*/
-void
-hwblkclr(void *addr, size_t size)
-{}
-
-#else /* __lint */
-
-#if defined(__amd64)
-#define ADD addq
-#define SUB subq
-#else
-#define ADD addl
-#define SUB subl
-#endif
-
#define SAVE_XMM0(r) \
SAVE_XMM_PROLOG(r, 1); \
movdqa %xmm0, (r)
@@ -106,8 +73,8 @@ hwblkclr(void *addr, size_t size)
movntdq %xmm0, 0x10(dst); \
movntdq %xmm0, 0x20(dst); \
movntdq %xmm0, 0x30(dst); \
- ADD $BLOCKSIZE, dst; \
- SUB $1, cnt
+ addq $BLOCKSIZE, dst; \
+ subq $1, cnt
#define ZERO_LOOP_FINI_XMM(dst) \
mfence
@@ -116,8 +83,6 @@ hwblkclr(void *addr, size_t size)
movdqa 0x0(r), %xmm0; \
RSTOR_XMM_EPILOG(r, 1)
-#if defined(__amd64)
-
/*
* %rdi dst
* %rsi size
@@ -158,65 +123,6 @@ hwblkclr(void *addr, size_t size)
jmp bzero
SET_SIZE(hwblkclr)
-#elif defined(__i386)
-
- /*
- * %eax dst
- * %ecx size in bytes, loop count
- * %ebx saved %cr0 (#if DEBUG then t->t_preempt)
- * %edi pointer to %xmm register save area
- */
- ENTRY(hwblkclr)
- movl 4(%esp), %eax
- movl 8(%esp), %ecx
- testl $BLOCKMASK, %eax /* address must be BLOCKSIZE aligned */
- jne .dobzero
- cmpl $BLOCKSIZE, %ecx /* size must be at least BLOCKSIZE */
- jl .dobzero
- testl $BLOCKMASK, %ecx /* .. and be a multiple of BLOCKSIZE */
- jne .dobzero
- shrl $BLOCKSHIFT, %ecx
- movl 0xc(%esp), %edx
- pushl %ebx
-
- pushl %esi
- ASSERT_KPREEMPT_DISABLED(%esi, %ebx, .not_disabled)
- popl %esi
- movl %cr0, %ebx
- clts
- testl $CR0_TS, %ebx
- jnz 1f
-
- pushl %edi
- SAVE_XMM0(%edi)
-1: ZERO_LOOP_INIT_XMM(%eax)
-9: ZERO_LOOP_BODY_XMM(%eax, %ecx)
- jnz 9b
- ZERO_LOOP_FINI_XMM(%eax)
-
- testl $CR0_TS, %ebx
- jnz 2f
- RSTOR_XMM0(%edi)
- popl %edi
-2: movl %ebx, %cr0
- popl %ebx
- ret
-.dobzero:
- jmp bzero
- SET_SIZE(hwblkclr)
-
-#endif /* __i386 */
-#endif /* __lint */
-
-
-#if defined(__lint)
-
-/*ARGSUSED*/
-void
-hwblkpagecopy(const void *src, void *dst)
-{}
-
-#else /* __lint */
#define PREFETCH_START(src) \
prefetchnta 0x0(src); \
@@ -244,7 +150,7 @@ hwblkpagecopy(const void *src, void *dst)
movdqa 0x50(src), %xmm5; \
movdqa 0x60(src), %xmm6; \
movdqa 0x70(src), %xmm7; \
- ADD $0x80, src
+ addq $0x80, src
#define COPY_LOOP_BODY_XMM(src, dst, cnt) \
prefetchnta 0x80(src); \
@@ -265,10 +171,10 @@ hwblkpagecopy(const void *src, void *dst)
movntdq %xmm7, 0x70(dst); \
movdqa 0x40(src), %xmm4; \
movdqa 0x50(src), %xmm5; \
- ADD $0x80, dst; \
+ addq $0x80, dst; \
movdqa 0x60(src), %xmm6; \
movdqa 0x70(src), %xmm7; \
- ADD $0x80, src; \
+ addq $0x80, src; \
subl $1, cnt
#define COPY_LOOP_FINI_XMM(dst) \
@@ -292,8 +198,6 @@ hwblkpagecopy(const void *src, void *dst)
movdqa 0x70(r), %xmm7; \
RSTOR_XMM_EPILOG(r, 8)
-#if defined(__amd64)
-
/*
* %rdi src
* %rsi dst
@@ -330,70 +234,6 @@ hwblkpagecopy(const void *src, void *dst)
ret
SET_SIZE(hwblkpagecopy)
-#elif defined(__i386)
-
- /*
- * %eax src
- * %edx dst
- * %ecx loop count
- * %ebx saved %cr0 (#if DEBUG then t->t_preempt)
- * %edi pointer to %xmm register save area
- * %esi #if DEBUG temporary thread pointer
- */
- ENTRY(hwblkpagecopy)
- movl 4(%esp), %eax
- movl 8(%esp), %edx
- PREFETCH_START(%eax)
- pushl %ebx
- /*
- * PAGESIZE is 4096, each loop moves 128 bytes, but the initial
- * load and final store save us one loop count
- */
- movl $_CONST(32 - 1), %ecx
- pushl %esi
- ASSERT_KPREEMPT_DISABLED(%esi, %ebx, .not_disabled)
- popl %esi
- movl %cr0, %ebx
- clts
- testl $CR0_TS, %ebx
- jnz 3f
- pushl %edi
- SAVE_XMMS(%edi)
-3: COPY_LOOP_INIT_XMM(%eax)
-4: COPY_LOOP_BODY_XMM(%eax, %edx, %ecx)
- jnz 4b
- COPY_LOOP_FINI_XMM(%edx)
- testl $CR0_TS, %ebx
- jnz 5f
- RSTOR_XMMS(%edi)
- popl %edi
-5: movl %ebx, %cr0
- popl %ebx
- mfence
- ret
- SET_SIZE(hwblkpagecopy)
-
-#endif /* __i386 */
-#endif /* __lint */
-
-#if defined(__lint)
-
-/*
- * Version of hwblkclr which doesn't use XMM registers.
- * Note that it requires aligned dst and len.
- *
- * XXPV This needs to be performance tuned at some point.
- * Is 4 the best number of iterations to unroll?
- */
-/*ARGSUSED*/
-void
-block_zero_no_xmm(void *dst, int len)
-{}
-
-#else /* __lint */
-
-#if defined(__amd64)
-
ENTRY(block_zero_no_xmm)
pushq %rbp
movq %rsp, %rbp
@@ -412,49 +252,6 @@ block_zero_no_xmm(void *dst, int len)
ret
SET_SIZE(block_zero_no_xmm)
-#elif defined(__i386)
-
- ENTRY(block_zero_no_xmm)
- pushl %ebp
- movl %esp, %ebp
- xorl %eax, %eax
- movl 8(%ebp), %edx
- movl 12(%ebp), %ecx
- addl %ecx, %edx
- negl %ecx
-1:
- movnti %eax, (%edx, %ecx)
- movnti %eax, 4(%edx, %ecx)
- movnti %eax, 8(%edx, %ecx)
- movnti %eax, 12(%edx, %ecx)
- addl $16, %ecx
- jnz 1b
- mfence
- leave
- ret
- SET_SIZE(block_zero_no_xmm)
-
-#endif /* __i386 */
-#endif /* __lint */
-
-
-#if defined(__lint)
-
-/*
- * Version of page copy which doesn't use XMM registers.
- *
- * XXPV This needs to be performance tuned at some point.
- * Is 4 the right number of iterations to unroll?
- * Is the load/store order optimal? Should it use prefetch?
- */
-/*ARGSUSED*/
-void
-page_copy_no_xmm(void *dst, void *src)
-{}
-
-#else /* __lint */
-
-#if defined(__amd64)
ENTRY(page_copy_no_xmm)
movq $MMU_STD_PAGESIZE, %rcx
@@ -476,36 +273,7 @@ page_copy_no_xmm(void *dst, void *src)
ret
SET_SIZE(page_copy_no_xmm)
-#elif defined(__i386)
-
- ENTRY(page_copy_no_xmm)
- pushl %esi
- movl $MMU_STD_PAGESIZE, %ecx
- movl 8(%esp), %edx
- movl 12(%esp), %esi
- addl %ecx, %edx
- addl %ecx, %esi
- negl %ecx
-1:
- movl (%esi, %ecx), %eax
- movnti %eax, (%edx, %ecx)
- movl 4(%esi, %ecx), %eax
- movnti %eax, 4(%edx, %ecx)
- movl 8(%esi, %ecx), %eax
- movnti %eax, 8(%edx, %ecx)
- movl 12(%esi, %ecx), %eax
- movnti %eax, 12(%edx, %ecx)
- addl $16, %ecx
- jnz 1b
- mfence
- popl %esi
- ret
- SET_SIZE(page_copy_no_xmm)
-
-#endif /* __i386 */
-#endif /* __lint */
-
-#if defined(DEBUG) && !defined(__lint)
+#if defined(DEBUG)
.text
.not_disabled:
.string "sseblk: preemption not disabled!"