Diffstat (limited to 'usr/src/uts/intel/ml/lock_prim.s')
-rw-r--r--  usr/src/uts/intel/ml/lock_prim.s  714
1 file changed, 714 insertions, 0 deletions
diff --git a/usr/src/uts/intel/ml/lock_prim.s b/usr/src/uts/intel/ml/lock_prim.s
new file mode 100644
index 0000000000..4267561bf7
--- /dev/null
+++ b/usr/src/uts/intel/ml/lock_prim.s
@@ -0,0 +1,714 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Copyright 2019 Joyent, Inc.
+ */
+
+#include "assym.h"
+
+#include <sys/mutex_impl.h>
+#include <sys/asm_linkage.h>
+#include <sys/asm_misc.h>
+#include <sys/regset.h>
+#include <sys/rwlock_impl.h>
+#include <sys/lockstat.h>
+
+/*
+ * lock_try(lp), ulock_try(lp)
+ * - returns non-zero on success.
+ * - doesn't block interrupts, so don't use this to spin on a lock.
+ *
+ * ulock_try() is for a lock in the user address space.
+ */
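+
+/*
+ * Roughly, the fast path below behaves like the C sketch that follows.
+ * This is illustrative only, not the source that is built: the lock is
+ * shown as a bare volatile byte rather than a lock_t, and
+ * atomic_swap_uchar() stands in for the xchgb instruction actually used.
+ *
+ *	int
+ *	lock_try(volatile uchar_t *lp)
+ *	{
+ *		// swap 0xff into the lock byte; we won it iff it was 0
+ *		return (atomic_swap_uchar(lp, 0xff) == 0);
+ *	}
+ */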
+
+ .globl kernelbase
+
+ ENTRY(lock_try)
+ movb $-1, %dl
+ movzbq %dl, %rax
+ xchgb %dl, (%rdi)
+ xorb %dl, %al
+.lock_try_lockstat_patch_point:
+ ret
+ testb %al, %al
+ jnz 0f
+ ret
+0:
+ movq %gs:CPU_THREAD, %rdx /* rdx = thread addr */
+ movq %rdi, %rsi /* rsi = lock addr */
+ movl $LS_LOCK_TRY_ACQUIRE, %edi /* edi = event */
+ jmp lockstat_wrapper
+ SET_SIZE(lock_try)
+
+ ENTRY(lock_spin_try)
+ movb $-1, %dl
+ movzbq %dl, %rax
+ xchgb %dl, (%rdi)
+ xorb %dl, %al
+ ret
+ SET_SIZE(lock_spin_try)
+
+ ENTRY(ulock_try)
+#ifdef DEBUG
+ movq kernelbase(%rip), %rax
+ cmpq %rax, %rdi /* test uaddr < kernelbase */
+ jb ulock_pass /* uaddr < kernelbase, proceed */
+
+ movq %rdi, %r12 /* preserve lock ptr for debugging */
+ leaq .ulock_panic_msg(%rip), %rdi
+ pushq %rbp /* align stack properly */
+ movq %rsp, %rbp
+ xorl %eax, %eax /* clear for varargs */
+ call panic
+
+#endif /* DEBUG */
+
+ulock_pass:
+ movl $1, %eax
+ xchgb %al, (%rdi)
+ xorb $1, %al
+ ret
+ SET_SIZE(ulock_try)
+
+#ifdef DEBUG
+ .data
+.ulock_panic_msg:
+ .string "ulock_try: Argument is above kernelbase"
+ .text
+#endif /* DEBUG */
+
+/*
+ * lock_clear(lp)
+ * - unlock lock without changing interrupt priority level.
+ */
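+
+/*
+ * In C terms this is just a store of zero to the lock byte (a sketch;
+ * the real interface takes a lock_t, shown here as a volatile byte):
+ *
+ *	void
+ *	lock_clear(volatile uchar_t *lp)
+ *	{
+ *		*lp = 0;
+ *	}
+ */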
+
+ ENTRY(lock_clear)
+ movb $0, (%rdi)
+.lock_clear_lockstat_patch_point:
+ ret
+ movq %rdi, %rsi /* rsi = lock addr */
+ movq %gs:CPU_THREAD, %rdx /* rdx = thread addr */
+ movl $LS_LOCK_CLEAR_RELEASE, %edi /* edi = event */
+ jmp lockstat_wrapper
+ SET_SIZE(lock_clear)
+
+ ENTRY(ulock_clear)
+#ifdef DEBUG
+ movq kernelbase(%rip), %rcx
+ cmpq %rcx, %rdi /* test uaddr < kernelbase */
+ jb ulock_clr /* uaddr < kernelbase, proceed */
+
+ leaq .ulock_clear_msg(%rip), %rdi
+ pushq %rbp /* align stack properly */
+ movq %rsp, %rbp
+ xorl %eax, %eax /* clear for varargs */
+ call panic
+#endif
+
+ulock_clr:
+ movb $0, (%rdi)
+ ret
+ SET_SIZE(ulock_clear)
+
+#ifdef DEBUG
+ .data
+.ulock_clear_msg:
+ .string "ulock_clear: Argument is above kernelbase"
+ .text
+#endif /* DEBUG */
+
+
+/*
+ * lock_set_spl(lock_t *lp, int new_pil, u_short *old_pil)
+ * Sets the PIL to new_pil, grabs lp, and stores the old PIL in *old_pil.
+ */
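+
+/*
+ * The flow below is approximately the following C sketch (illustrative
+ * only; splr() and lock_set_spl_spin() are the routines the assembly
+ * calls, the argument order for the spin path is taken from the register
+ * setup at .lss_miss, and atomic_swap_uchar() stands in for xchgb):
+ *
+ *	void
+ *	lock_set_spl(volatile uchar_t *lp, int new_pil, ushort_t *old_pil)
+ *	{
+ *		int s = splr(new_pil);			// raise PIL first
+ *		if (atomic_swap_uchar(lp, 0xff) == 0) {
+ *			*old_pil = (ushort_t)s;		// got the lock
+ *			return;
+ *		}
+ *		lock_set_spl_spin(lp, new_pil, old_pil, s); // hard case
+ *	}
+ */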
+
+ ENTRY(lock_set_spl)
+ pushq %rbp
+ movq %rsp, %rbp
+ subq $32, %rsp
+ movl %esi, 8(%rsp) /* save priority level */
+ movq %rdx, 16(%rsp) /* save old pil ptr */
+ movq %rdi, 24(%rsp) /* save lock pointer */
+ movl %esi, %edi /* pass priority level */
+ call splr /* raise priority level */
+ movq 24(%rsp), %rdi /* rdi = lock addr */
+ movb $-1, %dl
+ xchgb %dl, (%rdi) /* try to set lock */
+ testb %dl, %dl /* did we get the lock? ... */
+ jnz .lss_miss /* ... no, go to C for the hard case */
+ movq 16(%rsp), %rdx /* rdx = old pil addr */
+ movw %ax, (%rdx) /* store old pil */
+ leave
+.lock_set_spl_lockstat_patch_point:
+ ret
+ movq %rdi, %rsi /* rsi = lock addr */
+ movq %gs:CPU_THREAD, %rdx /* rdx = thread addr */
+ movl $LS_LOCK_SET_SPL_ACQUIRE, %edi
+ jmp lockstat_wrapper
+.lss_miss:
+ movl 8(%rsp), %esi /* new_pil */
+ movq 16(%rsp), %rdx /* old_pil_addr */
+ movl %eax, %ecx /* original pil */
+ leave /* unwind stack */
+ jmp lock_set_spl_spin
+ SET_SIZE(lock_set_spl)
+
+/*
+ * void
+ * lock_init(lp)
+ */
+
+ ENTRY(lock_init)
+ movb $0, (%rdi)
+ ret
+ SET_SIZE(lock_init)
+
+/*
+ * void
+ * lock_set(lp)
+ */
+
+ ENTRY(lock_set)
+ movb $-1, %dl
+ xchgb %dl, (%rdi) /* try to set lock */
+ testb %dl, %dl /* did we get it? */
+ jnz lock_set_spin /* no, go to C for the hard case */
+.lock_set_lockstat_patch_point:
+ ret
+ movq %rdi, %rsi /* rsi = lock addr */
+ movq %gs:CPU_THREAD, %rdx /* rdx = thread addr */
+ movl $LS_LOCK_SET_ACQUIRE, %edi
+ jmp lockstat_wrapper
+ SET_SIZE(lock_set)
+
+/*
+ * lock_clear_splx(lp, s)
+ */
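+
+/*
+ * Conceptually (a C sketch; splx() is the routine actually called below,
+ * and the lock is shown as a bare volatile byte):
+ *
+ *	void
+ *	lock_clear_splx(volatile uchar_t *lp, int s)
+ *	{
+ *		*lp = 0;	// drop the lock first
+ *		splx(s);	// then restore the saved PIL
+ *	}
+ */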
+
+ ENTRY(lock_clear_splx)
+ movb $0, (%rdi) /* clear lock */
+.lock_clear_splx_lockstat_patch_point:
+ jmp 0f
+0:
+ movl %esi, %edi /* arg for splx */
+ jmp splx /* let splx do its thing */
+.lock_clear_splx_lockstat:
+ pushq %rbp /* align stack properly */
+ movq %rsp, %rbp
+ subq $16, %rsp /* space to save args across splx */
+ movq %rdi, 8(%rsp) /* save lock ptr across splx call */
+ movl %esi, %edi /* arg for splx */
+ call splx /* lower the priority */
+ movq 8(%rsp), %rsi /* rsi = lock ptr */
+ leave /* unwind stack */
+ movq %gs:CPU_THREAD, %rdx /* rdx = thread addr */
+ movl $LS_LOCK_CLEAR_SPLX_RELEASE, %edi
+ jmp lockstat_wrapper
+ SET_SIZE(lock_clear_splx)
+
+#if defined(__GNUC_AS__)
+#define LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_VAL \
+ (.lock_clear_splx_lockstat - .lock_clear_splx_lockstat_patch_point - 2)
+
+#define LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_POINT \
+ (.lock_clear_splx_lockstat_patch_point + 1)
+#else
+#define LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_VAL \
+ [.lock_clear_splx_lockstat - .lock_clear_splx_lockstat_patch_point - 2]
+
+#define LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_POINT \
+ [.lock_clear_splx_lockstat_patch_point + 1]
+#endif
+
+/*
+ * mutex_enter() and mutex_exit().
+ *
+ * These routines handle the simple cases of mutex_enter() (adaptive
+ * lock, not held) and mutex_exit() (adaptive lock, held, no waiters).
+ * If anything complicated is going on we punt to mutex_vector_enter()
+ * or mutex_vector_exit(), respectively.
+ *
+ * mutex_tryenter() is similar to mutex_enter() but returns zero if
+ * the lock cannot be acquired, nonzero on success.
+ *
+ * If mutex_exit() gets preempted in the window between checking waiters
+ * and clearing the lock, we can miss wakeups. Disabling preemption
+ * in the mutex code is prohibitively expensive, so instead we detect
+ * mutex preemption by examining the trapped PC in the interrupt path.
+ * If we interrupt a thread in mutex_exit() that has not yet cleared
+ * the lock, cmnint() resets its PC back to the beginning of
+ * mutex_exit() so it will check again for waiters when it resumes.
+ *
+ * The lockstat code below is activated when the lockstat driver
+ * calls lockstat_hot_patch() to hot-patch the kernel mutex code.
+ * Note that we don't need to test lockstat_event_mask here -- we won't
+ * patch this code in unless we're gathering ADAPTIVE_HOLD lockstats.
+ */
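+
+/*
+ * For reference, the fast paths below correspond approximately to the C
+ * sketch that follows.  It is illustrative only: the mutex is shown as a
+ * bare owner word, curthread stands for the %gs:CPU_THREAD load,
+ * atomic_cas_ptr() stands in for lock cmpxchgq, and the preemption-window
+ * handling described above is ignored.
+ *
+ *	int
+ *	mutex_enter_fastpath(volatile void *m)
+ *	{
+ *		// unheld adaptive lock: owner word is 0
+ *		if (atomic_cas_ptr(m, NULL, curthread) != NULL)
+ *			return (0);	// contended: mutex_vector_enter()
+ *		return (1);
+ *	}
+ *
+ *	int
+ *	mutex_exit_fastpath(volatile void *m)
+ *	{
+ *		if (*(void *volatile *)m != curthread)
+ *			return (0);	// waiters etc.: mutex_vector_exit()
+ *		*(void *volatile *)m = NULL;	// clear owner AND lock
+ *		return (1);
+ *	}
+ */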
+
+ ENTRY_NP(mutex_enter)
+ movq %gs:CPU_THREAD, %rdx /* rdx = thread ptr */
+ xorl %eax, %eax /* rax = 0 (unheld adaptive) */
+ lock
+ cmpxchgq %rdx, (%rdi)
+ jnz mutex_vector_enter
+.mutex_enter_lockstat_patch_point:
+#if defined(OPTERON_WORKAROUND_6323525)
+.mutex_enter_6323525_patch_point:
+ ret /* nop space for lfence */
+ nop
+ nop
+.mutex_enter_lockstat_6323525_patch_point: /* new patch point if lfence */
+ nop
+#else /* OPTERON_WORKAROUND_6323525 */
+ ret
+#endif /* OPTERON_WORKAROUND_6323525 */
+ movq %rdi, %rsi
+ movl $LS_MUTEX_ENTER_ACQUIRE, %edi
+/*
+ * expects %rdx=thread, %rsi=lock, %edi=lockstat event
+ */
+ ALTENTRY(lockstat_wrapper)
+ incb T_LOCKSTAT(%rdx) /* curthread->t_lockstat++ */
+ leaq lockstat_probemap(%rip), %rax
+ movl (%rax, %rdi, DTRACE_IDSIZE), %eax
+ testl %eax, %eax /* check for non-zero probe */
+ jz 1f
+ pushq %rbp /* align stack properly */
+ movq %rsp, %rbp
+ movl %eax, %edi
+ movq lockstat_probe, %rax
+ INDIRECT_CALL_REG(rax)
+ leave /* unwind stack */
+1:
+ movq %gs:CPU_THREAD, %rdx /* reload thread ptr */
+ decb T_LOCKSTAT(%rdx) /* curthread->t_lockstat-- */
+ movl $1, %eax /* return success if tryenter */
+ ret
+ SET_SIZE(lockstat_wrapper)
+ SET_SIZE(mutex_enter)
+
+/*
+ * expects %rcx=thread, %rdx=arg, %rsi=lock, %edi=lockstat event
+ */
+ ENTRY(lockstat_wrapper_arg)
+ incb T_LOCKSTAT(%rcx) /* curthread->t_lockstat++ */
+ leaq lockstat_probemap(%rip), %rax
+ movl (%rax, %rdi, DTRACE_IDSIZE), %eax
+ testl %eax, %eax /* check for non-zero probe */
+ jz 1f
+ pushq %rbp /* align stack properly */
+ movq %rsp, %rbp
+ movl %eax, %edi
+ movq lockstat_probe, %rax
+ INDIRECT_CALL_REG(rax)
+ leave /* unwind stack */
+1:
+ movq %gs:CPU_THREAD, %rdx /* reload thread ptr */
+ decb T_LOCKSTAT(%rdx) /* curthread->t_lockstat-- */
+ movl $1, %eax /* return success if tryenter */
+ ret
+ SET_SIZE(lockstat_wrapper_arg)
+
+
+ ENTRY(mutex_tryenter)
+ movq %gs:CPU_THREAD, %rdx /* rdx = thread ptr */
+ xorl %eax, %eax /* rax = 0 (unheld adaptive) */
+ lock
+ cmpxchgq %rdx, (%rdi)
+ jnz mutex_vector_tryenter
+ not %eax /* return success (nonzero) */
+#if defined(OPTERON_WORKAROUND_6323525)
+.mutex_tryenter_lockstat_patch_point:
+.mutex_tryenter_6323525_patch_point:
+ ret /* nop space for lfence */
+ nop
+ nop
+.mutex_tryenter_lockstat_6323525_patch_point: /* new patch point if lfence */
+ nop
+#else /* OPTERON_WORKAROUND_6323525 */
+.mutex_tryenter_lockstat_patch_point:
+ ret
+#endif /* OPTERON_WORKAROUND_6323525 */
+ movq %rdi, %rsi
+ movl $LS_MUTEX_ENTER_ACQUIRE, %edi
+ jmp lockstat_wrapper
+ SET_SIZE(mutex_tryenter)
+
+ ENTRY(mutex_adaptive_tryenter)
+ movq %gs:CPU_THREAD, %rdx /* rdx = thread ptr */
+ xorl %eax, %eax /* rax = 0 (unheld adaptive) */
+ lock
+ cmpxchgq %rdx, (%rdi)
+ jnz 0f
+ not %eax /* return success (nonzero) */
+#if defined(OPTERON_WORKAROUND_6323525)
+.mutex_atryenter_6323525_patch_point:
+ ret /* nop space for lfence */
+ nop
+ nop
+ nop
+#else /* OPTERON_WORKAROUND_6323525 */
+ ret
+#endif /* OPTERON_WORKAROUND_6323525 */
+0:
+ xorl %eax, %eax /* return failure */
+ ret
+ SET_SIZE(mutex_adaptive_tryenter)
+
+ .globl mutex_owner_running_critical_start
+
+ ENTRY(mutex_owner_running)
+mutex_owner_running_critical_start:
+ movq (%rdi), %r11 /* get owner field */
+ andq $MUTEX_THREAD, %r11 /* remove waiters bit */
+ cmpq $0, %r11 /* if free, skip */
+ je 1f /* go return 0 */
+ movq T_CPU(%r11), %r8 /* get owner->t_cpu */
+ movq CPU_THREAD(%r8), %r9 /* get t_cpu->cpu_thread */
+.mutex_owner_running_critical_end:
+ cmpq %r11, %r9 /* owner == running thread? */
+ je 2f /* yes, go return cpu */
+1:
+ xorq %rax, %rax /* return 0 */
+ ret
+2:
+ movq %r8, %rax /* return cpu */
+ ret
+ SET_SIZE(mutex_owner_running)
+
+ .globl mutex_owner_running_critical_size
+ .type mutex_owner_running_critical_size, @object
+ .align CPTRSIZE
+mutex_owner_running_critical_size:
+ .quad .mutex_owner_running_critical_end - mutex_owner_running_critical_start
+ SET_SIZE(mutex_owner_running_critical_size)
+
+ .globl mutex_exit_critical_start
+
+ ENTRY(mutex_exit)
+mutex_exit_critical_start: /* If interrupted, restart here */
+ movq %gs:CPU_THREAD, %rdx
+ cmpq %rdx, (%rdi)
+ jne mutex_vector_exit /* wrong type or wrong owner */
+ movq $0, (%rdi) /* clear owner AND lock */
+.mutex_exit_critical_end:
+.mutex_exit_lockstat_patch_point:
+ ret
+ movq %rdi, %rsi
+ movl $LS_MUTEX_EXIT_RELEASE, %edi
+ jmp lockstat_wrapper
+ SET_SIZE(mutex_exit)
+
+ .globl mutex_exit_critical_size
+ .type mutex_exit_critical_size, @object
+ .align CPTRSIZE
+mutex_exit_critical_size:
+ .quad .mutex_exit_critical_end - mutex_exit_critical_start
+ SET_SIZE(mutex_exit_critical_size)
+
+/*
+ * rw_enter() and rw_exit().
+ *
+ * These routines handle the simple cases of rw_enter (write-locking an unheld
+ * lock or read-locking a lock that's neither write-locked nor write-wanted)
+ * and rw_exit (no waiters or not the last reader). If anything complicated
+ * is going on we punt to rw_enter_sleep() and rw_exit_wakeup(), respectively.
+ */
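+
+/*
+ * The read-lock fast paths below behave approximately like the C sketch
+ * that follows (illustrative only; the lock is shown as the single
+ * machine word it is kept in, the RW_* constants come from rwlock_impl.h
+ * as in the assembly, and atomic_cas_ulong() stands in for lock cmpxchgq):
+ *
+ *	int
+ *	rw_read_enter_fastpath(volatile ulong_t *rw)
+ *	{
+ *		ulong_t old = *rw;
+ *		if (old & (RW_WRITE_LOCKED | RW_WRITE_WANTED))
+ *			return (0);	// punt to rw_enter_sleep()
+ *		// add one reader hold, provided the word has not changed
+ *		return (atomic_cas_ulong(rw, old, old + RW_READ_LOCK) == old);
+ *	}
+ */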
+
+ ENTRY(rw_enter)
+ cmpl $RW_WRITER, %esi
+ je .rw_write_enter
+ movq (%rdi), %rax /* rax = old rw_wwwh value */
+ testl $RW_WRITE_LOCKED|RW_WRITE_WANTED, %eax
+ jnz rw_enter_sleep
+ leaq RW_READ_LOCK(%rax), %rdx /* rdx = new rw_wwwh value */
+ lock
+ cmpxchgq %rdx, (%rdi) /* try to grab read lock */
+ jnz rw_enter_sleep
+.rw_read_enter_lockstat_patch_point:
+ ret
+ movq %gs:CPU_THREAD, %rcx /* rcx = thread ptr */
+ movq %rdi, %rsi /* rsi = lock ptr */
+ movl $LS_RW_ENTER_ACQUIRE, %edi
+ movl $RW_READER, %edx
+ jmp lockstat_wrapper_arg
+.rw_write_enter:
+ movq %gs:CPU_THREAD, %rdx
+ orq $RW_WRITE_LOCKED, %rdx /* rdx = write-locked value */
+ xorl %eax, %eax /* rax = unheld value */
+ lock
+ cmpxchgq %rdx, (%rdi) /* try to grab write lock */
+ jnz rw_enter_sleep
+
+#if defined(OPTERON_WORKAROUND_6323525)
+.rw_write_enter_lockstat_patch_point:
+.rw_write_enter_6323525_patch_point:
+ ret
+ nop
+ nop
+.rw_write_enter_lockstat_6323525_patch_point:
+ nop
+#else /* OPTERON_WORKAROUND_6323525 */
+.rw_write_enter_lockstat_patch_point:
+ ret
+#endif /* OPTERON_WORKAROUND_6323525 */
+
+ movq %gs:CPU_THREAD, %rcx /* rcx = thread ptr */
+ movq %rdi, %rsi /* rsi = lock ptr */
+ movl $LS_RW_ENTER_ACQUIRE, %edi
+ movl $RW_WRITER, %edx
+ jmp lockstat_wrapper_arg
+ SET_SIZE(rw_enter)
+
+ ENTRY(rw_exit)
+ movq (%rdi), %rax /* rax = old rw_wwwh value */
+ cmpl $RW_READ_LOCK, %eax /* single-reader, no waiters? */
+ jne .rw_not_single_reader
+ xorl %edx, %edx /* rdx = new value (unheld) */
+.rw_read_exit:
+ lock
+ cmpxchgq %rdx, (%rdi) /* try to drop read lock */
+ jnz rw_exit_wakeup
+.rw_read_exit_lockstat_patch_point:
+ ret
+ movq %gs:CPU_THREAD, %rcx /* rcx = thread ptr */
+ movq %rdi, %rsi /* rsi = lock ptr */
+ movl $LS_RW_EXIT_RELEASE, %edi
+ movl $RW_READER, %edx
+ jmp lockstat_wrapper_arg
+.rw_not_single_reader:
+ testl $RW_WRITE_LOCKED, %eax /* are we the writer? */
+ jnz .rw_write_exit
+ leaq -RW_READ_LOCK(%rax), %rdx /* rdx = new value */
+ cmpl $RW_READ_LOCK, %edx
+ jge .rw_read_exit /* not last reader, safe to drop */
+ jmp rw_exit_wakeup /* last reader with waiters */
+.rw_write_exit:
+ movq %gs:CPU_THREAD, %rax /* rax = thread ptr */
+ xorl %edx, %edx /* rdx = new value (unheld) */
+ orq $RW_WRITE_LOCKED, %rax /* rax = write-locked value */
+ lock
+ cmpxchgq %rdx, (%rdi) /* try to drop write lock */
+ jnz rw_exit_wakeup
+.rw_write_exit_lockstat_patch_point:
+ ret
+ movq %gs:CPU_THREAD, %rcx /* rcx = thread ptr */
+ movq %rdi, %rsi /* rsi = lock ptr */
+ movl $LS_RW_EXIT_RELEASE, %edi
+ movl $RW_WRITER, %edx
+ jmp lockstat_wrapper_arg
+ SET_SIZE(rw_exit)
+
+#if defined(OPTERON_WORKAROUND_6323525)
+
+/*
+ * If it is necessary to patch the lock enter routines with the lfence
+ * workaround, workaround_6323525_patched is set to a non-zero value so that
+ * the lockstat_hot_patch routine can patch to the new location of the 'ret'
+ * instruction.
+ */
+ DGDEF3(workaround_6323525_patched, 4, 4)
+ .long 0
+
+#define HOT_MUTEX_PATCH(srcaddr, dstaddr, size) \
+ movq $size, %rbx; \
+ movq $dstaddr, %r13; \
+ addq %rbx, %r13; \
+ movq $srcaddr, %r12; \
+ addq %rbx, %r12; \
+0: \
+ decq %r13; \
+ decq %r12; \
+ movzbl (%r12), %esi; \
+ movq $1, %rdx; \
+ movq %r13, %rdi; \
+ call hot_patch_kernel_text; \
+ decq %rbx; \
+ testq %rbx, %rbx; \
+ jg 0b;
+
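+/*
+ * HOT_MUTEX_PATCH copies `size' bytes from srcaddr to dstaddr one byte at
+ * a time, highest address first, through hot_patch_kernel_text().  A C
+ * sketch of the same loop (illustrative only; hot_patch_kernel_text() is
+ * the routine the macro actually calls):
+ *
+ *	static void
+ *	hot_mutex_patch(uint8_t *src, caddr_t dst, size_t size)
+ *	{
+ *		// patch backwards so the original ret is replaced last,
+ *		// keeping the routine safe to call at every step
+ *		while (size-- > 0)
+ *			hot_patch_kernel_text(dst + size, src[size], 1);
+ *	}
+ */
+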
+/*
+ * patch_workaround_6323525: provide workaround for 6323525
+ *
+ * The workaround is to place a fencing instruction (lfence) between the
+ * mutex operation and the subsequent read-modify-write instruction.
+ *
+ * This routine hot patches the lfence instruction on top of the space
+ * reserved by nops in the lock enter routines.
+ */
+ ENTRY_NP(patch_workaround_6323525)
+ pushq %rbp
+ movq %rsp, %rbp
+ pushq %r12
+ pushq %r13
+ pushq %rbx
+
+ /*
+ * Set workaround_6323525_patched so that lockstat_hot_patch() uses the
+ * alternate lockstat workaround 6323525 patch points (which point past
+ * the lfence instruction to the new ret).
+ */
+ movl $1, workaround_6323525_patched
+
+ /*
+ * patch ret/nop/nop/nop to lfence/ret at the end of the lock enter
+ * routines. The 4 bytes are patched in reverse order so that the
+ * existing ret is overwritten last. This provides lock enter
+ * sanity during the intermediate patching stages.
+ */
+ HOT_MUTEX_PATCH(_lfence_insn, .mutex_enter_6323525_patch_point, 4)
+ HOT_MUTEX_PATCH(_lfence_insn, .mutex_tryenter_6323525_patch_point, 4)
+ HOT_MUTEX_PATCH(_lfence_insn, .mutex_atryenter_6323525_patch_point, 4)
+ HOT_MUTEX_PATCH(_lfence_insn, .rw_write_enter_6323525_patch_point, 4)
+
+ popq %rbx
+ popq %r13
+ popq %r12
+ movq %rbp, %rsp
+ popq %rbp
+ ret
+_lfence_insn:
+ lfence
+ ret
+ SET_SIZE(patch_workaround_6323525)
+
+
+#endif /* OPTERON_WORKAROUND_6323525 */
+
+
+#define HOT_PATCH(addr, event, active_instr, normal_instr, len) \
+ movq $normal_instr, %rsi; \
+ movq $active_instr, %rdi; \
+ leaq lockstat_probemap(%rip), %rax; \
+ movl _MUL(event, DTRACE_IDSIZE)(%rax), %eax; \
+ testl %eax, %eax; \
+ jz 9f; \
+ movq %rdi, %rsi; \
+9: \
+ movq $len, %rdx; \
+ movq $addr, %rdi; \
+ call hot_patch_kernel_text
+
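+/*
+ * HOT_PATCH above rewrites the single byte at `addr': if the lockstat
+ * probe for `event' is enabled in lockstat_probemap[] it installs
+ * active_instr (a nop, so execution falls through into the lockstat
+ * code), otherwise normal_instr (the original ret).  A C sketch of the
+ * decision (illustrative only; lockstat_probemap[] and
+ * hot_patch_kernel_text() are the symbols the macro really uses):
+ *
+ *	static void
+ *	hot_patch(caddr_t addr, int event, uint8_t active_instr,
+ *	    uint8_t normal_instr)
+ *	{
+ *		uint8_t instr = (lockstat_probemap[event] != 0) ?
+ *		    active_instr : normal_instr;
+ *		hot_patch_kernel_text(addr, instr, 1);
+ *	}
+ */
+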
+ ENTRY(lockstat_hot_patch)
+ pushq %rbp /* align stack properly */
+ movq %rsp, %rbp
+
+#if defined(OPTERON_WORKAROUND_6323525)
+ cmpl $0, workaround_6323525_patched
+ je 1f
+ HOT_PATCH(.mutex_enter_lockstat_6323525_patch_point,
+ LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
+ HOT_PATCH(.mutex_tryenter_lockstat_6323525_patch_point,
+ LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
+ HOT_PATCH(.rw_write_enter_lockstat_6323525_patch_point,
+ LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
+ jmp 2f
+1:
+ HOT_PATCH(.mutex_enter_lockstat_patch_point,
+ LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
+ HOT_PATCH(.mutex_tryenter_lockstat_patch_point,
+ LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
+ HOT_PATCH(.rw_write_enter_lockstat_patch_point,
+ LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
+2:
+#else /* OPTERON_WORKAROUND_6323525 */
+ HOT_PATCH(.mutex_enter_lockstat_patch_point,
+ LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
+ HOT_PATCH(.mutex_tryenter_lockstat_patch_point,
+ LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
+ HOT_PATCH(.rw_write_enter_lockstat_patch_point,
+ LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
+#endif /* !OPTERON_WORKAROUND_6323525 */
+ HOT_PATCH(.mutex_exit_lockstat_patch_point,
+ LS_MUTEX_EXIT_RELEASE, NOP_INSTR, RET_INSTR, 1)
+ HOT_PATCH(.rw_read_enter_lockstat_patch_point,
+ LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
+ HOT_PATCH(.rw_write_exit_lockstat_patch_point,
+ LS_RW_EXIT_RELEASE, NOP_INSTR, RET_INSTR, 1)
+ HOT_PATCH(.rw_read_exit_lockstat_patch_point,
+ LS_RW_EXIT_RELEASE, NOP_INSTR, RET_INSTR, 1)
+ HOT_PATCH(.lock_set_lockstat_patch_point,
+ LS_LOCK_SET_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
+ HOT_PATCH(.lock_try_lockstat_patch_point,
+ LS_LOCK_TRY_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
+ HOT_PATCH(.lock_clear_lockstat_patch_point,
+ LS_LOCK_CLEAR_RELEASE, NOP_INSTR, RET_INSTR, 1)
+ HOT_PATCH(.lock_set_spl_lockstat_patch_point,
+ LS_LOCK_SET_SPL_ACQUIRE, NOP_INSTR, RET_INSTR, 1)
+
+ HOT_PATCH(LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_POINT,
+ LS_LOCK_CLEAR_SPLX_RELEASE,
+ LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_VAL, 0, 1);
+ leave /* unwind stack */
+ ret
+ SET_SIZE(lockstat_hot_patch)
+
+ ENTRY(membar_enter)
+ ALTENTRY(membar_exit)
+ ALTENTRY(membar_sync)
+ mfence /* lighter weight than lock; xorq $0,(%rsp) */
+ ret
+ SET_SIZE(membar_sync)
+ SET_SIZE(membar_exit)
+ SET_SIZE(membar_enter)
+
+ ENTRY(membar_producer)
+ sfence
+ ret
+ SET_SIZE(membar_producer)
+
+ ENTRY(membar_consumer)
+ lfence
+ ret
+ SET_SIZE(membar_consumer)
+
+/*
+ * thread_onproc()
+ * Set thread in onproc state for the specified CPU.
+ * Also set the thread lock pointer to the CPU's onproc lock.
+ * Since the new lock isn't held, the store ordering is important.
+ * If not done in assembler, the compiler could reorder the stores.
+ */
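+
+/*
+ * Approximately, in C (a sketch; the whole point of doing this in
+ * assembly is the store ordering noted above, which a C compiler would
+ * be free to break):
+ *
+ *	void
+ *	thread_onproc(kthread_t *t, cpu_t *cp)
+ *	{
+ *		t->t_state = TS_ONPROC;			// first the state
+ *		t->t_lockp = &cp->cpu_thread_lock;	// then the lock ptr
+ *	}
+ */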
+
+ ENTRY(thread_onproc)
+ addq $CPU_THREAD_LOCK, %rsi /* pointer to disp_lock while running */
+ movl $ONPROC_THREAD, T_STATE(%rdi) /* set state to TS_ONPROC */
+ movq %rsi, T_LOCKP(%rdi) /* store new lock pointer */
+ ret
+ SET_SIZE(thread_onproc)
+
+/*
+ * mutex_delay_default(void)
+ * Spins for a few hundred processor cycles and returns to the caller.
+ */
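+
+/*
+ * Equivalent to a simple countdown (a sketch; the constant 92 is simply
+ * the tuning value the assembly below uses):
+ *
+ *	void
+ *	mutex_delay_default(void)
+ *	{
+ *		for (volatile long i = 92; i > 0; i--)
+ *			;
+ *	}
+ */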
+
+ ENTRY(mutex_delay_default)
+ movq $92, %r11
+0: decq %r11
+ jg 0b
+ ret
+ SET_SIZE(mutex_delay_default)
+