/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * General assembly language routines.
 * It is the intent of this file to contain routines that are
 * independent of the specific kernel architecture, and those that are
 * common across kernel architectures.
 * As architectures diverge, and implementations of specific
 * architecture-dependent routines change, the routines should be moved
 * from this file into the respective ../`arch -k`/subr.s file.
 * Or, if you want to be really nice, move them to a file whose
 * name has something to do with the routine you are moving.
 */

#include <sys/asm_linkage.h>
#include <sys/privregs.h>
#include <sys/machparam.h>	/* To get SYSBASE and PAGESIZE */
#include <sys/machthread.h>
#include <sys/clock.h>
#include <sys/psr_compat.h>
#include <sys/isa_defs.h>
#include <sys/dditypes.h>
#include <sys/panic.h>
#include <sys/machlock.h>
#include <sys/ontrap.h>

#include "assym.h"

	.seg	".text"
	.align	4

/*
 * Macro to raise processor priority level.
 * Avoid dropping processor priority if already at high level.
 * Also avoid going below CPU->cpu_base_spl, which could've just been set by
 * a higher-level interrupt thread that just blocked.
 *
 * level can be %o0 or a constant.  It must not be any other register:
 * %o1 and %o2 are used as scratch here.
 *
 * If the current PIL is already >= level, %pil is not written.  Otherwise
 * the PIL is set to the larger of level and CPU->cpu_base_spl.  The
 * expansion ends with retl, so it forms the tail of a leaf routine; the PIL
 * that was read on entry is returned in %o0.
 */
#define	RAISE(level) \
	rdpr	%pil, %o1;		/* get current PIL */		\
	cmp	%o1, level;		/* is PIL high enough? */	\
	bge	1f;			/* yes, return */		\
	nop;								\
	wrpr	%g0, PIL_MAX, %pil;	/* freeze CPU_BASE_SPL */	\
	ldn	[THREAD_REG + T_CPU], %o2;				\
	ld	[%o2 + CPU_BASE_SPL], %o2;				\
	cmp	%o2, level;		/* compare new to base */	\
	movl	%xcc, level, %o2;	/* use new if base lower */	\
	wrpr	%g0, %o2, %pil;						\
1:									\
	retl;								\
	mov	%o1, %o0		/* return old PIL */

/*
 * Macro to raise processor priority level to level >= DISP_LEVEL.
 * Doesn't require comparison to CPU->cpu_base_spl.
 *
 * level can be %o0 or a constant.  It must not be %o1, which is used as
 * scratch here.
 *
 * If the current PIL is already >= level it is left alone.  The expansion
 * ends with retl; the PIL that was read on entry is returned in %o0.
 */
#define	RAISE_HIGH(level) \
	rdpr	%pil, %o1;		/* get current PIL */		\
	cmp	%o1, level;		/* is PIL high enough? */	\
	bge	1f;			/* yes, return */		\
	nop;								\
	wrpr	%g0, level, %pil;	/* use chosen value */		\
1:									\
	retl;								\
	mov	%o1, %o0		/* return old PIL */
	
/*
 * Macro to set the priority to a specified level.
 * Avoid dropping the priority below CPU->cpu_base_spl.
 *
 * level can be %o0 or a constant.  It is used as a plain PIL value: it is
 * compared against cpu_base_spl and the larger of the two is written to
 * %pil.  It must not be %o1 or %o2, which are used as scratch here.
 *
 * Unlike RAISE(), this may lower the PIL.  The expansion ends with retl;
 * the PIL that was read on entry is returned in %o0.
 */
#define SETPRI(level) \
	rdpr	%pil, %o1;		/* get current PIL */		\
	wrpr	%g0, PIL_MAX, %pil;	/* freeze CPU_BASE_SPL */	\
	ldn	[THREAD_REG + T_CPU], %o2;				\
	ld	[%o2 + CPU_BASE_SPL], %o2;				\
	cmp	%o2, level;		/* compare new to base */	\
	movl	%xcc, level, %o2;	/* use new if base lower */	\
	wrpr	%g0, %o2, %pil;						\
	retl;								\
	mov	%o1, %o0		/* return old PIL */

/*
 * Macro to set the priority to a specified level at or above LOCK_LEVEL.
 * Doesn't require comparison to CPU->cpu_base_spl.
 *
 * level can be %o0 or a constant.  It is written to %pil as-is, so it is a
 * plain PIL value.  It must not be %o1, which is used as scratch here.
 *
 * The expansion ends with retl; the PIL that was read on entry is returned
 * in %o0.
 */
#define	SETPRI_HIGH(level) \
	rdpr	%pil, %o1;		/* get current PIL */		\
	wrpr	%g0, level, %pil;					\
	retl;								\
	mov	%o1, %o0		/* return old PIL */

	/*
	 * Berkeley 4.3 introduced symbolically named interrupt levels
	 * as a way to deal with priority in a machine independent fashion.
	 * Numbered priorities are machine specific, and should be
	 * discouraged where possible.
	 *
	 * Note, for the machine specific priorities there are
	 * examples listed for devices that use a particular priority.
	 * It should not be construed that all devices of that
	 * type should be at that priority.  It is currently where
	 * the current devices fit into the priority scheme based
	 * upon time criticalness.
	 *
	 * The underlying assumption of these assignments is that
	 * SPARC9 IPL 10 is the highest level from which a device
	 * routine can call wakeup.  Devices that interrupt from higher
	 * levels are restricted in what they can do.  If they need
	 * kernel services they should schedule a routine at a lower
	 * level (via software interrupt) to do the required
	 * processing.
	 *
	 * Examples of this higher usage:
	 *	Level	Usage
	 *	15	Asynchronous memory exceptions
	 *	14	Profiling clock (and PROM uart polling clock)
	 *	13	Audio device
	 *	12	Serial ports
	 *	11	Floppy controller
	 *
	 * The serial ports request lower level processing on level 6.
	 * Audio and floppy request lower level processing on level 4.
	 *
	 * Also, almost all splN routines (where N is a number or a
	 * mnemonic) will do a RAISE(), on the assumption that they are
	 * never used to lower our priority.
	 * The exceptions are:
	 *	spl8()		Because you can't be above 15 to begin with!
	 *	splzs()		Because this is used at boot time to lower our
	 *			priority, to allow the PROM to poll the uart.
	 *	spl0()		Used to lower priority to 0.
	 */

	/*
	 * locks out all interrupts, including memory errors
	 *
	 * Unconditionally sets the PIL to 15 and returns the previous PIL.
	 */
	ENTRY(spl8)
	SETPRI_HIGH(15)
	SET_SIZE(spl8)

	/*
	 * just below the level that profiling runs
	 *
	 * Raises the PIL to 13 if it is currently lower; never lowers it.
	 * Returns the previous PIL.
	 */
	ENTRY(spl7)
	RAISE_HIGH(13)
	SET_SIZE(spl7)

	/*
	 * sun specific - highest priority onboard serial i/o zs ports
	 *
	 * Unconditionally sets the PIL to 12 (this may lower it) and returns
	 * the previous PIL.
	 */
	ENTRY(splzs)
	SETPRI_HIGH(12)	/* Can't be a RAISE, as it's used to lower us */
	SET_SIZE(splzs)

	/*
	 * should lock out clocks and all interrupts,
	 * as you can see, there are exceptions
	 *
	 * splhi, splhigh, spl6 and i_ddi_splhigh are four names for the same
	 * code: raise the PIL to DISP_LEVEL if it is currently lower, and
	 * return the previous PIL.
	 */
	ENTRY(splhi)
	ALTENTRY(splhigh)
	ALTENTRY(spl6)
	ALTENTRY(i_ddi_splhigh)
	RAISE_HIGH(DISP_LEVEL)
	SET_SIZE(i_ddi_splhigh)
	SET_SIZE(spl6)
	SET_SIZE(splhigh)
	SET_SIZE(splhi)

	/*
	 * allow all interrupts
	 *
	 * Requests PIL 0; SETPRI() keeps the result from going below
	 * CPU->cpu_base_spl.  Returns the previous PIL.
	 */
	ENTRY(spl0)
	SETPRI(0)
	SET_SIZE(spl0)

/*
 * splx - set PIL back to that indicated by the old %pil passed as an argument,
 * or to the CPU's base priority, whichever is higher.
 *
 * %o0 holds the PIL to restore (normally the value an earlier spl routine
 * returned).  The PIL in effect on entry is returned in %o0.
 * i_ddi_splx is a second name for the same code.
 */

	ENTRY(splx)
	ALTENTRY(i_ddi_splx)
	SETPRI(%o0)		/* set PIL */
	SET_SIZE(i_ddi_splx)
	SET_SIZE(splx)

/*
 * splr()
 *
 * splr is like splx but will only raise the priority and never drop it.
 * Be careful not to set priority lower than CPU->cpu_base_spl:
 * even though it seems we're raising the priority, cpu_base_spl could be
 * set higher at any time by an interrupt routine, so we must block
 * interrupts and look at CPU->cpu_base_spl (RAISE() does both).
 *
 * %o0 holds the requested PIL; the PIL in effect on entry is returned.
 */

	ENTRY(splr)
	RAISE(%o0)
	SET_SIZE(splr)

/*
 * on_fault()
 * Catch lofault faults. Like setjmp except it returns one
 * if code following causes uncorrectable fault. Turned off
 * by calling no_fault().
 *
 * %o0 is the jump buffer.  It is recorded in curthread->t_onfault and then
 * handed to setjmp() by a plain branch, so setjmp's return (0) goes
 * directly back to our caller.  catch_fault is installed in t_lofault;
 * when it is entered it clears both t_onfault and t_lofault and longjmp()s
 * to the recorded buffer, which makes the original on_fault() call appear
 * to return 1.
 */

	ENTRY(on_fault)
	membar	#Sync			! sync error barrier (see copy.s)
	stn	%o0, [THREAD_REG + T_ONFAULT]	! t_onfault = jump buffer
	set	catch_fault, %o1
	b	setjmp			! let setjmp do the rest
	stn	%o1, [THREAD_REG + T_LOFAULT]	! put catch_fault in t_lofault

catch_fault:
	save	%sp, -SA(WINDOWSIZE), %sp ! goto next window so that we can rtn
	ldn	[THREAD_REG + T_ONFAULT], %o0	! %o0 = recorded jump buffer
	membar	#Sync				! sync error barrier
	stn	%g0, [THREAD_REG + T_ONFAULT]	! turn off onfault
	b	longjmp			! let longjmp do the rest
	stn	%g0, [THREAD_REG + T_LOFAULT]	! turn off lofault
	SET_SIZE(on_fault)

/*
 * no_fault()
 * turn off fault catching.
 *
 * Clears both curthread->t_onfault and curthread->t_lofault, after a
 * membar #Sync error barrier.  Takes no arguments.
 */

	ENTRY(no_fault)
	membar	#Sync				! sync error barrier
	stn	%g0, [THREAD_REG + T_ONFAULT]	! turn off onfault
	retl
	stn	%g0, [THREAD_REG + T_LOFAULT]	! turn off lofault
	SET_SIZE(no_fault)

/*
 * Default trampoline code for on_trap() (see <sys/ontrap.h>).  On sparcv9,
 * the trap code will complete trap processing but reset the return %pc to
 * ot_trampoline, which will by default be set to the address of this code.
 * We longjmp(&curthread->t_ontrap->ot_jmpbuf) to return back to on_trap().
 */

	ENTRY(on_trap_trampoline)
	ldn	[THREAD_REG + T_ONTRAP], %o0	! %o0 = curthread->t_ontrap
	b	longjmp				! longjmp never returns here
	add	%o0, OT_JMPBUF, %o0		! delay - %o0 = &ot_jmpbuf
	SET_SIZE(on_trap_trampoline)

/*
 * Push a new element on to the t_ontrap stack.  Refer to <sys/ontrap.h> for
 * more information about the on_trap() mechanism.  If the on_trap_data is the
 * same as the topmost stack element, we just modify that element.
 * On UltraSPARC, we need to issue a membar #Sync before modifying t_ontrap.
 * The issue barrier is defined to force all deferred errors to complete before
 * we go any further.  We want these errors to be processed before we modify
 * our current error protection.
 *
 * %o0 is the on_trap_data pointer (otp) and %o1 is the protection value
 * stored in ot_prot.  The routine finishes by branching to setjmp() on
 * otp's ot_jmpbuf, so the direct return value is setjmp's 0.
 */

	ENTRY(on_trap)
	membar	#Sync				! force error barrier
	sth	%o1, [%o0 + OT_PROT]		! ot_prot = prot
	sth	%g0, [%o0 + OT_TRAP]		! ot_trap = 0
	set	on_trap_trampoline, %o2		! %o2 = &on_trap_trampoline
	stn	%o2, [%o0 + OT_TRAMPOLINE]	! ot_trampoline = %o2
	stn	%g0, [%o0 + OT_HANDLE]		! ot_handle = NULL
	ldn	[THREAD_REG + T_ONTRAP], %o2	! %o2 = curthread->t_ontrap
	cmp	%o0, %o2			! if (otp == %o2)
	be	0f				!    don't modify t_ontrap
	stn	%g0, [%o0 + OT_PAD1]		! delay - ot_pad1 = NULL

	stn	%o2, [%o0 + OT_PREV]		! ot_prev = t_ontrap
	membar	#Sync				! force error barrier
	stn	%o0, [THREAD_REG + T_ONTRAP]	! t_ontrap = otp

0:	b	setjmp				! let setjmp do the rest
	add	%o0, OT_JMPBUF, %o0		! %o0 = &ot_jmpbuf
	SET_SIZE(on_trap)

/*
 * Setjmp and longjmp implement non-local gotos using state vectors
 * type label_t.
 *
 * setjmp records the stack pointer and %o7 of its caller in the label_t
 * passed in %o0, then returns 0.
 */

	ENTRY(setjmp)
	stn	%sp, [%o0 + L_SP]	! remember the stack pointer
	stn	%o7, [%o0 + L_PC]	! remember the return address
	retl
	mov	%g0, %o0		! delay - a direct return yields 0
	SET_SIZE(setjmp)


	ENTRY(longjmp)
	!
	! Resume at the point recorded by setjmp in the label_t passed in
	! %o0; the resumed setjmp call appears to return 1.
	!
	! The following save is required so that an extra register
	! window is flushed.  Flushw flushes nwindows-2
	! register windows.  If setjmp and longjmp are called from
	! within the same window, that window will not get pushed
	! out onto the stack without the extra save below.  Tail call
	! optimization can lead to callers of longjmp executing
	! from a window that could be the same as the setjmp,
	! thus the need for the following save.
	!
	save    %sp, -SA(MINFRAME), %sp
	flushw				! flush all but this window
	ldn	[%i0 + L_PC], %i7	! restore return addr
	ldn	[%i0 + L_SP], %fp	! restore sp for dest on foreign stack
	ret				! return 1
	restore	%g0, 1, %o0		! takes underflow, switches stacks
	SET_SIZE(longjmp)

/*
 * movtuc(length, from, to, table)
 *
 * VAX movtuc instruction (sort of).
 *
 * Translates bytes of `from' through `table' into `to', stopping after
 * `length' bytes or at the first byte whose table entry is zero (the
 * escape character).  Returns the number of bytes moved, not counting the
 * escape.  Note that the zero table entry itself is still written to
 * to[index], because the store sits in a non-annulled delay slot.
 *
 * %o0 = length, %o1 = from, %o2 = to, %o3 = table, %o4 = index.
 * A length <= 0 moves nothing and returns 0.
 */

	ENTRY(movtuc)
	tst     %o0
	ble,pn	%ncc, 2f		! check length
	clr     %o4

	ldub    [%o1 + %o4], %g1        ! get next byte in string
0:
	ldub    [%o3 + %g1], %g1        ! get corresponding table entry
	tst     %g1                     ! escape char?
	bnz     1f
	stb     %g1, [%o2 + %o4]        ! delay slot, store it

	retl                            ! return (bytes moved)
	mov     %o4, %o0
1:
	inc     %o4                     ! increment index
	cmp     %o4, %o0                ! index < length ?
	bl,a,pt	%ncc, 0b
	ldub    [%o1 + %o4], %g1        ! delay slot, get next byte in string
2:
	retl                            ! return (bytes moved)
	mov     %o4, %o0
	SET_SIZE(movtuc)

/*
 * scanc(length, string, table, mask)
 *
 * VAX scanc instruction.
 *
 * Scans `string' for the first byte whose `table' entry has any of the
 * `mask' bits set.  Returns the number of bytes remaining in the string
 * counting from that byte (length - index), or 0 if no byte matches.
 * A length <= 0 is returned unchanged.
 *
 * %o0 = length, %o1 = string, %o2 = table, %o3 = mask, %o4 = index.
 *
 * The bounds check is made before each load, so neither string[length]
 * nor the table entry it would select is ever read.
 */

	ENTRY(scanc)
	tst	%o0
	ble,pn	%ncc, 1f		! check length
	clr	%o4			! delay - index = 0
0:
	ldub	[%o1 + %o4], %g1	! get next byte in string
	ldub	[%o2 + %g1], %g1	! get corresponding table entry
	btst	%o3, %g1		! apply the mask
	bnz,pn	%icc, 1f		! mask bit set - stop on this byte
	nop
	inc	%o4			! increment index
	cmp	%o4, %o0		! index < length ?
	bl,pt	%ncc, 0b		! yes - look at the next byte
	nop
1:
	retl				! return(length - index)
	sub	%o0, %o4, %o0
	SET_SIZE(scanc)

/*
 * if a() calls b() calls caller(),
 * caller() returns return address in a().
 *
 * caller() does no save, so it executes in b()'s register window and
 * simply hands back that window's %i7.
 */

	ENTRY(caller)
	retl
	mov	%i7, %o0		! delay - return %i7 of the calling window
	SET_SIZE(caller)

/*
 * if a() calls callee(), callee() returns the
 * return address in a();
 *
 * callee() does no save, so %o7 still holds the value a() set when it
 * made the call.
 */

	ENTRY(callee)
	retl
	mov	%o7, %o0		! delay - return our own %o7
	SET_SIZE(callee)

/*
 * return the current frame pointer
 *
 * getfp() does no save, so the %fp returned is that of the calling
 * routine's register window.
 */

	ENTRY(getfp)
	retl
	mov	%fp, %o0		! delay - return %fp of the calling window
	SET_SIZE(getfp)

/*
 * Get vector base register
 *
 * Returns the contents of %tbr in %o0.
 *
 * NOTE(review): %tbr is the SPARC V8 name for this register; the other
 * routines in this file read the V9 %tba with rdpr (see get_tba).  Confirm
 * how the assembler in use treats this instruction.
 */

	ENTRY(gettbr)
	retl
	mov     %tbr, %o0
	SET_SIZE(gettbr)

/*
 * Get processor state register, V9 faked to look like V8.
 * Note: does not provide ccr.xcc and provides FPRS.FEF instead of
 * PSTATE.PEF, because PSTATE.PEF is always on in order to allow the
 * libc_psr memcpy routines to run without hitting the fp_disabled trap.
 *
 * The value returned is assembled from three pieces: %fprs.fef shifted
 * into the V8 psr.ef position, %ccr shifted into the V8 psr.icc position,
 * and the constant V9_PSR_IMPLVER.
 */

	ENTRY(getpsr)
	rd	%fprs, %o1			! begin with the FP state
	and	%o1, FPRS_FEF, %o1		! keep fef, drop dirty upper/lower
	sllx	%o1, PSR_FPRS_FEF_SHIFT, %o1	! position fef as V8 psr.ef
	rd	%ccr, %o0			! now the condition codes
	sll	%o0, PSR_ICC_SHIFT, %o0		! position icc as V8 psr.icc
	or	%o0, %o1, %o0			! combine icc and ef
	set	V9_PSR_IMPLVER, %o1		! SI assigned impl/ver: 0xef
	retl
	or	%o0, %o1, %o0			! delay - or in psr.impl/ver
	SET_SIZE(getpsr)

/*
 * Get current processor interrupt level
 *
 * Returns the contents of %pil in %o0.
 */

	ENTRY(getpil)
	retl
	rdpr	%pil, %o0		! delay - return %pil
	SET_SIZE(getpil)

	ENTRY(setpil)
	retl
	wrpr	%g0, %o0, %pil		! delay - %pil = %o0, with no check
					! against cpu_base_spl (compare splx)
	SET_SIZE(setpil)


/*
 * _insque(entryp, predp)
 *
 * Insert entryp after predp in a doubly linked list.
 *
 * Each element starts with a forward pointer at offset 0 followed by a
 * back pointer at offset CPTRSIZE.  predp->forw is dereferenced without
 * a NULL check.
 */

	ENTRY(_insque)
	ldn	[%o1], %g1		! predp->forw
	stn	%o1, [%o0 + CPTRSIZE]	! entryp->back = predp
	stn	%g1, [%o0]		! entryp->forw = predp->forw
	stn	%o0, [%o1]		! predp->forw = entryp
	retl
	stn	%o0, [%g1 + CPTRSIZE]	! predp->forw->back = entryp
	SET_SIZE(_insque)

/*
 * _remque(entryp)
 *
 * Remove entryp from a doubly linked list
 *
 * Both neighbours are dereferenced without a NULL check, and entryp's own
 * forw/back pointers are left as they were.
 */

	ENTRY(_remque)
	ldn	[%o0], %g1		! entryp->forw
	ldn	[%o0 + CPTRSIZE], %g2	! entryp->back
	stn	%g1, [%g2]		! entryp->back->forw = entryp->forw
	retl
	stn	%g2, [%g1 + CPTRSIZE]	! entryp->forw->back = entryp->back
	SET_SIZE(_remque)


/*
 * strlen(str)
 *
 * Returns the number of non-NULL bytes in string argument.
 *
 * The string is first consumed a byte and/or a half-word at a time until
 * the pointer is word aligned, then a word at a time.  Register use:
 *	%o0	running length (the return value)
 *	%o1	current read pointer
 *	%o2	word most recently loaded by the main loop
 *	%o3-%o5	scratch / constants
 *
 * XXX -  why is this here, rather than the traditional file?
 *	  why does it have local labels which don't start with a `.'?
 */

	ENTRY(strlen)
	mov	%o0, %o1
	andcc	%o1, 3, %o3		! is src word aligned
	bz	$nowalgnd
	clr	%o0			! length of non-zero bytes
	cmp	%o3, 2			! is src half-word aligned
	be	$s2algn
	cmp	%o3, 3			! src is byte aligned
	ldub	[%o1], %o3		! read 1 byte to move toward alignment
	inc	1, %o1			! in either case, safe to do a byte
	be	$s3algn			! (tests the cmp %o3, 3 above)
	tst	%o3			! delay - was the byte zero?
$s1algn:
	bnz,a	$s2algn			! non-zero: src is now half-word aligned
	inc	1, %o0
	b,a	$done

$s2algn:
	lduh	[%o1], %o3		! know src is half-word aligned
	inc	2, %o1
	srl	%o3, 8, %o4
	tst	%o4			! is the first byte zero
	bnz,a	1f
	inc	%o0
	b,a	$done
1:	andcc	%o3, 0xff, %o3		! is the second byte zero
	bnz,a	$nowalgnd
	inc	%o0
	b,a	$done
$s3algn:
	bnz,a	$nowalgnd		! non-zero: src is now word aligned
	inc	1, %o0
	b,a	$done

$nowalgnd:
	! use trick to check if any read bytes of a word are zero
	! the following two constants will generate "byte carries"
	! and check if any bit in a byte is set, if all characters
	! are 7bits (unsigned) this always works, otherwise
	! there is a special case that rarely happens, see below

	set	0x7efefeff, %o3
	set	0x81010100, %o4

3:	ld	[%o1], %o2		! main loop
	inc	4, %o1
	add	%o2, %o3, %o5		! generate byte-carries
	xor	%o5, %o2, %o5		! see if original bits set
	and	%o5, %o4, %o5
	cmp	%o5, %o4		! if ==,  no zero bytes
	be,a	3b
	inc	4, %o0

	! check for the zero byte and increment the count appropriately
	! some information (the carry bit) is lost if bit 31
	! was set (very rare), if this is the rare condition,
	! return to the main loop again

	sethi	%hi(0xff000000), %o5	! mask used to test for terminator
	andcc	%o2, %o5, %g0		! check if first byte was zero
	bnz	1f
	srl	%o5, 8, %o5
$done:
	retl
	nop
1:	andcc	%o2, %o5, %g0		! check if second byte was zero
	bnz	1f
	srl	%o5, 8, %o5
$done1:
	retl
	inc	%o0
1:	andcc 	%o2, %o5, %g0		! check if third byte was zero
	bnz	1f
	andcc	%o2, 0xff, %g0		! check if last byte is zero
$done2:
	retl
	inc	2, %o0
1:	bnz,a	3b			! no zero byte after all: keep looping
	inc	4, %o0			! count of bytes
$done3:
	retl
	inc	3, %o0
	SET_SIZE(strlen)

/*
 * Provide a C callable interface to the membar instruction.
 *
 * Each membar_*() routine takes no arguments and issues a single membar
 * with the named ordering constraint(s) from the delay slot of its return.
 */

	ENTRY(membar_ldld)
	retl
	membar	#LoadLoad		! delay - the barrier itself
	SET_SIZE(membar_ldld)

	ENTRY(membar_stld)
	retl
	membar	#StoreLoad		! delay - the barrier itself
	SET_SIZE(membar_stld)

	ENTRY(membar_ldst)
	retl
	membar	#LoadStore		! delay - the barrier itself
	SET_SIZE(membar_ldst)

	ENTRY(membar_stst)
	retl
	membar	#StoreStore		! delay - the barrier itself
	SET_SIZE(membar_stst)

	ENTRY(membar_ldld_stld)
	ALTENTRY(membar_stld_ldld)	! same barrier under either name order
	retl
	membar	#LoadLoad|#StoreLoad	! delay - the barrier itself
	SET_SIZE(membar_stld_ldld)
	SET_SIZE(membar_ldld_stld)

	ENTRY(membar_ldld_ldst)
	ALTENTRY(membar_ldst_ldld)	! same barrier under either name order
	retl
	membar	#LoadLoad|#LoadStore	! delay - the barrier itself
	SET_SIZE(membar_ldst_ldld)
	SET_SIZE(membar_ldld_ldst)

	ENTRY(membar_ldld_stst)
	ALTENTRY(membar_stst_ldld)	! same barrier under either name order
	retl
	membar	#LoadLoad|#StoreStore	! delay - the barrier itself
	SET_SIZE(membar_stst_ldld)
	SET_SIZE(membar_ldld_stst)

	ENTRY(membar_stld_ldst)
	ALTENTRY(membar_ldst_stld)	! same barrier under either name order
	retl
	membar	#StoreLoad|#LoadStore	! delay - the barrier itself
	SET_SIZE(membar_ldst_stld)
	SET_SIZE(membar_stld_ldst)

	ENTRY(membar_stld_stst)
	ALTENTRY(membar_stst_stld)	! same barrier under either name order
	retl
	membar	#StoreLoad|#StoreStore	! delay - the barrier itself
	SET_SIZE(membar_stst_stld)
	SET_SIZE(membar_stld_stst)

	ENTRY(membar_ldst_stst)
	ALTENTRY(membar_stst_ldst)	! same barrier under either name order
	retl
	membar	#LoadStore|#StoreStore	! delay - the barrier itself
	SET_SIZE(membar_stst_ldst)
	SET_SIZE(membar_ldst_stst)

	ENTRY(membar_lookaside)
	retl
	membar	#Lookaside		! delay - the barrier itself
	SET_SIZE(membar_lookaside)

	ENTRY(membar_memissue)
	retl
	membar	#MemIssue		! delay - the barrier itself
	SET_SIZE(membar_memissue)

	ENTRY(membar_sync)
	retl
	membar	#Sync			! delay - the barrier itself
	SET_SIZE(membar_sync)


/*
 * Since all of the fuword() variants are so similar, we have a macro to spit
 * them out.
 *
 * FUWORD(NAME, LOAD, STORE, COPYOP) defines routine NAME, which fetches one
 * item from the user address in %o0 (using LOAD through ASI_USER) and
 * writes it with STORE to the address in %o1.
 *
 * Normal path: the current t_lofault is saved in %o3, the local label 1f is
 * installed as the lofault handler around the user load, the saved handler
 * is put back, and 0 is returned.
 *
 * Fault path (label 1): the saved t_lofault is put back.  If the thread has
 * a t_copyops vector, control jumps to the entry at offset COPYOP in it
 * with %o0/%o1 and %o7 still as the caller passed them; otherwise -1 is
 * returned.
 */

#define	FUWORD(NAME, LOAD, STORE, COPYOP)	\
	ENTRY(NAME);				\
	sethi	%hi(1f), %o5;			\
	ldn	[THREAD_REG + T_LOFAULT], %o3;	\
	or	%o5, %lo(1f), %o5;		\
	membar	#Sync;				\
	stn	%o5, [THREAD_REG + T_LOFAULT];	\
	LOAD	[%o0]ASI_USER, %o2;		\
	membar	#Sync;				\
	stn	%o3, [THREAD_REG + T_LOFAULT];	\
	mov	0, %o0;				\
	retl;					\
	STORE	%o2, [%o1];			\
1:						\
	membar	#Sync;				\
	stn	%o3, [THREAD_REG + T_LOFAULT];	\
	ldn	[THREAD_REG + T_COPYOPS], %o2;	\
	brz	%o2, 2f;			\
	nop;					\
	ldn	[%o2 + COPYOP], %g1;		\
	jmp	%g1;				\
	nop;					\
2:						\
	retl;					\
	mov	-1, %o0;			\
	SET_SIZE(NAME)

	/*
	 * The four fetch routines: 64-, 32-, 16- and 8-bit user loads, each
	 * paired with the store of matching width and its copyops slot.
	 * The 16- and 8-bit loads are the unsigned forms.
	 */
	FUWORD(fuword64, ldxa, stx, CP_FUWORD64)
	FUWORD(fuword32, lda, st, CP_FUWORD32)
	FUWORD(fuword16, lduha, sth, CP_FUWORD16)
	FUWORD(fuword8, lduba, stb, CP_FUWORD8)


/*
 * Since all of the suword() variants are so similar, we have a macro to spit
 * them out.
 *
 * SUWORD(NAME, STORE, COPYOP) defines routine NAME, which writes the value
 * in %o1 to the user address in %o0 (using STORE through ASI_USER).
 *
 * Normal path: the current t_lofault is saved in %o3, the local label 1f is
 * installed as the lofault handler around the user store, the saved handler
 * is put back, and 0 is returned.
 *
 * Fault path (label 1): the saved t_lofault is put back.  If the thread has
 * a t_copyops vector, control jumps to the entry at offset COPYOP in it
 * with %o0/%o1 and %o7 still as the caller passed them; otherwise -1 is
 * returned.
 */

#define	SUWORD(NAME, STORE, COPYOP)		\
	ENTRY(NAME)				\
	sethi	%hi(1f), %o5;			\
	ldn	[THREAD_REG + T_LOFAULT], %o3;	\
	or	%o5, %lo(1f), %o5;		\
	membar	#Sync;				\
	stn	%o5, [THREAD_REG + T_LOFAULT];	\
	STORE	%o1, [%o0]ASI_USER;		\
	membar	#Sync;				\
	stn	%o3, [THREAD_REG + T_LOFAULT];	\
	retl;					\
	clr	%o0;				\
1:						\
	membar	#Sync;				\
	stn	%o3, [THREAD_REG + T_LOFAULT];	\
	ldn	[THREAD_REG + T_COPYOPS], %o2;	\
	brz	%o2, 2f;			\
	nop;					\
	ldn	[%o2 + COPYOP], %g1;		\
	jmp	%g1;				\
	nop;					\
2:						\
	retl;					\
	mov	-1, %o0;			\
	SET_SIZE(NAME)

	/*
	 * The four store routines: 64-, 32-, 16- and 8-bit user stores, each
	 * paired with its copyops slot.
	 */
	SUWORD(suword64, stxa, CP_SUWORD64)
	SUWORD(suword32, sta, CP_SUWORD32)
	SUWORD(suword16, stha, CP_SUWORD16)
	SUWORD(suword8, stba, CP_SUWORD8)

	ENTRY(fuword8_noerr)
	lduba	[%o0]ASI_USER, %o0	! fetch user byte; no lofault handler
					! is installed by this routine
	retl
	stb	%o0, [%o1]		! delay - store it at the address in %o1
	SET_SIZE(fuword8_noerr)

	ENTRY(fuword16_noerr)
	lduha	[%o0]ASI_USER, %o0	! fetch user half-word; no lofault
					! handler is installed by this routine
	retl
	sth	%o0, [%o1]		! delay - store it at the address in %o1
	SET_SIZE(fuword16_noerr)

	ENTRY(fuword32_noerr)
	lda	[%o0]ASI_USER, %o0	! fetch user word; no lofault handler
					! is installed by this routine
	retl
	st	%o0, [%o1]		! delay - store it at the address in %o1
	SET_SIZE(fuword32_noerr)

	ENTRY(fuword64_noerr)
	ldxa	[%o0]ASI_USER, %o0	! fetch user 64-bit word; no lofault
					! handler is installed by this routine
	retl
	stx	%o0, [%o1]		! delay - store it at the address in %o1
	SET_SIZE(fuword64_noerr)

	ENTRY(suword8_noerr)
	retl
	stba	%o1, [%o0]ASI_USER	! delay - store byte to user address;
					! no lofault handler is installed here
	SET_SIZE(suword8_noerr)

	ENTRY(suword16_noerr)
	retl
	stha	%o1, [%o0]ASI_USER	! delay - store half-word to user address;
					! no lofault handler is installed here
	SET_SIZE(suword16_noerr)

	ENTRY(suword32_noerr)
	retl
	sta	%o1, [%o0]ASI_USER	! delay - store word to user address;
					! no lofault handler is installed here
	SET_SIZE(suword32_noerr)

	ENTRY(suword64_noerr)
	retl
	stxa	%o1, [%o0]ASI_USER	! delay - store 64-bit word to user
					! address; no lofault handler is
					! installed here
	SET_SIZE(suword64_noerr)

	/*
	 * Weak aliases.  subyte is suword8.  The "long word" names
	 * (fulword, sulword and their _noerr forms) resolve to the 64-bit
	 * routines when _LP64 is defined and to the 32-bit routines
	 * otherwise.
	 */
	.weak	subyte
	subyte=suword8
	.weak	subyte_noerr
	subyte_noerr=suword8_noerr
#ifdef _LP64
	.weak	fulword
	fulword=fuword64
	.weak	fulword_noerr
	fulword_noerr=fuword64_noerr
	.weak	sulword
	sulword=suword64
	.weak	sulword_noerr
	sulword_noerr=suword64_noerr
#else
	.weak	fulword
	fulword=fuword32
	.weak	fulword_noerr
	fulword_noerr=fuword32_noerr
	.weak	sulword
	sulword=suword32
	.weak	sulword_noerr
	sulword_noerr=suword32_noerr
#endif	/* _LP64 */

/*
 * We define rdtick here, but not for sun4v. On sun4v systems, the %tick
 * and %stick should not be read directly without considering the tick
 * and stick offset kernel variables introduced to support sun4v OS
 * suspension.
 *
 * rdtick() takes no arguments and returns the raw contents of %tick.
 */
#if !defined (sun4v)

	ENTRY(rdtick)
	retl
	rd	%tick, %o0		! delay - return %tick
        SET_SIZE(rdtick)

#endif /* !sun4v */

/*
 * set_tba(new)
 *
 * Install the address passed in %o0 as the trap base address, with no
 * other side effects, and return the value %tba held beforehand.
 */

	ENTRY(set_tba)
	rdpr	%tba, %o1		! %o1 = previous trap base address
	wrpr	%o0, %tba		! install the new one
	retl
	mov	%o1, %o0		! delay - return the previous %tba
	SET_SIZE(set_tba)

	ENTRY(get_tba)
	retl
	rdpr	%tba, %o0		! delay - return the current %tba
	SET_SIZE(get_tba)

	ENTRY_NP(setpstate)
	retl
	wrpr	%g0, %o0, %pstate	! delay - %pstate = %o0
	SET_SIZE(setpstate)

	ENTRY_NP(getpstate)
	retl
	rdpr	%pstate, %o0		! delay - return the current %pstate
	SET_SIZE(getpstate)

	ENTRY_NP(dtrace_interrupt_disable)
	rdpr	%pstate, %o0		! %o0 = current %pstate (returned as the
					! cookie for dtrace_interrupt_enable)
	andn	%o0, PSTATE_IE, %o1	! %o1 = same, with interrupt enable off
	retl
	wrpr	%g0, %o1, %pstate	! delay - disable interrupts
	SET_SIZE(dtrace_interrupt_disable)

	ENTRY_NP(dtrace_interrupt_enable)
	retl
	wrpr	%g0, %o0, %pstate	! delay - write %o0 back to %pstate; this
					! restores the whole register, not
					! only the PSTATE_IE bit
	SET_SIZE(dtrace_interrupt_enable)

/*
 * DTRACE_MEMBAR_RETURN is the instruction the dtrace_membar_*() routines
 * use to return, with their membar in its delay slot.  When SF_ERRATA_51
 * is defined they branch to a shared, 32-byte aligned retl instead of
 * executing retl directly in front of the membar.
 */
#ifdef SF_ERRATA_51
	.align 32
	ENTRY(dtrace_membar_return)
	retl
	nop
	SET_SIZE(dtrace_membar_return)
#define	DTRACE_MEMBAR_RETURN	ba,pt %icc, dtrace_membar_return
#else
#define	DTRACE_MEMBAR_RETURN	retl
#endif

	ENTRY(dtrace_membar_producer)
	DTRACE_MEMBAR_RETURN		! return (see the macro definition)
	membar	#StoreStore		! delay - order earlier stores before
					! later stores
	SET_SIZE(dtrace_membar_producer)

	ENTRY(dtrace_membar_consumer)
	DTRACE_MEMBAR_RETURN		! return (see the macro definition)
	membar	#LoadLoad		! delay - order earlier loads before
					! later loads
	SET_SIZE(dtrace_membar_consumer)

	ENTRY_NP(dtrace_flush_windows)
	retl
	flushw				! delay - flush register windows
	SET_SIZE(dtrace_flush_windows)

	/*
	 * getpcstack_top(pcstack, limit, lastfp, lastpc)
	 *
	 * Collect return addresses directly from the register windows that
	 * are still resident in the CPU, without touching the stack.  The
	 * frame of our immediate caller is skipped.  Interrupts are disabled
	 * for the duration, and the window state (%cwp, %cansave,
	 * %canrestore) is put back before returning.
	 *
	 * At most min(%canrestore - 1, limit) entries are written; each is
	 * stored as a 32-bit word.  If that count is <= 0, the windows are
	 * flushed instead, zero is returned, and the fp/pc handed back are
	 * loaded from the register save area at %fp.
	 *
	 * %g1	pcstack
	 * %g2	iteration count
	 * %g3	final %fp
	 * %g4	final %i7
	 * %g5	saved %cwp (so we can get back to the original window)
	 *
	 * %o0	pcstack / return value (iteration count)
	 * %o1	limit / saved %cansave
	 * %o2	lastfp
	 * %o3	lastpc
	 * %o4	saved %canrestore
	 * %o5	saved %pstate (to restore interrupts)
	 *
	 * Note:  The frame pointer returned via lastfp is safe to use as
	 *	long as getpcstack_top() returns either (0) or a value less
	 *	than (limit).
	 */
	ENTRY_NP(getpcstack_top)

	rdpr	%pstate, %o5
	andn	%o5, PSTATE_IE, %g1
	wrpr	%g0, %g1, %pstate	! disable interrupts

	mov	%o0, %g1		! we need the pcstack pointer while
					! we're visiting other windows

	rdpr	%canrestore, %g2	! number of available windows
	sub	%g2, 1, %g2		! account for skipped frame
	cmp	%g2, %o1		! compare with limit
	movg	%icc, %o1, %g2		! %g2 = min(%canrestore-1, limit)

	brlez,a,pn %g2, 3f		! Use slow path if count <= 0 --
	clr	%o0			! return zero.

	mov	%g2, %o0		! set up return value

	rdpr	%cwp, %g5		! remember the register window state
	rdpr	%cansave, %o1		! 'restore' changes, so we can undo
	rdpr	%canrestore, %o4	! its effects when we finish.

	restore				! skip caller's frame
1:
	st	%i7, [%g1]		! stash return address in pcstack
	restore				! go to the next frame
	subcc	%g2, 1, %g2		! decrement the count
	bnz,pt	%icc, 1b		! loop until count reaches 0
	add	%g1, 4, %g1		! increment pcstack

	mov	%i6, %g3		! copy the final %fp and return PC
	mov	%i7, %g4		! aside so we can return them to our
					! caller

	wrpr	%g0, %g5, %cwp		! jump back to the original window
	wrpr	%g0, %o1, %cansave	! and restore the original register
	wrpr	%g0, %o4, %canrestore	! window state.
2:
	stn	%g3, [%o2]		! store the frame pointer and pc
	st	%g4, [%o3]		! so our caller can continue the trace

	retl				! return to caller
	wrpr	%g0, %o5, %pstate	! restore interrupts

3:
	flushw				! flush register windows, then
	ldn	[%fp + STACK_BIAS + 14*CLONGSIZE], %g3	! load initial fp
	ba	2b
	ldn	[%fp + STACK_BIAS + 15*CLONGSIZE], %g4	! and pc
	SET_SIZE(getpcstack_top)

	ENTRY_NP(setwstate)
	retl
	wrpr	%g0, %o0, %wstate	! delay - %wstate = %o0
	SET_SIZE(setwstate)


	ENTRY_NP(getwstate)
	retl
	rdpr	%wstate, %o0		! delay - return the current %wstate
	SET_SIZE(getwstate)


/*
 * int panic_trigger(int *tp)
 *
 * A panic trigger is a word which is updated atomically and can only be set
 * once.  We atomically store 0xFF into the byte at tp (the high byte of the
 * word) and load the old value.  If the byte was 0xFF, the trigger has
 * already been activated and we fail, returning 0.  If the previous value
 * was 0 or anything other than 0xFF, we succeed and return 1.  This allows
 * a partially corrupt trigger to still trigger correctly.  DTrace has its
 * own version of this function to allow it to panic correctly from probe
 * context.
 */

	ENTRY_NP(panic_trigger)
	ldstub	[%o0], %o1		! %o1 = old byte; byte is now 0xFF
	cmp	%o1, 0xFF		! had the trigger already fired?
	clr	%o0			! assume so: return (0)
	retl
	movne	%icc, 1, %o0		! delay - if not, return (1)
	SET_SIZE(panic_trigger)

	ENTRY_NP(dtrace_panic_trigger)
	!
	! Same contract as panic_trigger(): atomically set the byte at %o0
	! to 0xFF; return 0 if it was already 0xFF, otherwise return 1.
	!
	ldstub	[%o0], %o1		! %o1 = old byte; byte is now 0xFF
	cmp	%o1, 0xFF		! had the trigger already fired?
	clr	%o0			! assume so: return (0)
	retl
	movne	%icc, 1, %o0		! delay - if not, return (1)
	SET_SIZE(dtrace_panic_trigger)

/*
 * void vpanic(const char *format, va_list alist)
 *
 * The panic() and cmn_err() functions invoke vpanic() as a common entry point
 * into the panic code implemented in panicsys().  vpanic() is responsible
 * for passing through the format string and arguments, and constructing a
 * regs structure on the stack into which it saves the current register
 * values.  If we are not dying due to a fatal trap, these registers will
 * then be preserved in panicbuf as the current processor state.  Before
 * invoking panicsys(), vpanic() activates the first panic trigger (see
 * common/os/panic.c) and switches to the panic_stack if successful.  Note that
 * DTrace takes a slightly different panic path if it must panic from probe
 * context.  Instead of calling panic, it calls into dtrace_vpanic(), which
 * sets up the initial stack as vpanic does, calls dtrace_panic_trigger(), and
 * branches back into vpanic().
 *
 * Register use between the save and the call to panicsys():
 *	%l0	synthesized %tstate image
 *	%l1	%pc to record (address of the entry point)
 *	%l2	%npc to record (%l1 + 4)
 *	%l3	%y
 *	%l4	result of the panic trigger (non-zero: we switched stacks)
 *
 * vpanic_common is also the join point for dtrace_vpanic(), which arrives
 * with %l0-%l3 set up the same way and the trigger result in %o0.
 */

	ENTRY_NP(vpanic)

	save	%sp, -SA(MINFRAME + REGSIZE), %sp	! save and allocate regs

	!
	! The v9 struct regs has a 64-bit r_tstate field, which we use here
	! to store the %ccr, %asi, %pstate, and %cwp as they would appear
	! in %tstate if a trap occurred.  We leave it up to the debugger to
	! realize what happened and extract the register values.
	!
	rd	%ccr, %l0				! %l0 = %ccr
	sllx	%l0, TSTATE_CCR_SHIFT, %l0		! %l0 <<= CCR_SHIFT
	rd	%asi, %l1				! %l1 = %asi
	sllx	%l1, TSTATE_ASI_SHIFT, %l1		! %l1 <<= ASI_SHIFT
	or	%l0, %l1, %l0				! %l0 |= %l1
	rdpr	%pstate, %l1				! %l1 = %pstate
	sllx	%l1, TSTATE_PSTATE_SHIFT, %l1		! %l1 <<= PSTATE_SHIFT
	or	%l0, %l1, %l0				! %l0 |= %l1
	rdpr	%cwp, %l1				! %l1 = %cwp
	sllx	%l1, TSTATE_CWP_SHIFT, %l1		! %l1 <<= CWP_SHIFT
	or	%l0, %l1, %l0				! %l0 |= %l1

	set	vpanic, %l1				! %l1 = %pc (vpanic)
	add	%l1, 4, %l2				! %l2 = %npc (vpanic+4)
	rd	%y, %l3					! %l3 = %y
	!
	! Flush register windows before panic_trigger() in order to avoid a
	! problem that a dump hangs if flush_windows() causes another panic.
	!
	call	flush_windows
	nop

	sethi	%hi(panic_quiesce), %o0
	call	panic_trigger
	or	%o0, %lo(panic_quiesce), %o0		! if (!panic_trigger(

vpanic_common:
	tst	%o0					!     &panic_quiesce))
	be	0f					!   goto 0f;
	mov	%o0, %l4				!   delay - %l4 = %o0

	!
	! If panic_trigger() was successful, we are the first to initiate a
	! panic: switch to the panic_stack.
	!
	set	panic_stack, %o0			! %o0 = panic_stack
	set	PANICSTKSIZE, %o1			! %o1 = size of stack
	add	%o0, %o1, %o0				! %o0 = top of stack

	sub	%o0, SA(MINFRAME + REGSIZE) + STACK_BIAS, %sp

	!
	! Now that we've got everything set up, store each register to its
	! designated location in the regs structure allocated on the stack.
	! The register set we store is the equivalent of the registers at
	! the time the %pc was pointing to vpanic, thus the %i's now contain
	! what the %o's contained prior to the save instruction.
	!
0:	stx	%l0, [%sp + STACK_BIAS + SA(MINFRAME) + TSTATE_OFF]
	stx	%g1, [%sp + STACK_BIAS + SA(MINFRAME) + G1_OFF]
	stx	%g2, [%sp + STACK_BIAS + SA(MINFRAME) + G2_OFF]
	stx	%g3, [%sp + STACK_BIAS + SA(MINFRAME) + G3_OFF]
	stx	%g4, [%sp + STACK_BIAS + SA(MINFRAME) + G4_OFF]
	stx	%g5, [%sp + STACK_BIAS + SA(MINFRAME) + G5_OFF]
	stx	%g6, [%sp + STACK_BIAS + SA(MINFRAME) + G6_OFF]
	stx	%g7, [%sp + STACK_BIAS + SA(MINFRAME) + G7_OFF]
	stx	%i0, [%sp + STACK_BIAS + SA(MINFRAME) + O0_OFF]
	stx	%i1, [%sp + STACK_BIAS + SA(MINFRAME) + O1_OFF]
	stx	%i2, [%sp + STACK_BIAS + SA(MINFRAME) + O2_OFF]
	stx	%i3, [%sp + STACK_BIAS + SA(MINFRAME) + O3_OFF]
	stx	%i4, [%sp + STACK_BIAS + SA(MINFRAME) + O4_OFF]
	stx	%i5, [%sp + STACK_BIAS + SA(MINFRAME) + O5_OFF]
	stx	%i6, [%sp + STACK_BIAS + SA(MINFRAME) + O6_OFF]
	stx	%i7, [%sp + STACK_BIAS + SA(MINFRAME) + O7_OFF]
	stn	%l1, [%sp + STACK_BIAS + SA(MINFRAME) + PC_OFF]
	stn	%l2, [%sp + STACK_BIAS + SA(MINFRAME) + NPC_OFF]
	st	%l3, [%sp + STACK_BIAS + SA(MINFRAME) + Y_OFF]

	mov	%l4, %o3				! %o3 = on_panic_stack
	add	%sp, STACK_BIAS + SA(MINFRAME), %o2	! %o2 = &regs
	mov	%i1, %o1				! %o1 = alist
	call	panicsys				! panicsys();
	mov	%i0, %o0				! %o0 = format
	ret
	restore

	SET_SIZE(vpanic)

	ENTRY_NP(dtrace_vpanic)

	!
	! DTrace variant of vpanic(): performs the same register capture,
	! but uses dtrace_flush_windows() and dtrace_panic_trigger() in
	! place of flush_windows() and panic_trigger(), then joins vpanic()
	! at vpanic_common with the trigger result in %o0.
	!
	save	%sp, -SA(MINFRAME + REGSIZE), %sp	! save and allocate regs

	!
	! The v9 struct regs has a 64-bit r_tstate field, which we use here
	! to store the %ccr, %asi, %pstate, and %cwp as they would appear
	! in %tstate if a trap occurred.  We leave it up to the debugger to
	! realize what happened and extract the register values.
	!
	rd	%ccr, %l0				! %l0 = %ccr
	sllx	%l0, TSTATE_CCR_SHIFT, %l0		! %l0 <<= CCR_SHIFT
	rd	%asi, %l1				! %l1 = %asi
	sllx	%l1, TSTATE_ASI_SHIFT, %l1		! %l1 <<= ASI_SHIFT
	or	%l0, %l1, %l0				! %l0 |= %l1
	rdpr	%pstate, %l1				! %l1 = %pstate
	sllx	%l1, TSTATE_PSTATE_SHIFT, %l1		! %l1 <<= PSTATE_SHIFT
	or	%l0, %l1, %l0				! %l0 |= %l1
	rdpr	%cwp, %l1				! %l1 = %cwp
	sllx	%l1, TSTATE_CWP_SHIFT, %l1		! %l1 <<= CWP_SHIFT
	or	%l0, %l1, %l0				! %l0 |= %l1

	set	dtrace_vpanic, %l1			! %l1 = %pc (dtrace_vpanic)
	add	%l1, 4, %l2				! %l2 = %npc (dtrace_vpanic+4)
	rd	%y, %l3					! %l3 = %y
	!
	! Flush register windows before dtrace_panic_trigger(), using the
	! DTrace-private dtrace_flush_windows() rather than flush_windows().
	!
	call	dtrace_flush_windows
	nop

	sethi	%hi(panic_quiesce), %o0
	call	dtrace_panic_trigger
	or	%o0, %lo(panic_quiesce), %o0		! delay - %o0 = &panic_quiesce

	ba,a	vpanic_common				! continue in vpanic()
	SET_SIZE(dtrace_vpanic)
	
	ENTRY(get_subcc_ccr)
	!
	! Returns the %ccr value produced by the subtraction %o0 - %o1.
	! The difference itself is discarded.
	!
	wr	%g0, %ccr	! clear condition codes
	subcc	%o0, %o1, %g0
	retl
	rd	%ccr, %o0	! return condition codes
	SET_SIZE(get_subcc_ccr)

	ENTRY_NP(ftrace_interrupt_disable)
	rdpr	%pstate, %o0		! %o0 = current %pstate (returned as the
					! cookie for ftrace_interrupt_enable)
	andn	%o0, PSTATE_IE, %o1	! %o1 = same, with interrupt enable off
	retl
	wrpr	%g0, %o1, %pstate	! delay - disable interrupts
	SET_SIZE(ftrace_interrupt_disable)

	ENTRY_NP(ftrace_interrupt_enable)
	retl
	wrpr	%g0, %o0, %pstate	! delay - write %o0 back to %pstate; this
					! restores the whole register, not
					! only the PSTATE_IE bit
	SET_SIZE(ftrace_interrupt_enable)