summaryrefslogtreecommitdiff
path: root/usr/src/uts/sun4u/sys/fpras_impl.h
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/uts/sun4u/sys/fpras_impl.h')
-rw-r--r--usr/src/uts/sun4u/sys/fpras_impl.h336
1 files changed, 336 insertions, 0 deletions
diff --git a/usr/src/uts/sun4u/sys/fpras_impl.h b/usr/src/uts/sun4u/sys/fpras_impl.h
new file mode 100644
index 0000000000..2de8188451
--- /dev/null
+++ b/usr/src/uts/sun4u/sys/fpras_impl.h
@@ -0,0 +1,336 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_FPRAS_IMPL_H
+#define _SYS_FPRAS_IMPL_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/fpras.h>
+
+#if !defined(_ASM)
+#include <sys/types.h>
+#else
+#include <sys/intreg.h>
+#include <sys/errno.h>
+#endif /* _ASM */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * sun4u/cheetah fpRAS implementation. Arrays etc will be allocated in sun4u
+ * post_startup() if fpras_implemented is set. This file may belong at
+ * the cpu level (eg, cheetahregs.h) but most of it should be common
+ * when fpRAS support is added for additional cpu types so we introduce
+ * it at the sun4u level (and set fpras_implemented in cpu_setup).
+ *
+ * If fpRAS is implemented on a sun4u/cpu combination that does not use
+ * an ASR for %stick then the FPRAS_INTERVAL macro will need some
+ * modification.
+ */
+
+/*
+ * Upper bound for check frequency per cpu and per operation. For example, if
+ * this is 100 then for cpuid N performing a bcopy if that cpu has not
+ * performed a checked bcopy in the last 1/100th of a second then
+ * we'll check the current operation. A value of 0 will check every operation.
+ * Modifying fpras_frequency from its default is not recommended.
+ * fpras_interval is computed from fpras_frequency.
+ */
+#if !defined(_ASM)
+extern int fpras_frequency; /* max checks per second, per cpu and per operation */
+extern int64_t fpras_interval; /* computed from fpras_frequency; the asm treats a negative value as not initialized */
+#endif /* !_ASM */
+#define FPRAS_DEFAULT_FREQUENCY 100 /* presumably the default for fpras_frequency */
+
+#if !defined(_ASM)
+
+/*
+ * Structure of a check function. The preamble prepares registers for the
+ * upcoming calculation that is performed in blk0 and blk1. One of those
+ * blocks will be rewritten as part of an FPRAS_REWRITE operation. Finally
+ * the result checked in chkresult should be as predetermined, and we should
+ * return zero on success and nonzero on failure. If an illegal instruction
+ * is encountered in the execution of the check function then we trampoline
+ * to the final three instructions to return a different value.
+ *
+ * Note that the size of this structure is a power of 2 as is the
+ * size of a struct fpras_chkfngrp. The asm macros below rely on this
+ * in performing bit shifts instead of mulx.
+ */
+struct fpras_chkfn {
+ uint32_t fpras_preamble[16]; /* prepares registers for the calculation */
+ uint32_t fpras_blk0[16]; /* first calculation block, may be rewritten */
+ uint32_t fpras_blk1[16]; /* second calculation block, may be rewritten */
+ uint32_t fpras_chkresult[13]; /* checks the result: zero on success, nonzero on failure */
+ uint32_t fpras_trampoline[3]; /* final three instructions, entered after an illegal instruction */
+};
+
+/*
+ * Check function constructed to match a struct fpras_chkfn
+ */
+extern int fpras_chkfn_type1(void); /* template that the REWRITE macros copy instruction blocks from */
+
+/*
+ * A group of check functions, one for each operation type. These will
+ * be the check functions for copy operations on a particular processor.
+ */
+struct fpras_chkfngrp {
+ struct fpras_chkfn fpras_fn[FPRAS_NCOPYOPS]; /* one check function per copy operation (FPRAS_BCOPY etc) */
+};
+
+/*
+ * Where we store check functions for execution. Indexed by cpuid and
+ * function within that for cacheline friendliness. Startup code
+ * copies the check function into this array. The fpRAS mechanism will
+ * rewrite one of fpras_blk0 or fpras_blk1 before calling the check function
+ * for a cpuid & copy function combination.
+ */
+extern struct fpras_chkfngrp *fpras_chkfngrps; /* array with one group per cpuid */
+
+#endif /* !_ASM */
+
+#if defined(_ASM)
+
+/* BEGIN CSTYLED */
+
+/*
+ * The INTERVAL macro decides whether we will check this copy operation,
+ * based on performing no more than 1 check per cpu & operation in a specified
+ * time interval. If it decides to abort this check (ie, we have checked
+ * recently) then it returns doex NULL, otherwise doex is the address of the
+ * check function to execute later. Migration must have been prevented before
+ * calling this macro. Args:
+ *
+ * operation (immediate): one of FPRAS_BCOPY etc
+ * blk (immediate): which block to copy
+ * doex (register): register in which to return check function address
+ * tmp1 (register): used for scratch, not preserved
+ * tmp2 (register): used for scratch, not preserved
+ * tmp3 (register): used for scratch, not preserved
+ * tmp4 (register): used for scratch, not preserved
+ * label: free local numeric label
+ */
+
+#define FPRAS_INTERVAL(operation, blk, doex, tmp1, tmp2, tmp3, tmp4, label) \
+ sethi %hi(fpras_interval), tmp1 ;\
+ ldx [tmp1 + %lo(fpras_interval)], tmp1 /* tmp1 = fpras_interval */ ;\
+ brlz,pn tmp1, label/**/f /* not initialized? */ ;\
+ clr doex /* delay slot, always runs: doex stays NULL on early exits */ ;\
+ sethi %hi(fpras_disableids), tmp2 ;\
+ ld [tmp2 + %lo(fpras_disableids)], tmp2 /* tmp2 = disable bitmask */ ;\
+ mov 0x1, tmp3 ;\
+ sll tmp3, operation, tmp3 /* tmp3 = mask bit for this operation */ ;\
+ btst tmp3, tmp2 ;\
+ bnz,a,pn %icc, label/**/f /* disabled for this op? */ ;\
+ nop ;\
+ set fpras_chkfn_type1, tmp2 ;\
+ prefetch [tmp2 + (FPRAS_BLK0 + blk * 64)], #one_read /* template block the REWRITE macros read */ ;\
+ ldn [THREAD_REG + T_CPU], tmp2 ;\
+ ldn [tmp2 + CPU_PRIVATE], tmp2 /* tmp2 = cpu private pointer */ ;\
+ brz,pn tmp2, label/**/f /* early in startup? */ ;\
+ mov operation, tmp3 /* delay slot, always runs */ ;\
+ sll tmp3, 3, tmp3 /* 8 bytes per timestamp slot */ ;\
+ set CHPR_FPRAS_TIMESTAMP, tmp4 ;\
+ add tmp2, tmp4, tmp2 ;\
+ add tmp2, tmp3, tmp2 /* keep ptr for update */ ;\
+ ldx [tmp2], tmp3 /* last timestamp */ ;\
+ rd STICK, doex /* doex is a scratch here */ ;\
+ sub doex, tmp3, tmp4 /* delta since last check */ ;\
+ cmp tmp4, tmp1 /* compare delta to interval */ ;\
+ blu,a,pn %xcc, label/**/f /* checked too recently? */ ;\
+ clr doex /* annulled slot: runs only when the branch is taken */ ;\
+ stx doex, [tmp2] /* updated timestamp */ ;\
+ ldn [THREAD_REG + T_CPU], tmp1 ;\
+ ld [tmp1 + CPU_ID], tmp1 /* tmp1 = cpuid */ ;\
+ sethi %hi(fpras_chkfngrps), doex ;\
+ ldn [doex + %lo(fpras_chkfngrps)], doex /* doex = base of the group array */ ;\
+ sll tmp1, FPRAS_CHKFNGRP_SIZE_SHIFT, tmp1 /* shift instead of mulx: group size is a power of 2 */ ;\
+ add doex, tmp1, doex /* doex = group for this cpu */ ;\
+ mov operation, tmp1 ;\
+ sll tmp1, FPRAS_CHKFN_SIZE_SHIFT, tmp1 /* shift instead of mulx: function size is a power of 2 */ ;\
+ add doex, tmp1, doex /* address of check function */ ;\
+label:
+
+/*
+ * The REWRITE macro copies an instruction block from fpras_chkfn_type1
+ * into a per-cpu fpras check function.
+ * If doex is NULL it must not attempt any copy, and must leave doex NULL.
+ * CPU migration of this thread must be prevented before we call this macro.
+ * We must have checked for fp in use (and saved state, including the
+ * quadrant of registers indicated by the fpq argument) and enabled fp
+ * before using this macro. Args:
+ *
+ * blk (immediate): as above
+ * doex (register): register in which to return check function addr
+ * [fpq (fp register): frf quadrant to be used (%f0/%f16/%f32/%f48)]
+ * This is used on type 1 rewrite only - on others the
+ * quadrant is implicit/hardcoded in the macro name.
+ * tmp1 (register): used for scratch, not preserved
+ * label1: free local numeric label
+ * [label2: free local numeric label]
+ * This is used in type 2 only.
+ *
+ * Note that the REWRITE macros do not perform a flush instruction -
+ * flush is not necessary on Cheetah derivative processors in which
+ * i$ snoops for invalidations.
+ */
+
+/*
+ * Rewrite type 1 will work with any instruction pattern - it just block
+ * loads and block stores the given block. A membar after block store
+ * forces the block store to complete before upcoming reuse of the
+ * fpregs in the block; the block load is blocking on sun4u/cheetah
+ * so no need for a membar after it.
+ */
+
+#define FPRAS_REWRITE_TYPE1(blk, doex, fpq, tmp1, label) \
+ brz,pn doex, label/**/f /* no copy when doex is NULL */ ;\
+ sethi %hi(fpras_chkfn_type1), tmp1 /* delay slot, always runs */ ;\
+ add tmp1, %lo(fpras_chkfn_type1), tmp1 ;\
+ add tmp1, FPRAS_BLK0 + blk * 64, tmp1 /* tmp1 = source block in the template */ ;\
+ ldda [tmp1]ASI_BLK_P, fpq /* block load into the fpq quadrant */ ;\
+ add doex, FPRAS_BLK0 + blk * 64, tmp1 /* tmp1 = same block in the per-cpu function */ ;\
+ stda fpq, [tmp1]ASI_BLK_P /* block store from the fpq quadrant */ ;\
+ membar #Sync /* complete the store before the fpregs are reused */ ;\
+label:
+
+/*
+ * Rewrite type 2 will only work with instruction blocks that satisfy
+ * this particular repeat pattern. Note that the frf quadrant to
+ * use is implicit in the macro name and had better match what the
+ * copy function is preserving.
+ *
+ * The odd looking repetition in the initial loop is designed to open
+ * up both paths from prefetch cache to the frf - unrolling the loop
+ * would defeat this. In addition we perform idempotent faligndata
+ * manipulations using %tick as a randomly aligned address (this only
+ * works for addresses that aren't doubleword aligned).
+ */
+#define FPRAS_REWRITE_TYPE2Q1(blk, doex, tmp1, tmp2, label1, label2) \
+ brz,pn doex, label1/**/f /* no copy when doex is NULL */ ;\
+ mov 0x2, tmp1 /* delay slot, always runs: pass counter for the loads */ ;\
+ set fpras_chkfn_type1, tmp2 /* tmp2 = template check function */ ;\
+label2: ;\
+ deccc tmp1 ;\
+ ldd [tmp2 + (FPRAS_BLK0 + blk * 64)], %f4 /* first doubleword of the block */ ;\
+ ldd [tmp2 + (FPRAS_BLK0 + blk * 64) + 8], %f2 /* second doubleword of the block */ ;\
+ bnz,a,pt %icc, label2/**/b /* go around once more */ ;\
+ fsrc1 %f4, %f0 /* annulled slot: runs only when the branch is taken */ ;\
+ rdpr %tick, tmp1 /* low bits serve as a random alignment */ ;\
+ fsrc1 %f4, %f8 ;\
+ fsrc1 %f2, %f10 ;\
+ btst 0x7, tmp1 ;\
+ alignaddr tmp1, %g0, %g0 /* changes %gsr */ ;\
+ bz,pn %icc, label2/**/f /* doubleword aligned: skip the realignment */ ;\
+ faligndata %f2, %f4, %f6 /* delay slot, always runs */ ;\
+ faligndata %f0, %f2, %f12 ;\
+ alignaddrl tmp1, %g0, %g0 /* changes %gsr again */ ;\
+ faligndata %f12, %f6, %f6 /* recombine the two shifted values into %f6 */ ;\
+label2: ;\
+ add doex, FPRAS_BLK0 + blk * 64, tmp1 /* tmp1 = destination block in the per-cpu function */ ;\
+ fsrc2 %f8, %f12 ;\
+ fsrc1 %f6, %f14 ;\
+ stda %f0, [tmp1]ASI_BLK_P /* block store starting at %f0 */ ;\
+ membar #Sync /* complete the store before the fpregs are reused */ ;\
+label1:
+
+#define FPRAS_REWRITE_TYPE2Q2(blk, doex, tmp1, tmp2, label1, label2) \
+ brz,pn doex, label1/**/f /* no copy when doex is NULL */ ;\
+ mov 0x2, tmp1 /* delay slot, always runs: pass counter for the loads */ ;\
+ set fpras_chkfn_type1, tmp2 /* tmp2 = template check function */ ;\
+label2: ;\
+ deccc tmp1 ;\
+ ldd [tmp2 + (FPRAS_BLK0 + blk * 64)], %f20 /* first doubleword of the block */ ;\
+ ldd [tmp2 + (FPRAS_BLK0 + blk * 64) + 8], %f18 /* second doubleword of the block */ ;\
+ bnz,a,pt %icc, label2/**/b /* go around once more */ ;\
+ fsrc1 %f20, %f16 /* annulled slot: runs only when the branch is taken */ ;\
+ rdpr %tick, tmp1 /* low bits serve as a random alignment */ ;\
+ fsrc1 %f20, %f24 ;\
+ fsrc1 %f18, %f26 ;\
+ btst 0x7, tmp1 ;\
+ alignaddr tmp1, %g0, %g0 /* changes %gsr */ ;\
+ bz,pn %icc, label2/**/f /* doubleword aligned: skip the realignment */ ;\
+ faligndata %f18, %f20, %f22 /* delay slot, always runs */ ;\
+ faligndata %f16, %f18, %f28 ;\
+ alignaddrl tmp1, %g0, %g0 /* changes %gsr again */ ;\
+ faligndata %f28, %f22, %f22 /* recombine the two shifted values into %f22 */ ;\
+label2: ;\
+ add doex, FPRAS_BLK0 + blk * 64, tmp1 /* tmp1 = destination block in the per-cpu function */ ;\
+ fsrc2 %f24, %f28 ;\
+ fsrc1 %f22, %f30 ;\
+ stda %f16, [tmp1]ASI_BLK_P /* block store starting at %f16 */ ;\
+ membar #Sync /* complete the store before the fpregs are reused */ ;\
+label1:
+
+/*
+ * The CHECK macro takes the 'doex' address of the check function to
+ * execute and jumps to it (if not NULL). If the check function returns
+ * nonzero then the check has failed and the CHECK macro must initiate
+ * an appropriate failure action. Illegal instruction trap handlers
+ * will also recognise traps in this PC range as fp failures. Thread
+ * migration must only be reallowed after completion of this check. The
+ * CHECK macro should be treated as a CALL/JMPL - output registers are
+ * forfeit after using it. If the call to fpras_failure returns
+ * (it may decide to panic) then invoke lofault handler (which must exist)
+ * to return an error (be sure to use this macro before restoring original
+ * lofault setup in copy functions). Note that the lofault handler is the
+ * copyops aware proxy handler which will perform other tidy up operations
+ * (unbind, fp state restore) that would normally have been done in the tail
+ * of the copy function.
+ *
+ * operation (immediate): as above
+ * doex (register): doex value returned from the REWRITE
+ * label: free local numeric label
+ */
+
+#define FPRAS_CHECK(operation, doex, label) \
+ brz,pn doex, label/**/f /* no check when doex is NULL */ ;\
+ nop ;\
+ jmpl doex, %o7 /* call the check function */ ;\
+ nop ;\
+ cmp %o0, FPRAS_OK ;\
+ be %icc, label/**/f /* check passed */ ;\
+ nop ;\
+ mov %o0, %o1 /* how detected */ ;\
+ call fpras_failure /* take failure action */ ;\
+ mov operation, %o0 /* delay slot: first argument */ ;\
+ ldn [THREAD_REG + T_LOFAULT], doex /* reached only if fpras_failure returns */ ;\
+ jmp doex /* enter the lofault handler */ ;\
+ mov EFAULT, %g1 /* delay slot: presumably the error the handler returns */ ;\
+label:
+
+/* END CSTYLED */
+
+#endif /* _ASM */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_FPRAS_IMPL_H */