author     Jason Beloro <Jason.Beloro@Sun.COM>  2009-08-06 17:39:39 -0700
committer  Jason Beloro <Jason.Beloro@Sun.COM>  2009-08-06 17:39:39 -0700
commit     9d0d62ad2e60e8f742a2e723d06e88352ee6a1f3 (patch)
tree       016e2a6b2f674016c46785258d0ff85e6b1bce09
parent     32a6953793c636df949ca1ae3555438159bda3f6 (diff)
download   illumos-joyent-9d0d62ad2e60e8f742a2e723d06e88352ee6a1f3.tar.gz
6858457 Remove Solaris support for UltraSPARC-AT10 processor
-rw-r--r--  usr/src/cmd/mdb/common/kmdb/kctl/kctl_main.c | 8
-rw-r--r--  usr/src/cmd/mdb/common/kmdb/kmdb_kvm.c | 4
-rw-r--r--  usr/src/cmd/picl/plugins/inc/picldefs.h | 3
-rw-r--r--  usr/src/cmd/picl/plugins/sun4v/snmp/snmpplugin.c | 6
-rw-r--r--  usr/src/cmd/picl/plugins/sun4v/snmp/snmpplugin.h | 3
-rw-r--r--  usr/src/common/atomic/sparcv9/atomic.s | 15
-rw-r--r--  usr/src/common/elfcap/elfcap.c | 5
-rw-r--r--  usr/src/lib/libc/sparc_hwcap1/common/gen/memcpy.s | 1704
-rw-r--r--  usr/src/lib/libc/sparc_hwcap1/common/gen/memset.s | 767
-rw-r--r--  usr/src/lib/libc/sparc_hwcap1/common/gen/misc.s | 33
-rw-r--r--  usr/src/lib/libc/sparc_hwcap1/common/gen/strcpy.s | 340
-rw-r--r--  usr/src/lib/libc/sparc_hwcap1/common/gen/strlen.s | 127
-rw-r--r--  usr/src/lib/libc/sparc_hwcap1/sparc/Makefile | 5
-rw-r--r--  usr/src/lib/libc/sparc_hwcap1/sparcv9/Makefile | 5
-rw-r--r--  usr/src/lib/libdisasm/sparc/dis_sparc_fmt.c | 92
-rw-r--r--  usr/src/lib/libdisasm/sparc/instr.c | 94
-rw-r--r--  usr/src/lib/libprtdiag/common/display_sun4v.c | 74
-rw-r--r--  usr/src/pkgdefs/Makefile | 1
-rw-r--r--  usr/src/pkgdefs/SUNWusat10.v/Makefile | 35
-rw-r--r--  usr/src/pkgdefs/SUNWusat10.v/pkginfo.tmpl | 55
-rw-r--r--  usr/src/pkgdefs/SUNWusat10.v/prototype_com | 52
-rw-r--r--  usr/src/pkgdefs/SUNWusat10.v/prototype_sparc | 30
-rw-r--r--  usr/src/uts/common/io/mem.c | 14
-rw-r--r--  usr/src/uts/common/sys/auxv_SPARC.h | 3
-rw-r--r--  usr/src/uts/common/vm/hat.h | 19
-rw-r--r--  usr/src/uts/common/vm/page.h | 2
-rw-r--r--  usr/src/uts/common/vm/page_retire.c | 2
-rw-r--r--  usr/src/uts/common/vm/seg_kmem.c | 11
-rw-r--r--  usr/src/uts/common/vm/vm_page.c | 38
-rw-r--r--  usr/src/uts/sfmmu/ml/sfmmu_asm.s | 80
-rw-r--r--  usr/src/uts/sfmmu/ml/sfmmu_kdi.s | 18
-rw-r--r--  usr/src/uts/sfmmu/vm/hat_sfmmu.c | 251
-rw-r--r--  usr/src/uts/sfmmu/vm/hat_sfmmu.h | 48
-rw-r--r--  usr/src/uts/sparc/fpu/fpu_simulator.c | 135
-rw-r--r--  usr/src/uts/sparc/sys/fpu/fpu_simulator.h | 14
-rw-r--r--  usr/src/uts/sun4/os/startup.c | 14
-rw-r--r--  usr/src/uts/sun4/vm/sfmmu.c | 4
-rw-r--r--  usr/src/uts/sun4/vm/vm_dep.h | 10
-rw-r--r--  usr/src/uts/sun4u/sys/pte.h | 19
-rw-r--r--  usr/src/uts/sun4u/vm/mach_sfmmu.h | 14
-rw-r--r--  usr/src/uts/sun4v/Makefile.files | 2
-rw-r--r--  usr/src/uts/sun4v/Makefile.sun4v.shared | 5
-rw-r--r--  usr/src/uts/sun4v/cpu/rock.c | 1014
-rw-r--r--  usr/src/uts/sun4v/cpu/rock_asm.s | 486
-rw-r--r--  usr/src/uts/sun4v/cpu/rock_copy.s | 4941
-rw-r--r--  usr/src/uts/sun4v/io/px/px_lib4v.c | 30
-rw-r--r--  usr/src/uts/sun4v/io/px/px_lib4v.h | 3
-rw-r--r--  usr/src/uts/sun4v/ml/hcall.s | 43
-rw-r--r--  usr/src/uts/sun4v/ml/mach_interrupt.s | 72
-rw-r--r--  usr/src/uts/sun4v/ml/mach_offsets.in | 7
-rw-r--r--  usr/src/uts/sun4v/ml/trap_table.s | 4
-rw-r--r--  usr/src/uts/sun4v/os/error.c | 50
-rw-r--r--  usr/src/uts/sun4v/os/fillsysinfo.c | 112
-rw-r--r--  usr/src/uts/sun4v/os/mach_cpu_states.c | 25
-rw-r--r--  usr/src/uts/sun4v/pcbe/rock_pcbe.c | 2316
-rw-r--r--  usr/src/uts/sun4v/rock/Makefile | 113
-rw-r--r--  usr/src/uts/sun4v/rock_pcbe/Makefile | 77
-rw-r--r--  usr/src/uts/sun4v/sys/error.h | 4
-rw-r--r--  usr/src/uts/sun4v/sys/hsvc.h | 6
-rw-r--r--  usr/src/uts/sun4v/sys/hypervisor_api.h | 12
-rw-r--r--  usr/src/uts/sun4v/sys/machcpuvar.h | 3
-rw-r--r--  usr/src/uts/sun4v/sys/machsystm.h | 1
-rw-r--r--  usr/src/uts/sun4v/sys/mmu.h | 12
-rw-r--r--  usr/src/uts/sun4v/sys/pte.h | 9
-rw-r--r--  usr/src/uts/sun4v/sys/rock_hypervisor_api.h | 100
-rw-r--r--  usr/src/uts/sun4v/sys/rockasi.h | 68
-rw-r--r--  usr/src/uts/sun4v/vm/mach_sfmmu.c | 30
-rw-r--r--  usr/src/uts/sun4v/vm/mach_sfmmu.h | 84
-rw-r--r--  usr/src/uts/sun4v/vm/mach_sfmmu_asm.s | 57
-rw-r--r--  usr/src/uts/sun4v/vm/mach_vm_dep.c | 136
70 files changed, 203 insertions(+), 13773 deletions(-)
diff --git a/usr/src/cmd/mdb/common/kmdb/kctl/kctl_main.c b/usr/src/cmd/mdb/common/kmdb/kctl/kctl_main.c
index 697d1804f2..2d30b5e10c 100644
--- a/usr/src/cmd/mdb/common/kmdb/kctl/kctl_main.c
+++ b/usr/src/cmd/mdb/common/kmdb/kctl/kctl_main.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -41,7 +41,6 @@
#include <sys/kdi_impl.h>
#include <sys/ctf_api.h>
#include <vm/seg_kmem.h>
-#include <vm/hat.h>
kctl_t kctl;
@@ -153,9 +152,8 @@ kctl_dseg_alloc(caddr_t addr, size_t sz)
if (hat_getpfnum(kas.a_hat, addr) != PFN_INVALID)
return (EAGAIN);
- /* Set HAT_ATTR_TEXT to override soft execute mode */
- if (segkmem_xalloc(NULL, addr, sz, VM_NOSLEEP, HAT_ATTR_TEXT,
- segkmem_page_create, NULL) == NULL)
+ if (segkmem_xalloc(NULL, addr, sz, VM_NOSLEEP, 0, segkmem_page_create,
+ NULL) == NULL)
return (ENOMEM);
return (0);
diff --git a/usr/src/cmd/mdb/common/kmdb/kmdb_kvm.c b/usr/src/cmd/mdb/common/kmdb/kmdb_kvm.c
index 45fae50829..cc48c21e17 100644
--- a/usr/src/cmd/mdb/common/kmdb/kmdb_kvm.c
+++ b/usr/src/cmd/mdb/common/kmdb/kmdb_kvm.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -43,7 +43,6 @@
#include <strings.h>
#include <dlfcn.h>
-#include <sys/kdi_impl.h>
#include <sys/isa_defs.h>
#include <sys/kobj.h>
#include <sys/kobj_impl.h>
@@ -218,7 +217,6 @@ ssize_t
kmt_writer(void *buf, size_t nbytes, uint64_t addr)
{
kmt_bcopy(buf, (void *)(uintptr_t)addr, nbytes);
- mdb.m_kdi->kdi_flush_caches();
return (nbytes);
}
diff --git a/usr/src/cmd/picl/plugins/inc/picldefs.h b/usr/src/cmd/picl/plugins/inc/picldefs.h
index 6fb6094423..95efac27f6 100644
--- a/usr/src/cmd/picl/plugins/inc/picldefs.h
+++ b/usr/src/cmd/picl/plugins/inc/picldefs.h
@@ -129,8 +129,6 @@ extern "C" {
#define PICL_CLASS_SENSOR "sensor"
#define PICL_CLASS_STACK "stack"
#define PICL_CLASS_UNKNOWN "unknown"
-#define PICL_CLASS_HUMIDITY_SENSOR "humidity-sensor"
-#define PICL_CLASS_HUMIDITY_INDICATOR "humidity-indicator"
/*
* Solaris driver property names
@@ -243,7 +241,6 @@ extern "C" {
#define PICL_PROP_BASE_UNITS "BaseUnits"
#define PICL_PROP_EXPONENT "Exponent"
#define PICL_PROP_RATE_UNITS "RateUnits"
-#define PICL_PROP_HUMIDITY "Humidity"
/*
* Various threshold property names
diff --git a/usr/src/cmd/picl/plugins/sun4v/snmp/snmpplugin.c b/usr/src/cmd/picl/plugins/sun4v/snmp/snmpplugin.c
index 2047e1a105..6dc7b10a3a 100644
--- a/usr/src/cmd/picl/plugins/sun4v/snmp/snmpplugin.c
+++ b/usr/src/cmd/picl/plugins/sun4v/snmp/snmpplugin.c
@@ -865,10 +865,6 @@ make_node(picl_nodehdl_t subtree_root, int row, int *snmp_syserr_p)
ADD_NODE(PICL_CLASS_RPM_SENSOR)
add_prop(nodeh, &proph, node_name, row,
PP_SPEED, snmp_syserr_p);
- } else if (sensor_type == SSST_HUMIDITY) {
- ADD_NODE(PICL_CLASS_HUMIDITY_SENSOR)
- add_prop(nodeh, &proph, node_name, row,
- PP_HUMIDITY, snmp_syserr_p);
} else {
ADD_NODE(PICL_CLASS_SENSOR)
add_prop(nodeh, &proph, node_name, row,
@@ -906,8 +902,6 @@ make_node(picl_nodehdl_t subtree_root, int row, int *snmp_syserr_p)
ADD_NODE(PICL_CLASS_RPM_INDICATOR)
} else if (sensor_type == SSST_PRESENCE) {
ADD_NODE(PICL_CLASS_PRESENCE_INDICATOR)
- } else if (sensor_type == SSST_HUMIDITY) {
- ADD_NODE(PICL_CLASS_HUMIDITY_INDICATOR)
} else {
ADD_NODE(PICL_CLASS_INDICATOR)
}
diff --git a/usr/src/cmd/picl/plugins/sun4v/snmp/snmpplugin.h b/usr/src/cmd/picl/plugins/sun4v/snmp/snmpplugin.h
index 871ed18f59..f1a1a1da46 100644
--- a/usr/src/cmd/picl/plugins/sun4v/snmp/snmpplugin.h
+++ b/usr/src/cmd/picl/plugins/sun4v/snmp/snmpplugin.h
@@ -111,8 +111,7 @@ typedef enum {
PP_MFG_NAME,
PP_MODEL_NAME,
PP_DESCRIPTION,
- PP_LABEL,
- PP_HUMIDITY
+ PP_LABEL
} sp_propid_t;
/*
diff --git a/usr/src/common/atomic/sparcv9/atomic.s b/usr/src/common/atomic/sparcv9/atomic.s
index 5cf3526e25..0a43cd4eef 100644
--- a/usr/src/common/atomic/sparcv9/atomic.s
+++ b/usr/src/common/atomic/sparcv9/atomic.s
@@ -82,13 +82,6 @@ label/**/0: ; \
mov tmp2, %o7 /* restore callee's return address */ ; \
label/**/1:
-#ifdef ATOMIC_SIMPLE_BO_ENABLE
-/*
- * For some processors, a simple limit has proved beneficial
- */
-#define ATOMIC_BACKOFF_CPU(val, limit, ncpu, cas_cnt, label) \
- set 1 << ATOMIC_BO_ENABLE_SHIFT, limit
-#else
/*
 * For the kernel, we take cas failures into consideration
* and also scale the backoff limit w.r.t. the number of cpus.
@@ -111,7 +104,6 @@ label/**/0: ; \
mov %g0, cas_cnt ; \
mov 1, val ; \
label/**/1:
-#endif /* ATOMIC_SIMPLE_BO_ENABLE */
#endif /* ATOMIC_BO_ENABLE_SHIFT */
#else /* _KERNEL */
@@ -137,18 +129,11 @@ label/**/0:
 * The cas_cnt counts cas instruction failures and is
* initialized to 0.
*/
-#ifdef ATOMIC_SIMPLE_BO_ENABLE
-#define ATOMIC_BACKOFF_INIT(val, ncpu, cas_cnt) \
- mov 1, val
-
-#else /* If not defined ATOMIC_SIMPLE_BO_ENABLE */
#define ATOMIC_BACKOFF_INIT(val, ncpu, cas_cnt) \
mov 1, val ; \
mov %g0, ncpu ; \
mov %g0, cas_cnt
-#endif /* ATOMIC_SIMPLE_BO_ENABLE */
-
#define ATOMIC_BACKOFF_BRANCH(cr, backoff, loop) \
bne,a,pn cr, backoff
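For context on the hunk above: the surviving kernel macros initialize a backoff value, count cas (compare-and-swap) failures, and scale the retry limit with the number of cpus, while the removed ATOMIC_SIMPLE_BO_ENABLE variant clamped the limit to a fixed constant. A minimal C sketch of the capped-exponential-backoff pattern these macros implement in assembly (the function name and the way the cap is supplied are illustrative assumptions, not the illumos interfaces):

#include <stdint.h>
#include <stdbool.h>

/*
 * Sketch: retry a 64-bit atomic add, doubling a spin delay on each
 * cas failure up to backoff_cap (which a kernel would derive from
 * the cpu count, as the retained comment above describes).
 */
static inline void
atomic_add_64_backoff(volatile uint64_t *target, uint64_t delta,
    uint64_t backoff_cap)
{
	uint64_t backoff = 1;

	for (;;) {
		uint64_t old = *target;

		/* cas: succeeds only if *target still holds old */
		if (__atomic_compare_exchange_n(target, &old, old + delta,
		    false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
			return;

		/* cas failed: delay, then double the backoff up to the cap */
		for (volatile uint64_t spin = 0; spin < backoff; spin++)
			;
		if (backoff < backoff_cap)
			backoff <<= 1;
	}
}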
diff --git a/usr/src/common/elfcap/elfcap.c b/usr/src/common/elfcap/elfcap.c
index 61f97deac7..2aa840fee6 100644
--- a/usr/src/common/elfcap/elfcap.c
+++ b/usr/src/common/elfcap/elfcap.c
@@ -150,10 +150,7 @@ static const elfcap_desc_t hw1_sparc[ELFCAP_NUM_HW1_SPARC] = {
AV_SPARC_FMAF, STRDESC("AV_SPARC_FMAF"),
STRDESC("FMAF"), STRDESC("fmaf"),
},
- { /* 0x00000200 */
- AV_SPARC_FMAU, STRDESC("AV_SPARC_FMAU"),
- STRDESC("FMAU"), STRDESC("fmau"),
- },
+ RESERVED_ELFCAP_DESC, /* 0x00000200 */
{ /* 0x00000400 */
AV_SPARC_VIS3, STRDESC("AV_SPARC_VIS3"),
STRDESC("VIS3"), STRDESC("vis3"),
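For context on the hunk above: AV_SPARC_FMAU advertised the removed processor's unfused multiply-add instructions, so its bit (0x00000200) reverts to a reserved placeholder while neighbouring capabilities such as AV_SPARC_FMAF and AV_SPARC_VIS3 remain. A hedged sketch of how a userland program typically tests these SPARC hardware-capability bits, assuming the Solaris/illumos getisax(3C) interface (the AV_SPARC_* constants live in sys/auxv_SPARC.h, pulled in by sys/auxv.h on SPARC builds):

#include <sys/auxv.h>
#include <stdio.h>

/*
 * Sketch: fetch the first word of hardware-capability bits and test
 * AV_SPARC_VIS3, an entry that survives this change. getisax(3C)
 * fills the array and returns the number of words it wrote.
 */
int
main(void)
{
	uint32_t cap[1];

	if (getisax(cap, 1) > 0 && (cap[0] & AV_SPARC_VIS3))
		(void) printf("VIS3 supported\n");
	else
		(void) printf("VIS3 not supported\n");
	return (0);
}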
diff --git a/usr/src/lib/libc/sparc_hwcap1/common/gen/memcpy.s b/usr/src/lib/libc/sparc_hwcap1/common/gen/memcpy.s
deleted file mode 100644
index 8fdb95268f..0000000000
--- a/usr/src/lib/libc/sparc_hwcap1/common/gen/memcpy.s
+++ /dev/null
@@ -1,1704 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
- .file "memcpy.s"
-
-/*
- * memcpy(s1, s2, len)
- *
- * Copy s2 to s1, always copy n bytes.
- * Note: this C code does not work for overlapped copies.
- * Memmove() and bcopy() do.
- *
- * The added entry __align_cpy_1 is generally for use by compilers.
- *
- * Fast assembler language version of the following C-program for memcpy
- * which represents the `standard' for the C-library.
- *
- * void *
- * memcpy(void *s, const void *s0, size_t n)
- * {
- * if (n != 0) {
- * char *s1 = s;
- * const char *s2 = s0;
- * do {
- * *s1++ = *s2++;
- * } while (--n != 0);
- * }
- * return (s);
- * }
- */
-
-#include <sys/asm_linkage.h>
-#include <sys/sun4asi.h>
-#include <sys/trap.h>
-
-#ifdef __sparcv9
-#define SAVESIZE (8 * 1)
-#define STACK_OFFSET (STACK_BIAS + MINFRAME)
-#else
-#define SAVESIZE (8 * 3)
-#define STACK_OFFSET (STACK_BIAS + MINFRAME + 4)
-#endif
-
-#define scratch_offset 0
-#define g4_offset 8
-#define g5_offset 16
-
-#define ICACHE_LINE_SIZE 64
-#define BLOCK_SIZE 64
-#define FPRS_FEF 0x4
-#define PF_FAR 2048
-#define PF_NEAR 1024
-
-#define SHORTCOPY 3
-#define SMALL_MAX 39
-#define MEDIUM_MAX 255
-#define MED_WMAX 256 /* max copy for medium word-aligned case */
-#define MED_MAX 256 /* max copy for medium longword-aligned case */
-
-#ifndef BSTORE_SIZE
-#define BSTORE_SIZE 256 /* min copy size for block store */
-#endif
-
-/*
- * The LDDs will use the below ASI for performance
- * This ASI minimizes cache pollution.
- */
-#define ASI_CACHE_SPARING 0xf4
-#define ASI_CACHE_SPARING_PRIMARY 0xf4
-
- ANSI_PRAGMA_WEAK(memmove,function)
- ANSI_PRAGMA_WEAK(memcpy,function)
-
- ENTRY(memmove)
-	cmp	%o1, %o0	! compare src (from) and dst (to) addresses
-	bgeu	%ncc, .forcpy	! if from >= to, use forward copy
-	sub	%o0, %o1, %o4	! get difference of two addresses
-	cmp	%o2, %o4	! compare size and difference of addresses
-	bleu	%ncc, .forcpy	! if size <= gap, no overlap: copy forward
- nop
-
- !
- ! an overlapped copy that must be done "backwards"
- !
-.ovbc:
- mov %o0, %g1 ! save dest address for return val
- add %o1, %o2, %o1 ! get to end of source space
- add %o0, %o2, %o0 ! get to end of destination space
-
- cmp %o2, 24
- bgeu,pn %ncc, .dbalign
- nop
- cmp %o2, 4
- blt,pn %ncc, .byte
- sub %o2, 3, %o2
-.byte4loop:
- ldub [%o1-1], %o3 ! load last byte
- stb %o3, [%o0-1] ! store last byte
- sub %o1, 4, %o1
- ldub [%o1+2], %o3 ! load 2nd from last byte
- stb %o3, [%o0-2] ! store 2nd from last byte
- sub %o0, 4, %o0
- ldub [%o1+1], %o3 ! load 3rd from last byte
- stb %o3, [%o0+1] ! store 3rd from last byte
- subcc %o2, 4, %o2
- ldub [%o1], %o3 ! load 4th from last byte
- bgu,pt %ncc, .byte4loop
- stb %o3, [%o0] ! store 4th from last byte
-.byte:
- addcc %o2, 3, %o2
- bz,pt %ncc, .exit
-.byteloop:
- dec %o1 ! decrement src address
- ldub [%o1], %o3 ! read a byte
- dec %o0 ! decrement dst address
- deccc %o2 ! decrement count
- bgu,pt %ncc, .byteloop ! loop until done
- stb %o3, [%o0] ! write byte
-.exit:
- retl
- mov %g1, %o0
-
- .align 16
-.dbalign:
- andcc %o0, 7, %o5 ! bytes till DST 8 byte aligned
- bz,pt %ncc, .dbmed
- sub %o2, %o5, %o2 ! update count
-.dbalign1:
- dec %o1 ! decrement src address
- ldub [%o1], %o3 ! read a byte
- dec %o0 ! decrement dst address
- deccc %o5 ! decrement count
- bgu,pt %ncc, .dbalign1 ! loop until done
- stb %o3, [%o0] ! store a byte
-
-! check for src long word alignment
-.dbmed:
- mov %asi, %g5 ! save curr %asi
- wr %g0, ASI_CACHE_SPARING, %asi
- andcc %o1, 7, %g0 ! chk src long word alignment
- bnz,pn %ncc, .dbbck
- nop
-!
-! Following code is for overlapping copies where src and dest
-! are long word aligned
-!
- cmp %o2, 4095
- blt,pn %ncc, .dbmedl32enter ! go to no prefetch code
- nop
- prefetch [%o1 - (1 * BLOCK_SIZE)], #n_reads
- sub %o2, 63, %o2 ! adjust length to allow cc test
- ! for end of loop
- prefetch [%o1 - (2 * BLOCK_SIZE)], #n_reads
- prefetch [%o1 - (3 * BLOCK_SIZE)], #n_reads
- prefetch [%o1 - (4 * BLOCK_SIZE)], #n_reads
-.dbmedl64:
- prefetch [%o1 - (5 * BLOCK_SIZE)], #n_reads
- ldxa [%o1-8]%asi, %o3 ! load
- subcc %o2, 64, %o2 ! decrement length count
- stx %o3, [%o0-8] ! and store
- ldxa [%o1-16]%asi, %o3 ! a block of 64 bytes
- sub %o1, 64, %o1 ! decrease src ptr by 64
- stx %o3, [%o0-16]
- sub %o0, 64, %o0 ! decrease dst ptr by 64
- ldxa [%o1+40]%asi, %o3
- ldxa [%o1+32]%asi, %o4
- ldxa [%o1+24]%asi, %o5
- stx %o3, [%o0+40]
- stx %o4, [%o0+32]
- stx %o5, [%o0+24]
- ldxa [%o1+16]%asi, %o3
- ldxa [%o1+8]%asi, %o4
- stx %o3, [%o0+16]
- stx %o4, [%o0+8]
- ldxa [%o1]%asi, %o5
- bgu,pt %ncc, .dbmedl64 ! repeat if at least 64 bytes left
- stx %o5, [%o0]
- add %o2, 63, %o2 ! restore offset adjustment
-.dbmedl32enter:
- subcc %o2, 31, %o2 ! adjust length to allow cc test
- ! for end of loop
- ble,pt %ncc, .dbmedl31 ! skip big loop if less than 32
- nop
-.dbmedl32:
- ldx [%o1-8], %o4 ! load
- subcc %o2, 32, %o2 ! decrement length count
- stx %o4, [%o0-8] ! and store
- ldx [%o1-16], %o3 ! a block of 32 bytes
- sub %o1, 32, %o1 ! decrease src ptr by 32
- stx %o3, [%o0-16]
- ldx [%o1+8], %o4
- sub %o0, 32, %o0 ! decrease dst ptr by 32
- stx %o4, [%o0+8]
- ldx [%o1], %o3
- bgu,pt %ncc, .dbmedl32 ! repeat if at least 32 bytes left
- stx %o3, [%o0]
-.dbmedl31:
- addcc %o2, 16, %o2 ! adjust remaining count
- ble,pt %ncc, .dbmedl15 ! skip if 15 or fewer bytes left
- nop !
- ldx [%o1-8], %o4 ! load and store 16 bytes
- sub %o1, 16, %o1 ! decrease src ptr by 16
- stx %o4, [%o0-8] !
- sub %o2, 16, %o2 ! decrease count by 16
- ldx [%o1], %o3 !
- sub %o0, 16, %o0 ! decrease dst ptr by 16
- stx %o3, [%o0]
-.dbmedl15:
- addcc %o2, 15, %o2 ! restore count
- bz,pt %ncc, .dbexit ! exit if finished
- nop
- cmp %o2, 8
- blt,pt %ncc, .dbremain ! skip if 7 or fewer bytes left
- nop
- ldx [%o1-8], %o4 ! load 8 bytes
- sub %o1, 8, %o1 ! decrease src ptr by 8
- stx %o4, [%o0-8] ! and store 8 bytes
- subcc %o2, 8, %o2 ! decrease count by 8
- bnz %ncc, .dbremain ! exit if finished
- sub %o0, 8, %o0 ! decrease dst ptr by 8
- mov %g5, %asi ! restore %asi
- retl
- mov %g1, %o0
-
-!
-! Following code is for overlapping copies where src and dest
-! are not long word aligned
-!
- .align 16
-.dbbck:
- rd %fprs, %o3 ! o3 = fprs
-
-	! if fprs.fef == 0, set it. Checking it requires 2 instructions.
- ! So set it anyway, without checking.
- wr %g0, FPRS_FEF, %fprs ! fprs.fef = 1
-
- alignaddr %o1, %g0, %o5 ! align src
- ldda [%o5]%asi, %d0 ! get first 8 byte block
- andn %o2, 7, %o4 ! prepare src ptr for finishup code
- cmp %o2, 32
- blt,pn %ncc, .dbmv8
- sub %o1, %o4, %o1 !
- cmp %o2, 4095 ! check for short memmoves
- blt,pn %ncc, .dbmv32enter ! go to no prefetch code
-.dbmv64:
- ldda [%o5-8]%asi, %d2 ! load 8 bytes
- ldda [%o5-16]%asi, %d4 ! load 8 bytes
- sub %o5, 64, %o5 !
- ldda [%o5+40]%asi, %d6 ! load 8 bytes
- sub %o0, 64, %o0 !
- ldda [%o5+32]%asi, %d8 ! load 8 bytes
- sub %o2, 64, %o2 ! 64 less bytes to copy
- ldda [%o5+24]%asi, %d18 ! load 8 bytes
- cmp %o2, 64 ! do we have < 64 bytes remaining
- ldda [%o5+16]%asi, %d28 ! load 8 bytes
- ldda [%o5+8]%asi, %d30 ! load 8 bytes
- prefetch [%o5 - (5 * BLOCK_SIZE)], #n_reads
- faligndata %d2, %d0, %d10 ! extract 8 bytes out
- ldda [%o5]%asi, %d0 ! load 8 bytes
- std %d10, [%o0+56] ! store the current 8 bytes
- faligndata %d4, %d2, %d12 ! extract 8 bytes out
- std %d12, [%o0+48] ! store the current 8 bytes
- faligndata %d6, %d4, %d14 ! extract 8 bytes out
- std %d14, [%o0+40] ! store the current 8 bytes
- faligndata %d8, %d6, %d16 ! extract 8 bytes out
- std %d16, [%o0+32] ! store the current 8 bytes
- faligndata %d18, %d8, %d20 ! extract 8 bytes out
- std %d20, [%o0+24] ! store the current 8 bytes
- faligndata %d28, %d18, %d22 ! extract 8 bytes out
- std %d22, [%o0+16] ! store the current 8 bytes
- faligndata %d30, %d28, %d24 ! extract 8 bytes out
- std %d24, [%o0+8] ! store the current 8 bytes
- faligndata %d0, %d30, %d26 ! extract 8 bytes out
- bgeu,pt %ncc, .dbmv64
- std %d26, [%o0] ! store the current 8 bytes
-
- cmp %o2, 32
- blt,pn %ncc, .dbmvx
- nop
-.dbmv32:
- ldda [%o5-8]%asi, %d2 ! load 8 bytes
-.dbmv32enter:
- ldda [%o5-16]%asi, %d4 ! load 8 bytes
- sub %o5, 32, %o5 !
- ldda [%o5+8]%asi, %d6 ! load 8 bytes
- sub %o0, 32, %o0 !
- faligndata %d2, %d0, %d10 ! extract 8 bytes out
- ldda [%o5]%asi, %d0 ! load 8 bytes
- sub %o2,32, %o2 ! 32 less bytes to copy
- std %d10, [%o0+24] ! store the current 8 bytes
- cmp %o2, 32 ! do we have < 32 bytes remaining
- faligndata %d4, %d2, %d12 ! extract 8 bytes out
- std %d12, [%o0+16] ! store the current 8 bytes
- faligndata %d6, %d4, %d14 ! extract 8 bytes out
- std %d14, [%o0+8] ! store the current 8 bytes
- faligndata %d0, %d6, %d16 ! extract 8 bytes out
- bgeu,pt %ncc, .dbmv32
- std %d16, [%o0] ! store the current 8 bytes
-.dbmvx:
- cmp %o2, 8 ! do we have < 8 bytes remaining
- blt,pt %ncc, .dbmvfinish ! if yes, skip to finish up code
- nop
-.dbmv8:
- ldda [%o5-8]%asi, %d2
- sub %o0, 8, %o0 ! since we are at the end
- ! when we first enter the loop
- sub %o2, 8, %o2 ! 8 less bytes to copy
- sub %o5, 8, %o5
- cmp %o2, 8 ! do we have < 8 bytes remaining
- faligndata %d2, %d0, %d8 ! extract 8 bytes out
- std %d8, [%o0] ! store the current 8 bytes
- bgeu,pt %ncc, .dbmv8
- fmovd %d2, %d0
-.dbmvfinish:
- and %o3, 0x4, %o3 ! fprs.du = fprs.dl = 0
- tst %o2
- bz,pt %ncc, .dbexit
- wr %o3, %g0, %fprs ! fprs = o3 restore fprs
-
-.dbremain:
- cmp %o2, 4
- blt,pn %ncc, .dbbyte
- nop
- ldub [%o1-1], %o3 ! load last byte
- stb %o3, [%o0-1] ! store last byte
- sub %o1, 4, %o1
- ldub [%o1+2], %o3 ! load 2nd from last byte
- stb %o3, [%o0-2] ! store 2nd from last byte
- sub %o0, 4, %o0
- ldub [%o1+1], %o3 ! load 3rd from last byte
- stb %o3, [%o0+1] ! store 3rd from last byte
- subcc %o2, 4, %o2
- ldub [%o1], %o3 ! load 4th from last byte
- stb %o3, [%o0] ! store 4th from last byte
- bz,pt %ncc, .dbexit
-.dbbyte:
- dec %o1 ! decrement src address
- ldub [%o1], %o3 ! read a byte
- dec %o0 ! decrement dst address
- deccc %o2 ! decrement count
- bgu,pt %ncc, .dbbyte ! loop until done
- stb %o3, [%o0] ! write byte
-.dbexit:
- mov %g5, %asi ! restore %asi
- retl
- mov %g1, %o0
- SET_SIZE(memmove)
-
- .align ICACHE_LINE_SIZE
- ENTRY(memcpy)
- ENTRY(__align_cpy_1)
- ! adjust instruction alignment
- nop ! Do not remove, these nops affect
- nop ! icache alignment and performance
-.forcpy:
- cmp %o2, SMALL_MAX ! check for not small case
- bgu,pn %ncc, .medium ! go to larger cases
- mov %o0, %g1 ! save %o0
- cmp %o2, SHORTCOPY ! check for really short case
- ble,pt %ncc, .smallleft !
- or %o0, %o1, %o3 ! prepare alignment check
- andcc %o3, 0x3, %g0 ! test for alignment
- bz,pt %ncc, .smallword ! branch to word aligned case
- sub %o2, 3, %o2 ! adjust count to allow cc zero test
-.smallnotalign4:
- ldub [%o1], %o3 ! read byte
- subcc %o2, 4, %o2 ! reduce count by 4
- stb %o3, [%o0] ! write byte
- ldub [%o1+1], %o3 ! repeat for a total of 4 bytes
- add %o1, 4, %o1 ! advance SRC by 4
- stb %o3, [%o0+1]
- ldub [%o1-2], %o3
- add %o0, 4, %o0 ! advance DST by 4
- stb %o3, [%o0-2]
- ldub [%o1-1], %o3
- bgu,pt %ncc, .smallnotalign4 ! loop til 3 or fewer bytes remain
- stb %o3, [%o0-1]
- add %o2, 3, %o2 ! restore count
-.smallleft:
- tst %o2
- bz,pt %ncc, .smallexit
- nop
-.smallleft3: ! 1, 2, or 3 bytes remain
- ldub [%o1], %o3 ! load one byte
- deccc %o2 ! reduce count for cc test
- bz,pt %ncc, .smallexit
- stb %o3, [%o0] ! store one byte
- ldub [%o1+1], %o3 ! load second byte
- deccc %o2
- bz,pt %ncc, .smallexit
- stb %o3, [%o0+1] ! store second byte
- ldub [%o1+2], %o3 ! load third byte
- stb %o3, [%o0+2] ! store third byte
- retl
- mov %g1, %o0 ! restore %o0
-
- .align 16
- nop ! affects loop icache alignment
-.smallwords:
- lduw [%o1], %o3 ! read word
-.smallwordx:
- subcc %o2, 8, %o2 ! update count
- stw %o3, [%o0] ! write word
- add %o1, 8, %o1 ! update SRC
- lduw [%o1-4], %o3 ! read word
- add %o0, 8, %o0 ! update DST
- bgu,pt %ncc, .smallwords ! loop until done
- stw %o3, [%o0-4] ! write word
- addcc %o2, 7, %o2 ! restore count
- bz,pt %ncc, .smallexit ! check for completion
- nop
- cmp %o2, 4 ! check for 4 or more bytes left
- blt .smallleft3 ! if not, go to finish up
- nop
- lduw [%o1], %o3
- add %o1, 4, %o1
- subcc %o2, 4, %o2
- stw %o3, [%o0]
- add %o0, 4, %o0
- bnz,pt %ncc, .smallleft3
- nop
- retl
- mov %g1, %o0 ! restore %o0
-
-.smallword:
- subcc %o2, 4, %o2 ! update count
- bgu,pt %ncc, .smallwordx
- lduw [%o1], %o3 ! read word
- addcc %o2, 3, %o2 ! restore count
- bz,pt %ncc, .smallexit
- stw %o3, [%o0] ! write word
- deccc %o2 ! reduce count for cc test
- ldub [%o1+4], %o3 ! load one byte
- bz,pt %ncc, .smallexit
- stb %o3, [%o0+4] ! store one byte
- ldub [%o1+5], %o3 ! load second byte
- deccc %o2
- bz,pt %ncc, .smallexit
- stb %o3, [%o0+5] ! store second byte
- ldub [%o1+6], %o3 ! load third byte
- stb %o3, [%o0+6] ! store third byte
-.smallexit:
- retl
- mov %g1, %o0 ! restore %o0
- .align 16
-.medium:
- neg %o0, %o5
- neg %o1, %o3
- andcc %o5, 7, %o5 ! bytes till DST 8 byte aligned
- and %o3, 7, %o3 ! bytes till SRC 8 byte aligned
- cmp %o5, %o3
- bne %ncc, continue
- sub %o5, %o3, %o3 ! -(bytes till SRC aligned after DST aligned)
- ! o3={-7, -6, ... 7} o3>0 => SRC overaligned
- ! src and dst are aligned.
- mov %o3, %g5 ! save %o3
- andcc %o1, 7, %o3 ! is src buf aligned on a 8 byte bound
- brz,pt %o3, src_dst_aligned_on_8
- mov %o3, %o5
- mov 8, %o4
- sub %o4, %o3, %o3
- cmp %o3, %o2
- bg,a,pn %ncc, 1f
- mov %o2, %o3
-1:
- ! %o3 has the bytes to be written in partial store.
- sub %o2, %o3, %o2
- prefetch [%o1],2
-
-7:
- deccc %o3 ! byte clearing loop
- ldub [%o1], %o4 ! load one byte
- stb %o4, [%o0]
- inc %o1 ! increment src
- bgu,pt %ncc, 7b
- inc %o0 ! increment dst
-
- mov %g5, %o3 ! restore %o3
-src_dst_aligned_on_8:
- ! check if we are copying 1k or more bytes
- cmp %o2, 511
- bgu,pt %ncc, copying_ge_512
- nop
- ba .medlword
- nop
-
-continue:
- andcc %o5, 7, %o5 ! bytes till DST 8 byte aligned
- bz %ncc, 2f
- nop
-
- sub %o2, %o5, %o2 ! update count
-
-1:
- ldub [%o1], %o4
- deccc %o5
- inc %o1
- stb %o4, [%o0]
- bgu,pt %ncc, 1b
- inc %o0
-
- ! Now DST is 8-byte aligned. o0, o1, o2 are current.
-
-2:
- andcc %o1, 0x3, %g0 ! test alignment
- bnz,pt %ncc, .mediumsetup ! branch to skip aligned cases
- ! if src, dst not aligned
- prefetch [%o1 + (1 * BLOCK_SIZE)], #n_reads
-
-/*
- * Handle all cases where src and dest are aligned on word
- * or long word boundaries. Use unrolled loops for better
- * performance. This option wins over standard large data
- * move when source and destination is in cache for medium
- * to short data moves.
- */
- andcc %o1, 0x7, %g0 ! test word alignment
- bz,pt %ncc, src_dst_lword_aligned ! branch to long word aligned case
- prefetch [%o1 + (2 * BLOCK_SIZE)], #n_reads
- cmp %o2, MED_WMAX ! limit to store buffer size
- bgu,pt %ncc, .mediumrejoin ! otherwise rejoin main loop
- nop
- subcc %o2, 15, %o2 ! adjust length to allow cc test
- ! for end of loop
- ble,pt %ncc, .medw15 ! skip big loop if less than 16
- prefetch [%o1 + (3 * BLOCK_SIZE)], #n_reads
-/*
- * no need to put prefetch in loop as prefetches have
- * already been issued for maximum loop size
- */
-.medw16:
- ld [%o1], %o4 ! load
- subcc %o2, 16, %o2 ! decrement length count
- stw %o4, [%o0] ! and store
- ld [%o1+4], %o3 ! a block of 16 bytes
- add %o1, 16, %o1 ! increase src ptr by 16
- stw %o3, [%o0+4]
- ld [%o1-8], %o4
- add %o0, 16, %o0 ! increase dst ptr by 16
- stw %o4, [%o0-8]
- ld [%o1-4], %o3
- bgu,pt %ncc, .medw16 ! repeat if at least 16 bytes left
- stw %o3, [%o0-4]
-.medw15:
- addcc %o2, 15, %o2 ! restore count
- bz,pt %ncc, .medwexit ! exit if finished
- nop
- cmp %o2, 8
- blt,pt %ncc, .medw7 ! skip if 7 or fewer bytes left
- nop !
- ld [%o1], %o4 ! load 4 bytes
- subcc %o2, 8, %o2 ! decrease count by 8
- stw %o4, [%o0] ! and store 4 bytes
- add %o1, 8, %o1 ! increase src ptr by 8
- ld [%o1-4], %o3 ! load 4 bytes
- add %o0, 8, %o0 ! increase dst ptr by 8
- stw %o3, [%o0-4] ! and store 4 bytes
- bz %ncc, .medwexit ! exit if finished
- nop
-.medw7: ! count is ge 1, less than 8
- cmp %o2, 3 ! check for 4 bytes left
- ble,pt %ncc, .medw3 ! skip if 3 or fewer bytes left
- nop !
- ld [%o1], %o4 ! load 4 bytes
- sub %o2, 4, %o2 ! decrease count by 4
- add %o1, 4, %o1 ! increase src ptr by 4
- stw %o4, [%o0] ! and store 4 bytes
- add %o0, 4, %o0 ! increase dst ptr by 4
- tst %o2 ! check for zero bytes left
- bz %ncc, .medwexit ! exit if finished
- nop
-.medw3: ! count is known to be 1, 2, or 3
- deccc %o2 ! reduce count by one
- ldub [%o1], %o3 ! load one byte
- bz,pt %ncc, .medwexit ! exit if last byte
- stb %o3, [%o0] ! store one byte
- ldub [%o1+1], %o3 ! load second byte
- deccc %o2 ! reduce count by one
- bz,pt %ncc, .medwexit ! exit if last byte
- stb %o3, [%o0+1] ! store second byte
- ldub [%o1+2], %o3 ! load third byte
- stb %o3, [%o0+2] ! store third byte
-.medwexit:
- retl
- mov %g1, %o0 ! restore %o0
-
-/*
- * Special case for handling when src and dest are both long word aligned
- * and total data to move is between SMALL_MAX and MED_MAX bytes
- */
-
- .align 16
- nop
-src_dst_lword_aligned:
-.medlword: ! long word aligned
- cmp %o2, MED_MAX ! limit to store buffer size
- bgu,pt %ncc, .mediumrejoin ! otherwise rejoin main loop
- nop
- subcc %o2, 31, %o2 ! adjust length to allow cc test
- ! for end of loop
- ble,pt %ncc, .medl31 ! skip big loop if less than 32
- prefetch [%o1 + (3 * BLOCK_SIZE)], #n_reads ! into the l2 cache
-/*
- * no need to put prefetch in loop as prefetches have
- * already been issued for maximum loop size
- */
-.medl32:
- ldx [%o1], %o4 ! load
- subcc %o2, 32, %o2 ! decrement length count
- stx %o4, [%o0] ! and store
- ldx [%o1+8], %o3 ! a block of 32 bytes
- add %o1, 32, %o1 ! increase src ptr by 32
- stx %o3, [%o0+8]
- ldx [%o1-16], %o4
- add %o0, 32, %o0 ! increase dst ptr by 32
- stx %o4, [%o0-16]
- ldx [%o1-8], %o3
- bgu,pt %ncc, .medl32 ! repeat if at least 32 bytes left
- stx %o3, [%o0-8]
-.medl31:
- addcc %o2, 16, %o2 ! adjust remaining count
- ble,pt %ncc, .medl15 ! skip if 15 or fewer bytes left
- nop !
- ldx [%o1], %o4 ! load and store 16 bytes
- add %o1, 16, %o1 ! increase src ptr by 16
- stx %o4, [%o0] !
- sub %o2, 16, %o2 ! decrease count by 16
- ldx [%o1-8], %o3 !
- add %o0, 16, %o0 ! increase dst ptr by 16
- stx %o3, [%o0-8]
-.medl15:
- addcc %o2, 15, %o2 ! restore count
- bz,pt %ncc, .medwexit ! exit if finished
- nop
- cmp %o2, 8
- blt,pt %ncc, .medw7 ! skip if 7 or fewer bytes left
- nop
- ldx [%o1], %o4 ! load 8 bytes
- add %o1, 8, %o1 ! increase src ptr by 8
- stx %o4, [%o0] ! and store 8 bytes
- subcc %o2, 8, %o2 ! decrease count by 8
- bz %ncc, .medwexit ! exit if finished
- add %o0, 8, %o0 ! increase dst ptr by 8
- ba .medw7
- nop
-
- .align 16
- nop
- nop
- nop
-unaligned_src_dst:
-
-.mediumsetup:
- prefetch [%o1 + (2 * BLOCK_SIZE)], #one_read
-.mediumrejoin:
- rd %fprs, %o4 ! check for unused fp
-
- add %o1, 8, %o1 ! prepare to round SRC upward
-
- sethi %hi(0x1234567f), %o5 ! For GSR.MASK
- or %o5, 0x67f, %o5
- andcc %o4, FPRS_FEF, %o4 ! test FEF, fprs.du = fprs.dl = 0
- bz,a %ncc, 3f
- wr %g0, FPRS_FEF, %fprs ! fprs.fef = 1
-3:
- cmp %o2, MEDIUM_MAX
- bmask %o5, %g0, %g0
-
- ! Compute o5 (number of bytes that need copying using the main loop).
- ! First, compute for the medium case.
- ! Then, if large case, o5 is replaced by count for block alignment.
- ! Be careful not to read past end of SRC
- ! Currently, o2 is the actual count remaining
- ! o3 is how much sooner we'll cross the alignment boundary
- ! in SRC compared to in DST
- !
- ! Examples: Let # denote bytes that should not be accessed
- ! Let x denote a byte already copied to align DST
- ! Let . and - denote bytes not yet copied
- ! Let | denote double alignment boundaries
- !
- ! DST: ######xx|........|--------|..###### o2 = 18
- ! o0
- !
- ! o3 = -3: SRC: ###xx...|.....---|-----..#|######## o5 = 8
- ! o1
- !
- ! o3 = 0: SRC: ######xx|........|--------|..###### o5 = 16-8 = 8
- ! o1
- !
- ! o3 = +1: SRC: #######x|x.......|.-------|-..##### o5 = 16-8 = 8
- ! o1
-
- mov %asi, %g5 ! save curr %asi
- wr %g0, ASI_CACHE_SPARING, %asi
-
- or %g0, -8, %o5
- alignaddr %o1, %g0, %o1 ! set GSR.ALIGN and align o1
-
- movrlz %o3, %g0, %o5 ! subtract 8 from o2+o3 only if o3>=0
- add %o5, %o2, %o5
- add %o5, %o3, %o5
-
- bleu %ncc, 4f
- andn %o5, 7, %o5 ! 8 byte aligned count
- neg %o0, %o5 ! 'large' case
- and %o5, BLOCK_SIZE-1, %o5 ! bytes till DST block aligned
-4:
- brgez,a %o3, .beginmedloop
- ldda [%o1-8]%asi, %d0
-
- add %o1, %o3, %o1 ! back up o1
-5:
- ldda [%o1]ASI_FL8_P, %d2
- inc %o1
- andcc %o1, 7, %g0
- bnz %ncc, 5b
- bshuffle %d0, %d2, %d0 ! shifts d0 left 1 byte and or's in d2
-
-.beginmedloop:
- tst %o5
- bz %ncc, .endmedloop
- sub %o2, %o5, %o2 ! update count for later
-
- ! Main loop to write out doubles. Note: o5 & 7 == 0
-
- ldd [%o1], %d2
- subcc %o5, 8, %o5 ! update local count
- bz,pn %ncc, 1f
- add %o1, 8, %o1 ! update SRC
-
-.medloop:
- faligndata %d0, %d2, %d4
- ldda [%o1]%asi, %d0
- subcc %o5, 8, %o5 ! update local count
- add %o1, 16, %o1 ! update SRC
- std %d4, [%o0]
- bz,pn %ncc, 2f
- faligndata %d2, %d0, %d6
- ldda [%o1 - 8]%asi, %d2
- subcc %o5, 8, %o5 ! update local count
- std %d6, [%o0 + 8]
- bnz,pt %ncc, .medloop
- add %o0, 16, %o0 ! update DST
-
-1:
- faligndata %d0, %d2, %d4
- fmovd %d2, %d0
- std %d4, [%o0]
- ba .endmedloop
- add %o0, 8, %o0
-
-2:
- std %d6, [%o0 + 8]
- sub %o1, 8, %o1
- add %o0, 16, %o0
-
-
-.endmedloop:
- ! Currently, o1 is pointing to the next double-aligned byte in SRC
- ! The 8 bytes starting at [o1-8] are available in d0
- ! At least one, and possibly all, of these need to be written.
-
- cmp %o2, BLOCK_SIZE
- bgu %ncc, .large ! otherwise, less than 16 bytes left
-
-#if 1
-
- /* This code will use partial stores. */
-
- mov %g0, %o5
- and %o3, 7, %o3 ! Number of bytes needed to completely
- ! fill %d0 with good (unwritten) data.
-
- subcc %o2, 8, %o2 ! update count (maybe too much)
- movl %ncc, %o2, %o5
- addcc %o3, %o5, %o5 ! extra bytes we can stuff into %d0
- sub %o3, %o5, %o3 ! update o3 (# bad bytes in %d0)
-
- bz %ncc, 2f
- alignaddr %o3, %g0, %g0 ! set GSR.ALIGN
-
-1:
- deccc %o5
- ldda [%o1]ASI_FL8_P, %d2
- inc %o1
- bgu %ncc, 1b
- bshuffle %d0, %d2, %d0 ! shifts d0 left 1 byte and or's in d2
-
-2:
- not %o3
- faligndata %d0, %d0, %d0 ! shift bytes to the left
- and %o3, 7, %o3 ! last byte to be stored in [%o0+%o3]
- edge8n %g0, %o3, %o5
- stda %d0, [%o0]%o5, ASI_PST8_P
- brlez %o2, .exit_memcpy
- add %o0, %o3, %o0 ! update DST to last stored byte
-3:
- inc %o0
- deccc %o2
- ldub [%o1], %o3
- stb %o3, [%o0]
- bgu %ncc, 3b
- inc %o1
-
-#else
-
- andcc %o3, 7, %o5 ! Number of bytes needed to completely
- ! fill %d0 with good (unwritten) data.
- bz %ncc, 2f
- sub %o5, 8, %o3 ! -(number of good bytes in %d0)
- cmp %o2, 8
- bl,a %ncc, 3f ! Not enough bytes to fill %d0
- add %o1, %o3, %o1 ! Back up %o1
-
-1:
- deccc %o5
- ldda [%o1]ASI_FL8_P, %d2
- inc %o1
- bgu %ncc, 1b
- bshuffle %d0, %d2, %d0 ! shifts d0 left 1 byte and or's in d2
-
-2:
- subcc %o2, 8, %o2
- std %d0, [%o0]
- bz %ncc, .exit_memcpy
- add %o0, 8, %o0
-3:
- ldub [%o1], %o3
- deccc %o2
- inc %o1
- stb %o3, [%o0]
- bgu %ncc, 3b
- inc %o0
-#endif
-
-.exit_memcpy:
- wr %o4, %g0, %fprs ! fprs = o4 restore fprs
- mov %g5, %asi ! restore %asi
- retl
- mov %g1, %o0
-
- .align ICACHE_LINE_SIZE
-.large:
- ! The following test for BSTORE_SIZE is used to decide whether
- ! to store data with a block store or with individual stores.
- ! The block store wins when the amount of data is so large
-	! that it causes other application data to be moved out
- ! of the L1 or L2 cache.
- ! On a Panther, block store can lose more often because block
- ! store forces the stored data to be removed from the L3 cache.
- !
- sethi %hi(BSTORE_SIZE),%o5
- or %o5,%lo(BSTORE_SIZE),%o5
- cmp %o2, %o5
- bgu %ncc, .xlarge
-
- ! %o0 I/O DST is 64-byte aligned
- ! %o1 I/O 8-byte aligned (and we've set GSR.ALIGN)
- ! %d0 I/O already loaded with SRC data from [%o1-8]
- ! %o2 I/O count (number of bytes that need to be written)
- ! %o3 I Not written. If zero, then SRC is double aligned.
- ! %o4 I Not written. Holds fprs.
- ! %o5 O The number of doubles that remain to be written.
-
- ! Load the rest of the current block
- ! Recall that %o1 is further into SRC than %o0 is into DST
-
- prefetch [%o0 + (0 * BLOCK_SIZE)], #n_writes
- prefetch [%o0 + (1 * BLOCK_SIZE)], #n_writes
- prefetch [%o0 + (2 * BLOCK_SIZE)], #n_writes
- ldda [%o1]%asi, %d2
- prefetch [%o1 + (3 * BLOCK_SIZE)], #one_read
- ldda [%o1 + 0x8]%asi, %d4
- faligndata %d0, %d2, %d16
- ldda [%o1 + 0x10]%asi, %d6
- faligndata %d2, %d4, %d18
- ldda [%o1 + 0x18]%asi, %d8
- faligndata %d4, %d6, %d20
- ldda [%o1 + 0x20]%asi, %d10
- or %g0, -8, %o5 ! if %o3 >= 0, %o5 = -8
- prefetch [%o1 + (4 * BLOCK_SIZE)], #one_read
- faligndata %d6, %d8, %d22
- ldda [%o1 + 0x28]%asi, %d12
-	movrlz	%o3, %g0, %o5	! if %o3 < 0, %o5 = 0 (needed later)
- faligndata %d8, %d10, %d24
- ldda [%o1 + 0x30]%asi, %d14
- faligndata %d10, %d12, %d26
- ldda [%o1 + 0x38]%asi, %d0
- sub %o2, BLOCK_SIZE, %o2 ! update count
- prefetch [%o1 + (5 * BLOCK_SIZE)], #one_read
- add %o1, BLOCK_SIZE, %o1 ! update SRC
-
- ! Main loop. Write previous block. Load rest of current block.
- ! Some bytes will be loaded that won't yet be written.
-1:
- ldda [%o1]%asi, %d2
- faligndata %d12, %d14, %d28
- ldda [%o1 + 0x8]%asi, %d4
- faligndata %d14, %d0, %d30
- std %d16, [%o0]
- std %d18, [%o0+8]
- std %d20, [%o0+16]
- std %d22, [%o0+24]
- std %d24, [%o0+32]
- std %d26, [%o0+40]
- std %d28, [%o0+48]
- std %d30, [%o0+56]
- sub %o2, BLOCK_SIZE, %o2 ! update count
- prefetch [%o0 + (6 * BLOCK_SIZE)], #n_writes
- prefetch [%o0 + (3 * BLOCK_SIZE)], #n_writes
- add %o0, BLOCK_SIZE, %o0 ! update DST
- ldda [%o1 + 0x10]%asi, %d6
- faligndata %d0, %d2, %d16
- ldda [%o1 + 0x18]%asi, %d8
- faligndata %d2, %d4, %d18
- ldda [%o1 + 0x20]%asi, %d10
- faligndata %d4, %d6, %d20
- ldda [%o1 + 0x28]%asi, %d12
- faligndata %d6, %d8, %d22
- ldda [%o1 + 0x30]%asi, %d14
- faligndata %d8, %d10, %d24
- ldda [%o1 + 0x38]%asi, %d0
- faligndata %d10, %d12, %d26
- cmp %o2, BLOCK_SIZE + 8
- prefetch [%o1 + (5 * BLOCK_SIZE)], #one_read
- bgu,pt %ncc, 1b
- add %o1, BLOCK_SIZE, %o1 ! update SRC
- faligndata %d12, %d14, %d28
- faligndata %d14, %d0, %d30
- stda %d16, [%o0]ASI_BLK_P ! store 64 bytes, bypass cache
- cmp %o2, BLOCK_SIZE
- bne %ncc, 2f ! exactly 1 block remaining?
- add %o0, BLOCK_SIZE, %o0 ! update DST
- brz,a %o3, 3f ! is SRC double aligned?
- ldd [%o1], %d2
-
-2:
- add %o5, %o2, %o5 ! %o5 was already set to 0 or -8
- add %o5, %o3, %o5
-
- ba .beginmedloop
- andn %o5, 7, %o5 ! 8 byte aligned count
-
- ! This is when there is exactly 1 block remaining and SRC is aligned
-3:
- ! %d0 was loaded in the last iteration of the loop above, and
- ! %d2 was loaded in the branch delay slot that got us here.
- ldd [%o1 + 0x08], %d4
- ldd [%o1 + 0x10], %d6
- ldd [%o1 + 0x18], %d8
- ldd [%o1 + 0x20], %d10
- ldd [%o1 + 0x28], %d12
- ldd [%o1 + 0x30], %d14
- stda %d0, [%o0]ASI_BLK_P
-
- ba .exit_memcpy
- nop
-
-
- .align 16
- ! two nops here causes loop starting at 1f below to be
- ! on a cache line boundary, improving performance
- nop
- nop
-xlarge:
-.xlarge:
- /*
- set 4096, %l2
- subcc %o2, %l2, %g0
- bge %ncc, size_ge_4k
- nop
- */
- ! %o0 I/O DST is 64-byte aligned
- ! %o1 I/O 8-byte aligned (and we've set GSR.ALIGN)
- ! %d0 I/O already loaded with SRC data from [%o1-8]
- ! %o2 I/O count (number of bytes that need to be written)
- ! %o3 I Not written. If zero, then SRC is double aligned.
- ! %o4 I Not written. Holds fprs.
- ! %o5 O The number of doubles that remain to be written.
-
- ! Load the rest of the current block
- ! Recall that %o1 is further into SRC than %o0 is into DST
-
- ! prefetch [%o1 + (3 * BLOCK_SIZE)], #one_read
- ! executed in delay slot for branch to .xlarge
- prefetch [%o1 + (4 * BLOCK_SIZE)], #one_read
- prefetch [%o1 + (5 * BLOCK_SIZE)], #one_read
- ldda [%o1]%asi, %d2
- prefetch [%o1 + (6 * BLOCK_SIZE)], #one_read
- ldda [%o1 + 0x8]%asi, %d4
- faligndata %d0, %d2, %d16
- ldda [%o1 + 0x10]%asi, %d6
- faligndata %d2, %d4, %d18
- ldda [%o1 + 0x18]%asi, %d8
- faligndata %d4, %d6, %d20
- ldda [%o1 + 0x20]%asi, %d10
- or %g0, -8, %o5 ! if %o3 >= 0, %o5 = -8
- faligndata %d6, %d8, %d22
- ldda [%o1 + 0x28]%asi, %d12
- movrlz %o3, %g0, %o5 ! if %o3 < 0, %o5 = 0 (needed later)
- faligndata %d8, %d10, %d24
- ldda [%o1 + 0x30]%asi, %d14
- faligndata %d10, %d12, %d26
- ldda [%o1 + 0x38]%asi, %d0
- sub %o2, BLOCK_SIZE, %o2 ! update count
- prefetch [%o1 + (7 * BLOCK_SIZE)], #one_read
- add %o1, BLOCK_SIZE, %o1 ! update SRC
-
- ! This point is 32-byte aligned since 24 instructions appear since
- ! the previous alignment directive.
-
-
- ! Main loop. Write previous block. Load rest of current block.
- ! Some bytes will be loaded that won't yet be written.
-1:
- ldda [%o1]%asi, %d2
- faligndata %d12, %d14, %d28
- ldda [%o1 + 0x8]%asi, %d4
- faligndata %d14, %d0, %d30
- stda %d16, [%o0]ASI_BLK_P
- sub %o2, BLOCK_SIZE, %o2 ! update count
- ldda [%o1 + 0x10]%asi, %d6
- faligndata %d0, %d2, %d16
- ldda [%o1 + 0x18]%asi, %d8
- faligndata %d2, %d4, %d18
- ldda [%o1 + 0x20]%asi, %d10
- faligndata %d4, %d6, %d20
- ldda [%o1 + 0x28]%asi, %d12
- faligndata %d6, %d8, %d22
- ldda [%o1 + 0x30]%asi, %d14
- faligndata %d8, %d10, %d24
- ldda [%o1 + 0x38]%asi, %d0
- faligndata %d10, %d12, %d26
- ! offset of 8*BLK+8 bytes works best over range of (src-dst) mod 1K
- prefetch [%o1 + (8 * BLOCK_SIZE) + 8], #one_read
- add %o0, BLOCK_SIZE, %o0 ! update DST
- cmp %o2, BLOCK_SIZE + 8
- ! second prefetch important to correct for occasional dropped
- ! initial prefetches, 5*BLK works best over range of (src-dst) mod 1K
- ! strong prefetch prevents drops on Panther, but Jaguar and earlier
-	! US-III models treat strong prefetches as weak prefetches
- ! to avoid regressions on customer hardware, we retain the prefetch
- prefetch [%o1 + (5 * BLOCK_SIZE)], #one_read
- bgu,pt %ncc, 1b
- add %o1, BLOCK_SIZE, %o1 ! update SRC
-
- faligndata %d12, %d14, %d28
- faligndata %d14, %d0, %d30
- stda %d16, [%o0]ASI_BLK_P ! store 64 bytes, bypass cache
- cmp %o2, BLOCK_SIZE
- bne %ncc, 2f ! exactly 1 block remaining?
- add %o0, BLOCK_SIZE, %o0 ! update DST
- brz,a %o3, 3f ! is SRC double aligned?
- ldd [%o1], %d2
-
-2:
- add %o5, %o2, %o5 ! %o5 was already set to 0 or -8
- add %o5, %o3, %o5
-
-
- ba .beginmedloop
- andn %o5, 7, %o5 ! 8 byte aligned count
-
-
- ! This is when there is exactly 1 block remaining and SRC is aligned
-3:
- ! %d0 was loaded in the last iteration of the loop above, and
- ! %d2 was loaded in the branch delay slot that got us here.
- ldd [%o1 + 0x08], %d4
- ldd [%o1 + 0x10], %d6
- ldd [%o1 + 0x18], %d8
- ldd [%o1 + 0x20], %d10
- ldd [%o1 + 0x28], %d12
- ldd [%o1 + 0x30], %d14
- stda %d0, [%o0]ASI_BLK_P
-
- ba .exit_memcpy
- nop
-
-copying_ge_512:
- mov %o0, %o5 ! save dst address for return value.
- ! both src and dst are aligned to 8 byte boundary.
- save %sp, -SA(STACK_OFFSET + SAVESIZE), %sp
- mov %i0, %o0
- mov %i1, %o1
- mov %i2, %o2
- mov %i3, %o3
- mov %i5, %o5
-#ifndef __sparcv9
- stx %g4, [%sp + STACK_OFFSET + g4_offset]
- stx %g5, [%sp + STACK_OFFSET + g5_offset]
-#endif
- rd %fprs, %g5 ! check for unused fp
- andcc %g5, FPRS_FEF, %g5 ! test FEF, fprs.du = fprs.dl = 0
- bz,a %ncc, 1f
- wr %g0, FPRS_FEF, %fprs ! fprs.fef = 1
-1:
-	!prefetch src buf
- sub %o1,1,%o3
- andn %o3,0x7f,%l1
- add %l1,128,%l1
- prefetch [%l1],2 !prefetch next 128b
- prefetch [%l1+64],2
- prefetch [%l1+(2*64)],2 !cont from above
- prefetch [%l1+(3*64)],2
-	!prefetch dst buf
- sub %o5,1,%o3
- andn %o3,0x7f,%l1
- add %l1,128,%l1
- prefetch [%l1],2 !prefetch next 128b
- prefetch [%l1+64],2
- prefetch [%l1+(2*64)],2 !cont from above
- prefetch [%l1+(3*64)],2
-
-	andcc	%o5,0x7f,%o3	!o3=0 means it is already 128-byte aligned
- brz,pn %o3,aligned_on_128
- sub %o3,128,%o3
-
- add %o2,%o3,%o2
-align_to_128:
- ldxa [%o1]ASI_CACHE_SPARING_PRIMARY, %o4
- add %o1,8,%o1 ! increment src pointer
- stxa %o4,[%o5]ASI_CACHE_SPARING_PRIMARY
- addcc %o3,8,%o3
- bl,pt %ncc,align_to_128
- add %o5,8,%o5 ! increment dst pointer
-
-aligned_on_128:
- andcc %o5,0x1ff,%o3 !%o3=0 when it is 512 b aligned.
- brnz,pn %o3, 4f
- mov %o2,%l4 !l4=count from 512 align
- set 4096, %l2
- subcc %o2, %l2, %g0
- bge,pn %ncc, stingray_optimized_copy
- nop
-4:
-
- sub %o5,8,%l6 !should be in current 512 chunk
- andn %l6,0x1ff,%o3 !%o3=aligned 512b addr
- add %o3,0x200,%o3 !%o3=next aligned 512b addr to start
- ! stingray_optimized_copy
-	sub	%o3,%o5,%o3	!o3=how many bytes in the current remaining chunk
- sub %o2,%o3,%l4 !l4=count from 512 align
- /*
- * if l4 is < 4096 do interleave_128_copy only.
- */
- set 4096, %l2
- subcc %l4, %l2, %g0
- bge,pn %ncc,6f
- nop
- mov %g0, %l4
- add %o5, %o2, %l1
- ba interleave_128_copy
- nop
-6:
- mov %o3, %o2
-	subcc	%o3,256,%g0	! if it is > 256 bytes, could use the
-				! interleave_128_copy
-	bl,pn	%ncc,copy_word	! otherwise use copy_word to finish the 512 byte
-				! alignment.
- !%o1=64 bytes data
- !%o5=next 8 byte addr to write
- !%o2=new count i.e how many bytes to write
-	add	%o5,%o2,%l1	!calc the last byte to write %l1
- ba interleave_128_copy
- nop
-
- .align 64
-interleave_128_copy:
- ! %l1 has the addr of the dest. buffer at or beyond which no write
- ! is to be done.
-	! %l4 has the number of bytes to copy using stingray_optimized_copy
-	!prefetch src
-
- add %o1, 256, %o3
- prefetch [%o3], 2 !1st 64 byte line of next 256 byte block
- add %o1, 384, %o3
- prefetch [%o3], 2 !3rd 64 byte line of next 256 byte block
- add %o1, 320, %o3
- prefetch [%o3], 2 !2nd 64 byte line of next 256 byte block
- add %o1, 448, %o3
- prefetch [%o3], 2 !4th 64 byte line of next 256 byte block
-
- !prefetch dst
-
- add %o5, 256, %o3
- prefetch [%o3], 2 !1st 64 byte line of next 256 byte block
- add %o5, 384, %o3
- prefetch [%o3], 2 !3rd 64 byte line of next 256 byte block
- add %o5, 320, %o3
- prefetch [%o3], 2 !2nd 64 byte line of next 256 byte block
- add %o5, 448, %o3
- prefetch [%o3], 2 !4th 64 byte line of next 256 byte block
-
- ldxa [%o1]ASI_CACHE_SPARING_PRIMARY, %o4
- stxa %o4,[%o5]ASI_CACHE_SPARING_PRIMARY !1st 64 byte line
- add %o1, 128, %o3
- ldxa [%o3]ASI_CACHE_SPARING_PRIMARY, %o4
- add %o5, 128, %o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_PRIMARY !3rd 64 byte line
- add %o1, (1 * 8), %o3
- ldxa [%o3]ASI_CACHE_SPARING_PRIMARY, %o4
- add %o5, (1 * 8), %o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o1, (1 * 8 + 128), %o3
- ldxa [%o3]ASI_CACHE_SPARING_PRIMARY, %o4
- add %o5, (1 * 8 + 128), %o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o1, (2 * 8),%o3
- ldxa [%o3]ASI_CACHE_SPARING_PRIMARY, %o4
- add %o5, (2 * 8),%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o1, (2 * 8 + 128) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_PRIMARY, %o4
- add %o5, (2 * 8 + 128) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o1, (3 * 8) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_PRIMARY, %o4
- add %o5, (3 * 8) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o1, (3 * 8 + 128) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_PRIMARY, %o4
- add %o5, (3 * 8 + 128) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o1, (4 * 8) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_PRIMARY, %o4
- add %o5, (4 * 8) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o1, (4 * 8 + 128) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_PRIMARY, %o4
- add %o5, (4 * 8 + 128) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o1, (5 * 8) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_PRIMARY, %o4
- add %o5, (5 * 8) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o1, (5 * 8 + 128) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_PRIMARY, %o4
- add %o5, (5 * 8 + 128) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o1, (6 * 8) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_PRIMARY, %o4
- add %o5, (6 * 8) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o1, (6 * 8 + 128) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_PRIMARY, %o4
- add %o5, (6 * 8 + 128) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o1, (7 * 8) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_PRIMARY, %o4
- add %o5, (7 * 8) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o1, (7 * 8 + 128) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_PRIMARY, %o4
- add %o5, (7 * 8 + 128) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o1, (8 * 8) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_PRIMARY, %o4
- add %o5, (8 * 8) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o1, (8 * 8 + 128) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_PRIMARY, %o4
- add %o5, (8 * 8 + 128) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o1, (9 * 8) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_PRIMARY, %o4
- add %o5, (9 * 8) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o1, (9 * 8 + 128) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_PRIMARY, %o4
- add %o5, (9 * 8 + 128) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o1, (10 * 8) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_PRIMARY, %o4
- add %o5, (10 * 8) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o1, (10 * 8 + 128) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_PRIMARY, %o4
- add %o5, (10 * 8 + 128) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o1, (11 * 8) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_PRIMARY, %o4
- add %o5, (11 * 8) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o1, (11 * 8 + 128) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_PRIMARY, %o4
- add %o5, (11 * 8 + 128) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o1, (12 * 8) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_PRIMARY, %o4
- add %o5, (12 * 8) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o1, (12 * 8 + 128) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_PRIMARY, %o4
- add %o5, (12 * 8 + 128) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o1, (13 * 8) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_PRIMARY, %o4
- add %o5, (13 * 8) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o1, (13 * 8 + 128) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_PRIMARY, %o4
- add %o5, (13 * 8 + 128) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o1, (14 * 8) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_PRIMARY, %o4
- add %o5, (14 * 8) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o1, (14 * 8 + 128) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_PRIMARY, %o4
- add %o5, (14 * 8 + 128) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o1, (15 * 8) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_PRIMARY, %o4
- add %o5, (15 * 8) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o1, (15 * 8 + 128) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_PRIMARY, %o4
- add %o5, (15 * 8 + 128) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o1, 256, %o1
- ! check if the next 256 byte copy will not exceed the number of
- ! bytes remaining to be copied.
- ! %l2 points to the dest buffer after copying 256 bytes more.
- ! %l1 points to dest. buffer at or beyond which no writes should be done.
- add %o5,512,%l2
-
- subcc %l1,%l2,%g0
- bge,pt %ncc,interleave_128_copy
- add %o5,256,%o5
-
-copy_word:
- and %o2,255,%o3
- and %o3,7,%o2
-
- ! Set the remaining doubles
- subcc %o3, 8, %o3 ! Can we store any doubles?
- bl,pn %ncc, 6f
- and %o2, 7, %o2 ! calc bytes left after doubles
-
- !prefetch src
-
- mov %o1, %o4
- prefetch [%o4], 2 !1st 64 byte line of next 256 byte block
- add %o1, 128, %o4
- prefetch [%o4], 2 !3rd 64 byte line of next 256 byte block
- add %o1, 64, %o4
- prefetch [%o4], 2 !2nd 64 byte line of next 256 byte block
- add %o1, 192, %o4
- prefetch [%o4], 2 !4th 64 byte line of next 256 byte block
-
- !prefetch dst
-
- mov %o5, %o4
- prefetch [%o4], 2 !1st 64 byte line of next 256 byte block
- add %o5, 128, %o4
- prefetch [%o4], 2 !3rd 64 byte line of next 256 byte block
- add %o5, 64, %o4
- prefetch [%o4], 2 !2nd 64 byte line of next 256 byte block
- add %o5, 192, %o4
- prefetch [%o4], 2 !4th 64 byte line of next 256 byte block
-
-5:
- ldxa [%o1]ASI_CACHE_SPARING_PRIMARY, %o4
- add %o1, 8, %o1
- stxa %o4, [%o5]ASI_CACHE_SPARING_PRIMARY
- subcc %o3, 8, %o3
- bge,pt %ncc, 5b
- add %o5, 8, %o5
-6:
- ! Set the remaining bytes
- brz %o2, can_we_do_stingray_optimized_copy
- nop
-
- ! Terminate the copy with a partial store.
- ! The data should be at d0
- ldxa [%o1]ASI_CACHE_SPARING_PRIMARY, %o4
- stx %o4, [%sp + STACK_OFFSET + scratch_offset]
- ldd [%sp + STACK_OFFSET + scratch_offset], %d0
-
- dec %o2 ! needed to get the mask right
- edge8n %g0, %o2, %o4
- stda %d0, [%o5]%o4, ASI_PST8_P
-can_we_do_stingray_optimized_copy:
- mov %l4, %o2
- brnz,pn %o2, stingray_optimized_copy
- nop
-
-exit:
- brnz %g5, 1f
- nop
- wr %g5, %g0, %fprs
-1:
-#ifndef __sparcv9
- ldx [%sp + STACK_OFFSET + g4_offset], %g4
- ldx [%sp + STACK_OFFSET + g5_offset], %g5
-#endif
- ret ! %o0 was preserved
- restore
-
-
-stingray_optimized_copy:
-!%o5 = next memory addr which is 512 b align
-!%l4 = remaining byte from 512 align.
-
- add %o5, %l4, %o2
-
- prefetch [%o1+0],2
- prefetch [%o1+(64*1)],2
- prefetch [%o1+(64*2)],2
- prefetch [%o1+(64*3)],2
- prefetch [%o1+(64*4)],2
- prefetch [%o1+(64*5)],2
- prefetch [%o1+(64*6)],2
- prefetch [%o1+(64*7)],2
- prefetch [%o1+(64*8)],2
- prefetch [%o1+(64*9)],2
- prefetch [%o1+(64*10)],2
- prefetch [%o1+(64*11)],2
- prefetch [%o1+(64*12)],2
- prefetch [%o1+(64*13)],2
- prefetch [%o1+(64*14)],2
- prefetch [%o1+(64*15)],2
-
- prefetch [%o5+0],2
- prefetch [%o5+(64*1)],2
- prefetch [%o5+(64*2)],2
- prefetch [%o5+(64*3)],2
- prefetch [%o5+(64*4)],2
- prefetch [%o5+(64*5)],2
- prefetch [%o5+(64*6)],2
- prefetch [%o5+(64*7)],2
- prefetch [%o5+(64*8)],2
- prefetch [%o5+(64*9)],2
- prefetch [%o5+(64*10)],2
- prefetch [%o5+(64*11)],2
- prefetch [%o5+(64*12)],2
- prefetch [%o5+(64*13)],2
- prefetch [%o5+(64*14)],2
- prefetch [%o5+(64*15)],2
-
- ba myloop2
- srl %l4, 12, %l4
-
- ! Local register usage:
- !
- ! %l1 address at short distance ahead of current %o1 for prefetching
- ! into L1 cache.
- ! %l2 address at far ahead of current %o1 for prefetching into L2 cache.
- ! %l3 save %o5 at start of inner loop.
- ! %l4 Number of 4k blocks to copy
- ! %g1 save %o1 at start of inner loop.
- ! %l5 iteration counter to make buddy loop execute 2 times.
- ! %l6 iteration counter to make inner loop execute 32 times.
- ! %l7 address at far ahead of current %o5 for prefetching destination
- ! into L2 cache.
-
-.align 64
-myloop2:
- set 2,%l5 ! %l5 is the loop count for the buddy loop, for 2 buddy lines.
- add %o5, 0, %l3
- add %o1, 0, %g1
-buddyloop:
- set PF_FAR, %g4 ! Prefetch far ahead. CHANGE FAR PREFETCH HERE.
- add %o1, %g4, %l2 ! For prefetching far ahead, set %l2 far ahead
- ! of %o1
- add %o1, PF_NEAR, %l1 ! For prefetching into L1 D$, set %l1 a
- ! little ahead of %o1
- add %o5, %g4, %l7 ! For prefetching far ahead, set %l7 far ahead
- ! of %o5
-
- add %l2, %g4, %g4 ! %g4 is now double far ahead of the source
- ! address in %o1.
- prefetch [%g4+%g0],2 ! Prefetch ahead by several pages to get TLB
- ! entry in advance.
- set 2*PF_FAR, %g4 ! Prefetch double far ahead. SET DOUBLE FAR
- ! PREFETCH HERE.
- add %o5, %g4, %g4 ! %g4 is now double far ahead of the dest
- ! address in %o5.
- prefetch [%g4+%g0],2 ! Prefetch ahead by 2 pages to get TLB entry
- ! in advance.
-
- set 4,%l6 ! %l6 = loop count for the inner loop,
- ! for 4 x 8 = 32 lines.
- set 0, %g4
-
- ! Each iteration of the inner loop below copies 8 sequential lines.
- ! This loop is iterated 4 times, to move a total of 32 lines,
- ! all of which have the same value of PA[9], so we increment the base
-	! address by 1024 bytes in each iteration, which varies PA[10].
-innerloop:
- /* ---- copy line 1 of 8. ---- */
- prefetch [%l2+%g4],2
- prefetch [%l7+%g4],2
- prefetch [%l1+%g4],1
-
- ldd [%o1],%d0
- ldd [%o1+8],%d2
- ldd [%o1+16],%d4
- ldd [%o1+24],%d6
- ldd [%o1+32],%d8
- ldd [%o1+40],%d10
- ldd [%o1+48],%d12
- ldd [%o1+56],%d14
- stda %d0,[%o5]ASI_BLK_P
- add %g4, 64, %g4
- add %o5, 64, %o5
- add %o1, 64, %o1 /* increment %o1 for the next source line. */
-
- /* ---- copy line 2 of 8. ---- */
- prefetch [%l2+%g4],2
- prefetch [%l7+%g4],2
- prefetch [%l1+%g4],1
-
- ldd [%o1],%d0
- ldd [%o1+8],%d2
- ldd [%o1+16],%d4
- ldd [%o1+24],%d6
- ldd [%o1+32],%d8
- ldd [%o1+40],%d10
- ldd [%o1+48],%d12
- ldd [%o1+56],%d14
- stda %d0,[%o5]ASI_BLK_P
- add %g4, 64, %g4
- add %o5, 64, %o5
- add %o1, 64, %o1 /* increment %o1 for the next source line. */
-
- /* ---- copy line 3 of 8. ---- */
- prefetch [%l2+%g4],2
- prefetch [%l7+%g4],2
- prefetch [%l1+%g4],1
-
- ldd [%o1],%d0
- ldd [%o1+8],%d2
- ldd [%o1+16],%d4
- ldd [%o1+24],%d6
- ldd [%o1+32],%d8
- ldd [%o1+40],%d10
- ldd [%o1+48],%d12
- ldd [%o1+56],%d14
- stda %d0,[%o5]ASI_BLK_P
- add %g4, 64, %g4
- add %o5, 64, %o5
- add %o1, 64, %o1 /* increment %o1 for the next source line. */
-
- /* ---- copy line 4 of 8. ---- */
- prefetch [%l2+%g4],2
- prefetch [%l7+%g4],2
- prefetch [%l1+%g4],1
-
- ldd [%o1],%d0
- ldd [%o1+8],%d2
- ldd [%o1+16],%d4
- ldd [%o1+24],%d6
- ldd [%o1+32],%d8
- ldd [%o1+40],%d10
- ldd [%o1+48],%d12
- ldd [%o1+56],%d14
- stda %d0,[%o5]ASI_BLK_P
- add %g4, 64, %g4
- add %o5, 64, %o5
- add %o1, 64, %o1 /* increment %o1 for the next source line. */
-
- /* ---- copy line 5 of 8. ---- */
- prefetch [%l2+%g4],2
- prefetch [%l7+%g4],2
- prefetch [%l1+%g4],1
-
- ldd [%o1],%d0
- ldd [%o1+8],%d2
- ldd [%o1+16],%d4
- ldd [%o1+24],%d6
- ldd [%o1+32],%d8
- ldd [%o1+40],%d10
- ldd [%o1+48],%d12
- ldd [%o1+56],%d14
- stda %d0,[%o5]ASI_BLK_P
- add %g4, 64, %g4
- add %o5, 64, %o5
- add %o1, 64, %o1 /* increment %o1 for the next source line. */
-
- /* ---- copy line 6 of 8. ---- */
- prefetch [%l2+%g4],2
- prefetch [%l7+%g4],2
- prefetch [%l1+%g4],1
-
- ldd [%o1],%d0
- ldd [%o1+8],%d2
- ldd [%o1+16],%d4
- ldd [%o1+24],%d6
- ldd [%o1+32],%d8
- ldd [%o1+40],%d10
- ldd [%o1+48],%d12
- ldd [%o1+56],%d14
- stda %d0,[%o5]ASI_BLK_P
- add %g4, 64, %g4
- add %o5, 64, %o5
- add %o1, 64, %o1 /* increment %o1 for the next source line. */
-
- /* ---- copy line 7 of 8. ---- */
- prefetch [%l2+%g4],2
- prefetch [%l7+%g4],2
- prefetch [%l1+%g4],1
-
- ldd [%o1],%d0
- ldd [%o1+8],%d2
- ldd [%o1+16],%d4
- ldd [%o1+24],%d6
- ldd [%o1+32],%d8
- ldd [%o1+40],%d10
- ldd [%o1+48],%d12
- ldd [%o1+56],%d14
- stda %d0,[%o5]ASI_BLK_P
- add %g4, 64, %g4
- add %o5, 64, %o5
- add %o1, 64, %o1 /* increment %o1 for the next source line. */
-
- /* ---- copy line 8 of 8. ---- */
- prefetch [%l2+%g4],2
- prefetch [%l7+%g4],2
- prefetch [%l1+%g4],1
-
- ldd [%o1],%d0
- ldd [%o1+8],%d2
- ldd [%o1+16],%d4
- ldd [%o1+24],%d6
- ldd [%o1+32],%d8
- ldd [%o1+40],%d10
- ldd [%o1+48],%d12
- ldd [%o1+56],%d14
- stda %d0,[%o5]ASI_BLK_P
-
- subcc %l6,1,%l6 /* Decrement the inner loop counter. */
-
- ! Now increment by 64 + 512 so we don't toggle PA[9]
- add %g4, 576, %g4
- add %o5, 576, %o5
-
- bg,pt %icc,innerloop
- add %o1, 576, %o1 ! advance %o1 past the buddy half to the next line.
- ! END OF INNER LOOP
-
-
- subcc %l5,1,%l5
- add %l3, 512, %o5 ! increment %o5 to first buddy line of dest.
- bg,pt %icc,buddyloop
- add %g1, 512 ,%o1 ! Set %o1 to the first of the odd buddy lines.
-
- subcc %l4, 1, %l4
- add %o5, 3584, %o5 ! Advance both base addresses to 4k above where
- ! they started.
- add %o1, 3584, %o1 ! They were already incremented by 512,
- ! so just add 3584.
-
- bg,pt %icc,myloop2
- nop
-
- /****larryalg_end_here*************/
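
A minimal C model of the address walk the loops above perform (an
illustrative sketch, not the shipped routine: the function name and the use
of memcpy as a stand-in for the ldd/stda block moves are assumptions). Each
inner-loop pass copies eight consecutive 64-byte lines, then skips the
512-byte buddy half so that PA[9] stays constant within the pass; the buddy
pass later covers the skipped halves:

    #include <string.h>

    #define LINE 64                     /* bytes moved per ldd/stda group */

    /* Sketch: copy one 4 KB chunk in the order the loops above walk it. */
    static void
    model_copy_4k(char *dst, const char *src)
    {
            for (int buddy = 0; buddy < 2; buddy++) {  /* even, then odd halves */
                    char *d = dst + buddy * 512;
                    const char *s = src + buddy * 512;
                    for (int i = 0; i < 4; i++) {      /* 4 x 8 = 32 lines */
                            for (int line = 0; line < 8; line++) {
                                    memcpy(d, s, LINE); /* one 64-byte line */
                                    d += LINE;
                                    s += LINE;
                            }
                            d += 512;   /* skip the buddy half: PA[9] unchanged */
                            s += 512;
                    }
            }
    }

Keeping all 32 lines of a pass on one value of PA[9] is what the buddy-line
scheme buys: each pass stays in one bank half, and the paired pass fills in
the other.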
-
- sub %o2,%o5,%o2 !how many bytes left
- brz,pn %o2,complete_write
- mov %g0,%l4
- add %o5,%o2,%l1 !calc the last byte to write into %l1
- subcc %o2,256,%g0
- bge,pt %ncc,interleave_128_copy
- mov %g0,%l4
-
- ba copy_word
- nop
-
-
-complete_write:
- ba exit
- nop
-
-
-
- SET_SIZE(memcpy)
- SET_SIZE(__align_cpy_1)
diff --git a/usr/src/lib/libc/sparc_hwcap1/common/gen/memset.s b/usr/src/lib/libc/sparc_hwcap1/common/gen/memset.s
deleted file mode 100644
index f9e0f62ac9..0000000000
--- a/usr/src/lib/libc/sparc_hwcap1/common/gen/memset.s
+++ /dev/null
@@ -1,767 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-
- .file "memset.s"
-/*
- * char *memset(sp, c, n)
- *
- * Set an array of n chars starting at sp to the character c.
- * Return sp.
- *
- * Fast assembler language version of the following C-program for memset
- * which represents the `standard' for the C-library.
- *
- * void *
- * memset(void *sp1, int c, size_t n)
- * {
- * if (n != 0) {
- * char *sp = sp1;
- * do {
- * *sp++ = (char)c;
- * } while (--n != 0);
- * }
- * return (sp1);
- * }
- */
-
-#include <sys/asm_linkage.h>
-#include <sys/sun4asi.h>
-
- ANSI_PRAGMA_WEAK(memset,function)
-
-#define SAVESIZE (8 * 1)
-#ifdef __sparcv9
-#define STACK_OFFSET (STACK_BIAS + 0)
-#else
-#define STACK_OFFSET (STACK_BIAS + 0 + 0)
-#endif
-#define scratch_offset 0
-
-#define ASI_CACHE_SPARING_PRIMARY 0xf4
-#define ALIGN8(X) (((X) + 7) & ~7)
-#define ICACHE_LINE_SIZE 64
-#define FPRS_FEF 0x4
-#define PF_FAR 2048
-
- .section ".text"
- .align ICACHE_LINE_SIZE
-
- /*
- * Optimizations done:
- *
- * No stores in the delay slot of branch instructions.
- * Conditional stores where possible.
- * Prefetch before doing stxa.
- * Bank-interleaved writing.
- */
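
The entry code below builds the 8-byte fill pattern with a shift/or ladder;
in C the same replication looks like this (a hedged sketch, not the shipped
code; the function name is illustrative):

    #include <stddef.h>
    #include <stdint.h>

    /* Sketch: replicate the fill byte, then store doublewords. */
    static void
    model_memset8(uint64_t *dst, int c, size_t ndoubles)
    {
            uint64_t pat = (uint8_t)c;  /* o1 is (char)c */
            pat |= pat << 8;            /* now 2 bytes of c */
            pat |= pat << 16;           /* now 4 bytes of c */
            pat |= pat << 32;           /* now 8 bytes of c */
            while (ndoubles-- != 0)
                    *dst++ = pat;       /* stands in for stx/stxa */
    }

The stx/stxa stores throughout the routine then emit this pattern eight
bytes at a time.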
-
- ENTRY(memset)
- add %sp, -SA(STACK_OFFSET + SAVESIZE), %sp
- mov %o0, %o5 ! copy sp1 before using it
- /*
- * If 0 bytes to xfer return
- */
- brnz %o2, continue
- nop
- retl
- add %sp, SA(STACK_OFFSET + SAVESIZE), %sp
-continue:
- /*
- * If the count is a multiple of 8 and the buffer is aligned to 8,
- * we don't have to look at fprs
- */
- or %o5, %o2, %o3
- and %o3, 7, %o3
- brnz %o3, check_fprs
- mov 4, %g1
- prefetch [%o5],2
- ba skip_rd_fprs
- nop
-
-check_fprs:
- rd %fprs, %g1 ! g1 = fprs
-skip_rd_fprs:
- prefetch [%o5],2
- andcc %g1, 0x4, %g1 ! fprs.du = fprs.dl = 0
- bnz %ncc, 1f ! Is fprs.fef == 1
- nop
- wr %g0, FPRS_FEF, %fprs ! fprs.fef = 1
-1:
- and %o1, 0xff, %o1 ! o1 is (char)c
- sll %o1, 8, %o3
- or %o1, %o3, %o1 ! now o1 has 2 bytes of c
- sll %o1, 16, %o3
- or %o1, %o3, %o1 ! now o1 has 4 bytes of c
- sllx %o1, 32, %o3
- or %o1, %o3, %o1 ! now o1 has 8 bytes of c
- stx %o1, [%sp + STACK_OFFSET + scratch_offset]
- ldd [%sp + STACK_OFFSET + scratch_offset], %d0
- cmp %o2, 8
- bge,pt %ncc, xfer_8_or_more
- mov %o0, %o5
- /*
- * Do a partial store of %o2 bytes
- */
- andcc %o5, 7, %o3 ! is sp1 aligned on an 8-byte boundary?
- brz,pt %o3, aligned_on_8
- sub %o5, %o3, %o5 ! align the destination buffer.
- mov %o3, %o1
- mov 8, %o4
- sub %o4, %o3, %o3
- cmp %o3, %o2
- bg,a,pn %ncc, 1f
- mov %o2, %o3
-1:
- ! %o3 has the bytes to be written in partial store.
- sub %o2, %o3, %o2
- dec %o3
- prefetch [%o5],2
- edge8n %g0, %o3, %o4
- srl %o4, %o1, %o4
- stda %d0, [%o5]%o4, ASI_PST8_P
- brz %o2, simple_ret
- add %o5, 8, %o5
-aligned_on_8:
- prefetch [%o5],2
- dec %o2 ! needed to get the mask right
- edge8n %g0, %o2, %o4
- stda %d0, [%o5]%o4, ASI_PST8_P
- brnz %g1, 1f ! was fprs.fef == 1
- nop
- wr %g1, %g0, %fprs ! fprs = g1 restore fprs
-1:
- retl
- add %sp, SA(STACK_OFFSET + SAVESIZE), %sp
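
The edge8n/stda ASI_PST8_P pairs above implement a masked partial store:
edge8n derives a byte-lane mask from the remaining count and the partial
store writes only the enabled lanes of the 8-byte FP register in a single
store. A hedged C model of the aligned tail case (a byte loop; lane order
assumes big-endian SPARC, and the function name is illustrative):

    #include <stdint.h>

    /* Sketch: store only the first n (1..8) bytes of an 8-byte pattern. */
    static void
    model_partial_store8(uint8_t *dst, uint64_t pat, int n)
    {
            for (int i = 0; i < n; i++)
                    dst[i] = (uint8_t)(pat >> (8 * (7 - i)));  /* MSB first */
    }

The unaligned prologue additionally shifts the mask (srl %o4, %o1, %o4)
before storing; the sketch covers only the aligned case.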
-
-xfer_8_or_more:
- andcc %o5, 7, %o3 ! is sp1 aligned on an 8-byte boundary?
- brz,pt %o3, blkchk
- sub %o5, %o3, %o5 ! align the destination buffer.
- sub %o3, 8, %o3 ! -(bytes till double aligned)
- add %o2, %o3, %o2 ! update o2 with new count
- xor %o3, 0xff, %o3
- and %o3, 7, %o3
- prefetch [%o5],2
- edge8ln %g0, %o3, %o4
- stda %d0, [%o5]%o4, ASI_PST8_P
- add %o5, 8, %o5
-
-
- ! Now sp1 is double aligned (sp1 is found in %o5)
-blkchk:
- cmp %o2, 767 ! if large count use Block ld/st
- bg,pt %ncc,blkwr
- nop
-
-
- and %o2, 24, %o3 ! o3 is {0, 8, 16, 24}
-
- brz %o3, skip_dw_loop
- nop
-
-1: subcc %o3, 8, %o3 ! double-word loop
- stx %o1, [%o5]
- bgu,pt %ncc, 1b
- add %o5, 8, %o5
-skip_dw_loop:
- andncc %o2, 31, %o4 ! o4 has 32 byte aligned count
- brz,pn %o4, 3f
- nop
- ba loop_32byte
- nop
-
- .align ICACHE_LINE_SIZE
-
-loop_32byte:
- subcc %o4, 32, %o4 ! main loop, 32 bytes per iteration
- stx %o1, [%o5]
- stx %o1, [%o5 + 8]
- stx %o1, [%o5 + 16]
- stx %o1, [%o5 + 24]
- bne,pt %ncc, loop_32byte
- add %o5, 32, %o5
-3:
- and %o2, 7, %o2 ! o2 has the remaining bytes (<8)
- brz %o2, skip_partial_copy
- nop
-
- ! Terminate the copy with a partial store.
- ! The data should be at d0
- prefetch [%o5],2
- dec %o2 ! needed to get the mask right
- edge8n %g0, %o2, %o4
- stda %d0, [%o5]%o4, ASI_PST8_P
-
-skip_partial_copy:
-simple_ret:
- brz,a %g1, 1f ! was fprs.fef == 0
- wr %g1, %g0, %fprs ! fprs = g1 restore fprs
-1:
- retl
- add %sp, SA(STACK_OFFSET + SAVESIZE), %sp
-
-blkwr:
- sub %o5,1,%o3
- andn %o3,0x7f,%o4
- add %o4,128,%o4
- prefetch [%o4],2 !prefetch next 128b
- prefetch [%o4+64],2
- prefetch [%o4+(2*64)],2 !cont from above
- prefetch [%o4+(3*64)],2
-
- andcc %o5,0x7f,%o3 !%o3 == 0 means it is already 128-byte aligned
- brz,pn %o3,alreadyalign128
- sub %o3,128,%o3
-
- add %o2,%o3,%o2
-align128:
- stxa %o1,[%o5]ASI_CACHE_SPARING_PRIMARY
- addcc %o3,8,%o3
- bl,pt %ncc,align128
- add %o5,8,%o5
-
-
-
-alreadyalign128:
- andcc %o5,0x1ff,%o3 !%o3 == 0 when it is 512-byte aligned.
- brnz,pn %o3, 4f
- mov %o2,%g5 !%g5 = count from the 512-byte aligned point
- set 4096, %o4
- subcc %o2, %o4, %g0
- bge,pn %ncc, larry_alg
- nop
-4:
-
- sub %o5,8,%o4 !should be in the current 512-byte chunk
- andn %o4,0x1ff,%o3 !%o3 = aligned 512-byte addr
- add %o3,0x200,%o3 !%o3 = next aligned 512-byte addr, where the larry loop starts
- sub %o3,%o5,%o3 !%o3 = how many bytes remain in the current chunk
- sub %o2,%o3,%g5 !%g5 = count from the 512-byte aligned point
- /*
- * if g5 is < 4096 do start_128 only.
- */
- set 4096, %o4
- subcc %g5, %o4, %g0
- bge,pn %ncc,6f
- nop
- mov %g0, %g5
- add %o5, %o2, %o4
- ba start_128
- nop
-6:
- mov %o3, %o2
- subcc %o3,256,%g0 !if it is > 256 bytes, use the store-interleave alg to write
- bl,pn %ncc,storeword !otherwise use storeword to finish the 512-byte alignment
- !%o1=fill pattern (8 bytes of c)
- !%o5=next 8-byte addr to write
- !%o2=new count, i.e. how many bytes to write
- add %o5,%o2,%o4 !calc the last byte to write into %o4
- ba start_128
- nop
-
- .align 64
-start_128:
- add %o5, 256, %o3
- prefetch [%o3], 2 !1st 64 byte line of next 256 byte block
- add %o5, 384, %o3
- prefetch [%o3], 2 !3rd 64 byte line of next 256 byte block
- add %o5, 320, %o3
- prefetch [%o3], 2 !2nd 64 byte line of next 256 byte block
- add %o5, 448, %o3
- prefetch [%o3], 2 !4th 64 byte line of next 256 byte block
- mov %o5, %o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY !1st 64 byte line
- add %o5,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY !3rd 64 byte line
- add %o5,8,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(2 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128 ,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(3 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(4 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(5 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(6 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(7 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(8 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(9 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(10 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(11 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(12 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(13 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(14 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(15 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,512,%o3 !%o3 = end of the next 256-byte block, to check if another block lies ahead
- subcc %o4,%o3,%g0 !%o4 = final byte location; %o3 = end of the next 256-byte block
- bge,pt %ncc,start_128 !branch taken means the next 256-byte block is still within the limit.
- add %o5,256,%o5
-
-!fall through to finish the remaining sub-256-byte tail
-storeword:
- and %o2,255,%o3
- and %o3,7,%o2
-
- ! Set the remaining doubles
- subcc %o3, 8, %o3 ! Can we store any doubles?
- bl,pn %ncc, 6f
- and %o2, 7, %o2 ! calc bytes left after doubles
-
-5:
- stxa %o1, [%o5]ASI_CACHE_SPARING_PRIMARY
- subcc %o3, 8, %o3
- bge,pt %ncc, 5b
- add %o5, 8, %o5
-6:
- ! Set the remaining bytes
- brz %o2, check_larry_alg ! safe to check all 64-bits
-
- ! Terminate the copy with a partial store.
- ! The data should be at d0
- dec %o2 ! needed to get the mask right
- edge8n %g0, %o2, %o4
- stda %d0, [%o5]%o4, ASI_PST8_P
-check_larry_alg:
- mov %g5, %o2
- brnz,pn %o2, larry_alg
- nop
-
-.exit:
- brz,a %g1, 1f ! was fprs.fef == 0
- wr %g1, %g0, %fprs ! fprs = g1 restore fprs
-1:
- retl ! %o0 was preserved
- add %sp, SA(STACK_OFFSET + SAVESIZE), %sp
-
-larry_alg:
- add %sp, SA(STACK_OFFSET + SAVESIZE), %sp
- save %sp, -SA(MINFRAME), %sp
- mov %i0, %o0
- mov %i1, %o1
- mov %i2, %o2
- mov %i3, %o3
- mov %i5, %o5
-!%o5 = next memory addr, which is 512-byte aligned
-!%g5 = remaining bytes from the 512-byte aligned point
-init:
- set 4096,%g6
-
- prefetch [%o5+0],2
- prefetch [%o5+(64*1)],2
- prefetch [%o5+(64*2)],2
- prefetch [%o5+(64*3)],2
- prefetch [%o5+(64*4)],2
- prefetch [%o5+(64*5)],2
- prefetch [%o5+(64*6)],2
- prefetch [%o5+(64*7)],2
- prefetch [%o5+(64*8)],2
- prefetch [%o5+(64*9)],2
- prefetch [%o5+(64*10)],2
- prefetch [%o5+(64*11)],2
- prefetch [%o5+(64*12)],2
- prefetch [%o5+(64*13)],2
- prefetch [%o5+(64*14)],2
- prefetch [%o5+(64*15)],2
- ba myloop2
- add %o5,%g5,%g5
- /* Local register usage:
- %l3 save %o5 at start of inner loop.
- %l4 scratch for prefetch distances.
- %l5 iteration counter to make the buddy loop execute 2 times.
- %l6 iteration counter to make the inner loop execute 4 times (4 x 8 = 32 lines).
- %l7 address far ahead of current %o5 for prefetching destination into L2 cache.
- */
-
- .align 64
-myloop2:
- /* Section 1 */
- set 2,%l5 /* %l5 is the loop count for the buddy loop, for 2 buddy lines. */
- add %o5, 0, %l3
-buddyloop:
- set PF_FAR, %l4 /* Prefetch far ahead. CHANGE FAR PREFETCH HERE. <<==== */
- add %o5, %l4, %l7 /* For prefetching far ahead, set %l7 far ahead of %o5 */
-
- set 2*PF_FAR, %l4 /* Prefetch double far ahead. SET DOUBLE FAR PREFETCH HERE. <<==== */
- add %o5, %l4, %l4 /* %l4 is now double far ahead of the dest address in %o5. */
- prefetch [%l4+%g0],2 /* Prefetch ahead by 2 pages to get TLB entry in advance. */
-
- set 4,%l6 /* %l6 = loop count for the inner loop, for 4 x 8 = 32 lines. */
- set 0, %l4
-
-
-/* Each iteration of the inner loop below writes 8 sequential lines. This loop is iterated 4 times,
- to move a total of 32 lines, all of which have the same value of PA[9], so we increment the base
- address by 1024 bytes in each iteration, which varies PA[10]. */
-innerloop:
- add %o5, PF_FAR, %o3
- prefetch [%o3],2
- add %o3, 64, %o3
- prefetch [%o3],2
- add %o3, 64, %o3
- prefetch [%o3],2
- add %o3, 64, %o3
- prefetch [%o3],2
- add %o3, 64, %o3
- prefetch [%o3],2
- add %o3, 64, %o3
- prefetch [%o3],2
- add %o3, 64, %o3
- prefetch [%o3],2
- add %o3, 64, %o3
- prefetch [%o3],2
-
- mov %o5, %o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY !1st 64 byte line
- add %o5,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY !3rd 64 byte line
- add %o5,8,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(2 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128 ,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(3 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(4 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(5 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(6 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(7 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(8 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(9 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(10 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(11 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(12 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(13 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(14 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(15 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
-
- add %o5,256,%o5
-
- mov %o5, %o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY !1st 64 byte line
- add %o5,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY !3rd 64 byte line
- add %o5,8,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(2 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128 ,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(3 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(4 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(5 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(6 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(7 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(8 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(9 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(10 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(11 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(12 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(13 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(14 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(15 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
-
- subcc %l6,1,%l6 /* Decrement the inner loop counter. */
-
- /* -------- Now increment by 256 + 512 so we don't toggle PA[9] -------- */
- add %o5, 768, %o5
-
- bg,pt %ncc,innerloop
- nop
-/* ------------------------ END OF INNER LOOP -------------------------- */
-
- subcc %l5,1,%l5
- add %l3, 512, %o5 /* increment %o5 to first buddy line of dest. */
- bg,pt %ncc,buddyloop
- nop
- add %o5, 3584, %o5 /* Advance the base address to 4k above where it started. */
- !%o5 = start of the next 4096-byte block.
- add %o5,%g6,%i5
- subcc %g5,%i5,%g0
- bge,pt %ncc,myloop2
- nop
-
-
- /****larryalg_end_here*************/
-
- sub %g5,%o5,%o2 !how many bytes left
- brz,pn %o2,complete_write
- mov %g0,%g5
- add %o5,%o2,%o4 !calc the last byte to write into %o4
- subcc %o2,256,%g0
- bge,pt %ncc,memset_128
- mov %g0,%g5
-
- ba memset_storeword
- nop
-
-
-complete_write:
- brz,a %g1, 1f ! was fprs.fef == 0
- wr %g1, %g0, %fprs ! fprs = g1 restore fprs
-1:
- ret ! %o0 was preserved
- restore
-
- .align 64
-memset_128:
- add %o5, 256, %o3
- prefetch [%o3], 2 !1st 64 byte line of next 256 byte block
- add %o5, 384, %o3
- prefetch [%o3], 2 !3rd 64 byte line of next 256 byte block
- add %o5, 320, %o3
- prefetch [%o3], 2 !2nd 64 byte line of next 256 byte block
- add %o5, 448, %o3
- prefetch [%o3], 2 !4th 64 byte line of next 256 byte block
- mov %o5, %o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY !1st 64 byte line
- add %o5,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY !3rd 64 byte line
- add %o5,8,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(2 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128 ,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(3 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(4 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(5 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(6 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(7 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(8 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(9 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(10 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(11 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(12 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(13 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(14 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,(15 * 8),%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
- add %o5,512,%l4 !%l4 = end of the next 256-byte block, to check if another block lies ahead
- add %o3,128,%o3
- stxa %o1,[%o3]ASI_CACHE_SPARING_PRIMARY
-!This branch condition is not needed when handling the bytes before the
-!4096-byte boundary, because that path issues this block only once and the
-!comparison data is stale. The branch really matters for the bytes after the
-!4096-byte boundary, where there may be multiple 256-byte blocks to work on.
-
- subcc %o4,%l4,%g0 !%o4 = final byte location; %l4 = end of the next 256-byte block
- bge,pt %ncc,memset_128 !branch taken means the next 256-byte block is still within the limit.
- add %o5,256,%o5
-
-!fall through to finish the remaining tail
-memset_storeword:
- and %o2,255,%o3
- and %o3,7,%o2
-
- ! Set the remaining doubles
- subcc %o3, 8, %o3 ! Can we store any doubles?
- bl,pn %ncc, 6f
- and %o2, 7, %o2 ! calc bytes left after doubles
-
-5:
- stxa %o1, [%o5]ASI_CACHE_SPARING_PRIMARY
- subcc %o3, 8, %o3
- bge,pt %ncc, 5b
- add %o5, 8, %o5
-6:
- ! Set the remaining bytes
- brz %o2, complete_write ! safe to check all 64-bits
-
- ! Terminate the copy with a partial store.
- ! The data should be at d0
- dec %o2 ! needed to get the mask right
- edge8n %g0, %o2, %o4
- stda %d0, [%o5]%o4, ASI_PST8_P
-
- brz,a %g1, 1f ! was fprs.fef == 0
- wr %g1, %g0, %fprs ! fprs = g1 restore fprs
-1:
- ret ! %o0 was preserved
- restore
-
-
- SET_SIZE(memset)
diff --git a/usr/src/lib/libc/sparc_hwcap1/common/gen/misc.s b/usr/src/lib/libc/sparc_hwcap1/common/gen/misc.s
deleted file mode 100644
index c2b4aa4c29..0000000000
--- a/usr/src/lib/libc/sparc_hwcap1/common/gen/misc.s
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include <sys/asm_linkage.h>
-
- ENTRY(_rock_pause)
- membar #Halt
- retl
- nop
- SET_SIZE(_rock_pause)
diff --git a/usr/src/lib/libc/sparc_hwcap1/common/gen/strcpy.s b/usr/src/lib/libc/sparc_hwcap1/common/gen/strcpy.s
deleted file mode 100644
index a9861b1a86..0000000000
--- a/usr/src/lib/libc/sparc_hwcap1/common/gen/strcpy.s
+++ /dev/null
@@ -1,340 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
- .file "strcpy.s"
-
-/*
- * strcpy(s1, s2)
- *
- * Copy string s2 to s1. s1 must be large enough. Return s1.
- *
- * Fast assembler language version of the following C-program strcpy
- * which represents the `standard' for the C-library.
- *
- * char *
- * strcpy(s1, s2)
- * register char *s1;
- * register const char *s2;
- * {
- * char *os1 = s1;
- *
- * while(*s1++ = *s2++)
- * ;
- * return(os1);
- * }
- *
- */
-
-#include <sys/asm_linkage.h>
-
- ! This implementation of strcpy works by first checking the
- ! source alignment and copying byte, halfword, or word
- ! quantities until the source ptr is aligned at an extended
- ! word boundary. Once this has occurred, the string is copied,
- ! checking for zero bytes, depending upon its dst ptr alignment.
- ! (Methods for xword, word, halfword, and byte copies are present;
- ! a rough C model of the xword case follows.)
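
A rough C model of the xword strategy just described (a sketch only: it
assumes both pointers are already 8-byte aligned and ignores the
strict-aliasing casts; the real code below handles every alignment case):

    #include <stdint.h>

    #define ONES    0x0101010101010101ULL
    #define HIGHS   0x8080808080808080ULL

    /* Sketch: copy aligned 8-byte words until one contains a '\0'. */
    static char *
    model_strcpy_xword(char *s1, const char *s2)
    {
            char *os1 = s1;
            const uint64_t *s = (const uint64_t *)s2;
            uint64_t *d = (uint64_t *)s1;

            for (;;) {
                    uint64_t x = *s++;
                    if (((x - ONES) & ~x & HIGHS) != 0)
                            break;      /* x holds the terminating '\0' */
                    *d++ = x;           /* no '\0': copy the whole xword */
            }
            const char *sb = (const char *)(s - 1);
            char *db = (char *)d;
            while ((*db++ = *sb++) != '\0') /* finish the last xword */
                    ;
            return (os1);
    }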
-
-#ifdef __sparcv9
-#define SAVESIZE (8 * 3)
-#define STACK_OFFSET (STACK_BIAS + MINFRAME)
-#else
-#define SAVESIZE (8 * 5)
-#define STACK_OFFSET (STACK_BIAS + MINFRAME + 4)
-#endif
-
-#define LABEL_ADDRESS(label, reg) \
- .pushlocals ;\
-0: rd %pc, reg ;\
- add reg, (label) - 0b, reg ;\
- .poplocals
-
-offset_table:
- .word .storexword - offset_table ! Offset 0 => xword aligned
- .word .storebyte1241 - offset_table ! Offset 1 or 5
- .word .storehalfword - offset_table ! Offset 2 or 6
- .word .storebyte1421 - offset_table ! Offset 3 or 7
- .word .storeword - offset_table ! Offset 4
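
offset_table and LABEL_ADDRESS together form a position-independent jump
table: rd %pc materializes the table's own address and each stored word is a
label offset relative to it. A rough GNU C analogue using computed gotos (an
illustrative sketch of the dispatch mechanism only, with empty targets):

    /* Sketch: PIC dispatch on destination alignment (GNU C extension). */
    static void
    model_dispatch(unsigned idx)        /* idx in 0..4, as computed below */
    {
            static void *offtab[] = {
                    &&storexword, &&storebyte1241, &&storehalfword,
                    &&storebyte1421, &&storeword
            };
            goto *offtab[idx];
    storexword:     return;             /* offset 0: xword aligned */
    storebyte1241:  return;             /* offset 1 or 5 */
    storehalfword:  return;             /* offset 2 or 6 */
    storebyte1421:  return;             /* offset 3 or 7 */
    storeword:      return;             /* offset 4 */
    }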
-
- .align 64
-#ifdef __sparcv9
- .skip 20
-#else
- .skip 12
-#endif
-
- ENTRY(strcpy)
- add %sp, -SA(STACK_OFFSET + SAVESIZE), %sp
-#ifndef __sparcv9
- stx %g4, [%sp + STACK_OFFSET + 24]
- stx %g5, [%sp + STACK_OFFSET + 32]
-#endif
- sethi %hi(0x01010101), %o4 ! 0x01010000
- sub %o1, %o0, %o3 ! src - dst
- or %o4, %lo(0x01010101), %o4 ! 0x01010101
- andcc %o1, 7, %g5 ! dword aligned ?
- sllx %o4, 32, %o5 ! 0x01010101 << 32
- mov %o0, %o2 ! save dst
- or %o4, %o5, %o4 ! 0x0101010101010101
-
- bz,pt %ncc, .srcaligned ! yup
- sllx %o4, 7, %o5 ! 0x8080808080808080
-
- sub %g0, %g5, %g4 ! count = -off
- ldx [%o1 + %g4], %o1 ! val = *(addr + -off)
- mov -1, %g1 ! mask = -1
- sllx %g5, 3, %g4 ! shift = off * 8
- srlx %g1, %g4, %g1 ! -1 >> ((addr & 7) * 8)
- orn %o1, %g1, %o1 ! val |= ~mask
-
- andn %o5, %o1, %g4 ! ~val & 0x80
- sub %o1, %o4, %g1 ! val - 0x01
- andcc %g4, %g1, %g4 ! ~val & 0x80 & (val - 0x01)
-
- sllx %g5, 3, %g4
- add %o2, 8, %o2 ! .zerobyte expects address = address + 8
- bnz,a,pn %xcc, .zerobyte ! Zero byte in the first xword
- sllx %o1, %g4, %o1 ! and data to be left justified
-
- sub %o2, 8, %o2
- mov 8, %g4
- sub %g4, %g5, %g1 ! Bytes to be written
- sub %g1, 1, %g4
-
-1: stub %o1, [%o2 + %g4]
- dec %g4
- brgez,pt %g4, 1b
- srlx %o1, 8, %o1
-
- add %o2, %g1, %o2 ! Move ptr by #bytes written
-
-.srcaligned:
- !! Check if the first dword contains zero after src is aligned
- ldx [%o2 + %o3], %o1 ! x = src[]
- andn %o5, %o1, %g1 ! ~x & 0x8080808080808080
- sub %o1, %o4, %g4 ! x - 0x0101010101010101
- andcc %g4, %g1, %g0 ! ((x - 0x0101010101010101) & ~x & 0x8080808080808080)
- bnz,a,pn %xcc, .zerobyte ! x has zero byte, handle end cases
- add %o2, 8, %o2 ! src += 8, dst += 8
-
- !! Determine the destination offset and branch
- !! to appropriate location
- and %o2, 3, %g4
- and %o2, 4, %g1
- or %g1, %g4, %g1
- movrnz %g4, 0, %g1
- movrnz %g1, 4, %g4
-
- !! %g4 contains the index of the jump address
- !! Load the address from the table.
- LABEL_ADDRESS(offset_table, %g1)
- sllx %g4, 2, %g4
- lduw [%g1 + %g4], %g4
- jmp %g1 + %g4
- add %o2, 8, %o2 ! src += 8, dst += 8
-
-.storexword:
- stx %o1, [%o2 - 8] ! store word to dst (address pre-incremented)
-
-1:
- ldx [%o2 + %o3], %o1 ! src dword
- add %o2, 8, %o2 ! src += 8, dst += 8
- andn %o5, %o1, %g1 ! ~dword & 0x8080808080808080
- sub %o1, %o4, %g4 ! dword - 0x0101010101010101
- andcc %g4, %g1, %g0 ! ((dword - 0x0101010101010101) & ~dword & 0x8080808080808080)
- bz,a,pt %xcc, 1b ! no zero byte if magic expression == 0
- stx %o1, [%o2 - 8] ! store word to dst (address pre-incremented)
-
- ba,a .zerobyte
-
-.storebyte1421:
- !! Offset 3 or 7
- srlx %o1, 56, %g1 ! %g1<7:0> = first byte; word aligned now
- stb %g1, [%o2 - 8] ! store first byte
- srlx %o1, 24, %g1 ! %g1<31:0> = bytes 2, 3, 4, 5
- stw %g1, [%o2 - 7] ! store bytes 2, 3, 4, 5
- srlx %o1, 8, %g1 ! %g1<15:0> = bytes 6, 7
- sth %g1, [%o2 - 3] ! store bytes 6, 7
-
- stx %l0, [%sp + STACK_OFFSET + 0]
- and %o2, 7, %g1
- stx %l1, [%sp + STACK_OFFSET + 8]
- cmp %g1, 3
- stx %l2, [%sp + STACK_OFFSET + 16]
-
- move %ncc, 40, %l0
- move %ncc, 24, %l1
- move %ncc, -11, %l2
-
- movne %ncc, 8, %l0
- movne %ncc, 56, %l1
- movne %ncc, -15, %l2
-
- ba .dstaligned
- mov %o1, %g5
-
-.storebyte1241:
- !! Offset 1 or 5
- srlx %o1, 56, %g1 ! %g1<7:0> = first byte; word aligned now
- stb %g1, [%o2 - 8] ! store first byte
- srlx %o1, 40, %g1 ! %g1<15:0> = bytes 2, 3
- sth %g1, [%o2 - 7] ! store bytes 2, 3
- srlx %o1, 8, %g1 ! %g1<31:0> = bytes 4, 5, 6, 7
- stw %g1, [%o2 - 5] ! store bytes 4, 5, 6, 7
-
- stx %l0, [%sp + STACK_OFFSET + 0]
- and %o2, 7, %g1
- stx %l1, [%sp + STACK_OFFSET + 8]
- cmp %g1, 1
- stx %l2, [%sp + STACK_OFFSET + 16]
-
- move %ncc, 56, %l0
- move %ncc, 8, %l1
- move %ncc, -9, %l2
-
- movne %ncc, 24, %l0
- movne %ncc, 40, %l1
- movne %ncc, -13, %l2
-
- ba .dstaligned
- mov %o1, %g5
-
-.storehalfword:
- srlx %o1, 48, %g1 ! get first and second byte
- sth %g1, [%o2 - 8] ! store first and second byte; word aligned now
- srlx %o1, 16, %g1 ! %g1<31:0> = bytes 3, 4, 5, 6
- stw %g1, [%o2 - 6] ! store bytes 3, 4, 5, 6
-
- stx %l0, [%sp + STACK_OFFSET + 0]
- and %o2, 7, %g1
- stx %l1, [%sp + STACK_OFFSET + 8]
- cmp %g1, 2
- stx %l2, [%sp + STACK_OFFSET + 16]
-
- move %ncc, 48, %l0
- move %ncc, 16, %l1
- move %ncc, -10, %l2
-
- movne %ncc, 16, %l0
- movne %ncc, 48, %l1
- movne %ncc, -14, %l2
-
- ba .dstaligned
- mov %o1, %g5
-
-.storeword:
- srlx %o1, 32, %g1 ! get bytes 1,2,3,4
- stw %g1, [%o2 - 8] ! store bytes 1,2,3,4 (address is pre-incremented)
-
- stx %l0, [%sp + STACK_OFFSET + 0]
- mov 32, %l0 ! Num of bits to be shifted left
- stx %l1, [%sp + STACK_OFFSET + 8]
- mov 32, %l1 ! Num of bits to be shifted right
- stx %l2, [%sp + STACK_OFFSET + 16]
- mov -12, %l2 ! -offset
- mov %o1, %g5
-
- nop ! Do not delete. Used for alignment.
-.dstaligned:
- ldx [%o2 + %o3], %o1 ! x = src[]
- add %o2, 8, %o2 ! src += 8, dst += 8
- andn %o5, %o1, %g1 ! ~x & 0x8080808080808080
- sub %o1, %o4, %g4 ! x - 0x0101010101010101
- andcc %g4, %g1, %g0 ! ((x - 0x0101010101010101) & ~x & 0x8080808080808080)
- bnz,a,pn %xcc, .finishup ! x has zero byte, handle end cases
- stb %g5, [%o2 - 9]
-
- sllx %g5, %l0, %g5
- srlx %o1, %l1, %g4
- or %g5, %g4, %g5
-
- stx %g5, [%o2 + %l2]
- ba .dstaligned
- mov %o1, %g5
-
-.finishup:
- cmp %l0, 56
- be,pn %ncc, .zerobyte_restore
- andcc %o2, 1, %g0
- bnz,a %ncc, 1f
- srlx %g5, 8, %g5
-
-1: srlx %l1, 4, %g4 ! g4 contains 1, 2 or 3
- sub %g4, 1, %g4 ! multiple of 16
- sllx %g4, 4, %g4 ! How many bits to shift
- srlx %g5, %g4, %l0
- add %o2, %l2, %g1
-
-2: sth %l0, [%g1]
- sub %g4, 16, %g4
- add %g1, 2, %g1
- brgez,a,pt %g4, 2b
- srlx %g5, %g4, %l0
-
-.zerobyte_restore:
- ldx [%sp + STACK_OFFSET + 0], %l0
- andn %o5, %o1, %o3 ! ~val & 0x80
- ldx [%sp + STACK_OFFSET + 8], %l1
- sub %o1, %o4, %g1 ! val - 0x01
- ldx [%sp + STACK_OFFSET + 16], %l2
-
- ba 1f
- andcc %o3, %g1, %o3 ! ~val & 0x80 & (val - 0x01)
-
-.zerobyte:
- !! %o5: 0x8080808080808080
- !! %o4: 0x0101010101010101
- !! %o1: Left-justified xword that contains the zero byte
- !! %o2: Address to be written + 8
-
- andn %o5, %o1, %o3 ! ~val & 0x80
- sub %o1, %o4, %g1 ! val - 0x01
- andcc %o3, %g1, %o3 ! ~val & 0x80 & (val - 0x01)
-
-1: srlx %o3, 7, %o3 ! shift 0x80 -> 0x01
- andn %o3, %o1, %o3 ! mask off leading 0x01 bytes
- lzd %o3, %o4 ! 7, 15, ... 63
-
- mov 64, %o5 ! Calc # of bytes to be discarded
- inc %o4 ! Include the zero byte too
- sub %o5, %o4, %o5 ! after the null byte
- sub %o2, 8, %o2 ! Adjust address which is +8 here.
- srlx %o1, %o5, %o1 ! Discard them
-
- srlx %o4, 3, %o4 ! Bits to bytes to be written
- dec %o4 ! dec 1 to use it as offset
-
-2: stub %o1, [%o2 + %o4]
- dec %o4
- brgez,pt %o4, 2b
- srlx %o1, 8, %o1
-
-#ifndef __sparcv9
- ldx [%sp + STACK_OFFSET + 24], %g4
- ldx [%sp + STACK_OFFSET + 32], %g5
-#endif
- retl ! done with leaf function
- add %sp, SA(STACK_OFFSET + SAVESIZE), %sp
- SET_SIZE(strcpy)
diff --git a/usr/src/lib/libc/sparc_hwcap1/common/gen/strlen.s b/usr/src/lib/libc/sparc_hwcap1/common/gen/strlen.s
deleted file mode 100644
index d2683ef381..0000000000
--- a/usr/src/lib/libc/sparc_hwcap1/common/gen/strlen.s
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
- .file "strlen.s"
-
-/*
- * strlen(s)
- *
- * Given string s, return length (not including the terminating null).
- *
- * Fast assembler language version of the following C-program strlen
- * which represents the `standard' for the C-library.
- *
- * size_t
- * strlen(s)
- * register const char *s;
- * {
- * register const char *s0 = s + 1;
- *
- * while (*s++ != '\0')
- * ;
- * return (s - s0);
- * }
- */
-
-#include <sys/asm_linkage.h>
-
- /*
- * There are two key optimizations in the routine below.
- * First, all memory accesses are 8 bytes wide. The time
- * for long strings is dominated by the latency of load
- * instructions in the inner loop, and going 8 bytes at
- * a time means 1/8th as much latency.
- *
- * Scanning an 8 byte word for a '\0' is made fast by
- * this formula (due to Alan Mycroft):
- * ~x & 0x8080808080808080 & (x - 0x0101010101010101)
- * The result of this formula is non-zero iff there's
- * a '\0' somewhere in x.
- *
- * Second, the cost of short strings is dominated by the
- * cost of figuring out which byte out of the last 8
- * contained the '\0' that terminated the string. We use
- * properties of the formula above to convert scanning the
- * word for '\0' into a single LZD instruction.
- */
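
In C, the zero-byte test reads as follows (a sketch of the formula alone,
using the constants from the comment above):

    #include <stdint.h>

    #define ONES    0x0101010101010101ULL
    #define HIGHS   0x8080808080808080ULL

    /* Sketch: Mycroft's test -- nonzero iff x contains a '\0' byte. */
    static int
    model_has_zero_byte(uint64_t x)
    {
            return (((x - ONES) & ~x & HIGHS) != 0);
    }

The expression is nonzero exactly when some byte of x is 0x00, which is what
the bz test in .strlen_findnull relies on.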
- .align 64
- .skip 4*4 ! force .strlen_findnull to align to 64 bytes
- ENTRY_NP(strlen)
- and %o0, 7, %o3 ! off = addr & 7
- sethi %hi(0x01010101), %o4 ! 0x01010000
-
- sub %g0, %o3, %o2 ! count = -off
- or %o4, %lo(0x01010101), %o4 ! 0x01010101
-
- ldx [%o0 + %o2], %o1 ! val = *(addr + count)
- sllx %o4, 32, %o5 ! 0x01010101 << 32
-
- mov -1, %g1 ! mask = -1
- sllx %o3, 3, %o3 ! shift = off * 8
-
- or %o4, %o5, %o4 ! 0x0101010101010101
- srlx %g1, %o3, %g1 ! -1 >> ((addr & 7) * 8)
-
- sllx %o4, 7, %o5 ! 0x8080808080808080
- orn %o1, %g1, %o1 ! val |= ~mask
-.strlen_findnull:
- !! %o0 - base address
- !! %o1 - xword from memory
- !! %o2 - index
- !! %o3 - result of test for '\0'
- !! %o4 - constant 0x0101.0101.0101.0101
- !! %o5 - constant 0x8080.8080.8080.8080
- !! %g1 - scratch
- andn %o5, %o1, %o3 ! ~val & 0x80
- sub %o1, %o4, %g1 ! val - 0x01
- andcc %o3, %g1, %o3 ! ~val & 0x80 & (val - 0x01)
- inc 8, %o2
- bz,a,pt %xcc, .strlen_findnull
- ldx [%o0 + %o2], %o1
-
- /*
- * The result of Mycroft's formula is a pattern of 0x80 and
- * 0x00 bytes. There's a 0x80 at every byte position where
- * there was a '\0' character, but a string of 0x01 bytes
- * immediately preceding a '\0' becomes a corresponding
- * string of 0x80 bytes. (e.g. 0x0101010101010100 becomes
- * 0x8080808080808080). We need one final step to discount
- * any leading 0x01 bytes, and then LZD can tell us how many
- * characters there were before the terminating '\0'.
- */
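
A hedged C model of this final step (assumptions: big-endian byte order, and
__builtin_clzll standing in for the SPARC lzd instruction; x must contain a
zero byte, since clz of 0 is undefined):

    #include <stdint.h>

    /* Sketch: bytes preceding the first '\0' in big-endian xword x. */
    static int
    model_bytes_before_nul(uint64_t x)
    {
            uint64_t t = (x - 0x0101010101010101ULL) & ~x &
                0x8080808080808080ULL;
            t = (t >> 7) & ~x;  /* shift 0x80 -> 0x01, drop leading 0x01 bytes */
            return (__builtin_clzll(t) >> 3);   /* lzd: 7,15,...,63 -> 0..7 */
    }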
- !! %o1 - last data word
- !! %o2 - length+8, plus 1-8 extra
- !! %o3 - xword with 0x80 for each 0x00 byte and leading 0x01
- sub %o2, 8, %o2 ! subtract off '\0' and last 8
- srlx %o3, 7, %o3 ! shift 0x80 -> 0x01
- andn %o3, %o1, %o3 ! mask off leading 0x01 bytes
- lzd %o3, %o3 ! 7, 15, ... 63
- srlx %o3, 3, %o3 ! 0 ... 7
-
- retl
- add %o2, %o3, %o0 ! add back bytes before '\0'
-
- SET_SIZE(strlen)
diff --git a/usr/src/lib/libc/sparc_hwcap1/sparc/Makefile b/usr/src/lib/libc/sparc_hwcap1/sparc/Makefile
index 3a299a35e0..d648203adc 100644
--- a/usr/src/lib/libc/sparc_hwcap1/sparc/Makefile
+++ b/usr/src/lib/libc/sparc_hwcap1/sparc/Makefile
@@ -28,8 +28,7 @@ LIBCBASE= $(SRC)/lib/libc/sparc
LIBRARY= libc_hwcap1.a
-EXTN_CPPFLAGS= -DSMT_PAUSE_FUNCTION=_rock_pause \
- -I$(SRC)/uts/sun4v -I$(ROOT)/usr/platform/sun4v/include
+EXTN_CPPFLAGS= -I$(SRC)/uts/sun4v -I$(ROOT)/usr/platform/sun4v/include
EXTN_ASFLAGS= -xarch=v8plusd
EXTN_DYNFLAGS= -M mapfile
@@ -40,10 +39,10 @@ OPTIMIZED_LIBCBASE=../common
PRFOBJS= \
memcpy.o \
+ memmove.o \
memset.o \
strlen.o \
strcpy.o \
- misc.o
MAPFILE_AUX = mapfile-vers-aux
diff --git a/usr/src/lib/libc/sparc_hwcap1/sparcv9/Makefile b/usr/src/lib/libc/sparc_hwcap1/sparcv9/Makefile
index 7065a134f2..451d682145 100644
--- a/usr/src/lib/libc/sparc_hwcap1/sparcv9/Makefile
+++ b/usr/src/lib/libc/sparc_hwcap1/sparcv9/Makefile
@@ -27,8 +27,7 @@ LIBCBASE= $(SRC)/lib/libc/sparcv9
LIBRARY= libc_hwcap1.a
-EXTN_CPPFLAGS= -DSMT_PAUSE_FUNCTION=_rock_pause \
- -I$(SRC)/uts/sun4v -I$(ROOT)/usr/platform/sun4v/include
+EXTN_CPPFLAGS= -I$(SRC)/uts/sun4v -I$(ROOT)/usr/platform/sun4v/include
EXTN_ASFLAGS= -xarch=v9d
EXTN_DYNFLAGS= -M mapfile
@@ -39,10 +38,10 @@ OPTIMIZED_LIBCBASE=../common
PRFOBJS= \
memcpy.o \
+ memmove.o \
memset.o \
strlen.o \
strcpy.o \
- misc.o
MAPFILE_AUX = mapfile-vers-aux
diff --git a/usr/src/lib/libdisasm/sparc/dis_sparc_fmt.c b/usr/src/lib/libdisasm/sparc/dis_sparc_fmt.c
index 5107c5d8bc..56e36e6d3c 100644
--- a/usr/src/lib/libdisasm/sparc/dis_sparc_fmt.c
+++ b/usr/src/lib/libdisasm/sparc/dis_sparc_fmt.c
@@ -20,12 +20,12 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
- * Copyright 2008 Jason King. All rights reserved.
+ * Copyright 2009 Jason King. All rights reserved.
* Use is subject to license terms.
*/
@@ -389,15 +389,15 @@ typedef struct formatmbr {
uint32_t op3:6;
uint32_t rs1:5;
uint32_t i:1;
- uint32_t undef:5;
- uint32_t cmask:4;
+ uint32_t undef:6;
+ uint32_t cmask:3;
uint32_t mmask:4;
} formatmbr_t;
#elif defined(_BIT_FIELDS_LTOH)
typedef struct formatmbr {
uint32_t mmask:4;
- uint32_t cmask:4;
- uint32_t undef:5;
+ uint32_t cmask:3;
+ uint32_t undef:6;
uint32_t i:1;
uint32_t rs1:5;
uint32_t op3:6;
@@ -566,8 +566,8 @@ static const char *membar_mmask[4] = {
"#LoadLoad", "#StoreLoad", "#LoadStore", "#StoreStore"
};
-static const char *membar_cmask[4] = {
- "#Lookaside", "#MemIssue", "#Sync", "#Halt"
+static const char *membar_cmask[3] = {
+ "#Lookaside", "#MemIssue", "#Sync"
};
/* v8 ancillary state register names */
@@ -592,15 +592,15 @@ static const char *v9_asr_names[32] = {
"%pcr", "%pic", "%dcr", "%gsr",
"%softint_set", "%softint_clr", "%softint", "%tick_cmpr",
"%stick", "%stick_cmpr", NULL, NULL,
- "%cps", NULL, NULL, NULL
+ NULL, NULL, NULL, NULL
};
/*
* on v9, only certain registers are valid for read or writing
* these are bitmasks corresponding to which registers are valid in which
- * case
+ * case. Any access to %dcr is illegal.
*/
-static const uint32_t v9_asr_rdmask = 0x13cb007d;
-static const uint32_t v9_asr_wrmask = 0x13fb004d;
+static const uint32_t v9_asr_rdmask = 0x03cb007d;
+static const uint32_t v9_asr_wrmask = 0x03fb004d;
/* privileged register names on v9 */
/* TODO: compat - NULL to %priv_nn */
@@ -617,7 +617,7 @@ static const char *v9_privreg_names[32] = {
/* hyper privileged register names on v9 */
static const char *v9_hprivreg_names[32] = {
- "%hpstate", "%htstate", "%hrstba", "%hintp",
+ "%hpstate", "%htstate", NULL, "%hintp",
NULL, "%htba", "%hver", NULL,
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
@@ -629,8 +629,8 @@ static const char *v9_hprivreg_names[32] = {
static const uint32_t v9_pr_rdmask = 0x80017fff;
static const uint32_t v9_pr_wrmask = 0x00017fff;
-static const uint32_t v9_hpr_rdmask = 0x8000006f;
-static const uint32_t v9_hpr_wrmask = 0x8000006f;
+static const uint32_t v9_hpr_rdmask = 0x8000006b;
+static const uint32_t v9_hpr_wrmask = 0x8000006b;
static const char *prefetch_str[32] = {
"#n_reads", "#one_read",
@@ -784,7 +784,6 @@ fmt_branch(dis_handle_t *dhp, uint32_t instr, const inst_t *inp, int idx)
int32_t disp;
uint32_t flags = inp->in_data.in_def.in_flags;
int octal = ((dhp->dh_flags & DIS_OCTAL) != 0);
- int chkpt = 0;
if ((dhp->dh_debug & DIS_DEBUG_PRTFMT) != 0) {
prt_field("op", f->f2.op, 2);
@@ -822,13 +821,6 @@ fmt_branch(dis_handle_t *dhp, uint32_t instr, const inst_t *inp, int idx)
flags = FLG_RS1(REG_NONE)|FLG_DISP(DISP19);
}
- if (f->f2b.op2 == 0x01 && f->f2b.a == 1 &&
- f->f2b.p == 0 && f->f2b.cond == 0x8 && f->f2b.cc == 0x01) {
- name = "chkpt";
- flags = FLG_RS1(REG_NONE)|FLG_DISP(DISP19);
- chkpt = 1;
- }
-
switch (FLG_DISP_VAL(flags)) {
case DISP22:
@@ -867,11 +859,7 @@ fmt_branch(dis_handle_t *dhp, uint32_t instr, const inst_t *inp, int idx)
}
}
- if (!chkpt) {
- (void) snprintf(buf, sizeof (buf), "%s%s%s", name, annul, pred);
- } else {
- (void) snprintf(buf, sizeof (buf), "%s", name);
- }
+ (void) snprintf(buf, sizeof (buf), "%s%s%s", name, annul, pred);
prt_name(dhp, buf, 1);
@@ -884,19 +872,11 @@ fmt_branch(dis_handle_t *dhp, uint32_t instr, const inst_t *inp, int idx)
break;
case DISP19:
- if (!chkpt) {
- bprintf(dhp,
- (octal != 0) ? "%s, %s0%-5lo <" :
- "%s, %s0x%-04lx <",
- r,
- (disp < 0) ? "-" : "+",
- (disp < 0) ? (-disp) : disp);
- } else {
- bprintf(dhp,
- (octal != 0) ? "%s0%-5lo <" : "%s0x%-04lx <",
- (disp < 0) ? "-" : "+",
- (disp < 0) ? (-disp) : disp);
- }
+ bprintf(dhp,
+ (octal != 0) ? "%s, %s0%-5lo <" :
+ "%s, %s0x%-04lx <", r,
+ (disp < 0) ? "-" : "+",
+ (disp < 0) ? (-disp) : disp);
break;
case DISP16:
@@ -1328,7 +1308,7 @@ dis_fmt_rdwr(dis_handle_t *dhp, uint32_t instr, const inst_t *inp, int idx)
first = 0;
- for (i = 0; i < 5; ++i) {
+ for (i = 0; i < 4; ++i) {
if ((f->fmb.cmask & (1L << i)) != 0) {
bprintf(dhp, "%s%s",
(first != 0) ? "|" : "",
@@ -1503,7 +1483,6 @@ fmt_trap(dis_handle_t *dhp, uint32_t instr, const inst_t *inp, int idx)
int v9 = ((dhp->dh_flags & (DIS_SPARC_V9|DIS_SPARC_V9_SGI)) != 0);
int p_rs1, p_t;
- char failstr[8] = "fail";
if (f->ftcc.undef != 0)
return (-1);
@@ -1530,26 +1509,13 @@ fmt_trap(dis_handle_t *dhp, uint32_t instr, const inst_t *inp, int idx)
(p_rs1 != 0) ? " + " : "",
(p_t != 0) ? reg_names[f->f3.rs2] : "");
} else {
- if ((p_rs1 == 0) && (f->ftcc.immtrap == 0xF)) {
- (void) strlcat(failstr,
- (const char *)&(inp->in_data.in_def.in_name[1]),
- sizeof (failstr));
-
- prt_name(dhp, failstr, 1);
- bprintf(dhp, "%s%s%s",
- (v9 != 0) ? icc_names[f->ftcc2.cc] : "",
- (p_rs1 != 0) ? reg_names[f->ftcc2.rs1] : "",
- (p_rs1 != 0) ? " + " : "");
- } else {
bprintf(dhp, "%-9s %s%s%s%s0x%x", inp->in_data.in_def.in_name,
(v9 != 0) ? icc_names[f->ftcc2.cc] : "",
(v9 != 0) ? ", " : "",
(p_rs1 != 0) ? reg_names[f->ftcc2.rs1] : "",
(p_rs1 != 0) ? " + " : "",
f->ftcc.immtrap);
- }
}
-
return (0);
}
@@ -1894,17 +1860,9 @@ fmt_alu(dis_handle_t *dhp, uint32_t instr, const inst_t *inp, int idx)
return (0);
case 0x3b:
- if (f->f3.rd == 1) {
- /* flusha */
- prt_name(dhp, "flusha", 1);
- prt_address(dhp, instr, 0);
- (void) strlcat(dhp->dh_buf, " ", dhp->dh_buflen);
- prt_asi(dhp, instr);
- } else {
- /* flush */
- prt_name(dhp, name, 1);
- prt_address(dhp, instr, 0);
- }
+ /* flush */
+ prt_name(dhp, name, 1);
+ prt_address(dhp, instr, 0);
return (0);
case 0x3c:
diff --git a/usr/src/lib/libdisasm/sparc/instr.c b/usr/src/lib/libdisasm/sparc/instr.c
index 022d60af53..aa357837a0 100644
--- a/usr/src/lib/libdisasm/sparc/instr.c
+++ b/usr/src/lib/libdisasm/sparc/instr.c
@@ -20,12 +20,12 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
- * Copyright 2007 Jason King. All rights reserved.
+ * Copyright 2009 Jason King. All rights reserved.
* Use is subject to license terms.
*/
@@ -155,12 +155,12 @@ static const table_t Bicc_table = {
};
static const inst_t BPr_table_def[16] = {
- INST("brnr", V9, FLG_PRED|FLG_DISP(DISP16)|FLG_RS1(REG_INT)),
+ INVALID,
INST("brz", V9|V9S, FLG_PRED|FLG_DISP(DISP16)|FLG_RS1(REG_INT)),
INST("brlez", V9|V9S, FLG_PRED|FLG_DISP(DISP16)|FLG_RS1(REG_INT)),
INST("brlz", V9|V9S, FLG_PRED|FLG_DISP(DISP16)|FLG_RS1(REG_INT)),
- INST("brr", V9, FLG_PRED|FLG_DISP(DISP16)|FLG_RS1(REG_INT)),
+ INVALID,
INST("brnz", V9|V9S, FLG_PRED|FLG_DISP(DISP16)|FLG_RS1(REG_INT)),
INST("brgz", V9|V9S, FLG_PRED|FLG_DISP(DISP16)|FLG_RS1(REG_INT)),
INST("brgez", V9|V9S, FLG_PRED|FLG_DISP(DISP16)|FLG_RS1(REG_INT)),
@@ -483,10 +483,7 @@ static const inst_t tr_table_def[32] = {
/* 0x10 */
INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID,
- INVALID, INVALID, INVALID, INVALID, INVALID, INVALID,
- INST("commit", V9, 0),
- INVALID
-
+ INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID
};
static const table_t tr_table = {
@@ -637,12 +634,7 @@ static const inst_t FPop1_table_def[512] = {
INST("fsqrtq", VALL,
FLG_P1(REG_NONE)|FLG_P2(REG_FPQ)|FLG_NOIMM|FLG_P3(REG_FPQ)),
- INVALID,
- INST("frsqrt1xs", V9,
- FLG_P1(REG_NONE)|FLG_P2(REG_FPQ)|FLG_NOIMM|FLG_P3(REG_FPQ)),
- INST("frsqrt1xd", VALL,
- FLG_P1(REG_NONE)|FLG_P2(REG_FPD)|FLG_NOIMM|FLG_P3(REG_FPD)),
- INVALID,
+ INVALID, INVALID, INVALID, INVALID,
/* 0x30 */
INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID,
@@ -683,31 +675,11 @@ static const inst_t FPop1_table_def[512] = {
FLG_P1(REG_FPQ)|FLG_P2(REG_FPQ)|FLG_NOIMM|FLG_P3(REG_FPQ)),
/* 0x050 */
- INVALID,
- INST("fnadds", V9S,
- FLG_P1(REG_FP)|FLG_P2(REG_FP)|FLG_NOIMM|FLG_P3(REG_FP)),
- INST("fnaddd", V9S,
- FLG_P1(REG_FPD)|FLG_P2(REG_FPD)|FLG_NOIMM|FLG_P3(REG_FPD)),
- INVALID, INVALID, INVALID, INVALID, INVALID,
- INVALID,
- INST("fnmuls", V9S,
- FLG_P1(REG_FP)|FLG_P2(REG_FP)|FLG_NOIMM|FLG_P3(REG_FP)),
- INST("fnmuld", V9S,
- FLG_P1(REG_FPD)|FLG_P2(REG_FPD)|FLG_NOIMM|FLG_P3(REG_FPD)),
- INVALID, INVALID, INVALID, INVALID, INVALID,
+ INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID,
+ INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID,
/* 0x060 */
- INVALID,
- INST("fhadds", V9,
- FLG_P1(REG_FP)|FLG_P2(REG_FP)|FLG_NOIMM|FLG_P3(REG_FP)),
- INST("fhaddd", V9,
- FLG_P1(REG_FPD)|FLG_P2(REG_FPD)|FLG_NOIMM|FLG_P3(REG_FPD)),
- INVALID, INVALID,
- INST("fhsubs", V9S,
- FLG_P1(REG_FP)|FLG_P2(REG_FP)|FLG_NOIMM|FLG_P3(REG_FP)),
- INST("fhsubd", V9S,
- FLG_P1(REG_FPD)|FLG_P2(REG_FPD)|FLG_NOIMM|FLG_P3(REG_FPD)),
- INVALID,
+ INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID,
/* 0x068 */
INVALID,
@@ -723,16 +695,8 @@ static const inst_t FPop1_table_def[512] = {
INVALID,
/* 0x070 */
- INVALID,
- INST("fnhadds", V9S,
- FLG_P1(REG_FP)|FLG_P2(REG_FP)|FLG_NOIMM|FLG_P3(REG_FP)),
- INST("fnhaddd", V9S,
- FLG_P1(REG_FPD)|FLG_P2(REG_FPD)|FLG_NOIMM|FLG_P3(REG_FPD)),
- INVALID, INVALID, INVALID, INVALID, INVALID,
- INVALID,
- INST("fnsmuld", V9S,
- FLG_P1(REG_FP)|FLG_P2(REG_FP)|FLG_NOIMM|FLG_P3(REG_FPD)),
- INVALID, INVALID, INVALID, INVALID, INVALID, INVALID,
+ INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID,
+ INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID, INVALID,
/* 0x080 */
INVALID,
@@ -1600,40 +1564,6 @@ static const table_t fused_table = {
.tbl_inp = fused_table_def
};
-static const inst_t unfused_table_def[16] = {
- /* 0x0 */
- INVALID,
- INST("fumadds", V9, FLG_P1(REG_FP)),
- INST("fumaddd", V9, FLG_P1(REG_FPD)),
- INVALID,
-
- /* 0x4 */
- INVALID,
- INST("fumsubs", V9, FLG_P1(REG_FP)),
- INST("fumsubd", V9, FLG_P1(REG_FPD)),
- INVALID,
-
- /* 0x8 */
- INVALID,
- INST("fnumsubs", V9, FLG_P1(REG_FP)),
- INST("fnumsubd", V9, FLG_P1(REG_FPD)),
- INVALID,
-
- /* 0xc */
- INVALID,
- INST("fnumadds", V9, FLG_P1(REG_FP)),
- INST("fnumaddd", V9, FLG_P1(REG_FPD)),
- INVALID
-};
-
-static const table_t unfused_table = {
- .tbl_field = 8,
- .tbl_len = 4,
- .tbl_ovp = NULL,
- .tbl_fmt = fmt_fused,
- .tbl_inp = unfused_table_def
-};
-
static const inst_t alu_table_def[64] = {
/* 0x00 */
INST("add", VALL, 0),
@@ -1722,7 +1652,7 @@ static const inst_t alu_table_def[64] = {
INST("save", VALL, 0),
INST("restore", VALL, 0),
TABLE(tr_table, V9|V9S),
- TABLE(unfused_table, V9|V9S)
+ INVALID
};
diff --git a/usr/src/lib/libprtdiag/common/display_sun4v.c b/usr/src/lib/libprtdiag/common/display_sun4v.c
index 14c7651d16..1d9f3aa028 100644
--- a/usr/src/lib/libprtdiag/common/display_sun4v.c
+++ b/usr/src/lib/libprtdiag/common/display_sun4v.c
@@ -106,8 +106,6 @@ static void sun4v_env_print_current_sensors();
static void sun4v_env_print_current_indicators();
static void sun4v_env_print_voltage_sensors();
static void sun4v_env_print_voltage_indicators();
-static void sun4v_env_print_humidity_sensors();
-static void sun4v_env_print_humidity_indicators();
static void sun4v_env_print_LEDs();
static void sun4v_print_fru_status();
static int is_fru_absent(picl_nodehdl_t);
@@ -1101,16 +1099,6 @@ sun4v_disp_env_status()
class_node_found = 0;
all_status_ok = 1;
- sun4v_env_print_humidity_sensors();
- exit_code |= (!all_status_ok);
-
- class_node_found = 0;
- all_status_ok = 1;
- sun4v_env_print_humidity_indicators();
- exit_code |= (!all_status_ok);
-
- class_node_found = 0;
- all_status_ok = 1;
sun4v_env_print_LEDs();
exit_code |= (!all_status_ok);
@@ -1737,68 +1725,6 @@ sun4v_env_print_voltage_indicators()
}
static void
-sun4v_env_print_humidity_sensors()
-{
- char *fmt = "%-34s %-14s %-10s\n";
- (void) picl_walk_tree_by_class(phyplatformh,
- PICL_CLASS_HUMIDITY_SENSOR,
- (void *)PICL_PROP_HUMIDITY,
- sun4v_env_print_sensor_callback);
- if (!class_node_found)
- return;
- log_printf("\nHumidity sensors:\n");
- if (syserrlog == 0) {
- (void) picl_walk_tree_by_class(phyplatformh,
- PICL_CLASS_HUMIDITY_SENSOR,
- PICL_PROP_HUMIDITY, sun4v_env_print_sensor_callback);
- if (all_status_ok) {
- log_printf("All humidity sensors are OK.\n");
- return;
- }
- }
- log_printf("-------------------------------------------------"
- "-----------\n");
- log_printf(fmt, "Location", "Sensor", "Status", 0);
- log_printf("-------------------------------------------------"
- "-----------\n");
- (void) picl_walk_tree_by_class(phyplatformh,
- PICL_CLASS_HUMIDITY_SENSOR,
- (void *)PICL_PROP_HUMIDITY,
- sun4v_env_print_sensor_callback);
-}
-
-static void
-sun4v_env_print_humidity_indicators()
-{
- char *fmt = "%-34s %-14s %-8s\n";
- (void) picl_walk_tree_by_class(phyplatformh,
- PICL_CLASS_HUMIDITY_INDICATOR,
- (void *)PICL_PROP_CONDITION,
- sun4v_env_print_indicator_callback);
- if (!class_node_found)
- return;
- log_printf("\nHumidity indicators:\n");
- if (syserrlog == 0) {
- (void) picl_walk_tree_by_class(phyplatformh,
- PICL_CLASS_HUMIDITY_INDICATOR, (void *)PICL_PROP_CONDITION,
- sun4v_env_print_indicator_callback);
- if (all_status_ok) {
- log_printf("All humidity indicators are OK.\n");
- return;
- }
- }
- log_printf("-------------------------------------------------"
- "-----------\n");
- log_printf(fmt, "Location", "Indicator", "Condition", 0);
- log_printf("-------------------------------------------------"
- "-----------\n");
- (void) picl_walk_tree_by_class(phyplatformh,
- PICL_CLASS_HUMIDITY_INDICATOR,
- (void *)PICL_PROP_CONDITION,
- sun4v_env_print_indicator_callback);
-}
-
-static void
sun4v_env_print_LEDs()
{
char *fmt = "%-34s %-14s %-8s\n";
diff --git a/usr/src/pkgdefs/Makefile b/usr/src/pkgdefs/Makefile
index 3e9f63d267..85d9fae4fc 100644
--- a/usr/src/pkgdefs/Makefile
+++ b/usr/src/pkgdefs/Makefile
@@ -96,7 +96,6 @@ sparc_SUBDIRS= \
SUNWssad \
SUNWstc.u \
SUNWus.u \
- SUNWusat10.v \
SUNWust1.v \
SUNWust2.v \
SUNWwbsd
diff --git a/usr/src/pkgdefs/SUNWusat10.v/Makefile b/usr/src/pkgdefs/SUNWusat10.v/Makefile
deleted file mode 100644
index c29be1f177..0000000000
--- a/usr/src/pkgdefs/SUNWusat10.v/Makefile
+++ /dev/null
@@ -1,35 +0,0 @@
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-
-#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
-#
-
-include ../Makefile.com
-
-.KEEP_STATE:
-
-all: $(FILES)
-
-install: all pkg
-
-include ../Makefile.targ
diff --git a/usr/src/pkgdefs/SUNWusat10.v/pkginfo.tmpl b/usr/src/pkgdefs/SUNWusat10.v/pkginfo.tmpl
deleted file mode 100644
index 05de3ac092..0000000000
--- a/usr/src/pkgdefs/SUNWusat10.v/pkginfo.tmpl
+++ /dev/null
@@ -1,55 +0,0 @@
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-
-#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
-#
-# This required package information file describes characteristics of the
-# package, such as package abbreviation, full package name, package version,
-# and package architecture.
-#
-PKG="SUNWusat10"
-NAME="UltraSPARC-AT10 (Root)"
-ARCH="sparc.sun4v"
-VERSION="ONVERS,REV=0.0.0"
-SUNW_PRODNAME="SunOS"
-SUNW_PRODVERS="RELEASE/VERSION"
-SUNW_PKGTYPE="root"
-MAXINST="1000"
-CATEGORY="system"
-DESC="UltraSPARC-AT10 core kernel software"
-VENDOR="Sun Microsystems, Inc."
-HOTLINE="Please contact your local service provider"
-EMAIL=""
-CLASSES="none"
-BASEDIR=/
-SUNW_PKGVERS="1.0"
-SUNW_PKG_ALLZONES="true"
-SUNW_PKG_HOLLOW="true"
-SUNW_PKG_THISZONE="false"
-#VSTOCK="<reserved by Release Engineering for package part #>"
-#ISTATES="<developer defined>"
-#RSTATES='<developer defined>'
-#ULIMIT="<developer defined>"
-#ORDER="<developer defined>"
-#PSTAMP="<developer defined>"
-#INTONLY="<developer defined>"
diff --git a/usr/src/pkgdefs/SUNWusat10.v/prototype_com b/usr/src/pkgdefs/SUNWusat10.v/prototype_com
deleted file mode 100644
index 34d77eef71..0000000000
--- a/usr/src/pkgdefs/SUNWusat10.v/prototype_com
+++ /dev/null
@@ -1,52 +0,0 @@
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-
-#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
-#
-# This required package information file contains a list of package contents.
-# The 'pkgmk' command uses this file to identify the contents of a package
-# and their location on the development machine when building the package.
-# Can be created via a text editor or through use of the 'pkgproto' command.
-
-#!search <pathname pathname ...> # where to find pkg objects
-#!include <filename> # include another 'prototype' file
-#!default <mode> <owner> <group> # default used if not specified on entry
-#!<param>=<value> # puts parameter in pkg environment
-
-# packaging files
-i pkginfo
-i copyright
-#
-# source locations relative to the prototype file
-#
-# SUNWusat10.v
-#
-d none platform 755 root sys
-d none platform/sun4v 755 root sys
-d none platform/sun4v/kernel 755 root sys
-d none platform/sun4v/kernel/cpu 755 root sys
-d none platform/sun4v/kernel/cpu/sparcv9 755 root sys
-f none platform/sun4v/kernel/cpu/sparcv9/SUNW,UltraSPARC-AT10 755 root sys
-d none platform/sun4v/kernel/pcbe 755 root sys
-d none platform/sun4v/kernel/pcbe/sparcv9 755 root sys
-f none platform/sun4v/kernel/pcbe/sparcv9/pcbe.SUNW,UltraSPARC-AT10 755 root sys
diff --git a/usr/src/pkgdefs/SUNWusat10.v/prototype_sparc b/usr/src/pkgdefs/SUNWusat10.v/prototype_sparc
deleted file mode 100644
index 8f4f424ff9..0000000000
--- a/usr/src/pkgdefs/SUNWusat10.v/prototype_sparc
+++ /dev/null
@@ -1,30 +0,0 @@
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-
-#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
-#
-
-# Include ISA independent files (prototype_com)
-!include prototype_com
-
-# List files which are SPARC specific here
diff --git a/usr/src/uts/common/io/mem.c b/usr/src/uts/common/io/mem.c
index 2a7b4d290a..d654270129 100644
--- a/usr/src/uts/common/io/mem.c
+++ b/usr/src/uts/common/io/mem.c
@@ -230,9 +230,6 @@ mmpropop(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int flags,
flags, name, valuep, lengthp, 0));
}
-extern void mach_sync_icache_pa(caddr_t, size_t);
-#pragma weak mach_sync_icache_pa
-
static int
mmio(struct uio *uio, enum uio_rw rw, pfn_t pfn, off_t pageoff, int allowio,
page_t *pp)
@@ -271,18 +268,9 @@ mmio(struct uio *uio, enum uio_rw rw, pfn_t pfn, off_t pageoff, int allowio,
error = EFAULT;
} else
error = EIO;
- } else {
+ } else
error = uiomove(va + pageoff, nbytes, rw, uio);
- /*
- * In case this has changed executable code,
- * non-coherent I-caches must be flushed.
- */
- if (rw != UIO_READ && &mach_sync_icache_pa != NULL) {
- mach_sync_icache_pa((caddr_t)ptob(pfn), PAGESIZE);
- }
- }
-
if (devload)
hat_unload(kas.a_hat, mm_map, PAGESIZE, HAT_UNLOAD_UNLOCK);
else if (pp)
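
The hunk above removes the only consumer of mach_sync_icache_pa() in the
common memory driver. The idiom being retired is the weak-symbol optional
hook: common code may reference a routine that only some CPU modules
define. A minimal sketch using the same declarations the deleted lines
carried (the wrapper name mmio_sync is hypothetical):

	/* Weak: resolves to NULL unless a platform module supplies it. */
	extern void mach_sync_icache_pa(caddr_t, size_t);
	#pragma weak mach_sync_icache_pa

	static void
	mmio_sync(pfn_t pfn)
	{
		/* Call only when a strong definition was linked in. */
		if (&mach_sync_icache_pa != NULL)
			mach_sync_icache_pa((caddr_t)ptob(pfn), PAGESIZE);
	}
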
diff --git a/usr/src/uts/common/sys/auxv_SPARC.h b/usr/src/uts/common/sys/auxv_SPARC.h
index 28b160b77a..b4adf1a16e 100644
--- a/usr/src/uts/common/sys/auxv_SPARC.h
+++ b/usr/src/uts/common/sys/auxv_SPARC.h
@@ -45,7 +45,6 @@ extern "C" {
#define AV_SPARC_VIS2 0x0040 /* VIS2 instruction set supported */
#define AV_SPARC_ASI_BLK_INIT 0x0080 /* ASI_BLK_INIT_xxx ASI */
#define AV_SPARC_FMAF 0x0100 /* Fused Multiply-Add */
-#define AV_SPARC_FMAU 0x0200 /* Unfused Multiply-Add */
#define AV_SPARC_VIS3 0x0400 /* VIS3 instruction set extensions */
#define AV_SPARC_HPC 0x0800 /* High Performance Computing insns */
#define AV_SPARC_RANDOM 0x1000 /* random instruction */
@@ -57,7 +56,7 @@ extern "C" {
#define FMT_AV_SPARC \
"\20" \
"\21cspare" \
- "\20ima\17fjfmau\16trans\15random\14hpc\13vis3\12fmau\11fmaf" \
+ "\20ima\17fjfmau\16trans\15random\14hpc\13vis3\12-\11fmaf" \
"\10ASIBlkInit\7vis2\6vis\5popc\4v8plus\3fsmuld\2div32\1mul32"
/*
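
FMT_AV_SPARC is consumed by the kernel's %b conversion: the leading \20
(octal for 16) selects hexadecimal output, and each following entry is a
1-based bit position in octal followed by that bit's name. Replacing
"\12fmau" with "\12-" keeps a placeholder at position 10 (mask 0x0200)
so the higher bit names stay aligned after AV_SPARC_FMAU is retired. A
usage sketch, assuming the usual cmn_err() %b support:

	uint_t av = AV_SPARC_FMAF | AV_SPARC_VIS2;

	/* Prints something like: hwcap: 140<fmaf,vis2> */
	cmn_err(CE_CONT, "hwcap: %b\n", av, FMT_AV_SPARC);
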
diff --git a/usr/src/uts/common/vm/hat.h b/usr/src/uts/common/vm/hat.h
index 2a8450fd50..b966acf7fc 100644
--- a/usr/src/uts/common/vm/hat.h
+++ b/usr/src/uts/common/vm/hat.h
@@ -425,25 +425,6 @@ void hat_setstat(struct as *, caddr_t, size_t, uint_t);
#define HAT_STRUCTURE_LE 0x2000
#define HAT_ENDIAN_MASK 0x3000
-/*
- * Attributes for non-coherent I-cache support.
- *
- * We detect if an I-cache has been filled by first resetting
- * execute permission in a tte entry. This forces a trap when
- * an instruction fetch first occurs in that page. In "soft
- * execute mode", the hardware execute permission is cleared
- * and a different software execution bit is set in the tte.
- *
- * HAT_ATTR_TEXT: set this flag to avoid the extra trap associated
- * with soft execute mode. Same meaning as HAT_LOAD_TEXT.
- *
- * HAT_ATTR_NOSOFTEXEC: set this flag when installing a permanent
- * mapping, or installing a mapping that will never be
- * freed. Overrides soft execute mode.
- */
-#define HAT_ATTR_TEXT 0x4000
-#define HAT_ATTR_NOSOFTEXEC 0x8000
-
/* flags for hat_softlock */
#define HAT_COW 0x0001
diff --git a/usr/src/uts/common/vm/page.h b/usr/src/uts/common/vm/page.h
index 63b0469960..ceccab3d32 100644
--- a/usr/src/uts/common/vm/page.h
+++ b/usr/src/uts/common/vm/page.h
@@ -780,7 +780,7 @@ int page_mem_avail(pgcnt_t);
int page_reclaim_mem(pgcnt_t, pgcnt_t, int);
void page_set_props(page_t *, uint_t);
-void page_clr_all_props(page_t *, int);
+void page_clr_all_props(page_t *);
int page_clear_lck_cow(page_t *, int);
kmutex_t *page_vnode_mutex(struct vnode *);
diff --git a/usr/src/uts/common/vm/page_retire.c b/usr/src/uts/common/vm/page_retire.c
index fa454101f8..8908807a4d 100644
--- a/usr/src/uts/common/vm/page_retire.c
+++ b/usr/src/uts/common/vm/page_retire.c
@@ -535,7 +535,7 @@ page_retire_destroy(page_t *pp)
ASSERT(!hat_page_is_mapped(pp));
ASSERT(!pp->p_vnode);
- page_clr_all_props(pp, 0);
+ page_clr_all_props(pp);
pagescrub(pp, 0, MMU_PAGESIZE);
pp->p_next = NULL;
diff --git a/usr/src/uts/common/vm/seg_kmem.c b/usr/src/uts/common/vm/seg_kmem.c
index fdc5ab7aa7..d3aac6aa25 100644
--- a/usr/src/uts/common/vm/seg_kmem.c
+++ b/usr/src/uts/common/vm/seg_kmem.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -883,15 +883,6 @@ segkmem_xalloc(vmem_t *vmp, void *inaddr, size_t size, int vmflag, uint_t attr,
else
allocflag = 0;
- /*
- * Support for non-coherent I-cache.
- * Set HAT_LOAD_TEXT to override soft execute.
- */
- if (attr & HAT_ATTR_TEXT) {
- attr &= ~HAT_ATTR_TEXT;
- allocflag |= HAT_LOAD_TEXT;
- }
-
while (ppl != NULL) {
page_t *pp = ppl;
page_sub(&ppl, pp);
diff --git a/usr/src/uts/common/vm/vm_page.c b/usr/src/uts/common/vm/vm_page.c
index 951098f201..a6af733be8 100644
--- a/usr/src/uts/common/vm/vm_page.c
+++ b/usr/src/uts/common/vm/vm_page.c
@@ -620,7 +620,7 @@ add_physmem(
* initialize other fields in the page_t
*/
PP_SETFREE(pp);
- page_clr_all_props(pp, 0);
+ page_clr_all_props(pp);
PP_SETAGED(pp);
pp->p_offset = (u_offset_t)-1;
pp->p_next = pp;
@@ -2662,7 +2662,7 @@ page_free(page_t *pp, int dontneed)
PP_SETFREE(pp);
ASSERT(pp->p_vnode == NULL || !IS_VMODSORT(pp->p_vnode) ||
!hat_ismod(pp));
- page_clr_all_props(pp, 0);
+ page_clr_all_props(pp);
ASSERT(!hat_page_getshare(pp));
/*
@@ -2803,7 +2803,7 @@ page_free_pages(page_t *pp)
ASSERT(tpp->p_szc == szc);
PP_SETFREE(tpp);
- page_clr_all_props(tpp, 0);
+ page_clr_all_props(tpp);
PP_SETAGED(tpp);
tpp->p_offset = (u_offset_t)-1;
ASSERT(tpp->p_next == tpp);
@@ -3149,7 +3149,7 @@ page_destroy_pages(page_t *pp)
ASSERT(tpp->p_szc == szc);
PP_SETFREE(tpp);
- page_clr_all_props(tpp, 0);
+ page_clr_all_props(tpp);
PP_SETAGED(tpp);
ASSERT(tpp->p_next == tpp);
ASSERT(tpp->p_prev == tpp);
@@ -3525,7 +3525,7 @@ page_do_hashout(page_t *pp)
page_vpsub(&vp->v_pages, pp);
pp->p_hash = NULL;
- page_clr_all_props(pp, 1);
+ page_clr_all_props(pp);
PP_CLRSWAP(pp);
pp->p_vnode = NULL;
pp->p_offset = (u_offset_t)-1;
@@ -4542,7 +4542,7 @@ page_do_relocate_hash(page_t *new, page_t *old)
old->p_vnode = NULL;
PP_CLRSWAP(old);
old->p_offset = (u_offset_t)-1;
- page_clr_all_props(old, 1);
+ page_clr_all_props(old);
/*
* Wake up processes waiting for this page. The page's
@@ -4888,7 +4888,7 @@ do_page_relocate(
for (i = 0; i < npgs; i++) {
ppattr = hat_page_getattr(targ, (P_MOD | P_REF | P_RO));
- page_clr_all_props(repl, 0);
+ page_clr_all_props(repl);
page_set_props(repl, ppattr);
page_relocate_hash(repl, targ);
@@ -4899,7 +4899,7 @@ do_page_relocate(
* page_relocate_hash(), they no longer
* have any meaning.
*/
- page_clr_all_props(targ, 0);
+ page_clr_all_props(targ);
ASSERT(targ->p_next == targ);
ASSERT(targ->p_prev == targ);
page_list_concat(&pl, &targ);
@@ -4983,7 +4983,7 @@ page_free_replacement_page(page_t *pplist)
pp = pplist;
if (pp->p_szc == 0) {
page_sub(&pplist, pp);
- page_clr_all_props(pp, 0);
+ page_clr_all_props(pp);
PP_SETFREE(pp);
PP_SETAGED(pp);
page_list_add(pp, PG_FREE_LIST | PG_LIST_TAIL);
@@ -4997,7 +4997,7 @@ page_free_replacement_page(page_t *pplist)
do {
ASSERT(PAGE_EXCL(tpp));
ASSERT(!hat_page_is_mapped(tpp));
- page_clr_all_props(tpp, 0);
+ page_clr_all_props(tpp);
PP_SETFREE(tpp);
PP_SETAGED(tpp);
} while ((tpp = tpp->p_next) != pp);
@@ -6110,25 +6110,9 @@ page_set_props(page_t *pp, uint_t flags)
pp->p_nrm |= (uchar_t)flags;
}
-extern void mach_sync_icache_pp(page_t *);
-#pragma weak mach_sync_icache_pp
-
-/*
- * Flush I-cache if the page is being reassigned. The hashout flag is
- * set when a page has been removed from a hash chain (i.e. vnode
- * pages). If the page stays on the hash chain there is a chance it
- * will be re-used, therefore there is no need to flush the
- * I-cache. However, if the page is being removed from a hash chain
- * then the page can be used for any new purpose, and the I-cache must
- * be flushed.
- */
-/* ARGSUSED */
void
-page_clr_all_props(page_t *pp, int hashout)
+page_clr_all_props(page_t *pp)
{
- if (&mach_sync_icache_pp != NULL && hashout) {
- mach_sync_icache_pp(pp);
- }
pp->p_nrm = 0;
}
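
The second argument being dropped recorded whether the caller had just
removed the page from a vnode hash chain, the one point where a
non-coherent I-cache had to be flushed before the page could be reused.
A sketch of the old calling convention the hunks above retire:

	page_clr_all_props(pp, 1);	/* hashout: page may be repurposed, flush I$ */
	page_clr_all_props(pp, 0);	/* identity unchanged: no flush needed */

With the soft-exec machinery gone, every caller collapses to the
one-argument form.
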
diff --git a/usr/src/uts/sfmmu/ml/sfmmu_asm.s b/usr/src/uts/sfmmu/ml/sfmmu_asm.s
index 6ecd81de3a..78bc5d21b7 100644
--- a/usr/src/uts/sfmmu/ml/sfmmu_asm.s
+++ b/usr/src/uts/sfmmu/ml/sfmmu_asm.s
@@ -248,7 +248,6 @@
*/ ;\
sllx tagtarget, TTARGET_VA_SHIFT, tagtarget ;\
ldxa [ttepa]ASI_MEM, tte ;\
- TTE_CLR_SOFTEXEC_ML(tte) ;\
srlx tagtarget, TTARGET_VA_SHIFT, tagtarget ;\
sethi %hi(TSBTAG_INVALID), tmp2 ;\
add tsbep, TSBE_TAG, tmp1 ;\
@@ -371,7 +370,6 @@ label: ;\
#define TSB_UPDATE(tsbep, tteva, tagtarget, tmp1, tmp2, label) \
/* can't rd tteva after locking tsb because it can tlb miss */ ;\
ldx [tteva], tteva /* load tte */ ;\
- TTE_CLR_SOFTEXEC_ML(tteva) ;\
TSB_LOCK_ENTRY(tsbep, tmp1, tmp2, label) ;\
sethi %hi(TSBTAG_INVALID), tmp2 ;\
add tsbep, TSBE_TAG, tmp1 ;\
@@ -946,11 +944,6 @@ sfmmu_patch_shctx(void)
{
}
-void
-sfmmu_patch_pgsz_reg(void)
-{
-}
-
/* ARGSUSED */
void
sfmmu_load_tsbe(struct tsbe *tsbep, uint64_t vaddr, tte_t *ttep, int phys)
@@ -1441,19 +1434,6 @@ do_patch:
#endif /* sun4u */
SET_SIZE(sfmmu_patch_shctx)
- ENTRY_NP(sfmmu_patch_pgsz_reg)
-#ifdef sun4u
- retl
- nop
-#else /* sun4u */
- set sfmmu_pgsz_load_mmustate_patch, %o0
- MAKE_NOP_INSTR(%o1)
- st %o1, [%o0]
- retl
- flush %o0
-#endif /* sun4u */
- SET_SIZE(sfmmu_patch_pgsz_reg)
-
/*
* Routine that loads an entry into a tsb using virtual addresses.
* Locking is required since all cpus can use the same TSB.
@@ -2408,13 +2388,6 @@ label/**/4: ;\
ba,a,pt %xcc, label/**/8 ;\
label/**/6: ;\
GET_SCDSHMERMAP(tsbarea, hmeblkpa, hatid, hmemisc) ;\
- /* ;\
- * hmemisc is set to 1 if this is a shared mapping. It will ;\
- * be cleared by CHECK_SHARED_PGSZ if this pagesize is not ;\
- * allowed, in order to limit the number of entries in the ;\
- * pagesize register. ;\
- */ ;\
- CHECK_SHARED_PGSZ(tsbarea, tte, hatid, hmemisc, label/**/9) ;\
ldn [tsbarea + (TSBMISS_SCRATCH + TSBMISS_HMEBP)], hatid ;\
label/**/7: ;\
set TTE_SUSPEND, hatid ;\
@@ -3295,37 +3268,8 @@ tsb_shme_checktte:
stub %g1, [%g6 + TSBMISS_URTTEFLAGS]
SAVE_CTX1(%g7, %g2, %g1, tsb_shmel)
- ba tsb_validtte
#endif /* sun4u && !UTSB_PHYS */
-tsb_ism_validtte:
-#ifdef sun4v
- /*
- * Check pagesize against bitmap for Rock page size register,
- * for ism mappings.
- *
- * %g1, %g2 = scratch
- * %g3 = tte
- * g4 = tte pa
- * g5 = tte va
- * g6 = tsbmiss area
- * %g7 = tt
- */
- ldub [%g6 + TSBMISS_URTTEFLAGS], %g1
- and %g1, HAT_CHKCTX1_FLAG, %g2
- /*
- * Clear the HAT_CHKCTX1_FLAG in %g2 if this shared pagesize is not allowed
- * to limit the number of entries in the pagesize search register.
- */
- CHECK_SHARED_PGSZ(%g6, %g3, %g7, %g2, ism_chk_pgsz)
- andn %g1, HAT_CHKCTX1_FLAG, %g1
- or %g1, %g2, %g1
- stub %g1, [%g6 + TSBMISS_URTTEFLAGS]
- brz %g2, tsb_validtte
- rdpr %tt, %g7
- SAVE_CTX1(%g7, %g1, %g2, tsb_shctxl)
-#endif /* sun4v */
-
tsb_validtte:
/*
* g3 = tte
@@ -3355,11 +3299,9 @@ tsb_validtte:
ba,pt %xcc, tsb_update_tl1
nop
4:
- /*
- * ITLB translation was found but execute permission is
- * disabled. If we have software execute permission (soft exec
- * bit is set), then enable hardware execute permission.
- * Otherwise continue with a protection violation.
+ /*
+	 * On an ITLB miss, check the exec bit; if it is
+	 * not set, treat the TTE as invalid.
*/
cmp %g7, T_INSTR_MMU_MISS
be,pn %icc, 5f
@@ -3368,11 +3310,9 @@ tsb_validtte:
bne,pt %icc, 3f
andcc %g3, TTE_EXECPRM_INT, %g0 /* check execute bit is set */
5:
- bnz,pn %icc, 3f
- TTE_CHK_SOFTEXEC_ML(%g3) /* check soft execute */
bz,pn %icc, tsb_protfault
nop
- TTE_SET_EXEC_ML(%g3, %g4, %g7, tsb_lset_exec)
+
3:
/*
* Set reference bit if not already set
@@ -3415,7 +3355,6 @@ tsb_validtte:
#endif /* sun4v */
tsb_update_tl1:
- TTE_CLR_SOFTEXEC_ML(%g3)
srlx %g2, TTARGET_CTX_SHIFT, %g7
brz,pn %g7, tsb_kernel
#ifdef sun4v
@@ -3658,7 +3597,10 @@ tsb_ism:
ldub [%g6 + TSBMISS_URTTEFLAGS], %g5
or %g5, HAT_CHKCTX1_FLAG, %g5
stub %g5, [%g6 + TSBMISS_URTTEFLAGS]
+ rdpr %tt, %g5
+ SAVE_CTX1(%g5, %g3, %g1, tsb_shctxl)
#endif /* defined(sun4v) || defined(UTSB_PHYS) */
+
/*
* ISM pages are always locked down.
* If we can't find the tte then pagefault
@@ -3690,7 +3632,7 @@ tsb_ism_32M:
/* NOT REACHED */
tsb_ism_32M_found:
- brlz,a,pt %g3, tsb_ism_validtte
+ brlz,a,pt %g3, tsb_validtte
rdpr %tt, %g7
ba,pt %xcc, tsb_ism_4M
nop
@@ -3708,7 +3650,7 @@ tsb_ism_256M:
tsb_ism_4M)
tsb_ism_256M_found:
- brlz,a,pt %g3, tsb_ism_validtte
+ brlz,a,pt %g3, tsb_validtte
rdpr %tt, %g7
tsb_ism_4M:
@@ -3721,7 +3663,7 @@ tsb_ism_4M:
/* NOT REACHED */
tsb_ism_4M_found:
- brlz,a,pt %g3, tsb_ism_validtte
+ brlz,a,pt %g3, tsb_validtte
rdpr %tt, %g7
tsb_ism_8K:
@@ -3735,7 +3677,7 @@ tsb_ism_8K:
/* NOT REACHED */
tsb_ism_8K_found:
- brlz,a,pt %g3, tsb_ism_validtte
+ brlz,a,pt %g3, tsb_validtte
rdpr %tt, %g7
tsb_pagefault:
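
With soft execute removed, the ITLB permission check in tsb_validtte
reduces to a single test of the hardware exec bit. A C rendering of the
remaining control flow (a sketch only; the real code stays in assembly,
and trap_is_itlb_miss() is a stand-in for the pair of trap-type
compares):

	if (trap_is_itlb_miss(tt) && !(tte & TTE_EXECPRM_INT))
		goto tsb_protfault;	/* exec bit clear on an instruction fetch */
	/* otherwise fall through: set the reference bit, update the TSB */
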
diff --git a/usr/src/uts/sfmmu/ml/sfmmu_kdi.s b/usr/src/uts/sfmmu/ml/sfmmu_kdi.s
index 615066961a..4e60c2e38a 100644
--- a/usr/src/uts/sfmmu/ml/sfmmu_kdi.s
+++ b/usr/src/uts/sfmmu/ml/sfmmu_kdi.s
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -328,21 +328,7 @@ kdi_trap_vatotte(void)
ldxa [%g2]ASI_MEM, %g1
brgez,a %g1, 4f
clr %g1
-4:
- /*
- * If soft execute bit is set, make sure HW execute permission
- * is also set. But, clear soft execute bit before giving tte to
- * the caller.
- */
- TTE_CHK_SOFTEXEC_ML(%g1)
- bz,pt %icc, 6f
- andcc %g1, TTE_EXECPRM_INT, %g0
- bnz,pt %icc, 7f
- nop
- TTE_SET_EXEC_ML(%g1, %g2, %g4, kdi_trap_vatotte)
-7:
- TTE_CLR_SOFTEXEC_ML(%g1)
- ba,a 6f
+4: ba,a 6f
5: add %g3, 1, %g3
set mmu_hashcnt, %g4
diff --git a/usr/src/uts/sfmmu/vm/hat_sfmmu.c b/usr/src/uts/sfmmu/vm/hat_sfmmu.c
index a25f1d9964..6fa557f2d3 100644
--- a/usr/src/uts/sfmmu/vm/hat_sfmmu.c
+++ b/usr/src/uts/sfmmu/vm/hat_sfmmu.c
@@ -184,14 +184,6 @@ void hat_pagecachectl(struct page *, int);
#define HAT_TMPNC 0x4
/*
- * This flag is set to 0 via the MD in platforms that do not support
- * I-cache coherency in hardware. Used to enable "soft exec" mode.
- * The MD "coherency" property is optional, and defaults to 1 (because
- * coherent I-cache is the norm.)
- */
-uint_t icache_is_coherent = 1;
-
-/*
* Flag to allow the creation of non-cacheable translations
* to system memory. It is off by default. At the moment this
* flag is used by the ecache error injector. The error injector
@@ -227,7 +219,6 @@ uint_t disable_large_pages = 0;
uint_t disable_ism_large_pages = (1 << TTE512K);
uint_t disable_auto_data_large_pages = 0;
uint_t disable_auto_text_large_pages = 0;
-uint_t disable_shctx_large_pages = 0;
/*
* Private sfmmu data structures for hat management
@@ -294,14 +285,6 @@ int disable_shctx = 0;
/* Internal variable, set by MD if the HW supports shctx feature */
int shctx_on = 0;
-/* Internal variable, set by MD if the HW supports the search order register */
-int pgsz_search_on = 0;
-/*
- * External /etc/system tunable, for controlling search order register
- * support.
- */
-int disable_pgsz_search = 0;
-
#ifdef DEBUG
static void check_scd_sfmmu_list(sfmmu_t **, sfmmu_t *, int);
#endif
@@ -481,6 +464,7 @@ static void sfmmu_ismtlbcache_demap(caddr_t, sfmmu_t *, struct hme_blk *,
pfn_t, int);
static void sfmmu_tlb_demap(caddr_t, sfmmu_t *, struct hme_blk *, int, int);
static void sfmmu_tlb_range_demap(demap_range_t *);
+static void sfmmu_invalidate_ctx(sfmmu_t *);
static void sfmmu_sync_mmustate(sfmmu_t *);
static void sfmmu_tsbinfo_setup_phys(struct tsb_info *, pfn_t);
@@ -589,7 +573,7 @@ mmu_ctx_t **mmu_ctxs_tbl; /* global array of context domains */
uint64_t mmu_saved_gnum = 0; /* to init incoming MMUs' gnums */
#define DEFAULT_NUM_CTXS_PER_MMU 8192
-uint_t nctxs = DEFAULT_NUM_CTXS_PER_MMU;
+static uint_t nctxs = DEFAULT_NUM_CTXS_PER_MMU;
int cache; /* describes system cache */
@@ -743,7 +727,11 @@ int sfmmu_page_spl_held(struct page *);
static void sfmmu_mlist_reloc_enter(page_t *, page_t *,
kmutex_t **, kmutex_t **);
static void sfmmu_mlist_reloc_exit(kmutex_t *, kmutex_t *);
-static hatlock_t *sfmmu_hat_tryenter(sfmmu_t *);
+static hatlock_t *
+ sfmmu_hat_enter(sfmmu_t *);
+static hatlock_t *
+ sfmmu_hat_tryenter(sfmmu_t *);
+static void sfmmu_hat_exit(hatlock_t *);
static void sfmmu_hat_lock_all(void);
static void sfmmu_hat_unlock_all(void);
static void sfmmu_ismhat_enter(sfmmu_t *, int);
@@ -1067,14 +1055,12 @@ hat_init_pagesizes()
disable_ism_large_pages |= disable_large_pages;
disable_auto_data_large_pages = disable_large_pages;
disable_auto_text_large_pages = disable_large_pages;
- disable_shctx_large_pages |= disable_large_pages;
/*
* Initialize mmu-specific large page sizes.
*/
if (&mmu_large_pages_disabled) {
disable_large_pages |= mmu_large_pages_disabled(HAT_LOAD);
- disable_shctx_large_pages |= disable_large_pages;
disable_ism_large_pages |=
mmu_large_pages_disabled(HAT_LOAD_SHARE);
disable_auto_data_large_pages |=
@@ -1413,14 +1399,6 @@ hat_init(void)
shctx_on = 0;
}
- /*
- * If support for page size search is disabled via /etc/system
- * set pgsz_search_on to 0 here.
- */
- if (pgsz_search_on && disable_pgsz_search) {
- pgsz_search_on = 0;
- }
-
if (shctx_on) {
srd_buckets = kmem_zalloc(SFMMU_MAX_SRD_BUCKETS *
sizeof (srd_buckets[0]), KM_SLEEP);
@@ -1595,11 +1573,6 @@ hat_alloc(struct as *as)
sfmmup->sfmmu_scdp = NULL;
sfmmup->sfmmu_scd_link.next = NULL;
sfmmup->sfmmu_scd_link.prev = NULL;
-
- if (&mmu_set_pgsz_order && sfmmup != ksfmmup) {
- mmu_set_pgsz_order(sfmmup, 0);
- sfmmu_init_pgsz_hv(sfmmup);
- }
return (sfmmup);
}
@@ -2082,8 +2055,6 @@ hat_dup(struct hat *hat, struct hat *newhat, caddr_t addr, size_t len,
newhat->sfmmu_scdismttecnt[i] =
hat->sfmmu_scdismttecnt[i];
}
- } else if (&mmu_set_pgsz_order) {
- mmu_set_pgsz_order(newhat, 0);
}
sfmmu_check_page_sizes(newhat, 1);
@@ -2579,7 +2550,7 @@ sfmmu_memload_batchsmall(struct hat *hat, caddr_t vaddr, page_t **pps,
void
sfmmu_memtte(tte_t *ttep, pfn_t pfn, uint_t attr, int tte_sz)
{
- ASSERT((attr & ~(SFMMU_LOAD_ALLATTR | HAT_ATTR_NOSOFTEXEC)) == 0);
+ ASSERT(!(attr & ~SFMMU_LOAD_ALLATTR));
ttep->tte_inthi = MAKE_TTE_INTHI(pfn, attr, tte_sz, 0 /* hmenum */);
ttep->tte_intlo = MAKE_TTE_INTLO(pfn, attr, tte_sz, 0 /* hmenum */);
@@ -2593,18 +2564,6 @@ sfmmu_memtte(tte_t *ttep, pfn_t pfn, uint_t attr, int tte_sz)
if (TTE_IS_NFO(ttep) && TTE_IS_EXECUTABLE(ttep)) {
panic("sfmmu_memtte: can't set both NFO and EXEC bits");
}
-
- /*
- * Disable hardware execute permission to force a fault if
- * this page is executed, so we can detect the execution. Set
- * the soft exec bit to remember that this TTE has execute
- * permission.
- */
- if (TTE_IS_EXECUTABLE(ttep) && (attr & HAT_ATTR_NOSOFTEXEC) == 0 &&
- icache_is_coherent == 0) {
- TTE_CLR_EXEC(ttep);
- TTE_SET_SOFTEXEC(ttep);
- }
}
/*
@@ -3095,26 +3054,9 @@ sfmmu_tteload_addentry(sfmmu_t *sfmmup, struct hme_blk *hmeblkp, tte_t *ttep,
(void *)hmeblkp);
}
ASSERT(TTE_CSZ(&tteold) == TTE_CSZ(ttep));
-
- if (TTE_IS_EXECUTABLE(&tteold) && TTE_IS_SOFTEXEC(ttep)) {
- TTE_SET_EXEC(ttep);
- }
}
if (pp) {
- /*
- * If we know that this page will be executed, because
- * it was in the past (PP_ISEXEC is already true), or
- * if the caller says it will likely be executed
- * (HAT_LOAD_TEXT is true), then there is no need to
- * dynamically detect execution with a soft exec
- * fault. Enable hardware execute permission now.
- */
- if ((PP_ISEXEC(pp) || (flags & HAT_LOAD_TEXT)) &&
- TTE_IS_SOFTEXEC(ttep)) {
- TTE_SET_EXEC(ttep);
- }
-
if (size == TTE8K) {
#ifdef VAC
/*
@@ -3138,12 +3080,6 @@ sfmmu_tteload_addentry(sfmmu_t *sfmmup, struct hme_blk *hmeblkp, tte_t *ttep,
sfmmu_page_exit(pmtx);
}
- if (TTE_EXECUTED(ttep)) {
- pmtx = sfmmu_page_enter(pp);
- PP_SETEXEC(pp);
- sfmmu_page_exit(pmtx);
- }
-
} else if (sfmmu_pagearray_setup(vaddr, pps, ttep, remap)) {
/*
* sfmmu_pagearray_setup failed so return
@@ -3151,9 +3087,6 @@ sfmmu_tteload_addentry(sfmmu_t *sfmmup, struct hme_blk *hmeblkp, tte_t *ttep,
sfmmu_mlist_exit(pml);
return (1);
}
-
- } else if (TTE_IS_SOFTEXEC(ttep)) {
- TTE_SET_EXEC(ttep);
}
/*
@@ -3227,17 +3160,11 @@ sfmmu_tteload_addentry(sfmmu_t *sfmmup, struct hme_blk *hmeblkp, tte_t *ttep,
if (!(sfmmup->sfmmu_tteflags & tteflag)) {
hatlockp = sfmmu_hat_enter(sfmmup);
sfmmup->sfmmu_tteflags |= tteflag;
- if (&mmu_set_pgsz_order) {
- mmu_set_pgsz_order(sfmmup, 1);
- }
sfmmu_hat_exit(hatlockp);
}
} else if (!(sfmmup->sfmmu_rtteflags & tteflag)) {
hatlockp = sfmmu_hat_enter(sfmmup);
sfmmup->sfmmu_rtteflags |= tteflag;
- if (&mmu_set_pgsz_order && sfmmup != ksfmmup) {
- mmu_set_pgsz_order(sfmmup, 1);
- }
sfmmu_hat_exit(hatlockp);
}
/*
@@ -3284,8 +3211,7 @@ sfmmu_tteload_addentry(sfmmu_t *sfmmup, struct hme_blk *hmeblkp, tte_t *ttep,
* ref bit in tteload.
*/
ASSERT(TTE_IS_REF(ttep));
- if (TTE_IS_MOD(&tteold) || (TTE_EXECUTED(&tteold) &&
- !TTE_IS_EXECUTABLE(ttep))) {
+ if (TTE_IS_MOD(&tteold)) {
sfmmu_ttesync(sfmmup, vaddr, &tteold, pp);
}
/*
@@ -3416,12 +3342,6 @@ sfmmu_pagearray_setup(caddr_t addr, page_t **pps, tte_t *ttep, int remap)
sfmmu_page_exit(pmtx);
}
- if (TTE_EXECUTED(ttep)) {
- pmtx = sfmmu_page_enter(pp);
- PP_SETEXEC(pp);
- sfmmu_page_exit(pmtx);
- }
-
/*
* If this is a remap we skip vac & contiguity checks.
*/
@@ -5052,11 +4972,9 @@ sfmmu_hblk_chgattr(struct hat *sfmmup, struct hme_blk *hmeblkp, caddr_t addr,
continue;
}
- if ((tteflags.tte_intlo & TTE_HWWR_INT) ||
- (TTE_EXECUTED(&tte) &&
- !TTE_IS_EXECUTABLE(&ttemod))) {
+ if (tteflags.tte_intlo & TTE_HWWR_INT) {
/*
- * need to sync if clearing modify/exec bit.
+			 * need to sync if we are clearing the modify bit.
*/
sfmmu_ttesync(sfmmup, addr, &tte, pp);
}
@@ -5109,14 +5027,6 @@ sfmmu_vtop_attr(uint_t attr, int mode, tte_t *ttemaskp)
ttevalue.tte_intlo = MAKE_TTEATTR_INTLO(attr);
ttemaskp->tte_inthi = TTEINTHI_ATTR;
ttemaskp->tte_intlo = TTEINTLO_ATTR;
- if (!icache_is_coherent) {
- if (!(attr & PROT_EXEC)) {
- TTE_SET_SOFTEXEC(ttemaskp);
- } else {
- TTE_CLR_EXEC(ttemaskp);
- TTE_SET_SOFTEXEC(&ttevalue);
- }
- }
break;
case SFMMU_SETATTR:
ASSERT(!(attr & ~HAT_PROT_MASK));
@@ -5171,9 +5081,6 @@ sfmmu_ptov_attr(tte_t *ttep)
if (TTE_IS_EXECUTABLE(ttep)) {
attr |= PROT_EXEC;
}
- if (TTE_IS_SOFTEXEC(ttep)) {
- attr |= PROT_EXEC;
- }
if (!TTE_IS_PRIVILEGED(ttep)) {
attr |= PROT_USER;
}
@@ -5390,11 +5297,6 @@ sfmmu_hblk_chgprot(sfmmu_t *sfmmup, struct hme_blk *hmeblkp, caddr_t addr,
ttemod = tte;
TTE_SET_LOFLAGS(&ttemod, tteflags, pprot);
- ASSERT(TTE_IS_SOFTEXEC(&tte) ==
- TTE_IS_SOFTEXEC(&ttemod));
- ASSERT(TTE_IS_EXECUTABLE(&tte) ==
- TTE_IS_EXECUTABLE(&ttemod));
-
#if defined(SF_ERRATA_57)
if (check_exec && addr < errata57_limit)
ttemod.tte_exec_perm = 0;
@@ -6094,8 +5996,7 @@ again:
continue;
}
- if (!(flags & HAT_UNLOAD_NOSYNC) ||
- (pp != NULL && TTE_EXECUTED(&tte))) {
+ if (!(flags & HAT_UNLOAD_NOSYNC)) {
sfmmu_ttesync(sfmmup, addr, &tte, pp);
}
@@ -6435,47 +6336,35 @@ static void
sfmmu_ttesync(struct hat *sfmmup, caddr_t addr, tte_t *ttep, page_t *pp)
{
uint_t rm = 0;
- int sz = TTE_CSZ(ttep);
+ int sz;
pgcnt_t npgs;
ASSERT(TTE_IS_VALID(ttep));
- if (!TTE_IS_NOSYNC(ttep)) {
+ if (TTE_IS_NOSYNC(ttep)) {
+ return;
+ }
- if (TTE_IS_REF(ttep))
- rm |= P_REF;
+ if (TTE_IS_REF(ttep)) {
+ rm = P_REF;
+ }
+ if (TTE_IS_MOD(ttep)) {
+ rm |= P_MOD;
+ }
- if (TTE_IS_MOD(ttep))
- rm |= P_MOD;
+ if (rm == 0) {
+ return;
+ }
- if (rm != 0) {
- if (sfmmup != NULL && sfmmup->sfmmu_rmstat) {
- int i;
- caddr_t vaddr = addr;
+ sz = TTE_CSZ(ttep);
+ if (sfmmup != NULL && sfmmup->sfmmu_rmstat) {
+ int i;
+ caddr_t vaddr = addr;
- for (i = 0; i < TTEPAGES(sz); i++) {
- hat_setstat(sfmmup->sfmmu_as, vaddr,
- MMU_PAGESIZE, rm);
- vaddr += MMU_PAGESIZE;
- }
- }
+ for (i = 0; i < TTEPAGES(sz); i++, vaddr += MMU_PAGESIZE) {
+ hat_setstat(sfmmup->sfmmu_as, vaddr, MMU_PAGESIZE, rm);
}
- }
-
- if (!pp)
- return;
- /*
- * If software says this page is executable, and the page was
- * in fact executed (indicated by hardware exec permission
- * being enabled), then set P_EXEC on the page to remember
- * that it was executed. The I$ will be flushed when the page
- * is reassigned.
- */
- if (TTE_EXECUTED(ttep)) {
- rm |= P_EXEC;
- } else if (rm == 0) {
- return;
}
/*
@@ -6485,6 +6374,8 @@ sfmmu_ttesync(struct hat *sfmmup, caddr_t addr, tte_t *ttep, page_t *pp)
* The nrm bits are protected by the same mutex as
* the one that protects the page's mapping list.
*/
+ if (!pp)
+ return;
ASSERT(sfmmu_mlist_held(pp));
/*
* If the tte is for a large page, we need to sync all the
@@ -6503,8 +6394,7 @@ sfmmu_ttesync(struct hat *sfmmup, caddr_t addr, tte_t *ttep, page_t *pp)
ASSERT(pp);
ASSERT(sfmmu_mlist_held(pp));
if (((rm & P_REF) != 0 && !PP_ISREF(pp)) ||
- ((rm & P_MOD) != 0 && !PP_ISMOD(pp)) ||
- ((rm & P_EXEC) != 0 && !PP_ISEXEC(pp)))
+ ((rm & P_MOD) != 0 && !PP_ISMOD(pp)))
hat_page_setattr(pp, rm);
/*
@@ -6826,7 +6716,6 @@ hat_page_relocate(page_t **target, page_t **replacement, spgcnt_t *nrelocp)
kmutex_t *low, *high;
spgcnt_t npages, i;
page_t *pl = NULL;
- uint_t ppattr;
int old_pil;
cpuset_t cpuset;
int cap_cpus;
@@ -6977,9 +6866,8 @@ hat_page_relocate(page_t **target, page_t **replacement, spgcnt_t *nrelocp)
* Copy attributes. VAC consistency was handled above,
* if required.
*/
- ppattr = hat_page_getattr(tpp, (P_MOD | P_REF | P_RO));
- page_clr_all_props(rpp, 0);
- page_set_props(rpp, ppattr);
+ rpp->p_nrm = tpp->p_nrm;
+ tpp->p_nrm = 0;
rpp->p_index = tpp->p_index;
tpp->p_index = 0;
#ifdef VAC
@@ -7791,7 +7679,7 @@ hat_page_setattr(page_t *pp, uint_t flag)
noshuffle = flag & P_NSH;
flag &= ~P_NSH;
- ASSERT(!(flag & ~(P_MOD | P_REF | P_RO | P_EXEC)));
+ ASSERT(!(flag & ~(P_MOD | P_REF | P_RO)));
/*
* nothing to do if attribute already set
@@ -8480,8 +8368,6 @@ ism_tsb_entries(sfmmu_t *sfmmup, int szc)
int j;
sf_scd_t *scdp;
uchar_t rid;
- hatlock_t *hatlockp;
- int ismnotinscd = 0;
ASSERT(SFMMU_FLAGS_ISSET(sfmmup, HAT_ISMBUSY));
scdp = sfmmup->sfmmu_scdp;
@@ -8502,21 +8388,9 @@ ism_tsb_entries(sfmmu_t *sfmmup, int szc)
				/* ISM is not in SCD */
npgs +=
ism_map[j].imap_ismhat->sfmmu_ttecnt[szc];
- ismnotinscd = 1;
}
}
}
-
- if (&mmu_set_pgsz_order) {
- hatlockp = sfmmu_hat_enter(sfmmup);
- if (ismnotinscd) {
- SFMMU_FLAGS_SET(sfmmup, HAT_ISMNOTINSCD);
- } else {
- SFMMU_FLAGS_CLEAR(sfmmup, HAT_ISMNOTINSCD);
- }
- sfmmu_hat_exit(hatlockp);
- }
-
sfmmup->sfmmu_ismttecnt[szc] = npgs;
sfmmup->sfmmu_scdismttecnt[szc] = npgs_scd;
return (npgs);
@@ -8850,11 +8724,6 @@ hat_share(struct hat *sfmmup, caddr_t addr,
sfmmu_hat_exit(hatlockp);
}
- if (&mmu_set_pgsz_order) {
- hatlockp = sfmmu_hat_enter(sfmmup);
- mmu_set_pgsz_order(sfmmup, 1);
- sfmmu_hat_exit(hatlockp);
- }
sfmmu_ismhat_exit(sfmmup, 0);
/*
@@ -9050,11 +8919,6 @@ hat_unshare(struct hat *sfmmup, caddr_t addr, size_t len, uint_t ismszc)
(void) ism_tsb_entries(sfmmup, i);
}
- if (&mmu_set_pgsz_order) {
- hatlockp = sfmmu_hat_enter(sfmmup);
- mmu_set_pgsz_order(sfmmup, 1);
- sfmmu_hat_exit(hatlockp);
- }
sfmmu_ismhat_exit(sfmmup, 0);
/*
@@ -11027,7 +10891,7 @@ sfmmu_mlist_reloc_exit(kmutex_t *low, kmutex_t *high)
mutex_exit(low);
}
-hatlock_t *
+static hatlock_t *
sfmmu_hat_enter(sfmmu_t *sfmmup)
{
hatlock_t *hatlockp;
@@ -11054,7 +10918,7 @@ sfmmu_hat_tryenter(sfmmu_t *sfmmup)
return (NULL);
}
-void
+static void
sfmmu_hat_exit(hatlock_t *hatlockp)
{
if (hatlockp != NULL)
@@ -12197,13 +12061,8 @@ sfmmu_rgntlb_demap(caddr_t addr, sf_region_t *rgnp,
* then we flush the shared TSBs, if we find a private hat,
* which is part of an SCD, but where the region
* is not part of the SCD then we flush the private TSBs.
- *
- * If the Rock page size register is present, then SCDs
- * may contain both shared and private pages, so we cannot
- * use this optimization to avoid flushing private TSBs.
*/
- if (pgsz_search_on == 0 &&
- !sfmmup->sfmmu_scdhat && sfmmup->sfmmu_scdp != NULL &&
+ if (!sfmmup->sfmmu_scdhat && sfmmup->sfmmu_scdp != NULL &&
!SFMMU_FLAGS_ISSET(sfmmup, HAT_JOIN_SCD)) {
scdp = sfmmup->sfmmu_scdp;
if (SF_RGNMAP_TEST(scdp->scd_hmeregion_map, rid)) {
@@ -12332,13 +12191,8 @@ sfmmu_ismtlbcache_demap(caddr_t addr, sfmmu_t *ism_sfmmup,
* which is part of an SCD, but where the region
* corresponding to this va is not part of the SCD then we
* flush the private TSBs.
- *
- * If the Rock page size register is present, then SCDs
- * may contain both shared and private pages, so we cannot
- * use this optimization to avoid flushing private TSBs.
*/
- if (pgsz_search_on == 0 &&
- !sfmmup->sfmmu_scdhat && sfmmup->sfmmu_scdp != NULL &&
+ if (!sfmmup->sfmmu_scdhat && sfmmup->sfmmu_scdp != NULL &&
!SFMMU_FLAGS_ISSET(sfmmup, HAT_JOIN_SCD) &&
!SFMMU_FLAGS_ISSET(sfmmup, HAT_ISMBUSY)) {
if (!find_ism_rid(sfmmup, ism_sfmmup, va,
@@ -12648,7 +12502,7 @@ sfmmu_tlb_range_demap(demap_range_t *dmrp)
* A per-process (PP) lock is used to synchronize ctx allocations in
* resume() and ctx invalidations here.
*/
-void
+static void
sfmmu_invalidate_ctx(sfmmu_t *sfmmup)
{
cpuset_t cpuset;
@@ -14174,9 +14028,6 @@ rfound:
if (tteflag && !(sfmmup->sfmmu_rtteflags & tteflag)) {
hatlockp = sfmmu_hat_enter(sfmmup);
sfmmup->sfmmu_rtteflags |= tteflag;
- if (&mmu_set_pgsz_order) {
- mmu_set_pgsz_order(sfmmup, 1);
- }
sfmmu_hat_exit(hatlockp);
}
hatlockp = sfmmu_hat_enter(sfmmup);
@@ -15232,9 +15083,6 @@ sfmmu_join_scd(sf_scd_t *scdp, sfmmu_t *sfmmup)
ASSERT(sfmmup->sfmmu_ttecnt[i] >= scdp->scd_rttecnt[i]);
atomic_add_long(&sfmmup->sfmmu_ttecnt[i],
-sfmmup->sfmmu_scdrttecnt[i]);
- if (!sfmmup->sfmmu_ttecnt[i]) {
- sfmmup->sfmmu_tteflags &= ~(1 << i);
- }
}
/* update tsb0 inflation count */
if (old_scdp != NULL) {
@@ -15245,9 +15093,6 @@ sfmmu_join_scd(sf_scd_t *scdp, sfmmu_t *sfmmup)
scdp->scd_sfmmup->sfmmu_tsb0_4minflcnt);
sfmmup->sfmmu_tsb0_4minflcnt -= scdp->scd_sfmmup->sfmmu_tsb0_4minflcnt;
- if (&mmu_set_pgsz_order) {
- mmu_set_pgsz_order(sfmmup, 0);
- }
sfmmu_hat_exit(hatlockp);
if (old_scdp != NULL) {
@@ -15307,7 +15152,7 @@ sfmmu_find_scd(sfmmu_t *sfmmup)
for (scdp = srdp->srd_scdp; scdp != NULL;
scdp = scdp->scd_next) {
SF_RGNMAP_EQUAL(&scdp->scd_region_map,
- &sfmmup->sfmmu_region_map, SFMMU_RGNMAP_WORDS, ret);
+ &sfmmup->sfmmu_region_map, ret);
if (ret == 1) {
SF_SCD_INCR_REF(scdp);
mutex_exit(&srdp->srd_scd_mutex);
@@ -15455,10 +15300,6 @@ sfmmu_leave_scd(sfmmu_t *sfmmup, uchar_t r_type)
scdp->scd_rttecnt[i]);
atomic_add_long(&sfmmup->sfmmu_ttecnt[i],
sfmmup->sfmmu_scdrttecnt[i]);
- if (sfmmup->sfmmu_ttecnt[i] &&
- (sfmmup->sfmmu_tteflags & (1 << i)) == 0) {
- sfmmup->sfmmu_tteflags |= (1 << i);
- }
sfmmup->sfmmu_scdrttecnt[i] = 0;
/* update ismttecnt to include SCD ism before hat leaves SCD */
sfmmup->sfmmu_ismttecnt[i] += sfmmup->sfmmu_scdismttecnt[i];
@@ -15472,9 +15313,6 @@ sfmmu_leave_scd(sfmmu_t *sfmmup, uchar_t r_type)
}
sfmmup->sfmmu_scdp = NULL;
- if (&mmu_set_pgsz_order) {
- mmu_set_pgsz_order(sfmmup, 0);
- }
sfmmu_hat_exit(hatlockp);
/*
@@ -15520,8 +15358,7 @@ sfmmu_destroy_scd(sf_srd_t *srdp, sf_scd_t *scdp, sf_region_map_t *scd_rmap)
* It is possible that the scd has been freed and reallocated with a
* different region map while we've been waiting for the srd_scd_mutex.
*/
- SF_RGNMAP_EQUAL(scd_rmap, &sp->scd_region_map,
- SFMMU_RGNMAP_WORDS, ret);
+ SF_RGNMAP_EQUAL(scd_rmap, &sp->scd_region_map, ret);
if (ret != 1) {
mutex_exit(&srdp->srd_scd_mutex);
return;
diff --git a/usr/src/uts/sfmmu/vm/hat_sfmmu.h b/usr/src/uts/sfmmu/vm/hat_sfmmu.h
index 5e56264869..327b2fcf36 100644
--- a/usr/src/uts/sfmmu/vm/hat_sfmmu.h
+++ b/usr/src/uts/sfmmu/vm/hat_sfmmu.h
@@ -112,7 +112,6 @@ typedef struct sf_scd sf_scd_t;
#define P_TNC 0x10 /* non-caching is temporary bit */
#define P_KPMS 0x20 /* kpm mapped small (vac alias prevention) */
#define P_KPMC 0x40 /* kpm conflict page (vac alias prevention) */
-#define P_EXEC 0x80 /* execution reference (I-cache filled) */
#define PP_GENERIC_ATTR(pp) ((pp)->p_nrm & (P_MOD | P_REF | P_RO))
#define PP_ISMOD(pp) ((pp)->p_nrm & P_MOD)
@@ -125,7 +124,6 @@ typedef struct sf_scd sf_scd_t;
#endif
#define PP_ISKPMS(pp) ((pp)->p_nrm & P_KPMS)
#define PP_ISKPMC(pp) ((pp)->p_nrm & P_KPMC)
-#define PP_ISEXEC(pp) ((pp)->p_nrm & P_EXEC)
#define PP_SETMOD(pp) ((pp)->p_nrm |= P_MOD)
#define PP_SETREF(pp) ((pp)->p_nrm |= P_REF)
@@ -138,7 +136,6 @@ typedef struct sf_scd sf_scd_t;
#endif
#define PP_SETKPMS(pp) ((pp)->p_nrm |= P_KPMS)
#define PP_SETKPMC(pp) ((pp)->p_nrm |= P_KPMC)
-#define PP_SETEXEC(pp) ((pp)->p_nrm |= P_EXEC)
#define PP_CLRMOD(pp) ((pp)->p_nrm &= ~P_MOD)
#define PP_CLRREF(pp) ((pp)->p_nrm &= ~P_REF)
@@ -150,17 +147,6 @@ typedef struct sf_scd sf_scd_t;
#endif
#define PP_CLRKPMS(pp) ((pp)->p_nrm &= ~P_KPMS)
#define PP_CLRKPMC(pp) ((pp)->p_nrm &= ~P_KPMC)
-#define PP_CLREXEC(pp) ((pp)->p_nrm &= ~P_EXEC)
-
-/*
- * Support for non-coherent I-cache. If the MD property "coherency"
- * is set to 0, it means that the I-cache must be flushed in
- * software. Use the "soft exec" bit in the TTE to detect when a page
- * has been executed, so that it can be flushed before it is re-used
- * for another program.
- */
-#define TTE_EXECUTED(ttep) \
- (TTE_IS_EXECUTABLE(ttep) && TTE_IS_SOFTEXEC(ttep))
/*
* All shared memory segments attached with the SHM_SHARE_MMU flag (ISM)
@@ -337,15 +323,15 @@ typedef union sf_region_map_u {
}
/*
- * Returns 1 if region map1 and map2 are equal.
+ * Returns 1 if map1 and map2 are equal.
*/
-#define SF_RGNMAP_EQUAL(map1, map2, words, rval) { \
+#define SF_RGNMAP_EQUAL(map1, map2, rval) { \
int _i; \
- for (_i = 0; _i < words; _i++) { \
+ for (_i = 0; _i < SFMMU_RGNMAP_WORDS; _i++) { \
if ((map1)->bitmap[_i] != (map2)->bitmap[_i]) \
break; \
} \
- if (_i < words) \
+ if (_i < SFMMU_RGNMAP_WORDS) \
rval = 0; \
else \
rval = 1; \
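
Call sites, as the hat_sfmmu.c hunks earlier in this patch show, drop
the word-count argument; the macro still returns its result through
rval:

	int ret;

	SF_RGNMAP_EQUAL(&scdp->scd_region_map, &sfmmup->sfmmu_region_map, ret);
	if (ret == 1) {
		/* the two region bitmaps are identical */
	}
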
@@ -609,13 +595,9 @@ typedef struct mmu_ctx {
extern uint_t max_mmu_ctxdoms;
extern mmu_ctx_t **mmu_ctxs_tbl;
-extern uint_t nctxs;
extern void sfmmu_cpu_init(cpu_t *);
extern void sfmmu_cpu_cleanup(cpu_t *);
-extern void sfmmu_invalidate_ctx(sfmmu_t *);
-extern hatlock_t *sfmmu_hat_enter(sfmmu_t *);
-extern void sfmmu_hat_exit(hatlock_t *);
/*
* The following structure is used to get MMU context domain information for
@@ -652,6 +634,7 @@ typedef struct sfmmu_ctx {
uint64_t cnum:16;
} sfmmu_ctx_t;
+
/*
* The platform dependent hat structure.
* tte counts should be protected by cas.
@@ -713,11 +696,7 @@ struct hat {
sf_rgn_link_t *sfmmu_hmeregion_links[SFMMU_L1_HMERLINKS];
sf_rgn_link_t sfmmu_scd_link; /* link to scd or pending queue */
#ifdef sun4v
- /* ttecnt for Rock pagesize register management */
- ulong_t sfmmu_mmuttecnt[MMU_PAGE_SIZES];
struct hv_tsb_block sfmmu_hvblock;
- struct hv_pgsz_order sfmmu_pgsz_order; /* pagesize search order */
- uint8_t sfmmu_pgsz_map; /* bit map to control shared pgsz use */
#endif
/*
* sfmmu_ctxs is a variable length array of max_mmu_ctxdoms # of
@@ -763,8 +742,6 @@ struct sf_scd {
extern int disable_shctx;
extern int shctx_on;
-extern int pgsz_search_on;
-extern int disable_pgsz_search;
/*
* bit mask for managing vac conflicts on large pages.
@@ -878,7 +855,6 @@ struct ctx_trace {
#define HAT_CTX1_FLAG 0x100 /* ISM imap hatflag for ctx1 */
#define HAT_JOIN_SCD 0x200 /* region is joining scd */
#define HAT_ALLCTX_INVALID 0x400 /* all per-MMU ctxs are invalidated */
-#define HAT_ISMNOTINSCD 0x800 /* Not all ISM segs are in the SCD */
#define SFMMU_LGPGS_INUSE(sfmmup) \
(((sfmmup)->sfmmu_tteflags | (sfmmup)->sfmmu_rtteflags) || \
@@ -1822,8 +1798,7 @@ struct tsbmiss {
uintptr_t scratch[3];
ulong_t shmermap[SFMMU_HMERGNMAP_WORDS]; /* 8 bytes */
ulong_t scd_shmermap[SFMMU_HMERGNMAP_WORDS]; /* 8 bytes */
- uint8_t pgsz_bitmap; /* limits ctx1 page sizes */
- uint8_t pad[47]; /* pad to 64 bytes */
+ uint8_t pad[48]; /* pad to 64 bytes */
};
/*
@@ -2354,17 +2329,11 @@ extern struct hme_blk *sfmmu_hmetohblk(struct sf_hment *);
#pragma weak mmu_large_pages_disabled
#pragma weak mmu_set_ctx_page_sizes
#pragma weak mmu_check_page_sizes
-#pragma weak mmu_set_pgsz_order
-#pragma weak sfmmu_init_pgsz_hv
-#pragma weak mmu_enable_pgsz_search
extern void mmu_init_scd(sf_scd_t *);
extern uint_t mmu_large_pages_disabled(uint_t);
extern void mmu_set_ctx_page_sizes(sfmmu_t *);
extern void mmu_check_page_sizes(sfmmu_t *, uint64_t *);
-extern void mmu_set_pgsz_order(sfmmu_t *, int);
-extern void sfmmu_init_pgsz_hv(sfmmu_t *);
-extern void mmu_enable_pgsz_search();
extern sfmmu_t *ksfmmup;
extern caddr_t ktsb_base;
@@ -2406,15 +2375,12 @@ extern uint_t disable_large_pages;
extern uint_t disable_ism_large_pages;
extern uint_t disable_auto_data_large_pages;
extern uint_t disable_auto_text_large_pages;
-extern uint_t disable_shctx_large_pages;
-
-extern void sfmmu_patch_shctx(void);
-extern void sfmmu_patch_pgsz_reg(void);
/* kpm externals */
extern pfn_t sfmmu_kpm_vatopfn(caddr_t);
extern void sfmmu_kpm_patch_tlbm(void);
extern void sfmmu_kpm_patch_tsbm(void);
+extern void sfmmu_patch_shctx(void);
extern void sfmmu_kpm_load_tsb(caddr_t, tte_t *, int);
extern void sfmmu_kpm_unload_tsb(caddr_t, int);
extern void sfmmu_kpm_tsbmtl(short *, uint_t *, int);
diff --git a/usr/src/uts/sparc/fpu/fpu_simulator.c b/usr/src/uts/sparc/fpu/fpu_simulator.c
index 953b0abd65..c65e15402a 100644
--- a/usr/src/uts/sparc/fpu/fpu_simulator.c
+++ b/usr/src/uts/sparc/fpu/fpu_simulator.c
@@ -112,14 +112,6 @@ struct fpuinfo_kstat fpuinfo = {
{ "fpu_sim_fnmaddd", KSTAT_DATA_UINT64},
{ "fpu_sim_fnmsubs", KSTAT_DATA_UINT64},
{ "fpu_sim_fnmsubd", KSTAT_DATA_UINT64},
- { "fpu_sim_fumadds", KSTAT_DATA_UINT64},
- { "fpu_sim_fumaddd", KSTAT_DATA_UINT64},
- { "fpu_sim_fumsubs", KSTAT_DATA_UINT64},
- { "fpu_sim_fumsubd", KSTAT_DATA_UINT64},
- { "fpu_sim_fnumadds", KSTAT_DATA_UINT64},
- { "fpu_sim_fnumaddd", KSTAT_DATA_UINT64},
- { "fpu_sim_fnumsubs", KSTAT_DATA_UINT64},
- { "fpu_sim_fnumsubd", KSTAT_DATA_UINT64},
{ "fpu_sim_invalid", KSTAT_DATA_UINT64},
};
@@ -185,14 +177,12 @@ _fp_fpu_simulator(
enum fcc_type cc;
uint32_t nfcc; /* fcc number field. */
uint64_t lusr;
- uint_t fmau_mul_exceptions;
nrs1 = inst.rs1;
nrs2 = inst.rs2;
nrd = inst.rd;
fsr = *pfsr;
pfpsd->fp_current_exceptions = 0; /* Init current exceptions. */
- fmau_mul_exceptions = 0;
pfpsd->fp_fsrtem = fsr.tem; /* Obtain fsr's tem */
/*
* Obtain rounding direction and precision
@@ -200,7 +190,7 @@ _fp_fpu_simulator(
pfpsd->fp_direction = GSR_IM(gsr) ? GSR_IRND(gsr) : fsr.rnd;
pfpsd->fp_precision = fsr.rnp;
- if (inst.op3 == 0x37) { /* FMA-fused opcode */
+ if (inst.op3 == 0x37) { /* IMPDEP2B FMA-fused opcode */
fp_fma_inst_type *fma_inst;
uint32_t nrs3;
unpacked us3;
@@ -263,121 +253,6 @@ _fp_fpu_simulator(
FPUINFO_KSTAT_PREC(fma_inst->sz, fpu_sim_fnmsubs,
fpu_sim_fnmsubd, fpu_sim_invalid);
}
- } else if (inst.op3 == fmau) { /* FMA-unfused opcode */
- fp_fma_inst_type *fmau_inst;
- uint32_t nrs3;
- unpacked us3;
- unpacked ust;
- /*
- * For FMA-unfused, if either the multiply part or the add
- * part raises an exception whose trap is enabled, we trap
- * with cexc indicating only that exception and aexc un-
- * changed. If neither part raises an exception whose trap
- * is enabled, the instruction completes with cexc indicating
- * just those exceptions that occurred in the add part and
- * aexc accumulating all exceptions that occurred in either
- * part. We use fmau_mul_exceptions to keep track of the
- * exceptions that occurred in the multiply part while we
- * simulate the add part.
- */
- fmau_inst = (fp_fma_inst_type *) &inst;
- nrs2 = fmau_inst->rs2;
- nrs3 = fmau_inst->rs3;
- switch (fmau_inst->var) {
- case fmadd:
- _fp_unpack(pfpsd, &us1, nrs1, fmau_inst->sz);
- _fp_unpack(pfpsd, &us2, nrs2, fmau_inst->sz);
- _fp_mul(pfpsd, &us1, &us2, &ust);
- _fp_pack(pfpsd, &ust, nrd, fmau_inst->sz);
- if ((pfpsd->fp_current_exceptions & fsr.tem) == 0) {
- fmau_mul_exceptions =
- pfpsd->fp_current_exceptions;
- pfpsd->fp_current_exceptions = 0;
- _fp_unpack(pfpsd, &us3, nrs3, fmau_inst->sz);
- _fp_unpack(pfpsd, &ust, nrd, fmau_inst->sz);
- _fp_add(pfpsd, &ust, &us3, &ud);
- /* ensure QSNaN1 has precedence over QNaN3 */
- if ((us3.fpclass == fp_quiet) &&
- ((us1.fpclass == fp_signaling) ||
- (us2.fpclass == fp_signaling)))
- ud = ust;
- _fp_pack(pfpsd, &ud, nrd, fmau_inst->sz);
- }
- FPUINFO_KSTAT_PREC(fmau_inst->sz, fpu_sim_fumadds,
- fpu_sim_fumaddd, fpu_sim_invalid);
- break;
- case fmsub:
- _fp_unpack(pfpsd, &us1, nrs1, fmau_inst->sz);
- _fp_unpack(pfpsd, &us2, nrs2, fmau_inst->sz);
- _fp_mul(pfpsd, &us1, &us2, &ust);
- _fp_pack(pfpsd, &ust, nrd, fmau_inst->sz);
- if ((pfpsd->fp_current_exceptions & fsr.tem) == 0) {
- fmau_mul_exceptions =
- pfpsd->fp_current_exceptions;
- pfpsd->fp_current_exceptions = 0;
- _fp_unpack(pfpsd, &us3, nrs3, fmau_inst->sz);
- _fp_unpack(pfpsd, &ust, nrd, fmau_inst->sz);
- _fp_sub(pfpsd, &ust, &us3, &ud);
- /* ensure QSNaN1 has precedence over QNaN3 */
- if ((us3.fpclass == fp_quiet) &&
- ((us1.fpclass == fp_signaling) ||
- (us2.fpclass == fp_signaling)))
- ud = ust;
- _fp_pack(pfpsd, &ud, nrd, fmau_inst->sz);
- }
- FPUINFO_KSTAT_PREC(fmau_inst->sz, fpu_sim_fumsubs,
- fpu_sim_fumsubd, fpu_sim_invalid);
- break;
- case fnmadd:
- _fp_unpack(pfpsd, &us1, nrs1, fmau_inst->sz);
- _fp_unpack(pfpsd, &us2, nrs2, fmau_inst->sz);
- _fp_mul(pfpsd, &us1, &us2, &ust);
- _fp_pack(pfpsd, &ust, nrd, fmau_inst->sz);
- if ((pfpsd->fp_current_exceptions & fsr.tem) == 0) {
- fmau_mul_exceptions =
- pfpsd->fp_current_exceptions;
- pfpsd->fp_current_exceptions = 0;
- _fp_unpack(pfpsd, &us3, nrs3, fmau_inst->sz);
- _fp_unpack(pfpsd, &ust, nrd, fmau_inst->sz);
- if (ust.fpclass != fp_quiet &&
- ust.fpclass != fp_signaling)
- ust.sign ^= 1;
- _fp_sub(pfpsd, &ust, &us3, &ud);
- /* ensure QSNaN1 has precedence over QNaN3 */
- if ((us3.fpclass == fp_quiet) &&
- ((us1.fpclass == fp_signaling) ||
- (us2.fpclass == fp_signaling)))
- ud = ust;
- _fp_pack(pfpsd, &ud, nrd, fmau_inst->sz);
- }
- FPUINFO_KSTAT_PREC(fmau_inst->sz, fpu_sim_fnumadds,
- fpu_sim_fnumaddd, fpu_sim_invalid);
- break;
- case fnmsub:
- _fp_unpack(pfpsd, &us1, nrs1, fmau_inst->sz);
- _fp_unpack(pfpsd, &us2, nrs2, fmau_inst->sz);
- _fp_mul(pfpsd, &us1, &us2, &ust);
- _fp_pack(pfpsd, &ust, nrd, fmau_inst->sz);
- if ((pfpsd->fp_current_exceptions & fsr.tem) == 0) {
- fmau_mul_exceptions =
- pfpsd->fp_current_exceptions;
- pfpsd->fp_current_exceptions = 0;
- _fp_unpack(pfpsd, &us3, nrs3, fmau_inst->sz);
- _fp_unpack(pfpsd, &ust, nrd, fmau_inst->sz);
- if (ust.fpclass != fp_quiet &&
- ust.fpclass != fp_signaling)
- ust.sign ^= 1;
- _fp_add(pfpsd, &ust, &us3, &ud);
- /* ensure QSNaN1 has precedence over QNaN3 */
- if ((us3.fpclass == fp_quiet) &&
- ((us1.fpclass == fp_signaling) ||
- (us2.fpclass == fp_signaling)))
- ud = ust;
- _fp_pack(pfpsd, &ud, nrd, fmau_inst->sz);
- }
- FPUINFO_KSTAT_PREC(fmau_inst->sz, fpu_sim_fnumsubs,
- fpu_sim_fnumsubd, fpu_sim_invalid);
- }
} else {
nfcc = nrd & 0x3;
if (inst.op3 == 0x35) { /* fpop2 */
@@ -645,7 +520,7 @@ _fp_fpu_simulator(
*pfsr = fsr;
return (ftt_ieee);
} else { /* Just set accrued exception field. */
- fsr.aexc |= pfpsd->fp_current_exceptions | fmau_mul_exceptions;
+ fsr.aexc |= pfpsd->fp_current_exceptions;
}
*pfsr = fsr;
return (ftt_none);
@@ -697,7 +572,7 @@ fpu_vis_sim(
return (ftt);
} else if ((fp.inst.hibits == 2) &&
((fp.inst.op3 == 0x34) || (fp.inst.op3 == 0x35) ||
- (fp.inst.op3 == 0x37) || (fp.inst.op3 == 0x3f))) {
+ (fp.inst.op3 == 0x37))) {
ftt = _fp_fpu_simulator(pfpsd, fp.inst, pfsr, gsr);
if (ftt == ftt_none || ftt == ftt_ieee) {
pregs->r_pc = pregs->r_npc;
@@ -776,7 +651,7 @@ fp_emulator(
if ((fp.inst.hibits == 2) &&
((fp.inst.op3 == 0x34) || (fp.inst.op3 == 0x35) ||
- (fp.inst.op3 == 0x37) || (fp.inst.op3 == 0x3f))) {
+ (fp.inst.op3 == 0x37))) {
ftt = _fp_fpu_simulator(pfpsd, fp.inst, (fsr_type *)&tfsr, gsr);
/* Do not retry emulated instruction. */
pregs->r_pc = pregs->r_npc;
@@ -816,7 +691,7 @@ again:
return (ftt);
if ((fp.inst.hibits == 2) && /* fpops */
((fp.inst.op3 == 0x34) || (fp.inst.op3 == 0x35) ||
- (fp.inst.op3 == 0x37) || (fp.inst.op3 == 0x3f))) {
+ (fp.inst.op3 == 0x37))) {
ftt = _fp_fpu_simulator(pfpsd, fp.inst, (fsr_type *)&tfsr, gsr);
/* Do not retry emulated instruction. */
pfpu->fpu_fsr = tfsr;
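
The deleted branch simulated the unfused IMPDEP2B variants, which round
the product and the sum separately; the fused forms that remain round
only once. A user-level sketch of the semantic difference:

	#include <math.h>

	/* Two roundings: the product is rounded before the add. */
	static double
	fma_unfused(double a, double b, double c)
	{
		double t = a * b;
		return (t + c);
	}

	/* One rounding: a*b is kept exact until the final result. */
	static double
	fma_fused(double a, double b, double c)
	{
		return (fma(a, b, c));
	}

The fmau_mul_exceptions bookkeeping removed above existed precisely
because the two roundings can each raise IEEE exceptions that must be
trapped or accumulated separately, as the deleted block comment
describes.
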
diff --git a/usr/src/uts/sparc/sys/fpu/fpu_simulator.h b/usr/src/uts/sparc/sys/fpu/fpu_simulator.h
index 06c1723db5..0a89f34a0b 100644
--- a/usr/src/uts/sparc/sys/fpu/fpu_simulator.h
+++ b/usr/src/uts/sparc/sys/fpu/fpu_simulator.h
@@ -204,7 +204,7 @@ enum fp_opcode { /* FPU op codes, minus precision and leading 0. */
ft_op_38 = 0x38,
fp_op_39 = 0x39, fp_op_3a = 0x3a, fp_op_3b = 0x3b,
fp_op_3c = 0x3c,
- fp_op_3d = 0x3d, fp_op_3e = 0x3e, fmau = 0x3f
+ fp_op_3d = 0x3d, fp_op_3e = 0x3e, fp_op_3f = 0x3f
};
typedef /* FPU instruction. */
@@ -219,14 +219,14 @@ typedef /* FPU instruction. */
uint32_t rs2 : 5; /* Second operand. */
} fp_inst_type;
-enum fp_op_fma_var { /* FMA-fused/unfused instr. variations */
+enum fp_op_fma_var { /* IMPDEP2B FMA-fused instr. variations */
fmadd = 0,
fmsub = 1,
fnmsub = 2,
fnmadd = 3
};
-typedef /* FPU FMA-fused/unfused instructions. */
+typedef /* IMPDEP2B FPU FMA-fused instruction. */
struct {
uint32_t hibits : 2; /* Top two bits. */
uint32_t rd : 5; /* Destination. */
@@ -330,14 +330,6 @@ struct fpuinfo_kstat {
struct kstat_named fpu_sim_fnmaddd;
struct kstat_named fpu_sim_fnmsubs;
struct kstat_named fpu_sim_fnmsubd;
- struct kstat_named fpu_sim_fumadds;
- struct kstat_named fpu_sim_fumaddd;
- struct kstat_named fpu_sim_fumsubs;
- struct kstat_named fpu_sim_fumsubd;
- struct kstat_named fpu_sim_fnumadds;
- struct kstat_named fpu_sim_fnumaddd;
- struct kstat_named fpu_sim_fnumsubs;
- struct kstat_named fpu_sim_fnumsubd;
struct kstat_named fpu_sim_invalid;
};
diff --git a/usr/src/uts/sun4/os/startup.c b/usr/src/uts/sun4/os/startup.c
index cd08c002ed..e90fcd15dd 100644
--- a/usr/src/uts/sun4/os/startup.c
+++ b/usr/src/uts/sun4/os/startup.c
@@ -896,7 +896,7 @@ install_kmem64_tte()
PRM_DEBUG(kmem64_pabase);
PRM_DEBUG(kmem64_szc);
sfmmu_memtte(&tte, kmem64_pabase >> MMU_PAGESHIFT,
- PROC_DATA | HAT_NOSYNC | HAT_ATTR_NOSOFTEXEC, kmem64_szc);
+ PROC_DATA | HAT_NOSYNC, kmem64_szc);
PRM_DEBUG(tte.ll);
(void) sprintf(b, kmem64_obp_str,
kmem64_base, kmem64_end, TTE_PAGEMASK(kmem64_szc), tte.ll);
@@ -2850,8 +2850,6 @@ char obp_tte_str[] =
"h# %p constant KCONTEXT "
"h# %p constant KHATID "
"h# %x constant ASI_MEM "
- "h# %x constant SOFTEXEC "
- "h# %x constant EXECPRM "
": PHYS-X@ ( phys -- data ) "
" ASI_MEM spacex@ "
@@ -2954,11 +2952,7 @@ char obp_tte_str[] =
" ?dup if ( addr sfmmup hmeblkp ) "
" nip swap HBLK_TO_TTEP ( ttep ) "
" dup TTE_IS_VALID if ( valid-ttep ) "
- " PHYS-X@ ( tte-data ) "
- " dup SOFTEXEC and 0> if ( tte-data ) "
- " SOFTEXEC - EXECPRM or ( tte-data ) "
- " then ( tte-data ) "
- " true ( tte-data true ) "
+ " PHYS-X@ true ( tte-data true ) "
" else ( invalid-tte ) "
" drop false ( false ) "
" then ( false | tte-data true ) "
@@ -3009,9 +3003,7 @@ create_va_to_tte(void)
KHMEHASH_SZ,
KCONTEXT,
KHATID,
- ASI_MEM,
- icache_is_coherent ? 0 : TTE_SOFTEXEC_INT,
- TTE_EXECPRM_INT);
+ ASI_MEM);
prom_interpret(bp, 0, 0, 0, 0, 0);
kobj_free(bp, MMU_PAGESIZE);
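
The deleted Forth fragment taught OBP's translator to upgrade soft-exec
TTEs on the fly. In C the transformation amounts to the following sketch
(obp_tte_fixup is a hypothetical name; the Forth constants SOFTEXEC and
EXECPRM were bound to TTE_SOFTEXEC_INT and TTE_EXECPRM_INT):

	uint64_t
	obp_tte_fixup(uint64_t tte_data)
	{
		/* The bit is known set, so subtraction clears it. */
		if (tte_data & TTE_SOFTEXEC_INT)
			tte_data = (tte_data - TTE_SOFTEXEC_INT) |
			    TTE_EXECPRM_INT;
		return (tte_data);
	}
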
diff --git a/usr/src/uts/sun4/vm/sfmmu.c b/usr/src/uts/sun4/vm/sfmmu.c
index 6d6a75319d..464bf06839 100644
--- a/usr/src/uts/sun4/vm/sfmmu.c
+++ b/usr/src/uts/sun4/vm/sfmmu.c
@@ -199,10 +199,6 @@ hat_kern_setup(void)
sfmmu_patch_shctx();
}
- if (&mmu_enable_pgsz_search) {
- mmu_enable_pgsz_search();
- }
-
/*
* The 8K-indexed kernel TSB space is used to hold
* translations below...
diff --git a/usr/src/uts/sun4/vm/vm_dep.h b/usr/src/uts/sun4/vm/vm_dep.h
index be1dc0cba3..6750585419 100644
--- a/usr/src/uts/sun4/vm/vm_dep.h
+++ b/usr/src/uts/sun4/vm/vm_dep.h
@@ -859,16 +859,6 @@ extern void *ndata_extra_base(struct memlist *, size_t, caddr_t);
extern size_t ndata_maxsize(struct memlist *);
extern size_t ndata_spare(struct memlist *, size_t, size_t);
-/*
- * Platform specific support for non-coherent I-cache and soft exec
- */
-extern uint_t icache_is_coherent;
-extern uint_t force_sync_icache_after_bcopy;
-extern uint_t force_sync_icache_after_dma;
-
-extern void mach_setup_icache(uint_t);
-#pragma weak mach_setup_icache
-
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/uts/sun4u/sys/pte.h b/usr/src/uts/sun4u/sys/pte.h
index 77d2fa207e..34e3698b40 100644
--- a/usr/src/uts/sun4u/sys/pte.h
+++ b/usr/src/uts/sun4u/sys/pte.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -354,23 +354,6 @@ typedef union {
#endif /* !_ASM */
-/*
- * There is no support for non-coherent I-cache in sun4u
- */
-#define TTE_SOFTEXEC_INT 0x00000000
-#ifndef _ASM
-#ifdef lint
-/* fix lint warnings about constant conditionals and empty if */
-#define TTE_IS_SOFTEXEC(ttep) TTE_IS_EXECUTABLE(ttep)
-#define TTE_SET_SOFTEXEC(ttep) TTE_SET_EXEC(ttep)
-#define TTE_CLR_SOFTEXEC(ttep) TTE_CLR_EXEC(ttep)
-#else
-#define TTE_IS_SOFTEXEC(ttep) (0)
-#define TTE_SET_SOFTEXEC(ttep)
-#define TTE_CLR_SOFTEXEC(ttep)
-#endif /* lint */
-#endif /* !_ASM */
-
#ifdef __cplusplus
}
#endif
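
The lint-only definitions in the removed block existed because the
no-op forms trip two lint checks. A sketch of the offending shapes:

	int cnt = 0;

	if (TTE_IS_SOFTEXEC(ttep))	/* expands to if (0): constant conditional */
		cnt++;

	if (cnt)
		TTE_SET_SOFTEXEC(ttep);	/* expands to nothing: empty if body */

Mapping the soft-exec macros to the real exec macros under lint kept the
checker quiet without changing the generated sun4u code.
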
diff --git a/usr/src/uts/sun4u/vm/mach_sfmmu.h b/usr/src/uts/sun4u/vm/mach_sfmmu.h
index ed2b4f1c79..5eb7ae3f38 100644
--- a/usr/src/uts/sun4u/vm/mach_sfmmu.h
+++ b/usr/src/uts/sun4u/vm/mach_sfmmu.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -112,12 +112,6 @@ label/**/3:
/* END CSTYLED */
/*
- * This macro is to control the pagesizes used for shared context on
- * Rock systems.
- */
-#define CHECK_SHARED_PGSZ(tsbarea, tte, tmp, use_shctx, label)
-
-/*
* This macro is used in the MMU code to check if TL should be lowered from
* 2 to 1 to pop trapstat's state. See the block comment in trapstat.c
* for details.
@@ -267,12 +261,6 @@ label/**/3:
/* CSTYLED */ \
label/**/1:
-/*
- * No support for non-coherent I-cache in sun4u
- */
-#define TTE_SET_EXEC_ML(tte, ttepa, tmp1, label)
-#define TTE_CLR_SOFTEXEC_ML(tte)
-#define TTE_CHK_SOFTEXEC_ML(tte) andcc tte, 0, %g0
/*
* TTE_SET_REF_ML is a macro that updates the reference bit if it is
diff --git a/usr/src/uts/sun4v/Makefile.files b/usr/src/uts/sun4v/Makefile.files
index 48fdc723c1..93e3f9bfba 100644
--- a/usr/src/uts/sun4v/Makefile.files
+++ b/usr/src/uts/sun4v/Makefile.files
@@ -179,7 +179,6 @@ SN1_BRAND_OBJS = sn1_brand.o sn1_brand_asm.o
#
NI_PCBE_OBJS = niagara_pcbe.o
N2_PCBE_OBJS = niagara2_pcbe.o
-RK_PCBE_OBJS = rock_pcbe.o
#
# cpu modules
@@ -190,7 +189,6 @@ NIAGARACPU_OBJS = niagara.o niagara_copy.o common_asm.o niagara_perfctr.o
NIAGARACPU_OBJS += niagara_asm.o atomic.o
NIAGARA2CPU_OBJS = niagara2.o niagara_copy.o common_asm.o niagara_perfctr.o
NIAGARA2CPU_OBJS += niagara2_asm.o atomic.o
-ROCKCPU_OBJS = rock.o rock_copy.o common_asm.o rock_asm.o atomic.o
#
# platform module
diff --git a/usr/src/uts/sun4v/Makefile.sun4v.shared b/usr/src/uts/sun4v/Makefile.sun4v.shared
index ca0483862b..8796b1a67b 100644
--- a/usr/src/uts/sun4v/Makefile.sun4v.shared
+++ b/usr/src/uts/sun4v/Makefile.sun4v.shared
@@ -433,9 +433,9 @@ XMODS +=
#
# cpu modules
#
-CPU_KMODS += generic niagara niagara2 vfalls rock
+CPU_KMODS += generic niagara niagara2 vfalls
-LINT_CPU_KMODS += generic rock
+LINT_CPU_KMODS += generic
#
# Performance Counter BackEnd Modules (/usr/kernel/pcbe):
@@ -443,4 +443,3 @@ LINT_CPU_KMODS += generic rock
PCBE_KMODS += niagara_pcbe
PCBE_KMODS += niagara2_pcbe
PCBE_KMODS += vfalls_pcbe
-PCBE_KMODS += rock_pcbe
diff --git a/usr/src/uts/sun4v/cpu/rock.c b/usr/src/uts/sun4v/cpu/rock.c
deleted file mode 100644
index b5192fab57..0000000000
--- a/usr/src/uts/sun4v/cpu/rock.c
+++ /dev/null
@@ -1,1014 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include <sys/types.h>
-#include <sys/systm.h>
-#include <sys/archsystm.h>
-#include <sys/machparam.h>
-#include <sys/machsystm.h>
-#include <sys/cpu.h>
-#include <sys/elf_SPARC.h>
-#include <vm/page.h>
-#include <vm/vm_dep.h>
-#include <sys/cpuvar.h>
-#include <sys/async.h>
-#include <sys/cmn_err.h>
-#include <sys/debug.h>
-#include <sys/dditypes.h>
-#include <sys/sunddi.h>
-#include <sys/cpu_module.h>
-#include <sys/prom_debug.h>
-#include <sys/vmsystm.h>
-#include <sys/prom_plat.h>
-#include <sys/sysmacros.h>
-#include <sys/intreg.h>
-#include <sys/machtrap.h>
-#include <sys/ontrap.h>
-#include <sys/ivintr.h>
-#include <sys/atomic.h>
-#include <sys/panic.h>
-#include <sys/dtrace.h>
-#include <vm/seg_spt.h>
-#include <sys/hypervisor_api.h>
-#include <sys/rock_hypervisor_api.h>
-#include <sys/hsvc.h>
-#include <vm/hat_sfmmu.h>
-#include <sys/mutex_impl.h>
-
-uint_t root_phys_addr_lo_mask = 0xffffffffU;
-uint8_t enable_tm = 1;
-
-char cpu_module_name[] = "SUNW,UltraSPARC-AT10";
-boolean_t hsvc_tm_available = B_TRUE;
-
-static hsvc_info_t rock_tm_hsvc = {
- HSVC_REV_1, /* HSVC rev num */
- NULL, /* Private */
- HSVC_GROUP_TM, /* Requested API Group */
- ROCK_HSVC_MAJOR, /* Requested Major */
- ROCK_HSVC_MINOR, /* Requested Minor */
- cpu_module_name /* Module name */
-};
-
-boolean_t hsvc_mmu_ext_available = B_TRUE;
-
-static hsvc_info_t rock_mmu_ext_hsvc = {
- HSVC_REV_1, /* HSVC rev num */
- NULL, /* Private */
- HSVC_GROUP_RKMMU_EXT, /* Requested API Group */
- ROCK_HSVC_MAJOR, /* Requested Major */
- ROCK_HSVC_MINOR, /* Requested Minor */
- cpu_module_name /* Module name */
-};
-
-static void encode_pgsz_order(uint64_t, int, int, uint16_t *, uchar_t *);
-static void set_pgsz_order(uchar_t, uchar_t, uint64_t *, int *, int *,
- sfmmu_t *);
-
-extern void rock_mutex_delay(void);
-
-/*
- * External /etc/system tunable for controlling whether shared or private pages
- * come first in the pagesize order register.
- */
-int pgsz_order_shared_first = 1;
-
-#define MCOREID_MASK 0x1E
-#define MCOREID_SHIFT 1
-
-static uint_t mmu_disable_large_pages = ((1 << TTE512K) | (1 << TTE32M) |
- (1 << TTE2G) | (1 << TTE16G));
-static uint_t mmu_disable_ism_large_pages = ((1 << TTE512K) | (1 << TTE32M) |
- (1 << TTE2G) | (1 << TTE16G));
-static uint_t mmu_disable_auto_data_large_pages = ((1 << TTE512K) |
- (1 << TTE32M) | (1 << TTE2G) | (1 << TTE16G));
-static uint_t mmu_disable_auto_text_large_pages = ((1 << TTE512K) |
- (1 << TTE32M) | (1 << TTE2G) | (1 << TTE16G));
-
-void
-cpu_setup(void)
-{
- extern int cpc_has_overflow_intr;
- uint64_t sup_minor;
- int status;
-
- /*
- * The setup common to all CPU modules is done in the
- * cpu_setup_common routine.
- */
- cpu_setup_common(NULL);
-
- /*
- * Rock's max nctxs is 64K. Set it accordingly.
- */
- nctxs = MAX_NCTXS;
-
- /*
- * Rock I$ is non-coherent.
- */
- mach_setup_icache(0);
-
-#ifdef DEBUG
- /*
- * These should always be present on Rock
- */
- if (cpu_hwcap_flags == 0)
- cmn_err(CE_WARN, "hwcap-list missing from MD");
-#endif
- cpu_hwcap_flags |= AV_SPARC_ASI_CACHE_SPARING;
-
- cache |= (CACHE_PTAG | CACHE_IOCOHERENT);
-
- if (use_page_coloring) {
- do_pg_coloring = 1;
- }
-
- /*
- * Rock generates an hpriv performance event trap instead of a pic
- * overflow trap. To get the attention of the guest, the hv in turn
- * generates a pic overflow trap. Therefore enable support for that.
- */
- cpc_has_overflow_intr = 1;
-
- /*
- * Enable 4M pages for OOB.
- */
- max_uheap_lpsize = MMU_PAGESIZE4M;
- max_ustack_lpsize = MMU_PAGESIZE4M;
- max_privmap_lpsize = MMU_PAGESIZE4M;
-
- /*
- * hv_tm_enable is a part of TM group. We need to
- * negotiate that API group before we can use it.
- */
- status = hsvc_register(&rock_tm_hsvc, &sup_minor);
- if ((status != 0) || (sup_minor < (uint64_t)ROCK_HSVC_MINOR)) {
- cmn_err(CE_WARN, "%s cannot negotiate hypervisor services: "
- "major: 0x%lx minor: 0x%lx group: 0x%x errno: %d",
- cpu_module_name, rock_tm_hsvc.hsvc_major,
- rock_tm_hsvc.hsvc_minor, HSVC_GROUP_TM, status);
- hsvc_tm_available = B_FALSE;
- }
-
- /*
- * Negotiate API group for rock mmu extensions.
- */
- status = hsvc_register(&rock_mmu_ext_hsvc, &sup_minor);
- if ((status != 0) || (sup_minor <
- (uint64_t)ROCK_HSVC_MINOR)) {
- cmn_err(CE_WARN, "%s cannot negotiate hypervisor services: "
- "major: 0x%lx minor: 0x%lx group: 0x%x errno: %d",
- cpu_module_name, rock_mmu_ext_hsvc.hsvc_major,
- rock_mmu_ext_hsvc.hsvc_minor, HSVC_GROUP_RKMMU_EXT,
- status);
- hsvc_mmu_ext_available = B_FALSE;
- }
-}
-
-/*
- * Set the magic constants of the implementation.
- */
-void
-cpu_fiximp(struct cpu_node *cpunode)
-{
- /*
- * The Cache node is optional in the MD. Therefore, in case it
- * does not exist, use hardcoded values.
- */
-#ifdef DEBUG
- /*
- * ...that said, we do want this info to come from the MD.
- */
- if (cpunode->ecache_size == 0 || cpunode->ecache_linesize == 0 ||
- cpunode->ecache_associativity == 0) {
- cmn_err(CE_WARN, "ecache info missing from MD");
- }
-#endif
- if (cpunode->ecache_size == 0)
- cpunode->ecache_size = 2 * 1024 * 1024;
- if (cpunode->ecache_linesize == 0)
- cpunode->ecache_linesize = 64;
- if (cpunode->ecache_associativity == 0)
- cpunode->ecache_associativity = 8;
-}
-
-void
-dtrace_flush_sec(uintptr_t addr)
-{
- pfn_t pfn;
- proc_t *procp = ttoproc(curthread);
- page_t *pp;
- caddr_t va;
-
- pfn = hat_getpfnum(procp->p_as->a_hat, (void *)addr);
- if (pfn != -1) {
- ASSERT(pf_is_memory(pfn));
- pp = page_numtopp_noreclaim(pfn, SE_SHARED);
- if (pp != NULL) {
- va = ppmapin(pp, PROT_READ | PROT_WRITE, (void *)addr);
- /* sparc needs 8-byte align */
- doflush((caddr_t)((uintptr_t)va & -8l));
- ppmapout(va);
- page_unlock(pp);
- }
- }
-}
-
-void
-cpu_map_exec_units(struct cpu *cp)
-{
- ASSERT(MUTEX_HELD(&cpu_lock));
-
- /*
- * The cpu_ipipe and cpu_fpu fields are initialized based on
- * the execution unit sharing information from the MD. They
- * default to the CPU id in the absence of such information.
- */
- cp->cpu_m.cpu_ipipe = cpunodes[cp->cpu_id].exec_unit_mapping;
- if (cp->cpu_m.cpu_ipipe == NO_EU_MAPPING_FOUND)
- cp->cpu_m.cpu_ipipe = (id_t)(cp->cpu_id);
-
- cp->cpu_m.cpu_fpu = cpunodes[cp->cpu_id].fpu_mapping;
- if (cp->cpu_m.cpu_fpu == NO_EU_MAPPING_FOUND)
- cp->cpu_m.cpu_fpu = (id_t)(cp->cpu_id);
-
- cp->cpu_m.cpu_core = (cp->cpu_id & MCOREID_MASK) >> MCOREID_SHIFT;
-
- /*
- * The cpu_chip field is initialized based on the information
- * in the MD and assumes that all cpus within a chip
- * share the same L2 cache. If no such info is available, we
- * set the cpu to CPU_CHIPID_INVALID.
- */
- cp->cpu_m.cpu_mpipe = cpunodes[cp->cpu_id].l2_cache_mapping;
- if (cp->cpu_m.cpu_mpipe == NO_L2_CACHE_MAPPING_FOUND)
- cp->cpu_m.cpu_mpipe = CPU_L2_CACHEID_INVALID;
-
- cp->cpu_m.cpu_chip = cpunodes[cp->cpu_id].l2_cache_mapping;
- if (cp->cpu_m.cpu_chip == NO_L2_CACHE_MAPPING_FOUND)
- cp->cpu_m.cpu_chip = CPU_CHIPID_INVALID;
-}
-
-void
-cpu_init_private(struct cpu *cp)
-{
- cpu_map_exec_units(cp);
- mutex_delay = rock_mutex_delay;
-}
-
-/*ARGSUSED*/
-void
-cpu_uninit_private(struct cpu *cp)
-{
-}
-
-/*
- * cpu_feature_init
- *
- * This function is called once per strand.
- */
-void
-cpu_feature_init(void)
-{
- static int set_mutex_backoff_tunables = 0;
- /*
- * Set constants for mutex_backoff only once.
- * On Rock, setting this to 8 gives the best performance,
- * even for multi-chip systems.
- */
- if (!set_mutex_backoff_tunables) {
- mutex_backoff_base = 1;
- mutex_cap_factor = 8;
- set_mutex_backoff_tunables = 1;
- }
-
- /*
- * Enable or disable for each cpu if hypervisor API is negotiated.
- */
- if (hsvc_tm_available == B_TRUE)
- (void) hv_tm_enable((uint64_t)enable_tm);
-}
-
-/*
- * Flush specified address range from I$ via hv_mem_iflush interface
- * Note that the hypervisor interface expects physical address range
- * and can flush less than the requested size.
- */
-
-void
-rock_sync_icache(caddr_t addr, size_t size)
-{
- uint64_t pa, i, flushlen, flushed;
-
- if (!force_sync_icache_after_bcopy)
- /*
- * Do not clear the I-cache after bcopy.
- * The default value is 0. This flag may be
- * set via /etc/system.
- */
- return;
-
- if (!tba_taken_over)
- /*
- * Very early in boot, va_to_pa() will try to call back
- * into OBP. Very *very* early in boot, this will fail
- * because we haven't set up the OBP callback handler.
- * (Without this check, kmdb boot will fail.)
- */
- return;
-
- for (i = 0; i < size; i += flushed) {
- pa = va_to_pa(addr + i);
- ASSERT(pa != -1);
-
- /*
- * Only flush the required length up to a PAGESIZE.
- */
-
- flushlen = MIN((size - i), (PAGESIZE - (pa & MMU_PAGEOFFSET)));
-
- /*
- * Flush I$ up to the page boundary. This call should never
- * fail. If it does, we panic the system as I$ may contain
- * stale instructions, which can result in silent data
- * corruption.
- */
-
- if (hv_mem_iflush(pa, flushlen, &flushed) != H_EOK) {
- cmn_err(CE_PANIC, "Flushing the Icache failed");
- }
-
- }
-}
-
-/*
- * There are no Hypervisor trapstat(1m) interfaces for Rock.
- * If trapstat(1m) wants to do its thing, it will have to
- * take over all TLB miss handling.
- */
-int
-cpu_trapstat_conf(int cmd)
-{
- int status;
-
- switch (cmd) {
- case CPU_TSTATCONF_INIT:
- case CPU_TSTATCONF_FINI:
- case CPU_TSTATCONF_ENABLE:
- case CPU_TSTATCONF_DISABLE:
- status = ENOTSUP;
- break;
- default:
- status = EINVAL;
- break;
- }
- return (status);
-}
-
-/*ARGSUSED*/
-void
-cpu_trapstat_data(void *buf, uint_t tstat_pgszs)
-{
-}
-
-#define MAX_PAGE_COLORS_SHIFT (5)
-#define MAX_PAGE_COLORS (1 << MAX_PAGE_COLORS_SHIFT)
-
-/*ARGSUSED*/
-uint_t
-page_pfn_2_color_cpu(pfn_t pfn, uchar_t szc, void *cookie)
-{
- uint_t color;
-
- pfn = PFN_BASE(pfn, szc);
- color = pfn ^ (pfn >> 20);
- color = color ^ (color >> 10);
- return ((color ^ (color >> 5)) & 0x1f);
-}
-
-/*
- * This macro rotates the value "x" n steps to the right within a
- * field of "n + m" bits.
- * ASSERT(x < (1 << (n + m)));
- */
-#define ROTATE_BITS(x, n, m) (((x) >> (n)) | (((x) & ((1 << (n)) - 1)) << m))
-
-
-uchar_t clr2sqnclr_table[MMU_PAGE_SIZES][MAX_PAGE_COLORS];
-
-/*
- * on Rock, the hash cache index is calculated as follows:
- * pa[47:43]^pa[42:38]^pa[37:33]^pa[32:28]^
- * pa[27:23]^pa[22:18]^pa[17:13].pa[12:6]
- * That is, every 5-bit field is folded and XORed together. Page sizes
- * differ by 3 bits, which is a factor of 8. This function builds the
- * color-to-sequential-color table by rotating the color 3 steps within
- * a 5-bit field for every page size.
- */
-void
-clr2sqnclr_table_init()
-{
- uchar_t szc;
- uint_t color;
- uint_t rot = 0;
-
- for (szc = 0; szc < MMU_PAGE_SIZES; szc++) {
- rot = (szc * 3) % MAX_PAGE_COLORS_SHIFT;
- for (color = 0; color < MAX_PAGE_COLORS; color++) {
- clr2sqnclr_table[szc][color] =
- ROTATE_BITS(color, rot,
- (MAX_PAGE_COLORS_SHIFT - rot));
- }
- }
-}
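
To make the table construction above concrete, the following stand-alone C sketch (not part of the removed file; pfn_to_color, main, and the printed values are illustrative assumptions) reproduces the 5-bit XOR folding of page_pfn_2_color_cpu() and the 3-step rotation used to fill clr2sqnclr_table:

#include <stdio.h>
#include <stdint.h>

#define	COLOR_SHIFT	5			/* MAX_PAGE_COLORS_SHIFT */
#define	NCOLORS		(1 << COLOR_SHIFT)	/* 32 page colors */

/* rotate x right by n steps within a field of (n + m) bits */
#define	ROTATE_BITS(x, n, m) (((x) >> (n)) | (((x) & ((1 << (n)) - 1)) << (m)))

/* fold a pfn down to a 5-bit color, as page_pfn_2_color_cpu() does */
static unsigned
pfn_to_color(uint64_t pfn)
{
	unsigned c = (unsigned)(pfn ^ (pfn >> 20));

	c ^= c >> 10;
	return ((c ^ (c >> 5)) & (NCOLORS - 1));
}

int
main(void)
{
	unsigned szc, color;

	/* page sizes differ by 3 bits, so rotate 3 more steps per size */
	for (szc = 0; szc < 3; szc++) {
		unsigned rot = (szc * 3) % COLOR_SHIFT;

		for (color = 0; color < 4; color++)
			printf("szc %u color %u -> seq color %u\n", szc,
			    color, ROTATE_BITS(color, rot,
			    COLOR_SHIFT - rot));
	}
	printf("pfn 0x12345 -> color %u\n", pfn_to_color(0x12345));
	return (0);
}

For example, with szc = 1 the rotation is 3, so color 1 (00001) maps to sequential color 4 (00100), i.e. ROTATE_BITS(1, 3, 2).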
-
-uint_t
-clr2sqnclr(uchar_t szc, uint_t color)
-{
- ASSERT(szc < MMU_PAGE_SIZES);
- ASSERT(color < MAX_PAGE_COLORS);
-
- return (clr2sqnclr_table[szc][color]);
-}
-
-#if MMU_PAGE_SIZES > 8
-#error MMU_PAGE_SIZES can be at most 8
-#endif
-
-uint_t
-page_get_nsz_color_mask_cpu(uchar_t szc, uint_t mask)
-{
- static uint_t rock_color_masks[7] = {0x18, 6, 0x11, 0xc, 3, 0x18, 6};
-
- ASSERT(szc < MMU_PAGE_SIZES - 1);
- return (mask & rock_color_masks[szc]);
-}
-
-/*ARGSUSED*/
-uint_t
-page_get_nsz_color_cpu(uchar_t szc, uint_t color)
-{
- return (color);
-}
-
-uint_t
-page_get_color_shift_cpu(uchar_t szc, uchar_t nszc)
-{
- ASSERT(nszc >= szc);
- return (0);
-}
-
-/*ARGSUSED*/
-pfn_t
-page_next_pfn_for_color_cpu(pfn_t pfn, uchar_t szc, uint_t color,
- uint_t ceq_mask, uint_t color_mask, void *cookie)
-{
- uint_t sqn_ceq_mask = clr2sqnclr(szc, ceq_mask);
- uint_t sqn_color = clr2sqnclr(szc, color);
- uint_t pfn_shift = PNUM_SHIFT(szc);
- pfn_t cpfn, npfn, base_pfn = pfn & (~(pfn_t)color_mask << pfn_shift);
- uint_t base_sqn_color, nsqn_color, wrap = 0;
-
- ASSERT((color & ~ceq_mask) == 0);
-
- base_sqn_color = clr2sqnclr(szc,
- page_pfn_2_color_cpu(base_pfn, szc, NULL)) ^ sqn_color;
- nsqn_color = base_sqn_color;
-
- cpfn = (pfn_t)-1L;
- do {
- npfn = base_pfn | (nsqn_color << pfn_shift);
-
- ASSERT(((page_pfn_2_color_cpu(npfn, szc, NULL) ^ color) &
- ceq_mask) == 0);
-
- if (npfn > pfn && npfn < cpfn)
- cpfn = npfn;
-
- nsqn_color = INC_MASKED(nsqn_color, sqn_ceq_mask, color_mask);
- if (nsqn_color != base_sqn_color)
- continue;
-
- if (cpfn != (pfn_t)-1L)
- break;
-
- base_pfn += ((pfn_t)color_mask + 1) << pfn_shift;
-
- base_sqn_color = clr2sqnclr(szc,
- page_pfn_2_color_cpu(base_pfn, szc, NULL)) ^ sqn_color;
- nsqn_color = base_sqn_color;
- wrap++;
-
- } while (nsqn_color != base_sqn_color || wrap < 2);
-
- ASSERT(cpfn != (pfn_t)-1L);
-
- return (cpfn);
-}
-
-void
-page_coloring_init_cpu()
-{
- int i;
- uint_t colors = 1 << MAX_PAGE_COLORS_SHIFT;
-
- for (i = 0; i < mmu_page_sizes; i++) {
- hw_page_array[i].hp_colors = colors;
- }
-
- /*
- * Initialize the conversion table between page colors and
- * sequential colors.
- */
- clr2sqnclr_table_init();
-
-}
-
-/*
- * group colorequiv colors on Rock by low order bits of the color first
- */
-void
-page_set_colorequiv_arr_cpu(void)
-{
- static uint_t nequiv_shades_log2[MMU_PAGE_SIZES] = {0, 3, 0, 0, 0, 0};
-
- if (colorequiv > 1) {
- int i;
- uint_t sv_a = lowbit(colorequiv) - 1;
-
- if (sv_a > 15)
- sv_a = 15;
-
- for (i = 0; i < MMU_PAGE_SIZES; i++) {
- uint_t colors;
- uint_t a = sv_a;
-
- if ((colors = hw_page_array[i].hp_colors) <= 1)
- continue;
- while ((colors >> a) == 0)
- a--;
- if (a > (colorequivszc[i] & 0xf) +
- (colorequivszc[i] >> 4)) {
- if (a <= nequiv_shades_log2[i]) {
- colorequivszc[i] = (uchar_t)a;
- } else {
- colorequivszc[i] =
- ((a - nequiv_shades_log2[i]) << 4) |
- nequiv_shades_log2[i];
- }
- }
- }
- }
-}
-
-/*
- * Calculate the page sizes needed to program Rock TLB page size register.
- * The invctx parameter is a flag which indicates that it will be necessary to
- * synchronize by invalidating contexts if the sfmmu pagesize register is
- * updated.
- */
-void
-mmu_set_pgsz_order(sfmmu_t *sfmmup, int invctx)
-{
- uchar_t private_pgsz_mask;
- uchar_t shared_pgsz_mask;
- uint16_t pgsz_order_hv[MAX_PGSZ_SEARCH_ORDER];
- uint64_t pgsz_order = 0;
- uchar_t pgsz_map = 0;
- int private_pgsz_num = 0;
- int shared_pgsz_num = 0;
- int tot_pgsz_num;
- sf_scd_t *scdp;
- int ret;
- int i;
-
- /*
- * The hatlock must be held in all cases except when the sfmmu is
- * being initialized by hat_alloc() or we are calling hat_dup(); in
- * these cases no other thread will be using the sfmmu yet.
- */
-
- ASSERT(!invctx || sfmmu_hat_lock_held(sfmmup));
-
- if (pgsz_search_on == 0)
- return;
-
- /* Always enable 8K private mappings */
- private_pgsz_mask = 1 << TTE8K;
-
- /* Enable 64K private mappings unless specifically disabled */
- if (!(disable_large_pages & (1 << TTE64K))) {
- private_pgsz_mask |= 1 << TTE64K;
- }
-
- /*
- * First check for ISM segments not in an SCD. The algorithm for
- * creating an SCD is to create one when an (D)ISM segment is attached
- * unless the process's shared segments are a subset of an SCD which
- * already exists.
- *
- * This situation also arises when we attach to more than the maximum
- * number of (D)ISM segments defined in the region bit map
- * (currently 64).
- *
- * We have set mmu_disable_ism_large_pages to force ISM segments to use
- * only 4M and 256M pages.
- */
- if (SFMMU_FLAGS_ISSET(sfmmup, HAT_ISMNOTINSCD)) {
- private_pgsz_mask |= 1 << TTE4M;
- if (SFMMU_FLAGS_ISSET(sfmmup, HAT_256M_ISM)) {
- private_pgsz_mask |= 1 << TTE256M;
- }
- }
-
- /* Now check for regions not included in the SCD. */
- if ((scdp = sfmmup->sfmmu_scdp) != NULL) {
- SF_RGNMAP_EQUAL(&scdp->scd_hmeregion_map,
- &sfmmup->sfmmu_hmeregion_map,
- SFMMU_HMERGNMAP_WORDS, ret);
- if (!ret) {
- private_pgsz_mask |= sfmmup->sfmmu_rtteflags;
- }
- } else {
- private_pgsz_mask |= sfmmup->sfmmu_rtteflags;
- }
-
- private_pgsz_mask |= sfmmup->sfmmu_tteflags;
-
- /*
- * If the process is part of an SCD then enable 4M and 256M shared
- * page sizes - unless these are specifically disabled. If the 4M
- * shared page size is specifically disabled and the process has (D)ISM
- * segments attached or 4M regions then enable the private 4M page size.
- * If the 256M shared page size is disabled and the process has a 256M
- * page size region then enable the 256M private page size. The trap
- * handler looks at the shared page sizes enabled and if a shared
- * mapping does not correspond to one these sizes then it is treated
- * as a private mapping.
- *
- * The SCD includes the process's main text segment and (D)ISM segments
- * but we only enable the 4M shared page size so an 8K main text
- * segment will be treated as private due to the trap handler support.
- *
- * Note that for simplicity the ordering of the shared page sizes is
- * hard coded.
- */
- shared_pgsz_mask = 0;
- if (sfmmup->sfmmu_scdp != NULL) {
- if (!(disable_shctx_large_pages & (1 << TTE4M))) {
- shared_pgsz_mask |= 1 << TTE4M;
- } else if (sfmmup->sfmmu_iblk != NULL ||
- (sfmmup->sfmmu_rtteflags &
- (1 << TTE4M))) {
- private_pgsz_mask |= 1 << TTE4M;
- }
-
- if (SFMMU_FLAGS_ISSET(sfmmup, HAT_256M_ISM) ||
- (sfmmup->sfmmu_rtteflags & (1 << TTE256M))) {
- if (!(disable_shctx_large_pages & (1 << TTE256M))) {
- shared_pgsz_mask |= 1 << TTE256M;
- } else {
- private_pgsz_mask |= 1 << TTE256M;
- }
- }
- }
-
- set_pgsz_order(private_pgsz_mask, shared_pgsz_mask, &pgsz_order,
- &private_pgsz_num, &shared_pgsz_num, sfmmup);
-
- encode_pgsz_order(pgsz_order, private_pgsz_num, shared_pgsz_num,
- pgsz_order_hv, &pgsz_map);
-
- tot_pgsz_num = private_pgsz_num + shared_pgsz_num;
- ASSERT(tot_pgsz_num <= MAX_PGSZ_SEARCH_ORDER);
-
- for (i = 0; i < tot_pgsz_num; i++) {
- if (pgsz_order_hv[i] != sfmmup->sfmmu_pgsz_order_hv[i])
- break;
- }
-
- /*
- * If either we've reached the maximum number of page sizes or the
- * next element is 0, indicating the end of the list, then both the
- * entries and their number in both arrays is the same and we return.
- */
- if ((i == tot_pgsz_num) && (i == MAX_PGSZ_SEARCH_ORDER ||
- sfmmup->sfmmu_pgsz_order_hv[i] == 0)) {
- ASSERT(pgsz_map == sfmmup->sfmmu_pgsz_map);
- return;
- }
-
- /* Otherwise update the sw page size register setting */
- if (invctx) {
- sfmmu_invalidate_ctx(sfmmup);
- }
-
- for (i = 0; i < tot_pgsz_num; i++) {
- sfmmup->sfmmu_pgsz_order_hv[i] = pgsz_order_hv[i];
- }
-
- /* Disable next entry in search list to mark the end */
- if (i < MAX_PGSZ_SEARCH_ORDER) {
- sfmmup->sfmmu_pgsz_order_hv[i] = 0;
- }
- sfmmup->sfmmu_pgsz_map = pgsz_map;
-}
-
-/*
- * Encode the Rock TLB page size register.
- *
- * Input:
- * pgsz_order, ordered list of page sizes, private and shared; the order
- * between these depends on the pgsz_order_shared_first config variable.
- * private_pgsz_num, number of private page sizes.
- * shared_pgsz_num, number of shared page sizes.
- * Output:
- * pgsz_order_hv contains the encoded pagesize search order for the hv.
- * pgsz_map field contains the page size bit map used by the trap
- * handler to prevent unauthorized shared page sizes being used.
- */
-
-static void
-encode_pgsz_order(uint64_t pgsz_order, int private_pgsz_num,
- int shared_pgsz_num, uint16_t *pgsz_order_hv, uchar_t *pgsz_map)
-{
- int i;
- int tot_pgsz_num;
- uint16_t pgsz_entry;
- uint16_t first_entry_mask, second_entry_mask;
- int first_pgsz_num;
-
- ASSERT(private_pgsz_num < MMU_PAGE_SIZES);
- ASSERT(shared_pgsz_num < MMU_PAGE_SIZES);
- ASSERT(private_pgsz_num > 0);
-
- if (pgsz_order_shared_first) {
- first_entry_mask = TLB_PGSZ_CONTEXT1_ENABLE;
- second_entry_mask = TLB_PGSZ_ENABLE;
- first_pgsz_num = shared_pgsz_num;
- } else {
- first_entry_mask = TLB_PGSZ_ENABLE;
- second_entry_mask = TLB_PGSZ_CONTEXT1_ENABLE;
- first_pgsz_num = private_pgsz_num;
- }
-
- tot_pgsz_num = private_pgsz_num + shared_pgsz_num;
- for (i = 0; i < tot_pgsz_num; i++) {
- pgsz_entry = pgsz_order & TTE_SZ_BITS;
- if (i < first_pgsz_num) {
- if (pgsz_order_shared_first) {
- *pgsz_map |= (1 << pgsz_entry);
- }
- pgsz_entry |= first_entry_mask;
- } else {
- if (!pgsz_order_shared_first) {
- *pgsz_map |= (1 << pgsz_entry);
- }
- pgsz_entry |= second_entry_mask;
- }
- pgsz_order >>= 4;
- pgsz_order_hv[i] = pgsz_entry;
- }
-}
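
A worked example may help here (hedged, assuming the conventional sfmmu size codes TTE8K = 0 and TTE4M = 3): with pgsz_order_shared_first set, one shared 4M size and one private 8K size, set_pgsz_order() below produces pgsz_order = (TTE8K << 4) | TTE4M and private_pgsz_num = shared_pgsz_num = 1. The loop above then emits pgsz_order_hv[0] = TTE4M | TLB_PGSZ_CONTEXT1_ENABLE and pgsz_order_hv[1] = TTE8K | TLB_PGSZ_ENABLE, and sets only the 1 << TTE4M bit in *pgsz_map, so the trap handler treats a shared mapping of any other size as private.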
-
-/*
- * The function returns the mmu-specific values for the
- * hat's disable_large_pages, disable_ism_large_pages,
- * disable_auto_data_large_pages, and
- * disable_auto_text_large_pages variables.
- */
-uint_t
-mmu_large_pages_disabled(uint_t flag)
-{
- uint_t pages_disable = 0;
-
- if (flag == HAT_LOAD) {
- pages_disable = mmu_disable_large_pages;
- } else if (flag == HAT_LOAD_SHARE) {
- pages_disable = mmu_disable_ism_large_pages;
- } else if (flag == HAT_AUTO_DATA) {
- pages_disable = mmu_disable_auto_data_large_pages;
- } else if (flag == HAT_AUTO_TEXT) {
- pages_disable = mmu_disable_auto_text_large_pages;
- }
- return (pages_disable);
-}
-
-/*
- * Uses private and shared page size bitmaps to produce an ordered list
- * of page sizes and counts to be passed to encode_pgsz_order().
- *
- * Input:
- * private_pgsz_mask, bit map of private page sizes.
- * shared_pgsz_mask, bit map of shared page sizes.
- * sfmmup, pointer to hat structure.
- *
- * Output:
- * pgsz_order, ordered list of page sizes.
- * private_pgsz_num, number of private page sizes in pgsz_order.
- * shared_pgsz_num, number of shared page sizes in pgsz_order.
- */
-static void
-set_pgsz_order(uchar_t private_pgsz_mask, uchar_t shared_pgsz_mask,
- uint64_t *pgsz_order, int *private_pgsz_num, int *shared_pgsz_num,
- sfmmu_t *sfmmup)
-{
- int64_t sortcnt[MMU_PAGE_SIZES];
- int8_t tmp_pgsz[MMU_PAGE_SIZES];
- ulong_t tmp;
- uint8_t i, j, max;
-
- *private_pgsz_num = 0;
- *shared_pgsz_num = 0;
- *pgsz_order = 0;
-
- /* Sort pages by area mapped */
- for (i = 0; i < mmu_page_sizes; i++) {
- tmp = sfmmup->sfmmu_ttecnt[i] + sfmmup->sfmmu_ismttecnt[i];
- sortcnt[i] = tmp << TTE_PAGE_SHIFT(i);
- }
-
- for (j = 0; j < mmu_page_sizes; j++) {
- for (i = mmu_page_sizes - 1, max = 0; i > 0; i--) {
- if (sortcnt[i] > sortcnt[max])
- max = i;
- }
- tmp_pgsz[j] = max;
- sortcnt[max] = -1;
- }
-
- /* Add shared page sizes to page order if these come first */
- if (pgsz_order_shared_first) {
- if (shared_pgsz_mask & (1 << TTE256M)) {
- *pgsz_order = TTE256M;
- (*shared_pgsz_num)++;
- }
- if (shared_pgsz_mask & (1 << TTE4M)) {
- *pgsz_order |= (TTE4M << (*shared_pgsz_num * 4));
- (*shared_pgsz_num)++;
- }
- }
-
- /* Add private page sizes to page order */
- for (i = 0; i < mmu_page_sizes; i++) {
- if (private_pgsz_mask & (1 << tmp_pgsz[i])) {
- *pgsz_order |= (tmp_pgsz[i] <<
- ((*private_pgsz_num + *shared_pgsz_num) * 4));
- (*private_pgsz_num)++;
- }
- }
-
- /* Add shared page sizes to page order if these come last */
- if (!pgsz_order_shared_first) {
- if (shared_pgsz_mask & (1 << TTE256M)) {
- *pgsz_order |= (TTE256M <<
- ((*private_pgsz_num + *shared_pgsz_num) * 4));
- (*shared_pgsz_num)++;
- }
- if (shared_pgsz_mask & (1 << TTE4M)) {
- *pgsz_order |= (TTE4M <<
- ((*private_pgsz_num + *shared_pgsz_num) * 4));
- (*shared_pgsz_num)++;
- }
- }
-
- ASSERT(*pgsz_order);
- ASSERT(*private_pgsz_num);
- ASSERT((*private_pgsz_num + *shared_pgsz_num)
- <= MAX_PGSZ_SEARCH_ORDER);
-}
-
-/*
- * This routine is called without holding the hat lock to determine
- * whether the process's optimal page size order has changed significantly
- * since the page size register was last set. If it has changed we get the
- * hat lock and call mmu_set_pgsz_order() to update the effective pagesize
- * order.
- */
-void
-mmu_check_page_sizes(sfmmu_t *sfmmup, uint64_t *ttecnt)
-{
- int64_t sortcnt[MMU_PAGE_SIZES];
- int8_t tmp_pgsz[MMU_PAGE_SIZES];
- ulong_t tmp;
- int8_t i, j, max;
- uint_t pgsz;
- uint16_t *pgsz_order_hv;
- int page_order_changed;
- hatlock_t *hatlockp;
- int pgsz_count = 0;
-
- ASSERT(!sfmmu_hat_lock_held(sfmmup));
-
- if (pgsz_search_on == 0)
- return;
-
- /*
- * Check if ttecnt has changed significantly since the last time we
- * were called. If the shared page sizes have changed then this is
- * handled by mmu_set_pgsz_order() being called directly when we join
- * the SCD.
- */
- for (i = 0; i < mmu_page_sizes; i++) {
- if (ttecnt[i] > (sfmmup->sfmmu_mmuttecnt[i] << 1) ||
- ttecnt[i] < (sfmmup->sfmmu_mmuttecnt[i] >> 1))
- break;
- }
-
- if (i == mmu_page_sizes) {
- return;
- }
-
- /* Sort pages by area mapped */
- for (i = 0; i < mmu_page_sizes; i++) {
- tmp = ttecnt[i];
- sortcnt[i] = tmp << TTE_PAGE_SHIFT(i);
- }
-
- for (j = 0; j < mmu_page_sizes; j++) {
- for (i = mmu_page_sizes - 1, max = 0; i > 0; i--) {
- if (sortcnt[i] > sortcnt[max])
- max = i;
- }
- tmp_pgsz[j] = max;
- sortcnt[max] = -1;
- }
-
- /*
- * Check if the order of the private page sizes has changed. We call
- * mmu_set_pgsz_order() directly if additional page sizes are used,
- * so we can assume that the number of entries is unchanged.
- */
- pgsz_order_hv = sfmmup->sfmmu_pgsz_order_hv;
- if (pgsz_order_shared_first) {
- /* skip over shared pgsz entries */
- while ((pgsz_order_hv[pgsz_count] & TLB_PGSZ_CONTEXT1_ENABLE) ==
- TLB_PGSZ_CONTEXT1_ENABLE) {
- pgsz_count++;
- }
- }
-
- i = 0;
- page_order_changed = 0;
- while ((pgsz_count < MAX_PGSZ_SEARCH_ORDER) &&
- (pgsz_order_hv[pgsz_count] & TLB_PGSZ_ENABLE) &&
- !(pgsz_order_hv[pgsz_count] & TLB_PGSZ_CONTEXT1)) {
- pgsz = (pgsz_order_hv[pgsz_count] & TTE_SZ_BITS);
- ASSERT(pgsz < MMU_PAGE_SIZES);
-
- if (pgsz != tmp_pgsz[i]) {
- page_order_changed = 1;
- break;
- }
- pgsz_count++;
- i++;
- }
-
- if (page_order_changed) {
- hatlockp = sfmmu_hat_enter(sfmmup);
- /* Save old values of ttecnt */
- for (i = 0; i < mmu_page_sizes; i++) {
- sfmmup->sfmmu_mmuttecnt[i] = ttecnt[i];
- }
- mmu_set_pgsz_order(sfmmup, 1);
- sfmmu_hat_exit(hatlockp);
- }
-}
-
-/*
- * If the mmu extension API is supported and pgsz_search_on is set,
- * patch out the instruction to branch over the hypervisor call in
- * sfmmu_load_mmustate().
- */
-void
-mmu_enable_pgsz_search()
-{
- if ((hsvc_mmu_ext_available == B_TRUE) && pgsz_search_on) {
- /* patch in hcall to set pgsz order */
- sfmmu_patch_pgsz_reg();
- }
-}
diff --git a/usr/src/uts/sun4v/cpu/rock_asm.s b/usr/src/uts/sun4v/cpu/rock_asm.s
deleted file mode 100644
index b6df570c77..0000000000
--- a/usr/src/uts/sun4v/cpu/rock_asm.s
+++ /dev/null
@@ -1,486 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include <sys/asm_linkage.h>
-#include <sys/hypervisor_api.h> /* For FAST_TRAP */
-#include <sys/rock_hypervisor_api.h>
-#include <sys/sun4asi.h> /* ASI_BLK_P */
-#include <sys/machthread.h> /* THREAD_REG */
-#include <sys/fsr.h> /* FPRS_FEF, FPRS_DU */
-#include <vm/hat_sfmmu.h> /* TSBTAG_INVALID */
-
-#if defined(lint)
-#include <sys/types.h>
-
-void
-cpu_smt_pause(void)
-{}
-
-void
-fp_zero(void)
-{}
-
-/*ARGSUSED*/
-uint64_t
-hv_rk_perf_count_init(uint64_t counter)
-{ return (0); }
-
-/*ARGSUSED*/
-uint64_t
-hv_rk_perf_count_release(uint64_t counter)
-{ return (0); }
-
-/*ARGSUSED*/
-uint64_t
-hv_rk_perf_count_set(uint64_t counter, uint64_t value)
-{ return (0); }
-
-/*ARGSUSED*/
-uint64_t
-hv_rk_perf_count_get(uint64_t counter, uint64_t *value)
-{ return (0); }
-
-/*ARGSUSED*/
-uint64_t
-hv_rk_perf_count_start(uint64_t counter, uint64_t value)
-{ return (0); }
-
-/*ARGSUSED*/
-uint64_t
-hv_rk_perf_count_overflow(uint64_t counter, uint64_t *ovf_cnt)
-{ return (0); }
-
-/*ARGSUSED*/
-uint64_t
-hv_rk_perf_count_stop(uint64_t counter)
-{ return (0); }
-
-/*ARGSUSED*/
-uint64_t
-hv_rk_perf_sample_init(uint64_t sampler, uint64_t ringbuf_pa)
-{ return (0); }
-
-/*ARGSUSED*/
-uint64_t
-hv_rk_perf_sample_release(uint64_t sampler)
-{ return (0); }
-
-/*ARGSUSED*/
-uint64_t
-hv_rk_perf_sample_start(uint64_t sampler, uint64_t freq,
- uint64_t list_size, uint64_t valist_pa)
-{ return (0); }
-
-/*ARGSUSED*/
-uint64_t
-hv_rk_perf_sample_config(uint64_t sampler, uint64_t reg_va, uint64_t reg_value)
-{ return (0); }
-
-/*ARGSUSED*/
-uint64_t
-hv_rk_perf_sample_pending(uint64_t sampler, uint64_t *pend_cnt)
-{ return (0); }
-
-/*ARGSUSED*/
-uint64_t
-hv_rk_perf_sample_stop(uint64_t sampler)
-{ return (0); }
-
-/*ARGSUSED*/
-void
-cpu_inv_tsb(caddr_t tsb_base, uint_t tsb_bytes)
-{}
-
-void
-cpu_atomic_delay(void)
-{}
-
-void
-rock_mutex_delay(void)
-{}
-#else /* lint */
-
-/*
- * Called from various spin loops to prevent this strand from
- * stealing too many cycles from its sibling, who is presumably
- * doing useful work.
- *
- * With a 2.1 GHz clock, 100 membar #Halt instructions plus
- * the call/return overhead will take approximately 500 nanoseconds.
- * That is a suitable time for a PAUSE, as it is roughly equal to
- * two memory accesses.
- */
- ENTRY_NP(cpu_smt_pause)
- mov 10, %o0
-1: membar #Halt
- membar #Halt
- membar #Halt
- membar #Halt
- membar #Halt
- membar #Halt
- membar #Halt
- membar #Halt
- membar #Halt
- subcc %o0, 1, %o0
- bg,pt %xcc, 1b
- membar #Halt
- retl
- membar #Halt
- SET_SIZE(cpu_smt_pause)
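
As a back-of-the-envelope consistency check of the numbers in the comment above (an estimate, not a measured figure): at 2.1 GHz one cycle is about 0.48 ns, so 500 ns is roughly 1050 cycles; spread over the ~100 membar #Halt instructions the loop executes, that implies on the order of 10 cycles per membar #Halt, call/return overhead included.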
-
-/*
- * fp_zero() - clear all fp data registers and the fsr
- */
-
-.global fp_zero_zero
-.align 8
-fp_zero_zero:
- .xword 0
-
- ENTRY_NP(fp_zero)
- sethi %hi(fp_zero_zero), %o0
- ldx [%o0 + %lo(fp_zero_zero)], %fsr
- movxtod %g0, %d0
- fzero %d2
- movxtod %g0, %d4
- fzero %d6
- movxtod %g0, %d8
- fzero %d10
- movxtod %g0, %d12
- fzero %d14
- movxtod %g0, %d16
- fzero %d18
- movxtod %g0, %d20
- fzero %d22
- movxtod %g0, %d24
- fzero %d26
- movxtod %g0, %d28
- fzero %d30
- movxtod %g0, %d32
- fzero %d34
- movxtod %g0, %d36
- fzero %d38
- movxtod %g0, %d40
- fzero %d42
- movxtod %g0, %d44
- fzero %d46
- movxtod %g0, %d48
- fzero %d50
- movxtod %g0, %d52
- fzero %d54
- movxtod %g0, %d56
- fzero %d58
- movxtod %g0, %d60
- retl
- fzero %d62
- SET_SIZE(fp_zero)
-
- /* hcalls for performance counters */
-
- /*
- * uint64_t hv_rk_perf_count_init(uint64_t counter);
- */
- ENTRY(hv_rk_perf_count_init)
- mov HV_RK_PERF_COUNT_INIT, %o5
- ta FAST_TRAP
- retl
- nop
- SET_SIZE(hv_rk_perf_count_init)
-
- /*
- * uint64_t hv_rk_perf_count_release(uint64_t counter);
- */
- ENTRY(hv_rk_perf_count_release)
- mov HV_RK_PERF_COUNT_RELEASE, %o5
- ta FAST_TRAP
- retl
- nop
- SET_SIZE(hv_rk_perf_count_release)
-
- /*
- * uint64_t hv_rk_perf_count_set(uint64_t counter, uint64_t value)
- */
- ENTRY(hv_rk_perf_count_set)
- mov HV_RK_PERF_COUNT_SET, %o5
- ta FAST_TRAP
- retl
- nop
- SET_SIZE(hv_rk_perf_count_set)
-
- /*
- * uint64_t hv_rk_perf_count_get(uint64_t counter, uint64_t *value)
- */
- ENTRY(hv_rk_perf_count_get)
- mov HV_RK_PERF_COUNT_GET, %o5
- mov %o1, %o2 ! Save the address
- ta FAST_TRAP
- retl
- stx %o1, [%o2] ! Value is returned in %o1 by the HV
- SET_SIZE(hv_rk_perf_count_get)
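
A caller-side sketch of the convention these wrappers implement (arguments in %o0/%o1, function number in %o5, ta FAST_TRAP, status back in %o0, value in %o1). This is a hedged illustration: the counter id of 0 is a made-up placeholder, and only hv_rk_perf_count_get() and H_EOK come from this code and hypervisor_api.h:

	uint64_t value;
	uint64_t counter = 0;		/* hypothetical counter id */

	if (hv_rk_perf_count_get(counter, &value) != H_EOK) {
		/* the hv returned a non-zero status in %o0 */
		cmn_err(CE_WARN, "perf counter read failed");
	} else {
		/* the wrapper stored the %o1 return value through &value */
	}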
-
- /*
- * uint64_t hv_rk_perf_count_start(uint64_t counter, uint64_t value)
- */
- ENTRY(hv_rk_perf_count_start)
- mov HV_RK_PERF_COUNT_START, %o5
- ta FAST_TRAP
- retl
- nop
- SET_SIZE(hv_rk_perf_count_start)
-
- /*
- * uint64_t hv_rk_perf_count_overflow(uint64_t counter,
- * uint64_t *ovf_cnt)
- */
- ENTRY(hv_rk_perf_count_overflow)
- mov %o1, %o2
- mov HV_RK_PERF_COUNT_OVERFLOW, %o5
- ta FAST_TRAP
- retl
- stx %o1, [%o2]
- SET_SIZE(hv_rk_perf_count_overflow)
-
- /*
- * uint64_t hv_rk_perf_count_stop(uint64_t counter)
- */
- ENTRY(hv_rk_perf_count_stop)
- mov HV_RK_PERF_COUNT_STOP, %o5
- ta FAST_TRAP
- retl
- nop
- SET_SIZE(hv_rk_perf_count_stop)
-
- /*
- * uint64_t hv_rk_perf_sample_init(uint64_t sampler,
- * uint64_t ringbuf_pa)
- */
- ENTRY(hv_rk_perf_sample_init)
- mov HV_RK_PERF_SAMPLE_INIT, %o5
- ta FAST_TRAP
- retl
- nop
- SET_SIZE(hv_rk_perf_sample_init)
-
- /*
- * uint64_t hv_rk_perf_sample_release(uint64_t sampler)
- */
- ENTRY(hv_rk_perf_sample_release)
- mov HV_RK_PERF_SAMPLE_RELEASE, %o5
- ta FAST_TRAP
- retl
- nop
- SET_SIZE(hv_rk_perf_sample_release)
-
- /*
- * uint64_t hv_rk_perf_sample_config(uint64_t sampler, uint64_t reg_va,
- * uint64_t reg_value)
- */
- ENTRY(hv_rk_perf_sample_config)
- mov HV_RK_PERF_SAMPLE_CONFIG, %o5
- ta FAST_TRAP
- retl
- nop
- SET_SIZE(hv_rk_perf_sample_config)
-
- /*
- * uint64_t hv_rk_perf_sample_start(uint64_t sampler, uint64_t freq,
- * uint64_t list_size, uint64_t valist_pa)
- */
- ENTRY(hv_rk_perf_sample_start)
- mov HV_RK_PERF_SAMPLE_START, %o5
- ta FAST_TRAP
- retl
- nop
- SET_SIZE(hv_rk_perf_sample_start)
-
- /*
- * uint64_t hv_rk_perf_sample_pending(uint64_t sampler,
- * uint64_t *pend_cnt)
- */
- ENTRY(hv_rk_perf_sample_pending)
- mov %o1, %o2
- mov HV_RK_PERF_SAMPLE_PENDING, %o5
- ta FAST_TRAP
- retl
- stx %o1, [%o2]
- SET_SIZE(hv_rk_perf_sample_pending)
-
- /*
- * uint64_t hv_rk_perf_sample_stop(uint64_t sampler)
- */
- ENTRY(hv_rk_perf_sample_stop)
- mov HV_RK_PERF_SAMPLE_STOP, %o5
- ta FAST_TRAP
- retl
- nop
- SET_SIZE(hv_rk_perf_sample_stop)
-
-/*
- * Invalidate all of the entries within the TSB by setting the inv bit
- * in the tte_tag field of each tsbe.
- *
- * We take advantage of the fact that the TSBs are page aligned and a
- * multiple of PAGESIZE to use block-store (ASI_BLK_P) operations.
- *
- * See TSB_LOCK_ENTRY and the miss handlers for how this works in practice
- * (in short, we set all bits in the upper word of the tag, and we give the
- * invalid bit precedence over other tag bits in both places).
- */
-
-#define VIS_BLOCKSIZE 64
-#include "assym.h" /* T_PREEMPT */
-
- ENTRY(cpu_inv_tsb)
-
- ! Get space for aligned block of saved fp regs.
- save %sp, -SA(MINFRAME + 2*VIS_BLOCKSIZE), %sp
-
- ! kpreempt_disable();
- ldsb [THREAD_REG + T_PREEMPT], %l3
- inc %l3
- stb %l3, [THREAD_REG + T_PREEMPT]
-
- ! See if fpu was in use. If it was, we need to save off the
- ! floating point registers to the stack.
- rd %fprs, %l0 ! %l0 = cached copy of fprs
- mov %g0, %l2
-
- btst FPRS_FEF, %l0
- bz,pt %icc, 4f
- nop
-
- ! If upper half fp registers are in use, save them as they will be
- ! used below.
- btst FPRS_DU, %l0
- bz,pt %icc, 4f
- nop
-
- ! save in-use fpregs on stack
-
- add %fp, STACK_BIAS - 65, %l1 ! get stack frame for fp regs
- and %l1, -VIS_BLOCKSIZE, %l1 ! block align frame
- stda %d32, [%l1]ASI_BLK_P ! %l1 = addr of saved fp regs
-
- ! Set a flag saying fp regs are saved.
- mov 1, %l2
-
- ! enable fp
-
-4: membar #StoreStore|#StoreLoad|#LoadStore
- wr %g0, FPRS_FEF|FPRS_DU, %fprs
- wr %g0, ASI_BLK_P, %asi
-
- ! load up FP registers with invalid TSB tag.
- set TSBTAG_INVALID, %l3
- movxtod %l3, %d32
- movxtod %l3, %d36
- movxtod %l3, %d40 ! Invalidate context
- movxtod %l3, %d44
- movxtod %g0, %d34
- movxtod %g0, %d38
- movxtod %g0, %d42 ! Zero in TTE
- movxtod %g0, %d46
-
- ba,pt %xcc, .cpu_inv_doblock
- mov (4*VIS_BLOCKSIZE), %i4 ! we do 4 stda's each loop below
-
-.cpu_inv_blkstart:
- stda %d32, [%i0+128]%asi
- stda %d32, [%i0+64]%asi
- stda %d32, [%i0]%asi
-
- add %i0, %i4, %i0
- sub %i1, %i4, %i1
-
-.cpu_inv_doblock:
- cmp %i1, (4*VIS_BLOCKSIZE) ! check for completion
- bgeu,a %icc, .cpu_inv_blkstart
- stda %d32, [%i0+192]%asi
-
-.cpu_inv_finish:
- membar #Sync
- brz,a %l2, .cpu_inv_finished
- wr %l0, 0, %fprs ! restore fprs
-
- ! restore fpregs from stack
- ldda [%l1]ASI_BLK_P, %d32
-
- membar #Sync
- wr %l0, 0, %fprs ! restore fprs
-
-.cpu_inv_finished:
- ! kpreempt_enable();
- ldsb [THREAD_REG + T_PREEMPT], %l3
- dec %l3
- stb %l3, [THREAD_REG + T_PREEMPT]
- ret
- restore
- SET_SIZE(cpu_inv_tsb)
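
In C terms the block-store loop above is equivalent to the following sketch (hedged: the tsbe layout of an 8-byte tag followed by an 8-byte tte matches the register pairs stored above, but the TSBTAG_INVALID value shown is an assumed stand-in for the real hat_sfmmu.h definition):

#include <stdint.h>

#define	TSBTAG_INVALID	0x40000000ULL	/* assumed inv-bit encoding */

struct tsbe {
	uint64_t tte_tag;	/* tag word; inv bit takes precedence */
	uint64_t tte_data;	/* TTE, zeroed on invalidate */
};

/* invalidate every entry in a TSB of tsb_bytes bytes */
static void
inv_tsb_c(struct tsbe *tsb, unsigned int tsb_bytes)
{
	struct tsbe *end = (struct tsbe *)((char *)tsb + tsb_bytes);

	for (; tsb < end; tsb++) {
		tsb->tte_tag = TSBTAG_INVALID;
		tsb->tte_data = 0;
	}
}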
-
-/*
- * This is CPU specific delay routine for atomic backoff.
- * It is used in case of Rock CPU. The rd instruction uses
- * less resources than casx on these CPUs.
- */
- .align 32
- ENTRY(cpu_atomic_delay)
- rd %ccr, %g0
- rd %ccr, %g0
- retl
- rd %ccr, %g0
- SET_SIZE(cpu_atomic_delay)
-
-/*
- * Delay lasting ~100 nanoseconds on a 2.1 GHz clock. The membars
- * should be linear and not in a loop, to avoid impacting
- * the sibling strand (the BR pipeline is shared by
- * two sibling strands).
- */
- .align 64
- ENTRY(rock_mutex_delay)
- membar #Halt
- membar #Halt
- membar #Halt
- membar #Halt
- membar #Halt
- membar #Halt
- membar #Halt
- membar #Halt
- membar #Halt
- membar #Halt
- membar #Halt
- membar #Halt
- membar #Halt
- membar #Halt
- membar #Halt
- membar #Halt
- membar #Halt
- membar #Halt
- membar #Halt
- retl
- membar #Halt
- SET_SIZE(rock_mutex_delay)
-#endif /* lint */
diff --git a/usr/src/uts/sun4v/cpu/rock_copy.s b/usr/src/uts/sun4v/cpu/rock_copy.s
deleted file mode 100644
index f0dd7fff12..0000000000
--- a/usr/src/uts/sun4v/cpu/rock_copy.s
+++ /dev/null
@@ -1,4941 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include <sys/param.h>
-#include <sys/errno.h>
-#include <sys/asm_linkage.h>
-#include <sys/vtrace.h>
-#include <sys/machthread.h>
-#include <sys/clock.h>
-#include <sys/asi.h>
-#include <sys/fsr.h>
-#include <sys/privregs.h>
-#include <sys/rockasi.h>
-
-#if !defined(lint)
-#include "assym.h"
-#endif /* lint */
-
-/*
- * VIS_COPY_THRESHOLD indicates the minimum number of bytes needed
- * to "break even" using FP/VIS-accelerated memory operations.
- * The FPBLK code assumes a minimum number of bytes are available
- * to be moved on entry. Check that code carefully before
- * reducing VIS_COPY_THRESHOLD below 256.
- */
-/*
- * This shadows sys/machsystm.h which can't be included due to
- * the lack of _ASM guards in include files it references.
- * Change it here, change it there.
- */
-#define VIS_COPY_THRESHOLD 256
-
-/*
- * TEST for very short copies
- * Be aware that the maximum unroll for the short unaligned case
- * is SHORTCOPY+1
- */
-#define SHORTCOPY 3
-#define CHKSIZE 39
-
-/*
- * Indicates that we're to trampoline to the error handler.
- * Entry points bcopy, copyin_noerr, and copyout_noerr use this flag.
- * kcopy, copyout, xcopyout, copyin, and xcopyin do not set this flag.
- */
-#define FPUSED_FLAG 1
-#define TRAMP_FLAG 2
-#define KCOPY_FLAG 4
-#define FPSAVED_FLAG 8
-#define MASK_FLAGS 0xf
-
-/*
- * LOFAULT_SET : Flag set by kzero and kcopy to indicate that t_lofault
- * handler was set
- */
-#define LOFAULT_SET 2
-
-/*
- * Number of outstanding prefetches.
- * Testing with 1200 MHz Cheetah+ and Jaguar gives best results with
- * two prefetches, one with a reach of 8*BLOCK_SIZE+8 and one with a
- * reach of 5*BLOCK_SIZE. The double prefetch gives a typical improvement
- * of 5% for large copies as compared to a single prefetch. The reason
- * for the improvement is that with Cheetah and Jaguar, some prefetches
- * are dropped due to the prefetch queue being full. The second prefetch
- * reduces the number of cache lines that are dropped.
- * Do not remove the double prefetch or change either FIRST_PREFETCH
- * or SECOND_PREFETCH without extensive performance tests to prove
- * there is no loss of performance.
- * XXX: For ROCK, the prefetch depth can be up to 16, but we stick
- * with 8 for now, pending more clarity on this.
- */
-#define FIRST_PREFETCH 8
-#define SECOND_PREFETCH 5
-
-#define VIS_BLOCKSIZE 64
-
-/*
- * Size of stack frame in order to accommodate a 64-byte aligned
- * floating-point register save area and 2 64-bit temp locations.
- * All copy functions use two quadrants of fp registers; to assure a
- * block-aligned two block buffer in which to save we must reserve
- * three blocks on stack. Not all functions preserve %fprs on stack
- * or need to preserve %gsr but we use HWCOPYFRAMESIZE for all.
- *
- * _______________________________________ <-- %fp + STACK_BIAS
- * | We may need to preserve 2 quadrants |
- * | of fp regs, but since we do so with |
- * | BST/BLD we need room in which to |
- * | align to VIS_BLOCKSIZE bytes. So |
- * | this area is 3 * VIS_BLOCKSIZE. | <-- - SAVED_FPREGS_OFFSET
- * |-------------------------------------|
- * | 8 bytes to save %fprs | <-- - SAVED_FPRS_OFFSET
- * |-------------------------------------|
- * | 8 bytes to save %gsr | <-- - SAVED_GSR_OFFSET
- * ---------------------------------------
- */
-#define HWCOPYFRAMESIZE ((VIS_BLOCKSIZE * (2 + 1)) + (2 * 8))
-#define SAVED_FPREGS_OFFSET (VIS_BLOCKSIZE * 3)
-#define SAVED_FPREGS_ADJUST ((VIS_BLOCKSIZE * 2) - 1)
-#define SAVED_FPRS_OFFSET (SAVED_FPREGS_OFFSET + 8)
-#define SAVED_GSR_OFFSET (SAVED_FPRS_OFFSET + 8)
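
Working the arithmetic of this layout through (derived from the defines above): with VIS_BLOCKSIZE of 64, HWCOPYFRAMESIZE is (64 * 3) + (2 * 8) = 208 bytes; the three-block fp save area occupies the 192 bytes just below %fp + STACK_BIAS, %fprs is saved at offset -SAVED_FPRS_OFFSET = -200, and %gsr at -SAVED_GSR_OFFSET = -208, so SAVED_GSR_OFFSET equals HWCOPYFRAMESIZE exactly. SAVED_FPREGS_ADJUST (127) is chosen so that subtracting it from %fp + STACK_BIAS and aligning down to VIS_BLOCKSIZE always leaves two aligned 64-byte blocks inside the save area.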
-
-#define ICACHE_LINE_SIZE 64
-
-#define MEDIUM_MAX 255
-#define MED_WMAX 256 /* max copy for medium word-aligned case */
-#define MED_MAX 256 /* max copy for medium longword-aligned case */
-
-#define PAGE_MASK 8191
-#define ST_CACHE_ALIGN 127
-
-#ifndef BSTORE_SIZE
-#define BSTORE_SIZE 256 /* min copy size for block store */
-#endif
-
-/*
- * Common macros used by the various versions of the block copy
- * routines in this file.
- */
-
-/*
- * In FP copies if we do not have preserved data to restore over
- * the fp regs we used then we must zero those regs to avoid
- * exposing portions of the data to later threads (data security).
- *
- * Copy functions use either quadrants 1 and 3 or 2 and 4.
- *
- * FZEROQ3Q4: Zero quadrants 3 and 4, i.e. %d32 - %d46 and %d48 - %d62.
- *
- */
-#define FZEROQ3Q4 \
- movxtod %g0, %d32 ;\
- movxtod %g0, %d34 ;\
- fsrc1 %d0, %d36 ;\
- fsrc1 %d0, %d38 ;\
- fsrc1 %d0, %d40 ;\
- fsrc1 %d0, %d42 ;\
- fsrc1 %d0, %d44 ;\
- fsrc1 %d0, %d46 ;\
- fsrc1 %d0, %d48 ;\
- fsrc1 %d0, %d50 ;\
- fsrc1 %d0, %d52 ;\
- fsrc1 %d0, %d54 ;\
- fsrc1 %d0, %d56 ;\
- fsrc1 %d0, %d58 ;\
- fsrc1 %d0, %d60 ;\
- fsrc1 %d0, %d62
-
-
-/*
- * Macros to save and restore quadrants 1 and 3 or 2 and 4 to/from the stack.
- * Used to save and restore in-use fp registers when we want to use FP
- * and find fp already in use and copy size still large enough to justify
- * the additional overhead of this save and restore.
- *
- * A membar #Sync is needed before save to sync fp ops initiated before
- * the call to the copy function (by whoever has fp in use); for example
- * an earlier block load to the quadrant we are about to save may still be
- * "in flight". A membar #Sync is required at the end of the save to
- * sync our block store (the copy code is about to begin ldd's to the
- * first quadrant). Note, however, that since Cheetah pipeline block load
- * is blocking we can omit the initial membar before saving fp state (they're
- * commented below in case of future porting to a chip that does not block
- * on block load).
- *
- * Similarly: a membar #Sync before restore allows the block stores of
- * the copy operation to complete before we fill the quadrants with their
- * original data, and a membar #Sync after restore lets the block loads
- * of the restore complete before we return to whoever has the fp regs
- * in use. To avoid repeated membar #Sync we make it the responsibility
- * of the copy code to membar #Sync immediately after copy is complete
- * and before using the BLD_*_FROMSTACK macro.
- */
-#if !defined(lint)
-#define BST_FPQ3Q4_TOSTACK(tmp1) \
- /* membar #Sync */ ;\
- add %fp, STACK_BIAS - SAVED_FPREGS_ADJUST, tmp1 ;\
- and tmp1, -VIS_BLOCKSIZE, tmp1 /* block align */ ;\
- stda %d32, [tmp1]ASI_BLK_P ;\
- add tmp1, VIS_BLOCKSIZE, tmp1 ;\
- stda %d48, [tmp1]ASI_BLK_P ;\
- membar #Sync
-
-#define BLD_FPQ3Q4_FROMSTACK(tmp1) \
- /* membar #Sync - provided at copy completion */ ;\
- add %fp, STACK_BIAS - SAVED_FPREGS_ADJUST, tmp1 ;\
- and tmp1, -VIS_BLOCKSIZE, tmp1 /* block align */ ;\
- ldda [tmp1]ASI_BLK_P, %d32 ;\
- add tmp1, VIS_BLOCKSIZE, tmp1 ;\
- ldda [tmp1]ASI_BLK_P, %d48 ;\
- membar #Sync
-#endif
-
-/*
- * FP_NOMIGRATE and FP_ALLOWMIGRATE. Prevent migration (or, stronger,
- * prevent preemption if there is no t_lwp to save FP state to on context
- * switch) before commencing a FP copy, and reallow it on completion or
- * in error trampoline paths when we were using FP copy.
- *
- * Both macros may call other functions, so be aware that all outputs are
- * forfeit after using these macros. For this reason we do not pass registers
- * to use - we just use any outputs we want.
- *
- * For fpRAS we need to perform the fpRAS mechanism test on the same
- * CPU as we use for the copy operation, both so that we validate the
- * CPU we perform the copy on and so that we know which CPU failed
- * if a failure is detected. Hence we need to be bound to "our" CPU.
- * This could be achieved by disabling preemption (and we do it that
- * way for threads with no t_lwp) but for larger copies this may hold
- * higher priority threads off cpu for too long (e.g., realtime). So we
- * make use of the lightweight t_nomigrate mechanism where we can (ie, when
- * we have a t_lwp).
- *
- * Pseudo code:
- *
- * FP_NOMIGRATE:
- *
- * if (curthread->t_lwp) {
- * thread_nomigrate();
- * } else {
- * kpreempt_disable();
- * }
- *
- * FP_ALLOWMIGRATE:
- *
- * if (curthread->t_lwp) {
- * thread_allowmigrate();
- * } else {
- * kpreempt_enable();
- * }
- */
-
-#define FP_NOMIGRATE(label1, label2) \
- ldn [THREAD_REG + T_LWP], %o0 ;\
- brz,a,pn %o0, label1/**/f ;\
- ldsb [THREAD_REG + T_PREEMPT], %o1 ;\
- call thread_nomigrate ;\
- nop ;\
- ba label2/**/f ;\
- nop ;\
-label1: ;\
- inc %o1 ;\
- stb %o1, [THREAD_REG + T_PREEMPT] ;\
-label2:
-
-#define FP_ALLOWMIGRATE(label1, label2) \
- ldn [THREAD_REG + T_LWP], %o0 ;\
- brz,a,pn %o0, label1/**/f ;\
- ldsb [THREAD_REG + T_PREEMPT], %o1 ;\
- call thread_allowmigrate ;\
- nop ;\
- ba label2/**/f ;\
- nop ;\
-label1: ;\
- dec %o1 ;\
- brnz,pn %o1, label2/**/f ;\
- stb %o1, [THREAD_REG + T_PREEMPT] ;\
- ldn [THREAD_REG + T_CPU], %o0 ;\
- ldub [%o0 + CPU_KPRUNRUN], %o0 ;\
- brz,pt %o0, label2/**/f ;\
- nop ;\
- call kpreempt ;\
- rdpr %pil, %o0 ;\
-label2:
-
-/*
- * Copy a block of storage, returning an error code if `from' or
- * `to' takes a kernel pagefault which cannot be resolved.
- * Returns errno value on pagefault error, 0 if all ok
- */
-
-#if defined(lint)
-
-/* ARGSUSED */
-int
-kcopy(const void *from, void *to, size_t count)
-{ return(0); }
-
-#else /* lint */
-
- .seg ".text"
- .align 4
-
- ENTRY(kcopy)
-
- ldn [THREAD_REG + T_LOFAULT], %o5 ! save existing handler
- or %o5, KCOPY_FLAG, %o5
- sethi %hi(.copyerr_no_fp_used), %o4
- or %o4, %lo(.copyerr_no_fp_used), %o4
- membar #Sync ! sync error barrier
- stn %o4, [THREAD_REG + T_LOFAULT] ! set t_lofault
- ba,pt %ncc, .forcpy ! common code
- nop
-
-
-/*
- * We got here because of a fault in .copyerr_fp_used. We can't safely
- * restore fp state, so we panic.
- */
-fp_panic_msg:
- .asciz "Unable to restore fp state after copy operation"
-
- .align 4
-.copyerr2:
- set fp_panic_msg, %o0
- call panic
- nop
-
-/*
- * We got here because of a fault during a small kcopy or bcopy.
- * No floating point registers were used in this copy.
- * Errno value is in %g1.
- */
-.copyerr_no_fp_used:
- btst TRAMP_FLAG, %o5
- membar #Sync
- andn %o5, TRAMP_FLAG, %o5
- bnz,pn %ncc, 3f
- stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
- retl
- mov %g1, %o0
-3:
- jmp %o5 ! goto real handler
- mov %g0, %o0 !
-
-/*
- * We got here because of a fault during a small kcopy or bcopy.
- * floating point registers were used in this copy.
- * Errno value is in %g1.
- */
-.copyerr_fp_used:
- set .copyerr2, %l0
- membar #Sync ! sync error barrier
- stn %l0, [THREAD_REG + T_LOFAULT] ! set t_lofault
- btst FPUSED_FLAG, %l6
- bz %ncc, 1f
- and %l6, TRAMP_FLAG, %l0 ! copy trampoline flag to %l0
-
- ldx [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2 ! restore gsr
- wr %o2, 0, %gsr
-
- ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
- ! No need to restore regs if they were not saved
- btst FPSAVED_FLAG, %l6
- bz %ncc, 4f
- nop
-
- BLD_FPQ3Q4_FROMSTACK(%o2)
-
- ba,pt %ncc, 1f
- wr %o3, 0, %fprs ! restore fprs
-
-4:
- FZEROQ3Q4
- wr %o3, 0, %fprs ! restore fprs
-
- !
- ! Need to cater for the different expectations of kcopy
- ! and bcopy. kcopy will *always* set a t_lofault handler
- ! If it fires, we're expected to just return the error code
- ! and *not* to invoke any existing error handler. As far as
- ! bcopy is concerned, we only set t_lofault if there was an
- ! existing lofault handler. In that case we're expected to
- ! invoke the previously existing handler after resetting the
- ! t_lofault value.
- !
-1:
- andn %l6, MASK_FLAGS, %l6 ! turn trampoline flag off
- membar #Sync ! sync error barrier
- stn %l6, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
- FP_ALLOWMIGRATE(5, 6)
-
- btst TRAMP_FLAG, %l0
- bnz,pn %ncc, 3f
- nop
- ret
- restore %g1, 0, %o0
-
-3:
- !
- ! We're here via bcopy. There *must* have been an error handler
- ! in place otherwise we would have died a nasty death already.
- !
- jmp %l6 ! goto real handler
- restore %g0, 0, %o0 ! dispose of copy window
-
- SET_SIZE(kcopy)
-#endif /* lint */
-
-#define ALIGN8(X) (((X) + 7) & ~7)
-#define ICACHE_LINE_SIZE 64
-#define PF_FAR 2048
-#define PF_NEAR 1024
-#define SMALL_MAX 39
-/*
- * Copy a block of storage - must not overlap (from + len <= to).
- * Registers: l6 - saved t_lofault
- * (for short copies, o5 - saved t_lofault)
- *
- * Copy a page of memory.
- * Assumes double word alignment and a count >= 256.
- */
-#if defined(lint)
-
-/* ARGSUSED */
-void
-bcopy(const void *from, void *to, size_t count)
-{}
-#else /* lint */
-
- .align ICACHE_LINE_SIZE
- ENTRY(bcopy)
- ENTRY(__align_cpy_1)
- ldn [THREAD_REG + T_LOFAULT], %o5 ! save t_lofault
- tst %o5
- bz,pt %icc, .forcpy
- nop
- sethi %hi(.copyerr_no_fp_used), %o4
- or %o4, %lo(.copyerr_no_fp_used), %o4
- membar #Sync ! sync error barrier
- stn %o4, [THREAD_REG + T_LOFAULT] ! install new vector
- or %o5, TRAMP_FLAG, %o5 ! error should trampoline
-.forcpy:
- cmp %o2, SMALL_MAX ! check for not small case
- bgu,pn %ncc, .medium_bcopy ! go to larger cases
- cmp %o2, SHORTCOPY ! check for really short case
- ble,pt %ncc, .smallleft_bcopy !
- or %o1, %o0, %o3 ! prepare alignment check
- andcc %o3, 0x3, %g0 ! test for alignment
- bz,pt %ncc, .smallword_bcopy ! branch to word aligned case
- sub %o2, 3, %o2 ! adjust count to allow cc zero test
-.smallnotalign4_bcopy:
- ldub [%o0], %o3 ! read byte
- subcc %o2, 4, %o2 ! reduce count by 4
- stb %o3, [%o1] ! write byte
- ldub [%o0+1], %o3 ! repeat for a total of 4 bytes
- add %o0, 4, %o0 ! advance SRC by 4
- stb %o3, [%o1+1]
- ldub [%o0-2], %o3
- add %o1, 4, %o1 ! advance DST by 4
- stb %o3, [%o1-2]
- ldub [%o0-1], %o3
- bgu,pt %ncc, .smallnotalign4_bcopy ! loop til 3 or fewer bytes remain
- stb %o3, [%o1-1]
- add %o2, 3, %o2 ! restore count
-.smallleft_bcopy:
- tst %o2
- bz,pt %ncc, .smallexit_bcopy
- nop
-.smallleft3_bcopy: ! 1, 2, or 3 bytes remain
- ldub [%o0], %o3 ! load one byte
- deccc %o2 ! reduce count for cc test
- bz,pt %ncc, .smallexit_bcopy
- stb %o3, [%o1] ! store one byte
- ldub [%o0+1], %o3 ! load second byte
- deccc %o2
- bz,pt %ncc, .smallexit_bcopy
- stb %o3, [%o1+1] ! store second byte
- ldub [%o0+2], %o3 ! load third byte
- stb %o3, [%o1+2] ! store third byte
- membar #Sync ! sync error barrier
- andn %o5, TRAMP_FLAG, %o5
- stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
- retl
- clr %o0
-
- .align 16
- nop ! affects loop icache alignment
-.smallwords_bcopy:
- lduw [%o0], %o3 ! read word
-.smallwordx_bcopy:
- subcc %o2, 8, %o2 ! update count
- stw %o3, [%o1] ! write word
- add %o0, 8, %o0 ! update SRC
- lduw [%o0-4], %o3 ! read word
- add %o1, 8, %o1 ! update DST
- bgu,pt %ncc, .smallwords_bcopy ! loop until done
- stw %o3, [%o1-4] ! write word
- addcc %o2, 7, %o2 ! restore count
- bz,pt %ncc, .smallexit_bcopy ! check for completion
- nop
- cmp %o2, 4 ! check for 4 or more bytes left
- blt .smallleft3_bcopy ! if not, go to finish up
- nop
- lduw [%o0], %o3
- add %o0, 4, %o0
- subcc %o2, 4, %o2
- stw %o3, [%o1]
- add %o1, 4, %o1
- bnz,pt %ncc, .smallleft3_bcopy
- nop
- membar #Sync ! sync error barrier
- andn %o5, TRAMP_FLAG, %o5
- stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
- retl
- clr %o0
-
-.smallword_bcopy:
- subcc %o2, 4, %o2 ! update count
- bgu,pt %ncc, .smallwordx_bcopy
- lduw [%o0], %o3 ! read word
- addcc %o2, 3, %o2 ! restore count
- bz,pt %ncc, .smallexit_bcopy
- stw %o3, [%o1] ! write word
- deccc %o2 ! reduce count for cc test
- ldub [%o0+4], %o3 ! load one byte
- bz,pt %ncc, .smallexit_bcopy
- stb %o3, [%o1+4] ! store one byte
- ldub [%o0+5], %o3 ! load second byte
- deccc %o2
- bz,pt %ncc, .smallexit_bcopy
- stb %o3, [%o1+5] ! store second byte
- ldub [%o0+6], %o3 ! load third byte
- stb %o3, [%o1+6] ! store third byte
-.smallexit_bcopy:
- membar #Sync ! sync error barrier
- andn %o5, TRAMP_FLAG, %o5
- stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
- retl
- clr %o0
- .align 16
-.medium_bcopy:
- neg %o1, %g5
- neg %o0, %o3
- andcc %g5, 7, %g5 ! bytes till DST 8 byte aligned
- and %o3, 7, %o3 ! bytes till SRC 8 byte aligned
- cmp %g5, %o3
- bne %ncc, continue
- sub %g5, %o3, %o3 ! -(bytes till SRC aligned after DST aligned)
- ! o3={-7, -6, ... 7} o3>0 => SRC overaligned
- ! src and dst are aligned.
- mov %o3, %g1 ! save %o3
- andcc %o0, 7, %o3 ! is src buf aligned on a 8 byte bound
- brz,pt %o3, src_dst_aligned_on_8
- nop
- mov %o3, %g5
- mov 8, %o4
- sub %o4, %o3, %o3
- cmp %o3, %o2
- bg,a,pn %ncc, 1f
- mov %o2, %o3
-1:
- ! %o3 has the bytes to be written in partial store.
- sub %o2, %o3, %o2
- prefetch [%o0],2
-7:
- deccc %o3 ! byte clearing loop
- ldub [%o0], %o4 ! load one byte
- stb %o4, [%o1]
- inc %o1 ! increment dst
- bgu,pt %ncc, 7b
- inc %o0 ! increment src
- mov %g1, %o3 ! restore %o3
-src_dst_aligned_on_8:
- ! check if we are copying 1k or more bytes
- cmp %o2, 511
- bgu,pt %ncc, copying_ge_512
- nop
- ba .medlword_bcopy
- nop
-
-continue:
- andcc %g5, 7, %g5 ! bytes till DST 8 byte aligned
- bz %ncc, 2f
- nop
-
- sub %o2, %g5, %o2 ! update count
-
-1:
- ldub [%o0], %o4
- deccc %g5
- inc %o0
- stb %o4, [%o1]
- bgu,pt %ncc, 1b
- inc %o1
-
- ! Now DST is 8-byte aligned. dst, from, o2 are current.
-
-2:
- andcc %o0, 0x3, %g0 ! test alignment
- bnz,pt %ncc, .mediumsetup_bcopy ! branch to skip aligned cases
- ! if src, dst not aligned
- prefetch [%o0 + (1 * VIS_BLOCKSIZE)], #n_reads
-
-/*
- * Handle all cases where src and dest are aligned on word
- * or long word boundaries. Use unrolled loops for better
- * performance. This option wins over standard large data
- * move when source and destination is in cache for medium
- * to short data moves.
- */
- andcc %o0, 0x7, %g0 ! test word alignment
- bz,pt %ncc, src_dst_lword_aligned ! branch to long word aligned case
- prefetch [%o0 + (2 * VIS_BLOCKSIZE)], #n_reads
- cmp %o2, MED_WMAX ! limit to store buffer size
- bgu,pt %ncc, .mediumrejoin_bcopy ! otherwise rejoin main loop
- nop
- subcc %o2, 15, %o2 ! adjust length to allow cc test
- ! for end of loop
- ble,pt %ncc, .medw15_bcopy ! skip big loop if less than 16
- prefetch [%o0 + (3 * VIS_BLOCKSIZE)], #n_reads
-/*
- * no need to put prefetch in loop as prefetches have
- * already been issued for maximum loop size
- */
-.medw16_bcopy:
- ld [%o0], %o4 ! load
- subcc %o2, 16, %o2 ! decrement length count
- stw %o4, [%o1] ! and store
- ld [%o0+4], %o3 ! a block of 16 bytes
- add %o0, 16, %o0 ! increase src ptr by 16
- stw %o3, [%o1+4]
- ld [%o0-8], %o4
- add %o1, 16, %o1 ! increase dst ptr by 16
- stw %o4, [%o1-8]
- ld [%o0-4], %o3
- bgu,pt %ncc, .medw16_bcopy ! repeat if at least 16 bytes left
- stw %o3, [%o1-4]
-.medw15_bcopy:
- addcc %o2, 15, %o2 ! restore count
- bz,pt %ncc, .medwexit_bcopy ! exit if finished
- nop
- cmp %o2, 8
- blt,pt %ncc, .medw7_bcopy ! skip if 7 or fewer bytes left
- nop !
- ld [%o0], %o4 ! load 4 bytes
- subcc %o2, 8, %o2 ! decrease count by 8
- stw %o4, [%o1] ! and store 4 bytes
- add %o0, 8, %o0 ! increase src ptr by 8
- ld [%o0-4], %o3 ! load 4 bytes
- add %o1, 8, %o1 ! increase dst ptr by 8
- stw %o3, [%o1-4] ! and store 4 bytes
- bz %ncc, .medwexit_bcopy ! exit if finished
- nop
-.medw7_bcopy: ! count is ge 1, less than 8
- cmp %o2, 3 ! check for 4 bytes left
- ble,pt %ncc, .medw3_bcopy ! skip if 3 or fewer bytes left
- nop !
- ld [%o0], %o4 ! load 4 bytes
- sub %o2, 4, %o2 ! decrease count by 4
- add %o0, 4, %o0 ! increase src ptr by 4
- stw %o4, [%o1] ! and store 4 bytes
- add %o1, 4, %o1 ! increase dst ptr by 4
- tst %o2 ! check for zero bytes left
- bz %ncc, .medwexit_bcopy ! exit if finished
- nop
-.medw3_bcopy: ! count is known to be 1, 2, or 3
- deccc %o2 ! reduce count by one
- ldub [%o0], %o3 ! load one byte
- bz,pt %ncc, .medwexit_bcopy ! exit if last byte
- stb %o3, [%o1] ! store one byte
- ldub [%o0+1], %o3 ! load second byte
- deccc %o2 ! reduce count by one
- bz,pt %ncc, .medwexit_bcopy ! exit if last byte
- stb %o3, [%o1+1] ! store second byte
- ldub [%o0+2], %o3 ! load third byte
- stb %o3, [%o1+2] ! store third byte
-.medwexit_bcopy:
- membar #Sync ! sync error barrier
- andn %o5, TRAMP_FLAG, %o5
- stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
- retl
- clr %o0
-
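
Restated in C, the unrolled word loop above moves four 4-byte words per
iteration and then drains the word and byte tails; the explicit counts below
replace the condition-code bias tricks, and medw_copy() is an illustrative
name, not a routine in this file.

    #include <stddef.h>
    #include <stdint.h>

    /* Sketch of .medw16_bcopy/.medw7_bcopy/.medw3_bcopy; the caller
     * guarantees 4-byte mutual alignment of src and dst. */
    static void
    medw_copy(const uint32_t *src, uint32_t *dst, size_t nbytes)
    {
            while (nbytes >= 16) {          /* .medw16_bcopy: 4 words/iter */
                    dst[0] = src[0];
                    dst[1] = src[1];
                    dst[2] = src[2];
                    dst[3] = src[3];
                    src += 4; dst += 4;
                    nbytes -= 16;
            }
            while (nbytes >= 4) {           /* .medw7_bcopy word tail */
                    *dst++ = *src++;
                    nbytes -= 4;
            }
            {                               /* .medw3_bcopy byte tail */
                    const uint8_t *s8 = (const uint8_t *)src;
                    uint8_t *d8 = (uint8_t *)dst;
                    while (nbytes--)
                            *d8++ = *s8++;
            }
    }
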
-/*
- * Special case for handling when src and dest are both long word aligned
- * and total data to move is between SMALL_MAX and MED_MAX bytes
- */
-
- .align 16
- nop
-src_dst_lword_aligned:
-.medlword_bcopy: ! long word aligned
- cmp %o2, MED_MAX ! limit to store buffer size
- bgu,pt %ncc, .mediumrejoin_bcopy ! otherwise rejoin main loop
- nop
- subcc %o2, 31, %o2 ! adjust length to allow cc test
- ! for end of loop
- ble,pt %ncc, .medl31_bcopy ! skip big loop if less than 32
- prefetch [%o0 + (3 * VIS_BLOCKSIZE)], #n_reads ! into the l2 cache
-/*
- * no need to put prefetch in loop as prefetches have
- * already been issued for maximum loop size
- */
-.medl32_bcopy:
- ldx [%o0], %o4 ! load
- subcc %o2, 32, %o2 ! decrement length count
- stx %o4, [%o1] ! and store
- ldx [%o0+8], %o3 ! a block of 32 bytes
- add %o0, 32, %o0 ! increase src ptr by 32
- stx %o3, [%o1+8]
- ldx [%o0-16], %o4
- add %o1, 32, %o1 ! increase dst ptr by 32
- stx %o4, [%o1-16]
- ldx [%o0-8], %o3
- bgu,pt %ncc, .medl32_bcopy ! repeat if at least 32 bytes left
- stx %o3, [%o1-8]
-.medl31_bcopy:
- addcc %o2, 16, %o2 ! adjust remaining count
- ble,pt %ncc, .medl15_bcopy ! skip if 15 or fewer bytes left
- nop !
- ldx [%o0], %o4 ! load and store 16 bytes
- add %o0, 16, %o0 ! increase src ptr by 16
- stx %o4, [%o1] !
- sub %o2, 16, %o2 ! decrease count by 16
- ldx [%o0-8], %o3 !
- add %o1, 16, %o1 ! increase dst ptr by 16
- stx %o3, [%o1-8]
-.medl15_bcopy:
- addcc %o2, 15, %o2 ! restore count
- bz,pt %ncc, .medwexit_bcopy ! exit if finished
- nop
- cmp %o2, 8
- blt,pt %ncc, .medw7_bcopy ! skip if 7 or fewer bytes left
- nop
- ldx [%o0], %o4 ! load 8 bytes
- add %o0, 8, %o0 ! increase src ptr by 8
- stx %o4, [%o1] ! and store 8 bytes
- subcc %o2, 8, %o2 ! decrease count by 8
- bz %ncc, .medwexit_bcopy ! exit if finished
- add %o1, 8, %o1 ! increase dst ptr by 8
- ba .medw7_bcopy
- nop
-
- .align 16
- nop
- nop
- nop
-unaligned_src_dst:
-
-.mediumsetup_bcopy:
- prefetch [%o0 + (2 * VIS_BLOCKSIZE)], #one_read
-.mediumrejoin_bcopy:
- ! %o5 has the saved T_LOFAULT when we come here.
- ! We set a new error handler if the T_LOFAULT was set earlier OR
- ! KCOPY_FLAG is set.
- save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
- mov %i5, %l6
- andn %l6, TRAMP_FLAG, %o2
- brz,pt %o2, 1f
- nop
- ! We enter here if KCOPY_FLAG was set OR
- ! T_LOFAULT was set earlier.
- ! We only change the error handler pointer here.
- 	! The TRAMP_FLAG or KCOPY_FLAG bits are left as they are in %l6.
- sethi %hi(.copyerr_fp_used), %o2
- or %o2, %lo(.copyerr_fp_used), %o2
- membar #Sync ! sync error barrier
- stn %o2, [THREAD_REG + T_LOFAULT] ! install new vector
-1:
- FP_NOMIGRATE(6, 7)
- mov %i0, %o0
- mov %i1, %o1
- mov %i2, %o2
- mov %i3, %o3
- mov %i5, %o5
- rd %fprs, %o4 ! check for unused fp
- st %o4, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET] ! save orig %fprs
-
- ! FPU enabled ? If not, enable it.
- btst FPRS_FEF, %o4
- bz,a,pt %icc, continue_bcopy
- wr %g0, FPRS_FEF, %fprs
-
- ! save the FP registers even if DU is not set.
-
- BST_FPQ3Q4_TOSTACK(%o4)
- or %l6, FPSAVED_FLAG, %l6
-
-continue_bcopy:
- rd %gsr, %o4
- stx %o4, [%fp + STACK_BIAS - SAVED_GSR_OFFSET] ! save gsr
- or %l6, FPUSED_FLAG, %l6
-
- add %o0, 8, %o0 ! prepare to round SRC upward
-
- sethi %hi(0x1234567f), %o5 ! For GSR.MASK
- or %o5, 0x67f, %o5
-
- cmp %o2, MEDIUM_MAX
- bmask %o5, %g0, %g0
-
- ! Compute o5 (number of bytes that need copying using the main loop).
- ! First, compute for the medium case.
- ! Then, if large case, o5 is replaced by count for block alignment.
- ! Be careful not to read past end of SRC
- ! Currently, o2 is the actual count remaining
- ! o3 is how much sooner we'll cross the alignment boundary
- ! in SRC compared to in DST
- !
- ! Examples: Let # denote bytes that should not be accessed
- ! Let x denote a byte already copied to align DST
- ! Let . and - denote bytes not yet copied
- ! Let | denote double alignment boundaries
- !
- ! DST: ######xx|........|--------|..###### o2 = 18
- ! dst
- !
- ! o3 = -3: SRC: ###xx...|.....---|-----..#|######## o5 = 8
- ! from
- !
- ! o3 = 0: SRC: ######xx|........|--------|..###### o5 = 16-8 = 8
- ! from
- !
- ! o3 = +1: SRC: #######x|x.......|.-------|-..##### o5 = 16-8 = 8
- ! from
-
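
The %o5 computation those examples describe reduces to a few lines of C
(register names kept for cross-reference; the small-count and large-block
adjustments that follow it are omitted):

    /* o2: bytes remaining; o3: SRC-vs-DST alignment skew in [-7, 7].
     * Returns the byte count handled by the main double-word loop. */
    static long
    main_loop_bytes(long o2, long o3)
    {
            long o5 = (o3 >= 0) ? -8 : 0;   /* movrlz: -8 only if o3 >= 0 */

            o5 += o2 + o3;
            return (o5 & ~7L);              /* 8 byte aligned count */
    }

For the o3 = 0 example above, main_loop_bytes(18, 0) is 10 & ~7 = 8, and for
o3 = -3 it is 15 & ~7 = 8, matching the o5 = 8 annotations.
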
- mov %asi, %g1 ! save curr %asi
- wr %g0, ASI_CACHE_SPARING_P, %asi
-
- or %g0, -8, %o5
- alignaddr %o0, %g0, %o0 ! set GSR.ALIGN and align from
-
- movrlz %o3, %g0, %o5 ! subtract 8 from o2+o3 only if o3>=0
- add %o5, %o2, %o5
- add %o5, %o3, %o5
-
- bleu %ncc, 4f
- andn %o5, 7, %o5 ! 8 byte aligned count
- neg %o1, %o5 ! 'large' case
- and %o5, VIS_BLOCKSIZE-1, %o5 ! bytes till DST block aligned
-4:
- brgez,a %o3, .beginmedloop_bcopy
- ldda [%o0-8]%asi, %d32
-
- add %o0, %o3, %o0 ! back up from
-5:
- ldda [%o0]ASI_FL8_P, %d34
- inc %o0
- andcc %o0, 7, %g0
- bnz %ncc, 5b
- bshuffle %d32, %d34, %d32 ! shifts d32 left 1 byte and or's in d34
-
-.beginmedloop_bcopy:
- tst %o5
- bz %ncc, .endmedloop_bcopy
- sub %o2, %o5, %o2 ! update count for later
-
- ! Main loop to write out doubles. Note: o5 & 7 == 0
-
- ldd [%o0], %d34
- subcc %o5, 8, %o5 ! update local count
- bz,pn %ncc, 1f
- add %o0, 8, %o0 ! update SRC
-
-.medloop_bcopy:
- faligndata %d32, %d34, %d36
- ldda [%o0]%asi, %d32
- subcc %o5, 8, %o5 ! update local count
- add %o0, 16, %o0 ! update SRC
- std %d36, [%o1]
- bz,pn %ncc, 2f
- faligndata %d34, %d32, %d38
- ldda [%o0 - 8]%asi, %d34
- subcc %o5, 8, %o5 ! update local count
- std %d38, [%o1 + 8]
- bnz,pt %ncc, .medloop_bcopy
- add %o1, 16, %o1 ! update DST
-
-1:
- faligndata %d32, %d34, %d36
- fmovd %d34, %d32
- std %d36, [%o1]
- ba .endmedloop_bcopy
- add %o1, 8, %o1
-
-2:
- std %d38, [%o1 + 8]
- sub %o0, 8, %o0
- add %o1, 16, %o1
-
-
-.endmedloop_bcopy:
- ! Currently, from is pointing to the next double-aligned byte in SRC
- ! The 8 bytes starting at [from-8] are available in d32
- ! At least one, and possibly all, of these need to be written.
-
- cmp %o2, VIS_BLOCKSIZE
- bgu %ncc, .large_bcopy ! otherwise, less than 16 bytes left
-
-#if 1
-
- /* This code will use partial stores. */
-
- mov %g0, %o5
- and %o3, 7, %o3 ! Number of bytes needed to completely
- ! fill %d32 with good (unwritten) data.
-
- subcc %o2, 8, %o2 ! update count (maybe too much)
- movl %ncc, %o2, %o5
- addcc %o3, %o5, %o5 ! extra bytes we can stuff into %d32
- sub %o3, %o5, %o3 ! update o3 (# bad bytes in %d32)
-
- bz %ncc, 2f
- alignaddr %o3, %g0, %g0 ! set GSR.ALIGN
-
-1:
- deccc %o5
- ldda [%o0]ASI_FL8_P, %d34
- inc %o0
- bgu %ncc, 1b
- bshuffle %d32, %d34, %d32 ! shifts d32 left 1 byte and or's in d34
-
-2:
- not %o3
- faligndata %d32, %d32, %d32 ! shift bytes to the left
- and %o3, 7, %o3 ! last byte to be stored in [%o1+%o3]
- edge8n %g0, %o3, %o5
- stda %d32, [%o1]%o5, ASI_PST8_P
- brlez %o2, exit_bcopy
- add %o1, %o3, %o1 ! update DST to last stored byte
-3:
- inc %o1
- deccc %o2
- ldub [%o0], %o3
- stb %o3, [%o1]
- bgu %ncc, 3b
- inc %o0
-
-#else
-
- andcc %o3, 7, %o5 ! Number of bytes needed to completely
- ! fill %d32 with good (unwritten) data.
- bz %ncc, 2f
- sub %o5, 8, %o3 ! -(number of good bytes in %d32)
- cmp %o2, 8
- bl,a %ncc, 3f ! Not enough bytes to fill %d32
- add %o0, %o3, %o0 ! Back up %o0
-
-1:
- deccc %o5
- ldda [%o0]ASI_FL8_P, %d34
- inc %o0
- bgu %ncc, 1b
- bshuffle %d32, %d34, %d32 ! shifts d32 left 1 byte and or's in d34
-
-2:
- subcc %o2, 8, %o2
- std %d32, [%o1]
- bz %ncc, exit_bcopy
- add %o1, 8, %o1
-3:
- ldub [%o0], %o3
- deccc %o2
- inc %o0
- stb %o3, [%o1]
- bgu %ncc, 3b
- inc %o1
-#endif
-
-exit_bcopy:
- membar #Sync
-
- ldx [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2 ! restore gsr
- wr %o2, 0, %gsr
-
- ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
- ! No need to restore regs if they were not saved
- btst FPSAVED_FLAG, %l6
- bz %ncc, 4f
- nop
-
- BLD_FPQ3Q4_FROMSTACK(%o2)
-
- ba,pt %ncc, 5f
- wr %o3, 0, %fprs ! restore fprs
-4:
- FZEROQ3Q4
- wr %o3, 0, %fprs ! restore fprs
-5:
- membar #Sync ! sync error barrier
- andn %l6, MASK_FLAGS, %l6
- stn %l6, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
-
- mov %g1, %asi ! restore %asi
- FP_ALLOWMIGRATE(6, 7)
- ret
- restore %g0, 0, %o0
-
-
- .align ICACHE_LINE_SIZE
-.large_bcopy:
- ! The following test for BSTORE_SIZE is used to decide whether
- ! to store data with a block store or with individual stores.
- ! The block store wins when the amount of data is so large
- 	! that it causes other application data to be moved out
- ! of the L1 or L2 cache.
- ! On a Panther, block store can lose more often because block
- ! store forces the stored data to be removed from the L3 cache.
- !
- sethi %hi(BSTORE_SIZE),%o5
- or %o5,%lo(BSTORE_SIZE),%o5
- cmp %o2, %o5
- bgu %ncc, .xlarge_bcopy
-
- ! %o1 I/O DST is 64-byte aligned
- ! %o0 I/O 8-byte aligned (and we've set GSR.ALIGN)
- ! %d32 I/O already loaded with SRC data from [%o0-8]
- ! %o2 I/O count (number of bytes that need to be written)
- ! %o3 I Not written. If zero, then SRC is double aligned.
- ! %o4 I Not written. Holds fprs.
- ! %o5 O The number of doubles that remain to be written.
-
- ! Load the rest of the current block
- ! Recall that %o0 is further into SRC than %o1 is into DST
-
- prefetch [%o1 + (0 * VIS_BLOCKSIZE)], #n_writes
- prefetch [%o1 + (1 * VIS_BLOCKSIZE)], #n_writes
- prefetch [%o1 + (2 * VIS_BLOCKSIZE)], #n_writes
- ldda [%o0]%asi, %d34
- prefetch [%o0 + (3 * VIS_BLOCKSIZE)], #one_read
- ldda [%o0 + 0x8]%asi, %d36
- faligndata %d32, %d34, %d48
- ldda [%o0 + 0x10]%asi, %d38
- faligndata %d34, %d36, %d50
- ldda [%o0 + 0x18]%asi, %d40
- faligndata %d36, %d38, %d52
- ldda [%o0 + 0x20]%asi, %d42
- or %g0, -8, %o5 ! if %o3 >= 0, %o5 = -8
- prefetch [%o0 + (4 * VIS_BLOCKSIZE)], #one_read
- faligndata %d38, %d40, %d54
- ldda [%o0 + 0x28]%asi, %d44
- 	movrlz	%o3, %g0, %o5		! if %o3 < 0, %o5 = 0 (needed later)
- faligndata %d40, %d42, %d56
- ldda [%o0 + 0x30]%asi, %d46
- faligndata %d42, %d44, %d58
- ldda [%o0 + 0x38]%asi, %d32
- sub %o2, VIS_BLOCKSIZE, %o2 ! update count
- prefetch [%o0 + (5 * VIS_BLOCKSIZE)], #one_read
- add %o0, VIS_BLOCKSIZE, %o0 ! update SRC
-
- ! Main loop. Write previous block. Load rest of current block.
- ! Some bytes will be loaded that won't yet be written.
-1:
- ldda [%o0]%asi, %d34
- faligndata %d44, %d46, %d60
- ldda [%o0 + 0x8]%asi, %d36
- faligndata %d46, %d32, %d62
- std %d48, [%o1]
- std %d50, [%o1+8]
- std %d52, [%o1+16]
- std %d54, [%o1+24]
- std %d56, [%o1+32]
- std %d58, [%o1+40]
- std %d60, [%o1+48]
- std %d62, [%o1+56]
- sub %o2, VIS_BLOCKSIZE, %o2 ! update count
- prefetch [%o1 + (6 * VIS_BLOCKSIZE)], #n_writes
- prefetch [%o1 + (3 * VIS_BLOCKSIZE)], #n_writes
- add %o1, VIS_BLOCKSIZE, %o1 ! update DST
- ldda [%o0 + 0x10]%asi, %d38
- faligndata %d32, %d34, %d48
- ldda [%o0 + 0x18]%asi, %d40
- faligndata %d34, %d36, %d50
- ldda [%o0 + 0x20]%asi, %d42
- faligndata %d36, %d38, %d52
- ldda [%o0 + 0x28]%asi, %d44
- faligndata %d38, %d40, %d54
- ldda [%o0 + 0x30]%asi, %d46
- faligndata %d40, %d42, %d56
- ldda [%o0 + 0x38]%asi, %d32
- faligndata %d42, %d44, %d58
- cmp %o2, VIS_BLOCKSIZE + 8
- prefetch [%o0 + (5 * VIS_BLOCKSIZE)], #one_read
- bgu,pt %ncc, 1b
- add %o0, VIS_BLOCKSIZE, %o0 ! update SRC
- faligndata %d44, %d46, %d60
- faligndata %d46, %d32, %d62
- stda %d48, [%o1]ASI_BLK_P ! store 64 bytes, bypass cache
- cmp %o2, VIS_BLOCKSIZE
- bne %ncc, 2f ! exactly 1 block remaining?
- add %o1, VIS_BLOCKSIZE, %o1 ! update DST
- brz,a %o3, 3f ! is SRC double aligned?
- ldd [%o0], %d34
-
-2:
- add %o5, %o2, %o5 ! %o5 was already set to 0 or -8
- add %o5, %o3, %o5
-
- ba .beginmedloop_bcopy
- andn %o5, 7, %o5 ! 8 byte aligned count
-
- ! This is when there is exactly 1 block remaining and SRC is aligned
-3:
- ! %d32 was loaded in the last iteration of the loop above, and
- ! %d34 was loaded in the branch delay slot that got us here.
- ldd [%o0 + 0x08], %d36
- ldd [%o0 + 0x10], %d38
- ldd [%o0 + 0x18], %d40
- ldd [%o0 + 0x20], %d42
- ldd [%o0 + 0x28], %d44
- ldd [%o0 + 0x30], %d46
- stda %d32, [%o1]ASI_BLK_P
-
- ba exit_bcopy
- nop
-
- .align 16
- 	! two nops here cause the loop starting at 1f below to start
- 	! on a cache line boundary, improving performance
- nop
- nop
-xlarge:
-.xlarge_bcopy:
- /*
- set 4096, %l2
- subcc %o2, %l2, %g0
- bge %ncc, size_ge_4k
- nop
- */
- ! %o1 I/O DST is 64-byte aligned
- ! %o0 I/O 8-byte aligned (and we've set GSR.ALIGN)
- ! %d32 I/O already loaded with SRC data from [%o0-8]
- ! %o2 I/O count (number of bytes that need to be written)
- ! %o3 I Not written. If zero, then SRC is double aligned.
- ! %o4 I Not written. Holds fprs.
- ! %o5 O The number of doubles that remain to be written.
-
- ! Load the rest of the current block
- ! Recall that %o0 is further into SRC than %o1 is into DST
-
- ! prefetch [%o0 + (3 * VIS_BLOCKSIZE)], #one_read
- ! executed in delay slot for branch to .xlarge
- prefetch [%o0 + (4 * VIS_BLOCKSIZE)], #one_read
- prefetch [%o0 + (5 * VIS_BLOCKSIZE)], #one_read
- ldda [%o0]%asi, %d34
- prefetch [%o0 + (6 * VIS_BLOCKSIZE)], #one_read
- ldda [%o0 + 0x8]%asi, %d36
- faligndata %d32, %d34, %d48
- ldda [%o0 + 0x10]%asi, %d38
- faligndata %d34, %d36, %d50
- ldda [%o0 + 0x18]%asi, %d40
- faligndata %d36, %d38, %d52
- ldda [%o0 + 0x20]%asi, %d42
- or %g0, -8, %o5 ! if %o3 >= 0, %o5 = -8
- faligndata %d38, %d40, %d54
- ldda [%o0 + 0x28]%asi, %d44
- movrlz %o3, %g0, %o5 ! if %o3 < 0, %o5 = 0 (needed later)
- faligndata %d40, %d42, %d56
- ldda [%o0 + 0x30]%asi, %d46
- faligndata %d42, %d44, %d58
- ldda [%o0 + 0x38]%asi, %d32
- sub %o2, VIS_BLOCKSIZE, %o2 ! update count
- prefetch [%o0 + (7 * VIS_BLOCKSIZE)], #one_read
- add %o0, VIS_BLOCKSIZE, %o0 ! update SRC
-
- ! This point is 32-byte aligned since 24 instructions appear since
- ! the previous alignment directive.
-
-
- ! Main loop. Write previous block. Load rest of current block.
- ! Some bytes will be loaded that won't yet be written.
-1:
- ldda [%o0]%asi, %d34
- faligndata %d44, %d46, %d60
- ldda [%o0 + 0x8]%asi, %d36
- faligndata %d46, %d32, %d62
- stda %d48, [%o1]ASI_BLK_P
- sub %o2, VIS_BLOCKSIZE, %o2 ! update count
- ldda [%o0 + 0x10]%asi, %d38
- faligndata %d32, %d34, %d48
- ldda [%o0 + 0x18]%asi, %d40
- faligndata %d34, %d36, %d50
- ldda [%o0 + 0x20]%asi, %d42
- faligndata %d36, %d38, %d52
- ldda [%o0 + 0x28]%asi, %d44
- faligndata %d38, %d40, %d54
- ldda [%o0 + 0x30]%asi, %d46
- faligndata %d40, %d42, %d56
- ldda [%o0 + 0x38]%asi, %d32
- faligndata %d42, %d44, %d58
- ! offset of 8*BLK+8 bytes works best over range of (src-dst) mod 1K
- prefetch [%o0 + (8 * VIS_BLOCKSIZE) + 8], #one_read
- add %o1, VIS_BLOCKSIZE, %o1 ! update DST
- cmp %o2, VIS_BLOCKSIZE + 8
- ! second prefetch important to correct for occasional dropped
- ! initial prefetches, 5*BLK works best over range of (src-dst) mod 1K
- ! strong prefetch prevents drops on Panther, but Jaguar and earlier
- 	! US-III models treat strong prefetches as weak prefetches
- ! to avoid regressions on customer hardware, we retain the prefetch
- prefetch [%o0 + (5 * VIS_BLOCKSIZE)], #one_read
- bgu,pt %ncc, 1b
- add %o0, VIS_BLOCKSIZE, %o0 ! update SRC
-
- faligndata %d44, %d46, %d60
- faligndata %d46, %d32, %d62
- stda %d48, [%o1]ASI_BLK_P ! store 64 bytes, bypass cache
- cmp %o2, VIS_BLOCKSIZE
- bne %ncc, 2f ! exactly 1 block remaining?
- add %o1, VIS_BLOCKSIZE, %o1 ! update DST
- brz,a %o3, 3f ! is SRC double aligned?
- ldd [%o0], %d34
-
-2:
- add %o5, %o2, %o5 ! %o5 was already set to 0 or -8
- add %o5, %o3, %o5
-
-
- ba .beginmedloop_bcopy
- andn %o5, 7, %o5 ! 8 byte aligned count
-
-
- ! This is when there is exactly 1 block remaining and SRC is aligned
-3:
- ! %d32 was loaded in the last iteration of the loop above, and
- ! %d34 was loaded in the branch delay slot that got us here.
- ldd [%o0 + 0x08], %d36
- ldd [%o0 + 0x10], %d38
- ldd [%o0 + 0x18], %d40
- ldd [%o0 + 0x20], %d42
- ldd [%o0 + 0x28], %d44
- ldd [%o0 + 0x30], %d46
- stda %d32, [%o1]ASI_BLK_P
-
- ba exit_bcopy
- nop
-
-copying_ge_512:
- ! both src and dst are aligned to 8 byte boundary
- ! and the number of bytes to copy is 512 or more.
- ! %o5 has the saved T_LOFAULT when we come here.
- ! We set a new error handler if the T_LOFAULT was set earlier OR
- ! KCOPY_FLAG is set.
- save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
- mov %i5, %l6
- andn %l6, TRAMP_FLAG, %o2
- brz,pt %o2, 1f
- nop
- ! We enter here if KCOPY_FLAG was set OR
- ! T_LOFAULT was set earlier.
- ! We only change the error handler pointer here.
- 	! The TRAMP_FLAG or KCOPY_FLAG bits are left as they are in %l6.
- sethi %hi(.copyerr_fp_used), %o2
- or %o2, %lo(.copyerr_fp_used), %o2
- membar #Sync ! sync error barrier
- stn %o2, [THREAD_REG + T_LOFAULT] ! install new vector
-1:
- FP_NOMIGRATE(6, 7)
- mov %i0, %o0
- mov %i1, %o1
- mov %i2, %o2
- mov %i3, %o3
- mov %i5, %o5
- rd %fprs, %o5 ! check for unused fp
- st %o5, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET] ! save orig %fprs
-
- ! FPU enabled ? If not, enable it.
- btst FPRS_FEF, %o5
- bz,a,pt %icc, 1f
- wr %g0, FPRS_FEF, %fprs
-
-
- ! save the FP registers even if DU is not set.
-
- BST_FPQ3Q4_TOSTACK(%o5)
- or %l6, FPSAVED_FLAG, %l6
-1:
- rd %gsr, %o5
- stx %o5, [%fp + STACK_BIAS - SAVED_GSR_OFFSET] ! save gsr
- or %l6, FPUSED_FLAG, %l6
- !prefetch 256 bytes from nearest 128 byte aligned src buf
- sub %o0,1,%o3
- andn %o3,0x7f,%l1
- add %l1,128,%l1
- prefetch [%l1],2
- prefetch [%l1+64],2
- prefetch [%l1+(2*64)],2
- prefetch [%l1+(3*64)],2
- !prefetch 256 bytes from nearest 128 byte aligned dst buf
- sub %o1,1,%o3
- andn %o3,0x7f,%l1
- add %l1,128,%l1
- prefetch [%l1],2
- prefetch [%l1+64],2
- prefetch [%l1+(2*64)],2
- prefetch [%l1+(3*64)],2
-
- andcc %o1,0x7f,%o3 !Check if buffers are 128 byte aligned
- brz,pn %o3,aligned_on_128
- sub %o3,128,%o3
-
- add %o2,%o3,%o2
-align_to_128:
- ldxa [%o0]ASI_CACHE_SPARING_P, %o4
- add %o0,8,%o0 ! increment src pointer
- stxa %o4,[%o1]ASI_CACHE_SPARING_P
- addcc %o3,8,%o3
- bl,pt %ncc,align_to_128
- add %o1,8,%o1 ! increment dst pointer
-
-aligned_on_128:
- andcc %o1,0x1ff,%o3 !Check if buffers are 512 byte aligned.
- brnz,pn %o3, 4f
- mov %o2,%l4 !l4=number of bytes to copy
- ! buffers are now 512 byte aligned.
- ! if we have 4096 or more bytes to copy we will use the
- ! stingray_optimized_copy
- set 4096, %l2
- subcc %o2, %l2, %g0
- bge,pn %ncc, stingray_optimized_copy
- nop
-4:
- ! determine how many bytes are left to be copied after the buffers
- ! are aligned to 512 byte boundary.
- ! if we have 4096 or more then we can perform stingray_optimized_copy
- 	! register l4 will contain the number of bytes to copy after buffers
- ! are aligned to 512 byte boundary. l4 is set to 0 if we have less than
- ! 4096 bytes to copy after aligning buffers to 512 byte.
- sub %o1,8,%o5 ! should be in current 512 chunk
- andn %o5,0x1ff,%o3 ! %o3=aligned 512b addr
- add %o3,0x200,%o3 ! %o3=next aligned 512b addr
- sub %o3,%o1,%o3 ! %o3=how many bytes to copy for 512 byte
- ! alignment
- sub %o2,%o3,%l4 ! l4=bytes to copy after aligning buffers to 512
- ! if l4 is < 4096 do interleave128_copy only.
- set 4096, %l2
- subcc %l4, %l2, %g0
- bge,pn %ncc,6f
- nop
- mov %g0, %l4
- add %o1, %o2, %l1
- ba interleave128_copy
- nop
-6:
- mov %o3, %o2
- subcc %o3,256,%g0 !use interleave128_copy if 256 or more
- 	bl,pn	%ncc,copy_word	! otherwise use copy_word to finish the 512 byte alignment
- 				! %o2 = new count, i.e. how many bytes to write
- 	add	%o1,%o2,%l1	! calculate the last byte to write into %l1
- ba interleave128_copy
- nop
-
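
The 512-byte alignment bookkeeping above can be read as the C sketch below
(dst is 8-byte but not 512-byte aligned when this path is taken; the names
are illustrative):

    #include <stddef.h>
    #include <stdint.h>

    /* Split the copy into a head that 512-byte aligns dst and a stingray
     * portion (l4); l4 is forced to 0 when fewer than 4096 bytes remain. */
    static size_t
    stingray_setup(uintptr_t dst, size_t count, size_t *headp)
    {
            uintptr_t next512 = ((dst - 8) & ~(uintptr_t)0x1ff) + 0x200;
            size_t head = (size_t)(next512 - dst);
            size_t l4 = (count > head) ? count - head : 0;

            if (l4 < 4096) {
                    l4 = 0;         /* finish with interleave128/copy_word */
                    head = count;
            }
            *headp = head;
            return (l4);
    }
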
- .align 64
-interleave128_copy:
- ! %l1 has the addr of the dest. buffer at or beyond which no write
- ! is to be done.
- 	! %l4 has the number of bytes to copy using stingray_optimized_copy
- !prefetch src
-
- add %o0, 256, %o3
- prefetch [%o3], 2 !1st 64 byte line of next 256 byte block
- add %o0, 384, %o3
- prefetch [%o3], 2 !3rd 64 byte line of next 256 byte block
- add %o0, 320, %o3
- prefetch [%o3], 2 !2nd 64 byte line of next 256 byte block
- add %o0, 448, %o3
- prefetch [%o3], 2 !4th 64 byte line of next 256 byte block
-
- !prefetch dst
-
- add %o1, 256, %o3
- prefetch [%o3], 2 !1st 64 byte line of next 256 byte block
- add %o1, 384, %o3
- prefetch [%o3], 2 !3rd 64 byte line of next 256 byte block
- add %o1, 320, %o3
- prefetch [%o3], 2 !2nd 64 byte line of next 256 byte block
- add %o1, 448, %o3
- prefetch [%o3], 2 !4th 64 byte line of next 256 byte block
-
- ldxa [%o0]ASI_CACHE_SPARING_P, %o4
- stxa %o4,[%o1]ASI_CACHE_SPARING_P
- add %o0, 128, %o3
- ldxa [%o3]ASI_CACHE_SPARING_P, %o4
- add %o1, 128, %o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_P
- add %o0, (1 * 8), %o3
- ldxa [%o3]ASI_CACHE_SPARING_P, %o4
- add %o1, (1 * 8), %o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_P
- add %o0, (1 * 8 + 128), %o3
- ldxa [%o3]ASI_CACHE_SPARING_P, %o4
- add %o1, (1 * 8 + 128), %o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_P
- add %o0, (2 * 8),%o3
- ldxa [%o3]ASI_CACHE_SPARING_P, %o4
- add %o1, (2 * 8),%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_P
- add %o0, (2 * 8 + 128) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_P, %o4
- add %o1, (2 * 8 + 128) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_P
- add %o0, (3 * 8) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_P, %o4
- add %o1, (3 * 8) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_P
- add %o0, (3 * 8 + 128) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_P, %o4
- add %o1, (3 * 8 + 128) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_P
- add %o0, (4 * 8) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_P, %o4
- add %o1, (4 * 8) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_P
- add %o0, (4 * 8 + 128) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_P, %o4
- add %o1, (4 * 8 + 128) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_P
- add %o0, (5 * 8) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_P, %o4
- add %o1, (5 * 8) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_P
- add %o0, (5 * 8 + 128) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_P, %o4
- add %o1, (5 * 8 + 128) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_P
- add %o0, (6 * 8) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_P, %o4
- add %o1, (6 * 8) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_P
- add %o0, (6 * 8 + 128) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_P, %o4
- add %o1, (6 * 8 + 128) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_P
- add %o0, (7 * 8) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_P, %o4
- add %o1, (7 * 8) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_P
- add %o0, (7 * 8 + 128) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_P, %o4
- add %o1, (7 * 8 + 128) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_P
- add %o0, (8 * 8) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_P, %o4
- add %o1, (8 * 8) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_P
- add %o0, (8 * 8 + 128) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_P, %o4
- add %o1, (8 * 8 + 128) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_P
- add %o0, (9 * 8) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_P, %o4
- add %o1, (9 * 8) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_P
- add %o0, (9 * 8 + 128) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_P, %o4
- add %o1, (9 * 8 + 128) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_P
- add %o0, (10 * 8) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_P, %o4
- add %o1, (10 * 8) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_P
- add %o0, (10 * 8 + 128) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_P, %o4
- add %o1, (10 * 8 + 128) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_P
- add %o0, (11 * 8) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_P, %o4
- add %o1, (11 * 8) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_P
- add %o0, (11 * 8 + 128) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_P, %o4
- add %o1, (11 * 8 + 128) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_P
- add %o0, (12 * 8) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_P, %o4
- add %o1, (12 * 8) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_P
- add %o0, (12 * 8 + 128) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_P, %o4
- add %o1, (12 * 8 + 128) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_P
- add %o0, (13 * 8) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_P, %o4
- add %o1, (13 * 8) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_P
- add %o0, (13 * 8 + 128) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_P, %o4
- add %o1, (13 * 8 + 128) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_P
- add %o0, (14 * 8) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_P, %o4
- add %o1, (14 * 8) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_P
- add %o0, (14 * 8 + 128) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_P, %o4
- add %o1, (14 * 8 + 128) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_P
- add %o0, (15 * 8) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_P, %o4
- add %o1, (15 * 8) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_P
- add %o0, (15 * 8 + 128) ,%o3
- ldxa [%o3]ASI_CACHE_SPARING_P, %o4
- add %o1, (15 * 8 + 128) ,%o3
- stxa %o4,[%o3]ASI_CACHE_SPARING_P
- add %o0, 256, %o0
-
- ! check if the next 256 byte copy will not exceed the number of
- ! bytes remaining to be copied.
- ! %l2 points to the dest buffer after copying 256 bytes more.
- ! %l1 points to dest. buffer at or beyond which no writes should be done.
- add %o1,512,%l2
- subcc %l1,%l2,%g0
- bge,pt %ncc,interleave128_copy
- add %o1,256,%o1
-
-copy_word:
- and %o2,255,%o3
- and %o3,7,%o2
-
- ! Set the remaining doubles
- subcc %o3, 8, %o3 ! Can we store any doubles?
- bl,pn %ncc, 6f
- and %o2, 7, %o2 ! calc bytes left after doubles
-
- !prefetch src
-
- mov %o0, %o4
- prefetch [%o4], 2 !1st 64 byte line of next 256 byte block
- add %o0, 128, %o4
- prefetch [%o4], 2 !3rd 64 byte line of next 256 byte block
- add %o0, 64, %o4
- prefetch [%o4], 2 !2nd 64 byte line of next 256 byte block
- add %o0, 192, %o4
- prefetch [%o4], 2 !4th 64 byte line of next 256 byte block
-
- !prefetch dst
-
- mov %o1, %o4
- prefetch [%o4], 2 !1st 64 byte line of next 256 byte block
- add %o1, 128, %o4
- prefetch [%o4], 2 !3rd 64 byte line of next 256 byte block
- add %o1, 64, %o4
- prefetch [%o4], 2 !2nd 64 byte line of next 256 byte block
- add %o1, 192, %o4
- prefetch [%o4], 2 !4th 64 byte line of next 256 byte block
-
-5:
- ldxa [%o0]ASI_CACHE_SPARING_P, %o4
- add %o0, 8, %o0
- stxa %o4, [%o1]ASI_CACHE_SPARING_P
- subcc %o3, 8, %o3
- bge,pt %ncc, 5b
- add %o1, 8, %o1
-6:
- ! Set the remaining bytes
- brz %o2, can_we_do_stingray_optimized_copy
- nop
-
-7:
- 	deccc	%o2			! byte copy loop
- ldub [%o0], %o4 ! load one byte
- stb %o4, [%o1]
- inc %o1 ! increment dst
- bgu,pt %ncc, 7b
- inc %o0 ! increment src
-
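
copy_word above amounts to the following C: the sub-256-byte residue is
drained as whole 8-byte doubles, then byte by byte (prefetching omitted;
the function name is illustrative).

    #include <stddef.h>
    #include <string.h>
    #include <stdint.h>

    static void
    copy_word_sketch(const uint8_t *src, uint8_t *dst, size_t count)
    {
            size_t residue = count & 255;   /* %o3: bytes left of residue */

            for (; residue >= 8; residue -= 8) {    /* label 5: doubles */
                    memcpy(dst, src, 8);            /* the ldxa/stxa pair */
                    src += 8;
                    dst += 8;
            }
            while (residue--)                       /* label 7: byte tail */
                    *dst++ = *src++;
    }
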
-can_we_do_stingray_optimized_copy:
- ! %l4 contains the number of bytes to be copied
- mov %l4, %o2
- brnz,pn %o2, stingray_optimized_copy
- nop
-
-exit:
- membar #Sync
-
- ldx [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o5 ! restore gsr
- wr %o5, 0, %gsr
-
- ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
- ! No need to restore regs if they were not saved
- btst FPSAVED_FLAG, %l6
- bz %ncc, 4f
- nop
-
- BLD_FPQ3Q4_FROMSTACK(%o5)
-
- ba,pt %ncc, 5f
- wr %o3, 0, %fprs ! restore fprs
-4:
- FZEROQ3Q4
- wr %o3, 0, %fprs ! restore fprs
-5:
- membar #Sync ! sync error barrier
- andn %l6, MASK_FLAGS, %l6
- stn %l6, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
- FP_ALLOWMIGRATE(6, 7)
- ret
- restore %g0, 0, %o0
-
-
-stingray_optimized_copy:
- ! This code tries to maximize bandwidth by being clever about
- ! accessing the two cache lines that are BUDDY PAIRS in the L3 cache.
- ! THIS VERSION IS OPTIMIZED FOR THE CASE OF SWAPPING PA BITS 6 and 9.
- ! To keep this code simple, we assume the addresses given are aligned
- ! at least on a 128-byte boundary, and the length is assumed to be
- ! a multiple of 4k bytes.
- ! THIS VERSION USES BLKSTORES, AND PREFETCHES BOTH SOURCE AND
- ! DESTINATION DATA.
-
- add %o1, %l4, %o2
-
- !save original value of %o0 so we can restore it.
- or %g0,%o0,%l2
-
- wr %g0,ASI_BLK_P,%asi
-
- prefetch [%o0+0],2
- prefetch [%o0+(64*1)],2
- prefetch [%o0+(64*2)],2
- prefetch [%o0+(64*3)],2
- prefetch [%o0+(64*4)],2
- prefetch [%o0+(64*5)],2
- prefetch [%o0+(64*6)],2
- prefetch [%o0+(64*7)],2
- prefetch [%o0+(64*8)],2
- prefetch [%o0+(64*9)],2
- prefetch [%o0+(64*10)],2
- prefetch [%o0+(64*11)],2
- prefetch [%o0+(64*12)],2
- prefetch [%o0+(64*13)],2
- prefetch [%o0+(64*14)],2
- prefetch [%o0+(64*15)],2
-
- prefetch [%o1+0],2
- prefetch [%o1+(64*1)],2
- prefetch [%o1+(64*2)],2
- prefetch [%o1+(64*3)],2
- prefetch [%o1+(64*4)],2
- prefetch [%o1+(64*5)],2
- prefetch [%o1+(64*6)],2
- prefetch [%o1+(64*7)],2
- prefetch [%o1+(64*8)],2
- prefetch [%o1+(64*9)],2
- prefetch [%o1+(64*10)],2
- prefetch [%o1+(64*11)],2
- prefetch [%o1+(64*12)],2
- prefetch [%o1+(64*13)],2
- prefetch [%o1+(64*14)],2
- prefetch [%o1+(64*15)],2
-
- ba stingray_optimized_4k_copy_loop
- srl %l4, 12, %l4
-
- ! Local register usage:
- ! %l1 address at short distance ahead of current src buf for prefetching
- ! into L1 cache.
- ! %l2 address at far ahead of current src buf for prefetching
- ! into L2 cache.
- ! %l3 save %o1 at start of inner loop.
- ! %l4 Number of 4k blocks to copy
- ! %g1 save src buf pointer at start of inner loop.
- ! %l5 iteration counter to make buddy loop execute 2 times.
- ! %o5 iteration counter to make inner loop execute 4 times.
- ! %l7 address at far ahead of current dst buf for prefetching dest
- ! into L2 cache.
-
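
The loop nest below is easier to follow in C: each 4 KB chunk is covered by
two "buddy" passes offset 512 bytes from each other, and each pass runs four
inner iterations that copy eight consecutive 64-byte lines and then skip 512
bytes so PA[9] stays constant within the pass. A sketch with memcpy standing
in for the eight ldd/stda line copies and all prefetching omitted:

    #include <stddef.h>
    #include <string.h>

    static void
    stingray_copy_sketch(const char *src, char *dst, size_t nblocks4k)
    {
            while (nblocks4k--) {
                    for (int buddy = 0; buddy < 2; buddy++) {
                            const char *s = src + buddy * 512;
                            char *d = dst + buddy * 512;

                            for (int i = 0; i < 4; i++) {
                                    memcpy(d, s, 512);  /* 8 x 64-byte lines */
                                    s += 1024;          /* skip the buddy 512 */
                                    d += 1024;
                            }
                    }
                    src += 4096;
                    dst += 4096;
            }
    }

The two passes interleave exactly: buddy 0 covers offsets 0-511, 1024-1535,
2048-2559, 3072-3583 of the chunk, and buddy 1 the 512-byte holes between.
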
- .align 64
-stingray_optimized_4k_copy_loop:
- set 2, %l5 ! %l5 is the loop count for the buddy loop
- add %o1, 0, %l3
- add %o0, 0, %g1
-buddyloop_bcopy:
- set PF_FAR, %g5
- add %o0, %g5, %l2 ! Set %l2 to far ahead of src buffer to prefetch
- ! For prefetching into L1 D$, set %l1 a little ahead of src buffer
- add %o0, PF_NEAR, %l1
- add %o1, %g5, %l7 ! Set %l7 to far ahead of dst buffer to prefetch
-
- add %l2, %g5, %g5 ! %g5 is now double far ahead of the src buffer
- prefetch [%g5+%g0],2 ! Prefetch ahead to get TLB entry in advance.
- set 2*PF_FAR, %g5
- add %o1, %g5, %g5 ! %g5 is now double far ahead of the dst buffer
- prefetch [%g5+%g0],2 ! Prefetch ahead to get TLB entry in advance.
-
- set 4,%o5 ! %o5 = loop count for the inner loop
- set 0, %g5
-
- ! Each iteration of the inner loop below copies 8 sequential lines.
- ! This loop is iterated 4 times, to move a total of 32 lines, all of
- ! which have the same value of PA[9], so we increment the base
- ! address by 1024 bytes in each iteration, which varies PA[10].
-innerloop_bcopy:
- ! copy line 1 of 8
- prefetch [%l2+%g5],2
- prefetch [%l7+%g5],2
- prefetch [%l1+%g5],1
-
- ldd [%o0],%d32
- ldd [%o0+8],%d34
- ldd [%o0+16],%d36
- ldd [%o0+24],%d38
- ldd [%o0+32],%d40
- ldd [%o0+40],%d42
- ldd [%o0+48],%d44
- ldd [%o0+56],%d46
- stda %d32,[%o1+0] %asi
- add %g5, 64, %g5
- add %o1, 64, %o1
- add %o0, 64, %o0
-
- ! copy line 2 of 8
- prefetch [%l2+%g5],2
- prefetch [%l7+%g5],2
- prefetch [%l1+%g5],1
-
- ldd [%o0],%d32
- ldd [%o0+8],%d34
- ldd [%o0+16],%d36
- ldd [%o0+24],%d38
- ldd [%o0+32],%d40
- ldd [%o0+40],%d42
- ldd [%o0+48],%d44
- ldd [%o0+56],%d46
- stda %d32,[%o1+0] %asi
- add %g5, 64, %g5
- add %o1, 64, %o1
- add %o0, 64, %o0
-
- ! copy line 3 of 8
- prefetch [%l2+%g5],2
- prefetch [%l7+%g5],2
- prefetch [%l1+%g5],1
-
- ldd [%o0],%d32
- ldd [%o0+8],%d34
- ldd [%o0+16],%d36
- ldd [%o0+24],%d38
- ldd [%o0+32],%d40
- ldd [%o0+40],%d42
- ldd [%o0+48],%d44
- ldd [%o0+56],%d46
- stda %d32,[%o1+0] %asi
- add %g5, 64, %g5
- add %o1, 64, %o1
- add %o0, 64, %o0
-
- ! copy line 4 of 8
- prefetch [%l2+%g5],2
- prefetch [%l7+%g5],2
- prefetch [%l1+%g5],1
-
- ldd [%o0],%d32
- ldd [%o0+8],%d34
- ldd [%o0+16],%d36
- ldd [%o0+24],%d38
- ldd [%o0+32],%d40
- ldd [%o0+40],%d42
- ldd [%o0+48],%d44
- ldd [%o0+56],%d46
- stda %d32,[%o1+0] %asi
- add %g5, 64, %g5
- add %o1, 64, %o1
- add %o0, 64, %o0
-
- ! copy line 5 of 8
- prefetch [%l2+%g5],2
- prefetch [%l7+%g5],2
- prefetch [%l1+%g5],1
-
- ldd [%o0],%d32
- ldd [%o0+8],%d34
- ldd [%o0+16],%d36
- ldd [%o0+24],%d38
- ldd [%o0+32],%d40
- ldd [%o0+40],%d42
- ldd [%o0+48],%d44
- ldd [%o0+56],%d46
- stda %d32,[%o1+0] %asi
- add %g5, 64, %g5
- add %o1, 64, %o1
- add %o0, 64, %o0
-
- ! copy line 6 of 8
- prefetch [%l2+%g5],2
- prefetch [%l7+%g5],2
- prefetch [%l1+%g5],1
-
- ldd [%o0],%d32
- ldd [%o0+8],%d34
- ldd [%o0+16],%d36
- ldd [%o0+24],%d38
- ldd [%o0+32],%d40
- ldd [%o0+40],%d42
- ldd [%o0+48],%d44
- ldd [%o0+56],%d46
- stda %d32,[%o1+0] %asi
- add %g5, 64, %g5
- add %o1, 64, %o1
- add %o0, 64, %o0
-
- ! copy line 7 of 8
- prefetch [%l2+%g5],2
- prefetch [%l7+%g5],2
- prefetch [%l1+%g5],1
-
- ldd [%o0],%d32
- ldd [%o0+8],%d34
- ldd [%o0+16],%d36
- ldd [%o0+24],%d38
- ldd [%o0+32],%d40
- ldd [%o0+40],%d42
- ldd [%o0+48],%d44
- ldd [%o0+56],%d46
- stda %d32,[%o1+0] %asi
- add %g5, 64, %g5
- add %o1, 64, %o1
- add %o0, 64, %o0
-
- ! copy line 8 of 8
- prefetch [%l2+%g5],2
- prefetch [%l7+%g5],2
- prefetch [%l1+%g5],1
-
- ldd [%o0],%d32
- ldd [%o0+8],%d34
- ldd [%o0+16],%d36
- ldd [%o0+24],%d38
- ldd [%o0+32],%d40
- ldd [%o0+40],%d42
- ldd [%o0+48],%d44
- ldd [%o0+56],%d46
- stda %d32,[%o1+0] %asi
-
- subcc %o5,1,%o5 ! Decrement the inner loop counter.
-
- ! Now increment by 64 + 512 so we don't toggle PA[9]
-
- add %g5, 576, %g5
- add %o1, 576, %o1 ! increment dst buffer
-
- bg,pt %icc,innerloop_bcopy
- add %o0, 576, %o0 ! increment src buffer
- ! END OF INNER LOOP
-
-
- subcc %l5,1,%l5
- add %l3, 512, %o1 ! increment dst buf to the first buddy line
- bg,pt %icc,buddyloop_bcopy
- 	add	%g1, 512, %o0	! increment src buf to the first buddy line
-
- subcc %l4, 1, %l4
- add %o1, 3584, %o1 ! Advance src and dst buffers by 4k
- add %o0, 3584, %o0 ! They were already incremented by 512,
- ! so just add 3584.
-
- bg,pt %icc,stingray_optimized_4k_copy_loop
- nop
-
- ! End of stingray_optimized_copy
- ! if we have 256 or more bytes to copy we use interleave128_copy
- ! else we use copy_word
-
- sub %o2,%o1,%o2 ! bytes remaining to be copied
- brz,pn %o2,exit
- mov %g0,%l4
- 	add	%o1,%o2,%l1	! calculate the last byte to write into %l1
- subcc %o2,256,%g0
- bge,pt %ncc,interleave128_copy
- mov %g0, %l4
-
- ba copy_word
- nop
-
- SET_SIZE(bcopy)
- SET_SIZE(__align_cpy_1)
-#endif /* lint */
-
-#define REALSRC %i0
-#define DST %i1
-#define CNT %i2
-#define SRC %i3
-#define TMP %i5
-
-/*
- * Block copy with possibly overlapped operands.
- */
-
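
The overlap policy ovbcopy implements below, restated in C (bcopy is the
routine defined earlier in this file; the sketch's function name is
illustrative):

    #include <stddef.h>

    extern void bcopy(const void *, void *, size_t);

    static void
    ovbcopy_sketch(const char *from, char *to, size_t count)
    {
            size_t diff = (from > to) ? (size_t)(from - to)
                : (size_t)(to - from);

            if (count == 0)
                    return;
            if (count <= diff) {            /* no overlap: use bcopy */
                    bcopy(from, to, count);
            } else if (from > to) {         /* .ov_fwd */
                    while (count--)
                            *to++ = *from++;
            } else {                        /* .ov_bkwd */
                    while (count--)
                            to[count] = from[count];
            }
    }
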
-#if defined(lint)
-
-/*ARGSUSED*/
-void
-ovbcopy(const void *from, void *to, size_t count)
-{}
-
-#else /* lint */
-
- ENTRY(ovbcopy)
- tst %o2 ! check count
- bgu,a %ncc, 1f ! nothing to do or bad arguments
- subcc %o0, %o1, %o3 ! difference of from and to address
-
- retl ! return
- nop
-1:
- bneg,a %ncc, 2f
- neg %o3 ! if < 0, make it positive
-2: cmp %o2, %o3 ! cmp size and abs(from - to)
- bleu %ncc, bcopy ! if size <= abs(diff): use bcopy,
- .empty ! no overlap
- cmp %o0, %o1 ! compare from and to addresses
- blu %ncc, .ov_bkwd ! if from < to, copy backwards
- nop
- !
- ! Copy forwards.
- !
-.ov_fwd:
- ldub [%o0], %o3 ! read from address
- inc %o0 ! inc from address
- stb %o3, [%o1] ! write to address
- deccc %o2 ! dec count
- bgu %ncc, .ov_fwd ! loop till done
- inc %o1 ! inc to address
-
- retl ! return
- nop
- !
- ! Copy backwards.
- !
-.ov_bkwd:
- deccc %o2 ! dec count
- ldub [%o0 + %o2], %o3 ! get byte at end of src
- bgu %ncc, .ov_bkwd ! loop till done
- stb %o3, [%o1 + %o2] ! delay slot, store at end of dst
-
- retl ! return
- nop
-
- SET_SIZE(ovbcopy)
-
-#endif /* lint */
-
-
-/*
- * hwblkpagecopy()
- *
- * Copies exactly one page. This routine assumes the caller (ppcopy)
- * has already disabled kernel preemption and has checked
- * use_hw_bcopy. Preventing preemption also prevents cpu migration.
- */
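
A minimal sketch of the caller-side contract described above, using the
kpreempt_disable()/kpreempt_enable() and use_hw_bcopy names the comment
refers to (ppcopy's non-HW fallback path is elided):

    extern void kpreempt_disable(void);
    extern void kpreempt_enable(void);
    extern void hwblkpagecopy(const void *, void *);
    extern int use_hw_bcopy;

    static void
    ppcopy_sketch(const void *src, void *dst)
    {
            kpreempt_disable();             /* also prevents cpu migration */
            if (use_hw_bcopy)
                    hwblkpagecopy(src, dst);
            /* else: fall back to a non-FP copy (elided) */
            kpreempt_enable();
    }
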
-#ifdef lint
-/*ARGSUSED*/
-void
-hwblkpagecopy(const void *src, void *dst)
-{ }
-#else /* lint */
- ENTRY(hwblkpagecopy)
- ! get another window w/space for three aligned blocks of saved fpregs
- save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
-
- ! %i0 - source address (arg)
- ! %i1 - destination address (arg)
- ! %i2 - length of region (not arg)
- ! %l0 - saved fprs
- ! %l1 - pointer to saved fpregs
-
- rd %fprs, %l0 ! check for unused fp
-
- ! FPU enabled ? If not, enable it.
- btst FPRS_FEF, %l0
- bz,a,pt %icc, 1f
- wr %g0, FPRS_FEF, %fprs
-
- ! save the FP registers even if DU is not set.
-
- BST_FPQ3Q4_TOSTACK(%l1)
-
-1: set PAGESIZE, CNT
- mov %i1, %o0 ! store destination address for flushing
- mov REALSRC, SRC
-
- prefetch [SRC], #one_read
- prefetch [SRC + (1 * VIS_BLOCKSIZE)], #one_read
- prefetch [SRC + (2 * VIS_BLOCKSIZE)], #one_read
- prefetch [SRC + (3 * VIS_BLOCKSIZE)], #one_read
- ldd [SRC], %d32
-#if FIRST_PREFETCH > 4
- prefetch [SRC + (4 * VIS_BLOCKSIZE)], #one_read
-#endif
- ldd [SRC + 0x08], %d34
-#if FIRST_PREFETCH > 5
- prefetch [SRC + (5 * VIS_BLOCKSIZE)], #one_read
-#endif
- ldd [SRC + 0x10], %d36
-#if FIRST_PREFETCH > 6
- prefetch [SRC + (6 * VIS_BLOCKSIZE)], #one_read
-#endif
- faligndata %d32, %d34, %d48
- ldd [SRC + 0x18], %d38
-#if FIRST_PREFETCH > 7
- prefetch [SRC + (7 * VIS_BLOCKSIZE)], #one_read
-#endif
- faligndata %d34, %d36, %d50
- ldd [SRC + 0x20], %d40
- faligndata %d36, %d38, %d52
- ldd [SRC + 0x28], %d42
- faligndata %d38, %d40, %d54
- ldd [SRC + 0x30], %d44
- faligndata %d40, %d42, %d56
- ldd [SRC + 0x38], %d46
- faligndata %d42, %d44, %d58
- ldd [SRC + VIS_BLOCKSIZE], %d32
- sub CNT, VIS_BLOCKSIZE, CNT
- add SRC, VIS_BLOCKSIZE, SRC
- ba,a,pt %ncc, 2f
- nop
- .align ICACHE_LINE_SIZE
-2:
- ldd [SRC + 0x08], %d34
- faligndata %d44, %d46, %d60
- ldd [SRC + 0x10], %d36
- faligndata %d46, %d32, %d62
- stda %d48, [DST]ASI_BLK_P
- ldd [SRC + 0x18], %d38
- faligndata %d32, %d34, %d48
- ldd [SRC + 0x20], %d40
- faligndata %d34, %d36, %d50
- ldd [SRC + 0x28], %d42
- faligndata %d36, %d38, %d52
- ldd [SRC + 0x30], %d44
- faligndata %d38, %d40, %d54
- ldd [SRC + 0x38], %d46
- faligndata %d40, %d42, %d56
- ldd [SRC + VIS_BLOCKSIZE], %d32
- faligndata %d42, %d44, %d58
- prefetch [SRC + ((FIRST_PREFETCH) * VIS_BLOCKSIZE) + 8], #one_read
- sub CNT, VIS_BLOCKSIZE, CNT
- add DST, VIS_BLOCKSIZE, DST
- cmp CNT, VIS_BLOCKSIZE + 8
- prefetch [SRC + ((SECOND_PREFETCH) * VIS_BLOCKSIZE)], #one_read
- bgu,pt %ncc, 2b
- add SRC, VIS_BLOCKSIZE, SRC
-
- ! trailing block
- ldd [SRC + 0x08], %d34
- faligndata %d44, %d46, %d60
- ldd [SRC + 0x10], %d36
- faligndata %d46, %d32, %d62
- stda %d48, [DST]ASI_BLK_P
- ldd [SRC + 0x18], %d38
- ldd [SRC + 0x20], %d40
- ldd [SRC + 0x28], %d42
- ldd [SRC + 0x30], %d44
- ldd [SRC + 0x38], %d46
- sub CNT, VIS_BLOCKSIZE, CNT
- add DST, VIS_BLOCKSIZE, DST
- add SRC, VIS_BLOCKSIZE, SRC
- stda %d32, [DST]ASI_BLK_P
-
- set PAGESIZE, %o1
- call rock_sync_icache
- nop
-
- membar #Sync
-
- btst FPRS_FEF, %l0
- bz,pt %icc, 2f
- nop
-
- BLD_FPQ3Q4_FROMSTACK(%l3)
- ba 3f
- nop
-
-2: FZEROQ3Q4
-
-3: wr %l0, 0, %fprs ! restore fprs
- ret
- restore %g0, 0, %o0
-
- SET_SIZE(hwblkpagecopy)
-#endif /* lint */
-
-
-/*
- * Transfer data to and from user space -
- * Note that these routines can cause faults
- * It is assumed that the kernel has nothing at
- * less than KERNELBASE in the virtual address space.
- *
- * Note that copyin(9F) and copyout(9F) are part of the
- * DDI/DKI which specifies that they return '-1' on "errors."
- *
- * Sigh.
- *
- * So there are two extremely similar routines - xcopyin() and xcopyout()
- * which return the errno that we've faithfully computed. This
- * allows other callers (e.g. uiomove(9F)) to work correctly.
- * Given that these are used pretty heavily, we expand the calling
- * sequences inline for all flavours (rather than making wrappers).
- *
- * There are also stub routines for xcopyout_little and xcopyin_little,
- * which currently are intended to handle requests of <= 16 bytes from
- * do_unaligned. Future enhancement to make them handle 8k pages efficiently
- * is left as an exercise...
- */
-
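
In C, the two return conventions described above look like this;
kcopy_to_user() is a hypothetical stand-in for the shared copy engine and
returns 0 on success or an errno on fault:

    #include <stddef.h>

    extern int kcopy_to_user(const void *kaddr, void *uaddr, size_t count);

    int
    copyout_sketch(const void *kaddr, void *uaddr, size_t count)
    {
            /* copyout(9F): DDI/DKI mandates -1 on any error */
            return (kcopy_to_user(kaddr, uaddr, count) == 0 ? 0 : -1);
    }

    int
    xcopyout_sketch(const void *kaddr, void *uaddr, size_t count)
    {
            /* xcopyout: hand the computed errno back (for uiomove(9F)) */
            return (kcopy_to_user(kaddr, uaddr, count));
    }
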
-/*
- * Copy user data to kernel space (copyOP/xcopyOP/copyOP_noerr)
- *
- * General theory of operation:
- *
- * The only difference between copy{in,out} and
- * xcopy{in,out} is in the error handling routine they invoke
- * when a memory access error occurs. xcopyOP returns the errno
- * while copyOP returns -1 (see above). copy{in,out}_noerr set
- * a special flag (by oring the TRAMP_FLAG into the fault handler address)
- * if they are called with a fault handler already in place. That flag
- * causes the default handlers to trampoline to the previous handler
- * upon an error.
- *
- * None of the copyops routines grab a window until it's decided that
- * we need to do a HW block copy operation. This saves a window
- * spill/fill when we're called during socket ops. The typical IO
- * path won't cause spill/fill traps.
- *
- * This code uses a set of 4 limits for the maximum size that will
- * be copied given a particular input/output address alignment.
- * If the value for a particular limit is zero, the copy will be performed
- * by the plain copy loops rather than FPBLK.
- *
- * See the description of bcopy above for more details of the
- * data copying algorithm and the default limits.
- *
- */
-
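
The per-alignment limit selection performed at each entry point below reduces
to this C sketch (the initial VIS_COPY_THRESHOLD leaf-routine check is
omitted; the hw_copy_limit_* globals are the tunables the code loads):

    #include <stddef.h>
    #include <stdint.h>

    extern unsigned int hw_copy_limit_1, hw_copy_limit_2,
        hw_copy_limit_4, hw_copy_limit_8;

    /* Pick the limit from the mutual alignment of src and dst; use the
     * FP block path only when the limit is nonzero and len exceeds it. */
    static int
    use_fp_block_copy(uintptr_t src, uintptr_t dst, size_t len)
    {
            uintptr_t x = src ^ dst;
            unsigned int limit;

            if ((x & 7) == 0)
                    limit = hw_copy_limit_8;        /* longword alignable */
            else if ((x & 3) == 0)
                    limit = hw_copy_limit_4;        /* word alignable */
            else if ((x & 1) == 0)
                    limit = hw_copy_limit_2;        /* halfword alignable */
            else
                    limit = hw_copy_limit_1;        /* byte only */

            return (limit != 0 && len > limit);
    }
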
-/*
- * Copy kernel data to user space (copyout/xcopyout/xcopyout_little).
- */
-
-#if defined(lint)
-
-
-#else /* lint */
-/*
- * We save the arguments in the following registers in case of a fault:
- * kaddr - %l1
- * uaddr - %l2
- * count - %l3
- */
-#define SAVE_SRC %l1
-#define SAVE_DST %l2
-#define SAVE_COUNT %l3
-
-#define SM_SAVE_SRC %g4
-#define SM_SAVE_DST %g5
-#define SM_SAVE_COUNT %o5
-#define ERRNO %l5
-
-
-#define REAL_LOFAULT %l4
-/*
- * Generic copyio fault handler. This is the first line of defense when a
- * fault occurs in (x)copyin/(x)copyout. In order for this to function
- * properly, the value of the 'real' lofault handler should be in REAL_LOFAULT.
- * This allows us to share common code for all the flavors of the copy
- * operations, including the _noerr versions.
- *
- * Note that this function will restore the original input parameters before
- * calling REAL_LOFAULT. So the real handler can vector to the appropriate
- * member of the t_copyop structure, if needed.
- */
- ENTRY(copyio_fault)
- membar #Sync
- mov %g1,ERRNO ! save errno in ERRNO
- btst FPUSED_FLAG, %l6
- bz %ncc, 1f
- nop
-
- ldx [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2
- wr %o2, 0, %gsr ! restore gsr
-
- ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
- btst FPRS_FEF, %o3
- bz,pt %icc, 4f
- nop
-
- BLD_FPQ3Q4_FROMSTACK(%o2)
-
- ba,pt %ncc, 1f
- wr %o3, 0, %fprs ! restore fprs
-
-4:
- FZEROQ3Q4
- wr %o3, 0, %fprs ! restore fprs
-
-1:
- andn %l6, FPUSED_FLAG, %l6
- membar #Sync
- stn %l6, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
- FP_ALLOWMIGRATE(5, 6)
-
- mov SAVE_SRC, %i0
- mov SAVE_DST, %i1
- jmp REAL_LOFAULT
- mov SAVE_COUNT, %i2
-
- SET_SIZE(copyio_fault)
-
-
-#endif
-
-#if defined(lint)
-
-/*ARGSUSED*/
-int
-copyout(const void *kaddr, void *uaddr, size_t count)
-{ return (0); }
-
-#else /* lint */
-
- ENTRY(copyout)
-
- cmp %o2, VIS_COPY_THRESHOLD ! check for leaf rtn case
- 	bleu,pt	%ncc, .copyout_small	! go to small copy cases
- xor %o0, %o1, %o3 ! are src, dst alignable?
- btst 7, %o3 !
- bz,pt %ncc, .copyout_8 ! check for longword alignment
- nop
- btst 1, %o3 !
- bz,pt %ncc, .copyout_2 ! check for half-word
- nop
- sethi %hi(hw_copy_limit_1), %o3 ! Check copy limit
- ld [%o3 + %lo(hw_copy_limit_1)], %o3
- tst %o3
- bz,pn %icc, .copyout_small ! if zero, disable HW copy
- cmp %o2, %o3 ! if length <= limit
- bleu,pt %ncc, .copyout_small ! go to small copy
- nop
- ba,pt %ncc, .copyout_more ! otherwise go to large copy
- nop
-.copyout_2:
- btst 3, %o3 !
- bz,pt %ncc, .copyout_4 ! check for word alignment
- nop
- sethi %hi(hw_copy_limit_2), %o3 ! Check copy limit
- ld [%o3 + %lo(hw_copy_limit_2)], %o3
- tst %o3
- bz,pn %icc, .copyout_small ! if zero, disable HW copy
- cmp %o2, %o3 ! if length <= limit
- bleu,pt %ncc, .copyout_small ! go to small copy
- nop
- ba,pt %ncc, .copyout_more ! otherwise go to large copy
- nop
-.copyout_4:
- ! already checked longword, must be word aligned
- sethi %hi(hw_copy_limit_4), %o3 ! Check copy limit
- ld [%o3 + %lo(hw_copy_limit_4)], %o3
- tst %o3
- bz,pn %icc, .copyout_small ! if zero, disable HW copy
- cmp %o2, %o3 ! if length <= limit
- bleu,pt %ncc, .copyout_small ! go to small copy
- nop
- ba,pt %ncc, .copyout_more ! otherwise go to large copy
- nop
-.copyout_8:
- sethi %hi(hw_copy_limit_8), %o3 ! Check copy limit
- ld [%o3 + %lo(hw_copy_limit_8)], %o3
- tst %o3
- bz,pn %icc, .copyout_small ! if zero, disable HW copy
- cmp %o2, %o3 ! if length <= limit
- bleu,pt %ncc, .copyout_small ! go to small copy
- nop
- ba,pt %ncc, .copyout_more ! otherwise go to large copy
- nop
-
- .align 16
- nop ! instruction alignment
- ! see discussion at start of file
-.copyout_small:
- sethi %hi(.sm_copyout_err), %o5 ! .sm_copyout_err is lofault
- or %o5, %lo(.sm_copyout_err), %o5
- ldn [THREAD_REG + T_LOFAULT], %o4 ! save existing handler
- membar #Sync ! sync error barrier
- stn %o5, [THREAD_REG + T_LOFAULT] ! set t_lofault
-.sm_do_copyout:
- mov %o0, SM_SAVE_SRC
- mov %o1, SM_SAVE_DST
- cmp %o2, SHORTCOPY ! check for really short case
- bleu,pt %ncc, .co_sm_left !
- mov %o2, SM_SAVE_COUNT
- cmp %o2, CHKSIZE ! check for medium length cases
- bgu,pn %ncc, .co_med !
- or %o0, %o1, %o3 ! prepare alignment check
- andcc %o3, 0x3, %g0 ! test for alignment
- bz,pt %ncc, .co_sm_word ! branch to word aligned case
-.co_sm_movebytes:
- sub %o2, 3, %o2 ! adjust count to allow cc zero test
-.co_sm_notalign4:
- ldub [%o0], %o3 ! read byte
- subcc %o2, 4, %o2 ! reduce count by 4
- stba %o3, [%o1]ASI_USER ! write byte
- inc %o1 ! advance DST by 1
- ldub [%o0 + 1], %o3 ! repeat for a total of 4 bytes
- add %o0, 4, %o0 ! advance SRC by 4
- stba %o3, [%o1]ASI_USER
- inc %o1 ! advance DST by 1
- ldub [%o0 - 2], %o3
- stba %o3, [%o1]ASI_USER
- inc %o1 ! advance DST by 1
- ldub [%o0 - 1], %o3
- stba %o3, [%o1]ASI_USER
- bgt,pt %ncc, .co_sm_notalign4 ! loop til 3 or fewer bytes remain
- inc %o1 ! advance DST by 1
- add %o2, 3, %o2 ! restore count
-.co_sm_left:
- tst %o2
- bz,pt %ncc, .co_sm_exit ! check for zero length
- nop
- ldub [%o0], %o3 ! load one byte
- deccc %o2 ! reduce count for cc test
- bz,pt %ncc, .co_sm_exit
- stba %o3,[%o1]ASI_USER ! store one byte
- ldub [%o0 + 1], %o3 ! load second byte
- deccc %o2
- inc %o1
- bz,pt %ncc, .co_sm_exit
- stba %o3,[%o1]ASI_USER ! store second byte
- ldub [%o0 + 2], %o3 ! load third byte
- inc %o1
- stba %o3,[%o1]ASI_USER ! store third byte
- membar #Sync ! sync error barrier
- stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
- retl
- mov %g0, %o0 ! return 0
- .align 16
-.co_sm_words:
- lduw [%o0], %o3 ! read word
-.co_sm_wordx:
- subcc %o2, 8, %o2 ! update count
- stwa %o3, [%o1]ASI_USER ! write word
- add %o0, 8, %o0 ! update SRC
- lduw [%o0 - 4], %o3 ! read word
- add %o1, 4, %o1 ! update DST
- stwa %o3, [%o1]ASI_USER ! write word
- bgt,pt %ncc, .co_sm_words ! loop til done
- add %o1, 4, %o1 ! update DST
- addcc %o2, 7, %o2 ! restore count
- bz,pt %ncc, .co_sm_exit
- nop
- deccc %o2
- bz,pt %ncc, .co_sm_byte
-.co_sm_half:
- subcc %o2, 2, %o2 ! reduce count by 2
- lduh [%o0], %o3 ! read half word
- add %o0, 2, %o0 ! advance SRC by 2
- stha %o3, [%o1]ASI_USER ! write half word
- bgt,pt %ncc, .co_sm_half ! loop til done
- add %o1, 2, %o1 ! advance DST by 2
- addcc %o2, 1, %o2 ! restore count
- bz,pt %ncc, .co_sm_exit
- nop
-.co_sm_byte:
- ldub [%o0], %o3
- stba %o3, [%o1]ASI_USER
- membar #Sync ! sync error barrier
- stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
- retl
- mov %g0, %o0 ! return 0
- .align 16
-.co_sm_word:
- subcc %o2, 4, %o2 ! update count
- bgt,pt %ncc, .co_sm_wordx
- lduw [%o0], %o3 ! read word
- addcc %o2, 3, %o2 ! restore count
- bz,pt %ncc, .co_sm_exit
- stwa %o3, [%o1]ASI_USER ! write word
- deccc %o2 ! reduce count for cc test
- ldub [%o0 + 4], %o3 ! load one byte
- add %o1, 4, %o1
- bz,pt %ncc, .co_sm_exit
- stba %o3, [%o1]ASI_USER ! store one byte
- ldub [%o0 + 5], %o3 ! load second byte
- deccc %o2
- inc %o1
- bz,pt %ncc, .co_sm_exit
- stba %o3, [%o1]ASI_USER ! store second byte
- ldub [%o0 + 6], %o3 ! load third byte
- inc %o1
- stba %o3, [%o1]ASI_USER ! store third byte
-.co_sm_exit:
- membar #Sync ! sync error barrier
- stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
- retl
- mov %g0, %o0 ! return 0
-
- .align 16
-.co_med:
- xor %o0, %o1, %o3 ! setup alignment check
- btst 1, %o3
- bnz,pt %ncc, .co_sm_movebytes ! unaligned
- nop
- btst 3, %o3
- bnz,pt %ncc, .co_med_half ! halfword aligned
- nop
- btst 7, %o3
- bnz,pt %ncc, .co_med_word ! word aligned
- nop
-.co_med_long:
- btst 3, %o0 ! check for
- bz,pt %ncc, .co_med_long1 ! word alignment
- nop
-.co_med_long0:
- ldub [%o0], %o3 ! load one byte
- inc %o0
- stba %o3,[%o1]ASI_USER ! store byte
- inc %o1
- btst 3, %o0
- bnz,pt %ncc, .co_med_long0
- dec %o2
-.co_med_long1: ! word aligned
- btst 7, %o0 ! check for long word
- bz,pt %ncc, .co_med_long2
- nop
- lduw [%o0], %o3 ! load word
- add %o0, 4, %o0 ! advance SRC by 4
- stwa %o3, [%o1]ASI_USER ! store word
- add %o1, 4, %o1 ! advance DST by 4
- sub %o2, 4, %o2 ! reduce count by 4
-!
-! Now long word aligned and have at least 32 bytes to move
-!
-.co_med_long2:
- sub %o2, 31, %o2 ! adjust count to allow cc zero test
- sub %o1, 8, %o1 ! adjust pointer to allow store in
- ! branch delay slot instead of add
-.co_med_lmove:
- add %o1, 8, %o1 ! advance DST by 8
- ldx [%o0], %o3 ! read long word
- subcc %o2, 32, %o2 ! reduce count by 32
- stxa %o3, [%o1]ASI_USER ! write long word
- add %o1, 8, %o1 ! advance DST by 8
- 	ldx	[%o0 + 8], %o3	! repeat for a total of 4 long words
- add %o0, 32, %o0 ! advance SRC by 32
- stxa %o3, [%o1]ASI_USER
- ldx [%o0 - 16], %o3
- add %o1, 8, %o1 ! advance DST by 8
- stxa %o3, [%o1]ASI_USER
- ldx [%o0 - 8], %o3
- add %o1, 8, %o1 ! advance DST by 8
- bgt,pt %ncc, .co_med_lmove ! loop til 31 or fewer bytes left
- stxa %o3, [%o1]ASI_USER
- add %o1, 8, %o1 ! advance DST by 8
- addcc %o2, 24, %o2 ! restore count to long word offset
- ble,pt %ncc, .co_med_lextra ! check for more long words to move
- nop
-.co_med_lword:
- ldx [%o0], %o3 ! read long word
- subcc %o2, 8, %o2 ! reduce count by 8
- stxa %o3, [%o1]ASI_USER ! write long word
- add %o0, 8, %o0 ! advance SRC by 8
- bgt,pt %ncc, .co_med_lword ! loop til 7 or fewer bytes left
- add %o1, 8, %o1 ! advance DST by 8
-.co_med_lextra:
- addcc %o2, 7, %o2 ! restore rest of count
- bz,pt %ncc, .co_sm_exit ! if zero, then done
- deccc %o2
- bz,pt %ncc, .co_sm_byte
- nop
- ba,pt %ncc, .co_sm_half
- nop
-
- .align 16
- nop ! instruction alignment
- ! see discussion at start of file
-.co_med_word:
- btst 3, %o0 ! check for
- bz,pt %ncc, .co_med_word1 ! word alignment
- nop
-.co_med_word0:
- ldub [%o0], %o3 ! load one byte
- inc %o0
- stba %o3,[%o1]ASI_USER ! store byte
- inc %o1
- btst 3, %o0
- bnz,pt %ncc, .co_med_word0
- dec %o2
-!
-! Now word aligned and have at least 36 bytes to move
-!
-.co_med_word1:
- sub %o2, 15, %o2 ! adjust count to allow cc zero test
-.co_med_wmove:
- lduw [%o0], %o3 ! read word
- subcc %o2, 16, %o2 ! reduce count by 16
- stwa %o3, [%o1]ASI_USER ! write word
- add %o1, 4, %o1 ! advance DST by 4
- 	lduw	[%o0 + 4], %o3	! repeat for a total of 4 words
- add %o0, 16, %o0 ! advance SRC by 16
- stwa %o3, [%o1]ASI_USER
- add %o1, 4, %o1 ! advance DST by 4
- lduw [%o0 - 8], %o3
- stwa %o3, [%o1]ASI_USER
- add %o1, 4, %o1 ! advance DST by 4
- lduw [%o0 - 4], %o3
- stwa %o3, [%o1]ASI_USER
- bgt,pt %ncc, .co_med_wmove ! loop til 15 or fewer bytes left
- add %o1, 4, %o1 ! advance DST by 4
- addcc %o2, 12, %o2 ! restore count to word offset
- ble,pt %ncc, .co_med_wextra ! check for more words to move
- nop
-.co_med_word2:
- lduw [%o0], %o3 ! read word
- subcc %o2, 4, %o2 ! reduce count by 4
- stwa %o3, [%o1]ASI_USER ! write word
- add %o0, 4, %o0 ! advance SRC by 4
- bgt,pt %ncc, .co_med_word2 ! loop til 3 or fewer bytes left
- add %o1, 4, %o1 ! advance DST by 4
-.co_med_wextra:
- addcc %o2, 3, %o2 ! restore rest of count
- bz,pt %ncc, .co_sm_exit ! if zero, then done
- deccc %o2
- bz,pt %ncc, .co_sm_byte
- nop
- ba,pt %ncc, .co_sm_half
- nop
-
- .align 16
- nop ! instruction alignment
- nop ! see discussion at start of file
- nop
-.co_med_half:
- btst 1, %o0 ! check for
- bz,pt %ncc, .co_med_half1 ! half word alignment
- nop
- ldub [%o0], %o3 ! load one byte
- inc %o0
- stba %o3,[%o1]ASI_USER ! store byte
- inc %o1
- dec %o2
-!
-! Now half word aligned and have at least 38 bytes to move
-!
-.co_med_half1:
- sub %o2, 7, %o2 ! adjust count to allow cc zero test
-.co_med_hmove:
- lduh [%o0], %o3 ! read half word
- subcc %o2, 8, %o2 ! reduce count by 8
- stha %o3, [%o1]ASI_USER ! write half word
- add %o1, 2, %o1 ! advance DST by 2
- 	lduh	[%o0 + 2], %o3	! repeat for a total of 4 halfwords
- add %o0, 8, %o0 ! advance SRC by 8
- stha %o3, [%o1]ASI_USER
- add %o1, 2, %o1 ! advance DST by 2
- lduh [%o0 - 4], %o3
- stha %o3, [%o1]ASI_USER
- add %o1, 2, %o1 ! advance DST by 2
- lduh [%o0 - 2], %o3
- stha %o3, [%o1]ASI_USER
- bgt,pt %ncc, .co_med_hmove ! loop til 7 or fewer bytes left
- add %o1, 2, %o1 ! advance DST by 2
- addcc %o2, 7, %o2 ! restore count
- bz,pt %ncc, .co_sm_exit
- deccc %o2
- bz,pt %ncc, .co_sm_byte
- nop
- ba,pt %ncc, .co_sm_half
- nop
-
-/*
- * We got here because of a fault during short copyout.
- * Errno value is in ERRNO, but DDI/DKI says return -1 (sigh).
- */
-.sm_copyout_err:
- membar #Sync
- stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
- mov SM_SAVE_SRC, %o0
- mov SM_SAVE_DST, %o1
- mov SM_SAVE_COUNT, %o2
- ldn [THREAD_REG + T_COPYOPS], %o3 ! check for copyop handler
- tst %o3
- bz,pt %ncc, 3f ! if not, return error
- nop
- ldn [%o3 + CP_COPYOUT], %o5 ! if handler, invoke it with
- jmp %o5 ! original arguments
- nop
-3:
- retl
- or %g0, -1, %o0 ! return error value
-
- SET_SIZE(copyout)
-
-/*
- * The _more entry points are not intended to be used directly by
- * any caller from outside this file. They are provided to allow
- * profiling and dtrace of the portions of the copy code that use
- * the floating point registers.
- * This entry is particularly important as DTRACE (at least as of
- * 4/2004) does not support leaf functions.
- */
-
- ENTRY(copyout_more)
-.copyout_more:
- save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
- set .copyout_err, REAL_LOFAULT
-
-/*
- * Copy outs that reach here are larger than VIS_COPY_THRESHOLD bytes
- */
-.do_copyout:
- 	set	copyio_fault, %l7	! copyio_fault is lofault val
-
- ldn [THREAD_REG + T_LOFAULT], %l6 ! save existing handler
- membar #Sync ! sync error barrier
- stn %l7, [THREAD_REG + T_LOFAULT] ! set t_lofault
-
- mov %i0, SAVE_SRC
- mov %i1, SAVE_DST
- mov %i2, SAVE_COUNT
-
- FP_NOMIGRATE(6, 7)
-
- rd %fprs, %o2 ! check for unused fp
- st %o2, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET] ! save orig %fprs
-
- ! FPU enabled ? If not, enable it.
- btst FPRS_FEF, %o2
- bz,a,pt %icc, .do_blockcopyout
- wr %g0, FPRS_FEF, %fprs
-
- ! save the FP registers even if DU is not set.
-
- BST_FPQ3Q4_TOSTACK(%o2)
-
-.do_blockcopyout:
- rd %gsr, %o2
- stx %o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET] ! save gsr
- or %l6, FPUSED_FLAG, %l6
-
- andcc DST, VIS_BLOCKSIZE - 1, TMP
- mov ASI_USER, %asi
- bz,pt %ncc, 2f
- neg TMP
- add TMP, VIS_BLOCKSIZE, TMP
-
- ! TMP = bytes required to align DST on FP_BLOCK boundary
- ! Using SRC as a tmp here
- cmp TMP, 3
- bleu,pt %ncc, 1f
- sub CNT,TMP,CNT ! adjust main count
- sub TMP, 3, TMP ! adjust for end of loop test
-.co_blkalign:
- ldub [REALSRC], SRC ! move 4 bytes per loop iteration
- stba SRC, [DST]%asi
- subcc TMP, 4, TMP
- ldub [REALSRC + 1], SRC
- add REALSRC, 4, REALSRC
- stba SRC, [DST + 1]%asi
- ldub [REALSRC - 2], SRC
- add DST, 4, DST
- stba SRC, [DST - 2]%asi
- ldub [REALSRC - 1], SRC
- bgu,pt %ncc, .co_blkalign
- stba SRC, [DST - 1]%asi
-
- addcc TMP, 3, TMP ! restore count adjustment
- bz,pt %ncc, 2f ! no bytes left?
- nop
-1: ldub [REALSRC], SRC
- inc REALSRC
- inc DST
- deccc TMP
- bgu %ncc, 1b
- stba SRC, [DST - 1]%asi
-
-2:
- andn REALSRC, 0x7, SRC
- alignaddr REALSRC, %g0, %g0
-
- ! SRC - 8-byte aligned
- ! DST - 64-byte aligned
- prefetch [SRC], #one_read
- prefetch [SRC + (1 * VIS_BLOCKSIZE)], #one_read
- prefetch [SRC + (2 * VIS_BLOCKSIZE)], #one_read
- prefetch [SRC + (3 * VIS_BLOCKSIZE)], #one_read
- ldd [SRC], %d32
-#if FIRST_PREFETCH > 4
- prefetch [SRC + (4 * VIS_BLOCKSIZE)], #one_read
-#endif
- ldd [SRC + 0x08], %d34
-#if FIRST_PREFETCH > 5
- prefetch [SRC + (5 * VIS_BLOCKSIZE)], #one_read
-#endif
- ldd [SRC + 0x10], %d36
-#if FIRST_PREFETCH > 6
- prefetch [SRC + (6 * VIS_BLOCKSIZE)], #one_read
-#endif
- faligndata %d32, %d34, %d48
- ldd [SRC + 0x18], %d38
-#if FIRST_PREFETCH > 7
- prefetch [SRC + (7 * VIS_BLOCKSIZE)], #one_read
-#endif
- faligndata %d34, %d36, %d50
- ldd [SRC + 0x20], %d40
- faligndata %d36, %d38, %d52
- ldd [SRC + 0x28], %d42
- faligndata %d38, %d40, %d54
- ldd [SRC + 0x30], %d44
- faligndata %d40, %d42, %d56
- ldd [SRC + 0x38], %d46
- faligndata %d42, %d44, %d58
- ldd [SRC + VIS_BLOCKSIZE], %d32
- sub CNT, VIS_BLOCKSIZE, CNT
- add SRC, VIS_BLOCKSIZE, SRC
- add REALSRC, VIS_BLOCKSIZE, REALSRC
- ba,a,pt %ncc, 1f
- nop
- .align ICACHE_LINE_SIZE
-1:
- ldd [SRC + 0x08], %d34
- faligndata %d44, %d46, %d60
- ldd [SRC + 0x10], %d36
- faligndata %d46, %d32, %d62
- stda %d48, [DST]ASI_BLK_AIUS
- ldd [SRC + 0x18], %d38
- faligndata %d32, %d34, %d48
- ldd [SRC + 0x20], %d40
- faligndata %d34, %d36, %d50
- ldd [SRC + 0x28], %d42
- faligndata %d36, %d38, %d52
- ldd [SRC + 0x30], %d44
- faligndata %d38, %d40, %d54
- ldd [SRC + 0x38], %d46
- faligndata %d40, %d42, %d56
- sub CNT, VIS_BLOCKSIZE, CNT
- ldd [SRC + VIS_BLOCKSIZE], %d32
- faligndata %d42, %d44, %d58
- prefetch [SRC + ((FIRST_PREFETCH) * VIS_BLOCKSIZE) + 8], #one_read
- add DST, VIS_BLOCKSIZE, DST
- prefetch [SRC + ((SECOND_PREFETCH) * VIS_BLOCKSIZE)], #one_read
- add REALSRC, VIS_BLOCKSIZE, REALSRC
- cmp CNT, VIS_BLOCKSIZE + 8
- bgu,pt %ncc, 1b
- add SRC, VIS_BLOCKSIZE, SRC
-
- ! only if REALSRC & 0x7 is 0
- cmp CNT, VIS_BLOCKSIZE
- bne %ncc, 3f
- andcc REALSRC, 0x7, %g0
- bz,pt %ncc, 2f
- nop
-3:
- faligndata %d44, %d46, %d60
- faligndata %d46, %d32, %d62
- stda %d48, [DST]ASI_BLK_AIUS
- add DST, VIS_BLOCKSIZE, DST
- ba,pt %ncc, 3f
- nop
-2:
- ldd [SRC + 0x08], %d34
- faligndata %d44, %d46, %d60
- ldd [SRC + 0x10], %d36
- faligndata %d46, %d32, %d62
- stda %d48, [DST]ASI_BLK_AIUS
- ldd [SRC + 0x18], %d38
- ldd [SRC + 0x20], %d40
- ldd [SRC + 0x28], %d42
- ldd [SRC + 0x30], %d44
- ldd [SRC + 0x38], %d46
- sub CNT, VIS_BLOCKSIZE, CNT
- add DST, VIS_BLOCKSIZE, DST
- add SRC, VIS_BLOCKSIZE, SRC
- add REALSRC, VIS_BLOCKSIZE, REALSRC
- stda %d32, [DST]ASI_BLK_AIUS
- add DST, VIS_BLOCKSIZE, DST
- ba,a,pt %ncc, 4f
- nop
-
-3: tst CNT
- bz,a %ncc, 4f
- nop
-
-5: ldub [REALSRC], TMP
- inc REALSRC
- inc DST
- deccc CNT
- bgu %ncc, 5b
- stba TMP, [DST - 1]%asi
-4:
-
-.copyout_exit:
- membar #Sync
-
- ldx [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2
- wr %o2, 0, %gsr ! restore gsr
-
- ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
- btst FPRS_FEF, %o3
- bz,pt %icc, 4f
- nop
-
- BLD_FPQ3Q4_FROMSTACK(%o2)
-
- ba,pt %ncc, 1f
- wr %o3, 0, %fprs ! restore fprs
-
-4:
- FZEROQ3Q4
- wr %o3, 0, %fprs ! restore fprs
-
-1:
- membar #Sync
- andn %l6, FPUSED_FLAG, %l6
- stn %l6, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
- FP_ALLOWMIGRATE(5, 6)
- ret
- restore %g0, 0, %o0
-
-/*
- * We got here because of a fault during copyout.
- * Errno value is in ERRNO, but DDI/DKI says return -1 (sigh).
- */
-.copyout_err:
- ldn [THREAD_REG + T_COPYOPS], %o4 ! check for copyop handler
- tst %o4
- bz,pt %ncc, 2f ! if not, return error
- nop
- ldn [%o4 + CP_COPYOUT], %g2 ! if handler, invoke it with
- jmp %g2 ! original arguments
- restore %g0, 0, %g0 ! dispose of copy window
-2:
- ret
- restore %g0, -1, %o0 ! return error value
-
-
- SET_SIZE(copyout_more)
-
-#endif /* lint */
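
For reference, the faligndata software pipeline above reduces to a splice of adjacent aligned doublewords. A minimal C sketch of that step, assuming big-endian (SPARC) byte order; the function name is illustrative, and like the assembly it reads one aligned doubleword past the source tail:

    #include <stddef.h>
    #include <stdint.h>

    /* Copy n bytes (n a multiple of 8) from a possibly misaligned src by
     * loading aligned doublewords and splicing neighbouring pairs, the way
     * faligndata combines register pairs after alignaddr records the offset. */
    static void splice_copy(const uint8_t *src, uint64_t *dst, size_t n)
    {
        unsigned off = (uintptr_t)src & 7;      /* what alignaddr latches */
        const uint64_t *s = (const uint64_t *)(src - off);
        unsigned sh = off * 8;

        if (sh == 0) {                          /* aligned: plain doubleword copy */
            for (size_t i = 0; i < n / 8; i++)
                dst[i] = s[i];
            return;
        }
        uint64_t prev = s[0];
        for (size_t i = 0; i < n / 8; i++) {
            uint64_t next = s[i + 1];           /* reads one dword ahead, as above */
            dst[i] = (prev << sh) | (next >> (64 - sh));
            prev = next;
        }
    }

The sh == 0 guard matters in C (a shift by 64 is undefined), which is why the assembly keeps a separate straight-copy path for the mutually aligned case.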
-
-
-#ifdef lint
-
-/*ARGSUSED*/
-int
-xcopyout(const void *kaddr, void *uaddr, size_t count)
-{ return (0); }
-
-#else /* lint */
-
- ENTRY(xcopyout)
- cmp %o2, VIS_COPY_THRESHOLD ! check for leaf rtn case
- bleu,pt %ncc, .xcopyout_small ! go to small copy
- xor %o0, %o1, %o3 ! are src, dst alignable?
- btst 7, %o3 !
- bz,pt %ncc, .xcopyout_8 !
- nop
- btst 1, %o3 !
- bz,pt %ncc, .xcopyout_2 ! check for half-word
- nop
- sethi %hi(hw_copy_limit_1), %o3 ! Check copy limit
- ld [%o3 + %lo(hw_copy_limit_1)], %o3
- tst %o3
- bz,pn %icc, .xcopyout_small ! if zero, disable HW copy
- cmp %o2, %o3 ! if length <= limit
- bleu,pt %ncc, .xcopyout_small ! go to small copy
- nop
- ba,pt %ncc, .xcopyout_more ! otherwise go to large copy
- nop
-.xcopyout_2:
- btst 3, %o3 !
- bz,pt %ncc, .xcopyout_4 ! check for word alignment
- nop
- sethi %hi(hw_copy_limit_2), %o3 ! Check copy limit
- ld [%o3 + %lo(hw_copy_limit_2)], %o3
- tst %o3
- bz,pn %icc, .xcopyout_small ! if zero, disable HW copy
- cmp %o2, %o3 ! if length <= limit
- bleu,pt %ncc, .xcopyout_small ! go to small copy
- nop
- ba,pt %ncc, .xcopyout_more ! otherwise go to large copy
- nop
-.xcopyout_4:
- ! already checked longword, must be word aligned
- sethi %hi(hw_copy_limit_4), %o3 ! Check copy limit
- ld [%o3 + %lo(hw_copy_limit_4)], %o3
- tst %o3
- bz,pn %icc, .xcopyout_small ! if zero, disable HW copy
- cmp %o2, %o3 ! if length <= limit
- bleu,pt %ncc, .xcopyout_small ! go to small copy
- nop
- ba,pt %ncc, .xcopyout_more ! otherwise go to large copy
- nop
-.xcopyout_8:
- sethi %hi(hw_copy_limit_8), %o3 ! Check copy limit
- ld [%o3 + %lo(hw_copy_limit_8)], %o3
- tst %o3
- bz,pn %icc, .xcopyout_small ! if zero, disable HW copy
- cmp %o2, %o3 ! if length <= limit
- bleu,pt %ncc, .xcopyout_small ! go to small copy
- nop
- ba,pt %ncc, .xcopyout_more ! otherwise go to large copy
- nop
-
-.xcopyout_small:
- sethi %hi(.sm_xcopyout_err), %o5 ! .sm_xcopyout_err is lofault
- or %o5, %lo(.sm_xcopyout_err), %o5
- ldn [THREAD_REG + T_LOFAULT], %o4 ! save existing handler
- membar #Sync ! sync error barrier
- ba,pt %ncc, .sm_do_copyout ! common code
- stn %o5, [THREAD_REG + T_LOFAULT] ! set t_lofault
-
-.xcopyout_more:
- save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
- sethi %hi(.xcopyout_err), REAL_LOFAULT
- ba,pt %ncc, .do_copyout ! common code
- or REAL_LOFAULT, %lo(.xcopyout_err), REAL_LOFAULT
-
-/*
- * We got here because of fault during xcopyout
- * Errno value is in ERRNO
- */
-.xcopyout_err:
- ldn [THREAD_REG + T_COPYOPS], %o4 ! check for copyop handler
- tst %o4
- bz,pt %ncc, 2f ! if not, return error
- nop
- ldn [%o4 + CP_XCOPYOUT], %g2 ! if handler, invoke it with
- jmp %g2 ! original arguments
- restore %g0, 0, %g0 ! dispose of copy window
-2:
- ret
- restore ERRNO, 0, %o0 ! return errno value
-
-.sm_xcopyout_err:
-
- membar #Sync
- stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
- mov SM_SAVE_SRC, %o0
- mov SM_SAVE_DST, %o1
- mov SM_SAVE_COUNT, %o2
- ldn [THREAD_REG + T_COPYOPS], %o3 ! check for copyop handler
- tst %o3
- bz,pt %ncc, 3f ! if not, return error
- nop
- ldn [%o3 + CP_XCOPYOUT], %o5 ! if handler, invoke it with
- jmp %o5 ! original arguments
- nop
-3:
- retl
- or %g1, 0, %o0 ! return errno value
-
- SET_SIZE(xcopyout)
-
-#endif /* lint */
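
The alignment/limit dispatch repeated at each of these entry points compacts to a few lines of C. A sketch: the hw_copy_limit defaults are the DGDEF values at the end of this file, while VIS_COPY_THRESHOLD's real value lives in the headers, so the figure below is only a placeholder.

    #include <stddef.h>
    #include <stdint.h>

    /* Default tunables, per the DGDEF initializers at the end of this file. */
    static uint32_t hw_copy_limit_1 = 0x100;
    static uint32_t hw_copy_limit_2 = 0x200;
    static uint32_t hw_copy_limit_4 = 0x400;
    static uint32_t hw_copy_limit_8 = 0x400;

    #define VIS_COPY_THRESHOLD 900      /* placeholder; real value is in the headers */

    /* Returns nonzero when the FP/VIS block path (the _more entry) is taken. */
    static int want_fp_path(const void *src, void *dst, size_t len)
    {
        uintptr_t x = (uintptr_t)src ^ (uintptr_t)dst;
        uint32_t limit;

        if (len <= VIS_COPY_THRESHOLD)
            return (0);                 /* leaf-routine small copy */
        if ((x & 7) == 0)
            limit = hw_copy_limit_8;    /* mutually 8-byte alignable */
        else if ((x & 1) != 0)
            limit = hw_copy_limit_1;    /* byte alignment only */
        else if ((x & 3) != 0)
            limit = hw_copy_limit_2;    /* 2-byte alignable */
        else
            limit = hw_copy_limit_4;    /* 4-byte alignable */
        if (limit == 0)
            return (0);                 /* a zero limit disables the HW path */
        return (len > limit);
    }

Keeping the limits as patchable globals lets a platform disable the FP path outright by zeroing them, which is exactly how the tst/bz checks above read.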
-
-#ifdef lint
-
-/*ARGSUSED*/
-int
-xcopyout_little(const void *kaddr, void *uaddr, size_t count)
-{ return (0); }
-
-#else /* lint */
-
- ENTRY(xcopyout_little)
- sethi %hi(.xcopyio_err), %o5
- or %o5, %lo(.xcopyio_err), %o5
- ldn [THREAD_REG + T_LOFAULT], %o4
- membar #Sync ! sync error barrier
- stn %o5, [THREAD_REG + T_LOFAULT]
- mov %o4, %o5
-
- subcc %g0, %o2, %o3
- add %o0, %o2, %o0
- bz,pn %ncc, 2f ! check for zero bytes
- sub %o2, 1, %o4
- add %o0, %o4, %o0 ! start w/last byte
- add %o1, %o2, %o1
- ldub [%o0 + %o3], %o4
-
-1: stba %o4, [%o1 + %o3]ASI_AIUSL
- inccc %o3
- sub %o0, 2, %o0 ! get next byte
- bcc,a,pt %ncc, 1b
- ldub [%o0 + %o3], %o4
-
-2:
- membar #Sync ! sync error barrier
- stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
- retl
- mov %g0, %o0 ! return (0)
-
- SET_SIZE(xcopyout_little)
-
-#endif /* lint */
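
As the "start w/last byte" comment indicates, the _little variants walk the source backwards while writing the destination forwards through the little-endian user ASI, so the transfer reverses byte order end to end. In C terms (a sketch; the fault-handler setup and teardown are omitted):

    #include <stddef.h>
    #include <stdint.h>

    /* n bytes are moved one at a time, reading the source from its last
     * byte backwards while the destination is written forwards. */
    static void copy_reversed(const uint8_t *src, uint8_t *dst, size_t n)
    {
        for (size_t i = 0; i < n; i++)
            dst[i] = src[n - 1 - i];
    }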
-
-/*
- * Copy user data to kernel space (copyin/xcopyin/xcopyin_little)
- */
-
-#if defined(lint)
-
-/*ARGSUSED*/
-int
-copyin(const void *uaddr, void *kaddr, size_t count)
-{ return (0); }
-
-#else /* lint */
-
- ENTRY(copyin)
- cmp %o2, VIS_COPY_THRESHOLD ! check for leaf rtn case
- bleu,pt %ncc, .copyin_small ! go to small copy
- xor %o0, %o1, %o3 ! are src, dst alignable?
- btst 7, %o3 !
- bz,pt %ncc, .copyin_8 ! check for longword alignment
- nop
- btst 1, %o3 !
- bz,pt %ncc, .copyin_2 ! check for half-word
- nop
- sethi %hi(hw_copy_limit_1), %o3 ! Check copy limit
- ld [%o3 + %lo(hw_copy_limit_1)], %o3
- tst %o3
- bz,pn %icc, .copyin_small ! if zero, disable HW copy
- cmp %o2, %o3 ! if length <= limit
- bleu,pt %ncc, .copyin_small ! go to small copy
- nop
- ba,pt %ncc, .copyin_more ! otherwise go to large copy
- nop
-.copyin_2:
- btst 3, %o3 !
- bz,pt %ncc, .copyin_4 ! check for word alignment
- nop
- sethi %hi(hw_copy_limit_2), %o3 ! Check copy limit
- ld [%o3 + %lo(hw_copy_limit_2)], %o3
- tst %o3
- bz,pn %icc, .copyin_small ! if zero, disable HW copy
- cmp %o2, %o3 ! if length <= limit
- bleu,pt %ncc, .copyin_small ! go to small copy
- nop
- ba,pt %ncc, .copyin_more ! otherwise go to large copy
- nop
-.copyin_4:
- ! already checked longword, must be word aligned
- sethi %hi(hw_copy_limit_4), %o3 ! Check copy limit
- ld [%o3 + %lo(hw_copy_limit_4)], %o3
- tst %o3
- bz,pn %icc, .copyin_small ! if zero, disable HW copy
- cmp %o2, %o3 ! if length <= limit
- bleu,pt %ncc, .copyin_small ! go to small copy
- nop
- ba,pt %ncc, .copyin_more ! otherwise go to large copy
- nop
-.copyin_8:
- sethi %hi(hw_copy_limit_8), %o3 ! Check copy limit
- ld [%o3 + %lo(hw_copy_limit_8)], %o3
- tst %o3
- bz,pn %icc, .copyin_small ! if zero, disable HW copy
- cmp %o2, %o3 ! if length <= limit
- bleu,pt %ncc, .copyin_small ! go to small copy
- nop
- ba,pt %ncc, .copyin_more ! otherwise go to large copy
- nop
-
- .align 16
- nop ! instruction alignment
- ! see discussion at start of file
-.copyin_small:
- sethi %hi(.sm_copyin_err), %o5 ! .sm_copyin_err is lofault
- or %o5, %lo(.sm_copyin_err), %o5
- ldn [THREAD_REG + T_LOFAULT], %o4 ! set/save t_lofault, no tramp
- membar #Sync ! sync error barrier
- stn %o5, [THREAD_REG + T_LOFAULT]
-.sm_do_copyin:
- mov %o0, SM_SAVE_SRC
- mov %o1, SM_SAVE_DST
- cmp %o2, SHORTCOPY ! check for really short case
- bleu,pt %ncc, .ci_sm_left !
- mov %o2, SM_SAVE_COUNT
- cmp %o2, CHKSIZE ! check for medium length cases
- bgu,pn %ncc, .ci_med !
- or %o0, %o1, %o3 ! prepare alignment check
- andcc %o3, 0x3, %g0 ! test for alignment
- bz,pt %ncc, .ci_sm_word ! branch to word aligned case
-.ci_sm_movebytes:
- sub %o2, 3, %o2 ! adjust count to allow cc zero test
-.ci_sm_notalign4:
- lduba [%o0]ASI_USER, %o3 ! read byte
- subcc %o2, 4, %o2 ! reduce count by 4
- stb %o3, [%o1] ! write byte
- add %o0, 1, %o0 ! advance SRC by 1
- lduba [%o0]ASI_USER, %o3 ! repeat for a total of 4 bytes
- add %o0, 1, %o0 ! advance SRC by 1
- stb %o3, [%o1 + 1]
- add %o1, 4, %o1 ! advance DST by 4
- lduba [%o0]ASI_USER, %o3
- add %o0, 1, %o0 ! advance SRC by 1
- stb %o3, [%o1 - 2]
- lduba [%o0]ASI_USER, %o3
- add %o0, 1, %o0 ! advance SRC by 1
- bgt,pt %ncc, .ci_sm_notalign4 ! loop til 3 or fewer bytes remain
- stb %o3, [%o1 - 1]
- add %o2, 3, %o2 ! restore count
-.ci_sm_left:
- tst %o2
- bz,pt %ncc, .ci_sm_exit
- nop
- lduba [%o0]ASI_USER, %o3 ! load one byte
- deccc %o2 ! reduce count for cc test
- bz,pt %ncc, .ci_sm_exit
- stb %o3,[%o1] ! store one byte
- inc %o0
- lduba [%o0]ASI_USER, %o3 ! load second byte
- deccc %o2
- bz,pt %ncc, .ci_sm_exit
- stb %o3,[%o1 + 1] ! store second byte
- inc %o0
- lduba [%o0]ASI_USER, %o3 ! load third byte
- stb %o3,[%o1 + 2] ! store third byte
- membar #Sync ! sync error barrier
- stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
- retl
- mov %g0, %o0 ! return 0
- .align 16
-.ci_sm_words:
- lduwa [%o0]ASI_USER, %o3 ! read word
-.ci_sm_wordx:
- subcc %o2, 8, %o2 ! update count
- stw %o3, [%o1] ! write word
- add %o0, 4, %o0 ! update SRC
- add %o1, 8, %o1 ! update DST
- lduwa [%o0]ASI_USER, %o3 ! read word
- add %o0, 4, %o0 ! update SRC
- bgt,pt %ncc, .ci_sm_words ! loop til done
- stw %o3, [%o1 - 4] ! write word
- addcc %o2, 7, %o2 ! restore count
- bz,pt %ncc, .ci_sm_exit
- nop
- deccc %o2
- bz,pt %ncc, .ci_sm_byte
-.ci_sm_half:
- subcc %o2, 2, %o2 ! reduce count by 2
- lduha [%o0]ASI_USER, %o3 ! read half word
- add %o0, 2, %o0 ! advance SRC by 2
- add %o1, 2, %o1 ! advance DST by 2
- bgt,pt %ncc, .ci_sm_half ! loop til done
- sth %o3, [%o1 - 2] ! write half word
- addcc %o2, 1, %o2 ! restore count
- bz,pt %ncc, .ci_sm_exit
- nop
-.ci_sm_byte:
- lduba [%o0]ASI_USER, %o3
- stb %o3, [%o1]
- membar #Sync ! sync error barrier
- stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
- retl
- mov %g0, %o0 ! return 0
- .align 16
-.ci_sm_word:
- subcc %o2, 4, %o2 ! update count
- bgt,pt %ncc, .ci_sm_wordx
- lduwa [%o0]ASI_USER, %o3 ! read word
- addcc %o2, 3, %o2 ! restore count
- bz,pt %ncc, .ci_sm_exit
- stw %o3, [%o1] ! write word
- deccc %o2 ! reduce count for cc test
- add %o0, 4, %o0
- lduba [%o0]ASI_USER, %o3 ! load one byte
- bz,pt %ncc, .ci_sm_exit
- stb %o3, [%o1 + 4] ! store one byte
- inc %o0
- lduba [%o0]ASI_USER, %o3 ! load second byte
- deccc %o2
- bz,pt %ncc, .ci_sm_exit
- stb %o3, [%o1 + 5] ! store second byte
- inc %o0
- lduba [%o0]ASI_USER, %o3 ! load third byte
- stb %o3, [%o1 + 6] ! store third byte
-.ci_sm_exit:
- membar #Sync ! sync error barrier
- stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
- retl
- mov %g0, %o0 ! return 0
-
- .align 16
-.ci_med:
- xor %o0, %o1, %o3 ! setup alignment check
- btst 1, %o3
- bnz,pt %ncc, .ci_sm_movebytes ! unaligned
- nop
- btst 3, %o3
- bnz,pt %ncc, .ci_med_half ! halfword aligned
- nop
- btst 7, %o3
- bnz,pt %ncc, .ci_med_word ! word aligned
- nop
-.ci_med_long:
- btst 3, %o0 ! check for
- bz,pt %ncc, .ci_med_long1 ! word alignment
- nop
-.ci_med_long0:
- lduba [%o0]ASI_USER, %o3 ! load one byte
- inc %o0
- stb %o3,[%o1] ! store byte
- inc %o1
- btst 3, %o0
- bnz,pt %ncc, .ci_med_long0
- dec %o2
-.ci_med_long1: ! word aligned
- btst 7, %o0 ! check for long word
- bz,pt %ncc, .ci_med_long2
- nop
- lduwa [%o0]ASI_USER, %o3 ! load word
- add %o0, 4, %o0 ! advance SRC by 4
- stw %o3, [%o1] ! store word
- add %o1, 4, %o1 ! advance DST by 4
- sub %o2, 4, %o2 ! reduce count by 4
-!
-! Now long word aligned and have at least 32 bytes to move
-!
-.ci_med_long2:
- sub %o2, 31, %o2 ! adjust count to allow cc zero test
-.ci_med_lmove:
- ldxa [%o0]ASI_USER, %o3 ! read long word
- subcc %o2, 32, %o2 ! reduce count by 32
- stx %o3, [%o1] ! write long word
- add %o0, 8, %o0 ! advance SRC by 8
- ldxa [%o0]ASI_USER, %o3 ! repeat for a total of 4 long words
- add %o0, 8, %o0 ! advance SRC by 8
- stx %o3, [%o1 + 8]
- add %o1, 32, %o1 ! advance DST by 32
- ldxa [%o0]ASI_USER, %o3
- add %o0, 8, %o0 ! advance SRC by 8
- stx %o3, [%o1 - 16]
- ldxa [%o0]ASI_USER, %o3
- add %o0, 8, %o0 ! advance SRC by 8
- bgt,pt %ncc, .ci_med_lmove ! loop til 31 or fewer bytes left
- stx %o3, [%o1 - 8]
- addcc %o2, 24, %o2 ! restore count to long word offset
- ble,pt %ncc, .ci_med_lextra ! check for more long words to move
- nop
-.ci_med_lword:
- ldxa [%o0]ASI_USER, %o3 ! read long word
- subcc %o2, 8, %o2 ! reduce count by 8
- stx %o3, [%o1] ! write long word
- add %o0, 8, %o0 ! advance SRC by 8
- bgt,pt %ncc, .ci_med_lword ! loop til 7 or fewer bytes left
- add %o1, 8, %o1 ! advance DST by 8
-.ci_med_lextra:
- addcc %o2, 7, %o2 ! restore rest of count
- bz,pt %ncc, .ci_sm_exit ! if zero, then done
- deccc %o2
- bz,pt %ncc, .ci_sm_byte
- nop
- ba,pt %ncc, .ci_sm_half
- nop
-
- .align 16
- nop ! instruction alignment
- ! see discussion at start of file
-.ci_med_word:
- btst 3, %o0 ! check for
- bz,pt %ncc, .ci_med_word1 ! word alignment
- nop
-.ci_med_word0:
- lduba [%o0]ASI_USER, %o3 ! load one byte
- inc %o0
- stb %o3,[%o1] ! store byte
- inc %o1
- btst 3, %o0
- bnz,pt %ncc, .ci_med_word0
- dec %o2
-!
-! Now word aligned and have at least 36 bytes to move
-!
-.ci_med_word1:
- sub %o2, 15, %o2 ! adjust count to allow cc zero test
-.ci_med_wmove:
- lduwa [%o0]ASI_USER, %o3 ! read word
- subcc %o2, 16, %o2 ! reduce count by 16
- stw %o3, [%o1] ! write word
- add %o0, 4, %o0 ! advance SRC by 4
- lduwa [%o0]ASI_USER, %o3 ! repeat for a total of 4 words
- add %o0, 4, %o0 ! advance SRC by 4
- stw %o3, [%o1 + 4]
- add %o1, 16, %o1 ! advance DST by 16
- lduwa [%o0]ASI_USER, %o3
- add %o0, 4, %o0 ! advance SRC by 4
- stw %o3, [%o1 - 8]
- lduwa [%o0]ASI_USER, %o3
- add %o0, 4, %o0 ! advance SRC by 4
- bgt,pt %ncc, .ci_med_wmove ! loop til 15 or fewer bytes left
- stw %o3, [%o1 - 4]
- addcc %o2, 12, %o2 ! restore count to word offset
- ble,pt %ncc, .ci_med_wextra ! check for more words to move
- nop
-.ci_med_word2:
- lduwa [%o0]ASI_USER, %o3 ! read word
- subcc %o2, 4, %o2 ! reduce count by 4
- stw %o3, [%o1] ! write word
- add %o0, 4, %o0 ! advance SRC by 4
- bgt,pt %ncc, .ci_med_word2 ! loop til 3 or fewer bytes left
- add %o1, 4, %o1 ! advance DST by 4
-.ci_med_wextra:
- addcc %o2, 3, %o2 ! restore rest of count
- bz,pt %ncc, .ci_sm_exit ! if zero, then done
- deccc %o2
- bz,pt %ncc, .ci_sm_byte
- nop
- ba,pt %ncc, .ci_sm_half
- nop
-
- .align 16
- nop ! instruction alignment
- ! see discussion at start of file
-.ci_med_half:
- btst 1, %o0 ! check for
- bz,pt %ncc, .ci_med_half1 ! half word alignment
- nop
- lduba [%o0]ASI_USER, %o3 ! load one byte
- inc %o0
- stb %o3,[%o1] ! store byte
- inc %o1
- dec %o2
-!
-! Now half word aligned and have at least 38 bytes to move
-!
-.ci_med_half1:
- sub %o2, 7, %o2 ! adjust count to allow cc zero test
-.ci_med_hmove:
- lduha [%o0]ASI_USER, %o3 ! read half word
- subcc %o2, 8, %o2 ! reduce count by 8
- sth %o3, [%o1] ! write half word
- add %o0, 2, %o0 ! advance SRC by 2
- lduha [%o0]ASI_USER, %o3 ! repeat for a total of 4 halfwords
- add %o0, 2, %o0 ! advance SRC by 2
- sth %o3, [%o1 + 2]
- add %o1, 8, %o1 ! advance DST by 8
- lduha [%o0]ASI_USER, %o3
- add %o0, 2, %o0 ! advance SRC by 2
- sth %o3, [%o1 - 4]
- lduha [%o0]ASI_USER, %o3
- add %o0, 2, %o0 ! advance SRC by 2
- bgt,pt %ncc, .ci_med_hmove ! loop til 7 or fewer bytes left
- sth %o3, [%o1 - 2]
- addcc %o2, 7, %o2 ! restore count
- bz,pt %ncc, .ci_sm_exit
- deccc %o2
- bz,pt %ncc, .ci_sm_byte
- nop
- ba,pt %ncc, .ci_sm_half
- nop
-
-.sm_copyin_err:
- membar #Sync
- stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
- mov SM_SAVE_SRC, %o0
- mov SM_SAVE_DST, %o1
- mov SM_SAVE_COUNT, %o2
- ldn [THREAD_REG + T_COPYOPS], %o3 ! check for copyop handler
- tst %o3
- bz,pt %ncc, 3f ! if not, return error
- nop
- ldn [%o3 + CP_COPYIN], %o5 ! if handler, invoke it with
- jmp %o5 ! original arguments
- nop
-3:
- retl
- or %g0, -1, %o0 ! return errno value
-
- SET_SIZE(copyin)
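
The medium-size loops above (.ci_med_lmove and friends) all use the same count-bias trick: subtract unroll_bytes - 1 up front so the loop test is a plain sign check, then add part of the bias back to recover the residue. A C sketch of the long-word case, with illustrative names:

    #include <stddef.h>
    #include <stdint.h>

    /* Callers guarantee nbytes >= 32, matching the "at least 32 bytes"
     * comment before .ci_med_long2. */
    static void copy_longwords(const uint64_t *src, uint64_t *dst, size_t nbytes)
    {
        ptrdiff_t n = (ptrdiff_t)nbytes - 31;   /* bias: loop while n > 0 */

        do {
            dst[0] = src[0]; dst[1] = src[1];   /* 4 long words */
            dst[2] = src[2]; dst[3] = src[3];   /* = 32 bytes per pass */
            src += 4; dst += 4;
            n -= 32;
        } while (n > 0);
        /* n + 24 mirrors the addcc after the loop: if positive, whole
         * 8-byte words remain; n + 31 is the exact residual byte count. */
    }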
-
-
-/*
- * The _more entry points are not intended to be used directly by
- * any caller from outside this file. They are provided to allow
- * profiling and DTrace of the portions of the copy code that use
- * the floating point registers.
- * This entry is particularly important as DTRACE (at least as of
- * 4/2004) does not support leaf functions.
- */
-
- ENTRY(copyin_more)
-.copyin_more:
- save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
- set .copyin_err, REAL_LOFAULT
-
-/*
- * Copy ins that reach here are larger than VIS_COPY_THRESHOLD bytes
- */
-.do_copyin:
- set copyio_fault, %l7 ! copyio_fault is lofault val
-
- ldn [THREAD_REG + T_LOFAULT], %l6 ! save existing handler
- membar #Sync ! sync error barrier
- stn %l7, [THREAD_REG + T_LOFAULT] ! set t_lofault
-
- mov %i0, SAVE_SRC
- mov %i1, SAVE_DST
- mov %i2, SAVE_COUNT
-
- FP_NOMIGRATE(6, 7)
-
- rd %fprs, %o2 ! check for unused fp
- st %o2, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET] ! save orig %fprs
-
- ! FPU enabled ? If not, enable it.
- btst FPRS_FEF, %o2
- bz,a,pt %icc, .do_blockcopyin
- wr %g0, FPRS_FEF, %fprs
-
- ! save the FP registers even if DU is not set.
-
- BST_FPQ3Q4_TOSTACK(%o2)
-
-.do_blockcopyin:
- rd %gsr, %o2
- stx %o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET] ! save gsr
- or %l6, FPUSED_FLAG, %l6
-
- andcc DST, VIS_BLOCKSIZE - 1, TMP
- mov ASI_USER, %asi
- bz,pt %ncc, 2f
- neg TMP
- add TMP, VIS_BLOCKSIZE, TMP
-
- ! TMP = bytes required to align DST on FP_BLOCK boundary
- ! Using SRC as a tmp here
- cmp TMP, 3
- bleu,pt %ncc, 1f
- sub CNT,TMP,CNT ! adjust main count
- sub TMP, 3, TMP ! adjust for end of loop test
-.ci_blkalign:
- lduba [REALSRC]%asi, SRC ! move 4 bytes per loop iteration
- stb SRC, [DST]
- subcc TMP, 4, TMP
- lduba [REALSRC + 1]%asi, SRC
- add REALSRC, 4, REALSRC
- stb SRC, [DST + 1]
- lduba [REALSRC - 2]%asi, SRC
- add DST, 4, DST
- stb SRC, [DST - 2]
- lduba [REALSRC - 1]%asi, SRC
- bgu,pt %ncc, .ci_blkalign
- stb SRC, [DST - 1]
-
- addcc TMP, 3, TMP ! restore count adjustment
- bz,pt %ncc, 2f ! no bytes left?
- nop
-1: lduba [REALSRC]%asi, SRC
- inc REALSRC
- inc DST
- deccc TMP
- bgu %ncc, 1b
- stb SRC, [DST - 1]
-
-2:
- andn REALSRC, 0x7, SRC
- alignaddr REALSRC, %g0, %g0
-
- ! SRC - 8-byte aligned
- ! DST - 64-byte aligned
- prefetcha [SRC]%asi, #one_read
- prefetcha [SRC + (1 * VIS_BLOCKSIZE)]%asi, #one_read
- prefetcha [SRC + (2 * VIS_BLOCKSIZE)]%asi, #one_read
- prefetcha [SRC + (3 * VIS_BLOCKSIZE)]%asi, #one_read
- ldda [SRC]%asi, %d32
-#if FIRST_PREFETCH > 4
- prefetcha [SRC + (4 * VIS_BLOCKSIZE)]%asi, #one_read
-#endif
- ldda [SRC + 0x08]%asi, %d34
-#if FIRST_PREFETCH > 5
- prefetcha [SRC + (5 * VIS_BLOCKSIZE)]%asi, #one_read
-#endif
- ldda [SRC + 0x10]%asi, %d36
-#if FIRST_PREFETCH > 6
- prefetcha [SRC + (6 * VIS_BLOCKSIZE)]%asi, #one_read
-#endif
- faligndata %d32, %d34, %d48
- ldda [SRC + 0x18]%asi, %d38
-#if FIRST_PREFETCH > 7
- prefetcha [SRC + (7 * VIS_BLOCKSIZE)]%asi, #one_read
-#endif
- faligndata %d34, %d36, %d50
- ldda [SRC + 0x20]%asi, %d40
- faligndata %d36, %d38, %d52
- ldda [SRC + 0x28]%asi, %d42
- faligndata %d38, %d40, %d54
- ldda [SRC + 0x30]%asi, %d44
- faligndata %d40, %d42, %d56
- ldda [SRC + 0x38]%asi, %d46
- faligndata %d42, %d44, %d58
- ldda [SRC + VIS_BLOCKSIZE]%asi, %d32
- sub CNT, VIS_BLOCKSIZE, CNT
- add SRC, VIS_BLOCKSIZE, SRC
- add REALSRC, VIS_BLOCKSIZE, REALSRC
- ba,a,pt %ncc, 1f
- nop
- .align ICACHE_LINE_SIZE
-1:
- ldda [SRC + 0x08]%asi, %d34
- faligndata %d44, %d46, %d60
- ldda [SRC + 0x10]%asi, %d36
- faligndata %d46, %d32, %d62
- stda %d48, [DST]ASI_BLK_P
- ldda [SRC + 0x18]%asi, %d38
- faligndata %d32, %d34, %d48
- ldda [SRC + 0x20]%asi, %d40
- faligndata %d34, %d36, %d50
- ldda [SRC + 0x28]%asi, %d42
- faligndata %d36, %d38, %d52
- ldda [SRC + 0x30]%asi, %d44
- faligndata %d38, %d40, %d54
- ldda [SRC + 0x38]%asi, %d46
- faligndata %d40, %d42, %d56
- sub CNT, VIS_BLOCKSIZE, CNT
- ldda [SRC + VIS_BLOCKSIZE]%asi, %d32
- faligndata %d42, %d44, %d58
- prefetcha [SRC + ((FIRST_PREFETCH) * VIS_BLOCKSIZE) + 8]%asi, #one_read
- add DST, VIS_BLOCKSIZE, DST
- prefetcha [SRC + ((SECOND_PREFETCH) * VIS_BLOCKSIZE)]%asi, #one_read
- add REALSRC, VIS_BLOCKSIZE, REALSRC
- cmp CNT, VIS_BLOCKSIZE + 8
- bgu,pt %ncc, 1b
- add SRC, VIS_BLOCKSIZE, SRC
-
- ! only if REALSRC & 0x7 is 0
- cmp CNT, VIS_BLOCKSIZE
- bne %ncc, 3f
- andcc REALSRC, 0x7, %g0
- bz,pt %ncc, 2f
- nop
-3:
- faligndata %d44, %d46, %d60
- faligndata %d46, %d32, %d62
- stda %d48, [DST]ASI_BLK_P
- add DST, VIS_BLOCKSIZE, DST
- ba,pt %ncc, 3f
- nop
-2:
- ldda [SRC + 0x08]%asi, %d34
- faligndata %d44, %d46, %d60
- ldda [SRC + 0x10]%asi, %d36
- faligndata %d46, %d32, %d62
- stda %d48, [DST]ASI_BLK_P
- ldda [SRC + 0x18]%asi, %d38
- ldda [SRC + 0x20]%asi, %d40
- ldda [SRC + 0x28]%asi, %d42
- ldda [SRC + 0x30]%asi, %d44
- ldda [SRC + 0x38]%asi, %d46
- sub CNT, VIS_BLOCKSIZE, CNT
- add DST, VIS_BLOCKSIZE, DST
- add SRC, VIS_BLOCKSIZE, SRC
- add REALSRC, VIS_BLOCKSIZE, REALSRC
- stda %d32, [DST]ASI_BLK_P
- add DST, VIS_BLOCKSIZE, DST
- ba,a,pt %ncc, 4f
- nop
-
-3: tst CNT
- bz,a %ncc, 4f
- nop
-
-5: lduba [REALSRC]ASI_USER, TMP
- inc REALSRC
- inc DST
- deccc CNT
- bgu %ncc, 5b
- stb TMP, [DST - 1]
-4:
-
-.copyin_exit:
- membar #Sync
-
- ldx [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2 ! restore gsr
- wr %o2, 0, %gsr
-
- ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
- btst FPRS_FEF, %o3
- bz,pt %icc, 4f
- nop
-
- BLD_FPQ3Q4_FROMSTACK(%o2)
-
- ba,pt %ncc, 1f
- wr %o3, 0, %fprs ! restore fprs
-
-4:
- FZEROQ3Q4
- wr %o3, 0, %fprs ! restore fprs
-
-1:
- membar #Sync ! sync error barrier
- andn %l6, FPUSED_FLAG, %l6
- stn %l6, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
- FP_ALLOWMIGRATE(5, 6)
- ret
- restore %g0, 0, %o0
-/*
- * We got here because of a fault during copyin
- * Errno value is in ERRNO, but DDI/DKI says return -1 (sigh).
- */
-.copyin_err:
- ldn [THREAD_REG + T_COPYOPS], %o4 ! check for copyop handler
- tst %o4
- bz,pt %ncc, 2f ! if not, return error
- nop
- ldn [%o4 + CP_COPYIN], %g2 ! if handler, invoke it with
- jmp %g2 ! original arguments
- restore %g0, 0, %g0 ! dispose of copy window
-2:
- ret
- restore %g0, -1, %o0 ! return error value
-
-
- SET_SIZE(copyin_more)
-
-#endif /* lint */
-
-#ifdef lint
-
-/*ARGSUSED*/
-int
-xcopyin(const void *uaddr, void *kaddr, size_t count)
-{ return (0); }
-
-#else /* lint */
-
- ENTRY(xcopyin)
-
- cmp %o2, VIS_COPY_THRESHOLD ! check for leaf rtn case
- bleu,pt %ncc, .xcopyin_small ! go to small copy
- xor %o0, %o1, %o3 ! are src, dst alignable?
- btst 7, %o3 !
- bz,pt %ncc, .xcopyin_8 ! check for longword alignment
- nop
- btst 1, %o3 !
- bz,pt %ncc, .xcopyin_2 ! check for half-word
- nop
- sethi %hi(hw_copy_limit_1), %o3 ! Check copy limit
- ld [%o3 + %lo(hw_copy_limit_1)], %o3
- tst %o3
- bz,pn %icc, .xcopyin_small ! if zero, disable HW copy
- cmp %o2, %o3 ! if length <= limit
- bleu,pt %ncc, .xcopyin_small ! go to small copy
- nop
- ba,pt %ncc, .xcopyin_more ! otherwise go to large copy
- nop
-.xcopyin_2:
- btst 3, %o3 !
- bz,pt %ncc, .xcopyin_4 ! check for word alignment
- nop
- sethi %hi(hw_copy_limit_2), %o3 ! Check copy limit
- ld [%o3 + %lo(hw_copy_limit_2)], %o3
- tst %o3
- bz,pn %icc, .xcopyin_small ! if zero, disable HW copy
- cmp %o2, %o3 ! if length <= limit
- bleu,pt %ncc, .xcopyin_small ! go to small copy
- nop
- ba,pt %ncc, .xcopyin_more ! otherwise go to large copy
- nop
-.xcopyin_4:
- ! already checked longword, must be word aligned
- sethi %hi(hw_copy_limit_4), %o3 ! Check copy limit
- ld [%o3 + %lo(hw_copy_limit_4)], %o3
- tst %o3
- bz,pn %icc, .xcopyin_small ! if zero, disable HW copy
- cmp %o2, %o3 ! if length <= limit
- bleu,pt %ncc, .xcopyin_small ! go to small copy
- nop
- ba,pt %ncc, .xcopyin_more ! otherwise go to large copy
- nop
-.xcopyin_8:
- sethi %hi(hw_copy_limit_8), %o3 ! Check copy limit
- ld [%o3 + %lo(hw_copy_limit_8)], %o3
- tst %o3
- bz,pn %icc, .xcopyin_small ! if zero, disable HW copy
- cmp %o2, %o3 ! if length <= limit
- bleu,pt %ncc, .xcopyin_small ! go to small copy
- nop
- ba,pt %ncc, .xcopyin_more ! otherwise go to large copy
- nop
-
-.xcopyin_small:
- sethi %hi(.sm_xcopyin_err), %o5 ! .sm_xcopyin_err is lofault value
- or %o5, %lo(.sm_xcopyin_err), %o5
- ldn [THREAD_REG + T_LOFAULT], %o4 ! set/save t_lofault
- membar #Sync ! sync error barrier
- ba,pt %ncc, .sm_do_copyin ! common code
- stn %o5, [THREAD_REG + T_LOFAULT]
-
-.xcopyin_more:
- save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
- sethi %hi(.xcopyin_err), REAL_LOFAULT ! .xcopyin_err is lofault value
- ba,pt %ncc, .do_copyin
- or REAL_LOFAULT, %lo(.xcopyin_err), REAL_LOFAULT
-
-/*
- * We got here because of fault during xcopyin
- * Errno value is in ERRNO
- */
-.xcopyin_err:
- ldn [THREAD_REG + T_COPYOPS], %o4 ! check for copyop handler
- tst %o4
- bz,pt %ncc, 2f ! if not, return error
- nop
- ldn [%o4 + CP_XCOPYIN], %g2 ! if handler, invoke it with
- jmp %g2 ! original arguments
- restore %g0, 0, %g0 ! dispose of copy window
-2:
- ret
- restore ERRNO, 0, %o0 ! return errno value
-
-.sm_xcopyin_err:
-
- membar #Sync
- stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
- mov SM_SAVE_SRC, %o0
- mov SM_SAVE_DST, %o1
- mov SM_SAVE_COUNT, %o2
- ldn [THREAD_REG + T_COPYOPS], %o3 ! check for copyop handler
- tst %o3
- bz,pt %ncc, 3f ! if not, return error
- nop
- ldn [%o3 + CP_XCOPYIN], %o5 ! if handler, invoke it with
- jmp %o5 ! original arguments
- nop
-3:
- retl
- or %g1, 0, %o0 ! return errno value
-
- SET_SIZE(xcopyin)
-
-#endif /* lint */
-
-#ifdef lint
-
-/*ARGSUSED*/
-int
-xcopyin_little(const void *uaddr, void *kaddr, size_t count)
-{ return (0); }
-
-#else /* lint */
-
- ENTRY(xcopyin_little)
- sethi %hi(.xcopyio_err), %o5
- or %o5, %lo(.xcopyio_err), %o5
- ldn [THREAD_REG + T_LOFAULT], %o4
- membar #Sync ! sync error barrier
- stn %o5, [THREAD_REG + T_LOFAULT]
- mov %o4, %o5
-
- subcc %g0, %o2, %o3
- add %o0, %o2, %o0
- bz,pn %ncc, 2f ! check for zero bytes
- sub %o2, 1, %o4
- add %o0, %o4, %o0 ! start w/last byte
- add %o1, %o2, %o1
- lduba [%o0 + %o3]ASI_AIUSL, %o4
-
-1: stb %o4, [%o1 + %o3]
- inccc %o3
- sub %o0, 2, %o0 ! get next byte
- bcc,a,pt %ncc, 1b
- lduba [%o0 + %o3]ASI_AIUSL, %o4
-
-2:
- membar #Sync ! sync error barrier
- stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
- retl
- mov %g0, %o0 ! return (0)
-
-.xcopyio_err:
- membar #Sync ! sync error barrier
- stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
- retl
- mov %g1, %o0
-
- SET_SIZE(xcopyin_little)
-
-#endif /* lint */
-
-
-/*
- * Copy a block of storage - must not overlap (from + len <= to).
- * No fault handler installed (to be called under on_fault())
- */
-#if defined(lint)
-
-/* ARGSUSED */
-void
-copyin_noerr(const void *ufrom, void *kto, size_t count)
-{}
-
-#else /* lint */
- ENTRY(copyin_noerr)
-
- cmp %o2, VIS_COPY_THRESHOLD ! check for leaf rtn case
- bleu,pt %ncc, .copyin_ne_small ! go to small copy
- xor %o0, %o1, %o3 ! are src, dst alignable?
- btst 7, %o3 !
- bz,pt %ncc, .copyin_ne_8 ! check for longword alignment
- nop
- btst 1, %o3 !
- bz,pt %ncc, .copyin_ne_2 ! check for half-word
- nop
- sethi %hi(hw_copy_limit_1), %o3 ! Check copy limit
- ld [%o3 + %lo(hw_copy_limit_1)], %o3
- tst %o3
- bz,pn %icc, .copyin_ne_small ! if zero, disable HW copy
- cmp %o2, %o3 ! if length <= limit
- bleu,pt %ncc, .copyin_ne_small ! go to small copy
- nop
- ba,pt %ncc, .copyin_noerr_more ! otherwise go to large copy
- nop
-.copyin_ne_2:
- btst 3, %o3 !
- bz,pt %ncc, .copyin_ne_4 ! check for word alignment
- nop
- sethi %hi(hw_copy_limit_2), %o3 ! Check copy limit
- ld [%o3 + %lo(hw_copy_limit_2)], %o3
- tst %o3
- bz,pn %icc, .copyin_ne_small ! if zero, disable HW copy
- cmp %o2, %o3 ! if length <= limit
- bleu,pt %ncc, .copyin_ne_small ! go to small copy
- nop
- ba,pt %ncc, .copyin_noerr_more ! otherwise go to large copy
- nop
-.copyin_ne_4:
- ! already checked longword, must be word aligned
- sethi %hi(hw_copy_limit_4), %o3 ! Check copy limit
- ld [%o3 + %lo(hw_copy_limit_4)], %o3
- tst %o3
- bz,pn %icc, .copyin_ne_small ! if zero, disable HW copy
- cmp %o2, %o3 ! if length <= limit
- bleu,pt %ncc, .copyin_ne_small ! go to small copy
- nop
- ba,pt %ncc, .copyin_noerr_more ! otherwise go to large copy
- nop
-.copyin_ne_8:
- sethi %hi(hw_copy_limit_8), %o3 ! Check copy limit
- ld [%o3 + %lo(hw_copy_limit_8)], %o3
- tst %o3
- bz,pn %icc, .copyin_ne_small ! if zero, disable HW copy
- cmp %o2, %o3 ! if length <= limit
- bleu,pt %ncc, .copyin_ne_small ! go to small copy
- nop
- ba,pt %ncc, .copyin_noerr_more ! otherwise go to large copy
- nop
-
-.copyin_ne_small:
- ldn [THREAD_REG + T_LOFAULT], %o4
- tst %o4
- bz,pn %ncc, .sm_do_copyin
- nop
- sethi %hi(.sm_copyio_noerr), %o5
- or %o5, %lo(.sm_copyio_noerr), %o5
- membar #Sync ! sync error barrier
- ba,pt %ncc, .sm_do_copyin
- stn %o5, [THREAD_REG + T_LOFAULT] ! set/save t_lofault
-
-.copyin_noerr_more:
- save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
- sethi %hi(.copyio_noerr), REAL_LOFAULT
- ba,pt %ncc, .do_copyin
- or REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT
-
-.copyio_noerr:
- jmp %l6
- restore %g0,0,%g0
-
-.sm_copyio_noerr:
- membar #Sync
- stn %o4, [THREAD_REG + T_LOFAULT] ! restore t_lofault
- jmp %o4
- nop
-
- SET_SIZE(copyin_noerr)
-#endif /* lint */
-
-/*
- * Copy a block of storage - must not overlap (from + len <= to).
- * No fault handler installed (to be called under on_fault())
- */
-
-#if defined(lint)
-
-/* ARGSUSED */
-void
-copyout_noerr(const void *kfrom, void *uto, size_t count)
-{}
-
-#else /* lint */
- ENTRY(copyout_noerr)
-
- cmp %o2, VIS_COPY_THRESHOLD ! check for leaf rtn case
- bleu,pt %ncc, .copyout_ne_small ! go to small copy
- xor %o0, %o1, %o3 ! are src, dst alignable?
- btst 7, %o3 !
- bz,pt %ncc, .copyout_ne_8 ! check for longword alignment
- nop
- btst 1, %o3 !
- bz,pt %ncc, .copyout_ne_2 ! check for half-word
- nop
- sethi %hi(hw_copy_limit_1), %o3 ! Check copy limit
- ld [%o3 + %lo(hw_copy_limit_1)], %o3
- tst %o3
- bz,pn %icc, .copyout_ne_small ! if zero, disable HW copy
- cmp %o2, %o3 ! if length <= limit
- bleu,pt %ncc, .copyout_ne_small ! go to small copy
- nop
- ba,pt %ncc, .copyout_noerr_more ! otherwise go to large copy
- nop
-.copyout_ne_2:
- btst 3, %o3 !
- bz,pt %ncc, .copyout_ne_4 ! check for word alignment
- nop
- sethi %hi(hw_copy_limit_2), %o3 ! Check copy limit
- ld [%o3 + %lo(hw_copy_limit_2)], %o3
- tst %o3
- bz,pn %icc, .copyout_ne_small ! if zero, disable HW copy
- cmp %o2, %o3 ! if length <= limit
- bleu,pt %ncc, .copyout_ne_small ! go to small copy
- nop
- ba,pt %ncc, .copyout_noerr_more ! otherwise go to large copy
- nop
-.copyout_ne_4:
- ! already checked longword, must be word aligned
- sethi %hi(hw_copy_limit_4), %o3 ! Check copy limit
- ld [%o3 + %lo(hw_copy_limit_4)], %o3
- tst %o3
- bz,pn %icc, .copyout_ne_small ! if zero, disable HW copy
- cmp %o2, %o3 ! if length <= limit
- bleu,pt %ncc, .copyout_ne_small ! go to small copy
- nop
- ba,pt %ncc, .copyout_noerr_more ! otherwise go to large copy
- nop
-.copyout_ne_8:
- sethi %hi(hw_copy_limit_8), %o3 ! Check copy limit
- ld [%o3 + %lo(hw_copy_limit_8)], %o3
- tst %o3
- bz,pn %icc, .copyout_ne_small ! if zero, disable HW copy
- cmp %o2, %o3 ! if length <= limit
- bleu,pt %ncc, .copyout_ne_small ! go to small copy
- nop
- ba,pt %ncc, .copyout_noerr_more ! otherwise go to large copy
- nop
-
-.copyout_ne_small:
- ldn [THREAD_REG + T_LOFAULT], %o4
- tst %o4
- bz,pn %ncc, .sm_do_copyout
- nop
- sethi %hi(.sm_copyio_noerr), %o5
- or %o5, %lo(.sm_copyio_noerr), %o5
- membar #Sync ! sync error barrier
- ba,pt %ncc, .sm_do_copyout
- stn %o5, [THREAD_REG + T_LOFAULT] ! set/save t_lofault
-
-.copyout_noerr_more:
- save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
- sethi %hi(.copyio_noerr), REAL_LOFAULT
- ba,pt %ncc, .do_copyout
- or REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT
-
- SET_SIZE(copyout_noerr)
-#endif /* lint */
-
-
-/*
- * hwblkclr - clears block-aligned, block-multiple-sized regions that are
- * at least 256 bytes long using block stores.  If the criteria for
- * using this routine are not met it falls back to bzero and syncs the
- * I$ via rock_sync_icache; 0 is returned on both paths.
- * Caller is responsible for ensuring use_hw_bzero is true and that
- * kpreempt_disable() has been called.
- */
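
The eligibility test described here is the first thing the routine below does. A C sketch; VIS_BLOCKSIZE is assumed to be 64, the block size used throughout this file:

    #include <stddef.h>
    #include <stdint.h>

    #define VIS_BLOCKSIZE 64    /* assumed block size, per this file */

    /* Punt to bzero unless the region is block-aligned, at least 256
     * bytes, and a whole number of blocks. */
    static int hwblkclr_eligible(const void *addr, size_t len)
    {
        if (((uintptr_t)addr & (VIS_BLOCKSIZE - 1)) != 0)
            return (0);
        if (len < 256)
            return (0);
        if ((len & (VIS_BLOCKSIZE - 1)) != 0)
            return (0);
        return (1);
    }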
-#ifdef lint
-/*ARGSUSED*/
-int
-hwblkclr(void *addr, size_t len)
-{
- return(0);
-}
-#else /* lint */
- ! %i0 - start address
- ! %i1 - length of region (multiple of 64)
- ! %l0 - saved fprs
- ! %l1 - pointer to saved %d32 block
- ! %l2 - saved curthread->t_lwp
-
-
- ENTRY(hwblkclr)
- ! get another window w/space for one aligned block of saved fpregs
- save %sp, -SA(MINFRAME + 2*VIS_BLOCKSIZE), %sp
-
-#ifdef ROCK_CR_6654578
- ! Address aligned to 128 byte
- andcc %i0, ST_CACHE_ALIGN, %g0
- bnz,pn %ncc, .normal_hwblkclr
- nop
- ! multiple of 8k len, call page_hwblkclr
- set PAGE_MASK, %i3
- andcc %i1, %i3, %g0
- bnz,pn %ncc, .normal_hwblkclr
- nop
- mov %i0, %o0
- call page_hwblkclr
- mov %i1, %o1
- ret
- restore %g0, 0, %o0 ! I$ sync not required
-
-.normal_hwblkclr:
-#endif
- ! Must be block-aligned
- andcc %i0, (VIS_BLOCKSIZE-1), %g0
- bnz,pn %ncc, 1f
- nop
-
- ! ... and must be 256 bytes or more
- cmp %i1, 256
- blu,pn %ncc, 1f
- nop
-
- ! ... and length must be a multiple of VIS_BLOCKSIZE
- andcc %i1, (VIS_BLOCKSIZE-1), %g0
- bz,pn %ncc, 2f
- nop
-
-1: ! punt: call bzero, then sync the I$ here so the caller need not
- mov %i0, %o0
- call bzero
- mov %i1, %o1
- ! call rock_sync_icache
- mov %i0, %o0
- call rock_sync_icache
- mov %i0, %o0
- ret
- restore %g0, 0, %o0 ! did not use block operations
-
-2: mov %g0, %l3 ! clear flag to say fp regs not saved
- rd %fprs, %l0 ! check for unused fp
-
- ! FPU enabled ? If not, enable it.
- btst FPRS_FEF, %l0
- bz,a,pt %icc, 1f
- wr %g0, FPRS_FEF, %fprs
-
- ! save the FP registers even if DU is not set.
-
- membar #Sync
- add %fp, STACK_BIAS - 65, %l1
- and %l1, -VIS_BLOCKSIZE, %l1
- stda %d32, [%l1]ASI_BLK_P
- ! Set a flag saying fp regs are saved.
- mov 1, %l3
-
- ! Need to wait only here for the above save to be completed
- membar #StoreStore|#StoreLoad|#LoadStore
-
-1: wr %g0, ASI_BLK_P, %asi
-
- ! Clear block
- movxtod %g0, %d32
- movxtod %g0, %d34
- fsrc1 %d32, %d36
- fsrc1 %d32, %d38
- fsrc1 %d32, %d40
- fsrc1 %d32, %d42
- fsrc1 %d32, %d44
- fsrc1 %d32, %d46
-
- mov 256, %i3
- ba,pt %ncc, .pz_doblock
- nop
-
-.pz_blkstart:
- ! stda %d32, [%i0 + 192]%asi ! in dly slot of branch that got us here
-#ifdef ROCK_CR_6654578
- prefetcha [%i0 + VIS_COPY_THRESHOLD + 128]%asi, #n_writes
-#endif
- stda %d32, [%i0 + 128]%asi
-#ifdef ROCK_CR_6654578
- prefetcha [%i0 + VIS_COPY_THRESHOLD + 64]%asi, #n_writes
-#endif
- stda %d32, [%i0 + 64]%asi
-#ifdef ROCK_CR_6654578
- prefetcha [%i0 + VIS_COPY_THRESHOLD + 0]%asi, #n_writes
-#endif
- stda %d32, [%i0]%asi
-.pz_zinst:
- add %i0, %i3, %i0
- sub %i1, %i3, %i1
-.pz_doblock:
-#ifdef ROCK_CR_6654578
- prefetcha [%i0 + VIS_COPY_THRESHOLD + 192]%asi, #n_writes
-#endif
- cmp %i1, 256
- bgeu,a %ncc, .pz_blkstart
- stda %d32, [%i0 + 192]%asi
-
- cmp %i1, 64
- blu %ncc, .pz_finish
-
- andn %i1, (64-1), %i3
- srl %i3, 4, %i2 ! using blocks, 1 instr / 16 words
- set .pz_zinst, %i4
- sub %i4, %i2, %i4
- jmp %i4
- nop
-
-.pz_finish:
- brz,a %l3, .pz_finished
- wr %l0, 0, %fprs ! restore fprs
-
- ! restore fpregs from stack
- ldda [%l1]ASI_BLK_P, %d32
- wr %l0, 0, %fprs ! restore fprs
-
-.pz_finished:
- membar #Sync
- ret
- restore %g0, 0, %o0 ! return (bzero or not)
-
- SET_SIZE(hwblkclr)
-#endif /* lint */
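
The tail handling above (.pz_zinst) computes a jump backwards into the unrolled store sequence, one stda per 64-byte block, so up to three trailing blocks are cleared without a per-block loop test. The same idea in C is the classic Duff's device (a sketch; memset stands in for the block store):

    #include <string.h>

    /* blks is 0..3: the main loop has already handled regions >= 256
     * bytes, and the store order (high address first) matches .pz_blkstart. */
    static void clear_tail_blocks(char *p, int blks)
    {
        switch (blks) {
        case 3:
            memset(p + 128, 0, 64);     /* FALLTHROUGH */
        case 2:
            memset(p + 64, 0, 64);      /* FALLTHROUGH */
        case 1:
            memset(p, 0, 64);
        case 0:
            break;
        }
    }

For counts this small a computed entry beats a loop: there is exactly one store per block and no branch inside the sequence.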
-
-#ifdef lint
-/*ARGSUSED*/
-void
-hw_pa_bcopy32(uint64_t src, uint64_t dst)
-{}
-#else /*!lint */
- /*
- * Copy 32 bytes of data from src (%o0) to dst (%o1)
- * using physical addresses.
- */
- ENTRY_NP(hw_pa_bcopy32)
- rdpr %pstate, %g1
- andn %g1, PSTATE_IE, %g2
- wrpr %g0, %g2, %pstate
-
- rdpr %pstate, %g0
- ldxa [%o0]ASI_MEM, %o2
- add %o0, 8, %o0
- ldxa [%o0]ASI_MEM, %o3
- add %o0, 8, %o0
- ldxa [%o0]ASI_MEM, %o4
- add %o0, 8, %o0
- ldxa [%o0]ASI_MEM, %o5
- stxa %o2, [%o1]ASI_MEM
- add %o1, 8, %o1
- stxa %o3, [%o1]ASI_MEM
- add %o1, 8, %o1
- stxa %o4, [%o1]ASI_MEM
- add %o1, 8, %o1
- stxa %o5, [%o1]ASI_MEM
-
- retl
- wrpr %g0, %g1, %pstate
-
- SET_SIZE(hw_pa_bcopy32)
-
-#endif /* lint */
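
In C terms the routine is four load/store pairs bracketed by an interrupt mask. A sketch; ldphys/stphys and the interrupt helpers are illustrative stand-ins for the ldxa/stxa ASI_MEM accesses and the %pstate IE manipulation, not real interfaces:

    #include <stdint.h>

    extern uint64_t ldphys(uint64_t pa);        /* hypothetical */
    extern void stphys(uint64_t pa, uint64_t v);/* hypothetical */
    extern uint64_t intr_disable(void);         /* hypothetical */
    extern void intr_restore(uint64_t);         /* hypothetical */

    static void pa_bcopy32(uint64_t src, uint64_t dst)
    {
        uint64_t s = intr_disable();    /* no traps between the pairs */
        uint64_t t0 = ldphys(src),      t1 = ldphys(src + 8);
        uint64_t t2 = ldphys(src + 16), t3 = ldphys(src + 24);

        stphys(dst, t0);      stphys(dst + 8, t1);
        stphys(dst + 16, t2); stphys(dst + 24, t3);
        intr_restore(s);
    }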
-
-
-/*
- * Zero a block of storage.
- *
- * uzero is used by the kernel to zero a block in user address space.
- */
-
-
-#if defined(lint)
-
-/* ARGSUSED */
-int
-kzero(void *addr, size_t count)
-{ return(0); }
-
-/* ARGSUSED */
-void
-uzero(void *addr, size_t count)
-{}
-
-#else /* lint */
-
- ENTRY(uzero)
- !
- ! Set a new lo_fault handler only if we came in with one
- ! already specified.
- !
- wr %g0, ASI_USER, %asi
- ldn [THREAD_REG + T_LOFAULT], %o5
- tst %o5
- bz,pt %ncc, .do_zero
- sethi %hi(.zeroerr), %o2
- or %o2, %lo(.zeroerr), %o2
- membar #Sync
- ba,pt %ncc, .do_zero
- stn %o2, [THREAD_REG + T_LOFAULT]
-
- ENTRY(kzero)
- !
- ! Always set a lo_fault handler
- !
- wr %g0, ASI_P, %asi
- ldn [THREAD_REG + T_LOFAULT], %o5
- sethi %hi(.zeroerr), %o2
- or %o5, LOFAULT_SET, %o5
- or %o2, %lo(.zeroerr), %o2
- membar #Sync
- ba,pt %ncc, .do_zero
- stn %o2, [THREAD_REG + T_LOFAULT]
-
-/*
- * We got here because of a fault during kzero or if
- * uzero or bzero was called with t_lofault non-zero.
- * Otherwise we've already run screaming from the room.
- * Errno value is in %g1. Note that we're here iff
- * we did set t_lofault.
- */
-.zeroerr:
- !
- ! Undo asi register setting. Just set it to be the
- ! kernel default without checking.
- !
- wr %g0, ASI_P, %asi
-
- !
- ! We did set t_lofault. It may well have been zero coming in.
- !
-1:
- tst %o5
- membar #Sync
- bne,pn %ncc, 3f
- andncc %o5, LOFAULT_SET, %o5
-2:
- !
- ! Old handler was zero. Just return the error.
- !
- retl ! return
- mov %g1, %o0 ! error code from %g1
-3:
- !
- ! We're here because %o5 was non-zero. It was non-zero
- ! because either LOFAULT_SET was present, a previous fault
- ! handler was present or both. In all cases we need to reset
- ! T_LOFAULT to the value of %o5 after clearing LOFAULT_SET
- ! before we either simply return the error or we invoke the
- ! previously specified handler.
- !
- be %ncc, 2b
- stn %o5, [THREAD_REG + T_LOFAULT]
- jmp %o5 ! goto real handler
- nop
- SET_SIZE(kzero)
- SET_SIZE(uzero)
-
-#endif /* lint */
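
The .zeroerr unwinding is easier to follow in C. A sketch; the LOFAULT_SET value and the thread layout are placeholders for the real machine definitions:

    #include <stdint.h>

    #define LOFAULT_SET 2               /* placeholder for the real flag bit */

    struct thread { uintptr_t t_lofault; };
    extern struct thread *curthread;    /* stands in for THREAD_REG */

    /* 'saved' is the %o5 captured on entry: the previous t_lofault, with
     * LOFAULT_SET or'ed in by kzero().  'err' is the errno from %g1. */
    static int zeroerr_unwind(uintptr_t saved, int err)
    {
        if (saved == 0)
            return (err);               /* nothing to restore, just fail */
        uintptr_t old = saved & ~(uintptr_t)LOFAULT_SET;
        curthread->t_lofault = old;     /* restore before dispatching */
        if (old != 0)
            ((void (*)(void))old)();    /* jmp %o5: chain, does not return */
        return (err);
    }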
-
-/*
- * Zero a block of storage.
- */
-
-#if defined(lint)
-
-/* ARGSUSED */
-void
-bzero(void *addr, size_t count)
-{}
-
-#else /* lint */
-
- ENTRY(bzero)
-
- wr %g0, ASI_P, %asi
- ldn [THREAD_REG + T_LOFAULT], %o5 ! save old vector
- tst %o5
- bz,pt %ncc, .do_zero
- sethi %hi(.zeroerr), %o2
- or %o2, %lo(.zeroerr), %o2
- membar #Sync ! sync error barrier
- stn %o2, [THREAD_REG + T_LOFAULT] ! install new vector
-
-.do_zero:
- /*
- * If 0 bytes to xfer return
- */
- brnz %o1, continue_bzero
- nop
- ba .bzero_exit
- nop
-continue_bzero:
- prefetch [%o0],2
- cmp %o1, 8
- bge,pt %ncc, xfer_8_or_more
- nop
-
-.byteclr:
- deccc %o1 ! byte clearing loop
- stba %g0, [%o0]%asi
- bgu,pt %ncc, .byteclr
- inc %o0
- ba .bzero_exit
- nop
-
-xfer_8_or_more:
- andcc %o0, 7, %o3 ! is addr aligned on an 8 byte boundary?
- brz,pt %o3, blkchk
- sub %o3, 8, %o3 ! -(bytes till double aligned)
- add %o1, %o3, %o1 ! update o1 with new count
-1:
- stba %g0, [%o0]%asi
- inccc %o3
- bl,pt %ncc, 1b
- inc %o0
-
- ! Now addr is double word aligned
-blkchk:
- cmp %o1, 767 ! if large count use Block ld/st
- bg,pt %ncc,blkwr
- nop
- and %o1, 24, %o3 ! o3 is {0, 8, 16, 24}
- brz %o3, skip_dw_loop
- nop
-1: subcc %o3, 8, %o3 ! double-word loop
- stxa %g0, [%o0]%asi
- bgu,pt %ncc, 1b
- add %o0, 8, %o0
-skip_dw_loop:
- andncc %o1, 31, %o4 ! o4 has 32 byte aligned count
- brz,pn %o4, 3f
- nop
- ba loop_32byte
- nop
-
- .align ICACHE_LINE_SIZE
-
-loop_32byte:
- subcc %o4, 32, %o4 ! main loop, 32 bytes per iteration
- stxa %g0, [%o0]%asi
- stxa %g0, [%o0 + 8]%asi
- stxa %g0, [%o0 + 16]%asi
- stxa %g0, [%o0 + 24]%asi
- bne,pt %ncc, loop_32byte
- add %o0, 32, %o0
-3:
- and %o1, 7, %o1 ! o1 has the remaining bytes (<8)
- brnz %o1, .byteclr
- nop
- ba .bzero_exit
- nop
-blkwr:
- sub %o0,1,%o3
- andn %o3,0x7f,%o4
- add %o4,128,%o4
- prefetch [%o4],2 !prefetch next 128b
- prefetch [%o4+64],2
- prefetch [%o4+(2*64)],2
- prefetch [%o4+(3*64)],2
-
- andcc %o0,0x7f,%o3 !o3=0 , means it is already 128 align
- brz,pn %o3,aligned_on_128_bzero
- sub %o3,128,%o3
-
- add %o1,%o3,%o1
-align_to_128_bzero:
- stxa %g0,[%o0]%asi
- addcc %o3,8,%o3
- bl,pt %ncc,align_to_128_bzero
- add %o0,8,%o0
-
-
-
-aligned_on_128_bzero:
- ! if the addr is 512 byte aligned and bytes to zero
- ! are greater than or equal to 4096 do a stingray_optimized_bzero
- andcc %o0,0x1ff,%o3 ! Is addr 512 byte aligned ?
- brnz,pn %o3, 4f
- mov %o1,%g5
- set 4096, %g4
- subcc %o1, %g4, %g0
- bge,pn %ncc, stingray_optimized_bzero
- nop
-4:
- ! addr (dest. buffer) is not aligned to 512 bytes
- ! if the number of bytes to zero is less than 4096 after
- ! aligning the addr to 512 bytes, then do interleave128_bzero.
-
- sub %o0,8,%o4
- andn %o4,0x1ff,%o3
- add %o3,0x200,%o3 !o3 = addr aligned to 512 byte.
- sub %o3,%o0,%o3 !o3 = number of bytes to zero to align addr to 512
- sub %o1,%o3,%g5 !g5 = bytes to zero from 512 byte aligned addr
- set 4096, %g4
- subcc %g5, %g4, %g0
- bge,pn %ncc,6f
- nop
- ! clear %g5 to indicate that there is no need to do
- ! stingray_optimized_bzero
- mov %g0, %g5
- add %o0, %o1, %o4
- ba interleave128_bzero
- nop
-6:
- ! %g5 contains the number of bytes to zero after 512 byte alignment
- ! We zero the bytes in dest. buffer until it is 512 byte aligned
- ! and call stingray_optimized_bzero
- ! if the number of bytes to zero (until 512 alignment) is less than 256
- ! we call bzero_word, else we call interleave128_bzero
- mov %o3, %o1
- subcc %o3,256,%g0
- bl,pn %ncc,bzero_word
- add %o0,%o1,%o4 ! calc the last byte to write in %o4
- ba interleave128_bzero
- nop
-
- .align 64
-interleave128_bzero:
- ! %o0 has the dest. buffer addr
- ! %o1 has the number of bytes to zero
- ! %o4 has the addr of the dest. buffer at or beyond which no write
- ! is to be done.
- ! %g5 has the number of bytes to zero using stingray_optimized_bzero
-
- add %o0, 256, %o3
- prefetch [%o3], 2 !1st 64 byte line of next 256 byte block
- add %o0, 384, %o3
- prefetch [%o3], 2 !3rd 64 byte line of next 256 byte block
- add %o0, 320, %o3
- prefetch [%o3], 2 !2nd 64 byte line of next 256 byte block
- add %o0, 448, %o3
- prefetch [%o3], 2 !4th 64 byte line of next 256 byte block
- mov %o0, %o3
- stxa %g0,[%o3]%asi !1st 64 byte line
- add %o0,128,%o3
- stxa %g0,[%o3]%asi !3rd 64 byte line
- add %o0,8,%o3
- stxa %g0,[%o3]%asi
- add %o3,128,%o3
- stxa %g0,[%o3]%asi
- add %o0,(2 * 8),%o3
- stxa %g0,[%o3]%asi
- add %o3,128 ,%o3
- stxa %g0,[%o3]%asi
- add %o0,(3 * 8),%o3
- stxa %g0,[%o3]%asi
- add %o3,128,%o3
- stxa %g0,[%o3]%asi
- add %o0,(4 * 8),%o3
- stxa %g0,[%o3]%asi
- add %o3,128,%o3
- stxa %g0,[%o3]%asi
- add %o0,(5 * 8),%o3
- stxa %g0,[%o3]%asi
- add %o3,128,%o3
- stxa %g0,[%o3]%asi
- add %o0,(6 * 8),%o3
- stxa %g0,[%o3]%asi
- add %o3,128,%o3
- stxa %g0,[%o3]%asi
- add %o0,(7 * 8),%o3
- stxa %g0,[%o3]%asi
- add %o3,128,%o3
- stxa %g0,[%o3]%asi
- add %o0,(8 * 8),%o3
- stxa %g0,[%o3]%asi
- add %o3,128,%o3
- stxa %g0,[%o3]%asi
- add %o0,(9 * 8),%o3
- stxa %g0,[%o3]%asi
- add %o3,128,%o3
- stxa %g0,[%o3]%asi
- add %o0,(10 * 8),%o3
- stxa %g0,[%o3]%asi
- add %o3,128,%o3
- stxa %g0,[%o3]%asi
- add %o0,(11 * 8),%o3
- stxa %g0,[%o3]%asi
- add %o3,128,%o3
- stxa %g0,[%o3]%asi
- add %o0,(12 * 8),%o3
- stxa %g0,[%o3]%asi
- add %o3,128,%o3
- stxa %g0,[%o3]%asi
- add %o0,(13 * 8),%o3
- stxa %g0,[%o3]%asi
- add %o3,128,%o3
- stxa %g0,[%o3]%asi
- add %o0,(14 * 8),%o3
- stxa %g0,[%o3]%asi
- add %o3,128,%o3
- stxa %g0,[%o3]%asi
- add %o0,(15 * 8),%o3
- stxa %g0,[%o3]%asi
- add %o3,128,%o3
- stxa %g0,[%o3]%asi
- ! check that the next 256 byte clear will not exceed the number of
- ! bytes remaining to be zeroed.
- ! %g4 points to the dest buffer after clearing 256 bytes more.
- ! %o4 points to dest. buffer at or beyond which no writes should be done.
- add %o0,512,%g4
- subcc %o4,%g4,%g0
- bge,pt %ncc,interleave128_bzero
- add %o0,256,%o0
-
-bzero_word:
- and %o1,255,%o3
- and %o3,7,%o1
-
- ! Set the remaining doubles
- subcc %o3, 8, %o3 ! Can we store any doubles?
- bl,pn %ncc, 6f
- and %o1, 7, %o1 ! calc bytes left after doubles
-
-5:
- stxa %g0, [%o0]%asi
- subcc %o3, 8, %o3
- bge,pt %ncc, 5b
- add %o0, 8, %o0
-6:
- ! Set the remaining bytes
- brz %o1, can_we_do_stingray_optimized_bzero
-
-7:
- deccc %o1 ! byte clearing loop
- stba %g0, [%o0]%asi
- bgu,pt %ncc, 7b
- inc %o0
-can_we_do_stingray_optimized_bzero:
- mov %g5, %o1
- brnz,pn %o1, stingray_optimized_bzero
- nop
-
- ba .bzero_exit
- nop
-
-stingray_optimized_bzero:
- save %sp, -SA(MINFRAME), %sp
- mov %i0, %o0
- mov %i1, %o1
- mov %i2, %o2
- mov %i3, %o3
- mov %i5, %o5
-init:
- set 4096,%o2
-
- prefetch [%o0+0],2
- prefetch [%o0+(64*1)],2
- prefetch [%o0+(64*2)],2
- prefetch [%o0+(64*3)],2
- prefetch [%o0+(64*4)],2
- prefetch [%o0+(64*5)],2
- prefetch [%o0+(64*6)],2
- prefetch [%o0+(64*7)],2
- prefetch [%o0+(64*8)],2
- prefetch [%o0+(64*9)],2
- prefetch [%o0+(64*10)],2
- prefetch [%o0+(64*11)],2
- prefetch [%o0+(64*12)],2
- prefetch [%o0+(64*13)],2
- prefetch [%o0+(64*14)],2
- prefetch [%o0+(64*15)],2
- ba stingray_optimized_4k_zero_loop
- add %o0,%g5,%g5
- ! Local register usage:
- ! prefetching into L1 cache.
- ! %l3 dest. buffer at start of inner loop.
- ! %l5 iteration counter to make buddy loop execute 2 times.
- ! %l6 iteration counter to make inner loop execute 4 times.
- ! %l7 address at far ahead of current dest. buffer for prefetching
- ! into L2 cache.
-
- .align 64
-stingray_optimized_4k_zero_loop:
- set 2,%l5
- add %o0, 0, %l3
-bzero_buddyloop:
- set PF_FAR, %g4
- add %o0, %g4, %l7
-
- ! Prefetch ahead by 2 pages to get TLB entry in advance.
- set 2*PF_FAR, %g4
- add %o0, %g4, %g4
- prefetch [%g4+%g0],2
-
- set 4,%l6
- set 0, %g4
-
- ! Each iteration of the inner loop below writes 8 sequential lines.
- ! This loop is iterated 4 times, to move a total of 32 lines, all of
- ! which have the same value of PA[9], so we increment the base
- ! address by 1024 bytes in each iteration, which varies PA[10].
-bzero_innerloop:
- add %o0, PF_FAR, %o3
- prefetch [%o3],2
- add %o3, 64, %o3
- prefetch [%o3],2
- add %o3, 64, %o3
- prefetch [%o3],2
- add %o3, 64, %o3
- prefetch [%o3],2
- add %o3, 64, %o3
- prefetch [%o3],2
- add %o3, 64, %o3
- prefetch [%o3],2
- add %o3, 64, %o3
- prefetch [%o3],2
- add %o3, 64, %o3
- prefetch [%o3],2
-
- mov %o0, %o3
- stxa %g0,[%o3]%asi !1st 64 byte line
- add %o0,128,%o3
- stxa %g0,[%o3]%asi !3rd 64 byte line
- add %o0,8,%o3
- stxa %g0,[%o3]%asi
- add %o3,128,%o3
- stxa %g0,[%o3]%asi
- add %o0,(2 * 8),%o3
- stxa %g0,[%o3]%asi
- add %o3,128 ,%o3
- stxa %g0,[%o3]%asi
- add %o0,(3 * 8),%o3
- stxa %g0,[%o3]%asi
- add %o3,128,%o3
- stxa %g0,[%o3]%asi
- add %o0,(4 * 8),%o3
- stxa %g0,[%o3]%asi
- add %o3,128,%o3
- stxa %g0,[%o3]%asi
- add %o0,(5 * 8),%o3
- stxa %g0,[%o3]%asi
- add %o3,128,%o3
- stxa %g0,[%o3]%asi
- add %o0,(6 * 8),%o3
- stxa %g0,[%o3]%asi
- add %o3,128,%o3
- stxa %g0,[%o3]%asi
- add %o0,(7 * 8),%o3
- stxa %g0,[%o3]%asi
- add %o3,128,%o3
- stxa %g0,[%o3]%asi
- add %o0,(8 * 8),%o3
- stxa %g0,[%o3]%asi
- add %o3,128,%o3
- stxa %g0,[%o3]%asi
- add %o0,(9 * 8),%o3
- stxa %g0,[%o3]%asi
- add %o3,128,%o3
- stxa %g0,[%o3]%asi
- add %o0,(10 * 8),%o3
- stxa %g0,[%o3]%asi
- add %o3,128,%o3
- stxa %g0,[%o3]%asi
- add %o0,(11 * 8),%o3
- stxa %g0,[%o3]%asi
- add %o3,128,%o3
- stxa %g0,[%o3]%asi
- add %o0,(12 * 8),%o3
- stxa %g0,[%o3]%asi
- add %o3,128,%o3
- stxa %g0,[%o3]%asi
- add %o0,(13 * 8),%o3
- stxa %g0,[%o3]%asi
- add %o3,128,%o3
- stxa %g0,[%o3]%asi
- add %o0,(14 * 8),%o3
- stxa %g0,[%o3]%asi
- add %o3,128,%o3
- stxa %g0,[%o3]%asi
- add %o0,(15 * 8),%o3
- stxa %g0,[%o3]%asi
- add %o3,128,%o3
- stxa %g0,[%o3]%asi
-
- add %o0,256,%o0
-
- mov %o0, %o3
- stxa %g0,[%o3]%asi !1st 64 byte line
- add %o0,128,%o3
- stxa %g0,[%o3]%asi !3rd 64 byte line
- add %o0,8,%o3
- stxa %g0,[%o3]%asi
- add %o3,128,%o3
- stxa %g0,[%o3]%asi
- add %o0,(2 * 8),%o3
- stxa %g0,[%o3]%asi
- add %o3,128 ,%o3
- stxa %g0,[%o3]%asi
- add %o0,(3 * 8),%o3
- stxa %g0,[%o3]%asi
- add %o3,128,%o3
- stxa %g0,[%o3]%asi
- add %o0,(4 * 8),%o3
- stxa %g0,[%o3]%asi
- add %o3,128,%o3
- stxa %g0,[%o3]%asi
- add %o0,(5 * 8),%o3
- stxa %g0,[%o3]%asi
- add %o3,128,%o3
- stxa %g0,[%o3]%asi
- add %o0,(6 * 8),%o3
- stxa %g0,[%o3]%asi
- add %o3,128,%o3
- stxa %g0,[%o3]%asi
- add %o0,(7 * 8),%o3
- stxa %g0,[%o3]%asi
- add %o3,128,%o3
- stxa %g0,[%o3]%asi
- add %o0,(8 * 8),%o3
- stxa %g0,[%o3]%asi
- add %o3,128,%o3
- stxa %g0,[%o3]%asi
- add %o0,(9 * 8),%o3
- stxa %g0,[%o3]%asi
- add %o3,128,%o3
- stxa %g0,[%o3]%asi
- add %o0,(10 * 8),%o3
- stxa %g0,[%o3]%asi
- add %o3,128,%o3
- stxa %g0,[%o3]%asi
- add %o0,(11 * 8),%o3
- stxa %g0,[%o3]%asi
- add %o3,128,%o3
- stxa %g0,[%o3]%asi
- add %o0,(12 * 8),%o3
- stxa %g0,[%o3]%asi
- add %o3,128,%o3
- stxa %g0,[%o3]%asi
- add %o0,(13 * 8),%o3
- stxa %g0,[%o3]%asi
- add %o3,128,%o3
- stxa %g0,[%o3]%asi
- add %o0,(14 * 8),%o3
- stxa %g0,[%o3]%asi
- add %o3,128,%o3
- stxa %g0,[%o3]%asi
- add %o0,(15 * 8),%o3
- stxa %g0,[%o3]%asi
- add %o3,128,%o3
- stxa %g0,[%o3]%asi
-
- subcc %l6,1,%l6 ! Decrement the inner loop counter.
-
- ! Now increment by 256 + 512 so we don't toggle PA[9]
- add %o0, 768, %o0
-
- bg,pt %ncc,bzero_innerloop
- nop
- ! END OF INNER LOOP
-
- subcc %l5,1,%l5
- add %l3, 512, %o0 ! increment %o0 to first buddy line of dest.
- bg,pt %ncc, bzero_buddyloop
- nop
- add %o0, 3584, %o0 ! Advance both base addresses by 4k
- add %o0,%o2,%i5
- subcc %g5,%i5,%g0
- bge,pt %ncc,stingray_optimized_4k_zero_loop
- nop
-
- ! stingray_optimized_bzero_ends_here
-
- mov %o0, %i0
- mov %o1, %i1
- mov %o2, %i2
- mov %o3, %i3
- mov %o5, %i5
- restore
- sub %g5,%o0,%o1 ! how many bytes left
- brz,pn %o1,.bzero_exit
- mov %g0,%g5
- add %o0,%o1,%o4 ! calc the last byte to write in %o4
- subcc %o1,256,%g0
- bge,pt %ncc,interleave128_bzero
- mov %g0,%g5
-
- ba bzero_word
- nop
-
-.bzero_exit:
- !
- ! We're just concerned with whether t_lofault was set
- ! when we came in. We end up here from either kzero()
- ! or bzero(). kzero() *always* sets a lofault handler.
- ! It ors LOFAULT_SET into %o5 to indicate it has done
- ! this even if the value of %o5 is otherwise zero.
- ! bzero() sets a lofault handler *only* if one was
- ! previously set. Accordingly we need to examine
- ! %o5 and if it is non-zero be sure to clear LOFAULT_SET
- ! before resetting the error handler.
- !
- tst %o5
- bz %ncc, 1f
- andn %o5, LOFAULT_SET, %o5
- membar #Sync ! sync error barrier
- stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault
-1:
- retl
- clr %o0 ! return (0)
-
- SET_SIZE(bzero)
-#endif /* lint */
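
The interleave128_bzero body above is sixteen pairs of stxa's at offsets k*8 and k*8 + 128 within each 256-byte chunk, keeping two distinct 64-byte lines in flight at once. A C sketch of the pattern:

    #include <stdint.h>

    static void interleave128_clear(uint64_t *p)    /* p: one 256-byte chunk */
    {
        for (int k = 0; k < 16; k++) {
            p[k] = 0;           /* first 128-byte half: offset 8*k */
            p[k + 16] = 0;      /* second 128-byte half: offset 8*k + 128 */
        }
    }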
-
-#ifdef ROCK_CR_6654578
-/*
- * This code tries to maximize bandwidth by being clever about accessing
- * the two cache lines that are BUDDY PAIRS in the L3 cache. When line 0
- * of a pair is accessed, it will take hundreds of cycles to get the line
- * from memory, which brings in a 128-byte line to L3. Until the line is
- * installed in L3, any other access to that line (such as buddy line 1)
- * is blocked. For best throughput, we access many lines that are the first
- * of their buddy pairs, and only after many such accesses have been made,
- * we access the sequence of second buddy pair lines. Hopefully the second
- * set of accesses comes after the L3 lines are installed, so the accesses
- * hit in L3 without being delayed.  This should yield better throughput.
- * To keep this code simple, we assume the addresses given are aligned at
- * least on a 128 byte boundary, and the length is assumed to be a multiple
- * of 8k bytes.
- */
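
The access order this comment describes shows up in the routine below as two passes over each 4 KB chunk. A C sketch (memset stands in for the 64-byte block store):

    #include <string.h>

    /* Touch the first line of every 128-byte buddy pair in a 4 KB chunk,
     * then come back for the second lines, which by then should already
     * be installed in L3.  chunk is assumed 128-byte aligned. */
    static void buddy_clear_4k(char *chunk)
    {
        for (int buddy = 0; buddy < 2; buddy++)         /* %l5 loop */
            for (int pair = 0; pair < 32; pair++)       /* %l6 loop */
                memset(chunk + 128 * pair + 64 * buddy, 0, 64);
    }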
-
-#ifdef lint
-/*ARGSUSED*/
-int
-page_hwblkclr(void *addr, size_t len)
-{
- return(0);
-}
-#else /* lint */
- ENTRY(page_hwblkclr)
- save %sp, -SA(MINFRAME + 2*VIS_BLOCKSIZE), %sp
-
- ! %i0 address
- ! %i1 len
-
- rd %fprs, %l0
- mov %g0, %l2 ! clear flag to say fp regs not saved
-
- ! FPU enabled ? If not, enable it.
- btst FPRS_FEF, %l0
- bz,a,pt %icc, 1f
- wr %g0, FPRS_FEF, %fprs
-
- ! save in-use fpregs on stack
-
- add %fp, STACK_BIAS - 65, %l1 ! get stack frame for fp regs
- and %l1, -VIS_BLOCKSIZE, %l1 ! block align frame
- stda %d32, [%l1]ASI_BLK_P ! %l1 = addr of saved fp regs
-
- ! Set a flag saying fp regs are saved.
- mov 1, %l2
-
- ! enable fp
-
-1: membar #StoreStore|#StoreLoad|#LoadStore
-
- movxtod %g0, %d32
- movxtod %g0, %d34
- movxtod %g0, %d36
- movxtod %g0, %d38
- movxtod %g0, %d40
- movxtod %g0, %d42
- movxtod %g0, %d44
- movxtod %g0, %d46
-
- ba myloop2
- srl %i1,12,%i1
-.align 64
-myloop2:
- mov 2,%l5
- mov %i0, %l3
-buddyloop:
- set 4096, %l4
- add %i0, %l4, %l4
- prefetcha [%l4]ASI_BLK_P, #n_writes
- mov 32,%l6
-innerloop:
-
- subcc %l6,1,%l6
- stda %d32,[%i0]ASI_BLK_P
- bg,pt %icc,innerloop
- add %i0, 128, %i0
-
- subcc %l5,1,%l5
- add %l3, 64, %i0
- bg,pt %icc,buddyloop
- nop
- subcc %i1,1,%i1
- add %i0, 4032, %i0
- bg,pt %icc,myloop2
- nop
-
- brz,a %l2, 2f
- wr %l0, 0, %fprs ! restore fprs
-
- ! restore fpregs from stack
- ldda [%l1]ASI_BLK_P, %d32
-
- wr %l0, 0, %fprs ! restore fprs
-2:
- membar #Sync
-
- ret
- restore %g0, 0, %o0
-
- SET_SIZE(page_hwblkclr)
-#endif /* lint */
-#endif /* ROCK_CR_6654578 */
-
-#if defined(lint)
-
-int use_hw_bcopy = 1;
-int use_hw_bzero = 1;
-uint_t hw_copy_limit_1 = 0x100;
-uint_t hw_copy_limit_2 = 0x200;
-uint_t hw_copy_limit_4 = 0x400;
-uint_t hw_copy_limit_8 = 0x400;
-
-#else /* !lint */
-
- DGDEF(use_hw_bcopy)
- .word 1
- DGDEF(use_hw_bzero)
- .word 1
- DGDEF(hw_copy_limit_1)
- .word 0x100
- DGDEF(hw_copy_limit_2)
- .word 0x200
- DGDEF(hw_copy_limit_4)
- .word 0x400
- DGDEF(hw_copy_limit_8)
- .word 0x400
-
-
- .align 64
- .section ".text"
-#endif /* !lint */
diff --git a/usr/src/uts/sun4v/io/px/px_lib4v.c b/usr/src/uts/sun4v/io/px/px_lib4v.c
index 71baac6af0..a006991f1c 100644
--- a/usr/src/uts/sun4v/io/px/px_lib4v.c
+++ b/usr/src/uts/sun4v/io/px/px_lib4v.c
@@ -41,8 +41,6 @@
#include <sys/hotplug/pci/pcihp.h>
#include "px_lib4v.h"
#include "px_err.h"
-#include <vm/vm_dep.h>
-#include <vm/hat_sfmmu.h>
/* mask for the ranges property in calculating the real PFN range */
uint_t px_ranges_phi_mask = ((1 << 28) -1);
@@ -547,9 +545,6 @@ px_lib_dma_sync(dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t handle,
else
sync_dir = HVIO_DMA_SYNC_DIR_TO_DEV;
- if (force_sync_icache_after_dma == 0 && !icache_is_coherent)
- sync_dir |= HVIO_DMA_SYNC_DIR_NO_ICACHE_FLUSH;
-
off += mp->dmai_offset;
pg_off = off & MMU_PAGEOFFSET;
@@ -560,27 +555,12 @@ px_lib_dma_sync(dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t handle,
end = MMU_BTOPR(off + len - 1);
for (idx = MMU_BTOP(off); idx < end; idx++,
len -= bytes_synced, pg_off = 0) {
- size_t bytes_to_sync = MIN(len, MMU_PAGESIZE - pg_off);
-
- while (hvio_dma_sync(hdl,
- MMU_PTOB(PX_GET_MP_PFN(mp, idx)) + pg_off,
- bytes_to_sync, sync_dir, &bytes_synced) != H_EOK) {
-
- if (!(sync_dir & HVIO_DMA_SYNC_DIR_NO_ICACHE_FLUSH)) {
- bytes_synced = 0;
- break;
- }
+ size_t bytes_to_sync =
+ MIN(len, MMU_PAGESIZE - pg_off);
- /*
- * Some versions of firmware do not support
- * this sync_dir flag. If the call fails clear
- * the flag and retry the call. Also, set the
- * global so that we dont set the sync_dir
- * flag again.
- */
- sync_dir &= ~HVIO_DMA_SYNC_DIR_NO_ICACHE_FLUSH;
- force_sync_icache_after_dma = 1;
- }
+ if (hvio_dma_sync(hdl, MMU_PTOB(PX_GET_MP_PFN(mp, idx)) +
+ pg_off, bytes_to_sync, sync_dir, &bytes_synced) != H_EOK)
+ break;
DBG(DBG_LIB_DMA, dip, "px_lib_dma_sync: Called hvio_dma_sync "
"ra = %p bytes to sync = %x bytes synced %x\n",
diff --git a/usr/src/uts/sun4v/io/px/px_lib4v.h b/usr/src/uts/sun4v/io/px/px_lib4v.h
index 1cf491e4e9..e9be90a7eb 100644
--- a/usr/src/uts/sun4v/io/px/px_lib4v.h
+++ b/usr/src/uts/sun4v/io/px/px_lib4v.h
@@ -97,8 +97,7 @@ extern "C" {
#define PX_VPCI_MINOR_VER_0 0x0ull
#define PX_VPCI_MINOR_VER_1 0x1ull
-#define PX_VPCI_MINOR_VER_2 0x2ull
-#define PX_VPCI_MINOR_VER PX_VPCI_MINOR_VER_2
+#define PX_VPCI_MINOR_VER PX_VPCI_MINOR_VER_1
extern uint64_t hvio_config_get(devhandle_t dev_hdl, pci_device_t bdf,
pci_config_offset_t off, pci_config_size_t size, pci_cfg_data_t *data_p);
diff --git a/usr/src/uts/sun4v/ml/hcall.s b/usr/src/uts/sun4v/ml/hcall.s
index 90a120111a..70635baf63 100644
--- a/usr/src/uts/sun4v/ml/hcall.s
+++ b/usr/src/uts/sun4v/ml/hcall.s
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -321,20 +321,10 @@ hv_api_set_version(uint64_t api_group, uint64_t major, uint64_t minor,
/*ARGSUSED*/
uint64_t
-hv_mem_iflush(uint64_t real_addr, uint64_t length, uint64_t *flushed_len)
-{ return (0); }
-
-/*ARGSUSED*/
-uint64_t
-hv_mem_iflush_all()
-{ return (0); }
-
-/*ARGSUSED*/
-uint64_t
hv_tm_enable(uint64_t enable)
{ return (0); }
-/*ARGSUSED*/
+/*ARGSUSED*/
uint64_t
hv_mach_set_watchdog(uint64_t timeout, uint64_t *time_remaining)
{ return (0); }
@@ -742,34 +732,7 @@ hv_soft_state_get(uint64_t string, uint64_t *state)
SET_SIZE(hv_mem_sync)
/*
- * HV_MEM_IFLUSH
- * arg0 memory real address
- * arg1 flush length
- * ret0 status
- * ret1 flushed length
- *
- */
- ENTRY(hv_mem_iflush)
- mov %o2, %o4
- mov HV_MEM_IFLUSH, %o5
- ta FAST_TRAP
- retl
- stx %o1, [%o4]
- SET_SIZE(hv_mem_iflush)
-
- /*
- * HV_MEM_IFLUSH_ALL
- * ret0 status
- */
- ENTRY(hv_mem_iflush_all)
- mov HV_MEM_IFLUSH_ALL, %o5
- ta FAST_TRAP
- retl
- nop
- SET_SIZE(hv_mem_iflush_all)
-
- /*
- * uint64_t hv_rk_tm_enable(uint64_t enable)
+ * uint64_t hv_tm_enable(uint64_t enable)
*/
ENTRY(hv_tm_enable)
mov HV_TM_ENABLE, %o5
diff --git a/usr/src/uts/sun4v/ml/mach_interrupt.s b/usr/src/uts/sun4v/ml/mach_interrupt.s
index b9172ad508..e5978dbb7f 100644
--- a/usr/src/uts/sun4v/ml/mach_interrupt.s
+++ b/usr/src/uts/sun4v/ml/mach_interrupt.s
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -41,20 +41,7 @@
#include <sys/error.h>
#include <sys/mmu.h>
#include <vm/hat_sfmmu.h>
-
#define INTR_REPORT_SIZE 64
-#define ERRH_ASI_SHIFT 56 /* bits[63:56]; see errh_er_t */
-#define NRE_ASI 0x00000001 /* ASI observed in attr field */
-#define NRE_CTX 0x00000002 /* ASI equals ASI_MMU_CTX */
-#define CRP_OBSERVED (NRE_ASI | NRE_CTX)
-
-#define OR_MCPU_NRE_ERROR(reg1,reg2,val) \
- add reg1, CPU_MCPU, reg2; \
- add reg2, MCPU_NRE_ERROR, reg2; \
- ldxa [reg2]ASI_MEM, reg1; \
- or reg1, val, reg1; \
- stxa reg1, [reg2]ASI_MEM
-
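[Editor's sketch, not from the source: the removed OR_MCPU_NRE_ERROR assembly macro is a read-modify-write that ORs a flag into the per-CPU cpu_nre_error word. In C it amounts to the helper below; the asm version does the ldxa/or/stxa through ASI_MEM (physical addresses) because the trap handler cannot rely on the MMU. The struct names are stand-ins for the real machcpu layout.]

	#include <stdint.h>

	struct mcpu  { uint64_t cpu_nre_error; };	/* stand-in for machcpu */
	struct cpu_s { struct mcpu cpu_m; };

	static void
	or_mcpu_nre_error(struct cpu_s *cp, uint64_t val)
	{
		cp->cpu_m.cpu_nre_error |= val;		/* NRE_ASI and/or NRE_CTX */
	}
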
#ifdef TRAPTRACE
#include <sys/traptrace.h>
@@ -533,10 +520,6 @@ nonresumable_error(void)
CPU_PADDR(%g1, %g4) ! %g1 = cpu struct paddr
- add %g1, CPU_MCPU, %g4
- add %g4, MCPU_NRE_ERROR, %g4 ! &CPU->cpu_m.cpu_nre_error
- stxa %g0, [%g4]ASI_MEM ! clear cpu_nre_error
-
2: set CPU_NRQ_BASE_OFF, %g4
ldxa [%g1 + %g4]ASI_MEM, %g4 ! %g4 = queue base PA
add %g6, %g4, %g4 ! %g4 = PA of ER in Q
@@ -548,7 +531,7 @@ nonresumable_error(void)
bne,pn %xcc, 1f ! first 8 byte is not 0
nop
- /* BEGIN: move 64 bytes from queue to buf */
+ /* Now we can move 64 bytes from queue to buf */
set 0, %g5
ldxa [%g4 + %g5]ASI_MEM, %g1
stxa %g1, [%g7 + %g5]ASI_MEM ! byte 0 - 7
@@ -558,14 +541,7 @@ nonresumable_error(void)
add %g5, 8, %g5
ldxa [%g4 + %g5]ASI_MEM, %g1
stxa %g1, [%g7 + %g5]ASI_MEM ! byte 16 - 23
- /* Check for sun4v ASI */
- and %g1, ERRH_ATTR_ASI, %g1 ! isolate ASI bit
- cmp %g1, ERRH_ATTR_ASI
- bne,pt %xcc, 3f
- nop
- CPU_PADDR(%g1, %g5)
- OR_MCPU_NRE_ERROR(%g1, %g5, NRE_ASI) ! cpu_nre_error |= NRE_ASI
-3: set 24, %g5
+ add %g5, 8, %g5
ldxa [%g4 + %g5]ASI_MEM, %g1
stxa %g1, [%g7 + %g5]ASI_MEM ! byte 24 - 31
add %g5, 8, %g5
@@ -574,20 +550,12 @@ nonresumable_error(void)
add %g5, 8, %g5
ldxa [%g4 + %g5]ASI_MEM, %g1
stxa %g1, [%g7 + %g5]ASI_MEM ! byte 40 - 47
- /* Check for ASI==ASI_MMU_CTX */
- srlx %g1, ERRH_ASI_SHIFT, %g1 ! isolate the ASI field
- cmp %g1, ASI_MMU_CTX ! ASI=0x21 for CRP
- bne,pt %xcc, 4f
- nop
- CPU_PADDR(%g1, %g5)
- OR_MCPU_NRE_ERROR(%g1, %g5, NRE_CTX) ! cpu_nre_error |= NRE_CTX
-4: set 48, %g5
+ add %g5, 8, %g5
ldxa [%g4 + %g5]ASI_MEM, %g1
stxa %g1, [%g7 + %g5]ASI_MEM ! byte 48 - 55
add %g5, 8, %g5
ldxa [%g4 + %g5]ASI_MEM, %g1
stxa %g1, [%g7 + %g5]ASI_MEM ! byte 56 - 63
- /* END: move 64 bytes from queue to buf */
set CPU_NRQ_SIZE, %g5 ! %g5 = queue size
 	sub	%g5, 1, %g5		! %g5 = queue size mask
@@ -608,36 +576,6 @@ nonresumable_error(void)
membar #Sync
/*
- * For CRP, force a hat reload as if the context were stolen
- * by storing INVALID_CONTEXT in the secondary and nulling TSB.
- * Primary will be reset by usr_rtt for user-mode traps, or
- * has been reset in iae_crp or dae_crp for kernel-mode.
- */
- CPU_PADDR(%g1, %g5)
- add %g1, CPU_MCPU, %g5
- add %g5, MCPU_NRE_ERROR, %g5 ! &CPU->cpu_m.cpu_nre_error
- ldxa [%g5]ASI_MEM, %g4
- cmp %g4, CRP_OBSERVED ! confirm CRP
- bne,pt %xcc, 5f
- nop
- mov INVALID_CONTEXT, %g5 ! force hat reload of context
- mov MMU_SCONTEXT, %g7
- sethi %hi(FLUSH_ADDR), %g4
- stxa %g5, [%g7]ASI_MMU_CTX ! set secondary context reg
- flush %g4
- mov %o0, %g4
- mov %o1, %g5
- mov %o5, %g7
- mov %g0, %o0
- mov %g0, %o1
- mov MMU_TSB_CTXNON0, %o5
- ta FAST_TRAP ! null TSB
- nop
- mov %g4, %o0
- mov %g5, %o1
- mov %g7, %o5
-
- /*
* Call sys_trap. %g2 is TL(arg2), %g3 is head and tail
* offset(arg3).
* %g3 looks like following:
@@ -648,7 +586,7 @@ nonresumable_error(void)
*
* Run at PIL 14 unless we're already at PIL 15.
*/
-5: sllx %g3, 32, %g3 ! %g3.h = tail offset
+ sllx %g3, 32, %g3 ! %g3.h = tail offset
or %g3, %g2, %g3 ! %g3.l = head offset
rdpr %tl, %g2 ! %g2 = current tl
diff --git a/usr/src/uts/sun4v/ml/mach_offsets.in b/usr/src/uts/sun4v/ml/mach_offsets.in
index 892e81579c..f640168e94 100644
--- a/usr/src/uts/sun4v/ml/mach_offsets.in
+++ b/usr/src/uts/sun4v/ml/mach_offsets.in
@@ -98,7 +98,6 @@ machcpu
cpu_nrq_base_pa MCPU_NRQ_BASE
cpu_nrq_size MCPU_NRQ_SIZE
cpu_tstat_flags MCPU_TSTAT_FLAGS
- cpu_nre_error MCPU_NRE_ERROR
\#define CPU_MPCB_PA (CPU_MCPU + MCPU_MPCB_PA)
\#define CPU_KWBUF_FULL (CPU_MCPU + MCPU_KWBUF_FULL)
@@ -145,8 +144,6 @@ hat HAT_SIZE
sfmmu_cext
sfmmu_ctx_lock
sfmmu_ctxs
- sfmmu_pgsz_order
- sfmmu_pgsz_map
sf_scd SCD_SIZE
scd_sfmmup
@@ -184,7 +181,6 @@ tsbmiss TSBMISS_SIZE
scratch TSBMISS_SCRATCH
shmermap TSBMISS_SHMERMAP
scd_shmermap TSBMISS_SCDSHMERMAP
- pgsz_bitmap TSBMISS_PGSZ_BITMAP
\#define TSB_TAGACC (0 * TSBMISS_SCRATCH_INCR)
\#define TSBMISS_HMEBP (1 * TSBMISS_SCRATCH_INCR)
@@ -252,9 +248,6 @@ hv_tsb_block
hv_tsb_info_pa
hv_tsb_info_cnt
-hv_pgsz_order
- hv_pgsz_order_pa
-
cpu_node CPU_NODE_SIZE
nodeid
clock_freq
diff --git a/usr/src/uts/sun4v/ml/trap_table.s b/usr/src/uts/sun4v/ml/trap_table.s
index cbe2eccd8d..9def20fbc8 100644
--- a/usr/src/uts/sun4v/ml/trap_table.s
+++ b/usr/src/uts/sun4v/ml/trap_table.s
@@ -1396,10 +1396,6 @@ etrap_table:
* (0=kernel, 1=invalid, or 2=user) rather than context ID)
*/
ALTENTRY(exec_fault)
- set icache_is_coherent, %g6 /* check soft exec mode */
- ld [%g6], %g6
- brz,pn %g6, sfmmu_slow_immu_miss
- nop
TRACE_TSBHIT(TT_MMU_EXEC)
MMU_FAULT_STATUS_AREA(%g4)
ldx [%g4 + MMFSA_I_ADDR], %g2 /* g2 = address */
diff --git a/usr/src/uts/sun4v/os/error.c b/usr/src/uts/sun4v/os/error.c
index 05d014d0aa..8fc0aa6cf1 100644
--- a/usr/src/uts/sun4v/os/error.c
+++ b/usr/src/uts/sun4v/os/error.c
@@ -38,8 +38,6 @@
#include <sys/error.h>
#include <sys/fm/util.h>
#include <sys/ivintr.h>
-#include <sys/machasi.h>
-#include <sys/mmu.h>
#include <sys/archsystm.h>
#define MAX_CE_FLTS 10
@@ -212,7 +210,6 @@ process_nonresumable_error(struct regs *rp, uint64_t flags,
int expected = DDI_FM_ERR_UNEXPECTED;
uint64_t exec_mode;
uint8_t u_spill_fill;
- int u_kill = 1;
mcpup = &(CPU->cpu_m);
@@ -278,33 +275,8 @@ process_nonresumable_error(struct regs *rp, uint64_t flags,
break;
}
/*
- * Context Register Parity - for reload of secondary
- * context register, see nonresumable_error.
- */
- if ((errh_flt.errh_er.attr & ERRH_ATTR_ASI) &&
- (errh_flt.errh_er.asi == ASI_MMU_CTX)) {
-
- if (aflt->flt_tl) /* TL>0, so panic */
- break;
-
- /* Panic on unknown context registers */
- if (errh_flt.errh_er.addr < MMU_PCONTEXT0 ||
- errh_flt.errh_er.addr + errh_flt.errh_er.sz
- > MMU_SCONTEXT1 + sizeof (uint64_t)) {
- cmn_err(CE_WARN, "Parity error on "
- "unknown context register\n");
- aflt->flt_panic = 1;
- break;
- }
-
- u_kill = 0; /* do not terminate */
- break;
- }
- /*
- * All other PR_NRE fall through in order to
- * check for protection. The list can include
- * ERRH_ATTR_FRF, ERRH_ATTR_IRF, ERRH_ATTR_MEM,
- * and ERRH_ATTR_PIO.
+	 * Fall through; a precise fault also needs to be
+	 * checked to see if it was protected.
*/
/*FALLTHRU*/
@@ -344,7 +316,7 @@ process_nonresumable_error(struct regs *rp, uint64_t flags,
* for fatal errors.
*/
if (aflt->flt_class == BUS_FAULT) {
- aflt->flt_addr = errh_flt.errh_er.addr;
+ aflt->flt_addr = errh_flt.errh_er.ra;
errh_cpu_run_bus_error_handlers(aflt,
expected);
}
@@ -393,13 +365,13 @@ process_nonresumable_error(struct regs *rp, uint64_t flags,
errh_page_retire(&errh_flt, PR_UE);
/*
- * If we queued an error for a thread that should terminate
- * and it was in user mode or protected by t_lofault, set AST
- * flag so the queue will be drained before returning to user
- * mode. Note that user threads can be killed via pcb_flags.
+	 * If we queued an error and it was in user mode, or
+	 * protected by t_lofault, or u_spill_fill is set, we
+	 * set the AST flag so the queue will be drained before
+	 * returning to user mode.
*/
- if (u_kill && (!aflt->flt_priv ||
- aflt->flt_prot == AFLT_PROT_COPY || u_spill_fill)) {
+ if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY ||
+ u_spill_fill) {
int pcb_flag = 0;
if (aflt->flt_class == CPU_FAULT)
@@ -550,7 +522,7 @@ cpu_async_log_err(void *flt)
* If we are going to panic, scrub the page first
*/
if (errh_fltp->cmn_asyncflt.flt_panic)
- mem_scrub(errh_fltp->errh_er.addr,
+ mem_scrub(errh_fltp->errh_er.ra,
errh_fltp->errh_er.sz);
}
break;
@@ -606,7 +578,7 @@ cpu_ue_log_err(struct async_flt *aflt)
static void
errh_page_retire(errh_async_flt_t *errh_fltp, uchar_t flag)
{
- uint64_t flt_real_addr_start = errh_fltp->errh_er.addr;
+ uint64_t flt_real_addr_start = errh_fltp->errh_er.ra;
uint64_t flt_real_addr_end = flt_real_addr_start +
errh_fltp->errh_er.sz - 1;
int64_t current_addr;
diff --git a/usr/src/uts/sun4v/os/fillsysinfo.c b/usr/src/uts/sun4v/os/fillsysinfo.c
index aeea794451..68654e69ec 100644
--- a/usr/src/uts/sun4v/os/fillsysinfo.c
+++ b/usr/src/uts/sun4v/os/fillsysinfo.c
@@ -41,7 +41,6 @@
#include <sys/cmp.h>
#include <sys/async.h>
#include <vm/page.h>
-#include <vm/vm_dep.h>
#include <vm/hat_sfmmu.h>
#include <sys/sysmacros.h>
#include <sys/mach_descrip.h>
@@ -66,7 +65,6 @@ static uint64_t get_mmu_ctx_bits(md_t *, mde_cookie_t);
static uint64_t get_mmu_tsbs(md_t *, mde_cookie_t);
static uint64_t get_mmu_shcontexts(md_t *, mde_cookie_t);
static uint64_t get_cpu_pagesizes(md_t *, mde_cookie_t);
-static int check_mmu_pgsz_search(md_t *, mde_cookie_t);
static char *construct_isalist(md_t *, mde_cookie_t, char **);
static void init_md_broken(md_t *, mde_cookie_t *);
static int get_l2_cache_info(md_t *, mde_cookie_t, uint64_t *, uint64_t *,
@@ -356,65 +354,10 @@ found:
}
md_free_scan_dag(mdp, &node);
}
- md_free_scan_dag(mdp, &eunit);
- }
-}
-
-/*
- * Setup instruction cache coherency. The "memory-coherent" property
- * is optional. Default for Icache_coherency is 1 (I$ is coherent).
- * If we find an Icache with coherency == 0, then enable non-coherent
- * Icache support.
- */
-void
-setup_icache_coherency(md_t *mdp)
-{
- int ncache;
- mde_cookie_t *cachelist;
- int i;
-
- ncache = md_alloc_scan_dag(mdp, md_root_node(mdp), "cache",
- "fwd", &cachelist);
-
- /*
- * The "cache" node is optional in MD, therefore ncaches can be 0.
- */
- if (ncache < 1) {
- return;
- }
-
- for (i = 0; i < ncache; i++) {
- uint64_t cache_level;
- uint64_t memory_coherent;
- uint8_t *type;
- int typelen;
-
- if (md_get_prop_val(mdp, cachelist[i], "level",
- &cache_level))
- continue;
-
- if (cache_level != 1)
- continue;
-
- if (md_get_prop_data(mdp, cachelist[i], "type",
- &type, &typelen))
- continue;
-
- if (strcmp((char *)type, "instn") != 0)
- continue;
- if (md_get_prop_val(mdp, cachelist[i], "memory-coherent",
- &memory_coherent))
- continue;
-
- if (memory_coherent != 0)
- continue;
- mach_setup_icache(memory_coherent);
- break;
+ md_free_scan_dag(mdp, &eunit);
}
-
- md_free_scan_dag(mdp, &cachelist);
}
/*
@@ -461,11 +404,6 @@ cpu_setup_common(char **cpu_module_isa_set)
shctx_on = 1;
}
- /*
- * Get and check page search register properties.
- */
- pgsz_search_on = check_mmu_pgsz_search(mdp, cpulist[0]);
-
for (i = 0; i < nocpus; i++)
fill_cpu(mdp, cpulist[i]);
@@ -474,7 +412,6 @@ cpu_setup_common(char **cpu_module_isa_set)
setup_chip_mappings(mdp);
setup_exec_unit_mappings(mdp);
- setup_icache_coherency(mdp);
/*
* If MD is broken then append the passed ISA set,
@@ -1116,50 +1053,3 @@ init_md_broken(md_t *mdp, mde_cookie_t *cpulist)
md_free_scan_dag(mdp, &platlist);
}
-
-/*
- * This routine gets the MD properties associated with the TLB search order API
- * and compares these against the expected values for a processor which supports
- * this API. The return value is used to determine whether to use the API.
- */
-static int
-check_mmu_pgsz_search(md_t *mdp, mde_cookie_t cpu_node_cookie)
-{
-
- uint64_t mmu_search_nshared_contexts;
- uint64_t mmu_max_search_order;
- uint64_t mmu_non_priv_search_unified;
- uint64_t mmu_search_page_size_list;
-
- if (md_get_prop_val(mdp, cpu_node_cookie,
- "mmu-search-#shared-contexts", &mmu_search_nshared_contexts))
- mmu_search_nshared_contexts = 0;
-
- if (mmu_search_nshared_contexts == 0 ||
- mmu_search_nshared_contexts != NSEARCH_SHCONTEXTS)
- return (0);
-
- if (md_get_prop_val(mdp, cpu_node_cookie, "mmu-max-search-order",
- &mmu_max_search_order))
- mmu_max_search_order = 0;
-
- if (mmu_max_search_order == 0 || mmu_max_search_order !=
- MAX_PGSZ_SEARCH_ORDER)
- return (0);
-
- if (md_get_prop_val(mdp, cpu_node_cookie,
- "mmu-non-priv-search-unified", &mmu_non_priv_search_unified))
- mmu_non_priv_search_unified = -1;
-
- if (mmu_non_priv_search_unified != 1) {
- return (0);
- }
-
- if (md_get_prop_val(mdp, cpu_node_cookie,
- "mmu-search-page-size-list", &mmu_search_page_size_list)) {
- mmu_search_page_size_list = 0;
- return (0);
- }
-
- return (1);
-}
diff --git a/usr/src/uts/sun4v/os/mach_cpu_states.c b/usr/src/uts/sun4v/os/mach_cpu_states.c
index a6ffb2b256..737bb0e1ac 100644
--- a/usr/src/uts/sun4v/os/mach_cpu_states.c
+++ b/usr/src/uts/sun4v/os/mach_cpu_states.c
@@ -1075,20 +1075,7 @@ kdi_cpu_init(int dcache_size, int dcache_linesize, int icache_size,
void
kdi_flush_caches(void)
{
- /*
- * May not be implemented by all sun4v architectures.
- *
- * Cannot use hsvc_version to see if the group is already
- * negotiated or not because, this function is called by
- * KMDB when it is at the console prompt which is running
- * at highest PIL. hsvc_version grabs an adaptive mutex and
- * this is a no-no at this PIL level.
- */
- if (hsvc_kdi_mem_iflush_negotiated) {
- uint64_t status = hv_mem_iflush_all();
- if (status != H_EOK)
- cmn_err(CE_PANIC, "Flushing all I$ entries failed");
- }
+ /* Not required on sun4v architecture. */
}
/*ARGSUSED*/
@@ -1101,16 +1088,6 @@ kdi_get_stick(uint64_t *stickp)
void
cpu_kdi_init(kdi_t *kdi)
{
- /*
-	 * Any API negotiation this early in the boot will be unsuccessful.
-	 * Therefore firmware for sun4v platforms that have an incoherent I$
-	 * is assumed to support pre-negotiated MEM_IFLUSH APIs. A successful
-	 * invocation of MEM_IFLUSH_ALL is a test for its availability.
-	 * Set a flag if successful, indicating its availability.
- */
- if (hv_mem_iflush_all() == 0)
- hsvc_kdi_mem_iflush_negotiated = B_TRUE;
-
kdi->kdi_flush_caches = kdi_flush_caches;
kdi->mkdi_cpu_init = kdi_cpu_init;
kdi->mkdi_cpu_ready_iter = kdi_cpu_ready_iter;
diff --git a/usr/src/uts/sun4v/pcbe/rock_pcbe.c b/usr/src/uts/sun4v/pcbe/rock_pcbe.c
deleted file mode 100644
index 6a752a5291..0000000000
--- a/usr/src/uts/sun4v/pcbe/rock_pcbe.c
+++ /dev/null
@@ -1,2316 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * Rock Performance Counter Back End
- */
-
-#include <sys/cpuvar.h>
-#include <sys/systm.h>
-#include <sys/cmn_err.h>
-#include <sys/cpc_impl.h>
-#include <sys/cpc_pcbe.h>
-#include <sys/modctl.h>
-#include <sys/machsystm.h>
-#include <sys/sdt.h>
-#include <sys/hypervisor_api.h>
-#include <sys/rock_hypervisor_api.h>
-#include <sys/hsvc.h>
-
-#define NT_END 0xFF
-
-/* Counter Types */
-#define NUM_PCBE_COUNTERS 6
-#define RK_PERF_CYC 0x0100
-#define RK_PERF_INSTR 0x0200
-#define RK_PERF_L2 0x0400
-#define RK_PERF_MMU 0x0800
-#define RK_PERF_YANK 0x2000
-#define RK_PERF_SIBLK 0x4000
-#define RK_PERF_LVLK 0x8000
-#define RK_PERF_SPEC 0x1000 /* Reserved */
-
-#define NORMAL_COUNTER 0x1
-#define SYNTHETIC_COUNTER 0x2
-
-/* ASI_PERF_MMU_CNT_FILTER TXN bits */
-#define ASI_PERF_MMU_CNT_FILTER_UTLB_HITS 0x1
-#define ASI_PERF_MMU_CNT_FILTER_UTLB_MISS 0x2
-#define ASI_PERF_MMU_CNT_FILTER_DATA_ACCESS 0x8
-#define ASI_PERF_MMU_CNT_FILTER_INSTR_ACCESS 0x10
-#define ASI_PERF_MMU_CNT_FILTER_EA_VIRTUAL 0x20
-#define ASI_PERF_MMU_CNT_FILTER_EA_REAL 0x40
-
-#define MMU_ALL_TXNS (ASI_PERF_MMU_CNT_FILTER_UTLB_HITS | \
- ASI_PERF_MMU_CNT_FILTER_UTLB_MISS | \
- ASI_PERF_MMU_CNT_FILTER_DATA_ACCESS | \
- ASI_PERF_MMU_CNT_FILTER_INSTR_ACCESS | \
- ASI_PERF_MMU_CNT_FILTER_EA_VIRTUAL | \
- ASI_PERF_MMU_CNT_FILTER_EA_REAL)
-
-#define MMU_ITLB_MISS (ASI_PERF_MMU_CNT_FILTER_EA_REAL | \
- ASI_PERF_MMU_CNT_FILTER_EA_VIRTUAL | \
- ASI_PERF_MMU_CNT_FILTER_INSTR_ACCESS | \
- ASI_PERF_MMU_CNT_FILTER_UTLB_MISS | \
- ASI_PERF_MMU_CNT_FILTER_UTLB_HITS)
-
-#define MMU_DTLB_MISS (ASI_PERF_MMU_CNT_FILTER_EA_REAL | \
- ASI_PERF_MMU_CNT_FILTER_EA_VIRTUAL | \
- ASI_PERF_MMU_CNT_FILTER_DATA_ACCESS | \
- ASI_PERF_MMU_CNT_FILTER_UTLB_MISS | \
- ASI_PERF_MMU_CNT_FILTER_UTLB_HITS)
-
-#define MMU_UTLB_MISS (ASI_PERF_MMU_CNT_FILTER_EA_REAL | \
- ASI_PERF_MMU_CNT_FILTER_EA_VIRTUAL | \
- ASI_PERF_MMU_CNT_FILTER_INSTR_ACCESS | \
- ASI_PERF_MMU_CNT_FILTER_DATA_ACCESS | \
- ASI_PERF_MMU_CNT_FILTER_UTLB_MISS)
-
-#define MMU_UTLB_HIT (ASI_PERF_MMU_CNT_FILTER_EA_REAL | \
- ASI_PERF_MMU_CNT_FILTER_EA_VIRTUAL | \
- ASI_PERF_MMU_CNT_FILTER_INSTR_ACCESS | \
- ASI_PERF_MMU_CNT_FILTER_DATA_ACCESS | \
- ASI_PERF_MMU_CNT_FILTER_UTLB_HITS)
-
-#define MMU_ITLB_MISS_UTLB_HIT (ASI_PERF_MMU_CNT_FILTER_EA_REAL | \
- ASI_PERF_MMU_CNT_FILTER_EA_VIRTUAL | \
- ASI_PERF_MMU_CNT_FILTER_INSTR_ACCESS | \
- ASI_PERF_MMU_CNT_FILTER_UTLB_HITS)
-
-#define MMU_ITLB_MISS_UTLB_MISS (ASI_PERF_MMU_CNT_FILTER_EA_REAL | \
- ASI_PERF_MMU_CNT_FILTER_EA_VIRTUAL | \
- ASI_PERF_MMU_CNT_FILTER_INSTR_ACCESS | \
- ASI_PERF_MMU_CNT_FILTER_UTLB_MISS)
-
-#define MMU_DTLB_MISS_UTLB_HIT (ASI_PERF_MMU_CNT_FILTER_EA_REAL | \
- ASI_PERF_MMU_CNT_FILTER_EA_VIRTUAL | \
- ASI_PERF_MMU_CNT_FILTER_DATA_ACCESS | \
- ASI_PERF_MMU_CNT_FILTER_UTLB_HITS)
-
-#define MMU_DTLB_MISS_UTLB_MISS (ASI_PERF_MMU_CNT_FILTER_EA_REAL | \
- ASI_PERF_MMU_CNT_FILTER_EA_VIRTUAL | \
- ASI_PERF_MMU_CNT_FILTER_DATA_ACCESS | \
- ASI_PERF_MMU_CNT_FILTER_UTLB_MISS)
-
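[Editor's sketch, not in the source: each composite TLB-miss filter above is MMU_ALL_TXNS with exactly one transaction bit masked off. The C11 static assertions below make that relationship explicit, restating the filter-bit values from the defines above under local names.]

	#define	FLT_UTLB_HITS	0x1
	#define	FLT_UTLB_MISS	0x2
	#define	FLT_DATA	0x8
	#define	FLT_INSTR	0x10
	#define	FLT_EA_VIRT	0x20
	#define	FLT_EA_REAL	0x40
	#define	FLT_ALL		(FLT_UTLB_HITS | FLT_UTLB_MISS | FLT_DATA | \
				FLT_INSTR | FLT_EA_VIRT | FLT_EA_REAL)

	_Static_assert((FLT_ALL & ~FLT_DATA) ==
	    (FLT_EA_REAL | FLT_EA_VIRT | FLT_INSTR | FLT_UTLB_MISS |
	    FLT_UTLB_HITS), "MMU_ITLB_MISS is ALL minus DATA_ACCESS");
	_Static_assert((FLT_ALL & ~FLT_INSTR) ==
	    (FLT_EA_REAL | FLT_EA_VIRT | FLT_DATA | FLT_UTLB_MISS |
	    FLT_UTLB_HITS), "MMU_DTLB_MISS is ALL minus INSTR_ACCESS");
	_Static_assert((FLT_ALL & ~FLT_UTLB_HITS) ==
	    (FLT_EA_REAL | FLT_EA_VIRT | FLT_INSTR | FLT_DATA |
	    FLT_UTLB_MISS), "MMU_UTLB_MISS is ALL minus UTLB_HITS");
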
-/*
- * These values will be loaded into nametable.bits, which is a 32-bit number.
- * Please see the description of the bits in the nametable struct. If the
- * counters are part of different pics, then we can re-use GROUP and TYPE.
- */
-#define SYN_BIT ((uint32_t)1 << 31) /* Set bit 32 */
-#define GROUP_MASK 0xFFF000 /* Bits 12-23 */
-#define ID_TO_GROUP(GROUP_ID) ((GROUP_ID)<<12)
-#define GROUP(SYN_COUNTER) ((SYN_COUNTER) & GROUP_MASK)
-#define	TYPE(SYN_COUNTER)	((SYN_COUNTER) & 0x000FFF) /* Bits 0-11 */
-
-/* Synthetic counter types */
-#define L2_GROUP_DS ID_TO_GROUP(0)
-#define DS_DRAM 0x0 /* From PRM */
-#define DS_L3 0x1 /* ditto */
-#define DS_OTHER_L2 0x2 /* ditto */
-#define DS_LOCAL_L2 0x3 /* ditto */
-
-#define L2_DS_DRAM (SYN_BIT | L2_GROUP_DS | DS_DRAM)
-#define L2_DS_L3 (SYN_BIT | L2_GROUP_DS | DS_L3)
-#define L2_DS_OTHER_L2 (SYN_BIT | L2_GROUP_DS | DS_OTHER_L2)
-#define L2_DS_LOCAL_L2 (SYN_BIT | L2_GROUP_DS | DS_LOCAL_L2)
-
-#define L2_GROUP_TXN_MISS ID_TO_GROUP(1)
-#define TXN_LD 0x3 /* From PRM */
-#define TXN_ST 0x18 /* ditto */
-#define L2_TXN_LD_MISS (SYN_BIT | L2_GROUP_TXN_MISS | TXN_LD)
-#define L2_TXN_ST_MISS (SYN_BIT | L2_GROUP_TXN_MISS | TXN_ST)
-
-#define L2_GROUP_TXN_HIT ID_TO_GROUP(2)
-#define L2_TXN_LD_HIT (SYN_BIT | L2_GROUP_TXN_HIT | TXN_LD)
-#define L2_TXN_ST_HIT (SYN_BIT | L2_GROUP_TXN_HIT | TXN_ST)
-
-#define L2_GROUP_EVT ID_TO_GROUP(3)
-#define EVT_L2_MISS 0x8 /* From PRM */
-#define EVT_L2_PEND_ST 0x2 /* ditto */
-#define EVT_L2_PRIOR_MISS 0x1 /* ditto */
-#define EVT_L2_NOEVENTS 0x0 /* ditto */
-#define L2_HIT 0
-#define L2_MISS 1
-
-#define L2_EVT_HIT (SYN_BIT | L2_GROUP_EVT | L2_HIT)
-#define L2_EVT_MISS (SYN_BIT | L2_GROUP_EVT | L2_MISS)
-
-/* Instruction types. Corresponds to ASI_PERF_IS_INFO.TYP */
-#define I_GROUP_TYPE ID_TO_GROUP(0)
-#define TYPE_HELPER (1<<0)
-#define TYPE_LD (1<<1)
-#define TYPE_ST (1<<2)
-#define TYPE_CTI (1<<3)
-#define TYPE_FP (1<<4)
-#define TYPE_INT_ALU (1<<5)
-#define TYPE_CMPLX_ALU (1<<6)
-
-#define INSTR_TYPE_LD (SYN_BIT | I_GROUP_TYPE | TYPE_LD)
-#define INSTR_TYPE_ST (SYN_BIT | I_GROUP_TYPE | TYPE_ST)
-#define INSTR_TYPE_CTI (SYN_BIT | I_GROUP_TYPE | TYPE_CTI)
-#define INSTR_TYPE_FP (SYN_BIT | I_GROUP_TYPE | TYPE_FP)
-
-/* Execution modes. Corresponds to ASI_PERF_IS_INFO.MODE */
-#define I_GROUP_MODE ID_TO_GROUP(1)
-#define MODE_NOR 0x0 /* From PRM */
-#define MODE_OOO 0x1 /* ditto */
-#define MODE_EXE 0x2 /* ditto */
-#define MODE_DLY 0x3 /* ditto */
-#define MODE_DEF 0x4 /* ditto */
-#define MODE_HWS 0x5 /* ditto */
-
-#define INSTR_MODE_NOR (SYN_BIT | I_GROUP_MODE | MODE_NOR)
-#define INSTR_MODE_OOO (SYN_BIT | I_GROUP_MODE | MODE_OOO)
-#define INSTR_MODE_EXE (SYN_BIT | I_GROUP_MODE | MODE_EXE)
-#define INSTR_MODE_DLY (SYN_BIT | I_GROUP_MODE | MODE_DLY)
-#define INSTR_MODE_DEF (SYN_BIT | I_GROUP_MODE | MODE_DEF)
-#define INSTR_MODE_HWS (SYN_BIT | I_GROUP_MODE | MODE_HWS)
-
-/* Instruction events. Corresponds to ASI_PERF_IS_INFO.EVT */
-#define I_GROUP_EVT ID_TO_GROUP(2)
-
-/* Bit numbers from PRM */
-#define EVT_DC_MISS (1<<0)
-#define EVT_PRIOR_MISS (1<<1)
-#define EVT_DTLB_MISS (1<<2)
-#define EVT_LDB_FULL (1<<3)
-#define EVT_STB_FULL (1<<4)
-#define EVT_FE_STALL (1<<5)
-#define EVT_FROM_DQ (1<<6)
-#define EVT_CORRECT_BP (1<<7)
-#define EVT_BYPASS_RAW (1<<8)
-#define EVT_NONBYPASS_RAW (1<<9)
-#define EVT_CTI_TAKEN (1<<10)
-#define EVT_FAILED_SPEC (1<<11)
-
-#define INSTR_EVT_DC_MISS (SYN_BIT | I_GROUP_EVT | EVT_DC_MISS)
-#define INSTR_EVT_PRIOR_MISS (SYN_BIT | I_GROUP_EVT | EVT_PRIOR_MISS)
-#define INSTR_EVT_DTLB_MISS (SYN_BIT | I_GROUP_EVT | EVT_DTLB_MISS)
-#define INSTR_EVT_LDB_FULL (SYN_BIT | I_GROUP_EVT | EVT_LDB_FULL)
-#define INSTR_EVT_STB_FULL (SYN_BIT | I_GROUP_EVT | EVT_STB_FULL)
-#define INSTR_EVT_FE_STALL (SYN_BIT | I_GROUP_EVT | EVT_FE_STALL)
-#define INSTR_EVT_FROM_DQ (SYN_BIT | I_GROUP_EVT | EVT_FROM_DQ)
-#define INSTR_EVT_CORRECT_BP (SYN_BIT | I_GROUP_EVT | EVT_CORRECT_BP)
-#define INSTR_EVT_BYPASS_RAW (SYN_BIT | I_GROUP_EVT | EVT_BYPASS_RAW)
-#define INSTR_EVT_NONBYPASS_RAW (SYN_BIT | I_GROUP_EVT | EVT_NONBYPASS_RAW)
-#define INSTR_EVT_CTI_TAKEN (SYN_BIT | I_GROUP_EVT | EVT_CTI_TAKEN)
-#define INSTR_EVT_FAILED_SPEC (SYN_BIT | I_GROUP_EVT | EVT_FAILED_SPEC)
-
-/*
- * Synthetic counters to count MCCDESR error events
- * All the events are mutually exclusive therefore can be counted
- * simultaneously. Hence each one is a different pic. Therefore
- * there is no need to have GROUP or TYPE for these counters.
- */
-#define MCCDESR_YANK (SYN_BIT)
-#define MCCDESR_SIBLK (SYN_BIT)
-#define MCCDESR_LVLK (SYN_BIT)
-
-/* Number of samples to be taken before Performance Event Trap is generated */
-/* Maximum frequencies that can be configured */
-#define INSTR_SAM_MAX_FREQ 0x3FF /* 10 bits */
-#define L2_SAM_MAX_FREQ 0xFFFF /* 16 bits */
-#define MMU_SAM_MAX_FREQ 0xFFFF /* 16 bits */
-
-/* Minimum frequencies that should be configured to prevent DOS */
-#define INSTR_SAM_MIN_FREQ 100
-#define L2_SAM_MIN_FREQ 250
-#define MMU_SAM_MIN_FREQ 250
-
-/* Default frequencies that are configured */
-#define INSTR_SAM_DEF_FREQ 250
-#define L2_SAM_DEF_FREQ 1000
-
-/* Number of bits in the hardware for the counter */
-#define CYC_COUNTER_BITS 18
-#define INSTR_COUNTER_BITS 18
-#define L2_COUNTER_BITS 48
-#define MMU_COUNTER_BITS 48
-#define YANK_COUNTER_BITS 64
-#define SIBLK_COUNTER_BITS 64
-#define LVLK_COUNTER_BITS 64
-
-#define RK_PERF_COUNT_TOE_SHIFT (63)
-
-#define STATE_CONFIGURED 0x1
-#define STATE_PROGRAMMED 0x2
-#define STATE_STOPPED 0x4
-#define STATE_RELEASED 0x8
-#define UNINITIALIZED 2 /* should be other than 0/1 */
-#define TLZ 1 /* Do not make it zero */
-#define TLNZ 2
-
-#define CPU_REF_URL " Documentation for Sun processors can be found at: " \
- "http://www.sun.com/processors/manuals"
-
-#define MIN_RINGBUF_ENTRIES 100
-
-#define RINGBUF_GET_HEAD(RB) \
- (uint64_t *)((uint64_t)(&RB->va_values) + RB->head);
-
-#define RINGBUF_GET_TAIL(RB) \
- (uint64_t *)((uint64_t)(&RB->va_values) + RB->tail);
-
-#define RINGBUF_SET_HEAD(RB, PTR) \
- RB->head = (uint64_t)PTR - (uint64_t)(&RB->va_values); \
- RB->hwm = RB->head + (RB->size >> 1); \
- if (RB->hwm >= RB->size) \
- RB->hwm -= RB->size;
-
-#define RINGBUF_MOVE_HEAD(RB, PTR, SAMPLE_SZ) \
- PTR = (uint64_t *)((uint64_t)PTR + SAMPLE_SZ); \
- if (PTR >= (uint64_t *)((uint64_t)(&RB->va_values) + RB->size)) \
- PTR = (uint64_t *)&RB->va_values;
-
-#define MAKE_MASK(NBITS, SHIFT) (((unsigned long)(1<<(NBITS))-1)<<SHIFT)
-
-#define COUNTER_MAX(_p) ((int64_t)((1ULL << (_p->counter_bits - 1)) - 1))
-#define COUNTER_MIN(_p) ((int64_t)-(COUNTER_MAX(_p)))
-#define COUNTER_MASK(_p) (bitmask(_p->counter_bits))
-
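[Editor's worked example, not in the source: the macros above derive a counter's signed range and raw mask from its bit width, assuming bitmask(n) yields the n low-order one bits, as its usage here suggests. For the 18-bit instruction counter this comes out as shown below.]

	#include <stdint.h>
	#include <stdio.h>

	/* stand-in, assuming bitmask(n) returns n low-order one bits */
	static uint64_t bitmask(uint8_t n) { return ((1ULL << n) - 1); }

	int
	main(void)
	{
		uint8_t counter_bits = 18;	/* INSTR_COUNTER_BITS */
		int64_t max = (int64_t)((1ULL << (counter_bits - 1)) - 1);

		/* prints: max=131071 min=-131071 mask=3ffff */
		(void) printf("max=%lld min=%lld mask=%llx\n",
		    (long long)max, (long long)-max,
		    (unsigned long long)bitmask(counter_bits));
		return (0);
	}
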
-/* Global Structures and typedefs */
-struct _rk_pcbe_ringbuf {	/* INIT-ER	WRITER	READER */
- uint32_t head; /* offset guest guest guest */
- uint32_t tail; /* offset guest hv both */
- uint32_t size; /* bytes guest n/a both */
- uint32_t hwm; /* bytes guest hv guest */
- uint64_t va_values; /* guest hv guest */
-};
-
-typedef struct _rk_pcbe_ringbuf rk_pcbe_ringbuf_t;
-
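[Editor's behavioral sketch, not from the source, of the RINGBUF_* macros above: head and tail are byte offsets into a buffer of 'size' bytes, the high-water mark trails the head by half the buffer, and everything wraps modulo 'size'.]

	#include <stdint.h>

	typedef struct {
		uint32_t head, tail, size, hwm;
	} ringbuf_t;

	static void
	set_head(ringbuf_t *rb, uint32_t new_head)
	{
		rb->head = new_head;
		rb->hwm = rb->head + (rb->size >> 1);	/* HWM half a buffer on */
		if (rb->hwm >= rb->size)
			rb->hwm -= rb->size;		/* wrap */
	}

	static void
	move_head(ringbuf_t *rb, uint32_t sample_sz)
	{
		uint32_t h = rb->head + sample_sz;

		/* past the end of the buffer: wrap back to offset 0 */
		rb->head = (h >= rb->size) ? 0 : h;
	}
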
-typedef struct _sampler {
- rk_pcbe_ringbuf_t *ring_buffer; /* Ring buffer start address */
- uint64_t synthetic_pic;
- uint32_t frequency; /* Sampling Frequency */
- uint32_t syn_counter; /* Synthetic Counter Type */
- uint32_t sample_size; /* Size of each sample in bytes */
- uint32_t flags; /* instr sampler: priv */
- uint8_t tl; /* Trap Level Filtering */
- uint8_t nohws; /* Filter out HW Scouting samples */
-} sampler_t;
-
-typedef struct _rk_pcbe_config {
- uint8_t pcbe_picno; /* 0-6:instr,l2,mmu,yank,siblk,lvlk */
- uint8_t counter_bits; /* Number of counter bits */
- uint8_t counter_type; /* Normal or Synthetic */
- uint8_t toe; /* Trap on Enable */
- uint32_t counter; /* Counter name */
- uint32_t src_type; /* Strand, Strands, SIU, MMU */
- uint32_t flags; /* instr counter:priv. l2,mmu:Xn */
- uint64_t pcbe_pic; /* PIC counter value */
- uint8_t inuse; /* pic in use or not */
- uint8_t state; /* Current state of the pic */
- processorid_t cpu; /* CPU associated to this pic */
- sampler_t sampler;
-#ifdef RKPCBE_DBG
- char name[64]; /* Human readable counter name */
-#endif
-} rk_pcbe_config_t;
-
-/* Function Prototypes for those that are invoked using rk_pcbe_ops */
-static int rk_pcbe_init(void);
-static int rk_pcbe_fini(void);
-static uint_t rk_pcbe_ncounters(void);
-static const char *rk_pcbe_impl_name(void);
-static const char *rk_pcbe_cpuref(void);
-static char *rk_pcbe_list_events(uint_t picnum);
-static char *rk_pcbe_list_attrs(void);
-static uint64_t rk_pcbe_event_coverage(char *event);
-static uint64_t rk_pcbe_overflow_bitmap(void);
-static int rk_pcbe_configure(uint_t picnum, char *event, uint64_t preset,
- uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
- void *token);
-static void rk_pcbe_program(void *token);
-static void rk_pcbe_allstop(void);
-static void rk_pcbe_sample(void *token);
-static void rk_pcbe_free(void *config);
-
-pcbe_ops_t rk_pcbe_ops = {
- PCBE_VER_1,
- CPC_CAP_OVERFLOW_INTERRUPT,
- rk_pcbe_ncounters,
- rk_pcbe_impl_name,
- rk_pcbe_cpuref,
- rk_pcbe_list_events,
- rk_pcbe_list_attrs,
- rk_pcbe_event_coverage,
- rk_pcbe_overflow_bitmap,
- rk_pcbe_configure,
- rk_pcbe_program,
- rk_pcbe_allstop,
- rk_pcbe_sample,
- rk_pcbe_free
-};
-
-/*
- * bits:
- *
- * | 31 |30 24|23 12|11 0
- * | Syn/Normal | Rsvd | Group | Type |
- */
-struct nametable {
- const uint32_t bits;
- const char *name;
-};
-
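[Editor's encoding sketch, not in the source: how a synthetic counter name packs into the 32-bit nametable.bits layout drawn above. The example mirrors L2_TXN_LD_MISS, group L2_GROUP_TXN_MISS (1) and type TXN_LD (0x3), using local copies of the macros.]

	#include <stdint.h>

	#define	SYN_BIT		((uint32_t)1 << 31)
	#define	ID_TO_GROUP(g)	((g) << 12)
	#define	GROUP_MASK	0xFFF000
	#define	GROUP(c)	((c) & GROUP_MASK)
	#define	TYPE(c)		((c) & 0x000FFF)

	int
	main(void)
	{
		uint32_t bits = SYN_BIT | ID_TO_GROUP(1) | 0x3; /* 0x80001003 */

		/* decode recovers the pieces: group 0x1000, type 0x3 */
		return (GROUP(bits) == ID_TO_GROUP(1) && TYPE(bits) == 0x3 &&
		    (bits & SYN_BIT) != 0) ? 0 : 1;
	}
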
-/* Instruction Counter. picno: 0 */
-static const struct nametable Rock_names0[] = {
- {0x1, "Instr_All"},
- /* Synthetic counters */
- {INSTR_MODE_NOR, "Instr_Normal"},
- {INSTR_MODE_OOO, "Instr_Out_Of_Order"},
- {INSTR_MODE_EXE, "Instr_Execute_Ahead"},
- {INSTR_MODE_DLY, "Instr_Delay"},
- {INSTR_MODE_DEF, "Instr_Deferred"},
- {INSTR_MODE_HWS, "Instr_Scout"},
-
- {INSTR_TYPE_LD, "Instr_Load"},
- {INSTR_TYPE_ST, "Instr_Store"},
- {INSTR_TYPE_CTI, "Instr_Branch"},
- {INSTR_TYPE_FP, "Instr_Float"},
-
- {INSTR_EVT_DC_MISS, "Instr_Dcache_Miss"},
- {INSTR_EVT_PRIOR_MISS, "Instr_Prior_Miss"},
- {INSTR_EVT_DTLB_MISS, "Instr_Dtlb_Miss"},
- {INSTR_EVT_LDB_FULL, "Instr_Loadbuf_Full"},
- {INSTR_EVT_STB_FULL, "Instr_Storebuf_Full"},
- {INSTR_EVT_FE_STALL, "Instr_Stall"},
- {INSTR_EVT_FROM_DQ, "Instr_DQ"},
- {INSTR_EVT_CORRECT_BP, "Instr_Correct_Branch_Predict"},
- {INSTR_EVT_BYPASS_RAW, "Instr_Bypass_Raw"},
- {INSTR_EVT_NONBYPASS_RAW, "Instr_Nonbypass_Raw"},
- {INSTR_EVT_CTI_TAKEN, "Instr_Branch_Taken"},
- {INSTR_EVT_FAILED_SPEC, "Instr_Failed_Spec"},
-
- {NT_END, ""}
-};
-
-/* L2 Counters. picno: 1 */
-static const struct nametable Rock_names1[] = {
- {0x1, "L2_Icache_Load"},
- {0x2, "L2_Dcache_Load"},
- {0x4, "L2_Instr_Prefetch"},
- {0x8, "L2_Store_Prefetch"},
- {0x10, "L2_Store"},
- {0x20, "L2_Atomic_Ops"},
- {0x40, "L2_Flush"},
- /* Synthetic counters */
- {L2_DS_L3, "L2_Load_From_L3"},
- {L2_DS_DRAM, "L2_Load_From_Dram"},
- {L2_DS_OTHER_L2, "L2_Load_From_Other_L2"},
-
- {L2_TXN_LD_MISS, "L2_Load_Miss"},
- {L2_TXN_ST_MISS, "L2_Store_Miss"},
- {L2_TXN_LD_HIT, "L2_Load_Hit"},
- {L2_TXN_ST_HIT, "L2_Store_Hit"},
-
- {L2_EVT_HIT, "L2_Hit"},
- {L2_EVT_MISS, "L2_Miss"},
- {NT_END, ""}
-};
-
-/* MMU Counters. picno: 2 */
-static const struct nametable Rock_names2[] = {
- {MMU_ALL_TXNS, "MMU_All"},
- {MMU_ITLB_MISS, "MMU_Itlb_Miss"},
- {MMU_DTLB_MISS, "MMU_Dtlb_Miss"},
- {MMU_UTLB_MISS, "MMU_Utlb_Miss"},
- {MMU_UTLB_HIT, "MMU_Utlb_Hit"},
- {MMU_ITLB_MISS_UTLB_MISS, "MMU_I_Utlb_Miss"},
- {MMU_ITLB_MISS_UTLB_HIT, "MMU_I_Utlb_Hit"},
- {MMU_DTLB_MISS_UTLB_MISS, "MMU_D_Utlb_Miss"},
- {MMU_DTLB_MISS_UTLB_HIT, "MMU_D_Utlb_Hit"},
- {NT_END, ""}
-};
-
-/* YANK Counter. picno: 3 */
-static const struct nametable Rock_names3[] = {
- {MCCDESR_YANK, "Yank"},
- {NT_END, ""}
-};
-
-/* SIBLK Counter. picno: 4 */
-static const struct nametable Rock_names4[] = {
- {MCCDESR_SIBLK, "Siblk"},
- {NT_END, ""}
-};
-
-/* LVLK Counter. picno: 5 */
-static const struct nametable Rock_names5[] = {
- {MCCDESR_LVLK, "Lvlk"},
- {NT_END, ""}
-};
-
-static const struct nametable *Rock_names[NUM_PCBE_COUNTERS] = {
- Rock_names0,
- Rock_names1,
- Rock_names2,
- Rock_names3,
- Rock_names4,
- Rock_names5
-};
-
-extern char cpu_module_name[];
-uint32_t num_ringbuf_entries = 500;	/* Should be an EVEN number */
-static const struct nametable **events;
-static char *pic_events[NUM_PCBE_COUNTERS];
-static rk_pcbe_config_t *active_pics[NUM_PCBE_COUNTERS][NCPU];
-static boolean_t rock_pcbe_hsvc_available = B_TRUE;
-
-static char *rock_name;
-static char rock_cpuref[256];
-static char pcbe_module_name[64] = "pcbe.";
-
-static hsvc_info_t rock_pcbe_hsvc = {
- HSVC_REV_1, /* HSVC rev num */
- NULL, /* Private */
- HSVC_GROUP_RKPERF, /* Requested API Group */
- ROCK_HSVC_MAJOR, /* Requested Major */
- ROCK_HSVC_MINOR, /* Requested Minor */
- pcbe_module_name /* Module name */
-};
-
-/* Function Definitions */
-static struct modlpcbe modlpcbe = {
- &mod_pcbeops,
- "Perf Counters v1.1",
- &rk_pcbe_ops
-};
-
-static struct modlinkage modl = {
- MODREV_1,
- &modlpcbe,
-};
-
-/*
- * Below two structures are used to pass data from program_*_sampler() to
- * program_a_sampler()
- */
-struct asi {
- uint64_t va;
- uint64_t value;
-};
-
-typedef struct _s {
- char name[32]; /* User friendly name */
- int asi_config_num; /* Num of ASIs to be configured */
- struct asi asi_config[10]; /* ASIs that gets configured */
- int asi_sample_num; /* Num of data return ASIs */
- uint64_t asi_sample[10]; /* Data return ASIs when sampled */
-} program_sampler_data_t;
-
-/* Local Function prototypes */
-static void rk_pcbe_stop_synthetic(rk_pcbe_config_t *pic);
-static void rk_pcbe_release(rk_pcbe_config_t *pic);
-static void rk_pcbe_free_synthetic(rk_pcbe_config_t *pic);
-
-static int rk_pcbe_program_normal(rk_pcbe_config_t *pic);
-static int rk_pcbe_program_synthetic(rk_pcbe_config_t *pic);
-static int program_l2_sampler(rk_pcbe_config_t *pic);
-static int program_instr_sampler(rk_pcbe_config_t *pic);
-static int program_a_sampler(rk_pcbe_config_t *pic,
- program_sampler_data_t *sdata);
-
-static int rk_pcbe_sample_internal(rk_pcbe_config_t *pic, uint64_t *diffp);
-static int rk_pcbe_sample_synthetic(rk_pcbe_config_t *pic, int64_t *diffp);
-static int sample_l2_sampler(rk_pcbe_config_t *pic, int64_t *diffp);
-static int sample_instr_sampler(rk_pcbe_config_t *pic, int64_t *diffp);
-static int sample_mccdesr(rk_pcbe_config_t *pic, int64_t *diffp);
-static int synthesize_sample_count(rk_pcbe_config_t *pic, uint64_t sample_count,
- uint64_t sample_hit_count, char *name, int64_t *diffp);
-
-static int alloc_ringbuffer(rk_pcbe_config_t *pic, uint32_t size,
- uint32_t num_samples);
-static void free_ringbuffer(rk_pcbe_config_t *pic);
-static void print_hv_error(uint64_t rc, int *cntp, char *funcname,
- rk_pcbe_config_t *pic);
-static void set_string_constants(void);
-static uint64_t bitmask(uint8_t);
-
-#ifdef RKPCBE_DBG
-static void print_pic(rk_pcbe_config_t *pic, char *heading);
-static void set_pic_name(rk_pcbe_config_t *pic);
-/* lock for print clarity */
-static kmutex_t print_pic_lock;
-#define PRINT_PIC(pic, heading) \
- print_pic(pic, heading)
-#define DBG_PRINT(_z) printf _z
-#else
-#define PRINT_PIC(pic, heading) (void)0
-#define DBG_PRINT(ignore) (void)0
-#endif
-
-int
-_init(void)
-{
- if (rk_pcbe_init() != 0)
- return (ENOTSUP);
- return (mod_install(&modl));
-}
-
-int
-_fini(void)
-{
- if (rk_pcbe_fini() != 0)
- return (EBUSY);
- return (mod_remove(&modl));
-}
-
-int
-_info(struct modinfo *mi)
-{
- return (mod_info(&modl, mi));
-}
-
-static int
-rk_pcbe_init(void)
-{
- const struct nametable *n;
- int i, status, j;
- size_t size;
- uint64_t rock_pcbe_hsvc_sup_minor;
-
- set_string_constants();
- /*
- * Validate API version for Rock pcbe hypervisor services
- */
- status = hsvc_register(&rock_pcbe_hsvc, &rock_pcbe_hsvc_sup_minor);
- if ((status != 0) || (rock_pcbe_hsvc_sup_minor <
- (uint64_t)ROCK_HSVC_MINOR)) {
- cmn_err(CE_WARN, "%s cannot negotiate hypervisor services: "
- "major: 0x%lx minor: 0x%lx group: 0x%x errno: %d",
- pcbe_module_name, rock_pcbe_hsvc.hsvc_major,
- rock_pcbe_hsvc.hsvc_minor, HSVC_GROUP_RKPERF, status);
- rock_pcbe_hsvc_available = B_FALSE;
- return (-1);
- }
-
- events = Rock_names;
- /*
- * Initialize the list of events for each PIC.
- * Do two passes: one to compute the size necessary and another
- * to copy the strings. Need room for event, comma, and NULL terminator.
- */
- for (i = 0; i < NUM_PCBE_COUNTERS; i++) {
- size = 0;
- for (n = events[i]; n->bits != NT_END; n++)
- size += strlen(n->name) + 1;
- pic_events[i] = kmem_alloc(size + 1, KM_SLEEP);
- *pic_events[i] = '\0';
- for (n = events[i]; n->bits != NT_END; n++) {
- (void) strcat(pic_events[i], n->name);
- (void) strcat(pic_events[i], ",");
- }
- /*
- * Remove trailing comma.
- */
- pic_events[i][size - 1] = '\0';
-
- /* Initialize all active pics as NULL */
- for (j = 0; j < NCPU; j++)
- active_pics[i][j] = NULL;
- }
-#ifdef RKPCBE_DBG
- mutex_init(&print_pic_lock, NULL, MUTEX_DRIVER,
- (void *)ipltospl(PIL_15));
-#endif
- return (0);
-}
-
-static int
-rk_pcbe_fini(void)
-{
- return (0);
-}
-
-static uint_t
-rk_pcbe_ncounters(void)
-{
- return (NUM_PCBE_COUNTERS);
-}
-
-static const char *
-rk_pcbe_impl_name(void)
-{
- return (rock_name);
-}
-
-static const char *
-rk_pcbe_cpuref(void)
-{
- return (rock_cpuref);
-}
-
-static char *
-rk_pcbe_list_events(uint_t picnum)
-{
- ASSERT(picnum >= (uint_t)0 && picnum < cpc_ncounters);
-
- return (pic_events[picnum]);
-}
-
-static char *
-rk_pcbe_list_attrs(void)
-{
- /*
-	 * If no value is specified on the command line for an
-	 * attribute, then a default value of 1 is passed into
-	 * pcbe from cpc. Specifying a value of zero is as good as
-	 * not specifying it.
-	 * The 'source' attribute is the equivalent of 'single, shared,
-	 * siu, mmu' all put together. 'source' takes precedence
-	 * over the others.
-	 * Valid 'source' values are defined in rock_hypervisor_api.h.
-	 * If multiple flags need to be specified, then the user has to
-	 * specify the bitwise OR of the flags of interest.
-	 * populate_pic_config validates the correctness of the flags
-	 * specified.
-	 * tl is a little odd. To count instructions at
-	 * tl == 0, specify tl = TLZ on the command line;
-	 * at tl > 0, specify tl = TLNZ.
-	 * The reason for this oddness: attr = 0 means neglect
-	 * that attr.
- */
- return ("freq,source,single,shared,siu,mmu,nohws,tl,hpriv");
-}
-
-static const struct nametable *
-find_event(int picno, char *name)
-{
- const struct nametable *n;
-
- for (n = events[picno]; n->bits != NT_END; n++)
- if (strcmp(name, n->name) == 0)
- return (n);
-
- return (NULL);
-}
-
-static uint64_t
-rk_pcbe_event_coverage(char *event)
-{
- uint64_t bitmap = 0;
- int i;
-
- /* There is no intersection of events between different PICs */
- for (i = 0; i < NUM_PCBE_COUNTERS; i++) {
- if (find_event(i, event) != NULL) {
- bitmap = 1 << i;
- break;
- }
- }
- return (bitmap);
-}
-
-static uint64_t
-rk_pcbe_overflow_bitmap(void)
-{
- int i;
- rk_pcbe_config_t *pic;
- uint64_t ovf_bitmask = 0, ovf_cnt;
-
- for (i = 0; i < NUM_PCBE_COUNTERS; i++) {
- pic = active_pics[i][CPU->cpu_id];
-
- if (pic == NULL || pic->inuse != B_TRUE)
- continue;
-
- DBG_PRINT(("CPU-%d: Pic %s (#%d, cntr %X) overflowed\n",
- CPU->cpu_id, pic->name, pic->pcbe_picno, pic->counter));
-
- /* Check if any of the active pics overflowed */
- if (pic->counter_type == NORMAL_COUNTER) {
- hv_rk_perf_count_overflow((uint64_t)(pic->counter |
- pic->src_type), &ovf_cnt);
- if (ovf_cnt > 0)
- pic->pcbe_pic += (0x1ULL << pic->counter_bits);
- } else {
- /*
-			 * Synthetic counters don't overflow, so we must have
-			 * gotten here because the ring buffer is getting
-			 * half-full or because one of the normal counters
-			 * underlying the synthetic counter overflowed. Force
-			 * cpc to call rk_pcbe_sample_synthetic by setting
-			 * ovf_cnt to 1. If 0 were returned, cpc would print:
- * "WARNING: interrupt 0x80c at level 15 not serviced"
- */
- ovf_cnt = B_TRUE;
- }
-
- if (ovf_cnt > 0)
- ovf_bitmask |= (1 << pic->pcbe_picno);
- }
- return (ovf_bitmask);
-}
-
-/*
- * populate_pic_config
- *
- * Checks the validity of all the attributes and then updates flags
- * to reflect priv bits for Cycle and Instruction counters and
- * transaction bits for L2 and makes sure that flags is 0 for MMU.
- *
- * Along with validating the inputs, pic is populated with appropriate
- * values.
- *
- * Returns 0 on success and CPC_INVALID_ATTRIBUTE on failure.
- */
-static int
-populate_pic_config(uint_t picnum, uint_t nattrs, kcpc_attr_t *attrs,
- uint32_t bits, rk_pcbe_config_t *pic)
-{
- int i;
- uint32_t freq = 0;
- uint32_t *flagsp = &(pic->flags);
- uint32_t source = 0;
-
- pic->pcbe_picno = (uint8_t)picnum;
- pic->toe = B_TRUE;
- pic->sampler.synthetic_pic = 0;
- pic->sampler.ring_buffer = NULL;
- pic->inuse = UNINITIALIZED;
- pic->counter_type = ((bits & SYN_BIT) == 0) ? NORMAL_COUNTER :
- SYNTHETIC_COUNTER;
-
- /*
-	 * Initialized to 0. If a valid source attribute is specified, the
-	 * src_type field gets populated later; otherwise it defaults to
-	 * HV_RK_PERF_SRC_STRAND.
- */
- pic->src_type = 0;
- /*
-	 * Initialized to zero. In all the fall-through cases, this
-	 * is checked to determine whether certain fields need to be
-	 * populated or not.
- */
- pic->counter = 0;
-
- /*
-	 * When a synthetic counter's ring buffer reaches the HWM, HV
-	 * generates a PIC overflow trap to get the guest's attention. This
-	 * is not the same as a hardware counter overflow. The size of the
-	 * ring buffer is configurable, and since there is no definite size,
-	 * the CPC_OVF_NOTIFY_EMT flag has no meaning wrt synthetic counters.
- */
- if ((bits & SYN_BIT) && (*flagsp & CPC_OVF_NOTIFY_EMT))
- return (CPC_PIC_NOT_CAPABLE);
-
- /*
- * This flag is used by CPC to inform the application of a counter
- * overflow. It is of no use to PCBE.
- */
- *flagsp &= ~(CPC_OVF_NOTIFY_EMT);
-
- switch (picnum) {
-#define PRIV_BITS_MASK 0x7
-#define PRIV_BIT0_MASK 0x1
-#define PRIV_BIT1_MASK 0x2
-#define PRIV_BIT2_MASK 0x4
-
- case 0: /* Instruction Counter */
- pic->counter = RK_PERF_INSTR;
- pic->counter_bits = INSTR_COUNTER_BITS;
-
- freq = INSTR_SAM_DEF_FREQ; /* Default Frequency */
-
- for (i = 0; i < nattrs; i++) {
- if ((strcmp(attrs[i].ka_name, "freq") == 0)) {
- if ((bits & SYN_BIT) == 0 &&
- attrs[i].ka_val) {
- return (CPC_INVALID_ATTRIBUTE);
- }
- freq = attrs[i].ka_val;
- } else if ((strcmp(attrs[i].ka_name,
- "single") == 0) && attrs[i].ka_val)
- pic->src_type |=
- HV_RK_PERF_SRC_STRAND;
- else if ((strcmp(attrs[i].ka_name,
- "shared") == 0) && attrs[i].ka_val)
- pic->src_type |=
- HV_RK_PERF_SRC_STRAND_M;
- else if ((strcmp(attrs[i].ka_name,
- "hpriv") == 0) && attrs[i].ka_val)
- *flagsp |= CPC_COUNT_HV;
- else if ((strcmp(attrs[i].ka_name,
- "source") == 0) && attrs[i].ka_val)
- source = attrs[i].ka_val &
- HV_RK_PERF_SRC_MASK;
- else if ((strcmp(attrs[i].ka_name,
- "nohws") == 0) && attrs[i].ka_val) {
- if (bits & SYN_BIT)
- pic->sampler.nohws = B_TRUE;
- else if (attrs[i].ka_val)
- return (CPC_INVALID_ATTRIBUTE);
- } else if ((strcmp(attrs[i].ka_name,
- "tl") == 0) && attrs[i].ka_val) {
- if (bits & SYN_BIT) {
- pic->sampler.tl =
- (uint8_t)attrs[i].ka_val;
- } else if (attrs[i].ka_val)
- return (CPC_INVALID_ATTRIBUTE);
- } else {
- if (attrs[i].ka_val)
- return (CPC_INVALID_ATTRIBUTE);
- }
- }
-
- if (source) {
- if (source & (HV_RK_PERF_SRC_SIU |
- HV_RK_PERF_SRC_MMU))
- return (CPC_INVALID_ATTRIBUTE);
- pic->src_type = source;
- }
-
- if (pic->src_type == 0)
- pic->src_type = HV_RK_PERF_SRC_STRAND;
-
- /*
- * hpriv, sys, user are sent as bits 3, 2, 1 from kcpc.
- * They are maintained by PCBE as bits 2, 1, & 0.
- */
- *flagsp >>= 1;
- *flagsp &= PRIV_BITS_MASK;
- if (bits & SYN_BIT) {
- pic->sampler.flags = *flagsp;
- pic->sampler.syn_counter = bits;
- if (freq > INSTR_SAM_MAX_FREQ) {
- cmn_err(CE_NOTE, "CPU-%d: freq set "
- "> MAX. Resetting to %d",
- CPU->cpu_id, INSTR_SAM_MAX_FREQ);
- freq = INSTR_SAM_MAX_FREQ;
- }
- if (freq < INSTR_SAM_MIN_FREQ) {
- cmn_err(CE_NOTE, "CPU-%d: freq set "
- "< MIN. Resetting to %d",
- CPU->cpu_id, INSTR_SAM_MIN_FREQ);
- freq = INSTR_SAM_MIN_FREQ;
- }
- pic->sampler.frequency = freq;
- }
- /*
-		 * When programming the counter, priv bits should be
-		 * 0, 1, & 2, i.e., in reverse order. Therefore swap
-		 * bits 2 & 0.
- */
- *flagsp = ((*flagsp & PRIV_BIT0_MASK) << 2) |
- ((*flagsp & PRIV_BIT2_MASK) >> 2) |
- (*flagsp & PRIV_BIT1_MASK);
- break;
- case 1: /* L2 counter */
- /*
- * nouser and sys are also invalid attributes for L2
- * and MMU counters. If user has not specified any
- * attributes then *flagsp contains CPC_COUNT_USER.
- * Any priv attrs are not applicable for L2 counters.
- */
- if (*flagsp != CPC_COUNT_USER)
- return (CPC_INVALID_ATTRIBUTE);
-
- pic->counter_bits = L2_COUNTER_BITS;
- if ((bits & SYN_BIT) == 0) {
- /*
- * Normal counter:
-			 * Find the attributes for the L2 Counter.
- */
- for (i = 0; i < nattrs; i++) {
- if ((strcmp(attrs[i].ka_name,
- "single") == 0) && attrs[i].ka_val)
- pic->src_type |=
- HV_RK_PERF_SRC_STRAND;
- else if ((strcmp(attrs[i].ka_name,
- "shared") == 0) && attrs[i].ka_val)
- pic->src_type |=
- HV_RK_PERF_SRC_STRAND_M;
- else if ((strcmp(attrs[i].ka_name,
- "siu") == 0) && attrs[i].ka_val)
- pic->src_type |=
- HV_RK_PERF_SRC_SIU;
- else if ((strcmp(attrs[i].ka_name,
- "mmu") == 0) && attrs[i].ka_val)
- pic->src_type |=
- HV_RK_PERF_SRC_MMU;
- else if ((strcmp(attrs[i].ka_name,
- "source") == 0) && attrs[i].ka_val)
- source = attrs[i].ka_val &
- HV_RK_PERF_SRC_MASK;
- else if (attrs[i].ka_val)
- return (CPC_INVALID_ATTRIBUTE);
- }
- if (source)
- pic->src_type = source;
-
- if (pic->src_type == 0)
- pic->src_type = HV_RK_PERF_SRC_STRAND;
-
- /* At least one hot Xn flag for L2 counters */
- *flagsp = bits;
- } else {
- /*
- * Synthetic Counter
- */
- pic->sampler.syn_counter = bits;
- freq = L2_SAM_DEF_FREQ; /* Default Frequency */
- /*
-			 * Find the attributes for the L2 Sampler.
- */
- for (i = 0; i < nattrs; i++) {
- if ((strcmp(attrs[i].ka_name,
- "freq") == 0) && attrs[i].ka_val)
- freq = attrs[i].ka_val;
- else if ((strcmp(attrs[i].ka_name,
- "single") == 0) && attrs[i].ka_val)
- pic->src_type |=
- HV_RK_PERF_SRC_STRAND;
- else if ((strcmp(attrs[i].ka_name,
- "shared") == 0) && attrs[i].ka_val)
- pic->src_type |=
- HV_RK_PERF_SRC_STRAND_M;
- else if ((strcmp(attrs[i].ka_name,
- "siu") == 0) && attrs[i].ka_val)
- pic->src_type |=
- HV_RK_PERF_SRC_SIU;
- else if ((strcmp(attrs[i].ka_name,
- "mmu") == 0) && attrs[i].ka_val)
- pic->src_type |=
- HV_RK_PERF_SRC_MMU;
- else if ((strcmp(attrs[i].ka_name,
- "source") == 0) && attrs[i].ka_val)
- source = attrs[i].ka_val &
- HV_RK_PERF_SRC_MASK;
- else if (attrs[i].ka_val)
- return (CPC_INVALID_ATTRIBUTE);
- }
- if (source)
- pic->src_type = source;
-
- if (pic->src_type == 0)
- pic->src_type = HV_RK_PERF_SRC_STRAND;
-
- /* Range check to avoid DOS */
- if (freq > L2_SAM_MAX_FREQ) {
- cmn_err(CE_NOTE, "CPU-%d: freq set "
- "> MAX. Resetting to %d",
- CPU->cpu_id, L2_SAM_MAX_FREQ);
- freq = L2_SAM_MAX_FREQ;
- }
- if (freq < L2_SAM_MIN_FREQ) {
- cmn_err(CE_NOTE, "CPU-%d: freq set "
- "< MIN. Resetting to %d",
- CPU->cpu_id, L2_SAM_MIN_FREQ);
- freq = L2_SAM_MIN_FREQ;
- }
- pic->sampler.frequency = freq;
- *flagsp = 0;
- }
- pic->counter = RK_PERF_L2;
- break;
- case 2: /* MMU Counter */
- if (*flagsp != CPC_COUNT_USER)
- return (CPC_INVALID_ATTRIBUTE);
-
- *flagsp = bits;
- pic->counter_bits = MMU_COUNTER_BITS;
-
- for (i = 0; i < nattrs; i++) {
- if ((strcmp(attrs[i].ka_name, "single") == 0) &&
- attrs[i].ka_val)
- pic->src_type |= HV_RK_PERF_SRC_STRAND;
- else if
- ((strcmp(attrs[i].ka_name, "shared") ==
- 0) && attrs[i].ka_val)
- pic->src_type |=
- HV_RK_PERF_SRC_STRAND_M;
- else if ((strcmp(attrs[i].ka_name,
- "source") == 0) && attrs[i].ka_val)
- source = attrs[i].ka_val &
- HV_RK_PERF_SRC_MASK;
- else if (attrs[i].ka_val)
- return (CPC_INVALID_ATTRIBUTE);
- }
- if (source) {
- if (source & (HV_RK_PERF_SRC_SIU |
- HV_RK_PERF_SRC_MMU))
- return (CPC_INVALID_ATTRIBUTE);
- pic->src_type = source;
- }
-
-
- if (pic->src_type == 0)
- pic->src_type = HV_RK_PERF_SRC_STRAND;
-
- pic->counter = RK_PERF_MMU;
- break;
- case 3: /* YANK Counter */
- pic->counter = RK_PERF_YANK;
- pic->counter_bits = YANK_COUNTER_BITS;
- /* FALLTHROUGH */
- case 4: /* SIBLK Counter */
- if (pic->counter == 0) {
- pic->counter = RK_PERF_SIBLK;
- pic->counter_bits = SIBLK_COUNTER_BITS;
- }
- /* FALLTHROUGH */
- case 5: /* LVLK Counter */
- if (pic->counter == 0) {
- pic->counter = RK_PERF_LVLK;
- pic->counter_bits = LVLK_COUNTER_BITS;
- }
-
- if (*flagsp != CPC_COUNT_USER)
- return (CPC_INVALID_ATTRIBUTE);
-
- for (i = 0; i < nattrs; i++) {
- if ((strcmp(attrs[i].ka_name, "single") ==
- 0) && attrs[i].ka_val)
- pic->src_type |= HV_RK_PERF_SRC_STRAND;
- else if
- ((strcmp(attrs[i].ka_name, "shared") ==
- 0) && attrs[i].ka_val)
- pic->src_type |=
- HV_RK_PERF_SRC_STRAND_M;
- else if ((strcmp(attrs[i].ka_name,
- "source") == 0) && attrs[i].ka_val)
- source = attrs[i].ka_val &
- HV_RK_PERF_SRC_MASK;
- else if (attrs[i].ka_val)
- return (CPC_INVALID_ATTRIBUTE);
- }
- if (source) {
- if (source & (HV_RK_PERF_SRC_SIU |
- HV_RK_PERF_SRC_MMU))
- return (CPC_INVALID_ATTRIBUTE);
- pic->src_type = source;
- }
-
-
- if (pic->src_type == 0)
- pic->src_type = HV_RK_PERF_SRC_STRAND;
-
- *flagsp = 0;
- pic->sampler.frequency = 0;
- pic->sampler.syn_counter = bits;
- break;
- }
-
- if ((int64_t)pic->pcbe_pic > COUNTER_MAX(pic) ||
- (int64_t)pic->pcbe_pic < COUNTER_MIN(pic))
- return (CPC_ATTRIBUTE_OUT_OF_RANGE);
-
- pic->pcbe_pic &= COUNTER_MASK(pic);
-
-#ifdef RKPCBE_DBG
- set_pic_name(pic);
-#endif
- return (0);
-}
-
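[Editor's sketch, not from the source, of the priv-bit shuffling populate_pic_config() does for the instruction counter: kcpc hands down hpriv/sys/user in bits 3..1, PCBE keeps them in bits 2..0, and the hardware wants them in reverse order, so bits 2 and 0 are swapped.]

	#include <stdint.h>

	#define	PRIV_BITS_MASK	0x7
	#define	PRIV_BIT0_MASK	0x1
	#define	PRIV_BIT1_MASK	0x2
	#define	PRIV_BIT2_MASK	0x4

	static uint32_t
	priv_to_hw(uint32_t kcpc_flags)
	{
		/* bits 3..1 (hpriv/sys/user) down to bits 2..0 */
		uint32_t f = (kcpc_flags >> 1) & PRIV_BITS_MASK;

		return (((f & PRIV_BIT0_MASK) << 2) |	/* user  -> bit 2 */
		    ((f & PRIV_BIT2_MASK) >> 2) |	/* hpriv -> bit 0 */
		    (f & PRIV_BIT1_MASK));		/* sys stays bit 1 */
	}
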
-/*ARGSUSED7*/
-static int
-rk_pcbe_configure(uint_t picnum, char *event, uint64_t preset, uint32_t flags,
- uint_t nattrs, kcpc_attr_t *attrs, void **data, void *token)
-{
- rk_pcbe_config_t *pic;
- const struct nametable *n;
- int rc;
-
- /* Is API version for Rock pcbe hypervisor services negotiated? */
- if (rock_pcbe_hsvc_available == B_FALSE)
- return (CPC_RESOURCE_UNAVAIL);
-
- /*
- * If we've been handed an existing configuration, we need only preset
- * the counter value.
- */
- if (*data != NULL) {
- pic = *data;
- if ((int64_t)preset > COUNTER_MAX(pic) ||
- (int64_t)preset < COUNTER_MIN(pic))
- return (CPC_ATTRIBUTE_OUT_OF_RANGE);
- pic->pcbe_pic = preset & COUNTER_MASK(pic);
- return (0);
- }
-
- if (picnum < (uint_t)0 || picnum > NUM_PCBE_COUNTERS)
- return (CPC_INVALID_PICNUM);
-
- /*
- * Find other requests that will be programmed with this one, and ensure
- * they don't conflict.
-	 * Is any other counter in this pic group active?
- */
- if (active_pics[picnum][CPU->cpu_id] != NULL)
- return (CPC_CONFLICTING_REQS);
-
- if ((n = find_event(picnum, event)) == NULL)
- return (CPC_INVALID_EVENT);
-
- /* Check for supported attributes and populate pic */
- pic = kmem_zalloc(sizeof (rk_pcbe_config_t), KM_SLEEP);
- pic->flags = flags;
- pic->pcbe_pic = preset;
-
- if (rc = populate_pic_config(picnum, nattrs, attrs, n->bits, pic)) {
- kmem_free(pic, sizeof (rk_pcbe_config_t));
- return (rc);
- }
-
- /*
-	 * num_ringbuf_entries should always be even. Since this is an
-	 * /etc/system tunable, we need to check for this here.
- */
- if (num_ringbuf_entries & 1) {
- num_ringbuf_entries++;
- cmn_err(CE_WARN, "num_ringbuf_entries should be even."
- " Changing %u to %u\n", num_ringbuf_entries - 1,
- num_ringbuf_entries);
- }
- if (num_ringbuf_entries < MIN_RINGBUF_ENTRIES) {
- cmn_err(CE_WARN, "num_ringbuf_entries should be at least "
- "%u. Changing %u to %u\n", MIN_RINGBUF_ENTRIES,
- num_ringbuf_entries, MIN_RINGBUF_ENTRIES);
- num_ringbuf_entries = MIN_RINGBUF_ENTRIES;
- }
-
- pic->state = STATE_CONFIGURED;
- pic->cpu = CPU->cpu_id;
- active_pics[picnum][pic->cpu] = pic;
- *data = pic;
-
- if (pic->counter_type == NORMAL_COUNTER)
- PRINT_PIC(pic, "After Configuration (N)");
- return (0);
-}
-
-static void
-rk_pcbe_program(void *token)
-{
- rk_pcbe_config_t *pic = NULL;
- int rc;
- uint64_t counter;
-
- while ((pic = (rk_pcbe_config_t *)kcpc_next_config(token, pic, NULL))
- != NULL) {
-
- if (pic->inuse == B_FALSE)
- continue;
-
- counter = (uint64_t)(pic->counter | pic->src_type);
- rc = (int)hv_rk_perf_count_init(counter);
-
- if (curthread->t_cpc_ctx) {
- /*
- * If in thread context, pic should get an exclusive
- * lock. If it cannot then invalidate the pic.
- */
- if (rc != H_EOK) {
- kcpc_invalidate_config(token);
- continue;
- }
- } else {
- /* Must be cpu context */
- ASSERT(CPU->cpu_cpc_ctx);
- if (rc == H_EWOULDBLOCK &&
- (pic->src_type & HV_RK_PERF_SRC_STRAND_M)) {
- /* pic in use by a cpu of current guest */
- pic->inuse = B_FALSE;
- continue;
- } else if (rc != H_EOK) {
- /*
- * Either the counter is in use by a different
- * guest or another cpu in the current guest is
- * already using it in single source mode. In
- * either case, invalidate the pic.
- */
- kcpc_invalidate_config(token);
- continue;
- }
- }
-
- /*
- * rc = H_EOK, hence current cpu was successful in
- * obtaining exclusive access to the counter, Set this
- * pic as active.
- */
- if (CPU->cpu_id != pic->cpu) {
- active_pics[pic->pcbe_picno][pic->cpu] = NULL;
- pic->cpu = CPU->cpu_id;
- active_pics[pic->pcbe_picno][pic->cpu] = pic;
- }
- pic->inuse = B_TRUE;
-
- if (pic->counter_type == NORMAL_COUNTER)
- rc = rk_pcbe_program_normal(pic);
- else
- rc = rk_pcbe_program_synthetic(pic);
-
- pic->state = STATE_PROGRAMMED;
-
- if (rc != H_EOK) {
- kcpc_invalidate_config(token);
- continue;
- }
- }
-}
-
-static void
-rk_pcbe_allstop(void)
-{
- int i;
- rk_pcbe_config_t *pic;
- uint64_t diff;
-
- for (i = 0; i < NUM_PCBE_COUNTERS; i++) {
- pic = active_pics[i][CPU->cpu_id];
-
- if (pic == NULL || pic->state != STATE_PROGRAMMED)
- continue;
-
- ASSERT(pic->inuse == B_TRUE && CPU->cpu_id == pic->cpu);
-
- /* Stop all active pics */
- if (pic->counter_type == NORMAL_COUNTER) {
- hv_rk_perf_count_stop((uint64_t)(pic->counter |
- pic->src_type));
- DBG_PRINT(("CPU-%d: Counter %s(%X) stopped.\n",
- CPU->cpu_id, pic->name, pic->counter));
- } else {
- DBG_PRINT(("CPU-%d: Stopping counter %s(%lX)\n",
- CPU->cpu_id, pic->name,
- pic->sampler.synthetic_pic));
- rk_pcbe_stop_synthetic(pic);
- }
-
- /* Mark pic as stopped */
- pic->state = STATE_STOPPED;
-
- /*
- * If running in lwp context, kcpc ensures a cpu that
- * executed pcbe_program will be the one that executes
- * pcbe_allstop. However, pcbe_free may be executed on
- * a different strand. HV puts a restriction that the
- * strand that programmed the counter should be the one
- * that releases it. Therefore, when counters are bound
-		 * to thread context, counters are released every time
- * they are stopped.
- */
- if (CPU->cpu_cpc_ctx == NULL) {
- /*
- * If counter is being released, cache the current
- * sample since we cannot sample a counter that has
- * been released.
- */
- if (rk_pcbe_sample_internal(pic, &diff) == H_EOK)
- pic->pcbe_pic = diff;
- else
- pic->pcbe_pic = 0;
- rk_pcbe_release(pic);
- }
- }
-}
-
-static void
-rk_pcbe_sample(void *token)
-{
- rk_pcbe_config_t *pic = NULL;
- uint64_t *pic_data;
- int rc;
- uint64_t diff;
-
- while ((pic = (rk_pcbe_config_t *)
- kcpc_next_config(token, pic, &pic_data)) != NULL) {
-
- if (pic->inuse != B_TRUE) {
- continue;
- }
-
- /*
- * If counter is already released, then return the
- * cached value
- */
- if (pic->state == STATE_RELEASED) {
- *pic_data += pic->pcbe_pic;
- pic->pcbe_pic = 0;
- continue;
- }
-
- ASSERT(CPU->cpu_id == pic->cpu);
-
- rc = rk_pcbe_sample_internal(pic, &diff);
-
- if (pic->state == STATE_STOPPED) {
- pic->pcbe_pic = 0;
- rk_pcbe_release(pic);
- }
-
- if (rc == H_EOK) {
- *pic_data += diff;
- } else {
- kcpc_invalidate_config(token);
- }
- }
-}
-
-static void
-rk_pcbe_free(void *config)
-{
- rk_pcbe_config_t *pic = (rk_pcbe_config_t *)config;
-
- /* Release counter */
- if (pic->inuse == B_TRUE) {
- if (pic->state != STATE_RELEASED) {
- rk_pcbe_release(pic);
- }
- if (pic->counter_type == SYNTHETIC_COUNTER)
- rk_pcbe_free_synthetic(pic);
- }
-
- /* Mark pic as inactive */
- active_pics[pic->pcbe_picno][pic->cpu] = NULL;
- kmem_free(pic, sizeof (rk_pcbe_config_t));
-}
-
-static void
-rk_pcbe_release(rk_pcbe_config_t *pic)
-{
- int rc = 0;
-
- ASSERT(pic->inuse == B_TRUE && pic->state != STATE_RELEASED);
-
- DBG_PRINT(("CPU-%d: Releasing Pic %s (#%d, cntr %X) %p",
- CPU->cpu_id, pic->name, pic->pcbe_picno, pic->counter,
- (void *)pic));
-
- rc = (int)hv_rk_perf_count_release((uint64_t)
- (pic->counter | pic->src_type));
- if (rc != 0) {
- cmn_err(CE_WARN, "CPU-%d: Releasing Pic-%d, counter: %X failed "
- "%p. rc=%d", CPU->cpu_id, pic->pcbe_picno, pic->counter,
- (void *)pic, rc);
- }
- if (pic->counter_type == SYNTHETIC_COUNTER &&
- !(pic->counter == RK_PERF_YANK || pic->counter == RK_PERF_SIBLK ||
- pic->counter == RK_PERF_LVLK)) {
- rc = (int)hv_rk_perf_sample_release((uint64_t)
- (pic->counter | pic->src_type));
- if (rc != 0) {
- cmn_err(CE_WARN, "CPU-%d: Releasing Pic-%d, sampler: %X"
- " failed %p. rc=%d", CPU->cpu_id, pic->pcbe_picno,
- pic->counter, (void *)pic, rc);
- return;
- }
- }
- pic->state = STATE_RELEASED;
-}
-
-static int
-rk_pcbe_program_normal(rk_pcbe_config_t *pic)
-{
- uint64_t counter;
- uint64_t config_value;
- uint64_t rc = H_EOK;
-
- ASSERT(pic->inuse == B_TRUE);
-
- counter = (uint64_t)(pic->counter | pic->src_type);
-
- /* Preset the counter value if non-zero */
- if (pic->pcbe_pic > 0) {
- DBG_PRINT(("CPU-%d: Counter getting preset to %lu (%lX)\n",
- CPU->cpu_id, pic->pcbe_pic, pic->pcbe_pic));
- rc = (int)hv_rk_perf_count_set(counter, pic->pcbe_pic);
- }
-
- if (rc != H_EOK) {
- cmn_err(CE_WARN, "{%d} Pic %d cntr %X not set",
- CPU->cpu_id, pic->pcbe_picno, pic->counter);
- PRINT_PIC(pic, "Set counter failed");
- return ((int)rc);
- }
-
- /* Configure and start counter */
- config_value = ((uint64_t)pic->toe << RK_PERF_COUNT_TOE_SHIFT)
- | pic->flags;
- rc = (int)hv_rk_perf_count_start(counter, config_value);
-
- if (rc != H_EOK) {
- cmn_err(CE_WARN, "{%d} Pic %d cntr %X not configured",
- CPU->cpu_id, pic->pcbe_picno, pic->counter);
- PRINT_PIC(pic, "Configure counter failed");
- }
- return ((int)rc);
-}
-
-static int
-rk_pcbe_program_synthetic(rk_pcbe_config_t *pic)
-{
- int rc;
- ASSERT(pic->inuse == B_TRUE);
- switch (pic->counter) {
- case RK_PERF_INSTR:
- rc = program_instr_sampler(pic);
- break;
- case RK_PERF_L2:
- rc = program_l2_sampler(pic);
- break;
- case RK_PERF_YANK:
- /* FALLTHROUGH */
- case RK_PERF_SIBLK:
- /* FALLTHROUGH */
- case RK_PERF_LVLK:
- rc = rk_pcbe_program_normal(pic);
- break;
- default:
- PRINT_PIC(pic, "rk_pcbe_program_synthetic");
- ASSERT(0);
- rc = H_EINVAL;
- break;
- }
- return (rc);
-}
-
-static void
-rk_pcbe_free_synthetic(rk_pcbe_config_t *pic)
-{
- ASSERT(pic->inuse == B_TRUE);
- switch (pic->counter) {
- case RK_PERF_INSTR:
- /* FALLTHROUGH */
- case RK_PERF_L2:
- free_ringbuffer(pic);
- break;
- case RK_PERF_YANK:
- /* FALLTHROUGH */
- case RK_PERF_SIBLK:
- /* FALLTHROUGH */
- case RK_PERF_LVLK:
- /* Do nothing */
- break;
- default:
- PRINT_PIC(pic, "rk_pcbe_free_synthetic");
- ASSERT(0);
- break;
- }
-}
-
-static int
-rk_pcbe_sample_internal(rk_pcbe_config_t *pic, uint64_t *data)
-{
- uint64_t counter_value;
- int rc;
- int64_t diff;
-
- if (pic->counter_type == NORMAL_COUNTER) {
- rc = (int)hv_rk_perf_count_get((uint64_t)(pic->counter |
- pic->src_type), &counter_value);
- if (rc == H_EOK) {
- counter_value &= COUNTER_MASK(pic);
- diff = counter_value - pic->pcbe_pic;
- pic->pcbe_pic = counter_value;
- /*
- * When the counter overflows, the overflow handler
- * (rk_pcbe_overflow_bitmap) will have added the
- * MAX count value to pic->pcbe_pic. Therefore a
- * negative diff implies that the counter has
- * overflowed. The actual count amounts to:
- * (counter_value - (pic->pcbe_pic - MAX)) + MAX
- * => counter_value - pic->pcbe_pic + (2 * MAX)
- * => diff + (2 * MAX)
- */
- if (diff < 0) {
- diff +=
- (0x1ULL << (pic->counter_bits + 1));
- }
- }
- } else {
- /*
- * The difference returned by synthetic counters
- * is always positive.
- */
- rc = rk_pcbe_sample_synthetic(pic, &diff);
- }
-
- if (rc == H_EOK)
- *data = (uint64_t)diff;
-
- return ((int)rc);
-}
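
The wrap correction above can be checked in isolation. A minimal standalone sketch, assuming a 32-bit counter width and illustrative sample values (neither is taken from the driver):

    #include <stdio.h>
    #include <stdint.h>

    /*
     * Sketch of the overflow correction in rk_pcbe_sample_internal().
     * MAX is (1 << counter_bits); the overflow handler is assumed to
     * have added MAX to the saved value, so a negative diff means the
     * hardware counter wrapped since the last sample.
     */
    int
    main(void)
    {
            uint8_t counter_bits = 32;      /* assumed counter width */
            uint64_t max = 1ULL << counter_bits;
            uint64_t saved = 10 + max;      /* handler added MAX to 10 */
            uint64_t current = 4;           /* raw value after the wrap */
            int64_t diff = (int64_t)(current - saved);

            if (diff < 0)
                    diff += (int64_t)(1ULL << (counter_bits + 1));
            /* (current - (saved - max)) + max == max - 6 */
            printf("%lld events since last sample\n", (long long)diff);
            return (0);
    }
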
-
-/* All sample_synthetic code may be executed at TL=1 */
-static int
-rk_pcbe_sample_synthetic(rk_pcbe_config_t *pic, int64_t *diffp)
-{
- int rc;
- ASSERT(pic->inuse == B_TRUE);
- switch (pic->counter) {
- case RK_PERF_INSTR:
- rc = sample_instr_sampler(pic, diffp);
- break;
- case RK_PERF_L2:
- rc = sample_l2_sampler(pic, diffp);
- break;
- case RK_PERF_YANK:
- /* FALLTHROUGH */
- case RK_PERF_SIBLK:
- /* FALLTHROUGH */
- case RK_PERF_LVLK:
- rc = sample_mccdesr(pic, diffp);
- break;
- default:
- PRINT_PIC(pic, "rk_pcbe_sample_synthetic");
- ASSERT(0);
- rc = H_EINVAL;
- break;
- }
- return (rc);
-}
-
-static void
-rk_pcbe_stop_synthetic(rk_pcbe_config_t *pic)
-{
- uint64_t counter = (uint64_t)(pic->counter | pic->src_type);
-
- ASSERT(pic->inuse == B_TRUE);
- switch (pic->counter) {
- case RK_PERF_INSTR:
- /* FALLTHROUGH */
- case RK_PERF_L2:
- hv_rk_perf_count_stop(counter);
- hv_rk_perf_sample_stop(counter);
- break;
- case RK_PERF_YANK:
- /* FALLTHROUGH */
- case RK_PERF_SIBLK:
- /* FALLTHROUGH */
- case RK_PERF_LVLK:
- hv_rk_perf_count_stop(counter);
- break;
- default:
- PRINT_PIC(pic, "rk_pcbe_stop_synthetic");
- ASSERT(0);
- break;
- }
-}
-
-static int
-program_l2_sampler(rk_pcbe_config_t *pic)
-{
-#define ASI_PERF_L2_TXN_INFO 0xF10010
-#define ASI_PERF_L2_EA_MASK 0xF10018
-#define ASI_PERF_L2_EA_MATCH 0xF10020
-#define ASI_PERF_L2_TXN_INFO_FILTER 0xF10030
-#define ASI_PERF_L2_CC 0xF10038
-#define TXN_ICACHE_LOAD 0x1
-#define TXN_DCACHE_LOAD 0x2
-#define TXN_INSTR_PREFETCH 0x4
-#define TXN_STORE_PREFETCH 0x8
-#define TXN_DCACHE_STORE 0x10
-#define TXN_ATOMIC_LOAD_STORE 0x20
-#define TXN_FLUSH 0x40
-#define L2_ALL_TXNS (TXN_ICACHE_LOAD | TXN_DCACHE_LOAD | \
- TXN_INSTR_PREFETCH | TXN_STORE_PREFETCH | \
- TXN_DCACHE_STORE | TXN_ATOMIC_LOAD_STORE | TXN_FLUSH)
-#define L2_TXN_SHIFT 3
-#define L2_ALL_EVT 0x3
-#define L2_ALL_EVT_SHIFT 10
-#define L2_TXN_INFO_FILTER_MASK ((L2_ALL_EVT << L2_ALL_EVT_SHIFT) | \
- (L2_ALL_TXNS << L2_TXN_SHIFT))
-
- program_sampler_data_t sdata;
- int i = 0;
-
- (void) strcpy(sdata.name, "program_l2_sampler");
- pic->flags = L2_ALL_TXNS; /* For L2 counter */
-
- /*
- * If (((Reported EA ^ MATCH) & MASK) == 0) then sample is taken
- */
- sdata.asi_config[i].va = ASI_PERF_L2_EA_MASK;
- sdata.asi_config[i].value = 0;
- i++;
-
- sdata.asi_config[i].va = ASI_PERF_L2_EA_MATCH;
- sdata.asi_config[i].value = 0;
- i++;
-
- sdata.asi_config[i].va = ASI_PERF_L2_CC;
- sdata.asi_config[i].value = pic->sampler.frequency;
- i++;
-
- sdata.asi_config[i].va = ASI_PERF_L2_TXN_INFO_FILTER;
- sdata.asi_config[i].value = L2_TXN_INFO_FILTER_MASK;
-
- sdata.asi_config_num = i + 1;
-
- sdata.asi_sample[0] = ASI_PERF_L2_TXN_INFO;
- sdata.asi_sample_num = 1;
-
- return (program_a_sampler(pic, &sdata));
-}
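
The MASK/MATCH rule quoted in the comment above reduces to a one-line predicate. A sketch (the function name is illustrative, not part of the driver):

    #include <stdint.h>

    /*
     * A sample is taken when the reported value agrees with MATCH on
     * every bit that is set in MASK. With MASK == 0, as programmed by
     * program_l2_sampler() above, every sample is accepted.
     */
    static int
    sample_taken(uint64_t reported, uint64_t match, uint64_t mask)
    {
            return (((reported ^ match) & mask) == 0);
    }
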
-
-static int
-sample_l2_sampler(rk_pcbe_config_t *pic, int64_t *diffp)
-{
-#define DS_SHIFT 34
-#define EVT_SHIFT 22
-#define TXN_SHIFT 7
-#define DS_MASK MAKE_MASK(2, 0)
-#define EVT_MASK MAKE_MASK(4, 0)
-#define TXN_MASK MAKE_MASK(7, 0)
-
- rk_pcbe_ringbuf_t *ringbuf = pic->sampler.ring_buffer;
- uint32_t value, target;
- uint64_t *head, *tail;
- uint32_t sample_count = 0, sample_hit_count = 0;
- uint32_t size = pic->sampler.sample_size;
- uint8_t ds, evt;
- int ret;
-
- head = RINGBUF_GET_HEAD(ringbuf);
- tail = RINGBUF_GET_TAIL(ringbuf);
-
- if (head == tail) {
- DBG_PRINT(("CPU-%d: HEAD eq TAIL to start with\n",
- CPU->cpu_id));
- }
-
- /* Consume samples */
- while (head != tail) {
- uint64_t rawvalue = *head;
- DBG_PRINT(("CPU-%d: rawvalue=0x%lX\n", CPU->cpu_id, rawvalue));
- target = TYPE(pic->sampler.syn_counter);
-
- switch (GROUP(pic->sampler.syn_counter)) {
- case L2_GROUP_DS:
- value = (rawvalue >> DS_SHIFT) & DS_MASK;
- DBG_PRINT(("CPU-%d: value=0x%X, target=0x%X\n",
- CPU->cpu_id, value, target));
- switch (target) {
- case DS_DRAM: /* FALLTHROUGH */
- case DS_L3: /* FALLTHROUGH */
- case DS_OTHER_L2: /* FALLTHROUGH */
- if (value == target)
- sample_hit_count++;
- break;
- }
- break;
- case L2_GROUP_TXN_MISS:
- value = (rawvalue >> TXN_SHIFT) & TXN_MASK;
- ds = (uint8_t)((rawvalue >> DS_SHIFT) & DS_MASK);
- evt = (uint8_t)((rawvalue >> EVT_SHIFT) & EVT_MASK);
- DBG_PRINT(("CPU-%d: value=0x%X, target=0x%X, "
- " ds: 0x%X, evt: 0x%X\n", CPU->cpu_id, value,
- target, ds, evt));
- if (((value & target) != 0) && (evt == EVT_L2_MISS ||
- evt == EVT_L2_PRIOR_MISS) && (ds != DS_LOCAL_L2))
- sample_hit_count++;
- break;
- case L2_GROUP_TXN_HIT:
- value = (rawvalue >> TXN_SHIFT) & TXN_MASK;
- ds = (uint8_t)((rawvalue >> DS_SHIFT) & DS_MASK);
- evt = (uint8_t)((rawvalue >> EVT_SHIFT) & EVT_MASK);
- DBG_PRINT(("CPU-%d: value=0x%X, target=0x%X, "
- " ds: 0x%X, evt: 0x%X\n", CPU->cpu_id, value,
- target, ds, evt));
- if (((value & target) != 0) && (evt == EVT_L2_PEND_ST ||
- evt == EVT_L2_NOEVENTS) && (ds == DS_LOCAL_L2))
- sample_hit_count++;
- break;
- case L2_GROUP_EVT:
- evt = (rawvalue >> EVT_SHIFT) & EVT_MASK;
- ds = (uint8_t)((rawvalue >> DS_SHIFT) & DS_MASK);
- DBG_PRINT(("CPU-%d: evt=0x%X, target=0x%X, "
- "ds: 0x%X\n", CPU->cpu_id, evt, target, ds));
-
- switch (target) {
- case L2_HIT:
- if ((evt == EVT_L2_NOEVENTS || evt ==
- EVT_L2_PEND_ST) && ds == DS_LOCAL_L2)
- sample_hit_count++;
- break;
- case L2_MISS:
- if ((evt == EVT_L2_MISS || evt ==
- EVT_L2_PRIOR_MISS) && ds == DS_LOCAL_L2)
- sample_hit_count++;
- break;
- }
- }
- sample_count++;
- RINGBUF_MOVE_HEAD(ringbuf, head, size);
- }
- RINGBUF_SET_HEAD(ringbuf, head);
-
- ret = synthesize_sample_count(pic, sample_count, sample_hit_count,
- "sample_l2_sampler", diffp);
-
- return (ret);
-}
-
-static int
-program_instr_sampler(rk_pcbe_config_t *pic)
-{
-#define ASI_PERF_IS_PC_MASK 0x10
-#define ASI_PERF_IS_PC_MATCH 0x18
-#define ASI_PERF_IS_CC_LATENCY_MASK 0x160
-#define ASI_PERF_IS_CONTEXT_FILTER 0x168
-#define ASI_PERF_IS_INFO_MASK 0x170
-#define ASI_PERF_IS_INFO_MATCH 0x178
-
-#define ASI_PERF_IS_CONTEXT 0x108
-#define ASI_PERF_IS_INFO 0x148
-
-#define IS_BHR_LATENCY_CLAT_MASK 0xFFF
-#define IS_CC_FILTER_TGTF_MASK 0x10
-#define IS_CC_FILTER_TOF_MASK 0x8
-#define IS_CC_LATENCY_FREQ_SHIFT 22
-
-
- program_sampler_data_t sdata;
- int i = 0;
-
- (void) strcpy(sdata.name, "program_instr_sampler");
- /*
- * If (((Reported Value ^ MATCH) & MASK) == 0) then sample is taken;
- */
- sdata.asi_config[i].va = ASI_PERF_IS_PC_MASK;
- sdata.asi_config[i].value = 0;
- i++;
-
- sdata.asi_config[i].va = ASI_PERF_IS_PC_MATCH;
- sdata.asi_config[i].value = 0;
- i++;
-
- /*
- * Set CLAT_MASK to 0xFFF, meaning drop instruction samples
- * whose latency is zero; in effect all samples are taken,
- * because every instruction has a latency of at least one
- * cycle.
- */
- sdata.asi_config[i].va = ASI_PERF_IS_CONTEXT_FILTER;
- sdata.asi_config[i].value = (uint64_t)(IS_CC_FILTER_TGTF_MASK |
- IS_CC_FILTER_TOF_MASK | pic->sampler.flags);
- i++;
-
- /*
- * Even though the frequency is set at start time, it has to
- * be specified here as well: if left at zero, a PET is
- * generated immediately because the candidate counter is zero.
- */
- sdata.asi_config[i].va = ASI_PERF_IS_CC_LATENCY_MASK;
- sdata.asi_config[i].value = ((((uint64_t)pic->sampler.frequency) <<
- IS_CC_LATENCY_FREQ_SHIFT) | IS_BHR_LATENCY_CLAT_MASK);
- i++;
-
- sdata.asi_config[i].va = ASI_PERF_IS_INFO_MASK;
- sdata.asi_config[i].value = 0;
- i++;
-
- sdata.asi_config[i].va = ASI_PERF_IS_INFO_MATCH;
- sdata.asi_config[i].value = 0;
-
- sdata.asi_config_num = i + 1;
-
- sdata.asi_sample[0] = ASI_PERF_IS_INFO;
- sdata.asi_sample[1] = ASI_PERF_IS_CONTEXT;
- sdata.asi_sample_num = 2;
-
- return (program_a_sampler(pic, &sdata));
-}
-
-static int
-sample_instr_sampler(rk_pcbe_config_t *pic, int64_t *diffp)
-{
-#define I_MODE_SHIFT 34
-#define I_TYPE_SHIFT 0
-#define I_EVT_SHIFT 7
-#define I_MODE_MASK MAKE_MASK(3, 0)
-#define I_TYPE_MASK MAKE_MASK(7, 0)
-#define I_EVT_MASK MAKE_MASK(12, 0)
-
- rk_pcbe_ringbuf_t *ringbuf = pic->sampler.ring_buffer;
- uint32_t size = pic->sampler.sample_size;
- uint32_t value, target, shift, mask;
- uint32_t sample_count = 0, sample_hit_count = 0;
- uint64_t *head, *tail;
- int ret;
-
- switch (GROUP(pic->sampler.syn_counter)) {
- case I_GROUP_MODE:
- mask = I_MODE_MASK;
- shift = I_MODE_SHIFT;
- break;
- case I_GROUP_TYPE:
- mask = I_TYPE_MASK;
- shift = I_TYPE_SHIFT;
- break;
- case I_GROUP_EVT:
- mask = I_EVT_MASK;
- shift = I_EVT_SHIFT;
- break;
- default:
- PRINT_PIC(pic, "No I_GROUP found");
- ASSERT(0);
- return (H_EINVAL);
- }
-
- head = RINGBUF_GET_HEAD(ringbuf);
- tail = RINGBUF_GET_TAIL(ringbuf);
-
- if (head == tail) {
- DBG_PRINT(("CPU-%d: HEAD eq TAIL to start with\n",
- CPU->cpu_id));
- }
-
- /* Consume samples */
- while (head != tail) {
- /*
- * Data returned will be in the same order as the asi_list
- * passed to the hypervisor in the hv_rk_perf_sample_start
- * call.
- */
- uint64_t rawvalue = *head;
- uint64_t context = *(head + 1);
- uint8_t tl = (uint8_t)((context >> 2) & 7);
- int drop_sample = B_FALSE;
-
- if (rawvalue != 0) {
- value = (rawvalue >> shift) & mask;
- target = TYPE(pic->sampler.syn_counter);
- DBG_PRINT(("CPU-%d: rawvalue=0x%lX, value=0x%X,"
- "target=0x%X\n", CPU->cpu_id, rawvalue, value,
- target));
-
- /*
- * Several EVT fields are only valid for certain
- * instruction types. Need to check TYP field
- * before trusting what's in EVT.
- */
- if (GROUP(pic->sampler.syn_counter) == I_GROUP_EVT) {
- uint64_t type = rawvalue >> I_TYPE_SHIFT;
-
- switch (target) {
- case EVT_DC_MISS:
- case EVT_PRIOR_MISS:
- case EVT_LDB_FULL:
- case EVT_BYPASS_RAW:
- case EVT_NONBYPASS_RAW:
- if ((type & TYPE_LD) == 0)
- drop_sample = B_TRUE;
- break;
- case EVT_STB_FULL:
- if ((type & TYPE_ST) == 0)
- drop_sample = B_TRUE;
- break;
- case EVT_DTLB_MISS:
- if ((type & (TYPE_LD|TYPE_ST)) == 0)
- drop_sample = B_TRUE;
- break;
- case EVT_CORRECT_BP:
- case EVT_CTI_TAKEN:
- if ((type & TYPE_CTI) == 0)
- drop_sample = B_TRUE;
- break;
- }
- DBG_PRINT(("CPU-%d: rawvalue=%lX, cleaned value"
- "=%X, target=%X\n", CPU->cpu_id, rawvalue,
- value, target));
- }
-
- /*
- * If the user does not want to count instructions in
- * scout mode, and the instruction sampled was in scout
- * mode, drop the sample.
- */
- if (pic->sampler.nohws == B_TRUE) {
- uint64_t mode = (rawvalue >> I_MODE_SHIFT) &
- I_MODE_MASK;
- if (mode == MODE_HWS)
- drop_sample = B_TRUE;
- }
-
- /*
- * If the user wants to count instructions only at a
- * particular trap level (0 or >0) and the sample is at
- * a different trap level, drop the sample.
- */
- switch (pic->sampler.tl) {
- case TLZ: /* Sample ONLY instr at TL == 0 */
- if (tl != 0)
- drop_sample = B_TRUE;
- break;
- case TLNZ: /* Sample ONLY instr at TL > 0 */
- if (tl == 0)
- drop_sample = B_TRUE;
- break;
- }
-
- switch (GROUP(pic->sampler.syn_counter)) {
- case I_GROUP_MODE:
- /* Fields that are integers */
- if (value == target && drop_sample == B_FALSE)
- sample_hit_count++;
- break;
- case I_GROUP_EVT:
- case I_GROUP_TYPE:
- /* Fields that are bit vectors */
- if (value & target && drop_sample == B_FALSE)
- sample_hit_count++;
- break;
- default:
- ASSERT(0); /* missing case statement */
- }
- }
- sample_count++;
- RINGBUF_MOVE_HEAD(ringbuf, head, size);
- }
- RINGBUF_SET_HEAD(ringbuf, head);
-
- ret = synthesize_sample_count(pic, sample_count, sample_hit_count,
- "sample_instr_sampler", diffp);
-
- return (ret);
-}
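
Both consumers above walk the ring buffer the same way. A sketch of the shared pattern, assuming the RINGBUF_* accessors behave as their uses suggest (their definitions are not part of this diff, and consume_one_record() is a hypothetical stand-in for the per-counter decode):

    /*
     * The hypervisor appends fixed-size records at the tail; the guest
     * advances the head until it catches up, then publishes the new
     * head so the buffer space can be reused.
     */
    uint64_t *head = RINGBUF_GET_HEAD(ringbuf);
    uint64_t *tail = RINGBUF_GET_TAIL(ringbuf);

    while (head != tail) {
            consume_one_record(head);
            RINGBUF_MOVE_HEAD(ringbuf, head, size); /* wraps at the end */
    }
    RINGBUF_SET_HEAD(ringbuf, head);
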
-
-/*
- * The mccdesr counters are synthetic counters. The hypervisor
- * maintains a 64-bit memory-based counter, so we can assume that
- * it never overflows.
- */
-static int
-sample_mccdesr(rk_pcbe_config_t *pic, int64_t *diffp)
-{
- uint64_t rc = 0;
- uint64_t counter_value;
- rc = hv_rk_perf_count_get((uint64_t)(pic->counter |
- pic->src_type), &counter_value);
- if (rc == H_EOK) {
- counter_value &= COUNTER_MASK(pic);
- *diffp = counter_value - pic->pcbe_pic;
- pic->pcbe_pic = counter_value;
- if (*diffp < 0) {
- cmn_err(CE_WARN, "CPU-%d: Pic-%d, counter: %X overflow",
- CPU->cpu_id, pic->pcbe_picno, pic->counter);
- }
- } else {
- cmn_err(CE_WARN, "CPU-%d: Failed to sample pic-%d, counter-%X",
- CPU->cpu_id, pic->pcbe_picno, pic->counter);
- }
- return ((int)rc);
-}
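
A back-of-the-envelope check of the no-overflow assumption, with an assumed (and generous) event rate:

    #include <stdio.h>

    /*
     * Even at a billion events per second, a 64-bit counter takes
     * centuries to wrap, so treating it as non-overflowing is safe.
     */
    int
    main(void)
    {
            double events_per_sec = 1e9;            /* assumed rate */
            double secs = 18446744073709551615.0 / events_per_sec;

            printf("%.0f years to wrap\n", secs / (365.25 * 24 * 3600));
            return (0);     /* prints roughly 585 years */
    }
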
-
-static int
-program_a_sampler(rk_pcbe_config_t *pic, program_sampler_data_t *sdata)
-{
- uint64_t ringbuf_pa, asi_list_pa, counter, rc;
- int hv_call_cnt = 1, ret = 0, need_init = 0, i;
- uint64_t temp_pcbe_pic = 0;
-
- counter = (uint64_t)(pic->counter | pic->src_type);
-
- if (pic->sampler.ring_buffer == NULL) {
- pic->sampler.sample_size = sdata->asi_sample_num *
- sizeof (uint64_t);
- rc = alloc_ringbuffer(pic, pic->sampler.sample_size,
- num_ringbuf_entries);
- if (rc != 0)
- return ((int)rc);
- need_init = 1;
- PRINT_PIC(pic, "After Configuration (S)");
- }
-
- if (need_init || pic->state == STATE_RELEASED) {
- ringbuf_pa = va_to_pa(pic->sampler.ring_buffer);
- rc = hv_rk_perf_sample_init(counter, ringbuf_pa);
- print_hv_error(rc, &hv_call_cnt, sdata->name, pic);
- if (rc != H_EOK)
- return ((int)rc);
- }
-
- /*
- * If (((Reported Value ^ MATCH) & MASK) == 0) then sample is taken;
- */
- for (i = 0; i < sdata->asi_config_num; i++) {
- rc = hv_rk_perf_sample_config(counter, sdata->asi_config[i].va,
- sdata->asi_config[i].value);
- ret |= (int)rc;
- print_hv_error(rc, &hv_call_cnt, sdata->name, pic);
- }
-
- /*
- * pic->pcbe_pic is used to hold the preset value for
- * synthetic counters.
- */
- if (pic->pcbe_pic > 0) {
- temp_pcbe_pic = pic->pcbe_pic;
- pic->pcbe_pic = 0;
- }
- ret |= rk_pcbe_program_normal(pic); /* Reset to zero & start counting */
- pic->pcbe_pic = temp_pcbe_pic;
-
- /*
- * Start sampling.
- *
- * Data returned in the ringbuffer by the hypervisor will be in
- * the same order in which it was programmed.
- */
- asi_list_pa = va_to_pa(sdata->asi_sample);
- rc = hv_rk_perf_sample_start(counter, pic->sampler.frequency,
- sdata->asi_sample_num * sizeof (uint64_t), asi_list_pa);
- ret |= (int)rc;
- print_hv_error(rc, &hv_call_cnt, sdata->name, pic);
- return (ret);
-}
-
-static int
-synthesize_sample_count(rk_pcbe_config_t *pic, uint64_t sample_count,
- uint64_t sample_hit_count, char *name, int64_t *diffp)
-{
-
- uint64_t total_count, rc, ovf_count, hit_count = 0;
- int hv_call_cnt = 1, ret = 0;
- /*
- * Since the ring buffer has been consumed, clear the pending
- * sample count. The count itself is discarded, so total_count
- * is reused as a scratch variable here.
- */
- rc = hv_rk_perf_sample_pending((uint64_t)(pic->counter |
- pic->src_type), &total_count);
- ret |= (int)rc;
- print_hv_error(rc, &hv_call_cnt, name, pic);
-
- /* Check if the counter overflowed */
- rc = hv_rk_perf_count_overflow((uint64_t)(pic->counter |
- pic->src_type), &ovf_count);
- ret |= (int)rc;
- print_hv_error(rc, &hv_call_cnt, name, pic);
-
- if (rc != H_EOK)
- ovf_count = 0;
-
- rc = hv_rk_perf_count_get((uint64_t)(pic->counter |
- pic->src_type), &total_count);
- ret |= (int)rc;
- print_hv_error(rc, &hv_call_cnt, name, pic);
-
- if (rc != H_EOK)
- total_count = 0;
-
- total_count &= COUNTER_MASK(pic);
-
- /*
- * Reset the counter to zero so that we need not track its
- * old value.
- */
- rc = hv_rk_perf_count_set((uint64_t)(pic->counter | pic->src_type), 0);
- ret |= (int)rc;
- print_hv_error(rc, &hv_call_cnt, name, pic);
-
- /*
- * ovf_count > 0 means the counter hit its max value ovf_count
- * times before accumulating total_count events. Therefore add
- * ovf_count times the max count value to total_count.
- */
- if (ovf_count)
- total_count += (ovf_count * (0x1ULL << pic->counter_bits));
-
- if (sample_count > 0)
- hit_count = (sample_hit_count * total_count) / sample_count;
-
- *diffp = (int64_t)hit_count;
- DBG_PRINT(("CPU-%d: sample_instr_load. hit_count: %lu, *diffp: %ld\n",
- CPU->cpu_id, hit_count, *diffp));
- if (*diffp < 0) {
- cmn_err(CE_WARN, "CPU-%d Negative instr count. hit_count: %lu, "
- "*diffp: %ld\n", CPU->cpu_id, hit_count, *diffp);
- }
-
- if (pic->pcbe_pic) {
- *diffp += pic->pcbe_pic; /* Add the preset value */
- /*
- * pic->pcbe_pic is used to hold the preset value for
- * synthetic counters.
- */
- pic->pcbe_pic = 0;
- }
- return (ret);
-}
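
The scaling step above extrapolates the sampled hit ratio to the full event count. A standalone sketch with illustrative numbers:

    #include <stdio.h>
    #include <stdint.h>

    /*
     * Statistical samples cover only a fraction of all events, so the
     * hit ratio observed in the ring buffer is scaled up to the total
     * event count kept by the hardware counter.
     */
    int
    main(void)
    {
            uint64_t sample_count = 200;            /* records consumed */
            uint64_t sample_hit_count = 50;         /* records matching */
            uint64_t total_count = 1000000;         /* HW event count */
            uint64_t hit_count = 0;

            if (sample_count > 0)
                    hit_count = (sample_hit_count * total_count) /
                        sample_count;
            printf("%llu\n", (unsigned long long)hit_count); /* 250000 */
            return (0);
    }
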
-
-static int
-alloc_ringbuffer(rk_pcbe_config_t *pic, uint32_t size,
- uint32_t num_samples)
-{
- uint32_t ringbuf_size;
- uint32_t asize = 2;
- rk_pcbe_ringbuf_t *ringbuf;
- ASSERT(!(num_samples & 1)); /* Assert number of samples is even */
-
- ringbuf_size = sizeof (rk_pcbe_ringbuf_t) + (size * num_samples);
-
- /* Size should be a power of 2 */
- while ((ringbuf_size & (asize - 1)) != ringbuf_size)
- asize <<= 1;
-
- ringbuf = contig_mem_alloc_align_sleep(asize, 0);
- if (ringbuf == NULL) {
- cmn_err(CE_WARN, "CPU-%d: Ringbuffer memory allocation failed!",
- CPU->cpu_id);
- return (-1);
- }
- pic->sampler.ring_buffer = ringbuf;
- ringbuf->head = NULL;
- ringbuf->tail = NULL;
- ringbuf->size = size * num_samples;
- ringbuf->hwm = ringbuf->size >> 1;
- return (0);
-}
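
The rounding loop in alloc_ringbuffer() (mirrored in free_ringbuffer() below) relies on a mask identity. The same computation in isolation, under an illustrative helper name:

    #include <stdint.h>

    /*
     * (x & (asize - 1)) == x holds exactly when x < asize, so the loop
     * stops at the smallest power of two strictly greater than
     * ringbuf_size; that is the size handed to
     * contig_mem_alloc_align_sleep() above.
     */
    static uint32_t
    ringbuf_alloc_size(uint32_t ringbuf_size)
    {
            uint32_t asize = 2;

            while ((ringbuf_size & (asize - 1)) != ringbuf_size)
                    asize <<= 1;
            return (asize);
    }
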
-
-static void
-free_ringbuffer(rk_pcbe_config_t *pic)
-{
- rk_pcbe_ringbuf_t *ringbuf = pic->sampler.ring_buffer;
- /*
- * When multiple pics are used and one of them was not configurable
- * (e.g. a bad attribute), cpc calls rk_pcbe_free for the pics that
- * were already configured. This results in calling this routine with
- * a NULL ringbuf, since the ringbuf is allocated when the first
- * sample is taken. To protect against this condition, we need to do
- * the following check before calling contig_mem_free, since it uses
- * ringbuf->size.
- */
- if (ringbuf) {
- uint32_t ringbuf_size;
- uint32_t asize = 2;
- DBG_PRINT(("CPU-%d: free_ringbuffer freeing %d bytes\n",
- CPU->cpu_id,
- (int)(sizeof (rk_pcbe_ringbuf_t) + ringbuf->size)));
- ringbuf_size = sizeof (rk_pcbe_ringbuf_t) + ringbuf->size;
- while ((ringbuf_size & (asize - 1)) != ringbuf_size)
- asize <<= 1;
- contig_mem_free(ringbuf, asize);
- }
-}
-
-static void
-print_hv_error(uint64_t rc, int *cntp, char *funcname, rk_pcbe_config_t *pic)
-{
- ASSERT(cntp && pic);
- if (rc != H_EOK) {
- cmn_err(CE_WARN, "{%d} pgm-hw call-%d in %s returned 0x%lX for "
- "pic %d cntr %X", CPU->cpu_id, *cntp, funcname, rc,
- pic->pcbe_picno, pic->counter);
- }
- (*cntp)++;
-}
-
-static void
-set_string_constants(void)
-{
- if (strncmp(cpu_module_name, "SUNW,", 5) == 0)
- rock_name = &cpu_module_name[5];
- else
- rock_name = cpu_module_name;
- (void) strcpy(rock_cpuref, "See the \"");
- (void) strcat(rock_cpuref, rock_name);
- (void) strcat(rock_cpuref, " User's Manual\" for descriptions of "
- "these events. "CPU_REF_URL);
- (void) strcat(pcbe_module_name, cpu_module_name);
-}
-
-static uint64_t
-bitmask(uint8_t bits)
-{
- if (bits < 64)
- return ((1ULL << bits) - 1);
- return (-1);
-}
-
-#ifdef RKPCBE_DBG
-static void
-set_pic_name(rk_pcbe_config_t *pic)
-{
- uint32_t bits;
- const struct nametable *n;
-
- /*
- * For the normal instruction counter, the 'bits' value is not saved.
- */
- if (pic->counter_type == NORMAL_COUNTER) {
- if (pic->counter == RK_PERF_INSTR) {
- (void) strcpy(pic->name, "Instr_All");
- return;
- }
- bits = pic->flags;
- } else
- bits = pic->sampler.syn_counter;
-
- for (n = events[pic->pcbe_picno]; n->bits != NT_END; n++) {
- if (n->bits == bits) {
- (void) strcpy(pic->name, n->name);
- break;
- }
- }
-}
-
-static void
-print_pic(rk_pcbe_config_t *pic, char *heading)
-{
- ASSERT(pic);
- /*
- * On a multi-strand system, the output gets clobbered.
- * Therefore grab a lock so that the output is legible.
- */
- mutex_enter(&print_pic_lock);
- printf("{CPU-%d} %s:\n", CPU->cpu_id, heading);
- printf("pic addr : %p\n", (void *)pic);
- printf("name : %s\n", pic->name);
- printf("pcbe_picno : %d\n", pic->pcbe_picno);
- printf("counter_bits : 0x%X\n", pic->counter_bits);
- printf("counter_type : 0x%X\n", pic->counter_type);
- printf("toe : %d\n", pic->toe);
- printf("counter : 0x%X\n", pic->counter);
- printf("src_type : 0x%X\n", pic->src_type);
- printf("flags : 0x%X\n", pic->flags);
- printf("pcbe_pic : %ld\n", pic->pcbe_pic);
- printf("inuse : %d\n", pic->inuse);
- printf("state : 0x%X\n", pic->state);
- printf("cpu : %d\n", pic->cpu);
- if (pic->counter_type == SYNTHETIC_COUNTER) {
- printf("Synthetic counter:\n");
- printf("\tsyn_pic: 0x%X\n", (int)pic->sampler.synthetic_pic);
- printf("\tfreq : %d\n", pic->sampler.frequency);
- printf("\tsyn_cnt: 0x%X\n", pic->sampler.syn_counter);
- printf("\tsize : %d bytes\n", pic->sampler.sample_size);
- printf("\tflags : 0x%X\n", pic->sampler.flags);
- printf("\ttl : 0x%X\n", pic->sampler.tl);
- printf("\tnohws : 0x%X\n", pic->sampler.nohws);
- printf("\trbuf : 0x%p\n", (void *)pic->sampler.ring_buffer);
- if (pic->sampler.ring_buffer) {
- rk_pcbe_ringbuf_t *rb = pic->sampler.ring_buffer;
- printf("\tRingbuffer:\n");
- printf("\t\tHead: 0x%X\n", rb->head);
- printf("\t\tTail: 0x%X\n", rb->tail);
- printf("\t\tSize: 0x%X\n", rb->size);
- printf("\t\tHwm : 0x%X\n", rb->hwm);
- }
- }
- printf("-----------------\n");
- mutex_exit(&print_pic_lock);
-}
-#endif
diff --git a/usr/src/uts/sun4v/rock/Makefile b/usr/src/uts/sun4v/rock/Makefile
deleted file mode 100644
index 9a361ed6f6..0000000000
--- a/usr/src/uts/sun4v/rock/Makefile
+++ /dev/null
@@ -1,113 +0,0 @@
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-
-#
-# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
-#
-# This makefile drives the production of the UltraSPARC-AT10 cpu module.
-#
-# sun4v implementation architecture dependent
-#
-
-#
-# Path to the base of the uts directory tree (usually /usr/src/uts).
-#
-UTSBASE = ../..
-
-#
-# Define the module and object file sets.
-#
-MODULE = SUNW,UltraSPARC-AT10
-OBJECTS = $(ROCKCPU_OBJS:%=$(OBJS_DIR)/%)
-LINTS = $(ROCKCPU_OBJS:%.o=$(LINTS_DIR)/%.ln)
-ROOTMODULE = $(ROOT_PSM_CPU_DIR)/$(MODULE)
-
-CPU_DIR = .
-HERE = ../rock
-
-#
-# Include common rules.
-#
-include $(UTSBASE)/sun4v/Makefile.sun4v
-
-#
-# Override defaults
-#
-CLEANFILES += $(CPULIB) $(SYM_MOD)
-
-#
-# Define targets
-#
-ALL_TARGET = $(SYM_MOD)
-LINT_TARGET = $(MODULE).lint
-INSTALL_TARGET = def $(BINARY) $(ROOTMODULE)
-
-#
-# The ATOMIC_BO_ENABLE_SHIFT enables backoff in atomic routines.
-# ATOMIC_SIMPLE_BO_ENABLE enables simple backoff required for rock
-#
-ATOMIC_BO_FLAG = -DATOMIC_BO_ENABLE_SHIFT=14 -DATOMIC_SIMPLE_BO_ENABLE
-
-#
-# lint pass one enforcement
-#
-CFLAGS += $(CCVERBOSE) $(ATOMIC_BO_FLAG)
-
-#
-# cpu-module-specific flags
-#
-CPPFLAGS += -DCPU_MODULE $(ATOMIC_BO_FLAG)
-AS_CPPFLAGS += -DCPU_MODULE -DCUSTOM_FPZERO $(ATOMIC_BO_FLAG)
-LINTFLAGS += -DCUSTOM_FPZERO
-
-#
-# Default build targets.
-#
-.KEEP_STATE:
-
-def: $(DEF_DEPS)
-
-all: $(ALL_DEPS)
-
-clean: $(CLEAN_DEPS)
-
-clobber: $(CLOBBER_DEPS)
-
-lint: $(LINT_DEPS)
-
-modlintlib: $(MODLINTLIB_DEPS)
-
-clean.lint: $(CLEAN_LINT_DEPS)
-
-install: $(INSTALL_DEPS)
-
-$(CPULIB): $(BINARY)
- $(LD) -o $(CPULIB) -G $(BINARY)
-
-$(SYM_MOD): $(UNIX_O) $(CPULIB)
- @echo "resolving symbols against unix.o"
- @(cd $(UNIX_DIR); pwd; \
- CPU_DIR=$(HERE) SYM_MOD=$(HERE)/$(SYM_MOD) $(MAKE) symcheck)
-
-# Include common targets.
-#
-include $(UTSBASE)/$(PLATFORM)/Makefile.targ
diff --git a/usr/src/uts/sun4v/rock_pcbe/Makefile b/usr/src/uts/sun4v/rock_pcbe/Makefile
deleted file mode 100644
index 1fa4cb9470..0000000000
--- a/usr/src/uts/sun4v/rock_pcbe/Makefile
+++ /dev/null
@@ -1,77 +0,0 @@
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-
-#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
-#
-# This Makefile builds the Rock Performance Counter BackEnd (PCBE).
-#
-
-UTSBASE = ../..
-
-#
-# Define module and object file sets.
-#
-MODULE = pcbe.SUNW,UltraSPARC-AT10
-OBJECTS = $(RK_PCBE_OBJS:%=$(OBJS_DIR)/%)
-LINTS = $(RK_PCBE_OBJS:%.o=$(LINTS_DIR)/%.ln)
-ROOTMODULE = $(ROOT_PSM_PCBE_DIR)/$(MODULE)
-
-#
-# Include common rules.
-#
-include $(UTSBASE)/sun4v/Makefile.sun4v
-
-#
-# Define targets.
-#
-ALL_TARGET = $(BINARY)
-LINT_MODULE = rock_pcbe
-LINT_TARGET = $(LINT_MODULE).lint
-INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
-EXTRA_OPTIONS += -URKPCBE_DBG
-
-#
-# Default build targets.
-#
-.KEEP_STATE:
-
-def: $(DEF_DEPS)
-
-all: $(ALL_DEPS)
-
-clean: $(CLEAN_DEPS)
-
-clobber: $(CLOBBER_DEPS)
-
-lint: $(LINT_DEPS)
-
-modlintlib: $(MODLINTLIB_DEPS)
-
-clean.lint: $(CLEAN_LINT_DEPS)
-
-install: $(INSTALL_DEPS)
-
-#
-# Include common targets.
-#
-include $(UTSBASE)/sun4v/Makefile.targ
diff --git a/usr/src/uts/sun4v/sys/error.h b/usr/src/uts/sun4v/sys/error.h
index 0b0579a0da..9679b9d338 100644
--- a/usr/src/uts/sun4v/sys/error.h
+++ b/usr/src/uts/sun4v/sys/error.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -118,7 +118,7 @@ typedef struct {
uint64_t stick; /* Value of the %STICK register */
uint32_t desc; /* Error Descriptor */
uint32_t attr; /* error attributes bit field */
- uint64_t addr; /* va for ERRH_ATTR_ASI, otherwise ra */
+ uint64_t ra; /* Real address */
uint32_t sz; /* Size of affected mem region */
uint16_t cpuid; /* Virtual ID of the affected CPU */
uint16_t secs; /* Seconds */
diff --git a/usr/src/uts/sun4v/sys/hsvc.h b/usr/src/uts/sun4v/sys/hsvc.h
index 0814a648c5..c0cae0cf70 100644
--- a/usr/src/uts/sun4v/sys/hsvc.h
+++ b/usr/src/uts/sun4v/sys/hsvc.h
@@ -42,7 +42,6 @@ extern "C" {
#define HSVC_GROUP_CORE 0x0001
#define HSVC_GROUP_INTR 0x0002
#define HSVC_GROUP_SOFT_STATE 0x0003
-#define HSVC_GROUP_MEM_IFLUSH 0x0010
#define HSVC_GROUP_TM 0x0080
#define HSVC_GROUP_VPCI 0x0100
#define HSVC_GROUP_LDC 0x0101
@@ -54,9 +53,6 @@ extern "C" {
#define HSVC_GROUP_NIAGARA2_CPU 0x0202
#define HSVC_GROUP_NIU 0x0204
#define HSVC_GROUP_VFALLS_CPU 0x0205
-#define HSVC_GROUP_RKPERF 0x0206
-#define HSVC_GROUP_RKMMU_EXT 0x0207
-#define HSVC_GROUP_RKCPU 0x0208
#define HSVC_GROUP_DIAG 0x0300
#ifndef _ASM
@@ -82,8 +78,6 @@ typedef struct hsvc_info hsvc_info_t;
*/
#define HSVC_REV_1 1
-extern int hsvc_kdi_mem_iflush_negotiated;
-
/*
* External interface
*/
diff --git a/usr/src/uts/sun4v/sys/hypervisor_api.h b/usr/src/uts/sun4v/sys/hypervisor_api.h
index db53e1f989..2ad96e6c9f 100644
--- a/usr/src/uts/sun4v/sys/hypervisor_api.h
+++ b/usr/src/uts/sun4v/sys/hypervisor_api.h
@@ -110,8 +110,6 @@ extern "C" {
#define HV_MEM_SCRUB 0x31
#define HV_MEM_SYNC 0x32
-#define HV_MEM_IFLUSH 0x33
-#define HV_MEM_IFLUSH_ALL 0x34
#define HV_INTR_SEND 0x42
@@ -199,12 +197,6 @@ extern "C" {
#define MAP_DTLB 0x1
#define MAP_ITLB 0x2
-/*
- * Definitions for TLB Search Order functions
- */
-#define TLB_SO_DATA 0x1
-#define TLB_SO_INS 0x2
-#define TLB_SO_ID TLB_SO_DATA | TLB_SO_INS
/*
* Interrupt state manipulation definitions.
@@ -325,7 +317,6 @@ struct mmu_stat {
*/
#define HVIO_DMA_SYNC_DIR_TO_DEV 0x01
#define HVIO_DMA_SYNC_DIR_FROM_DEV 0x02
-#define HVIO_DMA_SYNC_DIR_NO_ICACHE_FLUSH 0x04
/*
* LDC Channel States
@@ -360,9 +351,6 @@ extern uint64_t hv_mem_scrub(uint64_t real_addr, uint64_t length,
uint64_t *scrubbed_len);
extern uint64_t hv_mem_sync(uint64_t real_addr, uint64_t length,
uint64_t *flushed_len);
-extern uint64_t hv_mem_iflush(uint64_t real_addr, uint64_t length,
- uint64_t *flushed_len);
-extern uint64_t hv_mem_iflush_all(void);
extern uint64_t hv_tm_enable(uint64_t enable);
extern uint64_t hv_service_recv(uint64_t s_id, uint64_t buf_pa,
diff --git a/usr/src/uts/sun4v/sys/machcpuvar.h b/usr/src/uts/sun4v/sys/machcpuvar.h
index 13f0093c02..28cbd84ff5 100644
--- a/usr/src/uts/sun4v/sys/machcpuvar.h
+++ b/usr/src/uts/sun4v/sys/machcpuvar.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -181,7 +181,6 @@ struct machcpu {
id_t cpu_core; /* cpu core id */
id_t cpu_chip; /* cpu chip id */
kthread_t *startup_thread;
- uint64_t cpu_nre_error; /* nonresumable error */
};
typedef struct machcpu machcpu_t;
diff --git a/usr/src/uts/sun4v/sys/machsystm.h b/usr/src/uts/sun4v/sys/machsystm.h
index a896722bbf..832a634d0f 100644
--- a/usr/src/uts/sun4v/sys/machsystm.h
+++ b/usr/src/uts/sun4v/sys/machsystm.h
@@ -249,7 +249,6 @@ extern uint64_t cbe_level14_inum;
extern void *contig_mem_alloc(size_t);
extern void *contig_mem_alloc_align(size_t, size_t);
extern void contig_mem_free(void *, size_t);
-extern void *contig_mem_alloc_align_sleep(size_t, size_t);
/*
* Caches
diff --git a/usr/src/uts/sun4v/sys/mmu.h b/usr/src/uts/sun4v/sys/mmu.h
index e1c6390449..f329e39ab9 100644
--- a/usr/src/uts/sun4v/sys/mmu.h
+++ b/usr/src/uts/sun4v/sys/mmu.h
@@ -156,18 +156,6 @@ extern "C" {
#define MIN_NSHCONTEXTS 1
#define MIN_NTSBS 4
-/*
- * The number of shared contexts supported in search list entries for the
- * pagesize register.
- */
-#define NSEARCH_SHCONTEXTS 1
-
-/*
- * The maximum number of entries allowed in a search list for the pagesize
- * register.
- */
-#define MAX_PGSZ_SEARCH_ORDER 8
-
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/uts/sun4v/sys/pte.h b/usr/src/uts/sun4v/sys/pte.h
index 702883f8e5..6e1ee349d0 100644
--- a/usr/src/uts/sun4v/sys/pte.h
+++ b/usr/src/uts/sun4v/sys/pte.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -61,7 +61,7 @@ typedef union {
unsigned int w:1; /* <6> write perm */
unsigned int ref:1; /* <5> sw - ref */
unsigned int wr_perm:1; /* <4> sw - write perm */
- unsigned int xsoft:1; /* <3> sw - soft execute */
+ unsigned int rsvd:1; /* <3> reserved */
unsigned int sz:3; /* <2:0> pagesize */
} tte_bit;
struct {
@@ -83,7 +83,6 @@ typedef union {
#define tte_no_sync tte_bit.no_sync
#define tte_suspend tte_bit.susp
#define tte_exec_perm tte_bit.x
-#define tte_soft_exec tte_bit.xsoft
#define tte_lock tte_bit.lock
#define tte_cp tte_bit.cp
#define tte_cv tte_bit.cv
@@ -163,7 +162,6 @@ typedef union {
#define TTE_HWWR_INT 0x00000040
#define TTE_REF_INT 0x00000020
#define TTE_WRPRM_INT 0x00000010
-#define TTE_SOFTEXEC_INT 0x00000008
#define TTE_PROT_INT (TTE_WRPRM_INT | TTE_PRIV_INT)
@@ -245,7 +243,6 @@ typedef union {
#define TTE_IS_8K(ttep) (TTE_CSZ(ttep) == TTE8K)
#define TTE_IS_WRITABLE(ttep) ((ttep)->tte_wr_perm)
#define TTE_IS_EXECUTABLE(ttep) ((ttep)->tte_exec_perm)
-#define TTE_IS_SOFTEXEC(ttep) ((ttep)->tte_soft_exec)
#define TTE_IS_PRIVILEGED(ttep) ((ttep)->tte_priv)
#define TTE_IS_NOSYNC(ttep) ((ttep)->tte_no_sync)
#define TTE_IS_LOCKED(ttep) ((ttep)->tte_lock)
@@ -275,8 +272,6 @@ typedef union {
#define TTE_CLR_WRT(ttep) ((ttep)->tte_wr_perm = 0)
#define TTE_SET_EXEC(ttep) ((ttep)->tte_exec_perm = 1)
#define TTE_CLR_EXEC(ttep) ((ttep)->tte_exec_perm = 0)
-#define TTE_SET_SOFTEXEC(ttep) ((ttep)->tte_soft_exec = 1)
-#define TTE_CLR_SOFTEXEC(ttep) ((ttep)->tte_soft_exec = 0)
#define TTE_SET_PRIV(ttep) ((ttep)->tte_priv = 1)
#define TTE_CLR_PRIV(ttep) ((ttep)->tte_priv = 0)
diff --git a/usr/src/uts/sun4v/sys/rock_hypervisor_api.h b/usr/src/uts/sun4v/sys/rock_hypervisor_api.h
deleted file mode 100644
index cc9ee171b8..0000000000
--- a/usr/src/uts/sun4v/sys/rock_hypervisor_api.h
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_ROCK_HYPERVISOR_API_H
-#define _SYS_ROCK_HYPERVISOR_API_H
-
-/*
- * sun4v rock Hypervisor API
- *
- */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * Function numbers for managing the Rock TLB page size register.
- */
-#define MMU_GET_NONPRIV_SEARCH 0x13b
-#define MMU_SET_NONPRIV_SEARCH 0x13c
-#define MMU_GET_PRIV_SEARCH 0x13d
-#define MMU_SET_PRIV_SEARCH 0x13e
-
-/*
- * Function numbers for performance counters
- */
-#define HV_RK_PERF_COUNT_INIT 0x108
-#define HV_RK_PERF_COUNT_RELEASE 0x109
-#define HV_RK_PERF_COUNT_SET 0x10A
-#define HV_RK_PERF_COUNT_GET 0x10B
-#define HV_RK_PERF_COUNT_START 0x10C
-#define HV_RK_PERF_COUNT_OVERFLOW 0x10D
-#define HV_RK_PERF_COUNT_STOP 0x10E
-
-#define HV_RK_PERF_SAMPLE_INIT 0x135
-#define HV_RK_PERF_SAMPLE_RELEASE 0x136
-#define HV_RK_PERF_SAMPLE_CONFIG 0x137
-#define HV_RK_PERF_SAMPLE_START 0x138
-#define HV_RK_PERF_SAMPLE_PENDING 0x139
-#define HV_RK_PERF_SAMPLE_STOP 0x13A
-
-#define HV_RK_PERF_SRC_STRAND 0x1 /* Local Strand */
-#define HV_RK_PERF_SRC_STRAND_M 0x2 /* Multiple Strands */
-#define HV_RK_PERF_SRC_SIU 0x4 /* L2 txn source */
-#define HV_RK_PERF_SRC_MMU 0x8 /* L2 txn source */
-#define HV_RK_PERF_SRC_MASK 0xF
-
-#define ROCK_HSVC_MAJOR 1
-#define ROCK_HSVC_MINOR 0
-
-#ifndef _ASM
-
-/* Performance Counter API */
-extern uint64_t hv_rk_perf_count_init(uint64_t counter);
-extern uint64_t hv_rk_perf_count_release(uint64_t counter);
-extern uint64_t hv_rk_perf_count_set(uint64_t counter, uint64_t value);
-extern uint64_t hv_rk_perf_count_get(uint64_t counter, uint64_t *value);
-extern uint64_t hv_rk_perf_count_start(uint64_t counter, uint64_t value);
-extern uint64_t hv_rk_perf_count_overflow(uint64_t counter, uint64_t *ovf_cnt);
-extern uint64_t hv_rk_perf_count_stop(uint64_t counter);
-
-/* Performance Sampler API */
-extern uint64_t hv_rk_perf_sample_init(uint64_t sampler, uint64_t ringbuf_pa);
-extern uint64_t hv_rk_perf_sample_release(uint64_t sampler);
-extern uint64_t hv_rk_perf_sample_config(uint64_t sampler, uint64_t reg_va,
- uint64_t reg_value);
-extern uint64_t hv_rk_perf_sample_start(uint64_t sampler, uint64_t freq,
- uint64_t list_size, uint64_t valist_pa);
-extern uint64_t hv_rk_perf_sample_pending(uint64_t sampler, uint64_t *pend_cnt);
-extern uint64_t hv_rk_perf_sample_stop(uint64_t counter);
-#endif /* _ASM */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_ROCK_HYPERVISOR_API_H */
diff --git a/usr/src/uts/sun4v/sys/rockasi.h b/usr/src/uts/sun4v/sys/rockasi.h
deleted file mode 100644
index b97926ba30..0000000000
--- a/usr/src/uts/sun4v/sys/rockasi.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_ROCKASI_H
-#define _SYS_ROCKASI_H
-
-/*
- * alternate address space identifiers
- *
- * 0x00 - 0x2F are privileged
- * 0x30 - 0x7f are hyperprivileged
- * 0x80 - 0xFF can be used by non-privileged, privileged & hyperprivileged
- */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * ROCK specific ASIs
- */
-#define ASI_CACHE_SPARING_P 0xF4 /* Cache sparing */
-
-#ifndef _ASM
-struct cpsregs {
- uint64_t fails;
- uint64_t exog;
- uint64_t coh;
- uint64_t tcc;
- uint64_t instr;
- uint64_t precise;
- uint64_t async;
- uint64_t size;
- uint64_t ld;
- uint64_t st;
- uint64_t cti;
- uint64_t fp;
- uint64_t zeros;
-};
-#endif /* _ASM */
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_ROCKASI_H */
diff --git a/usr/src/uts/sun4v/vm/mach_sfmmu.c b/usr/src/uts/sun4v/vm/mach_sfmmu.c
index c2574694a1..398d0a069b 100644
--- a/usr/src/uts/sun4v/vm/mach_sfmmu.c
+++ b/usr/src/uts/sun4v/vm/mach_sfmmu.c
@@ -45,7 +45,6 @@
#include <sys/vmsystm.h>
#include <sys/bitmap.h>
#include <vm/rm.h>
-#include <vm/vm_dep.h>
#include <sys/t_lock.h>
#include <sys/vm_machparam.h>
#include <sys/promif.h>
@@ -60,7 +59,6 @@
#include <sys/reboot.h>
#include <sys/kdi.h>
#include <sys/hypervisor_api.h>
-#include <sys/hsvc.h>
/*
* External routines and data structures
@@ -169,7 +167,7 @@ sfmmu_remap_kernel(void)
prom_panic("can't find kernel text pfn");
pfn &= TTE_PFNMASK(TTE4M);
- attr = PROC_TEXT | HAT_NOSYNC | HAT_ATTR_NOSOFTEXEC;
+ attr = PROC_TEXT | HAT_NOSYNC;
flags = HAT_LOAD_LOCK | SFMMU_NO_TSBLOAD;
sfmmu_memtte(&ktext_tte, pfn, attr, TTE4M);
/*
@@ -185,7 +183,7 @@ sfmmu_remap_kernel(void)
prom_panic("can't find kernel data pfn");
pfn &= TTE_PFNMASK(TTE4M);
- attr = PROC_DATA | HAT_NOSYNC | HAT_ATTR_NOSOFTEXEC;
+ attr = PROC_DATA | HAT_NOSYNC;
sfmmu_memtte(&kdata_tte, pfn, attr, TTE4M);
/*
* We set the lock bit in the tte to lock the translation in
@@ -210,7 +208,7 @@ sfmmu_remap_kernel(void)
ASSERT(tsbsz >= MMU_PAGESIZE4M);
ASSERT(IS_P2ALIGNED(tsbsz, tsbsz));
ASSERT(IS_P2ALIGNED(va, tsbsz));
- attr = PROC_DATA | HAT_NOSYNC | HAT_ATTR_NOSOFTEXEC;
+ attr = PROC_DATA | HAT_NOSYNC;
while (tsbsz != 0) {
ASSERT(i < MAX_BIGKTSB_TTES);
pfn = va_to_pfn(va);
@@ -294,8 +292,7 @@ kdi_tlb_page_lock(caddr_t va, int do_dtlb)
pfn_t pfn = va_to_pfn(va);
uint64_t ret;
- sfmmu_memtte(&tte, pfn, PROC_TEXT | HAT_NOSYNC | HAT_ATTR_NOSOFTEXEC,
- TTE8K);
+ sfmmu_memtte(&tte, pfn, (PROC_TEXT | HAT_NOSYNC), TTE8K);
ret = hv_mmu_map_perm_addr(va, KCONTEXT, *(uint64_t *)&tte,
MAP_ITLB | (do_dtlb ? MAP_DTLB : 0));
@@ -481,22 +478,3 @@ void
sfmmu_cache_flushall()
{
}
-
-/*
- * Initialise the real address field in sfmmu_pgsz_order.
- */
-void
-sfmmu_init_pgsz_hv(sfmmu_t *sfmmup)
-{
- int i;
-
- /*
- * Initialize mmu counts for pagesize register programming.
- */
- for (i = 0; i < max_mmu_page_sizes; i++) {
- sfmmup->sfmmu_mmuttecnt[i] = 0;
- }
-
- sfmmup->sfmmu_pgsz_order.hv_pgsz_order_pa =
- va_to_pa(&sfmmup->sfmmu_pgsz_order.hv_pgsz_order);
-}
diff --git a/usr/src/uts/sun4v/vm/mach_sfmmu.h b/usr/src/uts/sun4v/vm/mach_sfmmu.h
index f9fbb7ce74..7812c73ac6 100644
--- a/usr/src/uts/sun4v/vm/mach_sfmmu.h
+++ b/usr/src/uts/sun4v/vm/mach_sfmmu.h
@@ -36,7 +36,6 @@
#include <sys/x_call.h>
#include <sys/hypervisor_api.h>
-#include <sys/mmu.h>
#ifdef __cplusplus
extern "C" {
@@ -61,29 +60,8 @@ struct hv_tsb_block {
hv_tsb_info_t hv_tsb_info[NHV_TSB_INFO]; /* hypervisor TSB info */
};
-/*
- * Defines for hypervisor pagesize search API.
- */
-
-#define TLB_PGSZ_ENABLE_SHIFT 15
-#define TLB_PGSZ_CTX_SHIFT 7
-#define TLB_PGSZ_ENABLE (1<<TLB_PGSZ_ENABLE_SHIFT)
-#define TLB_PGSZ_CONTEXT1 (1<<TLB_PGSZ_CTX_SHIFT)
-#define TLB_PGSZ_CONTEXT1_ENABLE (TLB_PGSZ_ENABLE|TLB_PGSZ_CONTEXT1)
-
-struct hv_pgsz_order {
- uint64_t hv_pgsz_order_pa; /* hypervisor pagesize order PA */
- /* hypervisor pagesize order */
- uint16_t hv_pgsz_order[MAX_PGSZ_SEARCH_ORDER];
-};
-
-#define sfmmu_pgsz_order_hv sfmmu_pgsz_order.hv_pgsz_order
-
#endif /* _ASM */
-/* value for sfmmu_pgsz_map if all shared pagesizes are allowed */
-#define TLB_ALL_SHARED_PGSZ 0xff
-
#ifdef _ASM
/*
@@ -333,47 +311,6 @@ struct hv_pgsz_order {
label/**/1:
/*
- * Support for non-coherent I$.
- *
- * In sun4v we use tte bit 3 as a software flag indicating whether
- * execute permission is given. IMMU miss traps cause the real execute
- * permission to be set. sfmmu_ttesync() will see if execute permission
- * has been set, and then set P_EXEC in page_t. This causes an
- * I-cache flush when the page is freed.
- *
- * However, the hypervisor reserves bit 3 as part of a 4-bit page size.
- * We allow this flag to be set in hme TTE, but never in TSB or TLB.
- */
-#define TTE_CLR_SOFTEXEC_ML(tte) bclr TTE_SOFTEXEC_INT, tte
-#define TTE_CHK_SOFTEXEC_ML(tte) andcc tte, TTE_SOFTEXEC_INT, %g0
-
-/*
- * TTE_SET_EXEC_ML is a macro that updates the exec bit if it is
- * not already set. Will also set reference bit at the same time.
- *
- * Caller must check EXECPRM. Do not call if it is already set in the tte.
- *
- * Parameters:
- * tte = reg containing tte
- * ttepa = physical pointer to tte
- * tmp1 = tmp reg
- * label = temporary label
- */
-
-#define TTE_SET_EXEC_ML(tte, ttepa, tmp1, label) \
- /* BEGIN CSTYLED */ \
- /* update execprm bit */ \
-label/**/1: \
- or tte, (TTE_EXECPRM_INT | TTE_REF_INT), tmp1; \
- casxa [ttepa]ASI_MEM, tte, tmp1; /* update bits */ \
- cmp tte, tmp1; \
- bne,a,pn %xcc, label/**/1; \
- mov tmp1, tte; \
- or tte, (TTE_EXECPRM_INT | TTE_REF_INT), tte; \
- /* END CSTYLED */
-
-
-/*
* TTE_SET_REF_ML is a macro that updates the reference bit if it is
* not already set.
*
@@ -597,27 +534,6 @@ label/**/1: \
label:
/* END CSTYLED */
-/*
- * For shared context mappings, check against the page size bitmap in the
- * tsbmiss area to decide if we should use private mappings instead to reduce
- * the number of shared page size searches on Rock based platforms.
- * In:
- * tsbarea (not clobbered)
- * tte (not clobbered)
- * tmp (clobbered)
- * Out:
- * use_shctx - changed to 0 if page size bit is not set in mask.
- */
-#define CHECK_SHARED_PGSZ(tsbarea, tte, tmp, use_shctx, label) \
- /* BEGIN CSTYLED */ \
- brz use_shctx, label/**/1 ;\
- and tte, TTE_SZ_BITS, tmp ;\
- ldub [tsbarea + TSBMISS_PGSZ_BITMAP], use_shctx ;\
- srlx use_shctx, tmp, use_shctx ;\
- and use_shctx, 0x1, use_shctx ;\
-label/**/1:
- /* END CSTYLED */
-
#endif /* _ASM */
#ifdef __cplusplus
diff --git a/usr/src/uts/sun4v/vm/mach_sfmmu_asm.s b/usr/src/uts/sun4v/vm/mach_sfmmu_asm.s
index aa69647bab..b0ee085586 100644
--- a/usr/src/uts/sun4v/vm/mach_sfmmu_asm.s
+++ b/usr/src/uts/sun4v/vm/mach_sfmmu_asm.s
@@ -41,7 +41,6 @@
#include <sys/pte.h>
#include <sys/mmu.h>
#include <vm/hat_sfmmu.h>
-#include <vm/mach_sfmmu.h>
#include <vm/seg_spt.h>
#include <sys/machparam.h>
#include <sys/privregs.h>
@@ -50,7 +49,6 @@
#include <sys/machthread.h>
#include <sys/clock.h>
#include <sys/trapstat.h>
-#include <sys/rock_hypervisor_api.h>
/*
* sfmmu related subroutines
@@ -79,7 +77,8 @@ sfmmu_setctx_sec(uint_t ctx)
/* ARGSUSED */
void
sfmmu_load_mmustate(sfmmu_t *sfmmup)
-{}
+{
+}
#else /* lint */
@@ -282,7 +281,7 @@ sfmmu_load_mmustate(sfmmu_t *sfmmup)
sethi %hi(ksfmmup), %o3
ldx [%o3 + %lo(ksfmmup)], %o3
cmp %o3, %o0
- be,pn %xcc, 8f ! if kernel as, do nothing
+ be,pn %xcc, 7f ! if kernel as, do nothing
nop
set MMU_SCONTEXT, %o3
@@ -340,7 +339,7 @@ sfmmu_load_mmustate(sfmmu_t *sfmmup)
ldx [%g2 + SCD_SFMMUP], %g3 ! %g3 = scdp->scd_sfmmup
ldx [%g3 + SFMMU_TSB], %o1 ! %o1 = first scd tsbinfo
- brz,pn %o1, 1f
+ brz,pn %o1, 9f
nop ! panic if no third TSB
/* make 3rd UTSBREG */
@@ -383,26 +382,9 @@ sfmmu_load_mmustate(sfmmu_t *sfmmup)
mov MMU_TSB_CTXNON0, %o5
ta FAST_TRAP ! set TSB info for user process
brnz,a,pn %o0, panic_bad_hcall
- mov MMU_TSB_CTXNON0, %o1
- mov %o3, %o0 ! restore saved sfmmup to %o0
+ mov MMU_TSB_CTXNON0, %o1
+ mov %o3, %o0 ! restore %o0
6:
- /*
- * If the TLB pagesize register is supported and pgsz_search_on is set
- * then we patch out the following branch instruction.
- */
- .global sfmmu_pgsz_load_mmustate_patch
-sfmmu_pgsz_load_mmustate_patch:
- ba,a 7f ! branch around pgsz search hcall
- mov %o0, %o3 ! preserve sfmmup in %o3
- ldx [%o3 + SFMMU_PGSZ_ORDER + HV_PGSZ_ORDER_PA], %o0
- mov TLB_SO_ID, %o1 ! flags apply to I and D
- mov MMU_SET_NONPRIV_SEARCH, %o5
- ta FAST_TRAP ! set page size search order
- brnz,a,pn %o0, panic_bad_hcall
- mov MMU_SET_NONPRIV_SEARCH, %o1
- mov %o3, %o0 ! restore saved sfmmup to %o0
-7:
- mov %o1, %o5 ! preserve pgsz_search_on
ldx [%o0 + SFMMU_ISMBLKPA], %o1 ! copy members of sfmmu
CPU_TSBMISS_AREA(%o2, %o3) ! %o2 = tsbmiss area
stx %o1, [%o2 + TSBMISS_ISMBLKPA] ! sfmmu_tsb_miss into the
@@ -413,7 +395,7 @@ sfmmu_pgsz_load_mmustate_patch:
stub %o3, [%o2 + TSBMISS_UTTEFLAGS]
stub %o4, [%o2 + TSBMISS_URTTEFLAGS]
stx %o1, [%o2 + TSBMISS_SHARED_UHATID]
- brz,pn %o1, 8f ! check for sfmmu_srdp
+ brz,pn %o1, 7f ! check for sfmmu_srdp
add %o0, SFMMU_HMERMAP, %o1
add %o2, TSBMISS_SHMERMAP, %o2
mov SFMMU_HMERGNMAP_WORDS, %o3
@@ -423,38 +405,31 @@ sfmmu_pgsz_load_mmustate_patch:
ldx [%o0 + SFMMU_SCDP], %o4 ! %o4 = sfmmu_scd
CPU_TSBMISS_AREA(%o2, %o3) ! %o2 = tsbmiss area
mov SFMMU_HMERGNMAP_WORDS, %o3
- brnz,pt %o4, 9f ! check for sfmmu_scdp else
- nop
- add %o2, TSBMISS_SCDSHMERMAP, %o2 ! zero tsbmiss scd_shmermap
+ brnz,pt %o4, 8f ! check for sfmmu_scdp else
+ add %o2, TSBMISS_SCDSHMERMAP, %o2 ! zero tsbmiss scd_shmermap
ZERO_REGION_MAP(%o2, %o3, zero_scd_mmustate)
-8:
+7:
retl
nop
-9:
- brz,a %o5, 0f ! test pgsz_search_on
- or %g0, TLB_ALL_SHARED_PGSZ, %o1 ! enable all page sizes
- ldub [%o0 + SFMMU_PGSZ_MAP], %o1
-0:
- stub %o1, [%o2 + TSBMISS_PGSZ_BITMAP] ! set tsbmiss pgsz bitmap
- add %o2, TSBMISS_SCDSHMERMAP, %o2 ! set tsbmiss scd_shmermap
- add %o4, SCD_HMERMAP, %o1
+8: ! set tsbmiss scd_shmermap
+ add %o4, SCD_HMERMAP, %o1
SET_REGION_MAP(%o1, %o2, %o3, %o4, load_scd_mmustate)
-
retl
nop
-1:
+9:
sethi %hi(panicstr), %g1 ! panic if no 3rd TSB
ldx [%g1 + %lo(panicstr)], %g1
tst %g1
- bnz,pn %xcc, 8b
+ bnz,pn %xcc, 7b
nop
sethi %hi(sfmmu_panic10), %o0
call panic
or %o0, %lo(sfmmu_panic10), %o0
- SET_SIZE(sfmmu_load_mmustate)
+ SET_SIZE(sfmmu_load_mmustate)
+
#endif /* lint */
#if defined(lint)
diff --git a/usr/src/uts/sun4v/vm/mach_vm_dep.c b/usr/src/uts/sun4v/vm/mach_vm_dep.c
index d34908b0f5..a8368677bb 100644
--- a/usr/src/uts/sun4v/vm/mach_vm_dep.c
+++ b/usr/src/uts/sun4v/vm/mach_vm_dep.c
@@ -52,7 +52,6 @@
#include <sys/stack.h>
#include <sys/atomic.h>
#include <sys/promif.h>
-#include <sys/hsvc.h>
uint_t page_colors = 0;
uint_t page_colors_mask = 0;
@@ -150,7 +149,6 @@ static vmem_t *contig_mem_slab_arena;
static vmem_t *contig_mem_arena;
static vmem_t *contig_mem_reloc_arena;
static kmutex_t contig_mem_lock;
-static kmutex_t contig_mem_sleep_lock;
#define CONTIG_MEM_ARENA_QUANTUM 64
#define CONTIG_MEM_SLAB_ARENA_QUANTUM MMU_PAGESIZE64K
@@ -617,15 +615,14 @@ contig_mem_alloc(size_t size)
}
/*
- * contig_mem_alloc_align_flag allocates real contiguous memory with the
+ * contig_mem_alloc_align allocates real contiguous memory with the
* specified alignment up to contig_mem_import_size_max. The alignment must
* be a power of 2 and no greater than contig_mem_import_size_max. We assert
 * the alignment is a power of 2. For non-debug, vmem_xalloc will panic
* for non power of 2 alignments.
*/
-static void *
-contig_mem_alloc_align_flag(size_t size, size_t align, int flag,
- kmutex_t *lockp)
+void *
+contig_mem_alloc_align(size_t size, size_t align)
{
void *buf;
@@ -644,48 +641,27 @@ contig_mem_alloc_align_flag(size_t size, size_t align, int flag,
* allocations also prevents us from trying to allocate
* more spans than necessary.
*/
- mutex_enter(lockp);
+ mutex_enter(&contig_mem_lock);
buf = vmem_xalloc(contig_mem_arena, size, align, 0, 0,
- NULL, NULL, flag | VM_NORELOC);
+ NULL, NULL, VM_NOSLEEP | VM_NORELOC);
if ((buf == NULL) && (size <= MMU_PAGESIZE)) {
- mutex_exit(lockp);
+ mutex_exit(&contig_mem_lock);
return (vmem_xalloc(static_alloc_arena, size, align, 0, 0,
- NULL, NULL, flag));
+ NULL, NULL, VM_NOSLEEP));
}
if (buf == NULL) {
buf = vmem_xalloc(contig_mem_reloc_arena, size, align, 0, 0,
- NULL, NULL, flag);
+ NULL, NULL, VM_NOSLEEP);
}
- mutex_exit(lockp);
+ mutex_exit(&contig_mem_lock);
return (buf);
}
-void *
-contig_mem_alloc_align(size_t size, size_t align)
-{
- return (contig_mem_alloc_align_flag
- (size, align, VM_NOSLEEP, &contig_mem_lock));
-}
-
-/*
- * This function is provided for callers that need physically contiguous
- * allocations but can sleep. We use the contig_mem_sleep_lock so that we
- * don't interfere with contig_mem_alloc_align calls that should never sleep.
- * As in contig_mem_alloc_align, we use a lock to prevent allocating
- * unnecessary spans when called in parallel.
- */
-void *
-contig_mem_alloc_align_sleep(size_t size, size_t align)
-{
- return (contig_mem_alloc_align_flag
- (size, align, VM_SLEEP, &contig_mem_sleep_lock));
-}
-
void
contig_mem_free(void *vaddr, size_t size)
{
@@ -709,7 +685,6 @@ void
contig_mem_init(void)
{
mutex_init(&contig_mem_lock, NULL, MUTEX_DEFAULT, NULL);
- mutex_init(&contig_mem_sleep_lock, NULL, MUTEX_DEFAULT, NULL);
contig_mem_slab_arena = vmem_xcreate("contig_mem_slab_arena", NULL, 0,
CONTIG_MEM_SLAB_ARENA_QUANTUM, contig_vmem_xalloc_aligned_wrapper,
@@ -811,96 +786,3 @@ exec_get_spslew(void)
uint_t spcolor = atomic_inc_32_nv(&sp_current_color);
return ((size_t)((spcolor & sp_color_mask) * SA(sp_color_stride)));
}
-
-/*
- * This flag may be set via /etc/system to force the synchronization
- * of I-cache with memory after every bcopy. The default is 0, meaning
- * that there is no need for an I-cache flush after each bcopy. This
- * flag is relevant only on platforms that have non-coherent I-caches.
- */
-uint_t force_sync_icache_after_bcopy = 0;
-
-/*
- * This flag may be set via /etc/system to force the synchronization
- * of I-cache to memory after every DMA. The default is 0, meaning
- * that there is no need for an I-cache flush after each dma write to
- * memory. This flag is relevant only on platforms that have
- * non-coherent I-caches.
- */
-uint_t force_sync_icache_after_dma = 0;
-
-/*
- * This internal flag enables mach_sync_icache_pa, which is always
- * called from common code if it is defined. However, not all
- * platforms support the hv_mem_iflush firmware call.
- */
-static uint_t do_mach_sync_icache_pa = 0;
-
-int hsvc_kdi_mem_iflush_negotiated = B_FALSE;
-
-#define MEM_IFLUSH_MAJOR 1
-#define MEM_IFLUSH_MINOR 0
-static hsvc_info_t kdi_mem_iflush_hsvc = {
- HSVC_REV_1, /* HSVC rev num */
- NULL, /* Private */
- HSVC_GROUP_MEM_IFLUSH, /* Requested API Group */
- MEM_IFLUSH_MAJOR, /* Requested Major */
- MEM_IFLUSH_MINOR, /* Requested Minor */
- "kdi" /* Module name */
-};
-
-/*
- * Set up soft exec mode.
- * Since /etc/system is read later during init, it
- * may be used to override these flags.
- */
-void
-mach_setup_icache(uint_t coherency)
-{
- int status;
- uint64_t sup_minor;
-
- if (coherency == 0 && icache_is_coherent) {
- extern void kdi_flush_caches(void);
- status = hsvc_register(&kdi_mem_iflush_hsvc, &sup_minor);
- if (status != 0)
- cmn_err(CE_PANIC, "I$ flush not implemented on "
- "I$ incoherent system");
- hsvc_kdi_mem_iflush_negotiated = B_TRUE;
- kdi_flush_caches();
- icache_is_coherent = 0;
- do_mach_sync_icache_pa = 1;
- }
-}
-
-/*
- * Flush specified physical address range from I$ via hv_mem_iflush interface
- */
-/*ARGSUSED*/
-void
-mach_sync_icache_pa(caddr_t paddr, size_t size)
-{
- if (do_mach_sync_icache_pa) {
- uint64_t pa = (uint64_t)paddr;
- uint64_t sz = (uint64_t)size;
- uint64_t i, flushed;
-
- for (i = 0; i < sz; i += flushed) {
- if (hv_mem_iflush(pa + i, sz - i, &flushed) != H_EOK) {
- cmn_err(CE_PANIC, "Flushing the Icache failed");
- break;
- }
- }
- }
-}
-
-/*
- * Flush the page if it has been marked as executed
- */
-/*ARGSUSED*/
-void
-mach_sync_icache_pp(page_t *pp)
-{
- if (PP_ISEXEC(pp))
- mach_sync_icache_pa((caddr_t)ptob(pp->p_pagenum), PAGESIZE);
-}