# DP: Changes for the Linaro 4.8-2014.03 release. LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_8-branch@208264 \ svn://gcc.gnu.org/svn/gcc/branches/linaro/gcc-4_8-branch@208576 \ | filterdiff --remove-timestamps --addoldprefix=a/src/ --addnewprefix=b/src/ --- a/src/libitm/ChangeLog.linaro +++ b/src/libitm/ChangeLog.linaro @@ -0,0 +1,51 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. --- a/src/libgomp/ChangeLog.linaro +++ b/src/libgomp/ChangeLog.linaro @@ -0,0 +1,59 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-22 Yvan Roux + + Backport from trunk r200521. + 2013-06-28 Marcus Shawcroft + + * testsuite/libgomp.fortran/strassen.f90: + Add dg-skip-if aarch64_tiny. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. --- a/src/libgomp/testsuite/libgomp.fortran/strassen.f90 +++ b/src/libgomp/testsuite/libgomp.fortran/strassen.f90 @@ -1,4 +1,5 @@ ! { dg-options "-O2" } +! { dg-skip-if "AArch64 tiny code model does not support programs larger than 1MiB" {aarch64_tiny} {"*"} {""} } program strassen_matmul use omp_lib --- a/src/libquadmath/ChangeLog.linaro +++ b/src/libquadmath/ChangeLog.linaro @@ -0,0 +1,51 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. 
+ +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. --- a/src/libsanitizer/sanitizer_common/sanitizer_linux.cc +++ b/src/libsanitizer/sanitizer_common/sanitizer_linux.cc @@ -410,7 +410,9 @@ CHECK_EQ(*current_++, ' '); while (IsDecimal(*current_)) current_++; - CHECK_EQ(*current_++, ' '); + // Qemu may lack the trailing space. + // http://code.google.com/p/address-sanitizer/issues/detail?id=160 + // CHECK_EQ(*current_++, ' '); // Skip spaces. while (current_ < next_line && *current_ == ' ') current_++; --- a/src/libsanitizer/ChangeLog.linaro +++ b/src/libsanitizer/ChangeLog.linaro @@ -0,0 +1,66 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-20 Christophe Lyon + + Backport from trunk r198683. + 2013-05-07 Christophe Lyon + + * configure.tgt: Add ARM pattern. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-06-04 Christophe Lyon + + Backport from trunk r199606. + 2013-06-03 Christophe Lyon + + * sanitizer_common/sanitizer_linux.cc (MemoryMappingLayout::Next): + Cherry pick upstream r182922. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. --- a/src/libsanitizer/configure.tgt +++ b/src/libsanitizer/configure.tgt @@ -29,6 +29,8 @@ ;; sparc*-*-linux*) ;; + arm*-*-linux*) + ;; x86_64-*-darwin[1]* | i?86-*-darwin[1]*) TSAN_SUPPORTED=no ;; --- a/src/zlib/ChangeLog.linaro +++ b/src/zlib/ChangeLog.linaro @@ -0,0 +1,51 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. --- a/src/libstdc++-v3/ChangeLog.linaro +++ b/src/libstdc++-v3/ChangeLog.linaro @@ -0,0 +1,51 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. 
+ +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. --- a/src/configure.ac +++ b/src/configure.ac @@ -611,6 +611,8 @@ # Disable Java if libffi is not supported. case "${target}" in + aarch64-*-*) + ;; alpha*-*-*) ;; arm*-*-*) --- a/src/intl/ChangeLog.linaro +++ b/src/intl/ChangeLog.linaro @@ -0,0 +1,51 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. --- a/src/ChangeLog.linaro +++ b/src/ChangeLog.linaro @@ -0,0 +1,59 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-12-06 Michael Collison + + Backport from trunk r197997 + 2013-04-16 Andreas Schwab + + * configure.ac (aarch64-*-*): Don't disable java. + * configure: Regenerate. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. --- a/src/libmudflap/ChangeLog.linaro +++ b/src/libmudflap/ChangeLog.linaro @@ -0,0 +1,51 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. 
+ +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. --- a/src/boehm-gc/ChangeLog.linaro +++ b/src/boehm-gc/ChangeLog.linaro @@ -0,0 +1,64 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-05-02 Matthew Gretton-Dann + + Backport from trunk r197770. + + 2013-03-16 Yvan Roux + + * include/private/gcconfig.h (AARCH64): New macro (defined only if + __aarch64__). + (mach_type_known): Update comment adding ARM AArch64 target. + (NOSYS, mach_type_known,CPP_WORDSZ, MACH_TYPE, ALIGNMENT, HBLKSIZE, + OS_TYPE, LINUX_STACKBOTTOM, USE_GENERIC_PUSH_REGS, DYNAMIC_LOADING, + DATASTART, DATAEND, STACKBOTTOM): Define for AArch64. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. --- a/src/boehm-gc/include/private/gcconfig.h +++ b/src/boehm-gc/include/private/gcconfig.h @@ -60,6 +60,13 @@ # endif /* Determine the machine type: */ +#if defined(__aarch64__) +# define AARCH64 +# if !defined(LINUX) +# define NOSYS +# define mach_type_known +# endif +# endif # if defined(__arm__) || defined(__thumb__) # define ARM32 # if !defined(LINUX) && !defined(NETBSD) @@ -239,6 +246,10 @@ # define IA64 # define mach_type_known # endif +# if defined(LINUX) && defined(__aarch64__) +# define AARCH64 +# define mach_type_known +# endif # if defined(LINUX) && defined(__arm__) # define ARM32 # define mach_type_known @@ -500,6 +511,7 @@ /* running Amdahl UTS4 */ /* S390 ==> 390-like machine */ /* running LINUX */ + /* AARCH64 ==> ARM AArch64 */ /* ARM32 ==> Intel StrongARM */ /* IA64 ==> Intel IPF */ /* (e.g. Itanium) */ @@ -1841,6 +1853,32 @@ # define HEURISTIC1 # endif +# ifdef AARCH64 +# define CPP_WORDSZ 64 +# define MACH_TYPE "AARCH64" +# define ALIGNMENT 8 +# ifndef HBLKSIZE +# define HBLKSIZE 4096 +# endif +# ifdef LINUX +# define OS_TYPE "LINUX" +# define LINUX_STACKBOTTOM +# define USE_GENERIC_PUSH_REGS +# define DYNAMIC_LOADING + extern int __data_start[]; +# define DATASTART ((ptr_t)__data_start) + extern char _end[]; +# define DATAEND ((ptr_t)(&_end)) +# endif +# ifdef NOSYS + /* __data_start is usually defined in the target linker script. */ + extern int __data_start[]; +# define DATASTART ((ptr_t)__data_start) + extern void *__stack_base__; +# define STACKBOTTOM ((ptr_t)__stack_base__) +# endif +# endif + # ifdef ARM32 # define CPP_WORDSZ 32 # define MACH_TYPE "ARM32" --- a/src/include/ChangeLog.linaro +++ b/src/include/ChangeLog.linaro @@ -0,0 +1,51 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. 
+ +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. --- a/src/libiberty/ChangeLog.linaro +++ b/src/libiberty/ChangeLog.linaro @@ -0,0 +1,51 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. --- a/src/lto-plugin/ChangeLog.linaro +++ b/src/lto-plugin/ChangeLog.linaro @@ -0,0 +1,51 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. --- a/src/contrib/regression/ChangeLog.linaro +++ b/src/contrib/regression/ChangeLog.linaro @@ -0,0 +1,51 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. 
+ +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. --- a/src/contrib/config-list.mk +++ b/src/contrib/config-list.mk @@ -11,7 +11,8 @@ # nohup nice make -j25 -l36 -f ../gcc/contrib/config-list.mk > make.out 2>&1 & # # v850e1-elf is rejected by config.sub -LIST = alpha-linux-gnu alpha-freebsd6 alpha-netbsd alpha-openbsd \ +LIST = aarch64-elf aarch64-linux-gnu \ + alpha-linux-gnu alpha-freebsd6 alpha-netbsd alpha-openbsd \ alpha64-dec-vms alpha-dec-vms am33_2.0-linux \ arm-wrs-vxworks arm-netbsdelf \ arm-linux-androideabi arm-uclinux_eabi arm-eabi \ --- a/src/contrib/ChangeLog.linaro +++ b/src/contrib/ChangeLog.linaro @@ -0,0 +1,58 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-05-02 Matthew Gretton-Dann + + Backport from trunk r198443. + 2013-04-22 Sofiane Naci + + * config-list.mk (LIST): Add aarch64-elf and aarch64-linux-gnu. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. --- a/src/contrib/reghunt/ChangeLog.linaro +++ b/src/contrib/reghunt/ChangeLog.linaro @@ -0,0 +1,51 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. --- a/src/libatomic/ChangeLog.linaro +++ b/src/libatomic/ChangeLog.linaro @@ -0,0 +1,59 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-12-06 Michael Collison + + Backport from trunk r203774 + 2013-10-17 Michael Hudson-Doyle + + * libatomic/configure.tgt (aarch64*): Remove code preventing + build. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. 
+ +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. --- a/src/libatomic/configure.tgt +++ b/src/libatomic/configure.tgt @@ -95,11 +95,6 @@ # Other system configury case "${target}" in - aarch64*) - # This is currently not supported in AArch64. - UNSUPPORTED=1 - ;; - arm*-*-linux*) # OS support for atomic primitives. config_path="${config_path} linux/arm posix" --- a/src/config/ChangeLog.linaro +++ b/src/config/ChangeLog.linaro @@ -0,0 +1,51 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. --- a/src/libbacktrace/ChangeLog.linaro +++ b/src/libbacktrace/ChangeLog.linaro @@ -0,0 +1,51 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. --- a/src/libjava/libltdl/ChangeLog.linaro +++ b/src/libjava/libltdl/ChangeLog.linaro @@ -0,0 +1,51 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. 
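# Illustrative aside (not part of the patch): the libjava hunks below add
# an AArch64 port whose new sysdep/aarch64/locks.h implements its
# synchronization primitives directly on GCC's generic __sync builtins.
# A minimal C sketch of how primitives of that shape are used -- the
# lock_word flag and the function names here are hypothetical, chosen for
# illustration only:
#
#     #include <stdbool.h>
#     #include <stddef.h>
#
#     typedef size_t obj_addr_t;            /* as in the new locks.h */
#
#     static volatile obj_addr_t lock_word; /* 0 = free, 1 = held */
#
#     /* Acquire semantics: later memory accesses cannot be reordered
#        before the compare-and-swap.  */
#     static bool
#     try_acquire (void)
#     {
#       return __sync_bool_compare_and_swap (&lock_word, 0, 1);
#     }
#
#     /* Release semantics: the full barrier ensures prior accesses
#        complete before the flag is cleared.  */
#     static void
#     release (void)
#     {
#       __sync_synchronize ();
#       lock_word = 0;
#     }
#
# On AArch64, GCC expands these builtins to exclusive load/store
# sequences and memory barriers, which is why the new locks.h contains
# no hand-written assembly.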
--- a/src/libjava/configure.host +++ b/src/libjava/configure.host @@ -81,6 +81,11 @@ # This case statement supports per-CPU defaults. case "${host}" in + aarch64*-linux*) + libgcj_interpreter=yes + sysdeps_dir=aarch64 + ATOMICSPEC=-fuse-atomic-builtins + ;; arm*-elf) with_libffi_default=no PROCESS=Ecos @@ -289,6 +294,12 @@ sysdeps_dir=i386 DIVIDESPEC=-f%{m32:no-}use-divide-subroutine ;; + aarch64*-linux* ) + slow_pthread_self=no + can_unwind_signal=no + CHECKREFSPEC=-fcheck-references + DIVIDESPEC=-fuse-divide-subroutine + ;; arm*-linux* ) slow_pthread_self=no can_unwind_signal=no --- a/src/libjava/ChangeLog.linaro +++ b/src/libjava/ChangeLog.linaro @@ -0,0 +1,59 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-12-06 Michael Collison + + Backport from trunk r197997 + 2013-04-16 Andreas Schwab + + * configure.host: Add support for aarch64. + * sysdep/aarch64/locks.h: New file. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. --- a/src/libjava/classpath/ChangeLog.linaro +++ b/src/libjava/classpath/ChangeLog.linaro @@ -0,0 +1,58 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-12-06 Michael Collison + + Backport from trunk r197997 + 2013-04-16 Andreas Schwab + + * native/fdlibm/ieeefp.h: Add support for aarch64. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. --- a/src/libjava/classpath/native/fdlibm/ieeefp.h +++ b/src/libjava/classpath/native/fdlibm/ieeefp.h @@ -4,6 +4,14 @@ #ifndef __IEEE_BIG_ENDIAN #ifndef __IEEE_LITTLE_ENDIAN +#ifdef __aarch64__ +#ifdef __AARCH64EB__ +#define __IEEE_BIG_ENDIAN +#else +#define __IEEE_LITTLE_ENDIAN +#endif +#endif + #ifdef __alpha__ #define __IEEE_LITTLE_ENDIAN #endif --- a/src/libjava/sysdep/aarch64/locks.h +++ b/src/libjava/sysdep/aarch64/locks.h @@ -0,0 +1,57 @@ +// locks.h - Thread synchronization primitives. AArch64 implementation. + +#ifndef __SYSDEP_LOCKS_H__ +#define __SYSDEP_LOCKS_H__ + +typedef size_t obj_addr_t; /* Integer type big enough for object */ + /* address. 
*/ + +// Atomically replace *addr by new_val if it was initially equal to old. +// Return true if the comparison succeeded. +// Assumed to have acquire semantics, i.e. later memory operations +// cannot execute before the compare_and_swap finishes. +inline static bool +compare_and_swap(volatile obj_addr_t *addr, + obj_addr_t old, + obj_addr_t new_val) +{ + return __sync_bool_compare_and_swap(addr, old, new_val); +} + +// Set *addr to new_val with release semantics, i.e. making sure +// that prior loads and stores complete before this +// assignment. +inline static void +release_set(volatile obj_addr_t *addr, obj_addr_t new_val) +{ + __sync_synchronize(); + *addr = new_val; +} + +// Compare_and_swap with release semantics instead of acquire semantics. +// On many architecture, the operation makes both guarantees, so the +// implementation can be the same. +inline static bool +compare_and_swap_release(volatile obj_addr_t *addr, + obj_addr_t old, + obj_addr_t new_val) +{ + return __sync_bool_compare_and_swap(addr, old, new_val); +} + +// Ensure that subsequent instructions do not execute on stale +// data that was loaded from memory before the barrier. +inline static void +read_barrier() +{ + __sync_synchronize(); +} + +// Ensure that prior stores to memory are completed with respect to other +// processors. +inline static void +write_barrier() +{ + __sync_synchronize(); +} +#endif --- a/src/gnattools/ChangeLog.linaro +++ b/src/gnattools/ChangeLog.linaro @@ -0,0 +1,51 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. --- a/src/maintainer-scripts/ChangeLog.linaro +++ b/src/maintainer-scripts/ChangeLog.linaro @@ -0,0 +1,51 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. --- a/src/configure +++ b/src/configure @@ -3272,6 +3272,8 @@ # Disable Java if libffi is not supported. 
case "${target}" in + aarch64-*-*) + ;; alpha*-*-*) ;; arm*-*-*) --- a/src/libgcc/ChangeLog.linaro +++ b/src/libgcc/ChangeLog.linaro @@ -0,0 +1,61 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-05-02 Matthew Gretton-Dann + + Backport from trunk r198090. + 2013-04-19 Yufeng Zhang + + * config/aarch64/sfp-machine.h (_FP_W_TYPE): Change to define + as 'unsigned long long' instead of 'unsigned long'. + (_FP_WS_TYPE): Change to define as 'signed long long' instead of + 'signed long'. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. --- a/src/libgcc/config/aarch64/sfp-machine.h +++ b/src/libgcc/config/aarch64/sfp-machine.h @@ -24,8 +24,8 @@ . */ #define _FP_W_TYPE_SIZE 64 -#define _FP_W_TYPE unsigned long -#define _FP_WS_TYPE signed long +#define _FP_W_TYPE unsigned long long +#define _FP_WS_TYPE signed long long #define _FP_I_TYPE int typedef int TItype __attribute__ ((mode (TI))); --- a/src/libgcc/config/libbid/ChangeLog.linaro +++ b/src/libgcc/config/libbid/ChangeLog.linaro @@ -0,0 +1,51 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. --- a/src/libdecnumber/ChangeLog.linaro +++ b/src/libdecnumber/ChangeLog.linaro @@ -0,0 +1,51 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. 
+ +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. --- a/src/gcc/LINARO-VERSION +++ b/src/gcc/LINARO-VERSION @@ -0,0 +1 @@ +4.8-2014.03 --- a/src/gcc/targhooks.c +++ b/src/gcc/targhooks.c @@ -1042,20 +1042,17 @@ unsigned *cost = (unsigned *) data; unsigned retval = 0; - if (flag_vect_cost_model) - { - tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE; - int stmt_cost = default_builtin_vectorization_cost (kind, vectype, + tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE; + int stmt_cost = default_builtin_vectorization_cost (kind, vectype, misalign); - /* Statements in an inner loop relative to the loop being - vectorized are weighted more heavily. The value here is - arbitrary and could potentially be improved with analysis. */ - if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info)) - count *= 50; /* FIXME. */ + /* Statements in an inner loop relative to the loop being + vectorized are weighted more heavily. The value here is + arbitrary and could potentially be improved with analysis. */ + if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info)) + count *= 50; /* FIXME. */ - retval = (unsigned) (count * stmt_cost); - cost[where] += retval; - } + retval = (unsigned) (count * stmt_cost); + cost[where] += retval; return retval; } --- a/src/gcc/hooks.c +++ b/src/gcc/hooks.c @@ -147,6 +147,14 @@ return false; } +/* Generic hook that takes (gimple_stmt_iterator *) and returns + false. */ +bool +hook_bool_gsiptr_false (gimple_stmt_iterator *a ATTRIBUTE_UNUSED) +{ + return false; +} + /* Used for the TARGET_ASM_CAN_OUTPUT_MI_THUNK hook. */ bool hook_bool_const_tree_hwi_hwi_const_tree_false (const_tree a ATTRIBUTE_UNUSED, --- a/src/gcc/hooks.h +++ b/src/gcc/hooks.h @@ -42,6 +42,7 @@ extern bool hook_bool_const_tree_false (const_tree); extern bool hook_bool_tree_true (tree); extern bool hook_bool_const_tree_true (const_tree); +extern bool hook_bool_gsiptr_false (gimple_stmt_iterator *); extern bool hook_bool_const_tree_hwi_hwi_const_tree_false (const_tree, HOST_WIDE_INT, HOST_WIDE_INT, --- a/src/gcc/c-family/ChangeLog.linaro +++ b/src/gcc/c-family/ChangeLog.linaro @@ -0,0 +1,51 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. --- a/src/gcc/java/ChangeLog.linaro +++ b/src/gcc/java/ChangeLog.linaro @@ -0,0 +1,51 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. 
+ +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. --- a/src/gcc/c/ChangeLog.linaro +++ b/src/gcc/c/ChangeLog.linaro @@ -0,0 +1,51 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. --- a/src/gcc/target.def +++ b/src/gcc/target.def @@ -1289,7 +1289,8 @@ "", tree, (unsigned int /*location_t*/ loc, tree fndecl, void *arglist), NULL) -/* Fold a target-specific builtin. */ +/* Fold a target-specific builtin to a tree valid for both GIMPLE + and GENERIC. */ DEFHOOK (fold_builtin, "", @@ -1296,6 +1297,16 @@ tree, (tree fndecl, int n_args, tree *argp, bool ignore), hook_tree_tree_int_treep_bool_null) +/* Fold a target-specific builtin to a valid GIMPLE tree. */ +DEFHOOK +(gimple_fold_builtin, + "Fold a call to a machine specific built-in function that was set up\n\ +by @samp{TARGET_INIT_BUILTINS}. @var{gsi} points to the gimple\n\ +statement holding the function call. Returns true if any change\n\ +was made to the GIMPLE stream.", + bool, (gimple_stmt_iterator *gsi), + hook_bool_gsiptr_false) + /* Target hook is used to compare the target attributes in two functions to determine which function's features get higher priority. This is used during function multi-versioning to figure out the order in which two --- a/src/gcc/incpath.c +++ b/src/gcc/incpath.c @@ -148,20 +148,22 @@ if (!filename_ncmp (p->fname, cpp_GCC_INCLUDE_DIR, len)) { char *str = concat (iprefix, p->fname + len, NULL); - if (p->multilib == 1 && imultilib) - str = reconcat (str, str, dir_separator_str, - imultilib, NULL); - else if (p->multilib == 2) + if (p->multilib && imultilib) + { + str = reconcat (str, str, dir_separator_str, + imultilib, NULL); + add_path (str, SYSTEM, p->cxx_aware, false); + } + else + add_path (str, SYSTEM, p->cxx_aware, false); + + if (p->multilib && imultiarch) { - if (!imultiarch) - { - free (str); - continue; - } + char *str = concat (iprefix, p->fname + len, NULL); str = reconcat (str, str, dir_separator_str, imultiarch, NULL); + add_path (str, SYSTEM, p->cxx_aware, false); } - add_path (str, SYSTEM, p->cxx_aware, false); } } } @@ -171,7 +173,7 @@ { if (!p->cplusplus || cxx_stdinc) { - char *str; + char *str, *str2; /* Should this directory start with the sysroot? 
*/ if (sysroot && p->add_sysroot) @@ -215,19 +217,20 @@ else str = update_path (p->fname, p->component); - if (p->multilib == 1 && imultilib) - str = reconcat (str, str, dir_separator_str, imultilib, NULL); - else if (p->multilib == 2) + str2 = xstrdup(str); + if (p->multilib && imultilib) { - if (!imultiarch) - { - free (str); - continue; - } - str = reconcat (str, str, dir_separator_str, imultiarch, NULL); + str = reconcat (str, str, dir_separator_str, imultilib, NULL); + add_path (str, SYSTEM, p->cxx_aware, false); } + else + add_path (str, SYSTEM, p->cxx_aware, false); - add_path (str, SYSTEM, p->cxx_aware, false); + if (p->multilib && imultiarch) + { + str2 = reconcat (str2, str2, dir_separator_str, imultiarch, NULL); + add_path (str2, SYSTEM, p->cxx_aware, false); + } } } } --- a/src/gcc/rtlanal.c +++ b/src/gcc/rtlanal.c @@ -1199,6 +1199,10 @@ if (find_reg_note (insn, REG_EQUAL, NULL_RTX)) return 0; + /* Check the code to be executed for COND_EXEC. */ + if (GET_CODE (pat) == COND_EXEC) + pat = COND_EXEC_CODE (pat); + if (GET_CODE (pat) == SET && set_noop_p (pat)) return 1; --- a/src/gcc/configure +++ b/src/gcc/configure @@ -1658,7 +1658,8 @@ use sysroot as the system root during the build --with-sysroot[=DIR] search for usr/lib, usr/include, et al, within DIR --with-specs=SPECS add SPECS to driver command-line processing - --with-pkgversion=PKG Use PKG in the version string in place of "GCC" + --with-pkgversion=PKG Use PKG in the version string in place of "Linaro + GCC `cat $srcdir/LINARO-VERSION`" --with-bugurl=URL Direct users to URL to report a bug --with-multilib-list select multilibs (SH and x86-64 only) --with-gnu-ld assume the C compiler uses GNU ld default=no @@ -7327,7 +7328,7 @@ *) PKGVERSION="($withval) " ;; esac else - PKGVERSION="(GCC) " + PKGVERSION="(Linaro GCC `cat $srcdir/LINARO-VERSION`) " fi @@ -25984,8 +25985,9 @@ # ??? Once 2.11 is released, probably need to add first known working # version to the per-target configury. case "$cpu_type" in - alpha | arm | avr | bfin | cris | i386 | m32c | m68k | microblaze | mips \ - | pa | rs6000 | score | sparc | spu | tilegx | tilepro | xstormy16 | xtensa) + aarch64 | alpha | arm | avr | bfin | cris | i386 | m32c | m68k | microblaze \ + | mips | pa | rs6000 | score | sparc | spu | tilegx | tilepro | xstormy16 \ + | xtensa) insn="nop" ;; ia64 | s390) --- a/src/gcc/gcc.c +++ b/src/gcc/gcc.c @@ -2227,7 +2227,7 @@ } /* Now try the multiarch path. */ - if (!skip_multi_dir + if (!skip_multi_dir && !multi_dir && !pl->require_machine_suffix && multiarch_dir) { memcpy (path + len, multiarch_suffix, multiarch_len + 1); @@ -2263,6 +2263,16 @@ if (ret) break; } + + /* Now try the multiarch path. */ + if (!skip_multi_dir + && !pl->require_machine_suffix && multiarch_dir) + { + memcpy (path + len, multiarch_suffix, multiarch_len + 1); + ret = callback (path, callback_info); + if (ret) + break; + } } if (pl) break; @@ -7662,6 +7672,21 @@ ++p; } + if (first) + { + if (this_path_len > 3 + && this_path[0] == '.' 
+ && this_path[1] == ':' + && this_path[2] == ':') + { + char *new_multiarch_dir = XNEWVEC (char, this_path_len + 1); + + strncpy (new_multiarch_dir, this_path, this_path_len); + new_multiarch_dir[this_path_len] = '\0'; + multiarch_dir = &new_multiarch_dir[3]; + } + } + if (ok && first) { if (this_path_len != 1 --- a/src/gcc/gensupport.c +++ b/src/gcc/gensupport.c @@ -1717,6 +1717,21 @@ XVECEXP (insn, 1, 0) = pattern; } + if (XVEC (ce_elem->data, 3) != NULL) + { + rtvec attributes = rtvec_alloc (XVECLEN (insn, 4) + + XVECLEN (ce_elem->data, 3)); + int i = 0; + int j = 0; + for (i = 0; i < XVECLEN (insn, 4); i++) + RTVEC_ELT (attributes, i) = XVECEXP (insn, 4, i); + + for (j = 0; j < XVECLEN (ce_elem->data, 3); j++, i++) + RTVEC_ELT (attributes, i) = XVECEXP (ce_elem->data, 3, j); + + XVEC (insn, 4) = attributes; + } + XSTR (insn, 2) = alter_test_for_insn (ce_elem, insn_elem); XTMPL (insn, 3) = alter_output_for_insn (ce_elem, insn_elem, alternatives, max_operand); --- a/src/gcc/fold-const.c +++ b/src/gcc/fold-const.c @@ -2474,9 +2474,13 @@ } if (TREE_CODE (arg0) != TREE_CODE (arg1) - /* This is needed for conversions and for COMPONENT_REF. - Might as well play it safe and always test this. */ - || TREE_CODE (TREE_TYPE (arg0)) == ERROR_MARK + /* NOP_EXPR and CONVERT_EXPR are considered equal. */ + && !(CONVERT_EXPR_P (arg0) && CONVERT_EXPR_P (arg1))) + return 0; + + /* This is needed for conversions and for COMPONENT_REF. + Might as well play it safe and always test this. */ + if (TREE_CODE (TREE_TYPE (arg0)) == ERROR_MARK || TREE_CODE (TREE_TYPE (arg1)) == ERROR_MARK || TYPE_MODE (TREE_TYPE (arg0)) != TYPE_MODE (TREE_TYPE (arg1))) return 0; --- a/src/gcc/objc/ChangeLog.linaro +++ b/src/gcc/objc/ChangeLog.linaro @@ -0,0 +1,51 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. --- a/src/gcc/tree-ssa-uncprop.c +++ b/src/gcc/tree-ssa-uncprop.c @@ -466,12 +466,11 @@ struct equiv_hash_elt equiv_hash_elt; void **slot; - /* If the argument is not an invariant, and refers to the same - underlying variable as the PHI result, then there's no - point in un-propagating the argument. */ + /* If the argument is not an invariant and can be potentially + coalesced with the result, then there's no point in + un-propagating the argument. */ if (!is_gimple_min_invariant (arg) - && (SSA_NAME_VAR (arg) == SSA_NAME_VAR (res) - && TREE_TYPE (arg) == TREE_TYPE (res))) + && gimple_can_coalesce_p (arg, res)) continue; /* Lookup this argument's value in the hash table. */ @@ -485,7 +484,7 @@ int j; /* Walk every equivalence with the same value. 
If we find - one with the same underlying variable as the PHI result, + one that can potentially coalesce with the PHI rsult, then replace the value in the argument with its equivalent SSA_NAME. Use the most recent equivalence as hopefully that results in shortest lifetimes. */ @@ -493,8 +492,7 @@ { tree equiv = elt->equivalences[j]; - if (SSA_NAME_VAR (equiv) == SSA_NAME_VAR (res) - && TREE_TYPE (equiv) == TREE_TYPE (res)) + if (gimple_can_coalesce_p (equiv, res)) { SET_PHI_ARG_DEF (phi, e->dest_idx, equiv); break; --- a/src/gcc/ChangeLog.linaro +++ b/src/gcc/ChangeLog.linaro @@ -0,0 +1,3063 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + * LINARO-VERSION: Update. + +2014-02-13 Yvan Roux + + * LINARO-VERSION: Bump version. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + * LINARO-VERSION: Update. + +2014-02-10 Michael Collison + + Backport from trunk r206518 + 2014-01-10 Kyrylo Tkachov + + * config/arm/arm.c (arm_init_iwmmxt_builtins): Skip + non-iwmmxt builtins. + +2014-02-10 Michael Collison + + Backport from trunk r206151 + 2013-12-20 Kyrylo Tkachov + + * config/arm/neon.ml (crypto_intrinsics): Add vceq_64 and vtst_p64. + * config/arm/arm_neon.h: Regenerate. + * config/arm/neon-docgen.ml: Add vceq_p64 and vtst_p64. + * doc/arm-neon-intrinsics.texi: Regenerate. + +2014-02-10 Michael Collison + + Backport from trunk r206149 + 2013-12-20 Kyrylo Tkachov + + * config/arm/arm_acle.h: Add underscores before variables. + +2014-02-10 Michael Collison + + Backport from trunk r206132 + 2013-12-19 Kyrylo Tkachov + + * config/arm/neon-docgen.ml: Add crypto intrinsics documentation. + * doc/arm-neon-intrinsics.texi: Regenerate. + +2014-02-10 Michael Collison + + Backport from trunk r206131 + 2013-12-19 Kyrylo Tkachov + + * config/arm/neon-testgen.ml (effective_target): Handle "CRYPTO". + +2014-02-10 Michael Collison + + Backport from trunk r206130 + 2013-12-19 Kyrylo Tkachov + + * config/arm/arm.c (enum arm_builtins): Add crypto builtins. + (arm_init_neon_builtins): Handle crypto builtins. + (bdesc_2arg): Likewise. + (bdesc_1arg): Likewise. + (bdesc_3arg): New table. + (arm_expand_ternop_builtin): New function. + (arm_expand_unop_builtin): Handle sha1h explicitly. + (arm_expand_builtin): Handle ternary builtins. + * config/arm/arm.h (TARGET_CPU_CPP_BUILTINS): + Define __ARM_FEATURE_CRYPTO. + * config/arm/arm.md: Include crypto.md. + (is_neon_type): Add crypto types. + * config/arm/arm_neon_builtins.def: Add TImode reinterprets. + * config/arm/crypto.def: New. + * config/arm/crypto.md: Likewise. + * config/arm/iterators.md (CRYPTO_UNARY): New int iterator. + (CRYPTO_BINARY): Likewise. + (CRYPTO_TERNARY): Likewise. + (CRYPTO_SELECTING): Likewise. + (crypto_pattern): New int attribute. + (crypto_size_sfx): Likewise. + (crypto_mode): Likewise. + (crypto_type): Likewise. + * config/arm/neon-gen.ml: Handle poly64_t and poly128_t types. + Handle crypto intrinsics. + * config/arm/neon.ml: Add support for poly64 and polt128 types + and intrinsics. Define crypto intrinsics. + * config/arm/neon.md (neon_vreinterpretti): New pattern. + (neon_vreinterpretv16qi): Use VQXMOV mode iterator. + (neon_vreinterpretv8hi): Likewise. + (neon_vreinterpretv4si): Likewise. + (neon_vreinterpretv4sf): Likewise. + (neon_vreinterpretv2di): Likewise. 
+ * config/arm/unspecs.md (UNSPEC_AESD, UNSPEC_AESE, UNSPEC_AESIMC, + UNSPEC_AESMC, UNSPEC_SHA1C, UNSPEC_SHA1M, UNSPEC_SHA1P, UNSPEC_SHA1H, + UNSPEC_SHA1SU0, UNSPEC_SHA1SU1, UNSPEC_SHA256H, UNSPEC_SHA256H2, + UNSPEC_SHA256SU0, UNSPEC_SHA256SU1, VMULLP64): Define. + * config/arm/arm_neon.h: Regenerate. + + Modifications needed to backport into linaro-4_8-branch: + * config/arm/arm.md (attribute neon_type): neon_crypto_aes, + neon_crypto_sha1_xor, neon_crypto_sha1_fast, + neon_crypto_sha1_slow, neon_crypto_sha256_fast, + neon_crypto_sha256_slow, neon_mul_d_long: New. + instead of: + * config/arm/arm.md: Include crypto.md. + (is_neon_type): Add crypto types. + + +2014-02-10 Michael Collison + + Backport from trunk r206128 + 2013-12-19 Kyrylo Tkachov + + * Makefile.in (TEXI_GCC_FILES): Add arm-acle-intrinsics.texi. + * config.gcc (extra_headers): Add arm_acle.h. + * config/arm/arm.c (FL_CRC32): Define. + (arm_have_crc): Likewise. + (arm_option_override): Set arm_have_crc. + (arm_builtins): Add CRC32 builtins. + (bdesc_2arg): Likewise. + (arm_init_crc32_builtins): New function. + (arm_init_builtins): Initialise CRC32 builtins. + (arm_file_start): Handle architecture extensions. + * config/arm/arm.h (TARGET_CPU_CPP_BUILTINS): Define __ARM_FEATURE_CRC32. + Define __ARM_32BIT_STATE. + (TARGET_CRC32): Define. + * config/arm/arm-arches.def: Add armv8-a+crc. + * config/arm/arm-tables.opt: Regenerate. + * config/arm/arm.md (type): Add crc. + (): New insn. + * config/arm/arm_acle.h: New file. + * config/arm/iterators.md (CRC): New int iterator. + (crc_variant, crc_mode): New int attributes. + * confg/arm/unspecs.md (UNSPEC_CRC32B, UNSPEC_CRC32H, UNSPEC_CRC32W, + UNSPEC_CRC32CB, UNSPEC_CRC32CH, UNSPEC_CRC32CW): New unspecs. + * doc/invoke.texi: Document -march=armv8-a+crc option. + * doc/extend.texi: Document ACLE intrinsics. + +2014-02-10 Michael Collison + + Backport from trunk r206120 + 2013-12-19 Tejas Belagod + + * config/aarch64/aarch64-builtins.c (aarch64_init_simd_builtins): + Define builtin types for poly64_t poly128_t. + (TYPES_BINOPP, aarch64_types_binopp_qualifiers): New. + * aarch64/aarch64-simd-builtins.def: Update builtins table. + * config/aarch64/aarch64-simd.md (aarch64_crypto_pmulldi, + aarch64_crypto_pmullv2di): New. + * config/aarch64/aarch64.c (aarch64_simd_mangle_map): Update table for + poly64x2_t mangler. + * config/aarch64/arm_neon.h (poly64x2_t, poly64_t, poly128_t): Define. + (vmull_p64, vmull_high_p64): New. + * config/aarch64/iterators.md (UNSPEC_PMULL<2>): New. + +2014-02-10 Michael Collison + + Backport from trunk r206119 + 2013-12-19 Tejas Belagod + + * config/aarch64/aarch64-simd-builtins.def: Update builtins table. + * config/aarch64/aarch64-simd.md (aarch64_crypto_sha256hv4si, + aarch64_crypto_sha256su0v4si, aarch64_crypto_sha256su1v4si): New. + * config/aarch64/arm_neon.h (vsha256hq_u32, vsha256h2q_u32, + vsha256su0q_u32, vsha256su1q_u32): New. + * config/aarch64/iterators.md (UNSPEC_SHA256H<2>, UNSPEC_SHA256SU<01>): + New. + (CRYPTO_SHA256): New int iterator. + (sha256_op): New int attribute. + +2014-02-10 Michael Collison + + Backport from trunk r206118 + 2013-12-19 Tejas Belagod + + * config/aarch64/aarch64-simd-builtins.def: Update builtins table. + * config/aarch64/aarch64-builtins.c (aarch64_types_ternopu_qualifiers, + TYPES_TERNOPU): New. + * config/aarch64/aarch64-simd.md (aarch64_crypto_sha1hsi, + aarch64_crypto_sha1su1v4si, aarch64_crypto_sha1v4si, + aarch64_crypto_sha1su0v4si): New. 
+ * config/aarch64/arm_neon.h (vsha1cq_u32, sha1mq_u32, vsha1pq_u32, + vsha1h_u32, vsha1su0q_u32, vsha1su1q_u32): New. + * config/aarch64/iterators.md (UNSPEC_SHA1, UNSPEC_SHA1SU<01>): + New. + (CRYPTO_SHA1): New int iterator. + (sha1_op): New int attribute. + +2014-02-10 Michael Collison + + Backport from trunk r206117 + 2013-12-19 Tejas Belagod + + * config/aarch64/aarch64-simd-builtins.def: Update builtins table. + * config/aarch64/aarch64-builtins.c (aarch64_types_binopu_qualifiers, + TYPES_BINOPU): New. + * config/aarch64/aarch64-simd.md (aarch64_crypto_aesv16qi, + aarch64_crypto_aesv16qi): New. + * config/aarch64/arm_neon.h (vaeseq_u8, vaesdq_u8, vaesmcq_u8, + vaesimcq_u8): New. + * config/aarch64/iterators.md (UNSPEC_AESE, UNSPEC_AESD, UNSPEC_AESMC, + UNSPEC_AESIMC): New. + (CRYPTO_AES, CRYPTO_AESMC): New int iterators. + (aes_op, aesmc_op): New int attributes. + +2014-02-10 Michael Collison + + Backport from trunk r206115 + 2013-12-19 Tejas Belagod + + * config/arm/types.md (neon_mul_d_long, crypto_aes, crypto_sha1_xor, + crypto_sha1_fast, crypto_sha1_slow, crypto_sha256_fast, + crypto_sha256_slow): New. + + Modifications needed to backport into linaro-4_8-branch: + * config/aarch64/aarch64-simd.md (attribute simd_type): + (simd_mul_d_long, simd_crypto_aes, simd_crypto_sha1_xor, + simd_crypto_sha1_fast, simd_crypto_sha1_slow, simd_crypto_sha256_fast, + simd_crypto_sha256_slow) : New. + instead of the above change. + +2014-02-10 Michael Collison + + Backport from trunk r206114 + 2013-12-19 Tejas Belagod + + * config/aarch64/aarch64.h (TARGET_CRYPTO): New. + (__ARM_FEATURE_CRYPTO): Define if TARGET_CRYPTO is true. + +2014-02-10 Michael Collison + + Backport from trunk r205384. + 2013-11-26 James Greenhalgh + + * config/aarch64/aarch64-builtins.c + (aarch64_type_qualifiers): Add qualifier_poly. + (aarch64_build_scalar_type): Also build Poly types. + (aarch64_build_vector_type): Likewise. + (aarch64_build_type): Likewise. + (aarch64_build_signed_type): New. + (aarch64_build_unsigned_type): Likewise. + (aarch64_build_poly_type): Likewise. + (aarch64_init_simd_builtins): Also handle Poly types. + +2014-02-10 Michael Collison + + Backport from trunk r205383. + 2013-11-26 James Greenhalgh + + * config/aarch64/aarch64-builtins.c + (VAR1): Use new naming scheme for aarch64_builtins. + (aarch64_builtin_vectorized_function): Use new + aarch64_builtins names. + +2014-02-10 Michael Collison + + Backport from trunk r205092. + 2013-11-20 James Greenhalgh + + * gcc/config/aarch64/aarch64-builtins.c + (aarch64_simd_itype): Remove. + (aarch64_simd_builtin_datum): Remove itype, add + qualifiers pointer. + (VAR1): Use qualifiers. + (aarch64_build_scalar_type): New. + (aarch64_build_vector_type): Likewise. + (aarch64_build_type): Likewise. + (aarch64_init_simd_builtins): Refactor, remove special cases, + consolidate main loop. + (aarch64_simd_expand_args): Likewise. + +2014-02-01 Christophe Lyon + + Backport from trunk r202875,202980. + 2013-09-24 Xinliang David Li + + * tree-vect-data-refs.c (vect_enhance_data_refs_alignment): + Check max peel iterations parameter. + * param.def: New parameter. + * doc/invoke.texi: Document New parameter. + + 2013-09-27 Xinliang David Li + + * opts.c (finish_options): Adjust parameters + according to vect cost model. + (common_handle_option): Set dynamic vect cost + model for FDO. + targhooks.c (default_add_stmt_cost): Compute stmt cost + unconditionally. + * tree-vect-loop.c (vect_estimate_min_profitable_iters): + Use helper function. 
+ * tree-vectorizer.h (unlimited_cost_model): New function. + * tree-vect-slp.c (vect_slp_analyze_bb_1): Use helper function. + * tree-vect-data-refs.c (vect_peeling_hash_insert): Use helper + function. + (vect_enhance_data_refs_alignment): Ditto. + * flag-types.h: New enum. + * common/config/i386/i386-common.c (ix86_option_init_struct): + No need to initialize vect_cost_model flag. + * config/i386/i386.c (ix86_add_stmt_cost): Compute stmt cost + unconditionally. + +2014-01-21 Zhenqiang Chen + + Backport from trunk r200103 + 2013-06-15 Jeff Law + + * gimple.h (gimple_can_coalesce_p): Prototype. + * tree-ssa-coalesce.c (gimple_can_coalesce_p): New function. + (create_outofssa_var_map, coalesce_partitions): Use it. + * tree-ssa-uncprop.c (uncprop_into_successor_phis): Similarly. + * tree-ssa-live.c (var_map_base_init): Use TYPE_CANONICAL + if it's available. + +2014-01-21 Christophe Lyon + + * LINARO-VERSION: Bump version. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + * LINARO-VERSION: Update. + +2014-01-16 Zhenqiang Chen + + Linaro local patch for armv4t multilib support. + * gcc/config/arm/t-mlibs: New file. + * config.gcc: Add t-mlibs. + * incpath.c (add_standard_paths): Try multilib path first. + * gcc.c (for_each_path): Likewise. + +2013-12-21 Christophe Lyon + + * LINARO-VERSION: Bump version. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + * LINARO-VERSION: Update. + +2013-12-06 Christophe Lyon + + Backport from trunk r204737. + 2013-11-13 Christophe Lyon + + * config/aarch64/aarch64.h (FRAME_GROWS_DOWNWARD): Define to 1. + * config/aarch64/aarch64.c (aarch64_initial_elimination_offset): + Update offset calculations. + +2013-12-06 Christophe Lyon + + Backport from trunk r203327. + 2013-10-09 Zhenqiang Chen + + * tree-ssa-phiopts.c (rhs_is_fed_for_value_replacement): New function. + (operand_equal_for_value_replacement): New function, extracted from + value_replacement and enhanced to catch more cases. + (value_replacement): Use operand_equal_for_value_replacement. + +2013-11-18 Christophe Lyon + + * LINARO-VERSION: Bump version. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + * LINARO-VERSION: Update. + +2013-11-06 Christophe Lyon + + Revert backport from trunk r197526. + 2013-04-05 Greta Yorsh + + * config/arm/arm.md (negdi_extendsidi): New pattern. + (negdi_zero_extendsidi): Likewise. + +2013-11-05 Zhenqiang Chen + + Backport from trunk r203267, r203603 and r204247. + 2013-10-08 Zhenqiang Chen + + PR target/58423 + * config/arm/arm.c (arm_emit_ldrd_pop): Attach + RTX_FRAME_RELATED_P on INSN. + + 2013-10-15 Matthew Gretton-Dann + Ramana Radhakrishnan + + * config/arm/t-aprofile: New file. + * config.gcc: Handle --with-multilib-list option. + + 2013-10-31 Zhenqiang Chen + + * lower-subreg.c (resolve_simple_move): Copy REG_INC note. + +2013-10-17 Christophe Lyon + + Backport from trunk r200956 + 2013-07-15 Marcus Shawcroft + + * config/aarch64/aarch64-protos.h (aarch64_symbol_type): + Define SYMBOL_TINY_GOT, update comment. + * config/aarch64/aarch64.c + (aarch64_load_symref_appropriately): Handle SYMBOL_TINY_GOT. + (aarch64_expand_mov_immediate): Likewise. + (aarch64_print_operand): Likewise. + (aarch64_classify_symbol): Likewise. + * config/aarch64/aarch64.md (UNSPEC_GOTTINYPIC): Define. + (ldr_got_tiny): Define. + +2013-10-16 Christophe Lyon + + * LINARO-VERSION: Bump version. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + * LINARO-VERSION: Update. 
+ +2013-10-09 Christophe Lyon + + Backport from trunk r198526,198527,200020,200595. + 2013-05-02 Ian Bolton + + * config/aarch64/aarch64.md (*and_one_cmpl3_compare0): + New pattern. + (*and_one_cmplsi3_compare0_uxtw): Likewise. + (*and_one_cmpl_3_compare0): Likewise. + (*and_one_cmpl_si3_compare0_uxtw): Likewise. + + 2013-05-02 Ian Bolton + + * config/aarch64/aarch64.md (movsi_aarch64): Only allow to/from + S reg when fp attribute set. + (movdi_aarch64): Only allow to/from D reg when fp attribute set. + + 2013-06-12 Sofiane Naci + + * config/aarch64/aarch64-simd.md (aarch64_combine): convert to split. + (aarch64_simd_combine): New instruction expansion. + * config/aarch64/aarch64-protos.h (aarch64_split_simd_combine): New + function prototype. + * config/aarch64/aarch64.c (aarch64_split_combine): New function. + * config/aarch64/iterators.md (Vdbl): Add entry for DF. + + 2013-07-02 Ian Bolton + + * config/aarch64/aarch64.md (*extr_insv_reg): New pattern. + +2013-10-09 Christophe Lyon + + Backport from trunk r201879. + 2013-08-20 Matthew Gretton-Dann + + * config/arm/linux-elf.h (MULTILIB_DEFAULTS): Remove definition. + * config/arm/t-linux-eabi (MULTILIB_OPTIONS): Document association + with MULTLIB_DEFAULTS. + +2013-10-09 Christophe Lyon + + Backport from trunk r201871. + 2013-08-20 Pavel Chupin + + Fix LIB_SPEC for systems without libpthread. + + * config/gnu-user.h: Introduce GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC. + * config/arm/linux-eabi.h: Use GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC + for Android. + * config/i386/linux-common.h: Likewise. + * config/mips/linux-common.h: Likewise. + +2013-10-08 Christophe Lyon + + Backport from trunk r202702. + 2013-09-18 Richard Earnshaw + + * arm.c (arm_get_frame_offsets): Validate architecture supports + LDRD/STRD before accepting the tuning preference. + (arm_expand_prologue): Likewise. + (arm_expand_epilogue): Likewise. + +2013-10-04 Venkataramanan.Kumar + + Backport from trunk r203028. + 2013-09-30 Venkataramanan Kumar + + * config/aarch64/aarch64.h (MCOUNT_NAME): Define. + (NO_PROFILE_COUNTERS): Likewise. + (PROFILE_HOOK): Likewise. + (FUNCTION_PROFILER): Likewise. + * config/aarch64/aarch64.c (aarch64_function_profiler): Remove. + +2013-10-03 Christophe Lyon + + Backport from trunk r201923,201927. + 2013-08-22 Julian Brown + + * configure.ac: Add aarch64 to list of arches which use "nop" in + debug_line test. + * configure: Regenerate. + + 2013-08-22 Paolo Carlini + + * configure.ac: Add backslashes missing from the last change. + * configure: Regenerate. + +2013-10-03 Christophe Lyon + + Backport from trunk r202023,202108. + 2013-08-27 Tejas Belagod + + * config/aarch64/arm_neon.h: Replace all inline asm implementations + of vget_low_* with implementations in terms of other intrinsics. + + 2013-08-30 Tejas Belagod + + * config/aarch64/arm_neon.h (__AARCH64_UINT64_C, __AARCH64_INT64_C): New + arm_neon.h's internal macros to specify 64-bit constants. Avoid using + stdint.h's macros. + +2013-10-03 Christophe Lyon + + Backport from trunk r201260,202400. + 2013-07-26 Kyrylo Tkachov + Richard Earnshaw + + * combine.c (simplify_comparison): Re-canonicalize operands + where appropriate. + * config/arm/arm.md (movcond_addsi): New splitter. + + 2013-09-09 Kyrylo Tkachov + + * config/aarch64/aarch64.c (aarch64_select_cc_mode): Return CC_SWP for + comparison with negated operand. + * config/aarch64/aarch64.md (compare_neg): Match canonical + RTL form. + +2013-10-03 Christophe Lyon + + Backport from trunk r202164. 
+ 2013-09-02 Bin Cheng + + * tree-ssa-loop-ivopts.c (set_autoinc_for_original_candidates): + Find auto-increment use both before and after candidate. + +2013-10-03 Christophe Lyon + + Backport from trunk r202279. + 2013-09-05 Richard Earnshaw + + * arm.c (thumb2_emit_strd_push): Rewrite to use pre-decrement on + initial store. + * thumb2.md (thumb2_storewb_parisi): New pattern. + +2013-10-03 Christophe Lyon + + Backport from trunk r202275. + 2013-09-05 Yufeng Zhang + + * config/aarch64/aarch64-option-extensions.def: Add + AARCH64_OPT_EXTENSION of 'crc'. + * config/aarch64/aarch64.h (AARCH64_FL_CRC): New define. + (AARCH64_ISA_CRC): Ditto. + * doc/invoke.texi (-march and -mcpu feature modifiers): Add + description of the CRC extension. + +2013-10-01 Christophe Lyon + + Backport from trunk r201250. + 2013-07-25 Kyrylo Tkachov + + * config/arm/arm.md (arm_addsi3, addsi3_carryin_, + addsi3_carryin_alt2_): Correct output template. + +2013-10-01 Kugan Vivekanandarajah + + Backport from trunk r203059,203116. + 2013-10-01 Kugan Vivekanandarajah + + PR target/58578 + Revert + 2013-04-05 Greta Yorsh + * config/arm/arm.md (arm_ashldi3_1bit): define_insn into + define_insn_and_split. + (arm_ashrdi3_1bit,arm_lshrdi3_1bit): Likewise. + (shiftsi3_compare): New pattern. + (rrx): New pattern. + * config/arm/unspecs.md (UNSPEC_RRX): New. + +2013-09-11 Christophe Lyon + + * LINARO-VERSION: Bump version. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + * LINARO-VERSION: Update. + +2013-09-10 Venkataramanan Kumar + + Backport from trunk r200197, 201411. + 2013-06-19 Richard Earnshaw + + arm.md (split for eq(reg, 0)): Add variants for ARMv5 and Thumb2. + (peepholes for eq(reg, not-0)): Ensure condition register is dead after + pattern. Use more efficient sequences on ARMv5 and Thumb2. + + 2013-08-01 Kyrylo Tkachov + + * config/arm/arm.md (peepholes for eq (reg1) (reg2/imm)): + Generate canonical plus rtx with negated immediate instead of minus + where appropriate. + * config/arm/arm.c (thumb2_reorg): Handle ADCS , case. + +2013-09-10 Christophe Lyon + + Backport from trunk r200593,201024,201025,201122,201124,201126. + 2013-07-02 Kyrylo Tkachov + + * config/arm/arm.md (arm_andsi3_insn): Add alternatives for 16-bit + encoding. + (iorsi3_insn): Likewise. + (arm_xorsi3): Likewise. + + 2013-07-18 Sofiane Naci + + * config/arm/arm.md (attribute "type"): Rename "simple_alu_imm" to + "arlo_imm". Rename "alu_reg" to "arlo_reg". Rename "simple_alu_shift" to + "extend". Split "alu_shift" into "shift" and "arlo_shift". Split + "alu_shift_reg" into "shift_reg" and "arlo_shift_reg". List types + in alphabetical order. + (attribute "core_cycles"): Update for attribute changes. + (arm_addsi3): Likewise. + (addsi3_compare0): Likewise. + (addsi3_compare0_scratch): Likewise. + (addsi3_compare_op1): Likewise. + (addsi3_compare_op2): Likewise. + (compare_addsi2_op0): Likewise. + (compare_addsi2_op1): Likewise. + (addsi3_carryin_shift_): Likewise. + (subsi3_carryin_shift): Likewise. + (rsbsi3_carryin_shift): Likewise. + (arm_subsi3_insn): Likewise. + (subsi3_compare0): Likewise. + (subsi3_compare): Likewise. + (arm_andsi3_insn): Likewise. + (thumb1_andsi3_insn): Likewise. + (andsi3_compare0): Likewise. + (andsi3_compare0_scratch): Likewise. + (zeroextractsi_compare0_scratch + (andsi_not_shiftsi_si): Likewise. + (iorsi3_insn): Likewise. + (iorsi3_compare0): Likewise. + (iorsi3_compare0_scratch): Likewise. + (arm_xorsi3): Likewise. + (thumb1_xorsi3_insn): Likewise. + (xorsi3_compare0): Likewise. 
+ (xorsi3_compare0_scratch): Likewise. + (satsi__shift): Likewise. + (rrx): Likewise. + (arm_shiftsi3): Likewise. + (shiftsi3_compare0): Likewise. + (not_shiftsi): Likewise. + (not_shiftsi_compare0): Likewise. + (not_shiftsi_compare0_scratch): Likewise. + (arm_one_cmplsi2): Likewise. + (thumb_one_complsi2): Likewise. + (notsi_compare0): Likewise. + (notsi_compare0_scratch): Likewise. + (thumb1_zero_extendhisi2): Likewise. + (arm_zero_extendhisi2): Likewise. + (arm_zero_extendhisi2_v6): Likewise. + (arm_zero_extendhisi2addsi): Likewise. + (thumb1_zero_extendqisi2): Likewise. + (thumb1_zero_extendqisi2_v6): Likewise. + (arm_zero_extendqisi2): Likewise. + (arm_zero_extendqisi2_v6): Likewise. + (arm_zero_extendqisi2addsi): Likewise. + (thumb1_extendhisi2): Likewise. + (arm_extendhisi2): Likewise. + (arm_extendhisi2_v6): Likewise. + (arm_extendqisi): Likewise. + (arm_extendqisi_v6): Likewise. + (arm_extendqisi2addsi): Likewise. + (thumb1_extendqisi2): Likewise. + (thumb1_movdi_insn): Likewise. + (arm_movsi_insn): Likewise. + (movsi_compare0): Likewise. + (movhi_insn_arch4): Likewise. + (movhi_bytes): Likewise. + (arm_movqi_insn): Likewise. + (thumb1_movqi_insn): Likewise. + (arm32_movhf): Likewise. + (thumb1_movhf): Likewise. + (arm_movsf_soft_insn): Likewise. + (thumb1_movsf_insn): Likewise. + (movdf_soft_insn): Likewise. + (thumb_movdf_insn): Likewise. + (arm_cmpsi_insn): Likewise. + (cmpsi_shiftsi): Likewise. + (cmpsi_shiftsi_swp): Likewise. + (arm_cmpsi_negshiftsi_si): Likewise. + (movsicc_insn): Likewise. + (movsfcc_soft_insn): Likewise. + (arith_shiftsi): Likewise. + (arith_shiftsi_compare0 + (arith_shiftsi_compare0_scratch + (sub_shiftsi): Likewise. + (sub_shiftsi_compare0 + (sub_shiftsi_compare0_scratch + (and_scc): Likewise. + (cond_move): Likewise. + (if_plus_move): Likewise. + (if_move_plus): Likewise. + (if_move_not): Likewise. + (if_not_move): Likewise. + (if_shift_move): Likewise. + (if_move_shift): Likewise. + (if_shift_shift): Likewise. + (if_not_arith): Likewise. + (if_arith_not): Likewise. + (cond_move_not): Likewise. + (thumb1_ashlsi3): Set type attribute. + (thumb1_ashrsi3): Likewise. + (thumb1_lshrsi3): Likewise. + (thumb1_rotrsi3): Likewise. + (shiftsi3_compare0_scratch): Likewise. + * config/arm/neon.md (neon_mov): Update for attribute changes. + (neon_mov): Likewise. + * config/arm/thumb2.md (thumb_andsi_not_shiftsi_si): Update for attribute + changes. + (thumb2_movsi_insn): Likewise. + (thumb2_cmpsi_neg_shiftsi): Likewise. + (thumb2_extendqisi_v6): Likewise. + (thumb2_zero_extendhisi2_v6): Likewise. + (thumb2_zero_extendqisi2_v6): Likewise. + (thumb2_shiftsi3_short): Likewise. + (thumb2_addsi3_compare0_scratch): Likewise. + (orsi_not_shiftsi_si): Likewise. + * config/arm/vfp.md (arm_movsi_vfp): Update for attribute changes. + * config/arm/arm-fixed.md (arm_ssatsihi_shift): Update for attribute + changes. + * config/arm/arm1020e.md (1020alu_op): Update for attribute changes. + (1020alu_shift_op): Likewise. + (1020alu_shift_reg_op): Likewise. + * config/arm/arm1026ejs.md (alu_op): Update for attribute changes. + (alu_shift_op): Likewise. + (alu_shift_reg_op): Likewise. + * config/arm/arm1136jfs.md (11_alu_op): Update for attribute changes. + (11_alu_shift_op): Likewise. + (11_alu_shift_reg_op): Likewise. + * config/arm/arm926ejs.md (9_alu_op): Update for attribute changes. + (9_alu_shift_reg_op): Likewise. + * config/arm/cortex-a15.md (cortex_a15_alu): Update for attribute changes. + (cortex_a15_alu_shift): Likewise. + (cortex_a15_alu_shift_reg): Likewise. 
+ * config/arm/cortex-a5.md (cortex_a5_alu): Update for attribute changes. + (cortex_a5_alu_shift): Likewise. + * config/arm/cortex-a53.md (cortex_a53_alu) : Update for attribute + changes. + (cortex_a53_alu_shift): Likewise. + * config/arm/cortex-a7.md (cortex_a7_alu_imm): Update for attribute + changes. + (cortex_a7_alu_reg): Likewise. + (cortex_a7_alu_shift): Likewise. + * config/arm/cortex-a8.md (cortex_a8_alu): Update for attribute changes. + (cortex_a8_alu_shift): Likewise. + (cortex_a8_alu_shift_reg): Likewise. + (cortex_a8_mov): Likewise. + * config/arm/cortex-a9.md (cortex_a9_dp): Update for attribute changes. + (cortex_a9_dp_shift): Likewise. + * config/arm/cortex-m4.md (cortex_m4_alu): Update for attribute changes. + * config/arm/cortex-r4.md (cortex_r4_alu): Update for attribute changes. + (cortex_r4_mov): Likewise. + (cortex_r4_alu_shift): Likewise. + (cortex_r4_alu_shift_reg): Likewise. + * config/arm/fa526.md (526_alu_op): Update for attribute changes. + (526_alu_shift_op): Likewise. + * config/arm/fa606te.md (606te_alu_op): Update for attribute changes. + * config/arm/fa626te.md (626te_alu_op): Update for attribute changes. + (626te_alu_shift_op): Likewise. + * config/arm/fa726te.md (726te_shift_op): Update for attribute changes. + (726te_alu_op): Likewise. + (726te_alu_shift_op): Likewise. + (726te_alu_shift_reg_op): Likewise. + * config/arm/fmp626.md (mp626_alu_op): Update for attribute changes. + (mp626_alu_shift_op): Likewise. + * config/arm/marvell-pj4.md (pj4_alu_e1): Update for attribute changes. + (pj4_alu_e1_conds): Likewise. + (pj4_alu): Likewise. + (pj4_alu_conds): Likewise. + (pj4_shift): Likewise. + (pj4_shift_conds): Likewise. + (pj4_alu_shift): Likewise. + (pj4_alu_shift_conds): Likewise. + * config/arm/arm.c (xscale_sched_adjust_cost): Update for attribute changes. + (cortexa7_older_only): Likewise. + (cortexa7_younger): Likewise. + + 2013-07-18 Sofiane Naci + + * config/arm/arm.md (attribute "insn"): Delete values "mrs", "msr", + "xtab" and "sat". Move value "clz" from here to ... + (attriubte "type"): ... here. + (satsi_): Delete "insn" attribute. + (satsi__shift): Likewise. + (arm_zero_extendqisi2addsi): Likewise. + (arm_extendqisi2addsi): Likewise. + (clzsi2): Update for attribute changes. + (rbitsi2): Likewise. + * config/arm/arm-fixed.md (arm_ssatsihi_shift): Delete "insn" attribute. + (arm_usatsihi): Likewise. + * config/arm/cortex-a8.md (cortex_a8_alu): Update for attribute change. + + 2013-07-22 Kyrylo Tkachov + + * config/arm/predicates.md (shiftable_operator_strict_it): + New predicate. + * config/arm/thumb2.md (thumb_andsi_not_shiftsi_si): + Disable cond_exec version for arm_restrict_it. + (thumb2_smaxsi3): Convert to generate cond_exec. + (thumb2_sminsi3): Likewise. + (thumb32_umaxsi3): Likewise. + (thumb2_uminsi3): Likewise. + (thumb2_abssi2): Adjust constraints for arm_restrict_it. + (thumb2_neg_abssi2): Likewise. + (thumb2_mov_scc): Add alternative for 16-bit encoding. + (thumb2_movsicc_insn): Adjust alternatives. + (thumb2_mov_negscc): Disable for arm_restrict_it. + (thumb2_mov_negscc_strict_it): New pattern. + (thumb2_mov_notscc_strict_it): New pattern. + (thumb2_mov_notscc): Disable for arm_restrict_it. + (thumb2_ior_scc): Likewise. + (thumb2_ior_scc_strict_it): New pattern. + (thumb2_cond_move): Adjust for arm_restrict_it. + (thumb2_cond_arith): Disable for arm_restrict_it. + (thumb2_cond_arith_strict_it): New pattern. + (thumb2_cond_sub): Adjust for arm_restrict_it. + (thumb2_movcond): Likewise. 
+ (thumb2_extendqisi_v6): Disable cond_exec variant for arm_restrict_it. + (thumb2_zero_extendhisi2_v6): Likewise. + (thumb2_zero_extendqisi2_v6): Likewise. + (orsi_notsi_si): Likewise. + (orsi_not_shiftsi_si): Likewise. + + 2013-07-22 Sofiane Naci + + * config/arm/arm.md (attribute "insn"): Delete. + (attribute "type"): Add "mov_imm", "mov_reg", "mov_shift", + "mov_shift_reg", "mvn_imm", "mvn_reg", "mvn_shift" and "mvn_shift_reg". + (not_shiftsi): Update for attribute change. + (not_shiftsi_compare0): Likewise. + (not_shiftsi_compare0_scratch): Likewise. + (arm_one_cmplsi2): Likewise. + (thumb1_one_cmplsi2): Likewise. + (notsi_compare0): Likewise. + (notsi_compare0_scratch): Likewise. + (thumb1_movdi_insn): Likewise. + (arm_movsi_insn): Likewise. + (movhi_insn_arch4): Likewise. + (movhi_bytes): Likewise. + (arm_movqi_insn): Likewise. + (thumb1_movqi_insn): Likewise. + (arm32_movhf): Likewise. + (thumb1_movhf): Likewise. + (arm_movsf_soft_insn): Likewise. + (thumb1_movsf_insn): Likewise. + (thumb_movdf_insn): Likewise. + (movsicc_insn): Likewise. + (movsfcc_soft_insn): Likewise. + (and_scc): Likewise. + (cond_move): Likewise. + (if_move_not): Likewise. + (if_not_move): Likewise. + (if_shift_move): Likewise. + (if_move_shift): Likewise. + (if_shift_shift): Likewise. + (if_not_arith): Likewise. + (if_arith_not): Likewise. + (cond_move_not): Likewise. + * config/arm/neon.md (neon_mov): Update for attribute change. + (neon_mov): Likewise. + * config/arm/vfp.md (arm_movsi_vfp): Update for attribute change. + (thumb2_movsi_vfp): Likewise. + (movsf_vfp): Likewise. + (thumb2_movsf_vfp): Likewise. + * config/arm/arm.c (xscale_sched_adjust_cost): Update for attribute change. + (cortexa7_older_only): Likewise. + (cortexa7_younger): Likewise. + * config/arm/arm1020e.md (1020alu_op): Update for attribute change. + (1020alu_shift_op): Likewise. + (1020alu_shift_reg_op): Likewise. + * config/arm/arm1026ejs.md (alu_op): Update for attribute change. + (alu_shift_op): Likewise. + (alu_shift_reg_op): Likewise. + * config/arm/arm1136jfs.md (11_alu_op): Update for attribute change. + (11_alu_shift_op): Likewise. + (11_alu_shift_reg_op): Likewise. + * config/arm/arm926ejs.md (9_alu_op): Update for attribute change. + (9_alu_shift_reg_op): Likewise. + * config/arm/cortex-a15.md (cortex_a15_alu): Update for attribute change. + (cortex_a15_alu_shift): Likewise. + (cortex_a15_alu_shift_reg): Likewise. + * config/arm/cortex-a5.md (cortex_a5_alu): Update for attribute change. + (cortex_a5_alu_shift): Likewise. + * config/arm/cortex-a53.md (cortex_a53_alu): Update for attribute change. + (cortex_a53_alu_shift): Likewise. + * config/arm/cortex-a7.md (cortex_a7_alu_imm): Update for attribute change. + (cortex_a7_alu_reg): Likewise. + (cortex_a7_alu_shift): Likewise. + * config/arm/cortex-a8.md (cortex_a8_alu): Update for attribute change. + (cortex_a8_alu_shift): Likewise. + (cortex_a8_alu_shift_reg): Likewise. + (cortex_a8_mov): Likewise. + * config/arm/cortex-a9.md (cortex_a9_dp): Update for attribute change. + (cortex_a9_dp_shift): Likewise. + * config/arm/cortex-m4.md (cortex_m4_alu): Update for attribute change. + * config/arm/cortex-r4.md (cortex_r4_alu): Update for attribute change. + (cortex_r4_mov): Likewise. + (cortex_r4_alu_shift): Likewise. + (cortex_r4_alu_shift_reg): Likewise. + * config/arm/fa526.md (526_alu_op): Update for attribute change. + (526_alu_shift_op): Likewise. + * config/arm/fa606te.md (606te_alu_op): Update for attribute change. 
+ * config/arm/fa626te.md (626te_alu_op): Update for attribute change. + (626te_alu_shift_op): Likewise. + * config/arm/fa726te.md (726te_shift_op): Update for attribute change. + (726te_alu_op): Likewise. + (726te_alu_shift_op): Likewise. + (726te_alu_shift_reg_op): Likewise. + * config/arm/fmp626.md (mp626_alu_op): Update for attribute change. + (mp626_alu_shift_op): Likewise. + * config/arm/marvell-pj4.md (pj4_alu_e1): Update for attribute change. + (pj4_alu_e1_conds): Likewise. + (pj4_alu): Likewise. + (pj4_alu_conds): Likewise. + (pj4_shift): Likewise. + (pj4_shift_conds): Likewise. + (pj4_alu_shift): Likewise. + (pj4_alu_shift_conds): Likewise. + + 2013-07-22 Kyrylo Tkachov + + * config/arm/constraints.md (Pd): Allow TARGET_THUMB + instead of TARGET_THUMB1. + (Pz): New constraint. + * config/arm/arm.md (arm_addsi3): Add alternatives for 16-bit + encodings. + (compare_negsi_si): Likewise. + (compare_addsi2_op0): Likewise. + (compare_addsi2_op1): Likewise. + (addsi3_carryin_): Likewise. + (addsi3_carryin_alt2_): Likewise. + (addsi3_carryin_shift_): Disable cond_exec variant + for arm_restrict_it. + (subsi3_carryin): Likewise. + (arm_subsi3_insn): Add alternatives for 16-bit encoding. + (minmax_arithsi): Disable for arm_restrict_it. + (minmax_arithsi_non_canon): Adjust for arm_restrict_it. + (satsi_): Disable cond_exec variant for arm_restrict_it. + (satsi__shift): Likewise. + (arm_shiftsi3): Add alternative for 16-bit encoding. + (arm32_movhf): Disable for arm_restrict_it. + (arm_cmpdi_unsigned): Add alternatives for 16-bit encoding. + (arm_movtas_ze): Disable cond_exec variant for arm_restrict_it. + +2013-09-09 Kugan Vivekanandarajah + + Backport from trunk r201412. + 2013-08-01 Kyrylo Tkachov + + * config/arm/arm.md (minmax_arithsi_non_canon): Emit canonical RTL form + when subtracting a constant. + +2013-09-05 Yvan Roux + + Backport from trunk r201249. + 2013-07-25 Kyrylo Tkachov + + * config/arm/arm-fixed.md (ssmulsa3, usmulusa3): + Adjust for arm_restrict_it. + Remove trailing whitespace. + +2013-09-05 Yvan Roux + + Backport from trunk r201342. + 2013-07-30 Richard Earnshaw + + * config.gcc (arm): Require 64-bit host-wide-int for all ARM target + configs. + +2013-09-05 Christophe Lyon + + Backport from trunk r199527,199792,199814. + 2013-05-31 Kyrylo Tkachov + + PR target/56315 + * config/arm/arm.c (const_ok_for_dimode_op): Handle IOR. + * config/arm/arm.md (*iordi3_insn): Change to insn_and_split. + * config/arm/neon.md (iordi3_neon): Remove. + (neon_vorr): Generate iordi3 instead of iordi3_neon. + * config/arm/predicates.md (imm_for_neon_logic_operand): + Move to earlier in the file. + (neon_logic_op2): Likewise. + (arm_iordi_operand_neon): New predicate. + + 2013-06-07 Kyrylo Tkachov + + * config/arm/constraints.md (Df): New constraint. + * config/arm/arm.md (iordi3_insn): Use Df constraint instead of De. + Correct length attribute for last two alternatives. + + 2013-06-07 Kyrylo Tkachov + + PR target/56315 + * config/arm/arm.md (*xordi3_insn): Change to insn_and_split. + (xordi3): Change operand 2 constraint to arm_xordi_operand. + * config/arm/arm.c (const_ok_for_dimode_op): Handle XOR. + * config/arm/constraints.md (Dg): New constraint. + * config/arm/neon.md (xordi3_neon): Remove. + (neon_veor): Generate xordi3 instead of xordi3_neon. + * config/arm/predicates.md (arm_xordi_operand): New predicate. + +2013-09-05 Christophe Lyon + + Backport from trunk r201599. + 2013-08-08 Richard Earnshaw + + PR target/57431 + * arm/neon.md (neon_vld1_dupdi): New expand pattern. 
+ (neon_vld1_dup VD iterator): Iterate over VD not VDX. + +2013-09-05 Christophe Lyon + + Backport from trunk r201589. + 2013-08-08 Bernd Edlinger + + PR target/58065 + * config/arm/arm.h (MALLOC_ABI_ALIGNMENT): Define. + +2013-09-03 Venkataramanan Kumar + + Backport from trunk + r201624, r201666. + 2013-08-09 James Greenhalgh + + * config/aarch64/aarch64-simd-builtins.def (get_lane_signed): Remove. + (get_lane_unsigned): Likewise. + (dup_lane_scalar): Likewise. + (get_lane): enable for VALL. + * config/aarch64/aarch64-simd.md + (aarch64_dup_lane_scalar): Remove. + (aarch64_get_lane_signed): Likewise. + (aarch64_get_lane_unsigned): Likewise. + (aarch64_get_lane_extend): New. + (aarch64_get_lane_zero_extendsi): Likewise. + (aarch64_get_lane): Enable for all vector modes. + (aarch64_get_lanedi): Remove misleading constraints. + * config/aarch64/arm_neon.h + (__aarch64_vget_lane_any): Define. + (__aarch64_vget_lane_<8,16,32,64>): Likewise. + (vget_lane_<8,16,32,64>): Use __aarch64_vget_lane macros. + (vdup_lane_<8,16,32,64>): Likewise. + * config/aarch64/iterators.md (VDQQH): New. + (VDQQHS): Likewise. + (vwcore): Likewise. + + 2013-08-12 James Greenhalgh + + * config/aarch64/arm_none.h + (vdup_lane_<8,16,32,64>): Fix macro call. + +2013-08-26 Kugan Vivekanandarajah + + Backport from trunk r201341. + 2013-07-30 Richard Earnshaw + + * arm.md (mulhi3): New expand pattern. + +2013-08-16 Christophe Lyon + + * LINARO-VERSION: Bump version. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + * LINARO-VERSION: Update. + +2013-08-08 Christophe Lyon + + Backport from trunk + r198489,200167,200199,200510,200513,200515,200576. + 2013-05-01 Greta Yorsh + + * config/arm/thumb2.md (thumb2_smaxsi3,thumb2_sminsi3): Convert + define_insn to define_insn_and_split. + (thumb32_umaxsi3,thumb2_uminsi3): Likewise. + (thumb2_negdi2,thumb2_abssi2,thumb2_neg_abssi2): Likewise. + (thumb2_mov_scc,thumb2_mov_negscc,thumb2_mov_notscc): Likewise. + (thumb2_movsicc_insn,thumb2_and_scc,thumb2_ior_scc): Likewise. + (thumb2_negscc): Likewise. + + 2013-06-18 Sofiane Naci + + * config/arm/arm.md (attribute "insn"): Move multiplication and division + attributes to... + (attribute "type"): ... here. Remove mult. + (attribute "mul32"): New attribute. + (attribute "mul64"): Add umaal. + (*arm_mulsi3): Update attributes. + (*arm_mulsi3_v6): Likewise. + (*thumb_mulsi3): Likewise. + (*thumb_mulsi3_v6): Likewise. + (*mulsi3_compare0): Likewise. + (*mulsi3_compare0_v6): Likewise. + (*mulsi_compare0_scratch): Likewise. + (*mulsi_compare0_scratch_v6): Likewise. + (*mulsi3addsi): Likewise. + (*mulsi3addsi_v6): Likewise. + (*mulsi3addsi_compare0): Likewise. + (*mulsi3addsi_compare0_v6): Likewise. + (*mulsi3addsi_compare0_scratch): Likewise. + (*mulsi3addsi_compare0_scratch_v6): Likewise. + (*mulsi3subsi): Likewise. + (*mulsidi3adddi): Likewise. + (*mulsi3addsi_v6): Likewise. + (*mulsidi3adddi_v6): Likewise. + (*mulsidi3_nov6): Likewise. + (*mulsidi3_v6): Likewise. + (*umulsidi3_nov6): Likewise. + (*umulsidi3_v6): Likewise. + (*umulsidi3adddi): Likewise. + (*umulsidi3adddi_v6): Likewise. + (*smulsi3_highpart_nov6): Likewise. + (*smulsi3_highpart_v6): Likewise. + (*umulsi3_highpart_nov6): Likewise. + (*umulsi3_highpart_v6): Likewise. + (mulhisi3): Likewise. + (*mulhisi3tb): Likewise. + (*mulhisi3bt): Likewise. + (*mulhisi3tt): Likewise. + (maddhisi4): Likewise. + (*maddhisi4tb): Likewise. + (*maddhisi4tt): Likewise. + (maddhidi4): Likewise. + (*maddhidi4tb): Likewise. + (*maddhidi4tt): Likewise. + (divsi3): Likewise. 
+ (udivsi3): Likewise. + * config/arm/thumb2.md (thumb2_mulsi_short): Update attributes. + (thumb2_mulsi_short_compare0): Likewise. + (thumb2_mulsi_short_compare0_scratch): Likewise. + * config/arm/arm1020e.md (1020mult1): Update attribute change. + (1020mult2): Likewise. + (1020mult3): Likewise. + (1020mult4): Likewise. + (1020mult5): Likewise. + (1020mult6): Likewise. + * config/arm/cortex-a15.md (cortex_a15_mult32): Update attribute change. + (cortex_a15_mult64): Likewise. + (cortex_a15_sdiv): Likewise. + (cortex_a15_udiv): Likewise. + * config/arm/arm1026ejs.md (mult1): Update attribute change. + (mult2): Likewise. + (mult3): Likewise. + (mult4): Likewise. + (mult5): Likewise. + (mult6): Likewise. + * config/arm/marvell-pj4.md (pj4_ir_mul): Update attribute change. + (pj4_ir_div): Likewise. + * config/arm/arm1136jfs.md (11_mult1): Update attribute change. + (11_mult2): Likewise. + (11_mult3): Likewise. + (11_mult4): Likewise. + (11_mult5): Likewise. + (11_mult6): Likewise. + (11_mult7): Likewise. + * config/arm/cortex-a8.md (cortex_a8_mul): Update attribute change. + (cortex_a8_mla): Likewise. + (cortex_a8_mull): Likewise. + (cortex_a8_smulwy): Likewise. + (cortex_a8_smlald): Likewise. + * config/arm/cortex-m4.md (cortex_m4_alu): Update attribute change. + * config/arm/cortex-r4.md (cortex_r4_mul_4): Update attribute change. + (cortex_r4_mul_3): Likewise. + (cortex_r4_mla_4): Likewise. + (cortex_r4_mla_3): Likewise. + (cortex_r4_smlald): Likewise. + (cortex_r4_mull): Likewise. + (cortex_r4_sdiv): Likewise. + (cortex_r4_udiv): Likewise. + * config/arm/cortex-a7.md (cortex_a7_mul): Update attribute change. + (cortex_a7_idiv): Likewise. + * config/arm/arm926ejs.md (9_mult1): Update attribute change. + (9_mult2): Likewise. + (9_mult3): Likewise. + (9_mult4): Likewise. + (9_mult5): Likewise. + (9_mult6): Likewise. + * config/arm/cortex-a53.md (cortex_a53_mul): Update attribute change. + (cortex_a53_sdiv): Likewise. + (cortex_a53_udiv): Likewise. + * config/arm/fa726te.md (726te_mult_op): Update attribute change. + * config/arm/fmp626.md (mp626_mult1): Update attribute change. + (mp626_mult2): Likewise. + (mp626_mult3): Likewise. + (mp626_mult4): Likewise. + * config/arm/fa526.md (526_mult1): Update attribute change. + (526_mult2): Likewise. + * config/arm/arm-generic.md (mult): Update attribute change. + (mult_ldsched_strongarm): Likewise. + (mult_ldsched): Likewise. + (multi_cycle): Likewise. + * config/arm/cortex-a5.md (cortex_a5_mul): Update attribute change. + * config/arm/fa606te.md (606te_mult1): Update attribute change. + (606te_mult2): Likewise. + (606te_mult3): Likewise. + (606te_mult4): Likewise. + * config/arm/cortex-a9.md (cortex_a9_mult16): Update attribute change. + (cortex_a9_mac16): Likewise. + (cortex_a9_multiply): Likewise. + (cortex_a9_mac): Likewise. + (cortex_a9_multiply_long): Likewise. + * config/arm/fa626te.md (626te_mult1): Update attribute change. + (626te_mult2): Likewise. + (626te_mult3): Likewise. + (626te_mult4): Likewise. + + 2013-06-19 Sofiane Naci + + * config/arm/vfp.md: Move VFP instruction classification documentation + to ... + * config/arm/arm.md: ... here. Update instruction classification + documentation. + + 2013-06-28 Kyrylo Tkachov + + * config/arm/predicates.md (arm_cond_move_operator): New predicate. + * config/arm/arm.md (movsfcc): Use arm_cond_move_operator predicate. + (movdfcc): Likewise. + * config/arm/vfp.md (*thumb2_movsf_vfp): + Disable predication for arm_restrict_it. + (*thumb2_movsfcc_vfp): Disable for arm_restrict_it. 
+ (*thumb2_movdfcc_vfp): Likewise. + (*abssf2_vfp, *absdf2_vfp, *negsf2_vfp, *negdf2_vfp,*addsf3_vfp, + *adddf3_vfp, *subsf3_vfp, *subdf3_vfpc, *divsf3_vfp,*divdf3_vfp, + *mulsf3_vfp, *muldf3_vfp, *mulsf3negsf_vfp, *muldf3negdf_vfp, + *mulsf3addsf_vfp, *muldf3adddf_vfp, *mulsf3subsf_vfp, + *muldf3subdf_vfp, *mulsf3negsfaddsf_vfp, *fmuldf3negdfadddf_vfp, + *mulsf3negsfsubsf_vfp, *muldf3negdfsubdf_vfp, *fma4, + *fmsub4, *fnmsub4, *fnmadd4, + *extendsfdf2_vfp, *truncdfsf2_vfp, *extendhfsf2, *truncsfhf2, + *truncsisf2_vfp, *truncsidf2_vfp, fixuns_truncsfsi2, fixuns_truncdfsi2, + *floatsisf2_vfp, *floatsidf2_vfp, floatunssisf2, floatunssidf2, + *sqrtsf2_vfp, *sqrtdf2_vfp, *cmpsf_vfp, *cmpsf_trap_vfp, *cmpdf_vfp, + *cmpdf_trap_vfp, 2): + Disable predication for arm_restrict_it. + + 2013-06-28 Kyrylo Tkachov + + * config/arm/arm.md (arm_mulsi3_v6): Add alternative for 16-bit + encoding. + (mulsi3addsi_v6): Disable predicable variant for arm_restrict_it. + (mulsi3subsi): Likewise. + (mulsidi3adddi): Likewise. + (mulsidi3_v6): Likewise. + (umulsidi3_v6): Likewise. + (umulsidi3adddi_v6): Likewise. + (smulsi3_highpart_v6): Likewise. + (umulsi3_highpart_v6): Likewise. + (mulhisi3tb): Likewise. + (mulhisi3bt): Likewise. + (mulhisi3tt): Likewise. + (maddhisi4): Likewise. + (maddhisi4tb): Likewise. + (maddhisi4tt): Likewise. + (maddhidi4): Likewise. + (maddhidi4tb): Likewise. + (maddhidi4tt): Likewise. + (zeroextractsi_compare0_scratch): Likewise. + (insv_zero): Likewise. + (insv_t2): Likewise. + (anddi_notzesidi_di): Likewise. + (anddi_notsesidi_di): Likewise. + (andsi_notsi_si): Likewise. + (iordi_zesidi_di): Likewise. + (xordi_zesidi_di): Likewise. + (andsi_iorsi3_notsi): Likewise. + (smax_0): Likewise. + (smax_m1): Likewise. + (smin_0): Likewise. + (not_shiftsi): Likewise. + (unaligned_loadsi): Likewise. + (unaligned_loadhis): Likewise. + (unaligned_loadhiu): Likewise. + (unaligned_storesi): Likewise. + (unaligned_storehi): Likewise. + (extv_reg): Likewise. + (extzv_t2): Likewise. + (divsi3): Likewise. + (udivsi3): Likewise. + (arm_zero_extendhisi2addsi): Likewise. + (arm_zero_extendqisi2addsi): Likewise. + (compareqi_eq0): Likewise. + (arm_extendhisi2_v6): Likewise. + (arm_extendqisi2addsi): Likewise. + (arm_movt): Likewise. + (thumb2_ldrd): Likewise. + (thumb2_ldrd_base): Likewise. + (thumb2_ldrd_base_neg): Likewise. + (thumb2_strd): Likewise. + (thumb2_strd_base): Likewise. + (thumb2_strd_base_neg): Likewise. + (arm_negsi2): Add alternative for 16-bit encoding. + (arm_one_cmplsi2): Likewise. + + 2013-06-28 Kyrylo Tkachov + + * config/arm/constraints.md (Ts): New constraint. + * config/arm/arm.md (arm_movqi_insn): Add alternatives for + 16-bit encodings. + (compare_scc): Use "Ts" constraint for operand 0. + (ior_scc_scc): Likewise. + (and_scc_scc): Likewise. + (and_scc_scc_nodom): Likewise. + (ior_scc_scc_cmp): Likewise for operand 7. + (and_scc_scc_cmp): Likewise. + * config/arm/thumb2.md (thumb2_movsi_insn): + Add alternatives for 16-bit encodings. + (thumb2_movhi_insn): Likewise. + (thumb2_movsicc_insn): Likewise. + (thumb2_and_scc): Take 'and' outside cond_exec. Use "Ts" constraint. + (thumb2_negscc): Use "Ts" constraint. + Move mvn instruction outside cond_exec block. + * config/arm/vfp.md (thumb2_movsi_vfp): Add alternatives + for 16-bit encodings. + + 2013-07-01 Sofiane Naci + + * arm.md (attribute "wtype"): Delete. Move attribute values from here + to ... + (attribute "type"): ... here, and prefix with "wmmx_". + (attribute "core_cycles"): Update for attribute changes. 
+ * iwmmxt.md (tbcstv8qi): Update for attribute changes. + (tbcstv4hi): Likewise. + (tbcstv2si): Likewise. + (iwmmxt_iordi3): Likewise. + (iwmmxt_xordi3): Likewise. + (iwmmxt_anddi3): Likewise. + (iwmmxt_nanddi3): Likewise. + (iwmmxt_arm_movdi): Likewise. + (iwmmxt_movsi_insn): Likewise. + (mov_internal): Likewise. + (and3_iwmmxt): Likewise. + (ior3_iwmmxt): Likewise. + (xor3_iwmmxt): Likewise. + (add3_iwmmxt): Likewise. + (ssaddv8qi3): Likewise. + (ssaddv4hi3): Likewise. + (ssaddv2si3): Likewise. + (usaddv8qi3): Likewise. + (usaddv4hi3): Likewise. + (usaddv2si3): Likewise. + (sub3_iwmmxt): Likewise. + (sssubv8qi3): Likewise. + (sssubv4hi3): Likewise. + (sssubv2si3): Likewise. + (ussubv8qi3): Likewise. + (ussubv4hi3): Likewise. + (ussubv2si3): Likewise. + (mulv4hi3_iwmmxt): Likewise. + (smulv4hi3_highpart): Likewise. + (umulv4hi3_highpart): Likewise. + (iwmmxt_wmacs): Likewise. + (iwmmxt_wmacsz): Likewise. + (iwmmxt_wmacu): Likewise. + (iwmmxt_wmacuz): Likewise. + (iwmmxt_clrdi): Likewise. + (iwmmxt_clrv8qi): Likewise. + (iwmmxt_clr4hi): Likewise. + (iwmmxt_clr2si): Likewise. + (iwmmxt_uavgrndv8qi3): Likewise. + (iwmmxt_uavgrndv4hi3): Likewise. + (iwmmxt_uavgv8qi3): Likewise. + (iwmmxt_uavgv4hi3): Likewise. + (iwmmxt_tinsrb): Likewise. + (iwmmxt_tinsrh): Likewise. + (iwmmxt_tinsrw): Likewise. + (iwmmxt_textrmub): Likewise. + (iwmmxt_textrmsb): Likewise. + (iwmmxt_textrmuh): Likewise. + (iwmmxt_textrmsh): Likewise. + (iwmmxt_textrmw): Likewise. + (iwmxxt_wshufh): Likewise. + (eqv8qi3): Likewise. + (eqv4hi3): Likewise. + (eqv2si3): Likewise. + (gtuv8qi3): Likewise. + (gtuv4hi3): Likewise. + (gtuv2si3): Likewise. + (gtv8qi3): Likewise. + (gtv4hi3): Likewise. + (gtv2si3): Likewise. + (smax3_iwmmxt): Likewise. + (umax3_iwmmxt): Likewise. + (smin3_iwmmxt): Likewise. + (umin3_iwmmxt): Likewise. + (iwmmxt_wpackhss): Likewise. + (iwmmxt_wpackwss): Likewise. + (iwmmxt_wpackdss): Likewise. + (iwmmxt_wpackhus): Likewise. + (iwmmxt_wpackwus): Likewise. + (iwmmxt_wpackdus): Likewise. + (iwmmxt_wunpckihb): Likewise. + (iwmmxt_wunpckihh): Likewise. + (iwmmxt_wunpckihw): Likewise. + (iwmmxt_wunpckilb): Likewise. + (iwmmxt_wunpckilh): Likewise. + (iwmmxt_wunpckilw): Likewise. + (iwmmxt_wunpckehub): Likewise. + (iwmmxt_wunpckehuh): Likewise. + (iwmmxt_wunpckehuw): Likewise. + (iwmmxt_wunpckehsb): Likewise. + (iwmmxt_wunpckehsh): Likewise. + (iwmmxt_wunpckehsw): Likewise. + (iwmmxt_wunpckelub): Likewise. + (iwmmxt_wunpckeluh): Likewise. + (iwmmxt_wunpckeluw): Likewise. + (iwmmxt_wunpckelsb): Likewise. + (iwmmxt_wunpckelsh): Likewise. + (iwmmxt_wunpckelsw): Likewise. + (ror3): Likewise. + (ashr3_iwmmxt): Likewise. + (lshr3_iwmmxt): Likewise. + (ashl3_iwmmxt): Likewise. + (ror3_di): Likewise. + (ashr3_di): Likewise. + (lshr3_di): Likewise. + (ashl3_di): Likewise. + (iwmmxt_wmadds): Likewise. + (iwmmxt_wmaddu): Likewise. + (iwmmxt_tmia): Likewise. + (iwmmxt_tmiaph): Likewise. + (iwmmxt_tmiabb): Likewise. + (iwmmxt_tmiatb): Likewise. + (iwmmxt_tmiabt): Likewise. + (iwmmxt_tmiatt): Likewise. + (iwmmxt_tmovmskb): Likewise. + (iwmmxt_tmovmskh): Likewise. + (iwmmxt_tmovmskw): Likewise. + (iwmmxt_waccb): Likewise. + (iwmmxt_wacch): Likewise. + (iwmmxt_waccw): Likewise. + (iwmmxt_waligni): Likewise. + (iwmmxt_walignr): Likewise. + (iwmmxt_walignr0): Likewise. + (iwmmxt_walignr1): Likewise. + (iwmmxt_walignr2): Likewise. + (iwmmxt_walignr3): Likewise. + (iwmmxt_wsadb): Likewise. + (iwmmxt_wsadh): Likewise. + (iwmmxt_wsadbz): Likewise. + (iwmmxt_wsadhz): Likewise. 
+ * iwmmxt2.md (iwmmxt_wabs3): Update for attribute changes. + (iwmmxt_wabsdiffb): Likewise. + (iwmmxt_wabsdiffh): Likewise. + (iwmmxt_wabsdiffw): Likewise. + (iwmmxt_waddsubhx): Likewise + (iwmmxt_wsubaddhx): Likewise. + (addc3): Likewise. + (iwmmxt_avg4): Likewise. + (iwmmxt_avg4r): Likewise. + (iwmmxt_wmaddsx): Likewise. + (iwmmxt_wmaddux): Likewise. + (iwmmxt_wmaddsn): Likewise. + (iwmmxt_wmaddun): Likewise. + (iwmmxt_wmulwsm): Likewise. + (iwmmxt_wmulwum): Likewise. + (iwmmxt_wmulsmr): Likewise. + (iwmmxt_wmulumr): Likewise. + (iwmmxt_wmulwsmr): Likewise. + (iwmmxt_wmulwumr): Likewise. + (iwmmxt_wmulwl): Likewise. + (iwmmxt_wqmulm): Likewise. + (iwmmxt_wqmulwm): Likewise. + (iwmmxt_wqmulmr): Likewise. + (iwmmxt_wqmulwmr): Likewise. + (iwmmxt_waddbhusm): Likewise. + (iwmmxt_waddbhusl): Likewise. + (iwmmxt_wqmiabb): Likewise. + (iwmmxt_wqmiabt): Likewise. + (iwmmxt_wqmiatb): Likewise. + (iwmmxt_wqmiatt): Likewise. + (iwmmxt_wqmiabbn): Likewise. + (iwmmxt_wqmiabtn): Likewise. + (iwmmxt_wqmiatbn): Likewise. + (iwmmxt_wqmiattn): Likewise. + (iwmmxt_wmiabb): Likewise. + (iwmmxt_wmiabt): Likewise. + (iwmmxt_wmiatb): Likewise. + (iwmmxt_wmiatt): Likewise. + (iwmmxt_wmiabbn): Likewise. + (iwmmxt_wmiabtn): Likewise. + (iwmmxt_wmiatbn): Likewise. + (iwmmxt_wmiattn): Likewise. + (iwmmxt_wmiawbb): Likewise. + (iwmmxt_wmiawbt): Likewise. + (iwmmxt_wmiawtb): Likewise. + (iwmmxt_wmiawtt): Likewise. + (iwmmxt_wmiawbbn): Likewise. + (iwmmxt_wmiawbtn): Likewise. + (iwmmxt_wmiawtbn): Likewise. + (iwmmxt_wmiawttn): Likewise. + (iwmmxt_wmerge): Likewise. + (iwmmxt_tandc3): Likewise. + (iwmmxt_torc3): Likewise. + (iwmmxt_torvsc3): Likewise. + (iwmmxt_textrc3): Likewise. + * marvell-f-iwmmxt.md (wmmxt_shift): Update for attribute changes. + (wmmxt_pack): Likewise. + (wmmxt_mult_c1): Likewise. + (wmmxt_mult_c2): Likewise. + (wmmxt_alu_c1): Likewise. + (wmmxt_alu_c2): Likewise. + (wmmxt_alu_c3): Likewise. + (wmmxt_transfer_c1): Likewise. + (wmmxt_transfer_c2): Likewise. + (wmmxt_transfer_c3): Likewise. + (marvell_f_iwmmxt_wstr): Likewise. + (marvell_f_iwmmxt_wldr): Likewise. + +2013-08-07 Christophe Lyon + + Backport from trunk r201237. + 2013-07-25 Terry Guo + + * config/arm/arm.c (thumb1_size_rtx_costs): Assign proper cost for + shift_add/shift_sub0/shift_sub1 RTXs. + +2013-08-06 Christophe Lyon + + Backport from trunk r200596,201067,201083. + 2013-07-02 Ian Bolton + + * config/aarch64/aarch64-simd.md (absdi2): Support abs for + DI mode. + + 2013-07-19 Ian Bolton + + * config/aarch64/arm_neon.h (vabs_s64): New function + + 2013-07-20 James Greenhalgh + + * config/aarch64/aarch64-builtins.c + (aarch64_fold_builtin): Fold abs in all modes. + * config/aarch64/aarch64-simd-builtins.def + (abs): Enable for all modes. + * config/aarch64/arm_neon.h + (vabs_s<8,16,32,64): Rewrite using builtins. + (vabs_f64): Add missing intrinsic. + +2013-08-06 Christophe Lyon + + Backport from trunk r198735,198831,199959. + 2013-05-09 Sofiane Naci + + * config/aarch64/aarch64.md: New movtf split. + (*movtf_aarch64): Update. + (aarch64_movdi_tilow): Handle TF modes and rename to + aarch64_movdi_low. + (aarch64_movdi_tihigh): Handle TF modes and rename to + aarch64_movdi_high + (aarch64_movtihigh_di): Handle TF modes and rename to + aarch64_movhigh_di + (aarch64_movtilow_di): Handle TF modes and rename to + aarch64_movlow_di + (aarch64_movtilow_tilow): Remove spurious whitespace. + * config/aarch64/aarch64.c (aarch64_split_128bit_move): Handle TFmode + splits. + (aarch64_print_operand): Update. 
+ + 2013-05-13 Sofiane Naci + + * config/aarch64/aarch64-simd.md (aarch64_simd_mov): Group + similar switch cases. + (aarch64_simd_mov): Rename to aarch64_split_simd_mov. Update. + (aarch64_simd_mov_to_low): Delete. + (aarch64_simd_mov_to_high): Delete. + (move_lo_quad_): Add w<-r alternative. + (aarch64_simd_move_hi_quad_): Likewise. + (aarch64_simd_mov_from_*): Update type attribute. + * config/aarch64/aarch64.c (aarch64_split_simd_move): Refacror switch + statement. + + 2013-06-11 Sofiane Naci + + * config/aarch64/aarch64-simd.md (move_lo_quad_): Update. + +2013-08-06 Christophe Lyon + + Backport from trunk r199438,199439,201326. + + 2013-05-30 Zhenqiang Chen + + * config/arm/arm.c (arm_add_cfa_adjust_cfa_note): New added. + (arm_emit_multi_reg_pop): Add REG_CFA_ADJUST_CFA notes. + (arm_emit_vfp_multi_reg_pop): Likewise. + (thumb2_emit_ldrd_pop): Likewise. + (arm_expand_epilogue): Add misc REG_CFA notes. + (arm_unwind_emit): Skip REG_CFA_ADJUST_CFA and REG_CFA_RESTORE. + + 2013-05-30 Bernd Schmidt + Zhenqiang Chen + + * config/arm/arm-protos.h: Add and update function protos. + * config/arm/arm.c (use_simple_return_p): New added. + (thumb2_expand_return): Check simple_return flag. + * config/arm/arm.md: Add simple_return and conditional simple_return. + * config/arm/iterators.md: Add iterator for return and simple_return. + + 2013-07-30 Zhenqiang Chen + + PR rtl-optimization/57637 + * function.c (move_insn_for_shrink_wrap): Also check the + GEN set of the LIVE problem for the liveness analysis + if it exists, otherwise give up. + +2013-08-06 Christophe Lyon + + Backport from trunk r198928,198973,199203,201240,201241,201307. + 2013-05-15 Ramana Radhakrishnan + + PR target/19599 + * config/arm/predicates.md (call_insn_operand): New predicate. + * config/arm/constraints.md ("Cs", "Ss"): New constraints. + * config/arm/arm.md (*call_insn, *call_value_insn): Match only + if insn is not a tail call. + (*sibcall_insn, *sibcall_value_insn): Adjust for tailcalling through + registers. + * config/arm/arm.h (enum reg_class): New caller save register class. + (REG_CLASS_NAMES): Likewise. + (REG_CLASS_CONTENTS): Likewise. + * config/arm/arm.c (arm_function_ok_for_sibcall): Allow tailcalling + without decls. + + 2013-05-16 Ramana Radhakrishnan + + PR target/19599 + * config/arm/arm.c (arm_function_ok_for_sibcall): Add check + for NULL decl. + + 2013-05-22 Ramana Radhakrishnan + + PR target/19599 + PR target/57340 + * config/arm/arm.c (any_sibcall_uses_r3): Rename to .. + (any_sibcall_could_use_r3): this and handle indirect calls. + (arm_get_frame_offsets): Rename use of any_sibcall_uses_r3. + + 2013-07-25 Ramana Radhakrishnan + + PR target/19599 + PR target/57731 + PR target/57748 + * config/arm/arm.md ("*sibcall_value_insn): Replace use of + Ss with US. Adjust output for v5 and v4t. + (*sibcall_value_insn): Likewise and loosen predicate on + operand0. + * config/arm/constraints.md ("Ss"): Rename to US. + + 2013-07-25 Ramana Radhakrishnan + + * config/arm/arm.md (*sibcall_insn): Remove unnecessary space. + + 2013-07-29 Ramana Radhakrishnan + Fix incorrect changelog entry. + + Replace + PR target/57748 + with + PR target/57837 + +2013-08-05 Yvan Roux + + Backport from trunk r200922. + 2013-07-12 Tejas Belagod + + * config/aarch64/aarch64-protos.h + (aarch64_simd_immediate_valid_for_move): Remove. + * config/aarch64/aarch64.c (simd_immediate_info): New member. + (aarch64_simd_valid_immediate): Recognize idioms for shifting ones + cases. 
+ (aarch64_output_simd_mov_immediate): Print the correct shift specifier. + +2013-08-05 Yvan Roux + + Backport from trunk r200670. + 2013-07-04 Tejas Belagod + + * config/aarch64/aarch64-protos.h (cpu_vector_cost): New. + (tune_params): New member 'const vec_costs'. + * config/aarch64/aarch64.c (generic_vector_cost): New. + (generic_tunings): New member 'generic_vector_cost'. + (aarch64_builtin_vectorization_cost): New. + (aarch64_add_stmt_cost): New. + (TARGET_VECTORIZE_ADD_STMT_COST): New. + (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): New. + +2013-08-05 Yvan Roux + + Backport from trunk r200637. + 2013-07-03 Yufeng Zhang + + * config/aarch64/aarch64.h (enum arm_abi_type): Remove. + (ARM_ABI_AAPCS64): Ditto. + (arm_abi): Ditto. + (ARM_DEFAULT_ABI): Ditto. + +2013-08-05 Yvan Roux + + Backport from trunk r200532, r200565. + 2013-06-28 Marcus Shawcroft + + * config/aarch64/aarch64.c (aarch64_cannot_force_const_mem): Adjust + layout. + + 2013-06-29 Yufeng Zhang + + * config/aarch64/aarch64.c: Remove junk from the beginning of the + file. + +2013-08-05 Yvan Roux + + Backport from trunk r200531. + 2013-06-28 Marcus Shawcroft + + * config/aarch64/aarch64-protos.h (aarch64_symbol_type): + Update comment w.r.t SYMBOL_TINY_ABSOLUTE. + +2013-08-05 Yvan Roux + + Backport from trunk r200519. + 2013-06-28 Marcus Shawcroft + + * config/aarch64/aarch64-protos.h + aarch64_classify_symbol_expression): Define. + (aarch64_symbolic_constant_p): Remove. + * config/aarch64/aarch64.c (aarch64_classify_symbol_expression): Remove + static. Fix line length and white space. + (aarch64_symbolic_constant_p): Remove. + * config/aarch64/predicates.md (aarch64_valid_symref): + Use aarch64_classify_symbol_expression. + +2013-08-05 Yvan Roux + + Backport from trunk r200466, r200467. + 2013-06-27 Yufeng Zhang + + * config/aarch64/aarch64.c (aarch64_force_temporary): Add an extra + parameter 'mode' of type 'enum machine_mode mode'; change to pass + 'mode' to force_reg. + (aarch64_add_offset): Update calls to aarch64_force_temporary. + (aarch64_expand_mov_immediate): Likewise. + + 2013-06-27 Yufeng Zhang + + * config/aarch64/aarch64.c (aarch64_add_offset): Change to pass + 'mode' to aarch64_plus_immediate and gen_rtx_PLUS. + +2013-08-05 Yvan Roux + + Backport from trunk r200419. + 2013-06-26 Greta Yorsh + + * config/arm/arm.h (MAX_CONDITIONAL_EXECUTE): Define macro. + * config/arm/arm-protos.h (arm_max_conditional_execute): New + declaration. + (tune_params): Update comment. + * config/arm/arm.c (arm_cortex_a15_tune): Set max_cond_insns to 2. + (arm_max_conditional_execute): New function. + (thumb2_final_prescan_insn): Use max_insn_skipped and + MAX_INSN_PER_IT_BLOCK to compute maximum instructions in a block. + +2013-07-24 Matthew Gretton-Dann + + * LINARO-VERSION: Bump version. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + * LINARO-VERSION: Update. + +2013-07-19 Matthew Gretton-Dann + + Backport from trunk r201005. + 2013-07-17 Yvan Roux + + PR target/57909 + * config/arm/arm.c (gen_movmem_ldrd_strd): Fix unaligned load/store + usage in HI mode. + +2013-07-09 Christophe Lyon + + * LINARO-VERSION: Bump version. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + * LINARO-VERSION: Update. + +2013-07-03 Christophe Lyon + + Revert backport from trunk r198928,198973,199203. + 2013-05-22 Ramana Radhakrishnan + + PR target/19599 + PR target/57340 + * config/arm/arm.c (any_sibcall_uses_r3): Rename to .. + (any_sibcall_could_use_r3): this and handle indirect calls. 
+ (arm_get_frame_offsets): Rename use of any_sibcall_uses_r3. + + 2013-05-16 Ramana Radhakrishnan + + PR target/19599 + * config/arm/arm.c (arm_function_ok_for_sibcall): Add check + for NULL decl. + + 2013-05-15 Ramana Radhakrishnan + + PR target/19599 + * config/arm/predicates.md (call_insn_operand): New predicate. + * config/arm/constraints.md ("Cs", "Ss"): New constraints. + * config/arm/arm.md (*call_insn, *call_value_insn): Match only + if insn is not a tail call. + (*sibcall_insn, *sibcall_value_insn): Adjust for tailcalling through + registers. + * config/arm/arm.h (enum reg_class): New caller save register class. + (REG_CLASS_NAMES): Likewise. + (REG_CLASS_CONTENTS): Likewise. + * config/arm/arm.c (arm_function_ok_for_sibcall): Allow tailcalling + without decls. + +2013-07-03 Christophe Lyon + + Revert backport from mainline (r199438, r199439) + 2013-05-30 Zhenqiang Chen + + * config/arm/arm.c (arm_add_cfa_adjust_cfa_note): New added. + (arm_emit_multi_reg_pop): Add REG_CFA_ADJUST_CFA notes. + (arm_emit_vfp_multi_reg_pop): Likewise. + (thumb2_emit_ldrd_pop): Likewise. + (arm_expand_epilogue): Add misc REG_CFA notes. + (arm_unwind_emit): Skip REG_CFA_ADJUST_CFA and REG_CFA_RESTORE. + + 2013-05-30 Bernd Schmidt + Zhenqiang Chen + + * config/arm/arm-protos.h: Add and update function protos. + * config/arm/arm.c (use_simple_return_p): New added. + (thumb2_expand_return): Check simple_return flag. + * config/arm/arm.md: Add simple_return and conditional simple_return. + * config/arm/iterators.md: Add iterator for return and simple_return. + * gcc.dg/shrink-wrap-alloca.c: New added. + * gcc.dg/shrink-wrap-pretend.c: New added. + * gcc.dg/shrink-wrap-sibcall.c: New added. + +2013-07-03 Christophe Lyon + + Backport from trunk r199640, 199705, 199733, 199734, 199739. + 2013-06-04 Kyrylo Tkachov + + * rtl.def: Add extra fourth optional field to define_cond_exec. + * gensupport.c (process_one_cond_exec): Process attributes from + define_cond_exec. + * doc/md.texi: Document fourth field in define_cond_exec. + + 2013-06-05 Kyrylo Tkachov + + * config/arm/arm.md (enabled_for_depr_it): New attribute. + (predicable_short_it): Likewise. + (predicated): Likewise. + (enabled): Handle above. + (define_cond_exec): Set predicated attribute to yes. + + 2013-06-06 Kyrylo Tkachov + + * config/arm/sync.md (atomic_loaddi_1): + Disable predication for arm_restrict_it. + (arm_load_exclusive): Likewise. + (arm_load_exclusivesi): Likewise. + (arm_load_exclusivedi): Likewise. + (arm_load_acquire_exclusive): Likewise. + (arm_load_acquire_exclusivesi): Likewise. + (arm_load_acquire_exclusivedi): Likewise. + (arm_store_exclusive): Likewise. + (arm_store_exclusive): Likewise. + (arm_store_release_exclusivedi): Likewise. + (arm_store_release_exclusive): Likewise. + + 2013-06-06 Kyrylo Tkachov + + * config/arm/arm-ldmstm.ml: Set "predicable_short_it" to "no" + where appropriate. + * config/arm/ldmstm.md: Regenerate. + + 2013-06-06 Kyrylo Tkachov + + * config/arm/arm-fixed.md (add3,usadd3,ssadd3, + sub3, ussub3, sssub3, arm_ssatsihi_shift, + arm_usatsihi): Adjust alternatives for arm_restrict_it. + +2013-07-02 Rob Savoye + + Backport from trunk 200096 + + 2013-06-14 Vidya Praveen + + * config/aarch64/aarch64-simd.md (aarch64_mlal_lo): + New pattern. + (aarch64_mlal_hi, aarch64_mlsl_lo): Likewise. + (aarch64_mlsl_hi, aarch64_mlal): Likewise. + (aarch64_mlsl): Likewise. 
+ +2013-07-02 Rob Savoye + + Backport from trunk 200062 + + 2013-06-13 Bin Cheng + * fold-const.c (operand_equal_p): Consider NOP_EXPR and + CONVERT_EXPR as equal nodes. + +2013-07-02 Rob Savoye + Backport from trunk 199810 + + 2013-06-07 Kyrylo Tkachov + + * config/arm/arm.md (anddi3_insn): Remove duplicate alternatives. + Clean up alternatives. + +2013-06-20 Rob Savoye + + Backport from trunk 200152 + 2013-06-17 Sofiane Naci + + * config/aarch64/aarch64-simd.md (aarch64_dup_lane): Add r<-w + alternative and update. + (aarch64_dup_lanedi): Delete. + * config/aarch64/arm_neon.h (vdup_lane_*): Update. + * config/aarch64/aarch64-simd-builtins.def: Update. + +2013-06-20 Rob Savoye + + Backport from trunk 200061 + 2013-06-13 Bin Cheng + + * rtlanal.c (noop_move_p): Check the code to be executed for + COND_EXEC. + +2013-06-20 Rob Savoye + + Backport from trunk 199694 + 2013-06-05 Kyrylo Tkachov + + * config/arm/arm.c (MAX_INSN_PER_IT_BLOCK): New macro. + (arm_option_override): Override arm_restrict_it where appropriate. + (thumb2_final_prescan_insn): Use MAX_INSN_PER_IT_BLOCK. + * config/arm/arm.opt (mrestrict-it): New command-line option. + * doc/invoke.texi: Document -mrestrict-it. + +2013-06-20 Christophe Lyon + + Backport from trunk r198683. + 2013-05-07 Christophe Lyon + + * config/arm/arm.c (arm_asan_shadow_offset): New function. + (TARGET_ASAN_SHADOW_OFFSET): Define. + * config/arm/linux-eabi.h (ASAN_CC1_SPEC): Define. + (LINUX_OR_ANDROID_CC): Add ASAN_CC1_SPEC. + +2013-06-18 Rob Savoye + + * LINARO-VERSION: Bump version. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + * LINARO-VERSION: Update. + +2013-06-06 Zhenqiang Chen + + Backport from mainline (r199438, r199439) + 2013-05-30 Zhenqiang Chen + + * config/arm/arm.c (arm_add_cfa_adjust_cfa_note): New added. + (arm_emit_multi_reg_pop): Add REG_CFA_ADJUST_CFA notes. + (arm_emit_vfp_multi_reg_pop): Likewise. + (thumb2_emit_ldrd_pop): Likewise. + (arm_expand_epilogue): Add misc REG_CFA notes. + (arm_unwind_emit): Skip REG_CFA_ADJUST_CFA and REG_CFA_RESTORE. + + 2013-05-30 Bernd Schmidt + Zhenqiang Chen + + * config/arm/arm-protos.h: Add and update function protos. + * config/arm/arm.c (use_simple_return_p): New added. + (thumb2_expand_return): Check simple_return flag. + * config/arm/arm.md: Add simple_return and conditional simple_return. + * config/arm/iterators.md: Add iterator for return and simple_return. + * gcc.dg/shrink-wrap-alloca.c: New added. + * gcc.dg/shrink-wrap-pretend.c: New added. + * gcc.dg/shrink-wrap-sibcall.c: New added. + +2013-06-06 Kugan Vivekanandarajah + + Backport from mainline r198879: + + 2013-05-14 Chung-Lin Tang + PR target/42017 + * config/arm/arm.h (EPILOGUE_USES): Only return true + for LR_REGNUM after epilogue_completed. + +2013-06-05 Christophe Lyon + + Backport from trunk r199652,199653,199656,199657,199658. + + 2013-06-04 Ian Bolton + + * config/aarch64/aarch64.md (*mov_aarch64): Call + into function to generate MOVI instruction. + * config/aarch64/aarch64.c (aarch64_simd_container_mode): + New function. + (aarch64_preferred_simd_mode): Turn into wrapper. + (aarch64_output_scalar_simd_mov_immediate): New function. + * config/aarch64/aarch64-protos.h: Add prototype for above. + + 2013-06-04 Ian Bolton + + * config/aarch64/aarch64.c (simd_immediate_info): Remove + element_char member. + (sizetochar): Return signed char. + (aarch64_simd_valid_immediate): Remove elchar and other + unnecessary variables. + (aarch64_output_simd_mov_immediate): Take rtx instead of &rtx. 
+ Calculate element_char as required. + * config/aarch64/aarch64-protos.h: Update and move prototype + for aarch64_output_simd_mov_immediate. + * config/aarch64/aarch64-simd.md (*aarch64_simd_mov): + Update arguments. + + 2013-06-04 Ian Bolton + + * config/aarch64/aarch64.c (simd_immediate_info): Struct to hold + information completed by aarch64_simd_valid_immediate. + (aarch64_legitimate_constant_p): Update arguments. + (aarch64_simd_valid_immediate): Work with struct rather than many + pointers. + (aarch64_simd_scalar_immediate_valid_for_move): Update arguments. + (aarch64_simd_make_constant): Update arguments. + (aarch64_output_simd_mov_immediate): Work with struct rather than + many pointers. Output immediate directly rather than as operand. + * config/aarch64/aarch64-protos.h (aarch64_simd_valid_immediate): + Update prototype. + * config/aarch64/constraints.md (Dn): Update arguments. + + 2013-06-04 Ian Bolton + + * config/aarch64/aarch64.c (aarch64_simd_valid_immediate): No + longer static. + (aarch64_simd_immediate_valid_for_move): Remove. + (aarch64_simd_scalar_immediate_valid_for_move): Update call. + (aarch64_simd_make_constant): Update call. + (aarch64_output_simd_mov_immediate): Update call. + * config/aarch64/aarch64-protos.h (aarch64_simd_valid_immediate): + Add prototype. + * config/aarch64/constraints.md (Dn): Update call. + + 2013-06-04 Ian Bolton + + * config/aarch64/aarch64.c (aarch64_simd_valid_immediate): Change + return type to bool for prototype. + (aarch64_legitimate_constant_p): Check for true instead of not -1. + (aarch64_simd_valid_immediate): Fix up each return to return a bool. + (aarch64_simd_immediate_valid_for_move): Update retval for bool. + +2013-06-04 Christophe Lyon + + Backport from trunk r199261. + 2013-05-23 Christian Bruel + + PR debug/57351 + * config/arm/arm.c (arm_dwarf_register_span): Do not use dbx number. + +2013-06-03 Christophe Lyon + + Backport from trunk + r198890,199254,199259,199260,199293,199407,199408,199454,199544,199545. + + 2013-05-31 Marcus Shawcroft + + * config/aarch64/aarch64.c (aarch64_load_symref_appropriately): + Remove un-necessary braces. + + 2013-05-31 Marcus Shawcroft + + * config/aarch64/aarch64.c (aarch64_classify_symbol): + Use SYMBOL_TINY_ABSOLUTE for AARCH64_CMODEL_TINY_PIC. + + 2013-05-30 Ian Bolton + + * config/aarch64/aarch64.md (insv): New define_expand. + (*insv_reg): New define_insn. + + 2012-05-29 Chris Schlumberger-Socha + Marcus Shawcroft + + * config/aarch64/aarch64-protos.h (aarch64_symbol_type): Define + SYMBOL_TINY_ABSOLUTE. + * config/aarch64/aarch64.c (aarch64_load_symref_appropriately): Handle + SYMBOL_TINY_ABSOLUTE. + (aarch64_expand_mov_immediate): Likewise. + (aarch64_classify_symbol): Likewise. + (aarch64_mov_operand_p): Remove ATTRIBUTE_UNUSED. + Permit SYMBOL_TINY_ABSOLUTE. + * config/aarch64/predicates.md (aarch64_mov_operand): Permit CONST. + + 2013-05-29 Chris Schlumberger-Socha + Marcus Shawcroft + + * config/aarch64/aarch64.c (aarch64_classify_symbol): Remove comment. + Refactor if/switch. Replace gcc_assert with if. + + 2013-05-24 Ian Bolton + + * config/aarch64/aarch64.c (aarch64_print_operand): Change the + X format specifier to only display bottom 16 bits. + * config/aarch64/aarch64.md (insv_imm): Allow any size of + immediate to match for operand 2, since it will be masked. + + 2013-05-23 Chris Schlumberger-Socha + Marcus Shawcroft + + * config/aarch64/aarch64.md (*movdi_aarch64): Replace Usa with S. + * config/aarch64/constraints.md (Usa): Remove. + * doc/md.texi (AArch64 Usa): Remove. 
+ + 2013-05-23 Chris Schlumberger-Socha + Marcus Shawcroft + + * config/aarch64/aarch64-protos.h (aarch64_mov_operand_p): Define. + * config/aarch64/aarch64.c (aarch64_mov_operand_p): Define. + * config/aarch64/predicates.md (aarch64_const_address): Remove. + (aarch64_mov_operand): Use aarch64_mov_operand_p. + + 2013-05-23 Vidya Praveen + + * config/aarch64/aarch64-simd.md (clzv4si2): Support for CLZ + instruction (AdvSIMD). + * config/aarch64/aarch64-builtins.c + (aarch64_builtin_vectorized_function): Handler for BUILT_IN_CLZ. + * config/aarch64/aarch-simd-builtins.def: Entry for CLZ. + + 2013-05-14 James Greenhalgh + + * config/aarch64/aarch64-simd.md + (aarch64_vcond_internal): Rename to... + (aarch64_vcond_internal): ...This, for integer modes. + (aarch64_vcond_internal): ...This for + float modes. Clarify all iterator modes. + (vcond): Use new name for vcond expanders. + (vcond): Likewise. + (vcondu: Likewise. + * config/aarch64/iterators.md (VDQF_COND): New. + +2013-05-29 Christophe Lyon + + Backport from trunk r198928,198973,199203. + 2013-05-22 Ramana Radhakrishnan + + PR target/19599 + PR target/57340 + * config/arm/arm.c (any_sibcall_uses_r3): Rename to .. + (any_sibcall_could_use_r3): this and handle indirect calls. + (arm_get_frame_offsets): Rename use of any_sibcall_uses_r3. + + 2013-05-16 Ramana Radhakrishnan + + PR target/19599 + * config/arm/arm.c (arm_function_ok_for_sibcall): Add check + for NULL decl. + + 2013-05-15 Ramana Radhakrishnan + + PR target/19599 + * config/arm/predicates.md (call_insn_operand): New predicate. + * config/arm/constraints.md ("Cs", "Ss"): New constraints. + * config/arm/arm.md (*call_insn, *call_value_insn): Match only + if insn is not a tail call. + (*sibcall_insn, *sibcall_value_insn): Adjust for tailcalling through + registers. + * config/arm/arm.h (enum reg_class): New caller save register class. + (REG_CLASS_NAMES): Likewise. + (REG_CLASS_CONTENTS): Likewise. + * config/arm/arm.c (arm_function_ok_for_sibcall): Allow tailcalling + without decls. + +2013-05-28 Christophe Lyon + + Backport from trunk r198680. + 2013-05-07 Sofiane Naci + + * config/aarch64/aarch64-simd.md (*aarch64_simd_mov): call splitter. + (aarch64_simd_mov): New expander. + (aarch64_simd_mov_to_low): New instruction pattern. + (aarch64_simd_mov_to_high): Likewise. + (aarch64_simd_mov_from_low): Likewise. + (aarch64_simd_mov_from_high): Likewise. + (aarch64_dup_lane): Update. + (aarch64_dup_lanedi): New instruction pattern. + * config/aarch64/aarch64-protos.h (aarch64_split_simd_move): New prototype. + * config/aarch64/aarch64.c (aarch64_split_simd_move): New function. + +2013-05-28 Christophe Lyon + + Backport from trunk r198497-198500. + 2013-05-01 James Greenhalgh + + * config/aarch64/aarch64-builtins.c + (aarch64_gimple_fold_builtin.c): Fold more modes for reduc_splus_. + * config/aarch64/aarch64-simd-builtins.def + (reduc_splus_): Add new modes. + (reduc_uplus_): New. + * config/aarch64/aarch64-simd.md (aarch64_addvv4sf): Remove. + (reduc_uplus_v4sf): Likewise. + (reduc_splus_v4sf): Likewise. + (aarch64_addv): Likewise. + (reduc_uplus_): Likewise. + (reduc_splus_): Likewise. + (aarch64_addvv2di): Likewise. + (reduc_uplus_v2di): Likewise. + (reduc_splus_v2di): Likewise. + (aarch64_addvv2si): Likewise. + (reduc_uplus_v2si): Likewise. + (reduc_splus_v2si): Likewise. + (reduc_plus_): New. + (reduc_plus_v2di): Likewise. + (reduc_plus_v2si): Likewise. + (reduc_plus_v4sf): Likewise. + (aarch64_addpv4sf): Likewise. 
+ * config/aarch64/arm_neon.h + (vaddv_<8, 16, 32, 64): Rewrite using builtins. + * config/aarch64/iterators.md (unspec): Remove UNSPEC_ADDV, + add UNSPEC_SADDV, UNSPEC_UADDV. + (SUADDV): New. + (sur): Add UNSPEC_SADDV, UNSPEC_UADDV. + + 2013-05-01 James Greenhalgh + + * config/aarch64/arm_neon.h + (v_<8, 16, 32, 64>): Rewrite using builtins. + + 2013-05-01 James Greenhalgh + + * config/aarch64/aarch64-builtins + (aarch64_gimple_fold_builtin): Fold reduc__ builtins. + + 2013-05-01 James Greenhalgh + + * config/aarch64/aarch64-simd-builtins.def + (reduc_smax_): New. + (reduc_smin_): Likewise. + (reduc_umax_): Likewise. + (reduc_umin_): Likewise. + (reduc_smax_nan_): Likewise. + (reduc_smin_nan_): Likewise. + (fmax): Remove. + (fmin): Likewise. + (smax): Update for V2SF, V4SF and V2DF modes. + (smin): Likewise. + (smax_nan): New. + (smin_nan): Likewise. + * config/aarch64/aarch64-simd.md (3): Rename to... + (3): ...This, refactor. + (s3): New. + (3): Likewise. + (reduc__): Refactor. + (reduc__v4sf): Likewise. + (reduc__v2si): Likewise. + (aarch64_: Remove. + * config/aarch64/arm_neon.h (vmax_f<32,64>): Rewrite to use + new builtin names. + (vmin_f<32,64>): Likewise. + * config/iterators.md (unspec): Add UNSPEC_FMAXNMV, UNSPEC_FMINNMV. + (FMAXMIN): New. + (su): Add mappings for smax, smin, umax, umin. + (maxmin): New. + (FMAXMINV): Add UNSPEC_FMAXNMV, UNSPEC_FMINNMV. + (FMAXMIN): Rename as... + (FMAXMIN_UNS): ...This. + (maxminv): Remove. + (fmaxminv): Likewise. + (fmaxmin): Likewise. + (maxmin_uns): New. + (maxmin_uns_op): Likewise. + +2013-05-28 Christophe Lyon + + Backport from trunk r199241. + 2013-05-23 James Greenhalgh + + * config/aarch64/aarch64-simd.md + (aarch64_cmdi): Add clobber of CC_REGNUM to unsplit pattern. + +2013-05-23 Christophe Lyon + + Backport from trunk r198970. + 2013-05-16 Greta Yorsh + + * config/arm/arm-protos.h (gen_movmem_ldrd_strd): New declaration. + * config/arm/arm.c (next_consecutive_mem): New function. + (gen_movmem_ldrd_strd): Likewise. + * config/arm/arm.md (movmemqi): Update condition and code. + (unaligned_loaddi, unaligned_storedi): New patterns. + +2013-05-19 Matthew Gretton-Dann + + * LINARO-VERSION: Bump version number. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + * LINARO-VERSION: Update. + +2013-05-14 Matthew Gretton-Dann + + Backport from trunk r198677. + 2013-05-07 Naveen H.S + + * config/aarch64/aarch64.md + (cmp_swp__shft_): Restrict the + shift value between 0-4. + +2013-05-14 Matthew Gretton-Dann + + Backport from trunk r198574-198575. + 2013-05-03 Vidya Praveen + + * config/aarch64/aarch64-simd.md (simd_fabd): Correct the description. + + 2013-05-03 Vidya Praveen + + * config/aarch64/aarch64-simd.md (*fabd_scalar3): Support + scalar form of FABD instruction. + +2013-05-14 Matthew Gretton-Dann + + Backport from trunk r198490-198496 + 2013-05-01 James Greenhalgh + + * config/aarch64/arm_neon.h + (vac_f<32, 64>): Rename to... + (vca_f<32, 64>): ...this, reimpliment in C. + (vca_f<32, 64>): Reimpliment in C. + + 2013-05-01 James Greenhalgh + + * config/aarch64/aarch64-simd.md (*aarch64_fac): New. + * config/aarch64/iterators.md (FAC_COMPARISONS): New. + + 2013-05-01 James Greenhalgh + + * config/aarch64/aarch64-simd.md + (vcond_internal): Handle special cases for constant masks. + (vcond): Allow nonmemory_operands for outcome vectors. + (vcondu): Likewise. + (vcond): New. + + 2013-05-01 James Greenhalgh + + * config/aarch64/aarch64-builtins.c (BUILTIN_VALLDI): Define. + (aarch64_fold_builtin): Add folding for cm. 
+ * config/aarch64/aarch64-simd-builtins.def + (cmeq): Update to BUILTIN_VALLDI. + (cmgt): Likewise. + (cmge): Likewise. + (cmle): Likewise. + (cmlt): Likewise. + * config/aarch64/arm_neon.h + (vc_<8,16,32,64>): Remap + to builtins or C as appropriate. + + 2013-05-01 James Greenhalgh + + * config/aarch64/aarch64-simd-builtins.def (cmhs): Rename to... + (cmgeu): ...This. + (cmhi): Rename to... + (cmgtu): ...This. + * config/aarch64/aarch64-simd.md + (simd_mode): Add SF. + (aarch64_vcond_internal): Use new names for unsigned comparison insns. + (aarch64_cm): Rewrite to not use UNSPECs. + * config/aarch64/aarch64.md (*cstore_neg): Rename to... + (cstore_neg): ...This. + * config/aarch64/iterators.md + (VALLF): new. + (unspec): Remove UNSPEC_CM. + (COMPARISONS): New. + (UCOMPARISONS): Likewise. + (optab): Add missing comparisons. + (n_optab): New. + (cmp_1): Likewise. + (cmp_2): Likewise. + (CMP): Likewise. + (cmp): Remove. + (VCMP_S): Likewise. + (VCMP_U): Likewise. + (V_cmp_result): Add DF, SF modes. + (v_cmp_result): Likewise. + (v): Likewise. + (vmtype): Likewise. + * config/aarch64/predicates.md (aarch64_reg_or_fp_zero): New. + +2013-05-14 Matthew Gretton-Dann + + Backport from trunk r198191. + 2013-04-23 Sofiane Naci + + * config/aarch64/aarch64.md (*mov_aarch64): Add simd attribute. + +2013-05-14 Matthew Gretton-Dann + + Backport from trunk r197838. + 2013-04-11 Naveen H.S + + * config/aarch64/aarch64.c (aarch64_select_cc_mode): Allow NEG + code in CC_NZ mode. + * config/aarch64/aarch64.md (*neg_3_compare0): New + pattern. + +2013-05-02 Matthew Gretton-Dann + + Backport from trunk r198019. + 2013-04-16 Naveen H.S + + * config/aarch64/aarch64.md (*adds_mul_imm_): New pattern. + (*subs_mul_imm_): New pattern. + +2013-05-02 Matthew Gretton-Dann + + Backport from trunk r198424-198425. + 2013-04-29 Ian Bolton + + * config/aarch64/aarch64.md (movsi_aarch64): Support LDR/STR + from/to S register. + (movdi_aarch64): Support LDR/STR from/to D register. + + 2013-04-29 Ian Bolton + + * common/config/aarch64/aarch64-common.c: Enable REE pass at O2 + or higher by default. + +2013-05-02 Matthew Gretton-Dann + + Backport from trunk r198412. + 2013-04-29 Kyrylo Tkachov + + * config/arm/arm.md (store_minmaxsi): Use only when + optimize_insn_for_size_p. + +2013-05-02 Matthew Gretton-Dann + + Backport from trunk 198394,198396-198400,198402-198404. + 2013-04-29 James Greenhalgh + + * config/aarch64/arm_neon.h + (vcvt_f<32,64>_s<32,64>): Rewrite in C. + (vcvt_f<32,64>_s<32,64>): Rewrite using builtins. + (vcvt__f<32,64>_f<32,64>): Likewise. + (vcvt_<32,64>_f<32,64>): Likewise. + (vcvta_<32,64>_f<32,64>): Likewise. + (vcvtm_<32,64>_f<32,64>): Likewise. + (vcvtn_<32,64>_f<32,64>): Likewise. + (vcvtp_<32,64>_f<32,64>): Likewise. + + 2013-04-29 James Greenhalgh + + * config/aarch64/aarch64-simd.md + (2): New, maps to fix, fixuns. + (2): New, maps to + fix_trunc, fixuns_trunc. + (ftrunc2): New. + * config/aarch64/iterators.md (optab): Add fix, fixuns. + (fix_trunc_optab): New. + + 2013-04-29 James Greenhalgh + + * config/aarch64/aarch64-builtins.c + (aarch64_builtin_vectorized_function): Vectorize over ifloorf, + iceilf, lround, iroundf. + + 2013-04-29 James Greenhalgh + + * config/aarch64/aarch64-simd-builtins.def (vec_unpacks_hi_): New. + (float_truncate_hi_): Likewise. + (float_extend_lo_): Likewise. + (float_truncate_lo_): Likewise. + * config/aarch64/aarch64-simd.md (vec_unpacks_lo_v4sf): New. + (aarch64_float_extend_lo_v2df): Likewise. + (vec_unpacks_hi_v4sf): Likewise. 
+ (aarch64_float_truncate_lo_v2sf): Likewise. + (aarch64_float_truncate_hi_v4sf): Likewise. + (vec_pack_trunc_v2df): Likewise. + (vec_pack_trunc_df): Likewise. + + 2013-04-29 James Greenhalgh + + * config/aarch64/aarch64-builtins.c + (aarch64_fold_builtin): Fold float conversions. + * config/aarch64/aarch64-simd-builtins.def + (floatv2si, floatv4si, floatv2di): New. + (floatunsv2si, floatunsv4si, floatunsv2di): Likewise. + * config/aarch64/aarch64-simd.md + (2): New, expands to float and floatuns. + * config/aarch64/iterators.md (FLOATUORS): New. + (optab): Add float, floatuns. + (su_optab): Likewise. + + 2013-04-29 James Greenhalgh + + * config/aarch64/aarch64-builtins.c + (aarch64_builtin_vectorized_function): Fold to standard pattern names. + * config/aarch64/aarch64-simd-builtins.def (frintn): New. + (frintz): Rename to... + (btrunc): ...this. + (frintp): Rename to... + (ceil): ...this. + (frintm): Rename to... + (floor): ...this. + (frinti): Rename to... + (nearbyint): ...this. + (frintx): Rename to... + (rint): ...this. + (frinta): Rename to... + (round): ...this. + * config/aarch64/aarch64-simd.md + (aarch64_frint): Delete. + (2): Convert to insn. + * config/aarch64/aarch64.md (unspec): Add UNSPEC_FRINTN. + * config/aarch64/iterators.md (FRINT): Add UNSPEC_FRINTN. + (frint_pattern): Likewise. + (frint_suffix): Likewise. + +2013-05-02 Matthew Gretton-Dann + + Backport from trunk r198302-198306,198316. + 2013-04-25 James Greenhalgh + + * config/aarch64/aarch64-simd.md + (aarch64_simd_bsl_internal): Rewrite RTL to not use UNSPEC_BSL. + (aarch64_simd_bsl): Likewise. + * config/aarch64/iterators.md (unspec): Remove UNSPEC_BSL. + + 2013-04-25 James Greenhalgh + + * config/aarch64/aarch64-simd.md (neg2): Use VDQ iterator. + + 2013-04-25 James Greenhalgh + + * config/aarch64/aarch64-builtins.c + (aarch64_fold_builtin): New. + * config/aarch64/aarch64-protos.h (aarch64_fold_builtin): New. + * config/aarch64/aarch64.c (TARGET_FOLD_BUILTIN): Define. + * config/aarch64/aarch64-simd-builtins.def (abs): New. + * config/aarch64/arm_neon.h + (vabs_): Implement using __builtin_aarch64_fabs. + + 2013-04-25 James Greenhalgh + Tejas Belagod + + * config/aarch64/aarch64-builtins.c + (aarch64_gimple_fold_builtin): New. + * config/aarch64/aarch64-protos.h (aarch64_gimple_fold_builtin): New. + * config/aarch64/aarch64-simd-builtins.def (addv): New. + * config/aarch64/aarch64-simd.md (addpv4sf): New. + (addvv4sf): Update. + * config/aarch64/aarch64.c (TARGET_GIMPLE_FOLD_BUILTIN): Define. + + 2013-04-25 Naveen H.S + + * config/aarch64/aarch64.md + (*cmp_swp__shft_): New pattern. + + 2013-04-25 Naveen H.S + + * config/aarch64/aarch64.md (*ngc): New pattern. + (*ngcsi_uxtw): New pattern. + +2013-05-02 Matthew Gretton-Dann + + Backport from trunk 198298. + 2013-04-25 Kyrylo Tkachov + Julian Brown + + * config/arm/arm.c (neon_builtin_type_mode): Add T_V4HF. + (TB_DREG): Add T_V4HF. + (v4hf_UP): New macro. + (neon_itype): Add NEON_FLOAT_WIDEN, NEON_FLOAT_NARROW. + (arm_init_neon_builtins): Handle NEON_FLOAT_WIDEN, + NEON_FLOAT_NARROW. + Handle initialisation of V4HF. Adjust initialisation of reinterpret + built-ins. + (arm_expand_neon_builtin): Handle NEON_FLOAT_WIDEN, + NEON_FLOAT_NARROW. + (arm_vector_mode_supported_p): Handle V4HF. + (arm_mangle_map): Handle V4HFmode. + * config/arm/arm.h (VALID_NEON_DREG_MODE): Add V4HF. + * config/arm/arm_neon_builtins.def: Add entries for + vcvtv4hfv4sf, vcvtv4sfv4hf. + * config/arm/neon.md (neon_vcvtv4sfv4hf): New pattern. + (neon_vcvtv4hfv4sf): Likewise. 
+ * config/arm/neon-gen.ml: Handle half-precision floating point + features. + * config/arm/neon-testgen.ml: Handle Requires_FP_bit feature. + * config/arm/arm_neon.h: Regenerate. + * config/arm/neon.ml (type elts): Add F16. + (type vectype): Add T_float16x4, T_floatHF. + (type vecmode): Add V4HF. + (type features): Add Requires_FP_bit feature. + (elt_width): Handle F16. + (elt_class): Likewise. + (elt_of_class_width): Likewise. + (mode_of_elt): Refactor. + (type_for_elt): Handle F16, fix error messages. + (vectype_size): Handle T_float16x4. + (vcvt_sh): New function. + (ops): Add entries for vcvt_f16_f32, vcvt_f32_f16. + (string_of_vectype): Handle T_floatHF, T_float16, T_float16x4. + (string_of_mode): Handle V4HF. + * doc/arm-neon-intrinsics.texi: Regenerate. + +2013-05-02 Matthew Gretton-Dann + + Backport from trunk r198136-198137,198142,198176. + 2013-04-23 Andreas Schwab + + * coretypes.h (gimple_stmt_iterator): Add struct to make + compatible with C. + + 2013-04-22 James Greenhalgh + + * coretypes.h (gimple_stmt_iterator_d): Forward declare. + (gimple_stmt_iterator): New typedef. + * gimple.h (gimple_stmt_iterator): Rename to... + (gimple_stmt_iterator_d): ... This. + * doc/tm.texi.in (TARGET_FOLD_BUILTIN): Detail restriction that + trees be valid for GIMPLE and GENERIC. + (TARGET_GIMPLE_FOLD_BUILTIN): New. + * gimple-fold.c (gimple_fold_call): Call target hook + gimple_fold_builtin. + * hooks.c (hook_bool_gsiptr_false): New. + * hooks.h (hook_bool_gsiptr_false): New. + * target.def (fold_stmt): New. + * doc/tm.texi: Regenerate. + + 2013-04-22 James Greenhalgh + + * config/aarch64/aarch64-builtins.c + (CF): Remove. + (CF0, CF1, CF2, CF3, CF4, CF10): New. + (VAR<1-12>): Add MAP parameter. + (BUILTIN_*): Likewise. + * config/aarch64/aarch64-simd-builtins.def: Set MAP parameter. + * config/aarch64/aarch64-simd.md (aarch64_sshl_n): Remove. + (aarch64_ushl_n): Likewise. + (aarch64_sshr_n): Likewise. + (aarch64_ushr_n): Likewise. + (aarch64_): Likewise. + (aarch64_sqrt): Likewise. + * config/aarch64/arm_neon.h (vshl_n_*): Use new builtin names. + (vshr_n_*): Likewise. + + 2013-04-22 James Greenhalgh + + * config/aarch64/aarch64-builtins.c + (aarch64_simd_builtin_type_mode): Handle SF types. + (sf_UP): Define. + (BUILTIN_GPF): Define. + (aarch64_init_simd_builtins): Handle SF types. + * config/aarch64/aarch64-simd-builtins.def (frecpe): Add support. + (frecps): Likewise. + (frecpx): Likewise. + * config/aarch64/aarch64-simd.md + (simd_types): Update simd_frcp to simd_frecp. + (aarch64_frecpe): New. + (aarch64_frecps): Likewise. + * config/aarch64/aarch64.md (unspec): Add UNSPEC_FRECP. + (v8type): Add frecp. + (aarch64_frecp): New. + (aarch64_frecps): Likewise. + * config/aarch64/iterators.md (FRECP): New. + (frecp_suffix): Likewise. + * config/aarch64/arm_neon.h + (vrecp_<32, 64>): Convert to using builtins. + +2013-05-02 Matthew Gretton-Dann + + Backport from trunk r198030. + 2013-04-17 Greta Yorsh + + * config/arm/arm.md (movsicc_insn): Convert define_insn into + define_insn_and_split. + (and_scc,ior_scc,negscc): Likewise. + (cmpsi2_addneg, subsi3_compare): Convert to named patterns. + +2013-05-02 Matthew Gretton-Dann + + Backport from trunk r198020. + 2013-04-16 Naveen H.S + + * config/aarch64/aarch64.md (*adds__multp2): + New pattern. + (*subs__multp2): New pattern. + (*adds__): New pattern. + (*subs__): New pattern. + +2013-05-02 Matthew Gretton-Dann + + Backport from trunk r198004,198029. 
+ 2013-04-17 Greta Yorsh + + * config/arm/arm.c (use_return_insn): Return 0 for targets that + can benefit from using a sequence of LDRD instructions in epilogue + instead of a single LDM instruction. + + 2013-04-16 Greta Yorsh + + * config/arm/arm.c (emit_multi_reg_push): New declaration + for an existing function. + (arm_emit_strd_push): New function. + (arm_expand_prologue): Used here. + (arm_emit_ldrd_pop): New function. + (arm_expand_epilogue): Used here. + (arm_get_frame_offsets): Update condition. + (arm_emit_multi_reg_pop): Add a special case for load of a single + register with writeback. + +2013-05-02 Matthew Gretton-Dann + + Backport from trunk r197965. + 2013-04-15 Kyrylo Tkachov + + * config/arm/arm.c (const_ok_for_dimode_op): Handle AND case. + * config/arm/arm.md (*anddi3_insn): Change to insn_and_split. + * config/arm/constraints.md (De): New constraint. + * config/arm/neon.md (anddi3_neon): Delete. + (neon_vand): Expand to standard anddi3 pattern. + * config/arm/predicates.md (imm_for_neon_inv_logic_operand): + Move earlier in the file. + (neon_inv_logic_op2): Likewise. + (arm_anddi_operand_neon): New predicate. + +2013-05-02 Matthew Gretton-Dann + + Backport from trunk r197925. + 2013-04-12 Greta Yorsh + + * config/arm/arm.md (mov_scc,mov_negscc,mov_notscc): Convert + define_insn into define_insn_and_split and emit movsicc patterns. + +2013-05-02 Matthew Gretton-Dann + + Backport from trunk r197807. + 2013-04-11 Naveen H.S + + * config/aarch64/aarch64.h (REVERSIBLE_CC_MODE): Define. + +2013-05-02 Matthew Gretton-Dann + + Backport from trunk r197642. + 2013-04-09 Kyrylo Tkachov + + * config/arm/arm.md (minmax_arithsi_non_canon): New pattern. + +2013-05-02 Matthew Gretton-Dann + + Backport from trunk r197530,197921. + 2013-04-12 Greta Yorsh + + * config/arm/arm.c (gen_operands_ldrd_strd): Initialize "base". + + 2013-04-05 Greta Yorsh + + * config/arm/constraints.md (q): New constraint. + * config/arm/ldrdstrd.md: New file. + * config/arm/arm.md (ldrdstrd.md) New include. + (arm_movdi): Use "q" instead of "r" constraint + for double-word memory access. + (movdf_soft_insn): Likewise. + * config/arm/vfp.md (movdi_vfp): Likewise. + * config/arm/t-arm (MD_INCLUDES): Add ldrdstrd.md. + * config/arm/arm-protos.h (gen_operands_ldrd_strd): New declaration. + * config/arm/arm.c (gen_operands_ldrd_strd): New function. + (mem_ok_for_ldrd_strd): Likewise. + (output_move_double): Update assertion. + +2013-05-02 Matthew Gretton-Dann + + Backport of trunk r197518-197522,197526-197528. + 2013-04-05 Greta Yorsh + + * config/arm/arm.md (arm_smax_insn): Convert define_insn into + define_insn_and_split. + (arm_smin_insn,arm_umaxsi3,arm_uminsi3): Likewise. + + 2013-04-05 Greta Yorsh + + * config/arm/arm.md (arm_ashldi3_1bit): Convert define_insn into + define_insn_and_split. + (arm_ashrdi3_1bit,arm_lshrdi3_1bit): Likewise. + (shiftsi3_compare): New pattern. + (rrx): New pattern. + * config/arm/unspecs.md (UNSPEC_RRX): New. + + 2013-04-05 Greta Yorsh + + * config/arm/arm.md (negdi_extendsidi): New pattern. + (negdi_zero_extendsidi): Likewise. + + 2013-04-05 Greta Yorsh + + * config/arm/arm.md (andsi_iorsi3_notsi): Convert define_insn into + define_insn_and_split. + (arm_negdi2,arm_abssi2,arm_neg_abssi2): Likewise. + (arm_cmpdi_insn,arm_cmpdi_unsigned): Likewise. + + 2013-04-05 Greta Yorsh + + * config/arm/arm.md (arm_subdi3): Convert define_insn into + define_insn_and_split. + (subdi_di_zesidi,subdi_di_sesidi): Likewise. + (subdi_zesidi_di,subdi_sesidi_di,subdi_zesidi_zesidi): Likewise. 
+ + 2013-04-05 Greta Yorsh + + * config/arm/arm.md (subsi3_carryin): New pattern. + (subsi3_carryin_const): Likewise. + (subsi3_carryin_compare,subsi3_carryin_compare_const): Likewise. + (subsi3_carryin_shift,rsbsi3_carryin_shift): Likewise. + + 2013-04-05 Greta Yorsh + + * config/arm/arm.md (incscc,arm_incscc,decscc,arm_decscc): Delete. + + 2013-04-05 Greta Yorsh + + * config/arm/arm.md (addsi3_carryin_): Set attribute predicable. + (addsi3_carryin_alt2_,addsi3_carryin_shift_): Likewise. + +2013-05-02 Matthew Gretton-Dann + + Backport of trunk r197517. + 2013-04-05 Kyrylo Tkachov + + * config/arm/arm.c (arm_expand_builtin): Change fcode + type to unsigned int. + +2013-05-02 Matthew Gretton-Dann + + Backport of trunk r197513. + 2013-04-05 Ramana Radhakrishnan + + * doc/invoke.texi (ARM Options): Document cortex-a53 support. + +2013-05-02 Matthew Gretton-Dann + + Backport of trunk r197489-197491. + 2013-04-04 Kyrylo Tkachov + + * config/arm/arm-protos.h (arm_builtin_vectorized_function): + New function prototype. + * config/arm/arm.c (TARGET_VECTORIZE_BUILTINS): Define. + (TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION): Likewise. + (arm_builtin_vectorized_function): New function. + + 2013-04-04 Kyrylo Tkachov + + * config/arm/arm_neon_builtins.def: New file. + * config/arm/arm.c (neon_builtin_data): Move contents to + arm_neon_builtins.def. + (enum arm_builtins): Include neon builtin definitions. + (ARM_BUILTIN_NEON_BASE): Move from enum to macro. + * config/arm/t-arm (arm.o): Add dependency on + arm_neon_builtins.def. + +2013-05-02 Matthew Gretton-Dann + + Backport of trunk 196795-196797,196957 + 2013-03-19 Ian Bolton + + * config/aarch64/aarch64.md (*sub3_carryin): New pattern. + (*subsi3_carryin_uxtw): Likewise. + + 2013-03-19 Ian Bolton + + * config/aarch64/aarch64.md (*ror3_insn): New pattern. + (*rorsi3_insn_uxtw): Likewise. + + 2013-03-19 Ian Bolton + + * config/aarch64/aarch64.md (*extr5_insn): New pattern. + (*extrsi5_insn_uxtw): Likewise. + +2013-04-10 Matthew Gretton-Dann + + * LINARO-VERSION: Bump version number. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. + * LINARO-VERSION: New file. + * configure.ac: Add Linaro version string. + * configure: Regenerate. + +2013-04-08 Matthew Gretton-Dann + + Backport of trunk r197346. + 2013-04-02 Ian Caulfield + Ramana Radhakrishnan + + * config/arm/arm-arches.def (armv8-a): Default to cortex-a53. + * config/arm/t-arm (MD_INCLUDES): Depend on cortex-a53.md. + * config/arm/cortex-a53.md: New file. + * config/arm/bpabi.h (BE8_LINK_SPEC): Handle cortex-a53. + * config/arm/arm.md (generic_sched, generic_vfp): Handle cortex-a53. + * config/arm/arm.c (arm_issue_rate): Likewise. + * config/arm/arm-tune.md: Regenerate + * config/arm/arm-tables.opt: Regenerate. + * config/arm/arm-cores.def: Add cortex-a53. + +2013-04-08 Matthew Gretton-Dann + + Backport of trunk r197342. + 2013-04-02 Sofiane Naci + + * config/aarch64/aarch64.md (*mov_aarch64): Add variants for + scalar load/store operations using B/H registers. + (*zero_extend2_aarch64): Likewise. + +2013-04-08 Matthew Gretton-Dann + + Backport of trunk r197341. + 2013-04-02 Sofiane Naci + + * config/aarch64/aarch64.md (*mov_aarch64): Add alternatives for + scalar move. + * config/aarch64/aarch64.c + (aarch64_simd_scalar_immediate_valid_for_move): New. + * config/aarch64/aarch64-protos.h + (aarch64_simd_scalar_immediate_valid_for_move): New. + * config/aarch64/constraints.md (Dh, Dq): New. + * config/aarch64/iterators.md (hq): New. 
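As a hypothetical illustration of the *ror<mode>3_insn and *extr<mode>5_insn backports above (the function name is chosen for the example, not taken from the patch), the standard C rotate idiom is recognised by GCC and, with these patterns, can be emitted as a single rotate instruction when compiled for AArch64 at -O2:

/* Rotate x right by n bits (shift amounts masked to stay defined).
   GCC folds this idiom into a rotate, which the backported patterns
   can match as a single ROR; the *extr pattern covers the related
   immediate extract form.  */
unsigned int
rotr32 (unsigned int x, unsigned int n)
{
  return (x >> (n & 31)) | (x << ((-n) & 31));
}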
+ +2013-04-08 Matthew Gretton-Dann + + Backport from trunk r197207. + 2013-03-28 Naveen H.S + + * config/aarch64/aarch64.md (*and3_compare0): New pattern. + (*andsi3_compare0_uxtw): New pattern. + (*and_3_compare0): New pattern. + (*and_si3_compare0_uxtw): New pattern. + +2013-04-08 Matthew Gretton-Dann + + Backport from trunk r197153. + 2013-03-27 Terry Guo + + * config/arm/arm-cores.def: Added core cortex-r7. + * config/arm/arm-tune.md: Regenerated. + * config/arm/arm-tables.opt: Regenerated. + * doc/invoke.texi: Added entry for core cortex-r7. + +2013-04-08 Matthew Gretton-Dann + + Backport from trunk r197052. + 2013-03-25 Kyrylo Tkachov + + * config/arm/arm.md (f_sels, f_seld): New types. + (*cmov): New pattern. + * config/arm/predicates.md (arm_vsel_comparison_operator): New + predicate. + +2013-04-08 Matthew Gretton-Dann + + Backport from trunk r197046. + 2013-03-25 Kyrylo Tkachov + + * config/arm/arm.c (arm_emit_load_exclusive): Add acq parameter. + Emit load-acquire versions when acq is true. + (arm_emit_store_exclusive): Add rel parameter. + Emit store-release versions when rel is true. + (arm_split_compare_and_swap): Use acquire-release instructions + instead. + of barriers when appropriate. + (arm_split_atomic_op): Likewise. + * config/arm/arm.h (TARGET_HAVE_LDACQ): New macro. + * config/arm/unspecs.md (VUNSPEC_LAX): New unspec. + (VUNSPEC_SLX): Likewise. + (VUNSPEC_LDA): Likewise. + (VUNSPEC_STL): Likewise. + * config/arm/sync.md (atomic_load): New pattern. + (atomic_store): Likewise. + (arm_load_acquire_exclusive): Likewise. + (arm_load_acquire_exclusivesi): Likewise. + (arm_load_acquire_exclusivedi): Likewise. + (arm_store_release_exclusive): Likewise. + +2013-04-08 Matthew Gretton-Dann + + Backport from trunk r196876. + 2013-03-21 Christophe Lyon + + * config/arm/arm-protos.h (tune_params): Add + prefer_neon_for_64bits field. + * config/arm/arm.c (prefer_neon_for_64bits): New variable. + (arm_slowmul_tune): Default prefer_neon_for_64bits to false. + (arm_fastmul_tune, arm_strongarm_tune, arm_xscale_tune): Ditto. + (arm_9e_tune, arm_v6t2_tune, arm_cortex_tune): Ditto. + (arm_cortex_a15_tune, arm_cortex_a5_tune): Ditto. + (arm_cortex_a9_tune, arm_v6m_tune, arm_fa726te_tune): Ditto. + (arm_option_override): Handle -mneon-for-64bits new option. + * config/arm/arm.h (TARGET_PREFER_NEON_64BITS): New macro. + (prefer_neon_for_64bits): Declare new variable. + * config/arm/arm.md (arch): Rename neon_onlya8 and neon_nota8 to + avoid_neon_for_64bits and neon_for_64bits. Remove onlya8 and + nota8. + (arch_enabled): Handle new arch types. Remove support for onlya8 + and nota8. + (one_cmpldi2): Use new arch names. + * config/arm/arm.opt (mneon-for-64bits): Add option. + * config/arm/neon.md (adddi3_neon, subdi3_neon, iordi3_neon) + (anddi3_neon, xordi3_neon, ashldi3_neon, di3_neon): Use + neon_for_64bits instead of nota8 and avoid_neon_for_64bits instead + of onlya8. + * doc/invoke.texi (-mneon-for-64bits): Document. + +2013-04-08 Matthew Gretton-Dann + + Backport from trunk r196858. + 2013-03-21 Naveen H.S + + * config/aarch64/aarch64-simd.md (simd_fabd): New Attribute. + (abd_3): New pattern. + (aba_3): New pattern. + (fabd_3): New pattern. + +2013-04-08 Matthew Gretton-Dann + + Backport from trunk r196856. + 2013-03-21 Naveen H.S + + * config/aarch64/aarch64-elf.h (REGISTER_PREFIX): Remove. + * config/aarch64/aarch64.c (aarch64_print_operand): Remove all + occurrence of REGISTER_PREFIX as its empty string. 
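For illustration of the acquire/release backport above (trunk r197046): the hypothetical snippet below (the names publish, consume and increment are invented for the example) uses the standard GCC __atomic builtins, which on an ARMv8 target with TARGET_HAVE_LDACQ can now be lowered to LDA/STL and LDAEX/STLEX sequences instead of barrier-bracketed accesses; on older architectures the DMB-based fallback is unchanged.

static int flag;
static int data;

void
publish (int value)
{
  data = value;
  /* Store-release: may emit STL on ARMv8.  */
  __atomic_store_n (&flag, 1, __ATOMIC_RELEASE);
}

int
consume (void)
{
  /* Load-acquire: may emit LDA on ARMv8.  */
  while (!__atomic_load_n (&flag, __ATOMIC_ACQUIRE))
    ;
  return data;
}

int
increment (int *counter)
{
  /* Read-modify-write: can be split into an LDAEX/STLEX loop
     rather than a barrier-bracketed LDREX/STREX loop.  */
  return __atomic_fetch_add (counter, 1, __ATOMIC_ACQ_REL);
}

A compile along the lines of arm-linux-gnueabihf-gcc -O2 -march=armv8-a -c (invocation shown only as an example) is enough to observe the new sequences.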
--- a/src/gcc/testsuite/gcc.target/arm/vect-rounding-floorf.c +++ b/src/gcc/testsuite/gcc.target/arm/vect-rounding-floorf.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_neon_ok } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize" } */ +/* { dg-add-options arm_v8_neon } */ + +#define N 32 + +void +foo (float *output, float *input) +{ + int i = 0; + /* Vectorizable. */ + for (i = 0; i < N; i++) + output[i] = __builtin_floorf (input[i]); +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_call_floorf } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ --- a/src/gcc/testsuite/gcc.target/arm/crypto-vaesdq_u8.c +++ b/src/gcc/testsuite/gcc.target/arm/crypto-vaesdq_u8.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +int +foo (void) +{ + uint8x16_t a, b, c; + int i = 0; + + for (i = 0; i < 16; ++i) + { + a[i] = i; + b[i] = 15 - i; + } + c = vaesdq_u8 (a, b); + return c[0]; +} + +/* { dg-final { scan-assembler "aesd.8\tq\[0-9\]+, q\[0-9\]+" } } */ --- a/src/gcc/testsuite/gcc.target/arm/crypto-vsha256su0q_u32.c +++ b/src/gcc/testsuite/gcc.target/arm/crypto-vsha256su0q_u32.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +int +foo (void) +{ + uint32x4_t a = {0xd, 0xe, 0xa, 0xd}; + uint32x4_t b = {0, 1, 2, 3}; + + uint32x4_t res = vsha256su0q_u32 (a, b); + return res[0]; +} + +/* { dg-final { scan-assembler "sha256su0.32\tq\[0-9\]+, q\[0-9\]+" } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vld1p64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1p64.c @@ -0,0 +1,19 @@ +/* Test the `vld1p64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vld1p64 (void) +{ + poly64x1_t out_poly64x1_t; + + out_poly64x1_t = vld1_p64 (0); +} + +/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vst4p64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vst4p64.c @@ -0,0 +1,20 @@ +/* Test the `vst4p64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vst4p64 (void) +{ + poly64_t *arg0_poly64_t; + poly64x1x4_t arg1_poly64x1x4_t; + + vst4_p64 (arg0_poly64_t, arg1_poly64x1x4_t); +} + +/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp128_u8.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp128_u8.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQp128_u8' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. 
*/ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQp128_u8 (void) +{ + poly128_t out_poly128_t; + uint8x16_t arg0_uint8x16_t; + + out_poly128_t = vreinterpretq_p128_u8 (arg0_uint8x16_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp128_s32.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp128_s32.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQp128_s32' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQp128_s32 (void) +{ + poly128_t out_poly128_t; + int32x4_t arg0_int32x4_t; + + out_poly128_t = vreinterpretq_p128_s32 (arg0_int32x4_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp64_u8.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp64_u8.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQp64_u8' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQp64_u8 (void) +{ + poly64x2_t out_poly64x2_t; + uint8x16_t arg0_uint8x16_t; + + out_poly64x2_t = vreinterpretq_p64_u8 (arg0_uint8x16_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp8_p64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp8_p64.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQp8_p64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQp8_p64 (void) +{ + poly8x16_t out_poly8x16_t; + poly64x2_t arg0_poly64x2_t; + + out_poly8x16_t = vreinterpretq_p8_p64 (arg0_poly64x2_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp128_s16.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp128_s16.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQp128_s16' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQp128_s16 (void) +{ + poly128_t out_poly128_t; + int16x8_t arg0_int16x8_t; + + out_poly128_t = vreinterpretq_p128_s16 (arg0_int16x8_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vld2p64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2p64.c @@ -0,0 +1,19 @@ +/* Test the `vld2p64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. 
*/ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vld2p64 (void) +{ + poly64x1x2_t out_poly64x1x2_t; + + out_poly64x1x2_t = vld2_p64 (0); +} + +/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp64_u64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp64_u64.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQp64_u64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQp64_u64 (void) +{ + poly64x2_t out_poly64x2_t; + uint64x2_t arg0_uint64x2_t; + + out_poly64x2_t = vreinterpretq_p64_u64 (arg0_uint64x2_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vld4_dupp64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4_dupp64.c @@ -0,0 +1,19 @@ +/* Test the `vld4_dupp64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vld4_dupp64 (void) +{ + poly64x1x4_t out_poly64x1x4_t; + + out_poly64x1x4_t = vld4_dup_p64 (0); +} + +/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu8_p64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu8_p64.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQu8_p64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQu8_p64 (void) +{ + uint8x16_t out_uint8x16_t; + poly64x2_t arg0_poly64x2_t; + + out_uint8x16_t = vreinterpretq_u8_p64 (arg0_poly64x2_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp64_p16.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp64_p16.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQp64_p16' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQp64_p16 (void) +{ + poly64x2_t out_poly64x2_t; + poly16x8_t arg0_poly16x8_t; + + out_poly64x2_t = vreinterpretq_p64_p16 (arg0_poly16x8_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vdupQ_np64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vdupQ_np64.c @@ -0,0 +1,19 @@ +/* Test the `vdupQ_np64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. 
*/ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vdupQ_np64 (void) +{ + poly64x2_t out_poly64x2_t; + poly64_t arg0_poly64_t; + + out_poly64x2_t = vdupq_n_p64 (arg0_poly64_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs32_p64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs32_p64.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQs32_p64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQs32_p64 (void) +{ + int32x4_t out_int32x4_t; + poly64x2_t arg0_poly64x2_t; + + out_int32x4_t = vreinterpretq_s32_p64 (arg0_poly64x2_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp128_u32.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp128_u32.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQp128_u32' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQp128_u32 (void) +{ + poly128_t out_poly128_t; + uint32x4_t arg0_uint32x4_t; + + out_poly128_t = vreinterpretq_p128_u32 (arg0_uint32x4_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs64_p64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs64_p64.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQs64_p64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQs64_p64 (void) +{ + int64x2_t out_int64x2_t; + poly64x2_t arg0_poly64x2_t; + + out_int64x2_t = vreinterpretq_s64_p64 (arg0_poly64x2_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vld3p64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3p64.c @@ -0,0 +1,19 @@ +/* Test the `vld3p64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vld3p64 (void) +{ + poly64x1x3_t out_poly64x1x3_t; + + out_poly64x1x3_t = vld3_p64 (0); +} + +/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu16_p128.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu16_p128.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQu16_p128' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. 
*/ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQu16_p128 (void) +{ + uint16x8_t out_uint16x8_t; + poly128_t arg0_poly128_t; + + out_uint16x8_t = vreinterpretq_u16_p128 (arg0_poly128_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp128_u16.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp128_u16.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQp128_u16' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQp128_u16 (void) +{ + poly128_t out_poly128_t; + uint16x8_t arg0_uint16x8_t; + + out_poly128_t = vreinterpretq_p128_u16 (arg0_uint16x8_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vcreatep64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vcreatep64.c @@ -0,0 +1,19 @@ +/* Test the `vcreatep64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vcreatep64 (void) +{ + poly64x1_t out_poly64x1_t; + uint64_t arg0_uint64_t; + + out_poly64x1_t = vcreate_p64 (arg0_uint64_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vdupQ_lanep64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vdupQ_lanep64.c @@ -0,0 +1,19 @@ +/* Test the `vdupQ_lanep64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vdupQ_lanep64 (void) +{ + poly64x2_t out_poly64x2_t; + poly64x1_t arg0_poly64x1_t; + + out_poly64x2_t = vdupq_lane_p64 (arg0_poly64x1_t, 0); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp128_f32.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp128_f32.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQp128_f32' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQp128_f32 (void) +{ + poly128_t out_poly128_t; + float32x4_t arg0_float32x4_t; + + out_poly128_t = vreinterpretq_p128_f32 (arg0_float32x4_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vsri_np64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vsri_np64.c @@ -0,0 +1,21 @@ +/* Test the `vsri_np64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. 
*/ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vsri_np64 (void) +{ + poly64x1_t out_poly64x1_t; + poly64x1_t arg0_poly64x1_t; + poly64x1_t arg1_poly64x1_t; + + out_poly64x1_t = vsri_n_p64 (arg0_poly64x1_t, arg1_poly64x1_t, 1); +} + +/* { dg-final { scan-assembler "vsri\.64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanep64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanep64.c @@ -0,0 +1,20 @@ +/* Test the `vld1_lanep64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vld1_lanep64 (void) +{ + poly64x1_t out_poly64x1_t; + poly64x1_t arg1_poly64x1_t; + + out_poly64x1_t = vld1_lane_p64 (0, arg1_poly64x1_t, 0); +} + +/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs8_p128.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs8_p128.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQs8_p128' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQs8_p128 (void) +{ + int8x16_t out_int8x16_t; + poly128_t arg0_poly128_t; + + out_int8x16_t = vreinterpretq_s8_p128 (arg0_poly128_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vld4p64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vld4p64.c @@ -0,0 +1,19 @@ +/* Test the `vld4p64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vld4p64 (void) +{ + poly64x1x4_t out_poly64x1x4_t; + + out_poly64x1x4_t = vld4_p64 (0); +} + +/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp64_s32.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp64_s32.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQp64_s32' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. 
*/ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQp64_s32 (void) +{ + poly64x2_t out_poly64x2_t; + int32x4_t arg0_int32x4_t; + + out_poly64x2_t = vreinterpretq_p64_s32 (arg0_int32x4_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanep64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanep64.c @@ -0,0 +1,20 @@ +/* Test the `vld1Q_lanep64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vld1Q_lanep64 (void) +{ + poly64x2_t out_poly64x2_t; + poly64x2_t arg1_poly64x2_t; + + out_poly64x2_t = vld1q_lane_p64 (0, arg1_poly64x2_t, 1); +} + +/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp128_p8.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp128_p8.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQp128_p8' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQp128_p8 (void) +{ + poly128_t out_poly128_t; + poly8x16_t arg0_poly8x16_t; + + out_poly128_t = vreinterpretq_p128_p8 (arg0_poly8x16_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp64_p8.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp64_p8.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQp64_p8' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQp64_p8 (void) +{ + poly64x2_t out_poly64x2_t; + poly8x16_t arg0_poly8x16_t; + + out_poly64x2_t = vreinterpretq_p64_p8 (arg0_poly8x16_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vget_lowp64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vget_lowp64.c @@ -0,0 +1,19 @@ +/* Test the `vget_lowp64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vget_lowp64 (void) +{ + poly64x1_t out_poly64x1_t; + poly64x2_t arg0_poly64x2_t; + + out_poly64x1_t = vget_low_p64 (arg0_poly64x2_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs64_p128.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs64_p128.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQs64_p128' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. 
*/ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQs64_p128 (void) +{ + int64x2_t out_int64x2_t; + poly128_t arg0_poly128_t; + + out_int64x2_t = vreinterpretq_s64_p128 (arg0_poly128_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanep64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanep64.c @@ -0,0 +1,20 @@ +/* Test the `vst1_lanep64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vst1_lanep64 (void) +{ + poly64_t *arg0_poly64_t; + poly64x1_t arg1_poly64x1_t; + + vst1_lane_p64 (arg0_poly64_t, arg1_poly64x1_t, 0); +} + +/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs16_p64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs16_p64.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQs16_p64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQs16_p64 (void) +{ + int16x8_t out_int16x8_t; + poly64x2_t arg0_poly64x2_t; + + out_int16x8_t = vreinterpretq_s16_p64 (arg0_poly64x2_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp64_s16.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp64_s16.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQp64_s16' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQp64_s16 (void) +{ + poly64x2_t out_poly64x2_t; + int16x8_t arg0_int16x8_t; + + out_poly64x2_t = vreinterpretq_p64_s16 (arg0_int16x8_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp16_p64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp16_p64.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretp16_p64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretp16_p64 (void) +{ + poly16x4_t out_poly16x4_t; + poly64x1_t arg0_poly64x1_t; + + out_poly16x4_t = vreinterpret_p16_p64 (arg0_poly64x1_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qp64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Qp64.c @@ -0,0 +1,20 @@ +/* Test the `vst1Qp64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. 
*/ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vst1Qp64 (void) +{ + poly64_t *arg0_poly64_t; + poly64x2_t arg1_poly64x2_t; + + vst1q_p64 (arg0_poly64_t, arg1_poly64x2_t); +} + +/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp64_s64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp64_s64.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretp64_s64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretp64_s64 (void) +{ + poly64x1_t out_poly64x1_t; + int64x1_t arg0_int64x1_t; + + out_poly64x1_t = vreinterpret_p64_s64 (arg0_int64x1_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu32_p64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu32_p64.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQu32_p64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQu32_p64 (void) +{ + uint32x4_t out_uint32x4_t; + poly64x2_t arg0_poly64x2_t; + + out_uint32x4_t = vreinterpretq_u32_p64 (arg0_poly64x2_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp64_u32.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp64_u32.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQp64_u32' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQp64_u32 (void) +{ + poly64x2_t out_poly64x2_t; + uint32x4_t arg0_uint32x4_t; + + out_poly64x2_t = vreinterpretq_p64_u32 (arg0_uint32x4_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vget_highp64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vget_highp64.c @@ -0,0 +1,19 @@ +/* Test the `vget_highp64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vget_highp64 (void) +{ + poly64x1_t out_poly64x1_t; + poly64x2_t arg0_poly64x2_t; + + out_poly64x1_t = vget_high_p64 (arg0_poly64x2_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu64_p64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu64_p64.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQu64_p64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. 
*/ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQu64_p64 (void) +{ + uint64x2_t out_uint64x2_t; + poly64x2_t arg0_poly64x2_t; + + out_uint64x2_t = vreinterpretq_u64_p64 (arg0_poly64x2_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp64_u16.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp64_u16.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQp64_u16' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQp64_u16 (void) +{ + poly64x2_t out_poly64x2_t; + uint16x8_t arg0_uint16x8_t; + + out_poly64x2_t = vreinterpretq_p64_u16 (arg0_uint16x8_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vcvtf32_f16.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vcvtf32_f16.c @@ -0,0 +1,20 @@ +/* Test the `vcvtf32_f16' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_neon_fp16_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_neon_fp16 } */ + +#include "arm_neon.h" + +void test_vcvtf32_f16 (void) +{ + float32x4_t out_float32x4_t; + float16x4_t arg0_float16x4_t; + + out_float32x4_t = vcvt_f32_f16 (arg0_float16x4_t); +} + +/* { dg-final { scan-assembler "vcvt\.f32.f16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp64_f32.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp64_f32.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQp64_f32' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQp64_f32 (void) +{ + poly64x2_t out_poly64x2_t; + float32x4_t arg0_float32x4_t; + + out_poly64x2_t = vreinterpretq_p64_f32 (arg0_float32x4_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vbslp64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vbslp64.c @@ -0,0 +1,22 @@ +/* Test the `vbslp64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vbslp64 (void) +{ + poly64x1_t out_poly64x1_t; + uint64x1_t arg0_uint64x1_t; + poly64x1_t arg1_poly64x1_t; + poly64x1_t arg2_poly64x1_t; + + out_poly64x1_t = vbsl_p64 (arg0_uint64x1_t, arg1_poly64x1_t, arg2_poly64x1_t); +} + +/* { dg-final { scan-assembler "((vbsl)|(vbit)|(vbif))\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp16_p128.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp16_p128.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQp16_p128' ARM Neon intrinsic. 
*/ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQp16_p128 (void) +{ + poly16x8_t out_poly16x8_t; + poly128_t arg0_poly128_t; + + out_poly16x8_t = vreinterpretq_p16_p128 (arg0_poly128_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vsli_np64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vsli_np64.c @@ -0,0 +1,21 @@ +/* Test the `vsli_np64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vsli_np64 (void) +{ + poly64x1_t out_poly64x1_t; + poly64x1_t arg0_poly64x1_t; + poly64x1_t arg1_poly64x1_t; + + out_poly64x1_t = vsli_n_p64 (arg0_poly64x1_t, arg1_poly64x1_t, 1); +} + +/* { dg-final { scan-assembler "vsli\.64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupp64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupp64.c @@ -0,0 +1,19 @@ +/* Test the `vld1Q_dupp64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vld1Q_dupp64 (void) +{ + poly64x2_t out_poly64x2_t; + + out_poly64x2_t = vld1q_dup_p64 (0); +} + +/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu8_p128.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu8_p128.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQu8_p128' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQu8_p128 (void) +{ + uint8x16_t out_uint8x16_t; + poly128_t arg0_poly128_t; + + out_uint8x16_t = vreinterpretq_u8_p128 (arg0_poly128_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQf32_p64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQf32_p64.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQf32_p64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQf32_p64 (void) +{ + float32x4_t out_float32x4_t; + poly64x2_t arg0_poly64x2_t; + + out_float32x4_t = vreinterpretq_f32_p64 (arg0_poly64x2_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp64_u64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp64_u64.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretp64_u64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. 
*/ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretp64_u64 (void) +{ + poly64x1_t out_poly64x1_t; + uint64x1_t arg0_uint64x1_t; + + out_poly64x1_t = vreinterpret_p64_u64 (arg0_uint64x1_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vdup_lanep64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vdup_lanep64.c @@ -0,0 +1,19 @@ +/* Test the `vdup_lanep64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vdup_lanep64 (void) +{ + poly64x1_t out_poly64x1_t; + poly64x1_t arg0_poly64x1_t; + + out_poly64x1_t = vdup_lane_p64 (arg0_poly64x1_t, 0); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp64_p16.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp64_p16.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretp64_p16' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretp64_p16 (void) +{ + poly64x1_t out_poly64x1_t; + poly16x4_t arg0_poly16x4_t; + + out_poly64x1_t = vreinterpret_p64_p16 (arg0_poly16x4_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp128_p64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp128_p64.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQp128_p64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQp128_p64 (void) +{ + poly128_t out_poly128_t; + poly64x2_t arg0_poly64x2_t; + + out_poly128_t = vreinterpretq_p128_p64 (arg0_poly64x2_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu16_p64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu16_p64.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQu16_p64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQu16_p64 (void) +{ + uint16x8_t out_uint16x8_t; + poly64x2_t arg0_poly64x2_t; + + out_uint16x8_t = vreinterpretq_u16_p64 (arg0_poly64x2_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets32_p64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets32_p64.c @@ -0,0 +1,19 @@ +/* Test the `vreinterprets32_p64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. 
*/ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterprets32_p64 (void) +{ + int32x2_t out_int32x2_t; + poly64x1_t arg0_poly64x1_t; + + out_int32x2_t = vreinterpret_s32_p64 (arg0_poly64x1_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets8_p64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets8_p64.c @@ -0,0 +1,19 @@ +/* Test the `vreinterprets8_p64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterprets8_p64 (void) +{ + int8x8_t out_int8x8_t; + poly64x1_t arg0_poly64x1_t; + + out_int8x8_t = vreinterpret_s8_p64 (arg0_poly64x1_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vcvtf16_f32.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vcvtf16_f32.c @@ -0,0 +1,20 @@ +/* Test the `vcvtf16_f32' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_neon_fp16_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_neon_fp16 } */ + +#include "arm_neon.h" + +void test_vcvtf16_f32 (void) +{ + float16x4_t out_float16x4_t; + float32x4_t arg0_float32x4_t; + + out_float16x4_t = vcvt_f16_f32 (arg0_float32x4_t); +} + +/* { dg-final { scan-assembler "vcvt\.f16.f32\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets64_p64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets64_p64.c @@ -0,0 +1,19 @@ +/* Test the `vreinterprets64_p64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterprets64_p64 (void) +{ + int64x1_t out_int64x1_t; + poly64x1_t arg0_poly64x1_t; + + out_int64x1_t = vreinterpret_s64_p64 (arg0_poly64x1_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu64_p128.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu64_p128.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQu64_p128' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQu64_p128 (void) +{ + uint64x2_t out_uint64x2_t; + poly128_t arg0_poly128_t; + + out_uint64x2_t = vreinterpretq_u64_p128 (arg0_poly128_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp64_s8.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp64_s8.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretp64_s8' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. 
*/ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretp64_s8 (void) +{ + poly64x1_t out_poly64x1_t; + int8x8_t arg0_int8x8_t; + + out_poly64x1_t = vreinterpret_p64_s8 (arg0_int8x8_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupp64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupp64.c @@ -0,0 +1,19 @@ +/* Test the `vld1_dupp64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vld1_dupp64 (void) +{ + poly64x1_t out_poly64x1_t; + + out_poly64x1_t = vld1_dup_p64 (0); +} + +/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp64_s32.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp64_s32.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretp64_s32' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretp64_s32 (void) +{ + poly64x1_t out_poly64x1_t; + int32x2_t arg0_int32x2_t; + + out_poly64x1_t = vreinterpret_p64_s32 (arg0_int32x2_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vsriQ_np64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vsriQ_np64.c @@ -0,0 +1,21 @@ +/* Test the `vsriQ_np64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vsriQ_np64 (void) +{ + poly64x2_t out_poly64x2_t; + poly64x2_t arg0_poly64x2_t; + poly64x2_t arg1_poly64x2_t; + + out_poly64x2_t = vsriq_n_p64 (arg0_poly64x2_t, arg1_poly64x2_t, 1); +} + +/* { dg-final { scan-assembler "vsri\.64\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vbslQp64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vbslQp64.c @@ -0,0 +1,22 @@ +/* Test the `vbslQp64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. 
*/ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vbslQp64 (void) +{ + poly64x2_t out_poly64x2_t; + uint64x2_t arg0_uint64x2_t; + poly64x2_t arg1_poly64x2_t; + poly64x2_t arg2_poly64x2_t; + + out_poly64x2_t = vbslq_p64 (arg0_uint64x2_t, arg1_poly64x2_t, arg2_poly64x2_t); +} + +/* { dg-final { scan-assembler "((vbsl)|(vbit)|(vbif))\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs32_p128.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs32_p128.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQs32_p128' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQs32_p128 (void) +{ + int32x4_t out_int32x4_t; + poly128_t arg0_poly128_t; + + out_int32x4_t = vreinterpretq_s32_p128 (arg0_poly128_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp64_u8.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp64_u8.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretp64_u8' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretp64_u8 (void) +{ + poly64x1_t out_poly64x1_t; + uint8x8_t arg0_uint8x8_t; + + out_poly64x1_t = vreinterpret_p64_u8 (arg0_uint8x8_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanep64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanep64.c @@ -0,0 +1,20 @@ +/* Test the `vst1Q_lanep64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vst1Q_lanep64 (void) +{ + poly64_t *arg0_poly64_t; + poly64x2_t arg1_poly64x2_t; + + vst1q_lane_p64 (arg0_poly64_t, arg1_poly64x2_t, 1); +} + +/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp64_s16.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp64_s16.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretp64_s16' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. 
*/ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretp64_s16 (void) +{ + poly64x1_t out_poly64x1_t; + int16x4_t arg0_int16x4_t; + + out_poly64x1_t = vreinterpret_p64_s16 (arg0_int16x4_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets16_p64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets16_p64.c @@ -0,0 +1,19 @@ +/* Test the `vreinterprets16_p64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterprets16_p64 (void) +{ + int16x4_t out_int16x4_t; + poly64x1_t arg0_poly64x1_t; + + out_int16x4_t = vreinterpret_s16_p64 (arg0_poly64x1_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vcombinep64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vcombinep64.c @@ -0,0 +1,20 @@ +/* Test the `vcombinep64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vcombinep64 (void) +{ + poly64x2_t out_poly64x2_t; + poly64x1_t arg0_poly64x1_t; + poly64x1_t arg1_poly64x1_t; + + out_poly64x2_t = vcombine_p64 (arg0_poly64x1_t, arg1_poly64x1_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Qp64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vld1Qp64.c @@ -0,0 +1,19 @@ +/* Test the `vld1Qp64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vld1Qp64 (void) +{ + poly64x2_t out_poly64x2_t; + + out_poly64x2_t = vld1q_p64 (0); +} + +/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp128_s64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp128_s64.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQp128_s64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQp128_s64 (void) +{ + poly128_t out_poly128_t; + int64x2_t arg0_int64x2_t; + + out_poly128_t = vreinterpretq_p128_s64 (arg0_int64x2_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp8_p64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp8_p64.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretp8_p64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. 
*/ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretp8_p64 (void) +{ + poly8x8_t out_poly8x8_t; + poly64x1_t arg0_poly64x1_t; + + out_poly8x8_t = vreinterpret_p8_p64 (arg0_poly64x1_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vdup_np64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vdup_np64.c @@ -0,0 +1,19 @@ +/* Test the `vdup_np64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vdup_np64 (void) +{ + poly64x1_t out_poly64x1_t; + poly64_t arg0_poly64_t; + + out_poly64x1_t = vdup_n_p64 (arg0_poly64_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vst1p64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vst1p64.c @@ -0,0 +1,20 @@ +/* Test the `vst1p64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vst1p64 (void) +{ + poly64_t *arg0_poly64_t; + poly64x1_t arg1_poly64x1_t; + + vst1_p64 (arg0_poly64_t, arg1_poly64x1_t); +} + +/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu8_p64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu8_p64.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretu8_p64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretu8_p64 (void) +{ + uint8x8_t out_uint8x8_t; + poly64x1_t arg0_poly64x1_t; + + out_uint8x8_t = vreinterpret_u8_p64 (arg0_poly64x1_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp64_u32.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp64_u32.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretp64_u32' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretp64_u32 (void) +{ + poly64x1_t out_poly64x1_t; + uint32x2_t arg0_uint32x2_t; + + out_poly64x1_t = vreinterpret_p64_u32 (arg0_uint32x2_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu32_p64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu32_p64.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretu32_p64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. 
*/ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretu32_p64 (void) +{ + uint32x2_t out_uint32x2_t; + poly64x1_t arg0_poly64x1_t; + + out_uint32x2_t = vreinterpret_u32_p64 (arg0_poly64x1_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupp64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupp64.c @@ -0,0 +1,19 @@ +/* Test the `vld2_dupp64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vld2_dupp64 (void) +{ + poly64x1x2_t out_poly64x1x2_t; + + out_poly64x1x2_t = vld2_dup_p64 (0); +} + +/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu64_p64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu64_p64.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretu64_p64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretu64_p64 (void) +{ + uint64x1_t out_uint64x1_t; + poly64x1_t arg0_poly64x1_t; + + out_uint64x1_t = vreinterpret_u64_p64 (arg0_poly64x1_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vsliQ_np64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vsliQ_np64.c @@ -0,0 +1,21 @@ +/* Test the `vsliQ_np64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vsliQ_np64 (void) +{ + poly64x2_t out_poly64x2_t; + poly64x2_t arg0_poly64x2_t; + poly64x2_t arg1_poly64x2_t; + + out_poly64x2_t = vsliq_n_p64 (arg0_poly64x2_t, arg1_poly64x2_t, 1); +} + +/* { dg-final { scan-assembler "vsli\.64\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp64_u16.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp64_u16.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretp64_u16' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretp64_u16 (void) +{ + poly64x1_t out_poly64x1_t; + uint16x4_t arg0_uint16x4_t; + + out_poly64x1_t = vreinterpret_p64_u16 (arg0_uint16x4_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp128_u64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp128_u64.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQp128_u64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. 
*/ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQp128_u64 (void) +{ + poly128_t out_poly128_t; + uint64x2_t arg0_uint64x2_t; + + out_poly128_t = vreinterpretq_p128_u64 (arg0_uint64x2_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vst2p64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vst2p64.c @@ -0,0 +1,20 @@ +/* Test the `vst2p64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vst2p64 (void) +{ + poly64_t *arg0_poly64_t; + poly64x1x2_t arg1_poly64x1x2_t; + + vst2_p64 (arg0_poly64_t, arg1_poly64x1x2_t); +} + +/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp8_p128.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp8_p128.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQp8_p128' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQp8_p128 (void) +{ + poly8x16_t out_poly8x16_t; + poly128_t arg0_poly128_t; + + out_poly8x16_t = vreinterpretq_p8_p128 (arg0_poly128_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp64_f32.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp64_f32.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretp64_f32' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretp64_f32 (void) +{ + poly64x1_t out_poly64x1_t; + float32x2_t arg0_float32x2_t; + + out_poly64x1_t = vreinterpret_p64_f32 (arg0_float32x2_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQf32_p128.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQf32_p128.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQf32_p128' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQf32_p128 (void) +{ + float32x4_t out_float32x4_t; + poly128_t arg0_poly128_t; + + out_float32x4_t = vreinterpretq_f32_p128 (arg0_poly128_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vextQp64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vextQp64.c @@ -0,0 +1,21 @@ +/* Test the `vextQp64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. 
*/ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vextQp64 (void) +{ + poly64x2_t out_poly64x2_t; + poly64x2_t arg0_poly64x2_t; + poly64x2_t arg1_poly64x2_t; + + out_poly64x2_t = vextq_p64 (arg0_poly64x2_t, arg1_poly64x2_t, 0); +} + +/* { dg-final { scan-assembler "vext\.64\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp128_p16.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp128_p16.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQp128_p16' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQp128_p16 (void) +{ + poly128_t out_poly128_t; + poly16x8_t arg0_poly16x8_t; + + out_poly128_t = vreinterpretq_p128_p16 (arg0_poly16x8_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp64_p128.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp64_p128.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQp64_p128' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQp64_p128 (void) +{ + poly64x2_t out_poly64x2_t; + poly128_t arg0_poly128_t; + + out_poly64x2_t = vreinterpretq_p64_p128 (arg0_poly128_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs16_p128.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs16_p128.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQs16_p128' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQs16_p128 (void) +{ + int16x8_t out_int16x8_t; + poly128_t arg0_poly128_t; + + out_int16x8_t = vreinterpretq_s16_p128 (arg0_poly128_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs8_p64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs8_p64.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQs8_p64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQs8_p64 (void) +{ + int8x16_t out_int8x16_t; + poly64x2_t arg0_poly64x2_t; + + out_int8x16_t = vreinterpretq_s8_p64 (arg0_poly64x2_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vextp64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vextp64.c @@ -0,0 +1,21 @@ +/* Test the `vextp64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. 
*/ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vextp64 (void) +{ + poly64x1_t out_poly64x1_t; + poly64x1_t arg0_poly64x1_t; + poly64x1_t arg1_poly64x1_t; + + out_poly64x1_t = vext_p64 (arg0_poly64x1_t, arg1_poly64x1_t, 0); +} + +/* { dg-final { scan-assembler "vext\.64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp16_p64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp16_p64.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQp16_p64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQp16_p64 (void) +{ + poly16x8_t out_poly16x8_t; + poly64x2_t arg0_poly64x2_t; + + out_poly16x8_t = vreinterpretq_p16_p64 (arg0_poly64x2_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretf32_p64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretf32_p64.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretf32_p64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretf32_p64 (void) +{ + float32x2_t out_float32x2_t; + poly64x1_t arg0_poly64x1_t; + + out_float32x2_t = vreinterpret_f32_p64 (arg0_poly64x1_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp128_s8.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp128_s8.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQp128_s8' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQp128_s8 (void) +{ + poly128_t out_poly128_t; + int8x16_t arg0_int8x16_t; + + out_poly128_t = vreinterpretq_p128_s8 (arg0_int8x16_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp64_s8.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp64_s8.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQp64_s8' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQp64_s8 (void) +{ + poly64x2_t out_poly64x2_t; + int8x16_t arg0_int8x16_t; + + out_poly64x2_t = vreinterpretq_p64_s8 (arg0_int8x16_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vst3p64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vst3p64.c @@ -0,0 +1,20 @@ +/* Test the `vst3p64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. 
*/ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vst3p64 (void) +{ + poly64_t *arg0_poly64_t; + poly64x1x3_t arg1_poly64x1x3_t; + + vst3_p64 (arg0_poly64_t, arg1_poly64x1x3_t); +} + +/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vld3_dupp64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vld3_dupp64.c @@ -0,0 +1,19 @@ +/* Test the `vld3_dupp64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vld3_dupp64 (void) +{ + poly64x1x3_t out_poly64x1x3_t; + + out_poly64x1x3_t = vld3_dup_p64 (0); +} + +/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu16_p64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu16_p64.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretu16_p64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretu16_p64 (void) +{ + uint16x4_t out_uint16x4_t; + poly64x1_t arg0_poly64x1_t; + + out_uint16x4_t = vreinterpret_u16_p64 (arg0_poly64x1_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp64_p8.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp64_p8.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretp64_p8' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretp64_p8 (void) +{ + poly64x1_t out_poly64x1_t; + poly8x8_t arg0_poly8x8_t; + + out_poly64x1_t = vreinterpret_p64_p8 (arg0_poly8x8_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp64_s64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp64_s64.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQp64_s64' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. 
*/ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQp64_s64 (void) +{ + poly64x2_t out_poly64x2_t; + int64x2_t arg0_int64x2_t; + + out_poly64x2_t = vreinterpretq_p64_s64 (arg0_int64x2_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu32_p128.c +++ b/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu32_p128.c @@ -0,0 +1,19 @@ +/* Test the `vreinterpretQu32_p128' ARM Neon intrinsic. */ +/* This file was autogenerated by neon-testgen. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vreinterpretQu32_p128 (void) +{ + uint32x4_t out_uint32x4_t; + poly128_t arg0_poly128_t; + + out_uint32x4_t = vreinterpretq_u32_p128 (arg0_poly128_t); +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/anddi3-opt.c +++ b/src/gcc/testsuite/gcc.target/arm/anddi3-opt.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O1" } */ + +unsigned long long +muld (unsigned long long X, unsigned long long Y) +{ + unsigned long long mask = 0xffffffffull; + return (X & mask) * (Y & mask); +} + +/* { dg-final { scan-assembler-not "and\[\\t \]+.+,\[\\t \]*.+,\[\\t \]*.+" } } */ --- a/src/gcc/testsuite/gcc.target/arm/peep-ldrd-1.c +++ b/src/gcc/testsuite/gcc.target/arm/peep-ldrd-1.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_prefer_ldrd_strd } */ +/* { dg-options "-O2" } */ +int foo(int a, int b, int* p, int *q) +{ + a = p[2] + p[3]; + *q = a; + *p = a; + return a; +} +/* { dg-final { scan-assembler "ldrd" } } */ --- a/src/gcc/testsuite/gcc.target/arm/vselgtdf.c +++ b/src/gcc/testsuite/gcc.target/arm/vselgtdf.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_vfp_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_v8_vfp } */ + +double +foo (double x, double y) +{ + volatile int i = 0; + return i > 0 ? x : y; +} + +/* { dg-final { scan-assembler-times "vselgt.f64\td\[0-9\]+" 1 } } */ --- a/src/gcc/testsuite/gcc.target/arm/acle/acle.exp +++ b/src/gcc/testsuite/gcc.target/arm/acle/acle.exp @@ -0,0 +1,35 @@ +# Copyright (C) 2013-2014 Free Software Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +# GCC testsuite that uses the `dg.exp' driver. + +# Exit immediately if this isn't an ARM target. +if ![istarget arm*-*-*] then { + return +} + +# Load support procs. +load_lib gcc-dg.exp + +# Initialize `dg'. +dg-init + +# Main loop. +dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \ + "" "" + +# All done. 
+dg-finish --- a/src/gcc/testsuite/gcc.target/arm/acle/crc32b.c +++ b/src/gcc/testsuite/gcc.target/arm/acle/crc32b.c @@ -0,0 +1,20 @@ +/* Test the crc32b ACLE intrinsic. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crc_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crc } */ + +#include "arm_acle.h" + +void test_crc32b (void) +{ + uint32_t out_uint32_t; + uint32_t arg0_uint32_t; + uint8_t arg1_uint8_t; + + out_uint32_t = __crc32b (arg0_uint32_t, arg1_uint8_t); +} + +/* { dg-final { scan-assembler "crc32b\t...?, ...?, ...?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/acle/crc32d.c +++ b/src/gcc/testsuite/gcc.target/arm/acle/crc32d.c @@ -0,0 +1,20 @@ +/* Test the crc32d ACLE intrinsic. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crc_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crc } */ + +#include "arm_acle.h" + +void test_crc32d (void) +{ + uint32_t out_uint32_t; + uint32_t arg0_uint32_t; + uint64_t arg1_uint64_t; + + out_uint32_t = __crc32d (arg0_uint32_t, arg1_uint64_t); +} + +/* { dg-final { scan-assembler-times "crc32w\t...?, ...?, ...?\n" 2 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/acle/crc32cb.c +++ b/src/gcc/testsuite/gcc.target/arm/acle/crc32cb.c @@ -0,0 +1,20 @@ +/* Test the crc32cb ACLE intrinsic. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crc_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crc } */ + +#include "arm_acle.h" + +void test_crc32cb (void) +{ + uint32_t out_uint32_t; + uint32_t arg0_uint32_t; + uint8_t arg1_uint8_t; + + out_uint32_t = __crc32cb (arg0_uint32_t, arg1_uint8_t); +} + +/* { dg-final { scan-assembler "crc32cb\t...?, ...?, ...?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/acle/crc32cd.c +++ b/src/gcc/testsuite/gcc.target/arm/acle/crc32cd.c @@ -0,0 +1,20 @@ +/* Test the crc32cd ACLE intrinsic. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crc_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crc } */ + +#include "arm_acle.h" + +void test_crc32cd (void) +{ + uint32_t out_uint32_t; + uint32_t arg0_uint32_t; + uint64_t arg1_uint64_t; + + out_uint32_t = __crc32cd (arg0_uint32_t, arg1_uint64_t); +} + +/* { dg-final { scan-assembler-times "crc32cw\t...?, ...?, ...?\n" 2 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/acle/crc32w.c +++ b/src/gcc/testsuite/gcc.target/arm/acle/crc32w.c @@ -0,0 +1,20 @@ +/* Test the crc32w ACLE intrinsic. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crc_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crc } */ + +#include "arm_acle.h" + +void test_crc32w (void) +{ + uint32_t out_uint32_t; + uint32_t arg0_uint32_t; + uint32_t arg1_uint32_t; + + out_uint32_t = __crc32w (arg0_uint32_t, arg1_uint32_t); +} + +/* { dg-final { scan-assembler "crc32w\t...?, ...?, ...?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/acle/crc32h.c +++ b/src/gcc/testsuite/gcc.target/arm/acle/crc32h.c @@ -0,0 +1,20 @@ +/* Test the crc32h ACLE intrinsic. 
*/ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crc_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crc } */ + +#include "arm_acle.h" + +void test_crc32h (void) +{ + uint32_t out_uint32_t; + uint32_t arg0_uint32_t; + uint16_t arg1_uint16_t; + + out_uint32_t = __crc32h (arg0_uint32_t, arg1_uint16_t); +} + +/* { dg-final { scan-assembler "crc32h\t...?, ...?, ...?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/acle/crc32cw.c +++ b/src/gcc/testsuite/gcc.target/arm/acle/crc32cw.c @@ -0,0 +1,20 @@ +/* Test the crc32cw ACLE intrinsic. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crc_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crc } */ + +#include "arm_acle.h" + +void test_crc32cw (void) +{ + uint32_t out_uint32_t; + uint32_t arg0_uint32_t; + uint32_t arg1_uint32_t; + + out_uint32_t = __crc32cw (arg0_uint32_t, arg1_uint32_t); +} + +/* { dg-final { scan-assembler "crc32cw\t...?, ...?, ...?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/acle/crc32ch.c +++ b/src/gcc/testsuite/gcc.target/arm/acle/crc32ch.c @@ -0,0 +1,20 @@ +/* Test the crc32ch ACLE intrinsic. */ + +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crc_ok } */ +/* { dg-options "-save-temps -O0" } */ +/* { dg-add-options arm_crc } */ + +#include "arm_acle.h" + +void test_crc32ch (void) +{ + uint32_t out_uint32_t; + uint32_t arg0_uint32_t; + uint16_t arg1_uint16_t; + + out_uint32_t = __crc32ch (arg0_uint32_t, arg1_uint16_t); +} + +/* { dg-final { scan-assembler "crc32ch\t...?, ...?, ...?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/iordi3-opt.c +++ b/src/gcc/testsuite/gcc.target/arm/iordi3-opt.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-O1" } */ + +unsigned long long or64 (unsigned long long input) +{ + return input | 0x200000004ULL; +} + +/* { dg-final { scan-assembler-not "mov\[\\t \]+.+,\[\\t \]*.+" } } */ --- a/src/gcc/testsuite/gcc.target/arm/crypto-vsha1pq_u32.c +++ b/src/gcc/testsuite/gcc.target/arm/crypto-vsha1pq_u32.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +int +foo (void) +{ + uint32_t hash = 0xdeadbeef; + uint32x4_t a = {0, 1, 2, 3}; + uint32x4_t b = {3, 2, 1, 0}; + + uint32x4_t res = vsha1pq_u32 (a, hash, b); + return res[0]; +} + +/* { dg-final { scan-assembler "sha1p.32\tq\[0-9\]+, q\[0-9\]+" } } */ --- a/src/gcc/testsuite/gcc.target/arm/atomic-op-relaxed.c +++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-relaxed.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_arch_v8a_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_arch_v8a } */ + +#include "../aarch64/atomic-op-relaxed.x" + +/* { dg-final { scan-assembler-times "ldrex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ +/* { dg-final { scan-assembler-times "strex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ +/* { dg-final { scan-assembler-not "dmb" } } */ --- a/src/gcc/testsuite/gcc.target/arm/vselgesf.c +++ b/src/gcc/testsuite/gcc.target/arm/vselgesf.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_vfp_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_v8_vfp } */ + +float +foo (float x, float y) +{ + volatile int i = 0; + return i >= 0 ? 
x : y; +} + +/* { dg-final { scan-assembler-times "vselge.f32\ts\[0-9\]+" 1 } } */ --- a/src/gcc/testsuite/gcc.target/arm/peep-strd-1.c +++ b/src/gcc/testsuite/gcc.target/arm/peep-strd-1.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_prefer_ldrd_strd } */ +/* { dg-options "-O2" } */ +void foo(int a, int b, int* p) +{ + p[2] = a; + p[3] = b; +} +/* { dg-final { scan-assembler "strd" } } */ --- a/src/gcc/testsuite/gcc.target/arm/crypto-vsha1su1q_u32.c +++ b/src/gcc/testsuite/gcc.target/arm/crypto-vsha1su1q_u32.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +int +foo (void) +{ + uint32x4_t a = {0xd, 0xe, 0xa, 0xd}; + uint32x4_t b = {0, 1, 2, 3}; + + uint32x4_t res = vsha1su1q_u32 (a, b); + return res[0]; +} + +/* { dg-final { scan-assembler "sha1su1.32\tq\[0-9\]+, q\[0-9\]+" } } */ --- a/src/gcc/testsuite/gcc.target/arm/crypto-vmullp64.c +++ b/src/gcc/testsuite/gcc.target/arm/crypto-vmullp64.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +poly128_t +foo (void) +{ + poly64_t a = 0xdeadbeef; + poly64_t b = 0xadadadad; + return vmull_p64 (a, b); +} + +/* { dg-final { scan-assembler "vmull.p64.*" } } */ --- a/src/gcc/testsuite/gcc.target/arm/lp1243022.c +++ b/src/gcc/testsuite/gcc.target/arm/lp1243022.c @@ -0,0 +1,201 @@ +/* { dg-do compile { target arm_thumb2 } } */ +/* { dg-options "-O2 -fdump-rtl-subreg2" } */ + +/* { dg-final { scan-rtl-dump "REG_INC" "subreg2" { target { ! arm_neon } } } } */ +/* { dg-final { cleanup-rtl-dump "subreg2" } } */ +struct device; +typedef unsigned int __u32; +typedef unsigned long long u64; +typedef __u32 __le32; +typedef u64 dma_addr_t; +typedef unsigned gfp_t; +int dev_warn (const struct device *dev, const char *fmt, ...); +struct usb_bus +{ + struct device *controller; +}; +struct usb_hcd +{ + struct usb_bus self; +}; +struct xhci_generic_trb +{ + __le32 field[4]; +}; +union xhci_trb +{ + struct xhci_generic_trb generic; +}; +struct xhci_segment +{ + union xhci_trb *trbs; + dma_addr_t dma; +}; +struct xhci_ring +{ + struct xhci_segment *first_seg; +}; +struct xhci_hcd +{ + struct xhci_ring *cmd_ring; + struct xhci_ring *event_ring; +}; +struct usb_hcd *xhci_to_hcd (struct xhci_hcd *xhci) +{ +} +dma_addr_t xhci_trb_virt_to_dma (struct xhci_segment * seg, + union xhci_trb * trb); +struct xhci_segment *trb_in_td (struct xhci_segment *start_seg, + dma_addr_t suspect_dma); +xhci_test_trb_in_td (struct xhci_hcd *xhci, struct xhci_segment *input_seg, + union xhci_trb *start_trb, union xhci_trb *end_trb, + dma_addr_t input_dma, struct xhci_segment *result_seg, + char *test_name, int test_number) +{ + unsigned long long start_dma; + unsigned long long end_dma; + struct xhci_segment *seg; + start_dma = xhci_trb_virt_to_dma (input_seg, start_trb); + end_dma = xhci_trb_virt_to_dma (input_seg, end_trb); + { + dev_warn (xhci_to_hcd (xhci)->self.controller, + "%d\n", test_number); + dev_warn (xhci_to_hcd (xhci)->self.controller, + "Expected seg %p, got seg %p\n", result_seg, seg); + } +} +xhci_check_trb_in_td_math (struct xhci_hcd *xhci, gfp_t mem_flags) +{ + struct + { + dma_addr_t input_dma; + struct xhci_segment *result_seg; + } + simple_test_vector[] = + { + { + 0, ((void *) 0) + } + , + { + xhci->event_ring->first_seg->dma - 16, ((void *) 0)} + , + { + xhci->event_ring->first_seg->dma - 1, ((void *) 0)} + , + { + 
xhci->event_ring->first_seg->dma, xhci->event_ring->first_seg} + , + { + xhci->event_ring->first_seg->dma + (64 - 1) * 16, + xhci->event_ring->first_seg + } + , + { + xhci->event_ring->first_seg->dma + (64 - 1) * 16 + 1, ((void *) 0)} + , + { + xhci->event_ring->first_seg->dma + (64) * 16, ((void *) 0)} + , + { + (dma_addr_t) (~0), ((void *) 0) + } + }; + struct + { + struct xhci_segment *input_seg; + union xhci_trb *start_trb; + union xhci_trb *end_trb; + dma_addr_t input_dma; + struct xhci_segment *result_seg; + } + complex_test_vector[] = + { + { + .input_seg = xhci->event_ring->first_seg,.start_trb = + xhci->event_ring->first_seg->trbs,.end_trb = + &xhci->event_ring->first_seg->trbs[64 - 1],.input_dma = + xhci->cmd_ring->first_seg->dma,.result_seg = ((void *) 0), + } + , + { + .input_seg = xhci->event_ring->first_seg,.start_trb = + xhci->event_ring->first_seg->trbs,.end_trb = + &xhci->cmd_ring->first_seg->trbs[64 - 1],.input_dma = + xhci->cmd_ring->first_seg->dma,.result_seg = ((void *) 0), + } + , + { + .input_seg = xhci->event_ring->first_seg,.start_trb = + xhci->cmd_ring->first_seg->trbs,.end_trb = + &xhci->cmd_ring->first_seg->trbs[64 - 1],.input_dma = + xhci->cmd_ring->first_seg->dma,.result_seg = ((void *) 0), + } + , + { + .input_seg = xhci->event_ring->first_seg,.start_trb = + &xhci->event_ring->first_seg->trbs[0],.end_trb = + &xhci->event_ring->first_seg->trbs[3],.input_dma = + xhci->event_ring->first_seg->dma + 4 * 16,.result_seg = ((void *) 0), + } + , + { + .input_seg = xhci->event_ring->first_seg,.start_trb = + &xhci->event_ring->first_seg->trbs[3],.end_trb = + &xhci->event_ring->first_seg->trbs[6],.input_dma = + xhci->event_ring->first_seg->dma + 2 * 16,.result_seg = ((void *) 0), + } + , + { + .input_seg = xhci->event_ring->first_seg,.start_trb = + &xhci->event_ring->first_seg->trbs[64 - 3],.end_trb = + &xhci->event_ring->first_seg->trbs[1],.input_dma = + xhci->event_ring->first_seg->dma + 2 * 16,.result_seg = ((void *) 0), + } + , + { + .input_seg = xhci->event_ring->first_seg,.start_trb = + &xhci->event_ring->first_seg->trbs[64 - 3],.end_trb = + &xhci->event_ring->first_seg->trbs[1],.input_dma = + xhci->event_ring->first_seg->dma + (64 - 4) * 16,.result_seg = + ((void *) 0), + } + , + { + .input_seg = xhci->event_ring->first_seg,.start_trb = + &xhci->event_ring->first_seg->trbs[64 - 3],.end_trb = + &xhci->event_ring->first_seg->trbs[1],.input_dma = + xhci->cmd_ring->first_seg->dma + 2 * 16,.result_seg = ((void *) 0), + } + }; + unsigned int num_tests; + int i, ret; + num_tests = + (sizeof (simple_test_vector) / sizeof ((simple_test_vector)[0]) + + (sizeof (struct + { + } + ))); + for (i = 0; i < num_tests; i++) + { + ret = + xhci_test_trb_in_td (xhci, xhci->event_ring->first_seg, + xhci->event_ring->first_seg->trbs, + &xhci->event_ring->first_seg->trbs[64 - 1], + simple_test_vector[i].input_dma, + simple_test_vector[i].result_seg, "Simple", i); + if (ret < 0) + return ret; + } + for (i = 0; i < num_tests; i++) + { + ret = + xhci_test_trb_in_td (xhci, complex_test_vector[i].input_seg, + complex_test_vector[i].start_trb, + complex_test_vector[i].end_trb, + complex_test_vector[i].input_dma, + complex_test_vector[i].result_seg, "Complex", i); + if (ret < 0) + return ret; + } +} --- a/src/gcc/testsuite/gcc.target/arm/atomic-comp-swap-release-acquire.c +++ b/src/gcc/testsuite/gcc.target/arm/atomic-comp-swap-release-acquire.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_arch_v8a_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options 
arm_arch_v8a } */ + +#include "../aarch64/atomic-comp-swap-release-acquire.x" + +/* { dg-final { scan-assembler-times "ldaex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 4 } } */ +/* { dg-final { scan-assembler-times "stlex" 4 } } */ +/* { dg-final { scan-assembler-not "dmb" } } */ --- a/src/gcc/testsuite/gcc.target/arm/pr19599.c +++ b/src/gcc/testsuite/gcc.target/arm/pr19599.c @@ -0,0 +1,10 @@ +/* { dg-skip-if "need at least armv5te" { *-*-* } { "-march=armv[234]*" "-mthumb" } { "" } } */ +/* { dg-options "-O2 -march=armv5te -marm" } */ +/* { dg-final { scan-assembler "bx" } } */ + +int (*indirect_func)(); + +int indirect_call() +{ + return indirect_func(); +} --- a/src/gcc/testsuite/gcc.target/arm/crypto-vstrq_p128.c +++ b/src/gcc/testsuite/gcc.target/arm/crypto-vstrq_p128.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void +foo (poly128_t* ptr, poly128_t val) +{ + vstrq_p128 (ptr, val); +} + +/* { dg-final { scan-assembler "vst1.64\t{d\[0-9\]+-d\[0-9\]+}.*" } } */ --- a/src/gcc/testsuite/gcc.target/arm/atomic-op-seq_cst.c +++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-seq_cst.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_arch_v8a_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_arch_v8a } */ + +#include "../aarch64/atomic-op-seq_cst.x" + +/* { dg-final { scan-assembler-times "ldaex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ +/* { dg-final { scan-assembler-times "stlex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ +/* { dg-final { scan-assembler-not "dmb" } } */ --- a/src/gcc/testsuite/gcc.target/arm/vselgedf.c +++ b/src/gcc/testsuite/gcc.target/arm/vselgedf.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_vfp_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_v8_vfp } */ + +double +foo (double x, double y) +{ + volatile int i = 0; + return i >= 0 ? 
x : y; +} + +/* { dg-final { scan-assembler-times "vselge.f64\td\[0-9\]+" 1 } } */ --- a/src/gcc/testsuite/gcc.target/arm/atomic-op-consume.c +++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-consume.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_arch_v8a_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_arch_v8a } */ + +#include "../aarch64/atomic-op-consume.x" + +/* { dg-final { scan-assembler-times "ldrex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ +/* { dg-final { scan-assembler-times "strex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ +/* { dg-final { scan-assembler-not "dmb" } } */ --- a/src/gcc/testsuite/gcc.target/arm/atomic-op-char.c +++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-char.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_arch_v8a_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_arch_v8a } */ + +#include "../aarch64/atomic-op-char.x" + +/* { dg-final { scan-assembler-times "ldrexb\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ +/* { dg-final { scan-assembler-times "strexb\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ +/* { dg-final { scan-assembler-not "dmb" } } */ --- a/src/gcc/testsuite/gcc.target/arm/thumb-ltu.c +++ b/src/gcc/testsuite/gcc.target/arm/thumb-ltu.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-skip-if "incompatible options" { arm*-*-* } { "-march=*" } { "-march=armv6" "-march=armv6j" "-march=armv6z" } } */ +/* { dg-require-effective-target arm_thumb1_ok } */ /* { dg-options "-mcpu=arm1136jf-s -mthumb -O2" } */ void f(unsigned a, unsigned b, unsigned c, unsigned d) --- a/src/gcc/testsuite/gcc.target/arm/vselnesf.c +++ b/src/gcc/testsuite/gcc.target/arm/vselnesf.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_vfp_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_v8_vfp } */ + +float +foo (float x, float y) +{ + volatile int i = 0; + return i != 0 ? x : y; +} + +/* { dg-final { scan-assembler-times "vseleq.f32\ts\[0-9\]+" 1 } } */ --- a/src/gcc/testsuite/gcc.target/arm/crypto-vaesmcq_u8.c +++ b/src/gcc/testsuite/gcc.target/arm/crypto-vaesmcq_u8.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +int +foo (void) +{ + uint8x16_t a, b; + int i = 0; + + for (i = 0; i < 16; ++i) + a[i] = i; + + b = vaesmcq_u8 (a); + return b[0]; +} + +/* { dg-final { scan-assembler "aesmc.8\tq\[0-9\]+, q\[0-9\]+" } } */ --- a/src/gcc/testsuite/gcc.target/arm/vselvcsf.c +++ b/src/gcc/testsuite/gcc.target/arm/vselvcsf.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_vfp_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_v8_vfp } */ + +float +foo (float x, float y) +{ + return !__builtin_isunordered (x, y) ? 
x : y; +} + +/* { dg-final { scan-assembler-times "vselvs.f32\ts\[0-9\]+" 1 } } */ --- a/src/gcc/testsuite/gcc.target/arm/crypto-vsha256hq_u32.c +++ b/src/gcc/testsuite/gcc.target/arm/crypto-vsha256hq_u32.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +int +foo (void) +{ + uint32x4_t a = {0xd, 0xe, 0xa, 0xd}; + uint32x4_t b = {0, 1, 2, 3}; + uint32x4_t c = {3, 2, 1, 0}; + + uint32x4_t res = vsha256hq_u32 (a, b, c); + return res[0]; +} + +/* { dg-final { scan-assembler "sha256h.32\tq\[0-9\]+, q\[0-9\]+, q\[0-9\]" } } */ --- a/src/gcc/testsuite/gcc.target/arm/minmax_minus.c +++ b/src/gcc/testsuite/gcc.target/arm/minmax_minus.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_cond_exec } */ +/* { dg-options "-O2" } */ + +#define MAX(a, b) (a > b ? a : b) +int +foo (int a, int b, int c) +{ + return c - MAX (a, b); +} + +/* { dg-final { scan-assembler-not "mov" } } */ --- a/src/gcc/testsuite/gcc.target/arm/atomic-op-release.c +++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-release.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_arch_v8a_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_arch_v8a } */ + +#include "../aarch64/atomic-op-release.x" + +/* { dg-final { scan-assembler-times "ldrex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ +/* { dg-final { scan-assembler-times "stlex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ +/* { dg-final { scan-assembler-not "dmb" } } */ --- a/src/gcc/testsuite/gcc.target/arm/vselvssf.c +++ b/src/gcc/testsuite/gcc.target/arm/vselvssf.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_vfp_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_v8_vfp } */ + +float +foo (float x, float y) +{ + return __builtin_isunordered (x, y) ? x : y; +} + +/* { dg-final { scan-assembler-times "vselvs.f32\ts\[0-9\]+" 1 } } */ --- a/src/gcc/testsuite/gcc.target/arm/crypto-vsha1cq_u32.c +++ b/src/gcc/testsuite/gcc.target/arm/crypto-vsha1cq_u32.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +int +foo (void) +{ + uint32_t hash = 0xdeadbeef; + uint32x4_t a = {0, 1, 2, 3}; + uint32x4_t b = {3, 2, 1, 0}; + + uint32x4_t res = vsha1cq_u32 (a, hash, b); + return res[0]; +} + +/* { dg-final { scan-assembler "sha1c.32\tq\[0-9\]+, q\[0-9\]+" } } */ --- a/src/gcc/testsuite/gcc.target/arm/crypto-vaeseq_u8.c +++ b/src/gcc/testsuite/gcc.target/arm/crypto-vaeseq_u8.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +int +foo (void) +{ + uint8x16_t a, b, c; + int i = 0; + + for (i = 0; i < 16; ++i) + { + a[i] = i; + b[i] = 15 - i; + } + c = vaeseq_u8 (a, b); + return c[0]; +} + +/* { dg-final { scan-assembler "aese.8\tq\[0-9\]+, q\[0-9\]+" } } */ --- a/src/gcc/testsuite/gcc.target/arm/vect-rounding-roundf.c +++ b/src/gcc/testsuite/gcc.target/arm/vect-rounding-roundf.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_neon_ok } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize" } */ +/* { dg-add-options arm_v8_neon } */ + +#define N 32 + +void +foo (float *output, float *input) +{ + int i = 0; + /* Vectorizable. 
*/ + for (i = 0; i < N; i++) + output[i] = __builtin_roundf (input[i]); +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_call_roundf } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon-vtst_p64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon-vtst_p64.c @@ -0,0 +1,38 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-require-effective-target arm_neon_hw } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" +#include + +extern void abort (void); + +int +main (void) +{ + uint64_t args[] = { 0x0, 0xdeadbeef, ~0xdeadbeef, 0xffff, + ~0xffff, 0xffffffff, ~0xffffffff, ~0x0 }; + int i, j; + + for (i = 0; i < sizeof (args) / sizeof (args[0]); ++i) + { + for (j = 0; j < sizeof (args) / sizeof (args[0]); ++j) + { + uint64_t a1 = args[i]; + uint64_t a2 = args[j]; + uint64_t res = vtst_p64 (vreinterpret_p64_u64 (a1), + vreinterpret_p64_u64 (a2)); + uint64_t exp = (a1 & a2) ? ~0x0 : 0x0; + + if (res != exp) + { + fprintf (stderr, "vtst_p64 (a1= %lx, a2= %lx)" + " returned %lx, expected %lx\n", + a1, a2, res, exp); + abort (); + } + } + } + return 0; +} --- a/src/gcc/testsuite/gcc.target/arm/neon-for-64bits-1.c +++ b/src/gcc/testsuite/gcc.target/arm/neon-for-64bits-1.c @@ -0,0 +1,54 @@ +/* Check that Neon is *not* used by default to handle 64-bits scalar + operations. */ + +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_neon } */ + +typedef long long i64; +typedef unsigned long long u64; +typedef unsigned int u32; +typedef int i32; + +/* Unary operators */ +#define UNARY_OP(name, op) \ + void unary_##name(u64 *a, u64 *b) { *a = op (*b + 0x1234567812345678ULL) ; } + +/* Binary operators */ +#define BINARY_OP(name, op) \ + void binary_##name(u64 *a, u64 *b, u64 *c) { *a = *b op *c ; } + +/* Unsigned shift */ +#define SHIFT_U(name, op, amount) \ + void ushift_##name(u64 *a, u64 *b, int c) { *a = *b op amount; } + +/* Signed shift */ +#define SHIFT_S(name, op, amount) \ + void sshift_##name(i64 *a, i64 *b, int c) { *a = *b op amount; } + +UNARY_OP(not, ~) + +BINARY_OP(add, +) +BINARY_OP(sub, -) +BINARY_OP(and, &) +BINARY_OP(or, |) +BINARY_OP(xor, ^) + +SHIFT_U(right1, >>, 1) +SHIFT_U(right2, >>, 2) +SHIFT_U(right5, >>, 5) +SHIFT_U(rightn, >>, c) + +SHIFT_S(right1, >>, 1) +SHIFT_S(right2, >>, 2) +SHIFT_S(right5, >>, 5) +SHIFT_S(rightn, >>, c) + +/* { dg-final {scan-assembler-times "vmvn" 0} } */ +/* { dg-final {scan-assembler-times "vadd" 0} } */ +/* { dg-final {scan-assembler-times "vsub" 0} } */ +/* { dg-final {scan-assembler-times "vand" 0} } */ +/* { dg-final {scan-assembler-times "vorr" 0} } */ +/* { dg-final {scan-assembler-times "veor" 0} } */ +/* { dg-final {scan-assembler-times "vshr" 0} } */ --- a/src/gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c +++ b/src/gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c @@ -4,7 +4,7 @@ #include -char dest[16]; +char dest[16] = { 0 }; void aligned_dest (char *src) { @@ -14,7 +14,10 @@ /* Expect a multi-word store for the main part of the copy, but subword loads/stores for the remainder. */ -/* { dg-final { scan-assembler-times "stmia" 1 } } */ +/* { dg-final { scan-assembler-times "ldmia" 0 } } */ +/* { dg-final { scan-assembler-times "ldrd" 0 } } */ +/* { dg-final { scan-assembler-times "stmia" 1 { target { ! 
{ arm_prefer_ldrd_strd } } } } } */ +/* { dg-final { scan-assembler-times "strd" 1 { target { arm_prefer_ldrd_strd } } } } */ /* { dg-final { scan-assembler-times "ldrh" 1 } } */ /* { dg-final { scan-assembler-times "strh" 1 } } */ /* { dg-final { scan-assembler-times "ldrb" 1 } } */ --- a/src/gcc/testsuite/gcc.target/arm/crypto-vsha1h_u32.c +++ b/src/gcc/testsuite/gcc.target/arm/crypto-vsha1h_u32.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +int +foo (void) +{ + uint32_t val = 0xdeadbeef; + return vsha1h_u32 (val); +} + +/* { dg-final { scan-assembler "sha1h.32\tq\[0-9\]+, q\[0-9\]+" } } */ --- a/src/gcc/testsuite/gcc.target/arm/xordi3-opt.c +++ b/src/gcc/testsuite/gcc.target/arm/xordi3-opt.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-O1" } */ + +unsigned long long xor64 (unsigned long long input) +{ + return input ^ 0x200000004ULL; +} + +/* { dg-final { scan-assembler-not "mov\[\\t \]+.+,\[\\t \]*.+" } } */ --- a/src/gcc/testsuite/gcc.target/arm/crypto-vsha256su1q_u32.c +++ b/src/gcc/testsuite/gcc.target/arm/crypto-vsha256su1q_u32.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +int +foo (void) +{ + uint32x4_t a = {0xd, 0xe, 0xa, 0xd}; + uint32x4_t b = {0, 1, 2, 3}; + uint32x4_t c = {3, 2, 1, 0}; + + uint32x4_t res = vsha256su1q_u32 (a, b, c); + return res[0]; +} + +/* { dg-final { scan-assembler "sha256su1.32\tq\[0-9\]+, q\[0-9\]+, q\[0-9\]" } } */ --- a/src/gcc/testsuite/gcc.target/arm/atomic-op-acq_rel.c +++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-acq_rel.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_arch_v8a_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_arch_v8a } */ + +#include "../aarch64/atomic-op-acq_rel.x" + +/* { dg-final { scan-assembler-times "ldaex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ +/* { dg-final { scan-assembler-times "stlex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ +/* { dg-final { scan-assembler-not "dmb" } } */ --- a/src/gcc/testsuite/gcc.target/arm/vselltsf.c +++ b/src/gcc/testsuite/gcc.target/arm/vselltsf.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_vfp_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_v8_vfp } */ + +float +foo (float x, float y) +{ + volatile int i = 0; + return i < 0 ? x : y; +} + +/* { dg-final { scan-assembler-times "vselge.f32\ts\[0-9\]+" 1 } } */ --- a/src/gcc/testsuite/gcc.target/arm/vselnedf.c +++ b/src/gcc/testsuite/gcc.target/arm/vselnedf.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_vfp_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_v8_vfp } */ + +double +foo (double x, double y) +{ + volatile int i = 0; + return i != 0 ? x : y; +} + +/* { dg-final { scan-assembler-times "vseleq.f64\td\[0-9\]+" 1 } } */ --- a/src/gcc/testsuite/gcc.target/arm/vselvcdf.c +++ b/src/gcc/testsuite/gcc.target/arm/vselvcdf.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_vfp_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_v8_vfp } */ + +double +foo (double x, double y) +{ + return !__builtin_isunordered (x, y) ? 
x : y; +} + +/* { dg-final { scan-assembler-times "vselvs.f64\td\[0-9\]+" 1 } } */ --- a/src/gcc/testsuite/gcc.target/arm/vect-rounding-btruncf.c +++ b/src/gcc/testsuite/gcc.target/arm/vect-rounding-btruncf.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_neon_ok } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize" } */ +/* { dg-add-options arm_v8_neon } */ + +#define N 32 + +void +foo (float *output, float *input) +{ + int i = 0; + /* Vectorizable. */ + for (i = 0; i < N; i++) + output[i] = __builtin_truncf (input[i]); +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_call_btruncf } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ --- a/src/gcc/testsuite/gcc.target/arm/vseleqsf.c +++ b/src/gcc/testsuite/gcc.target/arm/vseleqsf.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_vfp_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_v8_vfp } */ + +float +foo (float x, float y) +{ + volatile int i = 0; + return i == 0 ? x : y; +} + +/* { dg-final { scan-assembler-times "vseleq.f32\ts\[0-9\]+" 1 } } */ --- a/src/gcc/testsuite/gcc.target/arm/ivopts-orig_biv-inc.c +++ b/src/gcc/testsuite/gcc.target/arm/ivopts-orig_biv-inc.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-ivopts-details" } */ +/* { dg-skip-if "" { arm_thumb1 } } */ + +extern char *__ctype_ptr__; + +unsigned char * foo(unsigned char *ReadPtr) +{ + + unsigned char c; + + while (!(((__ctype_ptr__+sizeof(""[*ReadPtr]))[(int)(*ReadPtr)])&04) == (!(0))) + ReadPtr++; + + return ReadPtr; +} + +/* { dg-final { scan-tree-dump-times "original biv" 2 "ivopts"} } */ +/* { dg-final { cleanup-tree-dump "ivopts" } } */ --- a/src/gcc/testsuite/gcc.target/arm/vselvsdf.c +++ b/src/gcc/testsuite/gcc.target/arm/vselvsdf.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_vfp_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_v8_vfp } */ + +double +foo (double x, double y) +{ + return __builtin_isunordered (x, y) ? x : y; +} + +/* { dg-final { scan-assembler-times "vselvs.f64\td\[0-9\]+" 1 } } */ --- a/src/gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c +++ b/src/gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c @@ -4,7 +4,7 @@ #include -char src[16]; +char src[16] = {0}; void aligned_src (char *dest) { @@ -14,8 +14,11 @@ /* Expect a multi-word load for the main part of the copy, but subword loads/stores for the remainder. */ -/* { dg-final { scan-assembler-times "ldmia" 1 } } */ -/* { dg-final { scan-assembler-times "ldrh" 1 } } */ +/* { dg-final { scan-assembler-times "ldmia" 1 { target { ! { arm_prefer_ldrd_strd } } } } } */ +/* { dg-final { scan-assembler-times "ldrd" 1 { target { arm_prefer_ldrd_strd } } } } */ +/* { dg-final { scan-assembler-times "strd" 0 } } */ +/* { dg-final { scan-assembler-times "stm" 0 } } */ +/* { dg-final { scan-assembler-times "ldrh" 1 { target { ! { arm_prefer_ldrd_strd } } } } } */ /* { dg-final { scan-assembler-times "strh" 1 } } */ -/* { dg-final { scan-assembler-times "ldrb" 1 } } */ +/* { dg-final { scan-assembler-times "ldrb" 1 { target { ! 
{ arm_prefer_ldrd_strd } } } } } */ /* { dg-final { scan-assembler-times "strb" 1 } } */ --- a/src/gcc/testsuite/gcc.target/arm/pr46975-2.c +++ b/src/gcc/testsuite/gcc.target/arm/pr46975-2.c @@ -0,0 +1,10 @@ +/* { dg-options "-mthumb -O2" } */ +/* { dg-require-effective-target arm_thumb2_ok } */ +/* { dg-final { scan-assembler "sub" } } */ +/* { dg-final { scan-assembler "clz" } } */ +/* { dg-final { scan-assembler "lsr.*#5" } } */ + +int foo (int s) +{ + return s == 1; +} --- a/src/gcc/testsuite/gcc.target/arm/neon-vceq_p64.c +++ b/src/gcc/testsuite/gcc.target/arm/neon-vceq_p64.c @@ -0,0 +1,38 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-require-effective-target arm_neon_hw } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" +#include + +extern void abort (void); + +int +main (void) +{ + uint64_t args[] = { 0x0, 0xdeadbeef, ~0xdeadbeef, 0xffff, + ~0xffff, 0xffffffff, ~0xffffffff, ~0x0 }; + int i, j; + + for (i = 0; i < sizeof (args) / sizeof (args[0]); ++i) + { + for (j = 0; j < sizeof (args) / sizeof (args[0]); ++j) + { + uint64_t a1 = args[i]; + uint64_t a2 = args[j]; + uint64_t res = vceq_p64 (vreinterpret_p64_u64 (a1), + vreinterpret_p64_u64 (a2)); + uint64_t exp = (a1 == a2) ? ~0x0 : 0x0; + + if (res != exp) + { + fprintf (stderr, "vceq_p64 (a1= %lx, a2= %lx)" + " returned %lx, expected %lx\n", + a1, a2, res, exp); + abort (); + } + } + } + return 0; +} --- a/src/gcc/testsuite/gcc.target/arm/anddi3-opt2.c +++ b/src/gcc/testsuite/gcc.target/arm/anddi3-opt2.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-O1" } */ + +long long muld(long long X, long long Y) +{ + return X & ~1; +} + +/* { dg-final { scan-assembler-not "and\[\\t \]+.+,\[\\t \]*.+,\[\\t \]*.+" } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon-vcond-ltgt.c +++ b/src/gcc/testsuite/gcc.target/arm/neon-vcond-ltgt.c @@ -15,4 +15,4 @@ /* { dg-final { scan-assembler-times "vcgt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" 2 } } */ /* { dg-final { scan-assembler "vorr\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */ -/* { dg-final { scan-assembler "vbsl\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */ +/* { dg-final { scan-assembler "vbsl|vbit|vbif\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */ --- a/src/gcc/testsuite/gcc.target/arm/crypto-vsha256h2q_u32.c +++ b/src/gcc/testsuite/gcc.target/arm/crypto-vsha256h2q_u32.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +int +foo (void) +{ + uint32x4_t a = {0xd, 0xe, 0xa, 0xd}; + uint32x4_t b = {0, 1, 2, 3}; + uint32x4_t c = {3, 2, 1, 0}; + + uint32x4_t res = vsha256h2q_u32 (a, b, c); + return res[0]; +} + +/* { dg-final { scan-assembler "sha256h2.32\tq\[0-9\]+, q\[0-9\]+, q\[0-9\]" } } */ --- a/src/gcc/testsuite/gcc.target/arm/vselltdf.c +++ b/src/gcc/testsuite/gcc.target/arm/vselltdf.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_vfp_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_v8_vfp } */ + +double +foo (double x, double y) +{ + volatile int i = 0; + return i < 0 ? 
x : y; +} + +/* { dg-final { scan-assembler-times "vselge.f64\td\[0-9\]+" 1 } } */ --- a/src/gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c +++ b/src/gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c @@ -4,8 +4,8 @@ #include -char src[16]; -char dest[16]; +char src[16] = { 0 }; +char dest[16] = { 0 }; void aligned_both (void) { @@ -14,5 +14,9 @@ /* We know both src and dest to be aligned: expect multiword loads/stores. */ -/* { dg-final { scan-assembler-times "ldmia" 1 } } */ -/* { dg-final { scan-assembler-times "stmia" 1 } } */ +/* { dg-final { scan-assembler-times "ldmia" 1 { target { ! { arm_prefer_ldrd_strd } } } } } */ +/* { dg-final { scan-assembler-times "stmia" 1 { target { ! { arm_prefer_ldrd_strd } } } } } */ +/* { dg-final { scan-assembler "ldrd" { target { arm_prefer_ldrd_strd } } } } */ +/* { dg-final { scan-assembler-times "ldm" 0 { target { arm_prefer_ldrd_strd } } } } */ +/* { dg-final { scan-assembler "strd" { target { arm_prefer_ldrd_strd } } } } */ +/* { dg-final { scan-assembler-times "stm" 0 { target { arm_prefer_ldrd_strd } } } } */ --- a/src/gcc/testsuite/gcc.target/arm/vseleqdf.c +++ b/src/gcc/testsuite/gcc.target/arm/vseleqdf.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_vfp_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_v8_vfp } */ + +double +foo (double x, double y) +{ + volatile int i = 0; + return i == 0 ? x : y; +} + +/* { dg-final { scan-assembler-times "vseleq.f64\td\[0-9\]+" 1 } } */ --- a/src/gcc/testsuite/gcc.target/arm/atomic-op-acquire.c +++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-acquire.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_arch_v8a_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_arch_v8a } */ + +#include "../aarch64/atomic-op-acquire.x" + +/* { dg-final { scan-assembler-times "ldaex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ +/* { dg-final { scan-assembler-times "strex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ +/* { dg-final { scan-assembler-not "dmb" } } */ --- a/src/gcc/testsuite/gcc.target/arm/vsellesf.c +++ b/src/gcc/testsuite/gcc.target/arm/vsellesf.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_vfp_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_v8_vfp } */ + +float +foo (float x, float y) +{ + volatile int i = 0; + return i <= 0 ? 
x : y; +} + +/* { dg-final { scan-assembler-times "vselgt.f32\ts\[0-9\]+" 1 } } */ --- a/src/gcc/testsuite/gcc.target/arm/neon-vcond-unordered.c +++ b/src/gcc/testsuite/gcc.target/arm/neon-vcond-unordered.c @@ -16,4 +16,4 @@ /* { dg-final { scan-assembler "vcgt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */ /* { dg-final { scan-assembler "vcge\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */ /* { dg-final { scan-assembler "vorr\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */ -/* { dg-final { scan-assembler "vbsl\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */ +/* { dg-final { scan-assembler "vbsl|vbit|vbif\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */ --- a/src/gcc/testsuite/gcc.target/arm/crypto-vsha1su0q_u32.c +++ b/src/gcc/testsuite/gcc.target/arm/crypto-vsha1su0q_u32.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +int +foo (void) +{ + uint32x4_t a = {0xd, 0xe, 0xa, 0xd}; + uint32x4_t b = {0, 1, 2, 3}; + uint32x4_t c = {3, 2, 1, 0}; + + uint32x4_t res = vsha1su0q_u32 (a, b, c); + return res[0]; +} + +/* { dg-final { scan-assembler "sha1su0.32\tq\[0-9\]+, q\[0-9\]+, q\[0-9\]" } } */ --- a/src/gcc/testsuite/gcc.target/arm/crypto-vmull_high_p64.c +++ b/src/gcc/testsuite/gcc.target/arm/crypto-vmull_high_p64.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +poly128_t +foo (void) +{ + poly64x2_t a = { 0xdeadbeef, 0xadabcaca }; + poly64x2_t b = { 0xdcdcdcdc, 0xbdbdbdbd }; + return vmull_high_p64 (a, b); +} + +/* { dg-final { scan-assembler "vmull.p64.*" } } */ --- a/src/gcc/testsuite/gcc.target/arm/atomic-op-int.c +++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-int.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_arch_v8a_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_arch_v8a } */ + +#include "../aarch64/atomic-op-int.x" + +/* { dg-final { scan-assembler-times "ldrex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ +/* { dg-final { scan-assembler-times "strex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ +/* { dg-final { scan-assembler-not "dmb" } } */ --- a/src/gcc/testsuite/gcc.target/arm/crypto-vsha1mq_u32.c +++ b/src/gcc/testsuite/gcc.target/arm/crypto-vsha1mq_u32.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +int +foo (void) +{ + uint32_t hash = 0xdeadbeef; + uint32x4_t a = {0, 1, 2, 3}; + uint32x4_t b = {3, 2, 1, 0}; + + uint32x4_t res = vsha1mq_u32 (a, hash, b); + return res[0]; +} + +/* { dg-final { scan-assembler "sha1m.32\tq\[0-9\]+, q\[0-9\]+" } } */ --- a/src/gcc/testsuite/gcc.target/arm/crypto-vldrq_p128.c +++ b/src/gcc/testsuite/gcc.target/arm/crypto-vldrq_p128.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +poly128_t +foo (poly128_t* ptr) +{ + return vldrq_p128 (ptr); +} + +/* { dg-final { scan-assembler "vld1.64\t{d\[0-9\]+-d\[0-9\]+}.*" } } */ --- a/src/gcc/testsuite/gcc.target/arm/atomic-op-short.c +++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-short.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_arch_v8a_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options 
arm_arch_v8a } */ + +#include "../aarch64/atomic-op-short.x" + +/* { dg-final { scan-assembler-times "ldrexh\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ +/* { dg-final { scan-assembler-times "strexh\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ +/* { dg-final { scan-assembler-not "dmb" } } */ --- a/src/gcc/testsuite/gcc.target/arm/pr40887.c +++ b/src/gcc/testsuite/gcc.target/arm/pr40887.c @@ -2,9 +2,9 @@ /* { dg-options "-O2 -march=armv5te" } */ /* { dg-final { scan-assembler "blx" } } */ -int (*indirect_func)(); +int (*indirect_func)(int x); int indirect_call() { - return indirect_func(); + return indirect_func(20) + indirect_func (40); } --- a/src/gcc/testsuite/gcc.target/arm/crypto-vaesimcq_u8.c +++ b/src/gcc/testsuite/gcc.target/arm/crypto-vaesimcq_u8.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +int +foo (void) +{ + uint8x16_t a, b; + int i = 0; + + for (i = 0; i < 16; ++i) + a[i] = i; + + b = vaesimcq_u8 (a); + return b[0]; +} + +/* { dg-final { scan-assembler "aesimc.8\tq\[0-9\]+, q\[0-9\]+" } } */ --- a/src/gcc/testsuite/gcc.target/arm/vect-rounding-ceilf.c +++ b/src/gcc/testsuite/gcc.target/arm/vect-rounding-ceilf.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_neon_ok } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize" } */ +/* { dg-add-options arm_v8_neon } */ + +#define N 32 + +void +foo (float *output, float *input) +{ + int i = 0; + /* Vectorizable. */ + for (i = 0; i < N; i++) + output[i] = __builtin_ceilf (input[i]); +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_call_ceilf } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ --- a/src/gcc/testsuite/gcc.target/arm/vselledf.c +++ b/src/gcc/testsuite/gcc.target/arm/vselledf.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_vfp_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_v8_vfp } */ + +double +foo (double x, double y) +{ + volatile int i = 0; + return i <= 0 ? x : y; +} + +/* { dg-final { scan-assembler-times "vselgt.f64\td\[0-9\]+" 1 } } */ --- a/src/gcc/testsuite/gcc.target/arm/vselgtsf.c +++ b/src/gcc/testsuite/gcc.target/arm/vselgtsf.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_vfp_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_v8_vfp } */ + +float +foo (float x, float y) +{ + volatile int i = 0; + return i > 0 ? 
x : y; +} + +/* { dg-final { scan-assembler-times "vselgt.f32\ts\[0-9\]+" 1 } } */ --- a/src/gcc/testsuite/gcc.target/arm/pr58578.c +++ b/src/gcc/testsuite/gcc.target/arm/pr58578.c @@ -0,0 +1,54 @@ + +/* PR target/58578 */ +/* { dg-do run } */ +/* { dg-options "-O1" } */ + +#include + +typedef struct { + long _prec; + int _flag; + long _exp; +} __my_st_t; + +typedef __my_st_t *__my_st_ptr; + +int +_test_fn (__my_st_ptr y, const __my_st_ptr xt) +{ + int inexact; + if (xt->_exp != -2147483647L) + { + (y->_flag = xt->_flag); + } + + do { + __my_st_ptr _y = y; + long _err1 = -2 * xt->_exp; + long _err2 = 2; + if (0 < _err1) + { + unsigned long _err = (unsigned long) _err1 + _err2; + if (__builtin_expect(!!(_err > _y->_prec + 1), 0)) + return 2; + return 3; + } + } while (0); + + return 0; +} + +int main () +{ + __my_st_t x, y; + long pz; + int inex; + + x._prec = 914; + y._exp = 18; + if (_test_fn (&x, &y)) + { + abort(); + } + return 0; +} --- a/src/gcc/testsuite/gcc.target/arm/neon-vcond-gt.c +++ b/src/gcc/testsuite/gcc.target/arm/neon-vcond-gt.c @@ -14,4 +14,4 @@ } /* { dg-final { scan-assembler "vcgt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */ -/* { dg-final { scan-assembler "vbit\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */ +/* { dg-final { scan-assembler "vbsl|vbit|vbif\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */ --- a/src/gcc/testsuite/gcc.target/arm/pr57637.c +++ b/src/gcc/testsuite/gcc.target/arm/pr57637.c @@ -0,0 +1,206 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fno-inline" } */ + +typedef struct _GtkCssStyleProperty GtkCssStyleProperty; + +struct _GtkCssStyleProperty +{ + int *initial_value; + unsigned int id; + unsigned int inherit :1; + unsigned int animated :1; + unsigned int affects_size :1; + unsigned int affects_font :1; + + int * parse_value; + int * query_value; + int * assign_value; +}; + +void +g_assertion_message_expr (const char *domain, + const char *file, + int line, + const char *func, + const char *expr) __attribute__((__noreturn__)); + +void +g_assertion_message_expr (const char *domain, + const char *file, + int line, + const char *func, + const char *expr) +{ + __builtin_abort (); +} +int +get_id (GtkCssStyleProperty *property) +{ + return 1; +} +int +_gtk_css_style_property_get_type () +{ + return 1; +} + +GtkCssStyleProperty * +g_object_new (int object_type, + const char *first_property_name, + ...) 
+{ + return (GtkCssStyleProperty *) __builtin_malloc (sizeof (GtkCssStyleProperty)); +} + +typedef enum { + INHERIT = (1 << 0), + ANIMATED = (1 << 1), + RESIZE = (1 << 2), + FONT = (1 << 3) +} GtkStylePropertyFlags; + +int t = 0; +void +gtk_css_style_property_register (const char * name, + int expected_id, + int value_type, + int flags, + int *parse_value, + int *query_value, + int *assign_value, + int *initial_value) +{ + GtkCssStyleProperty *node; + + do + { + if (__builtin_expect (__extension__ ( + { + int _g_boolean_var_; + if (initial_value != ((void *)0)) + _g_boolean_var_ = 1; + else + _g_boolean_var_ = 0; + _g_boolean_var_; + }), + 1)) + ; + else + g_assertion_message_expr ("Gtk", + "gtkcssstylepropertyimpl.c", + 85, + ((const char*) (__PRETTY_FUNCTION__)), + "initial_value != NULL"); + } while (0); + + do + { + if (__builtin_expect (__extension__ ( + { + int _g_boolean_var_; + if (parse_value != ((void *)0)) + _g_boolean_var_ = 1; + else + _g_boolean_var_ = 0; + _g_boolean_var_; + }), + 1)) + ; + else + g_assertion_message_expr ("Gtk", + "gtkcssstylepropertyimpl.c", + 86, + ((const char*) (__PRETTY_FUNCTION__)), + "parse_value != NULL"); + } while (0); + + do + { + if (__builtin_expect (__extension__ ( + { + int _g_boolean_var_; + if (value_type == ((int) ((1) << (2))) + || query_value != ((void *)0)) + _g_boolean_var_ = 1; + else + _g_boolean_var_ = 0; + _g_boolean_var_; + }), + 1)) + ; + else + g_assertion_message_expr ("Gtk", + "gtkcssstylepropertyimpl.c", + 87, ((const char*) (__PRETTY_FUNCTION__)), + "value_type == NONE || query_value != NULL"); + } while (0); + + /* FLAGS is changed in a cond_exec instruction with pr57637. */ + if (flags == 15) + t = 15; + + do + { + if (__builtin_expect (__extension__ ( + { + int _g_boolean_var_; + if (value_type == ((1) << (2)) + || assign_value != ((void *)0)) + _g_boolean_var_ = 1; + else + _g_boolean_var_ = 0; + _g_boolean_var_; + }), + 1)) + ; + else + g_assertion_message_expr ("Gtk", + "gtkcssstylepropertyimpl.c", + 88, ((const char*) (__PRETTY_FUNCTION__)), + "value_type == NONE || assign_value != NULL"); + } while (0); + + node = g_object_new ((_gtk_css_style_property_get_type ()), + "value-type", value_type, + "affects-size", (flags & RESIZE) ? (0) : (!(0)), + "affects-font", (flags & FONT) ? (!(0)) : (0), + "animated", (flags & ANIMATED) ? (!(0)) : (0), + "inherit", (flags & INHERIT) ? 
(!(0)) : (0), + "initial-value", initial_value, + "name", name, + ((void *)0)); + + node->parse_value = parse_value; + node->query_value = query_value; + node->assign_value = assign_value; + + do + { + if (__builtin_expect (__extension__ ( + { + int _g_boolean_var_; + if (get_id (node) == expected_id) + _g_boolean_var_ = 1; + else + _g_boolean_var_ = 0; + _g_boolean_var_; + }), + 1)) + ; + else + g_assertion_message_expr ("Gtk", + "gtkcssstylepropertyimpl.c", + 106, + ((const char*) (__PRETTY_FUNCTION__)), + "get_id (node) == expected_id"); + } while (0); +} + +int main () +{ + gtk_css_style_property_register ("test", 1, 4, 15, &t, &t, &t, &t); + + if (t != 15) + __builtin_abort (); + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/insv_2.c +++ b/src/gcc/testsuite/gcc.target/aarch64/insv_2.c @@ -0,0 +1,85 @@ +/* { dg-do run { target aarch64*-*-* } } */ +/* { dg-options "-O2 --save-temps -fno-inline" } */ +/* { dg-require-effective-target aarch64_big_endian } */ + +extern void abort (void); + +typedef struct bitfield +{ + unsigned short eight: 8; + unsigned short four: 4; + unsigned short five: 5; + unsigned short seven: 7; + unsigned int sixteen: 16; +} bitfield; + +bitfield +bfi1 (bitfield a) +{ + /* { dg-final { scan-assembler "bfi\tx\[0-9\]+, x\[0-9\]+, 56, 8" } } */ + a.eight = 3; + return a; +} + +bitfield +bfi2 (bitfield a) +{ + /* { dg-final { scan-assembler "bfi\tx\[0-9\]+, x\[0-9\]+, 43, 5" } } */ + a.five = 7; + return a; +} + +bitfield +movk (bitfield a) +{ + /* { dg-final { scan-assembler "movk\tx\[0-9\]+, 0x1d6b, lsl 16" } } */ + a.sixteen = 7531; + return a; +} + +bitfield +set1 (bitfield a) +{ + /* { dg-final { scan-assembler "orr\tx\[0-9\]+, x\[0-9\]+, 272678883688448" } } */ + a.five = 0x1f; + return a; +} + +bitfield +set0 (bitfield a) +{ + /* { dg-final { scan-assembler "and\tx\[0-9\]+, x\[0-9\]+, -272678883688449" } } */ + a.five = 0; + return a; +} + + +int +main (int argc, char** argv) +{ + static bitfield a; + bitfield b = bfi1 (a); + bitfield c = bfi2 (b); + bitfield d = movk (c); + + if (d.eight != 3) + abort (); + + if (d.five != 7) + abort (); + + if (d.sixteen != 7531) + abort (); + + d = set1 (d); + if (d.five != 0x1f) + abort (); + + d = set0 (d); + if (d.five != 0) + abort (); + + return 0; +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/vrecps.c +++ b/src/gcc/testsuite/gcc.target/aarch64/vrecps.c @@ -0,0 +1,144 @@ +/* { dg-do run } */ +/* { dg-options "-O3 --save-temps" } */ + +#include +#include +#include + +int +test_frecps_float32_t (void) +{ + int i; + float32_t value = 0.2; + float32_t reciprocal = 5.0; + float32_t step = vrecpes_f32 (value); + /* 3 steps should give us within ~0.001 accuracy. */ + for (i = 0; i < 3; i++) + step = step * vrecpss_f32 (step, value); + + return fabs (step - reciprocal) < 0.001; +} + +/* { dg-final { scan-assembler "frecpe\\ts\[0-9\]+, s\[0-9\]+" } } */ +/* { dg-final { scan-assembler "frecps\\ts\[0-9\]+, s\[0-9\]+, s\[0-9\]+" } } */ + +int +test_frecps_float32x2_t (void) +{ + int i; + int ret = 1; + + const float32_t value_pool[] = {0.2, 0.4}; + const float32_t reciprocal_pool[] = {5.0, 2.5}; + float32x2_t value = vld1_f32 (value_pool); + float32x2_t reciprocal = vld1_f32 (reciprocal_pool); + + float32x2_t step = vrecpe_f32 (value); + /* 3 steps should give us within ~0.001 accuracy. 
*/ + for (i = 0; i < 3; i++) + step = step * vrecps_f32 (step, value); + + ret &= fabs (vget_lane_f32 (step, 0) + - vget_lane_f32 (reciprocal, 0)) < 0.001; + ret &= fabs (vget_lane_f32 (step, 1) + - vget_lane_f32 (reciprocal, 1)) < 0.001; + + return ret; +} + +/* { dg-final { scan-assembler "frecpe\\tv\[0-9\]+.2s, v\[0-9\]+.2s" } } */ +/* { dg-final { scan-assembler "frecps\\tv\[0-9\]+.2s, v\[0-9\]+.2s, v\[0-9\]+.2s" } } */ + +int +test_frecps_float32x4_t (void) +{ + int i; + int ret = 1; + + const float32_t value_pool[] = {0.2, 0.4, 0.5, 0.8}; + const float32_t reciprocal_pool[] = {5.0, 2.5, 2.0, 1.25}; + float32x4_t value = vld1q_f32 (value_pool); + float32x4_t reciprocal = vld1q_f32 (reciprocal_pool); + + float32x4_t step = vrecpeq_f32 (value); + /* 3 steps should give us within ~0.001 accuracy. */ + for (i = 0; i < 3; i++) + step = step * vrecpsq_f32 (step, value); + + ret &= fabs (vgetq_lane_f32 (step, 0) + - vgetq_lane_f32 (reciprocal, 0)) < 0.001; + ret &= fabs (vgetq_lane_f32 (step, 1) + - vgetq_lane_f32 (reciprocal, 1)) < 0.001; + ret &= fabs (vgetq_lane_f32 (step, 2) + - vgetq_lane_f32 (reciprocal, 2)) < 0.001; + ret &= fabs (vgetq_lane_f32 (step, 3) + - vgetq_lane_f32 (reciprocal, 3)) < 0.001; + + return ret; +} + +/* { dg-final { scan-assembler "frecpe\\tv\[0-9\]+.4s, v\[0-9\]+.4s" } } */ +/* { dg-final { scan-assembler "frecps\\tv\[0-9\]+.4s, v\[0-9\]+.4s, v\[0-9\]+.4s" } } */ + +int +test_frecps_float64_t (void) +{ + int i; + float64_t value = 0.2; + float64_t reciprocal = 5.0; + float64_t step = vrecped_f64 (value); + /* 3 steps should give us within ~0.001 accuracy. */ + for (i = 0; i < 3; i++) + step = step * vrecpsd_f64 (step, value); + + return fabs (step - reciprocal) < 0.001; +} + +/* { dg-final { scan-assembler "frecpe\\td\[0-9\]+, d\[0-9\]+" } } */ +/* { dg-final { scan-assembler "frecps\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" } } */ + +int +test_frecps_float64x2_t (void) +{ + int i; + int ret = 1; + + const float64_t value_pool[] = {0.2, 0.4}; + const float64_t reciprocal_pool[] = {5.0, 2.5}; + float64x2_t value = vld1q_f64 (value_pool); + float64x2_t reciprocal = vld1q_f64 (reciprocal_pool); + + float64x2_t step = vrecpeq_f64 (value); + /* 3 steps should give us within ~0.001 accuracy. 
*/ + for (i = 0; i < 3; i++) + step = step * vrecpsq_f64 (step, value); + + ret &= fabs (vgetq_lane_f64 (step, 0) + - vgetq_lane_f64 (reciprocal, 0)) < 0.001; + ret &= fabs (vgetq_lane_f64 (step, 1) + - vgetq_lane_f64 (reciprocal, 1)) < 0.001; + + return ret; +} + +/* { dg-final { scan-assembler "frecpe\\tv\[0-9\]+.2d, v\[0-9\]+.2d" } } */ +/* { dg-final { scan-assembler "frecps\\tv\[0-9\]+.2d, v\[0-9\]+.2d, v\[0-9\]+.2d" } } */ + +int +main (int argc, char **argv) +{ + if (!test_frecps_float32_t ()) + abort (); + if (!test_frecps_float32x2_t ()) + abort (); + if (!test_frecps_float32x4_t ()) + abort (); + if (!test_frecps_float64_t ()) + abort (); + if (!test_frecps_float64x2_t ()) + abort (); + + return 0; +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/ands_2.c +++ b/src/gcc/testsuite/gcc.target/aarch64/ands_2.c @@ -0,0 +1,157 @@ +/* { dg-do run } */ +/* { dg-options "-O2 --save-temps -fno-inline" } */ + +extern void abort (void); + +int +ands_si_test1 (int a, int b, int c) +{ + int d = a & b; + + /* { dg-final { scan-assembler-not "ands\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" } } */ + /* { dg-final { scan-assembler-times "and\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" 2 } } */ + if (d <= 0) + return a + c; + else + return b + d + c; +} + +int +ands_si_test2 (int a, int b, int c) +{ + int d = a & 0x99999999; + + /* { dg-final { scan-assembler-not "ands\tw\[0-9\]+, w\[0-9\]+, -1717986919" } } */ + /* { dg-final { scan-assembler "and\tw\[0-9\]+, w\[0-9\]+, -1717986919" } } */ + if (d <= 0) + return a + c; + else + return b + d + c; +} + +int +ands_si_test3 (int a, int b, int c) +{ + int d = a & (b << 3); + + /* { dg-final { scan-assembler-not "ands\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, lsl 3" } } */ + /* { dg-final { scan-assembler "and\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, lsl 3" } } */ + if (d <= 0) + return a + c; + else + return b + d + c; +} + +typedef long long s64; + +s64 +ands_di_test1 (s64 a, s64 b, s64 c) +{ + s64 d = a & b; + + /* { dg-final { scan-assembler-not "ands\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" } } */ + /* { dg-final { scan-assembler-times "and\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" 2 } } */ + if (d <= 0) + return a + c; + else + return b + d + c; +} + +s64 +ands_di_test2 (s64 a, s64 b, s64 c) +{ + s64 d = a & 0xaaaaaaaaaaaaaaaall; + + /* { dg-final { scan-assembler-not "ands\tx\[0-9\]+, x\[0-9\]+, -6148914691236517206" } } */ + /* { dg-final { scan-assembler "and\tx\[0-9\]+, x\[0-9\]+, -6148914691236517206" } } */ + if (d <= 0) + return a + c; + else + return b + d + c; +} + +s64 +ands_di_test3 (s64 a, s64 b, s64 c) +{ + s64 d = a & (b << 3); + + /* { dg-final { scan-assembler-not "ands\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, lsl 3" } } */ + /* { dg-final { scan-assembler "and\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, lsl 3" } } */ + if (d <= 0) + return a + c; + else + return b + d + c; +} + +int +main () +{ + int x; + s64 y; + + x = ands_si_test1 (29, 4, 5); + if (x != 13) + abort (); + + x = ands_si_test1 (5, 2, 20); + if (x != 25) + abort (); + + x = ands_si_test2 (29, 4, 5); + if (x != 34) + abort (); + + x = ands_si_test2 (1024, 2, 20); + if (x != 1044) + abort (); + + x = ands_si_test3 (35, 4, 5); + if (x != 41) + abort (); + + x = ands_si_test3 (5, 2, 20); + if (x != 25) + abort (); + + y = ands_di_test1 (0x130000029ll, + 0x320000004ll, + 0x505050505ll); + + if (y != ((0x130000029ll & 0x320000004ll) + 0x320000004ll + 0x505050505ll)) + abort (); + + y = ands_di_test1 (0x5000500050005ll, + 0x2111211121112ll, + 0x0000000002020ll); + if (y != 0x5000500052025ll) + 
abort (); + + y = ands_di_test2 (0x130000029ll, + 0x320000004ll, + 0x505050505ll); + if (y != ((0x130000029ll & 0xaaaaaaaaaaaaaaaall) + 0x320000004ll + 0x505050505ll)) + abort (); + + y = ands_di_test2 (0x540004100ll, + 0x320000004ll, + 0x805050205ll); + if (y != (0x540004100ll + 0x805050205ll)) + abort (); + + y = ands_di_test3 (0x130000029ll, + 0x064000008ll, + 0x505050505ll); + if (y != ((0x130000029ll & (0x064000008ll << 3)) + + 0x064000008ll + 0x505050505ll)) + abort (); + + y = ands_di_test3 (0x130002900ll, + 0x088000008ll, + 0x505050505ll); + if (y != (0x130002900ll + 0x505050505ll)) + abort (); + + return 0; +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/scalar-vca.c +++ b/src/gcc/testsuite/gcc.target/aarch64/scalar-vca.c @@ -0,0 +1,72 @@ +/* { dg-do run } */ +/* { dg-options "-O3 --save-temps" } */ + +#include + +extern void abort (void); +extern float fabsf (float); +extern double fabs (double); + +#define NUM_TESTS 8 + +float input_s1[] = {0.1f, -0.1f, 0.4f, 10.3f, 200.0f, -800.0f, -13.0f, -0.5f}; +float input_s2[] = {-0.2f, 0.4f, 0.04f, -100.3f, 2.0f, -80.0f, 13.0f, -0.5f}; +double input_d1[] = {0.1, -0.1, 0.4, 10.3, 200.0, -800.0, -13.0, -0.5}; +double input_d2[] = {-0.2, 0.4, 0.04, -100.3, 2.0, -80.0, 13.0, -0.5}; + +#define TEST(TEST, CMP, SUFFIX, WIDTH, F) \ +int \ +test_fca##TEST##SUFFIX##_float##WIDTH##_t (void) \ +{ \ + int ret = 0; \ + int i = 0; \ + uint##WIDTH##_t output[NUM_TESTS]; \ + \ + for (i = 0; i < NUM_TESTS; i++) \ + { \ + float##WIDTH##_t f1 = fabs##F (input_##SUFFIX##1[i]); \ + float##WIDTH##_t f2 = fabs##F (input_##SUFFIX##2[i]); \ + /* Inhibit optimization of our linear test loop. */ \ + asm volatile ("" : : : "memory"); \ + output[i] = f1 CMP f2 ? -1 : 0; \ + } \ + \ + for (i = 0; i < NUM_TESTS; i++) \ + { \ + output[i] = vca##TEST##SUFFIX##_f##WIDTH (input_##SUFFIX##1[i], \ + input_##SUFFIX##2[i]) \ + ^ output[i]; \ + /* Inhibit autovectorization of our scalar test loop. 
*/ \ + asm volatile ("" : : : "memory"); \ + } \ + \ + for (i = 0; i < NUM_TESTS; i++) \ + ret |= output[i]; \ + \ + return ret; \ +} + +TEST (ge, >=, s, 32, f) +/* { dg-final { scan-assembler "facge\\ts\[0-9\]+, s\[0-9\]+, s\[0-9\]+" } } */ +TEST (ge, >=, d, 64, ) +/* { dg-final { scan-assembler "facge\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" } } */ +TEST (gt, >, s, 32, f) +/* { dg-final { scan-assembler "facgt\\ts\[0-9\]+, s\[0-9\]+, s\[0-9\]+" } } */ +TEST (gt, >, d, 64, ) +/* { dg-final { scan-assembler "facgt\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" } } */ + +int +main (int argc, char **argv) +{ + if (test_fcages_float32_t ()) + abort (); + if (test_fcaged_float64_t ()) + abort (); + if (test_fcagts_float32_t ()) + abort (); + if (test_fcagtd_float64_t ()) + abort (); + return 0; +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.x +++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.x @@ -0,0 +1,37 @@ +int v = 0; + +int +atomic_fetch_add_ACQ_REL (int a) +{ + return __atomic_fetch_add (&v, a, __ATOMIC_ACQ_REL); +} + +int +atomic_fetch_sub_ACQ_REL (int a) +{ + return __atomic_fetch_sub (&v, a, __ATOMIC_ACQ_REL); +} + +int +atomic_fetch_and_ACQ_REL (int a) +{ + return __atomic_fetch_and (&v, a, __ATOMIC_ACQ_REL); +} + +int +atomic_fetch_nand_ACQ_REL (int a) +{ + return __atomic_fetch_nand (&v, a, __ATOMIC_ACQ_REL); +} + +int +atomic_fetch_xor_ACQ_REL (int a) +{ + return __atomic_fetch_xor (&v, a, __ATOMIC_ACQ_REL); +} + +int +atomic_fetch_or_ACQ_REL (int a) +{ + return __atomic_fetch_or (&v, a, __ATOMIC_ACQ_REL); +} --- a/src/gcc/testsuite/gcc.target/aarch64/vect_smlal_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/vect_smlal_1.c @@ -0,0 +1,325 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -fno-inline -save-temps -fno-vect-cost-model" } */ + +typedef signed char S8_t; +typedef signed short S16_t; +typedef signed int S32_t; +typedef signed long S64_t; +typedef signed char *__restrict__ pS8_t; +typedef signed short *__restrict__ pS16_t; +typedef signed int *__restrict__ pS32_t; +typedef signed long *__restrict__ pS64_t; +typedef unsigned char U8_t; +typedef unsigned short U16_t; +typedef unsigned int U32_t; +typedef unsigned long U64_t; +typedef unsigned char *__restrict__ pU8_t; +typedef unsigned short *__restrict__ pU16_t; +typedef unsigned int *__restrict__ pU32_t; +typedef unsigned long *__restrict__ pU64_t; + +extern void abort (); + +void +test_addS64_tS32_t4 (pS64_t a, pS32_t b, pS32_t c) +{ + int i; + for (i = 0; i < 4; i++) + a[i] += (S64_t) b[i] * (S64_t) c[i]; +} + +/* { dg-final { scan-assembler "smlal\tv\[0-9\]+\.2d" } } */ +/* { dg-final { scan-assembler "smlal2\tv\[0-9\]+\.2d" } } */ + +void +test_addS32_tS16_t8 (pS32_t a, pS16_t b, pS16_t c) +{ + int i; + for (i = 0; i < 8; i++) + a[i] += (S32_t) b[i] * (S32_t) c[i]; +} + +/* { dg-final { scan-assembler "smlal\tv\[0-9\]+\.4s" } } */ +/* { dg-final { scan-assembler "smlal2\tv\[0-9\]+\.4s" } } */ + +void +test_addS16_tS8_t16 (pS16_t a, pS8_t b, pS8_t c) +{ + int i; + for (i = 0; i < 16; i++) + a[i] += (S16_t) b[i] * (S16_t) c[i]; +} + +void +test_addS16_tS8_t16_neg0 (pS16_t a, pS8_t b, pS8_t c) +{ + int i; + for (i = 0; i < 16; i++) + a[i] += (S16_t) -b[i] * (S16_t) -c[i]; +} + +void +test_addS16_tS8_t16_neg1 (pS16_t a, pS8_t b, pS8_t c) +{ + int i; + for (i = 0; i < 16; i++) + a[i] -= (S16_t) b[i] * (S16_t) -c[i]; +} + +void +test_addS16_tS8_t16_neg2 (pS16_t a, pS8_t b, pS8_t c) +{ + int i; + for (i = 0; i < 16; i++) + a[i] -= (S16_t) -b[i] * (S16_t) c[i]; +} + +/* { 
dg-final { scan-assembler-times "smlal\tv\[0-9\]+\.8h" 4 } } */ +/* { dg-final { scan-assembler-times "smlal2\tv\[0-9\]+\.8h" 4 } } */ + +void +test_subS64_tS32_t4 (pS64_t a, pS32_t b, pS32_t c) +{ + int i; + for (i = 0; i < 4; i++) + a[i] -= (S64_t) b[i] * (S64_t) c[i]; +} + +/* { dg-final { scan-assembler "smlsl\tv\[0-9\]+\.2d" } } */ +/* { dg-final { scan-assembler "smlsl2\tv\[0-9\]+\.2d" } } */ + +void +test_subS32_tS16_t8 (pS32_t a, pS16_t b, pS16_t c) +{ + int i; + for (i = 0; i < 8; i++) + a[i] -= (S32_t) b[i] * (S32_t) c[i]; +} + +/* { dg-final { scan-assembler "smlsl\tv\[0-9\]+\.4s" } } */ +/* { dg-final { scan-assembler "smlsl2\tv\[0-9\]+\.4s" } } */ + +void +test_subS16_tS8_t16 (pS16_t a, pS8_t b, pS8_t c) +{ + int i; + for (i = 0; i < 16; i++) + a[i] -= (S16_t) b[i] * (S16_t) c[i]; +} + +void +test_subS16_tS8_t16_neg0 (pS16_t a, pS8_t b, pS8_t c) +{ + int i; + for (i = 0; i < 16; i++) + a[i] += (S16_t) -b[i] * (S16_t) c[i]; +} + +void +test_subS16_tS8_t16_neg1 (pS16_t a, pS8_t b, pS8_t c) +{ + int i; + for (i = 0; i < 16; i++) + a[i] += (S16_t) b[i] * (S16_t) -c[i]; +} + +void +test_subS16_tS8_t16_neg2 (pS16_t a, pS8_t b, pS8_t c) +{ + int i; + for (i = 0; i < 16; i++) + a[i] += -((S16_t) b[i] * (S16_t) c[i]); +} + +void +test_subS16_tS8_t16_neg3 (pS16_t a, pS8_t b, pS8_t c) +{ + int i; + for (i = 0; i < 16; i++) + a[i] -= (S16_t) -b[i] * (S16_t) -c[i]; +} + +/* { dg-final { scan-assembler-times "smlsl\tv\[0-9\]+\.8h" 5 } } */ +/* { dg-final { scan-assembler-times "smlsl2\tv\[0-9\]+\.8h" 5 } } */ + +void +test_addU64_tU32_t4 (pU64_t a, pU32_t b, pU32_t c) +{ + int i; + for (i = 0; i < 4; i++) + a[i] += (U64_t) b[i] * (U64_t) c[i]; +} + +/* { dg-final { scan-assembler "umlal\tv\[0-9\]+\.2d" } } */ +/* { dg-final { scan-assembler "umlal2\tv\[0-9\]+\.2d" } } */ + +void +test_addU32_tU16_t8 (pU32_t a, pU16_t b, pU16_t c) +{ + int i; + for (i = 0; i < 8; i++) + a[i] += (U32_t) b[i] * (U32_t) c[i]; +} + +/* { dg-final { scan-assembler "umlal\tv\[0-9\]+\.4s" } } */ +/* { dg-final { scan-assembler "umlal2\tv\[0-9\]+\.4s" } } */ + +void +test_addU16_tU8_t16 (pU16_t a, pU8_t b, pU8_t c) +{ + int i; + for (i = 0; i < 16; i++) + a[i] += (U16_t) b[i] * (U16_t) c[i]; +} + +/* { dg-final { scan-assembler "umlal\tv\[0-9\]+\.8h" } } */ +/* { dg-final { scan-assembler "umlal2\tv\[0-9\]+\.8h" } } */ + +void +test_subU64_tU32_t4 (pU64_t a, pU32_t b, pU32_t c) +{ + int i; + for (i = 0; i < 4; i++) + a[i] -= (U64_t) b[i] * (U64_t) c[i]; +} + +/* { dg-final { scan-assembler "umlsl\tv\[0-9\]+\.2d" } } */ +/* { dg-final { scan-assembler "umlsl2\tv\[0-9\]+\.2d" } } */ + +void +test_subU32_tU16_t8 (pU32_t a, pU16_t b, pU16_t c) +{ + int i; + for (i = 0; i < 8; i++) + a[i] -= (U32_t) b[i] * (U32_t) c[i]; +} + +/* { dg-final { scan-assembler "umlsl\tv\[0-9\]+\.4s" } } */ +/* { dg-final { scan-assembler "umlsl2\tv\[0-9\]+\.4s" } } */ + +void +test_subU16_tU8_t16 (pU16_t a, pU8_t b, pU8_t c) +{ + int i; + for (i = 0; i < 16; i++) + a[i] -= (U16_t) b[i] * (U16_t) c[i]; +} + +/* { dg-final { scan-assembler "umlsl\tv\[0-9\]+\.8h" } } */ +/* { dg-final { scan-assembler "umlsl2\tv\[0-9\]+\.8h" } } */ + + +S64_t add_rS64[4] = { 6, 7, -4, -3 }; +S32_t add_rS32[8] = { 6, 7, -4, -3, 10, 11, 0, 1 }; +S16_t add_rS16[16] = + { 6, 7, -4, -3, 10, 11, 0, 1, 14, 15, 4, 5, 18, 19, 8, 9 }; + +S64_t sub_rS64[4] = { 0, 1, 2, 3 }; +S32_t sub_rS32[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; +S16_t sub_rS16[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; + +U64_t add_rU64[4] = { 0x6, 0x7, 0x2fffffffc, 0x2fffffffd }; + +U32_t 
add_rU32[8] = +{ + 0x6, 0x7, 0x2fffc, 0x2fffd, + 0xa, 0xb, 0x30000, 0x30001 +}; + +U16_t add_rU16[16] = +{ + 0x6, 0x7, 0x2fc, 0x2fd, 0xa, 0xb, 0x300, 0x301, + 0xe, 0xf, 0x304, 0x305, 0x12, 0x13, 0x308, 0x309 +}; + +U64_t sub_rU64[4] = { 0, 1, 2, 3 }; +U32_t sub_rU32[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; +U16_t sub_rU16[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; + +S8_t neg_r[16] = { -6, -5, 8, 9, -2, -1, 12, 13, 2, 3, 16, 17, 6, 7, 20, 21 }; + +S64_t S64_ta[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; +S32_t S32_tb[16] = { 2, 2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2 }; +S32_t S32_tc[16] = { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 }; + +S32_t S32_ta[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; +S16_t S16_tb[16] = { 2, 2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2 }; +S16_t S16_tc[16] = { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 }; + +S16_t S16_ta[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; +S8_t S8_tb[16] = { 2, 2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2, 2, 2, -2, -2 }; +S8_t S8_tc[16] = { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 }; + + +#define CHECK(T,N,AS,US) \ +do \ + { \ + for (i = 0; i < N; i++) \ + if (S##T##_ta[i] != AS##_r##US##T[i]) \ + abort (); \ + } \ +while (0) + +#define SCHECK(T,N,AS) CHECK(T,N,AS,S) +#define UCHECK(T,N,AS) CHECK(T,N,AS,U) + +#define NCHECK(RES) \ +do \ + { \ + for (i = 0; i < 16; i++) \ + if (S16_ta[i] != RES[i]) \ + abort (); \ + } \ +while (0) + + +int +main () +{ + int i; + + test_addS64_tS32_t4 (S64_ta, S32_tb, S32_tc); + SCHECK (64, 4, add); + test_addS32_tS16_t8 (S32_ta, S16_tb, S16_tc); + SCHECK (32, 8, add); + test_addS16_tS8_t16 (S16_ta, S8_tb, S8_tc); + SCHECK (16, 16, add); + test_subS64_tS32_t4 (S64_ta, S32_tb, S32_tc); + SCHECK (64, 4, sub); + test_subS32_tS16_t8 (S32_ta, S16_tb, S16_tc); + SCHECK (32, 8, sub); + test_subS16_tS8_t16 (S16_ta, S8_tb, S8_tc); + SCHECK (16, 16, sub); + + test_addU64_tU32_t4 (S64_ta, S32_tb, S32_tc); + UCHECK (64, 4, add); + test_addU32_tU16_t8 (S32_ta, S16_tb, S16_tc); + UCHECK (32, 8, add); + test_addU16_tU8_t16 (S16_ta, S8_tb, S8_tc); + UCHECK (16, 16, add); + test_subU64_tU32_t4 (S64_ta, S32_tb, S32_tc); + UCHECK (64, 4, sub); + test_subU32_tU16_t8 (S32_ta, S16_tb, S16_tc); + UCHECK (32, 8, sub); + test_subU16_tU8_t16 (S16_ta, S8_tb, S8_tc); + UCHECK (16, 16, sub); + + test_addS16_tS8_t16_neg0 (S16_ta, S8_tb, S8_tc); + NCHECK (add_rS16); + test_subS16_tS8_t16_neg0 (S16_ta, S8_tb, S8_tc); + NCHECK (sub_rS16); + test_addS16_tS8_t16_neg1 (S16_ta, S8_tb, S8_tc); + NCHECK (add_rS16); + test_subS16_tS8_t16_neg1 (S16_ta, S8_tb, S8_tc); + NCHECK (sub_rS16); + test_addS16_tS8_t16_neg2 (S16_ta, S8_tb, S8_tc); + NCHECK (add_rS16); + test_subS16_tS8_t16_neg2 (S16_ta, S8_tb, S8_tc); + NCHECK (sub_rS16); + test_subS16_tS8_t16_neg3 (S16_ta, S8_tb, S8_tc); + NCHECK (neg_r); + + return 0; +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/extr.c +++ b/src/gcc/testsuite/gcc.target/aarch64/extr.c @@ -0,0 +1,34 @@ +/* { dg-options "-O2 --save-temps" } */ +/* { dg-do run } */ + +extern void abort (void); + +int +test_si (int a, int b) +{ + /* { dg-final { scan-assembler "extr\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, 27\n" } } */ + return (a << 5) | ((unsigned int) b >> 27); +} + +long long +test_di (long long a, long long b) +{ + /* { dg-final { scan-assembler "extr\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, 45\n" } } */ + return (a << 19) | ((unsigned long long) b >> 45); +} + +int +main () +{ + int v; 
+ long long w; + v = test_si (0x00000004, 0x30000000); + if (v != 0x00000086) + abort(); + w = test_di (0x0001040040040004ll, 0x0070050066666666ll); + if (w != 0x2002002000200380ll) + abort(); + return 0; +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/vect-compile.c +++ b/src/gcc/testsuite/gcc.target/aarch64/vect-compile.c @@ -16,5 +16,7 @@ /* { dg-final { scan-assembler "uminv" } } */ /* { dg-final { scan-assembler "smaxv" } } */ /* { dg-final { scan-assembler "sminv" } } */ +/* { dg-final { scan-assembler "sabd" } } */ +/* { dg-final { scan-assembler "saba" } } */ /* { dg-final { scan-assembler-times "addv" 2} } */ /* { dg-final { scan-assembler-times "addp" 2} } */ --- a/src/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-d.c +++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-d.c @@ -2,12 +2,13 @@ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */ #define FTYPE double +#define ITYPE long #define OP == #define INV_OP != #include "vect-fcm.x" -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 8 "vect" } } */ /* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */ /* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/adds3.c +++ b/src/gcc/testsuite/gcc.target/aarch64/adds3.c @@ -0,0 +1,61 @@ +/* { dg-do run } */ +/* { dg-options "-O2 --save-temps -fno-inline" } */ + +extern void abort (void); +typedef long long s64; + +int +adds_ext (s64 a, int b, int c) +{ + s64 d = a + b; + + if (d == 0) + return a + c; + else + return b + d + c; +} + +int +adds_shift_ext (s64 a, int b, int c) +{ + s64 d = (a + ((s64)b << 3)); + + if (d == 0) + return a + c; + else + return b + d + c; +} + +int main () +{ + int x; + s64 y; + + x = adds_ext (0x13000002ll, 41, 15); + if (x != 318767203) + abort (); + + x = adds_ext (0x50505050ll, 29, 4); + if (x != 1347440782) + abort (); + + x = adds_ext (0x12121212121ll, 2, 14); + if (x != 555819315) + abort (); + + x = adds_shift_ext (0x123456789ll, 4, 12); + if (x != 591751097) + abort (); + + x = adds_shift_ext (0x02020202ll, 9, 8); + if (x != 33686107) + abort (); + + x = adds_shift_ext (0x987987987987ll, 23, 41); + if (x != -2020050305) + abort (); + + return 0; +} + +/* { dg-final { scan-assembler-times "adds\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, sxtw" 2 } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/subs2.c +++ b/src/gcc/testsuite/gcc.target/aarch64/subs2.c @@ -0,0 +1,155 @@ +/* { dg-do run } */ +/* { dg-options "-O2 --save-temps -fno-inline" } */ + +extern void abort (void); + +int +subs_si_test1 (int a, int b, int c) +{ + int d = a - b; + + /* { dg-final { scan-assembler-not "subs\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" } } */ + /* { dg-final { scan-assembler "sub\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" } } */ + if (d <= 0) + return a + c; + else + return b + d + c; +} + +int +subs_si_test2 (int a, int b, int c) +{ + int d = a - 0xfff; + + /* { dg-final { scan-assembler-not "subs\tw\[0-9\]+, w\[0-9\]+, #4095" } } */ + /* { dg-final { scan-assembler "sub\tw\[0-9\]+, w\[0-9\]+, #4095" } } */ + if (d <= 0) + return a + c; + else + return b + d + c; +} + +int +subs_si_test3 (int a, int b, int c) +{ + int d = a - (b << 3); + + /* { dg-final { scan-assembler-not "subs\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, lsl 3" } } */ + /* { dg-final { scan-assembler 
"sub\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, lsl 3" } } */ + if (d <= 0) + return a + c; + else + return b + d + c; +} + +typedef long long s64; + +s64 +subs_di_test1 (s64 a, s64 b, s64 c) +{ + s64 d = a - b; + + /* { dg-final { scan-assembler-not "subs\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" } } */ + /* { dg-final { scan-assembler "sub\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" } } */ + if (d <= 0) + return a + c; + else + return b + d + c; +} + +s64 +subs_di_test2 (s64 a, s64 b, s64 c) +{ + s64 d = a - 0x1000ll; + + /* { dg-final { scan-assembler-not "subs\tx\[0-9\]+, x\[0-9\]+, #4096" } } */ + /* { dg-final { scan-assembler "sub\tx\[0-9\]+, x\[0-9\]+, #4096" } } */ + if (d <= 0) + return a + c; + else + return b + d + c; +} + +s64 +subs_di_test3 (s64 a, s64 b, s64 c) +{ + s64 d = a - (b << 3); + + /* { dg-final { scan-assembler-not "subs\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, lsl 3" } } */ + /* { dg-final { scan-assembler "sub\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, lsl 3" } } */ + if (d <= 0) + return a + c; + else + return b + d + c; +} + +int main () +{ + int x; + s64 y; + + x = subs_si_test1 (29, 4, 5); + if (x != 34) + abort (); + + x = subs_si_test1 (5, 2, 20); + if (x != 25) + abort (); + + x = subs_si_test2 (29, 4, 5); + if (x != 34) + abort (); + + x = subs_si_test2 (1024, 2, 20); + if (x != 1044) + abort (); + + x = subs_si_test3 (35, 4, 5); + if (x != 12) + abort (); + + x = subs_si_test3 (5, 2, 20); + if (x != 25) + abort (); + + y = subs_di_test1 (0x130000029ll, + 0x320000004ll, + 0x505050505ll); + + if (y != 0x63505052e) + abort (); + + y = subs_di_test1 (0x5000500050005ll, + 0x2111211121112ll, + 0x0000000002020ll); + if (y != 0x5000500052025) + abort (); + + y = subs_di_test2 (0x130000029ll, + 0x320000004ll, + 0x505050505ll); + if (y != 0x95504f532) + abort (); + + y = subs_di_test2 (0x540004100ll, + 0x320000004ll, + 0x805050205ll); + if (y != 0x1065053309) + abort (); + + y = subs_di_test3 (0x130000029ll, + 0x064000008ll, + 0x505050505ll); + if (y != 0x63505052e) + abort (); + + y = subs_di_test3 (0x130002900ll, + 0x088000008ll, + 0x505050505ll); + if (y != 0x635052e05) + abort (); + + return 0; +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/bics_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/bics_1.c @@ -0,0 +1,107 @@ +/* { dg-do run } */ +/* { dg-options "-O2 --save-temps -fno-inline" } */ + +extern void abort (void); + +int +bics_si_test1 (int a, int b, int c) +{ + int d = a & ~b; + + /* { dg-final { scan-assembler-times "bics\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" 2 } } */ + if (d == 0) + return a + c; + else + return b + d + c; +} + +int +bics_si_test2 (int a, int b, int c) +{ + int d = a & ~(b << 3); + + /* { dg-final { scan-assembler "bics\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, lsl 3" } } */ + if (d == 0) + return a + c; + else + return b + d + c; +} + +typedef long long s64; + +s64 +bics_di_test1 (s64 a, s64 b, s64 c) +{ + s64 d = a & ~b; + + /* { dg-final { scan-assembler-times "bics\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" 2 } } */ + if (d == 0) + return a + c; + else + return b + d + c; +} + +s64 +bics_di_test2 (s64 a, s64 b, s64 c) +{ + s64 d = a & ~(b << 3); + + /* { dg-final { scan-assembler "bics\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, lsl 3" } } */ + if (d == 0) + return a + c; + else + return b + d + c; +} + +int +main () +{ + int x; + s64 y; + + x = bics_si_test1 (29, ~4, 5); + if (x != ((29 & 4) + ~4 + 5)) + abort (); + + x = bics_si_test1 (5, ~2, 20); + if (x != 25) + abort (); + + x = bics_si_test2 (35, ~4, 5); + if (x != ((35 & ~(~4 << 3)) + ~4 + 5)) + abort 
(); + + x = bics_si_test2 (96, ~2, 20); + if (x != 116) + abort (); + + y = bics_di_test1 (0x130000029ll, + ~0x320000004ll, + 0x505050505ll); + + if (y != ((0x130000029ll & 0x320000004ll) + ~0x320000004ll + 0x505050505ll)) + abort (); + + y = bics_di_test1 (0x5000500050005ll, + ~0x2111211121112ll, + 0x0000000002020ll); + if (y != 0x5000500052025ll) + abort (); + + y = bics_di_test2 (0x130000029ll, + ~0x064000008ll, + 0x505050505ll); + if (y != ((0x130000029ll & ~(~0x064000008ll << 3)) + + ~0x064000008ll + 0x505050505ll)) + abort (); + + y = bics_di_test2 (0x130002900ll, + ~0x088000008ll, + 0x505050505ll); + if (y != (0x130002900ll + 0x505050505ll)) + abort (); + + return 0; +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/vect-vmaxv.c +++ b/src/gcc/testsuite/gcc.target/aarch64/vect-vmaxv.c @@ -0,0 +1,117 @@ +/* { dg-do run } */ +/* { dg-options "-O3 --save-temps -ffast-math" } */ + +#include + +extern void abort (void); + +#define NUM_TESTS 16 +#define DELTA 0.000001 + +int8_t input_int8[] = {1, 56, 2, -9, -90, 23, 54, 76, + -4, 34, 110, -110, 6, 4, 75, -34}; +int16_t input_int16[] = {1, 56, 2, -9, -90, 23, 54, 76, + -4, 34, 110, -110, 6, 4, 75, -34}; +int32_t input_int32[] = {1, 56, 2, -9, -90, 23, 54, 76, + -4, 34, 110, -110, 6, 4, 75, -34}; + +uint8_t input_uint8[] = {1, 56, 2, 9, 90, 23, 54, 76, + 4, 34, 110, 110, 6, 4, 75, 34}; +uint16_t input_uint16[] = {1, 56, 2, 9, 90, 23, 54, 76, + 4, 34, 110, 110, 6, 4, 75, 34}; +uint32_t input_uint32[] = {1, 56, 2, 9, 90, 23, 54, 76, + 4, 34, 110, 110, 6, 4, 75, 34}; + +#define EQUAL(a, b) (a == b) + +#define TEST(MAXMIN, CMP_OP, SUFFIX, Q, TYPE, LANES) \ +int \ +test_v##MAXMIN##v##SUFFIX##_##TYPE##x##LANES##_t (void) \ +{ \ + int i, j; \ + int moves = (NUM_TESTS - LANES) + 1; \ + TYPE##_t out_l[NUM_TESTS]; \ + TYPE##_t out_v[NUM_TESTS]; \ + \ + /* Calculate linearly. */ \ + for (i = 0; i < moves; i++) \ + { \ + out_l[i] = input_##TYPE[i]; \ + for (j = 0; j < LANES; j++) \ + out_l[i] = input_##TYPE[i + j] CMP_OP out_l[i] ? \ + input_##TYPE[i + j] : out_l[i]; \ + } \ + \ + /* Calculate using vector reduction intrinsics. */ \ + for (i = 0; i < moves; i++) \ + { \ + TYPE##x##LANES##_t t1 = vld1##Q##_##SUFFIX (input_##TYPE + i); \ + out_v[i] = v##MAXMIN##v##Q##_##SUFFIX (t1); \ + } \ + \ + /* Compare. 
*/ \ + for (i = 0; i < moves; i++) \ + { \ + if (!EQUAL (out_v[i], out_l[i])) \ + return 0; \ + } \ + return 1; \ +} + +#define BUILD_VARIANTS(TYPE, STYPE, W32, W64) \ +TEST (max, >, STYPE, , TYPE, W32) \ +TEST (max, >, STYPE, q, TYPE, W64) \ +TEST (min, <, STYPE, , TYPE, W32) \ +TEST (min, <, STYPE, q, TYPE, W64) + +BUILD_VARIANTS (int8, s8, 8, 16) +/* { dg-final { scan-assembler "smaxv\\tb\[0-9\]+, v\[0-9\]+\.8b" } } */ +/* { dg-final { scan-assembler "sminv\\tb\[0-9\]+, v\[0-9\]+\.8b" } } */ +/* { dg-final { scan-assembler "smaxv\\tb\[0-9\]+, v\[0-9\]+\.16b" } } */ +/* { dg-final { scan-assembler "sminv\\tb\[0-9\]+, v\[0-9\]+\.16b" } } */ +BUILD_VARIANTS (uint8, u8, 8, 16) +/* { dg-final { scan-assembler "umaxv\\tb\[0-9\]+, v\[0-9\]+\.8b" } } */ +/* { dg-final { scan-assembler "uminv\\tb\[0-9\]+, v\[0-9\]+\.8b" } } */ +/* { dg-final { scan-assembler "umaxv\\tb\[0-9\]+, v\[0-9\]+\.16b" } } */ +/* { dg-final { scan-assembler "uminv\\tb\[0-9\]+, v\[0-9\]+\.16b" } } */ +BUILD_VARIANTS (int16, s16, 4, 8) +/* { dg-final { scan-assembler "smaxv\\th\[0-9\]+, v\[0-9\]+\.4h" } } */ +/* { dg-final { scan-assembler "sminv\\th\[0-9\]+, v\[0-9\]+\.4h" } } */ +/* { dg-final { scan-assembler "smaxv\\th\[0-9\]+, v\[0-9\]+\.8h" } } */ +/* { dg-final { scan-assembler "sminv\\th\[0-9\]+, v\[0-9\]+\.8h" } } */ +BUILD_VARIANTS (uint16, u16, 4, 8) +/* { dg-final { scan-assembler "umaxv\\th\[0-9\]+, v\[0-9\]+\.4h" } } */ +/* { dg-final { scan-assembler "uminv\\th\[0-9\]+, v\[0-9\]+\.4h" } } */ +/* { dg-final { scan-assembler "umaxv\\th\[0-9\]+, v\[0-9\]+\.8h" } } */ +/* { dg-final { scan-assembler "uminv\\th\[0-9\]+, v\[0-9\]+\.8h" } } */ +BUILD_VARIANTS (int32, s32, 2, 4) +/* { dg-final { scan-assembler "smaxp\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */ +/* { dg-final { scan-assembler "sminp\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */ +/* { dg-final { scan-assembler "smaxv\\ts\[0-9\]+, v\[0-9\]+\.4s" } } */ +/* { dg-final { scan-assembler "sminv\\ts\[0-9\]+, v\[0-9\]+\.4s" } } */ +BUILD_VARIANTS (uint32, u32, 2, 4) +/* { dg-final { scan-assembler "umaxp\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */ +/* { dg-final { scan-assembler "uminp\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */ +/* { dg-final { scan-assembler "umaxv\\ts\[0-9\]+, v\[0-9\]+\.4s" } } */ +/* { dg-final { scan-assembler "uminv\\ts\[0-9\]+, v\[0-9\]+\.4s" } } */ + +#undef TEST +#define TEST(MAXMIN, CMP_OP, SUFFIX, Q, TYPE, LANES) \ +{ \ + if (!test_v##MAXMIN##v##SUFFIX##_##TYPE##x##LANES##_t ()) \ + abort (); \ +} + +int +main (int argc, char **argv) +{ + BUILD_VARIANTS (int8, s8, 8, 16) + BUILD_VARIANTS (uint8, u8, 8, 16) + BUILD_VARIANTS (int16, s16, 4, 8) + BUILD_VARIANTS (uint16, u16, 4, 8) + BUILD_VARIANTS (int32, s32, 2, 4) + BUILD_VARIANTS (uint32, u32, 2, 4) + return 0; +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/vrecpx.c +++ b/src/gcc/testsuite/gcc.target/aarch64/vrecpx.c @@ -0,0 +1,54 @@ +/* { dg-do run } */ +/* { dg-options "-O3 --save-temps" } */ + +#include +#include +#include + +float32_t in_f[] = +{2.0, 4.0, 8.0, 16.0, 1.0, 0.5, 0.25, 0.125}; +float32_t rec_f[] = +{1.0, 0.5, 0.25, 0.125, 2.0, 4.0, 8.0, 16.0}; +float64_t in_d[] = +{2.0, 4.0, 8.0, 16.0, 1.0, 0.5, 0.25, 0.125}; +float32_t rec_d[] = +{1.0, 0.5, 0.25, 0.125, 2.0, 4.0, 8.0, 16.0}; + +int +test_frecpx_float32_t (void) +{ + int i = 0; + int ret = 1; + for (i = 0; i < 8; i++) + ret &= fabs (vrecpxs_f32 (in_f[i]) - rec_f[i]) < 0.001; + + return ret; +} + +/* { dg-final { scan-assembler 
"frecpx\\ts\[0-9\]+, s\[0-9\]+" } } */ + +int +test_frecpx_float64_t (void) +{ + int i = 0; + int ret = 1; + for (i = 0; i < 8; i++) + ret &= fabs (vrecpxd_f64 (in_d[i]) - rec_d[i]) < 0.001; + + return ret; +} + +/* { dg-final { scan-assembler "frecpx\\td\[0-9\]+, d\[0-9\]+" } } */ + +int +main (int argc, char **argv) +{ + if (!test_frecpx_float32_t ()) + abort (); + if (!test_frecpx_float64_t ()) + abort (); + + return 0; +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/vect-vca.c +++ b/src/gcc/testsuite/gcc.target/aarch64/vect-vca.c @@ -0,0 +1,89 @@ +/* { dg-do run } */ +/* { dg-options "-O3 --save-temps" } */ + +#include + +extern void abort (void); +extern float fabsf (float); +extern double fabs (double); + +#define NUM_TESTS 8 + +float input_s1[] = {0.1f, -0.1f, 0.4f, 10.3f, 200.0f, -800.0f, -13.0f, -0.5f}; +float input_s2[] = {-0.2f, 0.4f, 0.04f, -100.3f, 2.0f, -80.0f, 13.0f, -0.5f}; +double input_d1[] = {0.1, -0.1, 0.4, 10.3, 200.0, -800.0, -13.0, -0.5}; +double input_d2[] = {-0.2, 0.4, 0.04, -100.3, 2.0, -80.0, 13.0, -0.5}; + +#define TEST(T, CMP, SUFFIX, WIDTH, LANES, Q, F) \ +int \ +test_vca##T##_float##WIDTH##x##LANES##_t (void) \ +{ \ + int ret = 0; \ + int i = 0; \ + uint##WIDTH##_t output[NUM_TESTS]; \ + \ + for (i = 0; i < NUM_TESTS; i++) \ + { \ + float##WIDTH##_t f1 = fabs##F (input_##SUFFIX##1[i]); \ + float##WIDTH##_t f2 = fabs##F (input_##SUFFIX##2[i]); \ + /* Inhibit optimization of our linear test loop. */ \ + asm volatile ("" : : : "memory"); \ + output[i] = f1 CMP f2 ? -1 : 0; \ + } \ + \ + for (i = 0; i < NUM_TESTS; i += LANES) \ + { \ + float##WIDTH##x##LANES##_t in1 = \ + vld1##Q##_f##WIDTH (input_##SUFFIX##1 + i); \ + float##WIDTH##x##LANES##_t in2 = \ + vld1##Q##_f##WIDTH (input_##SUFFIX##2 + i); \ + uint##WIDTH##x##LANES##_t expected_out = \ + vld1##Q##_u##WIDTH (output + i); \ + uint##WIDTH##x##LANES##_t out = \ + veor##Q##_u##WIDTH (vca##T##Q##_f##WIDTH (in1, in2), \ + expected_out); \ + vst1##Q##_u##WIDTH (output + i, out); \ + } \ + \ + for (i = 0; i < NUM_TESTS; i++) \ + ret |= output[i]; \ + \ + return ret; \ +} + +#define BUILD_VARIANTS(T, CMP) \ +TEST (T, CMP, s, 32, 2, , f) \ +TEST (T, CMP, s, 32, 4, q, f) \ +TEST (T, CMP, d, 64, 2, q, ) + +BUILD_VARIANTS (ge, >=) +/* { dg-final { scan-assembler "facge\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */ +/* { dg-final { scan-assembler "facge\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */ +/* { dg-final { scan-assembler "facge\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */ + +BUILD_VARIANTS (gt, >) +/* { dg-final { scan-assembler "facgt\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */ +/* { dg-final { scan-assembler "facgt\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */ +/* { dg-final { scan-assembler "facgt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */ + +/* No need for another scan-assembler as these tests + also generate facge, facgt instructions. 
*/ +BUILD_VARIANTS (le, <=) +BUILD_VARIANTS (lt, <) + +#undef TEST +#define TEST(T, CMP, SUFFIX, WIDTH, LANES, Q, F) \ +if (test_vca##T##_float##WIDTH##x##LANES##_t ()) \ + abort (); + +int +main (int argc, char **argv) +{ +BUILD_VARIANTS (ge, >=) +BUILD_VARIANTS (gt, >) +BUILD_VARIANTS (le, <=) +BUILD_VARIANTS (lt, <) + return 0; +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/vect-vrnd.c +++ b/src/gcc/testsuite/gcc.target/aarch64/vect-vrnd.c @@ -0,0 +1,117 @@ +/* { dg-do run } */ +/* { dg-options "-O3 --save-temps" } */ + +#include + +extern void abort (void); +extern float fabsf (float); +extern double fabs (double); + +extern double trunc (double); +extern double round (double); +extern double nearbyint (double); +extern double floor (double); +extern double ceil (double); +extern double rint (double); + +extern float truncf (float); +extern float roundf (float); +extern float nearbyintf (float); +extern float floorf (float); +extern float ceilf (float); +extern float rintf (float); + +#define NUM_TESTS 8 +#define DELTA 0.000001 + +float input_f32[] = {0.1f, -0.1f, 0.4f, 10.3f, + 200.0f, -800.0f, -13.0f, -0.5f}; +double input_f64[] = {0.1, -0.1, 0.4, 10.3, + 200.0, -800.0, -13.0, -0.5}; + +#define TEST(SUFFIX, Q, WIDTH, LANES, C_FN, F) \ +int \ +test_vrnd##SUFFIX##_float##WIDTH##x##LANES##_t (void) \ +{ \ + int ret = 1; \ + int i = 0; \ + int nlanes = LANES; \ + float##WIDTH##_t expected_out[NUM_TESTS]; \ + float##WIDTH##_t actual_out[NUM_TESTS]; \ + \ + for (i = 0; i < NUM_TESTS; i++) \ + { \ + expected_out[i] = C_FN##F (input_f##WIDTH[i]); \ + /* Don't vectorize this. */ \ + asm volatile ("" : : : "memory"); \ + } \ + \ + /* Prevent the compiler from noticing these two loops do the same \ + thing and optimizing away the comparison. 
*/ \ + asm volatile ("" : : : "memory"); \ + \ + for (i = 0; i < NUM_TESTS; i+=nlanes) \ + { \ + float##WIDTH##x##LANES##_t out = \ + vrnd##SUFFIX##Q##_f##WIDTH \ + (vld1##Q##_f##WIDTH (input_f##WIDTH + i)); \ + vst1##Q##_f##WIDTH (actual_out + i, out); \ + } \ + \ + for (i = 0; i < NUM_TESTS; i++) \ + ret &= fabs##F (expected_out[i] - actual_out[i]) < DELTA; \ + \ + return ret; \ +} \ + + +#define BUILD_VARIANTS(SUFFIX, C_FN) \ +TEST (SUFFIX, , 32, 2, C_FN, f) \ +TEST (SUFFIX, q, 32, 4, C_FN, f) \ +TEST (SUFFIX, q, 64, 2, C_FN, ) \ + +BUILD_VARIANTS ( , trunc) +/* { dg-final { scan-assembler "frintz\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */ +/* { dg-final { scan-assembler "frintz\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */ +/* { dg-final { scan-assembler "frintz\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */ +BUILD_VARIANTS (a, round) +/* { dg-final { scan-assembler "frinta\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */ +/* { dg-final { scan-assembler "frinta\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */ +/* { dg-final { scan-assembler "frinta\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */ +BUILD_VARIANTS (i, nearbyint) +/* { dg-final { scan-assembler "frinti\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */ +/* { dg-final { scan-assembler "frinti\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */ +/* { dg-final { scan-assembler "frinti\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */ +BUILD_VARIANTS (m, floor) +/* { dg-final { scan-assembler "frintm\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */ +/* { dg-final { scan-assembler "frintm\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */ +/* { dg-final { scan-assembler "frintm\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */ +BUILD_VARIANTS (p, ceil) +/* { dg-final { scan-assembler "frintp\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */ +/* { dg-final { scan-assembler "frintp\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */ +/* { dg-final { scan-assembler "frintp\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */ +BUILD_VARIANTS (x, rint) +/* { dg-final { scan-assembler "frintx\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */ +/* { dg-final { scan-assembler "frintx\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */ +/* { dg-final { scan-assembler "frintx\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */ + +#undef TEST +#define TEST(SUFFIX, Q, WIDTH, LANES, C_FN, F) \ +{ \ + if (!test_vrnd##SUFFIX##_float##WIDTH##x##LANES##_t ()) \ + abort (); \ +} + +int +main (int argc, char **argv) +{ + BUILD_VARIANTS ( , trunc) + BUILD_VARIANTS (a, round) + BUILD_VARIANTS (i, nearbyint) + BUILD_VARIANTS (m, floor) + BUILD_VARIANTS (p, ceil) + BUILD_VARIANTS (x, rint) + return 0; +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c +++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c @@ -1,43 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-O2" } */ -int v = 0; +#include "atomic-op-relaxed.x" -int -atomic_fetch_add_RELAXED (int a) -{ - return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED); -} - -int -atomic_fetch_sub_RELAXED (int a) -{ - return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED); -} - -int -atomic_fetch_and_RELAXED (int a) -{ - return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED); -} - -int -atomic_fetch_nand_RELAXED (int a) -{ - return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED); -} - -int -atomic_fetch_xor_RELAXED (int a) -{ - return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED); -} - -int -atomic_fetch_or_RELAXED (int a) -{ - return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED); -} - /* { dg-final { scan-assembler-times "ldxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */ /* { dg-final { scan-assembler-times "stxr\tw\[0-9\]+, w\[0-9\]+, 
\\\[x\[0-9\]+\\\]" 6 } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/aes_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/aes_1.c @@ -0,0 +1,40 @@ + +/* { dg-do compile } */ +/* { dg-options "-march=armv8-a+crypto" } */ + +#include "arm_neon.h" + +uint8x16_t +test_vaeseq_u8 (uint8x16_t data, uint8x16_t key) +{ + return vaeseq_u8 (data, key); +} + +/* { dg-final { scan-assembler-times "aese\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */ + +uint8x16_t +test_vaesdq_u8 (uint8x16_t data, uint8x16_t key) +{ + return vaesdq_u8 (data, key); +} + +/* { dg-final { scan-assembler-times "aesd\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */ + +uint8x16_t +test_vaesmcq_u8 (uint8x16_t data) +{ + return vaesmcq_u8 (data); +} + +/* { dg-final { scan-assembler-times "aesmc\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */ + +uint8x16_t +test_vaesimcq_u8 (uint8x16_t data) +{ + return vaesimcq_u8 (data); +} + +/* { dg-final { scan-assembler-times "aesimc\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */ + + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/vect-fcm.x +++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fcm.x @@ -13,6 +13,8 @@ 2.0, -4.0, 8.0, -16.0, -2.125, 4.25, -8.5, 17.0}; +/* Float comparisons, float results. */ + void foo (FTYPE *in1, FTYPE *in2, FTYPE *output) { @@ -49,11 +51,52 @@ output[i] = (in1[i] INV_OP 0.0) ? 4.0 : 2.0; } +/* Float comparisons, int results. */ + +void +foo_int (FTYPE *in1, FTYPE *in2, ITYPE *output) +{ + int i = 0; + /* Vectorizable. */ + for (i = 0; i < N; i++) + output[i] = (in1[i] OP in2[i]) ? 2 : 4; +} + +void +bar_int (FTYPE *in1, FTYPE *in2, ITYPE *output) +{ + int i = 0; + /* Vectorizable. */ + for (i = 0; i < N; i++) + output[i] = (in1[i] INV_OP in2[i]) ? 4 : 2; +} + +void +foobar_int (FTYPE *in1, FTYPE *in2, ITYPE *output) +{ + int i = 0; + /* Vectorizable. */ + for (i = 0; i < N; i++) + output[i] = (in1[i] OP 0.0) ? 4 : 2; +} + +void +foobarbar_int (FTYPE *in1, FTYPE *in2, ITYPE *output) +{ + int i = 0; + /* Vectorizable. */ + for (i = 0; i < N; i++) + output[i] = (in1[i] INV_OP 0.0) ? 
4 : 2; +} + int main (int argc, char **argv) { FTYPE out1[N]; FTYPE out2[N]; + ITYPE outi1[N]; + ITYPE outi2[N]; + int i = 0; foo (input1, input2, out1); bar (input1, input2, out2); @@ -65,6 +108,17 @@ for (i = 0; i < N; i++) if (out1[i] == out2[i]) abort (); + + foo_int (input1, input2, outi1); + bar_int (input1, input2, outi2); + for (i = 0; i < N; i++) + if (outi1[i] != outi2[i]) + abort (); + foobar_int (input1, input2, outi1); + foobarbar_int (input1, input2, outi2); + for (i = 0; i < N; i++) + if (outi1[i] == outi2[i]) + abort (); return 0; } --- a/src/gcc/testsuite/gcc.target/aarch64/movi_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/movi_1.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +void +dummy (short* b) +{ + /* { dg-final { scan-assembler "movi\tv\[0-9\]+\.4h, 0x4, lsl 8" } } */ + /* { dg-final { scan-assembler-not "movi\tv\[0-9\]+\.4h, 0x400" } } */ + /* { dg-final { scan-assembler-not "movi\tv\[0-9\]+\.4h, 1024" } } */ + register short x asm ("h8") = 1024; + asm volatile ("" : : "w" (x)); + *b = x; +} --- a/src/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic-compile.c +++ b/src/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic-compile.c @@ -0,0 +1,11 @@ + +/* { dg-do compile } */ +/* { dg-options "-O3" } */ + +#include "arm_neon.h" + +#include "vaddv-intrinsic.x" + +/* { dg-final { scan-assembler "faddp\\ts\[0-9\]+"} } */ +/* { dg-final { scan-assembler-times "faddp\\tv\[0-9\]+\.4s" 2} } */ +/* { dg-final { scan-assembler "faddp\\td\[0-9\]+"} } */ --- a/src/gcc/testsuite/gcc.target/aarch64/vabs_intrinsic_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/vabs_intrinsic_1.c @@ -0,0 +1,101 @@ +/* { dg-do run } */ +/* { dg-options "-O3 --save-temps" } */ + +#include + +extern void abort (void); + +#define ETYPE(size) int##size##_t +#define VTYPE(size, lanes) int##size##x##lanes##_t + +#define TEST_VABS(q, size, lanes) \ +static void \ +test_vabs##q##_##size (ETYPE (size) * res, \ + const ETYPE (size) *in1) \ +{ \ + VTYPE (size, lanes) a = vld1##q##_s##size (res); \ + VTYPE (size, lanes) b = vld1##q##_s##size (in1); \ + a = vabs##q##_s##size (b); \ + vst1##q##_s##size (res, a); \ +} + +#define BUILD_VARS(width, n_lanes, n_half_lanes) \ +TEST_VABS (, width, n_half_lanes) \ +TEST_VABS (q, width, n_lanes) \ + +BUILD_VARS (64, 2, 1) +BUILD_VARS (32, 4, 2) +BUILD_VARS (16, 8, 4) +BUILD_VARS (8, 16, 8) + +#define POOL1 {-10} +#define POOL2 {2, -10} +#define POOL4 {0, -10, 2, -3} +#define POOL8 {0, -10, 2, -3, 4, -50, 6, -70} +#define POOL16 {0, -10, 2, -3, 4, -50, 6, -70, \ + -5, 10, -2, 3, -4, 50, -6, 70} + +#define EXPECTED1 {10} +#define EXPECTED2 {2, 10} +#define EXPECTED4 {0, 10, 2, 3} +#define EXPECTED8 {0, 10, 2, 3, 4, 50, 6, 70} +#define EXPECTED16 {0, 10, 2, 3, 4, 50, 6, 70, \ + 5, 10, 2, 3, 4, 50, 6, 70} + +#define BUILD_TEST(size, lanes_64, lanes_128) \ +static void \ +test_##size (void) \ +{ \ + int i; \ + ETYPE (size) pool1[lanes_64] = POOL##lanes_64; \ + ETYPE (size) res1[lanes_64] = {0}; \ + ETYPE (size) expected1[lanes_64] = EXPECTED##lanes_64; \ + ETYPE (size) pool2[lanes_128] = POOL##lanes_128; \ + ETYPE (size) res2[lanes_128] = {0}; \ + ETYPE (size) expected2[lanes_128] = EXPECTED##lanes_128; \ + \ + /* Forcefully avoid optimization. */ \ + asm volatile ("" : : : "memory"); \ + test_vabs_##size (res1, pool1); \ + for (i = 0; i < lanes_64; i++) \ + if (res1[i] != expected1[i]) \ + abort (); \ + \ + /* Forcefully avoid optimization. 
*/ \ + asm volatile ("" : : : "memory"); \ + test_vabsq_##size (res2, pool2); \ + for (i = 0; i < lanes_128; i++) \ + if (res2[i] != expected2[i]) \ + abort (); \ +} + +/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.8b, v\[0-9\]+\.8b" 1 } } */ +/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */ +BUILD_TEST (8 , 8, 16) + +/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.4h, v\[0-9\]+\.4h" 1 } } */ +/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.8h, v\[0-9\]+\.8h" 1 } } */ +BUILD_TEST (16, 4, 8) + +/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" 1 } } */ +/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" 1 } } */ +BUILD_TEST (32, 2, 4) + +/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" 1 } } */ +BUILD_TEST (64, 1, 2) + +#undef BUILD_TEST + +#define BUILD_TEST(size) test_##size () + +int +main (int argc, char **argv) +{ + BUILD_TEST (8); + BUILD_TEST (16); + BUILD_TEST (32); + BUILD_TEST (64); + return 0; +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.x +++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.x @@ -0,0 +1,37 @@ +int v = 0; + +int +atomic_fetch_add_RELAXED (int a) +{ + return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED); +} + +int +atomic_fetch_sub_RELAXED (int a) +{ + return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED); +} + +int +atomic_fetch_and_RELAXED (int a) +{ + return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED); +} + +int +atomic_fetch_nand_RELAXED (int a) +{ + return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED); +} + +int +atomic_fetch_xor_RELAXED (int a) +{ + return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED); +} + +int +atomic_fetch_or_RELAXED (int a) +{ + return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED); +} --- a/src/gcc/testsuite/gcc.target/aarch64/vect.c +++ b/src/gcc/testsuite/gcc.target/aarch64/vect.c @@ -55,6 +55,8 @@ int smin_vector[] = {0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15}; unsigned int umax_vector[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; unsigned int umin_vector[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + int sabd_vector[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + int saba_vector[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; int reduce_smax_value = 0; int reduce_smin_value = -15; unsigned int reduce_umax_value = 15; @@ -81,6 +83,8 @@ TEST (smin, s); TEST (umax, u); TEST (umin, u); + TEST (sabd, s); + TEST (saba, s); TESTV (reduce_smax, s); TESTV (reduce_smin, s); TESTV (reduce_umax, u); --- a/src/gcc/testsuite/gcc.target/aarch64/scalar-mov.c +++ b/src/gcc/testsuite/gcc.target/aarch64/scalar-mov.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-g -mgeneral-regs-only" } */ + +void +foo (const char *c, ...) 
+{ + char buf[256]; + buf[256 - 1] = '\0'; +} --- a/src/gcc/testsuite/gcc.target/aarch64/vect-movi.c +++ b/src/gcc/testsuite/gcc.target/aarch64/vect-movi.c @@ -0,0 +1,74 @@ +/* { dg-do run } */ +/* { dg-options "-O3 --save-temps -fno-inline" } */ + +extern void abort (void); + +#define N 16 + +static void +movi_msl8 (int *__restrict a) +{ + int i; + + /* { dg-final { scan-assembler "movi\\tv\[0-9\]+\.4s, 0xab, msl 8" } } */ + for (i = 0; i < N; i++) + a[i] = 0xabff; +} + +static void +movi_msl16 (int *__restrict a) +{ + int i; + + /* { dg-final { scan-assembler "movi\\tv\[0-9\]+\.4s, 0xab, msl 16" } } */ + for (i = 0; i < N; i++) + a[i] = 0xabffff; +} + +static void +mvni_msl8 (int *__restrict a) +{ + int i; + + /* { dg-final { scan-assembler "mvni\\tv\[0-9\]+\.4s, 0xab, msl 8" } } */ + for (i = 0; i < N; i++) + a[i] = 0xffff5400; +} + +static void +mvni_msl16 (int *__restrict a) +{ + int i; + + /* { dg-final { scan-assembler "mvni\\tv\[0-9\]+\.4s, 0xab, msl 16" } } */ + for (i = 0; i < N; i++) + a[i] = 0xff540000; +} + +int +main (void) +{ + int a[N] = { 0 }; + int i; + +#define CHECK_ARRAY(a, val) \ + for (i = 0; i < N; i++) \ + if (a[i] != val) \ + abort (); + + movi_msl8 (a); + CHECK_ARRAY (a, 0xabff); + + movi_msl16 (a); + CHECK_ARRAY (a, 0xabffff); + + mvni_msl8 (a); + CHECK_ARRAY (a, 0xffff5400); + + mvni_msl16 (a); + CHECK_ARRAY (a, 0xff540000); + + return 0; +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-d.c +++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-d.c @@ -2,12 +2,13 @@ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */ #define FTYPE double +#define ITYPE long #define OP >= #define INV_OP < #include "vect-fcm.x" -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 8 "vect" } } */ /* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */ /* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */ /* { dg-final { scan-assembler "fcmlt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c +++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c @@ -1,43 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-O2" } */ -int v = 0; +#include "atomic-op-acquire.x" -int -atomic_fetch_add_ACQUIRE (int a) -{ - return __atomic_fetch_add (&v, a, __ATOMIC_ACQUIRE); -} - -int -atomic_fetch_sub_ACQUIRE (int a) -{ - return __atomic_fetch_sub (&v, a, __ATOMIC_ACQUIRE); -} - -int -atomic_fetch_and_ACQUIRE (int a) -{ - return __atomic_fetch_and (&v, a, __ATOMIC_ACQUIRE); -} - -int -atomic_fetch_nand_ACQUIRE (int a) -{ - return __atomic_fetch_nand (&v, a, __ATOMIC_ACQUIRE); -} - -int -atomic_fetch_xor_ACQUIRE (int a) -{ - return __atomic_fetch_xor (&v, a, __ATOMIC_ACQUIRE); -} - -int -atomic_fetch_or_ACQUIRE (int a) -{ - return __atomic_fetch_or (&v, a, __ATOMIC_ACQUIRE); -} - /* { dg-final { scan-assembler-times "ldaxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */ /* { dg-final { scan-assembler-times "stxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/abs_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/abs_1.c @@ -0,0 +1,53 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fno-inline --save-temps" } */ + +extern long long llabs (long long); +extern void abort (void); + +long long +abs64 (long long a) +{ + /* { dg-final { scan-assembler "eor\t" } } */ + 
/* { dg-final { scan-assembler "sub\t" } } */ + return llabs (a); +} + +long long +abs64_in_dreg (long long a) +{ + /* { dg-final { scan-assembler "abs\td\[0-9\]+, d\[0-9\]+" } } */ + register long long x asm ("d8") = a; + register long long y asm ("d9"); + asm volatile ("" : : "w" (x)); + y = llabs (x); + asm volatile ("" : : "w" (y)); + return y; +} + +int +main (void) +{ + volatile long long ll0 = 0LL, ll1 = 1LL, llm1 = -1LL; + + if (abs64 (ll0) != 0LL) + abort (); + + if (abs64 (ll1) != 1LL) + abort (); + + if (abs64 (llm1) != 1LL) + abort (); + + if (abs64_in_dreg (ll0) != 0LL) + abort (); + + if (abs64_in_dreg (ll1) != 1LL) + abort (); + + if (abs64_in_dreg (llm1) != 1LL) + abort (); + + return 0; +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c +++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c @@ -1,41 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-O2" } */ -#define STRONG 0 -#define WEAK 1 -int v = 0; +#include "atomic-comp-swap-release-acquire.x" -int -atomic_compare_exchange_STRONG_RELEASE_ACQUIRE (int a, int b) -{ - return __atomic_compare_exchange (&v, &a, &b, - STRONG, __ATOMIC_RELEASE, - __ATOMIC_ACQUIRE); -} - -int -atomic_compare_exchange_WEAK_RELEASE_ACQUIRE (int a, int b) -{ - return __atomic_compare_exchange (&v, &a, &b, - WEAK, __ATOMIC_RELEASE, - __ATOMIC_ACQUIRE); -} - -int -atomic_compare_exchange_n_STRONG_RELEASE_ACQUIRE (int a, int b) -{ - return __atomic_compare_exchange_n (&v, &a, b, - STRONG, __ATOMIC_RELEASE, - __ATOMIC_ACQUIRE); -} - -int -atomic_compare_exchange_n_WEAK_RELEASE_ACQUIRE (int a, int b) -{ - return __atomic_compare_exchange_n (&v, &a, b, - WEAK, __ATOMIC_RELEASE, - __ATOMIC_ACQUIRE); -} - /* { dg-final { scan-assembler-times "ldaxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 4 } } */ /* { dg-final { scan-assembler-times "stlxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 4 } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/vect.x +++ b/src/gcc/testsuite/gcc.target/aarch64/vect.x @@ -138,3 +138,17 @@ return s; } + +void sabd (pRINT a, pRINT b, pRINT c) +{ + int i; + for (i = 0; i < 16; i++) + c[i] = abs (a[i] - b[i]); +} + +void saba (pRINT a, pRINT b, pRINT c) +{ + int i; + for (i = 0; i < 16; i++) + c[i] += abs (a[i] - b[i]); +} --- a/src/gcc/testsuite/gcc.target/aarch64/vect-clz.c +++ b/src/gcc/testsuite/gcc.target/aarch64/vect-clz.c @@ -0,0 +1,35 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -save-temps -fno-inline" } */ + +extern void abort (); + +void +count_lz_v4si (unsigned *__restrict a, int *__restrict b) +{ + int i; + + for (i = 0; i < 4; i++) + b[i] = __builtin_clz (a[i]); +} + +/* { dg-final { scan-assembler "clz\tv\[0-9\]+\.4s" } } */ + +int +main () +{ + unsigned int x[4] = { 0x0, 0xFFFF, 0x1FFFF, 0xFFFFFFFF }; + int r[4] = { 32, 16, 15, 0 }; + int d[4], i; + + count_lz_v4si (x, d); + + for (i = 0; i < 4; i++) + { + if (d[i] != r[i]) + abort (); + } + + return 0; +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/sha256_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/sha256_1.c @@ -0,0 +1,40 @@ + +/* { dg-do compile } */ +/* { dg-options "-march=armv8-a+crypto" } */ + +#include "arm_neon.h" + +uint32x4_t +test_vsha256hq_u32 (uint32x4_t hash_abcd, uint32x4_t hash_efgh, uint32x4_t wk) +{ + return vsha256hq_u32 (hash_abcd, hash_efgh, wk); +} + +/* { dg-final { scan-assembler-times "sha256h\\tq" 1 } } */ + +uint32x4_t +test_vsha256h2q_u32 (uint32x4_t hash_efgh, uint32x4_t hash_abcd, uint32x4_t wk) +{ + 
return vsha256h2q_u32 (hash_efgh, hash_abcd, wk); +} + +/* { dg-final { scan-assembler-times "sha256h2\\tq" 1 } } */ + +uint32x4_t +test_vsha256su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7) +{ + return vsha256su0q_u32 (w0_3, w4_7); +} + +/* { dg-final { scan-assembler-times "sha256su0\\tv" 1 } } */ + +uint32x4_t +test_vsha256su1q_u32 (uint32x4_t tw0_3, uint32x4_t w8_11, uint32x4_t w12_15) +{ + return vsha256su1q_u32 (tw0_3, w8_11, w12_15); +} + +/* { dg-final { scan-assembler-times "sha256su1\\tv" 1 } } */ + + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-f.c +++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-f.c @@ -2,12 +2,13 @@ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */ #define FTYPE float +#define ITYPE int #define OP > #define INV_OP <= #include "vect-fcm.x" -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 8 "vect" } } */ /* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" } } */ /* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */ /* { dg-final { scan-assembler "fcmle\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/subs3.c +++ b/src/gcc/testsuite/gcc.target/aarch64/subs3.c @@ -0,0 +1,61 @@ +/* { dg-do run } */ +/* { dg-options "-O2 --save-temps -fno-inline" } */ + +extern void abort (void); +typedef long long s64; + +int +subs_ext (s64 a, int b, int c) +{ + s64 d = a - b; + + if (d == 0) + return a + c; + else + return b + d + c; +} + +int +subs_shift_ext (s64 a, int b, int c) +{ + s64 d = (a - ((s64)b << 3)); + + if (d == 0) + return a + c; + else + return b + d + c; +} + +int main () +{ + int x; + s64 y; + + x = subs_ext (0x13000002ll, 41, 15); + if (x != 318767121) + abort (); + + x = subs_ext (0x50505050ll, 29, 4); + if (x != 1347440724) + abort (); + + x = subs_ext (0x12121212121ll, 2, 14); + if (x != 555819311) + abort (); + + x = subs_shift_ext (0x123456789ll, 4, 12); + if (x != 591751033) + abort (); + + x = subs_shift_ext (0x02020202ll, 9, 8); + if (x != 33685963) + abort (); + + x = subs_shift_ext (0x987987987987ll, 23, 41); + if (x != -2020050673) + abort (); + + return 0; +} + +/* { dg-final { scan-assembler-times "subs\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, sxtw" 2 } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/bics_2.c +++ b/src/gcc/testsuite/gcc.target/aarch64/bics_2.c @@ -0,0 +1,111 @@ +/* { dg-do run } */ +/* { dg-options "-O2 --save-temps -fno-inline" } */ + +extern void abort (void); + +int +bics_si_test1 (int a, int b, int c) +{ + int d = a & ~b; + + /* { dg-final { scan-assembler-not "bics\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" } } */ + /* { dg-final { scan-assembler-times "bic\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" 2 } } */ + if (d <= 0) + return a + c; + else + return b + d + c; +} + +int +bics_si_test2 (int a, int b, int c) +{ + int d = a & ~(b << 3); + + /* { dg-final { scan-assembler-not "bics\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, lsl 3" } } */ + /* { dg-final { scan-assembler "bic\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, lsl 3" } } */ + if (d <= 0) + return a + c; + else + return b + d + c; +} + +typedef long long s64; + +s64 +bics_di_test1 (s64 a, s64 b, s64 c) +{ + s64 d = a & ~b; + + /* { dg-final { scan-assembler-not "bics\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" } } */ + /* { dg-final { scan-assembler-times "bic\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" 2 } } 
*/ + if (d <= 0) + return a + c; + else + return b + d + c; +} + +s64 +bics_di_test2 (s64 a, s64 b, s64 c) +{ + s64 d = a & ~(b << 3); + + /* { dg-final { scan-assembler-not "bics\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, lsl 3" } } */ + /* { dg-final { scan-assembler "bic\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, lsl 3" } } */ + if (d <= 0) + return a + c; + else + return b + d + c; +} + +int +main () +{ + int x; + s64 y; + + x = bics_si_test1 (29, ~4, 5); + if (x != ((29 & 4) + ~4 + 5)) + abort (); + + x = bics_si_test1 (5, ~2, 20); + if (x != 25) + abort (); + + x = bics_si_test2 (35, ~4, 5); + if (x != ((35 & ~(~4 << 3)) + ~4 + 5)) + abort (); + + x = bics_si_test2 (96, ~2, 20); + if (x != 116) + abort (); + + y = bics_di_test1 (0x130000029ll, + ~0x320000004ll, + 0x505050505ll); + + if (y != ((0x130000029ll & 0x320000004ll) + ~0x320000004ll + 0x505050505ll)) + abort (); + + y = bics_di_test1 (0x5000500050005ll, + ~0x2111211121112ll, + 0x0000000002020ll); + if (y != 0x5000500052025ll) + abort (); + + y = bics_di_test2 (0x130000029ll, + ~0x064000008ll, + 0x505050505ll); + if (y != ((0x130000029ll & ~(~0x064000008ll << 3)) + + ~0x064000008ll + 0x505050505ll)) + abort (); + + y = bics_di_test2 (0x130002900ll, + ~0x088000008ll, + 0x505050505ll); + if (y != (0x130002900ll + 0x505050505ll)) + abort (); + + return 0; +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic.c +++ b/src/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic.c @@ -0,0 +1,28 @@ + +/* { dg-do run } */ +/* { dg-options "-O3" } */ + +#include "arm_neon.h" + +extern void abort (void); + +#include "vaddv-intrinsic.x" + +int +main (void) +{ + const float32_t pool_v2sf[] = {4.0f, 9.0f}; + const float32_t pool_v4sf[] = {4.0f, 9.0f, 16.0f, 25.0f}; + const float64_t pool_v2df[] = {4.0, 9.0}; + + if (test_vaddv_v2sf (pool_v2sf) != 13.0f) + abort (); + + if (test_vaddv_v4sf (pool_v4sf) != 54.0f) + abort (); + + if (test_vaddv_v2df (pool_v2df) != 13.0) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.x +++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.x @@ -0,0 +1,37 @@ +int v = 0; + +int +atomic_fetch_add_ACQUIRE (int a) +{ + return __atomic_fetch_add (&v, a, __ATOMIC_ACQUIRE); +} + +int +atomic_fetch_sub_ACQUIRE (int a) +{ + return __atomic_fetch_sub (&v, a, __ATOMIC_ACQUIRE); +} + +int +atomic_fetch_and_ACQUIRE (int a) +{ + return __atomic_fetch_and (&v, a, __ATOMIC_ACQUIRE); +} + +int +atomic_fetch_nand_ACQUIRE (int a) +{ + return __atomic_fetch_nand (&v, a, __ATOMIC_ACQUIRE); +} + +int +atomic_fetch_xor_ACQUIRE (int a) +{ + return __atomic_fetch_xor (&v, a, __ATOMIC_ACQUIRE); +} + +int +atomic_fetch_or_ACQUIRE (int a) +{ + return __atomic_fetch_or (&v, a, __ATOMIC_ACQUIRE); +} --- a/src/gcc/testsuite/gcc.target/aarch64/sbc.c +++ b/src/gcc/testsuite/gcc.target/aarch64/sbc.c @@ -0,0 +1,41 @@ +/* { dg-do run } */ +/* { dg-options "-O2 --save-temps" } */ + +extern void abort (void); + +typedef unsigned int u32int; +typedef unsigned long long u64int; + +u32int +test_si (u32int w1, u32int w2, u32int w3, u32int w4) +{ + u32int w0; + /* { dg-final { scan-assembler "sbc\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+\n" } } */ + w0 = w1 - w2 - (w3 < w4); + return w0; +} + +u64int +test_di (u64int x1, u64int x2, u64int x3, u64int x4) +{ + u64int x0; + /* { dg-final { scan-assembler "sbc\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+\n" } } */ + x0 = x1 - x2 - (x3 < x4); + return x0; +} + +int +main () +{ + u32int x; + u64int y; + x = test_si (7, 8, 12, 15); + if (x != -2) + 
abort(); + y = test_di (0x987654321ll, 0x123456789ll, 0x345345345ll, 0x123123123ll); + if (y != 0x8641fdb98ll) + abort(); + return 0; +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/pmull_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/pmull_1.c @@ -0,0 +1,23 @@ + +/* { dg-do compile } */ +/* { dg-options "-march=armv8-a+crypto" } */ + +#include "arm_neon.h" + +poly128_t +test_vmull_p64 (poly64_t a, poly64_t b) +{ + return vmull_p64 (a, b); +} + +/* { dg-final { scan-assembler-times "pmull\\tv" 1 } } */ + +poly128_t +test_vmull_high_p64 (poly64x2_t a, poly64x2_t b) +{ + return vmull_high_p64 (a, b); +} + +/* { dg-final { scan-assembler-times "pmull2\\tv" 1 } } */ + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.x +++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.x @@ -0,0 +1,36 @@ + +#define STRONG 0 +#define WEAK 1 +int v = 0; + +int +atomic_compare_exchange_STRONG_RELEASE_ACQUIRE (int a, int b) +{ + return __atomic_compare_exchange (&v, &a, &b, + STRONG, __ATOMIC_RELEASE, + __ATOMIC_ACQUIRE); +} + +int +atomic_compare_exchange_WEAK_RELEASE_ACQUIRE (int a, int b) +{ + return __atomic_compare_exchange (&v, &a, &b, + WEAK, __ATOMIC_RELEASE, + __ATOMIC_ACQUIRE); +} + +int +atomic_compare_exchange_n_STRONG_RELEASE_ACQUIRE (int a, int b) +{ + return __atomic_compare_exchange_n (&v, &a, b, + STRONG, __ATOMIC_RELEASE, + __ATOMIC_ACQUIRE); +} + +int +atomic_compare_exchange_n_WEAK_RELEASE_ACQUIRE (int a, int b) +{ + return __atomic_compare_exchange_n (&v, &a, b, + WEAK, __ATOMIC_RELEASE, + __ATOMIC_ACQUIRE); +} --- a/src/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c +++ b/src/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2" } */ +/* { dg-options "-O2 -dp" } */ #include @@ -32,6 +32,18 @@ vqaddd_s64 (a, d)); } +/* { dg-final { scan-assembler-times "\\tabs\\td\[0-9\]+, d\[0-9\]+" 1 } } */ + +int64x1_t +test_vabs_s64 (int64x1_t a) +{ + uint64x1_t res; + force_simd (a); + res = vabs_s64 (a); + force_simd (res); + return res; +} + /* { dg-final { scan-assembler-times "\\tcmeq\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 1 } } */ uint64x1_t @@ -181,7 +193,7 @@ return res; } -/* { dg-final { scan-assembler-times "\\tdup\\tb\[0-9\]+, v\[0-9\]+\.b" 2 } } */ +/* { dg-final { scan-assembler-times "aarch64_get_lanev16qi" 2 } } */ int8x1_t test_vdupb_lane_s8 (int8x16_t a) @@ -195,7 +207,7 @@ return vdupb_lane_u8 (a, 2); } -/* { dg-final { scan-assembler-times "\\tdup\\th\[0-9\]+, v\[0-9\]+\.h" 2 } } */ +/* { dg-final { scan-assembler-times "aarch64_get_lanev8hi" 2 } } */ int16x1_t test_vduph_lane_s16 (int16x8_t a) @@ -209,7 +221,7 @@ return vduph_lane_u16 (a, 2); } -/* { dg-final { scan-assembler-times "\\tdup\\ts\[0-9\]+, v\[0-9\]+\.s" 2 } } */ +/* { dg-final { scan-assembler-times "aarch64_get_lanev4si" 2 } } */ int32x1_t test_vdups_lane_s32 (int32x4_t a) @@ -223,18 +235,18 @@ return vdups_lane_u32 (a, 2); } -/* { dg-final { scan-assembler-times "\\tdup\\td\[0-9\]+, v\[0-9\]+\.d" 2 } } */ +/* { dg-final { scan-assembler-times "aarch64_get_lanev2di" 2 } } */ int64x1_t test_vdupd_lane_s64 (int64x2_t a) { - return vdupd_lane_s64 (a, 2); + return vdupd_lane_s64 (a, 1); } uint64x1_t test_vdupd_lane_u64 (uint64x2_t a) { - return vdupd_lane_u64 (a, 2); + return vdupd_lane_u64 (a, 1); } /* { dg-final { scan-assembler-times "\\tcmtst\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 2 } } */ --- 
a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c +++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c @@ -1,43 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-O2" } */ -int v = 0; +#include "atomic-op-int.x" -int -atomic_fetch_add_RELAXED (int a) -{ - return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED); -} - -int -atomic_fetch_sub_RELAXED (int a) -{ - return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED); -} - -int -atomic_fetch_and_RELAXED (int a) -{ - return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED); -} - -int -atomic_fetch_nand_RELAXED (int a) -{ - return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED); -} - -int -atomic_fetch_xor_RELAXED (int a) -{ - return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED); -} - -int -atomic_fetch_or_RELAXED (int a) -{ - return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED); -} - /* { dg-final { scan-assembler-times "ldxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */ /* { dg-final { scan-assembler-times "stxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/cmn-neg.c +++ b/src/gcc/testsuite/gcc.target/aarch64/cmn-neg.c @@ -0,0 +1,33 @@ +/* { dg-do run } */ +/* { dg-options "-O2 --save-temps" } */ + +extern void abort (void); + +void __attribute__ ((noinline)) +foo_s32 (int a, int b) +{ + if (a < -b) + abort (); +} +/* { dg-final { scan-assembler "cmn\tw\[0-9\]" } } */ + +void __attribute__ ((noinline)) +foo_s64 (long long a, long long b) +{ + if (a < -b) + abort (); +} +/* { dg-final { scan-assembler "cmn\tx\[0-9\]" } } */ + + +int +main (void) +{ + int a = 30; + int b = 42; + foo_s32 (a, b); + foo_s64 (a, b); + return 0; +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c +++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c @@ -1,43 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-O2" } */ -int v = 0; +#include "atomic-op-seq_cst.x" -int -atomic_fetch_add_SEQ_CST (int a) -{ - return __atomic_fetch_add (&v, a, __ATOMIC_SEQ_CST); -} - -int -atomic_fetch_sub_SEQ_CST (int a) -{ - return __atomic_fetch_sub (&v, a, __ATOMIC_SEQ_CST); -} - -int -atomic_fetch_and_SEQ_CST (int a) -{ - return __atomic_fetch_and (&v, a, __ATOMIC_SEQ_CST); -} - -int -atomic_fetch_nand_SEQ_CST (int a) -{ - return __atomic_fetch_nand (&v, a, __ATOMIC_SEQ_CST); -} - -int -atomic_fetch_xor_SEQ_CST (int a) -{ - return __atomic_fetch_xor (&v, a, __ATOMIC_SEQ_CST); -} - -int -atomic_fetch_or_SEQ_CST (int a) -{ - return __atomic_fetch_or (&v, a, __ATOMIC_SEQ_CST); -} - /* { dg-final { scan-assembler-times "ldaxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */ /* { dg-final { scan-assembler-times "stlxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic.x +++ b/src/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic.x @@ -0,0 +1,27 @@ + +float32_t +test_vaddv_v2sf (const float32_t *pool) +{ + float32x2_t val; + + val = vld1_f32 (pool); + return vaddv_f32 (val); +} + +float32_t +test_vaddv_v4sf (const float32_t *pool) +{ + float32x4_t val; + + val = vld1q_f32 (pool); + return vaddvq_f32 (val); +} + +float64_t +test_vaddv_v2df (const float64_t *pool) +{ + float64x2_t val; + + val = vld1q_f64 (pool); + return vaddvq_f64 (val); +} --- a/src/gcc/testsuite/gcc.target/aarch64/negs.c +++ b/src/gcc/testsuite/gcc.target/aarch64/negs.c @@ -0,0 +1,108 @@ +/* { dg-do run } */ +/* { dg-options "-O2 --save-temps" } */ + +extern void abort (void); +int z; + +int +negs_si_test1 (int a, int b, int c) +{ + int d = -b; + + /* { dg-final { 
scan-assembler "negs\tw\[0-9\]+, w\[0-9\]+" } } */ + if (d < 0) + return a + c; + + z = d; + return b + c + d; +} + +int +negs_si_test3 (int a, int b, int c) +{ + int d = -(b) << 3; + + /* { dg-final { scan-assembler "negs\tw\[0-9\]+, w\[0-9\]+, lsl 3" } } */ + if (d == 0) + return a + c; + + z = d; + return b + c + d; +} + +typedef long long s64; +s64 zz; + +s64 +negs_di_test1 (s64 a, s64 b, s64 c) +{ + s64 d = -b; + + /* { dg-final { scan-assembler "negs\tx\[0-9\]+, x\[0-9\]+" } } */ + if (d < 0) + return a + c; + + zz = d; + return b + c + d; +} + +s64 +negs_di_test3 (s64 a, s64 b, s64 c) +{ + s64 d = -(b) << 3; + + /* { dg-final { scan-assembler "negs\tx\[0-9\]+, x\[0-9\]+, lsl 3" } } */ + if (d == 0) + return a + c; + + zz = d; + return b + c + d; +} + +int main () +{ + int x; + s64 y; + + x = negs_si_test1 (2, 12, 5); + if (x != 7) + abort (); + + x = negs_si_test1 (1, 2, 32); + if (x != 33) + abort (); + + x = negs_si_test3 (13, 14, 5); + if (x != -93) + abort (); + + x = negs_si_test3 (15, 21, 2); + if (x != -145) + abort (); + + y = negs_di_test1 (0x20202020ll, + 0x65161611ll, + 0x42434243ll); + if (y != 0x62636263ll) + abort (); + + y = negs_di_test1 (0x1010101010101ll, + 0x123456789abcdll, + 0x5555555555555ll); + if (y != 0x6565656565656ll) + abort (); + + y = negs_di_test3 (0x62523781ll, + 0x64234978ll, + 0x12345123ll); + if (y != 0xfffffffd553d4edbll) + abort (); + + y = negs_di_test3 (0x763526268ll, + 0x101010101ll, + 0x222222222ll); + if (y != 0xfffffffb1b1b1b1bll) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c +++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c @@ -1,43 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-O2" } */ -int v = 0; +#include "atomic-op-consume.x" -int -atomic_fetch_add_CONSUME (int a) -{ - return __atomic_fetch_add (&v, a, __ATOMIC_CONSUME); -} - -int -atomic_fetch_sub_CONSUME (int a) -{ - return __atomic_fetch_sub (&v, a, __ATOMIC_CONSUME); -} - -int -atomic_fetch_and_CONSUME (int a) -{ - return __atomic_fetch_and (&v, a, __ATOMIC_CONSUME); -} - -int -atomic_fetch_nand_CONSUME (int a) -{ - return __atomic_fetch_nand (&v, a, __ATOMIC_CONSUME); -} - -int -atomic_fetch_xor_CONSUME (int a) -{ - return __atomic_fetch_xor (&v, a, __ATOMIC_CONSUME); -} - -int -atomic_fetch_or_CONSUME (int a) -{ - return __atomic_fetch_or (&v, a, __ATOMIC_CONSUME); -} - /* { dg-final { scan-assembler-times "ldxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */ /* { dg-final { scan-assembler-times "stxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/vect-vaddv.c +++ b/src/gcc/testsuite/gcc.target/aarch64/vect-vaddv.c @@ -0,0 +1,128 @@ +/* { dg-do run } */ +/* { dg-options "-O3 --save-temps -ffast-math" } */ + +#include + +extern void abort (void); +extern float fabsf (float); +extern double fabs (double); + +#define NUM_TESTS 16 +#define DELTA 0.000001 + +int8_t input_int8[] = {1, 56, 2, -9, -90, 23, 54, 76, + -4, 34, 110, -110, 6, 4, 75, -34}; +int16_t input_int16[] = {1, 56, 2, -9, -90, 23, 54, 76, + -4, 34, 110, -110, 6, 4, 75, -34}; +int32_t input_int32[] = {1, 56, 2, -9, -90, 23, 54, 76, + -4, 34, 110, -110, 6, 4, 75, -34}; +int64_t input_int64[] = {1, 56, 2, -9, -90, 23, 54, 76, + -4, 34, 110, -110, 6, 4, 75, -34}; + +uint8_t input_uint8[] = {1, 56, 2, 9, 90, 23, 54, 76, + 4, 34, 110, 110, 6, 4, 75, 34}; +uint16_t input_uint16[] = {1, 56, 2, 9, 90, 23, 54, 76, + 4, 34, 110, 110, 6, 4, 75, 34}; +uint32_t input_uint32[] = {1, 56, 2, 9, 90, 23, 54, 76, + 4, 34, 110, 110, 6, 4, 
75, 34}; + +uint64_t input_uint64[] = {1, 56, 2, 9, 90, 23, 54, 76, + 4, 34, 110, 110, 6, 4, 75, 34}; + +float input_float32[] = {0.1f, -0.1f, 0.4f, 10.3f, + 200.0f, -800.0f, -13.0f, -0.5f, + 7.9f, -870.0f, 10.4f, 310.11f, + 0.0f, -865.0f, -2213.0f, -1.5f}; + +double input_float64[] = {0.1, -0.1, 0.4, 10.3, + 200.0, -800.0, -13.0, -0.5, + 7.9, -870.0, 10.4, 310.11, + 0.0, -865.0, -2213.0, -1.5}; + +#define EQUALF(a, b) (fabsf (a - b) < DELTA) +#define EQUALD(a, b) (fabs (a - b) < DELTA) +#define EQUALL(a, b) (a == b) + +#define TEST(SUFFIX, Q, TYPE, LANES, FLOAT) \ +int \ +test_vaddv##SUFFIX##_##TYPE##x##LANES##_t (void) \ +{ \ + int i, j; \ + int moves = (NUM_TESTS - LANES) + 1; \ + TYPE##_t out_l[NUM_TESTS]; \ + TYPE##_t out_v[NUM_TESTS]; \ + \ + /* Calculate linearly. */ \ + for (i = 0; i < moves; i++) \ + { \ + out_l[i] = input_##TYPE[i]; \ + for (j = 1; j < LANES; j++) \ + out_l[i] += input_##TYPE[i + j]; \ + } \ + \ + /* Calculate using vector reduction intrinsics. */ \ + for (i = 0; i < moves; i++) \ + { \ + TYPE##x##LANES##_t t1 = vld1##Q##_##SUFFIX (input_##TYPE + i); \ + out_v[i] = vaddv##Q##_##SUFFIX (t1); \ + } \ + \ + /* Compare. */ \ + for (i = 0; i < moves; i++) \ + { \ + if (!EQUAL##FLOAT (out_v[i], out_l[i])) \ + return 0; \ + } \ + return 1; \ +} + +#define BUILD_VARIANTS(TYPE, STYPE, W32, W64, F) \ +TEST (STYPE, , TYPE, W32, F) \ +TEST (STYPE, q, TYPE, W64, F) \ + +BUILD_VARIANTS (int8, s8, 8, 16, L) +BUILD_VARIANTS (uint8, u8, 8, 16, L) +/* { dg-final { scan-assembler "addv\\tb\[0-9\]+, v\[0-9\]+\.8b" } } */ +/* { dg-final { scan-assembler "addv\\tb\[0-9\]+, v\[0-9\]+\.16b" } } */ +BUILD_VARIANTS (int16, s16, 4, 8, L) +BUILD_VARIANTS (uint16, u16, 4, 8, L) +/* { dg-final { scan-assembler "addv\\th\[0-9\]+, v\[0-9\]+\.4h" } } */ +/* { dg-final { scan-assembler "addv\\th\[0-9\]+, v\[0-9\]+\.8h" } } */ +BUILD_VARIANTS (int32, s32, 2, 4, L) +BUILD_VARIANTS (uint32, u32, 2, 4, L) +/* { dg-final { scan-assembler "addp\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */ +/* { dg-final { scan-assembler "addv\\ts\[0-9\]+, v\[0-9\]+\.4s" } } */ +TEST (s64, q, int64, 2, D) +TEST (u64, q, uint64, 2, D) +/* { dg-final { scan-assembler "addp\\td\[0-9\]+\, v\[0-9\]+\.2d" } } */ + +BUILD_VARIANTS (float32, f32, 2, 4, F) +/* { dg-final { scan-assembler "faddp\\ts\[0-9\]+, v\[0-9\]+\.2s" } } */ +/* { dg-final { scan-assembler "faddp\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */ +TEST (f64, q, float64, 2, D) +/* { dg-final { scan-assembler "faddp\\td\[0-9\]+\, v\[0-9\]+\.2d" } } */ + +#undef TEST +#define TEST(SUFFIX, Q, TYPE, LANES, FLOAT) \ +{ \ + if (!test_vaddv##SUFFIX##_##TYPE##x##LANES##_t ()) \ + abort (); \ +} + +int +main (int argc, char **argv) +{ +BUILD_VARIANTS (int8, s8, 8, 16, L) +BUILD_VARIANTS (uint8, u8, 8, 16, L) +BUILD_VARIANTS (int16, s16, 4, 8, L) +BUILD_VARIANTS (uint16, u16, 4, 8, L) +BUILD_VARIANTS (int32, s32, 2, 4, L) +BUILD_VARIANTS (uint32, u32, 2, 4, L) + +BUILD_VARIANTS (float32, f32, 2, 4, F) +TEST (f64, q, float64, 2, D) + + return 0; +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c +++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c @@ -1,43 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-O2" } */ -char v = 0; +#include "atomic-op-char.x" -char -atomic_fetch_add_RELAXED (char a) -{ - return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED); -} - -char -atomic_fetch_sub_RELAXED (char a) -{ - return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED); -} - -char -atomic_fetch_and_RELAXED 
(char a) -{ - return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED); -} - -char -atomic_fetch_nand_RELAXED (char a) -{ - return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED); -} - -char -atomic_fetch_xor_RELAXED (char a) -{ - return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED); -} - -char -atomic_fetch_or_RELAXED (char a) -{ - return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED); -} - /* { dg-final { scan-assembler-times "ldxrb\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */ /* { dg-final { scan-assembler-times "stxrb\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-int.x +++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-int.x @@ -0,0 +1,37 @@ +int v = 0; + +int +atomic_fetch_add_RELAXED (int a) +{ + return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED); +} + +int +atomic_fetch_sub_RELAXED (int a) +{ + return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED); +} + +int +atomic_fetch_and_RELAXED (int a) +{ + return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED); +} + +int +atomic_fetch_nand_RELAXED (int a) +{ + return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED); +} + +int +atomic_fetch_xor_RELAXED (int a) +{ + return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED); +} + +int +atomic_fetch_or_RELAXED (int a) +{ + return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED); +} --- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.x +++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.x @@ -0,0 +1,37 @@ +int v = 0; + +int +atomic_fetch_add_SEQ_CST (int a) +{ + return __atomic_fetch_add (&v, a, __ATOMIC_SEQ_CST); +} + +int +atomic_fetch_sub_SEQ_CST (int a) +{ + return __atomic_fetch_sub (&v, a, __ATOMIC_SEQ_CST); +} + +int +atomic_fetch_and_SEQ_CST (int a) +{ + return __atomic_fetch_and (&v, a, __ATOMIC_SEQ_CST); +} + +int +atomic_fetch_nand_SEQ_CST (int a) +{ + return __atomic_fetch_nand (&v, a, __ATOMIC_SEQ_CST); +} + +int +atomic_fetch_xor_SEQ_CST (int a) +{ + return __atomic_fetch_xor (&v, a, __ATOMIC_SEQ_CST); +} + +int +atomic_fetch_or_SEQ_CST (int a) +{ + return __atomic_fetch_or (&v, a, __ATOMIC_SEQ_CST); +} --- a/src/gcc/testsuite/gcc.target/aarch64/bfxil_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/bfxil_1.c @@ -0,0 +1,40 @@ +/* { dg-do run { target aarch64*-*-* } } */ +/* { dg-options "-O2 --save-temps -fno-inline" } */ +/* { dg-require-effective-target aarch64_little_endian } */ + +extern void abort (void); + +typedef struct bitfield +{ + unsigned short eight1: 8; + unsigned short four: 4; + unsigned short eight2: 8; + unsigned short seven: 7; + unsigned int sixteen: 16; +} bitfield; + +bitfield +bfxil (bitfield a) +{ + /* { dg-final { scan-assembler "bfxil\tx\[0-9\]+, x\[0-9\]+, 16, 8" } } */ + a.eight1 = a.eight2; + return a; +} + +int +main (void) +{ + static bitfield a; + bitfield b; + + a.eight1 = 9; + a.eight2 = 57; + b = bfxil (a); + + if (b.eight1 != a.eight2) + abort (); + + return 0; +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.x +++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.x @@ -0,0 +1,37 @@ +int v = 0; + +int +atomic_fetch_add_CONSUME (int a) +{ + return __atomic_fetch_add (&v, a, __ATOMIC_CONSUME); +} + +int +atomic_fetch_sub_CONSUME (int a) +{ + return __atomic_fetch_sub (&v, a, __ATOMIC_CONSUME); +} + +int +atomic_fetch_and_CONSUME (int a) +{ + return __atomic_fetch_and (&v, a, __ATOMIC_CONSUME); +} + +int +atomic_fetch_nand_CONSUME (int a) +{ + return __atomic_fetch_nand (&v, a, __ATOMIC_CONSUME); +} + +int +atomic_fetch_xor_CONSUME (int a) +{ 
+ return __atomic_fetch_xor (&v, a, __ATOMIC_CONSUME); +} + +int +atomic_fetch_or_CONSUME (int a) +{ + return __atomic_fetch_or (&v, a, __ATOMIC_CONSUME); +} --- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c +++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c @@ -1,43 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-O2" } */ -short v = 0; +#include "atomic-op-short.x" -short -atomic_fetch_add_RELAXED (short a) -{ - return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED); -} - -short -atomic_fetch_sub_RELAXED (short a) -{ - return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED); -} - -short -atomic_fetch_and_RELAXED (short a) -{ - return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED); -} - -short -atomic_fetch_nand_RELAXED (short a) -{ - return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED); -} - -short -atomic_fetch_xor_RELAXED (short a) -{ - return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED); -} - -short -atomic_fetch_or_RELAXED (short a) -{ - return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED); -} - /* { dg-final { scan-assembler-times "ldxrh\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */ /* { dg-final { scan-assembler-times "stxrh\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-char.x +++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-char.x @@ -0,0 +1,37 @@ +char v = 0; + +char +atomic_fetch_add_RELAXED (char a) +{ + return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED); +} + +char +atomic_fetch_sub_RELAXED (char a) +{ + return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED); +} + +char +atomic_fetch_and_RELAXED (char a) +{ + return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED); +} + +char +atomic_fetch_nand_RELAXED (char a) +{ + return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED); +} + +char +atomic_fetch_xor_RELAXED (char a) +{ + return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED); +} + +char +atomic_fetch_or_RELAXED (char a) +{ + return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED); +} --- a/src/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-f.c +++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-f.c @@ -2,12 +2,13 @@ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */ #define FTYPE float +#define ITYPE int #define OP == #define INV_OP != #include "vect-fcm.x" -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 8 "vect" } } */ /* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" } } */ /* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/vect-fp-compile.c +++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fp-compile.c @@ -11,3 +11,4 @@ /* { dg-final { scan-assembler "fdiv\\tv" } } */ /* { dg-final { scan-assembler "fneg\\tv" } } */ /* { dg-final { scan-assembler "fabs\\tv" } } */ +/* { dg-final { scan-assembler "fabd\\tv" } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/adds1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/adds1.c @@ -0,0 +1,149 @@ +/* { dg-do run } */ +/* { dg-options "-O2 --save-temps -fno-inline" } */ + +extern void abort (void); + +int +adds_si_test1 (int a, int b, int c) +{ + int d = a + b; + + /* { dg-final { scan-assembler "adds\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" } } */ + if (d == 0) + return a + c; + else + return b + d + c; +} + +int +adds_si_test2 (int a, int b, int c) +{ + int d = a + 0xff; + + /* { 
dg-final { scan-assembler "adds\tw\[0-9\]+, w\[0-9\]+, 255" } } */ + if (d == 0) + return a + c; + else + return b + d + c; +} + +int +adds_si_test3 (int a, int b, int c) +{ + int d = a + (b << 3); + + /* { dg-final { scan-assembler "adds\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, lsl 3" } } */ + if (d == 0) + return a + c; + else + return b + d + c; +} + +typedef long long s64; + +s64 +adds_di_test1 (s64 a, s64 b, s64 c) +{ + s64 d = a + b; + + /* { dg-final { scan-assembler "adds\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" } } */ + if (d == 0) + return a + c; + else + return b + d + c; +} + +s64 +adds_di_test2 (s64 a, s64 b, s64 c) +{ + s64 d = a + 0xff; + + /* { dg-final { scan-assembler "adds\tx\[0-9\]+, x\[0-9\]+, 255" } } */ + if (d == 0) + return a + c; + else + return b + d + c; +} + +s64 +adds_di_test3 (s64 a, s64 b, s64 c) +{ + s64 d = a + (b << 3); + + /* { dg-final { scan-assembler "adds\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, lsl 3" } } */ + if (d == 0) + return a + c; + else + return b + d + c; +} + +int main () +{ + int x; + s64 y; + + x = adds_si_test1 (29, 4, 5); + if (x != 42) + abort (); + + x = adds_si_test1 (5, 2, 20); + if (x != 29) + abort (); + + x = adds_si_test2 (29, 4, 5); + if (x != 293) + abort (); + + x = adds_si_test2 (1024, 2, 20); + if (x != 1301) + abort (); + + x = adds_si_test3 (35, 4, 5); + if (x != 76) + abort (); + + x = adds_si_test3 (5, 2, 20); + if (x != 43) + abort (); + + y = adds_di_test1 (0x130000029ll, + 0x320000004ll, + 0x505050505ll); + + if (y != 0xc75050536) + abort (); + + y = adds_di_test1 (0x5000500050005ll, + 0x2111211121112ll, + 0x0000000002020ll); + if (y != 0x9222922294249) + abort (); + + y = adds_di_test2 (0x130000029ll, + 0x320000004ll, + 0x505050505ll); + if (y != 0x955050631) + abort (); + + y = adds_di_test2 (0x130002900ll, + 0x320000004ll, + 0x505050505ll); + if (y != 0x955052f08) + abort (); + + y = adds_di_test3 (0x130000029ll, + 0x064000008ll, + 0x505050505ll); + if (y != 0x9b9050576) + abort (); + + y = adds_di_test3 (0x130002900ll, + 0x088000008ll, + 0x505050505ll); + if (y != 0xafd052e4d) + abort (); + + return 0; +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/insv_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/insv_1.c @@ -0,0 +1,85 @@ +/* { dg-do run { target aarch64*-*-* } } */ +/* { dg-options "-O2 --save-temps -fno-inline" } */ +/* { dg-require-effective-target aarch64_little_endian } */ + +extern void abort (void); + +typedef struct bitfield +{ + unsigned short eight: 8; + unsigned short four: 4; + unsigned short five: 5; + unsigned short seven: 7; + unsigned int sixteen: 16; +} bitfield; + +bitfield +bfi1 (bitfield a) +{ + /* { dg-final { scan-assembler "bfi\tx\[0-9\]+, x\[0-9\]+, 0, 8" } } */ + a.eight = 3; + return a; +} + +bitfield +bfi2 (bitfield a) +{ + /* { dg-final { scan-assembler "bfi\tx\[0-9\]+, x\[0-9\]+, 16, 5" } } */ + a.five = 7; + return a; +} + +bitfield +movk (bitfield a) +{ + /* { dg-final { scan-assembler "movk\tx\[0-9\]+, 0x1d6b, lsl 32" } } */ + a.sixteen = 7531; + return a; +} + +bitfield +set1 (bitfield a) +{ + /* { dg-final { scan-assembler "orr\tx\[0-9\]+, x\[0-9\]+, 2031616" } } */ + a.five = 0x1f; + return a; +} + +bitfield +set0 (bitfield a) +{ + /* { dg-final { scan-assembler "and\tx\[0-9\]+, x\[0-9\]+, -2031617" } } */ + a.five = 0; + return a; +} + + +int +main (int argc, char** argv) +{ + static bitfield a; + bitfield b = bfi1 (a); + bitfield c = bfi2 (b); + bitfield d = movk (c); + + if (d.eight != 3) + abort (); + + if (d.five != 7) + abort (); + + if 
(d.sixteen != 7531) + abort (); + + d = set1 (d); + if (d.five != 0x1f) + abort (); + + d = set0 (d); + if (d.five != 0) + abort (); + + return 0; +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/ror.c +++ b/src/gcc/testsuite/gcc.target/aarch64/ror.c @@ -0,0 +1,34 @@ +/* { dg-options "-O2 --save-temps" } */ +/* { dg-do run } */ + +extern void abort (void); + +int +test_si (int a) +{ + /* { dg-final { scan-assembler "ror\tw\[0-9\]+, w\[0-9\]+, 27\n" } } */ + return (a << 5) | ((unsigned int) a >> 27); +} + +long long +test_di (long long a) +{ + /* { dg-final { scan-assembler "ror\tx\[0-9\]+, x\[0-9\]+, 45\n" } } */ + return (a << 19) | ((unsigned long long) a >> 45); +} + +int +main () +{ + int v; + long long w; + v = test_si (0x0203050); + if (v != 0x4060a00) + abort(); + w = test_di (0x0000020506010304ll); + if (w != 0x1028300818200000ll) + abort(); + return 0; +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/ands_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/ands_1.c @@ -0,0 +1,151 @@ +/* { dg-do run } */ +/* { dg-options "-O2 --save-temps -fno-inline" } */ + +extern void abort (void); + +int +ands_si_test1 (int a, int b, int c) +{ + int d = a & b; + + /* { dg-final { scan-assembler-times "ands\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" 2 } } */ + if (d == 0) + return a + c; + else + return b + d + c; +} + +int +ands_si_test2 (int a, int b, int c) +{ + int d = a & 0xff; + + /* { dg-final { scan-assembler "ands\tw\[0-9\]+, w\[0-9\]+, 255" } } */ + if (d == 0) + return a + c; + else + return b + d + c; +} + +int +ands_si_test3 (int a, int b, int c) +{ + int d = a & (b << 3); + + /* { dg-final { scan-assembler "ands\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, lsl 3" } } */ + if (d == 0) + return a + c; + else + return b + d + c; +} + +typedef long long s64; + +s64 +ands_di_test1 (s64 a, s64 b, s64 c) +{ + s64 d = a & b; + + /* { dg-final { scan-assembler-times "ands\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" 2 } } */ + if (d == 0) + return a + c; + else + return b + d + c; +} + +s64 +ands_di_test2 (s64 a, s64 b, s64 c) +{ + s64 d = a & 0xff; + + /* { dg-final { scan-assembler "ands\tx\[0-9\]+, x\[0-9\]+, 255" } } */ + if (d == 0) + return a + c; + else + return b + d + c; +} + +s64 +ands_di_test3 (s64 a, s64 b, s64 c) +{ + s64 d = a & (b << 3); + + /* { dg-final { scan-assembler "ands\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, lsl 3" } } */ + if (d == 0) + return a + c; + else + return b + d + c; +} + +int +main () +{ + int x; + s64 y; + + x = ands_si_test1 (29, 4, 5); + if (x != 13) + abort (); + + x = ands_si_test1 (5, 2, 20); + if (x != 25) + abort (); + + x = ands_si_test2 (29, 4, 5); + if (x != 38) + abort (); + + x = ands_si_test2 (1024, 2, 20); + if (x != 1044) + abort (); + + x = ands_si_test3 (35, 4, 5); + if (x != 41) + abort (); + + x = ands_si_test3 (5, 2, 20); + if (x != 25) + abort (); + + y = ands_di_test1 (0x130000029ll, + 0x320000004ll, + 0x505050505ll); + + if (y != ((0x130000029ll & 0x320000004ll) + 0x320000004ll + 0x505050505ll)) + abort (); + + y = ands_di_test1 (0x5000500050005ll, + 0x2111211121112ll, + 0x0000000002020ll); + if (y != 0x5000500052025ll) + abort (); + + y = ands_di_test2 (0x130000029ll, + 0x320000004ll, + 0x505050505ll); + if (y != ((0x130000029ll & 0xff) + 0x320000004ll + 0x505050505ll)) + abort (); + + y = ands_di_test2 (0x130002900ll, + 0x320000004ll, + 0x505050505ll); + if (y != (0x130002900ll + 0x505050505ll)) + abort (); + + y = ands_di_test3 (0x130000029ll, + 0x064000008ll, + 0x505050505ll); + if 
(y != ((0x130000029ll & (0x064000008ll << 3)) + + 0x064000008ll + 0x505050505ll)) + abort (); + + y = ands_di_test3 (0x130002900ll, + 0x088000008ll, + 0x505050505ll); + if (y != (0x130002900ll + 0x505050505ll)) + abort (); + + return 0; +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c +++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c @@ -1,43 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-O2" } */ -int v = 0; +#include "atomic-op-release.x" -int -atomic_fetch_add_RELEASE (int a) -{ - return __atomic_fetch_add (&v, a, __ATOMIC_RELEASE); -} - -int -atomic_fetch_sub_RELEASE (int a) -{ - return __atomic_fetch_sub (&v, a, __ATOMIC_RELEASE); -} - -int -atomic_fetch_and_RELEASE (int a) -{ - return __atomic_fetch_and (&v, a, __ATOMIC_RELEASE); -} - -int -atomic_fetch_nand_RELEASE (int a) -{ - return __atomic_fetch_nand (&v, a, __ATOMIC_RELEASE); -} - -int -atomic_fetch_xor_RELEASE (int a) -{ - return __atomic_fetch_xor (&v, a, __ATOMIC_RELEASE); -} - -int -atomic_fetch_or_RELEASE (int a) -{ - return __atomic_fetch_or (&v, a, __ATOMIC_RELEASE); -} - /* { dg-final { scan-assembler-times "ldxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */ /* { dg-final { scan-assembler-times "stlxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/vect-vfmaxv.c +++ b/src/gcc/testsuite/gcc.target/aarch64/vect-vfmaxv.c @@ -0,0 +1,169 @@ +/* { dg-do run } */ +/* { dg-options "-O3 --save-temps -ffast-math" } */ + +#include + +extern void abort (void); + +extern float fabsf (float); +extern double fabs (double); +extern int isnan (double); +extern float fmaxf (float, float); +extern float fminf (float, float); +extern double fmax (double, double); +extern double fmin (double, double); + +#define NUM_TESTS 16 +#define DELTA 0.000001 +#define NAN (0.0 / 0.0) + +float input_float32[] = {0.1f, -0.1f, 0.4f, 10.3f, + 200.0f, -800.0f, -13.0f, -0.5f, + NAN, -870.0f, 10.4f, 310.11f, + 0.0f, -865.0f, -2213.0f, -1.5f}; + +double input_float64[] = {0.1, -0.1, 0.4, 10.3, + 200.0, -800.0, -13.0, -0.5, + NAN, -870.0, 10.4, 310.11, + 0.0, -865.0, -2213.0, -1.5}; + +#define EQUALF(a, b) (fabsf (a - b) < DELTA) +#define EQUALD(a, b) (fabs (a - b) < DELTA) + +/* Floating point 'unordered' variants. */ + +#undef TEST +#define TEST(MAXMIN, CMP_OP, SUFFIX, Q, TYPE, LANES, FLOAT) \ +int \ +test_v##MAXMIN##v##SUFFIX##_##TYPE##x##LANES##_t (void) \ +{ \ + int i, j; \ + int moves = (NUM_TESTS - LANES) + 1; \ + TYPE##_t out_l[NUM_TESTS]; \ + TYPE##_t out_v[NUM_TESTS]; \ + \ + /* Calculate linearly. */ \ + for (i = 0; i < moves; i++) \ + { \ + out_l[i] = input_##TYPE[i]; \ + for (j = 0; j < LANES; j++) \ + { \ + if (isnan (out_l[i])) \ + continue; \ + if (isnan (input_##TYPE[i + j]) \ + || input_##TYPE[i + j] CMP_OP out_l[i]) \ + out_l[i] = input_##TYPE[i + j]; \ + } \ + } \ + \ + /* Calculate using vector reduction intrinsics. */ \ + for (i = 0; i < moves; i++) \ + { \ + TYPE##x##LANES##_t t1 = vld1##Q##_##SUFFIX (input_##TYPE + i); \ + out_v[i] = v##MAXMIN##v##Q##_##SUFFIX (t1); \ + } \ + \ + /* Compare. 
*/ \ + for (i = 0; i < moves; i++) \ + { \ + if (!EQUAL##FLOAT (out_v[i], out_l[i]) \ + && !(isnan (out_v[i]) && isnan (out_l[i]))) \ + return 0; \ + } \ + return 1; \ +} + +#define BUILD_VARIANTS(TYPE, STYPE, W32, W64, F) \ +TEST (max, >, STYPE, , TYPE, W32, F) \ +TEST (max, >, STYPE, q, TYPE, W64, F) \ +TEST (min, <, STYPE, , TYPE, W32, F) \ +TEST (min, <, STYPE, q, TYPE, W64, F) + +BUILD_VARIANTS (float32, f32, 2, 4, F) +/* { dg-final { scan-assembler "fmaxp\\ts\[0-9\]+, v\[0-9\]+\.2s" } } */ +/* { dg-final { scan-assembler "fminp\\ts\[0-9\]+, v\[0-9\]+\.2s" } } */ +/* { dg-final { scan-assembler "fmaxv\\ts\[0-9\]+, v\[0-9\]+\.4s" } } */ +/* { dg-final { scan-assembler "fminv\\ts\[0-9\]+, v\[0-9\]+\.4s" } } */ +TEST (max, >, f64, q, float64, 2, D) +/* { dg-final { scan-assembler "fmaxp\\td\[0-9\]+, v\[0-9\]+\.2d" } } */ +TEST (min, <, f64, q, float64, 2, D) +/* { dg-final { scan-assembler "fminp\\td\[0-9\]+, v\[0-9\]+\.2d" } } */ + +/* Floating point 'nm' variants. */ + +#undef TEST +#define TEST(MAXMIN, F, SUFFIX, Q, TYPE, LANES, FLOAT) \ +int \ +test_v##MAXMIN##nmv##SUFFIX##_##TYPE##x##LANES##_t (void) \ +{ \ + int i, j; \ + int moves = (NUM_TESTS - LANES) + 1; \ + TYPE##_t out_l[NUM_TESTS]; \ + TYPE##_t out_v[NUM_TESTS]; \ + \ + /* Calculate linearly. */ \ + for (i = 0; i < moves; i++) \ + { \ + out_l[i] = input_##TYPE[i]; \ + for (j = 0; j < LANES; j++) \ + out_l[i] = f##MAXMIN##F (input_##TYPE[i + j], out_l[i]); \ + } \ + \ + /* Calculate using vector reduction intrinsics. */ \ + for (i = 0; i < moves; i++) \ + { \ + TYPE##x##LANES##_t t1 = vld1##Q##_##SUFFIX (input_##TYPE + i); \ + out_v[i] = v##MAXMIN##nmv##Q##_##SUFFIX (t1); \ + } \ + \ + /* Compare. */ \ + for (i = 0; i < moves; i++) \ + { \ + if (!EQUAL##FLOAT (out_v[i], out_l[i])) \ + return 0; \ + } \ + return 1; \ +} + +TEST (max, f, f32, , float32, 2, D) +/* { dg-final { scan-assembler "fmaxnmp\\ts\[0-9\]+, v\[0-9\]+\.2s" } } */ +TEST (min, f, f32, , float32, 2, D) +/* { dg-final { scan-assembler "fminnmp\\ts\[0-9\]+, v\[0-9\]+\.2s" } } */ +TEST (max, f, f32, q, float32, 4, D) +/* { dg-final { scan-assembler "fmaxnmv\\ts\[0-9\]+, v\[0-9\]+\.4s" } } */ +TEST (min, f, f32, q, float32, 4, D) +/* { dg-final { scan-assembler "fminnmv\\ts\[0-9\]+, v\[0-9\]+\.4s" } } */ +TEST (max, , f64, q, float64, 2, D) +/* { dg-final { scan-assembler "fmaxnmp\\td\[0-9\]+, v\[0-9\]+\.2d" } } */ +TEST (min, , f64, q, float64, 2, D) +/* { dg-final { scan-assembler "fminnmp\\td\[0-9\]+, v\[0-9\]+\.2d" } } */ + +#undef TEST +#define TEST(MAXMIN, CMP_OP, SUFFIX, Q, TYPE, LANES, FLOAT) \ +{ \ + if (!test_v##MAXMIN##v##SUFFIX##_##TYPE##x##LANES##_t ()) \ + abort (); \ +} + +int +main (int argc, char **argv) +{ + BUILD_VARIANTS (float32, f32, 2, 4, F) + TEST (max, >, f64, q, float64, 2, D) + TEST (min, <, f64, q, float64, 2, D) + +#undef TEST +#define TEST(MAXMIN, CMP_OP, SUFFIX, Q, TYPE, LANES, FLOAT) \ +{ \ + if (!test_v##MAXMIN##nmv##SUFFIX##_##TYPE##x##LANES##_t ()) \ + abort (); \ +} + + BUILD_VARIANTS (float32, f32, 2, 4, F) + TEST (max, >, f64, q, float64, 2, D) + TEST (min, <, f64, q, float64, 2, D) + + return 0; +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-short.x +++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-short.x @@ -0,0 +1,37 @@ +short v = 0; + +short +atomic_fetch_add_RELAXED (short a) +{ + return __atomic_fetch_add (&v, a, __ATOMIC_RELAXED); +} + +short +atomic_fetch_sub_RELAXED (short a) +{ + return __atomic_fetch_sub (&v, a, __ATOMIC_RELAXED); +} + +short 
+atomic_fetch_and_RELAXED (short a) +{ + return __atomic_fetch_and (&v, a, __ATOMIC_RELAXED); +} + +short +atomic_fetch_nand_RELAXED (short a) +{ + return __atomic_fetch_nand (&v, a, __ATOMIC_RELAXED); +} + +short +atomic_fetch_xor_RELAXED (short a) +{ + return __atomic_fetch_xor (&v, a, __ATOMIC_RELAXED); +} + +short +atomic_fetch_or_RELAXED (short a) +{ + return __atomic_fetch_or (&v, a, __ATOMIC_RELAXED); +} --- a/src/gcc/testsuite/gcc.target/aarch64/vect-vcvt.c +++ b/src/gcc/testsuite/gcc.target/aarch64/vect-vcvt.c @@ -0,0 +1,132 @@ +/* { dg-do run } */ +/* { dg-options "-O3 --save-temps -ffast-math" } */ + +#include + +extern void abort (void); +extern double fabs (double); + +#define NUM_TESTS 8 +#define DELTA 0.000001 + +float input_f32[] = {0.1f, -0.1f, 0.4f, 10.3f, + 200.0f, -800.0f, -13.0f, -0.5f}; +double input_f64[] = {0.1, -0.1, 0.4, 10.3, + 200.0, -800.0, -13.0, -0.5}; + +#define TEST(SUFFIX, Q, WIDTH, LANES, S, U, D) \ +int \ +test_vcvt##SUFFIX##_##S##WIDTH##_f##WIDTH##x##LANES##_t (void) \ +{ \ + int ret = 1; \ + int i = 0; \ + int nlanes = LANES; \ + U##int##WIDTH##_t expected_out[NUM_TESTS]; \ + U##int##WIDTH##_t actual_out[NUM_TESTS]; \ + \ + for (i = 0; i < NUM_TESTS; i++) \ + { \ + expected_out[i] \ + = vcvt##SUFFIX##D##_##S##WIDTH##_f##WIDTH (input_f##WIDTH[i]); \ + /* Don't vectorize this. */ \ + asm volatile ("" : : : "memory"); \ + } \ + \ + for (i = 0; i < NUM_TESTS; i+=nlanes) \ + { \ + U##int##WIDTH##x##LANES##_t out = \ + vcvt##SUFFIX##Q##_##S##WIDTH##_f##WIDTH \ + (vld1##Q##_f##WIDTH (input_f##WIDTH + i)); \ + vst1##Q##_##S##WIDTH (actual_out + i, out); \ + } \ + \ + for (i = 0; i < NUM_TESTS; i++) \ + ret &= fabs (expected_out[i] - actual_out[i]) < DELTA; \ + \ + return ret; \ +} \ + + +#define BUILD_VARIANTS(SUFFIX) \ +TEST (SUFFIX, , 32, 2, s, ,s) \ +TEST (SUFFIX, q, 32, 4, s, ,s) \ +TEST (SUFFIX, q, 64, 2, s, ,d) \ +TEST (SUFFIX, , 32, 2, u,u,s) \ +TEST (SUFFIX, q, 32, 4, u,u,s) \ +TEST (SUFFIX, q, 64, 2, u,u,d) \ + +BUILD_VARIANTS ( ) +/* { dg-final { scan-assembler "fcvtzs\\tw\[0-9\]+, s\[0-9\]+" } } */ +/* { dg-final { scan-assembler "fcvtzs\\tx\[0-9\]+, d\[0-9\]+" } } */ +/* { dg-final { scan-assembler "fcvtzs\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */ +/* { dg-final { scan-assembler "fcvtzs\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */ +/* { dg-final { scan-assembler "fcvtzs\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */ +/* { dg-final { scan-assembler "fcvtzu\\tw\[0-9\]+, s\[0-9\]+" } } */ +/* { dg-final { scan-assembler "fcvtzu\\tx\[0-9\]+, d\[0-9\]+" } } */ +/* { dg-final { scan-assembler "fcvtzu\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */ +/* { dg-final { scan-assembler "fcvtzu\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */ +/* { dg-final { scan-assembler "fcvtzu\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */ +BUILD_VARIANTS (a) +/* { dg-final { scan-assembler "fcvtas\\tw\[0-9\]+, s\[0-9\]+" } } */ +/* { dg-final { scan-assembler "fcvtas\\tx\[0-9\]+, d\[0-9\]+" } } */ +/* { dg-final { scan-assembler "fcvtas\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */ +/* { dg-final { scan-assembler "fcvtas\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */ +/* { dg-final { scan-assembler "fcvtas\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */ +/* { dg-final { scan-assembler "fcvtau\\tw\[0-9\]+, s\[0-9\]+" } } */ +/* { dg-final { scan-assembler "fcvtau\\tx\[0-9\]+, d\[0-9\]+" } } */ +/* { dg-final { scan-assembler "fcvtau\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */ +/* { dg-final { scan-assembler "fcvtau\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */ +/* { dg-final { scan-assembler "fcvtau\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */ 
+BUILD_VARIANTS (m) +/* { dg-final { scan-assembler "fcvtms\\tw\[0-9\]+, s\[0-9\]+" } } */ +/* { dg-final { scan-assembler "fcvtms\\tx\[0-9\]+, d\[0-9\]+" } } */ +/* { dg-final { scan-assembler "fcvtms\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */ +/* { dg-final { scan-assembler "fcvtms\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */ +/* { dg-final { scan-assembler "fcvtms\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */ +/* { dg-final { scan-assembler "fcvtmu\\tw\[0-9\]+, s\[0-9\]+" } } */ +/* { dg-final { scan-assembler "fcvtmu\\tx\[0-9\]+, d\[0-9\]+" } } */ +/* { dg-final { scan-assembler "fcvtmu\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */ +/* { dg-final { scan-assembler "fcvtmu\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */ +/* { dg-final { scan-assembler "fcvtmu\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */ +BUILD_VARIANTS (n) +/* { dg-final { scan-assembler "fcvtns\\tw\[0-9\]+, s\[0-9\]+" } } */ +/* { dg-final { scan-assembler "fcvtns\\tx\[0-9\]+, d\[0-9\]+" } } */ +/* { dg-final { scan-assembler "fcvtns\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */ +/* { dg-final { scan-assembler "fcvtns\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */ +/* { dg-final { scan-assembler "fcvtns\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */ +/* { dg-final { scan-assembler "fcvtnu\\tw\[0-9\]+, s\[0-9\]+" } } */ +/* { dg-final { scan-assembler "fcvtnu\\tx\[0-9\]+, d\[0-9\]+" } } */ +/* { dg-final { scan-assembler "fcvtnu\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */ +/* { dg-final { scan-assembler "fcvtnu\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */ +/* { dg-final { scan-assembler "fcvtnu\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */ +BUILD_VARIANTS (p) +/* { dg-final { scan-assembler "fcvtps\\tw\[0-9\]+, s\[0-9\]+" } } */ +/* { dg-final { scan-assembler "fcvtps\\tx\[0-9\]+, d\[0-9\]+" } } */ +/* { dg-final { scan-assembler "fcvtps\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */ +/* { dg-final { scan-assembler "fcvtps\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */ +/* { dg-final { scan-assembler "fcvtps\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */ +/* { dg-final { scan-assembler "fcvtpu\\tw\[0-9\]+, s\[0-9\]+" } } */ +/* { dg-final { scan-assembler "fcvtpu\\tx\[0-9\]+, d\[0-9\]+" } } */ +/* { dg-final { scan-assembler "fcvtpu\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */ +/* { dg-final { scan-assembler "fcvtpu\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */ +/* { dg-final { scan-assembler "fcvtpu\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */ + +#undef TEST +#define TEST(SUFFIX, Q, WIDTH, LANES, S, U, D) \ +{ \ + if (!test_vcvt##SUFFIX##_##S##WIDTH##_f##WIDTH##x##LANES##_t ()) \ + abort (); \ +} + +int +main (int argc, char **argv) +{ + BUILD_VARIANTS ( ) + BUILD_VARIANTS (a) + BUILD_VARIANTS (m) + BUILD_VARIANTS (n) + BUILD_VARIANTS (p) + return 0; +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-release.x +++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-release.x @@ -0,0 +1,37 @@ +int v = 0; + +int +atomic_fetch_add_RELEASE (int a) +{ + return __atomic_fetch_add (&v, a, __ATOMIC_RELEASE); +} + +int +atomic_fetch_sub_RELEASE (int a) +{ + return __atomic_fetch_sub (&v, a, __ATOMIC_RELEASE); +} + +int +atomic_fetch_and_RELEASE (int a) +{ + return __atomic_fetch_and (&v, a, __ATOMIC_RELEASE); +} + +int +atomic_fetch_nand_RELEASE (int a) +{ + return __atomic_fetch_nand (&v, a, __ATOMIC_RELEASE); +} + +int +atomic_fetch_xor_RELEASE (int a) +{ + return __atomic_fetch_xor (&v, a, __ATOMIC_RELEASE); +} + +int +atomic_fetch_or_RELEASE (int a) +{ + return __atomic_fetch_or (&v, a, __ATOMIC_RELEASE); +} --- a/src/gcc/testsuite/gcc.target/aarch64/fabd.c +++ 
b/src/gcc/testsuite/gcc.target/aarch64/fabd.c @@ -0,0 +1,38 @@ +/* { dg-do run } */ +/* { dg-options "-O1 -fno-inline --save-temps" } */ + +extern double fabs (double); +extern float fabsf (float); +extern void abort (); +extern void exit (int); + +void +fabd_d (double x, double y, double d) +{ + if ((fabs (x - y) - d) > 0.00001) + abort (); +} + +/* { dg-final { scan-assembler "fabd\td\[0-9\]+" } } */ + +void +fabd_f (float x, float y, float d) +{ + if ((fabsf (x - y) - d) > 0.00001) + abort (); +} + +/* { dg-final { scan-assembler "fabd\ts\[0-9\]+" } } */ + +int +main () +{ + fabd_d (10.0, 5.0, 5.0); + fabd_d (5.0, 10.0, 5.0); + fabd_f (10.0, 5.0, 5.0); + fabd_f (5.0, 10.0, 5.0); + + return 0; +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/vect-fp.c +++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fp.c @@ -117,6 +117,16 @@ 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0 }; + F32 fabd_F32_vector[] = { 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f }; + + F64 fabd_F64_vector[] = { 1.0, 1.0, 1.0, 1.0, + 1.0, 1.0, 1.0, 1.0, + 1.0, 1.0, 1.0, 1.0, + 1.0, 1.0, 1.0, 1.0 }; + /* Setup input vectors. */ for (i=1; i<=16; i++) { @@ -132,6 +142,7 @@ TEST (div, 3); TEST (neg, 2); TEST (abs, 2); + TEST (fabd, 3); return 0; } --- a/src/gcc/testsuite/gcc.target/aarch64/ngc.c +++ b/src/gcc/testsuite/gcc.target/aarch64/ngc.c @@ -0,0 +1,66 @@ +/* { dg-do run } */ +/* { dg-options "-O2 --save-temps -fno-inline" } */ + +extern void abort (void); +typedef unsigned int u32; + +u32 +ngc_si (u32 a, u32 b, u32 c, u32 d) +{ + a = -b - (c < d); + return a; +} + +typedef unsigned long long u64; + +u64 +ngc_si_tst (u64 a, u32 b, u32 c, u32 d) +{ + a = -b - (c < d); + return a; +} + +u64 +ngc_di (u64 a, u64 b, u64 c, u64 d) +{ + a = -b - (c < d); + return a; +} + +int +main () +{ + int x; + u64 y; + + x = ngc_si (29, 4, 5, 4); + if (x != -4) + abort (); + + x = ngc_si (1024, 2, 20, 13); + if (x != -2) + abort (); + + y = ngc_si_tst (0x130000029ll, 32, 50, 12); + if (y != 0xffffffe0) + abort (); + + y = ngc_si_tst (0x5000500050005ll, 21, 2, 14); + if (y != 0xffffffea) + abort (); + + y = ngc_di (0x130000029ll, 0x320000004ll, 0x505050505ll, 0x123123123ll); + if (y != 0xfffffffcdffffffc) + abort (); + + y = ngc_di (0x5000500050005ll, + 0x2111211121112ll, 0x0000000002020ll, 0x1414575046477ll); + if (y != 0xfffdeeedeeedeeed) + abort (); + + return 0; +} + +/* { dg-final { scan-assembler-times "ngc\tw\[0-9\]+, w\[0-9\]+" 2 } } */ +/* { dg-final { scan-assembler-times "ngc\tx\[0-9\]+, x\[0-9\]+" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/sha1_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/sha1_1.c @@ -0,0 +1,55 @@ + +/* { dg-do compile } */ +/* { dg-options "-march=armv8-a+crypto" } */ + +#include "arm_neon.h" + +uint32x4_t +test_vsha1cq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) +{ + return vsha1cq_u32 (hash_abcd, hash_e, wk); +} + +/* { dg-final { scan-assembler-times "sha1c\\tq" 1 } } */ + +uint32x4_t +test_vsha1mq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) +{ + return vsha1mq_u32 (hash_abcd, hash_e, wk); +} + +/* { dg-final { scan-assembler-times "sha1m\\tq" 1 } } */ + +uint32x4_t +test_vsha1pq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) +{ + return vsha1pq_u32 (hash_abcd, hash_e, wk); +} + +/* { dg-final { scan-assembler-times "sha1p\\tq" 1 } } */ + +uint32_t +test_vsha1h_u32 (uint32_t hash_e) +{ + return vsha1h_u32 (hash_e); +} 
+ +/* { dg-final { scan-assembler-times "sha1h\\ts" 1 } } */ + +uint32x4_t +test_vsha1su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7, uint32x4_t w8_11) +{ + return vsha1su0q_u32 (w0_3, w4_7, w8_11); +} + +/* { dg-final { scan-assembler-times "sha1su0\\tv" 1 } } */ + +uint32x4_t +test_vsha1su1q_u32 (uint32x4_t tw0_3, uint32x4_t w12_15) +{ + return vsha1su1q_u32 (tw0_3, w12_15); +} + +/* { dg-final { scan-assembler-times "sha1su1\\tv" 1 } } */ + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/cmp.c +++ b/src/gcc/testsuite/gcc.target/aarch64/cmp.c @@ -0,0 +1,61 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +int +cmp_si_test1 (int a, int b, int c) +{ + if (a > b) + return a + c; + else + return a + b + c; +} + +int +cmp_si_test2 (int a, int b, int c) +{ + if ((a >> 3) > b) + return a + c; + else + return a + b + c; +} + +typedef long long s64; + +s64 +cmp_di_test1 (s64 a, s64 b, s64 c) +{ + if (a > b) + return a + c; + else + return a + b + c; +} + +s64 +cmp_di_test2 (s64 a, s64 b, s64 c) +{ + if ((a >> 3) > b) + return a + c; + else + return a + b + c; +} + +int +cmp_di_test3 (int a, s64 b, s64 c) +{ + if (a > b) + return a + c; + else + return a + b + c; +} + +int +cmp_di_test4 (int a, s64 b, s64 c) +{ + if (((s64)a << 3) > b) + return a + c; + else + return a + b + c; +} + +/* { dg-final { scan-assembler-times "cmp\tw\[0-9\]+, w\[0-9\]+" 2 } } */ +/* { dg-final { scan-assembler-times "cmp\tx\[0-9\]+, x\[0-9\]+" 4 } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-f.c +++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-f.c @@ -2,12 +2,13 @@ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */ #define FTYPE float +#define ITYPE int #define OP >= #define INV_OP < #include "vect-fcm.x" -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 8 "vect" } } */ /* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" } } */ /* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */ /* { dg-final { scan-assembler "fcmlt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/bfxil_2.c +++ b/src/gcc/testsuite/gcc.target/aarch64/bfxil_2.c @@ -0,0 +1,42 @@ +/* { dg-do run { target aarch64*-*-* } } */ +/* { dg-options "-O2 --save-temps -fno-inline" } */ +/* { dg-require-effective-target aarch64_big_endian } */ + +extern void abort (void); + +typedef struct bitfield +{ + unsigned short eight1: 8; + unsigned short four: 4; + unsigned short eight2: 8; + unsigned short seven: 7; + unsigned int sixteen: 16; + unsigned short eight3: 8; + unsigned short eight4: 8; +} bitfield; + +bitfield +bfxil (bitfield a) +{ + /* { dg-final { scan-assembler "bfxil\tx\[0-9\]+, x\[0-9\]+, 40, 8" } } */ + a.eight4 = a.eight2; + return a; +} + +int +main (void) +{ + static bitfield a; + bitfield b; + + a.eight4 = 9; + a.eight2 = 57; + b = bfxil (a); + + if (b.eight4 != a.eight2) + abort (); + + return 0; +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/vect-fp.x +++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fp.x @@ -7,6 +7,16 @@ extern float fabsf (float); extern double fabs (double); +#define DEF3a(fname, type, op) \ + void fname##_##type (pR##type a, \ + pR##type b, \ + pR##type c) \ + { \ + int i; \ + for (i = 0; i < 16; i++) \ + a[i] = op (b[i] - c[i]); \ + } + #define 
DEF3(fname, type, op) \ void fname##_##type (pR##type a, \ pR##type b, \ @@ -13,7 +23,7 @@ pR##type c) \ { \ int i; \ - for (i=0; i<16; i++) \ + for (i = 0; i < 16; i++) \ a[i] = b[i] op c[i]; \ } @@ -22,11 +32,15 @@ pR##type b) \ { \ int i; \ - for (i=0; i<16; i++) \ + for (i = 0; i < 16; i++) \ a[i] = op(b[i]); \ } +#define DEFN3a(fname, op) \ + DEF3a (fname, F32, op) \ + DEF3a (fname, F64, op) + #define DEFN3(fname, op) \ DEF3 (fname, F32, op) \ DEF3 (fname, F64, op) @@ -42,3 +56,5 @@ DEFN2 (neg, -) DEF2 (abs, F32, fabsf) DEF2 (abs, F64, fabs) +DEF3a (fabd, F32, fabsf) +DEF3a (fabd, F64, fabs) --- a/src/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c +++ b/src/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c @@ -1,43 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-O2" } */ -int v = 0; +#include "atomic-op-acq_rel.x" -int -atomic_fetch_add_ACQ_REL (int a) -{ - return __atomic_fetch_add (&v, a, __ATOMIC_ACQ_REL); -} - -int -atomic_fetch_sub_ACQ_REL (int a) -{ - return __atomic_fetch_sub (&v, a, __ATOMIC_ACQ_REL); -} - -int -atomic_fetch_and_ACQ_REL (int a) -{ - return __atomic_fetch_and (&v, a, __ATOMIC_ACQ_REL); -} - -int -atomic_fetch_nand_ACQ_REL (int a) -{ - return __atomic_fetch_nand (&v, a, __ATOMIC_ACQ_REL); -} - -int -atomic_fetch_xor_ACQ_REL (int a) -{ - return __atomic_fetch_xor (&v, a, __ATOMIC_ACQ_REL); -} - -int -atomic_fetch_or_ACQ_REL (int a) -{ - return __atomic_fetch_or (&v, a, __ATOMIC_ACQ_REL); -} - /* { dg-final { scan-assembler-times "ldaxr\tw\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */ /* { dg-final { scan-assembler-times "stlxr\tw\[0-9\]+, w\[0-9\]+, \\\[x\[0-9\]+\\\]" 6 } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/subs1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/subs1.c @@ -0,0 +1,149 @@ +/* { dg-do run } */ +/* { dg-options "-O2 --save-temps -fno-inline" } */ + +extern void abort (void); + +int +subs_si_test1 (int a, int b, int c) +{ + int d = a - c; + + /* { dg-final { scan-assembler "subs\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" } } */ + if (d == 0) + return a + c; + else + return b + d + c; +} + +int +subs_si_test2 (int a, int b, int c) +{ + int d = a - 0xff; + + /* { dg-final { scan-assembler "subs\tw\[0-9\]+, w\[0-9\]+, #255" } } */ + if (d == 0) + return a + c; + else + return b + d + c; +} + +int +subs_si_test3 (int a, int b, int c) +{ + int d = a - (b << 3); + + /* { dg-final { scan-assembler "subs\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, lsl 3" } } */ + if (d == 0) + return a + c; + else + return b + d + c; +} + +typedef long long s64; + +s64 +subs_di_test1 (s64 a, s64 b, s64 c) +{ + s64 d = a - c; + + /* { dg-final { scan-assembler "subs\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" } } */ + if (d == 0) + return a + c; + else + return b + d + c; +} + +s64 +subs_di_test2 (s64 a, s64 b, s64 c) +{ + s64 d = a - 0xff; + + /* { dg-final { scan-assembler "subs\tx\[0-9\]+, x\[0-9\]+, #255" } } */ + if (d == 0) + return a + c; + else + return b + d + c; +} + +s64 +subs_di_test3 (s64 a, s64 b, s64 c) +{ + s64 d = a - (b << 3); + + /* { dg-final { scan-assembler "subs\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, lsl 3" } } */ + if (d == 0) + return a + c; + else + return b + d + c; +} + +int main () +{ + int x; + s64 y; + + x = subs_si_test1 (29, 4, 5); + if (x != 33) + abort (); + + x = subs_si_test1 (5, 2, 20); + if (x != 7) + abort (); + + x = subs_si_test2 (29, 4, 5); + if (x != -217) + abort (); + + x = subs_si_test2 (1024, 2, 20); + if (x != 791) + abort (); + + x = subs_si_test3 (35, 4, 5); + if (x != 12) + abort (); + + x = subs_si_test3 (5, 2, 20); + if (x != 11) + abort (); + + y 
= subs_di_test1 (0x130000029ll, + 0x320000004ll, + 0x505050505ll); + + if (y != 0x45000002d) + abort (); + + y = subs_di_test1 (0x5000500050005ll, + 0x2111211121112ll, + 0x0000000002020ll); + if (y != 0x7111711171117) + abort (); + + y = subs_di_test2 (0x130000029ll, + 0x320000004ll, + 0x505050505ll); + if (y != 0x955050433) + abort (); + + y = subs_di_test2 (0x130002900ll, + 0x320000004ll, + 0x505050505ll); + if (y != 0x955052d0a) + abort (); + + y = subs_di_test3 (0x130000029ll, + 0x064000008ll, + 0x505050505ll); + if (y != 0x3790504f6) + abort (); + + y = subs_di_test3 (0x130002900ll, + 0x088000008ll, + 0x505050505ll); + if (y != 0x27d052dcd) + abort (); + + return 0; +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/adds2.c +++ b/src/gcc/testsuite/gcc.target/aarch64/adds2.c @@ -0,0 +1,155 @@ +/* { dg-do run } */ +/* { dg-options "-O2 --save-temps -fno-inline" } */ + +extern void abort (void); + +int +adds_si_test1 (int a, int b, int c) +{ + int d = a + b; + + /* { dg-final { scan-assembler-not "adds\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" } } */ + /* { dg-final { scan-assembler "add\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" } } */ + if (d <= 0) + return a + c; + else + return b + d + c; +} + +int +adds_si_test2 (int a, int b, int c) +{ + int d = a + 0xfff; + + /* { dg-final { scan-assembler-not "adds\tw\[0-9\]+, w\[0-9\]+, 4095" } } */ + /* { dg-final { scan-assembler "add\tw\[0-9\]+, w\[0-9\]+, 4095" } } */ + if (d <= 0) + return a + c; + else + return b + d + c; +} + +int +adds_si_test3 (int a, int b, int c) +{ + int d = a + (b << 3); + + /* { dg-final { scan-assembler-not "adds\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, lsl 3" } } */ + /* { dg-final { scan-assembler "add\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, lsl 3" } } */ + if (d <= 0) + return a + c; + else + return b + d + c; +} + +typedef long long s64; + +s64 +adds_di_test1 (s64 a, s64 b, s64 c) +{ + s64 d = a + b; + + /* { dg-final { scan-assembler-not "adds\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" } } */ + /* { dg-final { scan-assembler "add\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" } } */ + if (d <= 0) + return a + c; + else + return b + d + c; +} + +s64 +adds_di_test2 (s64 a, s64 b, s64 c) +{ + s64 d = a + 0x1000ll; + + /* { dg-final { scan-assembler-not "adds\tx\[0-9\]+, x\[0-9\]+, 4096" } } */ + /* { dg-final { scan-assembler "add\tx\[0-9\]+, x\[0-9\]+, 4096" } } */ + if (d <= 0) + return a + c; + else + return b + d + c; +} + +s64 +adds_di_test3 (s64 a, s64 b, s64 c) +{ + s64 d = a + (b << 3); + + /* { dg-final { scan-assembler-not "adds\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, lsl 3" } } */ + /* { dg-final { scan-assembler "add\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, lsl 3" } } */ + if (d <= 0) + return a + c; + else + return b + d + c; +} + +int main () +{ + int x; + s64 y; + + x = adds_si_test1 (29, 4, 5); + if (x != 42) + abort (); + + x = adds_si_test1 (5, 2, 20); + if (x != 29) + abort (); + + x = adds_si_test2 (29, 4, 5); + if (x != 4133) + abort (); + + x = adds_si_test2 (1024, 2, 20); + if (x != 5141) + abort (); + + x = adds_si_test3 (35, 4, 5); + if (x != 76) + abort (); + + x = adds_si_test3 (5, 2, 20); + if (x != 43) + abort (); + + y = adds_di_test1 (0x130000029ll, + 0x320000004ll, + 0x505050505ll); + + if (y != 0xc75050536) + abort (); + + y = adds_di_test1 (0x5000500050005ll, + 0x2111211121112ll, + 0x0000000002020ll); + if (y != 0x9222922294249) + abort (); + + y = adds_di_test2 (0x130000029ll, + 0x320000004ll, + 0x505050505ll); + if (y != 0x955051532) + abort (); + + y = adds_di_test2 (0x540004100ll, + 0x320000004ll, 
+ 0x805050205ll); + if (y != 0x1065055309) + abort (); + + y = adds_di_test3 (0x130000029ll, + 0x064000008ll, + 0x505050505ll); + if (y != 0x9b9050576) + abort (); + + y = adds_di_test3 (0x130002900ll, + 0x088000008ll, + 0x505050505ll); + if (y != 0xafd052e4d) + abort (); + + return 0; +} + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-d.c +++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-d.c @@ -2,12 +2,13 @@ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */ #define FTYPE double +#define ITYPE long #define OP > #define INV_OP <= #include "vect-fcm.x" -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 8 "vect" } } */ /* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */ /* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */ /* { dg-final { scan-assembler "fcmle\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */ --- a/src/gcc/testsuite/lib/target-supports.exp +++ b/src/gcc/testsuite/lib/target-supports.exp @@ -487,13 +487,6 @@ return 0 } - # We don't yet support profiling for AArch64. - if { [istarget aarch64*-*-*] - && ([lindex $test_what 1] == "-p" - || [lindex $test_what 1] == "-pg") } { - return 0 - } - # cygwin does not support -p. if { [istarget *-*-cygwin*] && $test_what == "-p" } { return 0 @@ -2012,6 +2005,7 @@ || ([istarget powerpc*-*-*] && ![istarget powerpc-*-linux*paired*]) || [istarget x86_64-*-*] + || [istarget aarch64*-*-*] || ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok])} { set et_vect_uintfloat_cvt_saved 1 @@ -2078,6 +2072,15 @@ }] } +# Return 1 if this is a AArch64 target supporting little endian +proc check_effective_target_aarch64_little_endian { } { + return [check_no_compiler_messages aarch64_little_endian assembly { + #if !defined(__aarch64__) || defined(__AARCH64EB__) + #error FOO + #endif + }] +} + # Return 1 is this is an arm target using 32-bit instructions proc check_effective_target_arm32 { } { return [check_no_compiler_messages arm32 assembly { @@ -2147,22 +2150,6 @@ } } -# Return 1 if this is an ARM target supporting -mfpu=neon-fp-armv8 -# -mfloat-abi=softfp -proc check_effective_target_arm_v8_neon_ok {} { - if { [check_effective_target_arm32] } { - return [check_no_compiler_messages arm_v8_neon_ok object { - int foo (void) - { - __asm__ volatile ("vrintn.f32 q0, q0"); - return 0; - } - } "-mfpu=neon-fp-armv8 -mfloat-abi=softfp"] - } else { - return 0 - } -} - # Return 1 if this is an ARM target supporting -mfpu=vfp # -mfloat-abi=hard. Some multilibs may be incompatible with these # options. @@ -2202,6 +2189,49 @@ }] } +# Return 1 if this is an ARM target supporting -mfpu=crypto-neon-fp-armv8 +# -mfloat-abi=softfp or equivalent options. Some multilibs may be +# incompatible with these options. Also set et_arm_crypto_flags to the +# best options to add. 
+ +proc check_effective_target_arm_crypto_ok_nocache { } { + global et_arm_crypto_flags + set et_arm_crypto_flags "" + if { [check_effective_target_arm32] } { + foreach flags {"" "-mfloat-abi=softfp" "-mfpu=crypto-neon-fp-armv8" "-mfpu=crypto-neon-fp-armv8 -mfloat-abi=softfp"} { + if { [check_no_compiler_messages_nocache arm_crypto_ok object { + #include "arm_neon.h" + uint8x16_t + foo (uint8x16_t a, uint8x16_t b) + { + return vaeseq_u8 (a, b); + } + } "$flags"] } { + set et_arm_crypto_flags $flags + return 1 + } + } + } + + return 0 +} + +# Return 1 if this is an ARM target supporting -mfpu=crypto-neon-fp-armv8 + +proc check_effective_target_arm_crypto_ok { } { + return [check_cached_effective_target arm_crypto_ok \ + check_effective_target_arm_crypto_ok_nocache] +} + +# Add options for crypto extensions. +proc add_options_for_arm_crypto { flags } { + if { ! [check_effective_target_arm_crypto_ok] } { + return "$flags" + } + global et_arm_crypto_flags + return "$flags $et_arm_crypto_flags" +} + # Add the options needed for NEON. We need either -mfloat-abi=softfp # or -mfloat-abi=hard, but if one is already specified by the # multilib, use it. Similarly, if a -mfpu option already enables @@ -2226,9 +2256,18 @@ if { ! [check_effective_target_arm_v8_neon_ok] } { return "$flags" } - return "$flags -march=armv8-a -mfpu=neon-fp-armv8 -mfloat-abi=softfp" + global et_arm_v8_neon_flags + return "$flags $et_arm_v8_neon_flags -march=armv8-a" } +proc add_options_for_arm_crc { flags } { + if { ! [check_effective_target_arm_crc_ok] } { + return "$flags" + } + global et_arm_crc_flags + return "$flags $et_arm_crc_flags" +} + # Add the options needed for NEON. We need either -mfloat-abi=softfp # or -mfloat-abi=hard, but if one is already specified by the # multilib, use it. Similarly, if a -mfpu option already enables @@ -2270,6 +2309,94 @@ check_effective_target_arm_neon_ok_nocache] } +proc check_effective_target_arm_crc_ok_nocache { } { + global et_arm_crc_flags + set et_arm_crc_flags "-march=armv8-a+crc" + return [check_no_compiler_messages_nocache arm_crc_ok object { + #if !defined (__ARM_FEATURE_CRC32) + #error FOO + #endif + } "$et_arm_crc_flags"] +} + +proc check_effective_target_arm_crc_ok { } { + return [check_cached_effective_target arm_crc_ok \ + check_effective_target_arm_crc_ok_nocache] +} + +# Return 1 if this is an ARM target supporting -mfpu=neon-fp16 +# -mfloat-abi=softfp or equivalent options. Some multilibs may be +# incompatible with these options. Also set et_arm_neon_flags to the +# best options to add. + +proc check_effective_target_arm_neon_fp16_ok_nocache { } { + global et_arm_neon_fp16_flags + set et_arm_neon_fp16_flags "" + if { [check_effective_target_arm32] } { + foreach flags {"" "-mfloat-abi=softfp" "-mfpu=neon-fp16" + "-mfpu=neon-fp16 -mfloat-abi=softfp"} { + if { [check_no_compiler_messages_nocache arm_neon_fp_16_ok object { + #include "arm_neon.h" + float16x4_t + foo (float32x4_t arg) + { + return vcvt_f16_f32 (arg); + } + } "$flags"] } { + set et_arm_neon_fp16_flags $flags + return 1 + } + } + } + + return 0 +} + +proc check_effective_target_arm_neon_fp16_ok { } { + return [check_cached_effective_target arm_neon_fp16_ok \ + check_effective_target_arm_neon_fp16_ok_nocache] +} + +proc add_options_for_arm_neon_fp16 { flags } { + if { ! 
[check_effective_target_arm_neon_fp16_ok] } { + return "$flags" + } + global et_arm_neon_fp16_flags + return "$flags $et_arm_neon_fp16_flags" +} + +# Return 1 if this is an ARM target supporting -mfpu=neon-fp-armv8 +# -mfloat-abi=softfp or equivalent options. Some multilibs may be +# incompatible with these options. Also set et_arm_v8_neon_flags to the +# best options to add. + +proc check_effective_target_arm_v8_neon_ok_nocache { } { + global et_arm_v8_neon_flags + set et_arm_v8_neon_flags "" + if { [check_effective_target_arm32] } { + foreach flags {"" "-mfloat-abi=softfp" "-mfpu=neon-fp-armv8" "-mfpu=neon-fp-armv8 -mfloat-abi=softfp"} { + if { [check_no_compiler_messages_nocache arm_v8_neon_ok object { + #include "arm_neon.h" + void + foo () + { + __asm__ volatile ("vrintn.f32 q0, q0"); + } + } "$flags"] } { + set et_arm_v8_neon_flags $flags + return 1 + } + } + } + + return 0 +} + +proc check_effective_target_arm_v8_neon_ok { } { + return [check_cached_effective_target arm_v8_neon_ok \ + check_effective_target_arm_v8_neon_ok_nocache] +} + # Return 1 if this is an ARM target supporting -mfpu=neon-vfpv4 # -mfloat-abi=softfp or equivalent options. Some multilibs may be # incompatible with these options. Also set et_arm_neonv2_flags to the @@ -2332,6 +2459,11 @@ # Must generate floating-point instructions. return 0 } + if [check_effective_target_arm_hf_eabi] { + # Use existing float-abi and force an fpu which supports fp16 + set et_arm_fp16_flags "-mfpu=vfpv4" + return 1; + } if [check-flags [list "" { *-*-* } { "-mfpu=*" } { "" } ]] { # The existing -mfpu value is OK; use it, but add softfp. set et_arm_fp16_flags "-mfloat-abi=softfp" @@ -2464,6 +2596,17 @@ } ""] } +# Return 1 if this is an ARM target where conditional execution is available. + +proc check_effective_target_arm_cond_exec { } { + return [check_no_compiler_messages arm_cond_exec assembly { + #if defined(__arm__) && defined(__thumb__) && !defined(__thumb2__) + #error FOO + #endif + int i; + } ""] +} + # Return 1 if this is an ARM cortex-M profile cpu proc check_effective_target_arm_cortex_m { } { @@ -2509,6 +2652,24 @@ } [add_options_for_arm_neonv2 ""]] } +# Return 1 if the target supports executing ARMv8 NEON instructions, 0 +# otherwise. + +proc check_effective_target_arm_v8_neon_hw { } { + return [check_runtime arm_v8_neon_hw_available { + #include "arm_neon.h" + int + main (void) + { + float32x2_t a; + asm ("vrinta.f32 %P0, %P1" + : "=w" (a) + : "0" (a)); + return 0; + } + } [add_options_for_arm_v8_neon ""]] +} + # Return 1 if this is a ARM target with NEON enabled. proc check_effective_target_arm_neon { } { @@ -4591,6 +4752,33 @@ return 0 } +# Return 1 if programs are intended to be run on hardware rather than +# on a simulator + +proc check_effective_target_hw { } { + + # All "src/sim" simulators set this one. + if [board_info target exists is_simulator] { + if [board_info target is_simulator] { + return 0 + } else { + return 1 + } + } + + # The "sid" simulators don't set that one, but at least they set + # this one. + if [board_info target exists slow_simulator] { + if [board_info target slow_simulator] { + return 0 + } else { + return 1 + } + } + + return 1 +} + # Return 1 if the target is a VxWorks kernel. proc check_effective_target_vxworks_kernel { } { --- a/src/gcc/testsuite/ChangeLog.linaro +++ b/src/gcc/testsuite/ChangeLog.linaro @@ -0,0 +1,978 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. 
+ +2014-02-10 Michael Collison + + Backport from trunk r206519 + 2014-01-10 Kyrylo Tkachov + + * lib/target-supports.exp + (check_effective_target_arm_crypto_ok_nocache): New. + (check_effective_target_arm_crypto_ok): Use above procedure. + (add_options_for_arm_crypto): Use et_arm_crypto_flags. + +2014-02-10 Michael Collison + + Backport from trunk r206151 + 2013-12-20 Kyrylo Tkachov + + * gcc.target/arm/neon-vceq_p64.c: New test. + * gcc.target/arm/neon-vtst_p64.c: Likewise. + +2014-02-10 Michael Collison + + Backport from trunk r206131 + 2013-12-04 Kyrylo Tkachov + + * lib/target-supports.exp (check_effective_target_arm_crypto_ok): + New procedure. + (add_options_for_arm_crypto): Likewise. + * gcc.target/arm/crypto-vaesdq_u8.c: New test. + * gcc.target/arm/crypto-vaeseq_u8.c: Likewise. + * gcc.target/arm/crypto-vaesimcq_u8.c: Likewise. + * gcc.target/arm/crypto-vaesmcq_u8.c: Likewise. + * gcc.target/arm/crypto-vldrq_p128.c: Likewise. + * gcc.target/arm/crypto-vmull_high_p64.c: Likewise. + * gcc.target/arm/crypto-vmullp64.c: Likewise. + * gcc.target/arm/crypto-vsha1cq_u32.c: Likewise. + * gcc.target/arm/crypto-vsha1h_u32.c: Likewise. + * gcc.target/arm/crypto-vsha1mq_u32.c: Likewise. + * gcc.target/arm/crypto-vsha1pq_u32.c: Likewise. + * gcc.target/arm/crypto-vsha1su0q_u32.c: Likewise. + * gcc.target/arm/crypto-vsha1su1q_u32.c: Likewise. + * gcc.target/arm/crypto-vsha256h2q_u32.c: Likewise. + * gcc.target/arm/crypto-vsha256hq_u32.c: Likewise. + * gcc.target/arm/crypto-vsha256su0q_u32.c: Likewise. + * gcc.target/arm/crypto-vsha256su1q_u32.c: Likewise. + * gcc.target/arm/crypto-vstrq_p128.c: Likewise. + * gcc.target/arm/neon/vbslQp64: Generate. + * gcc.target/arm/neon/vbslp64: Likewise. + * gcc.target/arm/neon/vcombinep64: Likewise. + * gcc.target/arm/neon/vcreatep64: Likewise. + * gcc.target/arm/neon/vdupQ_lanep64: Likewise. + * gcc.target/arm/neon/vdupQ_np64: Likewise. + * gcc.target/arm/neon/vdup_lanep64: Likewise. + * gcc.target/arm/neon/vdup_np64: Likewise. + * gcc.target/arm/neon/vextQp64: Likewise. + * gcc.target/arm/neon/vextp64: Likewise. + * gcc.target/arm/neon/vget_highp64: Likewise. + * gcc.target/arm/neon/vget_lowp64: Likewise. + * gcc.target/arm/neon/vld1Q_dupp64: Likewise. + * gcc.target/arm/neon/vld1Q_lanep64: Likewise. + * gcc.target/arm/neon/vld1Qp64: Likewise. + * gcc.target/arm/neon/vld1_dupp64: Likewise. + * gcc.target/arm/neon/vld1_lanep64: Likewise. + * gcc.target/arm/neon/vld1p64: Likewise. + * gcc.target/arm/neon/vld2_dupp64: Likewise. + * gcc.target/arm/neon/vld2p64: Likewise. + * gcc.target/arm/neon/vld3_dupp64: Likewise. + * gcc.target/arm/neon/vld3p64: Likewise. + * gcc.target/arm/neon/vld4_dupp64: Likewise. + * gcc.target/arm/neon/vld4p64: Likewise. + * gcc.target/arm/neon/vreinterpretQf32_p128: Likewise. + * gcc.target/arm/neon/vreinterpretQf32_p64: Likewise. + * gcc.target/arm/neon/vreinterpretQp128_f32: Likewise. + * gcc.target/arm/neon/vreinterpretQp128_p16: Likewise. + * gcc.target/arm/neon/vreinterpretQp128_p64: Likewise. + * gcc.target/arm/neon/vreinterpretQp128_p8: Likewise. + * gcc.target/arm/neon/vreinterpretQp128_s16: Likewise. + * gcc.target/arm/neon/vreinterpretQp128_s32: Likewise. + * gcc.target/arm/neon/vreinterpretQp128_s64: Likewise. + * gcc.target/arm/neon/vreinterpretQp128_s8: Likewise. + * gcc.target/arm/neon/vreinterpretQp128_u16: Likewise. + * gcc.target/arm/neon/vreinterpretQp128_u32: Likewise. + * gcc.target/arm/neon/vreinterpretQp128_u64: Likewise. + * gcc.target/arm/neon/vreinterpretQp128_u8: Likewise. 
+ * gcc.target/arm/neon/vreinterpretQp16_p128: Likewise. + * gcc.target/arm/neon/vreinterpretQp16_p64: Likewise. + * gcc.target/arm/neon/vreinterpretQp64_f32: Likewise. + * gcc.target/arm/neon/vreinterpretQp64_p128: Likewise. + * gcc.target/arm/neon/vreinterpretQp64_p16: Likewise. + * gcc.target/arm/neon/vreinterpretQp64_p8: Likewise. + * gcc.target/arm/neon/vreinterpretQp64_s16: Likewise. + * gcc.target/arm/neon/vreinterpretQp64_s32: Likewise. + * gcc.target/arm/neon/vreinterpretQp64_s64: Likewise. + * gcc.target/arm/neon/vreinterpretQp64_s8: Likewise. + * gcc.target/arm/neon/vreinterpretQp64_u16: Likewise. + * gcc.target/arm/neon/vreinterpretQp64_u32: Likewise. + * gcc.target/arm/neon/vreinterpretQp64_u64: Likewise. + * gcc.target/arm/neon/vreinterpretQp64_u8: Likewise. + * gcc.target/arm/neon/vreinterpretQp8_p128: Likewise. + * gcc.target/arm/neon/vreinterpretQp8_p64: Likewise. + * gcc.target/arm/neon/vreinterpretQs16_p128: Likewise. + * gcc.target/arm/neon/vreinterpretQs16_p64: Likewise. + * gcc.target/arm/neon/vreinterpretQs32_p128: Likewise. + * gcc.target/arm/neon/vreinterpretQs32_p64: Likewise. + * gcc.target/arm/neon/vreinterpretQs64_p128: Likewise. + * gcc.target/arm/neon/vreinterpretQs64_p64: Likewise. + * gcc.target/arm/neon/vreinterpretQs8_p128: Likewise. + * gcc.target/arm/neon/vreinterpretQs8_p64: Likewise. + * gcc.target/arm/neon/vreinterpretQu16_p128: Likewise. + * gcc.target/arm/neon/vreinterpretQu16_p64: Likewise. + * gcc.target/arm/neon/vreinterpretQu32_p128: Likewise. + * gcc.target/arm/neon/vreinterpretQu32_p64: Likewise. + * gcc.target/arm/neon/vreinterpretQu64_p128: Likewise. + * gcc.target/arm/neon/vreinterpretQu64_p64: Likewise. + * gcc.target/arm/neon/vreinterpretQu8_p128: Likewise. + * gcc.target/arm/neon/vreinterpretQu8_p64: Likewise. + * gcc.target/arm/neon/vreinterpretf32_p64: Likewise. + * gcc.target/arm/neon/vreinterpretp16_p64: Likewise. + * gcc.target/arm/neon/vreinterpretp64_f32: Likewise. + * gcc.target/arm/neon/vreinterpretp64_p16: Likewise. + * gcc.target/arm/neon/vreinterpretp64_p8: Likewise. + * gcc.target/arm/neon/vreinterpretp64_s16: Likewise. + * gcc.target/arm/neon/vreinterpretp64_s32: Likewise. + * gcc.target/arm/neon/vreinterpretp64_s64: Likewise. + * gcc.target/arm/neon/vreinterpretp64_s8: Likewise. + * gcc.target/arm/neon/vreinterpretp64_u16: Likewise. + * gcc.target/arm/neon/vreinterpretp64_u32: Likewise. + * gcc.target/arm/neon/vreinterpretp64_u64: Likewise. + * gcc.target/arm/neon/vreinterpretp64_u8: Likewise. + * gcc.target/arm/neon/vreinterpretp8_p64: Likewise. + * gcc.target/arm/neon/vreinterprets16_p64: Likewise. + * gcc.target/arm/neon/vreinterprets32_p64: Likewise. + * gcc.target/arm/neon/vreinterprets64_p64: Likewise. + * gcc.target/arm/neon/vreinterprets8_p64: Likewise. + * gcc.target/arm/neon/vreinterpretu16_p64: Likewise. + * gcc.target/arm/neon/vreinterpretu32_p64: Likewise. + * gcc.target/arm/neon/vreinterpretu64_p64: Likewise. + * gcc.target/arm/neon/vreinterpretu8_p64: Likewise. + * gcc.target/arm/neon/vsliQ_np64: Likewise. + * gcc.target/arm/neon/vsli_np64: Likewise. + * gcc.target/arm/neon/vsriQ_np64: Likewise. + * gcc.target/arm/neon/vsri_np64: Likewise. + * gcc.target/arm/neon/vst1Q_lanep64: Likewise. + * gcc.target/arm/neon/vst1Qp64: Likewise. + * gcc.target/arm/neon/vst1_lanep64: Likewise. + * gcc.target/arm/neon/vst1p64: Likewise. + * gcc.target/arm/neon/vst2p64: Likewise. + * gcc.target/arm/neon/vst3p64: Likewise. + * gcc.target/arm/neon/vst4p64: Likewise. 
+ +2014-02-10 Michael Collison + + Backport from trunk r206128 + 2013-12-19 Kyrylo Tkachov + + * lib/target-supports.exp (add_options_for_arm_crc): New procedure. + (check_effective_target_arm_crc_ok_nocache): Likewise. + (check_effective_target_arm_crc_ok): Likewise. + * gcc.target/arm/acle/: New directory. + * gcc.target/arm/acle/acle.exp: New. + * gcc.target/arm/acle/crc32b.c: New test. + * gcc.target/arm/acle/crc32h.c: Likewise. + * gcc.target/arm/acle/crc32w.c: Likewise. + * gcc.target/arm/acle/crc32d.c: Likewise. + * gcc.target/arm/acle/crc32cb.c: Likewise. + * gcc.target/arm/acle/crc32ch.c: Likewise. + * gcc.target/arm/acle/crc32cw.c: Likewise. + * gcc.target/arm/acle/crc32cd.c: Likewise. + +2014-02-10 Michael Collison + + Backport from trunk r206120 + 2013-12-19 Tejas Belagod + + * gcc.target/aarch64/pmull_1.c: New. + +2014-02-10 Michael Collison + + Backport from trunk r206119 + 2013-12-19 Tejas Belagod + + * gcc.target/aarch64/sha256_1.c: New. + +2014-02-10 Michael Collison + + Backport from trunk r206118 + 2013-12-19 Tejas Belagod + + * gcc.target/aarch64/sha1_1.c: New. + +2014-02-10 Michael Collison + + Backport from trunk r206117 + 2013-12-19 Tejas Belagod + + * gcc.target/aarch64/aes_1.c: New. + +2014-02-01 Christophe Lyon + + Backport from trunk r203057. + 2013-10-01 Kyrylo Tkachov + + PR tree-optimization/58556 + * gcc.dg/tree-ssa/gen-vect-26.c: Use dynamic vector cost model. + * gcc.dg/tree-ssa/gen-vect-28.c: Likewise. + +2014-01-21 Zhenqiang Chen + + Backport from trunk r205509 and r200103 + 2013-11-29 Zhenqiang Chen + + * gcc.target/arm/lp1243022.c: Skip target arm-neon. + + Backport mainline r200103 + 2013-06-15 Jeff Law + + * gcc.dg/tree-ssa/coalesce-1.c: New test. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-12-06 Michael Collison + + Backport from trunk r202872. + 2013-09-24 Kyrylo Tkachov + + * lib/target-supports.exp (check_effective_target_arm_cond_exec): + New Procedure + * gcc.target/arm/minmax_minus.c: Check for cond_exec target. + +2013-12-06 Christophe Lyon + + Backport from trunk r203327. + 2013-10-09 Zhenqiang Chen + + * gcc.dg/tree-ssa/phi-opt-11.c: New test. + +2013-12-06 Charles Baylis + + Backport from trunk r203799. + 2013-10-17 Charles Bayis + + * gcc.dg/builtin-apply2.c: Skip test on arm hardfloat ABI + targets. + * gcc.dg/tls/pr42894.c: Remove dg-options for arm*-*-* targets. + * gcc.target/arm/thumb-ltu.c: Remove dg-skip-if and require + effective target arm_thumb1_ok. + * lib/target-supports.exp + (check_effective_target_arm_fp16_ok_nocache): Don't force + -mfloat-abi=soft when building for hardfloat target. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-11-06 Christophe Lyon + + Revert backport from trunk r197526. + 2013-04-05 Greta Yorsh + + * gcc.target/arm/negdi-1.c: New test. + * gcc.target/arm/negdi-2.c: Likewise. + * gcc.target/arm/negdi-3.c: Likewise. + * gcc.target/arm/negdi-4.c: Likewise. + +2013-11-05 Zhenqiang Chen + + Backport from trunk r204247. + 2013-10-31 Zhenqiang Chen + + * gcc.target/arm/lp1243022.c: New test. + +2013-11-04 Kugan Vivekanandarajah + + Backport from trunk r204336 + 2013-11-03 Kugan Vivekanandarajah + + * gcc.target/arm/neon-vcond-gt.c: Scan for vbsl or vbit or vbif. + * gcc.target/arm/neon-vcond-ltgt.c: Scan for vbsl or vbit or vbif. + * gcc.target/arm/neon-vcond-unordered.c: Scan for vbsl or vbit or + vbif. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. 
+ +2013-10-09 Christophe Lyon + + Backport from trunk r198526,200595,200597. + 2013-05-02 Ian Bolton + + * gcc.target/aarch64/bics_1.c: New test. + * gcc.target/aarch64/bics_2.c: Likewise. + + 2013-07-02 Ian Bolton + + * gcc.target/aarch64/bfxil_1.c: New test. + * gcc.target/aarch64/bfxil_2.c: Likewise. + + 2013-07-02 Ian Bolton + + * gcc.target/config/aarch64/insv_1.c: Update to show it doesn't work + on big endian. + * gcc.target/config/aarch64/insv_2.c: New test for big endian. + * lib/target-supports.exp: Define aarch64_little_endian. + +2013-10-03 Christophe Lyon + + Backport from trunk r202400. + 2013-09-09 Kyrylo Tkachov + + * gcc.target/aarch64/cmn-neg.c: New test. + +2013-10-03 Christophe Lyon + + Backport from trunk r202164. + 2013-09-02 Bin Cheng + + * gcc.target/arm/ivopts-orig_biv-inc.c: New testcase. + +2013-10-01 Kugan Vivekanandarajah + + Backport from trunk r203059,203116. + 2013-10-01 Kugan Vivekanandarajah + + PR Target/58578 + * gcc.target/arm/pr58578.c: New test. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-09-06 Venkataramanan Kumar + + Backport from trunk r201411. + 2013-08-01 Kyrylo Tkachov + + * gcc.target/arm/pr46972-2.c: New test. + +2013-09-05 Yvan Roux + + Backport from trunk r201267. + 2013-07-26 Kyrylo Tkachov + + * gcc.target/arm/minmax_minus.c: Scan for absence of mov. + +2013-09-05 Christophe Lyon + + Backport from trunk r199527,199814,201435. + 2013-05-31 Kyrylo Tkachov + + PR target/56315 + * gcc.target/arm/iordi3-opt.c: New test. + + 2013-06-07 Kyrylo Tkachov + + PR target/56315 + * gcc.target/arm/xordi3-opt.c: New test. + + 2013-08-02 Kyrylo Tkachov + + * gcc.target/arm/neon-for-64bits-2.c: Delete. + +2013-09-05 Christophe Lyon + + Backport from trunk r201730,201731. + + 2013-08-14 Janis Johnson + + * gcc.target/arm/atomic-comp-swap-release-acquire.c: Move dg-do + to be the first test directive. + * gcc.target/arm/atomic-op-acq_rel.c: Likewise. + * gcc.target/arm/atomic-op-acquire.c: Likewise. + * gcc.target/arm/atomic-op-char.c: Likewise. + * gcc.target/arm/atomic-op-consume.c: Likewise. + * gcc.target/arm/atomic-op-int.c: Likewise. + * gcc.target/arm/atomic-op-relaxed.c: Likewise. + * gcc.target/arm/atomic-op-release.c: Likewise. + * gcc.target/arm/atomic-op-seq_cst.c: Likewise. + * gcc.target/arm/atomic-op-short.c: Likewise. + + 2013-08-14 Janis Johnson + + * gcc.target/arm/pr19599.c: Skip for -mthumb. + +2013-09-03 Venkataramanan Kumar + + Backport from trunk r201624. + 2013-08-09 James Greenhalgh + + * gcc.target/aarch64/scalar_intrinsics.c: Update expected + output of vdup intrinsics + +2013-08-26 Kugan Vivekanandarajah + + Backport from trunk r201636. + 2013-08-09 Yufeng Zhang + + * gcc.dg/lower-subreg-1.c: Skip aarch64*-*-*. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-08-07 Christophe Lyon + + Backport from trunk r199720 + 2013-06-06 Marcus Shawcroft + + * gcc.dg/vect/no-section-anchors-vect-68.c: + Add dg-skip-if aarch64_tiny. + +2013-08-07 Christophe Lyon + + Backport from trunk r201237. + 2013-07-25 Terry Guo + + * gcc.target/arm/thumb1-Os-mult.c: New test case. + +2013-08-06 Christophe Lyon + + Backport from trunk r200596,201067,201083. + 2013-07-02 Ian Bolton + + * gcc.target/aarch64/abs_1.c: New test. + + 2013-07-19 Ian Bolton + + * gcc.target/aarch64/scalar_intrinsics.c (test_vabs_s64): Added + new testcase. + + 2013-07-20 James Greenhalgh + + * gcc.target/aarch64/vabs_intrinsic_1.c: New file. + +2013-08-06 Christophe Lyon + + Backport from trunk r198864. 
+ 2013-05-07 Ian Bolton + + * gcc.target/aarch64/ands_1.c: New test. + * gcc.target/aarch64/ands_2.c: Likewise + +2013-08-06 Christophe Lyon + + Backport from trunk r199439,199533,201326. + + 2013-05-30 Zhenqiang Chen + + * gcc.dg/shrink-wrap-alloca.c: New added. + * gcc.dg/shrink-wrap-pretend.c: New added. + * gcc.dg/shrink-wrap-sibcall.c: New added. + + 2013-05-31 Rainer Orth + + * gcc.dg/shrink-wrap-alloca.c: Use __builtin_alloca. + + 2013-07-30 Zhenqiang Chen + + * gcc.target/arm/pr57637.c: New testcase. + +2013-08-06 Christophe Lyon + + Backport from trunk r198928,198973,199203,201240,201241. + 2013-05-15 Ramana Radhakrishnan + + PR target/19599 + * gcc.target/arm/pr40887.c: Adjust testcase. + * gcc.target/arm/pr19599.c: New test. + +2013-08-05 Yvan Roux + + Backport from trunk r200922. + 2013-07-12 Tejas Belagod + + * gcc.target/aarch64/vect-movi.c: New. + +2013-08-05 Yvan Roux + + Backport from trunk r200720. + 2013-07-05 Marcus Shawcroft + + * gcc.dg/pr57518.c: Adjust scan-rtl-dump-not pattern. + +2013-07-21 Yvan Roux + + Backport from trunk r200204. + 2013-06-19 Yufeng Zhang + + * gcc.dg/torture/stackalign/builtin-apply-2.c: set + STACK_ARGUMENTS_SIZE with 0 if __aarch64__ is defined. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-07-03 Christophe Lyon + + Revert backport from trunk r198928. + 2013-05-15 Ramana Radhakrishnan + + PR target/19599 + * gcc.target/arm/pr40887.c: Adjust testcase. + * gcc.target/arm/pr19599.c: New test. + +2013-07-03 Christophe Lyon + + Revert backport from trunk 199439, 199533 + 2013-05-31 Rainer Orth + + * gcc.dg/shrink-wrap-alloca.c: Use __builtin_alloca. + + 2013-05-30 Zhenqiang Chen + + * gcc.dg/shrink-wrap-alloca.c: New added. + * gcc.dg/shrink-wrap-pretend.c: New added. + * gcc.dg/shrink-wrap-sibcall.c: New added. + +2013-07-02 Rob Savoye + + Backport from trunk 200096 + + 2013-06-14 Vidya Praveen + + * gcc.target/aarch64/vect_smlal_1.c: New file. + +2013-07-02 Rob Savoye + + Backport from trunk 200019 + 2013-06-12 Ramana Radhakrishnan + + * gcc.target/arm/unaligned-memcpy-4.c (src, dst): Initialize + to ensure alignment. + * gcc.target/arm/unaligned-memcpy-3.c (src): Likewise. + +2013-06-20 Rob Savoye + + Backport from trunk 200152 + 2013-06-17 Sofiane Naci + + * gcc.target/aarch64/scalar_intrinsics.c: Update. + +2013-06-20 Rob Savoye + + Backport from trunk 200148 + 2013-06-17 Kyrylo Tkachov + + * gcc.target/arm/unaligned-memcpy-2.c (dest): Initialize to + ensure alignment. + +2013-06-20 Rob Savoye + + Backport from trunk 199533 + 2013-05-31 Rainer Orth + + * gcc.dg/shrink-wrap-alloca.c: Use __builtin_alloca. + +2013-06-20 Christophe Lyon + + Backport from trunk r198683. + 2013-05-07 Christophe Lyon + + * lib/target-supports.exp (check_effective_target_hw): New + function. + * c-c++-common/asan/clone-test-1.c: Call + check_effective_target_hw. + * c-c++-common/asan/rlimit-mmap-test-1.c: Likewise. + * c-c++-common/asan/heap-overflow-1.c: Update regexps to accept + possible decorations. + * c-c++-common/asan/null-deref-1.c: Likewise. + * c-c++-common/asan/stack-overflow-1.c: Likewise. + * c-c++-common/asan/strncpy-overflow-1.c: Likewise. + * c-c++-common/asan/use-after-free-1.c: Likewise. + * g++.dg/asan/deep-thread-stack-1.C: Likewise. + * g++.dg/asan/large-func-test-1.C: Likewise. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-06-06 Zhenqiang Chen + + Backport from mainline r199439. 
+ 2013-05-30 Zhenqiang Chen + + * gcc.dg/shrink-wrap-alloca.c: New added. + * gcc.dg/shrink-wrap-pretend.c: New added. + * gcc.dg/shrink-wrap-sibcall.c: New added. + +2013-06-05 Christophe Lyon + + Backport from trunk r199658. + 2013-06-04 Ian Bolton + + * gcc.target/aarch64/movi_1.c: New test. + +2013-06-04 Christophe Lyon + + Backport from trunk r199261. + 2013-05-23 Christian Bruel + + PR debug/57351 + * gcc.dg/debug/pr57351.c: New test + +2013-06-03 Christophe Lyon + Backport from trunk r198890,199254,199294,199454. + + 2013-05-30 Ian Bolton + + * gcc.target/aarch64/insv_1.c: New test. + + 2013-05-24 Ian Bolton + + * gcc.target/aarch64/scalar_intrinsics.c + (force_simd): Use a valid instruction. + (test_vdupd_lane_s64): Pass a valid lane argument. + (test_vdupd_lane_u64): Likewise. + + 2013-05-23 Vidya Praveen + + * gcc.target/aarch64/vect-clz.c: New file. + + 2013-05-14 James Greenhalgh + + * gcc.target/aarch64/vect-fcm.x: Add cases testing + FLOAT cmp FLOAT ? INT : INT. + * gcc.target/aarch64/vect-fcm-eq-d.c: Define IMODE. + * gcc.target/aarch64/vect-fcm-eq-f.c: Likewise. + * gcc.target/aarch64/vect-fcm-ge-d.c: Likewise. + * gcc.target/aarch64/vect-fcm-ge-f.c: Likewise. + * gcc.target/aarch64/vect-fcm-gt-d.c: Likewise. + * gcc.target/aarch64/vect-fcm-gt-f.c: Likewise. + +2013-05-29 Christophe Lyon + + Backport from trunk r198928. + 2013-05-15 Ramana Radhakrishnan + + PR target/19599 + * gcc.target/arm/pr40887.c: Adjust testcase. + * gcc.target/arm/pr19599.c: New test. + +2013-05-28 Christophe Lyon + + Backport from trunk r198680. + 2013-05-07 Sofiane Naci + + * gcc.target/aarch64/scalar_intrinsics.c: Update. + +2013-05-28 Christophe Lyon + + Backport from trunk r198499-198500. + 2013-05-01 James Greenhalgh + * gcc.target/aarch64/vect-vaddv.c: New. + + 2013-05-01 James Greenhalgh + + * gcc.target/aarch64/vect-vmaxv.c: New. + * gcc.target/aarch64/vect-vfmaxv.c: Likewise. + +2013-05-23 Christophe Lyon + + Backport from trunk r198970. + 2013-05-16 Greta Yorsh + + * gcc.target/arm/unaligned-memcpy-2.c: Adjust expected output. + * gcc.target/arm/unaligned-memcpy-3.c: Likewise. + * gcc.target/arm/unaligned-memcpy-4.c: Likewise. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-05-14 Matthew Gretton-Dann + + Backport from trunk r198574-198575. + 2013-05-03 Vidya Praveen + + * gcc.target/aarch64/fabd.c: New file. + +2013-05-14 Matthew Gretton-Dann + + Backport from trunk r198490-198496. + 2013-05-01 James Greenhalgh + + * gcc.target/aarch64/scalar-vca.c: New. + * gcc.target/aarch64/vect-vca.c: Likewise. + + 2013-05-01 James Greenhalgh + + * gcc.target/aarch64/scalar_intrinsics.c (force_simd): New. + (test_vceqd_s64): Force arguments to SIMD registers. + (test_vceqzd_s64): Likewise. + (test_vcged_s64): Likewise. + (test_vcled_s64): Likewise. + (test_vcgezd_s64): Likewise. + (test_vcged_u64): Likewise. + (test_vcgtd_s64): Likewise. + (test_vcltd_s64): Likewise. + (test_vcgtzd_s64): Likewise. + (test_vcgtd_u64): Likewise. + (test_vclezd_s64): Likewise. + (test_vcltzd_s64): Likewise. + (test_vtst_s64): Likewise. + (test_vtst_u64): Likewise. + +2013-05-14 Matthew Gretton-Dann + + Backport from trunk r198191. + 2013-04-23 Sofiane Naci + + * gcc.target/aarch64/scalar-mov.c: New testcase. + +2013-05-14 Matthew Gretton-Dann + + Backport from trunk r197838. + 2013-04-11 Naveen H.S + + * gcc.target/aarch64/negs.c: New. + +2013-05-02 Matthew Gretton-Dann + + Backport from trunk r198019. + 2013-04-16 Naveen H.S + + * gcc.target/aarch64/adds1.c: New. 
+ * gcc.target/aarch64/adds2.c: New. + * gcc.target/aarch64/subs1.c: New. + * gcc.target/aarch64/subs2.c: New. + +2013-05-02 Matthew Gretton-Dann + + Backport from trunk r198394,198396-198400,198402-198404,198406. + 2013-04-29 James Greenhalgh + + * lib/target-supports.exp (vect_uintfloat_cvt): Enable for AArch64. + + 2013-04-29 James Greenhalgh + + * gcc.target/aarch64/vect-vcvt.c: New. + + 2013-04-29 James Greenhalgh + + * gcc.target/aarch64/vect-vrnd.c: New. + +2013-05-02 Matthew Gretton-Dann + + Backport from trunk r198302-198306,198316. + 2013-04-25 James Greenhalgh + Tejas Belagod + + * gcc.target/aarch64/vaddv-intrinsic.c: New. + * gcc.target/aarch64/vaddv-intrinsic-compile.c: Likewise. + * gcc.target/aarch64/vaddv-intrinsic.x: Likewise. + + 2013-04-25 Naveen H.S + + * gcc.target/aarch64/cmp.c: New. + + 2013-04-25 Naveen H.S + + * gcc.target/aarch64/ngc.c: New. + +2013-05-02 Matthew Gretton-Dann + + Backport from trunk r198298. + 2013-04-25 Kyrylo Tkachov + + * lib/target-supports.exp + (check_effective_target_arm_neon_fp16_ok_nocache): New procedure. + (check_effective_target_arm_neon_fp16_ok): Likewise. + (add_options_for_arm_neon_fp16): Likewise. + * gcc.target/arm/neon/vcvtf16_f32.c: New test. Generated. + * gcc.target/arm/neon/vcvtf32_f16.c: Likewise. + +2013-05-02 Matthew Gretton-Dann + + Backport from trunk r198136-198137,198142,198176 + 2013-04-22 James Greenhalgh + + * gcc.target/aarch64/vrecps.c: New. + * gcc.target/aarch64/vrecpx.c: Likewise. + +2013-05-02 Matthew Gretton-Dann + + Backport from trunk r198020. + 2013-04-16 Naveen H.S + + * gcc.target/aarch64/adds3.c: New. + * gcc.target/aarch64/subs3.c: New. + +2013-05-02 Matthew Gretton-Dann + + Backport from trunk r197965. + 2013-04-15 Kyrylo Tkachov + + * gcc.target/arm/anddi3-opt.c: New test. + * gcc.target/arm/anddi3-opt2.c: Likewise. + +2013-05-02 Matthew Gretton-Dann + + Backport from trunk r197642. + 2013-04-09 Kyrylo Tkachov + + * gcc.target/arm/minmax_minus.c: New test. + +2013-05-02 Matthew Gretton-Dann + + Backport from trunk r197530,197921. + 2013-04-05 Greta Yorsh + + * gcc.target/arm/peep-ldrd-1.c: New test. + * gcc.target/arm/peep-strd-1.c: Likewise. + +2013-05-02 Matthew Gretton-Dann + + Backport from trunk r197523. + 2013-04-05 Kyrylo Tkachov + + * lib/target-supports.exp (add_options_for_arm_v8_neon): + Add -march=armv8-a when we use v8 NEON. + (check_effective_target_vect_call_btruncf): Remove arm-*-*-*. + (check_effective_target_vect_call_ceilf): Likewise. + (check_effective_target_vect_call_floorf): Likewise. + (check_effective_target_vect_call_roundf): Likewise. + (check_vect_support_and_set_flags): Remove check for arm_v8_neon. + * gcc.target/arm/vect-rounding-btruncf.c: New testcase. + * gcc.target/arm/vect-rounding-ceilf.c: Likewise. + * gcc.target/arm/vect-rounding-floorf.c: Likewise. + * gcc.target/arm/vect-rounding-roundf.c: Likewise. + +2013-05-02 Matthew Gretton-Dann + + Backport from trunk r197518-197522,197516-197528. + 2013-04-05 Greta Yorsh + + * gcc.target/arm/negdi-1.c: New test. + * gcc.target/arm/negdi-2.c: Likewise. + * gcc.target/arm/negdi-3.c: Likewise. + * gcc.target/arm/negdi-4.c: Likewise. + +2013-05-02 Matthew Gretton-Dann + + Backport from trunk r197489-197491. + 2013-04-04 Kyrylo Tkachov + + * lib/target-supports.exp (check_effective_target_arm_v8_neon_hw): + New procedure. + (check_effective_target_arm_v8_neon_ok_nocache): + Likewise. + (check_effective_target_arm_v8_neon_ok): Change to use + check_effective_target_arm_v8_neon_ok_nocache. 
+ (add_options_for_arm_v8_neon): Use et_arm_v8_neon_flags to set ARMv8 + NEON flags. + (check_effective_target_vect_call_btruncf): + Enable for arm and ARMv8 NEON. + (check_effective_target_vect_call_ceilf): Likewise. + (check_effective_target_vect_call_floorf): Likewise. + (check_effective_target_vect_call_roundf): Likewise. + (check_vect_support_and_set_flags): Handle ARMv8 NEON effective + target. + +2013-05-02 Matthew Gretton-Dann + + Backport from trunk r196795-196797,196957. + 2013-03-19 Ian Bolton + + * gcc.target/aarch64/sbc.c: New test. + + 2013-03-19 Ian Bolton + + * gcc.target/aarch64/ror.c: New test. + + 2013-03-19 Ian Bolton + + * gcc.target/aarch64/extr.c: New test. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. + +2013-04-08 Matthew Gretton-Dann + + Backport from trunk r197052. + 2013-03-25 Kyrylo Tkachov + + * gcc.target/arm/vseleqdf.c: New test. + * gcc.target/arm/vseleqsf.c: Likewise. + * gcc.target/arm/vselgedf.c: Likewise. + * gcc.target/arm/vselgesf.c: Likewise. + * gcc.target/arm/vselgtdf.c: Likewise. + * gcc.target/arm/vselgtsf.c: Likewise. + * gcc.target/arm/vselledf.c: Likewise. + * gcc.target/arm/vsellesf.c: Likewise. + * gcc.target/arm/vselltdf.c: Likewise. + * gcc.target/arm/vselltsf.c: Likewise. + * gcc.target/arm/vselnedf.c: Likewise. + * gcc.target/arm/vselnesf.c: Likewise. + * gcc.target/arm/vselvcdf.c: Likewise. + * gcc.target/arm/vselvcsf.c: Likewise. + * gcc.target/arm/vselvsdf.c: Likewise. + * gcc.target/arm/vselvssf.c: Likewise. + +2013-04-08 Matthew Gretton-Dann + + Backport from trunk r197051. + 2013-03-25 Kyrylo Tkachov + + * gcc.target/aarch64/atomic-comp-swap-release-acquire.c: Move test + body from here... + * gcc.target/aarch64/atomic-comp-swap-release-acquire.x: ... to here. + * gcc.target/aarch64/atomic-op-acq_rel.c: Move test body from here... + * gcc.target/aarch64/atomic-op-acq_rel.x: ... to here. + * gcc.target/aarch64/atomic-op-acquire.c: Move test body from here... + * gcc.target/aarch64/atomic-op-acquire.x: ... to here. + * gcc.target/aarch64/atomic-op-char.c: Move test body from here... + * gcc.target/aarch64/atomic-op-char.x: ... to here. + * gcc.target/aarch64/atomic-op-consume.c: Move test body from here... + * gcc.target/aarch64/atomic-op-consume.x: ... to here. + * gcc.target/aarch64/atomic-op-int.c: Move test body from here... + * gcc.target/aarch64/atomic-op-int.x: ... to here. + * gcc.target/aarch64/atomic-op-relaxed.c: Move test body from here... + * gcc.target/aarch64/atomic-op-relaxed.x: ... to here. + * gcc.target/aarch64/atomic-op-release.c: Move test body from here... + * gcc.target/aarch64/atomic-op-release.x: ... to here. + * gcc.target/aarch64/atomic-op-seq_cst.c: Move test body from here... + * gcc.target/aarch64/atomic-op-seq_cst.x: ... to here. + * gcc.target/aarch64/atomic-op-short.c: Move test body from here... + * gcc.target/aarch64/atomic-op-short.x: ... to here. + * gcc.target/arm/atomic-comp-swap-release-acquire.c: New test. + * gcc.target/arm/atomic-op-acq_rel.c: Likewise. + * gcc.target/arm/atomic-op-acquire.c: Likewise. + * gcc.target/arm/atomic-op-char.c: Likewise. + * gcc.target/arm/atomic-op-consume.c: Likewise. + * gcc.target/arm/atomic-op-int.c: Likewise. + * gcc.target/arm/atomic-op-relaxed.c: Likewise. + * gcc.target/arm/atomic-op-release.c: Likewise. + * gcc.target/arm/atomic-op-seq_cst.c: Likewise. + * gcc.target/arm/atomic-op-short.c: Likewise. + +2013-04-08 Matthew Gretton-Dann + + Backport from trunk r196876. 
+ 2013-03-21 Christophe Lyon + + * gcc.target/arm/neon-for-64bits-1.c: New tests. + * gcc.target/arm/neon-for-64bits-2.c: Likewise. + +2013-04-08 Matthew Gretton-Dann + + Backport from trunk r196858. + 2013-03-21 Naveen H.S + + * gcc.target/aarch64/vect.c: Test and result vector added + for sabd and saba instructions. + * gcc.target/aarch64/vect-compile.c: Check for sabd and saba + instructions in assembly. + * gcc.target/aarch64/vect.x: Add sabd and saba test functions. + * gcc.target/aarch64/vect-fp.c: Test and result vector added + for fabd instruction. + * gcc.target/aarch64/vect-fp-compile.c: Check for fabd + instruction in assembly. + * gcc.target/aarch64/vect-fp.x: Add fabd test function. --- a/src/gcc/testsuite/gcc.dg/shrink-wrap-alloca.c +++ b/src/gcc/testsuite/gcc.dg/shrink-wrap-alloca.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -g" } */ + +int *p; + +void +test (int a) +{ + if (a > 0) + p = __builtin_alloca (4); +} --- a/src/gcc/testsuite/gcc.dg/shrink-wrap-pretend.c +++ b/src/gcc/testsuite/gcc.dg/shrink-wrap-pretend.c @@ -0,0 +1,36 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -g" } */ + +#include +#include +#include + +#define DEBUG_BUFFER_SIZE 80 +int unifi_debug = 5; + +void +unifi_trace (void* ospriv, int level, const char *fmt, ...) +{ + static char s[DEBUG_BUFFER_SIZE]; + va_list args; + unsigned int len; + + if (!ospriv) + return; + + if (unifi_debug >= level) + { + va_start (args, fmt); + len = vsnprintf (&(s)[0], (DEBUG_BUFFER_SIZE), fmt, args); + va_end (args); + + if (len >= DEBUG_BUFFER_SIZE) + { + (s)[DEBUG_BUFFER_SIZE - 2] = '\n'; + (s)[DEBUG_BUFFER_SIZE - 1] = 0; + } + + printf ("%s", s); + } +} + --- a/src/gcc/testsuite/gcc.dg/debug/pr57351.c +++ b/src/gcc/testsuite/gcc.dg/debug/pr57351.c @@ -0,0 +1,54 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon } */ +/* { dg-options "-std=c99 -Os -g -march=armv7-a" } */ +/* { dg-add-options arm_neon } */ + +typedef unsigned int size_t; +typedef int ptrdiff_t; +typedef signed char int8_t ; +typedef signed long long int64_t; +typedef int8_t GFC_INTEGER_1; +typedef GFC_INTEGER_1 GFC_LOGICAL_1; +typedef int64_t GFC_INTEGER_8; +typedef GFC_INTEGER_8 GFC_LOGICAL_8; +typedef ptrdiff_t index_type; +typedef struct descriptor_dimension +{ + index_type lower_bound; + index_type _ubound; +} +descriptor_dimension; +typedef struct { GFC_LOGICAL_1 *base_addr; size_t offset; index_type dtype; descriptor_dimension dim[7];} gfc_array_l1; +typedef struct { GFC_LOGICAL_8 *base_addr; size_t offset; index_type dtype; descriptor_dimension dim[7];} gfc_array_l8; +void +all_l8 (gfc_array_l8 * const restrict retarray, + gfc_array_l1 * const restrict array, + const index_type * const restrict pdim) +{ + GFC_LOGICAL_8 * restrict dest; + index_type n; + index_type len; + index_type delta; + index_type dim; + dim = (*pdim) - 1; + len = ((array)->dim[dim]._ubound + 1 - (array)->dim[dim].lower_bound); + for (n = 0; n < dim; n++) + { + const GFC_LOGICAL_1 * restrict src; + GFC_LOGICAL_8 result; + { + result = 1; + { + for (n = 0; n < len; n++, src += delta) + { + if (! *src) + { + result = 0; + break; + } + } + *dest = result; + } + } + } +} --- a/src/gcc/testsuite/gcc.dg/lower-subreg-1.c +++ b/src/gcc/testsuite/gcc.dg/lower-subreg-1.c @@ -1,4 +1,4 @@ -/* { dg-do compile { target { ! { mips64 || { arm*-*-* ia64-*-* sparc*-*-* spu-*-* tilegx-*-* } } } } } */ +/* { dg-do compile { target { ! 
{ mips64 || { aarch64*-*-* arm*-*-* ia64-*-* sparc*-*-* spu-*-* tilegx-*-* } } } } } */ /* { dg-options "-O -fdump-rtl-subreg1" } */ /* { dg-skip-if "" { { i?86-*-* x86_64-*-* } && x32 } { "*" } { "" } } */ /* { dg-require-effective-target ilp32 } */ --- a/src/gcc/testsuite/gcc.dg/shrink-wrap-sibcall.c +++ b/src/gcc/testsuite/gcc.dg/shrink-wrap-sibcall.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -g" } */ + +unsigned char a, b, d, f, g; + +int test (void); + +int +baz (int c) +{ + if (c == 0) return test (); + if (b & 1) + { + g = 0; + int e = (a & 0x0f) - (g & 0x0f); + + if (!a) b |= 0x80; + a = e + test (); + f = g/5 + a*3879 + b *2985; + } + else + { + f = g + a*39879 + b *25; + } + return test (); +} --- a/src/gcc/testsuite/gcc.dg/torture/stackalign/builtin-apply-2.c +++ b/src/gcc/testsuite/gcc.dg/torture/stackalign/builtin-apply-2.c @@ -16,7 +16,7 @@ E, F and G are passed on stack. So the size of the stack argument data is 20. */ #define STACK_ARGUMENTS_SIZE 20 -#elif defined __MMIX__ +#elif defined __aarch64__ || defined __MMIX__ /* No parameters on stack for bar. */ #define STACK_ARGUMENTS_SIZE 0 #else --- a/src/gcc/testsuite/gcc.dg/tree-ssa/coalesce-1.c +++ b/src/gcc/testsuite/gcc.dg/tree-ssa/coalesce-1.c @@ -0,0 +1,195 @@ +/* { dg-do compile } */ + +/* { dg-options "-O2 -fdump-rtl-expand-details" } */ + +typedef long unsigned int size_t; +union tree_node; +typedef union tree_node *tree; +union gimple_statement_d; +typedef union gimple_statement_d *gimple; +typedef const union tree_node *const_tree; +typedef const union gimple_statement_d *const_gimple; +struct gimple_seq_d; +typedef struct gimple_seq_d *gimple_seq; +struct edge_def; +typedef struct edge_def *edge; +struct basic_block_def; +typedef struct basic_block_def *basic_block; +typedef const struct basic_block_def *const_basic_block; +struct tree_exp +{ + tree operands[1]; +}; +typedef struct ssa_use_operand_d +{ + tree *use; +} ssa_use_operand_t; +struct phi_arg_d +{ + struct ssa_use_operand_d imm_use; +}; +union tree_node +{ + struct tree_exp exp; +}; +struct function +{ +}; +extern struct function *cfun; +struct edge_def +{ + unsigned int dest_idx; +}; +static __inline__ void +VEC_edge_must_be_pointer_type (void) +{ + (void) ((edge) 1 == (void *) 1); +} typedef struct VEC_edge_base + +{ + unsigned num; + unsigned alloc; + edge vec[1]; +} VEC_edge_base; +typedef struct VEC_edge_none +{ + VEC_edge_base base; +} VEC_edge_none; + +static __inline__ edge +VEC_edge_base_index (const VEC_edge_base * vec_, unsigned ix_, + const char *file_, unsigned line_, const char *function_) +{ + return vec_->vec[ix_]; +} + +typedef struct VEC_edge_gc +{ + VEC_edge_base base; +} VEC_edge_gc; +struct basic_block_def +{ + VEC_edge_gc *succs; +}; +static __inline__ edge +single_succ_edge (const_basic_block bb) +{ + return (VEC_edge_base_index + ((((bb)->succs) ? 
&((bb)->succs)->base : 0), (0), + "/home/gcc/virgin-gcc/gcc/basic-block.h", 563, __FUNCTION__)); +} + +edge find_edge (basic_block, basic_block); +typedef tree *def_operand_p; +typedef ssa_use_operand_t *use_operand_p; +struct gimple_seq_node_d; +typedef struct gimple_seq_node_d *gimple_seq_node; +struct gimple_seq_node_d +{ + gimple stmt; +}; +typedef struct +{ + gimple_seq_node ptr; + gimple_seq seq; + basic_block bb; +} gimple_stmt_iterator; +struct gimple_statement_phi +{ + struct phi_arg_d args[1]; +}; +union gimple_statement_d +{ + struct gimple_statement_phi gimple_phi; +}; +extern size_t const gimple_ops_offset_[]; +static __inline__ tree * +gimple_ops (gimple gs) +{ + size_t off; + off = gimple_ops_offset_[gimple_statement_structure (gs)]; + return (tree *) ((char *) gs + off); +} + +static __inline__ tree +gimple_op (const_gimple gs, unsigned i) +{ + return gimple_ops ((((union + { + const union gimple_statement_d * _q; + union gimple_statement_d * _nq;}) (((gs))))._nq))[i]; +} + +static __inline__ struct phi_arg_d * +gimple_phi_arg (gimple gs, unsigned index) +{ + return &(gs->gimple_phi.args[index]); +} + +static __inline__ tree +gimple_switch_label (const_gimple gs, unsigned index) +{ + return gimple_op (gs, index + 1); +} + +gimple_stmt_iterator gsi_start_phis (basic_block); +extern basic_block label_to_block_fn (struct function *, tree); + +static __inline__ tree +get_use_from_ptr (use_operand_p use) +{ + return *(use->use); +} + +static __inline__ use_operand_p +gimple_phi_arg_imm_use_ptr (gimple gs, int i) +{ + return &gimple_phi_arg (gs, i)->imm_use; +} + +struct switch_conv_info +{ + basic_block final_bb; + basic_block switch_bb; + const char *reason; + tree *default_values; +}; +static struct switch_conv_info info; + +static void +gather_default_values (tree default_case) +{ + gimple_stmt_iterator gsi; + basic_block bb = + (label_to_block_fn ((cfun + 0), default_case->exp.operands[2])); + edge e; + int i = 0; + if (bb == info.final_bb) + e = find_edge (info.switch_bb, bb); + else + e = single_succ_edge (bb); + for (gsi = gsi_start_phis (info.final_bb); + gsi_gsi_start_phis (info.final_bb); gsi_next (&gsi)) + { + gimple phi = gsi.ptr->stmt; + tree val = get_use_from_ptr (gimple_phi_arg_imm_use_ptr + ((((phi))), (((e)->dest_idx)))); + info.default_values[i++] = val; + } +} + +unsigned char +process_switch (gimple swtch) +{ + unsigned int i, branch_num = gimple_switch_num_labels (swtch); + tree index_type; + info.reason = "switch has no labels\n"; + gather_default_values (gimple_switch_label (swtch, 0)); +} + +/* Verify that out-of-ssa coalescing did its job by verifying there are not + any partition copies inserted. 
*/ + +/* { dg-final { scan-rtl-dump-not "partition copy" "expand"} } */ +/* { dg-final { cleanup-rtl-dump "expand" } } */ + --- a/src/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-26.c +++ b/src/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-26.c @@ -1,6 +1,6 @@ /* { dg-do run { target vect_cmdline_needed } } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details" } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -mno-sse" { target { i?86-*-* x86_64-*-* } } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -fvect-cost-model=dynamic" } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -fvect-cost-model=dynamic -mno-sse" { target { i?86-*-* x86_64-*-* } } } */ #include --- a/src/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-11.c +++ b/src/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-11.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-options "-O1 -fdump-tree-optimized" } */ + +int f(int a, int b, int c) +{ + if (a == 0 && b > c) + return 0; + return a; +} + +int g(int a, int b, int c) +{ + if (a == 42 && b > c) + return 42; + return a; +} + +int h(int a, int b, int c, int d) +{ + if (a == d && b > c) + return d; + return a; +} +/* { dg-final { scan-tree-dump-times "if" 0 "optimized"} } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ --- a/src/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-28.c +++ b/src/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-28.c @@ -1,6 +1,6 @@ /* { dg-do run { target vect_cmdline_needed } } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details" } */ -/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -mno-sse" { target { i?86-*-* x86_64-*-* } } } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -fvect-cost-model=dynamic" } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -fvect-cost-model=dynamic -mno-sse" { target { i?86-*-* x86_64-*-* } } } */ #include --- a/src/gcc/testsuite/gcc.dg/tls/pr42894.c +++ b/src/gcc/testsuite/gcc.dg/tls/pr42894.c @@ -1,6 +1,5 @@ /* PR target/42894 */ /* { dg-do compile } */ -/* { dg-options "-march=armv5te -mthumb" { target arm*-*-* } } */ /* { dg-require-effective-target tls } */ extern __thread int t; --- a/src/gcc/testsuite/gcc.dg/builtin-apply2.c +++ b/src/gcc/testsuite/gcc.dg/builtin-apply2.c @@ -1,6 +1,6 @@ /* { dg-do run } */ /* { dg-skip-if "Variadic funcs have all args on stack. Normal funcs have args in registers." { "aarch64*-*-* avr-*-* " } { "*" } { "" } } */ -/* { dg-skip-if "Variadic funcs use Base AAPCS. Normal funcs use VFP variant." { "arm*-*-*" } { "-mfloat-abi=hard" } { "" } } */ +/* { dg-skip-if "Variadic funcs use Base AAPCS. Normal funcs use VFP variant." { arm*-*-* && arm_hf_eabi } { "*" } { "" } } */ /* PR target/12503 */ /* Origin: */ --- a/src/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-68.c +++ b/src/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-68.c @@ -1,4 +1,6 @@ -/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target vect_int } + { dg-skip-if "AArch64 tiny code model does not support programs larger than 1MiB" {aarch64_tiny} {"*"} {""} } + */ #include #include "tree-vect.h" --- a/src/gcc/testsuite/g++.dg/asan/large-func-test-1.C +++ b/src/gcc/testsuite/g++.dg/asan/large-func-test-1.C @@ -37,9 +37,9 @@ // { dg-output "ERROR: AddressSanitizer:? 
heap-buffer-overflow on address\[^\n\r]*" } // { dg-output "0x\[0-9a-f\]+ at pc 0x\[0-9a-f\]+ bp 0x\[0-9a-f\]+ sp 0x\[0-9a-f\]+\[^\n\r]*(\n|\r\n|\r)" } -// { dg-output "READ of size 4 at 0x\[0-9a-f\]+ thread T0\[^\n\r]*(\n|\r\n|\r)" } +// { dg-output "\[^\n\r]*READ of size 4 at 0x\[0-9a-f\]+ thread T0\[^\n\r]*(\n|\r\n|\r)" } // { dg-output " #0 0x\[0-9a-f\]+ (in \[^\n\r]*LargeFunction\[^\n\r]*(large-func-test-1.C:18|\[^\n\r]*:0)|\[(\]).*(\n|\r\n|\r)" } -// { dg-output "0x\[0-9a-f\]+ is located 44 bytes to the right of 400-byte region.*(\n|\r\n|\r)" } -// { dg-output "allocated by thread T0 here:\[^\n\r]*(\n|\r\n|\r)" } +// { dg-output "\[^\n\r]*0x\[0-9a-f\]+ is located 44 bytes to the right of 400-byte region.*(\n|\r\n|\r)" } +// { dg-output "\[^\n\r]*allocated by thread T0 here:\[^\n\r]*(\n|\r\n|\r)" } // { dg-output " #0( 0x\[0-9a-f\]+ (in _*(interceptor_|)malloc|\[(\])\[^\n\r]*(\n|\r\n|\r)" } // { dg-output " #1|) 0x\[0-9a-f\]+ (in (operator new|_*_Zn\[aw\]\[mj\])|\[(\])\[^\n\r]*(\n|\r\n|\r)" } --- a/src/gcc/testsuite/g++.dg/asan/deep-thread-stack-1.C +++ b/src/gcc/testsuite/g++.dg/asan/deep-thread-stack-1.C @@ -45,9 +45,9 @@ } // { dg-output "ERROR: AddressSanitizer: heap-use-after-free.*(\n|\r\n|\r)" } -// { dg-output "WRITE of size 4 at 0x\[0-9a-f\]+ thread T(\[0-9\]+).*(\n|\r\n|\r)" } -// { dg-output "freed by thread T(\[0-9\]+) here:.*(\n|\r\n|\r)" } -// { dg-output "previously allocated by thread T(\[0-9\]+) here:.*(\n|\r\n|\r)" } +// { dg-output "\[^\n\r]*WRITE of size 4 at 0x\[0-9a-f\]+ thread T(\[0-9\]+).*(\n|\r\n|\r)" } +// { dg-output "\[^\n\r]*freed by thread T(\[0-9\]+) here:.*(\n|\r\n|\r)" } +// { dg-output "\[^\n\r]*previously allocated by thread T(\[0-9\]+) here:.*(\n|\r\n|\r)" } // { dg-output "Thread T\\2 created by T(\[0-9\]+) here:.*(\n|\r\n|\r)" } // { dg-output "Thread T\\8 created by T0 here:.*(\n|\r\n|\r)" } // { dg-output "Thread T\\4 created by T(\[0-9\]+) here:.*(\n|\r\n|\r)" } --- a/src/gcc/testsuite/c-c++-common/asan/strncpy-overflow-1.c +++ b/src/gcc/testsuite/c-c++-common/asan/strncpy-overflow-1.c @@ -15,7 +15,7 @@ /* { dg-output "WRITE of size \[0-9\]* at 0x\[0-9a-f\]+ thread T0\[^\n\r]*(\n|\r\n|\r)" } */ /* { dg-output " #0 0x\[0-9a-f\]+ (in _*(interceptor_|)strncpy|\[(\])\[^\n\r]*(\n|\r\n|\r)" } */ /* { dg-output " #1 0x\[0-9a-f\]+ (in _*main (\[^\n\r]*strncpy-overflow-1.c:11|\[^\n\r]*:0)|\[(\]).*(\n|\r\n|\r)" } */ -/* { dg-output "0x\[0-9a-f\]+ is located 0 bytes to the right of 9-byte region\[^\n\r]*(\n|\r\n|\r)" } */ -/* { dg-output "allocated by thread T0 here:\[^\n\r]*(\n|\r\n|\r)" } */ +/* { dg-output "\[^\n\r]*0x\[0-9a-f\]+ is located 0 bytes to the right of 9-byte region\[^\n\r]*(\n|\r\n|\r)" } */ +/* { dg-output "\[^\n\r]*allocated by thread T0 here:\[^\n\r]*(\n|\r\n|\r)" } */ /* { dg-output " #0 0x\[0-9a-f\]+ (in _*(interceptor_|)malloc|\[(\])\[^\n\r]*(\n|\r\n|\r)" } */ /* { dg-output " #1 0x\[0-9a-f\]+ (in _*main (\[^\n\r]*strncpy-overflow-1.c:10|\[^\n\r]*:0)|\[(\])\[^\n\r]*(\n|\r\n|\r)" } */ --- a/src/gcc/testsuite/c-c++-common/asan/rlimit-mmap-test-1.c +++ b/src/gcc/testsuite/c-c++-common/asan/rlimit-mmap-test-1.c @@ -2,6 +2,7 @@ /* { dg-do run { target setrlimit } } */ /* { dg-skip-if "" { *-*-* } { "*" } { "-O0" } } */ +/* { dg-require-effective-target hw } */ /* { dg-shouldfail "asan" } */ #include --- a/src/gcc/testsuite/c-c++-common/asan/stack-overflow-1.c +++ b/src/gcc/testsuite/c-c++-common/asan/stack-overflow-1.c @@ -19,4 +19,4 @@ /* { dg-output "READ of size 1 at 0x\[0-9a-f\]+ thread T0\[^\n\r]*(\n|\r\n|\r)" } */ /* { dg-output " 
#0 0x\[0-9a-f\]+ (in _*main (\[^\n\r]*stack-overflow-1.c:16|\[^\n\r]*:0)|\[(\]).*(\n|\r\n|\r)" } */ -/* { dg-output "Address 0x\[0-9a-f\]+ is\[^\n\r]*frame
" } */ +/* { dg-output "\[^\n\r]*Address 0x\[0-9a-f\]+ is\[^\n\r]*frame
" } */ --- a/src/gcc/testsuite/c-c++-common/asan/use-after-free-1.c +++ b/src/gcc/testsuite/c-c++-common/asan/use-after-free-1.c @@ -11,12 +11,12 @@ /* { dg-output "ERROR: AddressSanitizer:? heap-use-after-free on address\[^\n\r]*" } */ /* { dg-output "0x\[0-9a-f\]+ at pc 0x\[0-9a-f\]+ bp 0x\[0-9a-f\]+ sp 0x\[0-9a-f\]+\[^\n\r]*(\n|\r\n|\r)" } */ -/* { dg-output "READ of size 1 at 0x\[0-9a-f\]+ thread T0\[^\n\r]*(\n|\r\n|\r)" } */ +/* { dg-output "\[^\n\r]*READ of size 1 at 0x\[0-9a-f\]+ thread T0\[^\n\r]*(\n|\r\n|\r)" } */ /* { dg-output " #0 0x\[0-9a-f\]+ (in _*main (\[^\n\r]*use-after-free-1.c:9|\[^\n\r]*:0)|\[(\]).*(\n|\r\n|\r)" } */ -/* { dg-output "0x\[0-9a-f\]+ is located 5 bytes inside of 10-byte region .0x\[0-9a-f\]+,0x\[0-9a-f\]+\[^\n\r]*(\n|\r\n|\r)" } */ -/* { dg-output "freed by thread T0 here:\[^\n\r]*(\n|\r\n|\r)" } */ +/* { dg-output "\[^\n\r]*0x\[0-9a-f\]+ is located 5 bytes inside of 10-byte region .0x\[0-9a-f\]+,0x\[0-9a-f\]+\[^\n\r]*(\n|\r\n|\r)" } */ +/* { dg-output "\[^\n\r]*freed by thread T0 here:\[^\n\r]*(\n|\r\n|\r)" } */ /* { dg-output " #0 0x\[0-9a-f\]+ (in _*(interceptor_|)free|\[(\])\[^\n\r]*(\n|\r\n|\r)" } */ /* { dg-output " #1 0x\[0-9a-f\]+ (in _*main (\[^\n\r]*use-after-free-1.c:8|\[^\n\r]*:0)|\[(\]).*(\n|\r\n|\r)" } */ -/* { dg-output "previously allocated by thread T0 here:\[^\n\r]*(\n|\r\n|\r)" } */ +/* { dg-output "\[^\n\r]*previously allocated by thread T0 here:\[^\n\r]*(\n|\r\n|\r)" } */ /* { dg-output " #0 0x\[0-9a-f\]+ (in _*(interceptor_|)malloc|\[(\])\[^\n\r]*(\n|\r\n|\r)" } */ /* { dg-output " #1 0x\[0-9a-f\]+ (in _*main (\[^\n\r]*use-after-free-1.c:7|\[^\n\r]*:0)|\[(\])\[^\n\r]*(\n|\r\n|\r)" } */ --- a/src/gcc/testsuite/c-c++-common/asan/clone-test-1.c +++ b/src/gcc/testsuite/c-c++-common/asan/clone-test-1.c @@ -3,6 +3,7 @@ /* { dg-do run { target { *-*-linux* } } } */ /* { dg-require-effective-target clone } */ +/* { dg-require-effective-target hw } */ /* { dg-options "-D_GNU_SOURCE" } */ #include --- a/src/gcc/testsuite/c-c++-common/asan/heap-overflow-1.c +++ b/src/gcc/testsuite/c-c++-common/asan/heap-overflow-1.c @@ -25,7 +25,7 @@ /* { dg-output "READ of size 1 at 0x\[0-9a-f\]+ thread T0.*(\n|\r\n|\r)" } */ /* { dg-output " #0 0x\[0-9a-f\]+ (in _*main (\[^\n\r]*heap-overflow-1.c:21|\[^\n\r]*:0)|\[(\]).*(\n|\r\n|\r)" } */ -/* { dg-output "0x\[0-9a-f\]+ is located 0 bytes to the right of 10-byte region\[^\n\r]*(\n|\r\n|\r)" } */ -/* { dg-output "allocated by thread T0 here:\[^\n\r]*(\n|\r\n|\r)" } */ +/* { dg-output "\[^\n\r]*0x\[0-9a-f\]+ is located 0 bytes to the right of 10-byte region\[^\n\r]*(\n|\r\n|\r)" } */ +/* { dg-output "\[^\n\r]*allocated by thread T0 here:\[^\n\r]*(\n|\r\n|\r)" } */ /* { dg-output " #0 0x\[0-9a-f\]+ (in _*(interceptor_|)malloc|\[(\])\[^\n\r]*(\n|\r\n|\r)" } */ /* { dg-output " #1 0x\[0-9a-f\]+ (in _*main (\[^\n\r]*heap-overflow-1.c:19|\[^\n\r]*:0)|\[(\])\[^\n\r]*(\n|\r\n|\r)" } */ --- a/src/gcc/testsuite/c-c++-common/asan/null-deref-1.c +++ b/src/gcc/testsuite/c-c++-common/asan/null-deref-1.c @@ -18,6 +18,6 @@ /* { dg-output "ERROR: AddressSanitizer:? 
SEGV on unknown address\[^\n\r]*" } */ /* { dg-output "0x\[0-9a-f\]+ \[^\n\r]*pc 0x\[0-9a-f\]+\[^\n\r]*(\n|\r\n|\r)" } */ -/* { dg-output "AddressSanitizer can not provide additional info.*(\n|\r\n|\r)" } */ +/* { dg-output "\[^\n\r]*AddressSanitizer can not provide additional info.*(\n|\r\n|\r)" } */ /* { dg-output " #0 0x\[0-9a-f\]+ (in \[^\n\r]*NullDeref\[^\n\r]* (\[^\n\r]*null-deref-1.c:10|\[^\n\r]*:0)|\[(\])\[^\n\r]*(\n|\r\n|\r)" } */ /* { dg-output " #1 0x\[0-9a-f\]+ (in _*main (\[^\n\r]*null-deref-1.c:15|\[^\n\r]*:0)|\[(\])\[^\n\r]*(\n|\r\n|\r)" } */ --- a/src/gcc/objcp/ChangeLog.linaro +++ b/src/gcc/objcp/ChangeLog.linaro @@ -0,0 +1,51 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. --- a/src/gcc/cp/ChangeLog.linaro +++ b/src/gcc/cp/ChangeLog.linaro @@ -0,0 +1,51 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. 
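A minimal sketch, not part of the diff: the tree-ssa-loop-ivopts.c hunk that follows splits the single "closest use" search into separate closest_before/closest_after searches, so an original induction-variable candidate can also be paired with a memory use that appears after the increment in the same basic block. The hypothetical function below only illustrates where that situation arises; it assumes a target with pre-increment addressing (for example ARM) and is not taken from the patch.

/* Illustrative only: the *++p load is expanded after the p = p + 1
   increment in the same block, so the updated heuristic can pair the
   use with the increment and form a pre-increment address.  */
unsigned
sum_bytes (const unsigned char *p, int n)
{
  unsigned s = 0;
  int i;
  for (i = 0; i < n; i++)
    s += *++p;
  return s;
}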
--- a/src/gcc/tree-ssa-loop-ivopts.c +++ b/src/gcc/tree-ssa-loop-ivopts.c @@ -4827,22 +4827,36 @@ for (i = 0; i < n_iv_cands (data); i++) { struct iv_cand *cand = iv_cand (data, i); - struct iv_use *closest = NULL; + struct iv_use *closest_before = NULL; + struct iv_use *closest_after = NULL; if (cand->pos != IP_ORIGINAL) continue; + for (j = 0; j < n_iv_uses (data); j++) { struct iv_use *use = iv_use (data, j); unsigned uid = gimple_uid (use->stmt); - if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at) - || uid > gimple_uid (cand->incremented_at)) + + if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)) continue; - if (closest == NULL || uid > gimple_uid (closest->stmt)) - closest = use; + + if (uid < gimple_uid (cand->incremented_at) + && (closest_before == NULL + || uid > gimple_uid (closest_before->stmt))) + closest_before = use; + + if (uid > gimple_uid (cand->incremented_at) + && (closest_after == NULL + || uid < gimple_uid (closest_after->stmt))) + closest_after = use; } - if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand)) - continue; - cand->ainc_use = closest; + + if (closest_before != NULL + && autoinc_possible_for_pair (data, closest_before, cand)) + cand->ainc_use = closest_before; + else if (closest_after != NULL + && autoinc_possible_for_pair (data, closest_after, cand)) + cand->ainc_use = closest_after; } } --- a/src/gcc/rtl.def +++ b/src/gcc/rtl.def @@ -937,8 +937,9 @@ relational operator. Operands should have only one alternative. 1: A C expression giving an additional condition for recognizing the generated pattern. - 2: A template or C code to produce assembler output. */ -DEF_RTL_EXPR(DEFINE_COND_EXEC, "define_cond_exec", "Ess", RTX_EXTRA) + 2: A template or C code to produce assembler output. + 3: A vector of attributes to append to the resulting cond_exec insn. */ +DEF_RTL_EXPR(DEFINE_COND_EXEC, "define_cond_exec", "EssV", RTX_EXTRA) /* Definition of an operand predicate. The difference between DEFINE_PREDICATE and DEFINE_SPECIAL_PREDICATE is that genrecog will --- a/src/gcc/go/ChangeLog.linaro +++ b/src/gcc/go/ChangeLog.linaro @@ -0,0 +1,51 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. 
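A minimal sketch, not part of the diff: the opts.c and common.opt hunks below turn -fvect-cost-model into an enumerated option with unlimited, dynamic and cheap values, and the gen-vect-26.c/gen-vect-28.c hunks above switch those tests to -fvect-cost-model=dynamic. The testcase below is hypothetical and only shows the kind of loop such a test could feed to the vectorizer under the new option spelling; all options used appear elsewhere in this patch.

/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -fvect-cost-model=dynamic -fdump-tree-vect-details" } */

void
saxpy (float *y, const float *x, float a, int n)
{
  int i;
  for (i = 0; i < n; i++)   /* candidate loop for the vectorizer */
    y[i] = a * x[i] + y[i];
}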
--- a/src/gcc/opts.c +++ b/src/gcc/opts.c @@ -484,6 +484,7 @@ { OPT_LEVELS_2_PLUS, OPT_falign_labels, NULL, 1 }, { OPT_LEVELS_2_PLUS, OPT_falign_functions, NULL, 1 }, { OPT_LEVELS_2_PLUS, OPT_ftree_tail_merge, NULL, 1 }, + { OPT_LEVELS_2_PLUS, OPT_fvect_cost_model_, NULL, VECT_COST_MODEL_CHEAP }, { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_foptimize_strlen, NULL, 1 }, { OPT_LEVELS_2_PLUS, OPT_fhoist_adjacent_loads, NULL, 1 }, @@ -497,7 +498,7 @@ { OPT_LEVELS_3_PLUS, OPT_funswitch_loops, NULL, 1 }, { OPT_LEVELS_3_PLUS, OPT_fgcse_after_reload, NULL, 1 }, { OPT_LEVELS_3_PLUS, OPT_ftree_vectorize, NULL, 1 }, - { OPT_LEVELS_3_PLUS, OPT_fvect_cost_model, NULL, 1 }, + { OPT_LEVELS_3_PLUS, OPT_fvect_cost_model_, NULL, VECT_COST_MODEL_DYNAMIC }, { OPT_LEVELS_3_PLUS, OPT_fipa_cp_clone, NULL, 1 }, { OPT_LEVELS_3_PLUS, OPT_ftree_partial_pre, NULL, 1 }, @@ -822,6 +823,17 @@ } } + /* Tune vectorization related parametees according to cost model. */ + if (opts->x_flag_vect_cost_model == VECT_COST_MODEL_CHEAP) + { + maybe_set_param_value (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, + 6, opts->x_param_values, opts_set->x_param_values); + maybe_set_param_value (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS, + 0, opts->x_param_values, opts_set->x_param_values); + maybe_set_param_value (PARAM_VECT_MAX_PEELING_FOR_ALIGNMENT, + 0, opts->x_param_values, opts_set->x_param_values); + } + /* Set PARAM_MAX_STORES_TO_SINK to 0 if either vectorization or if-conversion is disabled. */ if (!opts->x_flag_tree_vectorize || !opts->x_flag_tree_loop_if_convert) @@ -1592,7 +1604,7 @@ if (!opts_set->x_flag_tree_vectorize) opts->x_flag_tree_vectorize = value; if (!opts_set->x_flag_vect_cost_model) - opts->x_flag_vect_cost_model = value; + opts->x_flag_vect_cost_model = VECT_COST_MODEL_DYNAMIC; if (!opts_set->x_flag_tree_loop_distribute_patterns) opts->x_flag_tree_loop_distribute_patterns = value; break; --- a/src/gcc/ada/ChangeLog.linaro +++ b/src/gcc/ada/ChangeLog.linaro @@ -0,0 +1,51 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. --- a/src/gcc/common/config/aarch64/aarch64-common.c +++ b/src/gcc/common/config/aarch64/aarch64-common.c @@ -44,6 +44,8 @@ { /* Enable section anchors by default at -O1 or higher. */ { OPT_LEVELS_1_PLUS, OPT_fsection_anchors, NULL, 1 }, + /* Enable redundant extension instructions removal at -O2 and higher. 
*/ + { OPT_LEVELS_2_PLUS, OPT_free, NULL, 1 }, { OPT_LEVELS_NONE, 0, NULL, 0 } }; --- a/src/gcc/common/config/i386/i386-common.c +++ b/src/gcc/common/config/i386/i386-common.c @@ -729,7 +729,6 @@ opts->x_flag_pcc_struct_return = 2; opts->x_flag_asynchronous_unwind_tables = 2; - opts->x_flag_vect_cost_model = 1; } /* On the x86 -fsplit-stack and -fstack-protector both use the same --- a/src/gcc/fortran/ChangeLog.linaro +++ b/src/gcc/fortran/ChangeLog.linaro @@ -0,0 +1,51 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. --- a/src/gcc/configure.ac +++ b/src/gcc/configure.ac @@ -813,7 +813,7 @@ ) AC_SUBST(CONFIGURE_SPECS) -ACX_PKGVERSION([GCC]) +ACX_PKGVERSION([Linaro GCC `cat $srcdir/LINARO-VERSION`]) ACX_BUGURL([http://gcc.gnu.org/bugs.html]) # Sanity check enable_languages in case someone does not run the toplevel @@ -4202,8 +4202,9 @@ # ??? Once 2.11 is released, probably need to add first known working # version to the per-target configury. case "$cpu_type" in - alpha | arm | avr | bfin | cris | i386 | m32c | m68k | microblaze | mips \ - | pa | rs6000 | score | sparc | spu | tilegx | tilepro | xstormy16 | xtensa) + aarch64 | alpha | arm | avr | bfin | cris | i386 | m32c | m68k | microblaze \ + | mips | pa | rs6000 | score | sparc | spu | tilegx | tilepro | xstormy16 \ + | xtensa) insn="nop" ;; ia64 | s390) --- a/src/gcc/tree-vectorizer.h +++ b/src/gcc/tree-vectorizer.h @@ -838,6 +838,14 @@ return (DR_MISALIGNMENT (data_ref_info) != -1); } + +/* Return true if the vect cost model is unlimited. */ +static inline bool +unlimited_cost_model () +{ + return flag_vect_cost_model == VECT_COST_MODEL_UNLIMITED; +} + /* Source location */ extern LOC vect_location; --- a/src/gcc/tree-vect-loop.c +++ b/src/gcc/tree-vect-loop.c @@ -2629,7 +2629,7 @@ void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo); /* Cost model disabled. */ - if (!flag_vect_cost_model) + if (unlimited_cost_model ()) { dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled."); *ret_min_profitable_niters = 0; --- a/src/gcc/flag-types.h +++ b/src/gcc/flag-types.h @@ -191,4 +191,13 @@ FP_CONTRACT_FAST = 2 }; +/* Vectorizer cost-model. */ +enum vect_cost_model { + VECT_COST_MODEL_UNLIMITED = 0, + VECT_COST_MODEL_CHEAP = 1, + VECT_COST_MODEL_DYNAMIC = 2, + VECT_COST_MODEL_DEFAULT = 3 +}; + + #endif /* ! 
GCC_FLAG_TYPES_H */ --- a/src/gcc/tree-vect-data-refs.c +++ b/src/gcc/tree-vect-data-refs.c @@ -1328,7 +1328,7 @@ *new_slot = slot; } - if (!supportable_dr_alignment && !flag_vect_cost_model) + if (!supportable_dr_alignment && unlimited_cost_model ()) slot->count += VECT_MAX_COST; } @@ -1438,7 +1438,7 @@ res.peel_info.dr = NULL; res.body_cost_vec = stmt_vector_for_cost(); - if (flag_vect_cost_model) + if (!unlimited_cost_model ()) { res.inside_cost = INT_MAX; res.outside_cost = INT_MAX; @@ -1668,7 +1668,7 @@ vectorization factor. We do this automtically for cost model, since we calculate cost for every peeling option. */ - if (!flag_vect_cost_model) + if (unlimited_cost_model ()) possible_npeel_number = vf /nelements; /* Handle the aligned case. We may decide to align some other @@ -1676,7 +1676,7 @@ if (DR_MISALIGNMENT (dr) == 0) { npeel_tmp = 0; - if (!flag_vect_cost_model) + if (unlimited_cost_model ()) possible_npeel_number++; } @@ -1926,6 +1926,30 @@ if (do_peeling) { + unsigned max_allowed_peel + = PARAM_VALUE (PARAM_VECT_MAX_PEELING_FOR_ALIGNMENT); + if (max_allowed_peel != (unsigned)-1) + { + unsigned max_peel = npeel; + if (max_peel == 0) + { + gimple dr_stmt = DR_STMT (dr0); + stmt_vec_info vinfo = vinfo_for_stmt (dr_stmt); + tree vtype = STMT_VINFO_VECTYPE (vinfo); + max_peel = TYPE_VECTOR_SUBPARTS (vtype) - 1; + } + if (max_peel > max_allowed_peel) + { + do_peeling = false; + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "Disable peeling, max peels reached: %d\n", max_peel); + } + } + } + + if (do_peeling) + { stmt_info_for_cost *si; void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo); @@ -1979,16 +2003,14 @@ /* (2) Versioning to force alignment. */ /* Try versioning if: - 1) flag_tree_vect_loop_version is TRUE - 2) optimize loop for speed - 3) there is at least one unsupported misaligned data ref with an unknown + 1) optimize loop for speed + 2) there is at least one unsupported misaligned data ref with an unknown misalignment, and - 4) all misaligned data refs with a known misalignment are supported, and - 5) the number of runtime alignment checks is within reason. */ + 3) all misaligned data refs with a known misalignment are supported, and + 4) the number of runtime alignment checks is within reason. */ do_versioning = - flag_tree_vect_loop_version - && optimize_loop_nest_for_speed_p (loop) + optimize_loop_nest_for_speed_p (loop) && (!loop->inner); /* FORNOW */ if (do_versioning) --- a/src/gcc/coretypes.h +++ b/src/gcc/coretypes.h @@ -62,6 +62,8 @@ typedef union gimple_statement_d *gimple; typedef const union gimple_statement_d *const_gimple; typedef gimple gimple_seq; +struct gimple_stmt_iterator_d; +typedef struct gimple_stmt_iterator_d gimple_stmt_iterator; union section; typedef union section section; struct gcc_options; --- a/src/gcc/tree-ssa-phiopt.c +++ b/src/gcc/tree-ssa-phiopt.c @@ -108,6 +108,26 @@ This opportunity can sometimes occur as a result of other optimizations. + + Another case caught by value replacement looks like this: + + bb0: + t1 = a == CONST; + t2 = b > c; + t3 = t1 & t2; + if (t3 != 0) goto bb1; else goto bb2; + bb1: + bb2: + x = PHI (CONST, a) + + Gets replaced with: + bb0: + bb2: + t1 = a == CONST; + t2 = b > c; + t3 = t1 & t2; + x = a; + ABS Replacement --------------- @@ -153,7 +173,7 @@ Adjacent Load Hoisting ---------------------- - + This transformation replaces bb0: @@ -275,7 +295,7 @@ phi optimizations. Both share much of the infrastructure in how to match applicable basic block patterns. 
DO_STORE_ELIM is true when we want to do conditional store replacement, false otherwise. - DO_HOIST_LOADS is true when we want to hoist adjacent loads out + DO_HOIST_LOADS is true when we want to hoist adjacent loads out of diamond control flow patterns, false otherwise. */ static unsigned int tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads) @@ -378,7 +398,7 @@ continue; } else - continue; + continue; e1 = EDGE_SUCC (bb1, 0); @@ -426,7 +446,7 @@ if (!candorest) continue; - + phi = single_non_singleton_phi_for_edges (phis, e1, e2); if (!phi) continue; @@ -714,6 +734,93 @@ return false; } +/* RHS is a source argument in a BIT_AND_EXPR which feeds a conditional + of the form SSA_NAME NE 0. + + If RHS is fed by a simple EQ_EXPR comparison of two values, see if + the two input values of the EQ_EXPR match arg0 and arg1. + + If so update *code and return TRUE. Otherwise return FALSE. */ + +static bool +rhs_is_fed_for_value_replacement (const_tree arg0, const_tree arg1, + enum tree_code *code, const_tree rhs) +{ + /* Obviously if RHS is not an SSA_NAME, we can't look at the defining + statement. */ + if (TREE_CODE (rhs) == SSA_NAME) + { + gimple def1 = SSA_NAME_DEF_STMT (rhs); + + /* Verify the defining statement has an EQ_EXPR on the RHS. */ + if (is_gimple_assign (def1) && gimple_assign_rhs_code (def1) == EQ_EXPR) + { + /* Finally verify the source operands of the EQ_EXPR are equal + to arg0 and arg1. */ + tree op0 = gimple_assign_rhs1 (def1); + tree op1 = gimple_assign_rhs2 (def1); + if ((operand_equal_for_phi_arg_p (arg0, op0) + && operand_equal_for_phi_arg_p (arg1, op1)) + || (operand_equal_for_phi_arg_p (arg0, op1) + && operand_equal_for_phi_arg_p (arg1, op0))) + { + /* We will perform the optimization. */ + *code = gimple_assign_rhs_code (def1); + return true; + } + } + } + return false; +} + +/* Return TRUE if arg0/arg1 are equal to the rhs/lhs or lhs/rhs of COND. + + Also return TRUE if arg0/arg1 are equal to the source arguments of a + an EQ comparison feeding a BIT_AND_EXPR which feeds COND. + + Return FALSE otherwise. */ + +static bool +operand_equal_for_value_replacement (const_tree arg0, const_tree arg1, + enum tree_code *code, gimple cond) +{ + gimple def; + tree lhs = gimple_cond_lhs (cond); + tree rhs = gimple_cond_rhs (cond); + + if ((operand_equal_for_phi_arg_p (arg0, lhs) + && operand_equal_for_phi_arg_p (arg1, rhs)) + || (operand_equal_for_phi_arg_p (arg1, lhs) + && operand_equal_for_phi_arg_p (arg0, rhs))) + return true; + + /* Now handle more complex case where we have an EQ comparison + which feeds a BIT_AND_EXPR which feeds COND. + + First verify that COND is of the form SSA_NAME NE 0. */ + if (*code != NE_EXPR || !integer_zerop (rhs) + || TREE_CODE (lhs) != SSA_NAME) + return false; + + /* Now ensure that SSA_NAME is set by a BIT_AND_EXPR. */ + def = SSA_NAME_DEF_STMT (lhs); + if (!is_gimple_assign (def) || gimple_assign_rhs_code (def) != BIT_AND_EXPR) + return false; + + /* Now verify arg0/arg1 correspond to the source arguments of an + EQ comparison feeding the BIT_AND_EXPR. */ + + tree tmp = gimple_assign_rhs1 (def); + if (rhs_is_fed_for_value_replacement (arg0, arg1, code, tmp)) + return true; + + tmp = gimple_assign_rhs2 (def); + if (rhs_is_fed_for_value_replacement (arg0, arg1, code, tmp)) + return true; + + return false; +} + /* The function value_replacement does the main work of doing the value replacement. Return non-zero if the replacement is done. Otherwise return 0. If we remove the middle basic block, return 2. 
@@ -783,10 +890,7 @@ We now need to verify that the two arguments in the PHI node match the two arguments to the equality comparison. */ - if ((operand_equal_for_phi_arg_p (arg0, gimple_cond_lhs (cond)) - && operand_equal_for_phi_arg_p (arg1, gimple_cond_rhs (cond))) - || (operand_equal_for_phi_arg_p (arg1, gimple_cond_lhs (cond)) - && operand_equal_for_phi_arg_p (arg0, gimple_cond_rhs (cond)))) + if (operand_equal_for_value_replacement (arg0, arg1, &code, cond)) { edge e; tree arg; @@ -1807,7 +1911,7 @@ /* Given a "diamond" control-flow pattern where BB0 tests a condition, BB1 and BB2 are "then" and "else" blocks dependent on this test, - and BB3 rejoins control flow following BB1 and BB2, look for + and BB3 rejoins control flow following BB1 and BB2, look for opportunities to hoist loads as follows. If BB3 contains a PHI of two loads, one each occurring in BB1 and BB2, and the loads are provably of adjacent fields in the same structure, then move both @@ -1857,7 +1961,7 @@ arg1 = gimple_phi_arg_def (phi_stmt, 0); arg2 = gimple_phi_arg_def (phi_stmt, 1); - + if (TREE_CODE (arg1) != SSA_NAME || TREE_CODE (arg2) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (arg1) --- a/src/gcc/tree-ssa-coalesce.c +++ b/src/gcc/tree-ssa-coalesce.c @@ -979,8 +979,7 @@ continue; register_ssa_partition (map, arg); - if ((SSA_NAME_VAR (arg) == SSA_NAME_VAR (res) - && TREE_TYPE (arg) == TREE_TYPE (res)) + if (gimple_can_coalesce_p (arg, res) || (e->flags & EDGE_ABNORMAL)) { saw_copy = true; @@ -1021,8 +1020,7 @@ if (gimple_assign_copy_p (stmt) && TREE_CODE (lhs) == SSA_NAME && TREE_CODE (rhs1) == SSA_NAME - && SSA_NAME_VAR (lhs) == SSA_NAME_VAR (rhs1) - && TREE_TYPE (lhs) == TREE_TYPE (rhs1)) + && gimple_can_coalesce_p (lhs, rhs1)) { v1 = SSA_NAME_VERSION (lhs); v2 = SSA_NAME_VERSION (rhs1); @@ -1073,8 +1071,7 @@ v1 = SSA_NAME_VERSION (outputs[match]); v2 = SSA_NAME_VERSION (input); - if (SSA_NAME_VAR (outputs[match]) == SSA_NAME_VAR (input) - && TREE_TYPE (outputs[match]) == TREE_TYPE (input)) + if (gimple_can_coalesce_p (outputs[match], input)) { cost = coalesce_cost (REG_BR_PROB_BASE, optimize_bb_for_size_p (bb)); @@ -1108,8 +1105,7 @@ first = var; else { - gcc_assert (SSA_NAME_VAR (var) == SSA_NAME_VAR (first) - && TREE_TYPE (var) == TREE_TYPE (first)); + gcc_assert (gimple_can_coalesce_p (var, first)); v1 = SSA_NAME_VERSION (first); v2 = SSA_NAME_VERSION (var); bitmap_set_bit (used_in_copy, v1); @@ -1246,8 +1242,7 @@ var2 = ssa_name (y); /* Assert the coalesces have the same base variable. */ - gcc_assert (SSA_NAME_VAR (var1) == SSA_NAME_VAR (var2) - && TREE_TYPE (var1) == TREE_TYPE (var2)); + gcc_assert (gimple_can_coalesce_p (var1, var2)); if (debug) fprintf (debug, "Coalesce list: "); @@ -1377,3 +1372,38 @@ return map; } + +/* Given SSA_NAMEs NAME1 and NAME2, return true if they are candidates for + coalescing together, false otherwise. + + This must stay consistent with var_map_base_init in tree-ssa-live.c. */ + +bool +gimple_can_coalesce_p (tree name1, tree name2) +{ + /* First check the SSA_NAME's associated DECL. We only want to + coalesce if they have the same DECL or both have no associated DECL. */ + if (SSA_NAME_VAR (name1) != SSA_NAME_VAR (name2)) + return false; + + /* Now check the types. If the types are the same, then we should + try to coalesce V1 and V2. */ + tree t1 = TREE_TYPE (name1); + tree t2 = TREE_TYPE (name2); + if (t1 == t2) + return true; + + /* If the types are not the same, check for a canonical type match. 
This + (for example) allows coalescing when the types are fundamentally the + same, but just have different names. + + Note pointer types with different address spaces may have the same + canonical type. Those are rejected for coalescing by the + types_compatible_p check. */ + if (TYPE_CANONICAL (t1) + && TYPE_CANONICAL (t1) == TYPE_CANONICAL (t2) + && types_compatible_p (t1, t2)) + return true; + + return false; +} --- a/src/gcc/lower-subreg.c +++ b/src/gcc/lower-subreg.c @@ -966,7 +966,20 @@ rtx reg; reg = gen_reg_rtx (orig_mode); + +#ifdef AUTO_INC_DEC + { + rtx move = emit_move_insn (reg, src); + if (MEM_P (src)) + { + rtx note = find_reg_note (insn, REG_INC, NULL_RTX); + if (note) + add_reg_note (move, REG_INC, XEXP (note, 0)); + } + } +#else emit_move_insn (reg, src); +#endif src = reg; } @@ -1056,6 +1069,16 @@ mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0); minsn = emit_move_insn (real_dest, mdest); +#ifdef AUTO_INC_DEC + if (MEM_P (real_dest) + && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest))) + { + rtx note = find_reg_note (insn, REG_INC, NULL_RTX); + if (note) + add_reg_note (minsn, REG_INC, XEXP (note, 0)); + } +#endif + smove = single_set (minsn); gcc_assert (smove != NULL_RTX); --- a/src/gcc/gimple-fold.c +++ b/src/gcc/gimple-fold.c @@ -1151,6 +1151,8 @@ gimplify_and_update_call_from_tree (gsi, result); changed = true; } + else if (DECL_BUILT_IN_CLASS (callee) == BUILT_IN_MD) + changed |= targetm.gimple_fold_builtin (gsi); } return changed; --- a/src/gcc/tree-ssa-live.c +++ b/src/gcc/tree-ssa-live.c @@ -88,8 +88,12 @@ as it restricts the sets we compute conflicts for. Using TREE_TYPE to generate sets is the easies as type equivalency also holds for SSA names with the same - underlying decl. */ - m->base.from = TREE_TYPE (var); + underlying decl. + + Check gimple_can_coalesce_p when changing this code. */ + m->base.from = (TYPE_CANONICAL (TREE_TYPE (var)) + ? TYPE_CANONICAL (TREE_TYPE (var)) + : TREE_TYPE (var)); /* If base variable hasn't been seen, set it up. */ slot = (struct tree_int_map **) htab_find_slot (tree_to_index, m, INSERT); --- a/src/gcc/lto/ChangeLog.linaro +++ b/src/gcc/lto/ChangeLog.linaro @@ -0,0 +1,51 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. --- a/src/gcc/po/ChangeLog.linaro +++ b/src/gcc/po/ChangeLog.linaro @@ -0,0 +1,51 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. 
+ +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. --- a/src/gcc/common.opt +++ b/src/gcc/common.opt @@ -2233,13 +2233,33 @@ Common Report Var(flag_tree_slp_vectorize) Init(2) Optimization Enable basic block vectorization (SLP) on trees +fvect-cost-model= +Common Joined RejectNegative Enum(vect_cost_model) Var(flag_vect_cost_model) Init(VECT_COST_MODEL_DEFAULT) +Specifies the cost model for vectorization + +Enum +Name(vect_cost_model) Type(enum vect_cost_model) UnknownError(unknown vectorizer cost model %qs) + +EnumValue +Enum(vect_cost_model) String(unlimited) Value(VECT_COST_MODEL_UNLIMITED) + +EnumValue +Enum(vect_cost_model) String(dynamic) Value(VECT_COST_MODEL_DYNAMIC) + +EnumValue +Enum(vect_cost_model) String(cheap) Value(VECT_COST_MODEL_CHEAP) + fvect-cost-model -Common Report Var(flag_vect_cost_model) Optimization -Enable use of cost model in vectorization +Common RejectNegative Alias(fvect-cost-model=,dynamic) +Enables the dynamic vectorizer cost model. Preserved for backward compatibility. +fno-vect-cost-model +Common RejectNegative Alias(fvect-cost-model=,unlimited) +Enables the unlimited vectorizer cost model. Preserved for backward compatibility. + ftree-vect-loop-version -Common Report Var(flag_tree_vect_loop_version) Init(1) Optimization -Enable loop versioning when doing loop vectorization on trees +Common Ignore +Does nothing. Preserved for backward compatibility. ftree-scev-cprop Common Report Var(flag_tree_scev_cprop) Init(1) Optimization --- a/src/gcc/combine.c +++ b/src/gcc/combine.c @@ -11996,6 +11996,13 @@ } } + /* We may have changed the comparison operands. Re-canonicalize. */ + if (swap_commutative_operands_p (op0, op1)) + { + tem = op0, op0 = op1, op1 = tem; + code = swap_condition (code); + } + /* If this machine only supports a subset of valid comparisons, see if we can convert an unsupported one into a supported one. */ target_canonicalize_comparison (&code, &op0, &op1, 0); --- a/src/gcc/config.gcc +++ b/src/gcc/config.gcc @@ -325,10 +325,11 @@ ;; arm*-*-*) cpu_type=arm - extra_headers="mmintrin.h arm_neon.h" + extra_headers="mmintrin.h arm_neon.h arm_acle.h" target_type_format_char='%' c_target_objs="arm-c.o" cxx_target_objs="arm-c.o" + need_64bit_hwint=yes extra_options="${extra_options} arm/arm-tables.opt" ;; avr-*-*) @@ -877,7 +878,7 @@ tm_defines="${tm_defines} TARGET_BIG_ENDIAN_DEFAULT=1" ;; esac - tmake_file="${tmake_file} arm/t-arm arm/t-arm-elf arm/t-bpabi arm/t-linux-eabi" + tmake_file="${tmake_file} arm/t-arm arm/t-arm-elf arm/t-bpabi arm/t-linux-eabi arm/t-mlibs" tm_file="$tm_file arm/bpabi.h arm/linux-eabi.h arm/aout.h vxworks-dummy.h arm/arm.h" # Define multilib configuration for arm-linux-androideabi. case ${target} in @@ -885,10 +886,6 @@ tmake_file="$tmake_file arm/t-linux-androideabi" ;; esac - # The BPABI long long divmod functions return a 128-bit value in - # registers r0-r3. Correctly modeling that requires the use of - # TImode. - need_64bit_hwint=yes # The EABI requires the use of __cxa_atexit. 
default_use_cxa_atexit=yes with_tls=${with_tls:-gnu} @@ -897,10 +894,6 @@ tm_file="dbxelf.h elfos.h arm/unknown-elf.h arm/elf.h arm/linux-gas.h arm/uclinux-elf.h glibc-stdint.h" tmake_file="arm/t-arm arm/t-arm-elf arm/t-bpabi" tm_file="$tm_file arm/bpabi.h arm/uclinux-eabi.h arm/aout.h vxworks-dummy.h arm/arm.h" - # The BPABI long long divmod functions return a 128-bit value in - # registers r0-r3. Correctly modeling that requires the use of - # TImode. - need_64bit_hwint=yes # The EABI requires the use of __cxa_atexit. default_use_cxa_atexit=yes ;; @@ -909,10 +902,6 @@ arm*eb-*-eabi*) tm_defines="${tm_defines} TARGET_BIG_ENDIAN_DEFAULT=1" esac - # The BPABI long long divmod functions return a 128-bit value in - # registers r0-r3. Correctly modeling that requires the use of - # TImode. - need_64bit_hwint=yes default_use_cxa_atexit=yes tm_file="dbxelf.h elfos.h arm/unknown-elf.h arm/elf.h arm/bpabi.h" tmake_file="arm/t-arm arm/t-arm-elf" @@ -3310,6 +3299,43 @@ if test "x$with_arch" != x && test "x$with_cpu" != x; then echo "Warning: --with-arch overrides --with-cpu=$with_cpu" 1>&2 fi + + # Add extra multilibs + if test "x$with_multilib_list" != x; then + arm_multilibs=`echo $with_multilib_list | sed -e 's/,/ /g'` + for arm_multilib in ${arm_multilibs}; do + case ${arm_multilib} in + aprofile) + # Note that arm/t-aprofile is a + # stand-alone make file fragment to be + # used only with itself. We do not + # specifically use the + # TM_MULTILIB_OPTION framework because + # this shorthand is more + # pragmatic. Additionally it is only + # designed to work without any + # with-cpu, with-arch with-mode + # with-fpu or with-float options. + if test "x$with_arch" != x \ + || test "x$with_cpu" != x \ + || test "x$with_float" != x \ + || test "x$with_fpu" != x \ + || test "x$with_mode" != x ; then + echo "Error: You cannot use any of --with-arch/cpu/fpu/float/mode with --with-multilib-list=aprofile" 1>&2 + exit 1 + fi + tmake_file="${tmake_file} arm/t-aprofile" + break + ;; + default) + ;; + *) + echo "Error: --with-multilib-list=${with_multilib_list} not supported." 1>&2 + exit 1 + ;; + esac + done + fi ;; fr*-*-*linux*) --- a/src/gcc/Makefile.in +++ b/src/gcc/Makefile.in @@ -4282,7 +4282,8 @@ gcov.texi trouble.texi bugreport.texi service.texi \ contribute.texi compat.texi funding.texi gnu.texi gpl_v3.texi \ fdl.texi contrib.texi cppenv.texi cppopts.texi avr-mmcu.texi \ - implement-c.texi implement-cxx.texi arm-neon-intrinsics.texi + implement-c.texi implement-cxx.texi arm-neon-intrinsics.texi \ + arm-acle-intrinsics.texi # we explicitly use $(srcdir)/doc/tm.texi here to avoid confusion with # the generated tm.texi; the latter might have a more recent timestamp, --- a/src/gcc/gimple.h +++ b/src/gcc/gimple.h @@ -130,7 +130,7 @@ /* Iterator object for GIMPLE statement sequences. */ -typedef struct +struct gimple_stmt_iterator_d { /* Sequence node holding the current statement. */ gimple_seq_node ptr; @@ -141,9 +141,8 @@ block/sequence is removed. */ gimple_seq *seq; basic_block bb; -} gimple_stmt_iterator; +}; - /* Data structure definitions for GIMPLE tuples. NOTE: word markers are for 64 bit hosts. */ @@ -1033,6 +1032,9 @@ extern bool useless_type_conversion_p (tree, tree); extern bool types_compatible_p (tree, tree); +/* In tree-ssa-coalesce.c */ +extern bool gimple_can_coalesce_p (tree, tree); + /* Return the first node in GIMPLE sequence S. 
*/ static inline gimple_seq_node --- a/src/gcc/config/i386/linux-common.h +++ b/src/gcc/config/i386/linux-common.h @@ -40,7 +40,7 @@ #undef LIB_SPEC #define LIB_SPEC \ LINUX_OR_ANDROID_LD (GNU_USER_TARGET_LIB_SPEC, \ - GNU_USER_TARGET_LIB_SPEC " " ANDROID_LIB_SPEC) + GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC " " ANDROID_LIB_SPEC) #undef STARTFILE_SPEC #define STARTFILE_SPEC \ --- a/src/gcc/config/i386/i386.c +++ b/src/gcc/config/i386/i386.c @@ -42262,20 +42262,17 @@ unsigned *cost = (unsigned *) data; unsigned retval = 0; - if (flag_vect_cost_model) - { - tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE; - int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign); + tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE; + int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign); - /* Statements in an inner loop relative to the loop being - vectorized are weighted more heavily. The value here is - arbitrary and could potentially be improved with analysis. */ - if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info)) - count *= 50; /* FIXME. */ + /* Statements in an inner loop relative to the loop being + vectorized are weighted more heavily. The value here is + arbitrary and could potentially be improved with analysis. */ + if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info)) + count *= 50; /* FIXME. */ - retval = (unsigned) (count * stmt_cost); - cost[where] += retval; - } + retval = (unsigned) (count * stmt_cost); + cost[where] += retval; return retval; } --- a/src/gcc/config/gnu-user.h +++ b/src/gcc/config/gnu-user.h @@ -73,10 +73,14 @@ #undef CPLUSPLUS_CPP_SPEC #define CPLUSPLUS_CPP_SPEC "-D_GNU_SOURCE %(cpp)" +#define GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC \ + "%{shared:-lc} \ + %{!shared:%{mieee-fp:-lieee} %{profile:-lc_p}%{!profile:-lc}}" + #define GNU_USER_TARGET_LIB_SPEC \ - "%{pthread:-lpthread} \ - %{shared:-lc} \ - %{!shared:%{mieee-fp:-lieee} %{profile:-lc_p}%{!profile:-lc}}" + "%{pthread:-lpthread} " \ + GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC + #undef LIB_SPEC #define LIB_SPEC GNU_USER_TARGET_LIB_SPEC --- a/src/gcc/config/aarch64/aarch64-simd.md +++ b/src/gcc/config/aarch64/aarch64-simd.md @@ -21,7 +21,7 @@ ; Main data types used by the insntructions -(define_attr "simd_mode" "unknown,none,V8QI,V16QI,V4HI,V8HI,V2SI,V4SI,V2DI,V2SF,V4SF,V2DF,OI,CI,XI,DI,DF,SI,SF,HI,QI" +(define_attr "simd_mode" "unknown,none,V8QI,V16QI,V4HI,V8HI,V2SI,V4SI,V2DI,V2SF,V4SF,V2DF,OI,CI,XI,TI,DI,DF,SI,SF,HI,QI" (const_string "unknown")) @@ -44,6 +44,7 @@ ; simd_dup duplicate element. ; simd_dupgp duplicate general purpose register. ; simd_ext bitwise extract from pair. +; simd_fabd floating point absolute difference. ; simd_fadd floating point add/sub. ; simd_fcmp floating point compare. ; simd_fcvti floating point convert to integer. @@ -58,9 +59,9 @@ ; simd_fmul floating point multiply. ; simd_fmul_elt floating point multiply (by element). ; simd_fnegabs floating point neg/abs. -; simd_frcpe floating point reciprocal estimate. -; simd_frcps floating point reciprocal step. -; simd_frecx floating point reciprocal exponent. +; simd_frecpe floating point reciprocal estimate. +; simd_frecps floating point reciprocal step. +; simd_frecpx floating point reciprocal exponent. ; simd_frint floating point round to integer. ; simd_fsqrt floating point square root. ; simd_icvtf integer convert to floating point. 
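[Editor's note, not part of the patch] The attribute renames in the aarch64-simd.md hunk above (simd_frcpe/simd_frcps/simd_frecx to simd_frecpe/simd_frecps/simd_frecpx) align the scheduling type names with the FRECPE/FRECPS/FRECPX instructions they describe. As a hedged illustration of where those instructions appear at the source level, a Newton-Raphson reciprocal built from the corresponding arm_neon.h intrinsics might look like the sketch below; the patch itself only renames the .md attributes, and the numeric tolerance of this approximation is an assumption of the example.

  #include <arm_neon.h>

  /* Estimate 1/a with FRECPE, then apply two FRECPS refinement steps:
     x <- x * frecps(a, x), where frecps(a, x) computes 2 - a*x.  */
  float32x4_t
  approx_reciprocal (float32x4_t a)
  {
    float32x4_t x = vrecpeq_f32 (a);
    x = vmulq_f32 (x, vrecpsq_f32 (a, x));
    x = vmulq_f32 (x, vrecpsq_f32 (a, x));
    return x;
  }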
@@ -147,6 +148,7 @@ simd_dup,\ simd_dupgp,\ simd_ext,\ + simd_fabd,\ simd_fadd,\ simd_fcmp,\ simd_fcvti,\ @@ -161,9 +163,9 @@ simd_fmul,\ simd_fmul_elt,\ simd_fnegabs,\ - simd_frcpe,\ - simd_frcps,\ - simd_frecx,\ + simd_frecpe,\ + simd_frecps,\ + simd_frecpx,\ simd_frint,\ simd_fsqrt,\ simd_icvtf,\ @@ -193,6 +195,7 @@ simd_move,\ simd_move_imm,\ simd_mul,\ + simd_mul_d_long,\ simd_mul_elt,\ simd_mull,\ simd_mull_elt,\ @@ -233,6 +236,12 @@ simd_trn,\ simd_uzp,\ simd_zip,\ + simd_crypto_aes,\ + simd_crypto_sha1_xor,\ + simd_crypto_sha1_fast,\ + simd_crypto_sha1_slow,\ + simd_crypto_sha256_fast,\ + simd_crypto_sha256_slow,\ none" (const_string "none")) @@ -303,8 +312,8 @@ (eq_attr "simd_type" "simd_store3,simd_store4") (const_string "neon_vst1_3_4_regs") (eq_attr "simd_type" "simd_store1s,simd_store2s") (const_string "neon_vst1_vst2_lane") (eq_attr "simd_type" "simd_store3s,simd_store4s") (const_string "neon_vst3_vst4_lane") - (and (eq_attr "simd_type" "simd_frcpe,simd_frcps") (eq_attr "simd_mode" "V2SF")) (const_string "neon_fp_vrecps_vrsqrts_ddd") - (and (eq_attr "simd_type" "simd_frcpe,simd_frcps") (eq_attr "simd_mode" "V4SF,V2DF")) (const_string "neon_fp_vrecps_vrsqrts_qqq") + (and (eq_attr "simd_type" "simd_frecpe,simd_frecps") (eq_attr "simd_mode" "V2SF")) (const_string "neon_fp_vrecps_vrsqrts_ddd") + (and (eq_attr "simd_type" "simd_frecpe,simd_frecps") (eq_attr "simd_mode" "V4SF,V2DF")) (const_string "neon_fp_vrecps_vrsqrts_qqq") (eq_attr "simd_type" "none") (const_string "none") ] (const_string "unknown"))) @@ -355,18 +364,6 @@ (set_attr "simd_mode" "")] ) -(define_insn "aarch64_dup_lane" - [(set (match_operand:SDQ_I 0 "register_operand" "=w") - (vec_select: - (match_operand: 1 "register_operand" "w") - (parallel [(match_operand:SI 2 "immediate_operand" "i")]) - ))] - "TARGET_SIMD" - "dup\\t%0, %1.[%2]" - [(set_attr "simd_type" "simd_dup") - (set_attr "simd_mode" "")] -) - (define_insn "aarch64_simd_dup" [(set (match_operand:VDQF 0 "register_operand" "=w") (vec_duplicate:VDQF (match_operand: 1 "register_operand" "w")))] @@ -394,7 +391,7 @@ case 4: return "ins\t%0.d[0], %1"; case 5: return "mov\t%0, %1"; case 6: - return aarch64_output_simd_mov_immediate (&operands[1], + return aarch64_output_simd_mov_immediate (operands[1], mode, 64); default: gcc_unreachable (); } @@ -414,16 +411,20 @@ { switch (which_alternative) { - case 0: return "ld1\t{%0.}, %1"; - case 1: return "st1\t{%1.}, %0"; - case 2: return "orr\t%0., %1., %1."; - case 3: return "umov\t%0, %1.d[0]\;umov\t%H0, %1.d[1]"; - case 4: return "ins\t%0.d[0], %1\;ins\t%0.d[1], %H1"; - case 5: return "#"; + case 0: + return "ld1\t{%0.}, %1"; + case 1: + return "st1\t{%1.}, %0"; + case 2: + return "orr\t%0., %1., %1."; + case 3: + case 4: + case 5: + return "#"; case 6: - return aarch64_output_simd_mov_immediate (&operands[1], - mode, 128); - default: gcc_unreachable (); + return aarch64_output_simd_mov_immediate (operands[1], mode, 128); + default: + gcc_unreachable (); } } [(set_attr "simd_type" "simd_load1,simd_store1,simd_move,simd_movgp,simd_insgp,simd_move,simd_move_imm") @@ -452,6 +453,77 @@ aarch64_simd_disambiguate_copy (operands, dest, src, 2); }) +(define_split + [(set (match_operand:VQ 0 "register_operand" "") + (match_operand:VQ 1 "register_operand" ""))] + "TARGET_SIMD && reload_completed + && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1]))) + || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))" + [(const_int 0)] +{ + aarch64_split_simd_move (operands[0], operands[1]); 
+ DONE; +}) + +(define_expand "aarch64_split_simd_mov" + [(set (match_operand:VQ 0) + (match_operand:VQ 1))] + "TARGET_SIMD" + { + rtx dst = operands[0]; + rtx src = operands[1]; + + if (GP_REGNUM_P (REGNO (src))) + { + rtx src_low_part = gen_lowpart (mode, src); + rtx src_high_part = gen_highpart (mode, src); + + emit_insn + (gen_move_lo_quad_ (dst, src_low_part)); + emit_insn + (gen_move_hi_quad_ (dst, src_high_part)); + } + + else + { + rtx dst_low_part = gen_lowpart (mode, dst); + rtx dst_high_part = gen_highpart (mode, dst); + rtx lo = aarch64_simd_vect_par_cnst_half (mode, false); + rtx hi = aarch64_simd_vect_par_cnst_half (mode, true); + + emit_insn + (gen_aarch64_simd_mov_from_low (dst_low_part, src, lo)); + emit_insn + (gen_aarch64_simd_mov_from_high (dst_high_part, src, hi)); + } + DONE; + } +) + +(define_insn "aarch64_simd_mov_from_low" + [(set (match_operand: 0 "register_operand" "=r") + (vec_select: + (match_operand:VQ 1 "register_operand" "w") + (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))] + "TARGET_SIMD && reload_completed" + "umov\t%0, %1.d[0]" + [(set_attr "simd_type" "simd_movgp") + (set_attr "simd_mode" "") + (set_attr "length" "4") + ]) + +(define_insn "aarch64_simd_mov_from_high" + [(set (match_operand: 0 "register_operand" "=r") + (vec_select: + (match_operand:VQ 1 "register_operand" "w") + (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))] + "TARGET_SIMD && reload_completed" + "umov\t%0, %1.d[1]" + [(set_attr "simd_type" "simd_movgp") + (set_attr "simd_mode" "") + (set_attr "length" "4") + ]) + (define_insn "orn3" [(set (match_operand:VDQ 0 "register_operand" "=w") (ior:VDQ (not:VDQ (match_operand:VDQ 1 "register_operand" "w")) @@ -503,8 +575,8 @@ ) (define_insn "neg2" - [(set (match_operand:VDQM 0 "register_operand" "=w") - (neg:VDQM (match_operand:VDQM 1 "register_operand" "w")))] + [(set (match_operand:VDQ 0 "register_operand" "=w") + (neg:VDQ (match_operand:VDQ 1 "register_operand" "w")))] "TARGET_SIMD" "neg\t%0., %1." [(set_attr "simd_type" "simd_negabs") @@ -520,6 +592,51 @@ (set_attr "simd_mode" "")] ) +(define_insn "abd_3" + [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") + (abs:VDQ_BHSI (minus:VDQ_BHSI + (match_operand:VDQ_BHSI 1 "register_operand" "w") + (match_operand:VDQ_BHSI 2 "register_operand" "w"))))] + "TARGET_SIMD" + "sabd\t%0., %1., %2." + [(set_attr "simd_type" "simd_abd") + (set_attr "simd_mode" "")] +) + +(define_insn "aba_3" + [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") + (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI + (match_operand:VDQ_BHSI 1 "register_operand" "w") + (match_operand:VDQ_BHSI 2 "register_operand" "w"))) + (match_operand:VDQ_BHSI 3 "register_operand" "0")))] + "TARGET_SIMD" + "saba\t%0., %1., %2." + [(set_attr "simd_type" "simd_abd") + (set_attr "simd_mode" "")] +) + +(define_insn "fabd_3" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (abs:VDQF (minus:VDQF + (match_operand:VDQF 1 "register_operand" "w") + (match_operand:VDQF 2 "register_operand" "w"))))] + "TARGET_SIMD" + "fabd\t%0., %1., %2." 
+ [(set_attr "simd_type" "simd_fabd") + (set_attr "simd_mode" "")] +) + +(define_insn "*fabd_scalar3" + [(set (match_operand:GPF 0 "register_operand" "=w") + (abs:GPF (minus:GPF + (match_operand:GPF 1 "register_operand" "w") + (match_operand:GPF 2 "register_operand" "w"))))] + "TARGET_SIMD" + "fabd\t%0, %1, %2" + [(set_attr "simd_type" "simd_fabd") + (set_attr "mode" "")] +) + (define_insn "and3" [(set (match_operand:VDQ 0 "register_operand" "=w") (and:VDQ (match_operand:VDQ 1 "register_operand" "w") @@ -904,12 +1021,12 @@ ) ;; Max/Min operations. -(define_insn "3" +(define_insn "3" [(set (match_operand:VQ_S 0 "register_operand" "=w") (MAXMIN:VQ_S (match_operand:VQ_S 1 "register_operand" "w") (match_operand:VQ_S 2 "register_operand" "w")))] "TARGET_SIMD" - "\t%0., %1., %2." + "\t%0., %1., %2." [(set_attr "simd_type" "simd_minmax") (set_attr "simd_mode" "")] ) @@ -917,29 +1034,39 @@ ;; Move into low-half clearing high half to 0. (define_insn "move_lo_quad_" - [(set (match_operand:VQ 0 "register_operand" "=w") + [(set (match_operand:VQ 0 "register_operand" "=w,w,w") (vec_concat:VQ - (match_operand: 1 "register_operand" "w") + (match_operand: 1 "register_operand" "w,r,r") (vec_duplicate: (const_int 0))))] "TARGET_SIMD" - "mov\\t%d0, %d1"; - [(set_attr "simd_type" "simd_dup") - (set_attr "simd_mode" "")] + "@ + dup\\t%d0, %1.d[0] + fmov\\t%d0, %1 + dup\\t%d0, %1" + [(set_attr "v8type" "*,fmov,*") + (set_attr "simd_type" "simd_dup,*,simd_dup") + (set_attr "simd_mode" "") + (set_attr "simd" "yes,*,yes") + (set_attr "fp" "*,yes,*") + (set_attr "length" "4")] ) ;; Move into high-half. (define_insn "aarch64_simd_move_hi_quad_" - [(set (match_operand:VQ 0 "register_operand" "+w") + [(set (match_operand:VQ 0 "register_operand" "+w,w") (vec_concat:VQ (vec_select: (match_dup 0) (match_operand:VQ 2 "vect_par_cnst_lo_half" "")) - (match_operand: 1 "register_operand" "w")))] + (match_operand: 1 "register_operand" "w,r")))] "TARGET_SIMD" - "ins\\t%0.d[1], %1.d[0]"; - [(set_attr "simd_type" "simd_ins") - (set_attr "simd_mode" "")] + "@ + ins\\t%0.d[1], %1.d[0] + ins\\t%0.d[1], %1" + [(set_attr "simd_type" "simd_ins,simd_ins") + (set_attr "simd_mode" "") + (set_attr "length" "4")] ) (define_expand "move_hi_quad_" @@ -1045,6 +1172,104 @@ ;; Widening arithmetic. +(define_insn "*aarch64_mlal_lo" + [(set (match_operand: 0 "register_operand" "=w") + (plus: + (mult: + (ANY_EXTEND: (vec_select: + (match_operand:VQW 2 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_lo_half" ""))) + (ANY_EXTEND: (vec_select: + (match_operand:VQW 4 "register_operand" "w") + (match_dup 3)))) + (match_operand: 1 "register_operand" "0")))] + "TARGET_SIMD" + "mlal\t%0., %2., %4." + [(set_attr "simd_type" "simd_mlal") + (set_attr "simd_mode" "")] +) + +(define_insn "*aarch64_mlal_hi" + [(set (match_operand: 0 "register_operand" "=w") + (plus: + (mult: + (ANY_EXTEND: (vec_select: + (match_operand:VQW 2 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_hi_half" ""))) + (ANY_EXTEND: (vec_select: + (match_operand:VQW 4 "register_operand" "w") + (match_dup 3)))) + (match_operand: 1 "register_operand" "0")))] + "TARGET_SIMD" + "mlal2\t%0., %2., %4." 
+ [(set_attr "simd_type" "simd_mlal") + (set_attr "simd_mode" "")] +) + +(define_insn "*aarch64_mlsl_lo" + [(set (match_operand: 0 "register_operand" "=w") + (minus: + (match_operand: 1 "register_operand" "0") + (mult: + (ANY_EXTEND: (vec_select: + (match_operand:VQW 2 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_lo_half" ""))) + (ANY_EXTEND: (vec_select: + (match_operand:VQW 4 "register_operand" "w") + (match_dup 3))))))] + "TARGET_SIMD" + "mlsl\t%0., %2., %4." + [(set_attr "simd_type" "simd_mlal") + (set_attr "simd_mode" "")] +) + +(define_insn "*aarch64_mlsl_hi" + [(set (match_operand: 0 "register_operand" "=w") + (minus: + (match_operand: 1 "register_operand" "0") + (mult: + (ANY_EXTEND: (vec_select: + (match_operand:VQW 2 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_hi_half" ""))) + (ANY_EXTEND: (vec_select: + (match_operand:VQW 4 "register_operand" "w") + (match_dup 3))))))] + "TARGET_SIMD" + "mlsl2\t%0., %2., %4." + [(set_attr "simd_type" "simd_mlal") + (set_attr "simd_mode" "")] +) + +(define_insn "*aarch64_mlal" + [(set (match_operand: 0 "register_operand" "=w") + (plus: + (mult: + (ANY_EXTEND: + (match_operand:VDW 1 "register_operand" "w")) + (ANY_EXTEND: + (match_operand:VDW 2 "register_operand" "w"))) + (match_operand: 3 "register_operand" "0")))] + "TARGET_SIMD" + "mlal\t%0., %1., %2." + [(set_attr "simd_type" "simd_mlal") + (set_attr "simd_mode" "")] +) + +(define_insn "*aarch64_mlsl" + [(set (match_operand: 0 "register_operand" "=w") + (minus: + (match_operand: 1 "register_operand" "0") + (mult: + (ANY_EXTEND: + (match_operand:VDW 2 "register_operand" "w")) + (ANY_EXTEND: + (match_operand:VDW 3 "register_operand" "w")))))] + "TARGET_SIMD" + "mlsl\t%0., %2., %3." + [(set_attr "simd_type" "simd_mlal") + (set_attr "simd_mode" "")] +) + (define_insn "aarch64_simd_vec_mult_lo_" [(set (match_operand: 0 "register_operand" "=w") (mult: (ANY_EXTEND: (vec_select: @@ -1196,7 +1421,9 @@ (set_attr "simd_mode" "")] ) -(define_insn "aarch64_frint" +;; Vector versions of the floating-point frint patterns. +;; Expands to btrunc, ceil, floor, nearbyint, rint, round. +(define_insn "2" [(set (match_operand:VDQF 0 "register_operand" "=w") (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")] FRINT))] @@ -1206,16 +1433,9 @@ (set_attr "simd_mode" "")] ) -;; Vector versions of the floating-point frint patterns. -;; Expands to btrunc, ceil, floor, nearbyint, rint, round. -(define_expand "2" - [(set (match_operand:VDQF 0 "register_operand") - (unspec:VDQF [(match_operand:VDQF 1 "register_operand")] - FRINT))] - "TARGET_SIMD" - {}) - -(define_insn "aarch64_fcvt" +;; Vector versions of the fcvt standard patterns. +;; Expands to lbtrunc, lround, lceil, lfloor +(define_insn "l2" [(set (match_operand: 0 "register_operand" "=w") (FIXUORS: (unspec: [(match_operand:VDQF 1 "register_operand" "w")] @@ -1226,16 +1446,141 @@ (set_attr "simd_mode" "")] ) -;; Vector versions of the fcvt standard patterns. 
-;; Expands to lbtrunc, lround, lceil, lfloor -(define_expand "l2" +(define_expand "2" [(set (match_operand: 0 "register_operand") (FIXUORS: (unspec: [(match_operand:VDQF 1 "register_operand")] - FCVT)))] + UNSPEC_FRINTZ)))] "TARGET_SIMD" {}) +(define_expand "2" + [(set (match_operand: 0 "register_operand") + (FIXUORS: (unspec: + [(match_operand:VDQF 1 "register_operand")] + UNSPEC_FRINTZ)))] + "TARGET_SIMD" + {}) + +(define_expand "ftrunc2" + [(set (match_operand:VDQF 0 "register_operand") + (unspec:VDQF [(match_operand:VDQF 1 "register_operand")] + UNSPEC_FRINTZ))] + "TARGET_SIMD" + {}) + +(define_insn "2" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (FLOATUORS:VDQF + (match_operand: 1 "register_operand" "w")))] + "TARGET_SIMD" + "cvtf\\t%0., %1." + [(set_attr "simd_type" "simd_icvtf") + (set_attr "simd_mode" "")] +) + +;; Conversions between vectors of floats and doubles. +;; Contains a mix of patterns to match standard pattern names +;; and those for intrinsics. + +;; Float widening operations. + +(define_insn "vec_unpacks_lo_v4sf" + [(set (match_operand:V2DF 0 "register_operand" "=w") + (float_extend:V2DF + (vec_select:V2SF + (match_operand:V4SF 1 "register_operand" "w") + (parallel [(const_int 0) (const_int 1)]) + )))] + "TARGET_SIMD" + "fcvtl\\t%0.2d, %1.2s" + [(set_attr "simd_type" "simd_fcvtl") + (set_attr "simd_mode" "V2DF")] +) + +(define_insn "aarch64_float_extend_lo_v2df" + [(set (match_operand:V2DF 0 "register_operand" "=w") + (float_extend:V2DF + (match_operand:V2SF 1 "register_operand" "w")))] + "TARGET_SIMD" + "fcvtl\\t%0.2d, %1.2s" + [(set_attr "simd_type" "simd_fcvtl") + (set_attr "simd_mode" "V2DF")] +) + +(define_insn "vec_unpacks_hi_v4sf" + [(set (match_operand:V2DF 0 "register_operand" "=w") + (float_extend:V2DF + (vec_select:V2SF + (match_operand:V4SF 1 "register_operand" "w") + (parallel [(const_int 2) (const_int 3)]) + )))] + "TARGET_SIMD" + "fcvtl2\\t%0.2d, %1.4s" + [(set_attr "simd_type" "simd_fcvtl") + (set_attr "simd_mode" "V2DF")] +) + +;; Float narrowing operations. 
+ +(define_insn "aarch64_float_truncate_lo_v2sf" + [(set (match_operand:V2SF 0 "register_operand" "=w") + (float_truncate:V2SF + (match_operand:V2DF 1 "register_operand" "w")))] + "TARGET_SIMD" + "fcvtn\\t%0.2s, %1.2d" + [(set_attr "simd_type" "simd_fcvtl") + (set_attr "simd_mode" "V2SF")] +) + +(define_insn "aarch64_float_truncate_hi_v4sf" + [(set (match_operand:V4SF 0 "register_operand" "=w") + (vec_concat:V4SF + (match_operand:V2SF 1 "register_operand" "0") + (float_truncate:V2SF + (match_operand:V2DF 2 "register_operand" "w"))))] + "TARGET_SIMD" + "fcvtn2\\t%0.4s, %2.2d" + [(set_attr "simd_type" "simd_fcvtl") + (set_attr "simd_mode" "V4SF")] +) + +(define_expand "vec_pack_trunc_v2df" + [(set (match_operand:V4SF 0 "register_operand") + (vec_concat:V4SF + (float_truncate:V2SF + (match_operand:V2DF 1 "register_operand")) + (float_truncate:V2SF + (match_operand:V2DF 2 "register_operand")) + ))] + "TARGET_SIMD" + { + rtx tmp = gen_reg_rtx (V2SFmode); + emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[1])); + emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0], + tmp, operands[2])); + DONE; + } +) + +(define_expand "vec_pack_trunc_df" + [(set (match_operand:V2SF 0 "register_operand") + (vec_concat:V2SF + (float_truncate:SF + (match_operand:DF 1 "register_operand")) + (float_truncate:SF + (match_operand:DF 2 "register_operand")) + ))] + "TARGET_SIMD" + { + rtx tmp = gen_reg_rtx (V2SFmode); + emit_insn (gen_move_lo_quad_v2df (tmp, operands[1])); + emit_insn (gen_move_hi_quad_v2df (tmp, operands[2])); + emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp)); + DONE; + } +) + (define_insn "aarch64_vmls" [(set (match_operand:VDQF 0 "register_operand" "=w") (minus:VDQF (match_operand:VDQF 1 "register_operand" "0") @@ -1261,51 +1606,70 @@ ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring ;; NaNs. -(define_insn "smax3" +(define_insn "3" [(set (match_operand:VDQF 0 "register_operand" "=w") - (smax:VDQF (match_operand:VDQF 1 "register_operand" "w") + (FMAXMIN:VDQF (match_operand:VDQF 1 "register_operand" "w") (match_operand:VDQF 2 "register_operand" "w")))] "TARGET_SIMD" - "fmaxnm\\t%0., %1., %2." + "fnm\\t%0., %1., %2." [(set_attr "simd_type" "simd_fminmax") (set_attr "simd_mode" "")] ) -(define_insn "smin3" +(define_insn "3" [(set (match_operand:VDQF 0 "register_operand" "=w") - (smin:VDQF (match_operand:VDQF 1 "register_operand" "w") - (match_operand:VDQF 2 "register_operand" "w")))] + (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w") + (match_operand:VDQF 2 "register_operand" "w")] + FMAXMIN_UNS))] "TARGET_SIMD" - "fminnm\\t%0., %1., %2." + "\\t%0., %1., %2." [(set_attr "simd_type" "simd_fminmax") (set_attr "simd_mode" "")] ) -;; FP 'across lanes' max and min ops. +;; 'across lanes' add. -(define_insn "reduc_s_v4sf" - [(set (match_operand:V4SF 0 "register_operand" "=w") - (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")] - FMAXMINV))] +(define_insn "reduc_plus_" + [(set (match_operand:VDQV 0 "register_operand" "=w") + (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")] + SUADDV))] "TARGET_SIMD" - "fnmv\\t%s0, %1.4s"; - [(set_attr "simd_type" "simd_fminmaxv") - (set_attr "simd_mode" "V4SF")] + "addv\\t%0, %1." 
+ [(set_attr "simd_type" "simd_addv") + (set_attr "simd_mode" "")] ) -(define_insn "reduc_s_" +(define_insn "reduc_plus_v2di" + [(set (match_operand:V2DI 0 "register_operand" "=w") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "w")] + SUADDV))] + "TARGET_SIMD" + "addp\\t%d0, %1.2d" + [(set_attr "simd_type" "simd_addv") + (set_attr "simd_mode" "V2DI")] +) + +(define_insn "reduc_plus_v2si" + [(set (match_operand:V2SI 0 "register_operand" "=w") + (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")] + SUADDV))] + "TARGET_SIMD" + "addp\\t%0.2s, %1.2s, %1.2s" + [(set_attr "simd_type" "simd_addv") + (set_attr "simd_mode" "V2SI")] +) + +(define_insn "reduc_plus_" [(set (match_operand:V2F 0 "register_operand" "=w") (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")] - FMAXMINV))] + SUADDV))] "TARGET_SIMD" - "fnmp\\t%0., %1., %1."; - [(set_attr "simd_type" "simd_fminmax") + "faddp\\t%0, %1." + [(set_attr "simd_type" "simd_fadd") (set_attr "simd_mode" "")] ) -;; FP 'across lanes' add. - -(define_insn "aarch64_addvv4sf" +(define_insn "aarch64_addpv4sf" [(set (match_operand:V4SF 0 "register_operand" "=w") (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")] UNSPEC_FADDV))] @@ -1315,169 +1679,106 @@ (set_attr "simd_mode" "V4SF")] ) -(define_expand "reduc_uplus_v4sf" - [(set (match_operand:V4SF 0 "register_operand" "=w") - (match_operand:V4SF 1 "register_operand" "w"))] +(define_expand "reduc_plus_v4sf" + [(set (match_operand:V4SF 0 "register_operand") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand")] + SUADDV))] "TARGET_SIMD" { rtx tmp = gen_reg_rtx (V4SFmode); - emit_insn (gen_aarch64_addvv4sf (tmp, operands[1])); - emit_insn (gen_aarch64_addvv4sf (operands[0], tmp)); + emit_insn (gen_aarch64_addpv4sf (tmp, operands[1])); + emit_insn (gen_aarch64_addpv4sf (operands[0], tmp)); DONE; }) -(define_expand "reduc_splus_v4sf" - [(set (match_operand:V4SF 0 "register_operand" "=w") - (match_operand:V4SF 1 "register_operand" "w"))] +(define_insn "clz2" + [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") + (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))] "TARGET_SIMD" -{ - rtx tmp = gen_reg_rtx (V4SFmode); - emit_insn (gen_aarch64_addvv4sf (tmp, operands[1])); - emit_insn (gen_aarch64_addvv4sf (operands[0], tmp)); - DONE; -}) - -(define_insn "aarch64_addv" - [(set (match_operand:V2F 0 "register_operand" "=w") - (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")] - UNSPEC_FADDV))] - "TARGET_SIMD" - "faddp\\t%0, %1." - [(set_attr "simd_type" "simd_fadd") - (set_attr "simd_mode" "")] + "clz\\t%0., %1." + [(set_attr "simd_type" "simd_cls") + (set_attr "simd_mode" "")] ) -(define_expand "reduc_uplus_" - [(set (match_operand:V2F 0 "register_operand" "=w") - (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")] - UNSPEC_FADDV))] - "TARGET_SIMD" - "" -) +;; 'across lanes' max and min ops. -(define_expand "reduc_splus_" - [(set (match_operand:V2F 0 "register_operand" "=w") - (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")] - UNSPEC_FADDV))] - "TARGET_SIMD" - "" -) - -;; Reduction across lanes. - -(define_insn "aarch64_addv" +(define_insn "reduc__" [(set (match_operand:VDQV 0 "register_operand" "=w") (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")] - UNSPEC_ADDV))] + MAXMINV))] "TARGET_SIMD" - "addv\\t%0, %1." - [(set_attr "simd_type" "simd_addv") + "v\\t%0, %1." 
+ [(set_attr "simd_type" "simd_minmaxv") (set_attr "simd_mode" "")] ) -(define_expand "reduc_splus_" - [(set (match_operand:VDQV 0 "register_operand" "=w") - (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")] - UNSPEC_ADDV))] - "TARGET_SIMD" - "" -) - -(define_expand "reduc_uplus_" - [(set (match_operand:VDQV 0 "register_operand" "=w") - (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")] - UNSPEC_ADDV))] - "TARGET_SIMD" - "" -) - -(define_insn "aarch64_addvv2di" +(define_insn "reduc__v2di" [(set (match_operand:V2DI 0 "register_operand" "=w") (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "w")] - UNSPEC_ADDV))] + MAXMINV))] "TARGET_SIMD" - "addp\\t%d0, %1.2d" - [(set_attr "simd_type" "simd_add") + "p\\t%d0, %1.2d" + [(set_attr "simd_type" "simd_minmaxv") (set_attr "simd_mode" "V2DI")] ) -(define_expand "reduc_uplus_v2di" - [(set (match_operand:V2DI 0 "register_operand" "=w") - (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "w")] - UNSPEC_ADDV))] - "TARGET_SIMD" - "" -) - -(define_expand "reduc_splus_v2di" - [(set (match_operand:V2DI 0 "register_operand" "=w") - (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "w")] - UNSPEC_ADDV))] - "TARGET_SIMD" - "" -) - -(define_insn "aarch64_addvv2si" +(define_insn "reduc__v2si" [(set (match_operand:V2SI 0 "register_operand" "=w") (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")] - UNSPEC_ADDV))] + MAXMINV))] "TARGET_SIMD" - "addp\\t%0.2s, %1.2s, %1.2s" - [(set_attr "simd_type" "simd_add") + "p\\t%0.2s, %1.2s, %1.2s" + [(set_attr "simd_type" "simd_minmaxv") (set_attr "simd_mode" "V2SI")] ) -(define_expand "reduc_uplus_v2si" - [(set (match_operand:V2SI 0 "register_operand" "=w") - (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")] - UNSPEC_ADDV))] +(define_insn "reduc__" + [(set (match_operand:V2F 0 "register_operand" "=w") + (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")] + FMAXMINV))] "TARGET_SIMD" - "" -) - -(define_expand "reduc_splus_v2si" - [(set (match_operand:V2SI 0 "register_operand" "=w") - (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")] - UNSPEC_ADDV))] - "TARGET_SIMD" - "" -) - -(define_insn "reduc__" - [(set (match_operand:VDQV 0 "register_operand" "=w") - (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")] - MAXMINV))] - "TARGET_SIMD" - "v\\t%0, %1." - [(set_attr "simd_type" "simd_minmaxv") + "p\\t%0, %1." + [(set_attr "simd_type" "simd_fminmaxv") (set_attr "simd_mode" "")] ) -(define_insn "reduc__v2si" - [(set (match_operand:V2SI 0 "register_operand" "=w") - (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")] - MAXMINV))] +(define_insn "reduc__v4sf" + [(set (match_operand:V4SF 0 "register_operand" "=w") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")] + FMAXMINV))] "TARGET_SIMD" - "p\\t%0.2s, %1.2s, %1.2s" - [(set_attr "simd_type" "simd_minmax") - (set_attr "simd_mode" "V2SI")] + "v\\t%s0, %1.4s" + [(set_attr "simd_type" "simd_fminmaxv") + (set_attr "simd_mode" "V4SF")] ) -;; vbsl_* intrinsics may compile to any of bsl/bif/bit depending on register -;; allocation. For an intrinsic of form: -;; vD = bsl_* (vS, vN, vM) +;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register +;; allocation. +;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which +;; to select. 
+;; +;; Thus our BSL is of the form: +;; op0 = bsl (mask, op2, op3) ;; We can use any of: -;; bsl vS, vN, vM (if D = S) -;; bit vD, vN, vS (if D = M, so 1-bits in vS choose bits from vN, else vM) -;; bif vD, vM, vS (if D = N, so 0-bits in vS choose bits from vM, else vN) +;; +;; if (op0 = mask) +;; bsl mask, op1, op2 +;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0) +;; bit op0, op2, mask +;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0) +;; bif op0, op1, mask (define_insn "aarch64_simd_bsl_internal" [(set (match_operand:VALL 0 "register_operand" "=w,w,w") - (unspec:VALL - [(match_operand: 1 "register_operand" " 0,w,w") - (match_operand:VALL 2 "register_operand" " w,w,0") - (match_operand:VALL 3 "register_operand" " w,0,w")] - UNSPEC_BSL))] + (ior:VALL + (and:VALL + (match_operand: 1 "register_operand" " 0,w,w") + (match_operand:VALL 2 "register_operand" " w,w,0")) + (and:VALL + (not: + (match_dup: 1)) + (match_operand:VALL 3 "register_operand" " w,0,w")) + ))] "TARGET_SIMD" "@ bsl\\t%0., %2., %3. @@ -1486,28 +1787,32 @@ ) (define_expand "aarch64_simd_bsl" - [(set (match_operand:VALL 0 "register_operand") - (unspec:VALL [(match_operand: 1 "register_operand") - (match_operand:VALL 2 "register_operand") - (match_operand:VALL 3 "register_operand")] - UNSPEC_BSL))] - "TARGET_SIMD" + [(match_operand:VALL 0 "register_operand") + (match_operand: 1 "register_operand") + (match_operand:VALL 2 "register_operand") + (match_operand:VALL 3 "register_operand")] + "TARGET_SIMD" { /* We can't alias operands together if they have different modes. */ operands[1] = gen_lowpart (mode, operands[1]); + emit_insn (gen_aarch64_simd_bsl_internal (operands[0], operands[1], + operands[2], operands[3])); + DONE; }) -(define_expand "aarch64_vcond_internal" +(define_expand "aarch64_vcond_internal" [(set (match_operand:VDQ 0 "register_operand") (if_then_else:VDQ (match_operator 3 "comparison_operator" [(match_operand:VDQ 4 "register_operand") (match_operand:VDQ 5 "nonmemory_operand")]) - (match_operand:VDQ 1 "register_operand") - (match_operand:VDQ 2 "register_operand")))] + (match_operand:VDQ 1 "nonmemory_operand") + (match_operand:VDQ 2 "nonmemory_operand")))] "TARGET_SIMD" { int inverse = 0, has_zero_imm_form = 0; + rtx op1 = operands[1]; + rtx op2 = operands[2]; rtx mask = gen_reg_rtx (mode); switch (GET_CODE (operands[3])) @@ -1566,30 +1871,47 @@ } if (inverse) - emit_insn (gen_aarch64_simd_bsl (operands[0], mask, operands[2], - operands[1])); - else - emit_insn (gen_aarch64_simd_bsl (operands[0], mask, operands[1], - operands[2])); + { + op1 = operands[2]; + op2 = operands[1]; + } + /* If we have (a = (b CMP c) ? -1 : 0); + Then we can simply move the generated mask. 
*/ + + if (op1 == CONSTM1_RTX (mode) + && op2 == CONST0_RTX (mode)) + emit_move_insn (operands[0], mask); + else + { + if (!REG_P (op1)) + op1 = force_reg (mode, op1); + if (!REG_P (op2)) + op2 = force_reg (mode, op2); + emit_insn (gen_aarch64_simd_bsl (operands[0], mask, + op1, op2)); + } + DONE; }) -(define_expand "aarch64_vcond_internal" - [(set (match_operand:VDQF 0 "register_operand") +(define_expand "aarch64_vcond_internal" + [(set (match_operand:VDQF_COND 0 "register_operand") (if_then_else:VDQF (match_operator 3 "comparison_operator" [(match_operand:VDQF 4 "register_operand") (match_operand:VDQF 5 "nonmemory_operand")]) - (match_operand:VDQF 1 "register_operand") - (match_operand:VDQF 2 "register_operand")))] + (match_operand:VDQF_COND 1 "nonmemory_operand") + (match_operand:VDQF_COND 2 "nonmemory_operand")))] "TARGET_SIMD" { int inverse = 0; int use_zero_form = 0; int swap_bsl_operands = 0; - rtx mask = gen_reg_rtx (mode); - rtx tmp = gen_reg_rtx (mode); + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx mask = gen_reg_rtx (mode); + rtx tmp = gen_reg_rtx (mode); rtx (*base_comparison) (rtx, rtx, rtx); rtx (*complimentary_comparison) (rtx, rtx, rtx); @@ -1609,7 +1931,7 @@ /* Fall through. */ default: if (!REG_P (operands[5])) - operands[5] = force_reg (mode, operands[5]); + operands[5] = force_reg (mode, operands[5]); } switch (GET_CODE (operands[3])) @@ -1622,8 +1944,8 @@ case UNGE: case ORDERED: case UNORDERED: - base_comparison = gen_aarch64_cmge; - complimentary_comparison = gen_aarch64_cmgt; + base_comparison = gen_aarch64_cmge; + complimentary_comparison = gen_aarch64_cmgt; break; case LE: case UNLE: @@ -1631,14 +1953,14 @@ /* Fall through. */ case GT: case UNGT: - base_comparison = gen_aarch64_cmgt; - complimentary_comparison = gen_aarch64_cmge; + base_comparison = gen_aarch64_cmgt; + complimentary_comparison = gen_aarch64_cmge; break; case EQ: case NE: case UNEQ: - base_comparison = gen_aarch64_cmeq; - complimentary_comparison = gen_aarch64_cmeq; + base_comparison = gen_aarch64_cmeq; + complimentary_comparison = gen_aarch64_cmeq; break; default: gcc_unreachable (); @@ -1666,10 +1988,10 @@ switch (GET_CODE (operands[3])) { case LT: - base_comparison = gen_aarch64_cmlt; + base_comparison = gen_aarch64_cmlt; break; case LE: - base_comparison = gen_aarch64_cmle; + base_comparison = gen_aarch64_cmle; break; default: /* Do nothing, other zero form cases already have the correct @@ -1712,9 +2034,9 @@ true iff !(a != b && a ORDERED b), swapping the operands to BSL will then give us (a == b || a UNORDERED b) as intended. */ - emit_insn (gen_aarch64_cmgt (mask, operands[4], operands[5])); - emit_insn (gen_aarch64_cmgt (tmp, operands[5], operands[4])); - emit_insn (gen_ior3 (mask, mask, tmp)); + emit_insn (gen_aarch64_cmgt (mask, operands[4], operands[5])); + emit_insn (gen_aarch64_cmgt (tmp, operands[5], operands[4])); + emit_insn (gen_ior3 (mask, mask, tmp)); swap_bsl_operands = 1; break; case UNORDERED: @@ -1723,9 +2045,9 @@ swap_bsl_operands = 1; /* Fall through. 
*/ case ORDERED: - emit_insn (gen_aarch64_cmgt (tmp, operands[4], operands[5])); - emit_insn (gen_aarch64_cmge (mask, operands[5], operands[4])); - emit_insn (gen_ior3 (mask, mask, tmp)); + emit_insn (gen_aarch64_cmgt (tmp, operands[4], operands[5])); + emit_insn (gen_aarch64_cmge (mask, operands[5], operands[4])); + emit_insn (gen_ior3 (mask, mask, tmp)); break; default: gcc_unreachable (); @@ -1732,11 +2054,27 @@ } if (swap_bsl_operands) - emit_insn (gen_aarch64_simd_bsl (operands[0], mask, operands[2], - operands[1])); - else - emit_insn (gen_aarch64_simd_bsl (operands[0], mask, operands[1], - operands[2])); + { + op1 = operands[2]; + op2 = operands[1]; + } + + /* If we have (a = (b CMP c) ? -1 : 0); + Then we can simply move the generated mask. */ + + if (op1 == CONSTM1_RTX (mode) + && op2 == CONST0_RTX (mode)) + emit_move_insn (operands[0], mask); + else + { + if (!REG_P (op1)) + op1 = force_reg (mode, op1); + if (!REG_P (op2)) + op2 = force_reg (mode, op2); + emit_insn (gen_aarch64_simd_bsl (operands[0], mask, + op1, op2)); + } + DONE; }) @@ -1746,16 +2084,32 @@ (match_operator 3 "comparison_operator" [(match_operand:VALL 4 "register_operand") (match_operand:VALL 5 "nonmemory_operand")]) - (match_operand:VALL 1 "register_operand") - (match_operand:VALL 2 "register_operand")))] + (match_operand:VALL 1 "nonmemory_operand") + (match_operand:VALL 2 "nonmemory_operand")))] "TARGET_SIMD" { - emit_insn (gen_aarch64_vcond_internal (operands[0], operands[1], + emit_insn (gen_aarch64_vcond_internal (operands[0], operands[1], operands[2], operands[3], operands[4], operands[5])); DONE; }) +(define_expand "vcond" + [(set (match_operand: 0 "register_operand") + (if_then_else: + (match_operator 3 "comparison_operator" + [(match_operand:VDQF 4 "register_operand") + (match_operand:VDQF 5 "nonmemory_operand")]) + (match_operand: 1 "nonmemory_operand") + (match_operand: 2 "nonmemory_operand")))] + "TARGET_SIMD" +{ + emit_insn (gen_aarch64_vcond_internal ( + operands[0], operands[1], + operands[2], operands[3], + operands[4], operands[5])); + DONE; +}) (define_expand "vcondu" [(set (match_operand:VDQ 0 "register_operand") @@ -1763,11 +2117,11 @@ (match_operator 3 "comparison_operator" [(match_operand:VDQ 4 "register_operand") (match_operand:VDQ 5 "nonmemory_operand")]) - (match_operand:VDQ 1 "register_operand") - (match_operand:VDQ 2 "register_operand")))] + (match_operand:VDQ 1 "nonmemory_operand") + (match_operand:VDQ 2 "nonmemory_operand")))] "TARGET_SIMD" { - emit_insn (gen_aarch64_vcond_internal (operands[0], operands[1], + emit_insn (gen_aarch64_vcond_internal (operands[0], operands[1], operands[2], operands[3], operands[4], operands[5])); DONE; @@ -1785,45 +2139,50 @@ DONE; }) -(define_insn "aarch64_get_lane_signed" - [(set (match_operand: 0 "register_operand" "=r") - (sign_extend: +;; Lane extraction with sign extension to general purpose register. 
+(define_insn "*aarch64_get_lane_extend" + [(set (match_operand:GPI 0 "register_operand" "=r") + (sign_extend:GPI (vec_select: - (match_operand:VQ_S 1 "register_operand" "w") + (match_operand:VDQQH 1 "register_operand" "w") (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] "TARGET_SIMD" - "smov\\t%0, %1.[%2]" + "smov\\t%0, %1.[%2]" [(set_attr "simd_type" "simd_movgp") - (set_attr "simd_mode" "")] + (set_attr "simd_mode" "")] ) -(define_insn "aarch64_get_lane_unsigned" - [(set (match_operand: 0 "register_operand" "=r") - (zero_extend: +(define_insn "*aarch64_get_lane_zero_extendsi" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (vec_select: - (match_operand:VDQ 1 "register_operand" "w") + (match_operand:VDQQH 1 "register_operand" "w") (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] "TARGET_SIMD" - "umov\\t%0, %1.[%2]" + "umov\\t%w0, %1.[%2]" [(set_attr "simd_type" "simd_movgp") (set_attr "simd_mode" "")] ) +;; Lane extraction of a value, neither sign nor zero extension +;; is guaranteed so upper bits should be considered undefined. (define_insn "aarch64_get_lane" - [(set (match_operand: 0 "register_operand" "=w") + [(set (match_operand: 0 "register_operand" "=r, w") (vec_select: - (match_operand:VDQF 1 "register_operand" "w") - (parallel [(match_operand:SI 2 "immediate_operand" "i")])))] + (match_operand:VALL 1 "register_operand" "w, w") + (parallel [(match_operand:SI 2 "immediate_operand" "i, i")])))] "TARGET_SIMD" - "mov\\t%0.[0], %1.[%2]" - [(set_attr "simd_type" "simd_ins") + "@ + umov\\t%0, %1.[%2] + dup\\t%0, %1.[%2]" + [(set_attr "simd_type" "simd_movgp, simd_dup") (set_attr "simd_mode" "")] ) (define_expand "aarch64_get_lanedi" - [(match_operand:DI 0 "register_operand" "=r") - (match_operand:DI 1 "register_operand" "w") - (match_operand:SI 2 "immediate_operand" "i")] + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:SI 2 "immediate_operand")] "TARGET_SIMD" { aarch64_simd_lane_bounds (operands[2], 0, 1); @@ -1944,16 +2303,30 @@ (set_attr "simd_mode" "")] ) -(define_insn "aarch64_combine" +(define_insn_and_split "aarch64_combine" [(set (match_operand: 0 "register_operand" "=&w") (vec_concat: (match_operand:VDC 1 "register_operand" "w") (match_operand:VDC 2 "register_operand" "w")))] "TARGET_SIMD" - "mov\\t%0.d[0], %1.d[0]\;ins\\t%0.d[1], %2.d[0]" - [(set_attr "simd_type" "simd_ins") - (set_attr "simd_mode" "")] -) + "#" + "&& reload_completed" + [(const_int 0)] +{ + aarch64_split_simd_combine (operands[0], operands[1], operands[2]); + DONE; +}) +(define_expand "aarch64_simd_combine" + [(set (match_operand: 0 "register_operand" "=&w") + (vec_concat: (match_operand:VDC 1 "register_operand" "w") + (match_operand:VDC 2 "register_operand" "w")))] + "TARGET_SIMD" + { + emit_insn (gen_move_lo_quad_ (operands[0], operands[1])); + emit_insn (gen_move_hi_quad_ (operands[0], operands[2])); + DONE; + }) + ;; l. 
(define_insn "aarch64_l2_internal" @@ -2861,28 +3234,6 @@ (set_attr "simd_mode" "")] ) -;; vshl_n - -(define_expand "aarch64_sshl_n" - [(match_operand:VSDQ_I_DI 0 "register_operand" "=w") - (match_operand:VSDQ_I_DI 1 "register_operand" "w") - (match_operand:SI 2 "immediate_operand" "i")] - "TARGET_SIMD" -{ - emit_insn (gen_ashl3 (operands[0], operands[1], operands[2])); - DONE; -}) - -(define_expand "aarch64_ushl_n" - [(match_operand:VSDQ_I_DI 0 "register_operand" "=w") - (match_operand:VSDQ_I_DI 1 "register_operand" "w") - (match_operand:SI 2 "immediate_operand" "i")] - "TARGET_SIMD" -{ - emit_insn (gen_ashl3 (operands[0], operands[1], operands[2])); - DONE; -}) - ;; vshll_n (define_insn "aarch64_shll_n" @@ -2927,28 +3278,6 @@ (set_attr "simd_mode" "")] ) -;; vshr_n - -(define_expand "aarch64_sshr_n" - [(match_operand:VSDQ_I_DI 0 "register_operand" "=w") - (match_operand:VSDQ_I_DI 1 "register_operand" "w") - (match_operand:SI 2 "immediate_operand" "i")] - "TARGET_SIMD" -{ - emit_insn (gen_ashr3 (operands[0], operands[1], operands[2])); - DONE; -}) - -(define_expand "aarch64_ushr_n" - [(match_operand:VSDQ_I_DI 0 "register_operand" "=w") - (match_operand:VSDQ_I_DI 1 "register_operand" "w") - (match_operand:SI 2 "immediate_operand" "i")] - "TARGET_SIMD" -{ - emit_insn (gen_lshr3 (operands[0], operands[1], operands[2])); - DONE; -}) - ;; vrshr_n (define_insn "aarch64_shr_n" @@ -3059,7 +3388,8 @@ (COMPARISONS:DI (match_operand:DI 1 "register_operand" "w,w,r") (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r") - )))] + ))) + (clobber (reg:CC CC_REGNUM))] "TARGET_SIMD" "@ cm\t%d0, %d, %d @@ -3070,15 +3400,7 @@ happening in the 'w' constraint cases. */ && GP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1]))" - [(set (reg:CC CC_REGNUM) - (compare:CC - (match_dup 1) - (match_dup 2))) - (set (match_dup 0) - (neg:DI - (COMPARISONS:DI - (match_operand 3 "cc_register" "") - (const_int 0))))] + [(const_int 0)] { enum machine_mode mode = SELECT_CC_MODE (, operands[1], operands[2]); rtx cc_reg = aarch64_gen_compare_reg (, operands[1], operands[2]); @@ -3111,7 +3433,8 @@ (UCOMPARISONS:DI (match_operand:DI 1 "register_operand" "w,r") (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r") - )))] + ))) + (clobber (reg:CC CC_REGNUM))] "TARGET_SIMD" "@ cm\t%d0, %d, %d @@ -3121,17 +3444,9 @@ happening in the 'w' constraint cases. */ && GP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1]))" - [(set (reg:CC CC_REGNUM) - (compare:CC - (match_dup 1) - (match_dup 2))) - (set (match_dup 0) - (neg:DI - (UCOMPARISONS:DI - (match_operand 3 "cc_register" "") - (const_int 0))))] + [(const_int 0)] { - enum machine_mode mode = SELECT_CC_MODE (, operands[1], operands[2]); + enum machine_mode mode = CCmode; rtx cc_reg = aarch64_gen_compare_reg (, operands[1], operands[2]); rtx comparison = gen_rtx_ (mode, operands[1], operands[2]); emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); @@ -3164,7 +3479,8 @@ (and:DI (match_operand:DI 1 "register_operand" "w,r") (match_operand:DI 2 "register_operand" "w,r")) - (const_int 0))))] + (const_int 0)))) + (clobber (reg:CC CC_REGNUM))] "TARGET_SIMD" "@ cmtst\t%d0, %d1, %d2 @@ -3174,16 +3490,7 @@ happening in the 'w' constraint cases. 
*/ && GP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1]))" - [(set (reg:CC_NZ CC_REGNUM) - (compare:CC_NZ - (and:DI (match_dup 1) - (match_dup 2)) - (const_int 0))) - (set (match_dup 0) - (neg:DI - (ne:DI - (match_operand 3 "cc_register" "") - (const_int 0))))] + [(const_int 0)] { rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]); enum machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx); @@ -3213,6 +3520,23 @@ (set_attr "simd_mode" "")] ) +;; fac(ge|gt) +;; Note we can also handle what would be fac(le|lt) by +;; generating fac(ge|gt). + +(define_insn "*aarch64_fac" + [(set (match_operand: 0 "register_operand" "=w") + (neg: + (FAC_COMPARISONS: + (abs:VALLF (match_operand:VALLF 1 "register_operand" "w")) + (abs:VALLF (match_operand:VALLF 2 "register_operand" "w")) + )))] + "TARGET_SIMD" + "fac\t%0, %, %" + [(set_attr "simd_type" "simd_fcmp") + (set_attr "simd_mode" "")] +) + ;; addp (define_insn "aarch64_addp" @@ -3238,30 +3562,6 @@ (set_attr "simd_mode" "DI")] ) -;; v(max|min) - -(define_expand "aarch64_" - [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") - (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w") - (match_operand:VDQ_BHSI 2 "register_operand" "w")))] - "TARGET_SIMD" -{ - emit_insn (gen_3 (operands[0], operands[1], operands[2])); - DONE; -}) - - -(define_insn "aarch64_" - [(set (match_operand:VDQF 0 "register_operand" "=w") - (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w") - (match_operand:VDQF 2 "register_operand" "w")] - FMAXMIN))] - "TARGET_SIMD" - "\t%0., %1., %2." - [(set_attr "simd_type" "simd_fminmax") - (set_attr "simd_mode" "")] -) - ;; sqrt (define_insn "sqrt2" @@ -3273,16 +3573,6 @@ (set_attr "simd_mode" "")] ) -(define_expand "aarch64_sqrt" - [(match_operand:VDQF 0 "register_operand" "=w") - (match_operand:VDQF 1 "register_operand" "w")] - "TARGET_SIMD" -{ - emit_insn (gen_sqrt2 (operands[0], operands[1])); - DONE; -}) - - ;; Patterns for vector struct loads and stores. (define_insn "vec_load_lanesoi" @@ -3869,3 +4159,147 @@ "ld1r\\t{%0.}, %1" [(set_attr "simd_type" "simd_load1r") (set_attr "simd_mode" "")]) + +(define_insn "aarch64_frecpe" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")] + UNSPEC_FRECPE))] + "TARGET_SIMD" + "frecpe\\t%0., %1." + [(set_attr "simd_type" "simd_frecpe") + (set_attr "simd_mode" "")] +) + +(define_insn "aarch64_frecps" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w") + (match_operand:VDQF 2 "register_operand" "w")] + UNSPEC_FRECPS))] + "TARGET_SIMD" + "frecps\\t%0., %1., %2." 
+ [(set_attr "simd_type" "simd_frecps") + (set_attr "simd_mode" "")] +) + +;; aes + +(define_insn "aarch64_crypto_aesv16qi" + [(set (match_operand:V16QI 0 "register_operand" "=w") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0") + (match_operand:V16QI 2 "register_operand" "w")] + CRYPTO_AES))] + "TARGET_SIMD && TARGET_CRYPTO" + "aes\\t%0.16b, %2.16b" + [(set_attr "simd_type" "simd_crypto_aes") + (set_attr "simd_mode" "V16QI")]) + +(define_insn "aarch64_crypto_aesv16qi" + [(set (match_operand:V16QI 0 "register_operand" "=w") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")] + CRYPTO_AESMC))] + "TARGET_SIMD && TARGET_CRYPTO" + "aes\\t%0.16b, %1.16b" + [(set_attr "simd_type" "simd_crypto_aes") + (set_attr "simd_mode" "V16QI")]) + +;; sha1 + +(define_insn "aarch64_crypto_sha1hsi" + [(set (match_operand:SI 0 "register_operand" "=w") + (unspec:SI [(match_operand:SI 1 + "register_operand" "w")] + UNSPEC_SHA1H))] + "TARGET_SIMD && TARGET_CRYPTO" + "sha1h\\t%s0, %s1" + [(set_attr "simd_type" "simd_crypto_sha1_fast") + (set_attr "simd_mode" "SI")]) + +(define_insn "aarch64_crypto_sha1su1v4si" + [(set (match_operand:V4SI 0 "register_operand" "=w") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") + (match_operand:V4SI 2 "register_operand" "w")] + UNSPEC_SHA1SU1))] + "TARGET_SIMD && TARGET_CRYPTO" + "sha1su1\\t%0.4s, %2.4s" + [(set_attr "simd_type" "simd_crypto_sha1_fast") + (set_attr "simd_mode" "V4SI")]) + +(define_insn "aarch64_crypto_sha1v4si" + [(set (match_operand:V4SI 0 "register_operand" "=w") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") + (match_operand:SI 2 "register_operand" "w") + (match_operand:V4SI 3 "register_operand" "w")] + CRYPTO_SHA1))] + "TARGET_SIMD && TARGET_CRYPTO" + "sha1\\t%q0, %s2, %3.4s" + [(set_attr "simd_type" "simd_crypto_sha1_slow") + (set_attr "simd_mode" "V4SI")]) + +(define_insn "aarch64_crypto_sha1su0v4si" + [(set (match_operand:V4SI 0 "register_operand" "=w") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") + (match_operand:V4SI 2 "register_operand" "w") + (match_operand:V4SI 3 "register_operand" "w")] + UNSPEC_SHA1SU0))] + "TARGET_SIMD && TARGET_CRYPTO" + "sha1su0\\t%0.4s, %2.4s, %3.4s" + [(set_attr "simd_type" "simd_crypto_sha1_xor") + (set_attr "simd_mode" "V4SI")]) + + +;; sha256 + +(define_insn "aarch64_crypto_sha256hv4si" + [(set (match_operand:V4SI 0 "register_operand" "=w") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") + (match_operand:V4SI 2 "register_operand" "w") + (match_operand:V4SI 3 "register_operand" "w")] + CRYPTO_SHA256))] + "TARGET_SIMD && TARGET_CRYPTO" + "sha256h\\t%q0, %q2, %3.4s" + [(set_attr "simd_type" "simd_crypto_sha256_slow") + (set_attr "simd_mode" "V4SI")]) + +(define_insn "aarch64_crypto_sha256su0v4si" + [(set (match_operand:V4SI 0 "register_operand" "=w") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") + (match_operand:V4SI 2 "register_operand" "w")] + UNSPEC_SHA256SU0))] + "TARGET_SIMD &&TARGET_CRYPTO" + "sha256su0\\t%0.4s, %2.4s" + [(set_attr "simd_type" "simd_crypto_sha256_fast") + (set_attr "simd_mode" "V4SI")]) + +(define_insn "aarch64_crypto_sha256su1v4si" + [(set (match_operand:V4SI 0 "register_operand" "=w") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") + (match_operand:V4SI 2 "register_operand" "w") + (match_operand:V4SI 3 "register_operand" "w")] + UNSPEC_SHA256SU1))] + "TARGET_SIMD &&TARGET_CRYPTO" + "sha256su1\\t%0.4s, %2.4s, %3.4s" + [(set_attr "simd_type""simd_crypto_sha256_slow") + (set_attr "simd_mode" 
"V4SI")]) + + +;; pmull + +(define_insn "aarch64_crypto_pmulldi" + [(set (match_operand:TI 0 "register_operand" "=w") + (unspec:TI [(match_operand:DI 1 "register_operand" "w") + (match_operand:DI 2 "register_operand" "w")] + UNSPEC_PMULL))] + "TARGET_SIMD && TARGET_CRYPTO" + "pmull\\t%0.1q, %1.1d, %2.1d" + [(set_attr "simd_type" "simd_mul_d_long") + (set_attr "simd_mode" "TI")]) + +(define_insn "aarch64_crypto_pmullv2di" + [(set (match_operand:TI 0 "register_operand" "=w") + (unspec:TI [(match_operand:V2DI 1 "register_operand" "w") + (match_operand:V2DI 2 "register_operand" "w")] + UNSPEC_PMULL2))] + "TARGET_SIMD && TARGET_CRYPTO" + "pmull2\\t%0.1q, %1.2d, %2.2d" + [(set_attr "simd_type" "simd_mul_d_long") + (set_attr "simd_mode" "TI")]) \ No newline at end of file --- a/src/gcc/config/aarch64/predicates.md +++ b/src/gcc/config/aarch64/predicates.md @@ -115,16 +115,11 @@ (match_test "aarch64_legitimate_address_p (mode, XEXP (op, 0), PARALLEL, 0)"))) -(define_predicate "aarch64_const_address" - (and (match_code "symbol_ref") - (match_test "mode == DImode && CONSTANT_ADDRESS_P (op)"))) - (define_predicate "aarch64_valid_symref" (match_code "const, symbol_ref, label_ref") { - enum aarch64_symbol_type symbol_type; - return (aarch64_symbolic_constant_p (op, SYMBOL_CONTEXT_ADR, &symbol_type) - && symbol_type != SYMBOL_FORCE_TO_MEM); + return (aarch64_classify_symbolic_expression (op, SYMBOL_CONTEXT_ADR) + != SYMBOL_FORCE_TO_MEM); }) (define_predicate "aarch64_tls_ie_symref" @@ -170,15 +165,10 @@ }) (define_predicate "aarch64_mov_operand" - (and (match_code "reg,subreg,mem,const_int,symbol_ref,high") + (and (match_code "reg,subreg,mem,const,const_int,symbol_ref,label_ref,high") (ior (match_operand 0 "register_operand") (ior (match_operand 0 "memory_operand") - (ior (match_test "GET_CODE (op) == HIGH - && aarch64_valid_symref (XEXP (op, 0), - GET_MODE (XEXP (op, 0)))") - (ior (match_test "CONST_INT_P (op) - && aarch64_move_imm (INTVAL (op), mode)") - (match_test "aarch64_const_address (op, mode)"))))))) + (match_test "aarch64_mov_operand_p (op, SYMBOL_CONTEXT_ADR, mode)"))))) (define_predicate "aarch64_movti_operand" (and (match_code "reg,subreg,mem,const_int") --- a/src/gcc/config/aarch64/aarch64-elf.h +++ b/src/gcc/config/aarch64/aarch64-elf.h @@ -106,7 +106,6 @@ #define ASM_COMMENT_START "//" -#define REGISTER_PREFIX "" #define LOCAL_LABEL_PREFIX "." #define USER_LABEL_PREFIX "" --- a/src/gcc/config/aarch64/arm_neon.h +++ b/src/gcc/config/aarch64/arm_neon.h @@ -29,6 +29,9 @@ #include +#define __AARCH64_UINT64_C(__C) ((uint64_t) __C) +#define __AARCH64_INT64_C(__C) ((int64_t) __C) + typedef __builtin_aarch64_simd_qi int8x8_t __attribute__ ((__vector_size__ (8))); typedef __builtin_aarch64_simd_hi int16x4_t @@ -72,6 +75,8 @@ __attribute__ ((__vector_size__ (16))); typedef __builtin_aarch64_simd_poly16 poly16x8_t __attribute__ ((__vector_size__ (16))); +typedef __builtin_aarch64_simd_poly64 poly64x2_t + __attribute__ ((__vector_size__ (16))); typedef __builtin_aarch64_simd_uqi uint8x16_t __attribute__ ((__vector_size__ (16))); typedef __builtin_aarch64_simd_uhi uint16x8_t @@ -85,6 +90,8 @@ typedef double float64_t; typedef __builtin_aarch64_simd_poly8 poly8_t; typedef __builtin_aarch64_simd_poly16 poly16_t; +typedef __builtin_aarch64_simd_poly64 poly64_t; +typedef __builtin_aarch64_simd_poly128 poly128_t; typedef struct int8x8x2_t { @@ -446,7 +453,66 @@ poly16x8_t val[4]; } poly16x8x4_t; +/* vget_lane internal macros. 
*/ +#define __aarch64_vget_lane_any(__size, __cast_ret, __cast_a, __a, __b) \ + (__cast_ret \ + __builtin_aarch64_get_lane##__size (__cast_a __a, __b)) + +#define __aarch64_vget_lane_f32(__a, __b) \ + __aarch64_vget_lane_any (v2sf, , , __a, __b) +#define __aarch64_vget_lane_f64(__a, __b) (__a) + +#define __aarch64_vget_lane_p8(__a, __b) \ + __aarch64_vget_lane_any (v8qi, (poly8_t), (int8x8_t), __a, __b) +#define __aarch64_vget_lane_p16(__a, __b) \ + __aarch64_vget_lane_any (v4hi, (poly16_t), (int16x4_t), __a, __b) + +#define __aarch64_vget_lane_s8(__a, __b) \ + __aarch64_vget_lane_any (v8qi, , ,__a, __b) +#define __aarch64_vget_lane_s16(__a, __b) \ + __aarch64_vget_lane_any (v4hi, , ,__a, __b) +#define __aarch64_vget_lane_s32(__a, __b) \ + __aarch64_vget_lane_any (v2si, , ,__a, __b) +#define __aarch64_vget_lane_s64(__a, __b) (__a) + +#define __aarch64_vget_lane_u8(__a, __b) \ + __aarch64_vget_lane_any (v8qi, (uint8_t), (int8x8_t), __a, __b) +#define __aarch64_vget_lane_u16(__a, __b) \ + __aarch64_vget_lane_any (v4hi, (uint16_t), (int16x4_t), __a, __b) +#define __aarch64_vget_lane_u32(__a, __b) \ + __aarch64_vget_lane_any (v2si, (uint32_t), (int32x2_t), __a, __b) +#define __aarch64_vget_lane_u64(__a, __b) (__a) + +#define __aarch64_vgetq_lane_f32(__a, __b) \ + __aarch64_vget_lane_any (v4sf, , , __a, __b) +#define __aarch64_vgetq_lane_f64(__a, __b) \ + __aarch64_vget_lane_any (v2df, , , __a, __b) + +#define __aarch64_vgetq_lane_p8(__a, __b) \ + __aarch64_vget_lane_any (v16qi, (poly8_t), (int8x16_t), __a, __b) +#define __aarch64_vgetq_lane_p16(__a, __b) \ + __aarch64_vget_lane_any (v8hi, (poly16_t), (int16x8_t), __a, __b) + +#define __aarch64_vgetq_lane_s8(__a, __b) \ + __aarch64_vget_lane_any (v16qi, , ,__a, __b) +#define __aarch64_vgetq_lane_s16(__a, __b) \ + __aarch64_vget_lane_any (v8hi, , ,__a, __b) +#define __aarch64_vgetq_lane_s32(__a, __b) \ + __aarch64_vget_lane_any (v4si, , ,__a, __b) +#define __aarch64_vgetq_lane_s64(__a, __b) \ + __aarch64_vget_lane_any (v2di, , ,__a, __b) + +#define __aarch64_vgetq_lane_u8(__a, __b) \ + __aarch64_vget_lane_any (v16qi, (uint8_t), (int8x16_t), __a, __b) +#define __aarch64_vgetq_lane_u16(__a, __b) \ + __aarch64_vget_lane_any (v8hi, (uint16_t), (int16x8_t), __a, __b) +#define __aarch64_vgetq_lane_u32(__a, __b) \ + __aarch64_vget_lane_any (v4si, (uint32_t), (int32x4_t), __a, __b) +#define __aarch64_vgetq_lane_u64(__a, __b) \ + __aarch64_vget_lane_any (v2di, (uint64_t), (int64x2_t), __a, __b) + +/* vadd */ __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vadd_s8 (int8x8_t __a, int8x8_t __b) { @@ -2307,155 +2373,156 @@ return (poly16x4_t) __a; } +/* vget_lane */ + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vget_lane_f32 (float32x2_t __a, const int __b) +{ + return __aarch64_vget_lane_f32 (__a, __b); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vget_lane_f64 (float64x1_t __a, const int __b) +{ + return __aarch64_vget_lane_f64 (__a, __b); +} + +__extension__ static __inline poly8_t __attribute__ ((__always_inline__)) +vget_lane_p8 (poly8x8_t __a, const int __b) +{ + return __aarch64_vget_lane_p8 (__a, __b); +} + +__extension__ static __inline poly16_t __attribute__ ((__always_inline__)) +vget_lane_p16 (poly16x4_t __a, const int __b) +{ + return __aarch64_vget_lane_p16 (__a, __b); +} + __extension__ static __inline int8_t __attribute__ ((__always_inline__)) vget_lane_s8 (int8x8_t __a, const int __b) { - return (int8_t) __builtin_aarch64_get_lane_signedv8qi 
(__a, __b); + return __aarch64_vget_lane_s8 (__a, __b); } __extension__ static __inline int16_t __attribute__ ((__always_inline__)) vget_lane_s16 (int16x4_t __a, const int __b) { - return (int16_t) __builtin_aarch64_get_lane_signedv4hi (__a, __b); + return __aarch64_vget_lane_s16 (__a, __b); } __extension__ static __inline int32_t __attribute__ ((__always_inline__)) vget_lane_s32 (int32x2_t __a, const int __b) { - return (int32_t) __builtin_aarch64_get_lane_signedv2si (__a, __b); + return __aarch64_vget_lane_s32 (__a, __b); } -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vget_lane_f32 (float32x2_t __a, const int __b) +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vget_lane_s64 (int64x1_t __a, const int __b) { - return (float32_t) __builtin_aarch64_get_lanev2sf (__a, __b); + return __aarch64_vget_lane_s64 (__a, __b); } __extension__ static __inline uint8_t __attribute__ ((__always_inline__)) vget_lane_u8 (uint8x8_t __a, const int __b) { - return (uint8_t) __builtin_aarch64_get_lane_unsignedv8qi ((int8x8_t) __a, - __b); + return __aarch64_vget_lane_u8 (__a, __b); } __extension__ static __inline uint16_t __attribute__ ((__always_inline__)) vget_lane_u16 (uint16x4_t __a, const int __b) { - return (uint16_t) __builtin_aarch64_get_lane_unsignedv4hi ((int16x4_t) __a, - __b); + return __aarch64_vget_lane_u16 (__a, __b); } __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) vget_lane_u32 (uint32x2_t __a, const int __b) { - return (uint32_t) __builtin_aarch64_get_lane_unsignedv2si ((int32x2_t) __a, - __b); + return __aarch64_vget_lane_u32 (__a, __b); } -__extension__ static __inline poly8_t __attribute__ ((__always_inline__)) -vget_lane_p8 (poly8x8_t __a, const int __b) +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vget_lane_u64 (uint64x1_t __a, const int __b) { - return (poly8_t) __builtin_aarch64_get_lane_unsignedv8qi ((int8x8_t) __a, - __b); + return __aarch64_vget_lane_u64 (__a, __b); } -__extension__ static __inline poly16_t __attribute__ ((__always_inline__)) -vget_lane_p16 (poly16x4_t __a, const int __b) +/* vgetq_lane */ + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vgetq_lane_f32 (float32x4_t __a, const int __b) { - return (poly16_t) __builtin_aarch64_get_lane_unsignedv4hi ((int16x4_t) __a, - __b); + return __aarch64_vgetq_lane_f32 (__a, __b); } -__extension__ static __inline int64_t __attribute__ ((__always_inline__)) -vget_lane_s64 (int64x1_t __a, const int __b) +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vgetq_lane_f64 (float64x2_t __a, const int __b) { - return (int64_t) __builtin_aarch64_get_lanedi (__a, __b); + return __aarch64_vgetq_lane_f64 (__a, __b); } -__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) -vget_lane_u64 (uint64x1_t __a, const int __b) +__extension__ static __inline poly8_t __attribute__ ((__always_inline__)) +vgetq_lane_p8 (poly8x16_t __a, const int __b) { - return (uint64_t) __builtin_aarch64_get_lanedi ((int64x1_t) __a, __b); + return __aarch64_vgetq_lane_p8 (__a, __b); } +__extension__ static __inline poly16_t __attribute__ ((__always_inline__)) +vgetq_lane_p16 (poly16x8_t __a, const int __b) +{ + return __aarch64_vgetq_lane_p16 (__a, __b); +} + __extension__ static __inline int8_t __attribute__ ((__always_inline__)) vgetq_lane_s8 (int8x16_t __a, const int __b) { - return (int8_t) __builtin_aarch64_get_lane_signedv16qi (__a, __b); + return 
__aarch64_vgetq_lane_s8 (__a, __b); } __extension__ static __inline int16_t __attribute__ ((__always_inline__)) vgetq_lane_s16 (int16x8_t __a, const int __b) { - return (int16_t) __builtin_aarch64_get_lane_signedv8hi (__a, __b); + return __aarch64_vgetq_lane_s16 (__a, __b); } __extension__ static __inline int32_t __attribute__ ((__always_inline__)) vgetq_lane_s32 (int32x4_t __a, const int __b) { - return (int32_t) __builtin_aarch64_get_lane_signedv4si (__a, __b); + return __aarch64_vgetq_lane_s32 (__a, __b); } -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vgetq_lane_f32 (float32x4_t __a, const int __b) +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vgetq_lane_s64 (int64x2_t __a, const int __b) { - return (float32_t) __builtin_aarch64_get_lanev4sf (__a, __b); + return __aarch64_vgetq_lane_s64 (__a, __b); } -__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -vgetq_lane_f64 (float64x2_t __a, const int __b) -{ - return (float64_t) __builtin_aarch64_get_lanev2df (__a, __b); -} - __extension__ static __inline uint8_t __attribute__ ((__always_inline__)) vgetq_lane_u8 (uint8x16_t __a, const int __b) { - return (uint8_t) __builtin_aarch64_get_lane_unsignedv16qi ((int8x16_t) __a, - __b); + return __aarch64_vgetq_lane_u8 (__a, __b); } __extension__ static __inline uint16_t __attribute__ ((__always_inline__)) vgetq_lane_u16 (uint16x8_t __a, const int __b) { - return (uint16_t) __builtin_aarch64_get_lane_unsignedv8hi ((int16x8_t) __a, - __b); + return __aarch64_vgetq_lane_u16 (__a, __b); } __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) vgetq_lane_u32 (uint32x4_t __a, const int __b) { - return (uint32_t) __builtin_aarch64_get_lane_unsignedv4si ((int32x4_t) __a, - __b); + return __aarch64_vgetq_lane_u32 (__a, __b); } -__extension__ static __inline poly8_t __attribute__ ((__always_inline__)) -vgetq_lane_p8 (poly8x16_t __a, const int __b) -{ - return (poly8_t) __builtin_aarch64_get_lane_unsignedv16qi ((int8x16_t) __a, - __b); -} - -__extension__ static __inline poly16_t __attribute__ ((__always_inline__)) -vgetq_lane_p16 (poly16x8_t __a, const int __b) -{ - return (poly16_t) __builtin_aarch64_get_lane_unsignedv8hi ((int16x8_t) __a, - __b); -} - -__extension__ static __inline int64_t __attribute__ ((__always_inline__)) -vgetq_lane_s64 (int64x2_t __a, const int __b) -{ - return __builtin_aarch64_get_lane_unsignedv2di (__a, __b); -} - __extension__ static __inline uint64_t __attribute__ ((__always_inline__)) vgetq_lane_u64 (uint64x2_t __a, const int __b) { - return (uint64_t) __builtin_aarch64_get_lane_unsignedv2di ((int64x2_t) __a, - __b); + return __aarch64_vgetq_lane_u64 (__a, __b); } +/* vreinterpret */ + __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) vreinterpret_p8_s8 (int8x8_t __a) { @@ -3805,6 +3872,85 @@ return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a); } +#define __GET_LOW(__TYPE) \ + uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a); \ + uint64_t lo = vgetq_lane_u64 (tmp, 0); \ + return vreinterpret_##__TYPE##_u64 (lo); + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vget_low_f32 (float32x4_t __a) +{ + __GET_LOW (f32); +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vget_low_f64 (float64x2_t __a) +{ + return vgetq_lane_f64 (__a, 0); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vget_low_p8 (poly8x16_t __a) +{ + __GET_LOW 
(p8); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vget_low_p16 (poly16x8_t __a) +{ + __GET_LOW (p16); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vget_low_s8 (int8x16_t __a) +{ + __GET_LOW (s8); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vget_low_s16 (int16x8_t __a) +{ + __GET_LOW (s16); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vget_low_s32 (int32x4_t __a) +{ + __GET_LOW (s32); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vget_low_s64 (int64x2_t __a) +{ + return vgetq_lane_s64 (__a, 0); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vget_low_u8 (uint8x16_t __a) +{ + __GET_LOW (u8); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vget_low_u16 (uint16x8_t __a) +{ + __GET_LOW (u16); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vget_low_u32 (uint32x4_t __a) +{ + __GET_LOW (u32); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vget_low_u64 (uint64x2_t __a) +{ + return vgetq_lane_u64 (__a, 0); +} + +#undef __GET_LOW + __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vcombine_s8 (int8x8_t __a, int8x8_t __b) { @@ -4468,160 +4614,6 @@ return result; } -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vabs_f32 (float32x2_t a) -{ - float32x2_t result; - __asm__ ("fabs %0.2s,%1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vabs_s8 (int8x8_t a) -{ - int8x8_t result; - __asm__ ("abs %0.8b,%1.8b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vabs_s16 (int16x4_t a) -{ - int16x4_t result; - __asm__ ("abs %0.4h,%1.4h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vabs_s32 (int32x2_t a) -{ - int32x2_t result; - __asm__ ("abs %0.2s,%1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vabsq_f32 (float32x4_t a) -{ - float32x4_t result; - __asm__ ("fabs %0.4s,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vabsq_f64 (float64x2_t a) -{ - float64x2_t result; - __asm__ ("fabs %0.2d,%1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vabsq_s8 (int8x16_t a) -{ - int8x16_t result; - __asm__ ("abs %0.16b,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vabsq_s16 (int16x8_t a) -{ - int16x8_t result; - __asm__ ("abs %0.8h,%1.8h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vabsq_s32 (int32x4_t a) -{ - int32x4_t result; - __asm__ ("abs %0.4s,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int64x2_t __attribute__ 
((__always_inline__)) -vabsq_s64 (int64x2_t a) -{ - int64x2_t result; - __asm__ ("abs %0.2d,%1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -vacged_f64 (float64_t a, float64_t b) -{ - float64_t result; - __asm__ ("facge %d0,%d1,%d2" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vacges_f32 (float32_t a, float32_t b) -{ - float32_t result; - __asm__ ("facge %s0,%s1,%s2" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -vacgtd_f64 (float64_t a, float64_t b) -{ - float64_t result; - __asm__ ("facgt %d0,%d1,%d2" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vacgts_f32 (float32_t a, float32_t b) -{ - float32_t result; - __asm__ ("facgt %s0,%s1,%s2" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - __extension__ static __inline int16_t __attribute__ ((__always_inline__)) vaddlv_s8 (int8x8_t a) { @@ -4732,116 +4724,6 @@ return result; } -__extension__ static __inline int8_t __attribute__ ((__always_inline__)) -vaddv_s8 (int8x8_t a) -{ - int8_t result; - __asm__ ("addv %b0,%1.8b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16_t __attribute__ ((__always_inline__)) -vaddv_s16 (int16x4_t a) -{ - int16_t result; - __asm__ ("addv %h0,%1.4h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) -vaddv_u8 (uint8x8_t a) -{ - uint8_t result; - __asm__ ("addv %b0,%1.8b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) -vaddv_u16 (uint16x4_t a) -{ - uint16_t result; - __asm__ ("addv %h0,%1.4h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8_t __attribute__ ((__always_inline__)) -vaddvq_s8 (int8x16_t a) -{ - int8_t result; - __asm__ ("addv %b0,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16_t __attribute__ ((__always_inline__)) -vaddvq_s16 (int16x8_t a) -{ - int16_t result; - __asm__ ("addv %h0,%1.8h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32_t __attribute__ ((__always_inline__)) -vaddvq_s32 (int32x4_t a) -{ - int32_t result; - __asm__ ("addv %s0,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) -vaddvq_u8 (uint8x16_t a) -{ - uint8_t result; - __asm__ ("addv %b0,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) -vaddvq_u16 (uint16x8_t a) -{ - uint16_t result; - __asm__ ("addv %h0,%1.8h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -vaddvq_u32 (uint32x4_t a) -{ - uint32_t result; - __asm__ ("addv %s0,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; 
-} - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vbsl_f32 (uint32x2_t a, float32x2_t b, float32x2_t c) { @@ -5095,358 +4977,6 @@ return result; } -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vcage_f32 (float32x2_t a, float32x2_t b) -{ - uint32x2_t result; - __asm__ ("facge %0.2s, %1.2s, %2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vcageq_f32 (float32x4_t a, float32x4_t b) -{ - uint32x4_t result; - __asm__ ("facge %0.4s, %1.4s, %2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vcageq_f64 (float64x2_t a, float64x2_t b) -{ - uint64x2_t result; - __asm__ ("facge %0.2d, %1.2d, %2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vcagt_f32 (float32x2_t a, float32x2_t b) -{ - uint32x2_t result; - __asm__ ("facgt %0.2s, %1.2s, %2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vcagtq_f32 (float32x4_t a, float32x4_t b) -{ - uint32x4_t result; - __asm__ ("facgt %0.4s, %1.4s, %2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vcagtq_f64 (float64x2_t a, float64x2_t b) -{ - uint64x2_t result; - __asm__ ("facgt %0.2d, %1.2d, %2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vcale_f32 (float32x2_t a, float32x2_t b) -{ - uint32x2_t result; - __asm__ ("facge %0.2s, %2.2s, %1.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vcaleq_f32 (float32x4_t a, float32x4_t b) -{ - uint32x4_t result; - __asm__ ("facge %0.4s, %2.4s, %1.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vcaleq_f64 (float64x2_t a, float64x2_t b) -{ - uint64x2_t result; - __asm__ ("facge %0.2d, %2.2d, %1.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vcalt_f32 (float32x2_t a, float32x2_t b) -{ - uint32x2_t result; - __asm__ ("facgt %0.2s, %2.2s, %1.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vcaltq_f32 (float32x4_t a, float32x4_t b) -{ - uint32x4_t result; - __asm__ ("facgt %0.4s, %2.4s, %1.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vcaltq_f64 (float64x2_t a, float64x2_t b) -{ - uint64x2_t result; - __asm__ ("facgt %0.2d, %2.2d, %1.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vceq_f32 (float32x2_t a, float32x2_t b) -{ - uint32x2_t result; - __asm__ ("fcmeq %0.2s, %1.2s, %2.2s" - : 
"=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vceq_f64 (float64x1_t a, float64x1_t b) -{ - uint64x1_t result; - __asm__ ("fcmeq %d0, %d1, %d2" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -vceqd_f64 (float64_t a, float64_t b) -{ - float64_t result; - __asm__ ("fcmeq %d0,%d1,%d2" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vceqq_f32 (float32x4_t a, float32x4_t b) -{ - uint32x4_t result; - __asm__ ("fcmeq %0.4s, %1.4s, %2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vceqq_f64 (float64x2_t a, float64x2_t b) -{ - uint64x2_t result; - __asm__ ("fcmeq %0.2d, %1.2d, %2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vceqs_f32 (float32_t a, float32_t b) -{ - float32_t result; - __asm__ ("fcmeq %s0,%s1,%s2" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -vceqzd_f64 (float64_t a) -{ - float64_t result; - __asm__ ("fcmeq %d0,%d1,#0" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vceqzs_f32 (float32_t a) -{ - float32_t result; - __asm__ ("fcmeq %s0,%s1,#0" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vcge_f32 (float32x2_t a, float32x2_t b) -{ - uint32x2_t result; - __asm__ ("fcmge %0.2s, %1.2s, %2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vcge_f64 (float64x1_t a, float64x1_t b) -{ - uint64x1_t result; - __asm__ ("fcmge %d0, %d1, %d2" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vcgeq_f32 (float32x4_t a, float32x4_t b) -{ - uint32x4_t result; - __asm__ ("fcmge %0.4s, %1.4s, %2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vcgeq_f64 (float64x2_t a, float64x2_t b) -{ - uint64x2_t result; - __asm__ ("fcmge %0.2d, %1.2d, %2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vcgt_f32 (float32x2_t a, float32x2_t b) -{ - uint32x2_t result; - __asm__ ("fcmgt %0.2s, %1.2s, %2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vcgt_f64 (float64x1_t a, float64x1_t b) -{ - uint64x1_t result; - __asm__ ("fcmgt %d0, %d1, %d2" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vcgtq_f32 (float32x4_t a, float32x4_t b) -{ - uint32x4_t result; - 
__asm__ ("fcmgt %0.4s, %1.4s, %2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vcgtq_f64 (float64x2_t a, float64x2_t b) -{ - uint64x2_t result; - __asm__ ("fcmgt %0.2d, %1.2d, %2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vcle_f32 (float32x2_t a, float32x2_t b) -{ - uint32x2_t result; - __asm__ ("fcmge %0.2s, %2.2s, %1.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vcle_f64 (float64x1_t a, float64x1_t b) -{ - uint64x1_t result; - __asm__ ("fcmge %d0, %d2, %d1" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vcleq_f32 (float32x4_t a, float32x4_t b) -{ - uint32x4_t result; - __asm__ ("fcmge %0.4s, %2.4s, %1.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vcleq_f64 (float64x2_t a, float64x2_t b) -{ - uint64x2_t result; - __asm__ ("fcmge %0.2d, %2.2d, %1.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vcls_s8 (int8x8_t a) { @@ -5513,50 +5043,6 @@ return result; } -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vclt_f32 (float32x2_t a, float32x2_t b) -{ - uint32x2_t result; - __asm__ ("fcmgt %0.2s, %2.2s, %1.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vclt_f64 (float64x1_t a, float64x1_t b) -{ - uint64x1_t result; - __asm__ ("fcmgt %d0, %d2, %d1" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vcltq_f32 (float32x4_t a, float32x4_t b) -{ - uint32x4_t result; - __asm__ ("fcmgt %0.4s, %2.4s, %1.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vcltq_f64 (float64x2_t a, float64x2_t b) -{ - uint64x2_t result; - __asm__ ("fcmgt %0.2d, %2.2d, %1.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vclz_s8 (int8x8_t a) { @@ -5915,72 +5401,6 @@ /* vcvt_f32_f16 not supported */ -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vcvt_f32_f64 (float64x2_t a) -{ - float32x2_t result; - __asm__ ("fcvtn %0.2s,%1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vcvt_f32_s32 (int32x2_t a) -{ - float32x2_t result; - __asm__ ("scvtf %0.2s, %1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vcvt_f32_u32 (uint32x2_t a) -{ - float32x2_t result; - __asm__ ("ucvtf %0.2s, %1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ 
((__always_inline__)) -vcvt_f64_f32 (float32x2_t a) -{ - float64x2_t result; - __asm__ ("fcvtl %0.2d,%1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -vcvt_f64_s64 (uint64x1_t a) -{ - float64x1_t result; - __asm__ ("scvtf %d0, %d1" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -vcvt_f64_u64 (uint64x1_t a) -{ - float64x1_t result; - __asm__ ("ucvtf %d0, %d1" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - /* vcvt_high_f16_f32 not supported */ /* vcvt_high_f32_f16 not supported */ @@ -5987,28 +5407,6 @@ static float32x2_t vdup_n_f32 (float32_t); -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vcvt_high_f32_f64 (float32x2_t a, float64x2_t b) -{ - float32x4_t result = vcombine_f32 (a, vdup_n_f32 (0.0f)); - __asm__ ("fcvtn2 %0.4s,%2.2d" - : "+w"(result) - : "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vcvt_high_f64_f32 (float32x4_t a) -{ - float64x2_t result; - __asm__ ("fcvtl2 %0.2d,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - #define vcvt_n_f32_s32(a, b) \ __extension__ \ ({ \ @@ -6057,160 +5455,6 @@ result; \ }) -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vcvt_s32_f32 (float32x2_t a) -{ - int32x2_t result; - __asm__ ("fcvtzs %0.2s, %1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vcvt_u32_f32 (float32x2_t a) -{ - uint32x2_t result; - __asm__ ("fcvtzu %0.2s, %1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vcvta_s32_f32 (float32x2_t a) -{ - int32x2_t result; - __asm__ ("fcvtas %0.2s, %1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vcvta_u32_f32 (float32x2_t a) -{ - uint32x2_t result; - __asm__ ("fcvtau %0.2s, %1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -vcvtad_s64_f64 (float64_t a) -{ - float64_t result; - __asm__ ("fcvtas %d0,%d1" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -vcvtad_u64_f64 (float64_t a) -{ - float64_t result; - __asm__ ("fcvtau %d0,%d1" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vcvtaq_s32_f32 (float32x4_t a) -{ - int32x4_t result; - __asm__ ("fcvtas %0.4s, %1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vcvtaq_s64_f64 (float64x2_t a) -{ - int64x2_t result; - __asm__ ("fcvtas %0.2d, %1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vcvtaq_u32_f32 (float32x4_t a) -{ - uint32x4_t result; - __asm__ ("fcvtau %0.4s, %1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return 
result; -} - -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vcvtaq_u64_f64 (float64x2_t a) -{ - uint64x2_t result; - __asm__ ("fcvtau %0.2d, %1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vcvtas_s64_f64 (float32_t a) -{ - float32_t result; - __asm__ ("fcvtas %s0,%s1" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vcvtas_u64_f64 (float32_t a) -{ - float32_t result; - __asm__ ("fcvtau %s0,%s1" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int64_t __attribute__ ((__always_inline__)) -vcvtd_f64_s64 (int64_t a) -{ - int64_t result; - __asm__ ("scvtf %d0,%d1" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) -vcvtd_f64_u64 (uint64_t a) -{ - uint64_t result; - __asm__ ("ucvtf %d0,%d1" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - #define vcvtd_n_f64_s64(a, b) \ __extension__ \ ({ \ @@ -6259,402 +5503,6 @@ result; \ }) -__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -vcvtd_s64_f64 (float64_t a) -{ - float64_t result; - __asm__ ("fcvtzs %d0,%d1" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -vcvtd_u64_f64 (float64_t a) -{ - float64_t result; - __asm__ ("fcvtzu %d0,%d1" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vcvtm_s32_f32 (float32x2_t a) -{ - int32x2_t result; - __asm__ ("fcvtms %0.2s, %1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vcvtm_u32_f32 (float32x2_t a) -{ - uint32x2_t result; - __asm__ ("fcvtmu %0.2s, %1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -vcvtmd_s64_f64 (float64_t a) -{ - float64_t result; - __asm__ ("fcvtms %d0,%d1" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -vcvtmd_u64_f64 (float64_t a) -{ - float64_t result; - __asm__ ("fcvtmu %d0,%d1" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vcvtmq_s32_f32 (float32x4_t a) -{ - int32x4_t result; - __asm__ ("fcvtms %0.4s, %1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vcvtmq_s64_f64 (float64x2_t a) -{ - int64x2_t result; - __asm__ ("fcvtms %0.2d, %1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vcvtmq_u32_f32 (float32x4_t a) -{ - uint32x4_t result; - __asm__ ("fcvtmu %0.4s, %1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vcvtmq_u64_f64 (float64x2_t a) -{ - uint64x2_t result; - __asm__ ("fcvtmu %0.2d, %1.2d" - : 
"=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vcvtms_s64_f64 (float32_t a) -{ - float32_t result; - __asm__ ("fcvtms %s0,%s1" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vcvtms_u64_f64 (float32_t a) -{ - float32_t result; - __asm__ ("fcvtmu %s0,%s1" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vcvtn_s32_f32 (float32x2_t a) -{ - int32x2_t result; - __asm__ ("fcvtns %0.2s, %1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vcvtn_u32_f32 (float32x2_t a) -{ - uint32x2_t result; - __asm__ ("fcvtnu %0.2s, %1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -vcvtnd_s64_f64 (float64_t a) -{ - float64_t result; - __asm__ ("fcvtns %d0,%d1" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -vcvtnd_u64_f64 (float64_t a) -{ - float64_t result; - __asm__ ("fcvtnu %d0,%d1" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vcvtnq_s32_f32 (float32x4_t a) -{ - int32x4_t result; - __asm__ ("fcvtns %0.4s, %1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vcvtnq_s64_f64 (float64x2_t a) -{ - int64x2_t result; - __asm__ ("fcvtns %0.2d, %1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vcvtnq_u32_f32 (float32x4_t a) -{ - uint32x4_t result; - __asm__ ("fcvtnu %0.4s, %1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vcvtnq_u64_f64 (float64x2_t a) -{ - uint64x2_t result; - __asm__ ("fcvtnu %0.2d, %1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vcvtns_s64_f64 (float32_t a) -{ - float32_t result; - __asm__ ("fcvtns %s0,%s1" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vcvtns_u64_f64 (float32_t a) -{ - float32_t result; - __asm__ ("fcvtnu %s0,%s1" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vcvtp_s32_f32 (float32x2_t a) -{ - int32x2_t result; - __asm__ ("fcvtps %0.2s, %1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vcvtp_u32_f32 (float32x2_t a) -{ - uint32x2_t result; - __asm__ ("fcvtpu %0.2s, %1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -vcvtpd_s64_f64 (float64_t a) -{ - float64_t result; - __asm__ ("fcvtps %d0,%d1" - : "=w"(result) - 
: "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -vcvtpd_u64_f64 (float64_t a) -{ - float64_t result; - __asm__ ("fcvtpu %d0,%d1" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vcvtpq_s32_f32 (float32x4_t a) -{ - int32x4_t result; - __asm__ ("fcvtps %0.4s, %1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vcvtpq_s64_f64 (float64x2_t a) -{ - int64x2_t result; - __asm__ ("fcvtps %0.2d, %1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vcvtpq_u32_f32 (float32x4_t a) -{ - uint32x4_t result; - __asm__ ("fcvtpu %0.4s, %1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vcvtpq_u64_f64 (float64x2_t a) -{ - uint64x2_t result; - __asm__ ("fcvtpu %0.2d, %1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vcvtps_s64_f64 (float32_t a) -{ - float32_t result; - __asm__ ("fcvtps %s0,%s1" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vcvtps_u64_f64 (float32_t a) -{ - float32_t result; - __asm__ ("fcvtpu %s0,%s1" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vcvtq_f32_s32 (int32x4_t a) -{ - float32x4_t result; - __asm__ ("scvtf %0.4s, %1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vcvtq_f32_u32 (uint32x4_t a) -{ - float32x4_t result; - __asm__ ("ucvtf %0.4s, %1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vcvtq_f64_s64 (int64x2_t a) -{ - float64x2_t result; - __asm__ ("scvtf %0.2d, %1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vcvtq_f64_u64 (uint64x2_t a) -{ - float64x2_t result; - __asm__ ("ucvtf %0.2d, %1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - #define vcvtq_n_f32_s32(a, b) \ __extension__ \ ({ \ @@ -6751,72 +5599,6 @@ result; \ }) -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vcvtq_s32_f32 (float32x4_t a) -{ - int32x4_t result; - __asm__ ("fcvtzs %0.4s, %1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vcvtq_s64_f64 (float64x2_t a) -{ - int64x2_t result; - __asm__ ("fcvtzs %0.2d, %1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vcvtq_u32_f32 (float32x4_t a) -{ - uint32x4_t result; - __asm__ ("fcvtzu %0.4s, %1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 
-vcvtq_u64_f64 (float64x2_t a) -{ - uint64x2_t result; - __asm__ ("fcvtzu %0.2d, %1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32_t __attribute__ ((__always_inline__)) -vcvts_f64_s32 (int32_t a) -{ - int32_t result; - __asm__ ("scvtf %s0,%s1" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -vcvts_f64_u32 (uint32_t a) -{ - uint32_t result; - __asm__ ("ucvtf %s0,%s1" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - #define vcvts_n_f32_s32(a, b) \ __extension__ \ ({ \ @@ -6865,28 +5647,6 @@ result; \ }) -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vcvts_s64_f64 (float32_t a) -{ - float32_t result; - __asm__ ("fcvtzs %s0,%s1" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vcvts_u64_f64 (float32_t a) -{ - float32_t result; - __asm__ ("fcvtzu %s0,%s1" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vcvtx_f32_f64 (float64x2_t a) { @@ -8110,151 +6870,7 @@ return result; } -#define vget_lane_f64(a, b) \ - __extension__ \ - ({ \ - float64x1_t a_ = (a); \ - float64_t result; \ - __asm__ ("umov %x0, %1.d[%2]" \ - : "=r"(result) \ - : "w"(a_), "i"(b) \ - : /* No clobbers */); \ - result; \ - }) - -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vget_low_f32 (float32x4_t a) -{ - float32x2_t result; - __asm__ ("ins %0.d[0], %1.d[0]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -vget_low_f64 (float64x2_t a) -{ - float64x1_t result; - __asm__ ("ins %0.d[0], %1.d[0]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vget_low_p8 (poly8x16_t a) -{ - poly8x8_t result; - __asm__ ("ins %0.d[0], %1.d[0]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -vget_low_p16 (poly16x8_t a) -{ - poly16x4_t result; - __asm__ ("ins %0.d[0], %1.d[0]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vget_low_s8 (int8x16_t a) -{ - int8x8_t result; - __asm__ ("ins %0.d[0], %1.d[0]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vget_low_s16 (int16x8_t a) -{ - int16x4_t result; - __asm__ ("ins %0.d[0], %1.d[0]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vget_low_s32 (int32x4_t a) -{ - int32x2_t result; - __asm__ ("ins %0.d[0], %1.d[0]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vget_low_s64 (int64x2_t a) -{ - int64x1_t result; - __asm__ ("ins %0.d[0], %1.d[0]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vget_low_u8 (uint8x16_t a) -{ - 
uint8x8_t result; - __asm__ ("ins %0.d[0], %1.d[0]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vget_low_u16 (uint16x8_t a) -{ - uint16x4_t result; - __asm__ ("ins %0.d[0], %1.d[0]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vget_low_u32 (uint32x4_t a) -{ - uint32x2_t result; - __asm__ ("ins %0.d[0], %1.d[0]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vget_low_u64 (uint64x2_t a) -{ - uint64x1_t result; - __asm__ ("ins %0.d[0], %1.d[0]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vhsub_s8 (int8x8_t a, int8x8_t b) { int8x8_t result; @@ -8962,303 +7578,6 @@ result; \ }) -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vmaxnm_f32 (float32x2_t a, float32x2_t b) -{ - float32x2_t result; - __asm__ ("fmaxnm %0.2s,%1.2s,%2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vmaxnmq_f32 (float32x4_t a, float32x4_t b) -{ - float32x4_t result; - __asm__ ("fmaxnm %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vmaxnmq_f64 (float64x2_t a, float64x2_t b) -{ - float64x2_t result; - __asm__ ("fmaxnm %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vmaxnmvq_f32 (float32x4_t a) -{ - float32_t result; - __asm__ ("fmaxnmv %s0,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8_t __attribute__ ((__always_inline__)) -vmaxv_s8 (int8x8_t a) -{ - int8_t result; - __asm__ ("smaxv %b0,%1.8b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16_t __attribute__ ((__always_inline__)) -vmaxv_s16 (int16x4_t a) -{ - int16_t result; - __asm__ ("smaxv %h0,%1.4h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) -vmaxv_u8 (uint8x8_t a) -{ - uint8_t result; - __asm__ ("umaxv %b0,%1.8b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) -vmaxv_u16 (uint16x4_t a) -{ - uint16_t result; - __asm__ ("umaxv %h0,%1.4h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vmaxvq_f32 (float32x4_t a) -{ - float32_t result; - __asm__ ("fmaxv %s0,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8_t __attribute__ ((__always_inline__)) -vmaxvq_s8 (int8x16_t a) -{ - int8_t result; - __asm__ ("smaxv %b0,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16_t __attribute__ ((__always_inline__)) -vmaxvq_s16 (int16x8_t a) -{ - int16_t result; - __asm__ ("smaxv %h0,%1.8h" - : 
"=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32_t __attribute__ ((__always_inline__)) -vmaxvq_s32 (int32x4_t a) -{ - int32_t result; - __asm__ ("smaxv %s0,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) -vmaxvq_u8 (uint8x16_t a) -{ - uint8_t result; - __asm__ ("umaxv %b0,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) -vmaxvq_u16 (uint16x8_t a) -{ - uint16_t result; - __asm__ ("umaxv %h0,%1.8h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -vmaxvq_u32 (uint32x4_t a) -{ - uint32_t result; - __asm__ ("umaxv %s0,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vminnmvq_f32 (float32x4_t a) -{ - float32_t result; - __asm__ ("fminnmv %s0,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8_t __attribute__ ((__always_inline__)) -vminv_s8 (int8x8_t a) -{ - int8_t result; - __asm__ ("sminv %b0,%1.8b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16_t __attribute__ ((__always_inline__)) -vminv_s16 (int16x4_t a) -{ - int16_t result; - __asm__ ("sminv %h0,%1.4h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) -vminv_u8 (uint8x8_t a) -{ - uint8_t result; - __asm__ ("uminv %b0,%1.8b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) -vminv_u16 (uint16x4_t a) -{ - uint16_t result; - __asm__ ("uminv %h0,%1.4h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vminvq_f32 (float32x4_t a) -{ - float32_t result; - __asm__ ("fminv %s0,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8_t __attribute__ ((__always_inline__)) -vminvq_s8 (int8x16_t a) -{ - int8_t result; - __asm__ ("sminv %b0,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16_t __attribute__ ((__always_inline__)) -vminvq_s16 (int16x8_t a) -{ - int16_t result; - __asm__ ("sminv %h0,%1.8h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32_t __attribute__ ((__always_inline__)) -vminvq_s32 (int32x4_t a) -{ - int32_t result; - __asm__ ("sminv %s0,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) -vminvq_u8 (uint8x16_t a) -{ - uint8_t result; - __asm__ ("uminv %b0,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) -vminvq_u16 (uint16x8_t a) -{ - uint16_t result; - __asm__ ("uminv %h0,%1.8h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -vminvq_u32 
(uint32x4_t a) -{ - uint32_t result; - __asm__ ("uminv %s0,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - #define vmla_lane_f32(a, b, c, d) \ __extension__ \ ({ \ @@ -11382,7 +9701,7 @@ __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vmovn_high_s16 (int8x8_t a, int16x8_t b) { - int8x16_t result = vcombine_s8 (a, vcreate_s8 (UINT64_C (0x0))); + int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0))); __asm__ ("xtn2 %0.16b,%1.8h" : "+w"(result) : "w"(b) @@ -11393,7 +9712,7 @@ __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) vmovn_high_s32 (int16x4_t a, int32x4_t b) { - int16x8_t result = vcombine_s16 (a, vcreate_s16 (UINT64_C (0x0))); + int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0))); __asm__ ("xtn2 %0.8h,%1.4s" : "+w"(result) : "w"(b) @@ -11404,7 +9723,7 @@ __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vmovn_high_s64 (int32x2_t a, int64x2_t b) { - int32x4_t result = vcombine_s32 (a, vcreate_s32 (UINT64_C (0x0))); + int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0))); __asm__ ("xtn2 %0.4s,%1.2d" : "+w"(result) : "w"(b) @@ -11415,7 +9734,7 @@ __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vmovn_high_u16 (uint8x8_t a, uint16x8_t b) { - uint8x16_t result = vcombine_u8 (a, vcreate_u8 (UINT64_C (0x0))); + uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); __asm__ ("xtn2 %0.16b,%1.8h" : "+w"(result) : "w"(b) @@ -11426,7 +9745,7 @@ __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vmovn_high_u32 (uint16x4_t a, uint32x4_t b) { - uint16x8_t result = vcombine_u16 (a, vcreate_u16 (UINT64_C (0x0))); + uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); __asm__ ("xtn2 %0.8h,%1.4s" : "+w"(result) : "w"(b) @@ -11437,7 +9756,7 @@ __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vmovn_high_u64 (uint32x2_t a, uint64x2_t b) { - uint32x4_t result = vcombine_u32 (a, vcreate_u32 (UINT64_C (0x0))); + uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); __asm__ ("xtn2 %0.4s,%1.2d" : "+w"(result) : "w"(b) @@ -13856,7 +12175,7 @@ __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vqmovn_high_s16 (int8x8_t a, int16x8_t b) { - int8x16_t result = vcombine_s8 (a, vcreate_s8 (UINT64_C (0x0))); + int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0))); __asm__ ("sqxtn2 %0.16b, %1.8h" : "+w"(result) : "w"(b) @@ -13867,7 +12186,7 @@ __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) vqmovn_high_s32 (int16x4_t a, int32x4_t b) { - int16x8_t result = vcombine_s16 (a, vcreate_s16 (UINT64_C (0x0))); + int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0))); __asm__ ("sqxtn2 %0.8h, %1.4s" : "+w"(result) : "w"(b) @@ -13878,7 +12197,7 @@ __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vqmovn_high_s64 (int32x2_t a, int64x2_t b) { - int32x4_t result = vcombine_s32 (a, vcreate_s32 (UINT64_C (0x0))); + int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0))); __asm__ ("sqxtn2 %0.4s, %1.2d" : "+w"(result) : "w"(b) @@ -13889,7 +12208,7 @@ __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vqmovn_high_u16 (uint8x8_t a, uint16x8_t b) { - uint8x16_t result = vcombine_u8 (a, vcreate_u8 (UINT64_C (0x0))); + uint8x16_t result = vcombine_u8 (a, 
vcreate_u8 (__AARCH64_UINT64_C (0x0))); __asm__ ("uqxtn2 %0.16b, %1.8h" : "+w"(result) : "w"(b) @@ -13900,7 +12219,7 @@ __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vqmovn_high_u32 (uint16x4_t a, uint32x4_t b) { - uint16x8_t result = vcombine_u16 (a, vcreate_u16 (UINT64_C (0x0))); + uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); __asm__ ("uqxtn2 %0.8h, %1.4s" : "+w"(result) : "w"(b) @@ -13911,7 +12230,7 @@ __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vqmovn_high_u64 (uint32x2_t a, uint64x2_t b) { - uint32x4_t result = vcombine_u32 (a, vcreate_u32 (UINT64_C (0x0))); + uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); __asm__ ("uqxtn2 %0.4s, %1.2d" : "+w"(result) : "w"(b) @@ -13922,7 +12241,7 @@ __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vqmovun_high_s16 (uint8x8_t a, int16x8_t b) { - uint8x16_t result = vcombine_u8 (a, vcreate_u8 (UINT64_C (0x0))); + uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); __asm__ ("sqxtun2 %0.16b, %1.8h" : "+w"(result) : "w"(b) @@ -13933,7 +12252,7 @@ __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vqmovun_high_s32 (uint16x4_t a, int32x4_t b) { - uint16x8_t result = vcombine_u16 (a, vcreate_u16 (UINT64_C (0x0))); + uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); __asm__ ("sqxtun2 %0.8h, %1.4s" : "+w"(result) : "w"(b) @@ -13944,7 +12263,7 @@ __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vqmovun_high_s64 (uint32x2_t a, int64x2_t b) { - uint32x4_t result = vcombine_u32 (a, vcreate_u32 (UINT64_C (0x0))); + uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); __asm__ ("sqxtun2 %0.4s, %1.2d" : "+w"(result) : "w"(b) @@ -14002,7 +12321,8 @@ int16x8_t b_ = (b); \ int8x8_t a_ = (a); \ int8x16_t result = vcombine_s8 \ - (a_, vcreate_s8 (UINT64_C (0x0))); \ + (a_, vcreate_s8 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("sqrshrn2 %0.16b, %1.8h, #%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -14016,7 +12336,8 @@ int32x4_t b_ = (b); \ int16x4_t a_ = (a); \ int16x8_t result = vcombine_s16 \ - (a_, vcreate_s16 (UINT64_C (0x0))); \ + (a_, vcreate_s16 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("sqrshrn2 %0.8h, %1.4s, #%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -14030,7 +12351,8 @@ int64x2_t b_ = (b); \ int32x2_t a_ = (a); \ int32x4_t result = vcombine_s32 \ - (a_, vcreate_s32 (UINT64_C (0x0))); \ + (a_, vcreate_s32 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("sqrshrn2 %0.4s, %1.2d, #%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -14044,7 +12366,8 @@ uint16x8_t b_ = (b); \ uint8x8_t a_ = (a); \ uint8x16_t result = vcombine_u8 \ - (a_, vcreate_u8 (UINT64_C (0x0))); \ + (a_, vcreate_u8 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("uqrshrn2 %0.16b, %1.8h, #%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -14058,7 +12381,8 @@ uint32x4_t b_ = (b); \ uint16x4_t a_ = (a); \ uint16x8_t result = vcombine_u16 \ - (a_, vcreate_u16 (UINT64_C (0x0))); \ + (a_, vcreate_u16 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("uqrshrn2 %0.8h, %1.4s, #%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -14072,7 +12396,8 @@ uint64x2_t b_ = (b); \ uint32x2_t a_ = (a); \ uint32x4_t result = vcombine_u32 \ - (a_, vcreate_u32 (UINT64_C (0x0))); \ + (a_, vcreate_u32 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("uqrshrn2 %0.4s, %1.2d, #%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -14086,7 +12411,8 @@ int16x8_t b_ = (b); \ 
uint8x8_t a_ = (a); \ uint8x16_t result = vcombine_u8 \ - (a_, vcreate_u8 (UINT64_C (0x0))); \ + (a_, vcreate_u8 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("sqrshrun2 %0.16b, %1.8h, #%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -14100,7 +12426,8 @@ int32x4_t b_ = (b); \ uint16x4_t a_ = (a); \ uint16x8_t result = vcombine_u16 \ - (a_, vcreate_u16 (UINT64_C (0x0))); \ + (a_, vcreate_u16 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("sqrshrun2 %0.8h, %1.4s, #%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -14114,7 +12441,8 @@ int64x2_t b_ = (b); \ uint32x2_t a_ = (a); \ uint32x4_t result = vcombine_u32 \ - (a_, vcreate_u32 (UINT64_C (0x0))); \ + (a_, vcreate_u32 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("sqrshrun2 %0.4s, %1.2d, #%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -14128,7 +12456,8 @@ int16x8_t b_ = (b); \ int8x8_t a_ = (a); \ int8x16_t result = vcombine_s8 \ - (a_, vcreate_s8 (UINT64_C (0x0))); \ + (a_, vcreate_s8 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("sqshrn2 %0.16b, %1.8h, #%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -14142,7 +12471,8 @@ int32x4_t b_ = (b); \ int16x4_t a_ = (a); \ int16x8_t result = vcombine_s16 \ - (a_, vcreate_s16 (UINT64_C (0x0))); \ + (a_, vcreate_s16 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("sqshrn2 %0.8h, %1.4s, #%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -14156,7 +12486,8 @@ int64x2_t b_ = (b); \ int32x2_t a_ = (a); \ int32x4_t result = vcombine_s32 \ - (a_, vcreate_s32 (UINT64_C (0x0))); \ + (a_, vcreate_s32 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("sqshrn2 %0.4s, %1.2d, #%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -14170,7 +12501,8 @@ uint16x8_t b_ = (b); \ uint8x8_t a_ = (a); \ uint8x16_t result = vcombine_u8 \ - (a_, vcreate_u8 (UINT64_C (0x0))); \ + (a_, vcreate_u8 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("uqshrn2 %0.16b, %1.8h, #%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -14184,7 +12516,8 @@ uint32x4_t b_ = (b); \ uint16x4_t a_ = (a); \ uint16x8_t result = vcombine_u16 \ - (a_, vcreate_u16 (UINT64_C (0x0))); \ + (a_, vcreate_u16 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("uqshrn2 %0.8h, %1.4s, #%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -14198,7 +12531,8 @@ uint64x2_t b_ = (b); \ uint32x2_t a_ = (a); \ uint32x4_t result = vcombine_u32 \ - (a_, vcreate_u32 (UINT64_C (0x0))); \ + (a_, vcreate_u32 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("uqshrn2 %0.4s, %1.2d, #%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -14212,7 +12546,8 @@ int16x8_t b_ = (b); \ uint8x8_t a_ = (a); \ uint8x16_t result = vcombine_u8 \ - (a_, vcreate_u8 (UINT64_C (0x0))); \ + (a_, vcreate_u8 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("sqshrun2 %0.16b, %1.8h, #%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -14226,7 +12561,8 @@ int32x4_t b_ = (b); \ uint16x4_t a_ = (a); \ uint16x8_t result = vcombine_u16 \ - (a_, vcreate_u16 (UINT64_C (0x0))); \ + (a_, vcreate_u16 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("sqshrun2 %0.8h, %1.4s, #%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -14240,7 +12576,8 @@ int64x2_t b_ = (b); \ uint32x2_t a_ = (a); \ uint32x4_t result = vcombine_u32 \ - (a_, vcreate_u32 (UINT64_C (0x0))); \ + (a_, vcreate_u32 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("sqshrun2 %0.4s, %1.2d, #%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -14292,17 +12629,6 @@ return result; } -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vrecpe_f32 (float32x2_t a) -{ - float32x2_t result; - __asm__ ("frecpe %0.2s,%1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - __extension__ static __inline 
uint32x2_t __attribute__ ((__always_inline__)) vrecpe_u32 (uint32x2_t a) { @@ -14314,39 +12640,6 @@ return result; } -__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -vrecped_f64 (float64_t a) -{ - float64_t result; - __asm__ ("frecpe %d0,%d1" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vrecpeq_f32 (float32x4_t a) -{ - float32x4_t result; - __asm__ ("frecpe %0.4s,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vrecpeq_f64 (float64x2_t a) -{ - float64x2_t result; - __asm__ ("frecpe %0.2d,%1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vrecpeq_u32 (uint32x4_t a) { @@ -14358,94 +12651,6 @@ return result; } -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vrecpes_f32 (float32_t a) -{ - float32_t result; - __asm__ ("frecpe %s0,%s1" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vrecps_f32 (float32x2_t a, float32x2_t b) -{ - float32x2_t result; - __asm__ ("frecps %0.2s,%1.2s,%2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -vrecpsd_f64 (float64_t a, float64_t b) -{ - float64_t result; - __asm__ ("frecps %d0,%d1,%d2" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vrecpsq_f32 (float32x4_t a, float32x4_t b) -{ - float32x4_t result; - __asm__ ("frecps %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vrecpsq_f64 (float64x2_t a, float64x2_t b) -{ - float64x2_t result; - __asm__ ("frecps %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vrecpss_f32 (float32_t a, float32_t b) -{ - float32_t result; - __asm__ ("frecps %s0,%s1,%s2" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -vrecpxd_f64 (float64_t a) -{ - float64_t result; - __asm__ ("frecpe %d0,%d1" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vrecpxs_f32 (float32_t a) -{ - float32_t result; - __asm__ ("frecpe %s0,%s1" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) vrev16_p8 (poly8x8_t a) { @@ -14842,171 +13047,6 @@ return result; } -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vrnd_f32 (float32x2_t a) -{ - float32x2_t result; - __asm__ ("frintz %0.2s,%1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vrnda_f32 (float32x2_t a) -{ - float32x2_t result; - __asm__ ("frinta %0.2s,%1.2s" - : "=w"(result) - : "w"(a) - : /* No 
clobbers */); - return result; -} - -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vrndm_f32 (float32x2_t a) -{ - float32x2_t result; - __asm__ ("frintm %0.2s,%1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vrndn_f32 (float32x2_t a) -{ - float32x2_t result; - __asm__ ("frintn %0.2s,%1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vrndp_f32 (float32x2_t a) -{ - float32x2_t result; - __asm__ ("frintp %0.2s,%1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vrndq_f32 (float32x4_t a) -{ - float32x4_t result; - __asm__ ("frintz %0.4s,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vrndq_f64 (float64x2_t a) -{ - float64x2_t result; - __asm__ ("frintz %0.2d,%1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vrndqa_f32 (float32x4_t a) -{ - float32x4_t result; - __asm__ ("frinta %0.4s,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vrndqa_f64 (float64x2_t a) -{ - float64x2_t result; - __asm__ ("frinta %0.2d,%1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vrndqm_f32 (float32x4_t a) -{ - float32x4_t result; - __asm__ ("frintm %0.4s,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vrndqm_f64 (float64x2_t a) -{ - float64x2_t result; - __asm__ ("frintm %0.2d,%1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vrndqn_f32 (float32x4_t a) -{ - float32x4_t result; - __asm__ ("frintn %0.4s,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vrndqn_f64 (float64x2_t a) -{ - float64x2_t result; - __asm__ ("frintn %0.2d,%1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vrndqp_f32 (float32x4_t a) -{ - float32x4_t result; - __asm__ ("frintp %0.4s,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vrndqp_f64 (float64x2_t a) -{ - float64x2_t result; - __asm__ ("frintp %0.2d,%1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - #define vrshrn_high_n_s16(a, b, c) \ __extension__ \ ({ \ @@ -15013,7 +13053,8 @@ int16x8_t b_ = (b); \ int8x8_t a_ = (a); \ int8x16_t result = vcombine_s8 \ - (a_, vcreate_s8 (UINT64_C (0x0))); \ + (a_, vcreate_s8 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("rshrn2 %0.16b,%1.8h,#%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -15027,7 +13068,8 @@ int32x4_t b_ = (b); \ int16x4_t a_ = (a); \ int16x8_t result = vcombine_s16 \ - 
(a_, vcreate_s16 (UINT64_C (0x0))); \ + (a_, vcreate_s16 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("rshrn2 %0.8h,%1.4s,#%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -15041,7 +13083,8 @@ int64x2_t b_ = (b); \ int32x2_t a_ = (a); \ int32x4_t result = vcombine_s32 \ - (a_, vcreate_s32 (UINT64_C (0x0))); \ + (a_, vcreate_s32 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("rshrn2 %0.4s,%1.2d,#%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -15055,7 +13098,8 @@ uint16x8_t b_ = (b); \ uint8x8_t a_ = (a); \ uint8x16_t result = vcombine_u8 \ - (a_, vcreate_u8 (UINT64_C (0x0))); \ + (a_, vcreate_u8 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("rshrn2 %0.16b,%1.8h,#%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -15069,7 +13113,8 @@ uint32x4_t b_ = (b); \ uint16x4_t a_ = (a); \ uint16x8_t result = vcombine_u16 \ - (a_, vcreate_u16 (UINT64_C (0x0))); \ + (a_, vcreate_u16 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("rshrn2 %0.8h,%1.4s,#%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -15083,7 +13128,8 @@ uint64x2_t b_ = (b); \ uint32x2_t a_ = (a); \ uint32x4_t result = vcombine_u32 \ - (a_, vcreate_u32 (UINT64_C (0x0))); \ + (a_, vcreate_u32 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("rshrn2 %0.4s,%1.2d,#%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -15320,7 +13366,7 @@ __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vrsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c) { - int8x16_t result = vcombine_s8 (a, vcreate_s8 (UINT64_C (0x0))); + int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0))); __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h" : "+w"(result) : "w"(b), "w"(c) @@ -15331,7 +13377,7 @@ __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) vrsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c) { - int16x8_t result = vcombine_s16 (a, vcreate_s16 (UINT64_C (0x0))); + int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0))); __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s" : "+w"(result) : "w"(b), "w"(c) @@ -15342,7 +13388,7 @@ __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vrsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c) { - int32x4_t result = vcombine_s32 (a, vcreate_s32 (UINT64_C (0x0))); + int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0))); __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d" : "+w"(result) : "w"(b), "w"(c) @@ -15353,7 +13399,7 @@ __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vrsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c) { - uint8x16_t result = vcombine_u8 (a, vcreate_u8 (UINT64_C (0x0))); + uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h" : "+w"(result) : "w"(b), "w"(c) @@ -15364,7 +13410,7 @@ __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vrsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c) { - uint16x8_t result = vcombine_u16 (a, vcreate_u16 (UINT64_C (0x0))); + uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s" : "+w"(result) : "w"(b), "w"(c) @@ -15375,7 +13421,7 @@ __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vrsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c) { - uint32x4_t result = vcombine_u32 (a, vcreate_u32 (UINT64_C (0x0))); + uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d" : "+w"(result) : "w"(b), "w"(c) @@ -15767,7 +13813,8 @@ 
int16x8_t b_ = (b); \ int8x8_t a_ = (a); \ int8x16_t result = vcombine_s8 \ - (a_, vcreate_s8 (UINT64_C (0x0))); \ + (a_, vcreate_s8 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("shrn2 %0.16b,%1.8h,#%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -15781,7 +13828,8 @@ int32x4_t b_ = (b); \ int16x4_t a_ = (a); \ int16x8_t result = vcombine_s16 \ - (a_, vcreate_s16 (UINT64_C (0x0))); \ + (a_, vcreate_s16 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("shrn2 %0.8h,%1.4s,#%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -15795,7 +13843,8 @@ int64x2_t b_ = (b); \ int32x2_t a_ = (a); \ int32x4_t result = vcombine_s32 \ - (a_, vcreate_s32 (UINT64_C (0x0))); \ + (a_, vcreate_s32 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("shrn2 %0.4s,%1.2d,#%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -15809,7 +13858,8 @@ uint16x8_t b_ = (b); \ uint8x8_t a_ = (a); \ uint8x16_t result = vcombine_u8 \ - (a_, vcreate_u8 (UINT64_C (0x0))); \ + (a_, vcreate_u8 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("shrn2 %0.16b,%1.8h,#%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -15823,7 +13873,8 @@ uint32x4_t b_ = (b); \ uint16x4_t a_ = (a); \ uint16x8_t result = vcombine_u16 \ - (a_, vcreate_u16 (UINT64_C (0x0))); \ + (a_, vcreate_u16 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("shrn2 %0.8h,%1.4s,#%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -15837,7 +13888,8 @@ uint64x2_t b_ = (b); \ uint32x2_t a_ = (a); \ uint32x4_t result = vcombine_u32 \ - (a_, vcreate_u32 (UINT64_C (0x0))); \ + (a_, vcreate_u32 \ + (__AARCH64_UINT64_C (0x0))); \ __asm__ ("shrn2 %0.4s,%1.2d,#%2" \ : "+w"(result) \ : "w"(b_), "i"(c) \ @@ -16289,7 +14341,7 @@ __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c) { - int8x16_t result = vcombine_s8 (a, vcreate_s8 (UINT64_C (0x0))); + int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0))); __asm__ ("subhn2 %0.16b, %1.8h, %2.8h" : "+w"(result) : "w"(b), "w"(c) @@ -16300,7 +14352,7 @@ __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) vsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c) { - int16x8_t result = vcombine_s16 (a, vcreate_s16 (UINT64_C (0x0))); + int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0))); __asm__ ("subhn2 %0.8h, %1.4s, %2.4s" : "+w"(result) : "w"(b), "w"(c) @@ -16311,7 +14363,7 @@ __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c) { - int32x4_t result = vcombine_s32 (a, vcreate_s32 (UINT64_C (0x0))); + int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0))); __asm__ ("subhn2 %0.4s, %1.2d, %2.2d" : "+w"(result) : "w"(b), "w"(c) @@ -16322,7 +14374,7 @@ __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c) { - uint8x16_t result = vcombine_u8 (a, vcreate_u8 (UINT64_C (0x0))); + uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); __asm__ ("subhn2 %0.16b, %1.8h, %2.8h" : "+w"(result) : "w"(b), "w"(c) @@ -16333,7 +14385,7 @@ __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c) { - uint16x8_t result = vcombine_u16 (a, vcreate_u16 (UINT64_C (0x0))); + uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); __asm__ ("subhn2 %0.8h, %1.4s, %2.4s" : "+w"(result) : "w"(b), "w"(c) @@ -16344,7 +14396,7 @@ __extension__ static __inline uint32x4_t __attribute__ 
((__always_inline__)) vsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c) { - uint32x4_t result = vcombine_u32 (a, vcreate_u32 (UINT64_C (0x0))); + uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); __asm__ ("subhn2 %0.4s, %1.2d, %2.2d" : "+w"(result) : "w"(b), "w"(c) @@ -18309,86 +16361,6 @@ return result; } -__extension__ static __inline int32_t __attribute__ ((__always_inline__)) -vaddv_s32 (int32x2_t a) -{ - int32_t result; - __asm__ ("addp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : ); - return result; -} - -__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -vaddv_u32 (uint32x2_t a) -{ - uint32_t result; - __asm__ ("addp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : ); - return result; -} - -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vmaxnmv_f32 (float32x2_t a) -{ - float32_t result; - __asm__ ("fmaxnmp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : ); - return result; -} - -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vminnmv_f32 (float32x2_t a) -{ - float32_t result; - __asm__ ("fminnmp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : ); - return result; -} - -__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -vmaxnmvq_f64 (float64x2_t a) -{ - float64_t result; - __asm__ ("fmaxnmp %0.2d, %1.2d, %1.2d" : "=w"(result) : "w"(a) : ); - return result; -} - -__extension__ static __inline int32_t __attribute__ ((__always_inline__)) -vmaxv_s32 (int32x2_t a) -{ - int32_t result; - __asm__ ("smaxp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : ); - return result; -} - -__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -vmaxv_u32 (uint32x2_t a) -{ - uint32_t result; - __asm__ ("umaxp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : ); - return result; -} - -__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -vminnmvq_f64 (float64x2_t a) -{ - float64_t result; - __asm__ ("fminnmp %0.2d, %1.2d, %1.2d" : "=w"(result) : "w"(a) : ); - return result; -} - -__extension__ static __inline int32_t __attribute__ ((__always_inline__)) -vminv_s32 (int32x2_t a) -{ - int32_t result; - __asm__ ("sminp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : ); - return result; -} - -__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -vminv_u32 (uint32x2_t a) -{ - uint32_t result; - __asm__ ("uminp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : ); - return result; -} - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vpaddd_s64 (int64x2_t __a) { @@ -19022,7 +16994,7 @@ vtbl1_s8 (int8x8_t tab, int8x8_t idx) { int8x8_t result; - int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (UINT64_C (0x0))); + int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (__AARCH64_UINT64_C (0x0))); __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" : "=w"(result) : "w"(temp), "w"(idx) @@ -19034,7 +17006,7 @@ vtbl1_u8 (uint8x8_t tab, uint8x8_t idx) { uint8x8_t result; - uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (UINT64_C (0x0))); + uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (__AARCH64_UINT64_C (0x0))); __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" : "=w"(result) : "w"(temp), "w"(idx) @@ -19046,7 +17018,7 @@ vtbl1_p8 (poly8x8_t tab, uint8x8_t idx) { poly8x8_t result; - poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (UINT64_C (0x0))); + poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (__AARCH64_UINT64_C (0x0))); __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" : "=w"(result) : "w"(temp), "w"(idx) @@ -19096,7 +17068,7 @@ int8x8_t 
result; int8x16x2_t temp; temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]); - temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (UINT64_C (0x0))); + temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (__AARCH64_UINT64_C (0x0))); __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t" : "=w"(result) @@ -19111,7 +17083,7 @@ uint8x8_t result; uint8x16x2_t temp; temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]); - temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (UINT64_C (0x0))); + temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (__AARCH64_UINT64_C (0x0))); __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t" : "=w"(result) @@ -19126,7 +17098,7 @@ poly8x8_t result; poly8x16x2_t temp; temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]); - temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (UINT64_C (0x0))); + temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (__AARCH64_UINT64_C (0x0))); __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t" : "=w"(result) @@ -19185,7 +17157,7 @@ { int8x8_t result; int8x8_t tmp1; - int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (UINT64_C (0x0))); + int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (__AARCH64_UINT64_C (0x0))); __asm__ ("movi %0.8b, 8\n\t" "cmhs %0.8b, %3.8b, %0.8b\n\t" "tbl %1.8b, {%2.16b}, %3.8b\n\t" @@ -19201,7 +17173,7 @@ { uint8x8_t result; uint8x8_t tmp1; - uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (UINT64_C (0x0))); + uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (__AARCH64_UINT64_C (0x0))); __asm__ ("movi %0.8b, 8\n\t" "cmhs %0.8b, %3.8b, %0.8b\n\t" "tbl %1.8b, {%2.16b}, %3.8b\n\t" @@ -19217,7 +17189,7 @@ { poly8x8_t result; poly8x8_t tmp1; - poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (UINT64_C (0x0))); + poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (__AARCH64_UINT64_C (0x0))); __asm__ ("movi %0.8b, 8\n\t" "cmhs %0.8b, %3.8b, %0.8b\n\t" "tbl %1.8b, {%2.16b}, %3.8b\n\t" @@ -19271,7 +17243,7 @@ int8x8_t tmp1; int8x16x2_t temp; temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]); - temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (UINT64_C (0x0))); + temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (__AARCH64_UINT64_C (0x0))); __asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t" "movi %0.8b, 24\n\t" "cmhs %0.8b, %3.8b, %0.8b\n\t" @@ -19290,7 +17262,7 @@ uint8x8_t tmp1; uint8x16x2_t temp; temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]); - temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (UINT64_C (0x0))); + temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (__AARCH64_UINT64_C (0x0))); __asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t" "movi %0.8b, 24\n\t" "cmhs %0.8b, %3.8b, %0.8b\n\t" @@ -19309,7 +17281,7 @@ poly8x8_t tmp1; poly8x16x2_t temp; temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]); - temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (UINT64_C (0x0))); + temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (__AARCH64_UINT64_C (0x0))); __asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t" "movi %0.8b, 24\n\t" "cmhs %0.8b, %3.8b, %0.8b\n\t" @@ -19370,6 +17342,80 @@ /* Start of optimal implementations in approved order. 
*/ +/* vabs */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vabs_f32 (float32x2_t __a) +{ + return __builtin_aarch64_absv2sf (__a); +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vabs_f64 (float64x1_t __a) +{ + return __builtin_fabs (__a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vabs_s8 (int8x8_t __a) +{ + return __builtin_aarch64_absv8qi (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vabs_s16 (int16x4_t __a) +{ + return __builtin_aarch64_absv4hi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vabs_s32 (int32x2_t __a) +{ + return __builtin_aarch64_absv2si (__a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vabs_s64 (int64x1_t __a) +{ + return __builtin_llabs (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vabsq_f32 (float32x4_t __a) +{ + return __builtin_aarch64_absv4sf (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vabsq_f64 (float64x2_t __a) +{ + return __builtin_aarch64_absv2df (__a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vabsq_s8 (int8x16_t __a) +{ + return __builtin_aarch64_absv16qi (__a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vabsq_s16 (int16x8_t __a) +{ + return __builtin_aarch64_absv8hi (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vabsq_s32 (int32x4_t __a) +{ + return __builtin_aarch64_absv4si (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vabsq_s64 (int64x2_t __a) +{ + return __builtin_aarch64_absv2di (__a); +} + /* vadd */ __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) @@ -19384,8 +17430,269 @@ return __a + __b; } -/* vceq */ +/* vaddv */ +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vaddv_s8 (int8x8_t __a) +{ + return vget_lane_s8 (__builtin_aarch64_reduc_splus_v8qi (__a), 0); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vaddv_s16 (int16x4_t __a) +{ + return vget_lane_s16 (__builtin_aarch64_reduc_splus_v4hi (__a), 0); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vaddv_s32 (int32x2_t __a) +{ + return vget_lane_s32 (__builtin_aarch64_reduc_splus_v2si (__a), 0); +} + +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vaddv_u8 (uint8x8_t __a) +{ + return vget_lane_u8 ((uint8x8_t) + __builtin_aarch64_reduc_uplus_v8qi ((int8x8_t) __a), 0); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vaddv_u16 (uint16x4_t __a) +{ + return vget_lane_u16 ((uint16x4_t) + __builtin_aarch64_reduc_uplus_v4hi ((int16x4_t) __a), 0); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vaddv_u32 (uint32x2_t __a) +{ + return vget_lane_u32 ((uint32x2_t) + __builtin_aarch64_reduc_uplus_v2si ((int32x2_t) __a), 0); +} + +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vaddvq_s8 (int8x16_t __a) +{ + return vgetq_lane_s8 (__builtin_aarch64_reduc_splus_v16qi (__a), 0); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vaddvq_s16 (int16x8_t __a) +{ + return vgetq_lane_s16 (__builtin_aarch64_reduc_splus_v8hi (__a), 0); +} + +__extension__ static 
__inline int32_t __attribute__ ((__always_inline__)) +vaddvq_s32 (int32x4_t __a) +{ + return vgetq_lane_s32 (__builtin_aarch64_reduc_splus_v4si (__a), 0); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vaddvq_s64 (int64x2_t __a) +{ + return vgetq_lane_s64 (__builtin_aarch64_reduc_splus_v2di (__a), 0); +} + +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vaddvq_u8 (uint8x16_t __a) +{ + return vgetq_lane_u8 ((uint8x16_t) + __builtin_aarch64_reduc_uplus_v16qi ((int8x16_t) __a), 0); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vaddvq_u16 (uint16x8_t __a) +{ + return vgetq_lane_u16 ((uint16x8_t) + __builtin_aarch64_reduc_uplus_v8hi ((int16x8_t) __a), 0); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vaddvq_u32 (uint32x4_t __a) +{ + return vgetq_lane_u32 ((uint32x4_t) + __builtin_aarch64_reduc_uplus_v4si ((int32x4_t) __a), 0); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vaddvq_u64 (uint64x2_t __a) +{ + return vgetq_lane_u64 ((uint64x2_t) + __builtin_aarch64_reduc_uplus_v2di ((int64x2_t) __a), 0); +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vaddv_f32 (float32x2_t __a) +{ + float32x2_t t = __builtin_aarch64_reduc_splus_v2sf (__a); + return vget_lane_f32 (t, 0); +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vaddvq_f32 (float32x4_t __a) +{ + float32x4_t t = __builtin_aarch64_reduc_splus_v4sf (__a); + return vgetq_lane_f32 (t, 0); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vaddvq_f64 (float64x2_t __a) +{ + float64x2_t t = __builtin_aarch64_reduc_splus_v2df (__a); + return vgetq_lane_f64 (t, 0); +} + +#ifdef __ARM_FEATURE_CRYPTO + +/* vaes */ + +static __inline uint8x16_t +vaeseq_u8 (uint8x16_t data, uint8x16_t key) +{ + return __builtin_aarch64_crypto_aesev16qi_uuu (data, key); +} + +static __inline uint8x16_t +__attribute__ ((__always_inline__)) +vaesdq_u8 (uint8x16_t data, uint8x16_t key) +{ + return __builtin_aarch64_crypto_aesdv16qi_uuu (data, key); +} + +static __inline uint8x16_t +vaesmcq_u8 (uint8x16_t data) +{ + return __builtin_aarch64_crypto_aesmcv16qi_uu (data); +} + +static __inline uint8x16_t +vaesimcq_u8 (uint8x16_t data) +{ + return __builtin_aarch64_crypto_aesimcv16qi_uu (data); +} + +#endif + +/* vcage */ + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcages_f32 (float32_t __a, float32_t __b) +{ + return __builtin_fabsf (__a) >= __builtin_fabsf (__b) ? -1 : 0; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcage_f32 (float32x2_t __a, float32x2_t __b) +{ + return vabs_f32 (__a) >= vabs_f32 (__b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcageq_f32 (float32x4_t __a, float32x4_t __b) +{ + return vabsq_f32 (__a) >= vabsq_f32 (__b); +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcaged_f64 (float64_t __a, float64_t __b) +{ + return __builtin_fabs (__a) >= __builtin_fabs (__b) ? -1 : 0; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcageq_f64 (float64x2_t __a, float64x2_t __b) +{ + return vabsq_f64 (__a) >= vabsq_f64 (__b); +} + +/* vcagt */ + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcagts_f32 (float32_t __a, float32_t __b) +{ + return __builtin_fabsf (__a) > __builtin_fabsf (__b) ? 
-1 : 0; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcagt_f32 (float32x2_t __a, float32x2_t __b) +{ + return vabs_f32 (__a) > vabs_f32 (__b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcagtq_f32 (float32x4_t __a, float32x4_t __b) +{ + return vabsq_f32 (__a) > vabsq_f32 (__b); +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcagtd_f64 (float64_t __a, float64_t __b) +{ + return __builtin_fabs (__a) > __builtin_fabs (__b) ? -1 : 0; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcagtq_f64 (float64x2_t __a, float64x2_t __b) +{ + return vabsq_f64 (__a) > vabsq_f64 (__b); +} + +/* vcale */ + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcale_f32 (float32x2_t __a, float32x2_t __b) +{ + return vabs_f32 (__a) <= vabs_f32 (__b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcaleq_f32 (float32x4_t __a, float32x4_t __b) +{ + return vabsq_f32 (__a) <= vabsq_f32 (__b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcaleq_f64 (float64x2_t __a, float64x2_t __b) +{ + return vabsq_f64 (__a) <= vabsq_f64 (__b); +} + +/* vcalt */ + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcalt_f32 (float32x2_t __a, float32x2_t __b) +{ + return vabs_f32 (__a) < vabs_f32 (__b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcaltq_f32 (float32x4_t __a, float32x4_t __b) +{ + return vabsq_f32 (__a) < vabsq_f32 (__b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcaltq_f64 (float64x2_t __a, float64x2_t __b) +{ + return vabsq_f64 (__a) < vabsq_f64 (__b); +} + +/* vceq - vector. */ + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vceq_f32 (float32x2_t __a, float32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vceq_f64 (float64x1_t __a, float64x1_t __b) +{ + return __a == __b ? -1ll : 0ll; +} + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vceq_p8 (poly8x8_t __a, poly8x8_t __b) { @@ -19414,7 +17721,7 @@ __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vceq_s64 (int64x1_t __a, int64x1_t __b) { - return (uint64x1_t) __builtin_aarch64_cmeqdi (__a, __b); + return __a == __b ? -1ll : 0ll; } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) @@ -19441,10 +17748,21 @@ __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vceq_u64 (uint64x1_t __a, uint64x1_t __b) { - return (uint64x1_t) __builtin_aarch64_cmeqdi ((int64x1_t) __a, - (int64x1_t) __b); + return __a == __b ? -1ll : 0ll; } +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vceqq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vceqq_f64 (float64x2_t __a, float64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b); +} + __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vceqq_p8 (poly8x16_t __a, poly8x16_t __b) { @@ -19504,27 +17822,245 @@ (int64x2_t) __b); } +/* vceq - scalar. 
*/ + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vceqs_f32 (float32_t __a, float32_t __b) +{ + return __a == __b ? -1 : 0; +} + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vceqd_s64 (int64x1_t __a, int64x1_t __b) { - return (uint64x1_t) __builtin_aarch64_cmeqdi (__a, __b); + return __a == __b ? -1ll : 0ll; } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vceqd_u64 (uint64x1_t __a, uint64x1_t __b) { - return (uint64x1_t) __builtin_aarch64_cmeqdi (__a, __b); + return __a == __b ? -1ll : 0ll; } +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vceqd_f64 (float64_t __a, float64_t __b) +{ + return __a == __b ? -1ll : 0ll; +} + +/* vceqz - vector. */ + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vceqz_f32 (float32x2_t __a) +{ + float32x2_t __b = {0.0f, 0.0f}; + return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b); +} + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vceqz_f64 (float64x1_t __a) +{ + return __a == 0.0 ? -1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vceqz_p8 (poly8x8_t __a) +{ + poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vceqz_s8 (int8x8_t __a) +{ + int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vceqz_s16 (int16x4_t __a) +{ + int16x4_t __b = {0, 0, 0, 0}; + return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vceqz_s32 (int32x2_t __a) +{ + int32x2_t __b = {0, 0}; + return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vceqz_s64 (int64x1_t __a) +{ + return __a == 0ll ? -1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vceqz_u8 (uint8x8_t __a) +{ + uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vceqz_u16 (uint16x4_t __a) +{ + uint16x4_t __b = {0, 0, 0, 0}; + return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vceqz_u32 (uint32x2_t __a) +{ + uint32x2_t __b = {0, 0}; + return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vceqz_u64 (uint64x1_t __a) +{ + return __a == 0ll ? 
-1ll : 0ll; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vceqzq_f32 (float32x4_t __a) +{ + float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f}; + return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vceqzq_f64 (float64x2_t __a) +{ + float64x2_t __b = {0.0, 0.0}; + return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vceqzq_p8 (poly8x16_t __a) +{ + poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vceqzq_s8 (int8x16_t __a) +{ + int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vceqzq_s16 (int16x8_t __a) +{ + int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vceqzq_s32 (int32x4_t __a) +{ + int32x4_t __b = {0, 0, 0, 0}; + return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vceqzq_s64 (int64x2_t __a) +{ + int64x2_t __b = {0, 0}; + return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vceqzq_u8 (uint8x16_t __a) +{ + uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vceqzq_u16 (uint16x8_t __a) +{ + uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a, + (int16x8_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vceqzq_u32 (uint32x4_t __a) +{ + uint32x4_t __b = {0, 0, 0, 0}; + return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a, + (int32x4_t) __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vceqzq_u64 (uint64x2_t __a) +{ + uint64x2_t __b = {0, 0}; + return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a, + (int64x2_t) __b); +} + +/* vceqz - scalar. */ + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vceqzs_f32 (float32_t __a) +{ + return __a == 0.0f ? -1 : 0; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vceqzd_s64 (int64x1_t __a) { - return (uint64x1_t) __builtin_aarch64_cmeqdi (__a, 0); + return __a == 0 ? -1ll : 0ll; } -/* vcge */ +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vceqzd_u64 (int64x1_t __a) +{ + return __a == 0 ? -1ll : 0ll; +} +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vceqzd_f64 (float64_t __a) +{ + return __a == 0.0 ? -1ll : 0ll; +} + +/* vcge - vector. 
*/ + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcge_f32 (float32x2_t __a, float32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcge_f64 (float64x1_t __a, float64x1_t __b) +{ + return __a >= __b ? -1ll : 0ll; +} + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcge_p8 (poly8x8_t __a, poly8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vcge_s8 (int8x8_t __a, int8x8_t __b) { return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b); @@ -19545,7 +18081,7 @@ __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vcge_s64 (int64x1_t __a, int64x1_t __b) { - return (uint64x1_t) __builtin_aarch64_cmgedi (__a, __b); + return __a >= __b ? -1ll : 0ll; } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) @@ -19572,11 +18108,29 @@ __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vcge_u64 (uint64x1_t __a, uint64x1_t __b) { - return (uint64x1_t) __builtin_aarch64_cmgeudi ((int64x1_t) __a, - (int64x1_t) __b); + return __a >= __b ? -1ll : 0ll; } +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgeq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcgeq_f64 (float64x2_t __a, float64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b); +} + __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgeq_p8 (poly8x16_t __a, poly8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vcgeq_s8 (int8x16_t __a, int8x16_t __b) { return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b); @@ -19628,28 +18182,245 @@ (int64x2_t) __b); } +/* vcge - scalar. */ + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcges_f32 (float32_t __a, float32_t __b) +{ + return __a >= __b ? -1 : 0; +} + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vcged_s64 (int64x1_t __a, int64x1_t __b) { - return (uint64x1_t) __builtin_aarch64_cmgedi (__a, __b); + return __a >= __b ? -1ll : 0ll; } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vcged_u64 (uint64x1_t __a, uint64x1_t __b) { - return (uint64x1_t) __builtin_aarch64_cmgeudi ((int64x1_t) __a, - (int64x1_t) __b); + return __a >= __b ? -1ll : 0ll; } +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcged_f64 (float64_t __a, float64_t __b) +{ + return __a >= __b ? -1ll : 0ll; +} + +/* vcgez - vector. */ + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcgez_f32 (float32x2_t __a) +{ + float32x2_t __b = {0.0f, 0.0f}; + return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b); +} + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcgez_f64 (float64x1_t __a) +{ + return __a >= 0.0 ? 
-1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcgez_p8 (poly8x8_t __a) +{ + poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcgez_s8 (int8x8_t __a) +{ + int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcgez_s16 (int16x4_t __a) +{ + int16x4_t __b = {0, 0, 0, 0}; + return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcgez_s32 (int32x2_t __a) +{ + int32x2_t __b = {0, 0}; + return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcgez_s64 (int64x1_t __a) +{ + return __a >= 0ll ? -1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcgez_u8 (uint8x8_t __a) +{ + uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcgez_u16 (uint16x4_t __a) +{ + uint16x4_t __b = {0, 0, 0, 0}; + return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcgez_u32 (uint32x2_t __a) +{ + uint32x2_t __b = {0, 0}; + return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcgez_u64 (uint64x1_t __a) +{ + return __a >= 0ll ? 
-1ll : 0ll; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgezq_f32 (float32x4_t __a) +{ + float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f}; + return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcgezq_f64 (float64x2_t __a) +{ + float64x2_t __b = {0.0, 0.0}; + return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgezq_p8 (poly8x16_t __a) +{ + poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgezq_s8 (int8x16_t __a) +{ + int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcgezq_s16 (int16x8_t __a) +{ + int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgezq_s32 (int32x4_t __a) +{ + int32x4_t __b = {0, 0, 0, 0}; + return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcgezq_s64 (int64x2_t __a) +{ + int64x2_t __b = {0, 0}; + return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgezq_u8 (uint8x16_t __a) +{ + uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcgezq_u16 (uint16x8_t __a) +{ + uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a, + (int16x8_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgezq_u32 (uint32x4_t __a) +{ + uint32x4_t __b = {0, 0, 0, 0}; + return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a, + (int32x4_t) __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcgezq_u64 (uint64x2_t __a) +{ + uint64x2_t __b = {0, 0}; + return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a, + (int64x2_t) __b); +} + +/* vcgez - scalar. */ + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcgezs_f32 (float32_t __a) +{ + return __a >= 0.0f ? -1 : 0; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vcgezd_s64 (int64x1_t __a) { - return (uint64x1_t) __builtin_aarch64_cmgedi (__a, 0); + return __a >= 0 ? -1ll : 0ll; } -/* vcgt */ +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcgezd_u64 (int64x1_t __a) +{ + return __a >= 0 ? -1ll : 0ll; +} +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcgezd_f64 (float64_t __a) +{ + return __a >= 0.0 ? -1ll : 0ll; +} + +/* vcgt - vector. 
*/ + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcgt_f32 (float32x2_t __a, float32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcgt_f64 (float64x1_t __a, float64x1_t __b) +{ + return __a > __b ? -1ll : 0ll; +} + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcgt_p8 (poly8x8_t __a, poly8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vcgt_s8 (int8x8_t __a, int8x8_t __b) { return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b); @@ -19670,7 +18441,7 @@ __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vcgt_s64 (int64x1_t __a, int64x1_t __b) { - return (uint64x1_t) __builtin_aarch64_cmgtdi (__a, __b); + return __a > __b ? -1ll : 0ll; } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) @@ -19697,11 +18468,29 @@ __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vcgt_u64 (uint64x1_t __a, uint64x1_t __b) { - return (uint64x1_t) __builtin_aarch64_cmgtudi ((int64x1_t) __a, - (int64x1_t) __b); + return __a > __b ? -1ll : 0ll; } +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgtq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcgtq_f64 (float64x2_t __a, float64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b); +} + __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgtq_p8 (poly8x16_t __a, poly8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vcgtq_s8 (int8x16_t __a, int8x16_t __b) { return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b); @@ -19753,28 +18542,245 @@ (int64x2_t) __b); } +/* vcgt - scalar. */ + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcgts_f32 (float32_t __a, float32_t __b) +{ + return __a > __b ? -1 : 0; +} + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vcgtd_s64 (int64x1_t __a, int64x1_t __b) { - return (uint64x1_t) __builtin_aarch64_cmgtdi (__a, __b); + return __a > __b ? -1ll : 0ll; } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vcgtd_u64 (uint64x1_t __a, uint64x1_t __b) { - return (uint64x1_t) __builtin_aarch64_cmgtudi ((int64x1_t) __a, - (int64x1_t) __b); + return __a > __b ? -1ll : 0ll; } +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcgtd_f64 (float64_t __a, float64_t __b) +{ + return __a > __b ? -1ll : 0ll; +} + +/* vcgtz - vector. */ + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcgtz_f32 (float32x2_t __a) +{ + float32x2_t __b = {0.0f, 0.0f}; + return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b); +} + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcgtz_f64 (float64x1_t __a) +{ + return __a > 0.0 ? 
-1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcgtz_p8 (poly8x8_t __a) +{ + poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcgtz_s8 (int8x8_t __a) +{ + int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcgtz_s16 (int16x4_t __a) +{ + int16x4_t __b = {0, 0, 0, 0}; + return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcgtz_s32 (int32x2_t __a) +{ + int32x2_t __b = {0, 0}; + return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcgtz_s64 (int64x1_t __a) +{ + return __a > 0ll ? -1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcgtz_u8 (uint8x8_t __a) +{ + uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcgtz_u16 (uint16x4_t __a) +{ + uint16x4_t __b = {0, 0, 0, 0}; + return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcgtz_u32 (uint32x2_t __a) +{ + uint32x2_t __b = {0, 0}; + return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcgtz_u64 (uint64x1_t __a) +{ + return __a > 0ll ? 
-1ll : 0ll; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgtzq_f32 (float32x4_t __a) +{ + float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f}; + return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcgtzq_f64 (float64x2_t __a) +{ + float64x2_t __b = {0.0, 0.0}; + return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgtzq_p8 (poly8x16_t __a) +{ + poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgtzq_s8 (int8x16_t __a) +{ + int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcgtzq_s16 (int16x8_t __a) +{ + int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgtzq_s32 (int32x4_t __a) +{ + int32x4_t __b = {0, 0, 0, 0}; + return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcgtzq_s64 (int64x2_t __a) +{ + int64x2_t __b = {0, 0}; + return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgtzq_u8 (uint8x16_t __a) +{ + uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcgtzq_u16 (uint16x8_t __a) +{ + uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a, + (int16x8_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgtzq_u32 (uint32x4_t __a) +{ + uint32x4_t __b = {0, 0, 0, 0}; + return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a, + (int32x4_t) __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcgtzq_u64 (uint64x2_t __a) +{ + uint64x2_t __b = {0, 0}; + return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a, + (int64x2_t) __b); +} + +/* vcgtz - scalar. */ + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcgtzs_f32 (float32_t __a) +{ + return __a > 0.0f ? -1 : 0; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vcgtzd_s64 (int64x1_t __a) { - return (uint64x1_t) __builtin_aarch64_cmgtdi (__a, 0); + return __a > 0 ? -1ll : 0ll; } -/* vcle */ +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcgtzd_u64 (int64x1_t __a) +{ + return __a > 0 ? -1ll : 0ll; +} +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcgtzd_f64 (float64_t __a) +{ + return __a > 0.0 ? -1ll : 0ll; +} + +/* vcle - vector. 
*/ + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcle_f32 (float32x2_t __a, float32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_cmgev2sf (__b, __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcle_f64 (float64x1_t __a, float64x1_t __b) +{ + return __a <= __b ? -1ll : 0ll; +} + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcle_p8 (poly8x8_t __a, poly8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __b, + (int8x8_t) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vcle_s8 (int8x8_t __a, int8x8_t __b) { return (uint8x8_t) __builtin_aarch64_cmgev8qi (__b, __a); @@ -19795,7 +18801,7 @@ __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vcle_s64 (int64x1_t __a, int64x1_t __b) { - return (uint64x1_t) __builtin_aarch64_cmgedi (__b, __a); + return __a <= __b ? -1ll : 0ll; } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) @@ -19822,11 +18828,29 @@ __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vcle_u64 (uint64x1_t __a, uint64x1_t __b) { - return (uint64x1_t) __builtin_aarch64_cmgeudi ((int64x1_t) __b, - (int64x1_t) __a); + return __a <= __b ? -1ll : 0ll; } +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcleq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_cmgev4sf (__b, __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcleq_f64 (float64x2_t __a, float64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_cmgev2df (__b, __a); +} + __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcleq_p8 (poly8x16_t __a, poly8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __b, + (int8x16_t) __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vcleq_s8 (int8x16_t __a, int8x16_t __b) { return (uint8x16_t) __builtin_aarch64_cmgev16qi (__b, __a); @@ -19878,21 +18902,188 @@ (int64x2_t) __a); } +/* vcle - scalar. */ + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcles_f32 (float32_t __a, float32_t __b) +{ + return __a <= __b ? -1 : 0; +} + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vcled_s64 (int64x1_t __a, int64x1_t __b) { - return (uint64x1_t) __builtin_aarch64_cmgedi (__b, __a); + return __a <= __b ? -1ll : 0ll; } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcled_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return __a <= __b ? -1ll : 0ll; +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcled_f64 (float64_t __a, float64_t __b) +{ + return __a <= __b ? -1ll : 0ll; +} + +/* vclez - vector. */ + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vclez_f32 (float32x2_t __a) +{ + float32x2_t __b = {0.0f, 0.0f}; + return (uint32x2_t) __builtin_aarch64_cmlev2sf (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vclez_f64 (float64x1_t __a) +{ + return __a <= 0.0 ? 
-1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vclez_p8 (poly8x8_t __a) +{ + poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmlev8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vclez_s8 (int8x8_t __a) +{ + int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmlev8qi (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vclez_s16 (int16x4_t __a) +{ + int16x4_t __b = {0, 0, 0, 0}; + return (uint16x4_t) __builtin_aarch64_cmlev4hi (__a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vclez_s32 (int32x2_t __a) +{ + int32x2_t __b = {0, 0}; + return (uint32x2_t) __builtin_aarch64_cmlev2si (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vclez_s64 (int64x1_t __a) +{ + return __a <= 0ll ? -1ll : 0ll; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vclez_u64 (uint64x1_t __a) +{ + return __a <= 0ll ? -1ll : 0ll; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vclezq_f32 (float32x4_t __a) +{ + float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f}; + return (uint32x4_t) __builtin_aarch64_cmlev4sf (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vclezq_f64 (float64x2_t __a) +{ + float64x2_t __b = {0.0, 0.0}; + return (uint64x2_t) __builtin_aarch64_cmlev2df (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vclezq_p8 (poly8x16_t __a) +{ + poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmlev16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vclezq_s8 (int8x16_t __a) +{ + int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmlev16qi (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vclezq_s16 (int16x8_t __a) +{ + int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint16x8_t) __builtin_aarch64_cmlev8hi (__a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vclezq_s32 (int32x4_t __a) +{ + int32x4_t __b = {0, 0, 0, 0}; + return (uint32x4_t) __builtin_aarch64_cmlev4si (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vclezq_s64 (int64x2_t __a) +{ + int64x2_t __b = {0, 0}; + return (uint64x2_t) __builtin_aarch64_cmlev2di (__a, __b); +} + +/* vclez - scalar. */ + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vclezs_f32 (float32_t __a) +{ + return __a <= 0.0f ? -1 : 0; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vclezd_s64 (int64x1_t __a) { - return (uint64x1_t) __builtin_aarch64_cmledi (__a, 0); + return __a <= 0 ? -1ll : 0ll; } -/* vclt */ +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vclezd_u64 (int64x1_t __a) +{ + return __a <= 0 ? -1ll : 0ll; +} +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vclezd_f64 (float64_t __a) +{ + return __a <= 0.0 ? -1ll : 0ll; +} + +/* vclt - vector. 
*/ + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vclt_f32 (float32x2_t __a, float32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__b, __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vclt_f64 (float64x1_t __a, float64x1_t __b) +{ + return __a < __b ? -1ll : 0ll; +} + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vclt_p8 (poly8x8_t __a, poly8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __b, + (int8x8_t) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vclt_s8 (int8x8_t __a, int8x8_t __b) { return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__b, __a); @@ -19913,7 +19104,7 @@ __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vclt_s64 (int64x1_t __a, int64x1_t __b) { - return (uint64x1_t) __builtin_aarch64_cmgtdi (__b, __a); + return __a < __b ? -1ll : 0ll; } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) @@ -19940,11 +19131,29 @@ __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vclt_u64 (uint64x1_t __a, uint64x1_t __b) { - return (uint64x1_t) __builtin_aarch64_cmgtudi ((int64x1_t) __b, - (int64x1_t) __a); + return __a < __b ? -1ll : 0ll; } +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcltq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__b, __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcltq_f64 (float64x2_t __a, float64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_cmgtv2df (__b, __a); +} + __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcltq_p8 (poly8x16_t __a, poly8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __b, + (int8x16_t) __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vcltq_s8 (int8x16_t __a, int8x16_t __b) { return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__b, __a); @@ -19996,66 +19205,639 @@ (int64x2_t) __a); } +/* vclt - scalar. */ + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vclts_f32 (float32_t __a, float32_t __b) +{ + return __a < __b ? -1 : 0; +} + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vcltd_s64 (int64x1_t __a, int64x1_t __b) { - return (uint64x1_t) __builtin_aarch64_cmgtdi (__b, __a); + return __a < __b ? -1ll : 0ll; } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcltd_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return __a < __b ? -1ll : 0ll; +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcltd_f64 (float64_t __a, float64_t __b) +{ + return __a < __b ? -1ll : 0ll; +} + +/* vcltz - vector. */ + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcltz_f32 (float32x2_t __a) +{ + float32x2_t __b = {0.0f, 0.0f}; + return (uint32x2_t) __builtin_aarch64_cmltv2sf (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcltz_f64 (float64x1_t __a) +{ + return __a < 0.0 ? 
-1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcltz_p8 (poly8x8_t __a) +{ + poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmltv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcltz_s8 (int8x8_t __a) +{ + int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmltv8qi (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcltz_s16 (int16x4_t __a) +{ + int16x4_t __b = {0, 0, 0, 0}; + return (uint16x4_t) __builtin_aarch64_cmltv4hi (__a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcltz_s32 (int32x2_t __a) +{ + int32x2_t __b = {0, 0}; + return (uint32x2_t) __builtin_aarch64_cmltv2si (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcltz_s64 (int64x1_t __a) +{ + return __a < 0ll ? -1ll : 0ll; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcltzq_f32 (float32x4_t __a) +{ + float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f}; + return (uint32x4_t) __builtin_aarch64_cmltv4sf (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcltzq_f64 (float64x2_t __a) +{ + float64x2_t __b = {0.0, 0.0}; + return (uint64x2_t) __builtin_aarch64_cmltv2df (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcltzq_p8 (poly8x16_t __a) +{ + poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmltv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcltzq_s8 (int8x16_t __a) +{ + int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmltv16qi (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcltzq_s16 (int16x8_t __a) +{ + int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint16x8_t) __builtin_aarch64_cmltv8hi (__a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcltzq_s32 (int32x4_t __a) +{ + int32x4_t __b = {0, 0, 0, 0}; + return (uint32x4_t) __builtin_aarch64_cmltv4si (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcltzq_s64 (int64x2_t __a) +{ + int64x2_t __b = {0, 0}; + return (uint64x2_t) __builtin_aarch64_cmltv2di (__a, __b); +} + +/* vcltz - scalar. */ + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcltzs_f32 (float32_t __a) +{ + return __a < 0.0f ? -1 : 0; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vcltzd_s64 (int64x1_t __a) { - return (uint64x1_t) __builtin_aarch64_cmltdi (__a, 0); + return __a < 0 ? -1ll : 0ll; } +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcltzd_u64 (int64x1_t __a) +{ + return __a < 0 ? -1ll : 0ll; +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcltzd_f64 (float64_t __a) +{ + return __a < 0.0 ? -1ll : 0ll; +} + +/* vcvt (double -> float). 
*/ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vcvt_f32_f64 (float64x2_t __a) +{ + return __builtin_aarch64_float_truncate_lo_v2sf (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vcvt_high_f32_f64 (float32x2_t __a, float64x2_t __b) +{ + return __builtin_aarch64_float_truncate_hi_v4sf (__a, __b); +} + +/* vcvt (float -> double). */ + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vcvt_f64_f32 (float32x2_t __a) +{ + + return __builtin_aarch64_float_extend_lo_v2df (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vcvt_high_f64_f32 (float32x4_t __a) +{ + return __builtin_aarch64_vec_unpacks_hi_v4sf (__a); +} + +/* vcvt (int -> float) */ + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vcvtd_f64_s64 (int64_t __a) +{ + return (float64_t) __a; +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vcvtd_f64_u64 (uint64_t __a) +{ + return (float64_t) __a; +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vcvts_f32_s32 (int32_t __a) +{ + return (float32_t) __a; +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vcvts_f32_u32 (uint32_t __a) +{ + return (float32_t) __a; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vcvt_f32_s32 (int32x2_t __a) +{ + return __builtin_aarch64_floatv2siv2sf (__a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vcvt_f32_u32 (uint32x2_t __a) +{ + return __builtin_aarch64_floatunsv2siv2sf ((int32x2_t) __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vcvtq_f32_s32 (int32x4_t __a) +{ + return __builtin_aarch64_floatv4siv4sf (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vcvtq_f32_u32 (uint32x4_t __a) +{ + return __builtin_aarch64_floatunsv4siv4sf ((int32x4_t) __a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vcvtq_f64_s64 (int64x2_t __a) +{ + return __builtin_aarch64_floatv2div2df (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vcvtq_f64_u64 (uint64x2_t __a) +{ + return __builtin_aarch64_floatunsv2div2df ((int64x2_t) __a); +} + +/* vcvt (float -> int) */ + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vcvtd_s64_f64 (float64_t __a) +{ + return (int64_t) __a; +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcvtd_u64_f64 (float64_t __a) +{ + return (uint64_t) __a; +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vcvts_s32_f32 (float32_t __a) +{ + return (int32_t) __a; +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcvts_u32_f32 (float32_t __a) +{ + return (uint32_t) __a; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vcvt_s32_f32 (float32x2_t __a) +{ + return __builtin_aarch64_lbtruncv2sfv2si (__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcvt_u32_f32 (float32x2_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. 
*/ + return (uint32x2_t) __builtin_aarch64_lbtruncuv2sfv2si (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vcvtq_s32_f32 (float32x4_t __a) +{ + return __builtin_aarch64_lbtruncv4sfv4si (__a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcvtq_u32_f32 (float32x4_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint32x4_t) __builtin_aarch64_lbtruncuv4sfv4si (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vcvtq_s64_f64 (float64x2_t __a) +{ + return __builtin_aarch64_lbtruncv2dfv2di (__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcvtq_u64_f64 (float64x2_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint64x2_t) __builtin_aarch64_lbtruncuv2dfv2di (__a); +} + +/* vcvta */ + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vcvtad_s64_f64 (float64_t __a) +{ + return __builtin_aarch64_lrounddfdi (__a); +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcvtad_u64_f64 (float64_t __a) +{ + return __builtin_aarch64_lroundudfdi (__a); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vcvtas_s32_f32 (float32_t __a) +{ + return __builtin_aarch64_lroundsfsi (__a); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcvtas_u32_f32 (float32_t __a) +{ + return __builtin_aarch64_lroundusfsi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vcvta_s32_f32 (float32x2_t __a) +{ + return __builtin_aarch64_lroundv2sfv2si (__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcvta_u32_f32 (float32x2_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint32x2_t) __builtin_aarch64_lrounduv2sfv2si (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vcvtaq_s32_f32 (float32x4_t __a) +{ + return __builtin_aarch64_lroundv4sfv4si (__a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcvtaq_u32_f32 (float32x4_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint32x4_t) __builtin_aarch64_lrounduv4sfv4si (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vcvtaq_s64_f64 (float64x2_t __a) +{ + return __builtin_aarch64_lroundv2dfv2di (__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcvtaq_u64_f64 (float64x2_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. 
*/ + return (uint64x2_t) __builtin_aarch64_lrounduv2dfv2di (__a); +} + +/* vcvtm */ + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vcvtmd_s64_f64 (float64_t __a) +{ + return __builtin_lfloor (__a); +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcvtmd_u64_f64 (float64_t __a) +{ + return __builtin_aarch64_lfloorudfdi (__a); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vcvtms_s32_f32 (float32_t __a) +{ + return __builtin_ifloorf (__a); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcvtms_u32_f32 (float32_t __a) +{ + return __builtin_aarch64_lfloorusfsi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vcvtm_s32_f32 (float32x2_t __a) +{ + return __builtin_aarch64_lfloorv2sfv2si (__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcvtm_u32_f32 (float32x2_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint32x2_t) __builtin_aarch64_lflooruv2sfv2si (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vcvtmq_s32_f32 (float32x4_t __a) +{ + return __builtin_aarch64_lfloorv4sfv4si (__a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcvtmq_u32_f32 (float32x4_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint32x4_t) __builtin_aarch64_lflooruv4sfv4si (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vcvtmq_s64_f64 (float64x2_t __a) +{ + return __builtin_aarch64_lfloorv2dfv2di (__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcvtmq_u64_f64 (float64x2_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint64x2_t) __builtin_aarch64_lflooruv2dfv2di (__a); +} + +/* vcvtn */ + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vcvtnd_s64_f64 (float64_t __a) +{ + return __builtin_aarch64_lfrintndfdi (__a); +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcvtnd_u64_f64 (float64_t __a) +{ + return __builtin_aarch64_lfrintnudfdi (__a); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vcvtns_s32_f32 (float32_t __a) +{ + return __builtin_aarch64_lfrintnsfsi (__a); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcvtns_u32_f32 (float32_t __a) +{ + return __builtin_aarch64_lfrintnusfsi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vcvtn_s32_f32 (float32x2_t __a) +{ + return __builtin_aarch64_lfrintnv2sfv2si (__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcvtn_u32_f32 (float32x2_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint32x2_t) __builtin_aarch64_lfrintnuv2sfv2si (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vcvtnq_s32_f32 (float32x4_t __a) +{ + return __builtin_aarch64_lfrintnv4sfv4si (__a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcvtnq_u32_f32 (float32x4_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. 
*/ + return (uint32x4_t) __builtin_aarch64_lfrintnuv4sfv4si (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vcvtnq_s64_f64 (float64x2_t __a) +{ + return __builtin_aarch64_lfrintnv2dfv2di (__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcvtnq_u64_f64 (float64x2_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint64x2_t) __builtin_aarch64_lfrintnuv2dfv2di (__a); +} + +/* vcvtp */ + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vcvtpd_s64_f64 (float64_t __a) +{ + return __builtin_lceil (__a); +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcvtpd_u64_f64 (float64_t __a) +{ + return __builtin_aarch64_lceiludfdi (__a); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vcvtps_s32_f32 (float32_t __a) +{ + return __builtin_iceilf (__a); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcvtps_u32_f32 (float32_t __a) +{ + return __builtin_aarch64_lceilusfsi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vcvtp_s32_f32 (float32x2_t __a) +{ + return __builtin_aarch64_lceilv2sfv2si (__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcvtp_u32_f32 (float32x2_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint32x2_t) __builtin_aarch64_lceiluv2sfv2si (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vcvtpq_s32_f32 (float32x4_t __a) +{ + return __builtin_aarch64_lceilv4sfv4si (__a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcvtpq_u32_f32 (float32x4_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint32x4_t) __builtin_aarch64_lceiluv4sfv4si (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vcvtpq_s64_f64 (float64x2_t __a) +{ + return __builtin_aarch64_lceilv2dfv2di (__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcvtpq_u64_f64 (float64x2_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. 
*/ + return (uint64x2_t) __builtin_aarch64_lceiluv2dfv2di (__a); +} + /* vdup */ __extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) vdupb_lane_s8 (int8x16_t a, int const b) { - return __builtin_aarch64_dup_laneqi (a, b); + return __aarch64_vgetq_lane_s8 (a, b); } __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) vdupb_lane_u8 (uint8x16_t a, int const b) { - return (uint8x1_t) __builtin_aarch64_dup_laneqi ((int8x16_t) a, b); + return __aarch64_vgetq_lane_u8 (a, b); } __extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) vduph_lane_s16 (int16x8_t a, int const b) { - return __builtin_aarch64_dup_lanehi (a, b); + return __aarch64_vgetq_lane_s16 (a, b); } __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) vduph_lane_u16 (uint16x8_t a, int const b) { - return (uint16x1_t) __builtin_aarch64_dup_lanehi ((int16x8_t) a, b); + return __aarch64_vgetq_lane_u16 (a, b); } __extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) vdups_lane_s32 (int32x4_t a, int const b) { - return __builtin_aarch64_dup_lanesi (a, b); + return __aarch64_vgetq_lane_s32 (a, b); } __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) vdups_lane_u32 (uint32x4_t a, int const b) { - return (uint32x1_t) __builtin_aarch64_dup_lanesi ((int32x4_t) a, b); + return __aarch64_vgetq_lane_u32 (a, b); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vdupd_lane_s64 (int64x2_t a, int const b) { - return __builtin_aarch64_dup_lanedi (a, b); + return __aarch64_vgetq_lane_s64 (a, b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vdupd_lane_u64 (uint64x2_t a, int const b) { - return (uint64x1_t) __builtin_aarch64_dup_lanedi ((int64x2_t) a, b); + return __aarch64_vgetq_lane_u64 (a, b); } /* vld1 */ @@ -21088,7 +20870,7 @@ __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vmax_f32 (float32x2_t __a, float32x2_t __b) { - return __builtin_aarch64_fmaxv2sf (__a, __b); + return __builtin_aarch64_smax_nanv2sf (__a, __b); } __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) @@ -21133,13 +20915,13 @@ __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vmaxq_f32 (float32x4_t __a, float32x4_t __b) { - return __builtin_aarch64_fmaxv4sf (__a, __b); + return __builtin_aarch64_smax_nanv4sf (__a, __b); } __extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) vmaxq_f64 (float64x2_t __a, float64x2_t __b) { - return __builtin_aarch64_fmaxv2df (__a, __b); + return __builtin_aarch64_smax_nanv2df (__a, __b); } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) @@ -21181,12 +20963,150 @@ (int32x4_t) __b); } -/* vmin */ +/* vmaxnm */ __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmaxnm_f32 (float32x2_t __a, float32x2_t __b) +{ + return __builtin_aarch64_smaxv2sf (__a, __b); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmaxnmq_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_aarch64_smaxv4sf (__a, __b); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vmaxnmq_f64 (float64x2_t __a, float64x2_t __b) +{ + return __builtin_aarch64_smaxv2df (__a, __b); +} + +/* vmaxv */ + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vmaxv_f32 (float32x2_t __a) +{ + return vget_lane_f32 
(__builtin_aarch64_reduc_smax_nan_v2sf (__a), 0); +} + +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vmaxv_s8 (int8x8_t __a) +{ + return vget_lane_s8 (__builtin_aarch64_reduc_smax_v8qi (__a), 0); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vmaxv_s16 (int16x4_t __a) +{ + return vget_lane_s16 (__builtin_aarch64_reduc_smax_v4hi (__a), 0); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vmaxv_s32 (int32x2_t __a) +{ + return vget_lane_s32 (__builtin_aarch64_reduc_smax_v2si (__a), 0); +} + +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vmaxv_u8 (uint8x8_t __a) +{ + return vget_lane_u8 ((uint8x8_t) + __builtin_aarch64_reduc_umax_v8qi ((int8x8_t) __a), 0); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vmaxv_u16 (uint16x4_t __a) +{ + return vget_lane_u16 ((uint16x4_t) + __builtin_aarch64_reduc_umax_v4hi ((int16x4_t) __a), 0); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vmaxv_u32 (uint32x2_t __a) +{ + return vget_lane_u32 ((uint32x2_t) + __builtin_aarch64_reduc_umax_v2si ((int32x2_t) __a), 0); +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vmaxvq_f32 (float32x4_t __a) +{ + return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_nan_v4sf (__a), 0); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vmaxvq_f64 (float64x2_t __a) +{ + return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_nan_v2df (__a), 0); +} + +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vmaxvq_s8 (int8x16_t __a) +{ + return vgetq_lane_s8 (__builtin_aarch64_reduc_smax_v16qi (__a), 0); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vmaxvq_s16 (int16x8_t __a) +{ + return vgetq_lane_s16 (__builtin_aarch64_reduc_smax_v8hi (__a), 0); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vmaxvq_s32 (int32x4_t __a) +{ + return vgetq_lane_s32 (__builtin_aarch64_reduc_smax_v4si (__a), 0); +} + +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vmaxvq_u8 (uint8x16_t __a) +{ + return vgetq_lane_u8 ((uint8x16_t) + __builtin_aarch64_reduc_umax_v16qi ((int8x16_t) __a), 0); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vmaxvq_u16 (uint16x8_t __a) +{ + return vgetq_lane_u16 ((uint16x8_t) + __builtin_aarch64_reduc_umax_v8hi ((int16x8_t) __a), 0); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vmaxvq_u32 (uint32x4_t __a) +{ + return vgetq_lane_u32 ((uint32x4_t) + __builtin_aarch64_reduc_umax_v4si ((int32x4_t) __a), 0); +} + +/* vmaxnmv */ + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vmaxnmv_f32 (float32x2_t __a) +{ + return vget_lane_f32 (__builtin_aarch64_reduc_smax_v2sf (__a), 0); +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vmaxnmvq_f32 (float32x4_t __a) +{ + return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_v4sf (__a), 0); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vmaxnmvq_f64 (float64x2_t __a) +{ + return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_v2df (__a), 0); +} + +/* vmin */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vmin_f32 (float32x2_t __a, float32x2_t __b) { - return __builtin_aarch64_fminv2sf (__a, __b); + return 
__builtin_aarch64_smin_nanv2sf (__a, __b); } __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) @@ -21231,13 +21151,13 @@ __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vminq_f32 (float32x4_t __a, float32x4_t __b) { - return __builtin_aarch64_fminv4sf (__a, __b); + return __builtin_aarch64_smin_nanv4sf (__a, __b); } __extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) vminq_f64 (float64x2_t __a, float64x2_t __b) { - return __builtin_aarch64_fminv2df (__a, __b); + return __builtin_aarch64_smin_nanv2df (__a, __b); } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) @@ -21279,6 +21199,144 @@ (int32x4_t) __b); } +/* vminnm */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vminnm_f32 (float32x2_t __a, float32x2_t __b) +{ + return __builtin_aarch64_sminv2sf (__a, __b); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vminnmq_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_aarch64_sminv4sf (__a, __b); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vminnmq_f64 (float64x2_t __a, float64x2_t __b) +{ + return __builtin_aarch64_sminv2df (__a, __b); +} + +/* vminv */ + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vminv_f32 (float32x2_t __a) +{ + return vget_lane_f32 (__builtin_aarch64_reduc_smin_nan_v2sf (__a), 0); +} + +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vminv_s8 (int8x8_t __a) +{ + return vget_lane_s8 (__builtin_aarch64_reduc_smin_v8qi (__a), 0); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vminv_s16 (int16x4_t __a) +{ + return vget_lane_s16 (__builtin_aarch64_reduc_smin_v4hi (__a), 0); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vminv_s32 (int32x2_t __a) +{ + return vget_lane_s32 (__builtin_aarch64_reduc_smin_v2si (__a), 0); +} + +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vminv_u8 (uint8x8_t __a) +{ + return vget_lane_u8 ((uint8x8_t) + __builtin_aarch64_reduc_umin_v8qi ((int8x8_t) __a), 0); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vminv_u16 (uint16x4_t __a) +{ + return vget_lane_u16 ((uint16x4_t) + __builtin_aarch64_reduc_umin_v4hi ((int16x4_t) __a), 0); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vminv_u32 (uint32x2_t __a) +{ + return vget_lane_u32 ((uint32x2_t) + __builtin_aarch64_reduc_umin_v2si ((int32x2_t) __a), 0); +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vminvq_f32 (float32x4_t __a) +{ + return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_nan_v4sf (__a), 0); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vminvq_f64 (float64x2_t __a) +{ + return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_nan_v2df (__a), 0); +} + +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vminvq_s8 (int8x16_t __a) +{ + return vgetq_lane_s8 (__builtin_aarch64_reduc_smin_v16qi (__a), 0); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vminvq_s16 (int16x8_t __a) +{ + return vgetq_lane_s16 (__builtin_aarch64_reduc_smin_v8hi (__a), 0); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vminvq_s32 (int32x4_t __a) +{ + return vgetq_lane_s32 (__builtin_aarch64_reduc_smin_v4si (__a), 0); 
+} + +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vminvq_u8 (uint8x16_t __a) +{ + return vgetq_lane_u8 ((uint8x16_t) + __builtin_aarch64_reduc_umin_v16qi ((int8x16_t) __a), 0); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vminvq_u16 (uint16x8_t __a) +{ + return vgetq_lane_u16 ((uint16x8_t) + __builtin_aarch64_reduc_umin_v8hi ((int16x8_t) __a), 0); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vminvq_u32 (uint32x4_t __a) +{ + return vgetq_lane_u32 ((uint32x4_t) + __builtin_aarch64_reduc_umin_v4si ((int32x4_t) __a), 0); +} + +/* vminnmv */ + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vminnmv_f32 (float32x2_t __a) +{ + return vget_lane_f32 (__builtin_aarch64_reduc_smin_v2sf (__a), 0); +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vminnmvq_f32 (float32x4_t __a) +{ + return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_v4sf (__a), 0); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vminnmvq_f64 (float64x2_t __a) +{ + return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_v2df (__a), 0); +} + /* vmla */ __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) @@ -21430,7 +21488,7 @@ __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d) { - int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (INT64_C (0))); + int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (__AARCH64_INT64_C (0))); return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __tmp, __d); } @@ -21481,7 +21539,7 @@ __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d) { - int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (INT64_C (0))); + int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (__AARCH64_INT64_C (0))); return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __tmp, __d); } @@ -21558,7 +21616,7 @@ __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d) { - int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (INT64_C (0))); + int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (__AARCH64_INT64_C (0))); return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __tmp, __d); } @@ -21609,7 +21667,7 @@ __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d) { - int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (INT64_C (0))); + int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (__AARCH64_INT64_C (0))); return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __tmp, __d); } @@ -21734,7 +21792,7 @@ __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, int const __c) { - int16x8_t __tmp = vcombine_s16 (__b, vcreate_s16 (INT64_C (0))); + int16x8_t __tmp = vcombine_s16 (__b, vcreate_s16 (__AARCH64_INT64_C (0))); return __builtin_aarch64_sqdmull_lanev4hi (__a, __tmp, __c); } @@ -21783,7 +21841,7 @@ __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, int const __c) { - int32x4_t __tmp = vcombine_s32 (__b, vcreate_s32 (INT64_C (0))); + int32x4_t __tmp = vcombine_s32 (__b, vcreate_s32 (__AARCH64_INT64_C (0))); return 
__builtin_aarch64_sqdmull_lanev2si (__a, __tmp, __c); } @@ -22795,6 +22853,223 @@ return (uint64x1_t) __builtin_aarch64_uqsubdi (__a, __b); } +/* vrecpe */ + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vrecpes_f32 (float32_t __a) +{ + return __builtin_aarch64_frecpesf (__a); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vrecped_f64 (float64_t __a) +{ + return __builtin_aarch64_frecpedf (__a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrecpe_f32 (float32x2_t __a) +{ + return __builtin_aarch64_frecpev2sf (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrecpeq_f32 (float32x4_t __a) +{ + return __builtin_aarch64_frecpev4sf (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrecpeq_f64 (float64x2_t __a) +{ + return __builtin_aarch64_frecpev2df (__a); +} + +/* vrecps */ + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vrecpss_f32 (float32_t __a, float32_t __b) +{ + return __builtin_aarch64_frecpssf (__a, __b); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vrecpsd_f64 (float64_t __a, float64_t __b) +{ + return __builtin_aarch64_frecpsdf (__a, __b); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrecps_f32 (float32x2_t __a, float32x2_t __b) +{ + return __builtin_aarch64_frecpsv2sf (__a, __b); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrecpsq_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_aarch64_frecpsv4sf (__a, __b); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrecpsq_f64 (float64x2_t __a, float64x2_t __b) +{ + return __builtin_aarch64_frecpsv2df (__a, __b); +} + +/* vrecpx */ + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vrecpxs_f32 (float32_t __a) +{ + return __builtin_aarch64_frecpxsf (__a); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vrecpxd_f64 (float64_t __a) +{ + return __builtin_aarch64_frecpxdf (__a); +} + +/* vrnd */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrnd_f32 (float32x2_t __a) +{ + return __builtin_aarch64_btruncv2sf (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndq_f32 (float32x4_t __a) +{ + return __builtin_aarch64_btruncv4sf (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrndq_f64 (float64x2_t __a) +{ + return __builtin_aarch64_btruncv2df (__a); +} + +/* vrnda */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrnda_f32 (float32x2_t __a) +{ + return __builtin_aarch64_roundv2sf (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndaq_f32 (float32x4_t __a) +{ + return __builtin_aarch64_roundv4sf (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrndaq_f64 (float64x2_t __a) +{ + return __builtin_aarch64_roundv2df (__a); +} + +/* vrndi */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrndi_f32 (float32x2_t __a) +{ + return __builtin_aarch64_nearbyintv2sf (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndiq_f32 (float32x4_t __a) +{ + return __builtin_aarch64_nearbyintv4sf (__a); +} + 
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrndiq_f64 (float64x2_t __a) +{ + return __builtin_aarch64_nearbyintv2df (__a); +} + +/* vrndm */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrndm_f32 (float32x2_t __a) +{ + return __builtin_aarch64_floorv2sf (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndmq_f32 (float32x4_t __a) +{ + return __builtin_aarch64_floorv4sf (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrndmq_f64 (float64x2_t __a) +{ + return __builtin_aarch64_floorv2df (__a); +} + +/* vrndn */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrndn_f32 (float32x2_t __a) +{ + return __builtin_aarch64_frintnv2sf (__a); +} +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndnq_f32 (float32x4_t __a) +{ + return __builtin_aarch64_frintnv4sf (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrndnq_f64 (float64x2_t __a) +{ + return __builtin_aarch64_frintnv2df (__a); +} + +/* vrndp */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrndp_f32 (float32x2_t __a) +{ + return __builtin_aarch64_ceilv2sf (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndpq_f32 (float32x4_t __a) +{ + return __builtin_aarch64_ceilv4sf (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrndpq_f64 (float64x2_t __a) +{ + return __builtin_aarch64_ceilv2df (__a); +} + +/* vrndx */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrndx_f32 (float32x2_t __a) +{ + return __builtin_aarch64_rintv2sf (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndxq_f32 (float32x4_t __a) +{ + return __builtin_aarch64_rintv4sf (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrndxq_f64 (float64x2_t __a) +{ + return __builtin_aarch64_rintv2df (__a); +} + /* vrshl */ __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) @@ -23133,114 +23408,191 @@ return (uint64x1_t) __builtin_aarch64_ursra_ndi (__a, __b, __c); } +#ifdef __ARM_FEATURE_CRYPTO + +/* vsha1 */ + +static __inline uint32x4_t +vsha1cq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) +{ + return __builtin_aarch64_crypto_sha1cv4si_uuuu (hash_abcd, hash_e, wk); +} +static __inline uint32x4_t +vsha1mq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) +{ + return __builtin_aarch64_crypto_sha1mv4si_uuuu (hash_abcd, hash_e, wk); +} +static __inline uint32x4_t +vsha1pq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) +{ + return __builtin_aarch64_crypto_sha1pv4si_uuuu (hash_abcd, hash_e, wk); +} + +static __inline uint32_t +vsha1h_u32 (uint32_t hash_e) +{ + return __builtin_aarch64_crypto_sha1hsi_uu (hash_e); +} + +static __inline uint32x4_t +vsha1su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7, uint32x4_t w8_11) +{ + return __builtin_aarch64_crypto_sha1su0v4si_uuuu (w0_3, w4_7, w8_11); +} + +static __inline uint32x4_t +vsha1su1q_u32 (uint32x4_t tw0_3, uint32x4_t w12_15) +{ + return __builtin_aarch64_crypto_sha1su1v4si_uuu (tw0_3, w12_15); +} + +static __inline uint32x4_t +vsha256hq_u32 (uint32x4_t hash_abcd, uint32x4_t hash_efgh, uint32x4_t wk) +{ + return __builtin_aarch64_crypto_sha256hv4si_uuuu (hash_abcd, hash_efgh, wk); +} + +static 
__inline uint32x4_t +vsha256h2q_u32 (uint32x4_t hash_efgh, uint32x4_t hash_abcd, uint32x4_t wk) +{ + return __builtin_aarch64_crypto_sha256h2v4si_uuuu (hash_efgh, hash_abcd, wk); +} + +static __inline uint32x4_t +vsha256su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7) +{ + return __builtin_aarch64_crypto_sha256su0v4si_uuu (w0_3, w4_7); +} + +static __inline uint32x4_t +vsha256su1q_u32 (uint32x4_t tw0_3, uint32x4_t w8_11, uint32x4_t w12_15) +{ + return __builtin_aarch64_crypto_sha256su1v4si_uuuu (tw0_3, w8_11, w12_15); +} + +static __inline poly128_t +vmull_p64 (poly64_t a, poly64_t b) +{ + return + __builtin_aarch64_crypto_pmulldi_ppp (a, b); +} + +static __inline poly128_t +vmull_high_p64 (poly64x2_t a, poly64x2_t b) +{ + return __builtin_aarch64_crypto_pmullv2di_ppp (a, b); +} + +#endif + /* vshl */ __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vshl_n_s8 (int8x8_t __a, const int __b) { - return (int8x8_t) __builtin_aarch64_sshl_nv8qi (__a, __b); + return (int8x8_t) __builtin_aarch64_ashlv8qi (__a, __b); } __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) vshl_n_s16 (int16x4_t __a, const int __b) { - return (int16x4_t) __builtin_aarch64_sshl_nv4hi (__a, __b); + return (int16x4_t) __builtin_aarch64_ashlv4hi (__a, __b); } __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) vshl_n_s32 (int32x2_t __a, const int __b) { - return (int32x2_t) __builtin_aarch64_sshl_nv2si (__a, __b); + return (int32x2_t) __builtin_aarch64_ashlv2si (__a, __b); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vshl_n_s64 (int64x1_t __a, const int __b) { - return (int64x1_t) __builtin_aarch64_sshl_ndi (__a, __b); + return (int64x1_t) __builtin_aarch64_ashldi (__a, __b); } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vshl_n_u8 (uint8x8_t __a, const int __b) { - return (uint8x8_t) __builtin_aarch64_ushl_nv8qi ((int8x8_t) __a, __b); + return (uint8x8_t) __builtin_aarch64_ashlv8qi ((int8x8_t) __a, __b); } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vshl_n_u16 (uint16x4_t __a, const int __b) { - return (uint16x4_t) __builtin_aarch64_ushl_nv4hi ((int16x4_t) __a, __b); + return (uint16x4_t) __builtin_aarch64_ashlv4hi ((int16x4_t) __a, __b); } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vshl_n_u32 (uint32x2_t __a, const int __b) { - return (uint32x2_t) __builtin_aarch64_ushl_nv2si ((int32x2_t) __a, __b); + return (uint32x2_t) __builtin_aarch64_ashlv2si ((int32x2_t) __a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vshl_n_u64 (uint64x1_t __a, const int __b) { - return (uint64x1_t) __builtin_aarch64_ushl_ndi ((int64x1_t) __a, __b); + return (uint64x1_t) __builtin_aarch64_ashldi ((int64x1_t) __a, __b); } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vshlq_n_s8 (int8x16_t __a, const int __b) { - return (int8x16_t) __builtin_aarch64_sshl_nv16qi (__a, __b); + return (int8x16_t) __builtin_aarch64_ashlv16qi (__a, __b); } __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) vshlq_n_s16 (int16x8_t __a, const int __b) { - return (int16x8_t) __builtin_aarch64_sshl_nv8hi (__a, __b); + return (int16x8_t) __builtin_aarch64_ashlv8hi (__a, __b); } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vshlq_n_s32 (int32x4_t __a, const int __b) { - return (int32x4_t) __builtin_aarch64_sshl_nv4si (__a, __b); + return (int32x4_t) 
__builtin_aarch64_ashlv4si (__a, __b); } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vshlq_n_s64 (int64x2_t __a, const int __b) { - return (int64x2_t) __builtin_aarch64_sshl_nv2di (__a, __b); + return (int64x2_t) __builtin_aarch64_ashlv2di (__a, __b); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vshlq_n_u8 (uint8x16_t __a, const int __b) { - return (uint8x16_t) __builtin_aarch64_ushl_nv16qi ((int8x16_t) __a, __b); + return (uint8x16_t) __builtin_aarch64_ashlv16qi ((int8x16_t) __a, __b); } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vshlq_n_u16 (uint16x8_t __a, const int __b) { - return (uint16x8_t) __builtin_aarch64_ushl_nv8hi ((int16x8_t) __a, __b); + return (uint16x8_t) __builtin_aarch64_ashlv8hi ((int16x8_t) __a, __b); } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vshlq_n_u32 (uint32x4_t __a, const int __b) { - return (uint32x4_t) __builtin_aarch64_ushl_nv4si ((int32x4_t) __a, __b); + return (uint32x4_t) __builtin_aarch64_ashlv4si ((int32x4_t) __a, __b); } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) vshlq_n_u64 (uint64x2_t __a, const int __b) { - return (uint64x2_t) __builtin_aarch64_ushl_nv2di ((int64x2_t) __a, __b); + return (uint64x2_t) __builtin_aarch64_ashlv2di ((int64x2_t) __a, __b); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vshld_n_s64 (int64x1_t __a, const int __b) { - return (int64x1_t) __builtin_aarch64_sshl_ndi (__a, __b); + return (int64x1_t) __builtin_aarch64_ashldi (__a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vshld_n_u64 (uint64x1_t __a, const int __b) { - return (uint64x1_t) __builtin_aarch64_ushl_ndi (__a, __b); + return (uint64x1_t) __builtin_aarch64_ashldi (__a, __b); } __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) @@ -23428,109 +23780,109 @@ __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vshr_n_s8 (int8x8_t __a, const int __b) { - return (int8x8_t) __builtin_aarch64_sshr_nv8qi (__a, __b); + return (int8x8_t) __builtin_aarch64_ashrv8qi (__a, __b); } __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) vshr_n_s16 (int16x4_t __a, const int __b) { - return (int16x4_t) __builtin_aarch64_sshr_nv4hi (__a, __b); + return (int16x4_t) __builtin_aarch64_ashrv4hi (__a, __b); } __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) vshr_n_s32 (int32x2_t __a, const int __b) { - return (int32x2_t) __builtin_aarch64_sshr_nv2si (__a, __b); + return (int32x2_t) __builtin_aarch64_ashrv2si (__a, __b); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vshr_n_s64 (int64x1_t __a, const int __b) { - return (int64x1_t) __builtin_aarch64_sshr_ndi (__a, __b); + return (int64x1_t) __builtin_aarch64_ashrdi (__a, __b); } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vshr_n_u8 (uint8x8_t __a, const int __b) { - return (uint8x8_t) __builtin_aarch64_ushr_nv8qi ((int8x8_t) __a, __b); + return (uint8x8_t) __builtin_aarch64_lshrv8qi ((int8x8_t) __a, __b); } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vshr_n_u16 (uint16x4_t __a, const int __b) { - return (uint16x4_t) __builtin_aarch64_ushr_nv4hi ((int16x4_t) __a, __b); + return (uint16x4_t) __builtin_aarch64_lshrv4hi ((int16x4_t) __a, __b); } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 
vshr_n_u32 (uint32x2_t __a, const int __b) { - return (uint32x2_t) __builtin_aarch64_ushr_nv2si ((int32x2_t) __a, __b); + return (uint32x2_t) __builtin_aarch64_lshrv2si ((int32x2_t) __a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vshr_n_u64 (uint64x1_t __a, const int __b) { - return (uint64x1_t) __builtin_aarch64_ushr_ndi ((int64x1_t) __a, __b); + return (uint64x1_t) __builtin_aarch64_lshrdi ((int64x1_t) __a, __b); } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vshrq_n_s8 (int8x16_t __a, const int __b) { - return (int8x16_t) __builtin_aarch64_sshr_nv16qi (__a, __b); + return (int8x16_t) __builtin_aarch64_ashrv16qi (__a, __b); } __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) vshrq_n_s16 (int16x8_t __a, const int __b) { - return (int16x8_t) __builtin_aarch64_sshr_nv8hi (__a, __b); + return (int16x8_t) __builtin_aarch64_ashrv8hi (__a, __b); } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vshrq_n_s32 (int32x4_t __a, const int __b) { - return (int32x4_t) __builtin_aarch64_sshr_nv4si (__a, __b); + return (int32x4_t) __builtin_aarch64_ashrv4si (__a, __b); } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vshrq_n_s64 (int64x2_t __a, const int __b) { - return (int64x2_t) __builtin_aarch64_sshr_nv2di (__a, __b); + return (int64x2_t) __builtin_aarch64_ashrv2di (__a, __b); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vshrq_n_u8 (uint8x16_t __a, const int __b) { - return (uint8x16_t) __builtin_aarch64_ushr_nv16qi ((int8x16_t) __a, __b); + return (uint8x16_t) __builtin_aarch64_lshrv16qi ((int8x16_t) __a, __b); } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vshrq_n_u16 (uint16x8_t __a, const int __b) { - return (uint16x8_t) __builtin_aarch64_ushr_nv8hi ((int16x8_t) __a, __b); + return (uint16x8_t) __builtin_aarch64_lshrv8hi ((int16x8_t) __a, __b); } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vshrq_n_u32 (uint32x4_t __a, const int __b) { - return (uint32x4_t) __builtin_aarch64_ushr_nv4si ((int32x4_t) __a, __b); + return (uint32x4_t) __builtin_aarch64_lshrv4si ((int32x4_t) __a, __b); } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) vshrq_n_u64 (uint64x2_t __a, const int __b) { - return (uint64x2_t) __builtin_aarch64_ushr_nv2di ((int64x2_t) __a, __b); + return (uint64x2_t) __builtin_aarch64_lshrv2di ((int64x2_t) __a, __b); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vshrd_n_s64 (int64x1_t __a, const int __b) { - return (int64x1_t) __builtin_aarch64_sshr_ndi (__a, __b); + return (int64x1_t) __builtin_aarch64_ashrdi (__a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vshrd_n_u64 (uint64x1_t __a, const int __b) { - return (uint64x1_t) __builtin_aarch64_ushr_ndi (__a, __b); + return (uint64x1_t) __builtin_aarch64_lshrdi (__a, __b); } /* vsli */ @@ -24153,8 +24505,8 @@ { __builtin_aarch64_simd_oi __o; int64x2x2_t temp; - temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (INT64_C (0))); - temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (INT64_C (0))); + temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 
1); __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o); @@ -24165,8 +24517,8 @@ { __builtin_aarch64_simd_oi __o; uint64x2x2_t temp; - temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (UINT64_C (0))); - temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (UINT64_C (0))); + temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1); __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o); @@ -24177,8 +24529,8 @@ { __builtin_aarch64_simd_oi __o; float64x2x2_t temp; - temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (UINT64_C (0))); - temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (UINT64_C (0))); + temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[1], 1); __builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __o); @@ -24189,8 +24541,8 @@ { __builtin_aarch64_simd_oi __o; int8x16x2_t temp; - temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (INT64_C (0))); - temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (INT64_C (0))); + temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); @@ -24201,8 +24553,8 @@ { __builtin_aarch64_simd_oi __o; poly8x16x2_t temp; - temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (UINT64_C (0))); - temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (UINT64_C (0))); + temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); @@ -24213,8 +24565,8 @@ { __builtin_aarch64_simd_oi __o; int16x8x2_t temp; - temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (INT64_C (0))); - temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (INT64_C (0))); + temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1); __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); @@ -24225,8 +24577,8 @@ { __builtin_aarch64_simd_oi __o; poly16x8x2_t temp; - temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (UINT64_C (0))); - temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (UINT64_C (0))); + temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 
1); __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); @@ -24237,8 +24589,8 @@ { __builtin_aarch64_simd_oi __o; int32x4x2_t temp; - temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (INT64_C (0))); - temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (INT64_C (0))); + temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1); __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o); @@ -24249,8 +24601,8 @@ { __builtin_aarch64_simd_oi __o; uint8x16x2_t temp; - temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (UINT64_C (0))); - temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (UINT64_C (0))); + temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); @@ -24261,8 +24613,8 @@ { __builtin_aarch64_simd_oi __o; uint16x8x2_t temp; - temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (UINT64_C (0))); - temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (UINT64_C (0))); + temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1); __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); @@ -24273,8 +24625,8 @@ { __builtin_aarch64_simd_oi __o; uint32x4x2_t temp; - temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (UINT64_C (0))); - temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (UINT64_C (0))); + temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1); __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o); @@ -24285,8 +24637,8 @@ { __builtin_aarch64_simd_oi __o; float32x4x2_t temp; - temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (UINT64_C (0))); - temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (UINT64_C (0))); + temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[1], 1); __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf *) __a, __o); @@ -24405,9 +24757,9 @@ { __builtin_aarch64_simd_ci __o; int64x2x3_t temp; - temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (INT64_C (0))); - temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (INT64_C (0))); - temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (INT64_C (0))); + temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); + temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 
(__AARCH64_INT64_C (0))); __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2); @@ -24419,9 +24771,9 @@ { __builtin_aarch64_simd_ci __o; uint64x2x3_t temp; - temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (UINT64_C (0))); - temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (UINT64_C (0))); - temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (UINT64_C (0))); + temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2); @@ -24433,9 +24785,9 @@ { __builtin_aarch64_simd_ci __o; float64x2x3_t temp; - temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (UINT64_C (0))); - temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (UINT64_C (0))); - temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (UINT64_C (0))); + temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[2], 2); @@ -24447,9 +24799,9 @@ { __builtin_aarch64_simd_ci __o; int8x16x3_t temp; - temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (INT64_C (0))); - temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (INT64_C (0))); - temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (INT64_C (0))); + temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); + temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0))); __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2); @@ -24461,9 +24813,9 @@ { __builtin_aarch64_simd_ci __o; poly8x16x3_t temp; - temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (UINT64_C (0))); - temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (UINT64_C (0))); - temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (UINT64_C (0))); + temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2); @@ -24475,9 +24827,9 @@ { __builtin_aarch64_simd_ci __o; int16x8x3_t temp; - temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (INT64_C (0))); - temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (INT64_C (0))); - temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (INT64_C (0))); + temp.val[0] = 
vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); + temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0))); __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2); @@ -24489,9 +24841,9 @@ { __builtin_aarch64_simd_ci __o; poly16x8x3_t temp; - temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (UINT64_C (0))); - temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (UINT64_C (0))); - temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (UINT64_C (0))); + temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2); @@ -24503,9 +24855,9 @@ { __builtin_aarch64_simd_ci __o; int32x4x3_t temp; - temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (INT64_C (0))); - temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (INT64_C (0))); - temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (INT64_C (0))); + temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); + temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0))); __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2); @@ -24517,9 +24869,9 @@ { __builtin_aarch64_simd_ci __o; uint8x16x3_t temp; - temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (UINT64_C (0))); - temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (UINT64_C (0))); - temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (UINT64_C (0))); + temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2); @@ -24531,9 +24883,9 @@ { __builtin_aarch64_simd_ci __o; uint16x8x3_t temp; - temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (UINT64_C (0))); - temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (UINT64_C (0))); - temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (UINT64_C (0))); + temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2); @@ -24545,9 +24897,9 @@ { __builtin_aarch64_simd_ci __o; uint32x4x3_t temp; - temp.val[0] = vcombine_u32 
(val.val[0], vcreate_u32 (UINT64_C (0))); - temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (UINT64_C (0))); - temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (UINT64_C (0))); + temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2); @@ -24559,9 +24911,9 @@ { __builtin_aarch64_simd_ci __o; float32x4x3_t temp; - temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (UINT64_C (0))); - temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (UINT64_C (0))); - temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (UINT64_C (0))); + temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[2], 2); @@ -24693,10 +25045,10 @@ { __builtin_aarch64_simd_xi __o; int64x2x4_t temp; - temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (INT64_C (0))); - temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (INT64_C (0))); - temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (INT64_C (0))); - temp.val[3] = vcombine_s64 (val.val[3], vcreate_s64 (INT64_C (0))); + temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); + temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0))); + temp.val[3] = vcombine_s64 (val.val[3], vcreate_s64 (__AARCH64_INT64_C (0))); __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2); @@ -24709,10 +25061,10 @@ { __builtin_aarch64_simd_xi __o; uint64x2x4_t temp; - temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (UINT64_C (0))); - temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (UINT64_C (0))); - temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (UINT64_C (0))); - temp.val[3] = vcombine_u64 (val.val[3], vcreate_u64 (UINT64_C (0))); + temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0))); + temp.val[3] = vcombine_u64 (val.val[3], vcreate_u64 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2); @@ -24725,10 +25077,10 @@ { __builtin_aarch64_simd_xi __o; float64x2x4_t temp; - temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (UINT64_C (0))); - temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (UINT64_C (0))); - temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (UINT64_C (0))); - temp.val[3] = vcombine_f64 (val.val[3], 
vcreate_f64 (UINT64_C (0))); + temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0))); + temp.val[3] = vcombine_f64 (val.val[3], vcreate_f64 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[2], 2); @@ -24741,10 +25093,10 @@ { __builtin_aarch64_simd_xi __o; int8x16x4_t temp; - temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (INT64_C (0))); - temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (INT64_C (0))); - temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (INT64_C (0))); - temp.val[3] = vcombine_s8 (val.val[3], vcreate_s8 (INT64_C (0))); + temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); + temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0))); + temp.val[3] = vcombine_s8 (val.val[3], vcreate_s8 (__AARCH64_INT64_C (0))); __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2); @@ -24757,10 +25109,10 @@ { __builtin_aarch64_simd_xi __o; poly8x16x4_t temp; - temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (UINT64_C (0))); - temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (UINT64_C (0))); - temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (UINT64_C (0))); - temp.val[3] = vcombine_p8 (val.val[3], vcreate_p8 (UINT64_C (0))); + temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0))); + temp.val[3] = vcombine_p8 (val.val[3], vcreate_p8 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2); @@ -24773,10 +25125,10 @@ { __builtin_aarch64_simd_xi __o; int16x8x4_t temp; - temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (INT64_C (0))); - temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (INT64_C (0))); - temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (INT64_C (0))); - temp.val[3] = vcombine_s16 (val.val[3], vcreate_s16 (INT64_C (0))); + temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); + temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0))); + temp.val[3] = vcombine_s16 (val.val[3], vcreate_s16 (__AARCH64_INT64_C (0))); __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2); @@ -24789,10 +25141,10 @@ { __builtin_aarch64_simd_xi __o; poly16x8x4_t temp; - temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (UINT64_C (0))); - temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (UINT64_C (0))); - temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (UINT64_C (0))); - 
temp.val[3] = vcombine_p16 (val.val[3], vcreate_p16 (UINT64_C (0))); + temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0))); + temp.val[3] = vcombine_p16 (val.val[3], vcreate_p16 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2); @@ -24805,10 +25157,10 @@ { __builtin_aarch64_simd_xi __o; int32x4x4_t temp; - temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (INT64_C (0))); - temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (INT64_C (0))); - temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (INT64_C (0))); - temp.val[3] = vcombine_s32 (val.val[3], vcreate_s32 (INT64_C (0))); + temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); + temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0))); + temp.val[3] = vcombine_s32 (val.val[3], vcreate_s32 (__AARCH64_INT64_C (0))); __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2); @@ -24821,10 +25173,10 @@ { __builtin_aarch64_simd_xi __o; uint8x16x4_t temp; - temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (UINT64_C (0))); - temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (UINT64_C (0))); - temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (UINT64_C (0))); - temp.val[3] = vcombine_u8 (val.val[3], vcreate_u8 (UINT64_C (0))); + temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0))); + temp.val[3] = vcombine_u8 (val.val[3], vcreate_u8 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2); @@ -24837,10 +25189,10 @@ { __builtin_aarch64_simd_xi __o; uint16x8x4_t temp; - temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (UINT64_C (0))); - temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (UINT64_C (0))); - temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (UINT64_C (0))); - temp.val[3] = vcombine_u16 (val.val[3], vcreate_u16 (UINT64_C (0))); + temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0))); + temp.val[3] = vcombine_u16 (val.val[3], vcreate_u16 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2); @@ -24853,10 +25205,10 @@ { __builtin_aarch64_simd_xi __o; uint32x4x4_t temp; - temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (UINT64_C (0))); - temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (UINT64_C (0))); - temp.val[2] = 
vcombine_u32 (val.val[2], vcreate_u32 (UINT64_C (0))); - temp.val[3] = vcombine_u32 (val.val[3], vcreate_u32 (UINT64_C (0))); + temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0))); + temp.val[3] = vcombine_u32 (val.val[3], vcreate_u32 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2); @@ -24869,10 +25221,10 @@ { __builtin_aarch64_simd_xi __o; float32x4x4_t temp; - temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (UINT64_C (0))); - temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (UINT64_C (0))); - temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (UINT64_C (0))); - temp.val[3] = vcombine_f32 (val.val[3], vcreate_f32 (UINT64_C (0))); + temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0))); + temp.val[3] = vcombine_f32 (val.val[3], vcreate_f32 (__AARCH64_UINT64_C (0))); __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[0], 0); __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[1], 1); __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[2], 2); @@ -25159,7 +25511,7 @@ __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vtst_s64 (int64x1_t __a, int64x1_t __b) { - return (uint64x1_t) __builtin_aarch64_cmtstdi (__a, __b); + return (__a & __b) ? -1ll : 0ll; } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) @@ -25186,8 +25538,7 @@ __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vtst_u64 (uint64x1_t __a, uint64x1_t __b) { - return (uint64x1_t) __builtin_aarch64_cmtstdi ((int64x1_t) __a, - (int64x1_t) __b); + return (__a & __b) ? -1ll : 0ll; } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) @@ -25245,14 +25596,13 @@ __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vtstd_s64 (int64x1_t __a, int64x1_t __b) { - return (uint64x1_t) __builtin_aarch64_cmtstdi (__a, __b); + return (__a & __b) ? -1ll : 0ll; } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vtstd_u64 (uint64x1_t __a, uint64x1_t __b) { - return (uint64x1_t) __builtin_aarch64_cmtstdi ((int64x1_t) __a, - (int64x1_t) __b); + return (__a & __b) ? -1ll : 0ll; } /* vuqadd */ @@ -25371,4 +25721,31 @@ /* End of optimal implementations in approved order. 
*/ +#undef __aarch64_vget_lane_any +#undef __aarch64_vget_lane_f32 +#undef __aarch64_vget_lane_f64 +#undef __aarch64_vget_lane_p8 +#undef __aarch64_vget_lane_p16 +#undef __aarch64_vget_lane_s8 +#undef __aarch64_vget_lane_s16 +#undef __aarch64_vget_lane_s32 +#undef __aarch64_vget_lane_s64 +#undef __aarch64_vget_lane_u8 +#undef __aarch64_vget_lane_u16 +#undef __aarch64_vget_lane_u32 +#undef __aarch64_vget_lane_u64 + +#undef __aarch64_vgetq_lane_f32 +#undef __aarch64_vgetq_lane_f64 +#undef __aarch64_vgetq_lane_p8 +#undef __aarch64_vgetq_lane_p16 +#undef __aarch64_vgetq_lane_s8 +#undef __aarch64_vgetq_lane_s16 +#undef __aarch64_vgetq_lane_s32 +#undef __aarch64_vgetq_lane_s64 +#undef __aarch64_vgetq_lane_u8 +#undef __aarch64_vgetq_lane_u16 +#undef __aarch64_vgetq_lane_u32 +#undef __aarch64_vgetq_lane_u64 + #endif --- a/src/gcc/config/aarch64/aarch64.md +++ b/src/gcc/config/aarch64/aarch64.md @@ -68,14 +68,19 @@ (define_c_enum "unspec" [ UNSPEC_CASESI UNSPEC_CLS + UNSPEC_FRECPE + UNSPEC_FRECPS + UNSPEC_FRECPX UNSPEC_FRINTA UNSPEC_FRINTI UNSPEC_FRINTM + UNSPEC_FRINTN UNSPEC_FRINTP UNSPEC_FRINTX UNSPEC_FRINTZ UNSPEC_GOTSMALLPIC UNSPEC_GOTSMALLTLS + UNSPEC_GOTTINYPIC UNSPEC_LD2 UNSPEC_LD3 UNSPEC_LD4 @@ -230,6 +235,9 @@ fmovf2i,\ fmovi2f,\ fmul,\ + frecpe,\ + frecps,\ + frecpx,\ frint,\ fsqrt,\ load_acq,\ @@ -763,19 +771,41 @@ ) (define_insn "*mov_aarch64" - [(set (match_operand:SHORT 0 "nonimmediate_operand" "=r,r,r,m, r,*w") - (match_operand:SHORT 1 "general_operand" " r,M,m,rZ,*w,r"))] + [(set (match_operand:SHORT 0 "nonimmediate_operand" "=r,r, *w,r,*w, m, m, r,*w,*w") + (match_operand:SHORT 1 "general_operand" " r,M,D,m, m,rZ,*w,*w, r,*w"))] "(register_operand (operands[0], mode) || aarch64_reg_or_zero (operands[1], mode))" - "@ - mov\\t%w0, %w1 - mov\\t%w0, %1 - ldr\\t%w0, %1 - str\\t%w1, %0 - umov\\t%w0, %1.[0] - dup\\t%0., %w1" - [(set_attr "v8type" "move,alu,load1,store1,*,*") - (set_attr "simd_type" "*,*,*,*,simd_movgp,simd_dupgp") +{ + switch (which_alternative) + { + case 0: + return "mov\t%w0, %w1"; + case 1: + return "mov\t%w0, %1"; + case 2: + return aarch64_output_scalar_simd_mov_immediate (operands[1], + mode); + case 3: + return "ldr\t%w0, %1"; + case 4: + return "ldr\t%0, %1"; + case 5: + return "str\t%w1, %0"; + case 6: + return "str\t%1, %0"; + case 7: + return "umov\t%w0, %1.[0]"; + case 8: + return "dup\t%0., %w1"; + case 9: + return "dup\t%0, %1.[0]"; + default: + gcc_unreachable (); + } +} + [(set_attr "v8type" "move,alu,alu,load1,load1,store1,store1,*,*,*") + (set_attr "simd_type" "*,*,simd_move_imm,*,*,*,*,simd_movgp,simd_dupgp,simd_dup") + (set_attr "simd" "*,*,yes,*,*,*,*,yes,yes,yes") (set_attr "mode" "") (set_attr "simd_mode" "")] ) @@ -797,8 +827,8 @@ ) (define_insn "*movsi_aarch64" - [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,m, *w, r,*w") - (match_operand:SI 1 "aarch64_mov_operand" " r,M,m,rZ,rZ,*w,*w"))] + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,*w,m, m,*w, r,*w") + (match_operand:SI 1 "aarch64_mov_operand" " r,M,m, m,rZ,*w,rZ,*w,*w"))] "(register_operand (operands[0], SImode) || aarch64_reg_or_zero (operands[1], SImode))" "@ @@ -805,18 +835,20 @@ mov\\t%w0, %w1 mov\\t%w0, %1 ldr\\t%w0, %1 + ldr\\t%s0, %1 str\\t%w1, %0 + str\\t%s1, %0 fmov\\t%s0, %w1 fmov\\t%w0, %s1 fmov\\t%s0, %s1" - [(set_attr "v8type" "move,alu,load1,store1,fmov,fmov,fmov") + [(set_attr "v8type" "move,alu,load1,load1,store1,store1,fmov,fmov,fmov") (set_attr "mode" "SI") - (set_attr "fp" "*,*,*,*,yes,yes,yes")] + (set_attr "fp" "*,*,*,yes,*,yes,yes,yes,yes")] ) 
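[Editorial note, not part of the patch.] The vtst_s64/vtst_u64/vtstd_s64/vtstd_u64 hunks earlier in the arm_neon.h diff replace the __builtin_aarch64_cmtstdi call with plain C: the scalar bit-test returns all-ones when the two operands share any set bit and zero otherwise, exactly as written in the new "(__a & __b) ? -1ll : 0ll" bodies. The following host-side sketch only illustrates that semantics; the helper name scalar_vtst is made up for the example and does not appear in the patch.

/* Sketch of the scalar vtst semantics used by the rewrite above:
   all-ones if (a & b) has any bit set, zero otherwise.  */
#include <assert.h>
#include <stdint.h>

static uint64_t
scalar_vtst (uint64_t a, uint64_t b)
{
  return (a & b) ? (uint64_t) -1 : 0;
}

int
main (void)
{
  assert (scalar_vtst (0x1, 0x1) == UINT64_MAX);              /* common bit 0 */
  assert (scalar_vtst (0x2, 0x1) == 0);                       /* no common bits */
  assert (scalar_vtst (0x8000000000000000ull,
                       0xffffffffffffffffull) == UINT64_MAX); /* sign bit only */
  return 0;
}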
(define_insn "*movdi_aarch64" - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,m, r, r, *w, r,*w,w") - (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,m,rZ,Usa,Ush,rZ,*w,*w,Dd"))] + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,*w,m, m,r,r, *w, r,*w,w") + (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,m, m,rZ,*w,S,Ush,rZ,*w,*w,Dd"))] "(register_operand (operands[0], DImode) || aarch64_reg_or_zero (operands[1], DImode))" "@ @@ -825,7 +857,9 @@ mov\\t%x0, %1 mov\\t%x0, %1 ldr\\t%x0, %1 + ldr\\t%d0, %1 str\\t%x1, %0 + str\\t%d1, %0 adr\\t%x0, %a1 adrp\\t%x0, %A1 fmov\\t%d0, %x1 @@ -832,10 +866,10 @@ fmov\\t%x0, %d1 fmov\\t%d0, %d1 movi\\t%d0, %1" - [(set_attr "v8type" "move,move,move,alu,load1,store1,adr,adr,fmov,fmov,fmov,fmov") + [(set_attr "v8type" "move,move,move,alu,load1,load1,store1,store1,adr,adr,fmov,fmov,fmov,fmov") (set_attr "mode" "DI") - (set_attr "fp" "*,*,*,*,*,*,*,*,yes,yes,yes,*") - (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,yes")] + (set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*") + (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,yes")] ) (define_insn "insv_imm" @@ -843,9 +877,8 @@ (const_int 16) (match_operand:GPI 1 "const_int_operand" "n")) (match_operand:GPI 2 "const_int_operand" "n"))] - "INTVAL (operands[1]) < GET_MODE_BITSIZE (mode) - && INTVAL (operands[1]) % 16 == 0 - && UINTVAL (operands[2]) <= 0xffff" + "UINTVAL (operands[1]) < GET_MODE_BITSIZE (mode) + && UINTVAL (operands[1]) % 16 == 0" "movk\\t%0, %X2, lsl %1" [(set_attr "v8type" "movk") (set_attr "mode" "")] @@ -982,9 +1015,9 @@ || register_operand (operands[1], TFmode))" "@ orr\\t%0.16b, %1.16b, %1.16b - mov\\t%0, %1\;mov\\t%H0, %H1 - fmov\\t%d0, %Q1\;fmov\\t%0.d[1], %R1 - fmov\\t%Q0, %d1\;fmov\\t%R0, %1.d[1] + # + # + # movi\\t%0.2d, #0 fmov\\t%s0, wzr ldr\\t%q0, %1 @@ -998,6 +1031,17 @@ (set_attr "simd" "yes,*,*,*,yes,*,*,*,*,*")] ) +(define_split + [(set (match_operand:TF 0 "register_operand" "") + (match_operand:TF 1 "aarch64_reg_or_imm" ""))] + "reload_completed && aarch64_split_128bit_move_p (operands[0], operands[1])" + [(const_int 0)] + { + aarch64_split_128bit_move (operands[0], operands[1]); + DONE; + } +) + ;; Operands 1 and 3 are tied together by the final condition; so we allow ;; fairly lax checking on the second memory operation. 
(define_insn "load_pair" @@ -1150,13 +1194,14 @@ ) (define_insn "*zero_extend2_aarch64" - [(set (match_operand:GPI 0 "register_operand" "=r,r") - (zero_extend:GPI (match_operand:SHORT 1 "nonimmediate_operand" "r,m")))] + [(set (match_operand:GPI 0 "register_operand" "=r,r,*w") + (zero_extend:GPI (match_operand:SHORT 1 "nonimmediate_operand" "r,m,m")))] "" "@ uxt\t%0, %w1 - ldr\t%w0, %1" - [(set_attr "v8type" "extend,load1") + ldr\t%w0, %1 + ldr\t%0, %1" + [(set_attr "v8type" "extend,load1,load1") (set_attr "mode" "")] ) @@ -1287,6 +1332,112 @@ (set_attr "mode" "SI")] ) +(define_insn "*adds_mul_imm_" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (plus:GPI (mult:GPI + (match_operand:GPI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_pwr_2_" "n")) + (match_operand:GPI 3 "register_operand" "rk")) + (const_int 0))) + (set (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI (mult:GPI (match_dup 1) (match_dup 2)) + (match_dup 3)))] + "" + "adds\\t%0, %3, %1, lsl %p2" + [(set_attr "v8type" "alus_shift") + (set_attr "mode" "")] +) + +(define_insn "*subs_mul_imm_" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (minus:GPI (match_operand:GPI 1 "register_operand" "rk") + (mult:GPI + (match_operand:GPI 2 "register_operand" "r") + (match_operand:QI 3 "aarch64_pwr_2_" "n"))) + (const_int 0))) + (set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI (match_dup 1) + (mult:GPI (match_dup 2) (match_dup 3))))] + "" + "subs\\t%0, %1, %2, lsl %p3" + [(set_attr "v8type" "alus_shift") + (set_attr "mode" "")] +) + +(define_insn "*adds__" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (plus:GPI + (ANY_EXTEND:GPI (match_operand:ALLX 1 "register_operand" "r")) + (match_operand:GPI 2 "register_operand" "r")) + (const_int 0))) + (set (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI (ANY_EXTEND:GPI (match_dup 1)) (match_dup 2)))] + "" + "adds\\t%0, %2, %1, xt" + [(set_attr "v8type" "alus_ext") + (set_attr "mode" "")] +) + +(define_insn "*subs__" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (minus:GPI (match_operand:GPI 1 "register_operand" "r") + (ANY_EXTEND:GPI + (match_operand:ALLX 2 "register_operand" "r"))) + (const_int 0))) + (set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI (match_dup 1) (ANY_EXTEND:GPI (match_dup 2))))] + "" + "subs\\t%0, %1, %2, xt" + [(set_attr "v8type" "alus_ext") + (set_attr "mode" "")] +) + +(define_insn "*adds__multp2" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (plus:GPI (ANY_EXTRACT:GPI + (mult:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand 2 "aarch64_pwr_imm3" "Up3")) + (match_operand 3 "const_int_operand" "n") + (const_int 0)) + (match_operand:GPI 4 "register_operand" "r")) + (const_int 0))) + (set (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI (ANY_EXTRACT:GPI (mult:GPI (match_dup 1) (match_dup 2)) + (match_dup 3) + (const_int 0)) + (match_dup 4)))] + "aarch64_is_extend_from_extract (mode, operands[2], operands[3])" + "adds\\t%0, %4, %1, xt%e3 %p2" + [(set_attr "v8type" "alus_ext") + (set_attr "mode" "")] +) + +(define_insn "*subs__multp2" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (minus:GPI (match_operand:GPI 4 "register_operand" "r") + (ANY_EXTRACT:GPI + (mult:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand 2 "aarch64_pwr_imm3" "Up3")) + (match_operand 3 "const_int_operand" "n") + (const_int 0))) + (const_int 0))) + (set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI (match_dup 4) (ANY_EXTRACT:GPI + (mult:GPI (match_dup 1) (match_dup 2)) + (match_dup 3) + 
(const_int 0))))] + "aarch64_is_extend_from_extract (mode, operands[2], operands[3])" + "subs\\t%0, %4, %1, xt%e3 %p2" + [(set_attr "v8type" "alus_ext") + (set_attr "mode" "")] +) + (define_insn "*add3nr_compare0" [(set (reg:CC_NZ CC_REGNUM) (compare:CC_NZ @@ -1302,12 +1453,12 @@ ) (define_insn "*compare_neg" - [(set (reg:CC CC_REGNUM) - (compare:CC - (match_operand:GPI 0 "register_operand" "r") - (neg:GPI (match_operand:GPI 1 "register_operand" "r"))))] + [(set (reg:CC_SWP CC_REGNUM) + (compare:CC_SWP + (neg:GPI (match_operand:GPI 0 "register_operand" "r")) + (match_operand:GPI 1 "register_operand" "r")))] "" - "cmn\\t%0, %1" + "cmn\\t%1, %0" [(set_attr "v8type" "alus") (set_attr "mode" "")] ) @@ -1791,6 +1942,34 @@ (set_attr "mode" "SI")] ) +(define_insn "*sub3_carryin" + [(set + (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI (minus:GPI + (match_operand:GPI 1 "register_operand" "r") + (ltu:GPI (reg:CC CC_REGNUM) (const_int 0))) + (match_operand:GPI 2 "register_operand" "r")))] + "" + "sbc\\t%0, %1, %2" + [(set_attr "v8type" "adc") + (set_attr "mode" "")] +) + +;; zero_extend version of the above +(define_insn "*subsi3_carryin_uxtw" + [(set + (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (minus:SI (minus:SI + (match_operand:SI 1 "register_operand" "r") + (ltu:SI (reg:CC CC_REGNUM) (const_int 0))) + (match_operand:SI 2 "register_operand" "r"))))] + "" + "sbc\\t%w0, %w1, %w2" + [(set_attr "v8type" "adc") + (set_attr "mode" "SI")] +) + (define_insn "*sub_uxt_multp2" [(set (match_operand:GPI 0 "register_operand" "=rk") (minus:GPI (match_operand:GPI 4 "register_operand" "r") @@ -1825,6 +2004,38 @@ (set_attr "mode" "SI")] ) +(define_insn_and_split "absdi2" + [(set (match_operand:DI 0 "register_operand" "=r,w") + (abs:DI (match_operand:DI 1 "register_operand" "r,w"))) + (clobber (match_scratch:DI 2 "=&r,X"))] + "" + "@ + # + abs\\t%d0, %d1" + "reload_completed + && GP_REGNUM_P (REGNO (operands[0])) + && GP_REGNUM_P (REGNO (operands[1]))" + [(const_int 0)] + { + emit_insn (gen_rtx_SET (VOIDmode, operands[2], + gen_rtx_XOR (DImode, + gen_rtx_ASHIFTRT (DImode, + operands[1], + GEN_INT (63)), + operands[1]))); + emit_insn (gen_rtx_SET (VOIDmode, + operands[0], + gen_rtx_MINUS (DImode, + operands[2], + gen_rtx_ASHIFTRT (DImode, + operands[1], + GEN_INT (63))))); + DONE; + } + [(set_attr "v8type" "alu") + (set_attr "mode" "DI")] +) + (define_insn "neg2" [(set (match_operand:GPI 0 "register_operand" "=r") (neg:GPI (match_operand:GPI 1 "register_operand" "r")))] @@ -1844,6 +2055,27 @@ (set_attr "mode" "SI")] ) +(define_insn "*ngc" + [(set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI (neg:GPI (ltu:GPI (reg:CC CC_REGNUM) (const_int 0))) + (match_operand:GPI 1 "register_operand" "r")))] + "" + "ngc\\t%0, %1" + [(set_attr "v8type" "adc") + (set_attr "mode" "")] +) + +(define_insn "*ngcsi_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (minus:SI (neg:SI (ltu:SI (reg:CC CC_REGNUM) (const_int 0))) + (match_operand:SI 1 "register_operand" "r"))))] + "" + "ngc\\t%w0, %w1" + [(set_attr "v8type" "adc") + (set_attr "mode" "SI")] +) + (define_insn "*neg2_compare0" [(set (reg:CC_NZ CC_REGNUM) (compare:CC_NZ (neg:GPI (match_operand:GPI 1 "register_operand" "r")) @@ -1869,6 +2101,21 @@ (set_attr "mode" "SI")] ) +(define_insn "*neg_3_compare0" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (neg:GPI (ASHIFT:GPI + (match_operand:GPI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_shift_imm_" "n"))) + (const_int 0))) + (set 
(match_operand:GPI 0 "register_operand" "=r") + (neg:GPI (ASHIFT:GPI (match_dup 1) (match_dup 2))))] + "" + "negs\\t%0, %1, %2" + [(set_attr "v8type" "alus_shift") + (set_attr "mode" "")] +) + (define_insn "*neg__2" [(set (match_operand:GPI 0 "register_operand" "=r") (neg:GPI (ASHIFT:GPI @@ -2158,6 +2405,18 @@ (set_attr "mode" "")] ) +(define_insn "*cmp_swp__shft_" + [(set (reg:CC_SWP CC_REGNUM) + (compare:CC_SWP (ashift:GPI + (ANY_EXTEND:GPI + (match_operand:ALLX 0 "register_operand" "r")) + (match_operand 1 "aarch64_imm3" "Ui3")) + (match_operand:GPI 2 "register_operand" "r")))] + "" + "cmp\\t%2, %0, xt %1" + [(set_attr "v8type" "alus_ext") + (set_attr "mode" "")] +) ;; ------------------------------------------------------------------- ;; Store-flag and conditional select insns @@ -2434,6 +2693,69 @@ [(set_attr "v8type" "logic,logic_imm") (set_attr "mode" "SI")]) +(define_insn "*and3_compare0" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (and:GPI (match_operand:GPI 1 "register_operand" "%r,r") + (match_operand:GPI 2 "aarch64_logical_operand" "r,")) + (const_int 0))) + (set (match_operand:GPI 0 "register_operand" "=r,r") + (and:GPI (match_dup 1) (match_dup 2)))] + "" + "ands\\t%0, %1, %2" + [(set_attr "v8type" "logics,logics_imm") + (set_attr "mode" "")] +) + +;; zero_extend version of above +(define_insn "*andsi3_compare0_uxtw" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (and:SI (match_operand:SI 1 "register_operand" "%r,r") + (match_operand:SI 2 "aarch64_logical_operand" "r,K")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))] + "" + "ands\\t%w0, %w1, %w2" + [(set_attr "v8type" "logics,logics_imm") + (set_attr "mode" "SI")] +) + +(define_insn "*and_3_compare0" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (and:GPI (SHIFT:GPI + (match_operand:GPI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_shift_imm_" "n")) + (match_operand:GPI 3 "register_operand" "r")) + (const_int 0))) + (set (match_operand:GPI 0 "register_operand" "=r") + (and:GPI (SHIFT:GPI (match_dup 1) (match_dup 2)) (match_dup 3)))] + "" + "ands\\t%0, %3, %1, %2" + [(set_attr "v8type" "logics_shift") + (set_attr "mode" "")] +) + +;; zero_extend version of above +(define_insn "*and_si3_compare0_uxtw" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (and:SI (SHIFT:SI + (match_operand:SI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_shift_imm_si" "n")) + (match_operand:SI 3 "register_operand" "r")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (and:SI (SHIFT:SI (match_dup 1) (match_dup 2)) + (match_dup 3))))] + "" + "ands\\t%w0, %w3, %w1, %2" + [(set_attr "v8type" "logics_shift") + (set_attr "mode" "SI")] +) + (define_insn "*_3" [(set (match_operand:GPI 0 "register_operand" "=r") (LOGICAL:GPI (SHIFT:GPI @@ -2485,6 +2807,35 @@ [(set_attr "v8type" "logic") (set_attr "mode" "")]) +(define_insn "*and_one_cmpl3_compare0" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (and:GPI (not:GPI + (match_operand:GPI 1 "register_operand" "r")) + (match_operand:GPI 2 "register_operand" "r")) + (const_int 0))) + (set (match_operand:GPI 0 "register_operand" "=r") + (and:GPI (not:GPI (match_dup 1)) (match_dup 2)))] + "" + "bics\\t%0, %2, %1" + [(set_attr "v8type" "logics") + (set_attr "mode" "")]) + +;; zero_extend version of above +(define_insn "*and_one_cmplsi3_compare0_uxtw" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (and:SI (not:SI + (match_operand:SI 1 "register_operand" "r")) + 
(match_operand:SI 2 "register_operand" "r")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (and:SI (not:SI (match_dup 1)) (match_dup 2))))] + "" + "bics\\t%w0, %w2, %w1" + [(set_attr "v8type" "logics") + (set_attr "mode" "SI")]) + (define_insn "*_one_cmpl_3" [(set (match_operand:GPI 0 "register_operand" "=r") (LOGICAL:GPI (not:GPI @@ -2497,6 +2848,43 @@ [(set_attr "v8type" "logic_shift") (set_attr "mode" "")]) +(define_insn "*and_one_cmpl_3_compare0" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (and:GPI (not:GPI + (SHIFT:GPI + (match_operand:GPI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_shift_imm_" "n"))) + (match_operand:GPI 3 "register_operand" "r")) + (const_int 0))) + (set (match_operand:GPI 0 "register_operand" "=r") + (and:GPI (not:GPI + (SHIFT:GPI + (match_dup 1) (match_dup 2))) (match_dup 3)))] + "" + "bics\\t%0, %3, %1, %2" + [(set_attr "v8type" "logics_shift") + (set_attr "mode" "")]) + +;; zero_extend version of above +(define_insn "*and_one_cmpl_si3_compare0_uxtw" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (and:SI (not:SI + (SHIFT:SI + (match_operand:SI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_shift_imm_si" "n"))) + (match_operand:SI 3 "register_operand" "r")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (and:SI + (not:SI + (SHIFT:SI (match_dup 1) (match_dup 2))) (match_dup 3))))] + "" + "bics\\t%w0, %w3, %w1, %2" + [(set_attr "v8type" "logics_shift") + (set_attr "mode" "SI")]) + (define_insn "clz2" [(set (match_operand:GPI 0 "register_operand" "=r") (clz:GPI (match_operand:GPI 1 "register_operand" "r")))] @@ -2704,6 +3092,62 @@ (set_attr "mode" "")] ) +(define_insn "*extr5_insn" + [(set (match_operand:GPI 0 "register_operand" "=r") + (ior:GPI (ashift:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand 3 "const_int_operand" "n")) + (lshiftrt:GPI (match_operand:GPI 2 "register_operand" "r") + (match_operand 4 "const_int_operand" "n"))))] + "UINTVAL (operands[3]) < GET_MODE_BITSIZE (mode) && + (UINTVAL (operands[3]) + UINTVAL (operands[4]) == GET_MODE_BITSIZE (mode))" + "extr\\t%0, %1, %2, %4" + [(set_attr "v8type" "shift") + (set_attr "mode" "")] +) + +;; zero_extend version of the above +(define_insn "*extrsi5_insn_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (ior:SI (ashift:SI (match_operand:SI 1 "register_operand" "r") + (match_operand 3 "const_int_operand" "n")) + (lshiftrt:SI (match_operand:SI 2 "register_operand" "r") + (match_operand 4 "const_int_operand" "n")))))] + "UINTVAL (operands[3]) < 32 && + (UINTVAL (operands[3]) + UINTVAL (operands[4]) == 32)" + "extr\\t%w0, %w1, %w2, %4" + [(set_attr "v8type" "shift") + (set_attr "mode" "SI")] +) + +(define_insn "*ror3_insn" + [(set (match_operand:GPI 0 "register_operand" "=r") + (rotate:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand 2 "const_int_operand" "n")))] + "UINTVAL (operands[2]) < GET_MODE_BITSIZE (mode)" +{ + operands[3] = GEN_INT ( - UINTVAL (operands[2])); + return "ror\\t%0, %1, %3"; +} + [(set_attr "v8type" "shift") + (set_attr "mode" "")] +) + +;; zero_extend version of the above +(define_insn "*rorsi3_insn_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (rotate:SI (match_operand:SI 1 "register_operand" "r") + (match_operand 2 "const_int_operand" "n"))))] + "UINTVAL (operands[2]) < 32" +{ + operands[3] = GEN_INT (32 - UINTVAL (operands[2])); + return "ror\\t%w0, %w1, %3"; +} + [(set_attr 
"v8type" "shift") + (set_attr "mode" "SI")] +) + (define_insn "*_ashl" [(set (match_operand:GPI 0 "register_operand" "=r") (ANY_EXTEND:GPI @@ -2770,6 +3214,65 @@ (set_attr "mode" "")] ) +;; Bitfield Insert (insv) +(define_expand "insv" + [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand") + (match_operand 1 "const_int_operand") + (match_operand 2 "const_int_operand")) + (match_operand:GPI 3 "general_operand"))] + "" +{ + unsigned HOST_WIDE_INT width = UINTVAL (operands[1]); + unsigned HOST_WIDE_INT pos = UINTVAL (operands[2]); + rtx value = operands[3]; + + if (width == 0 || (pos + width) > GET_MODE_BITSIZE (mode)) + FAIL; + + if (CONST_INT_P (value)) + { + unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT)1 << width) - 1; + + /* Prefer AND/OR for inserting all zeros or all ones. */ + if ((UINTVAL (value) & mask) == 0 + || (UINTVAL (value) & mask) == mask) + FAIL; + + /* 16-bit aligned 16-bit wide insert is handled by insv_imm. */ + if (width == 16 && (pos % 16) == 0) + DONE; + } + operands[3] = force_reg (mode, value); +}) + +(define_insn "*insv_reg" + [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r") + (match_operand 1 "const_int_operand" "n") + (match_operand 2 "const_int_operand" "n")) + (match_operand:GPI 3 "register_operand" "r"))] + "!(UINTVAL (operands[1]) == 0 + || (UINTVAL (operands[2]) + UINTVAL (operands[1]) + > GET_MODE_BITSIZE (mode)))" + "bfi\\t%0, %3, %2, %1" + [(set_attr "v8type" "bfm") + (set_attr "mode" "")] +) + +(define_insn "*extr_insv_lower_reg" + [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r") + (match_operand 1 "const_int_operand" "n") + (const_int 0)) + (zero_extract:GPI (match_operand:GPI 2 "register_operand" "+r") + (match_dup 1) + (match_operand 3 "const_int_operand" "n")))] + "!(UINTVAL (operands[1]) == 0 + || (UINTVAL (operands[3]) + UINTVAL (operands[1]) + > GET_MODE_BITSIZE (mode)))" + "bfxil\\t%0, %2, %3, %1" + [(set_attr "v8type" "bfm") + (set_attr "mode" "")] +) + (define_insn "*_shft_" [(set (match_operand:GPI 0 "register_operand" "=r") (ashift:GPI (ANY_EXTEND:GPI @@ -3090,6 +3593,27 @@ (set_attr "mode" "")] ) +(define_insn "aarch64_frecp" + [(set (match_operand:GPF 0 "register_operand" "=w") + (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")] + FRECP))] + "TARGET_FLOAT" + "frecp\\t%0, %1" + [(set_attr "v8type" "frecp") + (set_attr "mode" "")] +) + +(define_insn "aarch64_frecps" + [(set (match_operand:GPF 0 "register_operand" "=w") + (unspec:GPF [(match_operand:GPF 1 "register_operand" "w") + (match_operand:GPF 2 "register_operand" "w")] + UNSPEC_FRECPS))] + "TARGET_FLOAT" + "frecps\\t%0, %1, %2" + [(set_attr "v8type" "frecps") + (set_attr "mode" "")] +) + ;; ------------------------------------------------------------------- ;; Reload support ;; ------------------------------------------------------------------- @@ -3146,9 +3670,9 @@ ;; after or during reload as we don't want these patterns to start ;; kicking in during the combiner. 
-(define_insn "aarch64_movdi_tilow" +(define_insn "aarch64_movdi_low" [(set (match_operand:DI 0 "register_operand" "=r") - (truncate:DI (match_operand:TI 1 "register_operand" "w")))] + (truncate:DI (match_operand:TX 1 "register_operand" "w")))] "reload_completed || reload_in_progress" "fmov\\t%x0, %d1" [(set_attr "v8type" "fmovf2i") @@ -3156,10 +3680,10 @@ (set_attr "length" "4") ]) -(define_insn "aarch64_movdi_tihigh" +(define_insn "aarch64_movdi_high" [(set (match_operand:DI 0 "register_operand" "=r") (truncate:DI - (lshiftrt:TI (match_operand:TI 1 "register_operand" "w") + (lshiftrt:TX (match_operand:TX 1 "register_operand" "w") (const_int 64))))] "reload_completed || reload_in_progress" "fmov\\t%x0, %1.d[1]" @@ -3168,24 +3692,22 @@ (set_attr "length" "4") ]) -(define_insn "aarch64_movtihigh_di" - [(set (zero_extract:TI (match_operand:TI 0 "register_operand" "+w") +(define_insn "aarch64_movhigh_di" + [(set (zero_extract:TX (match_operand:TX 0 "register_operand" "+w") (const_int 64) (const_int 64)) - (zero_extend:TI (match_operand:DI 1 "register_operand" "r")))] + (zero_extend:TX (match_operand:DI 1 "register_operand" "r")))] "reload_completed || reload_in_progress" "fmov\\t%0.d[1], %x1" - [(set_attr "v8type" "fmovi2f") (set_attr "mode" "DI") (set_attr "length" "4") ]) -(define_insn "aarch64_movtilow_di" - [(set (match_operand:TI 0 "register_operand" "=w") - (zero_extend:TI (match_operand:DI 1 "register_operand" "r")))] +(define_insn "aarch64_movlow_di" + [(set (match_operand:TX 0 "register_operand" "=w") + (zero_extend:TX (match_operand:DI 1 "register_operand" "r")))] "reload_completed || reload_in_progress" "fmov\\t%d0, %x1" - [(set_attr "v8type" "fmovi2f") (set_attr "mode" "DI") (set_attr "length" "4") @@ -3197,7 +3719,6 @@ (truncate:DI (match_operand:TI 1 "register_operand" "w"))))] "reload_completed || reload_in_progress" "fmov\\t%d0, %d1" - [(set_attr "v8type" "fmovi2f") (set_attr "mode" "DI") (set_attr "length" "4") @@ -3231,6 +3752,16 @@ (set_attr "mode" "DI")] ) +(define_insn "ldr_got_tiny" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "aarch64_valid_symref" "S")] + UNSPEC_GOTTINYPIC))] + "" + "ldr\\t%0, %L1" + [(set_attr "v8type" "load1") + (set_attr "mode" "DI")] +) + (define_insn "aarch64_load_tp_hard" [(set (match_operand:DI 0 "register_operand" "=r") (unspec:DI [(const_int 0)] UNSPEC_TLS))] --- a/src/gcc/config/aarch64/aarch64-option-extensions.def +++ b/src/gcc/config/aarch64/aarch64-option-extensions.def @@ -35,3 +35,4 @@ AARCH64_OPT_EXTENSION("fp", AARCH64_FL_FP, AARCH64_FL_FPSIMD | AARCH64_FL_CRYPTO) AARCH64_OPT_EXTENSION("simd", AARCH64_FL_FPSIMD, AARCH64_FL_SIMD | AARCH64_FL_CRYPTO) AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO | AARCH64_FL_FPSIMD, AARCH64_FL_CRYPTO) +AARCH64_OPT_EXTENSION("crc", AARCH64_FL_CRC, AARCH64_FL_CRC) --- a/src/gcc/config/aarch64/aarch64-builtins.c +++ b/src/gcc/config/aarch64/aarch64-builtins.c @@ -30,6 +30,7 @@ #include "langhooks.h" #include "diagnostic-core.h" #include "optabs.h" +#include "gimple.h" enum aarch64_simd_builtin_type_mode { @@ -50,6 +51,7 @@ T_OI, T_XI, T_SI, + T_SF, T_HI, T_QI, T_MAX @@ -72,172 +74,251 @@ #define oi_UP T_OI #define xi_UP T_XI #define si_UP T_SI +#define sf_UP T_SF #define hi_UP T_HI #define qi_UP T_QI #define UP(X) X##_UP -typedef enum +#define SIMD_MAX_BUILTIN_ARGS 5 + +enum aarch64_type_qualifiers { - AARCH64_SIMD_BINOP, - AARCH64_SIMD_TERNOP, - AARCH64_SIMD_QUADOP, - AARCH64_SIMD_UNOP, - AARCH64_SIMD_GETLANE, - AARCH64_SIMD_SETLANE, - AARCH64_SIMD_CREATE, - 
AARCH64_SIMD_DUP, - AARCH64_SIMD_DUPLANE, - AARCH64_SIMD_COMBINE, - AARCH64_SIMD_SPLIT, - AARCH64_SIMD_LANEMUL, - AARCH64_SIMD_LANEMULL, - AARCH64_SIMD_LANEMULH, - AARCH64_SIMD_LANEMAC, - AARCH64_SIMD_SCALARMUL, - AARCH64_SIMD_SCALARMULL, - AARCH64_SIMD_SCALARMULH, - AARCH64_SIMD_SCALARMAC, - AARCH64_SIMD_CONVERT, - AARCH64_SIMD_FIXCONV, - AARCH64_SIMD_SELECT, - AARCH64_SIMD_RESULTPAIR, - AARCH64_SIMD_REINTERP, - AARCH64_SIMD_VTBL, - AARCH64_SIMD_VTBX, - AARCH64_SIMD_LOAD1, - AARCH64_SIMD_LOAD1LANE, - AARCH64_SIMD_STORE1, - AARCH64_SIMD_STORE1LANE, - AARCH64_SIMD_LOADSTRUCT, - AARCH64_SIMD_LOADSTRUCTLANE, - AARCH64_SIMD_STORESTRUCT, - AARCH64_SIMD_STORESTRUCTLANE, - AARCH64_SIMD_LOGICBINOP, - AARCH64_SIMD_SHIFTINSERT, - AARCH64_SIMD_SHIFTIMM, - AARCH64_SIMD_SHIFTACC -} aarch64_simd_itype; + /* T foo. */ + qualifier_none = 0x0, + /* unsigned T foo. */ + qualifier_unsigned = 0x1, /* 1 << 0 */ + /* const T foo. */ + qualifier_const = 0x2, /* 1 << 1 */ + /* T *foo. */ + qualifier_pointer = 0x4, /* 1 << 2 */ + /* const T *foo. */ + qualifier_const_pointer = 0x6, /* qualifier_const | qualifier_pointer */ + /* Used when expanding arguments if an operand could + be an immediate. */ + qualifier_immediate = 0x8, /* 1 << 3 */ + qualifier_maybe_immediate = 0x10, /* 1 << 4 */ + /* void foo (...). */ + qualifier_void = 0x20, /* 1 << 5 */ + /* Some patterns may have internal operands, this qualifier is an + instruction to the initialisation code to skip this operand. */ + qualifier_internal = 0x40, /* 1 << 6 */ + /* Some builtins should use the T_*mode* encoded in a simd_builtin_datum + rather than using the type of the operand. */ + qualifier_map_mode = 0x80, /* 1 << 7 */ + /* qualifier_pointer | qualifier_map_mode */ + qualifier_pointer_map_mode = 0x84, + /* qualifier_const_pointer | qualifier_map_mode */ + qualifier_const_pointer_map_mode = 0x86, + /* Polynomial types. 
*/ + qualifier_poly = 0x100 +}; typedef struct { const char *name; - const aarch64_simd_itype itype; enum aarch64_simd_builtin_type_mode mode; const enum insn_code code; unsigned int fcode; + enum aarch64_type_qualifiers *qualifiers; } aarch64_simd_builtin_datum; -#define CF(N, X) CODE_FOR_aarch64_##N##X +static enum aarch64_type_qualifiers +aarch64_types_unop_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none }; +#define TYPES_UNOP (aarch64_types_unop_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_unopu_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_unsigned, qualifier_unsigned }; +#define TYPES_UNOPU (aarch64_types_unopu_qualifiers) +#define TYPES_CREATE (aarch64_types_unop_qualifiers) +#define TYPES_REINTERP (aarch64_types_unop_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_binop_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_maybe_immediate }; +#define TYPES_BINOP (aarch64_types_binop_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_binopu_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned }; +#define TYPES_BINOPU (aarch64_types_binopu_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_binopp_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_poly, qualifier_poly, qualifier_poly }; +#define TYPES_BINOPP (aarch64_types_binopp_qualifiers) -#define VAR1(T, N, A) \ - {#N, AARCH64_SIMD_##T, UP (A), CF (N, A), 0}, -#define VAR2(T, N, A, B) \ - VAR1 (T, N, A) \ - VAR1 (T, N, B) -#define VAR3(T, N, A, B, C) \ - VAR2 (T, N, A, B) \ - VAR1 (T, N, C) -#define VAR4(T, N, A, B, C, D) \ - VAR3 (T, N, A, B, C) \ - VAR1 (T, N, D) -#define VAR5(T, N, A, B, C, D, E) \ - VAR4 (T, N, A, B, C, D) \ - VAR1 (T, N, E) -#define VAR6(T, N, A, B, C, D, E, F) \ - VAR5 (T, N, A, B, C, D, E) \ - VAR1 (T, N, F) -#define VAR7(T, N, A, B, C, D, E, F, G) \ - VAR6 (T, N, A, B, C, D, E, F) \ - VAR1 (T, N, G) -#define VAR8(T, N, A, B, C, D, E, F, G, H) \ - VAR7 (T, N, A, B, C, D, E, F, G) \ - VAR1 (T, N, H) -#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \ - VAR8 (T, N, A, B, C, D, E, F, G, H) \ - VAR1 (T, N, I) -#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \ - VAR9 (T, N, A, B, C, D, E, F, G, H, I) \ - VAR1 (T, N, J) -#define VAR11(T, N, A, B, C, D, E, F, G, H, I, J, K) \ - VAR10 (T, N, A, B, C, D, E, F, G, H, I, J) \ - VAR1 (T, N, K) -#define VAR12(T, N, A, B, C, D, E, F, G, H, I, J, K, L) \ - VAR11 (T, N, A, B, C, D, E, F, G, H, I, J, K) \ - VAR1 (T, N, L) +static enum aarch64_type_qualifiers +aarch64_types_ternop_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_none, qualifier_none }; +#define TYPES_TERNOP (aarch64_types_ternop_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_ternopu_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_unsigned, qualifier_unsigned, + qualifier_unsigned, qualifier_unsigned }; +#define TYPES_TERNOPU (aarch64_types_ternopu_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_quadop_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_none, + qualifier_none, qualifier_none }; +#define TYPES_QUADOP (aarch64_types_quadop_qualifiers) + +static enum aarch64_type_qualifiers +aarch64_types_getlane_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_immediate }; +#define TYPES_GETLANE (aarch64_types_getlane_qualifiers) +#define TYPES_SHIFTIMM (aarch64_types_getlane_qualifiers) +static enum aarch64_type_qualifiers 
+aarch64_types_setlane_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate }; +#define TYPES_SETLANE (aarch64_types_setlane_qualifiers) +#define TYPES_SHIFTINSERT (aarch64_types_setlane_qualifiers) +#define TYPES_SHIFTACC (aarch64_types_setlane_qualifiers) + +static enum aarch64_type_qualifiers +aarch64_types_combine_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_none }; +#define TYPES_COMBINE (aarch64_types_combine_qualifiers) + +static enum aarch64_type_qualifiers +aarch64_types_load1_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_const_pointer_map_mode }; +#define TYPES_LOAD1 (aarch64_types_load1_qualifiers) +#define TYPES_LOADSTRUCT (aarch64_types_load1_qualifiers) + +/* The first argument (return type) of a store should be void type, + which we represent with qualifier_void. Their first operand will be + a DImode pointer to the location to store to, so we must use + qualifier_map_mode | qualifier_pointer to build a pointer to the + element type of the vector. */ +static enum aarch64_type_qualifiers +aarch64_types_store1_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_void, qualifier_pointer_map_mode, qualifier_none }; +#define TYPES_STORE1 (aarch64_types_store1_qualifiers) +#define TYPES_STORESTRUCT (aarch64_types_store1_qualifiers) + +#define CF0(N, X) CODE_FOR_aarch64_##N##X +#define CF1(N, X) CODE_FOR_##N##X##1 +#define CF2(N, X) CODE_FOR_##N##X##2 +#define CF3(N, X) CODE_FOR_##N##X##3 +#define CF4(N, X) CODE_FOR_##N##X##4 +#define CF10(N, X) CODE_FOR_##N##X + +#define VAR1(T, N, MAP, A) \ + {#N, UP (A), CF##MAP (N, A), 0, TYPES_##T}, +#define VAR2(T, N, MAP, A, B) \ + VAR1 (T, N, MAP, A) \ + VAR1 (T, N, MAP, B) +#define VAR3(T, N, MAP, A, B, C) \ + VAR2 (T, N, MAP, A, B) \ + VAR1 (T, N, MAP, C) +#define VAR4(T, N, MAP, A, B, C, D) \ + VAR3 (T, N, MAP, A, B, C) \ + VAR1 (T, N, MAP, D) +#define VAR5(T, N, MAP, A, B, C, D, E) \ + VAR4 (T, N, MAP, A, B, C, D) \ + VAR1 (T, N, MAP, E) +#define VAR6(T, N, MAP, A, B, C, D, E, F) \ + VAR5 (T, N, MAP, A, B, C, D, E) \ + VAR1 (T, N, MAP, F) +#define VAR7(T, N, MAP, A, B, C, D, E, F, G) \ + VAR6 (T, N, MAP, A, B, C, D, E, F) \ + VAR1 (T, N, MAP, G) +#define VAR8(T, N, MAP, A, B, C, D, E, F, G, H) \ + VAR7 (T, N, MAP, A, B, C, D, E, F, G) \ + VAR1 (T, N, MAP, H) +#define VAR9(T, N, MAP, A, B, C, D, E, F, G, H, I) \ + VAR8 (T, N, MAP, A, B, C, D, E, F, G, H) \ + VAR1 (T, N, MAP, I) +#define VAR10(T, N, MAP, A, B, C, D, E, F, G, H, I, J) \ + VAR9 (T, N, MAP, A, B, C, D, E, F, G, H, I) \ + VAR1 (T, N, MAP, J) +#define VAR11(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K) \ + VAR10 (T, N, MAP, A, B, C, D, E, F, G, H, I, J) \ + VAR1 (T, N, MAP, K) +#define VAR12(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K, L) \ + VAR11 (T, N, MAP, A, B, C, D, E, F, G, H, I, J, K) \ + VAR1 (T, N, MAP, L) + /* BUILTIN_ macros should expand to cover the same range of modes as is given for each define_mode_iterator in config/aarch64/iterators.md. 
*/ -#define BUILTIN_DX(T, N) \ - VAR2 (T, N, di, df) -#define BUILTIN_SDQ_I(T, N) \ - VAR4 (T, N, qi, hi, si, di) -#define BUILTIN_SD_HSI(T, N) \ - VAR2 (T, N, hi, si) -#define BUILTIN_V2F(T, N) \ - VAR2 (T, N, v2sf, v2df) -#define BUILTIN_VALL(T, N) \ - VAR10 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, v2sf, v4sf, v2df) -#define BUILTIN_VB(T, N) \ - VAR2 (T, N, v8qi, v16qi) -#define BUILTIN_VD(T, N) \ - VAR4 (T, N, v8qi, v4hi, v2si, v2sf) -#define BUILTIN_VDC(T, N) \ - VAR6 (T, N, v8qi, v4hi, v2si, v2sf, di, df) -#define BUILTIN_VDIC(T, N) \ - VAR3 (T, N, v8qi, v4hi, v2si) -#define BUILTIN_VDN(T, N) \ - VAR3 (T, N, v4hi, v2si, di) -#define BUILTIN_VDQ(T, N) \ - VAR7 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di) -#define BUILTIN_VDQF(T, N) \ - VAR3 (T, N, v2sf, v4sf, v2df) -#define BUILTIN_VDQHS(T, N) \ - VAR4 (T, N, v4hi, v8hi, v2si, v4si) -#define BUILTIN_VDQIF(T, N) \ - VAR9 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2sf, v4sf, v2df) -#define BUILTIN_VDQM(T, N) \ - VAR6 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si) -#define BUILTIN_VDQV(T, N) \ - VAR5 (T, N, v8qi, v16qi, v4hi, v8hi, v4si) -#define BUILTIN_VDQ_BHSI(T, N) \ - VAR6 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si) -#define BUILTIN_VDQ_I(T, N) \ - VAR7 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di) -#define BUILTIN_VDW(T, N) \ - VAR3 (T, N, v8qi, v4hi, v2si) -#define BUILTIN_VD_BHSI(T, N) \ - VAR3 (T, N, v8qi, v4hi, v2si) -#define BUILTIN_VD_HSI(T, N) \ - VAR2 (T, N, v4hi, v2si) -#define BUILTIN_VD_RE(T, N) \ - VAR6 (T, N, v8qi, v4hi, v2si, v2sf, di, df) -#define BUILTIN_VQ(T, N) \ - VAR6 (T, N, v16qi, v8hi, v4si, v2di, v4sf, v2df) -#define BUILTIN_VQN(T, N) \ - VAR3 (T, N, v8hi, v4si, v2di) -#define BUILTIN_VQW(T, N) \ - VAR3 (T, N, v16qi, v8hi, v4si) -#define BUILTIN_VQ_HSI(T, N) \ - VAR2 (T, N, v8hi, v4si) -#define BUILTIN_VQ_S(T, N) \ - VAR6 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si) -#define BUILTIN_VSDQ_HSI(T, N) \ - VAR6 (T, N, v4hi, v8hi, v2si, v4si, hi, si) -#define BUILTIN_VSDQ_I(T, N) \ - VAR11 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, qi, hi, si, di) -#define BUILTIN_VSDQ_I_BHSI(T, N) \ - VAR10 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, qi, hi, si) -#define BUILTIN_VSDQ_I_DI(T, N) \ - VAR8 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, di) -#define BUILTIN_VSD_HSI(T, N) \ - VAR4 (T, N, v4hi, v2si, hi, si) -#define BUILTIN_VSQN_HSDI(T, N) \ - VAR6 (T, N, v8hi, v4si, v2di, hi, si, di) -#define BUILTIN_VSTRUCT(T, N) \ - VAR3 (T, N, oi, ci, xi) +#define BUILTIN_DX(T, N, MAP) \ + VAR2 (T, N, MAP, di, df) +#define BUILTIN_GPF(T, N, MAP) \ + VAR2 (T, N, MAP, sf, df) +#define BUILTIN_SDQ_I(T, N, MAP) \ + VAR4 (T, N, MAP, qi, hi, si, di) +#define BUILTIN_SD_HSI(T, N, MAP) \ + VAR2 (T, N, MAP, hi, si) +#define BUILTIN_V2F(T, N, MAP) \ + VAR2 (T, N, MAP, v2sf, v2df) +#define BUILTIN_VALL(T, N, MAP) \ + VAR10 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, \ + v4si, v2di, v2sf, v4sf, v2df) +#define BUILTIN_VALLDI(T, N, MAP) \ + VAR11 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, \ + v4si, v2di, v2sf, v4sf, v2df, di) +#define BUILTIN_VB(T, N, MAP) \ + VAR2 (T, N, MAP, v8qi, v16qi) +#define BUILTIN_VD(T, N, MAP) \ + VAR4 (T, N, MAP, v8qi, v4hi, v2si, v2sf) +#define BUILTIN_VDC(T, N, MAP) \ + VAR6 (T, N, MAP, v8qi, v4hi, v2si, v2sf, di, df) +#define BUILTIN_VDIC(T, N, MAP) \ + VAR3 (T, N, MAP, v8qi, v4hi, v2si) +#define BUILTIN_VDN(T, N, MAP) \ + VAR3 (T, N, MAP, v4hi, v2si, di) +#define BUILTIN_VDQ(T, N, MAP) \ + VAR7 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di) +#define BUILTIN_VDQF(T, N, MAP) \ + 
VAR3 (T, N, MAP, v2sf, v4sf, v2df) +#define BUILTIN_VDQH(T, N, MAP) \ + VAR2 (T, N, MAP, v4hi, v8hi) +#define BUILTIN_VDQHS(T, N, MAP) \ + VAR4 (T, N, MAP, v4hi, v8hi, v2si, v4si) +#define BUILTIN_VDQIF(T, N, MAP) \ + VAR9 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2sf, v4sf, v2df) +#define BUILTIN_VDQM(T, N, MAP) \ + VAR6 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si) +#define BUILTIN_VDQV(T, N, MAP) \ + VAR5 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v4si) +#define BUILTIN_VDQ_BHSI(T, N, MAP) \ + VAR6 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si) +#define BUILTIN_VDQ_I(T, N, MAP) \ + VAR7 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di) +#define BUILTIN_VDW(T, N, MAP) \ + VAR3 (T, N, MAP, v8qi, v4hi, v2si) +#define BUILTIN_VD_BHSI(T, N, MAP) \ + VAR3 (T, N, MAP, v8qi, v4hi, v2si) +#define BUILTIN_VD_HSI(T, N, MAP) \ + VAR2 (T, N, MAP, v4hi, v2si) +#define BUILTIN_VD_RE(T, N, MAP) \ + VAR6 (T, N, MAP, v8qi, v4hi, v2si, v2sf, di, df) +#define BUILTIN_VQ(T, N, MAP) \ + VAR6 (T, N, MAP, v16qi, v8hi, v4si, v2di, v4sf, v2df) +#define BUILTIN_VQN(T, N, MAP) \ + VAR3 (T, N, MAP, v8hi, v4si, v2di) +#define BUILTIN_VQW(T, N, MAP) \ + VAR3 (T, N, MAP, v16qi, v8hi, v4si) +#define BUILTIN_VQ_HSI(T, N, MAP) \ + VAR2 (T, N, MAP, v8hi, v4si) +#define BUILTIN_VQ_S(T, N, MAP) \ + VAR6 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si) +#define BUILTIN_VSDQ_HSI(T, N, MAP) \ + VAR6 (T, N, MAP, v4hi, v8hi, v2si, v4si, hi, si) +#define BUILTIN_VSDQ_I(T, N, MAP) \ + VAR11 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, qi, hi, si, di) +#define BUILTIN_VSDQ_I_BHSI(T, N, MAP) \ + VAR10 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, qi, hi, si) +#define BUILTIN_VSDQ_I_DI(T, N, MAP) \ + VAR8 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, di) +#define BUILTIN_VSD_HSI(T, N, MAP) \ + VAR4 (T, N, MAP, v4hi, v2si, hi, si) +#define BUILTIN_VSQN_HSDI(T, N, MAP) \ + VAR6 (T, N, MAP, v8hi, v4si, v2di, hi, si, di) +#define BUILTIN_VSTRUCT(T, N, MAP) \ + VAR3 (T, N, MAP, oi, ci, xi) static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = { #include "aarch64-simd-builtins.def" @@ -244,8 +325,8 @@ }; #undef VAR1 -#define VAR1(T, N, A) \ - AARCH64_SIMD_BUILTIN_##N##A, +#define VAR1(T, N, MAP, A) \ + AARCH64_SIMD_BUILTIN_##T##_##N##A, enum aarch64_builtins { @@ -257,171 +338,218 @@ AARCH64_BUILTIN_MAX }; -#undef BUILTIN_DX -#undef BUILTIN_SDQ_I -#undef BUILTIN_SD_HSI -#undef BUILTIN_V2F -#undef BUILTIN_VALL -#undef BUILTIN_VB -#undef BUILTIN_VD -#undef BUILTIN_VDC -#undef BUILTIN_VDIC -#undef BUILTIN_VDN -#undef BUILTIN_VDQ -#undef BUILTIN_VDQF -#undef BUILTIN_VDQHS -#undef BUILTIN_VDQIF -#undef BUILTIN_VDQM -#undef BUILTIN_VDQV -#undef BUILTIN_VDQ_BHSI -#undef BUILTIN_VDQ_I -#undef BUILTIN_VDW -#undef BUILTIN_VD_BHSI -#undef BUILTIN_VD_HSI -#undef BUILTIN_VD_RE -#undef BUILTIN_VQ -#undef BUILTIN_VQN -#undef BUILTIN_VQW -#undef BUILTIN_VQ_HSI -#undef BUILTIN_VQ_S -#undef BUILTIN_VSDQ_HSI -#undef BUILTIN_VSDQ_I -#undef BUILTIN_VSDQ_I_BHSI -#undef BUILTIN_VSDQ_I_DI -#undef BUILTIN_VSD_HSI -#undef BUILTIN_VSQN_HSDI -#undef BUILTIN_VSTRUCT -#undef CF -#undef VAR1 -#undef VAR2 -#undef VAR3 -#undef VAR4 -#undef VAR5 -#undef VAR6 -#undef VAR7 -#undef VAR8 -#undef VAR9 -#undef VAR10 -#undef VAR11 - static GTY(()) tree aarch64_builtin_decls[AARCH64_BUILTIN_MAX]; #define NUM_DREG_TYPES 6 #define NUM_QREG_TYPES 6 -static void -aarch64_init_simd_builtins (void) +/* Return a tree for a signed or unsigned argument of either + the mode specified by MODE, or the inner mode of MODE. 
*/ +tree +aarch64_build_scalar_type (enum machine_mode mode, + bool unsigned_p, + bool poly_p) { - unsigned int i, fcode = AARCH64_SIMD_BUILTIN_BASE + 1; +#undef INT_TYPES +#define INT_TYPES \ + AARCH64_TYPE_BUILDER (QI) \ + AARCH64_TYPE_BUILDER (HI) \ + AARCH64_TYPE_BUILDER (SI) \ + AARCH64_TYPE_BUILDER (DI) \ + AARCH64_TYPE_BUILDER (EI) \ + AARCH64_TYPE_BUILDER (OI) \ + AARCH64_TYPE_BUILDER (CI) \ + AARCH64_TYPE_BUILDER (XI) \ + AARCH64_TYPE_BUILDER (TI) \ - /* Scalar type nodes. */ - tree aarch64_simd_intQI_type_node; - tree aarch64_simd_intHI_type_node; - tree aarch64_simd_polyQI_type_node; - tree aarch64_simd_polyHI_type_node; - tree aarch64_simd_intSI_type_node; - tree aarch64_simd_intDI_type_node; - tree aarch64_simd_float_type_node; - tree aarch64_simd_double_type_node; +/* Statically declare all the possible types we might need. */ +#undef AARCH64_TYPE_BUILDER +#define AARCH64_TYPE_BUILDER(X) \ + static tree X##_aarch64_type_node_p = NULL; \ + static tree X##_aarch64_type_node_s = NULL; \ + static tree X##_aarch64_type_node_u = NULL; - /* Pointer to scalar type nodes. */ - tree intQI_pointer_node; - tree intHI_pointer_node; - tree intSI_pointer_node; - tree intDI_pointer_node; - tree float_pointer_node; - tree double_pointer_node; + INT_TYPES - /* Const scalar type nodes. */ - tree const_intQI_node; - tree const_intHI_node; - tree const_intSI_node; - tree const_intDI_node; - tree const_float_node; - tree const_double_node; + static tree float_aarch64_type_node = NULL; + static tree double_aarch64_type_node = NULL; - /* Pointer to const scalar type nodes. */ - tree const_intQI_pointer_node; - tree const_intHI_pointer_node; - tree const_intSI_pointer_node; - tree const_intDI_pointer_node; - tree const_float_pointer_node; - tree const_double_pointer_node; + gcc_assert (!VECTOR_MODE_P (mode)); - /* Vector type nodes. */ - tree V8QI_type_node; - tree V4HI_type_node; - tree V2SI_type_node; - tree V2SF_type_node; - tree V16QI_type_node; - tree V8HI_type_node; - tree V4SI_type_node; - tree V4SF_type_node; - tree V2DI_type_node; - tree V2DF_type_node; +/* If we've already initialised this type, don't initialise it again, + otherwise ask for a new type of the correct size. */ +#undef AARCH64_TYPE_BUILDER +#define AARCH64_TYPE_BUILDER(X) \ + case X##mode: \ + if (unsigned_p) \ + return (X##_aarch64_type_node_u \ + ? X##_aarch64_type_node_u \ + : X##_aarch64_type_node_u \ + = make_unsigned_type (GET_MODE_PRECISION (mode))); \ + else if (poly_p) \ + return (X##_aarch64_type_node_p \ + ? X##_aarch64_type_node_p \ + : X##_aarch64_type_node_p \ + = make_unsigned_type (GET_MODE_PRECISION (mode))); \ + else \ + return (X##_aarch64_type_node_s \ + ? X##_aarch64_type_node_s \ + : X##_aarch64_type_node_s \ + = make_signed_type (GET_MODE_PRECISION (mode))); \ + break; - /* Scalar unsigned type nodes. 
*/ - tree intUQI_type_node; - tree intUHI_type_node; - tree intUSI_type_node; - tree intUDI_type_node; + switch (mode) + { + INT_TYPES + case SFmode: + if (!float_aarch64_type_node) + { + float_aarch64_type_node = make_node (REAL_TYPE); + TYPE_PRECISION (float_aarch64_type_node) = FLOAT_TYPE_SIZE; + layout_type (float_aarch64_type_node); + } + return float_aarch64_type_node; + break; + case DFmode: + if (!double_aarch64_type_node) + { + double_aarch64_type_node = make_node (REAL_TYPE); + TYPE_PRECISION (double_aarch64_type_node) = DOUBLE_TYPE_SIZE; + layout_type (double_aarch64_type_node); + } + return double_aarch64_type_node; + break; + default: + gcc_unreachable (); + } +} - /* Opaque integer types for structures of vectors. */ - tree intEI_type_node; - tree intOI_type_node; - tree intCI_type_node; - tree intXI_type_node; +tree +aarch64_build_vector_type (enum machine_mode mode, + bool unsigned_p, + bool poly_p) +{ + tree eltype; - /* Pointer to vector type nodes. */ - tree V8QI_pointer_node; - tree V4HI_pointer_node; - tree V2SI_pointer_node; - tree V2SF_pointer_node; - tree V16QI_pointer_node; - tree V8HI_pointer_node; - tree V4SI_pointer_node; - tree V4SF_pointer_node; - tree V2DI_pointer_node; - tree V2DF_pointer_node; +#define VECTOR_TYPES \ + AARCH64_TYPE_BUILDER (V16QI) \ + AARCH64_TYPE_BUILDER (V8HI) \ + AARCH64_TYPE_BUILDER (V4SI) \ + AARCH64_TYPE_BUILDER (V2DI) \ + AARCH64_TYPE_BUILDER (V8QI) \ + AARCH64_TYPE_BUILDER (V4HI) \ + AARCH64_TYPE_BUILDER (V2SI) \ + \ + AARCH64_TYPE_BUILDER (V4SF) \ + AARCH64_TYPE_BUILDER (V2DF) \ + AARCH64_TYPE_BUILDER (V2SF) \ +/* Declare our "cache" of values. */ +#undef AARCH64_TYPE_BUILDER +#define AARCH64_TYPE_BUILDER(X) \ + static tree X##_aarch64_type_node_s = NULL; \ + static tree X##_aarch64_type_node_u = NULL; \ + static tree X##_aarch64_type_node_p = NULL; - /* Operations which return results as pairs. */ - tree void_ftype_pv8qi_v8qi_v8qi; - tree void_ftype_pv4hi_v4hi_v4hi; - tree void_ftype_pv2si_v2si_v2si; - tree void_ftype_pv2sf_v2sf_v2sf; - tree void_ftype_pdi_di_di; - tree void_ftype_pv16qi_v16qi_v16qi; - tree void_ftype_pv8hi_v8hi_v8hi; - tree void_ftype_pv4si_v4si_v4si; - tree void_ftype_pv4sf_v4sf_v4sf; - tree void_ftype_pv2di_v2di_v2di; - tree void_ftype_pv2df_v2df_v2df; + VECTOR_TYPES - tree reinterp_ftype_dreg[NUM_DREG_TYPES][NUM_DREG_TYPES]; - tree reinterp_ftype_qreg[NUM_QREG_TYPES][NUM_QREG_TYPES]; - tree dreg_types[NUM_DREG_TYPES], qreg_types[NUM_QREG_TYPES]; + gcc_assert (VECTOR_MODE_P (mode)); - /* Create distinguished type nodes for AARCH64_SIMD vector element types, - and pointers to values of such types, so we can detect them later. 
*/ - aarch64_simd_intQI_type_node = - make_signed_type (GET_MODE_PRECISION (QImode)); - aarch64_simd_intHI_type_node = - make_signed_type (GET_MODE_PRECISION (HImode)); - aarch64_simd_polyQI_type_node = - make_signed_type (GET_MODE_PRECISION (QImode)); - aarch64_simd_polyHI_type_node = - make_signed_type (GET_MODE_PRECISION (HImode)); - aarch64_simd_intSI_type_node = - make_signed_type (GET_MODE_PRECISION (SImode)); - aarch64_simd_intDI_type_node = - make_signed_type (GET_MODE_PRECISION (DImode)); - aarch64_simd_float_type_node = make_node (REAL_TYPE); - aarch64_simd_double_type_node = make_node (REAL_TYPE); - TYPE_PRECISION (aarch64_simd_float_type_node) = FLOAT_TYPE_SIZE; - TYPE_PRECISION (aarch64_simd_double_type_node) = DOUBLE_TYPE_SIZE; - layout_type (aarch64_simd_float_type_node); - layout_type (aarch64_simd_double_type_node); +#undef AARCH64_TYPE_BUILDER +#define AARCH64_TYPE_BUILDER(X) \ + case X##mode: \ + if (unsigned_p) \ + return X##_aarch64_type_node_u \ + ? X##_aarch64_type_node_u \ + : X##_aarch64_type_node_u \ + = build_vector_type_for_mode (aarch64_build_scalar_type \ + (GET_MODE_INNER (mode), \ + unsigned_p, poly_p), mode); \ + else if (poly_p) \ + return X##_aarch64_type_node_p \ + ? X##_aarch64_type_node_p \ + : X##_aarch64_type_node_p \ + = build_vector_type_for_mode (aarch64_build_scalar_type \ + (GET_MODE_INNER (mode), \ + unsigned_p, poly_p), mode); \ + else \ + return X##_aarch64_type_node_s \ + ? X##_aarch64_type_node_s \ + : X##_aarch64_type_node_s \ + = build_vector_type_for_mode (aarch64_build_scalar_type \ + (GET_MODE_INNER (mode), \ + unsigned_p, poly_p), mode); \ + break; + switch (mode) + { + default: + eltype = aarch64_build_scalar_type (GET_MODE_INNER (mode), + unsigned_p, poly_p); + return build_vector_type_for_mode (eltype, mode); + break; + VECTOR_TYPES + } +} + +tree +aarch64_build_type (enum machine_mode mode, bool unsigned_p, bool poly_p) +{ + if (VECTOR_MODE_P (mode)) + return aarch64_build_vector_type (mode, unsigned_p, poly_p); + else + return aarch64_build_scalar_type (mode, unsigned_p, poly_p); +} + +tree +aarch64_build_signed_type (enum machine_mode mode) +{ + return aarch64_build_type (mode, false, false); +} + +tree +aarch64_build_unsigned_type (enum machine_mode mode) +{ + return aarch64_build_type (mode, true, false); +} + +tree +aarch64_build_poly_type (enum machine_mode mode) +{ + return aarch64_build_type (mode, false, true); +} + +static void +aarch64_init_simd_builtins (void) +{ + unsigned int i, fcode = AARCH64_SIMD_BUILTIN_BASE + 1; + + /* Signed scalar type nodes. */ + tree aarch64_simd_intQI_type_node = aarch64_build_signed_type (QImode); + tree aarch64_simd_intHI_type_node = aarch64_build_signed_type (HImode); + tree aarch64_simd_intSI_type_node = aarch64_build_signed_type (SImode); + tree aarch64_simd_intDI_type_node = aarch64_build_signed_type (DImode); + tree aarch64_simd_intTI_type_node = aarch64_build_signed_type (TImode); + tree aarch64_simd_intEI_type_node = aarch64_build_signed_type (EImode); + tree aarch64_simd_intOI_type_node = aarch64_build_signed_type (OImode); + tree aarch64_simd_intCI_type_node = aarch64_build_signed_type (CImode); + tree aarch64_simd_intXI_type_node = aarch64_build_signed_type (XImode); + + /* Unsigned scalar type nodes. 
*/ + tree aarch64_simd_intUQI_type_node = aarch64_build_unsigned_type (QImode); + tree aarch64_simd_intUHI_type_node = aarch64_build_unsigned_type (HImode); + tree aarch64_simd_intUSI_type_node = aarch64_build_unsigned_type (SImode); + tree aarch64_simd_intUDI_type_node = aarch64_build_unsigned_type (DImode); + + /* Poly scalar type nodes. */ + tree aarch64_simd_polyQI_type_node = aarch64_build_poly_type (QImode); + tree aarch64_simd_polyHI_type_node = aarch64_build_poly_type (HImode); + tree aarch64_simd_polyDI_type_node = aarch64_build_poly_type (DImode); + tree aarch64_simd_polyTI_type_node = aarch64_build_poly_type (TImode); + + /* Float type nodes. */ + tree aarch64_simd_float_type_node = aarch64_build_signed_type (SFmode); + tree aarch64_simd_double_type_node = aarch64_build_signed_type (DFmode); + /* Define typedefs which exactly correspond to the modes we are basing vector types on. If you change these names you'll need to change the table used by aarch64_mangle_type too. */ @@ -441,518 +569,139 @@ "__builtin_aarch64_simd_poly8"); (*lang_hooks.types.register_builtin_type) (aarch64_simd_polyHI_type_node, "__builtin_aarch64_simd_poly16"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_polyDI_type_node, + "__builtin_aarch64_simd_poly64"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_polyTI_type_node, + "__builtin_aarch64_simd_poly128"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intTI_type_node, + "__builtin_aarch64_simd_ti"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intEI_type_node, + "__builtin_aarch64_simd_ei"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intOI_type_node, + "__builtin_aarch64_simd_oi"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intCI_type_node, + "__builtin_aarch64_simd_ci"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intXI_type_node, + "__builtin_aarch64_simd_xi"); - intQI_pointer_node = build_pointer_type (aarch64_simd_intQI_type_node); - intHI_pointer_node = build_pointer_type (aarch64_simd_intHI_type_node); - intSI_pointer_node = build_pointer_type (aarch64_simd_intSI_type_node); - intDI_pointer_node = build_pointer_type (aarch64_simd_intDI_type_node); - float_pointer_node = build_pointer_type (aarch64_simd_float_type_node); - double_pointer_node = build_pointer_type (aarch64_simd_double_type_node); - - /* Next create constant-qualified versions of the above types. */ - const_intQI_node = build_qualified_type (aarch64_simd_intQI_type_node, - TYPE_QUAL_CONST); - const_intHI_node = build_qualified_type (aarch64_simd_intHI_type_node, - TYPE_QUAL_CONST); - const_intSI_node = build_qualified_type (aarch64_simd_intSI_type_node, - TYPE_QUAL_CONST); - const_intDI_node = build_qualified_type (aarch64_simd_intDI_type_node, - TYPE_QUAL_CONST); - const_float_node = build_qualified_type (aarch64_simd_float_type_node, - TYPE_QUAL_CONST); - const_double_node = build_qualified_type (aarch64_simd_double_type_node, - TYPE_QUAL_CONST); - - const_intQI_pointer_node = build_pointer_type (const_intQI_node); - const_intHI_pointer_node = build_pointer_type (const_intHI_node); - const_intSI_pointer_node = build_pointer_type (const_intSI_node); - const_intDI_pointer_node = build_pointer_type (const_intDI_node); - const_float_pointer_node = build_pointer_type (const_float_node); - const_double_pointer_node = build_pointer_type (const_double_node); - - /* Now create vector types based on our AARCH64 SIMD element types. */ - /* 64-bit vectors. 
*/ - V8QI_type_node = - build_vector_type_for_mode (aarch64_simd_intQI_type_node, V8QImode); - V4HI_type_node = - build_vector_type_for_mode (aarch64_simd_intHI_type_node, V4HImode); - V2SI_type_node = - build_vector_type_for_mode (aarch64_simd_intSI_type_node, V2SImode); - V2SF_type_node = - build_vector_type_for_mode (aarch64_simd_float_type_node, V2SFmode); - /* 128-bit vectors. */ - V16QI_type_node = - build_vector_type_for_mode (aarch64_simd_intQI_type_node, V16QImode); - V8HI_type_node = - build_vector_type_for_mode (aarch64_simd_intHI_type_node, V8HImode); - V4SI_type_node = - build_vector_type_for_mode (aarch64_simd_intSI_type_node, V4SImode); - V4SF_type_node = - build_vector_type_for_mode (aarch64_simd_float_type_node, V4SFmode); - V2DI_type_node = - build_vector_type_for_mode (aarch64_simd_intDI_type_node, V2DImode); - V2DF_type_node = - build_vector_type_for_mode (aarch64_simd_double_type_node, V2DFmode); - /* Unsigned integer types for various mode sizes. */ - intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode)); - intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode)); - intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode)); - intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode)); - - (*lang_hooks.types.register_builtin_type) (intUQI_type_node, + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intUQI_type_node, "__builtin_aarch64_simd_uqi"); - (*lang_hooks.types.register_builtin_type) (intUHI_type_node, + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intUHI_type_node, "__builtin_aarch64_simd_uhi"); - (*lang_hooks.types.register_builtin_type) (intUSI_type_node, + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intUSI_type_node, "__builtin_aarch64_simd_usi"); - (*lang_hooks.types.register_builtin_type) (intUDI_type_node, + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intUDI_type_node, "__builtin_aarch64_simd_udi"); - /* Opaque integer types for structures of vectors. */ - intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode)); - intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode)); - intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode)); - intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode)); - - (*lang_hooks.types.register_builtin_type) (intTI_type_node, - "__builtin_aarch64_simd_ti"); - (*lang_hooks.types.register_builtin_type) (intEI_type_node, - "__builtin_aarch64_simd_ei"); - (*lang_hooks.types.register_builtin_type) (intOI_type_node, - "__builtin_aarch64_simd_oi"); - (*lang_hooks.types.register_builtin_type) (intCI_type_node, - "__builtin_aarch64_simd_ci"); - (*lang_hooks.types.register_builtin_type) (intXI_type_node, - "__builtin_aarch64_simd_xi"); - - /* Pointers to vector types. */ - V8QI_pointer_node = build_pointer_type (V8QI_type_node); - V4HI_pointer_node = build_pointer_type (V4HI_type_node); - V2SI_pointer_node = build_pointer_type (V2SI_type_node); - V2SF_pointer_node = build_pointer_type (V2SF_type_node); - V16QI_pointer_node = build_pointer_type (V16QI_type_node); - V8HI_pointer_node = build_pointer_type (V8HI_type_node); - V4SI_pointer_node = build_pointer_type (V4SI_type_node); - V4SF_pointer_node = build_pointer_type (V4SF_type_node); - V2DI_pointer_node = build_pointer_type (V2DI_type_node); - V2DF_pointer_node = build_pointer_type (V2DF_type_node); - - /* Operations which return results as pairs. 
*/ - void_ftype_pv8qi_v8qi_v8qi = - build_function_type_list (void_type_node, V8QI_pointer_node, - V8QI_type_node, V8QI_type_node, NULL); - void_ftype_pv4hi_v4hi_v4hi = - build_function_type_list (void_type_node, V4HI_pointer_node, - V4HI_type_node, V4HI_type_node, NULL); - void_ftype_pv2si_v2si_v2si = - build_function_type_list (void_type_node, V2SI_pointer_node, - V2SI_type_node, V2SI_type_node, NULL); - void_ftype_pv2sf_v2sf_v2sf = - build_function_type_list (void_type_node, V2SF_pointer_node, - V2SF_type_node, V2SF_type_node, NULL); - void_ftype_pdi_di_di = - build_function_type_list (void_type_node, intDI_pointer_node, - aarch64_simd_intDI_type_node, - aarch64_simd_intDI_type_node, NULL); - void_ftype_pv16qi_v16qi_v16qi = - build_function_type_list (void_type_node, V16QI_pointer_node, - V16QI_type_node, V16QI_type_node, NULL); - void_ftype_pv8hi_v8hi_v8hi = - build_function_type_list (void_type_node, V8HI_pointer_node, - V8HI_type_node, V8HI_type_node, NULL); - void_ftype_pv4si_v4si_v4si = - build_function_type_list (void_type_node, V4SI_pointer_node, - V4SI_type_node, V4SI_type_node, NULL); - void_ftype_pv4sf_v4sf_v4sf = - build_function_type_list (void_type_node, V4SF_pointer_node, - V4SF_type_node, V4SF_type_node, NULL); - void_ftype_pv2di_v2di_v2di = - build_function_type_list (void_type_node, V2DI_pointer_node, - V2DI_type_node, V2DI_type_node, NULL); - void_ftype_pv2df_v2df_v2df = - build_function_type_list (void_type_node, V2DF_pointer_node, - V2DF_type_node, V2DF_type_node, NULL); - - dreg_types[0] = V8QI_type_node; - dreg_types[1] = V4HI_type_node; - dreg_types[2] = V2SI_type_node; - dreg_types[3] = V2SF_type_node; - dreg_types[4] = aarch64_simd_intDI_type_node; - dreg_types[5] = aarch64_simd_double_type_node; - - qreg_types[0] = V16QI_type_node; - qreg_types[1] = V8HI_type_node; - qreg_types[2] = V4SI_type_node; - qreg_types[3] = V4SF_type_node; - qreg_types[4] = V2DI_type_node; - qreg_types[5] = V2DF_type_node; - - /* If NUM_DREG_TYPES != NUM_QREG_TYPES, we will need separate nested loops - for qreg and dreg reinterp inits. */ - for (i = 0; i < NUM_DREG_TYPES; i++) - { - int j; - for (j = 0; j < NUM_DREG_TYPES; j++) - { - reinterp_ftype_dreg[i][j] - = build_function_type_list (dreg_types[i], dreg_types[j], NULL); - reinterp_ftype_qreg[i][j] - = build_function_type_list (qreg_types[i], qreg_types[j], NULL); - } - } - for (i = 0; i < ARRAY_SIZE (aarch64_simd_builtin_data); i++, fcode++) { + bool print_type_signature_p = false; + char type_signature[SIMD_MAX_BUILTIN_ARGS] = { 0 }; aarch64_simd_builtin_datum *d = &aarch64_simd_builtin_data[i]; const char *const modenames[] = - { - "v8qi", "v4hi", "v2si", "v2sf", "di", "df", - "v16qi", "v8hi", "v4si", "v4sf", "v2di", "v2df", - "ti", "ei", "oi", "xi", "si", "hi", "qi" - }; + { + "v8qi", "v4hi", "v2si", "v2sf", "di", "df", + "v16qi", "v8hi", "v4si", "v4sf", "v2di", "v2df", + "ti", "ei", "oi", "xi", "si", "sf", "hi", "qi" + }; + const enum machine_mode modes[] = + { + V8QImode, V4HImode, V2SImode, V2SFmode, DImode, DFmode, + V16QImode, V8HImode, V4SImode, V4SFmode, V2DImode, + V2DFmode, TImode, EImode, OImode, XImode, SImode, + SFmode, HImode, QImode + }; char namebuf[60]; tree ftype = NULL; tree fndecl = NULL; - int is_load = 0; - int is_store = 0; gcc_assert (ARRAY_SIZE (modenames) == T_MAX); d->fcode = fcode; - switch (d->itype) + /* We must track two variables here. op_num is + the operand number as in the RTL pattern. This is + required to access the mode (e.g. 
V4SF mode) of the + argument, from which the base type can be derived. + arg_num is an index in to the qualifiers data, which + gives qualifiers to the type (e.g. const unsigned). + The reason these two variables may differ by one is the + void return type. While all return types take the 0th entry + in the qualifiers array, there is no operand for them in the + RTL pattern. */ + int op_num = insn_data[d->code].n_operands - 1; + int arg_num = d->qualifiers[0] & qualifier_void + ? op_num + 1 + : op_num; + tree return_type = void_type_node, args = void_list_node; + tree eltype; + + /* Build a function type directly from the insn_data for this + builtin. The build_function_type () function takes care of + removing duplicates for us. */ + for (; op_num >= 0; arg_num--, op_num--) { - case AARCH64_SIMD_LOAD1: - case AARCH64_SIMD_LOAD1LANE: - case AARCH64_SIMD_LOADSTRUCT: - case AARCH64_SIMD_LOADSTRUCTLANE: - is_load = 1; - /* Fall through. */ - case AARCH64_SIMD_STORE1: - case AARCH64_SIMD_STORE1LANE: - case AARCH64_SIMD_STORESTRUCT: - case AARCH64_SIMD_STORESTRUCTLANE: - if (!is_load) - is_store = 1; - /* Fall through. */ - case AARCH64_SIMD_UNOP: - case AARCH64_SIMD_BINOP: - case AARCH64_SIMD_TERNOP: - case AARCH64_SIMD_QUADOP: - case AARCH64_SIMD_COMBINE: - case AARCH64_SIMD_CONVERT: - case AARCH64_SIMD_CREATE: - case AARCH64_SIMD_DUP: - case AARCH64_SIMD_DUPLANE: - case AARCH64_SIMD_FIXCONV: - case AARCH64_SIMD_GETLANE: - case AARCH64_SIMD_LANEMAC: - case AARCH64_SIMD_LANEMUL: - case AARCH64_SIMD_LANEMULH: - case AARCH64_SIMD_LANEMULL: - case AARCH64_SIMD_LOGICBINOP: - case AARCH64_SIMD_SCALARMAC: - case AARCH64_SIMD_SCALARMUL: - case AARCH64_SIMD_SCALARMULH: - case AARCH64_SIMD_SCALARMULL: - case AARCH64_SIMD_SELECT: - case AARCH64_SIMD_SETLANE: - case AARCH64_SIMD_SHIFTACC: - case AARCH64_SIMD_SHIFTIMM: - case AARCH64_SIMD_SHIFTINSERT: - case AARCH64_SIMD_SPLIT: - case AARCH64_SIMD_VTBL: - case AARCH64_SIMD_VTBX: - { - int k; - tree return_type = void_type_node, args = void_list_node; - tree eltype; - /* Build a function type directly from the insn_data for this - builtin. The build_function_type () function takes care of - removing duplicates for us. */ + enum machine_mode op_mode = insn_data[d->code].operand[op_num].mode; + enum aarch64_type_qualifiers qualifiers = d->qualifiers[arg_num]; - for (k = insn_data[d->code].n_operands -1; k >= 0; k--) - { - /* Skip an internal operand for vget_{low, high}. */ - if (k == 2 && d->itype == AARCH64_SIMD_SPLIT) - continue; + if (qualifiers & qualifier_unsigned) + { + type_signature[arg_num] = 'u'; + print_type_signature_p = true; + } + else if (qualifiers & qualifier_poly) + { + type_signature[arg_num] = 'p'; + print_type_signature_p = true; + } + else + type_signature[arg_num] = 's'; - if (is_load && k == 1) - { - /* AdvSIMD load patterns always have the memory operand - (a DImode pointer) in the operand 1 position. We - want a const pointer to the element type in that - position. */ - gcc_assert (insn_data[d->code].operand[k].mode == DImode); + /* Skip an internal operand for vget_{low, high}. */ + if (qualifiers & qualifier_internal) + continue; - switch (d->mode) - { - case T_V8QI: - case T_V16QI: - eltype = const_intQI_pointer_node; - break; + /* Some builtins have different user-facing types + for certain arguments, encoded in d->mode. 
*/ + if (qualifiers & qualifier_map_mode) + op_mode = modes[d->mode]; - case T_V4HI: - case T_V8HI: - eltype = const_intHI_pointer_node; - break; + /* For pointers, we want a pointer to the basic type + of the vector. */ + if (qualifiers & qualifier_pointer && VECTOR_MODE_P (op_mode)) + op_mode = GET_MODE_INNER (op_mode); - case T_V2SI: - case T_V4SI: - eltype = const_intSI_pointer_node; - break; + eltype = aarch64_build_type (op_mode, + qualifiers & qualifier_unsigned, + qualifiers & qualifier_poly); - case T_V2SF: - case T_V4SF: - eltype = const_float_pointer_node; - break; + /* Add qualifiers. */ + if (qualifiers & qualifier_const) + eltype = build_qualified_type (eltype, TYPE_QUAL_CONST); - case T_DI: - case T_V2DI: - eltype = const_intDI_pointer_node; - break; + if (qualifiers & qualifier_pointer) + eltype = build_pointer_type (eltype); - case T_DF: - case T_V2DF: - eltype = const_double_pointer_node; - break; + /* If we have reached arg_num == 0, we are at a non-void + return type. Otherwise, we are still processing + arguments. */ + if (arg_num == 0) + return_type = eltype; + else + args = tree_cons (NULL_TREE, eltype, args); + } - default: - gcc_unreachable (); - } - } - else if (is_store && k == 0) - { - /* Similarly, AdvSIMD store patterns use operand 0 as - the memory location to store to (a DImode pointer). - Use a pointer to the element type of the store in - that position. */ - gcc_assert (insn_data[d->code].operand[k].mode == DImode); + ftype = build_function_type (return_type, args); - switch (d->mode) - { - case T_V8QI: - case T_V16QI: - eltype = intQI_pointer_node; - break; - - case T_V4HI: - case T_V8HI: - eltype = intHI_pointer_node; - break; - - case T_V2SI: - case T_V4SI: - eltype = intSI_pointer_node; - break; - - case T_V2SF: - case T_V4SF: - eltype = float_pointer_node; - break; - - case T_DI: - case T_V2DI: - eltype = intDI_pointer_node; - break; - - case T_DF: - case T_V2DF: - eltype = double_pointer_node; - break; - - default: - gcc_unreachable (); - } - } - else - { - switch (insn_data[d->code].operand[k].mode) - { - case VOIDmode: - eltype = void_type_node; - break; - /* Scalars. */ - case QImode: - eltype = aarch64_simd_intQI_type_node; - break; - case HImode: - eltype = aarch64_simd_intHI_type_node; - break; - case SImode: - eltype = aarch64_simd_intSI_type_node; - break; - case SFmode: - eltype = aarch64_simd_float_type_node; - break; - case DFmode: - eltype = aarch64_simd_double_type_node; - break; - case DImode: - eltype = aarch64_simd_intDI_type_node; - break; - case TImode: - eltype = intTI_type_node; - break; - case EImode: - eltype = intEI_type_node; - break; - case OImode: - eltype = intOI_type_node; - break; - case CImode: - eltype = intCI_type_node; - break; - case XImode: - eltype = intXI_type_node; - break; - /* 64-bit vectors. */ - case V8QImode: - eltype = V8QI_type_node; - break; - case V4HImode: - eltype = V4HI_type_node; - break; - case V2SImode: - eltype = V2SI_type_node; - break; - case V2SFmode: - eltype = V2SF_type_node; - break; - /* 128-bit vectors. 
*/ - case V16QImode: - eltype = V16QI_type_node; - break; - case V8HImode: - eltype = V8HI_type_node; - break; - case V4SImode: - eltype = V4SI_type_node; - break; - case V4SFmode: - eltype = V4SF_type_node; - break; - case V2DImode: - eltype = V2DI_type_node; - break; - case V2DFmode: - eltype = V2DF_type_node; - break; - default: - gcc_unreachable (); - } - } - - if (k == 0 && !is_store) - return_type = eltype; - else - args = tree_cons (NULL_TREE, eltype, args); - } - ftype = build_function_type (return_type, args); - } - break; - - case AARCH64_SIMD_RESULTPAIR: - { - switch (insn_data[d->code].operand[1].mode) - { - case V8QImode: - ftype = void_ftype_pv8qi_v8qi_v8qi; - break; - case V4HImode: - ftype = void_ftype_pv4hi_v4hi_v4hi; - break; - case V2SImode: - ftype = void_ftype_pv2si_v2si_v2si; - break; - case V2SFmode: - ftype = void_ftype_pv2sf_v2sf_v2sf; - break; - case DImode: - ftype = void_ftype_pdi_di_di; - break; - case V16QImode: - ftype = void_ftype_pv16qi_v16qi_v16qi; - break; - case V8HImode: - ftype = void_ftype_pv8hi_v8hi_v8hi; - break; - case V4SImode: - ftype = void_ftype_pv4si_v4si_v4si; - break; - case V4SFmode: - ftype = void_ftype_pv4sf_v4sf_v4sf; - break; - case V2DImode: - ftype = void_ftype_pv2di_v2di_v2di; - break; - case V2DFmode: - ftype = void_ftype_pv2df_v2df_v2df; - break; - default: - gcc_unreachable (); - } - } - break; - - case AARCH64_SIMD_REINTERP: - { - /* We iterate over 6 doubleword types, then 6 quadword - types. */ - int rhs_d = d->mode % NUM_DREG_TYPES; - int rhs_q = (d->mode - NUM_DREG_TYPES) % NUM_QREG_TYPES; - switch (insn_data[d->code].operand[0].mode) - { - case V8QImode: - ftype = reinterp_ftype_dreg[0][rhs_d]; - break; - case V4HImode: - ftype = reinterp_ftype_dreg[1][rhs_d]; - break; - case V2SImode: - ftype = reinterp_ftype_dreg[2][rhs_d]; - break; - case V2SFmode: - ftype = reinterp_ftype_dreg[3][rhs_d]; - break; - case DImode: - ftype = reinterp_ftype_dreg[4][rhs_d]; - break; - case DFmode: - ftype = reinterp_ftype_dreg[5][rhs_d]; - break; - case V16QImode: - ftype = reinterp_ftype_qreg[0][rhs_q]; - break; - case V8HImode: - ftype = reinterp_ftype_qreg[1][rhs_q]; - break; - case V4SImode: - ftype = reinterp_ftype_qreg[2][rhs_q]; - break; - case V4SFmode: - ftype = reinterp_ftype_qreg[3][rhs_q]; - break; - case V2DImode: - ftype = reinterp_ftype_qreg[4][rhs_q]; - break; - case V2DFmode: - ftype = reinterp_ftype_qreg[5][rhs_q]; - break; - default: - gcc_unreachable (); - } - } - break; - - default: - gcc_unreachable (); - } gcc_assert (ftype != NULL); - snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s%s", - d->name, modenames[d->mode]); + if (print_type_signature_p) + snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s%s_%s", + d->name, modenames[d->mode], type_signature); + else + snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s%s", + d->name, modenames[d->mode]); fndecl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL, NULL_TREE); @@ -983,8 +732,6 @@ SIMD_ARG_STOP } builtin_simd_arg; -#define SIMD_MAX_BUILTIN_ARGS 5 - static rtx aarch64_simd_expand_args (rtx target, int icode, int have_retval, tree exp, ...) 
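[Hedged illustration, not from this diff or the GCC sources: the hunk ending here names each builtin with a per-argument type signature ('s', 'u' or 'p') whenever some qualifier marks an operand as unsigned or polynomial. The stand-alone C sketch below, using made-up helper names, shows only that signature-building step under those assumptions; it is not the patch's implementation.]

/* Editorial sketch only -- mimics the per-argument type signature that
   the loop above appends to a builtin's name when any operand is
   unsigned or polynomial.  The qualifier values mirror the enum in the
   earlier hunk; the builtin name printed in main () is hypothetical.  */

#include <stdio.h>

enum sketch_qualifiers
{
  sketch_qualifier_none = 0x0,
  sketch_qualifier_unsigned = 0x1,
  sketch_qualifier_poly = 0x100
};

/* Fill SIG with one character per qualifier and return nonzero if the
   signature should be printed, i.e. any slot was not plain signed.  */
static int
sketch_build_signature (const enum sketch_qualifiers *quals, int nargs,
			char *sig)
{
  int i, print_p = 0;

  for (i = 0; i < nargs; i++)
    {
      if (quals[i] & sketch_qualifier_unsigned)
	{
	  sig[i] = 'u';
	  print_p = 1;
	}
      else if (quals[i] & sketch_qualifier_poly)
	{
	  sig[i] = 'p';
	  print_p = 1;
	}
      else
	sig[i] = 's';
    }
  sig[nargs] = '\0';
  return print_p;
}

int
main (void)
{
  /* An unsigned binary operation: return value and both operands
     unsigned, analogous to TYPES_BINOPU above.  */
  enum sketch_qualifiers binopu[3]
    = { sketch_qualifier_unsigned, sketch_qualifier_unsigned,
	sketch_qualifier_unsigned };
  char sig[4];

  if (sketch_build_signature (binopu, 3, sig))
    printf ("__builtin_aarch64_example_v8qi_%s\n", sig);  /* prints ..._uuu */
  return 0;
}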
@@ -1110,99 +857,58 @@ { aarch64_simd_builtin_datum *d = &aarch64_simd_builtin_data[fcode - (AARCH64_SIMD_BUILTIN_BASE + 1)]; - aarch64_simd_itype itype = d->itype; enum insn_code icode = d->code; + builtin_simd_arg args[SIMD_MAX_BUILTIN_ARGS]; + int num_args = insn_data[d->code].n_operands; + int is_void = 0; + int k; - switch (itype) - { - case AARCH64_SIMD_UNOP: - return aarch64_simd_expand_args (target, icode, 1, exp, - SIMD_ARG_COPY_TO_REG, - SIMD_ARG_STOP); + is_void = !!(d->qualifiers[0] & qualifier_void); - case AARCH64_SIMD_BINOP: - { - rtx arg2 = expand_normal (CALL_EXPR_ARG (exp, 1)); - /* Handle constants only if the predicate allows it. */ - bool op1_const_int_p = - (CONST_INT_P (arg2) - && (*insn_data[icode].operand[2].predicate) - (arg2, insn_data[icode].operand[2].mode)); - return aarch64_simd_expand_args - (target, icode, 1, exp, - SIMD_ARG_COPY_TO_REG, - op1_const_int_p ? SIMD_ARG_CONSTANT : SIMD_ARG_COPY_TO_REG, - SIMD_ARG_STOP); - } + num_args += is_void; - case AARCH64_SIMD_TERNOP: - return aarch64_simd_expand_args (target, icode, 1, exp, - SIMD_ARG_COPY_TO_REG, - SIMD_ARG_COPY_TO_REG, - SIMD_ARG_COPY_TO_REG, - SIMD_ARG_STOP); + for (k = 1; k < num_args; k++) + { + /* We have four arrays of data, each indexed in a different fashion. + qualifiers - element 0 always describes the function return type. + operands - element 0 is either the operand for return value (if + the function has a non-void return type) or the operand for the + first argument. + expr_args - element 0 always holds the first argument. + args - element 0 is always used for the return type. */ + int qualifiers_k = k; + int operands_k = k - is_void; + int expr_args_k = k - 1; - case AARCH64_SIMD_QUADOP: - return aarch64_simd_expand_args (target, icode, 1, exp, - SIMD_ARG_COPY_TO_REG, - SIMD_ARG_COPY_TO_REG, - SIMD_ARG_COPY_TO_REG, - SIMD_ARG_COPY_TO_REG, - SIMD_ARG_STOP); - case AARCH64_SIMD_LOAD1: - case AARCH64_SIMD_LOADSTRUCT: - return aarch64_simd_expand_args (target, icode, 1, exp, - SIMD_ARG_COPY_TO_REG, SIMD_ARG_STOP); + if (d->qualifiers[qualifiers_k] & qualifier_immediate) + args[k] = SIMD_ARG_CONSTANT; + else if (d->qualifiers[qualifiers_k] & qualifier_maybe_immediate) + { + rtx arg + = expand_normal (CALL_EXPR_ARG (exp, + (expr_args_k))); + /* Handle constants only if the predicate allows it. */ + bool op_const_int_p = + (CONST_INT_P (arg) + && (*insn_data[icode].operand[operands_k].predicate) + (arg, insn_data[icode].operand[operands_k].mode)); + args[k] = op_const_int_p ? 
SIMD_ARG_CONSTANT : SIMD_ARG_COPY_TO_REG; + } + else + args[k] = SIMD_ARG_COPY_TO_REG; - case AARCH64_SIMD_STORE1: - case AARCH64_SIMD_STORESTRUCT: - return aarch64_simd_expand_args (target, icode, 0, exp, - SIMD_ARG_COPY_TO_REG, - SIMD_ARG_COPY_TO_REG, SIMD_ARG_STOP); + } + args[k] = SIMD_ARG_STOP; - case AARCH64_SIMD_REINTERP: - return aarch64_simd_expand_args (target, icode, 1, exp, - SIMD_ARG_COPY_TO_REG, SIMD_ARG_STOP); - - case AARCH64_SIMD_CREATE: - return aarch64_simd_expand_args (target, icode, 1, exp, - SIMD_ARG_COPY_TO_REG, SIMD_ARG_STOP); - - case AARCH64_SIMD_COMBINE: - return aarch64_simd_expand_args (target, icode, 1, exp, - SIMD_ARG_COPY_TO_REG, - SIMD_ARG_COPY_TO_REG, SIMD_ARG_STOP); - - case AARCH64_SIMD_GETLANE: - return aarch64_simd_expand_args (target, icode, 1, exp, - SIMD_ARG_COPY_TO_REG, - SIMD_ARG_CONSTANT, - SIMD_ARG_STOP); - - case AARCH64_SIMD_SETLANE: - return aarch64_simd_expand_args (target, icode, 1, exp, - SIMD_ARG_COPY_TO_REG, - SIMD_ARG_COPY_TO_REG, - SIMD_ARG_CONSTANT, - SIMD_ARG_STOP); - - case AARCH64_SIMD_SHIFTIMM: - return aarch64_simd_expand_args (target, icode, 1, exp, - SIMD_ARG_COPY_TO_REG, - SIMD_ARG_CONSTANT, - SIMD_ARG_STOP); - - case AARCH64_SIMD_SHIFTACC: - case AARCH64_SIMD_SHIFTINSERT: - return aarch64_simd_expand_args (target, icode, 1, exp, - SIMD_ARG_COPY_TO_REG, - SIMD_ARG_COPY_TO_REG, - SIMD_ARG_CONSTANT, - SIMD_ARG_STOP); - - default: - gcc_unreachable (); - } + /* The interface to aarch64_simd_expand_args expects a 0 if + the function is void, and a 1 if it is not. */ + return aarch64_simd_expand_args + (target, icode, !is_void, exp, + args[1], + args[2], + args[3], + args[4], + SIMD_ARG_STOP); } /* Expand an expression EXP that calls a built-in function, @@ -1242,11 +948,11 @@ #define AARCH64_CHECK_BUILTIN_MODE(C, N) 1 #define AARCH64_FIND_FRINT_VARIANT(N) \ (AARCH64_CHECK_BUILTIN_MODE (2, D) \ - ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_##N##v2df] \ + ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v2df] \ : (AARCH64_CHECK_BUILTIN_MODE (4, S) \ - ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_##N##v4sf] \ + ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v4sf] \ : (AARCH64_CHECK_BUILTIN_MODE (2, S) \ - ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_##N##v2sf] \ + ? 
aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v2sf] \ : NULL_TREE))) if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL) { @@ -1259,30 +965,82 @@ && in_mode == N##Fmode && in_n == C) case BUILT_IN_FLOOR: case BUILT_IN_FLOORF: - return AARCH64_FIND_FRINT_VARIANT (frintm); + return AARCH64_FIND_FRINT_VARIANT (floor); case BUILT_IN_CEIL: case BUILT_IN_CEILF: - return AARCH64_FIND_FRINT_VARIANT (frintp); + return AARCH64_FIND_FRINT_VARIANT (ceil); case BUILT_IN_TRUNC: case BUILT_IN_TRUNCF: - return AARCH64_FIND_FRINT_VARIANT (frintz); + return AARCH64_FIND_FRINT_VARIANT (btrunc); case BUILT_IN_ROUND: case BUILT_IN_ROUNDF: - return AARCH64_FIND_FRINT_VARIANT (frinta); + return AARCH64_FIND_FRINT_VARIANT (round); case BUILT_IN_NEARBYINT: case BUILT_IN_NEARBYINTF: - return AARCH64_FIND_FRINT_VARIANT (frinti); + return AARCH64_FIND_FRINT_VARIANT (nearbyint); case BUILT_IN_SQRT: case BUILT_IN_SQRTF: return AARCH64_FIND_FRINT_VARIANT (sqrt); #undef AARCH64_CHECK_BUILTIN_MODE #define AARCH64_CHECK_BUILTIN_MODE(C, N) \ + (out_mode == SImode && out_n == C \ + && in_mode == N##Imode && in_n == C) + case BUILT_IN_CLZ: + { + if (AARCH64_CHECK_BUILTIN_MODE (4, S)) + return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_clzv4si]; + return NULL_TREE; + } +#undef AARCH64_CHECK_BUILTIN_MODE +#define AARCH64_CHECK_BUILTIN_MODE(C, N) \ (out_mode == N##Imode && out_n == C \ && in_mode == N##Fmode && in_n == C) case BUILT_IN_LFLOOR: - return AARCH64_FIND_FRINT_VARIANT (fcvtms); + case BUILT_IN_IFLOORF: + { + enum aarch64_builtins builtin; + if (AARCH64_CHECK_BUILTIN_MODE (2, D)) + builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv2dfv2di; + else if (AARCH64_CHECK_BUILTIN_MODE (4, S)) + builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv4sfv4si; + else if (AARCH64_CHECK_BUILTIN_MODE (2, S)) + builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv2sfv2si; + else + return NULL_TREE; + + return aarch64_builtin_decls[builtin]; + } case BUILT_IN_LCEIL: - return AARCH64_FIND_FRINT_VARIANT (fcvtps); + case BUILT_IN_ICEILF: + { + enum aarch64_builtins builtin; + if (AARCH64_CHECK_BUILTIN_MODE (2, D)) + builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv2dfv2di; + else if (AARCH64_CHECK_BUILTIN_MODE (4, S)) + builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv4sfv4si; + else if (AARCH64_CHECK_BUILTIN_MODE (2, S)) + builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv2sfv2si; + else + return NULL_TREE; + + return aarch64_builtin_decls[builtin]; + } + case BUILT_IN_LROUND: + case BUILT_IN_IROUNDF: + { + enum aarch64_builtins builtin; + if (AARCH64_CHECK_BUILTIN_MODE (2, D)) + builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv2dfv2di; + else if (AARCH64_CHECK_BUILTIN_MODE (4, S)) + builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv4sfv4si; + else if (AARCH64_CHECK_BUILTIN_MODE (2, S)) + builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv2sfv2si; + else + return NULL_TREE; + + return aarch64_builtin_decls[builtin]; + } + default: return NULL_TREE; } @@ -1290,5 +1048,160 @@ return NULL_TREE; } + +#undef VAR1 +#define VAR1(T, N, MAP, A) \ + case AARCH64_SIMD_BUILTIN_##T##_##N##A: + +tree +aarch64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *args, + bool ignore ATTRIBUTE_UNUSED) +{ + int fcode = DECL_FUNCTION_CODE (fndecl); + tree type = TREE_TYPE (TREE_TYPE (fndecl)); + + switch (fcode) + { + BUILTIN_VALLDI (UNOP, abs, 2) + return fold_build1 (ABS_EXPR, type, args[0]); + break; + BUILTIN_VALLDI (BINOP, cmge, 0) + return fold_build2 (GE_EXPR, type, args[0], args[1]); + break; + BUILTIN_VALLDI (BINOP, cmgt, 0) + return fold_build2 (GT_EXPR, type, args[0], args[1]); + break; + 
BUILTIN_VALLDI (BINOP, cmeq, 0) + return fold_build2 (EQ_EXPR, type, args[0], args[1]); + break; + BUILTIN_VSDQ_I_DI (BINOP, cmtst, 0) + { + tree and_node = fold_build2 (BIT_AND_EXPR, type, args[0], args[1]); + tree vec_zero_node = build_zero_cst (type); + return fold_build2 (NE_EXPR, type, and_node, vec_zero_node); + break; + } + VAR1 (UNOP, floatv2si, 2, v2sf) + VAR1 (UNOP, floatv4si, 2, v4sf) + VAR1 (UNOP, floatv2di, 2, v2df) + return fold_build1 (FLOAT_EXPR, type, args[0]); + default: + break; + } + + return NULL_TREE; +} + +bool +aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi) +{ + bool changed = false; + gimple stmt = gsi_stmt (*gsi); + tree call = gimple_call_fn (stmt); + tree fndecl; + gimple new_stmt = NULL; + if (call) + { + fndecl = gimple_call_fndecl (stmt); + if (fndecl) + { + int fcode = DECL_FUNCTION_CODE (fndecl); + int nargs = gimple_call_num_args (stmt); + tree *args = (nargs > 0 + ? gimple_call_arg_ptr (stmt, 0) + : &error_mark_node); + + switch (fcode) + { + BUILTIN_VALL (UNOP, reduc_splus_, 10) + new_stmt = gimple_build_assign_with_ops ( + REDUC_PLUS_EXPR, + gimple_call_lhs (stmt), + args[0], + NULL_TREE); + break; + BUILTIN_VDQIF (UNOP, reduc_smax_, 10) + new_stmt = gimple_build_assign_with_ops ( + REDUC_MAX_EXPR, + gimple_call_lhs (stmt), + args[0], + NULL_TREE); + break; + BUILTIN_VDQIF (UNOP, reduc_smin_, 10) + new_stmt = gimple_build_assign_with_ops ( + REDUC_MIN_EXPR, + gimple_call_lhs (stmt), + args[0], + NULL_TREE); + break; + + default: + break; + } + } + } + + if (new_stmt) + { + gsi_replace (gsi, new_stmt, true); + changed = true; + } + + return changed; +} + #undef AARCH64_CHECK_BUILTIN_MODE #undef AARCH64_FIND_FRINT_VARIANT +#undef BUILTIN_DX +#undef BUILTIN_SDQ_I +#undef BUILTIN_SD_HSI +#undef BUILTIN_V2F +#undef BUILTIN_VALL +#undef BUILTIN_VB +#undef BUILTIN_VD +#undef BUILTIN_VDC +#undef BUILTIN_VDIC +#undef BUILTIN_VDN +#undef BUILTIN_VDQ +#undef BUILTIN_VDQF +#undef BUILTIN_VDQH +#undef BUILTIN_VDQHS +#undef BUILTIN_VDQIF +#undef BUILTIN_VDQM +#undef BUILTIN_VDQV +#undef BUILTIN_VDQ_BHSI +#undef BUILTIN_VDQ_I +#undef BUILTIN_VDW +#undef BUILTIN_VD_BHSI +#undef BUILTIN_VD_HSI +#undef BUILTIN_VD_RE +#undef BUILTIN_VQ +#undef BUILTIN_VQN +#undef BUILTIN_VQW +#undef BUILTIN_VQ_HSI +#undef BUILTIN_VQ_S +#undef BUILTIN_VSDQ_HSI +#undef BUILTIN_VSDQ_I +#undef BUILTIN_VSDQ_I_BHSI +#undef BUILTIN_VSDQ_I_DI +#undef BUILTIN_VSD_HSI +#undef BUILTIN_VSQN_HSDI +#undef BUILTIN_VSTRUCT +#undef CF0 +#undef CF1 +#undef CF2 +#undef CF3 +#undef CF4 +#undef CF10 +#undef VAR1 +#undef VAR2 +#undef VAR3 +#undef VAR4 +#undef VAR5 +#undef VAR6 +#undef VAR7 +#undef VAR8 +#undef VAR9 +#undef VAR10 +#undef VAR11 + --- a/src/gcc/config/aarch64/aarch64-protos.h +++ b/src/gcc/config/aarch64/aarch64-protos.h @@ -68,6 +68,24 @@ Each of of these represents a thread-local symbol, and corresponds to the thread local storage relocation operator for the symbol being referred to. + SYMBOL_TINY_ABSOLUTE + + Generate symbol accesses as a PC relative address using a single + instruction. To compute the address of symbol foo, we generate: + + ADR x0, foo + + SYMBOL_TINY_GOT + + Generate symbol accesses via the GOT using a single PC relative + instruction. To compute the address of symbol foo, we generate: + + ldr t0, :got:foo + + The value of foo can subsequently read using: + + ldrb t0, [t0] + SYMBOL_FORCE_TO_MEM : Global variables are addressed using constant pool. All variable addresses are spilled into constant pools. 
The constant pools themselves are addressed using PC @@ -81,6 +99,8 @@ SYMBOL_SMALL_TLSDESC, SYMBOL_SMALL_GOTTPREL, SYMBOL_SMALL_TPREL, + SYMBOL_TINY_ABSOLUTE, + SYMBOL_TINY_GOT, SYMBOL_FORCE_TO_MEM }; @@ -126,25 +146,55 @@ const int FP2FP; }; +/* Cost for vector insn classes. */ +struct cpu_vector_cost +{ + const int scalar_stmt_cost; /* Cost of any scalar operation, + excluding load and store. */ + const int scalar_load_cost; /* Cost of scalar load. */ + const int scalar_store_cost; /* Cost of scalar store. */ + const int vec_stmt_cost; /* Cost of any vector operation, + excluding load, store, + vector-to-scalar and + scalar-to-vector operation. */ + const int vec_to_scalar_cost; /* Cost of vec-to-scalar operation. */ + const int scalar_to_vec_cost; /* Cost of scalar-to-vector + operation. */ + const int vec_align_load_cost; /* Cost of aligned vector load. */ + const int vec_unalign_load_cost; /* Cost of unaligned vector load. */ + const int vec_unalign_store_cost; /* Cost of unaligned vector store. */ + const int vec_store_cost; /* Cost of vector store. */ + const int cond_taken_branch_cost; /* Cost of taken branch. */ + const int cond_not_taken_branch_cost; /* Cost of not taken branch. */ +}; + struct tune_params { const struct cpu_rtx_cost_table *const insn_extra_cost; const struct cpu_addrcost_table *const addr_cost; const struct cpu_regmove_cost *const regmove_cost; + const struct cpu_vector_cost *const vec_costs; const int memmov_cost; }; HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned); bool aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode); +enum aarch64_symbol_type +aarch64_classify_symbolic_expression (rtx, enum aarch64_symbol_context); bool aarch64_constant_address_p (rtx); bool aarch64_float_const_zero_rtx_p (rtx); bool aarch64_function_arg_regno_p (unsigned); bool aarch64_gen_movmemqi (rtx *); +bool aarch64_gimple_fold_builtin (gimple_stmt_iterator *); bool aarch64_is_extend_from_extract (enum machine_mode, rtx, rtx); bool aarch64_is_long_call_p (rtx); bool aarch64_label_mentioned_p (rtx); bool aarch64_legitimate_pic_operand_p (rtx); bool aarch64_move_imm (HOST_WIDE_INT, enum machine_mode); +bool aarch64_mov_operand_p (rtx, enum aarch64_symbol_context, + enum machine_mode); +char *aarch64_output_scalar_simd_mov_immediate (rtx, enum machine_mode); +char *aarch64_output_simd_mov_immediate (rtx, enum machine_mode, unsigned); bool aarch64_pad_arg_upward (enum machine_mode, const_tree); bool aarch64_pad_reg_upward (enum machine_mode, const_tree, bool); bool aarch64_regno_ok_for_base_p (int, bool); @@ -151,10 +201,11 @@ bool aarch64_regno_ok_for_index_p (int, bool); bool aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode); bool aarch64_simd_imm_zero_p (rtx, enum machine_mode); +bool aarch64_simd_scalar_immediate_valid_for_move (rtx, enum machine_mode); bool aarch64_simd_shift_imm_p (rtx, enum machine_mode, bool); +bool aarch64_simd_valid_immediate (rtx, enum machine_mode, bool, + struct simd_immediate_info *); bool aarch64_symbolic_address_p (rtx); -bool aarch64_symbolic_constant_p (rtx, enum aarch64_symbol_context, - enum aarch64_symbol_type *); bool aarch64_uimm12_shift (HOST_WIDE_INT); const char *aarch64_output_casesi (rtx *); enum aarch64_symbol_type aarch64_classify_symbol (rtx, @@ -165,9 +216,6 @@ int aarch64_hard_regno_mode_ok (unsigned, enum machine_mode); int aarch64_hard_regno_nregs (unsigned, enum machine_mode); int aarch64_simd_attr_length_move (rtx); -int aarch64_simd_immediate_valid_for_move (rtx, enum machine_mode, rtx *, - int 
*, unsigned char *, int *, - int *); int aarch64_uxt_size (int, HOST_WIDE_INT); rtx aarch64_final_eh_return_addr (void); rtx aarch64_legitimize_reload_address (rtx *, enum machine_mode, int, int, int); @@ -177,6 +225,7 @@ bool aarch64_simd_mem_operand_p (rtx); rtx aarch64_simd_vect_par_cnst_half (enum machine_mode, bool); rtx aarch64_tls_get_addr (void); +tree aarch64_fold_builtin (tree, int, tree *, bool); unsigned aarch64_dbx_register_number (unsigned); unsigned aarch64_trampoline_size (void); void aarch64_asm_output_labelref (FILE *, const char *); @@ -216,6 +265,10 @@ bool aarch64_split_128bit_move_p (rtx, rtx); +void aarch64_split_simd_combine (rtx, rtx, rtx); + +void aarch64_split_simd_move (rtx, rtx); + /* Check for a legitimate floating point constant for FMOV. */ bool aarch64_float_const_representable_p (rtx); @@ -249,6 +302,4 @@ extern void aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel); extern bool aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel); - -char* aarch64_output_simd_mov_immediate (rtx *, enum machine_mode, unsigned); #endif /* GCC_AARCH64_PROTOS_H */ --- a/src/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/src/gcc/config/aarch64/aarch64-simd-builtins.def @@ -18,248 +18,367 @@ along with GCC; see the file COPYING3. If not see . */ -/* In the list below, the BUILTIN_ macros should - correspond to the iterator used to construct the instruction's - patterns in aarch64-simd.md. A helpful idiom to follow when - adding new builtins is to add a line for each pattern in the md - file. Thus, ADDP, which has one pattern defined for the VD_BHSI - iterator, and one for DImode, has two entries below. */ +/* In the list below, the BUILTIN_ macros expand to create + builtins for each of the modes described by . When adding + new builtins to this list, a helpful idiom to follow is to add + a line for each pattern in the md file. Thus, ADDP, which has one + pattern defined for the VD_BHSI iterator, and one for DImode, has two + entries below. - BUILTIN_VD_RE (CREATE, create) - BUILTIN_VQ_S (GETLANE, get_lane_signed) - BUILTIN_VDQ (GETLANE, get_lane_unsigned) - BUILTIN_VDQF (GETLANE, get_lane) - VAR1 (GETLANE, get_lane, di) - BUILTIN_VDC (COMBINE, combine) - BUILTIN_VB (BINOP, pmul) - BUILTIN_VDQF (UNOP, sqrt) - BUILTIN_VD_BHSI (BINOP, addp) - VAR1 (UNOP, addp, di) + Parameter 1 is the 'type' of the intrinsic. This is used to + describe the type modifiers (for example; unsigned) applied to + each of the parameters to the intrinsic function. - BUILTIN_VD_RE (REINTERP, reinterpretdi) - BUILTIN_VDC (REINTERP, reinterpretv8qi) - BUILTIN_VDC (REINTERP, reinterpretv4hi) - BUILTIN_VDC (REINTERP, reinterpretv2si) - BUILTIN_VDC (REINTERP, reinterpretv2sf) - BUILTIN_VQ (REINTERP, reinterpretv16qi) - BUILTIN_VQ (REINTERP, reinterpretv8hi) - BUILTIN_VQ (REINTERP, reinterpretv4si) - BUILTIN_VQ (REINTERP, reinterpretv4sf) - BUILTIN_VQ (REINTERP, reinterpretv2di) - BUILTIN_VQ (REINTERP, reinterpretv2df) + Parameter 2 is the name of the intrinsic. This is appended + to `__builtin_aarch64_` to give the intrinsic name + as exported to the front-ends. - BUILTIN_VDQ_I (BINOP, dup_lane) - BUILTIN_SDQ_I (BINOP, dup_lane) + Parameter 3 describes how to map from the name to the CODE_FOR_ + macro holding the RTL pattern for the intrinsic. This mapping is: + 0 - CODE_FOR_aarch64_ + 1-9 - CODE_FOR_<1-9> + 10 - CODE_FOR_. 
*/ + + BUILTIN_VD_RE (CREATE, create, 0) + BUILTIN_VDC (COMBINE, combine, 0) + BUILTIN_VB (BINOP, pmul, 0) + BUILTIN_VDQF (UNOP, sqrt, 2) + BUILTIN_VD_BHSI (BINOP, addp, 0) + VAR1 (UNOP, addp, 0, di) + VAR1 (UNOP, clz, 2, v4si) + + BUILTIN_VALL (GETLANE, get_lane, 0) + VAR1 (GETLANE, get_lane, 0, di) + + BUILTIN_VD_RE (REINTERP, reinterpretdi, 0) + BUILTIN_VDC (REINTERP, reinterpretv8qi, 0) + BUILTIN_VDC (REINTERP, reinterpretv4hi, 0) + BUILTIN_VDC (REINTERP, reinterpretv2si, 0) + BUILTIN_VDC (REINTERP, reinterpretv2sf, 0) + BUILTIN_VQ (REINTERP, reinterpretv16qi, 0) + BUILTIN_VQ (REINTERP, reinterpretv8hi, 0) + BUILTIN_VQ (REINTERP, reinterpretv4si, 0) + BUILTIN_VQ (REINTERP, reinterpretv4sf, 0) + BUILTIN_VQ (REINTERP, reinterpretv2di, 0) + BUILTIN_VQ (REINTERP, reinterpretv2df, 0) + + BUILTIN_VDQ_I (BINOP, dup_lane, 0) /* Implemented by aarch64_qshl. */ - BUILTIN_VSDQ_I (BINOP, sqshl) - BUILTIN_VSDQ_I (BINOP, uqshl) - BUILTIN_VSDQ_I (BINOP, sqrshl) - BUILTIN_VSDQ_I (BINOP, uqrshl) + BUILTIN_VSDQ_I (BINOP, sqshl, 0) + BUILTIN_VSDQ_I (BINOP, uqshl, 0) + BUILTIN_VSDQ_I (BINOP, sqrshl, 0) + BUILTIN_VSDQ_I (BINOP, uqrshl, 0) /* Implemented by aarch64_. */ - BUILTIN_VSDQ_I (BINOP, sqadd) - BUILTIN_VSDQ_I (BINOP, uqadd) - BUILTIN_VSDQ_I (BINOP, sqsub) - BUILTIN_VSDQ_I (BINOP, uqsub) + BUILTIN_VSDQ_I (BINOP, sqadd, 0) + BUILTIN_VSDQ_I (BINOP, uqadd, 0) + BUILTIN_VSDQ_I (BINOP, sqsub, 0) + BUILTIN_VSDQ_I (BINOP, uqsub, 0) /* Implemented by aarch64_qadd. */ - BUILTIN_VSDQ_I (BINOP, suqadd) - BUILTIN_VSDQ_I (BINOP, usqadd) + BUILTIN_VSDQ_I (BINOP, suqadd, 0) + BUILTIN_VSDQ_I (BINOP, usqadd, 0) /* Implemented by aarch64_get_dreg. */ - BUILTIN_VDC (GETLANE, get_dregoi) - BUILTIN_VDC (GETLANE, get_dregci) - BUILTIN_VDC (GETLANE, get_dregxi) + BUILTIN_VDC (GETLANE, get_dregoi, 0) + BUILTIN_VDC (GETLANE, get_dregci, 0) + BUILTIN_VDC (GETLANE, get_dregxi, 0) /* Implemented by aarch64_get_qreg. */ - BUILTIN_VQ (GETLANE, get_qregoi) - BUILTIN_VQ (GETLANE, get_qregci) - BUILTIN_VQ (GETLANE, get_qregxi) + BUILTIN_VQ (GETLANE, get_qregoi, 0) + BUILTIN_VQ (GETLANE, get_qregci, 0) + BUILTIN_VQ (GETLANE, get_qregxi, 0) /* Implemented by aarch64_set_qreg. */ - BUILTIN_VQ (SETLANE, set_qregoi) - BUILTIN_VQ (SETLANE, set_qregci) - BUILTIN_VQ (SETLANE, set_qregxi) + BUILTIN_VQ (SETLANE, set_qregoi, 0) + BUILTIN_VQ (SETLANE, set_qregci, 0) + BUILTIN_VQ (SETLANE, set_qregxi, 0) /* Implemented by aarch64_ld. */ - BUILTIN_VDC (LOADSTRUCT, ld2) - BUILTIN_VDC (LOADSTRUCT, ld3) - BUILTIN_VDC (LOADSTRUCT, ld4) + BUILTIN_VDC (LOADSTRUCT, ld2, 0) + BUILTIN_VDC (LOADSTRUCT, ld3, 0) + BUILTIN_VDC (LOADSTRUCT, ld4, 0) /* Implemented by aarch64_ld. */ - BUILTIN_VQ (LOADSTRUCT, ld2) - BUILTIN_VQ (LOADSTRUCT, ld3) - BUILTIN_VQ (LOADSTRUCT, ld4) + BUILTIN_VQ (LOADSTRUCT, ld2, 0) + BUILTIN_VQ (LOADSTRUCT, ld3, 0) + BUILTIN_VQ (LOADSTRUCT, ld4, 0) /* Implemented by aarch64_st. */ - BUILTIN_VDC (STORESTRUCT, st2) - BUILTIN_VDC (STORESTRUCT, st3) - BUILTIN_VDC (STORESTRUCT, st4) + BUILTIN_VDC (STORESTRUCT, st2, 0) + BUILTIN_VDC (STORESTRUCT, st3, 0) + BUILTIN_VDC (STORESTRUCT, st4, 0) /* Implemented by aarch64_st. 
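For reference, the numeric third argument that this patch adds to every entry selects how the builtin name is mapped onto a CODE_FOR_ insn-code name, following the 0 / 1-9 / 10 scheme described in the header comment above. A minimal standalone sketch of that selection rule is below; the helper and the sample names are illustrative only (they assume the scheme as described and are not code from aarch64-builtins.c).

    #include <stdio.h>

    /* Illustrative mapping from (name, mode, selector) to a CODE_FOR_ name:
       0       -> "aarch64_" prefix plus name and mode suffix,
       1 to 9  -> name and mode suffix followed by the digit itself,
       10      -> just name and mode suffix.  */
    static void
    map_to_code_for (char *buf, size_t len, const char *name,
                     const char *mode, int selector)
    {
      if (selector == 0)
        snprintf (buf, len, "CODE_FOR_aarch64_%s%s", name, mode);
      else if (selector >= 1 && selector <= 9)
        snprintf (buf, len, "CODE_FOR_%s%s%d", name, mode, selector);
      else /* selector == 10 */
        snprintf (buf, len, "CODE_FOR_%s%s", name, mode);
    }

    int
    main (void)
    {
      char buf[64];

      map_to_code_for (buf, sizeof buf, "pmul", "v8qi", 0);
      printf ("%s\n", buf);   /* CODE_FOR_aarch64_pmulv8qi */
      map_to_code_for (buf, sizeof buf, "sqrt", "v2df", 2);
      printf ("%s\n", buf);   /* CODE_FOR_sqrtv2df2 */
      map_to_code_for (buf, sizeof buf, "reduc_smax_", "v4si", 10);
      printf ("%s\n", buf);   /* CODE_FOR_reduc_smax_v4si */
      return 0;
    }

Under this reading, an entry such as ashl with selector 3 would resolve to the standard ashl<mode>3 shift pattern rather than to an aarch64_-prefixed pattern.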
*/ - BUILTIN_VQ (STORESTRUCT, st2) - BUILTIN_VQ (STORESTRUCT, st3) - BUILTIN_VQ (STORESTRUCT, st4) + BUILTIN_VQ (STORESTRUCT, st2, 0) + BUILTIN_VQ (STORESTRUCT, st3, 0) + BUILTIN_VQ (STORESTRUCT, st4, 0) - BUILTIN_VQW (BINOP, saddl2) - BUILTIN_VQW (BINOP, uaddl2) - BUILTIN_VQW (BINOP, ssubl2) - BUILTIN_VQW (BINOP, usubl2) - BUILTIN_VQW (BINOP, saddw2) - BUILTIN_VQW (BINOP, uaddw2) - BUILTIN_VQW (BINOP, ssubw2) - BUILTIN_VQW (BINOP, usubw2) + BUILTIN_VQW (BINOP, saddl2, 0) + BUILTIN_VQW (BINOP, uaddl2, 0) + BUILTIN_VQW (BINOP, ssubl2, 0) + BUILTIN_VQW (BINOP, usubl2, 0) + BUILTIN_VQW (BINOP, saddw2, 0) + BUILTIN_VQW (BINOP, uaddw2, 0) + BUILTIN_VQW (BINOP, ssubw2, 0) + BUILTIN_VQW (BINOP, usubw2, 0) /* Implemented by aarch64_l. */ - BUILTIN_VDW (BINOP, saddl) - BUILTIN_VDW (BINOP, uaddl) - BUILTIN_VDW (BINOP, ssubl) - BUILTIN_VDW (BINOP, usubl) + BUILTIN_VDW (BINOP, saddl, 0) + BUILTIN_VDW (BINOP, uaddl, 0) + BUILTIN_VDW (BINOP, ssubl, 0) + BUILTIN_VDW (BINOP, usubl, 0) /* Implemented by aarch64_w. */ - BUILTIN_VDW (BINOP, saddw) - BUILTIN_VDW (BINOP, uaddw) - BUILTIN_VDW (BINOP, ssubw) - BUILTIN_VDW (BINOP, usubw) + BUILTIN_VDW (BINOP, saddw, 0) + BUILTIN_VDW (BINOP, uaddw, 0) + BUILTIN_VDW (BINOP, ssubw, 0) + BUILTIN_VDW (BINOP, usubw, 0) /* Implemented by aarch64_h. */ - BUILTIN_VQ_S (BINOP, shadd) - BUILTIN_VQ_S (BINOP, uhadd) - BUILTIN_VQ_S (BINOP, srhadd) - BUILTIN_VQ_S (BINOP, urhadd) + BUILTIN_VQ_S (BINOP, shadd, 0) + BUILTIN_VQ_S (BINOP, uhadd, 0) + BUILTIN_VQ_S (BINOP, srhadd, 0) + BUILTIN_VQ_S (BINOP, urhadd, 0) /* Implemented by aarch64_hn. */ - BUILTIN_VQN (BINOP, addhn) - BUILTIN_VQN (BINOP, raddhn) + BUILTIN_VQN (BINOP, addhn, 0) + BUILTIN_VQN (BINOP, raddhn, 0) /* Implemented by aarch64_hn2. */ - BUILTIN_VQN (TERNOP, addhn2) - BUILTIN_VQN (TERNOP, raddhn2) + BUILTIN_VQN (TERNOP, addhn2, 0) + BUILTIN_VQN (TERNOP, raddhn2, 0) - BUILTIN_VSQN_HSDI (UNOP, sqmovun) + BUILTIN_VSQN_HSDI (UNOP, sqmovun, 0) /* Implemented by aarch64_qmovn. */ - BUILTIN_VSQN_HSDI (UNOP, sqmovn) - BUILTIN_VSQN_HSDI (UNOP, uqmovn) + BUILTIN_VSQN_HSDI (UNOP, sqmovn, 0) + BUILTIN_VSQN_HSDI (UNOP, uqmovn, 0) /* Implemented by aarch64_s. */ - BUILTIN_VSDQ_I_BHSI (UNOP, sqabs) - BUILTIN_VSDQ_I_BHSI (UNOP, sqneg) + BUILTIN_VSDQ_I_BHSI (UNOP, sqabs, 0) + BUILTIN_VSDQ_I_BHSI (UNOP, sqneg, 0) - BUILTIN_VSD_HSI (QUADOP, sqdmlal_lane) - BUILTIN_VSD_HSI (QUADOP, sqdmlsl_lane) - BUILTIN_VSD_HSI (QUADOP, sqdmlal_laneq) - BUILTIN_VSD_HSI (QUADOP, sqdmlsl_laneq) - BUILTIN_VQ_HSI (TERNOP, sqdmlal2) - BUILTIN_VQ_HSI (TERNOP, sqdmlsl2) - BUILTIN_VQ_HSI (QUADOP, sqdmlal2_lane) - BUILTIN_VQ_HSI (QUADOP, sqdmlsl2_lane) - BUILTIN_VQ_HSI (QUADOP, sqdmlal2_laneq) - BUILTIN_VQ_HSI (QUADOP, sqdmlsl2_laneq) - BUILTIN_VQ_HSI (TERNOP, sqdmlal2_n) - BUILTIN_VQ_HSI (TERNOP, sqdmlsl2_n) + BUILTIN_VSD_HSI (QUADOP, sqdmlal_lane, 0) + BUILTIN_VSD_HSI (QUADOP, sqdmlsl_lane, 0) + BUILTIN_VSD_HSI (QUADOP, sqdmlal_laneq, 0) + BUILTIN_VSD_HSI (QUADOP, sqdmlsl_laneq, 0) + BUILTIN_VQ_HSI (TERNOP, sqdmlal2, 0) + BUILTIN_VQ_HSI (TERNOP, sqdmlsl2, 0) + BUILTIN_VQ_HSI (QUADOP, sqdmlal2_lane, 0) + BUILTIN_VQ_HSI (QUADOP, sqdmlsl2_lane, 0) + BUILTIN_VQ_HSI (QUADOP, sqdmlal2_laneq, 0) + BUILTIN_VQ_HSI (QUADOP, sqdmlsl2_laneq, 0) + BUILTIN_VQ_HSI (TERNOP, sqdmlal2_n, 0) + BUILTIN_VQ_HSI (TERNOP, sqdmlsl2_n, 0) /* Implemented by aarch64_sqdmll. */ - BUILTIN_VSD_HSI (TERNOP, sqdmlal) - BUILTIN_VSD_HSI (TERNOP, sqdmlsl) + BUILTIN_VSD_HSI (TERNOP, sqdmlal, 0) + BUILTIN_VSD_HSI (TERNOP, sqdmlsl, 0) /* Implemented by aarch64_sqdmll_n. 
*/ - BUILTIN_VD_HSI (TERNOP, sqdmlal_n) - BUILTIN_VD_HSI (TERNOP, sqdmlsl_n) + BUILTIN_VD_HSI (TERNOP, sqdmlal_n, 0) + BUILTIN_VD_HSI (TERNOP, sqdmlsl_n, 0) - BUILTIN_VSD_HSI (BINOP, sqdmull) - BUILTIN_VSD_HSI (TERNOP, sqdmull_lane) - BUILTIN_VD_HSI (TERNOP, sqdmull_laneq) - BUILTIN_VD_HSI (BINOP, sqdmull_n) - BUILTIN_VQ_HSI (BINOP, sqdmull2) - BUILTIN_VQ_HSI (TERNOP, sqdmull2_lane) - BUILTIN_VQ_HSI (TERNOP, sqdmull2_laneq) - BUILTIN_VQ_HSI (BINOP, sqdmull2_n) + BUILTIN_VSD_HSI (BINOP, sqdmull, 0) + BUILTIN_VSD_HSI (TERNOP, sqdmull_lane, 0) + BUILTIN_VD_HSI (TERNOP, sqdmull_laneq, 0) + BUILTIN_VD_HSI (BINOP, sqdmull_n, 0) + BUILTIN_VQ_HSI (BINOP, sqdmull2, 0) + BUILTIN_VQ_HSI (TERNOP, sqdmull2_lane, 0) + BUILTIN_VQ_HSI (TERNOP, sqdmull2_laneq, 0) + BUILTIN_VQ_HSI (BINOP, sqdmull2_n, 0) /* Implemented by aarch64_sqdmulh. */ - BUILTIN_VSDQ_HSI (BINOP, sqdmulh) - BUILTIN_VSDQ_HSI (BINOP, sqrdmulh) + BUILTIN_VSDQ_HSI (BINOP, sqdmulh, 0) + BUILTIN_VSDQ_HSI (BINOP, sqrdmulh, 0) /* Implemented by aarch64_sqdmulh_lane. */ - BUILTIN_VDQHS (TERNOP, sqdmulh_lane) - BUILTIN_VDQHS (TERNOP, sqdmulh_laneq) - BUILTIN_VDQHS (TERNOP, sqrdmulh_lane) - BUILTIN_VDQHS (TERNOP, sqrdmulh_laneq) - BUILTIN_SD_HSI (TERNOP, sqdmulh_lane) - BUILTIN_SD_HSI (TERNOP, sqrdmulh_lane) + BUILTIN_VDQHS (TERNOP, sqdmulh_lane, 0) + BUILTIN_VDQHS (TERNOP, sqdmulh_laneq, 0) + BUILTIN_VDQHS (TERNOP, sqrdmulh_lane, 0) + BUILTIN_VDQHS (TERNOP, sqrdmulh_laneq, 0) + BUILTIN_SD_HSI (TERNOP, sqdmulh_lane, 0) + BUILTIN_SD_HSI (TERNOP, sqrdmulh_lane, 0) - BUILTIN_VSDQ_I_DI (BINOP, sshl_n) - BUILTIN_VSDQ_I_DI (BINOP, ushl_n) + BUILTIN_VSDQ_I_DI (BINOP, ashl, 3) /* Implemented by aarch64_shl. */ - BUILTIN_VSDQ_I_DI (BINOP, sshl) - BUILTIN_VSDQ_I_DI (BINOP, ushl) - BUILTIN_VSDQ_I_DI (BINOP, srshl) - BUILTIN_VSDQ_I_DI (BINOP, urshl) + BUILTIN_VSDQ_I_DI (BINOP, sshl, 0) + BUILTIN_VSDQ_I_DI (BINOP, ushl, 0) + BUILTIN_VSDQ_I_DI (BINOP, srshl, 0) + BUILTIN_VSDQ_I_DI (BINOP, urshl, 0) - BUILTIN_VSDQ_I_DI (SHIFTIMM, sshr_n) - BUILTIN_VSDQ_I_DI (SHIFTIMM, ushr_n) + BUILTIN_VSDQ_I_DI (SHIFTIMM, ashr, 3) + BUILTIN_VSDQ_I_DI (SHIFTIMM, lshr, 3) /* Implemented by aarch64_shr_n. */ - BUILTIN_VSDQ_I_DI (SHIFTIMM, srshr_n) - BUILTIN_VSDQ_I_DI (SHIFTIMM, urshr_n) + BUILTIN_VSDQ_I_DI (SHIFTIMM, srshr_n, 0) + BUILTIN_VSDQ_I_DI (SHIFTIMM, urshr_n, 0) /* Implemented by aarch64_sra_n. */ - BUILTIN_VSDQ_I_DI (SHIFTACC, ssra_n) - BUILTIN_VSDQ_I_DI (SHIFTACC, usra_n) - BUILTIN_VSDQ_I_DI (SHIFTACC, srsra_n) - BUILTIN_VSDQ_I_DI (SHIFTACC, ursra_n) + BUILTIN_VSDQ_I_DI (SHIFTACC, ssra_n, 0) + BUILTIN_VSDQ_I_DI (SHIFTACC, usra_n, 0) + BUILTIN_VSDQ_I_DI (SHIFTACC, srsra_n, 0) + BUILTIN_VSDQ_I_DI (SHIFTACC, ursra_n, 0) /* Implemented by aarch64_shll_n. */ - BUILTIN_VDW (SHIFTIMM, sshll_n) - BUILTIN_VDW (SHIFTIMM, ushll_n) + BUILTIN_VDW (SHIFTIMM, sshll_n, 0) + BUILTIN_VDW (SHIFTIMM, ushll_n, 0) /* Implemented by aarch64_shll2_n. */ - BUILTIN_VQW (SHIFTIMM, sshll2_n) - BUILTIN_VQW (SHIFTIMM, ushll2_n) + BUILTIN_VQW (SHIFTIMM, sshll2_n, 0) + BUILTIN_VQW (SHIFTIMM, ushll2_n, 0) /* Implemented by aarch64_qshrn_n. 
*/ - BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrun_n) - BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrun_n) - BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrn_n) - BUILTIN_VSQN_HSDI (SHIFTIMM, uqshrn_n) - BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrn_n) - BUILTIN_VSQN_HSDI (SHIFTIMM, uqrshrn_n) + BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrun_n, 0) + BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrun_n, 0) + BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrn_n, 0) + BUILTIN_VSQN_HSDI (SHIFTIMM, uqshrn_n, 0) + BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrn_n, 0) + BUILTIN_VSQN_HSDI (SHIFTIMM, uqrshrn_n, 0) /* Implemented by aarch64_si_n. */ - BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssri_n) - BUILTIN_VSDQ_I_DI (SHIFTINSERT, usri_n) - BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssli_n) - BUILTIN_VSDQ_I_DI (SHIFTINSERT, usli_n) + BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssri_n, 0) + BUILTIN_VSDQ_I_DI (SHIFTINSERT, usri_n, 0) + BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssli_n, 0) + BUILTIN_VSDQ_I_DI (SHIFTINSERT, usli_n, 0) /* Implemented by aarch64_qshl_n. */ - BUILTIN_VSDQ_I (SHIFTIMM, sqshlu_n) - BUILTIN_VSDQ_I (SHIFTIMM, sqshl_n) - BUILTIN_VSDQ_I (SHIFTIMM, uqshl_n) + BUILTIN_VSDQ_I (SHIFTIMM, sqshlu_n, 0) + BUILTIN_VSDQ_I (SHIFTIMM, sqshl_n, 0) + BUILTIN_VSDQ_I (SHIFTIMM, uqshl_n, 0) /* Implemented by aarch64_cm. */ - BUILTIN_VSDQ_I_DI (BINOP, cmeq) - BUILTIN_VSDQ_I_DI (BINOP, cmge) - BUILTIN_VSDQ_I_DI (BINOP, cmgt) - BUILTIN_VSDQ_I_DI (BINOP, cmle) - BUILTIN_VSDQ_I_DI (BINOP, cmlt) + BUILTIN_VALLDI (BINOP, cmeq, 0) + BUILTIN_VALLDI (BINOP, cmge, 0) + BUILTIN_VALLDI (BINOP, cmgt, 0) + BUILTIN_VALLDI (BINOP, cmle, 0) + BUILTIN_VALLDI (BINOP, cmlt, 0) /* Implemented by aarch64_cm. */ - BUILTIN_VSDQ_I_DI (BINOP, cmgeu) - BUILTIN_VSDQ_I_DI (BINOP, cmgtu) - BUILTIN_VSDQ_I_DI (BINOP, cmtst) + BUILTIN_VSDQ_I_DI (BINOP, cmgeu, 0) + BUILTIN_VSDQ_I_DI (BINOP, cmgtu, 0) + BUILTIN_VSDQ_I_DI (BINOP, cmtst, 0) - /* Implemented by aarch64_. */ - BUILTIN_VDQF (BINOP, fmax) - BUILTIN_VDQF (BINOP, fmin) - /* Implemented by aarch64_. */ - BUILTIN_VDQ_BHSI (BINOP, smax) - BUILTIN_VDQ_BHSI (BINOP, smin) - BUILTIN_VDQ_BHSI (BINOP, umax) - BUILTIN_VDQ_BHSI (BINOP, umin) + /* Implemented by reduc_plus_. */ + BUILTIN_VALL (UNOP, reduc_splus_, 10) + BUILTIN_VDQ (UNOP, reduc_uplus_, 10) - /* Implemented by aarch64_frint. */ - BUILTIN_VDQF (UNOP, frintz) - BUILTIN_VDQF (UNOP, frintp) - BUILTIN_VDQF (UNOP, frintm) - BUILTIN_VDQF (UNOP, frinti) - BUILTIN_VDQF (UNOP, frintx) - BUILTIN_VDQF (UNOP, frinta) + /* Implemented by reduc__. */ + BUILTIN_VDQIF (UNOP, reduc_smax_, 10) + BUILTIN_VDQIF (UNOP, reduc_smin_, 10) + BUILTIN_VDQ_BHSI (UNOP, reduc_umax_, 10) + BUILTIN_VDQ_BHSI (UNOP, reduc_umin_, 10) + BUILTIN_VDQF (UNOP, reduc_smax_nan_, 10) + BUILTIN_VDQF (UNOP, reduc_smin_nan_, 10) - /* Implemented by aarch64_fcvt. */ - BUILTIN_VDQF (UNOP, fcvtzs) - BUILTIN_VDQF (UNOP, fcvtzu) - BUILTIN_VDQF (UNOP, fcvtas) - BUILTIN_VDQF (UNOP, fcvtau) - BUILTIN_VDQF (UNOP, fcvtps) - BUILTIN_VDQF (UNOP, fcvtpu) - BUILTIN_VDQF (UNOP, fcvtms) - BUILTIN_VDQF (UNOP, fcvtmu) + /* Implemented by 3. + smax variants map to fmaxnm, + smax_nan variants map to fmax. */ + BUILTIN_VDQIF (BINOP, smax, 3) + BUILTIN_VDQIF (BINOP, smin, 3) + BUILTIN_VDQ_BHSI (BINOP, umax, 3) + BUILTIN_VDQ_BHSI (BINOP, umin, 3) + BUILTIN_VDQF (BINOP, smax_nan, 3) + BUILTIN_VDQF (BINOP, smin_nan, 3) + /* Implemented by 2. */ + BUILTIN_VDQF (UNOP, btrunc, 2) + BUILTIN_VDQF (UNOP, ceil, 2) + BUILTIN_VDQF (UNOP, floor, 2) + BUILTIN_VDQF (UNOP, nearbyint, 2) + BUILTIN_VDQF (UNOP, rint, 2) + BUILTIN_VDQF (UNOP, round, 2) + BUILTIN_VDQF (UNOP, frintn, 2) + + /* Implemented by l2. 
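The smax / smax_nan naming used above reflects the two NaN behaviours of the AArch64 floating-point maximum and minimum instructions: FMAXNM follows the IEEE maxNum rule and ignores a quiet NaN operand, whereas FMAX returns NaN whenever either operand is NaN. A small host-side sketch of the difference, using C99 fmax() for the maxNum-style behaviour (an illustration of the semantics only, not the instruction definitions; link with -lm):

    #include <math.h>
    #include <stdio.h>

    /* maxNum-style maximum: a quiet NaN operand is ignored, as FMAXNM does.
       C99 fmax() already behaves this way.  */
    static double max_num (double a, double b) { return fmax (a, b); }

    /* NaN-propagating maximum, as FMAX does: any NaN operand gives NaN.  */
    static double
    max_nan (double a, double b)
    {
      if (isnan (a) || isnan (b))
        return NAN;
      return a > b ? a : b;
    }

    int
    main (void)
    {
      printf ("maxnm(3, NaN) = %f\n", max_num (3.0, NAN));  /* 3.000000 */
      printf ("max  (3, NaN) = %f\n", max_nan (3.0, NAN));  /* nan */
      return 0;
    }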
*/ + VAR1 (UNOP, lbtruncv2sf, 2, v2si) + VAR1 (UNOP, lbtruncv4sf, 2, v4si) + VAR1 (UNOP, lbtruncv2df, 2, v2di) + + VAR1 (UNOP, lbtruncuv2sf, 2, v2si) + VAR1 (UNOP, lbtruncuv4sf, 2, v4si) + VAR1 (UNOP, lbtruncuv2df, 2, v2di) + + VAR1 (UNOP, lroundv2sf, 2, v2si) + VAR1 (UNOP, lroundv4sf, 2, v4si) + VAR1 (UNOP, lroundv2df, 2, v2di) + /* Implemented by l2. */ + VAR1 (UNOP, lroundsf, 2, si) + VAR1 (UNOP, lrounddf, 2, di) + + VAR1 (UNOP, lrounduv2sf, 2, v2si) + VAR1 (UNOP, lrounduv4sf, 2, v4si) + VAR1 (UNOP, lrounduv2df, 2, v2di) + VAR1 (UNOP, lroundusf, 2, si) + VAR1 (UNOP, lroundudf, 2, di) + + VAR1 (UNOP, lceilv2sf, 2, v2si) + VAR1 (UNOP, lceilv4sf, 2, v4si) + VAR1 (UNOP, lceilv2df, 2, v2di) + + VAR1 (UNOP, lceiluv2sf, 2, v2si) + VAR1 (UNOP, lceiluv4sf, 2, v4si) + VAR1 (UNOP, lceiluv2df, 2, v2di) + VAR1 (UNOP, lceilusf, 2, si) + VAR1 (UNOP, lceiludf, 2, di) + + VAR1 (UNOP, lfloorv2sf, 2, v2si) + VAR1 (UNOP, lfloorv4sf, 2, v4si) + VAR1 (UNOP, lfloorv2df, 2, v2di) + + VAR1 (UNOP, lflooruv2sf, 2, v2si) + VAR1 (UNOP, lflooruv4sf, 2, v4si) + VAR1 (UNOP, lflooruv2df, 2, v2di) + VAR1 (UNOP, lfloorusf, 2, si) + VAR1 (UNOP, lfloorudf, 2, di) + + VAR1 (UNOP, lfrintnv2sf, 2, v2si) + VAR1 (UNOP, lfrintnv4sf, 2, v4si) + VAR1 (UNOP, lfrintnv2df, 2, v2di) + VAR1 (UNOP, lfrintnsf, 2, si) + VAR1 (UNOP, lfrintndf, 2, di) + + VAR1 (UNOP, lfrintnuv2sf, 2, v2si) + VAR1 (UNOP, lfrintnuv4sf, 2, v4si) + VAR1 (UNOP, lfrintnuv2df, 2, v2di) + VAR1 (UNOP, lfrintnusf, 2, si) + VAR1 (UNOP, lfrintnudf, 2, di) + + /* Implemented by 2. */ + VAR1 (UNOP, floatv2si, 2, v2sf) + VAR1 (UNOP, floatv4si, 2, v4sf) + VAR1 (UNOP, floatv2di, 2, v2df) + + VAR1 (UNOP, floatunsv2si, 2, v2sf) + VAR1 (UNOP, floatunsv4si, 2, v4sf) + VAR1 (UNOP, floatunsv2di, 2, v2df) + /* Implemented by aarch64_. */ - BUILTIN_VALL (BINOP, zip1) - BUILTIN_VALL (BINOP, zip2) - BUILTIN_VALL (BINOP, uzp1) - BUILTIN_VALL (BINOP, uzp2) - BUILTIN_VALL (BINOP, trn1) - BUILTIN_VALL (BINOP, trn2) + BUILTIN_VALL (BINOP, zip1, 0) + BUILTIN_VALL (BINOP, zip2, 0) + BUILTIN_VALL (BINOP, uzp1, 0) + BUILTIN_VALL (BINOP, uzp2, 0) + BUILTIN_VALL (BINOP, trn1, 0) + BUILTIN_VALL (BINOP, trn2, 0) + /* Implemented by + aarch64_frecp. */ + BUILTIN_GPF (UNOP, frecpe, 0) + BUILTIN_GPF (BINOP, frecps, 0) + BUILTIN_GPF (UNOP, frecpx, 0) + + BUILTIN_VDQF (UNOP, frecpe, 0) + BUILTIN_VDQF (BINOP, frecps, 0) + + BUILTIN_VALLDI (UNOP, abs, 2) + + VAR1 (UNOP, vec_unpacks_hi_, 10, v4sf) + VAR1 (BINOP, float_truncate_hi_, 0, v4sf) + + VAR1 (UNOP, float_extend_lo_, 0, v2df) + VAR1 (UNOP, float_truncate_lo_, 0, v2sf) + /* Implemented by aarch64_ld1. */ - BUILTIN_VALL (LOAD1, ld1) + BUILTIN_VALL (LOAD1, ld1, 0) /* Implemented by aarch64_st1. */ - BUILTIN_VALL (STORE1, st1) + BUILTIN_VALL (STORE1, st1, 0) + /* Implemented by aarch64_crypto_aes. */ + VAR1 (BINOPU, crypto_aese, 0, v16qi) + VAR1 (BINOPU, crypto_aesd, 0, v16qi) + VAR1 (UNOPU, crypto_aesmc, 0, v16qi) + VAR1 (UNOPU, crypto_aesimc, 0, v16qi) + + /* Implemented by aarch64_crypto_sha1. */ + VAR1 (UNOPU, crypto_sha1h, 0, si) + VAR1 (BINOPU, crypto_sha1su1, 0, v4si) + VAR1 (TERNOPU, crypto_sha1c, 0, v4si) + VAR1 (TERNOPU, crypto_sha1m, 0, v4si) + VAR1 (TERNOPU, crypto_sha1p, 0, v4si) + VAR1 (TERNOPU, crypto_sha1su0, 0, v4si) + + /* Implemented by aarch64_crypto_sha256. */ + VAR1 (TERNOPU, crypto_sha256h, 0, v4si) + VAR1 (TERNOPU, crypto_sha256h2, 0, v4si) + VAR1 (BINOPU, crypto_sha256su0, 0, v4si) + VAR1 (TERNOPU, crypto_sha256su1, 0, v4si) + + /* Implemented by aarch64_crypto_pmull. 
*/ + VAR1 (BINOPP, crypto_pmull, 0, di) + VAR1 (BINOPP, crypto_pmull, 0, v2di) --- a/src/gcc/config/aarch64/constraints.md +++ b/src/gcc/config/aarch64/constraints.md @@ -75,11 +75,6 @@ "Integer constant zero." (match_test "op == const0_rtx")) -(define_constraint "Usa" - "A constraint that matches an absolute symbolic address." - (and (match_code "const,symbol_ref") - (match_test "aarch64_symbolic_address_p (op)"))) - (define_constraint "Ush" "A constraint that matches an absolute symbolic address high part." (and (match_code "high") @@ -148,10 +143,25 @@ "@internal A constraint that matches vector of immediates." (and (match_code "const_vector") - (match_test "aarch64_simd_immediate_valid_for_move (op, GET_MODE (op), - NULL, NULL, NULL, - NULL, NULL) != 0"))) + (match_test "aarch64_simd_valid_immediate (op, GET_MODE (op), + false, NULL)"))) +(define_constraint "Dh" + "@internal + A constraint that matches an immediate operand valid for\ + AdvSIMD scalar move in HImode." + (and (match_code "const_int") + (match_test "aarch64_simd_scalar_immediate_valid_for_move (op, + HImode)"))) + +(define_constraint "Dq" + "@internal + A constraint that matches an immediate operand valid for\ + AdvSIMD scalar move in QImode." + (and (match_code "const_int") + (match_test "aarch64_simd_scalar_immediate_valid_for_move (op, + QImode)"))) + (define_constraint "Dl" "@internal A constraint that matches vector of immediates for left shifts." --- a/src/gcc/config/aarch64/aarch64.c +++ b/src/gcc/config/aarch64/aarch64.c @@ -45,6 +45,8 @@ #include "gimple.h" #include "optabs.h" #include "dwarf2.h" +#include "cfgloop.h" +#include "tree-vectorizer.h" /* Classifies an address. @@ -87,6 +89,15 @@ enum aarch64_symbol_type symbol_type; }; +struct simd_immediate_info +{ + rtx value; + int shift; + int element_width; + bool mvn; + bool msl; +}; + /* The current code model. */ enum aarch64_code_model aarch64_cmodel; @@ -103,8 +114,6 @@ static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED; static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED; static void aarch64_override_options_after_change (void); -static int aarch64_simd_valid_immediate (rtx, enum machine_mode, int, rtx *, - int *, unsigned char *, int *, int *); static bool aarch64_vector_mode_supported_p (enum machine_mode); static unsigned bit_count (unsigned HOST_WIDE_INT); static bool aarch64_const_vec_all_same_int_p (rtx, @@ -178,14 +187,35 @@ NAMED_PARAM (FP2FP, 4) }; +/* Generic costs for vector insn classes. 
*/ #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 __extension__ #endif +static const struct cpu_vector_cost generic_vector_cost = +{ + NAMED_PARAM (scalar_stmt_cost, 1), + NAMED_PARAM (scalar_load_cost, 1), + NAMED_PARAM (scalar_store_cost, 1), + NAMED_PARAM (vec_stmt_cost, 1), + NAMED_PARAM (vec_to_scalar_cost, 1), + NAMED_PARAM (scalar_to_vec_cost, 1), + NAMED_PARAM (vec_align_load_cost, 1), + NAMED_PARAM (vec_unalign_load_cost, 1), + NAMED_PARAM (vec_unalign_store_cost, 1), + NAMED_PARAM (vec_store_cost, 1), + NAMED_PARAM (cond_taken_branch_cost, 3), + NAMED_PARAM (cond_not_taken_branch_cost, 1) +}; + +#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 +__extension__ +#endif static const struct tune_params generic_tunings = { &generic_rtx_cost_table, &generic_addrcost_table, &generic_regmove_cost, + &generic_vector_cost, NAMED_PARAM (memmov_cost, 4) }; @@ -524,13 +554,15 @@ return; } + case SYMBOL_TINY_ABSOLUTE: + emit_insn (gen_rtx_SET (Pmode, dest, imm)); + return; + case SYMBOL_SMALL_GOT: { rtx tmp_reg = dest; if (can_create_pseudo_p ()) - { - tmp_reg = gen_reg_rtx (Pmode); - } + tmp_reg = gen_reg_rtx (Pmode); emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm)); emit_insn (gen_ldr_got_small (dest, tmp_reg, imm)); return; @@ -581,6 +613,10 @@ return; } + case SYMBOL_TINY_GOT: + emit_insn (gen_ldr_got_tiny (dest, imm)); + return; + default: gcc_unreachable (); } @@ -604,49 +640,85 @@ { rtx low_dst; - gcc_assert (GET_MODE (dst) == TImode); + enum machine_mode src_mode = GET_MODE (src); + enum machine_mode dst_mode = GET_MODE (dst); + int src_regno = REGNO (src); + int dst_regno = REGNO (dst); + gcc_assert (dst_mode == TImode || dst_mode == TFmode); + if (REG_P (dst) && REG_P (src)) { - int src_regno = REGNO (src); - int dst_regno = REGNO (dst); + gcc_assert (src_mode == TImode || src_mode == TFmode); - gcc_assert (GET_MODE (src) == TImode); - /* Handle r -> w, w -> r. */ if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno)) { - emit_insn (gen_aarch64_movtilow_di (dst, - gen_lowpart (word_mode, src))); - emit_insn (gen_aarch64_movtihigh_di (dst, - gen_highpart (word_mode, src))); - return; + switch (src_mode) { + case TImode: + emit_insn + (gen_aarch64_movtilow_di (dst, gen_lowpart (word_mode, src))); + emit_insn + (gen_aarch64_movtihigh_di (dst, gen_highpart (word_mode, src))); + return; + case TFmode: + emit_insn + (gen_aarch64_movtflow_di (dst, gen_lowpart (word_mode, src))); + emit_insn + (gen_aarch64_movtfhigh_di (dst, gen_highpart (word_mode, src))); + return; + default: + gcc_unreachable (); + } } else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno)) { - emit_insn (gen_aarch64_movdi_tilow (gen_lowpart (word_mode, dst), - src)); - emit_insn (gen_aarch64_movdi_tihigh (gen_highpart (word_mode, dst), - src)); - return; + switch (src_mode) { + case TImode: + emit_insn + (gen_aarch64_movdi_tilow (gen_lowpart (word_mode, dst), src)); + emit_insn + (gen_aarch64_movdi_tihigh (gen_highpart (word_mode, dst), src)); + return; + case TFmode: + emit_insn + (gen_aarch64_movdi_tflow (gen_lowpart (word_mode, dst), src)); + emit_insn + (gen_aarch64_movdi_tfhigh (gen_highpart (word_mode, dst), src)); + return; + default: + gcc_unreachable (); + } } /* Fall through to r -> r cases. 
*/ } - low_dst = gen_lowpart (word_mode, dst); - if (REG_P (low_dst) - && reg_overlap_mentioned_p (low_dst, src)) - { - aarch64_emit_move (gen_highpart (word_mode, dst), - gen_highpart_mode (word_mode, TImode, src)); - aarch64_emit_move (low_dst, gen_lowpart (word_mode, src)); - } - else - { - aarch64_emit_move (low_dst, gen_lowpart (word_mode, src)); - aarch64_emit_move (gen_highpart (word_mode, dst), - gen_highpart_mode (word_mode, TImode, src)); - } + switch (dst_mode) { + case TImode: + low_dst = gen_lowpart (word_mode, dst); + if (REG_P (low_dst) + && reg_overlap_mentioned_p (low_dst, src)) + { + aarch64_emit_move (gen_highpart (word_mode, dst), + gen_highpart_mode (word_mode, TImode, src)); + aarch64_emit_move (low_dst, gen_lowpart (word_mode, src)); + } + else + { + aarch64_emit_move (low_dst, gen_lowpart (word_mode, src)); + aarch64_emit_move (gen_highpart (word_mode, dst), + gen_highpart_mode (word_mode, TImode, src)); + } + return; + case TFmode: + emit_move_insn (gen_rtx_REG (DFmode, dst_regno), + gen_rtx_REG (DFmode, src_regno)); + emit_move_insn (gen_rtx_REG (DFmode, dst_regno + 1), + gen_rtx_REG (DFmode, src_regno + 1)); + return; + default: + gcc_unreachable (); + } } bool @@ -656,11 +728,99 @@ || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src)))); } +/* Split a complex SIMD combine. */ + +void +aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2) +{ + enum machine_mode src_mode = GET_MODE (src1); + enum machine_mode dst_mode = GET_MODE (dst); + + gcc_assert (VECTOR_MODE_P (dst_mode)); + + if (REG_P (dst) && REG_P (src1) && REG_P (src2)) + { + rtx (*gen) (rtx, rtx, rtx); + + switch (src_mode) + { + case V8QImode: + gen = gen_aarch64_simd_combinev8qi; + break; + case V4HImode: + gen = gen_aarch64_simd_combinev4hi; + break; + case V2SImode: + gen = gen_aarch64_simd_combinev2si; + break; + case V2SFmode: + gen = gen_aarch64_simd_combinev2sf; + break; + case DImode: + gen = gen_aarch64_simd_combinedi; + break; + case DFmode: + gen = gen_aarch64_simd_combinedf; + break; + default: + gcc_unreachable (); + } + + emit_insn (gen (dst, src1, src2)); + return; + } +} + +/* Split a complex SIMD move. */ + +void +aarch64_split_simd_move (rtx dst, rtx src) +{ + enum machine_mode src_mode = GET_MODE (src); + enum machine_mode dst_mode = GET_MODE (dst); + + gcc_assert (VECTOR_MODE_P (dst_mode)); + + if (REG_P (dst) && REG_P (src)) + { + rtx (*gen) (rtx, rtx); + + gcc_assert (VECTOR_MODE_P (src_mode)); + + switch (src_mode) + { + case V16QImode: + gen = gen_aarch64_split_simd_movv16qi; + break; + case V8HImode: + gen = gen_aarch64_split_simd_movv8hi; + break; + case V4SImode: + gen = gen_aarch64_split_simd_movv4si; + break; + case V2DImode: + gen = gen_aarch64_split_simd_movv2di; + break; + case V4SFmode: + gen = gen_aarch64_split_simd_movv4sf; + break; + case V2DFmode: + gen = gen_aarch64_split_simd_movv2df; + break; + default: + gcc_unreachable (); + } + + emit_insn (gen (dst, src)); + return; + } +} + static rtx -aarch64_force_temporary (rtx x, rtx value) +aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value) { if (can_create_pseudo_p ()) - return force_reg (Pmode, value); + return force_reg (mode, value); else { x = aarch64_emit_move (x, value); @@ -672,7 +832,7 @@ static rtx aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset) { - if (!aarch64_plus_immediate (GEN_INT (offset), DImode)) + if (!aarch64_plus_immediate (GEN_INT (offset), mode)) { rtx high; /* Load the full offset into a register. 
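The 128-bit move splitting above breaks a TImode (and now TFmode) move into two 64-bit moves and, when the low half of the destination overlaps the source (the reg_overlap_mentioned_p check), emits the high half first so the source is not clobbered before it has been read. A minimal host-side sketch of why that ordering matters, with an array standing in for adjacent registers (layout and names are illustrative only):

    #include <stdint.h>
    #include <stdio.h>

    /* Pretend register file: a 128-bit value occupies regs[n] (low half)
       and regs[n + 1] (high half).  */
    static uint64_t regs[4];

    /* Move the pair starting at SRC to the pair starting at DST, one half
       at a time.  If the destination's low register is the source's high
       register, writing the low half first would destroy data still
       needed, so the high half is emitted first in that case.  */
    static void
    move_pair (int dst, int src)
    {
      if (dst == src + 1)            /* dst low overlaps src high */
        {
          regs[dst + 1] = regs[src + 1];
          regs[dst] = regs[src];
        }
      else
        {
          regs[dst] = regs[src];
          regs[dst + 1] = regs[src + 1];
        }
    }

    int
    main (void)
    {
      regs[0] = 0x1111111111111111ull;   /* low half  */
      regs[1] = 0x2222222222222222ull;   /* high half */
      move_pair (1, 0);                  /* overlapping pair move */
      printf ("%016llx %016llx\n",
              (unsigned long long) regs[1], (unsigned long long) regs[2]);
      /* prints: 1111111111111111 2222222222222222 */
      return 0;
    }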
This @@ -679,8 +839,9 @@ might be improvable in the future. */ high = GEN_INT (offset); offset = 0; - high = aarch64_force_temporary (temp, high); - reg = aarch64_force_temporary (temp, gen_rtx_PLUS (Pmode, high, reg)); + high = aarch64_force_temporary (mode, temp, high); + reg = aarch64_force_temporary (mode, temp, + gen_rtx_PLUS (mode, high, reg)); } return plus_constant (mode, reg, offset); } @@ -719,7 +880,7 @@ && targetm.cannot_force_const_mem (mode, imm)) { gcc_assert(can_create_pseudo_p ()); - base = aarch64_force_temporary (dest, base); + base = aarch64_force_temporary (mode, dest, base); base = aarch64_add_offset (mode, NULL, base, INTVAL (offset)); aarch64_emit_move (dest, base); return; @@ -733,10 +894,11 @@ case SYMBOL_SMALL_TLSDESC: case SYMBOL_SMALL_GOTTPREL: case SYMBOL_SMALL_GOT: + case SYMBOL_TINY_GOT: if (offset != const0_rtx) { gcc_assert(can_create_pseudo_p ()); - base = aarch64_force_temporary (dest, base); + base = aarch64_force_temporary (mode, dest, base); base = aarch64_add_offset (mode, NULL, base, INTVAL (offset)); aarch64_emit_move (dest, base); return; @@ -745,6 +907,7 @@ case SYMBOL_SMALL_TPREL: case SYMBOL_SMALL_ABSOLUTE: + case SYMBOL_TINY_ABSOLUTE: aarch64_load_symref_appropriately (dest, imm, sty); return; @@ -1810,7 +1973,7 @@ Establish the stack frame by decreasing the stack pointer with a properly calculated size and, if necessary, create a frame record filled with the values of LR and previous frame pointer. The - current FP is also set up is it is in use. */ + current FP is also set up if it is in use. */ void aarch64_expand_prologue (void) @@ -2553,12 +2716,14 @@ aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x) { rtx base, offset; + if (GET_CODE (x) == HIGH) return true; split_const (x, &base, &offset); if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF) - return (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR) != SYMBOL_FORCE_TO_MEM); + return (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR) + != SYMBOL_FORCE_TO_MEM); return aarch64_tls_referenced_p (x); } @@ -2996,10 +3161,13 @@ /* Classify the base of symbolic expression X, given that X appears in context CONTEXT. */ -static enum aarch64_symbol_type -aarch64_classify_symbolic_expression (rtx x, enum aarch64_symbol_context context) + +enum aarch64_symbol_type +aarch64_classify_symbolic_expression (rtx x, + enum aarch64_symbol_context context) { rtx offset; + split_const (x, &x, &offset); return aarch64_classify_symbol (x, context); } @@ -3087,10 +3255,11 @@ if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode) && y == const0_rtx && (code == EQ || code == NE || code == LT || code == GE) - && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND)) + && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND + || GET_CODE (x) == NEG)) return CC_NZmode; - /* A compare with a shifted operand. Because of canonicalization, + /* A compare with a shifted or negated operand. Because of canonicalization, the comparison will have to be swapped when we emit the assembly code. 
*/ if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode) @@ -3097,7 +3266,8 @@ && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG) && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT || GET_CODE (x) == LSHIFTRT - || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)) + || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND + || GET_CODE (x) == NEG)) return CC_SWPmode; /* A compare of a mode narrower than SI mode against zero can be done @@ -3282,26 +3452,6 @@ asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]); break; - case 'Q': - /* Print the least significant register of a pair (TImode) of regs. */ - if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1)) - { - output_operand_lossage ("invalid operand for '%%%c'", code); - return; - } - asm_fprintf (f, "%s", reg_names [REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0)]); - break; - - case 'R': - /* Print the most significant register of a pair (TImode) of regs. */ - if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1)) - { - output_operand_lossage ("invalid operand for '%%%c'", code); - return; - } - asm_fprintf (f, "%s", reg_names [REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1)]); - break; - case 'm': /* Print a condition (eq, ne, etc). */ @@ -3349,7 +3499,7 @@ output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code); return; } - asm_fprintf (f, "%s%c%d", REGISTER_PREFIX, code, REGNO (x) - V0_REGNUM); + asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM); break; case 'S': @@ -3362,18 +3512,17 @@ output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code); return; } - asm_fprintf (f, "%sv%d", REGISTER_PREFIX, - REGNO (x) - V0_REGNUM + (code - 'S')); + asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S')); break; case 'X': - /* Print integer constant in hex. */ + /* Print bottom 16 bits of integer constant in hex. */ if (GET_CODE (x) != CONST_INT) { output_operand_lossage ("invalid operand for '%%%c'", code); return; } - asm_fprintf (f, "0x%wx", UINTVAL (x)); + asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff); break; case 'w': @@ -3383,20 +3532,19 @@ if (x == const0_rtx || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x))) { - asm_fprintf (f, "%s%czr", REGISTER_PREFIX, code); + asm_fprintf (f, "%czr", code); break; } if (REG_P (x) && GP_REGNUM_P (REGNO (x))) { - asm_fprintf (f, "%s%c%d", REGISTER_PREFIX, code, - REGNO (x) - R0_REGNUM); + asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM); break; } if (REG_P (x) && REGNO (x) == SP_REGNUM) { - asm_fprintf (f, "%s%ssp", REGISTER_PREFIX, code == 'w' ? "w" : ""); + asm_fprintf (f, "%ssp", code == 'w' ? 
"w" : ""); break; } @@ -3504,6 +3652,10 @@ asm_fprintf (asm_out_file, ":tprel:"); break; + case SYMBOL_TINY_GOT: + gcc_unreachable (); + break; + default: break; } @@ -3533,6 +3685,10 @@ asm_fprintf (asm_out_file, ":tprel_lo12_nc:"); break; + case SYMBOL_TINY_GOT: + asm_fprintf (asm_out_file, ":got:"); + break; + default: break; } @@ -3647,13 +3803,6 @@ output_addr_const (f, x); } -void -aarch64_function_profiler (FILE *f ATTRIBUTE_UNUSED, - int labelno ATTRIBUTE_UNUSED) -{ - sorry ("function profiling"); -} - bool aarch64_label_mentioned_p (rtx x) { @@ -3919,7 +4068,7 @@ return offset - crtl->outgoing_args_size; if (from == FRAME_POINTER_REGNUM) - return cfun->machine->frame.saved_regs_size; + return cfun->machine->frame.saved_regs_size + get_frame_size (); } if (to == STACK_POINTER_REGNUM) @@ -3928,6 +4077,7 @@ { HOST_WIDE_INT elim = crtl->outgoing_args_size + cfun->machine->frame.saved_regs_size + + get_frame_size () - cfun->machine->frame.fp_lr_offset; elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT); return elim; @@ -4601,6 +4751,101 @@ return aarch64_tune_params->memmov_cost; } +/* Vectorizer cost model target hooks. */ + +/* Implement targetm.vectorize.builtin_vectorization_cost. */ +static int +aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, + tree vectype, + int misalign ATTRIBUTE_UNUSED) +{ + unsigned elements; + + switch (type_of_cost) + { + case scalar_stmt: + return aarch64_tune_params->vec_costs->scalar_stmt_cost; + + case scalar_load: + return aarch64_tune_params->vec_costs->scalar_load_cost; + + case scalar_store: + return aarch64_tune_params->vec_costs->scalar_store_cost; + + case vector_stmt: + return aarch64_tune_params->vec_costs->vec_stmt_cost; + + case vector_load: + return aarch64_tune_params->vec_costs->vec_align_load_cost; + + case vector_store: + return aarch64_tune_params->vec_costs->vec_store_cost; + + case vec_to_scalar: + return aarch64_tune_params->vec_costs->vec_to_scalar_cost; + + case scalar_to_vec: + return aarch64_tune_params->vec_costs->scalar_to_vec_cost; + + case unaligned_load: + return aarch64_tune_params->vec_costs->vec_unalign_load_cost; + + case unaligned_store: + return aarch64_tune_params->vec_costs->vec_unalign_store_cost; + + case cond_branch_taken: + return aarch64_tune_params->vec_costs->cond_taken_branch_cost; + + case cond_branch_not_taken: + return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost; + + case vec_perm: + case vec_promote_demote: + return aarch64_tune_params->vec_costs->vec_stmt_cost; + + case vec_construct: + elements = TYPE_VECTOR_SUBPARTS (vectype); + return elements / 2 + 1; + + default: + gcc_unreachable (); + } +} + +/* Implement targetm.vectorize.add_stmt_cost. */ +static unsigned +aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind, + struct _stmt_vec_info *stmt_info, int misalign, + enum vect_cost_model_location where) +{ + unsigned *cost = (unsigned *) data; + unsigned retval = 0; + + if (flag_vect_cost_model) + { + tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE; + int stmt_cost = + aarch64_builtin_vectorization_cost (kind, vectype, misalign); + + /* Statements in an inner loop relative to the loop being + vectorized are weighted more heavily. The value here is + a function (linear for now) of the loop nest level. 
*/ + if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info)) + { + loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info); + struct loop *loop = LOOP_VINFO_LOOP (loop_info); + unsigned nest_level = loop_depth (loop); + + count *= nest_level; + } + + retval = (unsigned) (count * stmt_cost); + cost[where] += retval; + } + + return retval; +} + static void initialize_aarch64_code_model (void); /* Parse the architecture extension string. */ @@ -4956,6 +5201,7 @@ /* Return the method that should be used to access SYMBOL_REF or LABEL_REF X in context CONTEXT. */ + enum aarch64_symbol_type aarch64_classify_symbol (rtx x, enum aarch64_symbol_context context ATTRIBUTE_UNUSED) @@ -4969,6 +5215,8 @@ case AARCH64_CMODEL_TINY_PIC: case AARCH64_CMODEL_TINY: + return SYMBOL_TINY_ABSOLUTE; + case AARCH64_CMODEL_SMALL_PIC: case AARCH64_CMODEL_SMALL: return SYMBOL_SMALL_ABSOLUTE; @@ -4978,71 +5226,47 @@ } } - gcc_assert (GET_CODE (x) == SYMBOL_REF); - - switch (aarch64_cmodel) + if (GET_CODE (x) == SYMBOL_REF) { - case AARCH64_CMODEL_LARGE: - return SYMBOL_FORCE_TO_MEM; - - case AARCH64_CMODEL_TINY: - case AARCH64_CMODEL_SMALL: - - /* This is needed to get DFmode, TImode constants to be loaded off - the constant pool. Is it necessary to dump TImode values into - the constant pool. We don't handle TImode constant loads properly - yet and hence need to use the constant pool. */ - if (CONSTANT_POOL_ADDRESS_P (x)) + if (aarch64_cmodel == AARCH64_CMODEL_LARGE + || CONSTANT_POOL_ADDRESS_P (x)) return SYMBOL_FORCE_TO_MEM; if (aarch64_tls_symbol_p (x)) return aarch64_classify_tls_symbol (x); - if (SYMBOL_REF_WEAK (x)) - return SYMBOL_FORCE_TO_MEM; + switch (aarch64_cmodel) + { + case AARCH64_CMODEL_TINY: + if (SYMBOL_REF_WEAK (x)) + return SYMBOL_FORCE_TO_MEM; + return SYMBOL_TINY_ABSOLUTE; - return SYMBOL_SMALL_ABSOLUTE; + case AARCH64_CMODEL_SMALL: + if (SYMBOL_REF_WEAK (x)) + return SYMBOL_FORCE_TO_MEM; + return SYMBOL_SMALL_ABSOLUTE; - case AARCH64_CMODEL_TINY_PIC: - case AARCH64_CMODEL_SMALL_PIC: + case AARCH64_CMODEL_TINY_PIC: + if (!aarch64_symbol_binds_local_p (x)) + return SYMBOL_TINY_GOT; + return SYMBOL_TINY_ABSOLUTE; - if (CONSTANT_POOL_ADDRESS_P (x)) - return SYMBOL_FORCE_TO_MEM; + case AARCH64_CMODEL_SMALL_PIC: + if (!aarch64_symbol_binds_local_p (x)) + return SYMBOL_SMALL_GOT; + return SYMBOL_SMALL_ABSOLUTE; - if (aarch64_tls_symbol_p (x)) - return aarch64_classify_tls_symbol (x); + default: + gcc_unreachable (); + } + } - if (!aarch64_symbol_binds_local_p (x)) - return SYMBOL_SMALL_GOT; - - return SYMBOL_SMALL_ABSOLUTE; - - default: - gcc_unreachable (); - } /* By default push everything into the constant pool. */ return SYMBOL_FORCE_TO_MEM; } -/* Return true if X is a symbolic constant that can be used in context - CONTEXT. If it is, store the type of the symbol in *SYMBOL_TYPE. */ - bool -aarch64_symbolic_constant_p (rtx x, enum aarch64_symbol_context context, - enum aarch64_symbol_type *symbol_type) -{ - rtx offset; - split_const (x, &x, &offset); - if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF) - *symbol_type = aarch64_classify_symbol (x, context); - else - return false; - - /* No checking of offset at this point. */ - return true; -} - -bool aarch64_constant_address_p (rtx x) { return (CONSTANT_P (x) && memory_address_p (DImode, x)); @@ -5092,8 +5316,7 @@ /* This could probably go away because we now decompose CONST_INTs according to expand_mov_immediate. 
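The add_stmt_cost hook above multiplies the statement count by the loop nest depth for statements in an inner loop, then by the per-kind cost taken from the tuning structure, and accumulates the result into the bucket for the given location. A standalone sketch of that accounting follows; the kinds and costs are loosely modelled on the generic vector cost table added by this patch (all ones except the taken-branch cost of 3) and are illustrative only:

    #include <stdio.h>

    enum stmt_kind { SCALAR_STMT, VECTOR_STMT, VECTOR_LOAD, COND_BRANCH_TAKEN };

    /* Illustrative per-kind costs, indexed by enum stmt_kind.  */
    static const int kind_cost[] = { 1, 1, 1, 3 };

    /* Account for COUNT statements of kind KIND.  Statements in an inner
       loop are weighted linearly by the nest depth, mirroring the
       "count *= nest_level" step above.  Returns the cost added.  */
    static unsigned
    add_stmt_cost (unsigned *total, int count, enum stmt_kind kind,
                   int in_inner_loop, unsigned nest_level)
    {
      if (in_inner_loop)
        count *= nest_level;

      unsigned retval = (unsigned) count * kind_cost[kind];
      *total += retval;
      return retval;
    }

    int
    main (void)
    {
      unsigned body_cost = 0;

      add_stmt_cost (&body_cost, 4, VECTOR_LOAD, 0, 1);        /* 4 * 1     */
      add_stmt_cost (&body_cost, 2, VECTOR_STMT, 1, 2);        /* 2 * 2 * 1 */
      add_stmt_cost (&body_cost, 1, COND_BRANCH_TAKEN, 1, 2);  /* 1 * 2 * 3 */

      printf ("vector body cost = %u\n", body_cost);           /* 14 */
      return 0;
    }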
*/ if ((GET_CODE (x) == CONST_VECTOR - && aarch64_simd_valid_immediate (x, mode, false, - NULL, NULL, NULL, NULL, NULL) != -1) + && aarch64_simd_valid_immediate (x, mode, false, NULL)) || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x)) return !targetm.cannot_force_const_mem (mode, x); @@ -5924,32 +6147,57 @@ return false; } -/* Return quad mode as the preferred SIMD mode. */ +/* Return appropriate SIMD container + for MODE within a vector of WIDTH bits. */ static enum machine_mode -aarch64_preferred_simd_mode (enum machine_mode mode) +aarch64_simd_container_mode (enum machine_mode mode, unsigned width) { + gcc_assert (width == 64 || width == 128); if (TARGET_SIMD) - switch (mode) - { - case DFmode: - return V2DFmode; - case SFmode: - return V4SFmode; - case SImode: - return V4SImode; - case HImode: - return V8HImode; - case QImode: - return V16QImode; - case DImode: - return V2DImode; - break; - - default:; - } + { + if (width == 128) + switch (mode) + { + case DFmode: + return V2DFmode; + case SFmode: + return V4SFmode; + case SImode: + return V4SImode; + case HImode: + return V8HImode; + case QImode: + return V16QImode; + case DImode: + return V2DImode; + default: + break; + } + else + switch (mode) + { + case SFmode: + return V2SFmode; + case SImode: + return V2SImode; + case HImode: + return V4HImode; + case QImode: + return V8QImode; + default: + break; + } + } return word_mode; } +/* Return 128-bit container as the preferred SIMD mode for MODE. */ +static enum machine_mode +aarch64_preferred_simd_mode (enum machine_mode mode) +{ + return aarch64_simd_container_mode (mode, 128); +} + /* Return the bitmask of possible vector sizes for the vectorizer to iterate over. */ static unsigned int @@ -5999,6 +6247,7 @@ { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" }, { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" }, { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" }, + { V2DImode, "__builtin_aarch64_simd_poly64", "12__Poly64x2_t" }, { VOIDmode, NULL, NULL } }; @@ -6037,7 +6286,7 @@ } /* Return the equivalent letter for size. */ -static unsigned char +static char sizetochar (int size) { switch (size) @@ -6084,15 +6333,10 @@ return aarch64_float_const_representable_p (x0); } -/* TODO: This function returns values similar to those - returned by neon_valid_immediate in gcc/config/arm/arm.c - but the API here is different enough that these magic numbers - are not used. It should be sufficient to return true or false. */ -static int -aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse, - rtx *modconst, int *elementwidth, - unsigned char *elementchar, - int *mvn, int *shift) +/* Return true for valid and false for invalid. */ +bool +aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse, + struct simd_immediate_info *info) { #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \ matches = 1; \ @@ -6103,7 +6347,6 @@ { \ immtype = (CLASS); \ elsize = (ELSIZE); \ - elchar = sizetochar (elsize); \ eshift = (SHIFT); \ emvn = (NEG); \ break; \ @@ -6112,7 +6355,6 @@ unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op); unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode)); unsigned char bytes[16]; - unsigned char elchar = 0; int immtype = -1, matches; unsigned int invmask = inverse ? 
0xff : 0; int eshift, emvn; @@ -6119,29 +6361,19 @@ if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) { - bool simd_imm_zero = aarch64_simd_imm_zero_p (op, mode); - int elem_width = GET_MODE_BITSIZE (GET_MODE (CONST_VECTOR_ELT (op, 0))); + if (! (aarch64_simd_imm_zero_p (op, mode) + || aarch64_vect_float_const_representable_p (op))) + return false; - if (!(simd_imm_zero - || aarch64_vect_float_const_representable_p (op))) - return -1; + if (info) + { + info->value = CONST_VECTOR_ELT (op, 0); + info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value)); + info->mvn = false; + info->shift = 0; + } - if (modconst) - *modconst = CONST_VECTOR_ELT (op, 0); - - if (elementwidth) - *elementwidth = elem_width; - - if (elementchar) - *elementchar = sizetochar (elem_width); - - if (shift) - *shift = 0; - - if (simd_imm_zero) - return 19; - else - return 18; + return true; } /* Splat vector constant out into a byte vector. */ @@ -6215,16 +6447,16 @@ CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1); CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1] - && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0); + && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0); CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1] - && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1); + && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1); CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff - && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 0, 0); + && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0); CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0 - && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 0, 1); + && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1); CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0); @@ -6233,31 +6465,20 @@ } while (0); - /* TODO: Currently the assembler cannot handle types 12 to 15. - And there is no way to specify cmode through the compiler. - Disable them till there is support in the assembler. */ - if (immtype == -1 - || (immtype >= 12 && immtype <= 15) - || immtype == 18) - return -1; + if (immtype == -1) + return false; + if (info) + { + info->element_width = elsize; + info->mvn = emvn != 0; + info->shift = eshift; - if (elementwidth) - *elementwidth = elsize; + unsigned HOST_WIDE_INT imm = 0; - if (elementchar) - *elementchar = elchar; + if (immtype >= 12 && immtype <= 15) + info->msl = true; - if (mvn) - *mvn = emvn; - - if (shift) - *shift = eshift; - - if (modconst) - { - unsigned HOST_WIDE_INT imm = 0; - /* Un-invert bytes of recognized vector, if necessary. */ if (invmask != 0) for (i = 0; i < idx; i++) @@ -6272,68 +6493,27 @@ imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0) << (i * BITS_PER_UNIT); - *modconst = GEN_INT (imm); - } + + info->value = GEN_INT (imm); + } else - { - unsigned HOST_WIDE_INT imm = 0; + { + for (i = 0; i < elsize / BITS_PER_UNIT; i++) + imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT); - for (i = 0; i < elsize / BITS_PER_UNIT; i++) - imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT); - /* Construct 'abcdefgh' because the assembler cannot handle - generic constants. */ - gcc_assert (shift != NULL && mvn != NULL); - if (*mvn) + generic constants. 
*/ + if (info->mvn) imm = ~imm; - imm = (imm >> *shift) & 0xff; - *modconst = GEN_INT (imm); - } + imm = (imm >> info->shift) & 0xff; + info->value = GEN_INT (imm); + } } - return immtype; + return true; #undef CHECK } -/* Return TRUE if rtx X is legal for use as either a AdvSIMD MOVI instruction - (or, implicitly, MVNI) immediate. Write back width per element - to *ELEMENTWIDTH, and a modified constant (whatever should be output - for a MOVI instruction) in *MODCONST. */ -int -aarch64_simd_immediate_valid_for_move (rtx op, enum machine_mode mode, - rtx *modconst, int *elementwidth, - unsigned char *elementchar, - int *mvn, int *shift) -{ - rtx tmpconst; - int tmpwidth; - unsigned char tmpwidthc; - int tmpmvn = 0, tmpshift = 0; - int retval = aarch64_simd_valid_immediate (op, mode, 0, &tmpconst, - &tmpwidth, &tmpwidthc, - &tmpmvn, &tmpshift); - - if (retval == -1) - return 0; - - if (modconst) - *modconst = tmpconst; - - if (elementwidth) - *elementwidth = tmpwidth; - - if (elementchar) - *elementchar = tmpwidthc; - - if (mvn) - *mvn = tmpmvn; - - if (shift) - *shift = tmpshift; - - return 1; -} - static bool aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT minval, @@ -6395,6 +6575,25 @@ return true; } +bool +aarch64_mov_operand_p (rtx x, + enum aarch64_symbol_context context, + enum machine_mode mode) +{ + if (GET_CODE (x) == HIGH + && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0)))) + return true; + + if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode)) + return true; + + if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x)) + return true; + + return aarch64_classify_symbolic_expression (x, context) + == SYMBOL_TINY_ABSOLUTE; +} + /* Return a const_int vector of VAL. */ rtx aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val) @@ -6409,6 +6608,19 @@ return gen_rtx_CONST_VECTOR (mode, v); } +/* Check OP is a legal scalar immediate for the MOVI instruction. */ + +bool +aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode) +{ + enum machine_mode vmode; + + gcc_assert (!VECTOR_MODE_P (mode)); + vmode = aarch64_preferred_simd_mode (mode); + rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op)); + return aarch64_simd_valid_immediate (op_v, vmode, false, NULL); +} + /* Construct and return a PARALLEL RTX vector. */ rtx aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high) @@ -6634,8 +6846,7 @@ gcc_unreachable (); if (const_vec != NULL_RTX - && aarch64_simd_immediate_valid_for_move (const_vec, mode, NULL, NULL, - NULL, NULL, NULL)) + && aarch64_simd_valid_immediate (const_vec, mode, false, NULL)) /* Load using MOVI/MVNI. */ return const_vec; else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX) @@ -7193,49 +7404,80 @@ } char* -aarch64_output_simd_mov_immediate (rtx *const_vector, +aarch64_output_simd_mov_immediate (rtx const_vector, enum machine_mode mode, unsigned width) { - int is_valid; - unsigned char widthc; - int lane_width_bits; + bool is_valid; static char templ[40]; - int shift = 0, mvn = 0; const char *mnemonic; + const char *shift_op; unsigned int lane_count = 0; + char element_char; - is_valid = - aarch64_simd_immediate_valid_for_move (*const_vector, mode, - const_vector, &lane_width_bits, - &widthc, &mvn, &shift); + struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false }; + + /* This will return true to show const_vector is legal for use as either + a AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. 
It will + also update INFO to show how the immediate should be generated. */ + is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info); gcc_assert (is_valid); + element_char = sizetochar (info.element_width); + lane_count = width / info.element_width; + mode = GET_MODE_INNER (mode); if (mode == SFmode || mode == DFmode) { - bool zero_p = - aarch64_float_const_zero_rtx_p (*const_vector); - gcc_assert (shift == 0); - mnemonic = zero_p ? "movi" : "fmov"; + gcc_assert (info.shift == 0 && ! info.mvn); + if (aarch64_float_const_zero_rtx_p (info.value)) + info.value = GEN_INT (0); + else + { +#define buf_size 20 + REAL_VALUE_TYPE r; + REAL_VALUE_FROM_CONST_DOUBLE (r, info.value); + char float_buf[buf_size] = {'\0'}; + real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode); +#undef buf_size + + if (lane_count == 1) + snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf); + else + snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s", + lane_count, element_char, float_buf); + return templ; + } } - else - mnemonic = mvn ? "mvni" : "movi"; - gcc_assert (lane_width_bits != 0); - lane_count = width / lane_width_bits; + mnemonic = info.mvn ? "mvni" : "movi"; + shift_op = info.msl ? "msl" : "lsl"; if (lane_count == 1) - snprintf (templ, sizeof (templ), "%s\t%%d0, %%1", mnemonic); - else if (shift) - snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1, lsl %d", - mnemonic, lane_count, widthc, shift); + snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX, + mnemonic, UINTVAL (info.value)); + else if (info.shift) + snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX + ", %s %d", mnemonic, lane_count, element_char, + UINTVAL (info.value), shift_op, info.shift); else - snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1", - mnemonic, lane_count, widthc); + snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX, + mnemonic, lane_count, element_char, UINTVAL (info.value)); return templ; } +char* +aarch64_output_scalar_simd_mov_immediate (rtx immediate, + enum machine_mode mode) +{ + enum machine_mode vmode; + + gcc_assert (!VECTOR_MODE_P (mode)); + vmode = aarch64_simd_container_mode (mode, 64); + rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate)); + return aarch64_output_simd_mov_immediate (v_op, vmode, 64); +} + /* Split operands into moves from op[1] + op[2] into op[0]. 
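The rewritten immediate output above first classifies the constant, recording the element width, shift and the mvn/msl flags, and then prints the literal value straight into the mnemonic instead of relying on a %1 operand. A standalone sketch of the simplest case, a byte value shifted left by 0, 8, 16 or 24 bits within a 32-bit element, and of how such a hit would be printed; the field names echo simd_immediate_info but the code is an illustration only (the real classifier also covers MVNI, MSL and the other element sizes):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    struct imm_info { unsigned value; int shift; int element_width; };

    /* Is VAL an 8-bit immediate shifted left by 0, 8, 16 or 24 bits
       within a 32-bit element?  Record the pieces if so.  */
    static bool
    movi_32bit_lsl_p (uint32_t val, struct imm_info *info)
    {
      for (int shift = 0; shift <= 24; shift += 8)
        if ((val & ~((uint32_t) 0xff << shift)) == 0)
          {
            info->value = (val >> shift) & 0xff;
            info->shift = shift;
            info->element_width = 32;
            return true;
          }
      return false;
    }

    int
    main (void)
    {
      struct imm_info info;
      char templ[40];

      if (movi_32bit_lsl_p (0xab00, &info))
        {
          int lane_count = 128 / info.element_width;
          if (info.shift)
            snprintf (templ, sizeof templ, "movi\tv0.%ds, 0x%x, lsl %d",
                      lane_count, info.value, info.shift);
          else
            snprintf (templ, sizeof templ, "movi\tv0.%ds, 0x%x",
                      lane_count, info.value);
          puts (templ);   /* movi  v0.4s, 0xab, lsl 8 */
        }
      return 0;
    }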
*/ void @@ -7860,6 +8102,9 @@ #undef TARGET_EXPAND_BUILTIN_VA_START #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start +#undef TARGET_FOLD_BUILTIN +#define TARGET_FOLD_BUILTIN aarch64_fold_builtin + #undef TARGET_FUNCTION_ARG #define TARGET_FUNCTION_ARG aarch64_function_arg @@ -7881,6 +8126,9 @@ #undef TARGET_FRAME_POINTER_REQUIRED #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required +#undef TARGET_GIMPLE_FOLD_BUILTIN +#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin + #undef TARGET_GIMPLIFY_VA_ARG_EXPR #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr @@ -7960,6 +8208,13 @@ #undef TARGET_ARRAY_MODE_SUPPORTED_P #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p +#undef TARGET_VECTORIZE_ADD_STMT_COST +#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost + +#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST +#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \ + aarch64_builtin_vectorization_cost + #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode --- a/src/gcc/config/aarch64/iterators.md +++ b/src/gcc/config/aarch64/iterators.md @@ -83,6 +83,9 @@ ;; Vector Float modes. (define_mode_iterator VDQF [V2SF V4SF V2DF]) +;; Modes suitable to use as the return type of a vcond expression. +(define_mode_iterator VDQF_COND [V2SF V2SI V4SF V4SI V2DF V2DI]) + ;; All Float modes. (define_mode_iterator VALLF [V2SF V4SF V2DF SF DF]) @@ -125,9 +128,15 @@ ;; Vector modes except double int. (define_mode_iterator VDQIF [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF V2DF]) +;; Vector modes for Q and H types. +(define_mode_iterator VDQQH [V8QI V16QI V4HI V8HI]) + ;; Vector modes for H and S types. (define_mode_iterator VDQHS [V4HI V8HI V2SI V4SI]) +;; Vector modes for Q, H and S types. +(define_mode_iterator VDQQHS [V8QI V16QI V4HI V8HI V2SI V4SI]) + ;; Vector and scalar integer modes for H and S (define_mode_iterator VSDQ_HSI [V4HI V8HI V2SI V4SI HI SI]) @@ -163,10 +172,15 @@ [ UNSPEC_ASHIFT_SIGNED ; Used in aarch-simd.md. UNSPEC_ASHIFT_UNSIGNED ; Used in aarch64-simd.md. + UNSPEC_FMAX ; Used in aarch64-simd.md. + UNSPEC_FMAXNMV ; Used in aarch64-simd.md. UNSPEC_FMAXV ; Used in aarch64-simd.md. + UNSPEC_FMIN ; Used in aarch64-simd.md. + UNSPEC_FMINNMV ; Used in aarch64-simd.md. UNSPEC_FMINV ; Used in aarch64-simd.md. UNSPEC_FADDV ; Used in aarch64-simd.md. - UNSPEC_ADDV ; Used in aarch64-simd.md. + UNSPEC_SADDV ; Used in aarch64-simd.md. + UNSPEC_UADDV ; Used in aarch64-simd.md. UNSPEC_SMAXV ; Used in aarch64-simd.md. UNSPEC_SMINV ; Used in aarch64-simd.md. UNSPEC_UMAXV ; Used in aarch64-simd.md. @@ -223,9 +237,6 @@ UNSPEC_SSHLL ; Used in aarch64-simd.md. UNSPEC_USHLL ; Used in aarch64-simd.md. UNSPEC_ADDP ; Used in aarch64-simd.md. - UNSPEC_FMAX ; Used in aarch64-simd.md. - UNSPEC_FMIN ; Used in aarch64-simd.md. - UNSPEC_BSL ; Used in aarch64-simd.md. UNSPEC_TBL ; Used in vector permute patterns. UNSPEC_CONCAT ; Used in vector permute patterns. UNSPEC_ZIP1 ; Used in vector permute patterns. @@ -234,6 +245,22 @@ UNSPEC_UZP2 ; Used in vector permute patterns. UNSPEC_TRN1 ; Used in vector permute patterns. UNSPEC_TRN2 ; Used in vector permute patterns. + UNSPEC_AESE ; Used in aarch64-simd.md. + UNSPEC_AESD ; Used in aarch64-simd.md. + UNSPEC_AESMC ; Used in aarch64-simd.md. + UNSPEC_AESIMC ; Used in aarch64-simd.md. + UNSPEC_SHA1C ; Used in aarch64-simd.md. + UNSPEC_SHA1M ; Used in aarch64-simd.md. + UNSPEC_SHA1P ; Used in aarch64-simd.md. 
+ UNSPEC_SHA1H ; Used in aarch64-simd.md. + UNSPEC_SHA1SU0 ; Used in aarch64-simd.md. + UNSPEC_SHA1SU1 ; Used in aarch64-simd.md. + UNSPEC_SHA256H ; Used in aarch64-simd.md. + UNSPEC_SHA256H2 ; Used in aarch64-simd.md. + UNSPEC_SHA256SU0 ; Used in aarch64-simd.md. + UNSPEC_SHA256SU1 ; Used in aarch64-simd.md. + UNSPEC_PMULL ; Used in aarch64-simd.md. + UNSPEC_PMULL2 ; Used in aarch64-simd.md. ]) ;; ------------------------------------------------------------------- @@ -244,6 +271,9 @@ ;; 32-bit version and "%x0" in the 64-bit version. (define_mode_attr w [(QI "w") (HI "w") (SI "w") (DI "x") (SF "s") (DF "d")]) +;; For constraints used in scalar immediate vector moves +(define_mode_attr hq [(HI "h") (QI "q")]) + ;; For scalar usage of vector/FP registers (define_mode_attr v [(QI "b") (HI "h") (SI "s") (DI "d") (SF "s") (DF "d") @@ -377,7 +407,8 @@ ;; Double modes of vector modes (lower case). (define_mode_attr Vdbl [(V8QI "v16qi") (V4HI "v8hi") (V2SI "v4si") (V2SF "v4sf") - (SI "v2si") (DI "v2di")]) + (SI "v2si") (DI "v2di") + (DF "v2df")]) ;; Narrowed modes for VDN. (define_mode_attr VNARROWD [(V4HI "V8QI") (V2SI "V4HI") @@ -432,6 +463,15 @@ (V2SF "s") (V4SF "s") (V2DF "d")]) +;; Corresponding core element mode for each vector mode. This is a +;; variation on mapping FP modes to GP regs. +(define_mode_attr vwcore [(V8QI "w") (V16QI "w") + (V4HI "w") (V8HI "w") + (V2SI "w") (V4SI "w") + (DI "x") (V2DI "x") + (V2SF "w") (V4SF "w") + (V2DF "x")]) + ;; Double vector types for ALLX. (define_mode_attr Vallxd [(QI "8b") (HI "4h") (SI "2s")]) @@ -527,9 +567,14 @@ ;; Iterator for integer conversions (define_code_iterator FIXUORS [fix unsigned_fix]) +;; Iterator for float conversions +(define_code_iterator FLOATUORS [float unsigned_float]) + ;; Code iterator for variants of vector max and min. (define_code_iterator MAXMIN [smax smin umax umin]) +(define_code_iterator FMAXMIN [smax smin]) + ;; Code iterator for variants of vector max and min. (define_code_iterator ADDSUB [plus minus]) @@ -548,6 +593,9 @@ ;; Unsigned comparison operators. (define_code_iterator UCOMPARISONS [ltu leu geu gtu]) +;; Unsigned comparison operators. +(define_code_iterator FAC_COMPARISONS [lt le ge gt]) + ;; ------------------------------------------------------------------- ;; Code Attributes ;; ------------------------------------------------------------------- @@ -560,6 +608,10 @@ (zero_extend "zero_extend") (sign_extract "extv") (zero_extract "extzv") + (fix "fix") + (unsigned_fix "fixuns") + (float "float") + (unsigned_float "floatuns") (and "and") (ior "ior") (xor "xor") @@ -599,10 +651,14 @@ (define_code_attr CMP [(lt "LT") (le "LE") (eq "EQ") (ge "GE") (gt "GT") (ltu "LTU") (leu "LEU") (geu "GEU") (gtu "GTU")]) +(define_code_attr fix_trunc_optab [(fix "fix_trunc") + (unsigned_fix "fixuns_trunc")]) + ;; Optab prefix for sign/zero-extending operations (define_code_attr su_optab [(sign_extend "") (zero_extend "u") (div "") (udiv "u") (fix "") (unsigned_fix "u") + (float "s") (unsigned_float "u") (ss_plus "s") (us_plus "u") (ss_minus "s") (us_minus "u")]) @@ -627,7 +683,9 @@ (define_code_attr su [(sign_extend "s") (zero_extend "u") (sign_extract "s") (zero_extract "u") (fix "s") (unsigned_fix "u") - (div "s") (udiv "u")]) + (div "s") (udiv "u") + (smax "s") (umax "u") + (smin "s") (umin "u")]) ;; Emit cbz/cbnz depending on comparison type. 
(define_code_attr cbz [(eq "cbz") (ne "cbnz") (lt "cbnz") (ge "cbz")]) @@ -636,10 +694,10 @@ (define_code_attr tbz [(eq "tbz") (ne "tbnz") (lt "tbnz") (ge "tbz")]) ;; Max/min attributes. -(define_code_attr maxmin [(smax "smax") - (smin "smin") - (umax "umax") - (umin "umin")]) +(define_code_attr maxmin [(smax "max") + (smin "min") + (umax "max") + (umin "min")]) ;; MLA/MLS attributes. (define_code_attr as [(ss_plus "a") (ss_minus "s")]) @@ -661,8 +719,11 @@ (define_int_iterator MAXMINV [UNSPEC_UMAXV UNSPEC_UMINV UNSPEC_SMAXV UNSPEC_SMINV]) -(define_int_iterator FMAXMINV [UNSPEC_FMAXV UNSPEC_FMINV]) +(define_int_iterator FMAXMINV [UNSPEC_FMAXV UNSPEC_FMINV + UNSPEC_FMAXNMV UNSPEC_FMINNMV]) +(define_int_iterator SUADDV [UNSPEC_SADDV UNSPEC_UADDV]) + (define_int_iterator HADDSUB [UNSPEC_SHADD UNSPEC_UHADD UNSPEC_SRHADD UNSPEC_URHADD UNSPEC_SHSUB UNSPEC_UHSUB @@ -675,7 +736,7 @@ (define_int_iterator ADDSUBHN2 [UNSPEC_ADDHN2 UNSPEC_RADDHN2 UNSPEC_SUBHN2 UNSPEC_RSUBHN2]) -(define_int_iterator FMAXMIN [UNSPEC_FMAX UNSPEC_FMIN]) +(define_int_iterator FMAXMIN_UNS [UNSPEC_FMAX UNSPEC_FMIN]) (define_int_iterator VQDMULH [UNSPEC_SQDMULH UNSPEC_SQRDMULH]) @@ -711,25 +772,46 @@ UNSPEC_UZP1 UNSPEC_UZP2]) (define_int_iterator FRINT [UNSPEC_FRINTZ UNSPEC_FRINTP UNSPEC_FRINTM - UNSPEC_FRINTI UNSPEC_FRINTX UNSPEC_FRINTA]) + UNSPEC_FRINTN UNSPEC_FRINTI UNSPEC_FRINTX + UNSPEC_FRINTA]) (define_int_iterator FCVT [UNSPEC_FRINTZ UNSPEC_FRINTP UNSPEC_FRINTM - UNSPEC_FRINTA]) + UNSPEC_FRINTA UNSPEC_FRINTN]) +(define_int_iterator FRECP [UNSPEC_FRECPE UNSPEC_FRECPX]) + +(define_int_iterator CRYPTO_AES [UNSPEC_AESE UNSPEC_AESD]) +(define_int_iterator CRYPTO_AESMC [UNSPEC_AESMC UNSPEC_AESIMC]) + +(define_int_iterator CRYPTO_SHA1 [UNSPEC_SHA1C UNSPEC_SHA1M UNSPEC_SHA1P]) + +(define_int_iterator CRYPTO_SHA256 [UNSPEC_SHA256H UNSPEC_SHA256H2]) + ;; ------------------------------------------------------------------- ;; Int Iterators Attributes. ;; ------------------------------------------------------------------- -(define_int_attr maxminv [(UNSPEC_UMAXV "umax") - (UNSPEC_UMINV "umin") - (UNSPEC_SMAXV "smax") - (UNSPEC_SMINV "smin")]) +(define_int_attr maxmin_uns [(UNSPEC_UMAXV "umax") + (UNSPEC_UMINV "umin") + (UNSPEC_SMAXV "smax") + (UNSPEC_SMINV "smin") + (UNSPEC_FMAX "smax_nan") + (UNSPEC_FMAXNMV "smax") + (UNSPEC_FMAXV "smax_nan") + (UNSPEC_FMIN "smin_nan") + (UNSPEC_FMINNMV "smin") + (UNSPEC_FMINV "smin_nan")]) -(define_int_attr fmaxminv [(UNSPEC_FMAXV "max") - (UNSPEC_FMINV "min")]) +(define_int_attr maxmin_uns_op [(UNSPEC_UMAXV "umax") + (UNSPEC_UMINV "umin") + (UNSPEC_SMAXV "smax") + (UNSPEC_SMINV "smin") + (UNSPEC_FMAX "fmax") + (UNSPEC_FMAXNMV "fmaxnm") + (UNSPEC_FMAXV "fmax") + (UNSPEC_FMIN "fmin") + (UNSPEC_FMINNMV "fminnm") + (UNSPEC_FMINV "fmin")]) -(define_int_attr fmaxmin [(UNSPEC_FMAX "fmax") - (UNSPEC_FMIN "fmin")]) - (define_int_attr sur [(UNSPEC_SHADD "s") (UNSPEC_UHADD "u") (UNSPEC_SRHADD "sr") (UNSPEC_URHADD "ur") (UNSPEC_SHSUB "s") (UNSPEC_UHSUB "u") @@ -740,6 +822,7 @@ (UNSPEC_SUBHN2 "") (UNSPEC_RSUBHN2 "r") (UNSPEC_SQXTN "s") (UNSPEC_UQXTN "u") (UNSPEC_USQADD "us") (UNSPEC_SUQADD "su") + (UNSPEC_SADDV "s") (UNSPEC_UADDV "u") (UNSPEC_SSLI "s") (UNSPEC_USLI "u") (UNSPEC_SSRI "s") (UNSPEC_USRI "u") (UNSPEC_USRA "u") (UNSPEC_SSRA "s") @@ -798,15 +881,18 @@ (UNSPEC_FRINTM "floor") (UNSPEC_FRINTI "nearbyint") (UNSPEC_FRINTX "rint") - (UNSPEC_FRINTA "round")]) + (UNSPEC_FRINTA "round") + (UNSPEC_FRINTN "frintn")]) ;; frint suffix for floating-point rounding instructions. 
(define_int_attr frint_suffix [(UNSPEC_FRINTZ "z") (UNSPEC_FRINTP "p") (UNSPEC_FRINTM "m") (UNSPEC_FRINTI "i") - (UNSPEC_FRINTX "x") (UNSPEC_FRINTA "a")]) + (UNSPEC_FRINTX "x") (UNSPEC_FRINTA "a") + (UNSPEC_FRINTN "n")]) (define_int_attr fcvt_pattern [(UNSPEC_FRINTZ "btrunc") (UNSPEC_FRINTA "round") - (UNSPEC_FRINTP "ceil") (UNSPEC_FRINTM "floor")]) + (UNSPEC_FRINTP "ceil") (UNSPEC_FRINTM "floor") + (UNSPEC_FRINTN "frintn")]) (define_int_attr perm_insn [(UNSPEC_ZIP1 "zip") (UNSPEC_ZIP2 "zip") (UNSPEC_TRN1 "trn") (UNSPEC_TRN2 "trn") @@ -815,3 +901,13 @@ (define_int_attr perm_hilo [(UNSPEC_ZIP1 "1") (UNSPEC_ZIP2 "2") (UNSPEC_TRN1 "1") (UNSPEC_TRN2 "2") (UNSPEC_UZP1 "1") (UNSPEC_UZP2 "2")]) + +(define_int_attr frecp_suffix [(UNSPEC_FRECPE "e") (UNSPEC_FRECPX "x")]) + +(define_int_attr aes_op [(UNSPEC_AESE "e") (UNSPEC_AESD "d")]) +(define_int_attr aesmc_op [(UNSPEC_AESMC "mc") (UNSPEC_AESIMC "imc")]) + +(define_int_attr sha1_op [(UNSPEC_SHA1C "c") (UNSPEC_SHA1P "p") + (UNSPEC_SHA1M "m")]) + +(define_int_attr sha256_op [(UNSPEC_SHA256H "") (UNSPEC_SHA256H2 "2")]) --- a/src/gcc/config/aarch64/aarch64.h +++ b/src/gcc/config/aarch64/aarch64.h @@ -49,6 +49,8 @@ break; \ } \ \ + if (TARGET_CRYPTO) \ + builtin_define ("__ARM_FEATURE_CRYPTO"); \ } while (0) @@ -151,6 +153,7 @@ #define AARCH64_FL_FP (1 << 1) /* Has FP. */ #define AARCH64_FL_CRYPTO (1 << 2) /* Has crypto. */ #define AARCH64_FL_SLOWMUL (1 << 3) /* A slow multiply core. */ +#define AARCH64_FL_CRC (1 << 4) /* Has CRC. */ /* Has FP and SIMD. */ #define AARCH64_FL_FPSIMD (AARCH64_FL_FP | AARCH64_FL_SIMD) @@ -163,6 +166,7 @@ /* Macros to test ISA flags. */ extern unsigned long aarch64_isa_flags; +#define AARCH64_ISA_CRC (aarch64_isa_flags & AARCH64_FL_CRC) #define AARCH64_ISA_CRYPTO (aarch64_isa_flags & AARCH64_FL_CRYPTO) #define AARCH64_ISA_FP (aarch64_isa_flags & AARCH64_FL_FP) #define AARCH64_ISA_SIMD (aarch64_isa_flags & AARCH64_FL_SIMD) @@ -171,6 +175,8 @@ extern unsigned long aarch64_tune_flags; #define AARCH64_TUNE_SLOWMUL (aarch64_tune_flags & AARCH64_FL_SLOWMUL) +/* Crypto is an optional feature. */ +#define TARGET_CRYPTO AARCH64_ISA_CRYPTO /* Standard register usage. */ @@ -434,7 +440,7 @@ #define INDEX_REG_CLASS CORE_REGS #define BASE_REG_CLASS POINTER_REGS -/* Register pairs used to eliminate unneeded registers that point intoi +/* Register pairs used to eliminate unneeded registers that point into the stack frame. */ #define ELIMINABLE_REGS \ { \ @@ -475,7 +481,7 @@ /* Stack layout; function entry, exit and calling. */ #define STACK_GROWS_DOWNWARD 1 -#define FRAME_GROWS_DOWNWARD 0 +#define FRAME_GROWS_DOWNWARD 1 #define STARTING_FRAME_OFFSET 0 @@ -521,12 +527,6 @@ #endif -/* Which ABI to use. */ -enum arm_abi_type -{ - ARM_ABI_AAPCS64 -}; - enum arm_pcs { ARM_PCS_AAPCS64, /* Base standard AAPCS for 64 bit. */ @@ -534,11 +534,7 @@ }; -extern enum arm_abi_type arm_abi; extern enum arm_pcs arm_pcs_variant; -#ifndef ARM_DEFAULT_ABI -#define ARM_DEFAULT_ABI ARM_ABI_AAPCS64 -#endif #ifndef ARM_DEFAULT_PCS #define ARM_DEFAULT_PCS ARM_PCS_AAPCS64 @@ -709,6 +705,8 @@ #define SELECT_CC_MODE(OP, X, Y) aarch64_select_cc_mode (OP, X, Y) +#define REVERSIBLE_CC_MODE(MODE) 1 + #define REVERSE_CONDITION(CODE, MODE) \ (((MODE) == CCFPmode || (MODE) == CCFPEmode) \ ?
reverse_condition_maybe_unordered (CODE) \ @@ -758,9 +756,23 @@ #define PRINT_OPERAND_ADDRESS(STREAM, X) \ aarch64_print_operand_address (STREAM, X) -#define FUNCTION_PROFILER(STREAM, LABELNO) \ - aarch64_function_profiler (STREAM, LABELNO) +#define MCOUNT_NAME "_mcount" +#define NO_PROFILE_COUNTERS 1 + +/* Emit rtl for profiling. Output assembler code to FILE + to call "_mcount" for profiling a function entry. */ +#define PROFILE_HOOK(LABEL) \ +{ \ + rtx fun,lr; \ + lr = get_hard_reg_initial_val (Pmode, LR_REGNUM); \ + fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_NAME); \ + emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lr, Pmode); \ +} + +/* All the work done in PROFILE_HOOK, but still required. */ +#define FUNCTION_PROFILER(STREAM, LABELNO) do { } while (0) + /* For some reason, the Linux headers think they know how to define these macros. They don't!!! */ #undef ASM_APP_ON --- a/src/gcc/config/arm/arm1020e.md +++ b/src/gcc/config/arm/arm1020e.md @@ -66,13 +66,14 @@ ;; ALU operations with no shifted operand (define_insn_reservation "1020alu_op" 1 (and (eq_attr "tune" "arm1020e,arm1022e") - (eq_attr "type" "alu_reg,simple_alu_imm")) + (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg")) "1020a_e,1020a_m,1020a_w") ;; ALU operations with a shift-by-constant operand (define_insn_reservation "1020alu_shift_op" 1 (and (eq_attr "tune" "arm1020e,arm1022e") - (eq_attr "type" "simple_alu_shift,alu_shift")) + (eq_attr "type" "extend,arlo_shift,mov_shift,mvn_shift")) "1020a_e,1020a_m,1020a_w") ;; ALU operations with a shift-by-register operand @@ -81,7 +82,7 @@ ;; the execute stage. (define_insn_reservation "1020alu_shift_reg_op" 2 (and (eq_attr "tune" "arm1020e,arm1022e") - (eq_attr "type" "alu_shift_reg")) + (eq_attr "type" "arlo_shift_reg,mov_shift_reg,mvn_shift_reg")) "1020a_e*2,1020a_m,1020a_w") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -96,7 +97,7 @@ ;; until after the memory stage. (define_insn_reservation "1020mult1" 2 (and (eq_attr "tune" "arm1020e,arm1022e") - (eq_attr "insn" "smulxy,smulwy")) + (eq_attr "type" "smulxy,smulwy")) "1020a_e,1020a_m,1020a_w") ;; The "smlaxy" and "smlawx" instructions require two iterations through @@ -104,7 +105,7 @@ ;; the execute stage. (define_insn_reservation "1020mult2" 2 (and (eq_attr "tune" "arm1020e,arm1022e") - (eq_attr "insn" "smlaxy,smlalxy,smlawx")) + (eq_attr "type" "smlaxy,smlalxy,smlawx")) "1020a_e*2,1020a_m,1020a_w") ;; The "smlalxy", "mul", and "mla" instructions require two iterations @@ -112,7 +113,7 @@ ;; the memory stage. (define_insn_reservation "1020mult3" 3 (and (eq_attr "tune" "arm1020e,arm1022e") - (eq_attr "insn" "smlalxy,mul,mla")) + (eq_attr "type" "smlalxy,mul,mla")) "1020a_e*2,1020a_m,1020a_w") ;; The "muls" and "mlas" instructions loop in the execute stage for @@ -120,7 +121,7 @@ ;; available after three iterations. (define_insn_reservation "1020mult4" 3 (and (eq_attr "tune" "arm1020e,arm1022e") - (eq_attr "insn" "muls,mlas")) + (eq_attr "type" "muls,mlas")) "1020a_e*4,1020a_m,1020a_w") ;; Long multiply instructions that produce two registers of @@ -135,7 +136,7 @@ ;; available after the memory cycle. (define_insn_reservation "1020mult5" 4 (and (eq_attr "tune" "arm1020e,arm1022e") - (eq_attr "insn" "umull,umlal,smull,smlal")) + (eq_attr "type" "umull,umlal,smull,smlal")) "1020a_e*3,1020a_m,1020a_w") ;; The "umulls", "umlals", "smulls", and "smlals" instructions loop in @@ -143,7 +144,7 @@ ;; The value result is available after four iterations. 
(define_insn_reservation "1020mult6" 4 (and (eq_attr "tune" "arm1020e,arm1022e") - (eq_attr "insn" "umulls,umlals,smulls,smlals")) + (eq_attr "type" "umulls,umlals,smulls,smlals")) "1020a_e*5,1020a_m,1020a_w") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; --- a/src/gcc/config/arm/cortex-a15.md +++ b/src/gcc/config/arm/cortex-a15.md @@ -61,7 +61,9 @@ ;; Simple ALU without shift (define_insn_reservation "cortex_a15_alu" 2 (and (eq_attr "tune" "cortexa15") - (and (eq_attr "type" "alu_reg,simple_alu_imm") + (and (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg,\ + mov_imm,mov_reg,\ + mvn_imm,mvn_reg") (eq_attr "neon_type" "none"))) "ca15_issue1,(ca15_sx1,ca15_sx1_alu)|(ca15_sx2,ca15_sx2_alu)") @@ -68,7 +70,7 @@ ;; ALU ops with immediate shift (define_insn_reservation "cortex_a15_alu_shift" 3 (and (eq_attr "tune" "cortexa15") - (and (eq_attr "type" "simple_alu_shift,alu_shift") + (and (eq_attr "type" "extend,arlo_shift,,mov_shift,mvn_shift") (eq_attr "neon_type" "none"))) "ca15_issue1,(ca15_sx1,ca15_sx1+ca15_sx1_shf,ca15_sx1_alu)\ |(ca15_sx2,ca15_sx2+ca15_sx2_shf,ca15_sx2_alu)") @@ -76,7 +78,7 @@ ;; ALU ops with register controlled shift (define_insn_reservation "cortex_a15_alu_shift_reg" 3 (and (eq_attr "tune" "cortexa15") - (and (eq_attr "type" "alu_shift_reg") + (and (eq_attr "type" "arlo_shift_reg,mov_shift_reg,mvn_shift_reg") (eq_attr "neon_type" "none"))) "(ca15_issue2,ca15_sx1+ca15_sx2,ca15_sx1_shf,ca15_sx2_alu)\ |(ca15_issue1,(ca15_issue1+ca15_sx2,ca15_sx1+ca15_sx2_shf)\ @@ -87,28 +89,26 @@ ;; 32-bit multiplies (define_insn_reservation "cortex_a15_mult32" 3 (and (eq_attr "tune" "cortexa15") - (and (eq_attr "type" "mult") - (and (eq_attr "neon_type" "none") - (eq_attr "mul64" "no")))) + (and (eq_attr "mul32" "yes") + (eq_attr "neon_type" "none"))) "ca15_issue1,ca15_mx") ;; 64-bit multiplies (define_insn_reservation "cortex_a15_mult64" 4 (and (eq_attr "tune" "cortexa15") - (and (eq_attr "type" "mult") - (and (eq_attr "neon_type" "none") - (eq_attr "mul64" "yes")))) + (and (eq_attr "mul64" "yes") + (eq_attr "neon_type" "none"))) "ca15_issue1,ca15_mx*2") ;; Integer divide (define_insn_reservation "cortex_a15_udiv" 9 (and (eq_attr "tune" "cortexa15") - (eq_attr "insn" "udiv")) + (eq_attr "type" "udiv")) "ca15_issue1,ca15_mx") (define_insn_reservation "cortex_a15_sdiv" 10 (and (eq_attr "tune" "cortexa15") - (eq_attr "insn" "sdiv")) + (eq_attr "type" "sdiv")) "ca15_issue1,ca15_mx") ;; Block all issue pipes for a cycle --- a/src/gcc/config/arm/arm-tables.opt +++ b/src/gcc/config/arm/arm-tables.opt @@ -250,6 +250,9 @@ Enum(processor_type) String(cortex-a15) Value(cortexa15) EnumValue +Enum(processor_type) String(cortex-a53) Value(cortexa53) + +EnumValue Enum(processor_type) String(cortex-r4) Value(cortexr4) EnumValue @@ -259,6 +262,9 @@ Enum(processor_type) String(cortex-r5) Value(cortexr5) EnumValue +Enum(processor_type) String(cortex-r7) Value(cortexr7) + +EnumValue Enum(processor_type) String(cortex-m4) Value(cortexm4) EnumValue @@ -353,11 +359,14 @@ Enum(arm_arch) String(armv8-a) Value(23) EnumValue -Enum(arm_arch) String(iwmmxt) Value(24) +Enum(arm_arch) String(armv8-a+crc) Value(24) EnumValue -Enum(arm_arch) String(iwmmxt2) Value(25) +Enum(arm_arch) String(iwmmxt) Value(25) +EnumValue +Enum(arm_arch) String(iwmmxt2) Value(26) + Enum Name(arm_fpu) Type(int) Known ARM FPUs (for use with the -mfpu= option): --- a/src/gcc/config/arm/arm1026ejs.md +++ b/src/gcc/config/arm/arm1026ejs.md @@ -66,13 +66,14 @@ ;; ALU operations with no shifted operand 
(define_insn_reservation "alu_op" 1 (and (eq_attr "tune" "arm1026ejs") - (eq_attr "type" "alu_reg,simple_alu_imm")) + (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg")) "a_e,a_m,a_w") ;; ALU operations with a shift-by-constant operand (define_insn_reservation "alu_shift_op" 1 (and (eq_attr "tune" "arm1026ejs") - (eq_attr "type" "simple_alu_shift,alu_shift")) + (eq_attr "type" "extend,arlo_shift,mov_shift,mvn_shift")) "a_e,a_m,a_w") ;; ALU operations with a shift-by-register operand @@ -81,7 +82,7 @@ ;; the execute stage. (define_insn_reservation "alu_shift_reg_op" 2 (and (eq_attr "tune" "arm1026ejs") - (eq_attr "type" "alu_shift_reg")) + (eq_attr "type" "arlo_shift_reg,mov_shift_reg,mvn_shift_reg")) "a_e*2,a_m,a_w") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -96,7 +97,7 @@ ;; until after the memory stage. (define_insn_reservation "mult1" 2 (and (eq_attr "tune" "arm1026ejs") - (eq_attr "insn" "smulxy,smulwy")) + (eq_attr "type" "smulxy,smulwy")) "a_e,a_m,a_w") ;; The "smlaxy" and "smlawx" instructions require two iterations through @@ -104,7 +105,7 @@ ;; the execute stage. (define_insn_reservation "mult2" 2 (and (eq_attr "tune" "arm1026ejs") - (eq_attr "insn" "smlaxy,smlalxy,smlawx")) + (eq_attr "type" "smlaxy,smlalxy,smlawx")) "a_e*2,a_m,a_w") ;; The "smlalxy", "mul", and "mla" instructions require two iterations @@ -112,7 +113,7 @@ ;; the memory stage. (define_insn_reservation "mult3" 3 (and (eq_attr "tune" "arm1026ejs") - (eq_attr "insn" "smlalxy,mul,mla")) + (eq_attr "type" "smlalxy,mul,mla")) "a_e*2,a_m,a_w") ;; The "muls" and "mlas" instructions loop in the execute stage for @@ -120,7 +121,7 @@ ;; available after three iterations. (define_insn_reservation "mult4" 3 (and (eq_attr "tune" "arm1026ejs") - (eq_attr "insn" "muls,mlas")) + (eq_attr "type" "muls,mlas")) "a_e*4,a_m,a_w") ;; Long multiply instructions that produce two registers of @@ -135,7 +136,7 @@ ;; available after the memory cycle. (define_insn_reservation "mult5" 4 (and (eq_attr "tune" "arm1026ejs") - (eq_attr "insn" "umull,umlal,smull,smlal")) + (eq_attr "type" "umull,umlal,smull,smlal")) "a_e*3,a_m,a_w") ;; The "umulls", "umlals", "smulls", and "smlals" instructions loop in @@ -143,7 +144,7 @@ ;; The value result is available after four iterations. (define_insn_reservation "mult6" 4 (and (eq_attr "tune" "arm1026ejs") - (eq_attr "insn" "umulls,umlals,smulls,smlals")) + (eq_attr "type" "umulls,umlals,smulls,smlals")) "a_e*5,a_m,a_w") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; --- a/src/gcc/config/arm/linux-elf.h +++ b/src/gcc/config/arm/linux-elf.h @@ -44,9 +44,9 @@ #define SUBTARGET_EXTRA_LINK_SPEC " -m " TARGET_LINKER_EMULATION " -p" +/* We do not have any MULTILIB_OPTIONS specified, so there are no + MULTILIB_DEFAULTS. */ #undef MULTILIB_DEFAULTS -#define MULTILIB_DEFAULTS \ - { "marm", "mlittle-endian", "mfloat-abi=hard", "mno-thumb-interwork" } /* Now we define the strings used to build the spec file. 
*/ #undef LIB_SPEC --- a/src/gcc/config/arm/arm1136jfs.md +++ b/src/gcc/config/arm/arm1136jfs.md @@ -75,13 +75,14 @@ ;; ALU operations with no shifted operand (define_insn_reservation "11_alu_op" 2 (and (eq_attr "tune" "arm1136js,arm1136jfs") - (eq_attr "type" "alu_reg,simple_alu_imm")) + (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg")) "e_1,e_2,e_3,e_wb") ;; ALU operations with a shift-by-constant operand (define_insn_reservation "11_alu_shift_op" 2 (and (eq_attr "tune" "arm1136js,arm1136jfs") - (eq_attr "type" "simple_alu_shift,alu_shift")) + (eq_attr "type" "extend,arlo_shift,mov_shift,mvn_shift")) "e_1,e_2,e_3,e_wb") ;; ALU operations with a shift-by-register operand @@ -90,7 +91,7 @@ ;; the shift stage. (define_insn_reservation "11_alu_shift_reg_op" 3 (and (eq_attr "tune" "arm1136js,arm1136jfs") - (eq_attr "type" "alu_shift_reg")) + (eq_attr "type" "arlo_shift_reg,mov_shift_reg,mvn_shift_reg")) "e_1*2,e_2,e_3,e_wb") ;; alu_ops can start sooner, if there is no shifter dependency @@ -129,13 +130,13 @@ ;; Multiply and multiply-accumulate results are available after four stages. (define_insn_reservation "11_mult1" 4 (and (eq_attr "tune" "arm1136js,arm1136jfs") - (eq_attr "insn" "mul,mla")) + (eq_attr "type" "mul,mla")) "e_1*2,e_2,e_3,e_wb") ;; The *S variants set the condition flags, which requires three more cycles. (define_insn_reservation "11_mult2" 4 (and (eq_attr "tune" "arm1136js,arm1136jfs") - (eq_attr "insn" "muls,mlas")) + (eq_attr "type" "muls,mlas")) "e_1*2,e_2,e_3,e_wb") (define_bypass 3 "11_mult1,11_mult2" @@ -160,13 +161,13 @@ ;; the two multiply-accumulate instructions. (define_insn_reservation "11_mult3" 5 (and (eq_attr "tune" "arm1136js,arm1136jfs") - (eq_attr "insn" "smull,umull,smlal,umlal")) + (eq_attr "type" "smull,umull,smlal,umlal")) "e_1*3,e_2,e_3,e_wb*2") ;; The *S variants set the condition flags, which requires three more cycles. (define_insn_reservation "11_mult4" 5 (and (eq_attr "tune" "arm1136js,arm1136jfs") - (eq_attr "insn" "smulls,umulls,smlals,umlals")) + (eq_attr "type" "smulls,umulls,smlals,umlals")) "e_1*3,e_2,e_3,e_wb*2") (define_bypass 4 "11_mult3,11_mult4" @@ -190,7 +191,8 @@ ;; cycles. (define_insn_reservation "11_mult5" 3 (and (eq_attr "tune" "arm1136js,arm1136jfs") - (eq_attr "insn" "smulxy,smlaxy,smulwy,smlawy,smuad,smuadx,smlad,smladx,smusd,smusdx,smlsd,smlsdx")) + (eq_attr "type" "smulxy,smlaxy,smulwy,smlawy,smuad,smuadx,smlad,smladx,\ + smusd,smusdx,smlsd,smlsdx")) "e_1,e_2,e_3,e_wb") (define_bypass 2 "11_mult5" @@ -211,7 +213,7 @@ ;; The same idea, then the 32-bit result is added to a 64-bit quantity. (define_insn_reservation "11_mult6" 4 (and (eq_attr "tune" "arm1136js,arm1136jfs") - (eq_attr "insn" "smlalxy")) + (eq_attr "type" "smlalxy")) "e_1*2,e_2,e_3,e_wb*2") ;; Signed 32x32 multiply, then the most significant 32 bits are extracted @@ -218,7 +220,7 @@ ;; and are available after the memory stage. 
(define_insn_reservation "11_mult7" 4 (and (eq_attr "tune" "arm1136js,arm1136jfs") - (eq_attr "insn" "smmul,smmulr")) + (eq_attr "type" "smmul,smmulr")) "e_1*2,e_2,e_3,e_wb") (define_bypass 3 "11_mult6,11_mult7" --- a/src/gcc/config/arm/marvell-pj4.md +++ b/src/gcc/config/arm/marvell-pj4.md @@ -41,41 +41,39 @@ (define_insn_reservation "pj4_alu_e1" 1 (and (eq_attr "tune" "marvell_pj4") - (eq_attr "type" "simple_alu_imm,alu_reg") - (not (eq_attr "conds" "set")) - (eq_attr "insn" "mov,mvn")) + (eq_attr "type" "mov_imm,mov_reg,mvn_imm,mvn_reg") + (not (eq_attr "conds" "set"))) "pj4_is,(pj4_alu1,pj4_w1+pj4_cp)|(pj4_alu2,pj4_w2+pj4_cp)") (define_insn_reservation "pj4_alu_e1_conds" 4 (and (eq_attr "tune" "marvell_pj4") - (eq_attr "type" "simple_alu_imm,alu_reg") - (eq_attr "conds" "set") - (eq_attr "insn" "mov,mvn")) + (eq_attr "type" "mov_imm,mov_reg,mvn_imm,mvn_reg") + (eq_attr "conds" "set")) "pj4_is,(pj4_alu1,pj4_w1+pj4_cp)|(pj4_alu2,pj4_w2+pj4_cp)") (define_insn_reservation "pj4_alu" 1 (and (eq_attr "tune" "marvell_pj4") - (eq_attr "type" "simple_alu_imm,alu_reg") - (not (eq_attr "conds" "set")) - (not (eq_attr "insn" "mov,mvn"))) + (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg") + (not (eq_attr "conds" "set"))) "pj4_is,(pj4_alu1,pj4_w1+pj4_cp)|(pj4_alu2,pj4_w2+pj4_cp)") (define_insn_reservation "pj4_alu_conds" 4 (and (eq_attr "tune" "marvell_pj4") - (eq_attr "type" "simple_alu_imm,alu_reg") - (eq_attr "conds" "set") - (not (eq_attr "insn" "mov,mvn"))) + (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg") + (eq_attr "conds" "set")) "pj4_is,(pj4_alu1,pj4_w1+pj4_cp)|(pj4_alu2,pj4_w2+pj4_cp)") (define_insn_reservation "pj4_shift" 1 (and (eq_attr "tune" "marvell_pj4") - (eq_attr "type" "alu_shift,alu_shift_reg,simple_alu_shift") + (eq_attr "type" "arlo_shift,arlo_shift_reg,extend,\ + mov_shift,mvn_shift,mov_shift_reg,mvn_shift_reg") (not (eq_attr "conds" "set")) (eq_attr "shift" "1")) "pj4_is,(pj4_alu1,pj4_w1+pj4_cp)|(pj4_alu2,pj4_w2+pj4_cp)") (define_insn_reservation "pj4_shift_conds" 4 (and (eq_attr "tune" "marvell_pj4") - (eq_attr "type" "alu_shift,alu_shift_reg,simple_alu_shift") + (eq_attr "type" "arlo_shift,arlo_shift_reg,extend,\ + mov_shift,mvn_shift,mov_shift_reg,mvn_shift_reg") (eq_attr "conds" "set") (eq_attr "shift" "1")) "pj4_is,(pj4_alu1,pj4_w1+pj4_cp)|(pj4_alu2,pj4_w2+pj4_cp)") @@ -82,13 +80,15 @@ (define_insn_reservation "pj4_alu_shift" 1 (and (eq_attr "tune" "marvell_pj4") (not (eq_attr "conds" "set")) - (eq_attr "type" "alu_shift,alu_shift_reg,simple_alu_shift")) + (eq_attr "type" "arlo_shift,arlo_shift_reg,extend,\ + mov_shift,mvn_shift,mov_shift_reg,mvn_shift_reg")) "pj4_is,(pj4_alu1,nothing,pj4_w1+pj4_cp)|(pj4_alu2,nothing,pj4_w2+pj4_cp)") (define_insn_reservation "pj4_alu_shift_conds" 4 (and (eq_attr "tune" "marvell_pj4") (eq_attr "conds" "set") - (eq_attr "type" "alu_shift,alu_shift_reg,simple_alu_shift")) + (eq_attr "type" "arlo_shift,arlo_shift_reg,extend,\ + mov_shift,mvn_shift,mov_shift_reg,mvn_shift_reg")) "pj4_is,(pj4_alu1,nothing,pj4_w1+pj4_cp)|(pj4_alu2,nothing,pj4_w2+pj4_cp)") (define_bypass 2 "pj4_alu_shift,pj4_shift" @@ -95,10 +95,14 @@ "pj4_ir_mul,pj4_ir_div,pj4_core_to_vfp") (define_insn_reservation "pj4_ir_mul" 3 - (and (eq_attr "tune" "marvell_pj4") (eq_attr "type" "mult")) "pj4_is,pj4_mul,nothing*2,pj4_cp") + (and (eq_attr "tune" "marvell_pj4") + (ior (eq_attr "mul32" "yes") + (eq_attr "mul64" "yes"))) + "pj4_is,pj4_mul,nothing*2,pj4_cp") (define_insn_reservation "pj4_ir_div" 20 - (and (eq_attr "tune" "marvell_pj4") (eq_attr "insn" "udiv,sdiv")) 
"pj4_is,pj4_div*19,pj4_cp") + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "udiv,sdiv")) "pj4_is,pj4_div*19,pj4_cp") ;; Branches and calls. --- a/src/gcc/config/arm/thumb2.md +++ b/src/gcc/config/arm/thumb2.md @@ -60,105 +60,230 @@ "TARGET_THUMB2" "bic%?\\t%0, %1, %2%S4" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "shift" "2") - (set_attr "type" "alu_shift")] + (set_attr "type" "arlo_shift")] ) -(define_insn "*thumb2_smaxsi3" - [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") - (smax:SI (match_operand:SI 1 "s_register_operand" "0,r,?r") - (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI"))) +;; We use the '0' constraint for operand 1 because reload should +;; be smart enough to generate an appropriate move for the r/r/r case. +(define_insn_and_split "*thumb2_smaxsi3" + [(set (match_operand:SI 0 "s_register_operand" "=r,l,r") + (smax:SI (match_operand:SI 1 "s_register_operand" "%0,0,0") + (match_operand:SI 2 "arm_rhs_operand" "r,Py,I"))) (clobber (reg:CC CC_REGNUM))] - "TARGET_THUMB2" - "@ - cmp\\t%1, %2\;it\\tlt\;movlt\\t%0, %2 - cmp\\t%1, %2\;it\\tge\;movge\\t%0, %1 - cmp\\t%1, %2\;ite\\tge\;movge\\t%0, %1\;movlt\\t%0, %2" + "TARGET_THUMB2" + "#" + ; cmp\\t%1, %2\;it\\tlt\;movlt\\t%0, %2 + "TARGET_THUMB2 && reload_completed" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 1) (match_dup 2))) + (cond_exec (lt:SI (reg:CC CC_REGNUM) (const_int 0)) + (set (match_dup 0) + (match_dup 2)))] + "" [(set_attr "conds" "clob") - (set_attr "length" "10,10,14")] + (set_attr "enabled_for_depr_it" "yes,yes,no") + (set_attr "length" "6,6,10")] ) -(define_insn "*thumb2_sminsi3" - [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") - (smin:SI (match_operand:SI 1 "s_register_operand" "0,r,?r") - (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI"))) +(define_insn_and_split "*thumb2_sminsi3" + [(set (match_operand:SI 0 "s_register_operand" "=r,l,r") + (smin:SI (match_operand:SI 1 "s_register_operand" "%0,0,0") + (match_operand:SI 2 "arm_rhs_operand" "r,Py,I"))) (clobber (reg:CC CC_REGNUM))] "TARGET_THUMB2" - "@ - cmp\\t%1, %2\;it\\tge\;movge\\t%0, %2 - cmp\\t%1, %2\;it\\tlt\;movlt\\t%0, %1 - cmp\\t%1, %2\;ite\\tlt\;movlt\\t%0, %1\;movge\\t%0, %2" + "#" + ; cmp\\t%1, %2\;it\\tge\;movge\\t%0, %2 + "TARGET_THUMB2 && reload_completed" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 1) (match_dup 2))) + (cond_exec (ge:SI (reg:CC CC_REGNUM) (const_int 0)) + (set (match_dup 0) + (match_dup 2)))] + "" [(set_attr "conds" "clob") - (set_attr "length" "10,10,14")] + (set_attr "enabled_for_depr_it" "yes,yes,no") + (set_attr "length" "6,6,10")] ) -(define_insn "*thumb32_umaxsi3" - [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") - (umax:SI (match_operand:SI 1 "s_register_operand" "0,r,?r") - (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI"))) - (clobber (reg:CC CC_REGNUM))] +(define_insn_and_split "*thumb32_umaxsi3" + [(set (match_operand:SI 0 "s_register_operand" "=r,l,r") + (umax:SI (match_operand:SI 1 "s_register_operand" "%0,0,0") + (match_operand:SI 2 "arm_rhs_operand" "r,Py,I"))) + (clobber (reg:CC CC_REGNUM))] "TARGET_THUMB2" - "@ - cmp\\t%1, %2\;it\\tcc\;movcc\\t%0, %2 - cmp\\t%1, %2\;it\\tcs\;movcs\\t%0, %1 - cmp\\t%1, %2\;ite\\tcs\;movcs\\t%0, %1\;movcc\\t%0, %2" + "#" + ; cmp\\t%1, %2\;it\\tcc\;movcc\\t%0, %2 + "TARGET_THUMB2 && reload_completed" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 1) (match_dup 2))) + (cond_exec (ltu:SI (reg:CC CC_REGNUM) (const_int 0)) + (set (match_dup 0) + (match_dup 2)))] + "" [(set_attr "conds" "clob") - 
(set_attr "length" "10,10,14")] + (set_attr "length" "6,6,10") + (set_attr "enabled_for_depr_it" "yes,yes,no")] ) -(define_insn "*thumb2_uminsi3" - [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") - (umin:SI (match_operand:SI 1 "s_register_operand" "0,r,?r") - (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI"))) +(define_insn_and_split "*thumb2_uminsi3" + [(set (match_operand:SI 0 "s_register_operand" "=r,l,r") + (umin:SI (match_operand:SI 1 "s_register_operand" "%0,0,0") + (match_operand:SI 2 "arm_rhs_operand" "r,Py,I"))) (clobber (reg:CC CC_REGNUM))] "TARGET_THUMB2" - "@ - cmp\\t%1, %2\;it\\tcs\;movcs\\t%0, %2 - cmp\\t%1, %2\;it\\tcc\;movcc\\t%0, %1 - cmp\\t%1, %2\;ite\\tcc\;movcc\\t%0, %1\;movcs\\t%0, %2" + "#" + ; cmp\\t%1, %2\;it\\tcs\;movcs\\t%0, %2 + "TARGET_THUMB2 && reload_completed" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 1) (match_dup 2))) + (cond_exec (geu:SI (reg:CC CC_REGNUM) (const_int 0)) + (set (match_dup 0) + (match_dup 2)))] + "" [(set_attr "conds" "clob") - (set_attr "length" "10,10,14")] + (set_attr "length" "6,6,10") + (set_attr "enabled_for_depr_it" "yes,yes,no")] ) ;; Thumb-2 does not have rsc, so use a clever trick with shifter operands. -(define_insn "*thumb2_negdi2" +(define_insn_and_split "*thumb2_negdi2" [(set (match_operand:DI 0 "s_register_operand" "=&r,r") (neg:DI (match_operand:DI 1 "s_register_operand" "?r,0"))) (clobber (reg:CC CC_REGNUM))] "TARGET_THUMB2" - "negs\\t%Q0, %Q1\;sbc\\t%R0, %R1, %R1, lsl #1" + "#" ; negs\\t%Q0, %Q1\;sbc\\t%R0, %R1, %R1, lsl #1 + "&& reload_completed" + [(parallel [(set (reg:CC CC_REGNUM) + (compare:CC (const_int 0) (match_dup 1))) + (set (match_dup 0) (minus:SI (const_int 0) (match_dup 1)))]) + (set (match_dup 2) (minus:SI (minus:SI (match_dup 3) + (ashift:SI (match_dup 3) + (const_int 1))) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + { + operands[2] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[3] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + } [(set_attr "conds" "clob") (set_attr "length" "8")] ) -(define_insn "*thumb2_abssi2" - [(set (match_operand:SI 0 "s_register_operand" "=r,&r") - (abs:SI (match_operand:SI 1 "s_register_operand" "0,r"))) +(define_insn_and_split "*thumb2_abssi2" + [(set (match_operand:SI 0 "s_register_operand" "=&r,l,r") + (abs:SI (match_operand:SI 1 "s_register_operand" "r,0,0"))) (clobber (reg:CC CC_REGNUM))] "TARGET_THUMB2" - "@ - cmp\\t%0, #0\;it\tlt\;rsblt\\t%0, %0, #0 - eor%?\\t%0, %1, %1, asr #31\;sub%?\\t%0, %0, %1, asr #31" - [(set_attr "conds" "clob,*") + "#" + ; eor%?\\t%0, %1, %1, asr #31\;sub%?\\t%0, %0, %1, asr #31 + ; cmp\\t%0, #0\;it\tlt\;rsblt\\t%0, %0, #0 + ; cmp\\t%0, #0\;it\tlt\;rsblt\\t%0, %0, #0 + "&& reload_completed" + [(const_int 0)] + { + if (REGNO(operands[0]) == REGNO(operands[1])) + { + rtx cc_reg = gen_rtx_REG (CCmode, CC_REGNUM); + + emit_insn (gen_rtx_SET (VOIDmode, + cc_reg, + gen_rtx_COMPARE (CCmode, operands[0], const0_rtx))); + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + (gen_rtx_LT (SImode, + cc_reg, + const0_rtx)), + (gen_rtx_SET (VOIDmode, + operands[0], + (gen_rtx_MINUS (SImode, + const0_rtx, + operands[1])))))); + } + else + { + emit_insn (gen_rtx_SET (VOIDmode, + operands[0], + gen_rtx_XOR (SImode, + gen_rtx_ASHIFTRT (SImode, + operands[1], + GEN_INT (31)), + operands[1]))); + emit_insn (gen_rtx_SET (VOIDmode, + operands[0], + gen_rtx_MINUS (SImode, + operands[0], + gen_rtx_ASHIFTRT (SImode, + operands[1], + GEN_INT (31))))); + } + DONE; + } + 
[(set_attr "conds" "*,clob,clob") (set_attr "shift" "1") - (set_attr "predicable" "no, yes") + (set_attr "predicable" "yes,no,no") + (set_attr "predicable_short_it" "no") + (set_attr "enabled_for_depr_it" "yes,yes,no") (set_attr "ce_count" "2") - (set_attr "length" "10,8")] + (set_attr "length" "8,6,10")] ) -(define_insn "*thumb2_neg_abssi2" - [(set (match_operand:SI 0 "s_register_operand" "=r,&r") - (neg:SI (abs:SI (match_operand:SI 1 "s_register_operand" "0,r")))) +(define_insn_and_split "*thumb2_neg_abssi2" + [(set (match_operand:SI 0 "s_register_operand" "=&r,l,r") + (neg:SI (abs:SI (match_operand:SI 1 "s_register_operand" "r,0,0")))) (clobber (reg:CC CC_REGNUM))] "TARGET_THUMB2" - "@ - cmp\\t%0, #0\;it\\tgt\;rsbgt\\t%0, %0, #0 - eor%?\\t%0, %1, %1, asr #31\;rsb%?\\t%0, %0, %1, asr #31" - [(set_attr "conds" "clob,*") + "#" + ; eor%?\\t%0, %1, %1, asr #31\;rsb%?\\t%0, %0, %1, asr #31 + ; cmp\\t%0, #0\;it\\tgt\;rsbgt\\t%0, %0, #0 + ; cmp\\t%0, #0\;it\\tgt\;rsbgt\\t%0, %0, #0 + "&& reload_completed" + [(const_int 0)] + { + if (REGNO(operands[0]) == REGNO(operands[1])) + { + rtx cc_reg = gen_rtx_REG (CCmode, CC_REGNUM); + + emit_insn (gen_rtx_SET (VOIDmode, + cc_reg, + gen_rtx_COMPARE (CCmode, operands[0], const0_rtx))); + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + (gen_rtx_GT (SImode, + cc_reg, + const0_rtx)), + (gen_rtx_SET (VOIDmode, + operands[0], + (gen_rtx_MINUS (SImode, + const0_rtx, + operands[1])))))); + } + else + { + emit_insn (gen_rtx_SET (VOIDmode, + operands[0], + gen_rtx_XOR (SImode, + gen_rtx_ASHIFTRT (SImode, + operands[1], + GEN_INT (31)), + operands[1]))); + emit_insn (gen_rtx_SET (VOIDmode, + operands[0], + gen_rtx_MINUS (SImode, + gen_rtx_ASHIFTRT (SImode, + operands[1], + GEN_INT (31)), + operands[0]))); + } + DONE; + } + [(set_attr "conds" "*,clob,clob") (set_attr "shift" "1") - (set_attr "predicable" "no, yes") + (set_attr "predicable" "yes,no,no") + (set_attr "enabled_for_depr_it" "yes,yes,no") + (set_attr "predicable_short_it" "no") (set_attr "ce_count" "2") - (set_attr "length" "10,8")] + (set_attr "length" "8,6,10")] ) ;; We have two alternatives here for memory loads (and similarly for stores) @@ -167,8 +292,8 @@ ;; regs. The high register alternatives are not taken into account when ;; choosing register preferences in order to reflect their expense. (define_insn "*thumb2_movsi_insn" - [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,l ,*hk,m,*m") - (match_operand:SI 1 "general_operand" "rk ,I,K,j,mi,*mi,l,*hk"))] + [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,l,r,r,l ,*hk,m,*m") + (match_operand:SI 1 "general_operand" "rk,I,Py,K,j,mi,*mi,l,*hk"))] "TARGET_THUMB2 && ! 
TARGET_IWMMXT && !(TARGET_HARD_FLOAT && TARGET_VFP) && ( register_operand (operands[0], SImode) @@ -176,6 +301,7 @@ "@ mov%?\\t%0, %1 mov%?\\t%0, %1 + mov%?\\t%0, %1 mvn%?\\t%0, #%B1 movw%?\\t%0, %1 ldr%?\\t%0, %1 @@ -182,10 +308,12 @@ ldr%?\\t%0, %1 str%?\\t%1, %0 str%?\\t%1, %0" - [(set_attr "type" "*,*,simple_alu_imm,*,load1,load1,store1,store1") + [(set_attr "type" "*,arlo_imm,arlo_imm,arlo_imm,*,load1,load1,store1,store1") + (set_attr "length" "2,4,2,4,4,4,4,4,4") (set_attr "predicable" "yes") - (set_attr "pool_range" "*,*,*,*,1018,4094,*,*") - (set_attr "neg_pool_range" "*,*,*,*,0,0,*,*")] + (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no") + (set_attr "pool_range" "*,*,*,*,*,1018,4094,*,*") + (set_attr "neg_pool_range" "*,*,*,*,*,0,0,*,*")] ) (define_insn "tls_load_dot_plus_four" @@ -223,6 +351,21 @@ (set_attr "neg_pool_range" "*,*,*,250")] ) +(define_insn "*thumb2_storewb_pairsi" + [(set (match_operand:SI 0 "register_operand" "=&kr") + (plus:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "const_int_operand" "n"))) + (set (mem:SI (plus:SI (match_dup 0) (match_dup 2))) + (match_operand:SI 3 "register_operand" "r")) + (set (mem:SI (plus:SI (match_dup 0) + (match_operand:SI 5 "const_int_operand" "n"))) + (match_operand:SI 4 "register_operand" "r"))] + "TARGET_THUMB2 + && INTVAL (operands[5]) == INTVAL (operands[2]) + 4" + "strd\\t%3, %4, [%0, %2]!" + [(set_attr "type" "store2")] +) + (define_insn "*thumb2_cmpsi_neg_shiftsi" [(set (reg:CC CC_REGNUM) (compare:CC (match_operand:SI 0 "s_register_operand" "r") @@ -233,57 +376,170 @@ "cmn%?\\t%0, %1%S3" [(set_attr "conds" "set") (set_attr "shift" "1") - (set_attr "type" "alu_shift")] + (set_attr "type" "arlo_shift")] ) -(define_insn "*thumb2_mov_scc" - [(set (match_operand:SI 0 "s_register_operand" "=r") +(define_insn_and_split "*thumb2_mov_scc" + [(set (match_operand:SI 0 "s_register_operand" "=l,r") (match_operator:SI 1 "arm_comparison_operator" [(match_operand 2 "cc_register" "") (const_int 0)]))] "TARGET_THUMB2" - "ite\\t%D1\;mov%D1\\t%0, #0\;mov%d1\\t%0, #1" + "#" ; "ite\\t%D1\;mov%D1\\t%0, #0\;mov%d1\\t%0, #1" + "TARGET_THUMB2" + [(set (match_dup 0) + (if_then_else:SI (match_dup 1) + (const_int 1) + (const_int 0)))] + "" [(set_attr "conds" "use") - (set_attr "length" "10")] + (set_attr "enabled_for_depr_it" "yes,no") + (set_attr "length" "8,10")] ) -(define_insn "*thumb2_mov_negscc" +(define_insn_and_split "*thumb2_mov_negscc" [(set (match_operand:SI 0 "s_register_operand" "=r") (neg:SI (match_operator:SI 1 "arm_comparison_operator" [(match_operand 2 "cc_register" "") (const_int 0)])))] + "TARGET_THUMB2 && !arm_restrict_it" + "#" ; "ite\\t%D1\;mov%D1\\t%0, #0\;mvn%d1\\t%0, #0" "TARGET_THUMB2" - "ite\\t%D1\;mov%D1\\t%0, #0\;mvn%d1\\t%0, #0" + [(set (match_dup 0) + (if_then_else:SI (match_dup 1) + (match_dup 3) + (const_int 0)))] + { + operands[3] = GEN_INT (~0); + } [(set_attr "conds" "use") (set_attr "length" "10")] ) -(define_insn "*thumb2_mov_notscc" +(define_insn_and_split "*thumb2_mov_negscc_strict_it" + [(set (match_operand:SI 0 "low_register_operand" "=l") + (neg:SI (match_operator:SI 1 "arm_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)])))] + "TARGET_THUMB2 && arm_restrict_it" + "#" ; ";mvn\\t%0, #0 ;it\\t%D1\;mov%D1\\t%0, #0\" + "&& reload_completed" + [(set (match_dup 0) + (match_dup 3)) + (cond_exec (match_dup 4) + (set (match_dup 0) + (const_int 0)))] + { + operands[3] = GEN_INT (~0); + enum machine_mode mode = GET_MODE (operands[2]); + enum rtx_code rc = 
GET_CODE (operands[1]); + + if (mode == CCFPmode || mode == CCFPEmode) + rc = reverse_condition_maybe_unordered (rc); + else + rc = reverse_condition (rc); + operands[4] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx); + + } + [(set_attr "conds" "use") + (set_attr "length" "8")] +) + +(define_insn_and_split "*thumb2_mov_notscc" [(set (match_operand:SI 0 "s_register_operand" "=r") (not:SI (match_operator:SI 1 "arm_comparison_operator" [(match_operand 2 "cc_register" "") (const_int 0)])))] + "TARGET_THUMB2 && !arm_restrict_it" + "#" ; "ite\\t%D1\;mvn%D1\\t%0, #0\;mvn%d1\\t%0, #1" "TARGET_THUMB2" - "ite\\t%D1\;mvn%D1\\t%0, #0\;mvn%d1\\t%0, #1" + [(set (match_dup 0) + (if_then_else:SI (match_dup 1) + (match_dup 3) + (match_dup 4)))] + { + operands[3] = GEN_INT (~1); + operands[4] = GEN_INT (~0); + } [(set_attr "conds" "use") (set_attr "length" "10")] ) -(define_insn "*thumb2_movsicc_insn" - [(set (match_operand:SI 0 "s_register_operand" "=r,r,r,r,r,r,r,r") +(define_insn_and_split "*thumb2_mov_notscc_strict_it" + [(set (match_operand:SI 0 "low_register_operand" "=l") + (not:SI (match_operator:SI 1 "arm_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)])))] + "TARGET_THUMB2 && arm_restrict_it" + "#" ; "mvn %0, #0 ; it%d1 ; lsl%d1 %0, %0, #1" + "&& reload_completed" + [(set (match_dup 0) + (match_dup 3)) + (cond_exec (match_dup 4) + (set (match_dup 0) + (ashift:SI (match_dup 0) + (const_int 1))))] + { + operands[3] = GEN_INT (~0); + operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[1]), + VOIDmode, operands[2], const0_rtx); + } + [(set_attr "conds" "use") + (set_attr "length" "8")] +) + +(define_insn_and_split "*thumb2_movsicc_insn" + [(set (match_operand:SI 0 "s_register_operand" "=l,l,r,r,r,r,r,r,r,r,r") (if_then_else:SI (match_operator 3 "arm_comparison_operator" [(match_operand 4 "cc_register" "") (const_int 0)]) - (match_operand:SI 1 "arm_not_operand" "0,0,rI,K,rI,rI,K,K") - (match_operand:SI 2 "arm_not_operand" "rI,K,0,0,rI,K,rI,K")))] + (match_operand:SI 1 "arm_not_operand" "0 ,lPy,0 ,0,rI,K,rI,rI,K ,K,r") + (match_operand:SI 2 "arm_not_operand" "lPy,0 ,rI,K,0 ,0,rI,K ,rI,K,r")))] "TARGET_THUMB2" "@ it\\t%D3\;mov%D3\\t%0, %2 + it\\t%d3\;mov%d3\\t%0, %1 + it\\t%D3\;mov%D3\\t%0, %2 it\\t%D3\;mvn%D3\\t%0, #%B2 it\\t%d3\;mov%d3\\t%0, %1 it\\t%d3\;mvn%d3\\t%0, #%B1 - ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2 - ite\\t%d3\;mov%d3\\t%0, %1\;mvn%D3\\t%0, #%B2 - ite\\t%d3\;mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2 - ite\\t%d3\;mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2" - [(set_attr "length" "6,6,6,6,10,10,10,10") + # + # + # + # + #" + ; alt 6: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2 + ; alt 7: ite\\t%d3\;mov%d3\\t%0, %1\;mvn%D3\\t%0, #%B2 + ; alt 8: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2 + ; alt 9: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2 + ; alt 10: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2 + "&& reload_completed" + [(const_int 0)] + { + enum rtx_code rev_code; + enum machine_mode mode; + rtx rev_cond; + + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + operands[3], + gen_rtx_SET (VOIDmode, + operands[0], + operands[1]))); + rev_code = GET_CODE (operands[3]); + mode = GET_MODE (operands[4]); + if (mode == CCFPmode || mode == CCFPEmode) + rev_code = reverse_condition_maybe_unordered (rev_code); + else + rev_code = reverse_condition (rev_code); + + rev_cond = gen_rtx_fmt_ee (rev_code, + VOIDmode, + gen_rtx_REG (mode, CC_REGNUM), + const0_rtx); + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + rev_cond, + gen_rtx_SET (VOIDmode, + operands[0], + operands[2]))); + DONE; + } 
+ [(set_attr "length" "4,4,6,6,6,6,10,10,10,10,6") + (set_attr "enabled_for_depr_it" "yes,yes,no,no,no,no,no,no,no,no,yes") (set_attr "conds" "use")] ) @@ -333,28 +589,74 @@ ;; addresses will have the thumb bit set correctly. -(define_insn "*thumb2_and_scc" - [(set (match_operand:SI 0 "s_register_operand" "=r") +(define_insn_and_split "*thumb2_and_scc" + [(set (match_operand:SI 0 "s_register_operand" "=Ts") (and:SI (match_operator:SI 1 "arm_comparison_operator" - [(match_operand 3 "cc_register" "") (const_int 0)]) - (match_operand:SI 2 "s_register_operand" "r")))] + [(match_operand 2 "cc_register" "") (const_int 0)]) + (match_operand:SI 3 "s_register_operand" "r")))] "TARGET_THUMB2" - "ite\\t%D1\;mov%D1\\t%0, #0\;and%d1\\t%0, %2, #1" + "#" ; "and\\t%0, %3, #1\;it\\t%D1\;mov%D1\\t%0, #0" + "&& reload_completed" + [(set (match_dup 0) + (and:SI (match_dup 3) (const_int 1))) + (cond_exec (match_dup 4) (set (match_dup 0) (const_int 0)))] + { + enum machine_mode mode = GET_MODE (operands[2]); + enum rtx_code rc = GET_CODE (operands[1]); + + if (mode == CCFPmode || mode == CCFPEmode) + rc = reverse_condition_maybe_unordered (rc); + else + rc = reverse_condition (rc); + operands[4] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx); + } [(set_attr "conds" "use") - (set_attr "length" "10")] + (set (attr "length") (if_then_else (match_test "arm_restrict_it") + (const_int 8) + (const_int 10)))] ) -(define_insn "*thumb2_ior_scc" +(define_insn_and_split "*thumb2_ior_scc" [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (ior:SI (match_operator:SI 1 "arm_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)]) + (match_operand:SI 3 "s_register_operand" "0,?r")))] + "TARGET_THUMB2 && !arm_restrict_it" + "@ + it\\t%d1\;orr%d1\\t%0, %3, #1 + #" + ; alt 1: ite\\t%D1\;mov%D1\\t%0, %3\;orr%d1\\t%0, %3, #1 + "&& reload_completed + && REGNO (operands [0]) != REGNO (operands[3])" + [(cond_exec (match_dup 5) (set (match_dup 0) (match_dup 3))) + (cond_exec (match_dup 4) (set (match_dup 0) + (ior:SI (match_dup 3) (const_int 1))))] + { + enum machine_mode mode = GET_MODE (operands[2]); + enum rtx_code rc = GET_CODE (operands[1]); + + operands[4] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx); + if (mode == CCFPmode || mode == CCFPEmode) + rc = reverse_condition_maybe_unordered (rc); + else + rc = reverse_condition (rc); + operands[5] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx); + } + [(set_attr "conds" "use") + (set_attr "length" "6,10")] +) + +(define_insn "*thumb2_ior_scc_strict_it" + [(set (match_operand:SI 0 "s_register_operand" "=l,l") (ior:SI (match_operator:SI 2 "arm_comparison_operator" [(match_operand 3 "cc_register" "") (const_int 0)]) - (match_operand:SI 1 "s_register_operand" "0,?r")))] - "TARGET_THUMB2" + (match_operand:SI 1 "s_register_operand" "0,?l")))] + "TARGET_THUMB2 && arm_restrict_it" "@ - it\\t%d2\;orr%d2\\t%0, %1, #1 - ite\\t%D2\;mov%D2\\t%0, %1\;orr%d2\\t%0, %1, #1" + it\\t%d2\;mov%d2\\t%0, #1\;it\\t%d2\;orr%d2\\t%0, %1 + mov\\t%0, #1\;orr\\t%0, %1\;it\\t%D2\;mov%D2\\t%0, %1" [(set_attr "conds" "use") - (set_attr "length" "6,10")] + (set_attr "length" "8")] ) (define_insn "*thumb2_cond_move" @@ -384,13 +686,20 @@ output_asm_insn (\"it\\t%D4\", operands); break; case 2: - output_asm_insn (\"ite\\t%D4\", operands); + if (arm_restrict_it) + output_asm_insn (\"it\\t%D4\", operands); + else + output_asm_insn (\"ite\\t%D4\", operands); break; default: abort(); } if (which_alternative != 0) - output_asm_insn (\"mov%D4\\t%0, %1\", operands); + { + 
output_asm_insn (\"mov%D4\\t%0, %1\", operands); + if (arm_restrict_it && which_alternative == 2) + output_asm_insn (\"it\\t%d4\", operands); + } if (which_alternative != 1) output_asm_insn (\"mov%d4\\t%0, %2\", operands); return \"\"; @@ -407,7 +716,7 @@ (match_operand:SI 3 "arm_rhs_operand" "rI,rI")]) (match_operand:SI 1 "s_register_operand" "0,?r")])) (clobber (reg:CC CC_REGNUM))] - "TARGET_THUMB2" + "TARGET_THUMB2 && !arm_restrict_it" "* if (GET_CODE (operands[4]) == LT && operands[3] == const0_rtx) return \"%i5\\t%0, %1, %2, lsr #31\"; @@ -436,9 +745,78 @@ (set_attr "length" "14")] ) +(define_insn_and_split "*thumb2_cond_arith_strict_it" + [(set (match_operand:SI 0 "s_register_operand" "=l") + (match_operator:SI 5 "shiftable_operator_strict_it" + [(match_operator:SI 4 "arm_comparison_operator" + [(match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 3 "arm_rhs_operand" "rI")]) + (match_operand:SI 1 "s_register_operand" "0")])) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2 && arm_restrict_it" + "#" + "&& reload_completed" + [(const_int 0)] + { + if (GET_CODE (operands[4]) == LT && operands[3] == const0_rtx) + { + /* %i5 %0, %1, %2, lsr #31 */ + rtx shifted_op = gen_rtx_LSHIFTRT (SImode, operands[2], GEN_INT (31)); + rtx op = NULL_RTX; + + switch (GET_CODE (operands[5])) + { + case AND: + op = gen_rtx_AND (SImode, shifted_op, operands[1]); + break; + case PLUS: + op = gen_rtx_PLUS (SImode, shifted_op, operands[1]); + break; + default: gcc_unreachable (); + } + emit_insn (gen_rtx_SET (VOIDmode, operands[0], op)); + DONE; + } + + /* "cmp %2, %3" */ + emit_insn (gen_rtx_SET (VOIDmode, + gen_rtx_REG (CCmode, CC_REGNUM), + gen_rtx_COMPARE (CCmode, operands[2], operands[3]))); + + if (GET_CODE (operands[5]) == AND) + { + /* %i5 %0, %1, #1 + it%D4 + mov%D4 %0, #0 */ + enum rtx_code rc = reverse_condition (GET_CODE (operands[4])); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], gen_rtx_AND (SImode, operands[1], GEN_INT (1)))); + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + gen_rtx_fmt_ee (rc, VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM), const0_rtx), + gen_rtx_SET (VOIDmode, operands[0], const0_rtx))); + DONE; + } + else + { + /* it\\t%d4 + %i5%d4\\t%0, %1, #1 */ + emit_insn (gen_rtx_COND_EXEC (VOIDmode, gen_rtx_fmt_ee (GET_CODE (operands[4]), + VOIDmode, + gen_rtx_REG (CCmode, CC_REGNUM), const0_rtx), + gen_rtx_SET(VOIDmode, operands[0], + gen_rtx_PLUS (SImode, + operands[1], + GEN_INT (1))))); + DONE; + } + FAIL; + } + [(set_attr "conds" "clob") + (set_attr "length" "12")] +) + (define_insn "*thumb2_cond_sub" - [(set (match_operand:SI 0 "s_register_operand" "=r,r") - (minus:SI (match_operand:SI 1 "s_register_operand" "0,?r") + [(set (match_operand:SI 0 "s_register_operand" "=Ts,Ts") + (minus:SI (match_operand:SI 1 "s_register_operand" "0,?Ts") (match_operator:SI 4 "arm_comparison_operator" [(match_operand:SI 2 "s_register_operand" "r,r") (match_operand:SI 3 "arm_rhs_operand" "rI,rI")]))) @@ -448,8 +826,16 @@ output_asm_insn (\"cmp\\t%2, %3\", operands); if (which_alternative != 0) { - output_asm_insn (\"ite\\t%D4\", operands); - output_asm_insn (\"mov%D4\\t%0, %1\", operands); + if (arm_restrict_it) + { + output_asm_insn (\"mov\\t%0, %1\", operands); + output_asm_insn (\"it\\t%d4\", operands); + } + else + { + output_asm_insn (\"ite\\t%D4\", operands); + output_asm_insn (\"mov%D4\\t%0, %1\", operands); + } } else output_asm_insn (\"it\\t%d4\", operands); @@ -459,37 +845,82 @@ (set_attr "length" "10,14")] ) -(define_insn "*thumb2_negscc" - [(set (match_operand:SI 0 
"s_register_operand" "=r") +(define_insn_and_split "*thumb2_negscc" + [(set (match_operand:SI 0 "s_register_operand" "=Ts") (neg:SI (match_operator 3 "arm_comparison_operator" [(match_operand:SI 1 "s_register_operand" "r") (match_operand:SI 2 "arm_rhs_operand" "rI")]))) (clobber (reg:CC CC_REGNUM))] "TARGET_THUMB2" - "* - if (GET_CODE (operands[3]) == LT && operands[2] == const0_rtx) - return \"asr\\t%0, %1, #31\"; + "#" + "&& reload_completed" + [(const_int 0)] + { + rtx cc_reg = gen_rtx_REG (CCmode, CC_REGNUM); - if (GET_CODE (operands[3]) == NE) - return \"subs\\t%0, %1, %2\;it\\tne\;mvnne\\t%0, #0\"; + if (GET_CODE (operands[3]) == LT && operands[2] == const0_rtx) + { + /* Emit asr\\t%0, %1, #31 */ + emit_insn (gen_rtx_SET (VOIDmode, + operands[0], + gen_rtx_ASHIFTRT (SImode, + operands[1], + GEN_INT (31)))); + DONE; + } + else if (GET_CODE (operands[3]) == NE && !arm_restrict_it) + { + /* Emit subs\\t%0, %1, %2\;it\\tne\;mvnne\\t%0, #0 */ + if (CONST_INT_P (operands[2])) + emit_insn (gen_cmpsi2_addneg (operands[0], operands[1], operands[2], + GEN_INT (- INTVAL (operands[2])))); + else + emit_insn (gen_subsi3_compare (operands[0], operands[1], operands[2])); - output_asm_insn (\"cmp\\t%1, %2\", operands); - output_asm_insn (\"ite\\t%D3\", operands); - output_asm_insn (\"mov%D3\\t%0, #0\", operands); - return \"mvn%d3\\t%0, #0\"; - " + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + gen_rtx_NE (SImode, + cc_reg, + const0_rtx), + gen_rtx_SET (SImode, + operands[0], + GEN_INT (~0)))); + DONE; + } + else + { + /* Emit: cmp\\t%1, %2\;mvn\\t%0, #0\;it\\t%D3\;mov%D3\\t%0, #0\;*/ + enum rtx_code rc = reverse_condition (GET_CODE (operands[3])); + enum machine_mode mode = SELECT_CC_MODE (rc, operands[1], operands[2]); + rtx tmp1 = gen_rtx_REG (mode, CC_REGNUM); + + emit_insn (gen_rtx_SET (VOIDmode, + cc_reg, + gen_rtx_COMPARE (CCmode, operands[1], operands[2]))); + + emit_insn (gen_rtx_SET (VOIDmode, operands[0], GEN_INT (~0))); + + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + gen_rtx_fmt_ee (rc, + VOIDmode, + tmp1, + const0_rtx), + gen_rtx_SET (VOIDmode, operands[0], const0_rtx))); + DONE; + } + FAIL; + } [(set_attr "conds" "clob") (set_attr "length" "14")] ) (define_insn "*thumb2_movcond" - [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") + [(set (match_operand:SI 0 "s_register_operand" "=Ts,Ts,Ts") (if_then_else:SI (match_operator 5 "arm_comparison_operator" [(match_operand:SI 3 "s_register_operand" "r,r,r") (match_operand:SI 4 "arm_add_operand" "rIL,rIL,rIL")]) - (match_operand:SI 1 "arm_rhs_operand" "0,rI,?rI") - (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI"))) + (match_operand:SI 1 "arm_rhs_operand" "0,TsI,?TsI") + (match_operand:SI 2 "arm_rhs_operand" "TsI,0,TsI"))) (clobber (reg:CC CC_REGNUM))] "TARGET_THUMB2" "* @@ -544,12 +975,18 @@ output_asm_insn (\"it\\t%d5\", operands); break; case 2: - output_asm_insn (\"ite\\t%d5\", operands); + if (arm_restrict_it) + { + output_asm_insn (\"mov\\t%0, %1\", operands); + output_asm_insn (\"it\\t%D5\", operands); + } + else + output_asm_insn (\"ite\\t%d5\", operands); break; default: abort(); } - if (which_alternative != 0) + if (which_alternative != 0 && !(arm_restrict_it && which_alternative == 2)) output_asm_insn (\"mov%d5\\t%0, %1\", operands); if (which_alternative != 1) output_asm_insn (\"mov%D5\\t%0, %2\", operands); @@ -570,8 +1007,9 @@ "@ sxtb%?\\t%0, %1 ldr%(sb%)\\t%0, %1" - [(set_attr "type" "simple_alu_shift,load_byte") + [(set_attr "type" "extend,load_byte") (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") 
(set_attr "pool_range" "*,4094") (set_attr "neg_pool_range" "*,250")] ) @@ -583,8 +1021,9 @@ "@ uxth%?\\t%0, %1 ldr%(h%)\\t%0, %1" - [(set_attr "type" "simple_alu_shift,load_byte") + [(set_attr "type" "extend,load_byte") (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "pool_range" "*,4094") (set_attr "neg_pool_range" "*,250")] ) @@ -596,8 +1035,9 @@ "@ uxtb%(%)\\t%0, %1 ldr%(b%)\\t%0, %1\\t%@ zero_extendqisi2" - [(set_attr "type" "simple_alu_shift,load_byte") + [(set_attr "type" "extend,load_byte") (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "pool_range" "*,4094") (set_attr "neg_pool_range" "*,250")] ) @@ -688,8 +1128,8 @@ (set_attr "shift" "1") (set_attr "length" "2") (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "") - (const_string "alu_shift") - (const_string "alu_shift_reg")))] + (const_string "arlo_shift") + (const_string "arlo_shift_reg")))] ) (define_insn "*thumb2_mov_shortim" @@ -811,7 +1251,7 @@ " [(set_attr "conds" "set") (set_attr "length" "2,2,4,4") - (set_attr "type" "simple_alu_imm,*,simple_alu_imm,*")] + (set_attr "type" "arlo_imm,*,arlo_imm,*")] ) (define_insn "*thumb2_mulsi_short" @@ -823,7 +1263,7 @@ "mul%!\\t%0, %2, %0" [(set_attr "predicable" "yes") (set_attr "length" "2") - (set_attr "insn" "muls")]) + (set_attr "type" "muls")]) (define_insn "*thumb2_mulsi_short_compare0" [(set (reg:CC_NOOV CC_REGNUM) @@ -836,7 +1276,7 @@ "TARGET_THUMB2 && optimize_size" "muls\\t%0, %2, %0" [(set_attr "length" "2") - (set_attr "insn" "muls")]) + (set_attr "type" "muls")]) (define_insn "*thumb2_mulsi_short_compare0_scratch" [(set (reg:CC_NOOV CC_REGNUM) @@ -848,7 +1288,7 @@ "TARGET_THUMB2 && optimize_size" "muls\\t%0, %2, %0" [(set_attr "length" "2") - (set_attr "insn" "muls")]) + (set_attr "type" "muls")]) (define_insn "*thumb2_cbz" [(set (pc) (if_then_else @@ -922,7 +1362,8 @@ (match_operand:SI 1 "s_register_operand" "r")))] "TARGET_THUMB2" "orn%?\\t%0, %1, %2" - [(set_attr "predicable" "yes")] + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_insn "*orsi_not_shiftsi_si" @@ -934,8 +1375,9 @@ "TARGET_THUMB2" "orn%?\\t%0, %1, %2%S4" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "shift" "2") - (set_attr "type" "alu_shift")] + (set_attr "type" "arlo_shift")] ) (define_peephole2 --- a/src/gcc/config/arm/arm.c +++ b/src/gcc/config/arm/arm.c @@ -173,6 +173,7 @@ static tree arm_builtin_decl (unsigned, bool); static void emit_constant_insn (rtx cond, rtx pattern); static rtx emit_set_insn (rtx, rtx); +static rtx emit_multi_reg_push (unsigned long); static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode, tree, bool); static rtx arm_function_arg (cumulative_args_t, enum machine_mode, @@ -280,6 +281,7 @@ static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1, bool op0_preserve_value); +static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void); /* Table of machine attributes. 
*/ static const struct attribute_spec arm_attribute_table[] = @@ -620,6 +622,13 @@ #undef TARGET_CLASS_LIKELY_SPILLED_P #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p +#undef TARGET_VECTORIZE_BUILTINS +#define TARGET_VECTORIZE_BUILTINS + +#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION +#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \ + arm_builtin_vectorized_function + #undef TARGET_VECTOR_ALIGNMENT #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment @@ -649,6 +658,13 @@ #define TARGET_CANONICALIZE_COMPARISON \ arm_canonicalize_comparison +#undef TARGET_ASAN_SHADOW_OFFSET +#define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset + +#undef MAX_INSN_PER_IT_BLOCK +#define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4) + + struct gcc_target targetm = TARGET_INITIALIZER; /* Obstack for minipool constant handling. */ @@ -710,6 +726,7 @@ #define FL_ARCH7 (1 << 22) /* Architecture 7. */ #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */ #define FL_ARCH8 (1 << 24) /* Architecture 8. */ +#define FL_CRC32 (1 << 25) /* ARMv8 CRC32 instructions. */ #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */ #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */ @@ -839,6 +856,10 @@ int arm_arch_arm_hwdiv; int arm_arch_thumb_hwdiv; +/* Nonzero if we should use Neon to handle 64-bits operations rather + than core registers. */ +int prefer_neon_for_64bits = 0; + /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we must report the mode of the memory reference from TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */ @@ -868,6 +889,9 @@ /* The number of bits used in arm_condexec_mask. */ int arm_condexec_masklen = 0; +/* Nonzero if chip supports the ARMv8 CRC instructions. */ +int arm_arch_crc = 0; + /* The condition codes of the ARM, and the inverse function. */ static const char * const arm_condition_codes[] = { @@ -936,6 +960,7 @@ false, /* Prefer LDRD/STRD. */ {true, true}, /* Prefer non short circuit. */ &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ }; const struct tune_params arm_fastmul_tune = @@ -950,6 +975,7 @@ false, /* Prefer LDRD/STRD. */ {true, true}, /* Prefer non short circuit. */ &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ }; /* StrongARM has early execution of branches, so a sequence that is worth @@ -967,6 +993,7 @@ false, /* Prefer LDRD/STRD. */ {true, true}, /* Prefer non short circuit. */ &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ }; const struct tune_params arm_xscale_tune = @@ -981,6 +1008,7 @@ false, /* Prefer LDRD/STRD. */ {true, true}, /* Prefer non short circuit. */ &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ }; const struct tune_params arm_9e_tune = @@ -995,6 +1023,7 @@ false, /* Prefer LDRD/STRD. */ {true, true}, /* Prefer non short circuit. */ &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ }; const struct tune_params arm_v6t2_tune = @@ -1009,6 +1038,7 @@ false, /* Prefer LDRD/STRD. */ {true, true}, /* Prefer non short circuit. */ &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ }; /* Generic Cortex tuning. Use more specific tunings if appropriate. */ @@ -1024,6 +1054,7 @@ false, /* Prefer LDRD/STRD. */ {true, true}, /* Prefer non short circuit. */ &arm_default_vec_cost, /* Vectorizer costs. 
*/ + false /* Prefer Neon for 64-bits bitops. */ }; const struct tune_params arm_cortex_a15_tune = @@ -1031,7 +1062,7 @@ arm_9e_rtx_costs, NULL, 1, /* Constant limit. */ - 5, /* Max cond insns. */ + 2, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, false, /* Prefer constant pool. */ arm_default_branch_cost, @@ -1038,6 +1069,7 @@ true, /* Prefer LDRD/STRD. */ {true, true}, /* Prefer non short circuit. */ &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ }; /* Branches can be dual-issued on Cortex-A5, so conditional execution is @@ -1055,6 +1087,7 @@ false, /* Prefer LDRD/STRD. */ {false, false}, /* Prefer non short circuit. */ &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ }; const struct tune_params arm_cortex_a9_tune = @@ -1069,6 +1102,7 @@ false, /* Prefer LDRD/STRD. */ {true, true}, /* Prefer non short circuit. */ &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ }; /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than @@ -1085,6 +1119,7 @@ false, /* Prefer LDRD/STRD. */ {false, false}, /* Prefer non short circuit. */ &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ }; const struct tune_params arm_fa726te_tune = @@ -1099,6 +1134,7 @@ false, /* Prefer LDRD/STRD. */ {true, true}, /* Prefer non short circuit. */ &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ }; @@ -1842,7 +1878,13 @@ arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0; arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0; arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0; + arm_arch_crc = (insn_flags & FL_CRC32) != 0; + if (arm_restrict_it == 2) + arm_restrict_it = arm_arch8 && TARGET_THUMB2; + if (!TARGET_THUMB2) + arm_restrict_it = 0; + /* If we are not using the default (ARM mode) section anchor offset ranges, then set the correct ranges now. */ if (TARGET_THUMB1) @@ -2129,11 +2171,25 @@ global_options.x_param_values, global_options_set.x_param_values); + /* Use Neon to perform 64-bits operations rather than core + registers. */ + prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits; + if (use_neon_for_64bits == 1) + prefer_neon_for_64bits = true; + /* Use the alternative scheduling-pressure algorithm by default. */ maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2, global_options.x_param_values, global_options_set.x_param_values); + /* Disable shrink-wrap when optimizing function for size, since it tends to + generate additional returns. */ + if (optimize_function_for_size_p (cfun) && TARGET_THUMB2) + flag_shrink_wrap = false; + /* TBD: Dwarf info for apcs frame is not handled yet. */ + if (TARGET_APCS_FRAME) + flag_shrink_wrap = false; + /* Register global variables with the garbage collector. */ arm_add_gc_roots (); } @@ -2382,6 +2438,10 @@ if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB)) return 0; + if (TARGET_LDRD && current_tune->prefer_ldrd_strd + && !optimize_function_for_size_p (cfun)) + return 0; + offsets = arm_get_frame_offsets (); stack_adjust = offsets->outgoing_args - offsets->saved_regs; @@ -2479,6 +2539,18 @@ return 1; } +/* Return TRUE if we should try to use a simple_return insn, i.e. perform + shrink-wrapping if possible. This is the case if we need to emit a + prologue, which we can test by looking at the offsets. 
*/ +bool +use_simple_return_p (void) +{ + arm_stack_offsets *offsets; + + offsets = arm_get_frame_offsets (); + return offsets->outgoing_args != 0; +} + /* Return TRUE if int I is a valid immediate ARM constant. */ int @@ -2617,6 +2689,11 @@ switch (code) { + case AND: + case IOR: + case XOR: + return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF) + && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF); case PLUS: return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode); @@ -5337,9 +5414,8 @@ if (cfun->machine->sibcall_blocked) return false; - /* Never tailcall something for which we have no decl, or if we - are generating code for Thumb-1. */ - if (decl == NULL || TARGET_THUMB1) + /* Never tailcall something if we are generating code for Thumb-1. */ + if (TARGET_THUMB1) return false; /* The PIC register is live on entry to VxWorks PLT entries, so we @@ -5349,13 +5425,14 @@ /* Cannot tail-call to long calls, since these are out of range of a branch instruction. */ - if (arm_is_long_call_p (decl)) + if (decl && arm_is_long_call_p (decl)) return false; /* If we are interworking and the function is not declared static then we can't tail-call it unless we know that it exists in this compilation unit (since it might be a Thumb routine). */ - if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl)) + if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl) + && !TREE_ASM_WRITTEN (decl)) return false; func_type = arm_current_func_type (); @@ -5387,6 +5464,7 @@ sibling calls. */ if (TARGET_AAPCS_BASED && arm_abi == ARM_ABI_AAPCS + && decl && DECL_WEAK (decl)) return false; @@ -8592,7 +8670,12 @@ instruction we depend on is another ALU instruction, then we may have to account for an additional stall. */ if (shift_opnum != 0 - && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG)) + && (attr_type == TYPE_ARLO_SHIFT + || attr_type == TYPE_ARLO_SHIFT_REG + || attr_type == TYPE_MOV_SHIFT + || attr_type == TYPE_MVN_SHIFT + || attr_type == TYPE_MOV_SHIFT_REG + || attr_type == TYPE_MVN_SHIFT_REG)) { rtx shifted_operand; int opno; @@ -8873,12 +8956,12 @@ if (recog_memoized (insn) < 0) return false; - if (get_attr_insn (insn) == INSN_MOV) - return false; - switch (get_attr_type (insn)) { - case TYPE_ALU_REG: + case TYPE_ARLO_REG: + case TYPE_MVN_REG: + case TYPE_SHIFT: + case TYPE_SHIFT_REG: case TYPE_LOAD_BYTE: case TYPE_LOAD1: case TYPE_STORE1: @@ -8919,13 +9002,15 @@ return false; } - if (get_attr_insn (insn) == INSN_MOV) - return true; - switch (get_attr_type (insn)) { - case TYPE_SIMPLE_ALU_IMM: - case TYPE_SIMPLE_ALU_SHIFT: + case TYPE_ARLO_IMM: + case TYPE_EXTEND: + case TYPE_MVN_IMM: + case TYPE_MOV_IMM: + case TYPE_MOV_REG: + case TYPE_MOV_SHIFT: + case TYPE_MOV_SHIFT_REG: case TYPE_BRANCH: case TYPE_CALL: return true; @@ -9084,6 +9169,12 @@ return cost; } +int +arm_max_conditional_execute (void) +{ + return max_insns_skipped; +} + static int arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED) { @@ -11839,6 +11930,142 @@ return 1; } +/* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx +by mode size. */ +inline static rtx +next_consecutive_mem (rtx mem) +{ + enum machine_mode mode = GET_MODE (mem); + HOST_WIDE_INT offset = GET_MODE_SIZE (mode); + rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset); + + return adjust_automodify_address (mem, mode, addr, offset); +} + +/* Copy using LDRD/STRD instructions whenever possible. + Returns true upon success. 
*/ +bool +gen_movmem_ldrd_strd (rtx *operands) +{ + unsigned HOST_WIDE_INT len; + HOST_WIDE_INT align; + rtx src, dst, base; + rtx reg0; + bool src_aligned, dst_aligned; + bool src_volatile, dst_volatile; + + gcc_assert (CONST_INT_P (operands[2])); + gcc_assert (CONST_INT_P (operands[3])); + + len = UINTVAL (operands[2]); + if (len > 64) + return false; + + /* Maximum alignment we can assume for both src and dst buffers. */ + align = INTVAL (operands[3]); + + if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0)) + return false; + + /* Place src and dst addresses in registers + and update the corresponding mem rtx. */ + dst = operands[0]; + dst_volatile = MEM_VOLATILE_P (dst); + dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD; + base = copy_to_mode_reg (SImode, XEXP (dst, 0)); + dst = adjust_automodify_address (dst, VOIDmode, base, 0); + + src = operands[1]; + src_volatile = MEM_VOLATILE_P (src); + src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD; + base = copy_to_mode_reg (SImode, XEXP (src, 0)); + src = adjust_automodify_address (src, VOIDmode, base, 0); + + if (!unaligned_access && !(src_aligned && dst_aligned)) + return false; + + if (src_volatile || dst_volatile) + return false; + + /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */ + if (!(dst_aligned || src_aligned)) + return arm_gen_movmemqi (operands); + + src = adjust_address (src, DImode, 0); + dst = adjust_address (dst, DImode, 0); + while (len >= 8) + { + len -= 8; + reg0 = gen_reg_rtx (DImode); + if (src_aligned) + emit_move_insn (reg0, src); + else + emit_insn (gen_unaligned_loaddi (reg0, src)); + + if (dst_aligned) + emit_move_insn (dst, reg0); + else + emit_insn (gen_unaligned_storedi (dst, reg0)); + + src = next_consecutive_mem (src); + dst = next_consecutive_mem (dst); + } + + gcc_assert (len < 8); + if (len >= 4) + { + /* More than a word but less than a double-word to copy. Copy a word. */ + reg0 = gen_reg_rtx (SImode); + src = adjust_address (src, SImode, 0); + dst = adjust_address (dst, SImode, 0); + if (src_aligned) + emit_move_insn (reg0, src); + else + emit_insn (gen_unaligned_loadsi (reg0, src)); + + if (dst_aligned) + emit_move_insn (dst, reg0); + else + emit_insn (gen_unaligned_storesi (dst, reg0)); + + src = next_consecutive_mem (src); + dst = next_consecutive_mem (dst); + len -= 4; + } + + if (len == 0) + return true; + + /* Copy the remaining bytes. */ + if (len >= 2) + { + dst = adjust_address (dst, HImode, 0); + src = adjust_address (src, HImode, 0); + reg0 = gen_reg_rtx (SImode); + if (src_aligned) + emit_insn (gen_zero_extendhisi2 (reg0, src)); + else + emit_insn (gen_unaligned_loadhiu (reg0, src)); + + if (dst_aligned) + emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0))); + else + emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0))); + + src = next_consecutive_mem (src); + dst = next_consecutive_mem (dst); + if (len == 2) + return true; + } + + dst = adjust_address (dst, QImode, 0); + src = adjust_address (src, QImode, 0); + reg0 = gen_reg_rtx (QImode); + emit_move_insn (reg0, src); + emit_move_insn (dst, reg0); + return true; +} + /* Select a dominance comparison mode if possible for a test of the general form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms. COND_OR == DOM_CC_X_AND_Y => (X && Y) @@ -12639,6 +12866,277 @@ return true; } +/* Helper for gen_operands_ldrd_strd. Returns true iff the memory + operand ADDR is an immediate offset from the base register and is + not volatile, in which case it sets BASE and OFFSET + accordingly. 
*/ +bool +mem_ok_for_ldrd_strd (rtx addr, rtx *base, rtx *offset) +{ + /* TODO: Handle more general memory operand patterns, such as + PRE_DEC and PRE_INC. */ + + /* Convert a subreg of mem into mem itself. */ + if (GET_CODE (addr) == SUBREG) + addr = alter_subreg (&addr, true); + + gcc_assert (MEM_P (addr)); + + /* Don't modify volatile memory accesses. */ + if (MEM_VOLATILE_P (addr)) + return false; + + *offset = const0_rtx; + + addr = XEXP (addr, 0); + if (REG_P (addr)) + { + *base = addr; + return true; + } + else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS) + { + *base = XEXP (addr, 0); + *offset = XEXP (addr, 1); + return (REG_P (*base) && CONST_INT_P (*offset)); + } + + return false; +} + +#define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0) + +/* Called from a peephole2 to replace two word-size accesses with a + single LDRD/STRD instruction. Returns true iff we can generate a + new instruction sequence. That is, both accesses use the same base + register and the gap between constant offsets is 4. This function + may reorder its operands to match ldrd/strd RTL templates. + OPERANDS are the operands found by the peephole matcher; + OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the + corresponding memory operands. LOAD indicaates whether the access + is load or store. CONST_STORE indicates a store of constant + integer values held in OPERANDS[4,5] and assumes that the pattern + is of length 4 insn, for the purpose of checking dead registers. + COMMUTE indicates that register operands may be reordered. */ +bool +gen_operands_ldrd_strd (rtx *operands, bool load, + bool const_store, bool commute) +{ + int nops = 2; + HOST_WIDE_INT offsets[2], offset; + rtx base = NULL_RTX; + rtx cur_base, cur_offset, tmp; + int i, gap; + HARD_REG_SET regset; + + gcc_assert (!const_store || !load); + /* Check that the memory references are immediate offsets from the + same base register. Extract the base register, the destination + registers, and the corresponding memory offsets. */ + for (i = 0; i < nops; i++) + { + if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset)) + return false; + + if (i == 0) + base = cur_base; + else if (REGNO (base) != REGNO (cur_base)) + return false; + + offsets[i] = INTVAL (cur_offset); + if (GET_CODE (operands[i]) == SUBREG) + { + tmp = SUBREG_REG (operands[i]); + gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp)); + operands[i] = tmp; + } + } + + /* Make sure there is no dependency between the individual loads. */ + if (load && REGNO (operands[0]) == REGNO (base)) + return false; /* RAW */ + + if (load && REGNO (operands[0]) == REGNO (operands[1])) + return false; /* WAW */ + + /* If the same input register is used in both stores + when storing different constants, try to find a free register. + For example, the code + mov r0, 0 + str r0, [r2] + mov r0, 1 + str r0, [r2, #4] + can be transformed into + mov r1, 0 + strd r1, r0, [r2] + in Thumb mode assuming that r1 is free. */ + if (const_store + && REGNO (operands[0]) == REGNO (operands[1]) + && INTVAL (operands[4]) != INTVAL (operands[5])) + { + if (TARGET_THUMB2) + { + CLEAR_HARD_REG_SET (regset); + tmp = peep2_find_free_register (0, 4, "r", SImode, ®set); + if (tmp == NULL_RTX) + return false; + + /* Use the new register in the first load to ensure that + if the original input register is not dead after peephole, + then it will have the correct constant value. 
*/ + operands[0] = tmp; + } + else if (TARGET_ARM) + { + return false; + int regno = REGNO (operands[0]); + if (!peep2_reg_dead_p (4, operands[0])) + { + /* When the input register is even and is not dead after the + pattern, it has to hold the second constant but we cannot + form a legal STRD in ARM mode with this register as the second + register. */ + if (regno % 2 == 0) + return false; + + /* Is regno-1 free? */ + SET_HARD_REG_SET (regset); + CLEAR_HARD_REG_BIT(regset, regno - 1); + tmp = peep2_find_free_register (0, 4, "r", SImode, ®set); + if (tmp == NULL_RTX) + return false; + + operands[0] = tmp; + } + else + { + /* Find a DImode register. */ + CLEAR_HARD_REG_SET (regset); + tmp = peep2_find_free_register (0, 4, "r", DImode, ®set); + if (tmp != NULL_RTX) + { + operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0); + operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4); + } + else + { + /* Can we use the input register to form a DI register? */ + SET_HARD_REG_SET (regset); + CLEAR_HARD_REG_BIT(regset, + regno % 2 == 0 ? regno + 1 : regno - 1); + tmp = peep2_find_free_register (0, 4, "r", SImode, ®set); + if (tmp == NULL_RTX) + return false; + operands[regno % 2 == 1 ? 0 : 1] = tmp; + } + } + + gcc_assert (operands[0] != NULL_RTX); + gcc_assert (operands[1] != NULL_RTX); + gcc_assert (REGNO (operands[0]) % 2 == 0); + gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1); + } + } + + /* Make sure the instructions are ordered with lower memory access first. */ + if (offsets[0] > offsets[1]) + { + gap = offsets[0] - offsets[1]; + offset = offsets[1]; + + /* Swap the instructions such that lower memory is accessed first. */ + SWAP_RTX (operands[0], operands[1]); + SWAP_RTX (operands[2], operands[3]); + if (const_store) + SWAP_RTX (operands[4], operands[5]); + } + else + { + gap = offsets[1] - offsets[0]; + offset = offsets[0]; + } + + /* Make sure accesses are to consecutive memory locations. */ + if (gap != 4) + return false; + + /* Make sure we generate legal instructions. */ + if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset, + false, load)) + return true; + + /* In Thumb state, where registers are almost unconstrained, there + is little hope to fix it. */ + if (TARGET_THUMB2) + return false; + + if (load && commute) + { + /* Try reordering registers. */ + SWAP_RTX (operands[0], operands[1]); + if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset, + false, load)) + return true; + } + + if (const_store) + { + /* If input registers are dead after this pattern, they can be + reordered or replaced by other registers that are free in the + current pattern. */ + if (!peep2_reg_dead_p (4, operands[0]) + || !peep2_reg_dead_p (4, operands[1])) + return false; + + /* Try to reorder the input registers. */ + /* For example, the code + mov r0, 0 + mov r1, 1 + str r1, [r2] + str r0, [r2, #4] + can be transformed into + mov r1, 0 + mov r0, 1 + strd r0, [r2] + */ + if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset, + false, false)) + { + SWAP_RTX (operands[0], operands[1]); + return true; + } + + /* Try to find a free DI register. */ + CLEAR_HARD_REG_SET (regset); + add_to_hard_reg_set (®set, SImode, REGNO (operands[0])); + add_to_hard_reg_set (®set, SImode, REGNO (operands[1])); + while (true) + { + tmp = peep2_find_free_register (0, 4, "r", DImode, ®set); + if (tmp == NULL_RTX) + return false; + + /* DREG must be an even-numbered register in DImode. + Split it into SI registers. 
*/ + operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0); + operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4); + gcc_assert (operands[0] != NULL_RTX); + gcc_assert (operands[1] != NULL_RTX); + gcc_assert (REGNO (operands[0]) % 2 == 0); + gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1])); + + return (operands_ok_ldrd_strd (operands[0], operands[1], + base, offset, + false, load)); + } + } + + return false; +} +#undef SWAP_RTX + + + /* Print a symbolic form of X to the debug file, F. */ static void @@ -13872,6 +14370,16 @@ && IN_RANGE (INTVAL (op1), -7, 7)) action = CONV; } + /* ADCS , */ + else if (GET_CODE (XEXP (src, 0)) == PLUS + && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst) + && low_register_operand (XEXP (XEXP (src, 0), 1), + SImode) + && COMPARISON_P (op1) + && cc_register (XEXP (op1, 0), VOIDmode) + && maybe_get_arm_condition_code (op1) == ARM_CS + && XEXP (op1, 1) == const0_rtx) + action = CONV; break; case MINUS: @@ -14830,7 +15338,8 @@ { /* Constraints should ensure this. */ gcc_assert (code0 == MEM && code1 == REG); - gcc_assert (REGNO (operands[1]) != IP_REGNUM); + gcc_assert ((REGNO (operands[1]) != IP_REGNUM) + || (TARGET_ARM && TARGET_LDRD)); switch (GET_CODE (XEXP (operands[0], 0))) { @@ -16303,124 +16812,308 @@ } } -/* Generate and emit a pattern that will be recognized as STRD pattern. If even - number of registers are being pushed, multiple STRD patterns are created for - all register pairs. If odd number of registers are pushed, emit a - combination of STRDs and STR for the prologue saves. */ +/* Generate and emit a sequence of insns equivalent to PUSH, but using + STR and STRD. If an even number of registers are being pushed, one + or more STRD patterns are created for each register pair. If an + odd number of registers are pushed, emit an initial STR followed by + as many STRD instructions as are needed. This works best when the + stack is initially 64-bit aligned (the normal case), since it + ensures that each STRD is also 64-bit aligned. */ static void thumb2_emit_strd_push (unsigned long saved_regs_mask) { int num_regs = 0; - int i, j; + int i; + int regno; rtx par = NULL_RTX; - rtx insn = NULL_RTX; rtx dwarf = NULL_RTX; - rtx tmp, reg, tmp1; + rtx tmp; + bool first = true; + num_regs = bit_count (saved_regs_mask); + + /* Must be at least one register to save, and can't save SP or PC. */ + gcc_assert (num_regs > 0 && num_regs <= 14); + gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM))); + gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM))); + + /* Create sequence for DWARF info. All the frame-related data for + debugging is held in this wrapper. */ + dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1)); + + /* Describe the stack adjustment. */ + tmp = gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs)); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, 0) = tmp; + + /* Find the first register. */ + for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++) + ; + + i = 0; + + /* If there's an odd number of registers to push. Start off by + pushing a single register. This ensures that subsequent strd + operations are dword aligned (assuming that SP was originally + 64-bit aligned). 
*/ + if ((num_regs & 1) != 0) + { + rtx reg, mem, insn; + + reg = gen_rtx_REG (SImode, regno); + if (num_regs == 1) + mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode, + stack_pointer_rtx)); + else + mem = gen_frame_mem (Pmode, + gen_rtx_PRE_MODIFY + (Pmode, stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -4 * num_regs))); + + tmp = gen_rtx_SET (VOIDmode, mem, reg); + RTX_FRAME_RELATED_P (tmp) = 1; + insn = emit_insn (tmp); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); + tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx), + reg); + RTX_FRAME_RELATED_P (tmp) = 1; + i++; + regno++; + XVECEXP (dwarf, 0, i) = tmp; + first = false; + } + + while (i < num_regs) + if (saved_regs_mask & (1 << regno)) + { + rtx reg1, reg2, mem1, mem2; + rtx tmp0, tmp1, tmp2; + int regno2; + + /* Find the register to pair with this one. */ + for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0; + regno2++) + ; + + reg1 = gen_rtx_REG (SImode, regno); + reg2 = gen_rtx_REG (SImode, regno2); + + if (first) + { + rtx insn; + + first = false; + mem1 = gen_frame_mem (Pmode, plus_constant (Pmode, + stack_pointer_rtx, + -4 * num_regs)); + mem2 = gen_frame_mem (Pmode, plus_constant (Pmode, + stack_pointer_rtx, + -4 * (num_regs - 1))); + tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -4 * (num_regs))); + tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1); + tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2); + RTX_FRAME_RELATED_P (tmp0) = 1; + RTX_FRAME_RELATED_P (tmp1) = 1; + RTX_FRAME_RELATED_P (tmp2) = 1; + par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3)); + XVECEXP (par, 0, 0) = tmp0; + XVECEXP (par, 0, 1) = tmp1; + XVECEXP (par, 0, 2) = tmp2; + insn = emit_insn (par); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); + } + else + { + mem1 = gen_frame_mem (Pmode, plus_constant (Pmode, + stack_pointer_rtx, + 4 * i)); + mem2 = gen_frame_mem (Pmode, plus_constant (Pmode, + stack_pointer_rtx, + 4 * (i + 1))); + tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1); + tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2); + RTX_FRAME_RELATED_P (tmp1) = 1; + RTX_FRAME_RELATED_P (tmp2) = 1; + par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2)); + XVECEXP (par, 0, 0) = tmp1; + XVECEXP (par, 0, 1) = tmp2; + emit_insn (par); + } + + /* Create unwind information. This is an approximation. */ + tmp1 = gen_rtx_SET (VOIDmode, + gen_frame_mem (Pmode, + plus_constant (Pmode, + stack_pointer_rtx, + 4 * i)), + reg1); + tmp2 = gen_rtx_SET (VOIDmode, + gen_frame_mem (Pmode, + plus_constant (Pmode, + stack_pointer_rtx, + 4 * (i + 1))), + reg2); + + RTX_FRAME_RELATED_P (tmp1) = 1; + RTX_FRAME_RELATED_P (tmp2) = 1; + XVECEXP (dwarf, 0, i + 1) = tmp1; + XVECEXP (dwarf, 0, i + 2) = tmp2; + i += 2; + regno = regno2 + 1; + } + else + regno++; + + return; +} + +/* STRD in ARM mode requires consecutive registers. This function emits STRD + whenever possible, otherwise it emits single-word stores. The first store + also allocates stack space for all saved registers, using writeback with + post-addressing mode. All other stores use offset addressing. If no STRD + can be emitted, this function emits a sequence of single-word stores, + and not an STM as before, because single-word stores provide more freedom + scheduling and can be turned into an STM by peephole optimizations. 
*/ +static void +arm_emit_strd_push (unsigned long saved_regs_mask) +{ + int num_regs = 0; + int i, j, dwarf_index = 0; + int offset = 0; + rtx dwarf = NULL_RTX; + rtx insn = NULL_RTX; + rtx tmp, mem; + + /* TODO: A more efficient code can be emitted by changing the + layout, e.g., first push all pairs that can use STRD to keep the + stack aligned, and then push all other registers. */ for (i = 0; i <= LAST_ARM_REGNUM; i++) if (saved_regs_mask & (1 << i)) num_regs++; - gcc_assert (num_regs && num_regs <= 16); + gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM))); + gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM))); + gcc_assert (num_regs > 0); - /* Pre-decrement the stack pointer, based on there being num_regs 4-byte - registers to push. */ - tmp = gen_rtx_SET (VOIDmode, - stack_pointer_rtx, - plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs)); - RTX_FRAME_RELATED_P (tmp) = 1; - insn = emit_insn (tmp); - /* Create sequence for DWARF info. */ dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1)); - /* RTLs cannot be shared, hence create new copy for dwarf. */ - tmp1 = gen_rtx_SET (VOIDmode, + /* For dwarf info, we generate explicit stack update. */ + tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs)); - RTX_FRAME_RELATED_P (tmp1) = 1; - XVECEXP (dwarf, 0, 0) = tmp1; + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, dwarf_index++) = tmp; - gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM))); - gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM))); - - /* Var j iterates over all the registers to gather all the registers in - saved_regs_mask. Var i gives index of register R_j in stack frame. - A PARALLEL RTX of register-pair is created here, so that pattern for - STRD can be matched. If num_regs is odd, 1st register will be pushed - using STR and remaining registers will be pushed with STRD in pairs. - If num_regs is even, all registers are pushed with STRD in pairs. - Hence, skip first element for odd num_regs. */ - for (i = num_regs - 1, j = LAST_ARM_REGNUM; i >= (num_regs % 2); j--) + /* Save registers. */ + offset = - 4 * num_regs; + j = 0; + while (j <= LAST_ARM_REGNUM) if (saved_regs_mask & (1 << j)) { - /* Create RTX for store. New RTX is created for dwarf as - they are not sharable. */ - reg = gen_rtx_REG (SImode, j); - tmp = gen_rtx_SET (SImode, - gen_frame_mem - (SImode, - plus_constant (Pmode, stack_pointer_rtx, 4 * i)), - reg); + if ((j % 2 == 0) + && (saved_regs_mask & (1 << (j + 1)))) + { + /* Current register and previous register form register pair for + which STRD can be generated. */ + if (offset < 0) + { + /* Allocate stack space for all saved registers. */ + tmp = plus_constant (Pmode, stack_pointer_rtx, offset); + tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp); + mem = gen_frame_mem (DImode, tmp); + offset = 0; + } + else if (offset > 0) + mem = gen_frame_mem (DImode, + plus_constant (Pmode, + stack_pointer_rtx, + offset)); + else + mem = gen_frame_mem (DImode, stack_pointer_rtx); - tmp1 = gen_rtx_SET (SImode, - gen_frame_mem - (SImode, - plus_constant (Pmode, stack_pointer_rtx, 4 * i)), - reg); - RTX_FRAME_RELATED_P (tmp) = 1; - RTX_FRAME_RELATED_P (tmp1) = 1; + tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j)); + RTX_FRAME_RELATED_P (tmp) = 1; + tmp = emit_insn (tmp); - if (((i - (num_regs % 2)) % 2) == 1) - /* When (i - (num_regs % 2)) is odd, the RTX to be emitted is yet to - be created. Hence create it first. 
The STRD pattern we are - generating is : - [ (SET (MEM (PLUS (SP) (NUM))) (reg_t1)) - (SET (MEM (PLUS (SP) (NUM + 4))) (reg_t2)) ] - where the target registers need not be consecutive. */ - par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2)); + /* Record the first store insn. */ + if (dwarf_index == 1) + insn = tmp; - /* Register R_j is added in PARALLEL RTX. If (i - (num_regs % 2)) is - even, the reg_j is added as 0th element and if it is odd, reg_i is - added as 1st element of STRD pattern shown above. */ - XVECEXP (par, 0, ((i - (num_regs % 2)) % 2)) = tmp; - XVECEXP (dwarf, 0, (i + 1)) = tmp1; + /* Generate dwarf info. */ + mem = gen_frame_mem (SImode, + plus_constant (Pmode, + stack_pointer_rtx, + offset)); + tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j)); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, dwarf_index++) = tmp; - if (((i - (num_regs % 2)) % 2) == 0) - /* When (i - (num_regs % 2)) is even, RTXs for both the registers - to be loaded are generated in above given STRD pattern, and the - pattern can be emitted now. */ - emit_insn (par); + mem = gen_frame_mem (SImode, + plus_constant (Pmode, + stack_pointer_rtx, + offset + 4)); + tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1)); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, dwarf_index++) = tmp; - i--; - } + offset += 8; + j += 2; + } + else + { + /* Emit a single word store. */ + if (offset < 0) + { + /* Allocate stack space for all saved registers. */ + tmp = plus_constant (Pmode, stack_pointer_rtx, offset); + tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp); + mem = gen_frame_mem (SImode, tmp); + offset = 0; + } + else if (offset > 0) + mem = gen_frame_mem (SImode, + plus_constant (Pmode, + stack_pointer_rtx, + offset)); + else + mem = gen_frame_mem (SImode, stack_pointer_rtx); - if ((num_regs % 2) == 1) - { - /* If odd number of registers are pushed, generate STR pattern to store - lone register. */ - for (; (saved_regs_mask & (1 << j)) == 0; j--); + tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j)); + RTX_FRAME_RELATED_P (tmp) = 1; + tmp = emit_insn (tmp); - tmp1 = gen_frame_mem (SImode, plus_constant (Pmode, - stack_pointer_rtx, 4 * i)); - reg = gen_rtx_REG (SImode, j); - tmp = gen_rtx_SET (SImode, tmp1, reg); - RTX_FRAME_RELATED_P (tmp) = 1; + /* Record the first store insn. */ + if (dwarf_index == 1) + insn = tmp; - emit_insn (tmp); + /* Generate dwarf info. */ + mem = gen_frame_mem (SImode, + plus_constant(Pmode, + stack_pointer_rtx, + offset)); + tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j)); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, dwarf_index++) = tmp; - tmp1 = gen_rtx_SET (SImode, - gen_frame_mem - (SImode, - plus_constant (Pmode, stack_pointer_rtx, 4 * i)), - reg); - RTX_FRAME_RELATED_P (tmp1) = 1; - XVECEXP (dwarf, 0, (i + 1)) = tmp1; - } + offset += 4; + j += 1; + } + } + else + j++; + /* Attach dwarf info to the first insn we generate. */ + gcc_assert (insn != NULL_RTX); add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); RTX_FRAME_RELATED_P (insn) = 1; - return; } /* Generate and emit an insn that we will recognize as a push_multi. @@ -16565,6 +17258,19 @@ return par; } +/* Add a REG_CFA_ADJUST_CFA REG note to INSN. + SIZE is the offset to be adjusted. + DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. 
*/ +static void +arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src) +{ + rtx dwarf; + + RTX_FRAME_RELATED_P (insn) = 1; + dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size)); + add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf); +} + /* Generate and emit an insn pattern that we will recognize as a pop_multi. SAVED_REGS_MASK shows which registers need to be restored. @@ -16622,6 +17328,17 @@ if (saved_regs_mask & (1 << i)) { reg = gen_rtx_REG (SImode, i); + if ((num_regs == 1) && emit_update && !return_in_pc) + { + /* Emit single load with writeback. */ + tmp = gen_frame_mem (SImode, + gen_rtx_POST_INC (Pmode, + stack_pointer_rtx)); + tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp)); + REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf); + return; + } + tmp = gen_rtx_SET (VOIDmode, reg, gen_frame_mem @@ -16644,6 +17361,9 @@ par = emit_insn (par); REG_NOTES (par) = dwarf; + if (!return_in_pc) + arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs, + stack_pointer_rtx, stack_pointer_rtx); } /* Generate and emit an insn pattern that we will recognize as a pop_multi @@ -16714,6 +17434,9 @@ par = emit_insn (par); REG_NOTES (par) = dwarf; + + arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs, + base_reg, base_reg); } /* Generate and emit a pattern that will be recognized as LDRD pattern. If even @@ -16789,6 +17512,7 @@ pattern can be emitted now. */ par = emit_insn (par); REG_NOTES (par) = dwarf; + RTX_FRAME_RELATED_P (par) = 1; } i++; @@ -16805,7 +17529,12 @@ stack_pointer_rtx, plus_constant (Pmode, stack_pointer_rtx, 4 * i)); RTX_FRAME_RELATED_P (tmp) = 1; - emit_insn (tmp); + tmp = emit_insn (tmp); + if (!return_in_pc) + { + arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i, + stack_pointer_rtx, stack_pointer_rtx); + } dwarf = NULL_RTX; @@ -16839,9 +17568,11 @@ else { par = emit_insn (tmp); + REG_NOTES (par) = dwarf; + arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD, + stack_pointer_rtx, stack_pointer_rtx); } - REG_NOTES (par) = dwarf; } else if ((num_regs % 2) == 1 && return_in_pc) { @@ -16853,6 +17584,132 @@ return; } +/* LDRD in ARM mode needs consecutive registers as operands. This function + emits LDRD whenever possible, otherwise it emits single-word loads. It uses + offset addressing and then generates one separate stack udpate. This provides + more scheduling freedom, compared to writeback on every load. However, + if the function returns using load into PC directly + (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated + before the last load. TODO: Add a peephole optimization to recognize + the new epilogue sequence as an LDM instruction whenever possible. TODO: Add + peephole optimization to merge the load at stack-offset zero + with the stack update instruction using load with writeback + in post-index addressing mode. */ +static void +arm_emit_ldrd_pop (unsigned long saved_regs_mask) +{ + int j = 0; + int offset = 0; + rtx par = NULL_RTX; + rtx dwarf = NULL_RTX; + rtx tmp, mem; + + /* Restore saved registers. */ + gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM)))); + j = 0; + while (j <= LAST_ARM_REGNUM) + if (saved_regs_mask & (1 << j)) + { + if ((j % 2) == 0 + && (saved_regs_mask & (1 << (j + 1))) + && (j + 1) != PC_REGNUM) + { + /* Current register and next register form register pair for which + LDRD can be generated. PC is always the last register popped, and + we handle it separately. 
*/ + if (offset > 0) + mem = gen_frame_mem (DImode, + plus_constant (Pmode, + stack_pointer_rtx, + offset)); + else + mem = gen_frame_mem (DImode, stack_pointer_rtx); + + tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem); + tmp = emit_insn (tmp); + RTX_FRAME_RELATED_P (tmp) = 1; + + /* Generate dwarf info. */ + + dwarf = alloc_reg_note (REG_CFA_RESTORE, + gen_rtx_REG (SImode, j), + NULL_RTX); + dwarf = alloc_reg_note (REG_CFA_RESTORE, + gen_rtx_REG (SImode, j + 1), + dwarf); + + REG_NOTES (tmp) = dwarf; + + offset += 8; + j += 2; + } + else if (j != PC_REGNUM) + { + /* Emit a single word load. */ + if (offset > 0) + mem = gen_frame_mem (SImode, + plus_constant (Pmode, + stack_pointer_rtx, + offset)); + else + mem = gen_frame_mem (SImode, stack_pointer_rtx); + + tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem); + tmp = emit_insn (tmp); + RTX_FRAME_RELATED_P (tmp) = 1; + + /* Generate dwarf info. */ + REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, + gen_rtx_REG (SImode, j), + NULL_RTX); + + offset += 4; + j += 1; + } + else /* j == PC_REGNUM */ + j++; + } + else + j++; + + /* Update the stack. */ + if (offset > 0) + { + tmp = gen_rtx_SET (Pmode, + stack_pointer_rtx, + plus_constant (Pmode, + stack_pointer_rtx, + offset)); + tmp = emit_insn (tmp); + arm_add_cfa_adjust_cfa_note (tmp, offset, + stack_pointer_rtx, stack_pointer_rtx); + offset = 0; + } + + if (saved_regs_mask & (1 << PC_REGNUM)) + { + /* Only PC is to be popped. */ + par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2)); + XVECEXP (par, 0, 0) = ret_rtx; + tmp = gen_rtx_SET (SImode, + gen_rtx_REG (SImode, PC_REGNUM), + gen_frame_mem (SImode, + gen_rtx_POST_INC (SImode, + stack_pointer_rtx))); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (par, 0, 1) = tmp; + par = emit_jump_insn (par); + + /* Generate dwarf info. */ + dwarf = alloc_reg_note (REG_CFA_RESTORE, + gen_rtx_REG (SImode, PC_REGNUM), + NULL_RTX); + REG_NOTES (par) = dwarf; + arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD, + stack_pointer_rtx, stack_pointer_rtx); + } +} + /* Calculate the size of the return value that is passed in registers. */ static unsigned arm_size_return_regs (void) @@ -16877,11 +17734,27 @@ || df_regs_ever_live_p (LR_REGNUM)); } +/* We do not know if r3 will be available because + we do have an indirect tailcall happening in this + particular case. */ +static bool +is_indirect_tailcall_p (rtx call) +{ + rtx pat = PATTERN (call); + /* Indirect tail call. */ + pat = XVECEXP (pat, 0, 0); + if (GET_CODE (pat) == SET) + pat = SET_SRC (pat); + + pat = XEXP (XEXP (pat, 0), 0); + return REG_P (pat); +} + /* Return true if r3 is used by any of the tail call insns in the current function. */ static bool -any_sibcall_uses_r3 (void) +any_sibcall_could_use_r3 (void) { edge_iterator ei; edge e; @@ -16895,7 +17768,8 @@ if (!CALL_P (call)) call = prev_nonnote_nondebug_insn (call); gcc_assert (CALL_P (call) && SIBLING_CALL_P (call)); - if (find_regno_fusage (call, USE, 3)) + if (find_regno_fusage (call, USE, 3) + || is_indirect_tailcall_p (call)) return true; } return false; @@ -17062,9 +17936,11 @@ /* If it is safe to use r3, then do so. This sometimes generates better code on Thumb-2 by avoiding the need to use 32-bit push/pop instructions. */ - if (! any_sibcall_uses_r3 () + if (! 
any_sibcall_could_use_r3 () && arm_size_return_regs () <= 12 - && (offsets->saved_regs_mask & (1 << 3)) == 0) + && (offsets->saved_regs_mask & (1 << 3)) == 0 + && (TARGET_THUMB2 + || !(TARGET_LDRD && current_tune->prefer_ldrd_strd))) { reg = 3; } @@ -17497,6 +18373,12 @@ { thumb2_emit_strd_push (live_regs_mask); } + else if (TARGET_ARM + && !TARGET_APCS_FRAME + && !IS_INTERRUPT (func_type)) + { + arm_emit_strd_push (live_regs_mask); + } else { insn = emit_multi_reg_push (live_regs_mask); @@ -18774,7 +19656,14 @@ enum arm_cond_code code; int n; int mask; + int max; + /* Maximum number of conditionally executed instructions in a block + is minimum of the two max values: maximum allowed in an IT block + and maximum that is beneficial according to the cost model and tune. */ + max = (max_insns_skipped < MAX_INSN_PER_IT_BLOCK) ? + max_insns_skipped : MAX_INSN_PER_IT_BLOCK; + /* Remove the previous insn from the count of insns to be output. */ if (arm_condexec_count) arm_condexec_count--; @@ -18816,9 +19705,9 @@ /* ??? Recognize conditional jumps, and combine them with IT blocks. */ if (GET_CODE (body) != COND_EXEC) break; - /* Allow up to 4 conditionally executed instructions in a block. */ + /* Maximum number of conditionally executed instructions in a block. */ n = get_attr_ce_count (insn); - if (arm_condexec_masklen + n > 4) + if (arm_condexec_masklen + n > max) break; predicate = COND_EXEC_TEST (body); @@ -19376,6 +20265,7 @@ typedef enum { T_V8QI, T_V4HI, + T_V4HF, T_V2SI, T_V2SF, T_DI, @@ -19393,8 +20283,8 @@ #define TYPE_MODE_BIT(X) (1 << (X)) #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \ - | TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF) \ - | TYPE_MODE_BIT (T_DI)) + | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \ + | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI)) #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \ | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \ | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI)) @@ -19401,6 +20291,7 @@ #define v8qi_UP T_V8QI #define v4hi_UP T_V4HI +#define v4hf_UP T_V4HF #define v2si_UP T_V2SI #define v2sf_UP T_V2SF #define di_UP T_DI @@ -19436,6 +20327,8 @@ NEON_SCALARMULH, NEON_SCALARMAC, NEON_CONVERT, + NEON_FLOAT_WIDEN, + NEON_FLOAT_NARROW, NEON_FIXCONV, NEON_SELECT, NEON_RESULTPAIR, @@ -19496,7 +20389,8 @@ VAR9 (T, N, A, B, C, D, E, F, G, H, I), \ {#N, NEON_##T, UP (J), CF (N, J), 0} -/* The mode entries in the following table correspond to the "key" type of the +/* The NEON builtin data can be found in arm_neon_builtins.def. + The mode entries in the following table correspond to the "key" type of the instruction variant, i.e. equivalent to that which would be specified after the assembler mnemonic, which usually refers to the last vector operand. 
(Signed/unsigned/polynomial types are not differentiated between though, and @@ -19506,196 +20400,7 @@ static neon_builtin_datum neon_builtin_data[] = { - VAR10 (BINOP, vadd, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), - VAR3 (BINOP, vaddl, v8qi, v4hi, v2si), - VAR3 (BINOP, vaddw, v8qi, v4hi, v2si), - VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si), - VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), - VAR3 (BINOP, vaddhn, v8hi, v4si, v2di), - VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), - VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), - VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si), - VAR2 (TERNOP, vfma, v2sf, v4sf), - VAR2 (TERNOP, vfms, v2sf, v4sf), - VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), - VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si), - VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si), - VAR2 (TERNOP, vqdmlal, v4hi, v2si), - VAR2 (TERNOP, vqdmlsl, v4hi, v2si), - VAR3 (BINOP, vmull, v8qi, v4hi, v2si), - VAR2 (SCALARMULL, vmull_n, v4hi, v2si), - VAR2 (LANEMULL, vmull_lane, v4hi, v2si), - VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si), - VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si), - VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si), - VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si), - VAR2 (BINOP, vqdmull, v4hi, v2si), - VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), - VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), - VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), - VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di), - VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di), - VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di), - VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), - VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), - VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), - VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si), - VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), - VAR10 (BINOP, vsub, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), - VAR3 (BINOP, vsubl, v8qi, v4hi, v2si), - VAR3 (BINOP, vsubw, v8qi, v4hi, v2si), - VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), - VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si), - VAR3 (BINOP, vsubhn, v8hi, v4si, v2di), - VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), - VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), - VAR6 (BINOP, vcgeu, v8qi, v4hi, v2si, v16qi, v8hi, v4si), - VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), - VAR6 (BINOP, vcgtu, v8qi, v4hi, v2si, v16qi, v8hi, v4si), - VAR2 (BINOP, vcage, v2sf, v4sf), - VAR2 (BINOP, vcagt, v2sf, v4sf), - VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si), - VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), - VAR3 (BINOP, vabdl, v8qi, v4hi, v2si), - VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si), - VAR3 (TERNOP, vabal, v8qi, v4hi, v2si), - VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), - VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), - VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf), - VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si), - VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si), - VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf), - VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf), - VAR2 (BINOP, vrecps, v2sf, v4sf), - VAR2 (BINOP, vrsqrts, v2sf, v4sf), - VAR8 
(SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), - VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), - VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), - VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si), - VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), - VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si), - VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si), - VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si), - VAR2 (UNOP, vcnt, v8qi, v16qi), - VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf), - VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf), - VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si), - /* FIXME: vget_lane supports more variants than this! */ - VAR10 (GETLANE, vget_lane, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), - VAR10 (SETLANE, vset_lane, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), - VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di), - VAR10 (DUP, vdup_n, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), - VAR10 (DUPLANE, vdup_lane, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), - VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di), - VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di), - VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di), - VAR3 (UNOP, vmovn, v8hi, v4si, v2di), - VAR3 (UNOP, vqmovn, v8hi, v4si, v2di), - VAR3 (UNOP, vqmovun, v8hi, v4si, v2di), - VAR3 (UNOP, vmovl, v8qi, v4hi, v2si), - VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf), - VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf), - VAR2 (LANEMAC, vmlal_lane, v4hi, v2si), - VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si), - VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf), - VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si), - VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si), - VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf), - VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf), - VAR2 (SCALARMAC, vmlal_n, v4hi, v2si), - VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si), - VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf), - VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si), - VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si), - VAR10 (BINOP, vext, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), - VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), - VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi), - VAR2 (UNOP, vrev16, v8qi, v16qi), - VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf), - VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf), - VAR10 (SELECT, vbsl, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), - VAR2 (RINT, vrintn, v2sf, v4sf), - VAR2 (RINT, vrinta, v2sf, v4sf), - VAR2 (RINT, vrintp, v2sf, v4sf), - VAR2 (RINT, vrintm, v2sf, v4sf), - VAR2 (RINT, vrintz, v2sf, v4sf), - VAR2 (RINT, vrintx, v2sf, v4sf), - VAR1 (VTBL, vtbl1, v8qi), - VAR1 (VTBL, vtbl2, v8qi), - VAR1 (VTBL, vtbl3, v8qi), - VAR1 (VTBL, vtbl4, v8qi), - VAR1 (VTBX, vtbx1, v8qi), - VAR1 (VTBX, vtbx2, v8qi), - VAR1 (VTBX, vtbx3, v8qi), - VAR1 (VTBX, vtbx4, v8qi), - VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), - VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), - VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), - VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di), - VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di), - VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di), - VAR5 (REINTERP, vreinterpretv2sf, 
v8qi, v4hi, v2si, v2sf, di), - VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di), - VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di), - VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di), - VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di), - VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di), - VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di), - VAR10 (LOAD1, vld1, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), - VAR10 (LOAD1LANE, vld1_lane, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), - VAR10 (LOAD1, vld1_dup, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), - VAR10 (STORE1, vst1, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), - VAR10 (STORE1LANE, vst1_lane, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), - VAR9 (LOADSTRUCT, - vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf), - VAR7 (LOADSTRUCTLANE, vld2_lane, - v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf), - VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di), - VAR9 (STORESTRUCT, vst2, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf), - VAR7 (STORESTRUCTLANE, vst2_lane, - v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf), - VAR9 (LOADSTRUCT, - vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf), - VAR7 (LOADSTRUCTLANE, vld3_lane, - v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf), - VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di), - VAR9 (STORESTRUCT, vst3, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf), - VAR7 (STORESTRUCTLANE, vst3_lane, - v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf), - VAR9 (LOADSTRUCT, vld4, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf), - VAR7 (LOADSTRUCTLANE, vld4_lane, - v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf), - VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di), - VAR9 (STORESTRUCT, vst4, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf), - VAR7 (STORESTRUCTLANE, vst4_lane, - v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf), - VAR10 (LOGICBINOP, vand, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), - VAR10 (LOGICBINOP, vorr, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), - VAR10 (BINOP, veor, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), - VAR10 (LOGICBINOP, vbic, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), - VAR10 (LOGICBINOP, vorn, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) +#include "arm_neon_builtins.def" }; #undef CF @@ -19710,9 +20415,36 @@ #undef VAR9 #undef VAR10 -/* Neon defines builtins from ARM_BUILTIN_MAX upwards, though they don't have - symbolic names defined here (which would require too much duplication). - FIXME? 
*/ +#define CF(N,X) ARM_BUILTIN_NEON_##N##X +#define VAR1(T, N, A) \ + CF (N, A) +#define VAR2(T, N, A, B) \ + VAR1 (T, N, A), \ + CF (N, B) +#define VAR3(T, N, A, B, C) \ + VAR2 (T, N, A, B), \ + CF (N, C) +#define VAR4(T, N, A, B, C, D) \ + VAR3 (T, N, A, B, C), \ + CF (N, D) +#define VAR5(T, N, A, B, C, D, E) \ + VAR4 (T, N, A, B, C, D), \ + CF (N, E) +#define VAR6(T, N, A, B, C, D, E, F) \ + VAR5 (T, N, A, B, C, D, E), \ + CF (N, F) +#define VAR7(T, N, A, B, C, D, E, F, G) \ + VAR6 (T, N, A, B, C, D, E, F), \ + CF (N, G) +#define VAR8(T, N, A, B, C, D, E, F, G, H) \ + VAR7 (T, N, A, B, C, D, E, F, G), \ + CF (N, H) +#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \ + VAR8 (T, N, A, B, C, D, E, F, G, H), \ + CF (N, I) +#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \ + VAR9 (T, N, A, B, C, D, E, F, G, H, I), \ + CF (N, J) enum arm_builtins { ARM_BUILTIN_GETWCGR0, @@ -19961,13 +20693,54 @@ ARM_BUILTIN_WMERGE, - ARM_BUILTIN_NEON_BASE, + ARM_BUILTIN_CRC32B, + ARM_BUILTIN_CRC32H, + ARM_BUILTIN_CRC32W, + ARM_BUILTIN_CRC32CB, + ARM_BUILTIN_CRC32CH, + ARM_BUILTIN_CRC32CW, - ARM_BUILTIN_MAX = ARM_BUILTIN_NEON_BASE + ARRAY_SIZE (neon_builtin_data) +#undef CRYPTO1 +#undef CRYPTO2 +#undef CRYPTO3 + +#define CRYPTO1(L, U, M1, M2) \ + ARM_BUILTIN_CRYPTO_##U, +#define CRYPTO2(L, U, M1, M2, M3) \ + ARM_BUILTIN_CRYPTO_##U, +#define CRYPTO3(L, U, M1, M2, M3, M4) \ + ARM_BUILTIN_CRYPTO_##U, + +#include "crypto.def" + +#undef CRYPTO1 +#undef CRYPTO2 +#undef CRYPTO3 + +#include "arm_neon_builtins.def" + + ,ARM_BUILTIN_MAX }; +#define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data)) + +#undef CF +#undef VAR1 +#undef VAR2 +#undef VAR3 +#undef VAR4 +#undef VAR5 +#undef VAR6 +#undef VAR7 +#undef VAR8 +#undef VAR9 +#undef VAR10 + static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX]; +#define NUM_DREG_TYPES 5 +#define NUM_QREG_TYPES 6 + static void arm_init_neon_builtins (void) { @@ -19976,10 +20749,12 @@ tree neon_intQI_type_node; tree neon_intHI_type_node; + tree neon_floatHF_type_node; tree neon_polyQI_type_node; tree neon_polyHI_type_node; tree neon_intSI_type_node; tree neon_intDI_type_node; + tree neon_intUTI_type_node; tree neon_float_type_node; tree intQI_pointer_node; @@ -20002,6 +20777,7 @@ tree V8QI_type_node; tree V4HI_type_node; + tree V4HF_type_node; tree V2SI_type_node; tree V2SF_type_node; tree V16QI_type_node; @@ -20041,9 +20817,9 @@ tree void_ftype_pv4sf_v4sf_v4sf; tree void_ftype_pv2di_v2di_v2di; - tree reinterp_ftype_dreg[5][5]; - tree reinterp_ftype_qreg[5][5]; - tree dreg_types[5], qreg_types[5]; + tree reinterp_ftype_dreg[NUM_DREG_TYPES][NUM_DREG_TYPES]; + tree reinterp_ftype_qreg[NUM_QREG_TYPES][NUM_QREG_TYPES]; + tree dreg_types[NUM_DREG_TYPES], qreg_types[NUM_QREG_TYPES]; /* Create distinguished type nodes for NEON vector element types, and pointers to values of such types, so we can detect them later. */ @@ -20056,6 +20832,9 @@ neon_float_type_node = make_node (REAL_TYPE); TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE; layout_type (neon_float_type_node); + neon_floatHF_type_node = make_node (REAL_TYPE); + TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode); + layout_type (neon_floatHF_type_node); /* Define typedefs which exactly correspond to the modes we are basing vector types on. 
If you change these names you'll need to change @@ -20064,6 +20843,8 @@ "__builtin_neon_qi"); (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node, "__builtin_neon_hi"); + (*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node, + "__builtin_neon_hf"); (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node, "__builtin_neon_si"); (*lang_hooks.types.register_builtin_type) (neon_float_type_node, @@ -20105,6 +20886,8 @@ build_vector_type_for_mode (neon_intQI_type_node, V8QImode); V4HI_type_node = build_vector_type_for_mode (neon_intHI_type_node, V4HImode); + V4HF_type_node = + build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode); V2SI_type_node = build_vector_type_for_mode (neon_intSI_type_node, V2SImode); V2SF_type_node = @@ -20126,7 +20909,9 @@ intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode)); intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode)); intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode)); + neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode)); + (*lang_hooks.types.register_builtin_type) (intUQI_type_node, "__builtin_neon_uqi"); (*lang_hooks.types.register_builtin_type) (intUHI_type_node, @@ -20135,6 +20920,10 @@ "__builtin_neon_usi"); (*lang_hooks.types.register_builtin_type) (intUDI_type_node, "__builtin_neon_udi"); + (*lang_hooks.types.register_builtin_type) (intUDI_type_node, + "__builtin_neon_poly64"); + (*lang_hooks.types.register_builtin_type) (neon_intUTI_type_node, + "__builtin_neon_poly128"); /* Opaque integer types for structures of vectors. */ intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode)); @@ -20196,6 +20985,80 @@ build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node, V2DI_type_node, NULL); + if (TARGET_CRYPTO && TARGET_HARD_FLOAT) + { + tree V4USI_type_node = + build_vector_type_for_mode (intUSI_type_node, V4SImode); + + tree V16UQI_type_node = + build_vector_type_for_mode (intUQI_type_node, V16QImode); + + tree v16uqi_ftype_v16uqi + = build_function_type_list (V16UQI_type_node, V16UQI_type_node, NULL_TREE); + + tree v16uqi_ftype_v16uqi_v16uqi + = build_function_type_list (V16UQI_type_node, V16UQI_type_node, + V16UQI_type_node, NULL_TREE); + + tree v4usi_ftype_v4usi + = build_function_type_list (V4USI_type_node, V4USI_type_node, NULL_TREE); + + tree v4usi_ftype_v4usi_v4usi + = build_function_type_list (V4USI_type_node, V4USI_type_node, + V4USI_type_node, NULL_TREE); + + tree v4usi_ftype_v4usi_v4usi_v4usi + = build_function_type_list (V4USI_type_node, V4USI_type_node, + V4USI_type_node, V4USI_type_node, NULL_TREE); + + tree uti_ftype_udi_udi + = build_function_type_list (neon_intUTI_type_node, intUDI_type_node, + intUDI_type_node, NULL_TREE); + + #undef CRYPTO1 + #undef CRYPTO2 + #undef CRYPTO3 + #undef C + #undef N + #undef CF + #undef FT1 + #undef FT2 + #undef FT3 + + #define C(U) \ + ARM_BUILTIN_CRYPTO_##U + #define N(L) \ + "__builtin_arm_crypto_"#L + #define FT1(R, A) \ + R##_ftype_##A + #define FT2(R, A1, A2) \ + R##_ftype_##A1##_##A2 + #define FT3(R, A1, A2, A3) \ + R##_ftype_##A1##_##A2##_##A3 + #define CRYPTO1(L, U, R, A) \ + arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT1 (R, A), \ + C (U), BUILT_IN_MD, \ + NULL, NULL_TREE); + #define CRYPTO2(L, U, R, A1, A2) \ + arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT2 (R, A1, A2), \ + C (U), BUILT_IN_MD, \ + NULL, NULL_TREE); + + #define CRYPTO3(L, U, R, A1, A2, A3) \ + arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT3 (R, A1, A2, A3), \ 
+ C (U), BUILT_IN_MD, \ + NULL, NULL_TREE); + #include "crypto.def" + + #undef CRYPTO1 + #undef CRYPTO2 + #undef CRYPTO3 + #undef C + #undef N + #undef FT1 + #undef FT2 + #undef FT3 + } dreg_types[0] = V8QI_type_node; dreg_types[1] = V4HI_type_node; dreg_types[2] = V2SI_type_node; @@ -20207,14 +21070,17 @@ qreg_types[2] = V4SI_type_node; qreg_types[3] = V4SF_type_node; qreg_types[4] = V2DI_type_node; + qreg_types[5] = neon_intUTI_type_node; - for (i = 0; i < 5; i++) + for (i = 0; i < NUM_QREG_TYPES; i++) { int j; - for (j = 0; j < 5; j++) + for (j = 0; j < NUM_QREG_TYPES; j++) { - reinterp_ftype_dreg[i][j] - = build_function_type_list (dreg_types[i], dreg_types[j], NULL); + if (i < NUM_DREG_TYPES && j < NUM_DREG_TYPES) + reinterp_ftype_dreg[i][j] + = build_function_type_list (dreg_types[i], dreg_types[j], NULL); + reinterp_ftype_qreg[i][j] = build_function_type_list (qreg_types[i], qreg_types[j], NULL); } @@ -20227,7 +21093,7 @@ neon_builtin_datum *d = &neon_builtin_data[i]; const char* const modenames[] = { - "v8qi", "v4hi", "v2si", "v2sf", "di", + "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di", "v16qi", "v8hi", "v4si", "v4sf", "v2di", "ti", "ei", "oi" }; @@ -20429,9 +21295,14 @@ case NEON_REINTERP: { - /* We iterate over 5 doubleword types, then 5 quadword - types. */ - int rhs = d->mode % 5; + /* We iterate over NUM_DREG_TYPES doubleword types, + then NUM_QREG_TYPES quadword types. + V4HF is not a type used in reinterpret, so we translate + d->mode to the correct index in reinterp_ftype_dreg. */ + bool qreg_p + = GET_MODE_SIZE (insn_data[d->code].operand[0].mode) > 8; + int rhs = (d->mode - ((!qreg_p && (d->mode > T_V4HF)) ? 1 : 0)) + % NUM_QREG_TYPES; switch (insn_data[d->code].operand[0].mode) { case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break; @@ -20444,11 +21315,43 @@ case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break; case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break; case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break; + case TImode: ftype = reinterp_ftype_qreg[5][rhs]; break; default: gcc_unreachable (); } } break; + case NEON_FLOAT_WIDEN: + { + tree eltype = NULL_TREE; + tree return_type = NULL_TREE; + switch (insn_data[d->code].operand[1].mode) + { + case V4HFmode: + eltype = V4HF_type_node; + return_type = V4SF_type_node; + break; + default: gcc_unreachable (); + } + ftype = build_function_type_list (return_type, eltype, NULL); + break; + } + case NEON_FLOAT_NARROW: + { + tree eltype = NULL_TREE; + tree return_type = NULL_TREE; + + switch (insn_data[d->code].operand[1].mode) + { + case V4SFmode: + eltype = V4SF_type_node; + return_type = V4HF_type_node; + break; + default: gcc_unreachable (); + } + ftype = build_function_type_list (return_type, eltype, NULL); + break; + } default: gcc_unreachable (); } @@ -20463,6 +21366,9 @@ } } +#undef NUM_DREG_TYPES +#undef NUM_QREG_TYPES + #define def_mbuiltin(MASK, NAME, TYPE, CODE) \ do \ { \ @@ -20485,7 +21391,7 @@ const enum rtx_code comparison; const unsigned int flag; }; - + static const struct builtin_description bdesc_2arg[] = { #define IWMMXT_BUILTIN(code, string, builtin) \ @@ -20591,6 +21497,33 @@ IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS) IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ) IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ) + +#define CRC32_BUILTIN(L, U) \ + {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \ + UNKNOWN, 0}, + CRC32_BUILTIN (crc32b, CRC32B) + CRC32_BUILTIN (crc32h, CRC32H) + CRC32_BUILTIN (crc32w, CRC32W) + CRC32_BUILTIN (crc32cb, CRC32CB) + CRC32_BUILTIN (crc32ch, CRC32CH) + 
CRC32_BUILTIN (crc32cw, CRC32CW) +#undef CRC32_BUILTIN + + +#define CRYPTO_BUILTIN(L, U) \ + {0, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, ARM_BUILTIN_CRYPTO_##U, \ + UNKNOWN, 0}, +#undef CRYPTO1 +#undef CRYPTO2 +#undef CRYPTO3 +#define CRYPTO2(L, U, R, A1, A2) CRYPTO_BUILTIN (L, U) +#define CRYPTO1(L, U, R, A) +#define CRYPTO3(L, U, R, A1, A2, A3) +#include "crypto.def" +#undef CRYPTO1 +#undef CRYPTO2 +#undef CRYPTO3 + }; static const struct builtin_description bdesc_1arg[] = @@ -20619,8 +21552,28 @@ IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB) IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH) IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW) + +#define CRYPTO1(L, U, R, A) CRYPTO_BUILTIN (L, U) +#define CRYPTO2(L, U, R, A1, A2) +#define CRYPTO3(L, U, R, A1, A2, A3) +#include "crypto.def" +#undef CRYPTO1 +#undef CRYPTO2 +#undef CRYPTO3 }; +static const struct builtin_description bdesc_3arg[] = +{ +#define CRYPTO3(L, U, R, A1, A2, A3) CRYPTO_BUILTIN (L, U) +#define CRYPTO1(L, U, R, A) +#define CRYPTO2(L, U, R, A1, A2) +#include "crypto.def" +#undef CRYPTO1 +#undef CRYPTO2 +#undef CRYPTO3 + }; +#undef CRYPTO_BUILTIN + /* Set up all the iWMMXt builtins. This is not called if TARGET_IWMMXT is zero. */ @@ -20815,7 +21768,7 @@ enum machine_mode mode; tree type; - if (d->name == 0) + if (d->name == 0 || !(d->mask == FL_IWMMXT || d->mask == FL_IWMMXT2)) continue; mode = insn_data[d->icode].operand[1].mode; @@ -21010,6 +21963,42 @@ } static void +arm_init_crc32_builtins () +{ + tree si_ftype_si_qi + = build_function_type_list (unsigned_intSI_type_node, + unsigned_intSI_type_node, + unsigned_intQI_type_node, NULL_TREE); + tree si_ftype_si_hi + = build_function_type_list (unsigned_intSI_type_node, + unsigned_intSI_type_node, + unsigned_intHI_type_node, NULL_TREE); + tree si_ftype_si_si + = build_function_type_list (unsigned_intSI_type_node, + unsigned_intSI_type_node, + unsigned_intSI_type_node, NULL_TREE); + + arm_builtin_decls[ARM_BUILTIN_CRC32B] + = add_builtin_function ("__builtin_arm_crc32b", si_ftype_si_qi, + ARM_BUILTIN_CRC32B, BUILT_IN_MD, NULL, NULL_TREE); + arm_builtin_decls[ARM_BUILTIN_CRC32H] + = add_builtin_function ("__builtin_arm_crc32h", si_ftype_si_hi, + ARM_BUILTIN_CRC32H, BUILT_IN_MD, NULL, NULL_TREE); + arm_builtin_decls[ARM_BUILTIN_CRC32W] + = add_builtin_function ("__builtin_arm_crc32w", si_ftype_si_si, + ARM_BUILTIN_CRC32W, BUILT_IN_MD, NULL, NULL_TREE); + arm_builtin_decls[ARM_BUILTIN_CRC32CB] + = add_builtin_function ("__builtin_arm_crc32cb", si_ftype_si_qi, + ARM_BUILTIN_CRC32CB, BUILT_IN_MD, NULL, NULL_TREE); + arm_builtin_decls[ARM_BUILTIN_CRC32CH] + = add_builtin_function ("__builtin_arm_crc32ch", si_ftype_si_hi, + ARM_BUILTIN_CRC32CH, BUILT_IN_MD, NULL, NULL_TREE); + arm_builtin_decls[ARM_BUILTIN_CRC32CW] + = add_builtin_function ("__builtin_arm_crc32cw", si_ftype_si_si, + ARM_BUILTIN_CRC32CW, BUILT_IN_MD, NULL, NULL_TREE); +} + +static void arm_init_builtins (void) { if (TARGET_REALLY_IWMMXT) @@ -21020,6 +22009,9 @@ if (arm_fp16_format) arm_init_fp16_builtins (); + + if (TARGET_CRC32) + arm_init_crc32_builtins (); } /* Return the ARM builtin for CODE. */ @@ -21113,6 +22105,73 @@ return x; } +/* Function to expand ternary builtins. 
*/ +static rtx +arm_expand_ternop_builtin (enum insn_code icode, + tree exp, rtx target) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); + tree arg2 = CALL_EXPR_ARG (exp, 2); + + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + rtx op2 = expand_normal (arg2); + rtx op3 = NULL_RTX; + + /* The sha1c, sha1p, sha1m crypto builtins require a different vec_select + lane operand depending on endianness. */ + bool builtin_sha1cpm_p = false; + + if (insn_data[icode].n_operands == 5) + { + gcc_assert (icode == CODE_FOR_crypto_sha1c + || icode == CODE_FOR_crypto_sha1p + || icode == CODE_FOR_crypto_sha1m); + builtin_sha1cpm_p = true; + } + enum machine_mode tmode = insn_data[icode].operand[0].mode; + enum machine_mode mode0 = insn_data[icode].operand[1].mode; + enum machine_mode mode1 = insn_data[icode].operand[2].mode; + enum machine_mode mode2 = insn_data[icode].operand[3].mode; + + + if (VECTOR_MODE_P (mode0)) + op0 = safe_vector_operand (op0, mode0); + if (VECTOR_MODE_P (mode1)) + op1 = safe_vector_operand (op1, mode1); + if (VECTOR_MODE_P (mode2)) + op2 = safe_vector_operand (op2, mode2); + + if (! target + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode) + && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode) + && (GET_MODE (op2) == mode2 || GET_MODE (op2) == VOIDmode)); + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + if (! (*insn_data[icode].operand[3].predicate) (op2, mode2)) + op2 = copy_to_mode_reg (mode2, op2); + if (builtin_sha1cpm_p) + op3 = GEN_INT (TARGET_BIG_END ? 1 : 0); + + if (builtin_sha1cpm_p) + pat = GEN_FCN (icode) (target, op0, op1, op2, op3); + else + pat = GEN_FCN (icode) (target, op0, op1, op2); + if (! pat) + return 0; + emit_insn (pat); + return target; +} + /* Subroutine of arm_expand_builtin to take care of binop insns. */ static rtx @@ -21162,9 +22221,17 @@ rtx pat; tree arg0 = CALL_EXPR_ARG (exp, 0); rtx op0 = expand_normal (arg0); + rtx op1 = NULL_RTX; enum machine_mode tmode = insn_data[icode].operand[0].mode; enum machine_mode mode0 = insn_data[icode].operand[1].mode; + bool builtin_sha1h_p = false; + if (insn_data[icode].n_operands == 3) + { + gcc_assert (icode == CODE_FOR_crypto_sha1h); + builtin_sha1h_p = true; + } + if (! target || GET_MODE (target) != tmode || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) @@ -21179,8 +22246,13 @@ if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) op0 = copy_to_mode_reg (mode0, op0); } + if (builtin_sha1h_p) + op1 = GEN_INT (TARGET_BIG_END ? 1 : 0); - pat = GEN_FCN (icode) (target, op0); + if (builtin_sha1h_p) + pat = GEN_FCN (icode) (target, op0, op1); + else + pat = GEN_FCN (icode) (target, op0); if (! 
pat) return 0; emit_insn (pat); @@ -21452,6 +22524,8 @@ case NEON_DUP: case NEON_RINT: case NEON_SPLIT: + case NEON_FLOAT_WIDEN: + case NEON_FLOAT_NARROW: case NEON_REINTERP: return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); @@ -21649,7 +22723,7 @@ rtx op1; rtx op2; rtx pat; - int fcode = DECL_FUNCTION_CODE (fndecl); + unsigned int fcode = DECL_FUNCTION_CODE (fndecl); size_t i; enum machine_mode tmode; enum machine_mode mode0; @@ -22143,6 +23217,10 @@ if (d->code == (const enum arm_builtins) fcode) return arm_expand_unop_builtin (d->icode, exp, target, 0); + for (i = 0, d = bdesc_3arg; i < ARRAY_SIZE (bdesc_3arg); i++, d++) + if (d->code == (const enum arm_builtins) fcode) + return arm_expand_ternop_builtin (d->icode, exp, target); + /* @@@ Should really do something sensible here. */ return NULL_RTX; } @@ -23366,7 +24444,7 @@ all we really need to check here is if single register is to be returned, or multiple register return. */ void -thumb2_expand_return (void) +thumb2_expand_return (bool simple_return) { int i, num_regs; unsigned long saved_regs_mask; @@ -23379,7 +24457,7 @@ if (saved_regs_mask & (1 << i)) num_regs++; - if (saved_regs_mask) + if (!simple_return && saved_regs_mask) { if (num_regs == 1) { @@ -23658,6 +24736,7 @@ if (frame_pointer_needed) { + rtx insn; /* Restore stack pointer if necessary. */ if (TARGET_ARM) { @@ -23668,9 +24747,12 @@ /* Force out any pending memory operations that reference stacked data before stack de-allocation occurs. */ emit_insn (gen_blockage ()); - emit_insn (gen_addsi3 (stack_pointer_rtx, - hard_frame_pointer_rtx, - GEN_INT (amount))); + insn = emit_insn (gen_addsi3 (stack_pointer_rtx, + hard_frame_pointer_rtx, + GEN_INT (amount))); + arm_add_cfa_adjust_cfa_note (insn, amount, + stack_pointer_rtx, + hard_frame_pointer_rtx); /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not deleted. */ @@ -23680,16 +24762,25 @@ { /* In Thumb-2 mode, the frame pointer points to the last saved register. */ - amount = offsets->locals_base - offsets->saved_regs; - if (amount) - emit_insn (gen_addsi3 (hard_frame_pointer_rtx, - hard_frame_pointer_rtx, - GEN_INT (amount))); + amount = offsets->locals_base - offsets->saved_regs; + if (amount) + { + insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, + hard_frame_pointer_rtx, + GEN_INT (amount))); + arm_add_cfa_adjust_cfa_note (insn, amount, + hard_frame_pointer_rtx, + hard_frame_pointer_rtx); + } /* Force out any pending memory operations that reference stacked data before stack de-allocation occurs. */ emit_insn (gen_blockage ()); - emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx)); + insn = emit_insn (gen_movsi (stack_pointer_rtx, + hard_frame_pointer_rtx)); + arm_add_cfa_adjust_cfa_note (insn, 0, + stack_pointer_rtx, + hard_frame_pointer_rtx); /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not deleted. */ emit_insn (gen_force_register_use (stack_pointer_rtx)); @@ -23702,12 +24793,15 @@ amount = offsets->outgoing_args - offsets->saved_regs; if (amount) { + rtx tmp; /* Force out any pending memory operations that reference stacked data before stack de-allocation occurs. 
*/ emit_insn (gen_blockage ()); - emit_insn (gen_addsi3 (stack_pointer_rtx, - stack_pointer_rtx, - GEN_INT (amount))); + tmp = emit_insn (gen_addsi3 (stack_pointer_rtx, + stack_pointer_rtx, + GEN_INT (amount))); + arm_add_cfa_adjust_cfa_note (tmp, amount, + stack_pointer_rtx, stack_pointer_rtx); /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not deleted. */ emit_insn (gen_force_register_use (stack_pointer_rtx)); @@ -23760,6 +24854,8 @@ REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE, gen_rtx_REG (V2SImode, i), NULL_RTX); + arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD, + stack_pointer_rtx, stack_pointer_rtx); } if (saved_regs_mask) @@ -23807,6 +24903,9 @@ REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE, gen_rtx_REG (SImode, i), NULL_RTX); + arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD, + stack_pointer_rtx, + stack_pointer_rtx); } } } @@ -23818,6 +24917,8 @@ { if (TARGET_THUMB2) thumb2_emit_ldrd_pop (saved_regs_mask); + else if (TARGET_ARM && !IS_INTERRUPT (func_type)) + arm_emit_ldrd_pop (saved_regs_mask); else arm_emit_multi_reg_pop (saved_regs_mask); } @@ -23830,10 +24931,34 @@ } if (crtl->args.pretend_args_size) - emit_insn (gen_addsi3 (stack_pointer_rtx, - stack_pointer_rtx, - GEN_INT (crtl->args.pretend_args_size))); + { + int i, j; + rtx dwarf = NULL_RTX; + rtx tmp = emit_insn (gen_addsi3 (stack_pointer_rtx, + stack_pointer_rtx, + GEN_INT (crtl->args.pretend_args_size))); + RTX_FRAME_RELATED_P (tmp) = 1; + + if (cfun->machine->uses_anonymous_args) + { + /* Restore pretend args. Refer arm_expand_prologue on how to save + pretend_args in stack. */ + int num_regs = crtl->args.pretend_args_size / 4; + saved_regs_mask = (0xf0 >> num_regs) & 0xf; + for (j = 0, i = 0; j < num_regs; i++) + if (saved_regs_mask & (1 << i)) + { + rtx reg = gen_rtx_REG (SImode, i); + dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf); + j++; + } + REG_NOTES (tmp) = dwarf; + } + arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size, + stack_pointer_rtx, stack_pointer_rtx); + } + if (!really_return) return; @@ -24229,7 +25354,22 @@ { const char *fpu_name; if (arm_selected_arch) - asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name); + { + const char* pos = strchr (arm_selected_arch->name, '+'); + if (pos) + { + char buf[15]; + gcc_assert (strlen (arm_selected_arch->name) + <= sizeof (buf) / sizeof (*pos)); + strncpy (buf, arm_selected_arch->name, + (pos - arm_selected_arch->name) * sizeof (*pos)); + buf[pos - arm_selected_arch->name] = '\0'; + asm_fprintf (asm_out_file, "\t.arch %s\n", buf); + asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1); + } + else + asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name); + } else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0) asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8); else @@ -25086,7 +26226,7 @@ { /* Neon also supports V2SImode, etc. listed in the clause below. 
*/ if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode - || mode == V16QImode || mode == V4SFmode || mode == V2DImode)) + || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode)) return true; if ((TARGET_NEON || TARGET_IWMMXT) @@ -25249,9 +26389,8 @@ nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8; p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs)); - regno = (regno - FIRST_VFP_REGNUM) / 2; for (i = 0; i < nregs; i++) - XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i); + XVECEXP (p, 0, i) = gen_rtx_REG (DImode, regno + i); return p; } @@ -25501,9 +26640,17 @@ handled_one = true; break; + /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P + to get correct dwarf information for shrink-wrap. We should not + emit unwind information for it because these are used either for + pretend arguments or notes to adjust sp and restore registers from + stack. */ + case REG_CFA_ADJUST_CFA: + case REG_CFA_RESTORE: + return; + case REG_CFA_DEF_CFA: case REG_CFA_EXPRESSION: - case REG_CFA_ADJUST_CFA: case REG_CFA_OFFSET: /* ??? Only handling here what we actually emit. */ gcc_unreachable (); @@ -25901,6 +27048,7 @@ case cortexa7: case cortexa8: case cortexa9: + case cortexa53: case fa726te: case marvell_pj4: return 2; @@ -25929,11 +27077,13 @@ { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" }, { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" }, { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" }, + { V4HFmode, "__builtin_neon_hf", "18__simd64_float16_t" }, { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" }, { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" }, { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" }, { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" }, { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" }, + /* 128-bit containerized types. */ { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" }, { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" }, @@ -26027,6 +27177,60 @@ return !TARGET_THUMB1; } +tree +arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in) +{ + enum machine_mode in_mode, out_mode; + int in_n, out_n; + + if (TREE_CODE (type_out) != VECTOR_TYPE + || TREE_CODE (type_in) != VECTOR_TYPE + || !(TARGET_NEON && TARGET_FPU_ARMV8 && flag_unsafe_math_optimizations)) + return NULL_TREE; + + out_mode = TYPE_MODE (TREE_TYPE (type_out)); + out_n = TYPE_VECTOR_SUBPARTS (type_out); + in_mode = TYPE_MODE (TREE_TYPE (type_in)); + in_n = TYPE_VECTOR_SUBPARTS (type_in); + +/* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the + decl of the vectorized builtin for the appropriate vector mode. + NULL_TREE is returned if no such builtin is available. */ +#undef ARM_CHECK_BUILTIN_MODE +#define ARM_CHECK_BUILTIN_MODE(C) \ + (out_mode == SFmode && out_n == C \ + && in_mode == SFmode && in_n == C) + +#undef ARM_FIND_VRINT_VARIANT +#define ARM_FIND_VRINT_VARIANT(N) \ + (ARM_CHECK_BUILTIN_MODE (2) \ + ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \ + : (ARM_CHECK_BUILTIN_MODE (4) \ + ? 
arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \ + : NULL_TREE)) + + if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL) + { + enum built_in_function fn = DECL_FUNCTION_CODE (fndecl); + switch (fn) + { + case BUILT_IN_FLOORF: + return ARM_FIND_VRINT_VARIANT (vrintm); + case BUILT_IN_CEILF: + return ARM_FIND_VRINT_VARIANT (vrintp); + case BUILT_IN_TRUNCF: + return ARM_FIND_VRINT_VARIANT (vrintz); + case BUILT_IN_ROUNDF: + return ARM_FIND_VRINT_VARIANT (vrinta); + default: + return NULL_TREE; + } + } + return NULL_TREE; +} +#undef ARM_CHECK_BUILTIN_MODE +#undef ARM_FIND_VRINT_VARIANT + /* The AAPCS sets the maximum alignment of a vector to 64 bits. */ static HOST_WIDE_INT arm_vector_alignment (const_tree type) @@ -26257,40 +27461,72 @@ emit_insn (gen_memory_barrier ()); } -/* Emit the load-exclusive and store-exclusive instructions. */ +/* Emit the load-exclusive and store-exclusive instructions. + Use acquire and release versions if necessary. */ static void -arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem) +arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem, bool acq) { rtx (*gen) (rtx, rtx); - switch (mode) + if (acq) { - case QImode: gen = gen_arm_load_exclusiveqi; break; - case HImode: gen = gen_arm_load_exclusivehi; break; - case SImode: gen = gen_arm_load_exclusivesi; break; - case DImode: gen = gen_arm_load_exclusivedi; break; - default: - gcc_unreachable (); + switch (mode) + { + case QImode: gen = gen_arm_load_acquire_exclusiveqi; break; + case HImode: gen = gen_arm_load_acquire_exclusivehi; break; + case SImode: gen = gen_arm_load_acquire_exclusivesi; break; + case DImode: gen = gen_arm_load_acquire_exclusivedi; break; + default: + gcc_unreachable (); + } } + else + { + switch (mode) + { + case QImode: gen = gen_arm_load_exclusiveqi; break; + case HImode: gen = gen_arm_load_exclusivehi; break; + case SImode: gen = gen_arm_load_exclusivesi; break; + case DImode: gen = gen_arm_load_exclusivedi; break; + default: + gcc_unreachable (); + } + } emit_insn (gen (rval, mem)); } static void -arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval, rtx mem) +arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval, + rtx mem, bool rel) { rtx (*gen) (rtx, rtx, rtx); - switch (mode) + if (rel) { - case QImode: gen = gen_arm_store_exclusiveqi; break; - case HImode: gen = gen_arm_store_exclusivehi; break; - case SImode: gen = gen_arm_store_exclusivesi; break; - case DImode: gen = gen_arm_store_exclusivedi; break; - default: - gcc_unreachable (); + switch (mode) + { + case QImode: gen = gen_arm_store_release_exclusiveqi; break; + case HImode: gen = gen_arm_store_release_exclusivehi; break; + case SImode: gen = gen_arm_store_release_exclusivesi; break; + case DImode: gen = gen_arm_store_release_exclusivedi; break; + default: + gcc_unreachable (); + } } + else + { + switch (mode) + { + case QImode: gen = gen_arm_store_exclusiveqi; break; + case HImode: gen = gen_arm_store_exclusivehi; break; + case SImode: gen = gen_arm_store_exclusivesi; break; + case DImode: gen = gen_arm_store_exclusivedi; break; + default: + gcc_unreachable (); + } + } emit_insn (gen (bval, rval, mem)); } @@ -26325,6 +27561,15 @@ mod_f = operands[7]; mode = GET_MODE (mem); + /* Normally the succ memory model must be stronger than fail, but in the + unlikely event of fail being ACQUIRE and succ being RELEASE we need to + promote succ to ACQ_REL so that we don't lose the acquire semantics. 
*/ + + if (TARGET_HAVE_LDACQ + && INTVAL (mod_f) == MEMMODEL_ACQUIRE + && INTVAL (mod_s) == MEMMODEL_RELEASE) + mod_s = GEN_INT (MEMMODEL_ACQ_REL); + switch (mode) { case QImode: @@ -26399,8 +27644,20 @@ scratch = operands[7]; mode = GET_MODE (mem); - arm_pre_atomic_barrier (mod_s); + bool use_acquire = TARGET_HAVE_LDACQ + && !(mod_s == MEMMODEL_RELAXED + || mod_s == MEMMODEL_CONSUME + || mod_s == MEMMODEL_RELEASE); + bool use_release = TARGET_HAVE_LDACQ + && !(mod_s == MEMMODEL_RELAXED + || mod_s == MEMMODEL_CONSUME + || mod_s == MEMMODEL_ACQUIRE); + + /* Checks whether a barrier is needed and emits one accordingly. */ + if (!(use_acquire || use_release)) + arm_pre_atomic_barrier (mod_s); + label1 = NULL_RTX; if (!is_weak) { @@ -26409,7 +27666,7 @@ } label2 = gen_label_rtx (); - arm_emit_load_exclusive (mode, rval, mem); + arm_emit_load_exclusive (mode, rval, mem, use_acquire); cond = arm_gen_compare_reg (NE, rval, oldval, scratch); x = gen_rtx_NE (VOIDmode, cond, const0_rtx); @@ -26417,7 +27674,7 @@ gen_rtx_LABEL_REF (Pmode, label2), pc_rtx); emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x)); - arm_emit_store_exclusive (mode, scratch, mem, newval); + arm_emit_store_exclusive (mode, scratch, mem, newval, use_release); /* Weak or strong, we want EQ to be true for success, so that we match the flags that we got from the compare above. */ @@ -26436,7 +27693,9 @@ if (mod_f != MEMMODEL_RELAXED) emit_label (label2); - arm_post_atomic_barrier (mod_s); + /* Checks whether a barrier is needed and emits one accordingly. */ + if (!(use_acquire || use_release)) + arm_post_atomic_barrier (mod_s); if (mod_f == MEMMODEL_RELAXED) emit_label (label2); @@ -26451,8 +27710,20 @@ enum machine_mode wmode = (mode == DImode ? DImode : SImode); rtx label, x; - arm_pre_atomic_barrier (model); + bool use_acquire = TARGET_HAVE_LDACQ + && !(model == MEMMODEL_RELAXED + || model == MEMMODEL_CONSUME + || model == MEMMODEL_RELEASE); + bool use_release = TARGET_HAVE_LDACQ + && !(model == MEMMODEL_RELAXED + || model == MEMMODEL_CONSUME + || model == MEMMODEL_ACQUIRE); + + /* Checks whether a barrier is needed and emits one accordingly. */ + if (!(use_acquire || use_release)) + arm_pre_atomic_barrier (model); + label = gen_label_rtx (); emit_label (label); @@ -26464,7 +27735,7 @@ old_out = new_out; value = simplify_gen_subreg (wmode, value, mode, 0); - arm_emit_load_exclusive (mode, old_out, mem); + arm_emit_load_exclusive (mode, old_out, mem, use_acquire); switch (code) { @@ -26512,12 +27783,15 @@ break; } - arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out)); + arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out), + use_release); x = gen_rtx_NE (VOIDmode, cond, const0_rtx); emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label)); - arm_post_atomic_barrier (model); + /* Checks whether a barrier is needed and emits one accordingly. */ + if (!(use_acquire || use_release)) + arm_post_atomic_barrier (model); } #define MAX_VECT_LEN 16 @@ -27457,4 +28731,12 @@ } +/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */ + +static unsigned HOST_WIDE_INT +arm_asan_shadow_offset (void) +{ + return (unsigned HOST_WIDE_INT) 1 << 29; +} + #include "gt-arm.h" --- a/src/gcc/config/arm/t-aprofile +++ b/src/gcc/config/arm/t-aprofile @@ -0,0 +1,177 @@ +# Copyright (C) 2012-2013 Free Software Foundation, Inc. +# +# This file is part of GCC. 
+# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +# This is a target makefile fragment that attempts to get +# multilibs built for the range of CPU's, FPU's and ABI's that +# are relevant for the A-profile architecture. It should +# not be used in conjunction with another make file fragment and +# assumes --with-arch, --with-cpu, --with-fpu, --with-float, --with-mode +# have their default values during the configure step. We enforce +# this during the top-level configury. + +MULTILIB_OPTIONS = +MULTILIB_DIRNAMES = +MULTILIB_EXCEPTIONS = +MULTILIB_MATCHES = +MULTILIB_REUSE = + +# We have the following hierachy: +# ISA: A32 (.) or T32 (thumb) +# Architecture: ARMv7-A (v7-a), ARMv7VE (v7ve), or ARMv8-A (v8-a). +# FPU: VFPv3-D16 (fpv3), NEONv1 (simdv1), VFPv4-D16 (fpv4), +# NEON-VFPV4 (simdvfpv4), NEON for ARMv8 (simdv8), or None (.). +# Float-abi: Soft (.), softfp (softfp), or hard (hardfp). + +# We use the option -mcpu=cortex-a7 because we do not yet have march=armv7ve +# or march=armv7a+virt as a command line option for the compiler. +MULTILIB_OPTIONS += mthumb +MULTILIB_DIRNAMES += thumb + +MULTILIB_OPTIONS += march=armv7-a/mcpu=cortex-a7/march=armv8-a +MULTILIB_DIRNAMES += v7-a v7ve v8-a + +MULTILIB_OPTIONS += mfpu=vfpv3-d16/mfpu=neon/mfpu=vfpv4-d16/mfpu=neon-vfpv4/mfpu=neon-fp-armv8 +MULTILIB_DIRNAMES += fpv3 simdv1 fpv4 simdvfpv4 simdv8 + +MULTILIB_OPTIONS += mfloat-abi=softfp/mfloat-abi=hard +MULTILIB_DIRNAMES += softfp hard + +# We don't build no-float libraries with an FPU. +MULTILIB_EXCEPTIONS += *mfpu=vfpv3-d16 +MULTILIB_EXCEPTIONS += *mfpu=neon +MULTILIB_EXCEPTIONS += *mfpu=vfpv4-d16 +MULTILIB_EXCEPTIONS += *mfpu=neon-vfpv4 +MULTILIB_EXCEPTIONS += *mfpu=neon-fp-armv8 + +# We don't build libraries requiring an FPU at the CPU/Arch/ISA level. +MULTILIB_EXCEPTIONS += mfloat-abi=* +MULTILIB_EXCEPTIONS += mfpu=* +MULTILIB_EXCEPTIONS += mthumb/mfloat-abi=* +MULTILIB_EXCEPTIONS += mthumb/mfpu=* +MULTILIB_EXCEPTIONS += *march=armv7-a/mfloat-abi=* +MULTILIB_EXCEPTIONS += *mcpu=cortex-a7/mfloat-abi=* +MULTILIB_EXCEPTIONS += *march=armv8-a/mfloat-abi=* + +# Ensure the correct FPU variants apply to the correct base architectures. 
+MULTILIB_EXCEPTIONS += *mcpu=cortex-a7/*mfpu=vfpv3-d16* +MULTILIB_EXCEPTIONS += *mcpu=cortex-a7/*mfpu=neon/* +MULTILIB_EXCEPTIONS += *march=armv8-a/*mfpu=vfpv3-d16* +MULTILIB_EXCEPTIONS += *march=armv8-a/*mfpu=neon/* +MULTILIB_EXCEPTIONS += *march=armv7-a/*mfpu=vfpv4-d16* +MULTILIB_EXCEPTIONS += *march=armv7-a/*mfpu=neon-vfpv4* +MULTILIB_EXCEPTIONS += *march=armv8-a/*mfpu=vfpv4-d16* +MULTILIB_EXCEPTIONS += *march=armv8-a/*mfpu=neon-vfpv4* +MULTILIB_EXCEPTIONS += *march=armv7-a/*mfpu=neon-fp-armv8* +MULTILIB_EXCEPTIONS += *mcpu=cortex-a7/*mfpu=neon-fp-armv8* + +# CPU Matches +MULTILIB_MATCHES += march?armv7-a=mcpu?cortex-a8 +MULTILIB_MATCHES += march?armv7-a=mcpu?cortex-a9 +MULTILIB_MATCHES += march?armv7-a=mcpu?cortex-a5 +MULTILIB_MATCHES += mcpu?cortex-a7=mcpu?cortex-a15 +MULTILIB_MATCHES += march?armv8-a=mcpu?cortex-a53 + +# FPU matches +MULTILIB_MATCHES += mfpu?vfpv3-d16=mfpu?vfpv3 +MULTILIB_MATCHES += mfpu?vfpv3-d16=mfpu?vfpv3-fp16 +MULTILIB_MATCHES += mfpu?vfpv3-d16=mfpu?vfpv3-fp16-d16 +MULTILIB_MATCHES += mfpu?vfpv4-d16=mfpu?vfpv4 +MULTILIB_MATCHES += mfpu?neon-fp-armv8=mfpu?crypto-neon-fp-armv8 + + +# Map all requests for vfpv3 with a later CPU to vfpv3-d16 v7-a. +# So if new CPUs are added above at the newer architecture levels, +# do something to map them below here. +# We take the approach of mapping down to v7-a regardless of what +# the fp option is if the integer architecture brings things down. +# This applies to any similar combination at the v7ve and v8-a arch +# levels. + +MULTILIB_REUSE += march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.hard=mcpu.cortex-a7/mfpu.vfpv3-d16/mfloat-abi.hard +MULTILIB_REUSE += march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.softfp=mcpu.cortex-a7/mfpu.vfpv3-d16/mfloat-abi.softfp +MULTILIB_REUSE += march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.hard=march.armv8-a/mfpu.vfpv3-d16/mfloat-abi.hard +MULTILIB_REUSE += march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.softfp=march.armv8-a/mfpu.vfpv3-d16/mfloat-abi.softfp +MULTILIB_REUSE += march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.hard=march.armv7-a/mfpu.vfpv4-d16/mfloat-abi.hard +MULTILIB_REUSE += march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.softfp=march.armv7-a/mfpu.vfpv4-d16/mfloat-abi.softfp +MULTILIB_REUSE += march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.hard=march.armv7-a/mfpu.fp-armv8/mfloat-abi.hard +MULTILIB_REUSE += march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.softfp=march.armv7-a/mfpu.fp-armv8/mfloat-abi.softfp +MULTILIB_REUSE += march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.hard=march.armv7-a/mfpu.vfpv4/mfloat-abi.hard +MULTILIB_REUSE += march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.softfp=march.armv7-a/mfpu.vfpv4/mfloat-abi.softfp + + +MULTILIB_REUSE += march.armv7-a/mfpu.neon/mfloat-abi.hard=mcpu.cortex-a7/mfpu.neon/mfloat-abi.hard +MULTILIB_REUSE += march.armv7-a/mfpu.neon/mfloat-abi.softfp=mcpu.cortex-a7/mfpu.neon/mfloat-abi.softfp +MULTILIB_REUSE += march.armv7-a/mfpu.neon/mfloat-abi.hard=march.armv8-a/mfpu.neon/mfloat-abi.hard +MULTILIB_REUSE += march.armv7-a/mfpu.neon/mfloat-abi.softfp=march.armv8-a/mfpu.neon/mfloat-abi.softfp +MULTILIB_REUSE += march.armv7-a/mfpu.neon/mfloat-abi.hard=march.armv7-a/mfpu.neon-vfpv4/mfloat-abi.hard +MULTILIB_REUSE += march.armv7-a/mfpu.neon/mfloat-abi.softfp=march.armv7-a/mfpu.neon-vfpv4/mfloat-abi.softfp +MULTILIB_REUSE += march.armv7-a/mfpu.neon/mfloat-abi.hard=march.armv7-a/mfpu.neon-fp-armv8/mfloat-abi.hard +MULTILIB_REUSE += march.armv7-a/mfpu.neon/mfloat-abi.softfp=march.armv7-a/mfpu.neon-fp-armv8/mfloat-abi.softfp + + +MULTILIB_REUSE += 
mcpu.cortex-a7/mfpu.vfpv4-d16/mfloat-abi.hard=mcpu.cortex-a7/mfpu.fp-armv8/mfloat-abi.hard +MULTILIB_REUSE += mcpu.cortex-a7/mfpu.vfpv4-d16/mfloat-abi.softfp=mcpu.cortex-a7/mfpu.fp-armv8/mfloat-abi.softfp +MULTILIB_REUSE += mcpu.cortex-a7/mfpu.vfpv4-d16/mfloat-abi.hard=march.armv8-a/mfpu.vfpv4/mfloat-abi.hard +MULTILIB_REUSE += mcpu.cortex-a7/mfpu.vfpv4-d16/mfloat-abi.softfp=march.armv8-a/mfpu.vfpv4/mfloat-abi.softfp +MULTILIB_REUSE += mcpu.cortex-a7/mfpu.vfpv4-d16/mfloat-abi.hard=march.armv8-a/mfpu.vfpv4-d16/mfloat-abi.hard +MULTILIB_REUSE += mcpu.cortex-a7/mfpu.vfpv4-d16/mfloat-abi.softfp=march.armv8-a/mfpu.vfpv4-d16/mfloat-abi.softfp + + +MULTILIB_REUSE += mcpu.cortex-a7/mfpu.neon-vfpv4/mfloat-abi.hard=march.armv8-a/mfpu.neon-vfpv4/mfloat-abi.hard +MULTILIB_REUSE += mcpu.cortex-a7/mfpu.neon-vfpv4/mfloat-abi.softfp=march.armv8-a/mfpu.neon-vfpv4/mfloat-abi.softfp +MULTILIB_REUSE += mcpu.cortex-a7/mfpu.neon-vfpv4/mfloat-abi.hard=mcpu.cortex-a7/mfpu.neon-fp-armv8/mfloat-abi.hard +MULTILIB_REUSE += mcpu.cortex-a7/mfpu.neon-vfpv4/mfloat-abi.softfp=mcpu.cortex-a7/mfpu.neon-fp-armv8/mfloat-abi.softfp + + + +# And again for mthumb. + +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.hard=mthumb/mcpu.cortex-a7/mfpu.vfpv3-d16/mfloat-abi.hard +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.softfp=mthumb/mcpu.cortex-a7/mfpu.vfpv3-d16/mfloat-abi.softfp +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.hard=mthumb/march.armv8-a/mfpu.vfpv3-d16/mfloat-abi.hard +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.softfp=mthumb/march.armv8-a/mfpu.vfpv3-d16/mfloat-abi.softfp +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.hard=mthumb/march.armv7-a/mfpu.vfpv4-d16/mfloat-abi.hard +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.softfp=mthumb/march.armv7-a/mfpu.vfpv4-d16/mfloat-abi.softfp +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.hard=mthumb/march.armv7-a/mfpu.fp-armv8/mfloat-abi.hard +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.softfp=mthumb/march.armv7-a/mfpu.fp-armv8/mfloat-abi.softfp +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.hard=mthumb/march.armv7-a/mfpu.vfpv4/mfloat-abi.hard +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.softfp=mthumb/march.armv7-a/mfpu.vfpv4/mfloat-abi.softfp + + +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.neon/mfloat-abi.hard=mthumb/mcpu.cortex-a7/mfpu.neon/mfloat-abi.hard +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.neon/mfloat-abi.softfp=mthumb/mcpu.cortex-a7/mfpu.neon/mfloat-abi.softfp +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.neon/mfloat-abi.hard=mthumb/march.armv8-a/mfpu.neon/mfloat-abi.hard +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.neon/mfloat-abi.softfp=mthumb/march.armv8-a/mfpu.neon/mfloat-abi.softfp +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.neon/mfloat-abi.hard=mthumb/march.armv7-a/mfpu.neon-vfpv4/mfloat-abi.hard +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.neon/mfloat-abi.softfp=mthumb/march.armv7-a/mfpu.neon-vfpv4/mfloat-abi.softfp +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.neon/mfloat-abi.hard=mthumb/march.armv7-a/mfpu.neon-fp-armv8/mfloat-abi.hard +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.neon/mfloat-abi.softfp=mthumb/march.armv7-a/mfpu.neon-fp-armv8/mfloat-abi.softfp + + +MULTILIB_REUSE += mthumb/mcpu.cortex-a7/mfpu.vfpv4-d16/mfloat-abi.hard=mthumb/mcpu.cortex-a7/mfpu.fp-armv8/mfloat-abi.hard +MULTILIB_REUSE += 
mthumb/mcpu.cortex-a7/mfpu.vfpv4-d16/mfloat-abi.softfp=mthumb/mcpu.cortex-a7/mfpu.fp-armv8/mfloat-abi.softfp +MULTILIB_REUSE += mthumb/mcpu.cortex-a7/mfpu.vfpv4-d16/mfloat-abi.hard=mthumb/march.armv8-a/mfpu.vfpv4/mfloat-abi.hard +MULTILIB_REUSE += mthumb/mcpu.cortex-a7/mfpu.vfpv4-d16/mfloat-abi.softfp=mthumb/march.armv8-a/mfpu.vfpv4/mfloat-abi.softfp +MULTILIB_REUSE += mthumb/mcpu.cortex-a7/mfpu.vfpv4-d16/mfloat-abi.hard=mthumb/march.armv8-a/mfpu.vfpv4-d16/mfloat-abi.hard +MULTILIB_REUSE += mthumb/mcpu.cortex-a7/mfpu.vfpv4-d16/mfloat-abi.softfp=mthumb/march.armv8-a/mfpu.vfpv4-d16/mfloat-abi.softfp + + +MULTILIB_REUSE += mthumb/mcpu.cortex-a7/mfpu.neon-vfpv4/mfloat-abi.hard=mthumb/march.armv8-a/mfpu.neon-vfpv4/mfloat-abi.hard +MULTILIB_REUSE += mthumb/mcpu.cortex-a7/mfpu.neon-vfpv4/mfloat-abi.softfp=mthumb/march.armv8-a/mfpu.neon-vfpv4/mfloat-abi.softfp +MULTILIB_REUSE += mthumb/mcpu.cortex-a7/mfpu.neon-vfpv4/mfloat-abi.hard=mthumb/mcpu.cortex-a7/mfpu.neon-fp-armv8/mfloat-abi.hard +MULTILIB_REUSE += mthumb/mcpu.cortex-a7/mfpu.neon-vfpv4/mfloat-abi.softfp=mthumb/mcpu.cortex-a7/mfpu.neon-fp-armv8/mfloat-abi.softfp --- a/src/gcc/config/arm/arm.h +++ b/src/gcc/config/arm/arm.h @@ -49,8 +49,14 @@ builtin_define ("__ARM_FEATURE_QBIT"); \ if (TARGET_ARM_SAT) \ builtin_define ("__ARM_FEATURE_SAT"); \ + if (TARGET_CRYPTO) \ + builtin_define ("__ARM_FEATURE_CRYPTO"); \ if (unaligned_access) \ builtin_define ("__ARM_FEATURE_UNALIGNED"); \ + if (TARGET_CRC32) \ + builtin_define ("__ARM_FEATURE_CRC32"); \ + if (TARGET_32BIT) \ + builtin_define ("__ARM_32BIT_STATE"); \ if (TARGET_ARM_FEATURE_LDREX) \ builtin_define_with_int_value ( \ "__ARM_FEATURE_LDREX", TARGET_ARM_FEATURE_LDREX); \ @@ -183,6 +189,11 @@ #define ARM_INVERSE_CONDITION_CODE(X) ((arm_cc) (((int)X) ^ 1)) +/* The maximaum number of instructions that is beneficial to + conditionally execute. */ +#undef MAX_CONDITIONAL_EXECUTE +#define MAX_CONDITIONAL_EXECUTE arm_max_conditional_execute () + extern int arm_target_label; extern int arm_ccfsm_state; extern GTY(()) rtx arm_target_insn; @@ -269,6 +280,8 @@ #define TARGET_LDRD (arm_arch5e && ARM_DOUBLEWORD_ALIGN \ && !TARGET_THUMB1) +#define TARGET_CRC32 (arm_arch_crc) + /* The following two macros concern the ability to execute coprocessor instructions for VFPv3 or NEON. TARGET_VFP3/TARGET_VFPD32 are currently only ever tested when we know we are generating for VFP hardware; we need @@ -350,10 +363,16 @@ #define TARGET_HAVE_LDREXD (((arm_arch6k && TARGET_ARM) || arm_arch7) \ && arm_arch_notm) +/* Nonzero if this chip supports load-acquire and store-release. */ +#define TARGET_HAVE_LDACQ (TARGET_ARM_ARCH >= 8) + /* Nonzero if integer division instructions supported. */ #define TARGET_IDIV ((TARGET_ARM && arm_arch_arm_hwdiv) \ || (TARGET_THUMB2 && arm_arch_thumb_hwdiv)) +/* Should NEON be used for 64-bits bitops. */ +#define TARGET_PREFER_NEON_64BITS (prefer_neon_for_64bits) + /* True iff the full BPABI is being used. If TARGET_BPABI is true, then TARGET_AAPCS_BASED must be true -- but the converse does not hold. TARGET_BPABI implies the use of the BPABI runtime library, @@ -539,6 +558,13 @@ /* Nonzero if chip supports integer division instruction in Thumb mode. */ extern int arm_arch_thumb_hwdiv; +/* Nonzero if we should use Neon to handle 64-bits operations rather + than core registers. */ +extern int prefer_neon_for_64bits; + +/* Nonzero if chip supports the ARMv8 CRC instructions. 
*/ +extern int arm_arch_crc; + #ifndef TARGET_DEFAULT #define TARGET_DEFAULT (MASK_APCS_FRAME) #endif @@ -630,6 +656,8 @@ #define BIGGEST_ALIGNMENT (ARM_DOUBLEWORD_ALIGN ? DOUBLEWORD_ALIGNMENT : 32) +#define MALLOC_ABI_ALIGNMENT BIGGEST_ALIGNMENT + /* XXX Blah -- this macro is used directly by libobjc. Since it supports no vector modes, cut out the complexity and fall back on BIGGEST_FIELD_ALIGNMENT. */ @@ -1040,7 +1068,7 @@ /* Modes valid for Neon D registers. */ #define VALID_NEON_DREG_MODE(MODE) \ ((MODE) == V2SImode || (MODE) == V4HImode || (MODE) == V8QImode \ - || (MODE) == V2SFmode || (MODE) == DImode) + || (MODE) == V4HFmode || (MODE) == V2SFmode || (MODE) == DImode) /* Modes valid for Neon Q registers. */ #define VALID_NEON_QREG_MODE(MODE) \ @@ -1130,6 +1158,7 @@ STACK_REG, BASE_REGS, HI_REGS, + CALLER_SAVE_REGS, GENERAL_REGS, CORE_REGS, VFP_D0_D7_REGS, @@ -1156,6 +1185,7 @@ "STACK_REG", \ "BASE_REGS", \ "HI_REGS", \ + "CALLER_SAVE_REGS", \ "GENERAL_REGS", \ "CORE_REGS", \ "VFP_D0_D7_REGS", \ @@ -1181,6 +1211,7 @@ { 0x00002000, 0x00000000, 0x00000000, 0x00000000 }, /* STACK_REG */ \ { 0x000020FF, 0x00000000, 0x00000000, 0x00000000 }, /* BASE_REGS */ \ { 0x00005F00, 0x00000000, 0x00000000, 0x00000000 }, /* HI_REGS */ \ + { 0x0000100F, 0x00000000, 0x00000000, 0x00000000 }, /* CALLER_SAVE_REGS */ \ { 0x00005FFF, 0x00000000, 0x00000000, 0x00000000 }, /* GENERAL_REGS */ \ { 0x00007FFF, 0x00000000, 0x00000000, 0x00000000 }, /* CORE_REGS */ \ { 0xFFFF0000, 0x00000000, 0x00000000, 0x00000000 }, /* VFP_D0_D7_REGS */ \ @@ -1643,7 +1674,7 @@ frame. */ #define EXIT_IGNORE_STACK 1 -#define EPILOGUE_USES(REGNO) ((REGNO) == LR_REGNUM) +#define EPILOGUE_USES(REGNO) (epilogue_completed && (REGNO) == LR_REGNUM) /* Determine if the epilogue should be output as RTL. You should override this if you define FUNCTION_EXTRA_EPILOGUE. */ --- a/src/gcc/config/arm/cortex-a8.md +++ b/src/gcc/config/arm/cortex-a8.md @@ -85,22 +85,19 @@ ;; (source read in E2 and destination available at the end of that cycle). (define_insn_reservation "cortex_a8_alu" 2 (and (eq_attr "tune" "cortexa8") - (ior (and (and (eq_attr "type" "alu_reg,simple_alu_imm") - (eq_attr "neon_type" "none")) - (not (eq_attr "insn" "mov,mvn"))) - (eq_attr "insn" "clz"))) + (ior (and (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg") + (eq_attr "neon_type" "none")) + (eq_attr "type" "clz"))) "cortex_a8_default") (define_insn_reservation "cortex_a8_alu_shift" 2 (and (eq_attr "tune" "cortexa8") - (and (eq_attr "type" "simple_alu_shift,alu_shift") - (not (eq_attr "insn" "mov,mvn")))) + (eq_attr "type" "extend,arlo_shift")) "cortex_a8_default") (define_insn_reservation "cortex_a8_alu_shift_reg" 2 (and (eq_attr "tune" "cortexa8") - (and (eq_attr "type" "alu_shift_reg") - (not (eq_attr "insn" "mov,mvn")))) + (eq_attr "type" "arlo_shift_reg")) "cortex_a8_default") ;; Move instructions. @@ -107,8 +104,8 @@ (define_insn_reservation "cortex_a8_mov" 1 (and (eq_attr "tune" "cortexa8") - (and (eq_attr "type" "alu_reg,simple_alu_imm,simple_alu_shift,alu_shift,alu_shift_reg") - (eq_attr "insn" "mov,mvn"))) + (eq_attr "type" "mov_imm,mov_reg,mov_shift,mov_shift_reg,\ + mvn_imm,mvn_reg,mvn_shift,mvn_shift_reg")) "cortex_a8_default") ;; Exceptions to the default latencies for data processing instructions. 
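The arm.h hunk above adds three new preprocessor feature tests: __ARM_FEATURE_CRYPTO (when the crypto extension is enabled), __ARM_FEATURE_CRC32 (when TARGET_CRC32 is set), and __ARM_32BIT_STATE (for any 32-bit ARM target). A minimal sketch of how user code can key off these predefines; crc32_hw and crc32_sw are hypothetical user-supplied routines, not symbols introduced by this patch:

#include <stdint.h>
#include <stddef.h>

uint32_t crc32_sw (uint32_t seed, const uint8_t *buf, size_t len);
#ifdef __ARM_FEATURE_CRC32
uint32_t crc32_hw (uint32_t seed, const uint8_t *buf, size_t len);
#endif

uint32_t
checksum (uint32_t seed, const uint8_t *buf, size_t len)
{
#ifdef __ARM_FEATURE_CRC32
  /* The compiler targets a core with the ARMv8 CRC32 instructions.  */
  return crc32_hw (seed, buf, len);
#else
  /* Portable fallback.  */
  return crc32_sw (seed, buf, len);
#endif
}
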
@@ -139,22 +136,22 @@ (define_insn_reservation "cortex_a8_mul" 6 (and (eq_attr "tune" "cortexa8") - (eq_attr "insn" "mul,smulxy,smmul")) + (eq_attr "type" "mul,smulxy,smmul")) "cortex_a8_multiply_2") (define_insn_reservation "cortex_a8_mla" 6 (and (eq_attr "tune" "cortexa8") - (eq_attr "insn" "mla,smlaxy,smlawy,smmla,smlad,smlsd")) + (eq_attr "type" "mla,smlaxy,smlawy,smmla,smlad,smlsd")) "cortex_a8_multiply_2") (define_insn_reservation "cortex_a8_mull" 7 (and (eq_attr "tune" "cortexa8") - (eq_attr "insn" "smull,umull,smlal,umlal,umaal,smlalxy")) + (eq_attr "type" "smull,umull,smlal,umlal,umaal,smlalxy")) "cortex_a8_multiply_3") (define_insn_reservation "cortex_a8_smulwy" 5 (and (eq_attr "tune" "cortexa8") - (eq_attr "insn" "smulwy,smuad,smusd")) + (eq_attr "type" "smulwy,smuad,smusd")) "cortex_a8_multiply") ;; smlald and smlsld are multiply-accumulate instructions but do not @@ -162,7 +159,7 @@ ;; cannot go in cortex_a8_mla above. (See below for bypass details.) (define_insn_reservation "cortex_a8_smlald" 6 (and (eq_attr "tune" "cortexa8") - (eq_attr "insn" "smlald,smlsld")) + (eq_attr "type" "smlald,smlsld")) "cortex_a8_multiply_2") ;; A multiply with a single-register result or an MLA, followed by an --- a/src/gcc/config/arm/arm-fixed.md +++ b/src/gcc/config/arm/arm-fixed.md @@ -19,12 +19,13 @@ ;; This file contains ARM instructions that support fixed-point operations. (define_insn "add3" - [(set (match_operand:FIXED 0 "s_register_operand" "=r") - (plus:FIXED (match_operand:FIXED 1 "s_register_operand" "r") - (match_operand:FIXED 2 "s_register_operand" "r")))] + [(set (match_operand:FIXED 0 "s_register_operand" "=l,r") + (plus:FIXED (match_operand:FIXED 1 "s_register_operand" "l,r") + (match_operand:FIXED 2 "s_register_operand" "l,r")))] "TARGET_32BIT" "add%?\\t%0, %1, %2" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no")]) (define_insn "add3" [(set (match_operand:ADDSUB 0 "s_register_operand" "=r") @@ -32,7 +33,8 @@ (match_operand:ADDSUB 2 "s_register_operand" "r")))] "TARGET_INT_SIMD" "sadd%?\\t%0, %1, %2" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "usadd3" [(set (match_operand:UQADDSUB 0 "s_register_operand" "=r") @@ -40,7 +42,8 @@ (match_operand:UQADDSUB 2 "s_register_operand" "r")))] "TARGET_INT_SIMD" "uqadd%?\\t%0, %1, %2" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "ssadd3" [(set (match_operand:QADDSUB 0 "s_register_operand" "=r") @@ -48,15 +51,17 @@ (match_operand:QADDSUB 2 "s_register_operand" "r")))] "TARGET_INT_SIMD" "qadd%?\\t%0, %1, %2" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "sub3" - [(set (match_operand:FIXED 0 "s_register_operand" "=r") - (minus:FIXED (match_operand:FIXED 1 "s_register_operand" "r") - (match_operand:FIXED 2 "s_register_operand" "r")))] + [(set (match_operand:FIXED 0 "s_register_operand" "=l,r") + (minus:FIXED (match_operand:FIXED 1 "s_register_operand" "l,r") + (match_operand:FIXED 2 "s_register_operand" "l,r")))] "TARGET_32BIT" "sub%?\\t%0, %1, %2" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no")]) (define_insn "sub3" [(set (match_operand:ADDSUB 0 "s_register_operand" "=r") @@ -64,7 +69,8 @@ (match_operand:ADDSUB 2 "s_register_operand" "r")))] "TARGET_INT_SIMD" "ssub%?\\t%0, %1, %2" - 
[(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "ussub3" [(set (match_operand:UQADDSUB 0 "s_register_operand" "=r") @@ -73,7 +79,8 @@ (match_operand:UQADDSUB 2 "s_register_operand" "r")))] "TARGET_INT_SIMD" "uqsub%?\\t%0, %1, %2" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "sssub3" [(set (match_operand:QADDSUB 0 "s_register_operand" "=r") @@ -81,7 +88,8 @@ (match_operand:QADDSUB 2 "s_register_operand" "r")))] "TARGET_INT_SIMD" "qsub%?\\t%0, %1, %2" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) ;; Fractional multiplies. @@ -96,7 +104,7 @@ rtx tmp1 = gen_reg_rtx (HImode); rtx tmp2 = gen_reg_rtx (HImode); rtx tmp3 = gen_reg_rtx (SImode); - + emit_insn (gen_extendqihi2 (tmp1, gen_lowpart (QImode, operands[1]))); emit_insn (gen_extendqihi2 (tmp2, gen_lowpart (QImode, operands[2]))); emit_insn (gen_mulhisi3 (tmp3, tmp1, tmp2)); @@ -132,7 +140,7 @@ rtx tmp1 = gen_reg_rtx (DImode); rtx tmp2 = gen_reg_rtx (SImode); rtx tmp3 = gen_reg_rtx (SImode); - + /* s.31 * s.31 -> s.62 multiplication. */ emit_insn (gen_mulsidi3 (tmp1, gen_lowpart (SImode, operands[1]), gen_lowpart (SImode, operands[2]))); @@ -154,7 +162,7 @@ rtx tmp1 = gen_reg_rtx (DImode); rtx tmp2 = gen_reg_rtx (SImode); rtx tmp3 = gen_reg_rtx (SImode); - + emit_insn (gen_mulsidi3 (tmp1, gen_lowpart (SImode, operands[1]), gen_lowpart (SImode, operands[2]))); emit_insn (gen_lshrsi3 (tmp2, gen_lowpart (SImode, tmp1), GEN_INT (15))); @@ -173,13 +181,13 @@ rtx tmp1 = gen_reg_rtx (DImode); rtx tmp2 = gen_reg_rtx (SImode); rtx tmp3 = gen_reg_rtx (SImode); - + emit_insn (gen_umulsidi3 (tmp1, gen_lowpart (SImode, operands[1]), gen_lowpart (SImode, operands[2]))); emit_insn (gen_lshrsi3 (tmp2, gen_lowpart (SImode, tmp1), GEN_INT (16))); emit_insn (gen_ashlsi3 (tmp3, gen_highpart (SImode, tmp1), GEN_INT (16))); emit_insn (gen_iorsi3 (gen_lowpart (SImode, operands[0]), tmp2, tmp3)); - + DONE; }) @@ -209,7 +217,7 @@ } /* We have: - 31 high word 0 31 low word 0 + 31 high word 0 31 low word 0 [ S i i .... i i i ] [ i f f f ... f f ] | @@ -221,9 +229,18 @@ output_asm_insn ("ssat\\t%R3, #15, %R3", operands); output_asm_insn ("mrs\\t%4, APSR", operands); output_asm_insn ("tst\\t%4, #1<<27", operands); - if (TARGET_THUMB2) - output_asm_insn ("it\\tne", operands); - output_asm_insn ("mvnne\\t%Q3, %R3, asr #32", operands); + if (arm_restrict_it) + { + output_asm_insn ("mvn\\t%4, %R3, asr #32", operands); + output_asm_insn ("it\\tne", operands); + output_asm_insn ("movne\\t%Q3, %4", operands); + } + else + { + if (TARGET_THUMB2) + output_asm_insn ("it\\tne", operands); + output_asm_insn ("mvnne\\t%Q3, %R3, asr #32", operands); + } output_asm_insn ("mov\\t%0, %Q3, lsr #15", operands); output_asm_insn ("orr\\t%0, %0, %R3, asl #17", operands); return ""; @@ -231,7 +248,9 @@ [(set_attr "conds" "clob") (set (attr "length") (if_then_else (eq_attr "is_thumb" "yes") - (const_int 38) + (if_then_else (match_test "arm_restrict_it") + (const_int 40) + (const_int 38)) (const_int 32)))]) ;; Same goes for this. @@ -257,7 +276,7 @@ } /* We have: - 31 high word 0 31 low word 0 + 31 high word 0 31 low word 0 [ i i i .... i i i ] [ f f f f ... 
f f ] | @@ -269,9 +288,18 @@ output_asm_insn ("usat\\t%R3, #16, %R3", operands); output_asm_insn ("mrs\\t%4, APSR", operands); output_asm_insn ("tst\\t%4, #1<<27", operands); - if (TARGET_THUMB2) - output_asm_insn ("it\\tne", operands); - output_asm_insn ("sbfxne\\t%Q3, %R3, #15, #1", operands); + if (arm_restrict_it) + { + output_asm_insn ("sbfx\\t%4, %R3, #15, #1", operands); + output_asm_insn ("it\\tne", operands); + output_asm_insn ("movne\\t%Q3, %4", operands); + } + else + { + if (TARGET_THUMB2) + output_asm_insn ("it\\tne", operands); + output_asm_insn ("sbfxne\\t%Q3, %R3, #15, #1", operands); + } output_asm_insn ("lsr\\t%0, %Q3, #16", operands); output_asm_insn ("orr\\t%0, %0, %R3, asl #16", operands); return ""; @@ -279,7 +307,9 @@ [(set_attr "conds" "clob") (set (attr "length") (if_then_else (eq_attr "is_thumb" "yes") - (const_int 38) + (if_then_else (match_test "arm_restrict_it") + (const_int 40) + (const_int 38)) (const_int 32)))]) (define_expand "mulha3" @@ -289,7 +319,7 @@ "TARGET_DSP_MULTIPLY && arm_arch_thumb2" { rtx tmp = gen_reg_rtx (SImode); - + emit_insn (gen_mulhisi3 (tmp, gen_lowpart (HImode, operands[1]), gen_lowpart (HImode, operands[2]))); emit_insn (gen_extv (gen_lowpart (SImode, operands[0]), tmp, GEN_INT (16), @@ -307,7 +337,7 @@ rtx tmp1 = gen_reg_rtx (SImode); rtx tmp2 = gen_reg_rtx (SImode); rtx tmp3 = gen_reg_rtx (SImode); - + /* 8.8 * 8.8 -> 16.16 multiply. */ emit_insn (gen_zero_extendhisi2 (tmp1, gen_lowpart (HImode, operands[1]))); emit_insn (gen_zero_extendhisi2 (tmp2, gen_lowpart (HImode, operands[2]))); @@ -326,7 +356,7 @@ { rtx tmp = gen_reg_rtx (SImode); rtx rshift; - + emit_insn (gen_mulhisi3 (tmp, gen_lowpart (HImode, operands[1]), gen_lowpart (HImode, operands[2]))); @@ -348,12 +378,12 @@ rtx tmp2 = gen_reg_rtx (SImode); rtx tmp3 = gen_reg_rtx (SImode); rtx rshift_tmp = gen_reg_rtx (SImode); - + /* Note: there's no smul[bt][bt] equivalent for unsigned multiplies. Use a normal 32x32->32-bit multiply instead. */ emit_insn (gen_zero_extendhisi2 (tmp1, gen_lowpart (HImode, operands[1]))); emit_insn (gen_zero_extendhisi2 (tmp2, gen_lowpart (HImode, operands[2]))); - + emit_insn (gen_mulsi3 (tmp3, tmp1, tmp2)); /* The operand to "usat" is signed, so we cannot use the "..., asr #8" @@ -374,9 +404,9 @@ "TARGET_32BIT && arm_arch6" "ssat%?\\t%0, #16, %2%S1" [(set_attr "predicable" "yes") - (set_attr "insn" "sat") + (set_attr "predicable_short_it" "no") (set_attr "shift" "1") - (set_attr "type" "alu_shift")]) + (set_attr "type" "arlo_shift")]) (define_insn "arm_usatsihi" [(set (match_operand:HI 0 "s_register_operand" "=r") @@ -384,4 +414,5 @@ "TARGET_INT_SIMD" "usat%?\\t%0, #16, %1" [(set_attr "predicable" "yes") - (set_attr "insn" "sat")]) + (set_attr "predicable_short_it" "no")] +) --- a/src/gcc/config/arm/crypto.def +++ b/src/gcc/config/arm/crypto.def @@ -0,0 +1,34 @@ +/* Cryptographic instruction builtin definitions. + Copyright (C) 2013-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. 
+ + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +CRYPTO2 (aesd, AESD, v16uqi, v16uqi, v16uqi) +CRYPTO2 (aese, AESE, v16uqi, v16uqi, v16uqi) +CRYPTO1 (aesimc, AESIMC, v16uqi, v16uqi) +CRYPTO1 (aesmc, AESMC, v16uqi, v16uqi) +CRYPTO1 (sha1h, SHA1H, v4usi, v4usi) +CRYPTO2 (sha1su1, SHA1SU1, v4usi, v4usi, v4usi) +CRYPTO2 (sha256su0, SHA256SU0, v4usi, v4usi, v4usi) +CRYPTO3 (sha1c, SHA1C, v4usi, v4usi, v4usi, v4usi) +CRYPTO3 (sha1m, SHA1M, v4usi, v4usi, v4usi, v4usi) +CRYPTO3 (sha1p, SHA1P, v4usi, v4usi, v4usi, v4usi) +CRYPTO3 (sha1su0, SHA1SU0, v4usi, v4usi, v4usi, v4usi) +CRYPTO3 (sha256h, SHA256H, v4usi, v4usi, v4usi, v4usi) +CRYPTO3 (sha256h2, SHA256H2, v4usi, v4usi, v4usi, v4usi) +CRYPTO3 (sha256su1, SHA256SU1, v4usi, v4usi, v4usi, v4usi) +CRYPTO2 (vmullp64, VMULLP64, uti, udi, udi) --- a/src/gcc/config/arm/unspecs.md +++ b/src/gcc/config/arm/unspecs.md @@ -139,6 +139,10 @@ VUNSPEC_ATOMIC_OP ; Represent an atomic operation. VUNSPEC_LL ; Represent a load-register-exclusive. VUNSPEC_SC ; Represent a store-register-exclusive. + VUNSPEC_LAX ; Represent a load-register-acquire-exclusive. + VUNSPEC_SLX ; Represent a store-register-release-exclusive. + VUNSPEC_LDA ; Represent a store-register-acquire. + VUNSPEC_STL ; Represent a store-register-release. ]) ;; Enumerators for NEON unspecs. @@ -145,6 +149,27 @@ (define_c_enum "unspec" [ UNSPEC_ASHIFT_SIGNED UNSPEC_ASHIFT_UNSIGNED + UNSPEC_CRC32B + UNSPEC_CRC32H + UNSPEC_CRC32W + UNSPEC_CRC32CB + UNSPEC_CRC32CH + UNSPEC_CRC32CW + UNSPEC_AESD + UNSPEC_AESE + UNSPEC_AESIMC + UNSPEC_AESMC + UNSPEC_SHA1C + UNSPEC_SHA1M + UNSPEC_SHA1P + UNSPEC_SHA1H + UNSPEC_SHA1SU0 + UNSPEC_SHA1SU1 + UNSPEC_SHA256H + UNSPEC_SHA256H2 + UNSPEC_SHA256SU0 + UNSPEC_SHA256SU1 + UNSPEC_VMULLP64 UNSPEC_LOAD_COUNT UNSPEC_VABD UNSPEC_VABDL --- a/src/gcc/config/arm/cortex-m4.md +++ b/src/gcc/config/arm/cortex-m4.md @@ -31,7 +31,12 @@ ;; ALU and multiply is one cycle. (define_insn_reservation "cortex_m4_alu" 1 (and (eq_attr "tune" "cortexm4") - (eq_attr "type" "alu_reg,simple_alu_imm,simple_alu_shift,alu_shift,alu_shift_reg,mult")) + (ior (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg,extend,\ + arlo_shift,arlo_shift_reg,\ + mov_imm,mov_reg,mov_shift,mov_shift_reg,\ + mvn_imm,mvn_reg,mvn_shift,mvn_shift_reg") + (ior (eq_attr "mul32" "yes") + (eq_attr "mul64" "yes")))) "cortex_m4_ex") ;; Byte, half-word and word load is two cycles. 
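The crypto.def table above is consumed by the CRYPTO1/CRYPTO2/CRYPTO3 and C/N/FT1/FT2/FT3 helper macros added to the NEON builtin initialisation earlier in this patch. Expanding one CRYPTO2 entry by hand shows what actually gets registered; this is a sketch of the preprocessor output, not additional code in the patch:

/* CRYPTO2 (aese, AESE, v16uqi, v16uqi, v16uqi) expands, via C (),
   N () and FT2 (), to approximately this builtin registration.  */
arm_builtin_decls[ARM_BUILTIN_CRYPTO_AESE]
  = add_builtin_function ("__builtin_arm_crypto_aese",
                          v16uqi_ftype_v16uqi_v16uqi,
                          ARM_BUILTIN_CRYPTO_AESE, BUILT_IN_MD,
                          NULL, NULL_TREE);
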
--- a/src/gcc/config/arm/linux-eabi.h +++ b/src/gcc/config/arm/linux-eabi.h @@ -84,10 +84,14 @@ LINUX_OR_ANDROID_LD (LINUX_TARGET_LINK_SPEC, \ LINUX_TARGET_LINK_SPEC " " ANDROID_LINK_SPEC) +#undef ASAN_CC1_SPEC +#define ASAN_CC1_SPEC "%{fsanitize=*:-funwind-tables}" + #undef CC1_SPEC #define CC1_SPEC \ - LINUX_OR_ANDROID_CC (GNU_USER_TARGET_CC1_SPEC, \ - GNU_USER_TARGET_CC1_SPEC " " ANDROID_CC1_SPEC) + LINUX_OR_ANDROID_CC (GNU_USER_TARGET_CC1_SPEC " " ASAN_CC1_SPEC, \ + GNU_USER_TARGET_CC1_SPEC " " ASAN_CC1_SPEC " " \ + ANDROID_CC1_SPEC) #define CC1PLUS_SPEC \ LINUX_OR_ANDROID_CC ("", ANDROID_CC1PLUS_SPEC) @@ -95,7 +99,7 @@ #undef LIB_SPEC #define LIB_SPEC \ LINUX_OR_ANDROID_LD (GNU_USER_TARGET_LIB_SPEC, \ - GNU_USER_TARGET_LIB_SPEC " " ANDROID_LIB_SPEC) + GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC " " ANDROID_LIB_SPEC) #undef STARTFILE_SPEC #define STARTFILE_SPEC \ --- a/src/gcc/config/arm/arm-cores.def +++ b/src/gcc/config/arm/arm-cores.def @@ -129,9 +129,11 @@ ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, cortex) ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, cortex_a9) ARM_CORE("cortex-a15", cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a15) +ARM_CORE("cortex-a53", cortexa53, 8A, FL_LDSCHED, cortex_a5) ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, cortex) ARM_CORE("cortex-r4f", cortexr4f, 7R, FL_LDSCHED, cortex) ARM_CORE("cortex-r5", cortexr5, 7R, FL_LDSCHED | FL_ARM_DIV, cortex) +ARM_CORE("cortex-r7", cortexr7, 7R, FL_LDSCHED | FL_ARM_DIV, cortex) ARM_CORE("cortex-m4", cortexm4, 7EM, FL_LDSCHED, cortex) ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, cortex) ARM_CORE("cortex-m1", cortexm1, 6M, FL_LDSCHED, v6m) --- a/src/gcc/config/arm/cortex-r4.md +++ b/src/gcc/config/arm/cortex-r4.md @@ -78,24 +78,22 @@ ;; for the purposes of the dual-issue constraints above. (define_insn_reservation "cortex_r4_alu" 2 (and (eq_attr "tune_cortexr4" "yes") - (and (eq_attr "type" "alu_reg,simple_alu_imm") - (not (eq_attr "insn" "mov")))) + (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg,mvn_imm,mvn_reg")) "cortex_r4_alu") (define_insn_reservation "cortex_r4_mov" 2 (and (eq_attr "tune_cortexr4" "yes") - (and (eq_attr "type" "alu_reg,simple_alu_imm") - (eq_attr "insn" "mov"))) + (eq_attr "type" "mov_imm,mov_reg")) "cortex_r4_mov") (define_insn_reservation "cortex_r4_alu_shift" 2 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "type" "simple_alu_shift,alu_shift")) + (eq_attr "type" "extend,arlo_shift,mov_shift,mvn_shift")) "cortex_r4_alu") (define_insn_reservation "cortex_r4_alu_shift_reg" 2 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "type" "alu_shift_reg")) + (eq_attr "type" "arlo_shift_reg,mov_shift_reg,mvn_shift_reg")) "cortex_r4_alu_shift_reg") ;; An ALU instruction followed by an ALU instruction with no early dep. 
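Earlier in this patch, arm_emit_load_exclusive and arm_emit_store_exclusive grow acquire/release variants, and the compare-and-swap and atomic-op expanders skip the explicit pre/post barriers whenever TARGET_HAVE_LDACQ lets them use the ARMv8 acquire/release exclusives instead. A small sketch of code that exercises those paths through the standard GCC __atomic builtins (the builtins are pre-existing; only their ARMv8 expansion changes):

#include <stdint.h>

/* With an ARMv8 target this compare-and-swap can be expanded with
   acquire/release exclusives instead of LDREX/STREX bracketed by
   DMB barriers.  */
int
try_lock (int *lock)
{
  int expected = 0;
  return __atomic_compare_exchange_n (lock, &expected, 1,
                                      /*weak=*/0,
                                      __ATOMIC_ACQUIRE,
                                      __ATOMIC_RELAXED);
}

void
unlock (int *lock)
{
  /* A release store; on ARMv8 this too can avoid an explicit barrier.  */
  __atomic_store_n (lock, 0, __ATOMIC_RELEASE);
}
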
@@ -128,32 +126,32 @@ (define_insn_reservation "cortex_r4_mul_4" 4 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "insn" "mul,smmul")) + (eq_attr "type" "mul,smmul")) "cortex_r4_mul_2") (define_insn_reservation "cortex_r4_mul_3" 3 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "insn" "smulxy,smulwy,smuad,smusd")) + (eq_attr "type" "smulxy,smulwy,smuad,smusd")) "cortex_r4_mul") (define_insn_reservation "cortex_r4_mla_4" 4 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "insn" "mla,smmla")) + (eq_attr "type" "mla,smmla")) "cortex_r4_mul_2") (define_insn_reservation "cortex_r4_mla_3" 3 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "insn" "smlaxy,smlawy,smlad,smlsd")) + (eq_attr "type" "smlaxy,smlawy,smlad,smlsd")) "cortex_r4_mul") (define_insn_reservation "cortex_r4_smlald" 3 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "insn" "smlald,smlsld")) + (eq_attr "type" "smlald,smlsld")) "cortex_r4_mul") (define_insn_reservation "cortex_r4_mull" 4 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "insn" "smull,umull,umlal,umaal")) + (eq_attr "type" "smull,umull,umlal,umaal")) "cortex_r4_mul_2") ;; A multiply or an MLA with a single-register result, followed by an @@ -196,12 +194,12 @@ ;; This gives a latency of nine for udiv and ten for sdiv. (define_insn_reservation "cortex_r4_udiv" 9 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "insn" "udiv")) + (eq_attr "type" "udiv")) "cortex_r4_div_9") (define_insn_reservation "cortex_r4_sdiv" 10 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "insn" "sdiv")) + (eq_attr "type" "sdiv")) "cortex_r4_div_10") ;; Branches. We assume correct prediction. --- a/src/gcc/config/arm/arm-tune.md +++ b/src/gcc/config/arm/arm-tune.md @@ -1,5 +1,5 @@ ;; -*- buffer-read-only: t -*- ;; Generated automatically by gentune.sh from arm-cores.def (define_attr "tune" - "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,genericv7a,cortexa5,cortexa7,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexr5,cortexm4,cortexm3,cortexm1,cortexm0,cortexm0plus,marvell_pj4" + "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,genericv7a,cortexa5,cortexa7,cortexa8,cortexa9,cortexa15,cortexa53,cortexr4,cortexr4f,cortexr5,cortexr7,cortexm4,cortexm3,cortexm1,cortexm0,cortexm0plus,marvell_pj4" (const (symbol_ref "((enum attr_tune) arm_tune)"))) --- a/src/gcc/config/arm/arm_acle.h +++ b/src/gcc/config/arm/arm_acle.h @@ -0,0 +1,100 @@ +/* ARM Non-NEON ACLE intrinsics include file. + + Copyright (C) 2013-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. 
+ + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifndef _GCC_ARM_ACLE_H +#define _GCC_ARM_ACLE_H + +#include +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef __ARM_FEATURE_CRC32 +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +__crc32b (uint32_t __a, uint8_t __b) +{ + return __builtin_arm_crc32b (__a, __b); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +__crc32h (uint32_t __a, uint16_t __b) +{ + return __builtin_arm_crc32h (__a, __b); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +__crc32w (uint32_t __a, uint32_t __b) +{ + return __builtin_arm_crc32w (__a, __b); +} + +#ifdef __ARM_32BIT_STATE +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +__crc32d (uint32_t __a, uint64_t __b) +{ + uint32_t __d; + + __d = __crc32w (__crc32w (__a, __b & 0xffffffffULL), __b >> 32); + return __d; +} +#endif + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +__crc32cb (uint32_t __a, uint8_t __b) +{ + return __builtin_arm_crc32cb (__a, __b); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +__crc32ch (uint32_t __a, uint16_t __b) +{ + return __builtin_arm_crc32ch (__a, __b); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +__crc32cw (uint32_t __a, uint32_t __b) +{ + return __builtin_arm_crc32cw (__a, __b); +} + +#ifdef __ARM_32BIT_STATE +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +__crc32cd (uint32_t __a, uint64_t __b) +{ + uint32_t __d; + + __d = __crc32cw (__crc32cw (__a, __b & 0xffffffffULL), __b >> 32); + return __d; +} +#endif + +#endif + +#ifdef __cplusplus +} +#endif + +#endif --- a/src/gcc/config/arm/arm-protos.h +++ b/src/gcc/config/arm/arm-protos.h @@ -24,12 +24,13 @@ extern enum unwind_info_type arm_except_unwind_info (struct gcc_options *); extern int use_return_insn (int, rtx); +extern bool use_simple_return_p (void); extern enum reg_class arm_regno_class (int); extern void arm_load_pic_register (unsigned long); extern int arm_volatile_func (void); extern void arm_expand_prologue (void); extern void arm_expand_epilogue (bool); -extern void thumb2_expand_return (void); +extern void thumb2_expand_return (bool); extern const char *arm_strip_name_encoding (const char *); extern void arm_asm_output_labelref (FILE *, const char *); extern void thumb2_asm_output_opcode (FILE *); @@ -78,6 +79,7 @@ extern void neon_pairwise_reduce (rtx, rtx, enum machine_mode, rtx (*) (rtx, rtx, rtx)); extern rtx neon_make_constant (rtx); +extern tree arm_builtin_vectorized_function (tree, tree, tree); extern void 
neon_expand_vector_init (rtx, rtx); extern void neon_lane_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT); extern void neon_const_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT); @@ -117,7 +119,9 @@ extern rtx arm_gen_store_multiple (int *, int, rtx, int, rtx, HOST_WIDE_INT *); extern bool offset_ok_for_ldrd_strd (HOST_WIDE_INT); extern bool operands_ok_ldrd_strd (rtx, rtx, rtx, HOST_WIDE_INT, bool, bool); +extern bool gen_operands_ldrd_strd (rtx *, bool, bool, bool); extern int arm_gen_movmemqi (rtx *); +extern bool gen_movmem_ldrd_strd (rtx *); extern enum machine_mode arm_select_cc_mode (RTX_CODE, rtx, rtx); extern enum machine_mode arm_select_dominance_cc_mode (rtx, rtx, HOST_WIDE_INT); @@ -224,6 +228,8 @@ extern void arm_order_regs_for_local_alloc (void); +extern int arm_max_conditional_execute (); + /* Vectorizer cost model implementation. */ struct cpu_vec_costs { const int scalar_stmt_cost; /* Cost of any scalar operation, excluding @@ -253,8 +259,7 @@ bool (*rtx_costs) (rtx, RTX_CODE, RTX_CODE, int *, bool); bool (*sched_adjust_cost) (rtx, rtx, rtx, int *); int constant_limit; - /* Maximum number of instructions to conditionalise in - arm_final_prescan_insn. */ + /* Maximum number of instructions to conditionalise. */ int max_insns_skipped; int num_prefetch_slots; int l1_cache_size; @@ -269,6 +274,8 @@ bool logical_op_non_short_circuit[2]; /* Vectorizer costs. */ const struct cpu_vec_costs* vec_costs; + /* Prefer Neon for 64-bit bitops. */ + bool prefer_neon_for_64bits; }; extern const struct tune_params *current_tune; --- a/src/gcc/config/arm/vfp.md +++ b/src/gcc/config/arm/vfp.md @@ -18,31 +18,6 @@ ;; along with GCC; see the file COPYING3. If not see ;; . */ -;; The VFP "type" attributes differ from those used in the FPA model. -;; fcpys Single precision cpy. -;; ffariths Single precision abs, neg. -;; ffarithd Double precision abs, neg, cpy. -;; fadds Single precision add/sub. -;; faddd Double precision add/sub. -;; fconsts Single precision load immediate. -;; fconstd Double precision load immediate. -;; fcmps Single precision comparison. -;; fcmpd Double precision comparison. -;; fmuls Single precision multiply. -;; fmuld Double precision multiply. -;; fmacs Single precision multiply-accumulate. -;; fmacd Double precision multiply-accumulate. -;; ffmas Single precision fused multiply-accumulate. -;; ffmad Double precision fused multiply-accumulate. -;; fdivs Single precision sqrt or division. -;; fdivd Double precision sqrt or division. -;; f_flag fmstat operation -;; f_load[sd] Floating point load from memory. -;; f_store[sd] Floating point store to memory. -;; f_2_r Transfer vfp to arm reg. -;; r_2_f Transfer arm to vfp reg. -;; f_cvt Convert floating<->integral - ;; SImode moves ;; ??? For now do not allow loading constants into vfp regs. This causes ;; problems because small constants get converted into adds. @@ -78,9 +53,8 @@ } " [(set_attr "predicable" "yes") - (set_attr "type" "*,*,simple_alu_imm,simple_alu_imm,load1,store1,r_2_f,f_2_r,fcpys,f_loads,f_stores") + (set_attr "type" "mov_reg,mov_reg,mvn_imm,mov_imm,load1,store1,r_2_f,f_2_r,fcpys,f_loads,f_stores") (set_attr "neon_type" "*,*,*,*,*,*,neon_mcr,neon_mrc,neon_vmov,*,*") - (set_attr "insn" "mov,mov,mvn,mov,*,*,*,*,*,*,*") (set_attr "pool_range" "*,*,*,*,4096,*,*,*,*,1020,*") (set_attr "neg_pool_range" "*,*,*,*,4084,*,*,*,*,1008,*")] ) @@ -87,9 +61,12 @@ ;; See thumb2.md:thumb2_movsi_insn for an explanation of the split ;; high/low register alternatives for loads and stores here. 
+;; The l/Py alternative should come after r/I to ensure that the short variant +;; is chosen with length 2 when the instruction is predicated for +;; arm_restrict_it. (define_insn "*thumb2_movsi_vfp" - [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r, l,*hk,m, *m,*t, r,*t,*t, *Uv") - (match_operand:SI 1 "general_operand" "rk, I,K,j,mi,*mi,l,*hk, r,*t,*t,*Uvi,*t"))] + [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,l,r,r, l,*hk,m, *m,*t, r,*t,*t, *Uv") + (match_operand:SI 1 "general_operand" "rk,I,Py,K,j,mi,*mi,l,*hk, r,*t,*t,*Uvi,*t"))] "TARGET_THUMB2 && TARGET_VFP && TARGET_HARD_FLOAT && ( s_register_operand (operands[0], SImode) || s_register_operand (operands[1], SImode))" @@ -96,25 +73,27 @@ "* switch (which_alternative) { - case 0: case 1: + case 0: + case 1: + case 2: return \"mov%?\\t%0, %1\"; - case 2: + case 3: return \"mvn%?\\t%0, #%B1\"; - case 3: + case 4: return \"movw%?\\t%0, %1\"; - case 4: case 5: + case 6: return \"ldr%?\\t%0, %1\"; - case 6: case 7: + case 8: return \"str%?\\t%1, %0\"; - case 8: + case 9: return \"fmsr%?\\t%0, %1\\t%@ int\"; - case 9: + case 10: return \"fmrs%?\\t%0, %1\\t%@ int\"; - case 10: + case 11: return \"fcpys%?\\t%0, %1\\t%@ int\"; - case 11: case 12: + case 12: case 13: return output_move_vfp (operands); default: gcc_unreachable (); @@ -121,11 +100,12 @@ } " [(set_attr "predicable" "yes") - (set_attr "type" "*,*,*,*,load1,load1,store1,store1,r_2_f,f_2_r,fcpys,f_loads,f_stores") - (set_attr "neon_type" "*,*,*,*,*,*,*,*,neon_mcr,neon_mrc,neon_vmov,*,*") - (set_attr "insn" "mov,mov,mvn,mov,*,*,*,*,*,*,*,*,*") - (set_attr "pool_range" "*,*,*,*,1018,4094,*,*,*,*,*,1018,*") - (set_attr "neg_pool_range" "*,*,*,*, 0, 0,*,*,*,*,*,1008,*")] + (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no,no,no,no,no,no") + (set_attr "type" "mov_reg,mov_reg,mov_reg,mvn_reg,mov_reg,load1,load1,store1,store1,r_2_f,f_2_r,fcpys,f_loads,f_stores") + (set_attr "length" "2,4,2,4,4,4,4,4,4,4,4,4,4,4") + (set_attr "neon_type" "*,*,*,*,*,*,*,*,*,neon_mcr,neon_mrc,neon_vmov,*,*") + (set_attr "pool_range" "*,*,*,*,*,1018,4094,*,*,*,*,*,1018,*") + (set_attr "neg_pool_range" "*,*,*,*,*, 0, 0,*,*,*,*,*,1008,*")] ) @@ -132,8 +112,8 @@ ;; DImode moves (define_insn "*movdi_vfp" - [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,r,r,m,w,r,w,w, Uv") - (match_operand:DI 1 "di_operand" "r,rDa,Db,Dc,mi,mi,r,r,w,w,Uvi,w"))] + [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,q,q,m,w,r,w,w, Uv") + (match_operand:DI 1 "di_operand" "r,rDa,Db,Dc,mi,mi,q,r,w,w,Uvi,w"))] "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP && arm_tune != cortexa8 && ( register_operand (operands[0], DImode) || register_operand (operands[1], DImode)) @@ -375,9 +355,8 @@ " [(set_attr "predicable" "yes") (set_attr "type" - "r_2_f,f_2_r,fconsts,f_loads,f_stores,load1,store1,fcpys,*") + "r_2_f,f_2_r,fconsts,f_loads,f_stores,load1,store1,fcpys,mov_reg") (set_attr "neon_type" "neon_mcr,neon_mrc,*,*,*,*,*,neon_vmov,*") - (set_attr "insn" "*,*,*,*,*,*,*,*,mov") (set_attr "pool_range" "*,*,*,1020,*,4096,*,*,*") (set_attr "neg_pool_range" "*,*,*,1008,*,4080,*,*,*")] ) @@ -412,15 +391,14 @@ } " [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" - "r_2_f,f_2_r,fconsts,f_loads,f_stores,load1,store1,fcpys,*") + "r_2_f,f_2_r,fconsts,f_loads,f_stores,load1,store1,fcpys,mov_reg") (set_attr "neon_type" "neon_mcr,neon_mrc,*,*,*,*,*,neon_vmov,*") - (set_attr "insn" "*,*,*,*,*,*,*,*,mov") (set_attr "pool_range" "*,*,*,1018,*,4090,*,*,*") (set_attr 
"neg_pool_range" "*,*,*,1008,*,0,*,*,*")] ) - ;; DFmode moves (define_insn "*movdf_vfp" @@ -550,7 +528,7 @@ [(match_operand 4 "cc_register" "") (const_int 0)]) (match_operand:SF 1 "s_register_operand" "0,t,t,0,?r,?r,0,t,t") (match_operand:SF 2 "s_register_operand" "t,0,t,?r,0,?r,t,0,t")))] - "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP" + "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP && !arm_restrict_it" "@ it\\t%D3\;fcpys%D3\\t%0, %2 it\\t%d3\;fcpys%d3\\t%0, %1 @@ -598,7 +576,7 @@ [(match_operand 4 "cc_register" "") (const_int 0)]) (match_operand:DF 1 "s_register_operand" "0,w,w,0,?r,?r,0,w,w") (match_operand:DF 2 "s_register_operand" "w,0,w,?r,0,?r,w,0,w")))] - "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE && !arm_restrict_it" "@ it\\t%D3\;fcpyd%D3\\t%P0, %P2 it\\t%d3\;fcpyd%d3\\t%P0, %P1 @@ -624,6 +602,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fabss%?\\t%0, %1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "ffariths")] ) @@ -633,6 +612,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fabsd%?\\t%P0, %P1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "ffarithd")] ) @@ -644,6 +624,7 @@ fnegs%?\\t%0, %1 eor%?\\t%0, %1, #-2147483648" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "ffariths")] ) @@ -689,6 +670,7 @@ } " [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "length" "4,4,8") (set_attr "type" "ffarithd")] ) @@ -703,6 +685,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fadds%?\\t%0, %1, %2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fadds")] ) @@ -713,6 +696,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "faddd%?\\t%P0, %P1, %P2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "faddd")] ) @@ -724,6 +708,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fsubs%?\\t%0, %1, %2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fadds")] ) @@ -734,6 +719,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fsubd%?\\t%P0, %P1, %P2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "faddd")] ) @@ -747,6 +733,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fdivs%?\\t%0, %1, %2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fdivs")] ) @@ -757,6 +744,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fdivd%?\\t%P0, %P1, %P2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fdivd")] ) @@ -770,6 +758,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fmuls%?\\t%0, %1, %2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fmuls")] ) @@ -780,6 +769,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fmuld%?\\t%P0, %P1, %P2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fmuld")] ) @@ -790,6 +780,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fnmuls%?\\t%0, %1, %2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fmuls")] ) @@ -800,6 +791,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fnmuld%?\\t%P0, %P1, %P2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") 
(set_attr "type" "fmuld")] ) @@ -815,6 +807,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fmacs%?\\t%0, %2, %3" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fmacs")] ) @@ -826,6 +819,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fmacd%?\\t%P0, %P2, %P3" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fmacd")] ) @@ -838,6 +832,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fmscs%?\\t%0, %2, %3" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fmacs")] ) @@ -849,6 +844,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fmscd%?\\t%P0, %P2, %P3" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fmacd")] ) @@ -861,6 +857,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fnmacs%?\\t%0, %2, %3" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fmacs")] ) @@ -872,6 +869,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fnmacd%?\\t%P0, %P2, %P3" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fmacd")] ) @@ -886,6 +884,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fnmscs%?\\t%0, %2, %3" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fmacs")] ) @@ -898,6 +897,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fnmscd%?\\t%P0, %P2, %P3" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fmacd")] ) @@ -911,6 +911,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA" "vfma%?.\\t%0, %1, %2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "ffma")] ) @@ -923,6 +924,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA" "vfms%?.\\t%0, %1, %2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "ffma")] ) @@ -934,6 +936,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA" "vfnms%?.\\t%0, %1, %2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "ffma")] ) @@ -946,6 +949,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA" "vfnma%?.\\t%0, %1, %2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "ffma")] ) @@ -958,6 +962,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fcvtds%?\\t%P0, %1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "f_cvt")] ) @@ -967,6 +972,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fcvtsd%?\\t%0, %P1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "f_cvt")] ) @@ -976,6 +982,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FP16" "vcvtb%?.f32.f16\\t%0, %1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "f_cvt")] ) @@ -985,6 +992,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FP16" "vcvtb%?.f16.f32\\t%0, %1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "f_cvt")] ) @@ -994,6 +1002,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "ftosizs%?\\t%0, %1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "f_cvt")] ) @@ -1003,6 +1012,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "ftosizd%?\\t%0, %P1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr 
"type" "f_cvt")] ) @@ -1013,6 +1023,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "ftouizs%?\\t%0, %1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "f_cvt")] ) @@ -1022,6 +1033,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "ftouizd%?\\t%0, %P1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "f_cvt")] ) @@ -1032,6 +1044,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fsitos%?\\t%0, %1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "f_cvt")] ) @@ -1041,6 +1054,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fsitod%?\\t%P0, %1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "f_cvt")] ) @@ -1051,6 +1065,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fuitos%?\\t%0, %1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "f_cvt")] ) @@ -1060,6 +1075,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fuitod%?\\t%P0, %1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "f_cvt")] ) @@ -1072,6 +1088,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fsqrts%?\\t%0, %1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fdivs")] ) @@ -1081,6 +1098,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "fsqrtd%?\\t%P0, %P1" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fdivd")] ) @@ -1168,6 +1186,7 @@ fcmps%?\\t%0, %1 fcmpzs%?\\t%0" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fcmps")] ) @@ -1180,6 +1199,7 @@ fcmpes%?\\t%0, %1 fcmpezs%?\\t%0" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fcmps")] ) @@ -1192,6 +1212,7 @@ fcmpd%?\\t%P0, %P1 fcmpzd%?\\t%P0" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fcmpd")] ) @@ -1204,6 +1225,7 @@ fcmped%?\\t%P0, %P1 fcmpezd%?\\t%P0" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "type" "fcmpd")] ) @@ -1264,6 +1286,7 @@ "TARGET_HARD_FLOAT && TARGET_FPU_ARMV8 " "vrint%?.\\t%0, %1" [(set_attr "predicable" "") + (set_attr "predicable_short_it" "no") (set_attr "conds" "") (set_attr "type" "f_rint")] ) --- a/src/gcc/config/arm/t-linux-eabi +++ b/src/gcc/config/arm/t-linux-eabi @@ -18,6 +18,8 @@ # We do not build a Thumb multilib for Linux because the definition of # CLEAR_INSN_CACHE in linux-gas.h does not work in Thumb mode. +# If you set MULTILIB_OPTIONS to a non-empty value you should also set +# MULTILIB_DEFAULTS in linux-elf.h. 
MULTILIB_OPTIONS = MULTILIB_DIRNAMES = --- a/src/gcc/config/arm/neon.md +++ b/src/gcc/config/arm/neon.md @@ -61,8 +61,7 @@ } } [(set_attr "neon_type" "neon_int_1,*,neon_vmov,*,neon_mrrc,neon_mcr_2_mcrr,*,*,*") - (set_attr "type" "*,f_stored,*,f_loadd,*,*,alu_reg,load2,store2") - (set_attr "insn" "*,*,*,*,*,*,mov,*,*") + (set_attr "type" "*,f_stored,*,f_loadd,*,*,mov_reg,load2,store2") (set_attr "length" "4,4,4,4,4,4,8,8,8") (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*") (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*") @@ -107,8 +106,7 @@ } [(set_attr "neon_type" "neon_int_1,neon_stm_2,neon_vmov,neon_ldm_2,\ neon_mrrc,neon_mcr_2_mcrr,*,*,*") - (set_attr "type" "*,*,*,*,*,*,alu_reg,load4,store4") - (set_attr "insn" "*,*,*,*,*,*,mov,*,*") + (set_attr "type" "*,*,*,*,*,*,mov_reg,load4,store4") (set_attr "length" "4,8,4,8,8,8,16,8,16") (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*") (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*") @@ -487,7 +485,7 @@ [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1,*,*,*") (set_attr "conds" "*,clob,clob,*,clob,clob,clob") (set_attr "length" "*,8,8,*,8,8,8") - (set_attr "arch" "nota8,*,*,onlya8,*,*,*")] + (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")] ) (define_insn "*sub3_neon" @@ -524,7 +522,7 @@ [(set_attr "neon_type" "neon_int_2,*,*,*,neon_int_2") (set_attr "conds" "*,clob,clob,clob,*") (set_attr "length" "*,8,8,8,*") - (set_attr "arch" "nota8,*,*,*,onlya8")] + (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")] ) (define_insn "*mul3_neon" @@ -679,29 +677,6 @@ [(set_attr "neon_type" "neon_int_1")] ) -(define_insn "iordi3_neon" - [(set (match_operand:DI 0 "s_register_operand" "=w,w,?&r,?&r,?w,?w") - (ior:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,r,w,0") - (match_operand:DI 2 "neon_logic_op2" "w,Dl,r,r,w,Dl")))] - "TARGET_NEON" -{ - switch (which_alternative) - { - case 0: /* fall through */ - case 4: return "vorr\t%P0, %P1, %P2"; - case 1: /* fall through */ - case 5: return neon_output_logic_immediate ("vorr", &operands[2], - DImode, 0, VALID_NEON_QREG_MODE (DImode)); - case 2: return "#"; - case 3: return "#"; - default: gcc_unreachable (); - } -} - [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,neon_int_1,neon_int_1") - (set_attr "length" "*,*,8,8,*,*") - (set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")] -) - ;; The concrete forms of the Neon immediate-logic instructions are vbic and ;; vorr. 
We support the pseudo-instruction vand instead, because that ;; corresponds to the canonical form the middle-end expects to use for @@ -724,29 +699,6 @@ [(set_attr "neon_type" "neon_int_1")] ) -(define_insn "anddi3_neon" - [(set (match_operand:DI 0 "s_register_operand" "=w,w,?&r,?&r,?w,?w") - (and:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,r,w,0") - (match_operand:DI 2 "neon_inv_logic_op2" "w,DL,r,r,w,DL")))] - "TARGET_NEON" -{ - switch (which_alternative) - { - case 0: /* fall through */ - case 4: return "vand\t%P0, %P1, %P2"; - case 1: /* fall through */ - case 5: return neon_output_logic_immediate ("vand", &operands[2], - DImode, 1, VALID_NEON_QREG_MODE (DImode)); - case 2: return "#"; - case 3: return "#"; - default: gcc_unreachable (); - } -} - [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,neon_int_1,neon_int_1") - (set_attr "length" "*,*,8,8,*,*") - (set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")] -) - (define_insn "orn3_neon" [(set (match_operand:VDQ 0 "s_register_operand" "=w") (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w")) @@ -828,21 +780,6 @@ [(set_attr "neon_type" "neon_int_1")] ) -(define_insn "xordi3_neon" - [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w") - (xor:DI (match_operand:DI 1 "s_register_operand" "%w,0,r,w") - (match_operand:DI 2 "s_register_operand" "w,r,r,w")))] - "TARGET_NEON" - "@ - veor\t%P0, %P1, %P2 - # - # - veor\t%P0, %P1, %P2" - [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1") - (set_attr "length" "*,8,8,*") - (set_attr "arch" "nota8,*,*,onlya8")] -) - (define_insn "one_cmpl2" [(set (match_operand:VDQ 0 "s_register_operand" "=w") (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))] @@ -1162,7 +1099,7 @@ } DONE; }" - [(set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8") + [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits") (set_attr "opt" "*,*,speed,speed,*,*")] ) @@ -1263,7 +1200,7 @@ DONE; }" - [(set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8") + [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits") (set_attr "opt" "*,*,speed,speed,*,*")] ) @@ -3305,6 +3242,24 @@ (const_string "neon_fp_vadd_qqq_vabs_qq")))] ) +(define_insn "neon_vcvtv4sfv4hf" + [(set (match_operand:V4SF 0 "s_register_operand" "=w") + (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")] + UNSPEC_VCVT))] + "TARGET_NEON && TARGET_FP16" + "vcvt.f32.f16\t%q0, %P1" + [(set_attr "neon_type" "neon_fp_vadd_ddd_vabs_dd")] +) + +(define_insn "neon_vcvtv4hfv4sf" + [(set (match_operand:V4HF 0 "s_register_operand" "=w") + (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")] + UNSPEC_VCVT))] + "TARGET_NEON && TARGET_FP16" + "vcvt.f16.f32\t%P0, %q1" + [(set_attr "neon_type" "neon_fp_vadd_ddd_vabs_dd")] +) + (define_insn "neon_vcvt_n" [(set (match_operand: 0 "s_register_operand" "=w") (unspec: [(match_operand:VCVTF 1 "s_register_operand" "w") @@ -4545,9 +4500,19 @@ DONE; }) +(define_expand "neon_vreinterpretti" + [(match_operand:TI 0 "s_register_operand" "") + (match_operand:VQXMOV 1 "s_register_operand" "")] + "TARGET_NEON" +{ + neon_reinterpret (operands[0], operands[1]); + DONE; +}) + + (define_expand "neon_vreinterpretv16qi" [(match_operand:V16QI 0 "s_register_operand" "") - (match_operand:VQX 1 "s_register_operand" "")] + (match_operand:VQXMOV 1 "s_register_operand" "")] "TARGET_NEON" { neon_reinterpret (operands[0], operands[1]); @@ -4556,7 +4521,7 @@ (define_expand "neon_vreinterpretv8hi" [(match_operand:V8HI 0 "s_register_operand" 
"") - (match_operand:VQX 1 "s_register_operand" "")] + (match_operand:VQXMOV 1 "s_register_operand" "")] "TARGET_NEON" { neon_reinterpret (operands[0], operands[1]); @@ -4565,7 +4530,7 @@ (define_expand "neon_vreinterpretv4si" [(match_operand:V4SI 0 "s_register_operand" "") - (match_operand:VQX 1 "s_register_operand" "")] + (match_operand:VQXMOV 1 "s_register_operand" "")] "TARGET_NEON" { neon_reinterpret (operands[0], operands[1]); @@ -4574,7 +4539,7 @@ (define_expand "neon_vreinterpretv4sf" [(match_operand:V4SF 0 "s_register_operand" "") - (match_operand:VQX 1 "s_register_operand" "")] + (match_operand:VQXMOV 1 "s_register_operand" "")] "TARGET_NEON" { neon_reinterpret (operands[0], operands[1]); @@ -4583,7 +4548,7 @@ (define_expand "neon_vreinterpretv2di" [(match_operand:V2DI 0 "s_register_operand" "") - (match_operand:VQX 1 "s_register_operand" "")] + (match_operand:VQXMOV 1 "s_register_operand" "")] "TARGET_NEON" { neon_reinterpret (operands[0], operands[1]); @@ -4660,21 +4625,22 @@ ) (define_insn "neon_vld1_dup" - [(set (match_operand:VDX 0 "s_register_operand" "=w") - (vec_duplicate:VDX (match_operand: 1 "neon_struct_operand" "Um")))] + [(set (match_operand:VD 0 "s_register_operand" "=w") + (vec_duplicate:VD (match_operand: 1 "neon_struct_operand" "Um")))] "TARGET_NEON" -{ - if (GET_MODE_NUNITS (mode) > 1) - return "vld1.\t{%P0[]}, %A1"; - else - return "vld1.\t%h0, %A1"; -} - [(set (attr "neon_type") - (if_then_else (gt (const_string "") (const_string "1")) - (const_string "neon_vld2_2_regs_vld1_vld2_all_lanes") - (const_string "neon_vld1_1_2_regs")))] + "vld1.\t{%P0[]}, %A1" + [(set_attr "neon_type" "neon_vld2_2_regs_vld1_vld2_all_lanes")] ) +;; Special case for DImode. Treat it exactly like a simple load. +(define_expand "neon_vld1_dupdi" + [(set (match_operand:DI 0 "s_register_operand" "") + (unspec:DI [(match_operand:DI 1 "neon_struct_operand" "")] + UNSPEC_VLD1))] + "TARGET_NEON" + "" +) + (define_insn "neon_vld1_dup" [(set (match_operand:VQ 0 "s_register_operand" "=w") (vec_duplicate:VQ (match_operand: 1 "neon_struct_operand" "Um")))] @@ -5635,7 +5601,7 @@ (match_operand:SI 3 "immediate_operand" "")] "TARGET_NEON" { - emit_insn (gen_and3 (operands[0], operands[1], operands[2])); + emit_insn (gen_and3 (operands[0], operands[1], operands[2])); DONE; }) @@ -5646,7 +5612,7 @@ (match_operand:SI 3 "immediate_operand" "")] "TARGET_NEON" { - emit_insn (gen_ior3 (operands[0], operands[1], operands[2])); + emit_insn (gen_ior3 (operands[0], operands[1], operands[2])); DONE; }) @@ -5657,7 +5623,7 @@ (match_operand:SI 3 "immediate_operand" "")] "TARGET_NEON" { - emit_insn (gen_xor3 (operands[0], operands[1], operands[2])); + emit_insn (gen_xor3 (operands[0], operands[1], operands[2])); DONE; }) --- a/src/gcc/config/arm/ldmstm.md +++ b/src/gcc/config/arm/ldmstm.md @@ -37,7 +37,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" "ldm%(ia%)\t%5, {%1, %2, %3, %4}" [(set_attr "type" "load4") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb_ldm4_ia" [(match_parallel 0 "load_multiple_operation" @@ -74,7 +75,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 5" "ldm%(ia%)\t%5!, {%1, %2, %3, %4}" [(set_attr "type" "load4") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb_ldm4_ia_update" [(match_parallel 0 "load_multiple_operation" @@ -108,7 +110,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" "stm%(ia%)\t%5, {%1, %2, %3, %4}" 
[(set_attr "type" "store4") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*stm4_ia_update" [(match_parallel 0 "store_multiple_operation" @@ -125,7 +128,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 5" "stm%(ia%)\t%5!, {%1, %2, %3, %4}" [(set_attr "type" "store4") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb_stm4_ia_update" [(match_parallel 0 "store_multiple_operation" @@ -302,7 +306,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" "ldm%(db%)\t%5, {%1, %2, %3, %4}" [(set_attr "type" "load4") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*ldm4_db_update" [(match_parallel 0 "load_multiple_operation" @@ -323,7 +328,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 5" "ldm%(db%)\t%5!, {%1, %2, %3, %4}" [(set_attr "type" "load4") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*stm4_db" [(match_parallel 0 "store_multiple_operation" @@ -338,7 +344,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" "stm%(db%)\t%5, {%1, %2, %3, %4}" [(set_attr "type" "store4") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*stm4_db_update" [(match_parallel 0 "store_multiple_operation" @@ -355,7 +362,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 5" "stm%(db%)\t%5!, {%1, %2, %3, %4}" [(set_attr "type" "store4") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_peephole2 [(set (match_operand:SI 0 "s_register_operand" "") @@ -477,7 +485,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" "ldm%(ia%)\t%4, {%1, %2, %3}" [(set_attr "type" "load3") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb_ldm3_ia" [(match_parallel 0 "load_multiple_operation" @@ -508,7 +517,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" "ldm%(ia%)\t%4!, {%1, %2, %3}" [(set_attr "type" "load3") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb_ldm3_ia_update" [(match_parallel 0 "load_multiple_operation" @@ -537,7 +547,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" "stm%(ia%)\t%4, {%1, %2, %3}" [(set_attr "type" "store3") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*stm3_ia_update" [(match_parallel 0 "store_multiple_operation" @@ -552,7 +563,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" "stm%(ia%)\t%4!, {%1, %2, %3}" [(set_attr "type" "store3") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb_stm3_ia_update" [(match_parallel 0 "store_multiple_operation" @@ -704,7 +716,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" "ldm%(db%)\t%4, {%1, %2, %3}" [(set_attr "type" "load3") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*ldm3_db_update" [(match_parallel 0 "load_multiple_operation" @@ -722,7 +735,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" "ldm%(db%)\t%4!, {%1, %2, %3}" [(set_attr "type" "load3") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr 
"predicable_short_it" "no")]) (define_insn "*stm3_db" [(match_parallel 0 "store_multiple_operation" @@ -735,7 +749,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" "stm%(db%)\t%4, {%1, %2, %3}" [(set_attr "type" "store3") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*stm3_db_update" [(match_parallel 0 "store_multiple_operation" @@ -750,7 +765,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" "stm%(db%)\t%4!, {%1, %2, %3}" [(set_attr "type" "store3") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_peephole2 [(set (match_operand:SI 0 "s_register_operand" "") @@ -855,7 +871,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" "ldm%(ia%)\t%3, {%1, %2}" [(set_attr "type" "load2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb_ldm2_ia" [(match_parallel 0 "load_multiple_operation" @@ -880,7 +897,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" "ldm%(ia%)\t%3!, {%1, %2}" [(set_attr "type" "load2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb_ldm2_ia_update" [(match_parallel 0 "load_multiple_operation" @@ -904,7 +922,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" "stm%(ia%)\t%3, {%1, %2}" [(set_attr "type" "store2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*stm2_ia_update" [(match_parallel 0 "store_multiple_operation" @@ -917,7 +936,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" "stm%(ia%)\t%3!, {%1, %2}" [(set_attr "type" "store2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb_stm2_ia_update" [(match_parallel 0 "store_multiple_operation" @@ -1044,7 +1064,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" "ldm%(db%)\t%3, {%1, %2}" [(set_attr "type" "load2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*ldm2_db_update" [(match_parallel 0 "load_multiple_operation" @@ -1059,7 +1080,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" "ldm%(db%)\t%3!, {%1, %2}" [(set_attr "type" "load2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*stm2_db" [(match_parallel 0 "store_multiple_operation" @@ -1070,7 +1092,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" "stm%(db%)\t%3, {%1, %2}" [(set_attr "type" "store2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*stm2_db_update" [(match_parallel 0 "store_multiple_operation" @@ -1083,7 +1106,8 @@ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" "stm%(db%)\t%3!, {%1, %2}" [(set_attr "type" "store2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_peephole2 [(set (match_operand:SI 0 "s_register_operand" "") --- a/src/gcc/config/arm/arm_neon_builtins.def +++ b/src/gcc/config/arm/arm_neon_builtins.def @@ -0,0 +1,213 @@ +/* NEON builtin definitions for ARM. + Copyright (C) 2013 + Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. 
+ + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +VAR10 (BINOP, vadd, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR3 (BINOP, vaddl, v8qi, v4hi, v2si), +VAR3 (BINOP, vaddw, v8qi, v4hi, v2si), +VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si), +VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), +VAR3 (BINOP, vaddhn, v8hi, v4si, v2di), +VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si), +VAR2 (TERNOP, vfma, v2sf, v4sf), +VAR2 (TERNOP, vfms, v2sf, v4sf), +VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si), +VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si), +VAR2 (TERNOP, vqdmlal, v4hi, v2si), +VAR2 (TERNOP, vqdmlsl, v4hi, v2si), +VAR3 (BINOP, vmull, v8qi, v4hi, v2si), +VAR2 (SCALARMULL, vmull_n, v4hi, v2si), +VAR2 (LANEMULL, vmull_lane, v4hi, v2si), +VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si), +VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si), +VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si), +VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si), +VAR2 (BINOP, vqdmull, v4hi, v2si), +VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), +VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), +VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), +VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di), +VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di), +VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di), +VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), +VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), +VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), +VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si), +VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), +VAR10 (BINOP, vsub, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR3 (BINOP, vsubl, v8qi, v4hi, v2si), +VAR3 (BINOP, vsubw, v8qi, v4hi, v2si), +VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), +VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si), +VAR3 (BINOP, vsubhn, v8hi, v4si, v2di), +VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +VAR6 (BINOP, vcgeu, v8qi, v4hi, v2si, v16qi, v8hi, v4si), +VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +VAR6 (BINOP, vcgtu, v8qi, v4hi, v2si, v16qi, v8hi, v4si), +VAR2 (BINOP, vcage, v2sf, v4sf), +VAR2 (BINOP, vcagt, v2sf, v4sf), +VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si), +VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +VAR3 (BINOP, vabdl, v8qi, v4hi, v2si), +VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si), +VAR3 (TERNOP, vabal, v8qi, v4hi, v2si), +VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +VAR8 (BINOP, vmin, v8qi, v4hi, v2si, 
v2sf, v16qi, v8hi, v4si, v4sf), +VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf), +VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si), +VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si), +VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf), +VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf), +VAR2 (BINOP, vrecps, v2sf, v4sf), +VAR2 (BINOP, vrsqrts, v2sf, v4sf), +VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), +VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), +VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si), +VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si), +VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si), +VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si), +VAR2 (UNOP, vcnt, v8qi, v16qi), +VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf), +VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf), +VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si), + /* FIXME: vget_lane supports more variants than this! */ +VAR10 (GETLANE, vget_lane, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR10 (SETLANE, vset_lane, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di), +VAR10 (DUP, vdup_n, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR10 (DUPLANE, vdup_lane, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di), +VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di), +VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di), +VAR3 (UNOP, vmovn, v8hi, v4si, v2di), +VAR3 (UNOP, vqmovn, v8hi, v4si, v2di), +VAR3 (UNOP, vqmovun, v8hi, v4si, v2di), +VAR3 (UNOP, vmovl, v8qi, v4hi, v2si), +VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf), +VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf), +VAR2 (LANEMAC, vmlal_lane, v4hi, v2si), +VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si), +VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf), +VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si), +VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si), +VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf), +VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf), +VAR2 (SCALARMAC, vmlal_n, v4hi, v2si), +VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si), +VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf), +VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si), +VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si), +VAR10 (BINOP, vext, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi), +VAR2 (UNOP, vrev16, v8qi, v16qi), +VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf), +VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf), +VAR1 (FLOAT_WIDEN, vcvtv4sf, v4hf), +VAR1 (FLOAT_NARROW, vcvtv4hf, v4sf), +VAR10 (SELECT, vbsl, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR2 (RINT, vrintn, v2sf, v4sf), +VAR2 (RINT, vrinta, v2sf, v4sf), +VAR2 (RINT, vrintp, v2sf, v4sf), +VAR2 (RINT, vrintm, v2sf, v4sf), +VAR2 (RINT, vrintz, v2sf, v4sf), +VAR2 (RINT, vrintx, v2sf, v4sf), +VAR1 (VTBL, vtbl1, v8qi), +VAR1 (VTBL, vtbl2, v8qi), +VAR1 (VTBL, vtbl3, v8qi), +VAR1 (VTBL, vtbl4, v8qi), +VAR1 (VTBX, vtbx1, v8qi), +VAR1 (VTBX, vtbx2, v8qi), +VAR1 (VTBX, vtbx3, v8qi), +VAR1 (VTBX, vtbx4, v8qi), +VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), 
+VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di), +VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di), +VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di), +VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di), +VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di), +VAR6 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di, ti), +VAR6 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di, ti), +VAR6 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di, ti), +VAR6 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di, ti), +VAR6 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di, ti), +VAR6 (REINTERP, vreinterpretti, v16qi, v8hi, v4si, v4sf, v2di, ti), +VAR10 (LOAD1, vld1, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR10 (LOAD1LANE, vld1_lane, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR10 (LOAD1, vld1_dup, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR10 (STORE1, vst1, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR10 (STORE1LANE, vst1_lane, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR9 (LOADSTRUCT, + vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf), +VAR7 (LOADSTRUCTLANE, vld2_lane, + v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf), +VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di), +VAR9 (STORESTRUCT, vst2, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf), +VAR7 (STORESTRUCTLANE, vst2_lane, + v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf), +VAR9 (LOADSTRUCT, + vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf), +VAR7 (LOADSTRUCTLANE, vld3_lane, + v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf), +VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di), +VAR9 (STORESTRUCT, vst3, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf), +VAR7 (STORESTRUCTLANE, vst3_lane, + v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf), +VAR9 (LOADSTRUCT, vld4, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf), +VAR7 (LOADSTRUCTLANE, vld4_lane, + v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf), +VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di), +VAR9 (STORESTRUCT, vst4, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf), +VAR7 (STORESTRUCTLANE, vst4_lane, + v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf), +VAR10 (LOGICBINOP, vand, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR10 (LOGICBINOP, vorr, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR10 (BINOP, veor, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR10 (LOGICBINOP, vbic, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR10 (LOGICBINOP, vorn, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) --- a/src/gcc/config/arm/neon.ml +++ b/src/gcc/config/arm/neon.ml @@ -21,8 +21,8 @@ . *) (* Shorthand types for vector elements. 
*) -type elts = S8 | S16 | S32 | S64 | F32 | U8 | U16 | U32 | U64 | P8 | P16 - | I8 | I16 | I32 | I64 | B8 | B16 | B32 | B64 | Conv of elts * elts +type elts = S8 | S16 | S32 | S64 | F16 | F32 | U8 | U16 | U32 | U64 | P8 | P16 + | P64 | P128 | I8 | I16 | I32 | I64 | B8 | B16 | B32 | B64 | Conv of elts * elts | Cast of elts * elts | NoElts type eltclass = Signed | Unsigned | Float | Poly | Int | Bits @@ -37,6 +37,7 @@ | T_uint16x4 | T_uint16x8 | T_uint32x2 | T_uint32x4 | T_uint64x1 | T_uint64x2 + | T_float16x4 | T_float32x2 | T_float32x4 | T_poly8x8 | T_poly8x16 | T_poly16x4 | T_poly16x8 @@ -46,11 +47,15 @@ | T_uint8 | T_uint16 | T_uint32 | T_uint64 | T_poly8 | T_poly16 - | T_float32 | T_arrayof of int * vectype + | T_poly64 | T_poly64x1 + | T_poly64x2 | T_poly128 + | T_float16 | T_float32 + | T_arrayof of int * vectype | T_ptrto of vectype | T_const of vectype | T_void | T_intQI | T_intHI | T_intSI - | T_intDI | T_floatSF + | T_intDI | T_intTI + | T_floatHF | T_floatSF (* The meanings of the following are: TImode : "Tetra", two registers (four words). @@ -92,8 +97,8 @@ | Arity3 of vectype * vectype * vectype * vectype | Arity4 of vectype * vectype * vectype * vectype * vectype -type vecmode = V8QI | V4HI | V2SI | V2SF | DI - | V16QI | V8HI | V4SI | V4SF | V2DI +type vecmode = V8QI | V4HI | V4HF |V2SI | V2SF | DI + | V16QI | V8HI | V4SI | V4SF | V2DI | TI | QI | HI | SI | SF type opcode = @@ -284,18 +289,23 @@ | Fixed_core_reg (* Mark that the intrinsic requires __ARM_FEATURE_string to be defined. *) | Requires_feature of string + (* Mark that the intrinsic requires a particular architecture version. *) | Requires_arch of int + (* Mark that the intrinsic requires a particular bit in __ARM_FP to + be set. *) + | Requires_FP_bit of int exception MixedMode of elts * elts let rec elt_width = function S8 | U8 | P8 | I8 | B8 -> 8 - | S16 | U16 | P16 | I16 | B16 -> 16 + | S16 | U16 | P16 | I16 | B16 | F16 -> 16 | S32 | F32 | U32 | I32 | B32 -> 32 - | S64 | U64 | I64 | B64 -> 64 + | S64 | U64 | P64 | I64 | B64 -> 64 + | P128 -> 128 | Conv (a, b) -> let wa = elt_width a and wb = elt_width b in - if wa = wb then wa else failwith "element width?" + if wa = wb then wa else raise (MixedMode (a, b)) | Cast (a, b) -> raise (MixedMode (a, b)) | NoElts -> failwith "No elts" @@ -302,8 +312,8 @@ let rec elt_class = function S8 | S16 | S32 | S64 -> Signed | U8 | U16 | U32 | U64 -> Unsigned - | P8 | P16 -> Poly - | F32 -> Float + | P8 | P16 | P64 | P128 -> Poly + | F16 | F32 -> Float | I8 | I16 | I32 | I64 -> Int | B8 | B16 | B32 | B64 -> Bits | Conv (a, b) | Cast (a, b) -> ConvClass (elt_class a, elt_class b) @@ -315,6 +325,7 @@ | Signed, 16 -> S16 | Signed, 32 -> S32 | Signed, 64 -> S64 + | Float, 16 -> F16 | Float, 32 -> F32 | Unsigned, 8 -> U8 | Unsigned, 16 -> U16 @@ -322,6 +333,8 @@ | Unsigned, 64 -> U64 | Poly, 8 -> P8 | Poly, 16 -> P16 + | Poly, 64 -> P64 + | Poly, 128 -> P128 | Int, 8 -> I8 | Int, 16 -> I16 | Int, 32 -> I32 @@ -384,20 +397,28 @@ in scan ((Array.length operands) - 1) -let rec mode_of_elt elt shape = +(* Find a vecmode from a shape_elt ELT for an instruction with shape_form + SHAPE. For a Use_operands shape, if ARGPOS is passed then return the mode + for the given argument position, else determine which argument to return a + mode for automatically. 
*) + +let rec mode_of_elt ?argpos elt shape = let flt = match elt_class elt with Float | ConvClass(_, Float) -> true | _ -> false in let idx = match elt_width elt with - 8 -> 0 | 16 -> 1 | 32 -> 2 | 64 -> 3 + 8 -> 0 | 16 -> 1 | 32 -> 2 | 64 -> 3 | 128 -> 4 | _ -> failwith "Bad element width" in match shape with All (_, Dreg) | By_scalar Dreg | Pair_result Dreg | Unary_scalar Dreg | Binary_imm Dreg | Long_noreg Dreg | Wide_noreg Dreg -> - [| V8QI; V4HI; if flt then V2SF else V2SI; DI |].(idx) + if flt then + [| V8QI; V4HF; V2SF; DI |].(idx) + else + [| V8QI; V4HI; V2SI; DI |].(idx) | All (_, Qreg) | By_scalar Qreg | Pair_result Qreg | Unary_scalar Qreg | Binary_imm Qreg | Long_noreg Qreg | Wide_noreg Qreg -> - [| V16QI; V8HI; if flt then V4SF else V4SI; V2DI |].(idx) + [| V16QI; V8HI; if flt then V4SF else V4SI; V2DI; TI|].(idx) | All (_, (Corereg | PtrTo _ | CstPtrTo _)) -> [| QI; HI; if flt then SF else SI; DI |].(idx) | Long | Wide | Wide_lane | Wide_scalar @@ -404,7 +425,11 @@ | Long_imm -> [| V8QI; V4HI; V2SI; DI |].(idx) | Narrow | Narrow_imm -> [| V16QI; V8HI; V4SI; V2DI |].(idx) - | Use_operands ops -> mode_of_elt elt (All (0, (find_key_operand ops))) + | Use_operands ops -> + begin match argpos with + None -> mode_of_elt ?argpos elt (All (0, (find_key_operand ops))) + | Some pos -> mode_of_elt ?argpos elt (All (0, ops.(pos))) + end | _ -> failwith "invalid shape" (* Modify an element type dependent on the shape of the instruction and the @@ -454,10 +479,13 @@ | U16 -> T_uint16x4 | U32 -> T_uint32x2 | U64 -> T_uint64x1 + | P64 -> T_poly64x1 + | P128 -> T_poly128 + | F16 -> T_float16x4 | F32 -> T_float32x2 | P8 -> T_poly8x8 | P16 -> T_poly16x4 - | _ -> failwith "Bad elt type" + | _ -> failwith "Bad elt type for Dreg" end | Qreg -> begin match elt with @@ -472,7 +500,9 @@ | F32 -> T_float32x4 | P8 -> T_poly8x16 | P16 -> T_poly16x8 - | _ -> failwith "Bad elt type" + | P64 -> T_poly64x2 + | P128 -> T_poly128 + | _ -> failwith "Bad elt type for Qreg" end | Corereg -> begin match elt with @@ -486,8 +516,10 @@ | U64 -> T_uint64 | P8 -> T_poly8 | P16 -> T_poly16 + | P64 -> T_poly64 + | P128 -> T_poly128 | F32 -> T_float32 - | _ -> failwith "Bad elt type" + | _ -> failwith "Bad elt type for Corereg" end | Immed -> T_immediate (0, 0) @@ -506,10 +538,10 @@ let vectype_size = function T_int8x8 | T_int16x4 | T_int32x2 | T_int64x1 | T_uint8x8 | T_uint16x4 | T_uint32x2 | T_uint64x1 - | T_float32x2 | T_poly8x8 | T_poly16x4 -> 64 + | T_float32x2 | T_poly8x8 | T_poly64x1 | T_poly16x4 | T_float16x4 -> 64 | T_int8x16 | T_int16x8 | T_int32x4 | T_int64x2 | T_uint8x16 | T_uint16x8 | T_uint32x4 | T_uint64x2 - | T_float32x4 | T_poly8x16 | T_poly16x8 -> 128 + | T_float32x4 | T_poly8x16 | T_poly64x2 | T_poly16x8 -> 128 | _ -> raise Not_found let inttype_for_array num elttype = @@ -1020,14 +1052,22 @@ "vRsraQ_n", shift_right_acc, su_8_64; (* Vector shift right and insert. *) + Vsri, [Requires_feature "CRYPTO"], Use_operands [| Dreg; Dreg; Immed |], "vsri_n", shift_insert, + [P64]; Vsri, [], Use_operands [| Dreg; Dreg; Immed |], "vsri_n", shift_insert, P8 :: P16 :: su_8_64; + Vsri, [Requires_feature "CRYPTO"], Use_operands [| Qreg; Qreg; Immed |], "vsriQ_n", shift_insert, + [P64]; Vsri, [], Use_operands [| Qreg; Qreg; Immed |], "vsriQ_n", shift_insert, P8 :: P16 :: su_8_64; (* Vector shift left and insert. 
*) + Vsli, [Requires_feature "CRYPTO"], Use_operands [| Dreg; Dreg; Immed |], "vsli_n", shift_insert, + [P64]; Vsli, [], Use_operands [| Dreg; Dreg; Immed |], "vsli_n", shift_insert, P8 :: P16 :: su_8_64; + Vsli, [Requires_feature "CRYPTO"], Use_operands [| Qreg; Qreg; Immed |], "vsliQ_n", shift_insert, + [P64]; Vsli, [], Use_operands [| Qreg; Qreg; Immed |], "vsliQ_n", shift_insert, P8 :: P16 :: su_8_64; @@ -1114,6 +1154,11 @@ (* Create vector from literal bit pattern. *) Vcreate, + [Requires_feature "CRYPTO"; No_op], (* Not really, but it can yield various things that are too + hard for the test generator at this time. *) + Use_operands [| Dreg; Corereg |], "vcreate", create_vector, + [P64]; + Vcreate, [No_op], (* Not really, but it can yield various things that are too hard for the test generator at this time. *) Use_operands [| Dreg; Corereg |], "vcreate", create_vector, @@ -1127,6 +1172,12 @@ Use_operands [| Dreg; Corereg |], "vdup_n", bits_1, pf_su_8_32; Vdup_n, + [No_op; Requires_feature "CRYPTO"; + Instruction_name ["vmov"]; + Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]], + Use_operands [| Dreg; Corereg |], "vdup_n", notype_1, + [P64]; + Vdup_n, [No_op; Instruction_name ["vmov"]; Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]], @@ -1133,6 +1184,13 @@ Use_operands [| Dreg; Corereg |], "vdup_n", notype_1, [S64; U64]; Vdup_n, + [No_op; Requires_feature "CRYPTO"; + Disassembles_as [Use_operands [| Qreg; + Alternatives [ Corereg; + Element_of_dreg ] |]]], + Use_operands [| Qreg; Corereg |], "vdupQ_n", bits_1, + [P64]; + Vdup_n, [Disassembles_as [Use_operands [| Qreg; Alternatives [ Corereg; Element_of_dreg ] |]]], @@ -1185,6 +1243,9 @@ [Disassembles_as [Use_operands [| Dreg; Element_of_dreg |]]], Unary_scalar Dreg, "vdup_lane", bits_2, pf_su_8_32; Vdup_lane, + [No_op; Requires_feature "CRYPTO"; Const_valuator (fun _ -> 0)], + Unary_scalar Dreg, "vdup_lane", bits_2, [P64]; + Vdup_lane, [No_op; Const_valuator (fun _ -> 0)], Unary_scalar Dreg, "vdup_lane", bits_2, [S64; U64]; Vdup_lane, @@ -1191,15 +1252,24 @@ [Disassembles_as [Use_operands [| Qreg; Element_of_dreg |]]], Unary_scalar Qreg, "vdupQ_lane", bits_2, pf_su_8_32; Vdup_lane, + [No_op; Requires_feature "CRYPTO"; Const_valuator (fun _ -> 0)], + Unary_scalar Qreg, "vdupQ_lane", bits_2, [P64]; + Vdup_lane, [No_op; Const_valuator (fun _ -> 0)], Unary_scalar Qreg, "vdupQ_lane", bits_2, [S64; U64]; (* Combining vectors. *) + Vcombine, [Requires_feature "CRYPTO"; No_op], + Use_operands [| Qreg; Dreg; Dreg |], "vcombine", notype_2, + [P64]; Vcombine, [No_op], Use_operands [| Qreg; Dreg; Dreg |], "vcombine", notype_2, pf_su_8_64; (* Splitting vectors. *) + Vget_high, [Requires_feature "CRYPTO"; No_op], + Use_operands [| Dreg; Qreg |], "vget_high", + notype_1, [P64]; Vget_high, [No_op], Use_operands [| Dreg; Qreg |], "vget_high", notype_1, pf_su_8_64; @@ -1208,8 +1278,11 @@ Fixed_vector_reg], Use_operands [| Dreg; Qreg |], "vget_low", notype_1, pf_su_8_32; - Vget_low, [No_op], + Vget_low, [Requires_feature "CRYPTO"; No_op], Use_operands [| Dreg; Qreg |], "vget_low", + notype_1, [P64]; + Vget_low, [No_op], + Use_operands [| Dreg; Qreg |], "vget_low", notype_1, [S64; U64]; (* Conversions. 
*) @@ -1217,6 +1290,10 @@ [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)]; Vcvt, [InfoWord], All (2, Qreg), "vcvtQ", conv_1, [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)]; + Vcvt, [Builtin_name "vcvt" ; Requires_FP_bit 1], + Use_operands [| Dreg; Qreg; |], "vcvt", conv_1, [Conv (F16, F32)]; + Vcvt, [Builtin_name "vcvt" ; Requires_FP_bit 1], + Use_operands [| Qreg; Dreg; |], "vcvt", conv_1, [Conv (F32, F16)]; Vcvt_n, [InfoWord], Use_operands [| Dreg; Dreg; Immed |], "vcvt_n", conv_2, [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)]; Vcvt_n, [InfoWord], Use_operands [| Qreg; Qreg; Immed |], "vcvtQ_n", conv_2, @@ -1387,9 +1464,15 @@ [S16; S32]; (* Vector extract. *) + Vext, [Requires_feature "CRYPTO"; Const_valuator (fun _ -> 0)], + Use_operands [| Dreg; Dreg; Dreg; Immed |], "vext", extend, + [P64]; Vext, [Const_valuator (fun _ -> 0)], Use_operands [| Dreg; Dreg; Dreg; Immed |], "vext", extend, pf_su_8_64; + Vext, [Requires_feature "CRYPTO"; Const_valuator (fun _ -> 0)], + Use_operands [| Qreg; Qreg; Qreg; Immed |], "vextQ", extend, + [P64]; Vext, [Const_valuator (fun _ -> 0)], Use_operands [| Qreg; Qreg; Qreg; Immed |], "vextQ", extend, pf_su_8_64; @@ -1410,11 +1493,21 @@ (* Bit selection. *) Vbsl, + [Requires_feature "CRYPTO"; Instruction_name ["vbsl"; "vbit"; "vbif"]; + Disassembles_as [Use_operands [| Dreg; Dreg; Dreg |]]], + Use_operands [| Dreg; Dreg; Dreg; Dreg |], "vbsl", bit_select, + [P64]; + Vbsl, [Instruction_name ["vbsl"; "vbit"; "vbif"]; Disassembles_as [Use_operands [| Dreg; Dreg; Dreg |]]], Use_operands [| Dreg; Dreg; Dreg; Dreg |], "vbsl", bit_select, pf_su_8_64; Vbsl, + [Requires_feature "CRYPTO"; Instruction_name ["vbsl"; "vbit"; "vbif"]; + Disassembles_as [Use_operands [| Qreg; Qreg; Qreg |]]], + Use_operands [| Qreg; Qreg; Qreg; Qreg |], "vbslQ", bit_select, + [P64]; + Vbsl, [Instruction_name ["vbsl"; "vbit"; "vbif"]; Disassembles_as [Use_operands [| Qreg; Qreg; Qreg |]]], Use_operands [| Qreg; Qreg; Qreg; Qreg |], "vbslQ", bit_select, @@ -1436,10 +1529,21 @@ (* Element/structure loads. VLD1 variants. 
*) Vldx 1, + [Requires_feature "CRYPTO"; + Disassembles_as [Use_operands [| VecArray (1, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| Dreg; CstPtrTo Corereg |], "vld1", bits_1, + [P64]; + Vldx 1, [Disassembles_as [Use_operands [| VecArray (1, Dreg); CstPtrTo Corereg |]]], Use_operands [| Dreg; CstPtrTo Corereg |], "vld1", bits_1, pf_su_8_64; + Vldx 1, [Requires_feature "CRYPTO"; + Disassembles_as [Use_operands [| VecArray (2, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q", bits_1, + [P64]; Vldx 1, [Disassembles_as [Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |]]], Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q", bits_1, @@ -1451,6 +1555,13 @@ Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |], "vld1_lane", bits_3, pf_su_8_32; Vldx_lane 1, + [Requires_feature "CRYPTO"; + Disassembles_as [Use_operands [| VecArray (1, Dreg); + CstPtrTo Corereg |]]; + Const_valuator (fun _ -> 0)], + Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |], + "vld1_lane", bits_3, [P64]; + Vldx_lane 1, [Disassembles_as [Use_operands [| VecArray (1, Dreg); CstPtrTo Corereg |]]; Const_valuator (fun _ -> 0)], @@ -1462,6 +1573,12 @@ Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |], "vld1Q_lane", bits_3, pf_su_8_32; Vldx_lane 1, + [Requires_feature "CRYPTO"; + Disassembles_as [Use_operands [| VecArray (1, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |], + "vld1Q_lane", bits_3, [P64]; + Vldx_lane 1, [Disassembles_as [Use_operands [| VecArray (1, Dreg); CstPtrTo Corereg |]]], Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |], @@ -1473,6 +1590,12 @@ Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup", bits_1, pf_su_8_32; Vldx_dup 1, + [Requires_feature "CRYPTO"; + Disassembles_as [Use_operands [| VecArray (1, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup", + bits_1, [P64]; + Vldx_dup 1, [Disassembles_as [Use_operands [| VecArray (1, Dreg); CstPtrTo Corereg |]]], Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup", @@ -1485,6 +1608,12 @@ (* Treated identically to vld1_dup above as we now do a single load followed by a duplicate. *) Vldx_dup 1, + [Requires_feature "CRYPTO"; + Disassembles_as [Use_operands [| VecArray (1, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup", + bits_1, [P64]; + Vldx_dup 1, [Disassembles_as [Use_operands [| VecArray (1, Dreg); CstPtrTo Corereg |]]], Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup", @@ -1491,10 +1620,20 @@ bits_1, [S64; U64]; (* VST1 variants. 
*) + Vstx 1, [Requires_feature "CRYPTO"; + Disassembles_as [Use_operands [| VecArray (1, Dreg); + PtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; Dreg |], "vst1", + store_1, [P64]; Vstx 1, [Disassembles_as [Use_operands [| VecArray (1, Dreg); PtrTo Corereg |]]], Use_operands [| PtrTo Corereg; Dreg |], "vst1", store_1, pf_su_8_64; + Vstx 1, [Requires_feature "CRYPTO"; + Disassembles_as [Use_operands [| VecArray (2, Dreg); + PtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; Qreg |], "vst1Q", + store_1, [P64]; Vstx 1, [Disassembles_as [Use_operands [| VecArray (2, Dreg); PtrTo Corereg |]]], Use_operands [| PtrTo Corereg; Qreg |], "vst1Q", @@ -1506,6 +1645,13 @@ Use_operands [| PtrTo Corereg; Dreg; Immed |], "vst1_lane", store_3, pf_su_8_32; Vstx_lane 1, + [Requires_feature "CRYPTO"; + Disassembles_as [Use_operands [| VecArray (1, Dreg); + CstPtrTo Corereg |]]; + Const_valuator (fun _ -> 0)], + Use_operands [| PtrTo Corereg; Dreg; Immed |], + "vst1_lane", store_3, [P64]; + Vstx_lane 1, [Disassembles_as [Use_operands [| VecArray (1, Dreg); CstPtrTo Corereg |]]; Const_valuator (fun _ -> 0)], @@ -1517,6 +1663,12 @@ Use_operands [| PtrTo Corereg; Qreg; Immed |], "vst1Q_lane", store_3, pf_su_8_32; Vstx_lane 1, + [Requires_feature "CRYPTO"; + Disassembles_as [Use_operands [| VecArray (1, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; Qreg; Immed |], + "vst1Q_lane", store_3, [P64]; + Vstx_lane 1, [Disassembles_as [Use_operands [| VecArray (1, Dreg); CstPtrTo Corereg |]]], Use_operands [| PtrTo Corereg; Qreg; Immed |], @@ -1525,6 +1677,9 @@ (* VLD2 variants. *) Vldx 2, [], Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |], "vld2", bits_1, pf_su_8_32; + Vldx 2, [Requires_feature "CRYPTO"; Instruction_name ["vld1"]], + Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |], + "vld2", bits_1, [P64]; Vldx 2, [Instruction_name ["vld1"]], Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |], "vld2", bits_1, [S64; U64]; @@ -1556,6 +1711,12 @@ Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |], "vld2_dup", bits_1, pf_su_8_32; Vldx_dup 2, + [Requires_feature "CRYPTO"; + Instruction_name ["vld1"]; Disassembles_as [Use_operands + [| VecArray (2, Dreg); CstPtrTo Corereg |]]], + Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |], + "vld2_dup", bits_1, [P64]; + Vldx_dup 2, [Instruction_name ["vld1"]; Disassembles_as [Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |]]], Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |], @@ -1566,6 +1727,12 @@ PtrTo Corereg |]]], Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2", store_1, pf_su_8_32; + Vstx 2, [Requires_feature "CRYPTO"; + Disassembles_as [Use_operands [| VecArray (2, Dreg); + PtrTo Corereg |]]; + Instruction_name ["vst1"]], + Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2", + store_1, [P64]; Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg); PtrTo Corereg |]]; Instruction_name ["vst1"]], @@ -1594,6 +1761,9 @@ (* VLD3 variants. 
*) Vldx 3, [], Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |], "vld3", bits_1, pf_su_8_32; + Vldx 3, [Requires_feature "CRYPTO"; Instruction_name ["vld1"]], + Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |], + "vld3", bits_1, [P64]; Vldx 3, [Instruction_name ["vld1"]], Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |], "vld3", bits_1, [S64; U64]; @@ -1625,6 +1795,12 @@ Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |], "vld3_dup", bits_1, pf_su_8_32; Vldx_dup 3, + [Requires_feature "CRYPTO"; + Instruction_name ["vld1"]; Disassembles_as [Use_operands + [| VecArray (3, Dreg); CstPtrTo Corereg |]]], + Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |], + "vld3_dup", bits_1, [P64]; + Vldx_dup 3, [Instruction_name ["vld1"]; Disassembles_as [Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |]]], Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |], @@ -1635,6 +1811,12 @@ PtrTo Corereg |]]], Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3", store_1, pf_su_8_32; + Vstx 3, [Requires_feature "CRYPTO"; + Disassembles_as [Use_operands [| VecArray (4, Dreg); + PtrTo Corereg |]]; + Instruction_name ["vst1"]], + Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3", + store_1, [P64]; Vstx 3, [Disassembles_as [Use_operands [| VecArray (4, Dreg); PtrTo Corereg |]]; Instruction_name ["vst1"]], @@ -1663,6 +1845,9 @@ (* VLD4/VST4 variants. *) Vldx 4, [], Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |], "vld4", bits_1, pf_su_8_32; + Vldx 4, [Requires_feature "CRYPTO"; Instruction_name ["vld1"]], + Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |], + "vld4", bits_1, [P64]; Vldx 4, [Instruction_name ["vld1"]], Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |], "vld4", bits_1, [S64; U64]; @@ -1694,6 +1879,12 @@ Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |], "vld4_dup", bits_1, pf_su_8_32; Vldx_dup 4, + [Requires_feature "CRYPTO"; + Instruction_name ["vld1"]; Disassembles_as [Use_operands + [| VecArray (4, Dreg); CstPtrTo Corereg |]]], + Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |], + "vld4_dup", bits_1, [P64]; + Vldx_dup 4, [Instruction_name ["vld1"]; Disassembles_as [Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |]]], Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |], @@ -1703,6 +1894,12 @@ PtrTo Corereg |]]], Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4", store_1, pf_su_8_32; + Vstx 4, [Requires_feature "CRYPTO"; + Disassembles_as [Use_operands [| VecArray (4, Dreg); + PtrTo Corereg |]]; + Instruction_name ["vst1"]], + Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4", + store_1, [P64]; Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg); PtrTo Corereg |]]; Instruction_name ["vst1"]], @@ -1754,27 +1951,33 @@ Vorn, [], All (3, Qreg), "vornQ", notype_2, su_8_64; ] +let type_in_crypto_only t + = (t == P64) or (t == P128) + +let cross_product s1 s2 + = List.filter (fun (e, e') -> e <> e') + (List.concat (List.map (fun e1 -> List.map (fun e2 -> (e1,e2)) s1) s2)) + let reinterp = - let elems = P8 :: P16 :: F32 :: su_8_64 in - List.fold_right - (fun convto acc -> - let types = List.fold_right - (fun convfrom acc -> - if convfrom <> convto then - Cast (convto, convfrom) :: acc - else - acc) - elems - [] - in - let dconv = Vreinterp, [No_op], Use_operands [| Dreg; Dreg |], - "vreinterpret", conv_1, types - and qconv = Vreinterp, [No_op], Use_operands [| Qreg; Qreg |], - "vreinterpretQ", conv_1, types in - dconv :: qconv :: acc) - elems - [] + let elems = P8 :: P16 :: F32 :: 
P64 :: su_8_64 in + let casts = cross_product elems elems in + List.map + (fun (convto, convfrom) -> + Vreinterp, (if (type_in_crypto_only convto) or (type_in_crypto_only convfrom) + then [Requires_feature "CRYPTO"] else []) @ [No_op], Use_operands [| Dreg; Dreg |], + "vreinterpret", conv_1, [Cast (convto, convfrom)]) + casts +let reinterpq = + let elems = P8 :: P16 :: F32 :: P64 :: P128 :: su_8_64 in + let casts = cross_product elems elems in + List.map + (fun (convto, convfrom) -> + Vreinterp, (if (type_in_crypto_only convto) or (type_in_crypto_only convfrom) + then [Requires_feature "CRYPTO"] else []) @ [No_op], Use_operands [| Qreg; Qreg |], + "vreinterpretQ", conv_1, [Cast (convto, convfrom)]) + casts + (* Output routines. *) let rec string_of_elt = function @@ -1782,7 +1985,8 @@ | U8 -> "u8" | U16 -> "u16" | U32 -> "u32" | U64 -> "u64" | I8 -> "i8" | I16 -> "i16" | I32 -> "i32" | I64 -> "i64" | B8 -> "8" | B16 -> "16" | B32 -> "32" | B64 -> "64" - | F32 -> "f32" | P8 -> "p8" | P16 -> "p16" + | F16 -> "f16" | F32 -> "f32" | P8 -> "p8" | P16 -> "p16" + | P64 -> "p64" | P128 -> "p128" | Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "_" ^ string_of_elt b | NoElts -> failwith "No elts" @@ -1809,6 +2013,7 @@ | T_uint32x4 -> affix "uint32x4" | T_uint64x1 -> affix "uint64x1" | T_uint64x2 -> affix "uint64x2" + | T_float16x4 -> affix "float16x4" | T_float32x2 -> affix "float32x2" | T_float32x4 -> affix "float32x4" | T_poly8x8 -> affix "poly8x8" @@ -1825,6 +2030,11 @@ | T_uint64 -> affix "uint64" | T_poly8 -> affix "poly8" | T_poly16 -> affix "poly16" + | T_poly64 -> affix "poly64" + | T_poly64x1 -> affix "poly64x1" + | T_poly64x2 -> affix "poly64x2" + | T_poly128 -> affix "poly128" + | T_float16 -> affix "float16" | T_float32 -> affix "float32" | T_immediate _ -> "const int" | T_void -> "void" @@ -1832,6 +2042,8 @@ | T_intHI -> "__builtin_neon_hi" | T_intSI -> "__builtin_neon_si" | T_intDI -> "__builtin_neon_di" + | T_intTI -> "__builtin_neon_ti" + | T_floatHF -> "__builtin_neon_hf" | T_floatSF -> "__builtin_neon_sf" | T_arrayof (num, base) -> let basename = name (fun x -> x) base in @@ -1853,10 +2065,10 @@ | B_XImode -> "__builtin_neon_xi" let string_of_mode = function - V8QI -> "v8qi" | V4HI -> "v4hi" | V2SI -> "v2si" | V2SF -> "v2sf" - | DI -> "di" | V16QI -> "v16qi" | V8HI -> "v8hi" | V4SI -> "v4si" - | V4SF -> "v4sf" | V2DI -> "v2di" | QI -> "qi" | HI -> "hi" | SI -> "si" - | SF -> "sf" + V8QI -> "v8qi" | V4HI -> "v4hi" | V4HF -> "v4hf" | V2SI -> "v2si" + | V2SF -> "v2sf" | DI -> "di" | V16QI -> "v16qi" | V8HI -> "v8hi" + | V4SI -> "v4si" | V4SF -> "v4sf" | V2DI -> "v2di" | QI -> "qi" + | HI -> "hi" | SI -> "si" | SF -> "sf" | TI -> "ti" (* Use uppercase chars for letters which form part of the intrinsic name, but should be omitted from the builtin name (the info is passed in an extra @@ -1963,3 +2175,181 @@ | _ -> assert false with Not_found -> [f shape] +(* The crypto intrinsics have unconventional shapes and are not that + numerous to be worth the trouble of encoding here. We implement them + explicitly here. 
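+   The string below is emitted unchanged into the generated arm_neon.h;
+   its __ARM_FEATURE_CRYPTO guard keeps the definitions invisible unless
+   the crypto extension is enabled.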
*) +let crypto_intrinsics = +" +#ifdef __ARM_FEATURE_CRYPTO + +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vldrq_p128 (poly128_t const * __ptr) +{ +#ifdef __ARM_BIG_ENDIAN + poly64_t* __ptmp = (poly64_t*) __ptr; + poly64_t __d0 = vld1_p64 (__ptmp); + poly64_t __d1 = vld1_p64 (__ptmp + 1); + return vreinterpretq_p128_p64 (vcombine_p64 (__d1, __d0)); +#else + return vreinterpretq_p128_p64 (vld1q_p64 ((poly64_t*) __ptr)); +#endif +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vstrq_p128 (poly128_t * __ptr, poly128_t __val) +{ +#ifdef __ARM_BIG_ENDIAN + poly64x2_t __tmp = vreinterpretq_p64_p128 (__val); + poly64_t __d0 = vget_high_p64 (__tmp); + poly64_t __d1 = vget_low_p64 (__tmp); + vst1q_p64 ((poly64_t*) __ptr, vcombine_p64 (__d0, __d1)); +#else + vst1q_p64 ((poly64_t*) __ptr, vreinterpretq_p64_p128 (__val)); +#endif +} + +/* The vceq_p64 intrinsic does not map to a single instruction. + Instead we emulate it by performing a 32-bit variant of the vceq + and applying a pairwise min reduction to the result. + vceq_u32 will produce two 32-bit halves, each of which will contain either + all ones or all zeros depending on whether the corresponding 32-bit + halves of the poly64_t were equal. The whole poly64_t values are equal + if and only if both halves are equal, i.e. vceq_u32 returns all ones. + If the result is all zeroes for any half then the whole result is zeroes. + This is what the pairwise min reduction achieves. */ + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vceq_p64 (poly64x1_t __a, poly64x1_t __b) +{ + uint32x2_t __t_a = vreinterpret_u32_p64 (__a); + uint32x2_t __t_b = vreinterpret_u32_p64 (__b); + uint32x2_t __c = vceq_u32 (__t_a, __t_b); + uint32x2_t __m = vpmin_u32 (__c, __c); + return vreinterpret_u64_u32 (__m); +} + +/* The vtst_p64 intrinsic does not map to a single instruction. + We emulate it in way similar to vceq_p64 above but here we do + a reduction with max since if any two corresponding bits + in the two poly64_t's match, then the whole result must be all ones. 
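+   For instance, if both __a and __b are 0x0000000100000000, vtst_u32 on
+   the 32-bit halves gives { 0x00000000, 0xffffffff } and the pairwise max
+   reduction propagates the all-ones half to the whole 64-bit result,
+   matching the fact that (__a & __b) is non-zero.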
*/ + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vtst_p64 (poly64x1_t __a, poly64x1_t __b) +{ + uint32x2_t __t_a = vreinterpret_u32_p64 (__a); + uint32x2_t __t_b = vreinterpret_u32_p64 (__b); + uint32x2_t __c = vtst_u32 (__t_a, __t_b); + uint32x2_t __m = vpmax_u32 (__c, __c); + return vreinterpret_u64_u32 (__m); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vaeseq_u8 (uint8x16_t __data, uint8x16_t __key) +{ + return __builtin_arm_crypto_aese (__data, __key); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vaesdq_u8 (uint8x16_t __data, uint8x16_t __key) +{ + return __builtin_arm_crypto_aesd (__data, __key); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vaesmcq_u8 (uint8x16_t __data) +{ + return __builtin_arm_crypto_aesmc (__data); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vaesimcq_u8 (uint8x16_t __data) +{ + return __builtin_arm_crypto_aesimc (__data); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vsha1h_u32 (uint32_t __hash_e) +{ + uint32x4_t __t = vdupq_n_u32 (0); + __t = vsetq_lane_u32 (__hash_e, __t, 0); + __t = __builtin_arm_crypto_sha1h (__t); + return vgetq_lane_u32 (__t, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha1cq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk) +{ + uint32x4_t __t = vdupq_n_u32 (0); + __t = vsetq_lane_u32 (__hash_e, __t, 0); + return __builtin_arm_crypto_sha1c (__hash_abcd, __t, __wk); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha1pq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk) +{ + uint32x4_t __t = vdupq_n_u32 (0); + __t = vsetq_lane_u32 (__hash_e, __t, 0); + return __builtin_arm_crypto_sha1p (__hash_abcd, __t, __wk); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha1mq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk) +{ + uint32x4_t __t = vdupq_n_u32 (0); + __t = vsetq_lane_u32 (__hash_e, __t, 0); + return __builtin_arm_crypto_sha1m (__hash_abcd, __t, __wk); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha1su0q_u32 (uint32x4_t __w0_3, uint32x4_t __w4_7, uint32x4_t __w8_11) +{ + return __builtin_arm_crypto_sha1su0 (__w0_3, __w4_7, __w8_11); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha1su1q_u32 (uint32x4_t __tw0_3, uint32x4_t __w12_15) +{ + return __builtin_arm_crypto_sha1su1 (__tw0_3, __w12_15); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha256hq_u32 (uint32x4_t __hash_abcd, uint32x4_t __hash_efgh, uint32x4_t __wk) +{ + return __builtin_arm_crypto_sha256h (__hash_abcd, __hash_efgh, __wk); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha256h2q_u32 (uint32x4_t __hash_abcd, uint32x4_t __hash_efgh, uint32x4_t __wk) +{ + return __builtin_arm_crypto_sha256h2 (__hash_abcd, __hash_efgh, __wk); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha256su0q_u32 (uint32x4_t __w0_3, uint32x4_t __w4_7) +{ + return __builtin_arm_crypto_sha256su0 (__w0_3, __w4_7); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha256su1q_u32 (uint32x4_t __tw0_3, uint32x4_t __w8_11, uint32x4_t __w12_15) +{ + return __builtin_arm_crypto_sha256su1 (__tw0_3, 
__w8_11, __w12_15); +} + +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vmull_p64 (poly64_t __a, poly64_t __b) +{ + return (poly128_t) __builtin_arm_crypto_vmullp64 ((uint64_t) __a, (uint64_t) __b); +} + +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vmull_high_p64 (poly64x2_t __a, poly64x2_t __b) +{ + poly64_t __t1 = vget_high_p64 (__a); + poly64_t __t2 = vget_high_p64 (__b); + + return (poly128_t) __builtin_arm_crypto_vmullp64 ((uint64_t) __t1, (uint64_t) __t2); +} + +#endif +" --- a/src/gcc/config/arm/constraints.md +++ b/src/gcc/config/arm/constraints.md @@ -21,7 +21,7 @@ ;; The following register constraints have been used: ;; - in ARM/Thumb-2 state: t, w, x, y, z ;; - in Thumb state: h, b -;; - in both states: l, c, k +;; - in both states: l, c, k, q, US ;; In ARM state, 'l' is an alias for 'r' ;; 'f' and 'v' were previously used for FPA and MAVERICK registers. @@ -86,6 +86,9 @@ (define_register_constraint "k" "STACK_REG" "@internal The stack register.") +(define_register_constraint "q" "(TARGET_ARM && TARGET_LDRD) ? CORE_REGS : GENERAL_REGS" + "@internal In ARM state with LDRD support, core registers, otherwise general registers.") + (define_register_constraint "b" "TARGET_THUMB ? BASE_REGS : NO_REGS" "@internal Thumb only. The union of the low registers and the stack register.") @@ -93,6 +96,9 @@ (define_register_constraint "c" "CC_REG" "@internal The condition code register.") +(define_register_constraint "Cs" "CALLER_SAVE_REGS" + "@internal The caller save registers. Useful for sibcalls.") + (define_constraint "I" "In ARM/Thumb-2 state a constant that can be used as an immediate value in a Data Processing instruction. In Thumb-1 state a constant in the range @@ -164,9 +170,9 @@ && ival > 1020 && ival <= 1275"))) (define_constraint "Pd" - "@internal In Thumb-1 state a constant in the range 0 to 7" + "@internal In Thumb state a constant in the range 0 to 7" (and (match_code "const_int") - (match_test "TARGET_THUMB1 && ival >= 0 && ival <= 7"))) + (match_test "TARGET_THUMB && ival >= 0 && ival <= 7"))) (define_constraint "Pe" "@internal In Thumb-1 state a constant in the range 256 to +510" @@ -208,6 +214,11 @@ (and (match_code "const_int") (match_test "TARGET_THUMB2 && ival >= 0 && ival <= 255"))) +(define_constraint "Pz" + "@internal In Thumb-2 state the constant 0" + (and (match_code "const_int") + (match_test "TARGET_THUMB2 && (ival == 0)"))) + (define_constraint "G" "In ARM/Thumb-2 state the floating-point constant 0." (and (match_code "const_double") @@ -248,6 +259,24 @@ (and (match_code "const_int") (match_test "TARGET_32BIT && const_ok_for_dimode_op (ival, PLUS)"))) +(define_constraint "De" + "@internal + In ARM/Thumb-2 state a const_int that can be used by insn anddi." + (and (match_code "const_int") + (match_test "TARGET_32BIT && const_ok_for_dimode_op (ival, AND)"))) + +(define_constraint "Df" + "@internal + In ARM/Thumb-2 state a const_int that can be used by insn iordi." + (and (match_code "const_int") + (match_test "TARGET_32BIT && const_ok_for_dimode_op (ival, IOR)"))) + +(define_constraint "Dg" + "@internal + In ARM/Thumb-2 state a const_int that can be used by insn xordi." 
+ (and (match_code "const_int") + (match_test "TARGET_32BIT && const_ok_for_dimode_op (ival, XOR)"))) + (define_constraint "Di" "@internal In ARM/Thumb-2 state a const_int or const_double where both the high @@ -305,6 +334,9 @@ (and (match_code "const_double") (match_test "TARGET_32BIT && TARGET_VFP && vfp3_const_double_for_fract_bits (op)"))) +(define_register_constraint "Ts" "(arm_restrict_it) ? LO_REGS : GENERAL_REGS" + "For arm_restrict_it the core registers @code{r0}-@code{r7}. GENERAL_REGS otherwise.") + (define_memory_constraint "Ua" "@internal An address valid for loading/storing register exclusive" @@ -385,6 +417,12 @@ 0) && GET_CODE (XEXP (op, 0)) != POST_INC"))) +(define_constraint "US" + "@internal + US is a symbol reference." + (match_code "symbol_ref") +) + ;; We used to have constraint letters for S and R in ARM state, but ;; all uses of these now appear to have been removed. @@ -391,3 +429,4 @@ ;; Additionally, we used to have a Q constraint in Thumb state, but ;; this wasn't really a valid memory constraint. Again, all uses of ;; this now seem to have been removed. + --- a/src/gcc/config/arm/cortex-a7.md +++ b/src/gcc/config/arm/cortex-a7.md @@ -88,9 +88,9 @@ ;; ALU instruction with an immediate operand can dual-issue. (define_insn_reservation "cortex_a7_alu_imm" 2 (and (eq_attr "tune" "cortexa7") - (and (ior (eq_attr "type" "simple_alu_imm") - (ior (eq_attr "type" "simple_alu_shift") - (and (eq_attr "insn" "mov") + (and (ior (eq_attr "type" "arlo_imm,mov_imm,mvn_imm") + (ior (eq_attr "type" "extend") + (and (eq_attr "type" "mov_reg,mov_shift,mov_shift_reg") (not (eq_attr "length" "8"))))) (eq_attr "neon_type" "none"))) "cortex_a7_ex2|cortex_a7_ex1") @@ -99,13 +99,15 @@ ;; with a younger immediate-based instruction. (define_insn_reservation "cortex_a7_alu_reg" 2 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "alu_reg") + (and (eq_attr "type" "arlo_reg,shift,shift_reg,mov_reg,mvn_reg") (eq_attr "neon_type" "none"))) "cortex_a7_ex1") (define_insn_reservation "cortex_a7_alu_shift" 2 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "alu_shift,alu_shift_reg") + (and (eq_attr "type" "arlo_shift,arlo_shift_reg,\ + mov_shift,mov_shift_reg,\ + mvn_shift,mvn_shift_reg") (eq_attr "neon_type" "none"))) "cortex_a7_ex1") @@ -127,8 +129,9 @@ (define_insn_reservation "cortex_a7_mul" 2 (and (eq_attr "tune" "cortexa7") - (and (eq_attr "type" "mult") - (eq_attr "neon_type" "none"))) + (and (eq_attr "neon_type" "none") + (ior (eq_attr "mul32" "yes") + (eq_attr "mul64" "yes")))) "cortex_a7_both") ;; Forward the result of a multiply operation to the accumulator @@ -140,7 +143,7 @@ ;; The latency depends on the operands, so we use an estimate here. 
(define_insn_reservation "cortex_a7_idiv" 5 (and (eq_attr "tune" "cortexa7") - (eq_attr "insn" "udiv,sdiv")) + (eq_attr "type" "udiv,sdiv")) "cortex_a7_both*5") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; --- a/src/gcc/config/arm/arm-arches.def +++ b/src/gcc/config/arm/arm-arches.def @@ -53,6 +53,7 @@ ARM_ARCH("armv7-r", cortexr4, 7R, FL_CO_PROC | FL_FOR_ARCH7R) ARM_ARCH("armv7-m", cortexm3, 7M, FL_CO_PROC | FL_FOR_ARCH7M) ARM_ARCH("armv7e-m", cortexm4, 7EM, FL_CO_PROC | FL_FOR_ARCH7EM) -ARM_ARCH("armv8-a", cortexa15, 8A, FL_CO_PROC | FL_FOR_ARCH8A) +ARM_ARCH("armv8-a", cortexa53, 8A, FL_CO_PROC | FL_FOR_ARCH8A) +ARM_ARCH("armv8-a+crc",cortexa53, 8A,FL_CO_PROC | FL_CRC32 | FL_FOR_ARCH8A) ARM_ARCH("iwmmxt", iwmmxt, 5TE, FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT) ARM_ARCH("iwmmxt2", iwmmxt2, 5TE, FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT | FL_IWMMXT2) --- a/src/gcc/config/arm/t-arm +++ b/src/gcc/config/arm/t-arm @@ -39,6 +39,7 @@ $(srcdir)/config/arm/cortex-a8-neon.md \ $(srcdir)/config/arm/cortex-a9.md \ $(srcdir)/config/arm/cortex-a9-neon.md \ + $(srcdir)/config/arm/cortex-a53.md \ $(srcdir)/config/arm/cortex-m4-fpu.md \ $(srcdir)/config/arm/cortex-m4.md \ $(srcdir)/config/arm/cortex-r4f.md \ @@ -52,6 +53,7 @@ $(srcdir)/config/arm/iwmmxt.md \ $(srcdir)/config/arm/iwmmxt2.md \ $(srcdir)/config/arm/ldmstm.md \ + $(srcdir)/config/arm/ldrdstrd.md \ $(srcdir)/config/arm/marvell-f-iwmmxt.md \ $(srcdir)/config/arm/neon.md \ $(srcdir)/config/arm/predicates.md \ @@ -84,7 +86,8 @@ $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \ $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \ intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) $(srcdir)/config/arm/arm-cores.def \ - $(srcdir)/config/arm/arm-arches.def $(srcdir)/config/arm/arm-fpus.def + $(srcdir)/config/arm/arm-arches.def $(srcdir)/config/arm/arm-fpus.def \ + $(srcdir)/config/arm/arm_neon_builtins.def arm-c.o: $(srcdir)/config/arm/arm-c.c $(CONFIG_H) $(SYSTEM_H) \ coretypes.h $(TM_H) $(TREE_H) output.h $(C_COMMON_H) --- a/src/gcc/config/arm/arm.opt +++ b/src/gcc/config/arm/arm.opt @@ -239,6 +239,10 @@ Target Report Var(target_word_relocations) Init(TARGET_DEFAULT_WORD_RELOCATIONS) Only generate absolute relocations on word sized values. +mrestrict-it +Target Report Var(arm_restrict_it) Init(2) +Generate IT blocks appropriate for ARMv8. + mfix-cortex-m3-ldrd Target Report Var(fix_cm3_ldrd) Init(2) Avoid overlapping destination and address registers on LDRD instructions @@ -247,3 +251,7 @@ munaligned-access Target Report Var(unaligned_access) Init(2) Enable unaligned word and halfword accesses to packed data. + +mneon-for-64bits +Target Report RejectNegative Var(use_neon_for_64bits) Init(0) +Use Neon to perform 64-bits operations rather than core registers. --- a/src/gcc/config/arm/arm926ejs.md +++ b/src/gcc/config/arm/arm926ejs.md @@ -58,7 +58,9 @@ ;; ALU operations with no shifted operand (define_insn_reservation "9_alu_op" 1 (and (eq_attr "tune" "arm926ejs") - (eq_attr "type" "alu_reg,simple_alu_imm,simple_alu_shift,alu_shift")) + (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg,extend,arlo_shift,\ + mov_imm,mov_reg,mov_shift,\ + mvn_imm,mvn_reg,mvn_shift")) "e,m,w") ;; ALU operations with a shift-by-register operand @@ -67,7 +69,7 @@ ;; the execute stage. 
(define_insn_reservation "9_alu_shift_reg_op" 2 (and (eq_attr "tune" "arm926ejs") - (eq_attr "type" "alu_shift_reg")) + (eq_attr "type" "arlo_shift_reg,mov_shift_reg,mvn_shift_reg")) "e*2,m,w") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -81,32 +83,32 @@ (define_insn_reservation "9_mult1" 3 (and (eq_attr "tune" "arm926ejs") - (eq_attr "insn" "smlalxy,mul,mla")) + (eq_attr "type" "smlalxy,mul,mla")) "e*2,m,w") (define_insn_reservation "9_mult2" 4 (and (eq_attr "tune" "arm926ejs") - (eq_attr "insn" "muls,mlas")) + (eq_attr "type" "muls,mlas")) "e*3,m,w") (define_insn_reservation "9_mult3" 4 (and (eq_attr "tune" "arm926ejs") - (eq_attr "insn" "umull,umlal,smull,smlal")) + (eq_attr "type" "umull,umlal,smull,smlal")) "e*3,m,w") (define_insn_reservation "9_mult4" 5 (and (eq_attr "tune" "arm926ejs") - (eq_attr "insn" "umulls,umlals,smulls,smlals")) + (eq_attr "type" "umulls,umlals,smulls,smlals")) "e*4,m,w") (define_insn_reservation "9_mult5" 2 (and (eq_attr "tune" "arm926ejs") - (eq_attr "insn" "smulxy,smlaxy,smlawx")) + (eq_attr "type" "smulxy,smlaxy,smlawx")) "e,m,w") (define_insn_reservation "9_mult6" 3 (and (eq_attr "tune" "arm926ejs") - (eq_attr "insn" "smlalxy")) + (eq_attr "type" "smlalxy")) "e*2,m,w") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; --- a/src/gcc/config/arm/ldrdstrd.md +++ b/src/gcc/config/arm/ldrdstrd.md @@ -0,0 +1,260 @@ +;; ARM ldrd/strd peephole optimizations. +;; +;; Copyright (C) 2013 Free Software Foundation, Inc. +;; +;; Written by Greta Yorsh + +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; The following peephole optimizations identify consecutive memory +;; accesses, and try to rearrange the operands to enable generation of +;; ldrd/strd. + +(define_peephole2 ; ldrd + [(set (match_operand:SI 0 "arm_general_register_operand" "") + (match_operand:SI 2 "memory_operand" "")) + (set (match_operand:SI 1 "arm_general_register_operand" "") + (match_operand:SI 3 "memory_operand" ""))] + "TARGET_LDRD + && current_tune->prefer_ldrd_strd + && !optimize_function_for_size_p (cfun)" + [(const_int 0)] +{ + if (!gen_operands_ldrd_strd (operands, true, false, false)) + FAIL; + else if (TARGET_ARM) + { + /* In ARM state, the destination registers of LDRD/STRD must be + consecutive. We emit DImode access. 
*/ + operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); + operands[2] = adjust_address (operands[2], DImode, 0); + /* Emit [(set (match_dup 0) (match_dup 2))] */ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[2])); + DONE; + } + else if (TARGET_THUMB2) + { + /* Emit the pattern: + [(parallel [(set (match_dup 0) (match_dup 2)) + (set (match_dup 1) (match_dup 3))])] */ + rtx t1 = gen_rtx_SET (VOIDmode, operands[0], operands[2]); + rtx t2 = gen_rtx_SET (VOIDmode, operands[1], operands[3]); + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2))); + DONE; + } +}) + +(define_peephole2 ; strd + [(set (match_operand:SI 2 "memory_operand" "") + (match_operand:SI 0 "arm_general_register_operand" "")) + (set (match_operand:SI 3 "memory_operand" "") + (match_operand:SI 1 "arm_general_register_operand" ""))] + "TARGET_LDRD + && current_tune->prefer_ldrd_strd + && !optimize_function_for_size_p (cfun)" + [(const_int 0)] +{ + if (!gen_operands_ldrd_strd (operands, false, false, false)) + FAIL; + else if (TARGET_ARM) + { + /* In ARM state, the destination registers of LDRD/STRD must be + consecutive. We emit DImode access. */ + operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); + operands[2] = adjust_address (operands[2], DImode, 0); + /* Emit [(set (match_dup 2) (match_dup 0))] */ + emit_insn (gen_rtx_SET (VOIDmode, operands[2], operands[0])); + DONE; + } + else if (TARGET_THUMB2) + { + /* Emit the pattern: + [(parallel [(set (match_dup 2) (match_dup 0)) + (set (match_dup 3) (match_dup 1))])] */ + rtx t1 = gen_rtx_SET (VOIDmode, operands[2], operands[0]); + rtx t2 = gen_rtx_SET (VOIDmode, operands[3], operands[1]); + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2))); + DONE; + } +}) + +;; The following peepholes reorder registers to enable LDRD/STRD. 
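As a rough illustration (hypothetical C, not part of the patch), the constant-store peepholes below target code that writes two constants to adjacent words; the constants are first built in a register pair and the two stores can then become a single strd:

  /* Hypothetical example: at -O2 on a core whose tuning sets
     prefer_ldrd_strd, the two word stores below are candidates for the
     "strd of constants" peepholes.  */
  void init_pair (int *p)
  {
    p[0] = 1;
    p[1] = 2;
  }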
+(define_peephole2 ; strd of constants + [(set (match_operand:SI 0 "arm_general_register_operand" "") + (match_operand:SI 4 "const_int_operand" "")) + (set (match_operand:SI 2 "memory_operand" "") + (match_dup 0)) + (set (match_operand:SI 1 "arm_general_register_operand" "") + (match_operand:SI 5 "const_int_operand" "")) + (set (match_operand:SI 3 "memory_operand" "") + (match_dup 1))] + "TARGET_LDRD + && current_tune->prefer_ldrd_strd + && !optimize_function_for_size_p (cfun)" + [(const_int 0)] +{ + if (!gen_operands_ldrd_strd (operands, false, true, false)) + FAIL; + else if (TARGET_ARM) + { + rtx tmp = gen_rtx_REG (DImode, REGNO (operands[0])); + operands[2] = adjust_address (operands[2], DImode, 0); + /* Emit the pattern: + [(set (match_dup 0) (match_dup 4)) + (set (match_dup 1) (match_dup 5)) + (set (match_dup 2) tmp)] */ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[4])); + emit_insn (gen_rtx_SET (VOIDmode, operands[1], operands[5])); + emit_insn (gen_rtx_SET (VOIDmode, operands[2], tmp)); + DONE; + } + else if (TARGET_THUMB2) + { + /* Emit the pattern: + [(set (match_dup 0) (match_dup 4)) + (set (match_dup 1) (match_dup 5)) + (parallel [(set (match_dup 2) (match_dup 0)) + (set (match_dup 3) (match_dup 1))])] */ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[4])); + emit_insn (gen_rtx_SET (VOIDmode, operands[1], operands[5])); + rtx t1 = gen_rtx_SET (VOIDmode, operands[2], operands[0]); + rtx t2 = gen_rtx_SET (VOIDmode, operands[3], operands[1]); + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2))); + DONE; + } +}) + +(define_peephole2 ; strd of constants + [(set (match_operand:SI 0 "arm_general_register_operand" "") + (match_operand:SI 4 "const_int_operand" "")) + (set (match_operand:SI 1 "arm_general_register_operand" "") + (match_operand:SI 5 "const_int_operand" "")) + (set (match_operand:SI 2 "memory_operand" "") + (match_dup 0)) + (set (match_operand:SI 3 "memory_operand" "") + (match_dup 1))] + "TARGET_LDRD + && current_tune->prefer_ldrd_strd + && !optimize_function_for_size_p (cfun)" + [(const_int 0)] +{ + if (!gen_operands_ldrd_strd (operands, false, true, false)) + FAIL; + else if (TARGET_ARM) + { + rtx tmp = gen_rtx_REG (DImode, REGNO (operands[0])); + operands[2] = adjust_address (operands[2], DImode, 0); + /* Emit the pattern + [(set (match_dup 0) (match_dup 4)) + (set (match_dup 1) (match_dup 5)) + (set (match_dup 2) tmp)] */ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[4])); + emit_insn (gen_rtx_SET (VOIDmode, operands[1], operands[5])); + emit_insn (gen_rtx_SET (VOIDmode, operands[2], tmp)); + DONE; + } + else if (TARGET_THUMB2) + { + /* Emit the pattern: + [(set (match_dup 0) (match_dup 4)) + (set (match_dup 1) (match_dup 5)) + (parallel [(set (match_dup 2) (match_dup 0)) + (set (match_dup 3) (match_dup 1))])] */ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[4])); + emit_insn (gen_rtx_SET (VOIDmode, operands[1], operands[5])); + rtx t1 = gen_rtx_SET (VOIDmode, operands[2], operands[0]); + rtx t2 = gen_rtx_SET (VOIDmode, operands[3], operands[1]); + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2))); + DONE; + } +}) + +;; The following two peephole optimizations are only relevant for ARM +;; mode where LDRD/STRD require consecutive registers. + +(define_peephole2 ; swap the destination registers of two loads + ; before a commutative operation. 
+ [(set (match_operand:SI 0 "arm_general_register_operand" "") + (match_operand:SI 2 "memory_operand" "")) + (set (match_operand:SI 1 "arm_general_register_operand" "") + (match_operand:SI 3 "memory_operand" "")) + (set (match_operand:SI 4 "arm_general_register_operand" "") + (match_operator:SI 5 "commutative_binary_operator" + [(match_operand 6 "arm_general_register_operand" "") + (match_operand 7 "arm_general_register_operand" "") ]))] + "TARGET_LDRD && TARGET_ARM + && current_tune->prefer_ldrd_strd + && !optimize_function_for_size_p (cfun) + && ( ((rtx_equal_p(operands[0], operands[6])) && (rtx_equal_p(operands[1], operands[7]))) + ||((rtx_equal_p(operands[0], operands[7])) && (rtx_equal_p(operands[1], operands[6])))) + && (peep2_reg_dead_p (3, operands[0]) || rtx_equal_p (operands[0], operands[4])) + && (peep2_reg_dead_p (3, operands[1]) || rtx_equal_p (operands[1], operands[4]))" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 4) (match_op_dup 5 [(match_dup 6) (match_dup 7)]))] + { + if (!gen_operands_ldrd_strd (operands, true, false, true)) + { + FAIL; + } + else + { + operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); + operands[2] = adjust_address (operands[2], DImode, 0); + } + } +) + +(define_peephole2 ; swap the destination registers of two loads + ; before a commutative operation that sets the flags. + [(set (match_operand:SI 0 "arm_general_register_operand" "") + (match_operand:SI 2 "memory_operand" "")) + (set (match_operand:SI 1 "arm_general_register_operand" "") + (match_operand:SI 3 "memory_operand" "")) + (parallel + [(set (match_operand:SI 4 "arm_general_register_operand" "") + (match_operator:SI 5 "commutative_binary_operator" + [(match_operand 6 "arm_general_register_operand" "") + (match_operand 7 "arm_general_register_operand" "") ])) + (clobber (reg:CC CC_REGNUM))])] + "TARGET_LDRD && TARGET_ARM + && current_tune->prefer_ldrd_strd + && !optimize_function_for_size_p (cfun) + && ( ((rtx_equal_p(operands[0], operands[6])) && (rtx_equal_p(operands[1], operands[7]))) + ||((rtx_equal_p(operands[0], operands[7])) && (rtx_equal_p(operands[1], operands[6])))) + && (peep2_reg_dead_p (3, operands[0]) || rtx_equal_p (operands[0], operands[4])) + && (peep2_reg_dead_p (3, operands[1]) || rtx_equal_p (operands[1], operands[4]))" + [(set (match_dup 0) (match_dup 2)) + (parallel + [(set (match_dup 4) + (match_op_dup 5 [(match_dup 6) (match_dup 7)])) + (clobber (reg:CC CC_REGNUM))])] + { + if (!gen_operands_ldrd_strd (operands, true, false, true)) + { + FAIL; + } + else + { + operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); + operands[2] = adjust_address (operands[2], DImode, 0); + } + } +) + +;; TODO: Handle LDRD/STRD with writeback: +;; (a) memory operands can be POST_INC, POST_DEC, PRE_MODIFY, POST_MODIFY +;; (b) Patterns may be followed by an update of the base address. 
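For completeness, a minimal sketch (hypothetical C, not from the patch) of the basic case the peepholes above handle: two loads from consecutive addresses and two stores to consecutive addresses, which may be merged into one ldrd and one strd when the tuning prefers it and suitable register pairs are available:

  /* Hypothetical example: adjacent word loads and stores that the
     ldrd/strd peepholes above may combine on ARM and Thumb-2 targets.  */
  struct pair { int lo; int hi; };

  void copy_pair (struct pair *dst, const struct pair *src)
  {
    int a = src->lo;   /* two loads from consecutive addresses  */
    int b = src->hi;
    dst->lo = a;       /* two stores to consecutive addresses   */
    dst->hi = b;
  }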
--- a/src/gcc/config/arm/predicates.md +++ b/src/gcc/config/arm/predicates.md @@ -31,6 +31,28 @@ || REGNO_REG_CLASS (REGNO (op)) != NO_REGS)); }) +(define_predicate "imm_for_neon_inv_logic_operand" + (match_code "const_vector") +{ + return (TARGET_NEON + && neon_immediate_valid_for_logic (op, mode, 1, NULL, NULL)); +}) + +(define_predicate "neon_inv_logic_op2" + (ior (match_operand 0 "imm_for_neon_inv_logic_operand") + (match_operand 0 "s_register_operand"))) + +(define_predicate "imm_for_neon_logic_operand" + (match_code "const_vector") +{ + return (TARGET_NEON + && neon_immediate_valid_for_logic (op, mode, 0, NULL, NULL)); +}) + +(define_predicate "neon_logic_op2" + (ior (match_operand 0 "imm_for_neon_logic_operand") + (match_operand 0 "s_register_operand"))) + ;; Any general register. (define_predicate "arm_hard_general_register_operand" (match_code "reg") @@ -151,6 +173,23 @@ (ior (match_operand 0 "arm_rhs_operand") (match_operand 0 "arm_neg_immediate_operand"))) +(define_predicate "arm_anddi_operand_neon" + (ior (match_operand 0 "s_register_operand") + (and (match_code "const_int") + (match_test "const_ok_for_dimode_op (INTVAL (op), AND)")) + (match_operand 0 "neon_inv_logic_op2"))) + +(define_predicate "arm_iordi_operand_neon" + (ior (match_operand 0 "s_register_operand") + (and (match_code "const_int") + (match_test "const_ok_for_dimode_op (INTVAL (op), IOR)")) + (match_operand 0 "neon_logic_op2"))) + +(define_predicate "arm_xordi_operand" + (ior (match_operand 0 "s_register_operand") + (and (match_code "const_int") + (match_test "const_ok_for_dimode_op (INTVAL (op), XOR)")))) + (define_predicate "arm_adddi_operand" (ior (match_operand 0 "s_register_operand") (and (match_code "const_int") @@ -213,6 +252,10 @@ (and (match_code "plus,minus,ior,xor,and") (match_test "mode == GET_MODE (op)"))) +(define_special_predicate "shiftable_operator_strict_it" + (and (match_code "plus,and") + (match_test "mode == GET_MODE (op)"))) + ;; True for logical binary operators. (define_special_predicate "logical_binary_operator" (and (match_code "ior,xor,and") @@ -276,6 +319,24 @@ (define_special_predicate "lt_ge_comparison_operator" (match_code "lt,ge")) +;; The vsel instruction only accepts the ARM condition codes listed below. 
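As context for the predicate that follows, a minimal sketch (hypothetical C, not part of the patch) of the kind of floating-point select that ARMv8 vsel can implement without a branch or an IT block:

  /* Hypothetical example: on an ARMv8 VFP target this select may be
     compiled to a vcmp/vselge sequence rather than a conditional branch,
     which is why only the condition codes below are accepted.  */
  double select_ge (double a, double b, double x, double y)
  {
    return a >= b ? x : y;
  }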
+(define_special_predicate "arm_vsel_comparison_operator" + (and (match_operand 0 "expandable_comparison_operator") + (match_test "maybe_get_arm_condition_code (op) == ARM_GE + || maybe_get_arm_condition_code (op) == ARM_GT + || maybe_get_arm_condition_code (op) == ARM_EQ + || maybe_get_arm_condition_code (op) == ARM_VS + || maybe_get_arm_condition_code (op) == ARM_LT + || maybe_get_arm_condition_code (op) == ARM_LE + || maybe_get_arm_condition_code (op) == ARM_NE + || maybe_get_arm_condition_code (op) == ARM_VC"))) + +(define_special_predicate "arm_cond_move_operator" + (if_then_else (match_test "arm_restrict_it") + (and (match_test "TARGET_FPU_ARMV8") + (match_operand 0 "arm_vsel_comparison_operator")) + (match_operand 0 "expandable_comparison_operator"))) + (define_special_predicate "noov_comparison_operator" (match_code "lt,ge,eq,ne")) @@ -512,28 +573,6 @@ (ior (match_operand 0 "s_register_operand") (match_operand 0 "imm_for_neon_rshift_operand"))) -(define_predicate "imm_for_neon_logic_operand" - (match_code "const_vector") -{ - return (TARGET_NEON - && neon_immediate_valid_for_logic (op, mode, 0, NULL, NULL)); -}) - -(define_predicate "imm_for_neon_inv_logic_operand" - (match_code "const_vector") -{ - return (TARGET_NEON - && neon_immediate_valid_for_logic (op, mode, 1, NULL, NULL)); -}) - -(define_predicate "neon_logic_op2" - (ior (match_operand 0 "imm_for_neon_logic_operand") - (match_operand 0 "s_register_operand"))) - -(define_predicate "neon_inv_logic_op2" - (ior (match_operand 0 "imm_for_neon_inv_logic_operand") - (match_operand 0 "s_register_operand"))) - ;; Predicates for named expanders that overlap multiple ISAs. (define_predicate "cmpdi_operand" @@ -623,3 +662,7 @@ (define_predicate "mem_noofs_operand" (and (match_code "mem") (match_code "reg" "0"))) + +(define_predicate "call_insn_operand" + (ior (match_code "symbol_ref") + (match_operand 0 "s_register_operand"))) --- a/src/gcc/config/arm/arm_neon.h +++ b/src/gcc/config/arm/arm_neon.h @@ -42,9 +42,13 @@ typedef __builtin_neon_hi int16x4_t __attribute__ ((__vector_size__ (8))); typedef __builtin_neon_si int32x2_t __attribute__ ((__vector_size__ (8))); typedef __builtin_neon_di int64x1_t; +typedef __builtin_neon_hf float16x4_t __attribute__ ((__vector_size__ (8))); typedef __builtin_neon_sf float32x2_t __attribute__ ((__vector_size__ (8))); typedef __builtin_neon_poly8 poly8x8_t __attribute__ ((__vector_size__ (8))); typedef __builtin_neon_poly16 poly16x4_t __attribute__ ((__vector_size__ (8))); +#ifdef __ARM_FEATURE_CRYPTO +typedef __builtin_neon_poly64 poly64x1_t; +#endif typedef __builtin_neon_uqi uint8x8_t __attribute__ ((__vector_size__ (8))); typedef __builtin_neon_uhi uint16x4_t __attribute__ ((__vector_size__ (8))); typedef __builtin_neon_usi uint32x2_t __attribute__ ((__vector_size__ (8))); @@ -56,6 +60,9 @@ typedef __builtin_neon_sf float32x4_t __attribute__ ((__vector_size__ (16))); typedef __builtin_neon_poly8 poly8x16_t __attribute__ ((__vector_size__ (16))); typedef __builtin_neon_poly16 poly16x8_t __attribute__ ((__vector_size__ (16))); +#ifdef __ARM_FEATURE_CRYPTO +typedef __builtin_neon_poly64 poly64x2_t __attribute__ ((__vector_size__ (16))); +#endif typedef __builtin_neon_uqi uint8x16_t __attribute__ ((__vector_size__ (16))); typedef __builtin_neon_uhi uint16x8_t __attribute__ ((__vector_size__ (16))); typedef __builtin_neon_usi uint32x4_t __attribute__ ((__vector_size__ (16))); @@ -64,6 +71,10 @@ typedef float float32_t; typedef __builtin_neon_poly8 poly8_t; typedef __builtin_neon_poly16 poly16_t; 
+#ifdef __ARM_FEATURE_CRYPTO +typedef __builtin_neon_poly64 poly64_t; +typedef __builtin_neon_poly128 poly128_t; +#endif typedef struct int8x8x2_t { @@ -175,6 +186,22 @@ poly16x8_t val[2]; } poly16x8x2_t; +#ifdef __ARM_FEATURE_CRYPTO +typedef struct poly64x1x2_t +{ + poly64x1_t val[2]; +} poly64x1x2_t; +#endif + + +#ifdef __ARM_FEATURE_CRYPTO +typedef struct poly64x2x2_t +{ + poly64x2_t val[2]; +} poly64x2x2_t; +#endif + + typedef struct int8x8x3_t { int8x8_t val[3]; @@ -285,6 +312,22 @@ poly16x8_t val[3]; } poly16x8x3_t; +#ifdef __ARM_FEATURE_CRYPTO +typedef struct poly64x1x3_t +{ + poly64x1_t val[3]; +} poly64x1x3_t; +#endif + + +#ifdef __ARM_FEATURE_CRYPTO +typedef struct poly64x2x3_t +{ + poly64x2_t val[3]; +} poly64x2x3_t; +#endif + + typedef struct int8x8x4_t { int8x8_t val[4]; @@ -395,7 +438,23 @@ poly16x8_t val[4]; } poly16x8x4_t; +#ifdef __ARM_FEATURE_CRYPTO +typedef struct poly64x1x4_t +{ + poly64x1_t val[4]; +} poly64x1x4_t; +#endif + +#ifdef __ARM_FEATURE_CRYPTO +typedef struct poly64x2x4_t +{ + poly64x2_t val[4]; +} poly64x2x4_t; +#endif + + + __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vadd_s8 (int8x8_t __a, int8x8_t __b) { @@ -4360,6 +4419,14 @@ return (uint64x2_t)__builtin_neon_vsra_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c, 4); } +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vsri_n_p64 (poly64x1_t __a, poly64x1_t __b, const int __c) +{ + return (poly64x1_t)__builtin_neon_vsri_ndi (__a, __b, __c); +} + +#endif __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vsri_n_s8 (int8x8_t __a, int8x8_t __b, const int __c) { @@ -4420,6 +4487,14 @@ return (poly16x4_t)__builtin_neon_vsri_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c); } +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vsriq_n_p64 (poly64x2_t __a, poly64x2_t __b, const int __c) +{ + return (poly64x2_t)__builtin_neon_vsri_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c); +} + +#endif __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c) { @@ -4480,6 +4555,14 @@ return (poly16x8_t)__builtin_neon_vsri_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c); } +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vsli_n_p64 (poly64x1_t __a, poly64x1_t __b, const int __c) +{ + return (poly64x1_t)__builtin_neon_vsli_ndi (__a, __b, __c); +} + +#endif __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vsli_n_s8 (int8x8_t __a, int8x8_t __b, const int __c) { @@ -4540,6 +4623,14 @@ return (poly16x4_t)__builtin_neon_vsli_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c); } +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vsliq_n_p64 (poly64x2_t __a, poly64x2_t __b, const int __c) +{ + return (poly64x2_t)__builtin_neon_vsli_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c); +} + +#endif __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vsliq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c) { @@ -5308,6 +5399,14 @@ return (uint64x2_t)__builtin_neon_vset_lanev2di ((__builtin_neon_di) __a, (int64x2_t) __b, __c); } +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vcreate_p64 (uint64_t __a) +{ + return (poly64x1_t)__builtin_neon_vcreatedi ((__builtin_neon_di) __a); +} + +#endif 
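A small usage sketch for the poly64 intrinsics introduced above (hypothetical user code that includes <arm_neon.h>, not part of the generated header; it only compiles when __ARM_FEATURE_CRYPTO is defined for the target):

  #ifdef __ARM_FEATURE_CRYPTO
  /* Hypothetical example: build two poly64x1_t values with vcreate_p64
     and combine them with a shift-right-and-insert (vsri_n_p64).  */
  static __inline poly64x1_t
  example_sri_p64 (void)
  {
    poly64x1_t a = vcreate_p64 (0xff00ff00ff00ff00ULL);
    poly64x1_t b = vcreate_p64 (0x00ff00ff00ff00ffULL);
    return vsri_n_p64 (a, b, 8);
  }
  #endif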
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vcreate_s8 (uint64_t __a) { @@ -5428,6 +5527,14 @@ return (poly16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a); } +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vdup_n_p64 (poly64_t __a) +{ + return (poly64x1_t)__builtin_neon_vdup_ndi ((__builtin_neon_di) __a); +} + +#endif __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vdup_n_s64 (int64_t __a) { @@ -5440,6 +5547,14 @@ return (uint64x1_t)__builtin_neon_vdup_ndi ((__builtin_neon_di) __a); } +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vdupq_n_p64 (poly64_t __a) +{ + return (poly64x2_t)__builtin_neon_vdup_nv2di ((__builtin_neon_di) __a); +} + +#endif __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vdupq_n_s8 (int8_t __a) { @@ -5692,6 +5807,14 @@ return (poly16x4_t)__builtin_neon_vdup_lanev4hi ((int16x4_t) __a, __b); } +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vdup_lane_p64 (poly64x1_t __a, const int __b) +{ + return (poly64x1_t)__builtin_neon_vdup_lanedi (__a, __b); +} + +#endif __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vdup_lane_s64 (int64x1_t __a, const int __b) { @@ -5758,6 +5881,14 @@ return (poly16x8_t)__builtin_neon_vdup_lanev8hi ((int16x4_t) __a, __b); } +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vdupq_lane_p64 (poly64x1_t __a, const int __b) +{ + return (poly64x2_t)__builtin_neon_vdup_lanev2di (__a, __b); +} + +#endif __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vdupq_lane_s64 (int64x1_t __a, const int __b) { @@ -5770,6 +5901,14 @@ return (uint64x2_t)__builtin_neon_vdup_lanev2di ((int64x1_t) __a, __b); } +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vcombine_p64 (poly64x1_t __a, poly64x1_t __b) +{ + return (poly64x2_t)__builtin_neon_vcombinedi (__a, __b); +} + +#endif __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vcombine_s8 (int8x8_t __a, int8x8_t __b) { @@ -5836,6 +5975,14 @@ return (poly16x8_t)__builtin_neon_vcombinev4hi ((int16x4_t) __a, (int16x4_t) __b); } +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vget_high_p64 (poly64x2_t __a) +{ + return (poly64x1_t)__builtin_neon_vget_highv2di ((int64x2_t) __a); +} + +#endif __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vget_high_s8 (int8x16_t __a) { @@ -5956,6 +6103,14 @@ return (poly16x4_t)__builtin_neon_vget_lowv8hi ((int16x8_t) __a); } +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vget_low_p64 (poly64x2_t __a) +{ + return (poly64x1_t)__builtin_neon_vget_lowv2di ((int64x2_t) __a); +} + +#endif __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vget_low_s64 (int64x2_t __a) { @@ -6016,6 +6171,22 @@ return (uint32x4_t)__builtin_neon_vcvtv4sf (__a, 0); } +#if ((__ARM_FP & 0x2) != 0) +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vcvt_f16_f32 (float32x4_t __a) +{ + return (float16x4_t)__builtin_neon_vcvtv4hfv4sf (__a); +} + +#endif +#if ((__ARM_FP & 0x2) != 0) +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) 
+vcvt_f32_f16 (float16x4_t __a) +{ + return (float32x4_t)__builtin_neon_vcvtv4sfv4hf (__a); +} + +#endif __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) vcvt_n_s32_f32 (float32x2_t __a, const int __b) { @@ -7024,6 +7195,14 @@ return (int64x2_t)__builtin_neon_vqdmlsl_nv2si (__a, __b, (__builtin_neon_si) __c, 1); } +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vext_p64 (poly64x1_t __a, poly64x1_t __b, const int __c) +{ + return (poly64x1_t)__builtin_neon_vextdi (__a, __b, __c); +} + +#endif __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vext_s8 (int8x8_t __a, int8x8_t __b, const int __c) { @@ -7090,6 +7269,14 @@ return (poly16x4_t)__builtin_neon_vextv4hi ((int16x4_t) __a, (int16x4_t) __b, __c); } +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vextq_p64 (poly64x2_t __a, poly64x2_t __b, const int __c) +{ + return (poly64x2_t)__builtin_neon_vextv2di ((int64x2_t) __a, (int64x2_t) __b, __c); +} + +#endif __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vextq_s8 (int8x16_t __a, int8x16_t __b, const int __c) { @@ -7372,6 +7559,14 @@ return (poly8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }); } +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vbsl_p64 (uint64x1_t __a, poly64x1_t __b, poly64x1_t __c) +{ + return (poly64x1_t)__builtin_neon_vbsldi ((int64x1_t) __a, __b, __c); +} + +#endif __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vbsl_s8 (uint8x8_t __a, int8x8_t __b, int8x8_t __c) { @@ -7438,6 +7633,14 @@ return (poly16x4_t)__builtin_neon_vbslv4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c); } +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vbslq_p64 (uint64x2_t __a, poly64x2_t __b, poly64x2_t __c) +{ + return (poly64x2_t)__builtin_neon_vbslv2di ((int64x2_t) __a, (int64x2_t) __b, (int64x2_t) __c); +} + +#endif __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vbslq_s8 (uint8x16_t __a, int8x16_t __b, int8x16_t __c) { @@ -7990,6 +8193,14 @@ return __rv; } +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vld1_p64 (const poly64_t * __a) +{ + return (poly64x1_t)__builtin_neon_vld1di ((const __builtin_neon_di *) __a); +} + +#endif __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vld1_s8 (const int8_t * __a) { @@ -8056,6 +8267,14 @@ return (poly16x4_t)__builtin_neon_vld1v4hi ((const __builtin_neon_hi *) __a); } +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vld1q_p64 (const poly64_t * __a) +{ + return (poly64x2_t)__builtin_neon_vld1v2di ((const __builtin_neon_di *) __a); +} + +#endif __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vld1q_s8 (const int8_t * __a) { @@ -8176,6 +8395,14 @@ return (poly16x4_t)__builtin_neon_vld1_lanev4hi ((const __builtin_neon_hi *) __a, (int16x4_t) __b, __c); } +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vld1_lane_p64 (const poly64_t * __a, poly64x1_t __b, const int __c) +{ + return (poly64x1_t)__builtin_neon_vld1_lanedi ((const __builtin_neon_di *) __a, __b, __c); +} + +#endif __extension__ static __inline 
int64x1_t __attribute__ ((__always_inline__)) vld1_lane_s64 (const int64_t * __a, int64x1_t __b, const int __c) { @@ -8242,6 +8469,14 @@ return (poly16x8_t)__builtin_neon_vld1_lanev8hi ((const __builtin_neon_hi *) __a, (int16x8_t) __b, __c); } +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vld1q_lane_p64 (const poly64_t * __a, poly64x2_t __b, const int __c) +{ + return (poly64x2_t)__builtin_neon_vld1_lanev2di ((const __builtin_neon_di *) __a, (int64x2_t) __b, __c); +} + +#endif __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vld1q_lane_s64 (const int64_t * __a, int64x2_t __b, const int __c) { @@ -8308,6 +8543,14 @@ return (poly16x4_t)__builtin_neon_vld1_dupv4hi ((const __builtin_neon_hi *) __a); } +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vld1_dup_p64 (const poly64_t * __a) +{ + return (poly64x1_t)__builtin_neon_vld1_dupdi ((const __builtin_neon_di *) __a); +} + +#endif __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vld1_dup_s64 (const int64_t * __a) { @@ -8374,6 +8617,14 @@ return (poly16x8_t)__builtin_neon_vld1_dupv8hi ((const __builtin_neon_hi *) __a); } +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vld1q_dup_p64 (const poly64_t * __a) +{ + return (poly64x2_t)__builtin_neon_vld1_dupv2di ((const __builtin_neon_di *) __a); +} + +#endif __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vld1q_dup_s64 (const int64_t * __a) { @@ -8386,7 +8637,15 @@ return (uint64x2_t)__builtin_neon_vld1_dupv2di ((const __builtin_neon_di *) __a); } +#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_p64 (poly64_t * __a, poly64x1_t __b) +{ + __builtin_neon_vst1di ((__builtin_neon_di *) __a, __b); +} + +#endif +__extension__ static __inline void __attribute__ ((__always_inline__)) vst1_s8 (int8_t * __a, int8x8_t __b) { __builtin_neon_vst1v8qi ((__builtin_neon_qi *) __a, __b); @@ -8452,7 +8711,15 @@ __builtin_neon_vst1v4hi ((__builtin_neon_hi *) __a, (int16x4_t) __b); } +#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_p64 (poly64_t * __a, poly64x2_t __b) +{ + __builtin_neon_vst1v2di ((__builtin_neon_di *) __a, (int64x2_t) __b); +} + +#endif +__extension__ static __inline void __attribute__ ((__always_inline__)) vst1q_s8 (int8_t * __a, int8x16_t __b) { __builtin_neon_vst1v16qi ((__builtin_neon_qi *) __a, __b); @@ -8572,7 +8839,15 @@ __builtin_neon_vst1_lanev4hi ((__builtin_neon_hi *) __a, (int16x4_t) __b, __c); } +#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_lane_p64 (poly64_t * __a, poly64x1_t __b, const int __c) +{ + __builtin_neon_vst1_lanedi ((__builtin_neon_di *) __a, __b, __c); +} + +#endif +__extension__ static __inline void __attribute__ ((__always_inline__)) vst1_lane_s64 (int64_t * __a, int64x1_t __b, const int __c) { __builtin_neon_vst1_lanedi ((__builtin_neon_di *) __a, __b, __c); @@ -8638,7 +8913,15 @@ __builtin_neon_vst1_lanev8hi ((__builtin_neon_hi *) __a, (int16x8_t) __b, __c); } +#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_lane_p64 (poly64_t * __a, poly64x2_t __b, const int __c) +{ + __builtin_neon_vst1_lanev2di ((__builtin_neon_di *) __a, (int64x2_t) __b, __c); +} + +#endif +__extension__ static 
__inline void __attribute__ ((__always_inline__)) vst1q_lane_s64 (int64_t * __a, int64x2_t __b, const int __c) { __builtin_neon_vst1_lanev2di ((__builtin_neon_di *) __a, __b, __c); @@ -8722,6 +9005,16 @@ return __rv.__i; } +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1x2_t __attribute__ ((__always_inline__)) +vld2_p64 (const poly64_t * __a) +{ + union { poly64x1x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +#endif __extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__)) vld2_s64 (const int64_t * __a) { @@ -9017,6 +9310,16 @@ return __rv.__i; } +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1x2_t __attribute__ ((__always_inline__)) +vld2_dup_p64 (const poly64_t * __a) +{ + union { poly64x1x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_dupdi ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +#endif __extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__)) vld2_dup_s64 (const int64_t * __a) { @@ -9096,7 +9399,16 @@ __builtin_neon_vst2v4hi ((__builtin_neon_hi *) __a, __bu.__o); } +#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_p64 (poly64_t * __a, poly64x1x2_t __b) +{ + union { poly64x1x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2di ((__builtin_neon_di *) __a, __bu.__o); +} + +#endif +__extension__ static __inline void __attribute__ ((__always_inline__)) vst2_s64 (int64_t * __a, int64x1x2_t __b) { union { int64x1x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; @@ -9350,6 +9662,16 @@ return __rv.__i; } +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1x3_t __attribute__ ((__always_inline__)) +vld3_p64 (const poly64_t * __a) +{ + union { poly64x1x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +#endif __extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__)) vld3_s64 (const int64_t * __a) { @@ -9645,6 +9967,16 @@ return __rv.__i; } +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1x3_t __attribute__ ((__always_inline__)) +vld3_dup_p64 (const poly64_t * __a) +{ + union { poly64x1x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_dupdi ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +#endif __extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__)) vld3_dup_s64 (const int64_t * __a) { @@ -9724,7 +10056,16 @@ __builtin_neon_vst3v4hi ((__builtin_neon_hi *) __a, __bu.__o); } +#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_p64 (poly64_t * __a, poly64x1x3_t __b) +{ + union { poly64x1x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3di ((__builtin_neon_di *) __a, __bu.__o); +} + +#endif +__extension__ static __inline void __attribute__ ((__always_inline__)) vst3_s64 (int64_t * __a, int64x1x3_t __b) { union { int64x1x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; @@ -9978,6 +10319,16 @@ return __rv.__i; } +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1x4_t __attribute__ ((__always_inline__)) +vld4_p64 (const poly64_t * __a) +{ + union { poly64x1x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +#endif __extension__ static __inline 
int64x1x4_t __attribute__ ((__always_inline__)) vld4_s64 (const int64_t * __a) { @@ -10273,6 +10624,16 @@ return __rv.__i; } +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1x4_t __attribute__ ((__always_inline__)) +vld4_dup_p64 (const poly64_t * __a) +{ + union { poly64x1x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_dupdi ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +#endif __extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__)) vld4_dup_s64 (const int64_t * __a) { @@ -10352,7 +10713,16 @@ __builtin_neon_vst4v4hi ((__builtin_neon_hi *) __a, __bu.__o); } +#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_p64 (poly64_t * __a, poly64x1x4_t __b) +{ + union { poly64x1x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4di ((__builtin_neon_di *) __a, __bu.__o); +} + +#endif +__extension__ static __inline void __attribute__ ((__always_inline__)) vst4_s64 (int64_t * __a, int64x1x4_t __b) { union { int64x1x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; @@ -11016,23 +11386,25 @@ __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vreinterpret_p8_s8 (int8x8_t __a) +vreinterpret_p8_p16 (poly16x4_t __a) { - return (poly8x8_t)__builtin_neon_vreinterpretv8qiv8qi (__a); + return (poly8x8_t)__builtin_neon_vreinterpretv8qiv4hi ((int16x4_t) __a); } __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vreinterpret_p8_s16 (int16x4_t __a) +vreinterpret_p8_f32 (float32x2_t __a) { - return (poly8x8_t)__builtin_neon_vreinterpretv8qiv4hi (__a); + return (poly8x8_t)__builtin_neon_vreinterpretv8qiv2sf (__a); } +#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vreinterpret_p8_s32 (int32x2_t __a) +vreinterpret_p8_p64 (poly64x1_t __a) { - return (poly8x8_t)__builtin_neon_vreinterpretv8qiv2si (__a); + return (poly8x8_t)__builtin_neon_vreinterpretv8qidi (__a); } +#endif __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) vreinterpret_p8_s64 (int64x1_t __a) { @@ -11040,101 +11412,79 @@ } __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vreinterpret_p8_f32 (float32x2_t __a) +vreinterpret_p8_u64 (uint64x1_t __a) { - return (poly8x8_t)__builtin_neon_vreinterpretv8qiv2sf (__a); + return (poly8x8_t)__builtin_neon_vreinterpretv8qidi ((int64x1_t) __a); } __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vreinterpret_p8_u8 (uint8x8_t __a) +vreinterpret_p8_s8 (int8x8_t __a) { - return (poly8x8_t)__builtin_neon_vreinterpretv8qiv8qi ((int8x8_t) __a); + return (poly8x8_t)__builtin_neon_vreinterpretv8qiv8qi (__a); } __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vreinterpret_p8_u16 (uint16x4_t __a) +vreinterpret_p8_s16 (int16x4_t __a) { - return (poly8x8_t)__builtin_neon_vreinterpretv8qiv4hi ((int16x4_t) __a); + return (poly8x8_t)__builtin_neon_vreinterpretv8qiv4hi (__a); } __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vreinterpret_p8_u32 (uint32x2_t __a) +vreinterpret_p8_s32 (int32x2_t __a) { - return (poly8x8_t)__builtin_neon_vreinterpretv8qiv2si ((int32x2_t) __a); + return (poly8x8_t)__builtin_neon_vreinterpretv8qiv2si (__a); } __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vreinterpret_p8_u64 (uint64x1_t __a) +vreinterpret_p8_u8 (uint8x8_t __a) { - return (poly8x8_t)__builtin_neon_vreinterpretv8qidi 
((int64x1_t) __a); + return (poly8x8_t)__builtin_neon_vreinterpretv8qiv8qi ((int8x8_t) __a); } __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vreinterpret_p8_p16 (poly16x4_t __a) +vreinterpret_p8_u16 (uint16x4_t __a) { return (poly8x8_t)__builtin_neon_vreinterpretv8qiv4hi ((int16x4_t) __a); } -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vreinterpretq_p8_s8 (int8x16_t __a) +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_u32 (uint32x2_t __a) { - return (poly8x16_t)__builtin_neon_vreinterpretv16qiv16qi (__a); + return (poly8x8_t)__builtin_neon_vreinterpretv8qiv2si ((int32x2_t) __a); } -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vreinterpretq_p8_s16 (int16x8_t __a) +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_p8 (poly8x8_t __a) { - return (poly8x16_t)__builtin_neon_vreinterpretv16qiv8hi (__a); + return (poly16x4_t)__builtin_neon_vreinterpretv4hiv8qi ((int8x8_t) __a); } -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vreinterpretq_p8_s32 (int32x4_t __a) +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_f32 (float32x2_t __a) { - return (poly8x16_t)__builtin_neon_vreinterpretv16qiv4si (__a); + return (poly16x4_t)__builtin_neon_vreinterpretv4hiv2sf (__a); } -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vreinterpretq_p8_s64 (int64x2_t __a) +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_p64 (poly64x1_t __a) { - return (poly8x16_t)__builtin_neon_vreinterpretv16qiv2di (__a); + return (poly16x4_t)__builtin_neon_vreinterpretv4hidi (__a); } -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vreinterpretq_p8_f32 (float32x4_t __a) +#endif +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_s64 (int64x1_t __a) { - return (poly8x16_t)__builtin_neon_vreinterpretv16qiv4sf (__a); + return (poly16x4_t)__builtin_neon_vreinterpretv4hidi (__a); } -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vreinterpretq_p8_u8 (uint8x16_t __a) +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_u64 (uint64x1_t __a) { - return (poly8x16_t)__builtin_neon_vreinterpretv16qiv16qi ((int8x16_t) __a); + return (poly16x4_t)__builtin_neon_vreinterpretv4hidi ((int64x1_t) __a); } -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vreinterpretq_p8_u16 (uint16x8_t __a) -{ - return (poly8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a); -} - -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vreinterpretq_p8_u32 (uint32x4_t __a) -{ - return (poly8x16_t)__builtin_neon_vreinterpretv16qiv4si ((int32x4_t) __a); -} - -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vreinterpretq_p8_u64 (uint64x2_t __a) -{ - return (poly8x16_t)__builtin_neon_vreinterpretv16qiv2di ((int64x2_t) __a); -} - -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vreinterpretq_p8_p16 (poly16x8_t __a) -{ - return (poly8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a); -} - __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) vreinterpret_p16_s8 (int8x8_t __a) { @@ -11154,18 +11504,6 @@ } 
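
(For reference only, a minimal usage sketch of the crypto-guarded poly64 intrinsics this patch adds to arm_neon.h; it is not part of the diff and assumes a toolchain/target where __ARM_FEATURE_CRYPTO is defined, e.g. something like -march=armv8-a -mfpu=crypto-neon-fp-armv8 -mfloat-abi=hard. It only uses entry points introduced above -- vld1_p64/vst1_p64, vld1q_p64, vextq_p64, vbslq_p64 and the p64 vreinterpret forms -- plus the pre-existing vdupq_n_u64.)

#include <arm_neon.h>

#ifdef __ARM_FEATURE_CRYPTO
/* d-register forms: load one poly64 lane, reinterpret both ways, store it.  */
poly64x1_t
p64_roundtrip (const poly64_t *src, poly64_t *dst)
{
  poly64x1_t p = vld1_p64 (src);            /* new: vld1_p64                 */
  uint64x1_t u = vreinterpret_u64_p64 (p);  /* new: p64 -> u64 reinterpret   */
  poly64x1_t q = vreinterpret_p64_u64 (u);  /* new: u64 -> p64 reinterpret   */
  vst1_p64 (dst, q);                        /* new: vst1_p64                 */
  return q;
}

/* q-register forms: extract and bit-select on poly64x2_t values.  */
poly64x2_t
p64_mix (const poly64_t *src)
{
  poly64x2_t a = vld1q_p64 (src);            /* new: vld1q_p64               */
  poly64x2_t b = vextq_p64 (a, a, 1);        /* new: vextq_p64, lane index 1 */
  uint64x2_t m = vdupq_n_u64 (0xffffffffffffffffULL);
  return vbslq_p64 (m, a, b);                /* new: vbslq_p64               */
}
#endif

(With the all-ones mask, vbslq_p64 simply returns its second operand, so the sketch exercises the new entry points without depending on data values.)
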
__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -vreinterpret_p16_s64 (int64x1_t __a) -{ - return (poly16x4_t)__builtin_neon_vreinterpretv4hidi (__a); -} - -__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -vreinterpret_p16_f32 (float32x2_t __a) -{ - return (poly16x4_t)__builtin_neon_vreinterpretv4hiv2sf (__a); -} - -__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) vreinterpret_p16_u8 (uint8x8_t __a) { return (poly16x4_t)__builtin_neon_vreinterpretv4hiv8qi ((int8x8_t) __a); @@ -11183,78 +11521,38 @@ return (poly16x4_t)__builtin_neon_vreinterpretv4hiv2si ((int32x2_t) __a); } -__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -vreinterpret_p16_u64 (uint64x1_t __a) +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_p8 (poly8x8_t __a) { - return (poly16x4_t)__builtin_neon_vreinterpretv4hidi ((int64x1_t) __a); + return (float32x2_t)__builtin_neon_vreinterpretv2sfv8qi ((int8x8_t) __a); } -__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -vreinterpret_p16_p8 (poly8x8_t __a) +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_p16 (poly16x4_t __a) { - return (poly16x4_t)__builtin_neon_vreinterpretv4hiv8qi ((int8x8_t) __a); + return (float32x2_t)__builtin_neon_vreinterpretv2sfv4hi ((int16x4_t) __a); } -__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -vreinterpretq_p16_s8 (int8x16_t __a) +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_p64 (poly64x1_t __a) { - return (poly16x8_t)__builtin_neon_vreinterpretv8hiv16qi (__a); + return (float32x2_t)__builtin_neon_vreinterpretv2sfdi (__a); } -__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -vreinterpretq_p16_s16 (int16x8_t __a) +#endif +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_s64 (int64x1_t __a) { - return (poly16x8_t)__builtin_neon_vreinterpretv8hiv8hi (__a); + return (float32x2_t)__builtin_neon_vreinterpretv2sfdi (__a); } -__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -vreinterpretq_p16_s32 (int32x4_t __a) +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_u64 (uint64x1_t __a) { - return (poly16x8_t)__builtin_neon_vreinterpretv8hiv4si (__a); + return (float32x2_t)__builtin_neon_vreinterpretv2sfdi ((int64x1_t) __a); } -__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -vreinterpretq_p16_s64 (int64x2_t __a) -{ - return (poly16x8_t)__builtin_neon_vreinterpretv8hiv2di (__a); -} - -__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -vreinterpretq_p16_f32 (float32x4_t __a) -{ - return (poly16x8_t)__builtin_neon_vreinterpretv8hiv4sf (__a); -} - -__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -vreinterpretq_p16_u8 (uint8x16_t __a) -{ - return (poly16x8_t)__builtin_neon_vreinterpretv8hiv16qi ((int8x16_t) __a); -} - -__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -vreinterpretq_p16_u16 (uint16x8_t __a) -{ - return (poly16x8_t)__builtin_neon_vreinterpretv8hiv8hi ((int16x8_t) __a); -} - -__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -vreinterpretq_p16_u32 (uint32x4_t __a) -{ - return (poly16x8_t)__builtin_neon_vreinterpretv8hiv4si 
((int32x4_t) __a); -} - -__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -vreinterpretq_p16_u64 (uint64x2_t __a) -{ - return (poly16x8_t)__builtin_neon_vreinterpretv8hiv2di ((int64x2_t) __a); -} - -__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -vreinterpretq_p16_p8 (poly8x16_t __a) -{ - return (poly16x8_t)__builtin_neon_vreinterpretv8hiv16qi ((int8x16_t) __a); -} - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vreinterpret_f32_s8 (int8x8_t __a) { @@ -11274,12 +11572,6 @@ } __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vreinterpret_f32_s64 (int64x1_t __a) -{ - return (float32x2_t)__builtin_neon_vreinterpretv2sfdi (__a); -} - -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vreinterpret_f32_u8 (uint8x8_t __a) { return (float32x2_t)__builtin_neon_vreinterpretv2sfv8qi ((int8x8_t) __a); @@ -11297,85 +11589,127 @@ return (float32x2_t)__builtin_neon_vreinterpretv2sfv2si ((int32x2_t) __a); } -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vreinterpret_f32_u64 (uint64x1_t __a) +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vreinterpret_p64_p8 (poly8x8_t __a) { - return (float32x2_t)__builtin_neon_vreinterpretv2sfdi ((int64x1_t) __a); + return (poly64x1_t)__builtin_neon_vreinterpretdiv8qi ((int8x8_t) __a); } -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vreinterpret_f32_p8 (poly8x8_t __a) +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vreinterpret_p64_p16 (poly16x4_t __a) { - return (float32x2_t)__builtin_neon_vreinterpretv2sfv8qi ((int8x8_t) __a); + return (poly64x1_t)__builtin_neon_vreinterpretdiv4hi ((int16x4_t) __a); } -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vreinterpret_f32_p16 (poly16x4_t __a) +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vreinterpret_p64_f32 (float32x2_t __a) { - return (float32x2_t)__builtin_neon_vreinterpretv2sfv4hi ((int16x4_t) __a); + return (poly64x1_t)__builtin_neon_vreinterpretdiv2sf (__a); } -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vreinterpretq_f32_s8 (int8x16_t __a) +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vreinterpret_p64_s64 (int64x1_t __a) { - return (float32x4_t)__builtin_neon_vreinterpretv4sfv16qi (__a); + return (poly64x1_t)__builtin_neon_vreinterpretdidi (__a); } -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vreinterpretq_f32_s16 (int16x8_t __a) +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vreinterpret_p64_u64 (uint64x1_t __a) { - return (float32x4_t)__builtin_neon_vreinterpretv4sfv8hi (__a); + return (poly64x1_t)__builtin_neon_vreinterpretdidi ((int64x1_t) __a); } -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vreinterpretq_f32_s32 (int32x4_t __a) +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vreinterpret_p64_s8 (int8x8_t __a) { - return (float32x4_t)__builtin_neon_vreinterpretv4sfv4si (__a); + return (poly64x1_t)__builtin_neon_vreinterpretdiv8qi (__a); } -__extension__ static __inline 
float32x4_t __attribute__ ((__always_inline__)) -vreinterpretq_f32_s64 (int64x2_t __a) +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vreinterpret_p64_s16 (int16x4_t __a) { - return (float32x4_t)__builtin_neon_vreinterpretv4sfv2di (__a); + return (poly64x1_t)__builtin_neon_vreinterpretdiv4hi (__a); } -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vreinterpretq_f32_u8 (uint8x16_t __a) +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vreinterpret_p64_s32 (int32x2_t __a) { - return (float32x4_t)__builtin_neon_vreinterpretv4sfv16qi ((int8x16_t) __a); + return (poly64x1_t)__builtin_neon_vreinterpretdiv2si (__a); } -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vreinterpretq_f32_u16 (uint16x8_t __a) +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vreinterpret_p64_u8 (uint8x8_t __a) { - return (float32x4_t)__builtin_neon_vreinterpretv4sfv8hi ((int16x8_t) __a); + return (poly64x1_t)__builtin_neon_vreinterpretdiv8qi ((int8x8_t) __a); } -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vreinterpretq_f32_u32 (uint32x4_t __a) +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vreinterpret_p64_u16 (uint16x4_t __a) { - return (float32x4_t)__builtin_neon_vreinterpretv4sfv4si ((int32x4_t) __a); + return (poly64x1_t)__builtin_neon_vreinterpretdiv4hi ((int16x4_t) __a); } -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vreinterpretq_f32_u64 (uint64x2_t __a) +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vreinterpret_p64_u32 (uint32x2_t __a) { - return (float32x4_t)__builtin_neon_vreinterpretv4sfv2di ((int64x2_t) __a); + return (poly64x1_t)__builtin_neon_vreinterpretdiv2si ((int32x2_t) __a); } -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vreinterpretq_f32_p8 (poly8x16_t __a) +#endif +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_p8 (poly8x8_t __a) { - return (float32x4_t)__builtin_neon_vreinterpretv4sfv16qi ((int8x16_t) __a); + return (int64x1_t)__builtin_neon_vreinterpretdiv8qi ((int8x8_t) __a); } -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vreinterpretq_f32_p16 (poly16x8_t __a) +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_p16 (poly16x4_t __a) { - return (float32x4_t)__builtin_neon_vreinterpretv4sfv8hi ((int16x8_t) __a); + return (int64x1_t)__builtin_neon_vreinterpretdiv4hi ((int16x4_t) __a); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_f32 (float32x2_t __a) +{ + return (int64x1_t)__builtin_neon_vreinterpretdiv2sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_p64 (poly64x1_t __a) +{ + return (int64x1_t)__builtin_neon_vreinterpretdidi (__a); +} + +#endif +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_u64 (uint64x1_t __a) +{ + return (int64x1_t)__builtin_neon_vreinterpretdidi ((int64x1_t) __a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vreinterpret_s64_s8 
(int8x8_t __a) { return (int64x1_t)__builtin_neon_vreinterpretdiv8qi (__a); @@ -11394,12 +11728,6 @@ } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vreinterpret_s64_f32 (float32x2_t __a) -{ - return (int64x1_t)__builtin_neon_vreinterpretdiv2sf (__a); -} - -__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vreinterpret_s64_u8 (uint8x8_t __a) { return (int64x1_t)__builtin_neon_vreinterpretdiv8qi ((int8x8_t) __a); @@ -11417,552 +11745,1206 @@ return (int64x1_t)__builtin_neon_vreinterpretdiv2si ((int32x2_t) __a); } -__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vreinterpret_s64_u64 (uint64x1_t __a) +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_p8 (poly8x8_t __a) { - return (int64x1_t)__builtin_neon_vreinterpretdidi ((int64x1_t) __a); + return (uint64x1_t)__builtin_neon_vreinterpretdiv8qi ((int8x8_t) __a); } -__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vreinterpret_s64_p8 (poly8x8_t __a) +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_p16 (poly16x4_t __a) { - return (int64x1_t)__builtin_neon_vreinterpretdiv8qi ((int8x8_t) __a); + return (uint64x1_t)__builtin_neon_vreinterpretdiv4hi ((int16x4_t) __a); } -__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vreinterpret_s64_p16 (poly16x4_t __a) +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_f32 (float32x2_t __a) { - return (int64x1_t)__builtin_neon_vreinterpretdiv4hi ((int16x4_t) __a); + return (uint64x1_t)__builtin_neon_vreinterpretdiv2sf (__a); } -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vreinterpretq_s64_s8 (int8x16_t __a) +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_p64 (poly64x1_t __a) { - return (int64x2_t)__builtin_neon_vreinterpretv2div16qi (__a); + return (uint64x1_t)__builtin_neon_vreinterpretdidi (__a); } -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vreinterpretq_s64_s16 (int16x8_t __a) +#endif +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_s64 (int64x1_t __a) { - return (int64x2_t)__builtin_neon_vreinterpretv2div8hi (__a); + return (uint64x1_t)__builtin_neon_vreinterpretdidi (__a); } -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vreinterpretq_s64_s32 (int32x4_t __a) +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_s8 (int8x8_t __a) { - return (int64x2_t)__builtin_neon_vreinterpretv2div4si (__a); + return (uint64x1_t)__builtin_neon_vreinterpretdiv8qi (__a); } -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vreinterpretq_s64_f32 (float32x4_t __a) +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_s16 (int16x4_t __a) { - return (int64x2_t)__builtin_neon_vreinterpretv2div4sf (__a); + return (uint64x1_t)__builtin_neon_vreinterpretdiv4hi (__a); } -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vreinterpretq_s64_u8 (uint8x16_t __a) +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_s32 (int32x2_t __a) { - return (int64x2_t)__builtin_neon_vreinterpretv2div16qi ((int8x16_t) __a); + return (uint64x1_t)__builtin_neon_vreinterpretdiv2si 
(__a); } -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vreinterpretq_s64_u16 (uint16x8_t __a) +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_u8 (uint8x8_t __a) { - return (int64x2_t)__builtin_neon_vreinterpretv2div8hi ((int16x8_t) __a); + return (uint64x1_t)__builtin_neon_vreinterpretdiv8qi ((int8x8_t) __a); } -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vreinterpretq_s64_u32 (uint32x4_t __a) +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_u16 (uint16x4_t __a) { - return (int64x2_t)__builtin_neon_vreinterpretv2div4si ((int32x4_t) __a); + return (uint64x1_t)__builtin_neon_vreinterpretdiv4hi ((int16x4_t) __a); } -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vreinterpretq_s64_u64 (uint64x2_t __a) +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_u32 (uint32x2_t __a) { - return (int64x2_t)__builtin_neon_vreinterpretv2div2di ((int64x2_t) __a); + return (uint64x1_t)__builtin_neon_vreinterpretdiv2si ((int32x2_t) __a); } -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vreinterpretq_s64_p8 (poly8x16_t __a) +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_p8 (poly8x8_t __a) { - return (int64x2_t)__builtin_neon_vreinterpretv2div16qi ((int8x16_t) __a); + return (int8x8_t)__builtin_neon_vreinterpretv8qiv8qi ((int8x8_t) __a); } -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vreinterpretq_s64_p16 (poly16x8_t __a) +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_p16 (poly16x4_t __a) { - return (int64x2_t)__builtin_neon_vreinterpretv2div8hi ((int16x8_t) __a); + return (int8x8_t)__builtin_neon_vreinterpretv8qiv4hi ((int16x4_t) __a); } -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vreinterpret_u64_s8 (int8x8_t __a) +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_f32 (float32x2_t __a) { - return (uint64x1_t)__builtin_neon_vreinterpretdiv8qi (__a); + return (int8x8_t)__builtin_neon_vreinterpretv8qiv2sf (__a); } -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vreinterpret_u64_s16 (int16x4_t __a) +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_p64 (poly64x1_t __a) { - return (uint64x1_t)__builtin_neon_vreinterpretdiv4hi (__a); + return (int8x8_t)__builtin_neon_vreinterpretv8qidi (__a); } -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vreinterpret_u64_s32 (int32x2_t __a) +#endif +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_s64 (int64x1_t __a) { - return (uint64x1_t)__builtin_neon_vreinterpretdiv2si (__a); + return (int8x8_t)__builtin_neon_vreinterpretv8qidi (__a); } -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vreinterpret_u64_s64 (int64x1_t __a) +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_u64 (uint64x1_t __a) { - return (uint64x1_t)__builtin_neon_vreinterpretdidi (__a); + return (int8x8_t)__builtin_neon_vreinterpretv8qidi ((int64x1_t) __a); } -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vreinterpret_u64_f32 (float32x2_t __a) +__extension__ static __inline 
int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_s16 (int16x4_t __a) { - return (uint64x1_t)__builtin_neon_vreinterpretdiv2sf (__a); + return (int8x8_t)__builtin_neon_vreinterpretv8qiv4hi (__a); } -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vreinterpret_u64_u8 (uint8x8_t __a) +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_s32 (int32x2_t __a) { - return (uint64x1_t)__builtin_neon_vreinterpretdiv8qi ((int8x8_t) __a); + return (int8x8_t)__builtin_neon_vreinterpretv8qiv2si (__a); } -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vreinterpret_u64_u16 (uint16x4_t __a) +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_u8 (uint8x8_t __a) { - return (uint64x1_t)__builtin_neon_vreinterpretdiv4hi ((int16x4_t) __a); + return (int8x8_t)__builtin_neon_vreinterpretv8qiv8qi ((int8x8_t) __a); } -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vreinterpret_u64_u32 (uint32x2_t __a) +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_u16 (uint16x4_t __a) { - return (uint64x1_t)__builtin_neon_vreinterpretdiv2si ((int32x2_t) __a); + return (int8x8_t)__builtin_neon_vreinterpretv8qiv4hi ((int16x4_t) __a); } -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vreinterpret_u64_p8 (poly8x8_t __a) +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_u32 (uint32x2_t __a) { - return (uint64x1_t)__builtin_neon_vreinterpretdiv8qi ((int8x8_t) __a); + return (int8x8_t)__builtin_neon_vreinterpretv8qiv2si ((int32x2_t) __a); } -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vreinterpret_u64_p16 (poly16x4_t __a) +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_p8 (poly8x8_t __a) { - return (uint64x1_t)__builtin_neon_vreinterpretdiv4hi ((int16x4_t) __a); + return (int16x4_t)__builtin_neon_vreinterpretv4hiv8qi ((int8x8_t) __a); } -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vreinterpretq_u64_s8 (int8x16_t __a) +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_p16 (poly16x4_t __a) { - return (uint64x2_t)__builtin_neon_vreinterpretv2div16qi (__a); + return (int16x4_t)__builtin_neon_vreinterpretv4hiv4hi ((int16x4_t) __a); } -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vreinterpretq_u64_s16 (int16x8_t __a) +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_f32 (float32x2_t __a) { - return (uint64x2_t)__builtin_neon_vreinterpretv2div8hi (__a); + return (int16x4_t)__builtin_neon_vreinterpretv4hiv2sf (__a); } -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vreinterpretq_u64_s32 (int32x4_t __a) +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_p64 (poly64x1_t __a) { - return (uint64x2_t)__builtin_neon_vreinterpretv2div4si (__a); + return (int16x4_t)__builtin_neon_vreinterpretv4hidi (__a); } -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vreinterpretq_u64_s64 (int64x2_t __a) +#endif +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_s64 (int64x1_t __a) { - return (uint64x2_t)__builtin_neon_vreinterpretv2div2di (__a); + return 
(int16x4_t)__builtin_neon_vreinterpretv4hidi (__a); } -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vreinterpretq_u64_f32 (float32x4_t __a) +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_u64 (uint64x1_t __a) { - return (uint64x2_t)__builtin_neon_vreinterpretv2div4sf (__a); + return (int16x4_t)__builtin_neon_vreinterpretv4hidi ((int64x1_t) __a); } -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vreinterpretq_u64_u8 (uint8x16_t __a) +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_s8 (int8x8_t __a) { - return (uint64x2_t)__builtin_neon_vreinterpretv2div16qi ((int8x16_t) __a); + return (int16x4_t)__builtin_neon_vreinterpretv4hiv8qi (__a); } -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vreinterpretq_u64_u16 (uint16x8_t __a) +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_s32 (int32x2_t __a) { - return (uint64x2_t)__builtin_neon_vreinterpretv2div8hi ((int16x8_t) __a); + return (int16x4_t)__builtin_neon_vreinterpretv4hiv2si (__a); } -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vreinterpretq_u64_u32 (uint32x4_t __a) +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_u8 (uint8x8_t __a) { - return (uint64x2_t)__builtin_neon_vreinterpretv2div4si ((int32x4_t) __a); + return (int16x4_t)__builtin_neon_vreinterpretv4hiv8qi ((int8x8_t) __a); } -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vreinterpretq_u64_p8 (poly8x16_t __a) +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_u16 (uint16x4_t __a) { - return (uint64x2_t)__builtin_neon_vreinterpretv2div16qi ((int8x16_t) __a); + return (int16x4_t)__builtin_neon_vreinterpretv4hiv4hi ((int16x4_t) __a); } -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vreinterpretq_u64_p16 (poly16x8_t __a) +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_u32 (uint32x2_t __a) { - return (uint64x2_t)__builtin_neon_vreinterpretv2div8hi ((int16x8_t) __a); + return (int16x4_t)__builtin_neon_vreinterpretv4hiv2si ((int32x2_t) __a); } -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vreinterpret_s8_s16 (int16x4_t __a) +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_p8 (poly8x8_t __a) { - return (int8x8_t)__builtin_neon_vreinterpretv8qiv4hi (__a); + return (int32x2_t)__builtin_neon_vreinterpretv2siv8qi ((int8x8_t) __a); } -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vreinterpret_s8_s32 (int32x2_t __a) +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_p16 (poly16x4_t __a) { - return (int8x8_t)__builtin_neon_vreinterpretv8qiv2si (__a); + return (int32x2_t)__builtin_neon_vreinterpretv2siv4hi ((int16x4_t) __a); } -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vreinterpret_s8_s64 (int64x1_t __a) +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_f32 (float32x2_t __a) { - return (int8x8_t)__builtin_neon_vreinterpretv8qidi (__a); + return (int32x2_t)__builtin_neon_vreinterpretv2siv2sf (__a); } -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vreinterpret_s8_f32 (float32x2_t __a) 
+#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_p64 (poly64x1_t __a) { - return (int8x8_t)__builtin_neon_vreinterpretv8qiv2sf (__a); + return (int32x2_t)__builtin_neon_vreinterpretv2sidi (__a); } -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vreinterpret_s8_u8 (uint8x8_t __a) +#endif +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_s64 (int64x1_t __a) { - return (int8x8_t)__builtin_neon_vreinterpretv8qiv8qi ((int8x8_t) __a); + return (int32x2_t)__builtin_neon_vreinterpretv2sidi (__a); } -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vreinterpret_s8_u16 (uint16x4_t __a) +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_u64 (uint64x1_t __a) { - return (int8x8_t)__builtin_neon_vreinterpretv8qiv4hi ((int16x4_t) __a); + return (int32x2_t)__builtin_neon_vreinterpretv2sidi ((int64x1_t) __a); } -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vreinterpret_s8_u32 (uint32x2_t __a) +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_s8 (int8x8_t __a) { - return (int8x8_t)__builtin_neon_vreinterpretv8qiv2si ((int32x2_t) __a); + return (int32x2_t)__builtin_neon_vreinterpretv2siv8qi (__a); } -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vreinterpret_s8_u64 (uint64x1_t __a) +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_s16 (int16x4_t __a) { - return (int8x8_t)__builtin_neon_vreinterpretv8qidi ((int64x1_t) __a); + return (int32x2_t)__builtin_neon_vreinterpretv2siv4hi (__a); } -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vreinterpret_s8_p8 (poly8x8_t __a) +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_u8 (uint8x8_t __a) { - return (int8x8_t)__builtin_neon_vreinterpretv8qiv8qi ((int8x8_t) __a); + return (int32x2_t)__builtin_neon_vreinterpretv2siv8qi ((int8x8_t) __a); } -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vreinterpret_s8_p16 (poly16x4_t __a) +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_u16 (uint16x4_t __a) { - return (int8x8_t)__builtin_neon_vreinterpretv8qiv4hi ((int16x4_t) __a); + return (int32x2_t)__builtin_neon_vreinterpretv2siv4hi ((int16x4_t) __a); } -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vreinterpretq_s8_s16 (int16x8_t __a) +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_u32 (uint32x2_t __a) { - return (int8x16_t)__builtin_neon_vreinterpretv16qiv8hi (__a); + return (int32x2_t)__builtin_neon_vreinterpretv2siv2si ((int32x2_t) __a); } -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vreinterpretq_s8_s32 (int32x4_t __a) +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_p8 (poly8x8_t __a) { - return (int8x16_t)__builtin_neon_vreinterpretv16qiv4si (__a); + return (uint8x8_t)__builtin_neon_vreinterpretv8qiv8qi ((int8x8_t) __a); } -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vreinterpretq_s8_s64 (int64x2_t __a) +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_p16 (poly16x4_t __a) { - return (int8x16_t)__builtin_neon_vreinterpretv16qiv2di 
(__a); + return (uint8x8_t)__builtin_neon_vreinterpretv8qiv4hi ((int16x4_t) __a); } -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vreinterpretq_s8_f32 (float32x4_t __a) +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_f32 (float32x2_t __a) { - return (int8x16_t)__builtin_neon_vreinterpretv16qiv4sf (__a); + return (uint8x8_t)__builtin_neon_vreinterpretv8qiv2sf (__a); } -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vreinterpretq_s8_u8 (uint8x16_t __a) +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_p64 (poly64x1_t __a) { - return (int8x16_t)__builtin_neon_vreinterpretv16qiv16qi ((int8x16_t) __a); + return (uint8x8_t)__builtin_neon_vreinterpretv8qidi (__a); } -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vreinterpretq_s8_u16 (uint16x8_t __a) +#endif +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_s64 (int64x1_t __a) { - return (int8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a); + return (uint8x8_t)__builtin_neon_vreinterpretv8qidi (__a); } -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vreinterpretq_s8_u32 (uint32x4_t __a) +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_u64 (uint64x1_t __a) { - return (int8x16_t)__builtin_neon_vreinterpretv16qiv4si ((int32x4_t) __a); + return (uint8x8_t)__builtin_neon_vreinterpretv8qidi ((int64x1_t) __a); } -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vreinterpretq_s8_u64 (uint64x2_t __a) +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_s8 (int8x8_t __a) { - return (int8x16_t)__builtin_neon_vreinterpretv16qiv2di ((int64x2_t) __a); + return (uint8x8_t)__builtin_neon_vreinterpretv8qiv8qi (__a); } -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vreinterpretq_s8_p8 (poly8x16_t __a) +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_s16 (int16x4_t __a) { - return (int8x16_t)__builtin_neon_vreinterpretv16qiv16qi ((int8x16_t) __a); + return (uint8x8_t)__builtin_neon_vreinterpretv8qiv4hi (__a); } -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vreinterpretq_s8_p16 (poly16x8_t __a) +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_s32 (int32x2_t __a) { - return (int8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a); + return (uint8x8_t)__builtin_neon_vreinterpretv8qiv2si (__a); } -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vreinterpret_s16_s8 (int8x8_t __a) +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_u16 (uint16x4_t __a) { - return (int16x4_t)__builtin_neon_vreinterpretv4hiv8qi (__a); + return (uint8x8_t)__builtin_neon_vreinterpretv8qiv4hi ((int16x4_t) __a); } -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vreinterpret_s16_s32 (int32x2_t __a) +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_u32 (uint32x2_t __a) { - return (int16x4_t)__builtin_neon_vreinterpretv4hiv2si (__a); + return (uint8x8_t)__builtin_neon_vreinterpretv8qiv2si ((int32x2_t) __a); } -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 
-vreinterpret_s16_s64 (int64x1_t __a) +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_p8 (poly8x8_t __a) { - return (int16x4_t)__builtin_neon_vreinterpretv4hidi (__a); + return (uint16x4_t)__builtin_neon_vreinterpretv4hiv8qi ((int8x8_t) __a); } -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vreinterpret_s16_f32 (float32x2_t __a) +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_p16 (poly16x4_t __a) { - return (int16x4_t)__builtin_neon_vreinterpretv4hiv2sf (__a); + return (uint16x4_t)__builtin_neon_vreinterpretv4hiv4hi ((int16x4_t) __a); } -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vreinterpret_s16_u8 (uint8x8_t __a) +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_f32 (float32x2_t __a) { - return (int16x4_t)__builtin_neon_vreinterpretv4hiv8qi ((int8x8_t) __a); + return (uint16x4_t)__builtin_neon_vreinterpretv4hiv2sf (__a); } -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vreinterpret_s16_u16 (uint16x4_t __a) +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_p64 (poly64x1_t __a) { - return (int16x4_t)__builtin_neon_vreinterpretv4hiv4hi ((int16x4_t) __a); + return (uint16x4_t)__builtin_neon_vreinterpretv4hidi (__a); } -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vreinterpret_s16_u32 (uint32x2_t __a) +#endif +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_s64 (int64x1_t __a) { - return (int16x4_t)__builtin_neon_vreinterpretv4hiv2si ((int32x2_t) __a); + return (uint16x4_t)__builtin_neon_vreinterpretv4hidi (__a); } -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vreinterpret_s16_u64 (uint64x1_t __a) +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_u64 (uint64x1_t __a) { - return (int16x4_t)__builtin_neon_vreinterpretv4hidi ((int64x1_t) __a); + return (uint16x4_t)__builtin_neon_vreinterpretv4hidi ((int64x1_t) __a); } -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vreinterpret_s16_p8 (poly8x8_t __a) +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_s8 (int8x8_t __a) { - return (int16x4_t)__builtin_neon_vreinterpretv4hiv8qi ((int8x8_t) __a); + return (uint16x4_t)__builtin_neon_vreinterpretv4hiv8qi (__a); } -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vreinterpret_s16_p16 (poly16x4_t __a) +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_s16 (int16x4_t __a) { - return (int16x4_t)__builtin_neon_vreinterpretv4hiv4hi ((int16x4_t) __a); + return (uint16x4_t)__builtin_neon_vreinterpretv4hiv4hi (__a); } -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vreinterpretq_s16_s8 (int8x16_t __a) +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_s32 (int32x2_t __a) { - return (int16x8_t)__builtin_neon_vreinterpretv8hiv16qi (__a); + return (uint16x4_t)__builtin_neon_vreinterpretv4hiv2si (__a); } -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vreinterpretq_s16_s32 (int32x4_t __a) +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_u8 (uint8x8_t __a) { - 
return (int16x8_t)__builtin_neon_vreinterpretv8hiv4si (__a); + return (uint16x4_t)__builtin_neon_vreinterpretv4hiv8qi ((int8x8_t) __a); } -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vreinterpretq_s16_s64 (int64x2_t __a) +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_u32 (uint32x2_t __a) { - return (int16x8_t)__builtin_neon_vreinterpretv8hiv2di (__a); + return (uint16x4_t)__builtin_neon_vreinterpretv4hiv2si ((int32x2_t) __a); } -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vreinterpretq_s16_f32 (float32x4_t __a) +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_p8 (poly8x8_t __a) { - return (int16x8_t)__builtin_neon_vreinterpretv8hiv4sf (__a); + return (uint32x2_t)__builtin_neon_vreinterpretv2siv8qi ((int8x8_t) __a); } -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vreinterpretq_s16_u8 (uint8x16_t __a) +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_p16 (poly16x4_t __a) { - return (int16x8_t)__builtin_neon_vreinterpretv8hiv16qi ((int8x16_t) __a); + return (uint32x2_t)__builtin_neon_vreinterpretv2siv4hi ((int16x4_t) __a); } -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vreinterpretq_s16_u16 (uint16x8_t __a) +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_f32 (float32x2_t __a) { - return (int16x8_t)__builtin_neon_vreinterpretv8hiv8hi ((int16x8_t) __a); + return (uint32x2_t)__builtin_neon_vreinterpretv2siv2sf (__a); } -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vreinterpretq_s16_u32 (uint32x4_t __a) +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_p64 (poly64x1_t __a) { - return (int16x8_t)__builtin_neon_vreinterpretv8hiv4si ((int32x4_t) __a); + return (uint32x2_t)__builtin_neon_vreinterpretv2sidi (__a); } -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vreinterpretq_s16_u64 (uint64x2_t __a) +#endif +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_s64 (int64x1_t __a) { - return (int16x8_t)__builtin_neon_vreinterpretv8hiv2di ((int64x2_t) __a); + return (uint32x2_t)__builtin_neon_vreinterpretv2sidi (__a); } -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vreinterpretq_s16_p8 (poly8x16_t __a) +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_u64 (uint64x1_t __a) { - return (int16x8_t)__builtin_neon_vreinterpretv8hiv16qi ((int8x16_t) __a); + return (uint32x2_t)__builtin_neon_vreinterpretv2sidi ((int64x1_t) __a); } -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vreinterpretq_s16_p16 (poly16x8_t __a) +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_s8 (int8x8_t __a) { - return (int16x8_t)__builtin_neon_vreinterpretv8hiv8hi ((int16x8_t) __a); + return (uint32x2_t)__builtin_neon_vreinterpretv2siv8qi (__a); } -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vreinterpret_s32_s8 (int8x8_t __a) +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_s16 (int16x4_t __a) { - return (int32x2_t)__builtin_neon_vreinterpretv2siv8qi (__a); + return (uint32x2_t)__builtin_neon_vreinterpretv2siv4hi 
(__a); } -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vreinterpret_s32_s16 (int16x4_t __a) +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_s32 (int32x2_t __a) { - return (int32x2_t)__builtin_neon_vreinterpretv2siv4hi (__a); + return (uint32x2_t)__builtin_neon_vreinterpretv2siv2si (__a); } -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vreinterpret_s32_s64 (int64x1_t __a) +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_u8 (uint8x8_t __a) { - return (int32x2_t)__builtin_neon_vreinterpretv2sidi (__a); + return (uint32x2_t)__builtin_neon_vreinterpretv2siv8qi ((int8x8_t) __a); } -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vreinterpret_s32_f32 (float32x2_t __a) +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_u16 (uint16x4_t __a) { - return (int32x2_t)__builtin_neon_vreinterpretv2siv2sf (__a); + return (uint32x2_t)__builtin_neon_vreinterpretv2siv4hi ((int16x4_t) __a); } -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vreinterpret_s32_u8 (uint8x8_t __a) +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_p16 (poly16x8_t __a) { - return (int32x2_t)__builtin_neon_vreinterpretv2siv8qi ((int8x8_t) __a); + return (poly8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a); } -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vreinterpret_s32_u16 (uint16x4_t __a) +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_f32 (float32x4_t __a) { - return (int32x2_t)__builtin_neon_vreinterpretv2siv4hi ((int16x4_t) __a); + return (poly8x16_t)__builtin_neon_vreinterpretv16qiv4sf (__a); } -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vreinterpret_s32_u32 (uint32x2_t __a) +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_p64 (poly64x2_t __a) { - return (int32x2_t)__builtin_neon_vreinterpretv2siv2si ((int32x2_t) __a); + return (poly8x16_t)__builtin_neon_vreinterpretv16qiv2di ((int64x2_t) __a); } -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vreinterpret_s32_u64 (uint64x1_t __a) +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_p128 (poly128_t __a) { - return (int32x2_t)__builtin_neon_vreinterpretv2sidi ((int64x1_t) __a); + return (poly8x16_t)__builtin_neon_vreinterpretv16qiti ((__builtin_neon_ti) __a); } -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vreinterpret_s32_p8 (poly8x8_t __a) +#endif +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_s64 (int64x2_t __a) { - return (int32x2_t)__builtin_neon_vreinterpretv2siv8qi ((int8x8_t) __a); + return (poly8x16_t)__builtin_neon_vreinterpretv16qiv2di (__a); } -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vreinterpret_s32_p16 (poly16x4_t __a) +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_u64 (uint64x2_t __a) { - return (int32x2_t)__builtin_neon_vreinterpretv2siv4hi ((int16x4_t) __a); + return (poly8x16_t)__builtin_neon_vreinterpretv16qiv2di ((int64x2_t) __a); } -__extension__ static __inline int32x4_t __attribute__ 
((__always_inline__)) -vreinterpretq_s32_s8 (int8x16_t __a) +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_s8 (int8x16_t __a) { - return (int32x4_t)__builtin_neon_vreinterpretv4siv16qi (__a); + return (poly8x16_t)__builtin_neon_vreinterpretv16qiv16qi (__a); } -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vreinterpretq_s32_s16 (int16x8_t __a) +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_s16 (int16x8_t __a) { - return (int32x4_t)__builtin_neon_vreinterpretv4siv8hi (__a); + return (poly8x16_t)__builtin_neon_vreinterpretv16qiv8hi (__a); } -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vreinterpretq_s32_s64 (int64x2_t __a) +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_s32 (int32x4_t __a) { - return (int32x4_t)__builtin_neon_vreinterpretv4siv2di (__a); + return (poly8x16_t)__builtin_neon_vreinterpretv16qiv4si (__a); } -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vreinterpretq_s32_f32 (float32x4_t __a) +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_u8 (uint8x16_t __a) { - return (int32x4_t)__builtin_neon_vreinterpretv4siv4sf (__a); + return (poly8x16_t)__builtin_neon_vreinterpretv16qiv16qi ((int8x16_t) __a); } -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vreinterpretq_s32_u8 (uint8x16_t __a) +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_u16 (uint16x8_t __a) { - return (int32x4_t)__builtin_neon_vreinterpretv4siv16qi ((int8x16_t) __a); + return (poly8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a); } -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vreinterpretq_s32_u16 (uint16x8_t __a) +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_u32 (uint32x4_t __a) { - return (int32x4_t)__builtin_neon_vreinterpretv4siv8hi ((int16x8_t) __a); + return (poly8x16_t)__builtin_neon_vreinterpretv16qiv4si ((int32x4_t) __a); } -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vreinterpretq_s32_u32 (uint32x4_t __a) +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_p8 (poly8x16_t __a) { - return (int32x4_t)__builtin_neon_vreinterpretv4siv4si ((int32x4_t) __a); + return (poly16x8_t)__builtin_neon_vreinterpretv8hiv16qi ((int8x16_t) __a); } -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vreinterpretq_s32_u64 (uint64x2_t __a) +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_f32 (float32x4_t __a) { - return (int32x4_t)__builtin_neon_vreinterpretv4siv2di ((int64x2_t) __a); + return (poly16x8_t)__builtin_neon_vreinterpretv8hiv4sf (__a); } +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_p64 (poly64x2_t __a) +{ + return (poly16x8_t)__builtin_neon_vreinterpretv8hiv2di ((int64x2_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_p128 (poly128_t __a) +{ + return (poly16x8_t)__builtin_neon_vreinterpretv8hiti ((__builtin_neon_ti) __a); +} + +#endif +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_s64 
(int64x2_t __a) +{ + return (poly16x8_t)__builtin_neon_vreinterpretv8hiv2di (__a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_u64 (uint64x2_t __a) +{ + return (poly16x8_t)__builtin_neon_vreinterpretv8hiv2di ((int64x2_t) __a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_s8 (int8x16_t __a) +{ + return (poly16x8_t)__builtin_neon_vreinterpretv8hiv16qi (__a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_s16 (int16x8_t __a) +{ + return (poly16x8_t)__builtin_neon_vreinterpretv8hiv8hi (__a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_s32 (int32x4_t __a) +{ + return (poly16x8_t)__builtin_neon_vreinterpretv8hiv4si (__a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_u8 (uint8x16_t __a) +{ + return (poly16x8_t)__builtin_neon_vreinterpretv8hiv16qi ((int8x16_t) __a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_u16 (uint16x8_t __a) +{ + return (poly16x8_t)__builtin_neon_vreinterpretv8hiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_u32 (uint32x4_t __a) +{ + return (poly16x8_t)__builtin_neon_vreinterpretv8hiv4si ((int32x4_t) __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_p8 (poly8x16_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfv16qi ((int8x16_t) __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_p16 (poly16x8_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfv8hi ((int16x8_t) __a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_p64 (poly64x2_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfv2di ((int64x2_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_p128 (poly128_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfti ((__builtin_neon_ti) __a); +} + +#endif +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_s64 (int64x2_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfv2di (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_u64 (uint64x2_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfv2di ((int64x2_t) __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_s8 (int8x16_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfv16qi (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_s16 (int16x8_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfv8hi (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_s32 (int32x4_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfv4si (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_u8 (uint8x16_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfv16qi ((int8x16_t) __a); +} + +__extension__ static 
__inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_u16 (uint16x8_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfv8hi ((int16x8_t) __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_u32 (uint32x4_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfv4si ((int32x4_t) __a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_p64_p8 (poly8x16_t __a) +{ + return (poly64x2_t)__builtin_neon_vreinterpretv2div16qi ((int8x16_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_p64_p16 (poly16x8_t __a) +{ + return (poly64x2_t)__builtin_neon_vreinterpretv2div8hi ((int16x8_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_p64_f32 (float32x4_t __a) +{ + return (poly64x2_t)__builtin_neon_vreinterpretv2div4sf (__a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_p64_p128 (poly128_t __a) +{ + return (poly64x2_t)__builtin_neon_vreinterpretv2diti ((__builtin_neon_ti) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_p64_s64 (int64x2_t __a) +{ + return (poly64x2_t)__builtin_neon_vreinterpretv2div2di (__a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_p64_u64 (uint64x2_t __a) +{ + return (poly64x2_t)__builtin_neon_vreinterpretv2div2di ((int64x2_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_p64_s8 (int8x16_t __a) +{ + return (poly64x2_t)__builtin_neon_vreinterpretv2div16qi (__a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_p64_s16 (int16x8_t __a) +{ + return (poly64x2_t)__builtin_neon_vreinterpretv2div8hi (__a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_p64_s32 (int32x4_t __a) +{ + return (poly64x2_t)__builtin_neon_vreinterpretv2div4si (__a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_p64_u8 (uint8x16_t __a) +{ + return (poly64x2_t)__builtin_neon_vreinterpretv2div16qi ((int8x16_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_p64_u16 (uint16x8_t __a) +{ + return (poly64x2_t)__builtin_neon_vreinterpretv2div8hi ((int16x8_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_p64_u32 (uint32x4_t __a) +{ + return (poly64x2_t)__builtin_neon_vreinterpretv2div4si ((int32x4_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vreinterpretq_p128_p8 (poly8x16_t __a) +{ + return (poly128_t)__builtin_neon_vreinterprettiv16qi ((int8x16_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) 
+vreinterpretq_p128_p16 (poly16x8_t __a) +{ + return (poly128_t)__builtin_neon_vreinterprettiv8hi ((int16x8_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vreinterpretq_p128_f32 (float32x4_t __a) +{ + return (poly128_t)__builtin_neon_vreinterprettiv4sf (__a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vreinterpretq_p128_p64 (poly64x2_t __a) +{ + return (poly128_t)__builtin_neon_vreinterprettiv2di ((int64x2_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vreinterpretq_p128_s64 (int64x2_t __a) +{ + return (poly128_t)__builtin_neon_vreinterprettiv2di (__a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vreinterpretq_p128_u64 (uint64x2_t __a) +{ + return (poly128_t)__builtin_neon_vreinterprettiv2di ((int64x2_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vreinterpretq_p128_s8 (int8x16_t __a) +{ + return (poly128_t)__builtin_neon_vreinterprettiv16qi (__a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vreinterpretq_p128_s16 (int16x8_t __a) +{ + return (poly128_t)__builtin_neon_vreinterprettiv8hi (__a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vreinterpretq_p128_s32 (int32x4_t __a) +{ + return (poly128_t)__builtin_neon_vreinterprettiv4si (__a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vreinterpretq_p128_u8 (uint8x16_t __a) +{ + return (poly128_t)__builtin_neon_vreinterprettiv16qi ((int8x16_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vreinterpretq_p128_u16 (uint16x8_t __a) +{ + return (poly128_t)__builtin_neon_vreinterprettiv8hi ((int16x8_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vreinterpretq_p128_u32 (uint32x4_t __a) +{ + return (poly128_t)__builtin_neon_vreinterprettiv4si ((int32x4_t) __a); +} + +#endif +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_p8 (poly8x16_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2div16qi ((int8x16_t) __a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_p16 (poly16x8_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2div8hi ((int16x8_t) __a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_f32 (float32x4_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2div4sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_p64 (poly64x2_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2div2di ((int64x2_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_p128 (poly128_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2diti ((__builtin_neon_ti) __a); +} + +#endif +__extension__ static __inline int64x2_t __attribute__ 
((__always_inline__)) +vreinterpretq_s64_u64 (uint64x2_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2div2di ((int64x2_t) __a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_s8 (int8x16_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2div16qi (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_s16 (int16x8_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2div8hi (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_s32 (int32x4_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2div4si (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_u8 (uint8x16_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2div16qi ((int8x16_t) __a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_u16 (uint16x8_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2div8hi ((int16x8_t) __a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_u32 (uint32x4_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2div4si ((int32x4_t) __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_p8 (poly8x16_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2div16qi ((int8x16_t) __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_p16 (poly16x8_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2div8hi ((int16x8_t) __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_f32 (float32x4_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2div4sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_p64 (poly64x2_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2div2di ((int64x2_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_p128 (poly128_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2diti ((__builtin_neon_ti) __a); +} + +#endif +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_s64 (int64x2_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2div2di (__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_s8 (int8x16_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2div16qi (__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_s16 (int16x8_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2div8hi (__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_s32 (int32x4_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2div4si (__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_u8 (uint8x16_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2div16qi ((int8x16_t) __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_u16 (uint16x8_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2div8hi ((int16x8_t) __a); +} + 
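
Illustration only, not part of the upstream diff: a minimal sketch of how the vreinterpret/vreinterpretq casts added in this hunk are used. It assumes an ARM target built with NEON enabled (for example -mfpu=neon) and little-endian lane order; the intrinsics are pure bit-pattern casts and emit no instructions of their own.

#include <arm_neon.h>
#include <stdio.h>

int main (void)
{
  /* Four identical 32-bit lanes in a 128-bit NEON register.  */
  uint32x4_t words = vdupq_n_u32 (0x01020304);

  /* View the same register as sixteen bytes; the cast itself
     generates no code.  */
  uint8x16_t bytes = vreinterpretq_u8_u32 (words);

  /* On a little-endian target lane 0 is the low byte, 0x04.  */
  printf ("lane 0 = %#x\n", vgetq_lane_u8 (bytes, 0));
  return 0;
}
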
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_u32 (uint32x4_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2div4si ((int32x4_t) __a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_p8 (poly8x16_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiv16qi ((int8x16_t) __a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_p16 (poly16x8_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_f32 (float32x4_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiv4sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_p64 (poly64x2_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiv2di ((int64x2_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_p128 (poly128_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiti ((__builtin_neon_ti) __a); +} + +#endif +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_s64 (int64x2_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiv2di (__a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_u64 (uint64x2_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiv2di ((int64x2_t) __a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_s16 (int16x8_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiv8hi (__a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_s32 (int32x4_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiv4si (__a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_u8 (uint8x16_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiv16qi ((int8x16_t) __a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_u16 (uint16x8_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_u32 (uint32x4_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiv4si ((int32x4_t) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_p8 (poly8x16_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiv16qi ((int8x16_t) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_p16 (poly16x8_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_f32 (float32x4_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiv4sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_p64 (poly64x2_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiv2di ((int64x2_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline int16x8_t __attribute__ 
((__always_inline__)) +vreinterpretq_s16_p128 (poly128_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiti ((__builtin_neon_ti) __a); +} + +#endif +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_s64 (int64x2_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiv2di (__a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_u64 (uint64x2_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiv2di ((int64x2_t) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_s8 (int8x16_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiv16qi (__a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_s32 (int32x4_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiv4si (__a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_u8 (uint8x16_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiv16qi ((int8x16_t) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_u16 (uint16x8_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_u32 (uint32x4_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiv4si ((int32x4_t) __a); +} + __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vreinterpretq_s32_p8 (poly8x16_t __a) { @@ -11975,109 +12957,111 @@ return (int32x4_t)__builtin_neon_vreinterpretv4siv8hi ((int16x8_t) __a); } -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vreinterpret_u8_s8 (int8x8_t __a) +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_f32 (float32x4_t __a) { - return (uint8x8_t)__builtin_neon_vreinterpretv8qiv8qi (__a); + return (int32x4_t)__builtin_neon_vreinterpretv4siv4sf (__a); } -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vreinterpret_u8_s16 (int16x4_t __a) +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_p64 (poly64x2_t __a) { - return (uint8x8_t)__builtin_neon_vreinterpretv8qiv4hi (__a); + return (int32x4_t)__builtin_neon_vreinterpretv4siv2di ((int64x2_t) __a); } -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vreinterpret_u8_s32 (int32x2_t __a) +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_p128 (poly128_t __a) { - return (uint8x8_t)__builtin_neon_vreinterpretv8qiv2si (__a); + return (int32x4_t)__builtin_neon_vreinterpretv4siti ((__builtin_neon_ti) __a); } -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vreinterpret_u8_s64 (int64x1_t __a) +#endif +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_s64 (int64x2_t __a) { - return (uint8x8_t)__builtin_neon_vreinterpretv8qidi (__a); + return (int32x4_t)__builtin_neon_vreinterpretv4siv2di (__a); } -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vreinterpret_u8_f32 (float32x2_t __a) +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_u64 (uint64x2_t __a) { - return 
(uint8x8_t)__builtin_neon_vreinterpretv8qiv2sf (__a); + return (int32x4_t)__builtin_neon_vreinterpretv4siv2di ((int64x2_t) __a); } -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vreinterpret_u8_u16 (uint16x4_t __a) +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_s8 (int8x16_t __a) { - return (uint8x8_t)__builtin_neon_vreinterpretv8qiv4hi ((int16x4_t) __a); + return (int32x4_t)__builtin_neon_vreinterpretv4siv16qi (__a); } -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vreinterpret_u8_u32 (uint32x2_t __a) +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_s16 (int16x8_t __a) { - return (uint8x8_t)__builtin_neon_vreinterpretv8qiv2si ((int32x2_t) __a); + return (int32x4_t)__builtin_neon_vreinterpretv4siv8hi (__a); } -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vreinterpret_u8_u64 (uint64x1_t __a) +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_u8 (uint8x16_t __a) { - return (uint8x8_t)__builtin_neon_vreinterpretv8qidi ((int64x1_t) __a); + return (int32x4_t)__builtin_neon_vreinterpretv4siv16qi ((int8x16_t) __a); } -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vreinterpret_u8_p8 (poly8x8_t __a) +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_u16 (uint16x8_t __a) { - return (uint8x8_t)__builtin_neon_vreinterpretv8qiv8qi ((int8x8_t) __a); + return (int32x4_t)__builtin_neon_vreinterpretv4siv8hi ((int16x8_t) __a); } -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vreinterpret_u8_p16 (poly16x4_t __a) +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_u32 (uint32x4_t __a) { - return (uint8x8_t)__builtin_neon_vreinterpretv8qiv4hi ((int16x4_t) __a); + return (int32x4_t)__builtin_neon_vreinterpretv4siv4si ((int32x4_t) __a); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vreinterpretq_u8_s8 (int8x16_t __a) +vreinterpretq_u8_p8 (poly8x16_t __a) { - return (uint8x16_t)__builtin_neon_vreinterpretv16qiv16qi (__a); + return (uint8x16_t)__builtin_neon_vreinterpretv16qiv16qi ((int8x16_t) __a); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vreinterpretq_u8_s16 (int16x8_t __a) +vreinterpretq_u8_p16 (poly16x8_t __a) { - return (uint8x16_t)__builtin_neon_vreinterpretv16qiv8hi (__a); + return (uint8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vreinterpretq_u8_s32 (int32x4_t __a) +vreinterpretq_u8_f32 (float32x4_t __a) { - return (uint8x16_t)__builtin_neon_vreinterpretv16qiv4si (__a); + return (uint8x16_t)__builtin_neon_vreinterpretv16qiv4sf (__a); } +#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vreinterpretq_u8_s64 (int64x2_t __a) +vreinterpretq_u8_p64 (poly64x2_t __a) { - return (uint8x16_t)__builtin_neon_vreinterpretv16qiv2di (__a); + return (uint8x16_t)__builtin_neon_vreinterpretv16qiv2di ((int64x2_t) __a); } +#endif +#ifdef __ARM_FEATURE_CRYPTO __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vreinterpretq_u8_f32 (float32x4_t __a) +vreinterpretq_u8_p128 (poly128_t __a) { - return (uint8x16_t)__builtin_neon_vreinterpretv16qiv4sf (__a); + return 
(uint8x16_t)__builtin_neon_vreinterpretv16qiti ((__builtin_neon_ti) __a); } +#endif __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vreinterpretq_u8_u16 (uint16x8_t __a) +vreinterpretq_u8_s64 (int64x2_t __a) { - return (uint8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a); + return (uint8x16_t)__builtin_neon_vreinterpretv16qiv2di (__a); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vreinterpretq_u8_u32 (uint32x4_t __a) -{ - return (uint8x16_t)__builtin_neon_vreinterpretv16qiv4si ((int32x4_t) __a); -} - -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vreinterpretq_u8_u64 (uint64x2_t __a) { return (uint8x16_t)__builtin_neon_vreinterpretv16qiv2di ((int64x2_t) __a); @@ -12084,75 +13068,79 @@ } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vreinterpretq_u8_p8 (poly8x16_t __a) +vreinterpretq_u8_s8 (int8x16_t __a) { - return (uint8x16_t)__builtin_neon_vreinterpretv16qiv16qi ((int8x16_t) __a); + return (uint8x16_t)__builtin_neon_vreinterpretv16qiv16qi (__a); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vreinterpretq_u8_p16 (poly16x8_t __a) +vreinterpretq_u8_s16 (int16x8_t __a) { - return (uint8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a); + return (uint8x16_t)__builtin_neon_vreinterpretv16qiv8hi (__a); } -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vreinterpret_u16_s8 (int8x8_t __a) +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_s32 (int32x4_t __a) { - return (uint16x4_t)__builtin_neon_vreinterpretv4hiv8qi (__a); + return (uint8x16_t)__builtin_neon_vreinterpretv16qiv4si (__a); } -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vreinterpret_u16_s16 (int16x4_t __a) +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_u16 (uint16x8_t __a) { - return (uint16x4_t)__builtin_neon_vreinterpretv4hiv4hi (__a); + return (uint8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a); } -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vreinterpret_u16_s32 (int32x2_t __a) +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_u32 (uint32x4_t __a) { - return (uint16x4_t)__builtin_neon_vreinterpretv4hiv2si (__a); + return (uint8x16_t)__builtin_neon_vreinterpretv16qiv4si ((int32x4_t) __a); } -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vreinterpret_u16_s64 (int64x1_t __a) +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_p8 (poly8x16_t __a) { - return (uint16x4_t)__builtin_neon_vreinterpretv4hidi (__a); + return (uint16x8_t)__builtin_neon_vreinterpretv8hiv16qi ((int8x16_t) __a); } -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vreinterpret_u16_f32 (float32x2_t __a) +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_p16 (poly16x8_t __a) { - return (uint16x4_t)__builtin_neon_vreinterpretv4hiv2sf (__a); + return (uint16x8_t)__builtin_neon_vreinterpretv8hiv8hi ((int16x8_t) __a); } -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vreinterpret_u16_u8 (uint8x8_t __a) +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_f32 (float32x4_t __a) { - return 
(uint16x4_t)__builtin_neon_vreinterpretv4hiv8qi ((int8x8_t) __a); + return (uint16x8_t)__builtin_neon_vreinterpretv8hiv4sf (__a); } -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vreinterpret_u16_u32 (uint32x2_t __a) +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_p64 (poly64x2_t __a) { - return (uint16x4_t)__builtin_neon_vreinterpretv4hiv2si ((int32x2_t) __a); + return (uint16x8_t)__builtin_neon_vreinterpretv8hiv2di ((int64x2_t) __a); } -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vreinterpret_u16_u64 (uint64x1_t __a) +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_p128 (poly128_t __a) { - return (uint16x4_t)__builtin_neon_vreinterpretv4hidi ((int64x1_t) __a); + return (uint16x8_t)__builtin_neon_vreinterpretv8hiti ((__builtin_neon_ti) __a); } -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vreinterpret_u16_p8 (poly8x8_t __a) +#endif +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_s64 (int64x2_t __a) { - return (uint16x4_t)__builtin_neon_vreinterpretv4hiv8qi ((int8x8_t) __a); + return (uint16x8_t)__builtin_neon_vreinterpretv8hiv2di (__a); } -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vreinterpret_u16_p16 (poly16x4_t __a) +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_u64 (uint64x2_t __a) { - return (uint16x4_t)__builtin_neon_vreinterpretv4hiv4hi ((int16x4_t) __a); + return (uint16x8_t)__builtin_neon_vreinterpretv8hiv2di ((int64x2_t) __a); } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) @@ -12174,167 +13162,266 @@ } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vreinterpretq_u16_s64 (int64x2_t __a) +vreinterpretq_u16_u8 (uint8x16_t __a) { - return (uint16x8_t)__builtin_neon_vreinterpretv8hiv2di (__a); + return (uint16x8_t)__builtin_neon_vreinterpretv8hiv16qi ((int8x16_t) __a); } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vreinterpretq_u16_f32 (float32x4_t __a) +vreinterpretq_u16_u32 (uint32x4_t __a) { - return (uint16x8_t)__builtin_neon_vreinterpretv8hiv4sf (__a); + return (uint16x8_t)__builtin_neon_vreinterpretv8hiv4si ((int32x4_t) __a); } -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vreinterpretq_u16_u8 (uint8x16_t __a) +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_p8 (poly8x16_t __a) { - return (uint16x8_t)__builtin_neon_vreinterpretv8hiv16qi ((int8x16_t) __a); + return (uint32x4_t)__builtin_neon_vreinterpretv4siv16qi ((int8x16_t) __a); } -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vreinterpretq_u16_u32 (uint32x4_t __a) +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_p16 (poly16x8_t __a) { - return (uint16x8_t)__builtin_neon_vreinterpretv8hiv4si ((int32x4_t) __a); + return (uint32x4_t)__builtin_neon_vreinterpretv4siv8hi ((int16x8_t) __a); } -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vreinterpretq_u16_u64 (uint64x2_t __a) +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_f32 (float32x4_t __a) { - return (uint16x8_t)__builtin_neon_vreinterpretv8hiv2di 
((int64x2_t) __a); + return (uint32x4_t)__builtin_neon_vreinterpretv4siv4sf (__a); } -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vreinterpretq_u16_p8 (poly8x16_t __a) +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_p64 (poly64x2_t __a) { - return (uint16x8_t)__builtin_neon_vreinterpretv8hiv16qi ((int8x16_t) __a); + return (uint32x4_t)__builtin_neon_vreinterpretv4siv2di ((int64x2_t) __a); } -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vreinterpretq_u16_p16 (poly16x8_t __a) +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_p128 (poly128_t __a) { - return (uint16x8_t)__builtin_neon_vreinterpretv8hiv8hi ((int16x8_t) __a); + return (uint32x4_t)__builtin_neon_vreinterpretv4siti ((__builtin_neon_ti) __a); } -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vreinterpret_u32_s8 (int8x8_t __a) +#endif +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_s64 (int64x2_t __a) { - return (uint32x2_t)__builtin_neon_vreinterpretv2siv8qi (__a); + return (uint32x4_t)__builtin_neon_vreinterpretv4siv2di (__a); } -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vreinterpret_u32_s16 (int16x4_t __a) +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_u64 (uint64x2_t __a) { - return (uint32x2_t)__builtin_neon_vreinterpretv2siv4hi (__a); + return (uint32x4_t)__builtin_neon_vreinterpretv4siv2di ((int64x2_t) __a); } -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vreinterpret_u32_s32 (int32x2_t __a) +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_s8 (int8x16_t __a) { - return (uint32x2_t)__builtin_neon_vreinterpretv2siv2si (__a); + return (uint32x4_t)__builtin_neon_vreinterpretv4siv16qi (__a); } -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vreinterpret_u32_s64 (int64x1_t __a) +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_s16 (int16x8_t __a) { - return (uint32x2_t)__builtin_neon_vreinterpretv2sidi (__a); + return (uint32x4_t)__builtin_neon_vreinterpretv4siv8hi (__a); } -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vreinterpret_u32_f32 (float32x2_t __a) +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_s32 (int32x4_t __a) { - return (uint32x2_t)__builtin_neon_vreinterpretv2siv2sf (__a); + return (uint32x4_t)__builtin_neon_vreinterpretv4siv4si (__a); } -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vreinterpret_u32_u8 (uint8x8_t __a) +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_u8 (uint8x16_t __a) { - return (uint32x2_t)__builtin_neon_vreinterpretv2siv8qi ((int8x8_t) __a); + return (uint32x4_t)__builtin_neon_vreinterpretv4siv16qi ((int8x16_t) __a); } -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vreinterpret_u32_u16 (uint16x4_t __a) +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_u16 (uint16x8_t __a) { - return (uint32x2_t)__builtin_neon_vreinterpretv2siv4hi ((int16x4_t) __a); + return (uint32x4_t)__builtin_neon_vreinterpretv4siv8hi 
((int16x8_t) __a); } -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vreinterpret_u32_u64 (uint64x1_t __a) + +#ifdef __ARM_FEATURE_CRYPTO + +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vldrq_p128 (poly128_t const * __ptr) { - return (uint32x2_t)__builtin_neon_vreinterpretv2sidi ((int64x1_t) __a); +#ifdef __ARM_BIG_ENDIAN + poly64_t* __ptmp = (poly64_t*) __ptr; + poly64_t __d0 = vld1_p64 (__ptmp); + poly64_t __d1 = vld1_p64 (__ptmp + 1); + return vreinterpretq_p128_p64 (vcombine_p64 (__d1, __d0)); +#else + return vreinterpretq_p128_p64 (vld1q_p64 ((poly64_t*) __ptr)); +#endif } -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vreinterpret_u32_p8 (poly8x8_t __a) +__extension__ static __inline void __attribute__ ((__always_inline__)) +vstrq_p128 (poly128_t * __ptr, poly128_t __val) { - return (uint32x2_t)__builtin_neon_vreinterpretv2siv8qi ((int8x8_t) __a); +#ifdef __ARM_BIG_ENDIAN + poly64x2_t __tmp = vreinterpretq_p64_p128 (__val); + poly64_t __d0 = vget_high_p64 (__tmp); + poly64_t __d1 = vget_low_p64 (__tmp); + vst1q_p64 ((poly64_t*) __ptr, vcombine_p64 (__d0, __d1)); +#else + vst1q_p64 ((poly64_t*) __ptr, vreinterpretq_p64_p128 (__val)); +#endif } -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vreinterpret_u32_p16 (poly16x4_t __a) +/* The vceq_p64 intrinsic does not map to a single instruction. + Instead we emulate it by performing a 32-bit variant of the vceq + and applying a pairwise min reduction to the result. + vceq_u32 will produce two 32-bit halves, each of which will contain either + all ones or all zeros depending on whether the corresponding 32-bit + halves of the poly64_t were equal. The whole poly64_t values are equal + if and only if both halves are equal, i.e. vceq_u32 returns all ones. + If the result is all zeroes for any half then the whole result is zeroes. + This is what the pairwise min reduction achieves. */ + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vceq_p64 (poly64x1_t __a, poly64x1_t __b) { - return (uint32x2_t)__builtin_neon_vreinterpretv2siv4hi ((int16x4_t) __a); + uint32x2_t __t_a = vreinterpret_u32_p64 (__a); + uint32x2_t __t_b = vreinterpret_u32_p64 (__b); + uint32x2_t __c = vceq_u32 (__t_a, __t_b); + uint32x2_t __m = vpmin_u32 (__c, __c); + return vreinterpret_u64_u32 (__m); } -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vreinterpretq_u32_s8 (int8x16_t __a) +/* The vtst_p64 intrinsic does not map to a single instruction. + We emulate it in way similar to vceq_p64 above but here we do + a reduction with max since if any two corresponding bits + in the two poly64_t's match, then the whole result must be all ones. 
*/ + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vtst_p64 (poly64x1_t __a, poly64x1_t __b) { - return (uint32x4_t)__builtin_neon_vreinterpretv4siv16qi (__a); + uint32x2_t __t_a = vreinterpret_u32_p64 (__a); + uint32x2_t __t_b = vreinterpret_u32_p64 (__b); + uint32x2_t __c = vtst_u32 (__t_a, __t_b); + uint32x2_t __m = vpmax_u32 (__c, __c); + return vreinterpret_u64_u32 (__m); } +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vaeseq_u8 (uint8x16_t __data, uint8x16_t __key) +{ + return __builtin_arm_crypto_aese (__data, __key); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vaesdq_u8 (uint8x16_t __data, uint8x16_t __key) +{ + return __builtin_arm_crypto_aesd (__data, __key); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vaesmcq_u8 (uint8x16_t __data) +{ + return __builtin_arm_crypto_aesmc (__data); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vaesimcq_u8 (uint8x16_t __data) +{ + return __builtin_arm_crypto_aesimc (__data); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vsha1h_u32 (uint32_t __hash_e) +{ + uint32x4_t __t = vdupq_n_u32 (0); + __t = vsetq_lane_u32 (__hash_e, __t, 0); + __t = __builtin_arm_crypto_sha1h (__t); + return vgetq_lane_u32 (__t, 0); +} + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vreinterpretq_u32_s16 (int16x8_t __a) +vsha1cq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk) { - return (uint32x4_t)__builtin_neon_vreinterpretv4siv8hi (__a); + uint32x4_t __t = vdupq_n_u32 (0); + __t = vsetq_lane_u32 (__hash_e, __t, 0); + return __builtin_arm_crypto_sha1c (__hash_abcd, __t, __wk); } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vreinterpretq_u32_s32 (int32x4_t __a) +vsha1pq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk) { - return (uint32x4_t)__builtin_neon_vreinterpretv4siv4si (__a); + uint32x4_t __t = vdupq_n_u32 (0); + __t = vsetq_lane_u32 (__hash_e, __t, 0); + return __builtin_arm_crypto_sha1p (__hash_abcd, __t, __wk); } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vreinterpretq_u32_s64 (int64x2_t __a) +vsha1mq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk) { - return (uint32x4_t)__builtin_neon_vreinterpretv4siv2di (__a); + uint32x4_t __t = vdupq_n_u32 (0); + __t = vsetq_lane_u32 (__hash_e, __t, 0); + return __builtin_arm_crypto_sha1m (__hash_abcd, __t, __wk); } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vreinterpretq_u32_f32 (float32x4_t __a) +vsha1su0q_u32 (uint32x4_t __w0_3, uint32x4_t __w4_7, uint32x4_t __w8_11) { - return (uint32x4_t)__builtin_neon_vreinterpretv4siv4sf (__a); + return __builtin_arm_crypto_sha1su0 (__w0_3, __w4_7, __w8_11); } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vreinterpretq_u32_u8 (uint8x16_t __a) +vsha1su1q_u32 (uint32x4_t __tw0_3, uint32x4_t __w12_15) { - return (uint32x4_t)__builtin_neon_vreinterpretv4siv16qi ((int8x16_t) __a); + return __builtin_arm_crypto_sha1su1 (__tw0_3, __w12_15); } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vreinterpretq_u32_u16 (uint16x8_t __a) +vsha256hq_u32 (uint32x4_t __hash_abcd, uint32x4_t __hash_efgh, uint32x4_t __wk) { - return (uint32x4_t)__builtin_neon_vreinterpretv4siv8hi ((int16x8_t) __a); + return __builtin_arm_crypto_sha256h (__hash_abcd, 
__hash_efgh, __wk); } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vreinterpretq_u32_u64 (uint64x2_t __a) +vsha256h2q_u32 (uint32x4_t __hash_abcd, uint32x4_t __hash_efgh, uint32x4_t __wk) { - return (uint32x4_t)__builtin_neon_vreinterpretv4siv2di ((int64x2_t) __a); + return __builtin_arm_crypto_sha256h2 (__hash_abcd, __hash_efgh, __wk); } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vreinterpretq_u32_p8 (poly8x16_t __a) +vsha256su0q_u32 (uint32x4_t __w0_3, uint32x4_t __w4_7) { - return (uint32x4_t)__builtin_neon_vreinterpretv4siv16qi ((int8x16_t) __a); + return __builtin_arm_crypto_sha256su0 (__w0_3, __w4_7); } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vreinterpretq_u32_p16 (poly16x8_t __a) +vsha256su1q_u32 (uint32x4_t __tw0_3, uint32x4_t __w8_11, uint32x4_t __w12_15) { - return (uint32x4_t)__builtin_neon_vreinterpretv4siv8hi ((int16x8_t) __a); + return __builtin_arm_crypto_sha256su1 (__tw0_3, __w8_11, __w12_15); } +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vmull_p64 (poly64_t __a, poly64_t __b) +{ + return (poly128_t) __builtin_arm_crypto_vmullp64 ((uint64_t) __a, (uint64_t) __b); +} + +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vmull_high_p64 (poly64x2_t __a, poly64x2_t __b) +{ + poly64_t __t1 = vget_high_p64 (__a); + poly64_t __t2 = vget_high_p64 (__b); + + return (poly128_t) __builtin_arm_crypto_vmullp64 ((uint64_t) __t1, (uint64_t) __t2); +} + +#endif #ifdef __cplusplus } #endif --- a/src/gcc/config/arm/arm-ldmstm.ml +++ b/src/gcc/config/arm/arm-ldmstm.ml @@ -149,6 +149,8 @@ | IA, true, true -> true | _ -> false +exception InvalidAddrMode of string;; + let target addrmode thumb = match addrmode, thumb with IA, true -> "TARGET_THUMB1" @@ -155,6 +157,7 @@ | IA, false -> "TARGET_32BIT" | DB, false -> "TARGET_32BIT" | _, false -> "TARGET_ARM" + | _, _ -> raise (InvalidAddrMode "ERROR: Invalid Addressing mode for Thumb1.") let write_pattern_1 name ls addrmode nregs write_set_fn update thumb = let astr = string_of_addrmode addrmode in @@ -184,8 +187,10 @@ done; Printf.printf "}\"\n"; Printf.printf " [(set_attr \"type\" \"%s%d\")" ls nregs; - begin if not thumb then + if not thumb then begin Printf.printf "\n (set_attr \"predicable\" \"yes\")"; + if addrmode == IA || addrmode == DB then + Printf.printf "\n (set_attr \"predicable_short_it\" \"no\")"; end; Printf.printf "])\n\n" --- a/src/gcc/config/arm/iwmmxt.md +++ b/src/gcc/config/arm/iwmmxt.md @@ -33,7 +33,7 @@ "TARGET_REALLY_IWMMXT" "tbcstb%?\\t%0, %1" [(set_attr "predicable" "yes") - (set_attr "wtype" "tbcst")] + (set_attr "type" "wmmx_tbcst")] ) (define_insn "tbcstv4hi" @@ -42,7 +42,7 @@ "TARGET_REALLY_IWMMXT" "tbcsth%?\\t%0, %1" [(set_attr "predicable" "yes") - (set_attr "wtype" "tbcst")] + (set_attr "type" "wmmx_tbcst")] ) (define_insn "tbcstv2si" @@ -51,7 +51,7 @@ "TARGET_REALLY_IWMMXT" "tbcstw%?\\t%0, %1" [(set_attr "predicable" "yes") - (set_attr "wtype" "tbcst")] + (set_attr "type" "wmmx_tbcst")] ) (define_insn "iwmmxt_iordi3" @@ -65,7 +65,7 @@ #" [(set_attr "predicable" "yes") (set_attr "length" "4,8,8") - (set_attr "wtype" "wor,none,none")] + (set_attr "type" "wmmx_wor,*,*")] ) (define_insn "iwmmxt_xordi3" @@ -79,7 +79,7 @@ #" [(set_attr "predicable" "yes") (set_attr "length" "4,8,8") - (set_attr "wtype" "wxor,none,none")] + (set_attr "type" "wmmx_wxor,*,*")] ) (define_insn "iwmmxt_anddi3" @@ -93,7 +93,7 @@ #" [(set_attr "predicable" "yes") (set_attr "length" 
"4,8,8") - (set_attr "wtype" "wand,none,none")] + (set_attr "type" "wmmx_wand,*,*")] ) (define_insn "iwmmxt_nanddi3" @@ -103,7 +103,7 @@ "TARGET_REALLY_IWMMXT" "wandn%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wandn")] + (set_attr "type" "wmmx_wandn")] ) (define_insn "*iwmmxt_arm_movdi" @@ -155,10 +155,9 @@ (const_int 8) (const_int 4))] (const_int 4))) - (set_attr "type" "*,*,*,load2,store2,*,*,*,*,*,r_2_f,f_2_r,ffarithd,f_loadd,f_stored") + (set_attr "type" "*,*,*,load2,store2,wmmx_wmov,wmmx_tmcrr,wmmx_tmrrc,wmmx_wldr,wmmx_wstr,r_2_f,f_2_r,ffarithd,f_loadd,f_stored") (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,*,*,*,*,*,*,1020,*") - (set_attr "arm_neg_pool_range" "*,*,*,1008,*,*,*,*,*,*,*,*,*,1008,*") - (set_attr "wtype" "*,*,*,*,*,wmov,tmcrr,tmrrc,wldr,wstr,*,*,*,*,*")] + (set_attr "arm_neg_pool_range" "*,*,*,1008,*,*,*,*,*,*,*,*,*,1008,*")] ) (define_insn "*iwmmxt_movsi_insn" @@ -188,7 +187,7 @@ default: gcc_unreachable (); }" - [(set_attr "type" "*,*,*,*,load1,store1,*,*,*,*,r_2_f,f_2_r,fcpys,f_loads,f_stores") + [(set_attr "type" "*,*,*,*,load1,store1,wmmx_tmcr,wmmx_tmrc,wmmx_wldr,wmmx_wstr,r_2_f,f_2_r,fcpys,f_loads,f_stores") (set_attr "length" "*,*,*,*,*, *,*,*, 16, *,*,*,*,*,*") (set_attr "pool_range" "*,*,*,*,4096, *,*,*,1024, *,*,*,*,1020,*") (set_attr "neg_pool_range" "*,*,*,*,4084, *,*,*, *, 1012,*,*,*,1008,*") @@ -200,8 +199,7 @@ ;; Also - we have to pretend that these insns clobber the condition code ;; bits as otherwise arm_final_prescan_insn() will try to conditionalize ;; them. - (set_attr "conds" "clob") - (set_attr "wtype" "*,*,*,*,*,*,tmcr,tmrc,wldr,wstr,*,*,*,*,*")] + (set_attr "conds" "clob")] ) ;; Because iwmmxt_movsi_insn is not predicable, we provide the @@ -249,10 +247,9 @@ }" [(set_attr "predicable" "yes") (set_attr "length" "4, 4, 4,4,4,8, 8,8") - (set_attr "type" "*,*,*,*,*,*,load1,store1") + (set_attr "type" "wmmx_wmov,wmmx_wstr,wmmx_wldr,wmmx_tmrrc,wmmx_tmcrr,*,load1,store1") (set_attr "pool_range" "*, *, 256,*,*,*, 256,*") - (set_attr "neg_pool_range" "*, *, 244,*,*,*, 244,*") - (set_attr "wtype" "wmov,wstr,wldr,tmrrc,tmcrr,*,*,*")] + (set_attr "neg_pool_range" "*, *, 244,*,*,*, 244,*")] ) (define_expand "iwmmxt_setwcgr0" @@ -318,7 +315,7 @@ "TARGET_REALLY_IWMMXT" "wand\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wand")] + (set_attr "type" "wmmx_wand")] ) (define_insn "*ior3_iwmmxt" @@ -328,7 +325,7 @@ "TARGET_REALLY_IWMMXT" "wor\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wor")] + (set_attr "type" "wmmx_wor")] ) (define_insn "*xor3_iwmmxt" @@ -338,7 +335,7 @@ "TARGET_REALLY_IWMMXT" "wxor\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wxor")] + (set_attr "type" "wmmx_wxor")] ) @@ -351,7 +348,7 @@ "TARGET_REALLY_IWMMXT" "wadd%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wadd")] + (set_attr "type" "wmmx_wadd")] ) (define_insn "ssaddv8qi3" @@ -361,7 +358,7 @@ "TARGET_REALLY_IWMMXT" "waddbss%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wadd")] + (set_attr "type" "wmmx_wadd")] ) (define_insn "ssaddv4hi3" @@ -371,7 +368,7 @@ "TARGET_REALLY_IWMMXT" "waddhss%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wadd")] + (set_attr "type" "wmmx_wadd")] ) (define_insn "ssaddv2si3" @@ -381,7 +378,7 @@ "TARGET_REALLY_IWMMXT" "waddwss%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wadd")] + (set_attr "type" "wmmx_wadd")] ) (define_insn "usaddv8qi3" @@ -391,7 +388,7 @@ "TARGET_REALLY_IWMMXT" 
"waddbus%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wadd")] + (set_attr "type" "wmmx_wadd")] ) (define_insn "usaddv4hi3" @@ -401,7 +398,7 @@ "TARGET_REALLY_IWMMXT" "waddhus%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wadd")] + (set_attr "type" "wmmx_wadd")] ) (define_insn "usaddv2si3" @@ -411,7 +408,7 @@ "TARGET_REALLY_IWMMXT" "waddwus%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wadd")] + (set_attr "type" "wmmx_wadd")] ) (define_insn "*sub3_iwmmxt" @@ -421,7 +418,7 @@ "TARGET_REALLY_IWMMXT" "wsub%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wsub")] + (set_attr "type" "wmmx_wsub")] ) (define_insn "sssubv8qi3" @@ -431,7 +428,7 @@ "TARGET_REALLY_IWMMXT" "wsubbss%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wsub")] + (set_attr "type" "wmmx_wsub")] ) (define_insn "sssubv4hi3" @@ -441,7 +438,7 @@ "TARGET_REALLY_IWMMXT" "wsubhss%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wsub")] + (set_attr "type" "wmmx_wsub")] ) (define_insn "sssubv2si3" @@ -451,7 +448,7 @@ "TARGET_REALLY_IWMMXT" "wsubwss%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wsub")] + (set_attr "type" "wmmx_wsub")] ) (define_insn "ussubv8qi3" @@ -461,7 +458,7 @@ "TARGET_REALLY_IWMMXT" "wsubbus%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wsub")] + (set_attr "type" "wmmx_wsub")] ) (define_insn "ussubv4hi3" @@ -471,7 +468,7 @@ "TARGET_REALLY_IWMMXT" "wsubhus%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wsub")] + (set_attr "type" "wmmx_wsub")] ) (define_insn "ussubv2si3" @@ -481,7 +478,7 @@ "TARGET_REALLY_IWMMXT" "wsubwus%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wsub")] + (set_attr "type" "wmmx_wsub")] ) (define_insn "*mulv4hi3_iwmmxt" @@ -491,7 +488,7 @@ "TARGET_REALLY_IWMMXT" "wmulul%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wmul")] + (set_attr "type" "wmmx_wmul")] ) (define_insn "smulv4hi3_highpart" @@ -504,7 +501,7 @@ "TARGET_REALLY_IWMMXT" "wmulsm%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wmul")] + (set_attr "type" "wmmx_wmul")] ) (define_insn "umulv4hi3_highpart" @@ -517,7 +514,7 @@ "TARGET_REALLY_IWMMXT" "wmulum%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wmul")] + (set_attr "type" "wmmx_wmul")] ) (define_insn "iwmmxt_wmacs" @@ -528,7 +525,7 @@ "TARGET_REALLY_IWMMXT" "wmacs%?\\t%0, %2, %3" [(set_attr "predicable" "yes") - (set_attr "wtype" "wmac")] + (set_attr "type" "wmmx_wmac")] ) (define_insn "iwmmxt_wmacsz" @@ -538,7 +535,7 @@ "TARGET_REALLY_IWMMXT" "wmacsz%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wmac")] + (set_attr "type" "wmmx_wmac")] ) (define_insn "iwmmxt_wmacu" @@ -549,7 +546,7 @@ "TARGET_REALLY_IWMMXT" "wmacu%?\\t%0, %2, %3" [(set_attr "predicable" "yes") - (set_attr "wtype" "wmac")] + (set_attr "type" "wmmx_wmac")] ) (define_insn "iwmmxt_wmacuz" @@ -559,7 +556,7 @@ "TARGET_REALLY_IWMMXT" "wmacuz%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wmac")] + (set_attr "type" "wmmx_wmac")] ) ;; Same as xordi3, but don't show input operands so that we don't think @@ -570,7 +567,7 @@ "TARGET_REALLY_IWMMXT" "wxor%?\\t%0, %0, %0" [(set_attr "predicable" "yes") - (set_attr "wtype" "wxor")] + (set_attr "type" "wmmx_wxor")] ) ;; Seems like cse likes to generate these, so we have to support them. 
@@ -584,7 +581,7 @@ "TARGET_REALLY_IWMMXT" "wxor%?\\t%0, %0, %0" [(set_attr "predicable" "yes") - (set_attr "wtype" "wxor")] + (set_attr "type" "wmmx_wxor")] ) (define_insn "iwmmxt_clrv4hi" @@ -594,7 +591,7 @@ "TARGET_REALLY_IWMMXT" "wxor%?\\t%0, %0, %0" [(set_attr "predicable" "yes") - (set_attr "wtype" "wxor")] + (set_attr "type" "wmmx_wxor")] ) (define_insn "iwmmxt_clrv2si" @@ -603,7 +600,7 @@ "TARGET_REALLY_IWMMXT" "wxor%?\\t%0, %0, %0" [(set_attr "predicable" "yes") - (set_attr "wtype" "wxor")] + (set_attr "type" "wmmx_wxor")] ) ;; Unsigned averages/sum of absolute differences @@ -627,7 +624,7 @@ "TARGET_REALLY_IWMMXT" "wavg2br%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wavg2")] + (set_attr "type" "wmmx_wavg2")] ) (define_insn "iwmmxt_uavgrndv4hi3" @@ -645,7 +642,7 @@ "TARGET_REALLY_IWMMXT" "wavg2hr%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wavg2")] + (set_attr "type" "wmmx_wavg2")] ) (define_insn "iwmmxt_uavgv8qi3" @@ -658,7 +655,7 @@ "TARGET_REALLY_IWMMXT" "wavg2b%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wavg2")] + (set_attr "type" "wmmx_wavg2")] ) (define_insn "iwmmxt_uavgv4hi3" @@ -671,7 +668,7 @@ "TARGET_REALLY_IWMMXT" "wavg2h%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wavg2")] + (set_attr "type" "wmmx_wavg2")] ) ;; Insert/extract/shuffle @@ -690,7 +687,7 @@ } " [(set_attr "predicable" "yes") - (set_attr "wtype" "tinsr")] + (set_attr "type" "wmmx_tinsr")] ) (define_insn "iwmmxt_tinsrh" @@ -707,7 +704,7 @@ } " [(set_attr "predicable" "yes") - (set_attr "wtype" "tinsr")] + (set_attr "type" "wmmx_tinsr")] ) (define_insn "iwmmxt_tinsrw" @@ -724,7 +721,7 @@ } " [(set_attr "predicable" "yes") - (set_attr "wtype" "tinsr")] + (set_attr "type" "wmmx_tinsr")] ) (define_insn "iwmmxt_textrmub" @@ -735,7 +732,7 @@ "TARGET_REALLY_IWMMXT" "textrmub%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "textrm")] + (set_attr "type" "wmmx_textrm")] ) (define_insn "iwmmxt_textrmsb" @@ -746,7 +743,7 @@ "TARGET_REALLY_IWMMXT" "textrmsb%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "textrm")] + (set_attr "type" "wmmx_textrm")] ) (define_insn "iwmmxt_textrmuh" @@ -757,7 +754,7 @@ "TARGET_REALLY_IWMMXT" "textrmuh%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "textrm")] + (set_attr "type" "wmmx_textrm")] ) (define_insn "iwmmxt_textrmsh" @@ -768,7 +765,7 @@ "TARGET_REALLY_IWMMXT" "textrmsh%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "textrm")] + (set_attr "type" "wmmx_textrm")] ) ;; There are signed/unsigned variants of this instruction, but they are @@ -780,7 +777,7 @@ "TARGET_REALLY_IWMMXT" "textrmsw%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "textrm")] + (set_attr "type" "wmmx_textrm")] ) (define_insn "iwmmxt_wshufh" @@ -790,7 +787,7 @@ "TARGET_REALLY_IWMMXT" "wshufh%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wshufh")] + (set_attr "type" "wmmx_wshufh")] ) ;; Mask-generating comparisons @@ -812,7 +809,7 @@ "TARGET_REALLY_IWMMXT" "wcmpeqb%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wcmpeq")] + (set_attr "type" "wmmx_wcmpeq")] ) (define_insn "eqv4hi3" @@ -823,7 +820,7 @@ "TARGET_REALLY_IWMMXT" "wcmpeqh%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wcmpeq")] + (set_attr "type" "wmmx_wcmpeq")] ) (define_insn "eqv2si3" @@ -835,7 +832,7 @@ "TARGET_REALLY_IWMMXT" "wcmpeqw%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" 
"wcmpeq")] + (set_attr "type" "wmmx_wcmpeq")] ) (define_insn "gtuv8qi3" @@ -846,7 +843,7 @@ "TARGET_REALLY_IWMMXT" "wcmpgtub%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wcmpgt")] + (set_attr "type" "wmmx_wcmpgt")] ) (define_insn "gtuv4hi3" @@ -857,7 +854,7 @@ "TARGET_REALLY_IWMMXT" "wcmpgtuh%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wcmpgt")] + (set_attr "type" "wmmx_wcmpgt")] ) (define_insn "gtuv2si3" @@ -868,7 +865,7 @@ "TARGET_REALLY_IWMMXT" "wcmpgtuw%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wcmpgt")] + (set_attr "type" "wmmx_wcmpgt")] ) (define_insn "gtv8qi3" @@ -879,7 +876,7 @@ "TARGET_REALLY_IWMMXT" "wcmpgtsb%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wcmpgt")] + (set_attr "type" "wmmx_wcmpgt")] ) (define_insn "gtv4hi3" @@ -890,7 +887,7 @@ "TARGET_REALLY_IWMMXT" "wcmpgtsh%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wcmpgt")] + (set_attr "type" "wmmx_wcmpgt")] ) (define_insn "gtv2si3" @@ -901,7 +898,7 @@ "TARGET_REALLY_IWMMXT" "wcmpgtsw%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wcmpgt")] + (set_attr "type" "wmmx_wcmpgt")] ) ;; Max/min insns @@ -913,7 +910,7 @@ "TARGET_REALLY_IWMMXT" "wmaxs%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wmax")] + (set_attr "type" "wmmx_wmax")] ) (define_insn "*umax3_iwmmxt" @@ -923,7 +920,7 @@ "TARGET_REALLY_IWMMXT" "wmaxu%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wmax")] + (set_attr "type" "wmmx_wmax")] ) (define_insn "*smin3_iwmmxt" @@ -933,7 +930,7 @@ "TARGET_REALLY_IWMMXT" "wmins%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wmin")] + (set_attr "type" "wmmx_wmin")] ) (define_insn "*umin3_iwmmxt" @@ -943,7 +940,7 @@ "TARGET_REALLY_IWMMXT" "wminu%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wmin")] + (set_attr "type" "wmmx_wmin")] ) ;; Pack/unpack insns. 
@@ -956,7 +953,7 @@ "TARGET_REALLY_IWMMXT" "wpackhss%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wpack")] + (set_attr "type" "wmmx_wpack")] ) (define_insn "iwmmxt_wpackwss" @@ -967,7 +964,7 @@ "TARGET_REALLY_IWMMXT" "wpackwss%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wpack")] + (set_attr "type" "wmmx_wpack")] ) (define_insn "iwmmxt_wpackdss" @@ -978,7 +975,7 @@ "TARGET_REALLY_IWMMXT" "wpackdss%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wpack")] + (set_attr "type" "wmmx_wpack")] ) (define_insn "iwmmxt_wpackhus" @@ -989,7 +986,7 @@ "TARGET_REALLY_IWMMXT" "wpackhus%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wpack")] + (set_attr "type" "wmmx_wpack")] ) (define_insn "iwmmxt_wpackwus" @@ -1000,7 +997,7 @@ "TARGET_REALLY_IWMMXT" "wpackwus%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wpack")] + (set_attr "type" "wmmx_wpack")] ) (define_insn "iwmmxt_wpackdus" @@ -1011,7 +1008,7 @@ "TARGET_REALLY_IWMMXT" "wpackdus%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wpack")] + (set_attr "type" "wmmx_wpack")] ) (define_insn "iwmmxt_wunpckihb" @@ -1039,7 +1036,7 @@ "TARGET_REALLY_IWMMXT" "wunpckihb%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wunpckih")] + (set_attr "type" "wmmx_wunpckih")] ) (define_insn "iwmmxt_wunpckihh" @@ -1059,7 +1056,7 @@ "TARGET_REALLY_IWMMXT" "wunpckihh%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wunpckih")] + (set_attr "type" "wmmx_wunpckih")] ) (define_insn "iwmmxt_wunpckihw" @@ -1075,7 +1072,7 @@ "TARGET_REALLY_IWMMXT" "wunpckihw%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wunpckih")] + (set_attr "type" "wmmx_wunpckih")] ) (define_insn "iwmmxt_wunpckilb" @@ -1103,7 +1100,7 @@ "TARGET_REALLY_IWMMXT" "wunpckilb%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wunpckil")] + (set_attr "type" "wmmx_wunpckil")] ) (define_insn "iwmmxt_wunpckilh" @@ -1123,7 +1120,7 @@ "TARGET_REALLY_IWMMXT" "wunpckilh%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wunpckil")] + (set_attr "type" "wmmx_wunpckil")] ) (define_insn "iwmmxt_wunpckilw" @@ -1139,7 +1136,7 @@ "TARGET_REALLY_IWMMXT" "wunpckilw%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wunpckil")] + (set_attr "type" "wmmx_wunpckil")] ) (define_insn "iwmmxt_wunpckehub" @@ -1151,7 +1148,7 @@ "TARGET_REALLY_IWMMXT" "wunpckehub%?\\t%0, %1" [(set_attr "predicable" "yes") - (set_attr "wtype" "wunpckeh")] + (set_attr "type" "wmmx_wunpckeh")] ) (define_insn "iwmmxt_wunpckehuh" @@ -1162,7 +1159,7 @@ "TARGET_REALLY_IWMMXT" "wunpckehuh%?\\t%0, %1" [(set_attr "predicable" "yes") - (set_attr "wtype" "wunpckeh")] + (set_attr "type" "wmmx_wunpckeh")] ) (define_insn "iwmmxt_wunpckehuw" @@ -1173,7 +1170,7 @@ "TARGET_REALLY_IWMMXT" "wunpckehuw%?\\t%0, %1" [(set_attr "predicable" "yes") - (set_attr "wtype" "wunpckeh")] + (set_attr "type" "wmmx_wunpckeh")] ) (define_insn "iwmmxt_wunpckehsb" @@ -1185,7 +1182,7 @@ "TARGET_REALLY_IWMMXT" "wunpckehsb%?\\t%0, %1" [(set_attr "predicable" "yes") - (set_attr "wtype" "wunpckeh")] + (set_attr "type" "wmmx_wunpckeh")] ) (define_insn "iwmmxt_wunpckehsh" @@ -1196,7 +1193,7 @@ "TARGET_REALLY_IWMMXT" "wunpckehsh%?\\t%0, %1" [(set_attr "predicable" "yes") - (set_attr "wtype" "wunpckeh")] + (set_attr "type" "wmmx_wunpckeh")] ) (define_insn "iwmmxt_wunpckehsw" @@ -1207,7 +1204,7 @@ "TARGET_REALLY_IWMMXT" "wunpckehsw%?\\t%0, %1" [(set_attr "predicable" "yes") 
- (set_attr "wtype" "wunpckeh")] + (set_attr "type" "wmmx_wunpckeh")] ) (define_insn "iwmmxt_wunpckelub" @@ -1219,7 +1216,7 @@ "TARGET_REALLY_IWMMXT" "wunpckelub%?\\t%0, %1" [(set_attr "predicable" "yes") - (set_attr "wtype" "wunpckel")] + (set_attr "type" "wmmx_wunpckel")] ) (define_insn "iwmmxt_wunpckeluh" @@ -1230,7 +1227,7 @@ "TARGET_REALLY_IWMMXT" "wunpckeluh%?\\t%0, %1" [(set_attr "predicable" "yes") - (set_attr "wtype" "wunpckel")] + (set_attr "type" "wmmx_wunpckel")] ) (define_insn "iwmmxt_wunpckeluw" @@ -1241,7 +1238,7 @@ "TARGET_REALLY_IWMMXT" "wunpckeluw%?\\t%0, %1" [(set_attr "predicable" "yes") - (set_attr "wtype" "wunpckel")] + (set_attr "type" "wmmx_wunpckel")] ) (define_insn "iwmmxt_wunpckelsb" @@ -1253,7 +1250,7 @@ "TARGET_REALLY_IWMMXT" "wunpckelsb%?\\t%0, %1" [(set_attr "predicable" "yes") - (set_attr "wtype" "wunpckel")] + (set_attr "type" "wmmx_wunpckel")] ) (define_insn "iwmmxt_wunpckelsh" @@ -1264,7 +1261,7 @@ "TARGET_REALLY_IWMMXT" "wunpckelsh%?\\t%0, %1" [(set_attr "predicable" "yes") - (set_attr "wtype" "wunpckel")] + (set_attr "type" "wmmx_wunpckel")] ) (define_insn "iwmmxt_wunpckelsw" @@ -1275,7 +1272,7 @@ "TARGET_REALLY_IWMMXT" "wunpckelsw%?\\t%0, %1" [(set_attr "predicable" "yes") - (set_attr "wtype" "wunpckel")] + (set_attr "type" "wmmx_wunpckel")] ) ;; Shifts @@ -1298,7 +1295,7 @@ " [(set_attr "predicable" "yes") (set_attr "arch" "*, iwmmxt2") - (set_attr "wtype" "wror, wror")] + (set_attr "type" "wmmx_wror, wmmx_wror")] ) (define_insn "ashr3_iwmmxt" @@ -1319,7 +1316,7 @@ " [(set_attr "predicable" "yes") (set_attr "arch" "*, iwmmxt2") - (set_attr "wtype" "wsra, wsra")] + (set_attr "type" "wmmx_wsra, wmmx_wsra")] ) (define_insn "lshr3_iwmmxt" @@ -1340,7 +1337,7 @@ " [(set_attr "predicable" "yes") (set_attr "arch" "*, iwmmxt2") - (set_attr "wtype" "wsrl, wsrl")] + (set_attr "type" "wmmx_wsrl, wmmx_wsrl")] ) (define_insn "ashl3_iwmmxt" @@ -1361,7 +1358,7 @@ " [(set_attr "predicable" "yes") (set_attr "arch" "*, iwmmxt2") - (set_attr "wtype" "wsll, wsll")] + (set_attr "type" "wmmx_wsll, wmmx_wsll")] ) (define_insn "ror3_di" @@ -1382,7 +1379,7 @@ " [(set_attr "predicable" "yes") (set_attr "arch" "*, iwmmxt2") - (set_attr "wtype" "wror, wror")] + (set_attr "type" "wmmx_wror, wmmx_wror")] ) (define_insn "ashr3_di" @@ -1403,7 +1400,7 @@ " [(set_attr "predicable" "yes") (set_attr "arch" "*, iwmmxt2") - (set_attr "wtype" "wsra, wsra")] + (set_attr "type" "wmmx_wsra, wmmx_wsra")] ) (define_insn "lshr3_di" @@ -1424,7 +1421,7 @@ " [(set_attr "predicable" "yes") (set_attr "arch" "*, iwmmxt2") - (set_attr "wtype" "wsrl, wsrl")] + (set_attr "type" "wmmx_wsrl, wmmx_wsrl")] ) (define_insn "ashl3_di" @@ -1445,7 +1442,7 @@ " [(set_attr "predicable" "yes") (set_attr "arch" "*, iwmmxt2") - (set_attr "wtype" "wsll, wsll")] + (set_attr "type" "wmmx_wsll, wmmx_wsll")] ) (define_insn "iwmmxt_wmadds" @@ -1464,7 +1461,7 @@ "TARGET_REALLY_IWMMXT" "wmadds%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wmadd")] + (set_attr "type" "wmmx_wmadd")] ) (define_insn "iwmmxt_wmaddu" @@ -1483,7 +1480,7 @@ "TARGET_REALLY_IWMMXT" "wmaddu%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wmadd")] + (set_attr "type" "wmmx_wmadd")] ) (define_insn "iwmmxt_tmia" @@ -1496,7 +1493,7 @@ "TARGET_REALLY_IWMMXT" "tmia%?\\t%0, %2, %3" [(set_attr "predicable" "yes") - (set_attr "wtype" "tmia")] + (set_attr "type" "wmmx_tmia")] ) (define_insn "iwmmxt_tmiaph" @@ -1514,7 +1511,7 @@ "TARGET_REALLY_IWMMXT" "tmiaph%?\\t%0, %2, %3" [(set_attr "predicable" "yes") - (set_attr "wtype" 
"tmiaph")] + (set_attr "type" "wmmx_tmiaph")] ) (define_insn "iwmmxt_tmiabb" @@ -1527,7 +1524,7 @@ "TARGET_REALLY_IWMMXT" "tmiabb%?\\t%0, %2, %3" [(set_attr "predicable" "yes") - (set_attr "wtype" "tmiaxy")] + (set_attr "type" "wmmx_tmiaxy")] ) (define_insn "iwmmxt_tmiatb" @@ -1544,7 +1541,7 @@ "TARGET_REALLY_IWMMXT" "tmiatb%?\\t%0, %2, %3" [(set_attr "predicable" "yes") - (set_attr "wtype" "tmiaxy")] + (set_attr "type" "wmmx_tmiaxy")] ) (define_insn "iwmmxt_tmiabt" @@ -1561,7 +1558,7 @@ "TARGET_REALLY_IWMMXT" "tmiabt%?\\t%0, %2, %3" [(set_attr "predicable" "yes") - (set_attr "wtype" "tmiaxy")] + (set_attr "type" "wmmx_tmiaxy")] ) (define_insn "iwmmxt_tmiatt" @@ -1580,7 +1577,7 @@ "TARGET_REALLY_IWMMXT" "tmiatt%?\\t%0, %2, %3" [(set_attr "predicable" "yes") - (set_attr "wtype" "tmiaxy")] + (set_attr "type" "wmmx_tmiaxy")] ) (define_insn "iwmmxt_tmovmskb" @@ -1589,7 +1586,7 @@ "TARGET_REALLY_IWMMXT" "tmovmskb%?\\t%0, %1" [(set_attr "predicable" "yes") - (set_attr "wtype" "tmovmsk")] + (set_attr "type" "wmmx_tmovmsk")] ) (define_insn "iwmmxt_tmovmskh" @@ -1598,7 +1595,7 @@ "TARGET_REALLY_IWMMXT" "tmovmskh%?\\t%0, %1" [(set_attr "predicable" "yes") - (set_attr "wtype" "tmovmsk")] + (set_attr "type" "wmmx_tmovmsk")] ) (define_insn "iwmmxt_tmovmskw" @@ -1607,7 +1604,7 @@ "TARGET_REALLY_IWMMXT" "tmovmskw%?\\t%0, %1" [(set_attr "predicable" "yes") - (set_attr "wtype" "tmovmsk")] + (set_attr "type" "wmmx_tmovmsk")] ) (define_insn "iwmmxt_waccb" @@ -1616,7 +1613,7 @@ "TARGET_REALLY_IWMMXT" "waccb%?\\t%0, %1" [(set_attr "predicable" "yes") - (set_attr "wtype" "wacc")] + (set_attr "type" "wmmx_wacc")] ) (define_insn "iwmmxt_wacch" @@ -1625,7 +1622,7 @@ "TARGET_REALLY_IWMMXT" "wacch%?\\t%0, %1" [(set_attr "predicable" "yes") - (set_attr "wtype" "wacc")] + (set_attr "type" "wmmx_wacc")] ) (define_insn "iwmmxt_waccw" @@ -1634,7 +1631,7 @@ "TARGET_REALLY_IWMMXT" "waccw%?\\t%0, %1" [(set_attr "predicable" "yes") - (set_attr "wtype" "wacc")] + (set_attr "type" "wmmx_wacc")] ) ;; use unspec here to prevent 8 * imm to be optimized by cse @@ -1651,7 +1648,7 @@ "TARGET_REALLY_IWMMXT" "waligni%?\\t%0, %1, %2, %3" [(set_attr "predicable" "yes") - (set_attr "wtype" "waligni")] + (set_attr "type" "wmmx_waligni")] ) (define_insn "iwmmxt_walignr" @@ -1666,7 +1663,7 @@ "TARGET_REALLY_IWMMXT" "walignr%U3%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "walignr")] + (set_attr "type" "wmmx_walignr")] ) (define_insn "iwmmxt_walignr0" @@ -1681,7 +1678,7 @@ "TARGET_REALLY_IWMMXT" "walignr0%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "walignr")] + (set_attr "type" "wmmx_walignr")] ) (define_insn "iwmmxt_walignr1" @@ -1696,7 +1693,7 @@ "TARGET_REALLY_IWMMXT" "walignr1%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "walignr")] + (set_attr "type" "wmmx_walignr")] ) (define_insn "iwmmxt_walignr2" @@ -1711,7 +1708,7 @@ "TARGET_REALLY_IWMMXT" "walignr2%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "walignr")] + (set_attr "type" "wmmx_walignr")] ) (define_insn "iwmmxt_walignr3" @@ -1726,7 +1723,7 @@ "TARGET_REALLY_IWMMXT" "walignr3%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "walignr")] + (set_attr "type" "wmmx_walignr")] ) (define_insn "iwmmxt_wsadb" @@ -1738,7 +1735,7 @@ "TARGET_REALLY_IWMMXT" "wsadb%?\\t%0, %2, %3" [(set_attr "predicable" "yes") - (set_attr "wtype" "wsad")] + (set_attr "type" "wmmx_wsad")] ) (define_insn "iwmmxt_wsadh" @@ -1750,7 +1747,7 @@ "TARGET_REALLY_IWMMXT" "wsadh%?\\t%0, %2, %3" [(set_attr 
"predicable" "yes") - (set_attr "wtype" "wsad")] + (set_attr "type" "wmmx_wsad")] ) (define_insn "iwmmxt_wsadbz" @@ -1760,7 +1757,7 @@ "TARGET_REALLY_IWMMXT" "wsadbz%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wsad")] + (set_attr "type" "wmmx_wsad")] ) (define_insn "iwmmxt_wsadhz" @@ -1770,7 +1767,7 @@ "TARGET_REALLY_IWMMXT" "wsadhz%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wsad")] + (set_attr "type" "wmmx_wsad")] ) (include "iwmmxt2.md") --- a/src/gcc/config/arm/cortex-a53.md +++ b/src/gcc/config/arm/cortex-a53.md @@ -0,0 +1,300 @@ +;; ARM Cortex-A53 pipeline description +;; Copyright (C) 2013 Free Software Foundation, Inc. +;; +;; Contributed by ARM Ltd. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_automaton "cortex_a53") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Functional units. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; There are two main integer execution pipelines, described as +;; slot 0 and issue slot 1. + +(define_cpu_unit "cortex_a53_slot0" "cortex_a53") +(define_cpu_unit "cortex_a53_slot1" "cortex_a53") + +(define_reservation "cortex_a53_slot_any" "cortex_a53_slot0|cortex_a53_slot1") +(define_reservation "cortex_a53_single_issue" "cortex_a53_slot0+cortex_a53_slot1") + +;; The load/store pipeline. Load/store instructions can dual-issue from +;; either pipeline, but two load/stores cannot simultaneously issue. + +(define_cpu_unit "cortex_a53_ls" "cortex_a53") + +;; The store pipeline. Shared between both execution pipelines. + +(define_cpu_unit "cortex_a53_store" "cortex_a53") + +;; The branch pipeline. Branches can dual-issue with other instructions +;; (except when those instructions take multiple cycles to issue). + +(define_cpu_unit "cortex_a53_branch" "cortex_a53") + +;; The integer divider. + +(define_cpu_unit "cortex_a53_idiv" "cortex_a53") + +;; The floating-point add pipeline used to model the usage +;; of the add pipeline by fmac instructions. + +(define_cpu_unit "cortex_a53_fpadd_pipe" "cortex_a53") + +;; Floating-point div/sqrt (long latency, out-of-order completion). + +(define_cpu_unit "cortex_a53_fp_div_sqrt" "cortex_a53") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ALU instructions. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "cortex_a53_alu" 2 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg")) + "cortex_a53_slot_any") + +(define_insn_reservation "cortex_a53_alu_shift" 2 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "arlo_shift,arlo_shift_reg,\ + mov_shift,mov_shift_reg,\ + mvn_shift,mvn_shift_reg")) + "cortex_a53_slot_any") + +;; Forwarding path for unshifted operands. 
+ +(define_bypass 1 "cortex_a53_alu,cortex_a53_alu_shift" + "cortex_a53_alu") + +(define_bypass 1 "cortex_a53_alu,cortex_a53_alu_shift" + "cortex_a53_alu_shift" + "arm_no_early_alu_shift_dep") + +;; The multiplier pipeline can forward results so there's no need to specify +;; bypasses. Multiplies can only single-issue currently. + +(define_insn_reservation "cortex_a53_mul" 3 + (and (eq_attr "tune" "cortexa53") + (ior (eq_attr "mul32" "yes") + (eq_attr "mul64" "yes"))) + "cortex_a53_single_issue") + +;; A multiply with a single-register result or an MLA, followed by an +;; MLA with an accumulator dependency, has its result forwarded so two +;; such instructions can issue back-to-back. + +(define_bypass 1 "cortex_a53_mul" + "cortex_a53_mul" + "arm_mac_accumulator_is_mul_result") + +;; Punt with a high enough latency for divides. +(define_insn_reservation "cortex_a53_udiv" 8 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "udiv")) + "(cortex_a53_slot0+cortex_a53_idiv),cortex_a53_idiv*7") + +(define_insn_reservation "cortex_a53_sdiv" 9 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "sdiv")) + "(cortex_a53_slot0+cortex_a53_idiv),cortex_a53_idiv*8") + + +(define_bypass 2 "cortex_a53_mul,cortex_a53_udiv,cortex_a53_sdiv" + "cortex_a53_alu") +(define_bypass 2 "cortex_a53_mul,cortex_a53_udiv,cortex_a53_sdiv" + "cortex_a53_alu_shift" + "arm_no_early_alu_shift_dep") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Load/store instructions. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Address-generation happens in the issue stage. + +(define_insn_reservation "cortex_a53_load1" 3 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "load_byte,load1")) + "cortex_a53_slot_any+cortex_a53_ls") + +(define_insn_reservation "cortex_a53_store1" 2 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "store1")) + "cortex_a53_slot_any+cortex_a53_ls+cortex_a53_store") + +(define_insn_reservation "cortex_a53_load2" 3 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "load2")) + "cortex_a53_single_issue+cortex_a53_ls") + +(define_insn_reservation "cortex_a53_store2" 2 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "store2")) + "cortex_a53_single_issue+cortex_a53_ls+cortex_a53_store") + +(define_insn_reservation "cortex_a53_load3plus" 4 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "load3,load4")) + "(cortex_a53_single_issue+cortex_a53_ls)*2") + +(define_insn_reservation "cortex_a53_store3plus" 3 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "store3,store4")) + "(cortex_a53_single_issue+cortex_a53_ls+cortex_a53_store)*2") + +;; Load/store addresses are required early in Issue. +(define_bypass 3 "cortex_a53_load1,cortex_a53_load2,cortex_a53_load3plus,cortex_a53_alu,cortex_a53_alu_shift" + "cortex_a53_load*" + "arm_early_load_addr_dep") +(define_bypass 3 "cortex_a53_load1,cortex_a53_load2,cortex_a53_load3plus,cortex_a53_alu,cortex_a53_alu_shift" + "cortex_a53_store*" + "arm_early_store_addr_dep") + +;; Load data can forward in the ALU pipeline +(define_bypass 2 "cortex_a53_load1,cortex_a53_load2" + "cortex_a53_alu") +(define_bypass 2 "cortex_a53_load1,cortex_a53_load2" + "cortex_a53_alu_shift" + "arm_no_early_alu_shift_dep") + +;; ALU ops can forward to stores. 
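The load/store reservations and the load-to-ALU bypasses above give a load an effective two-cycle latency into a dependent ALU operation, so the scheduler benefits from separating a load from its first use. A minimal C sketch of the dependency being modelled, assuming -mcpu=cortex-a53 tuning (the function name is purely illustrative; the actual schedule depends on the surrounding code):

/* The add consumes the loaded value, matching the
   cortex_a53_load1 -> cortex_a53_alu bypass modelled above;
   independent instructions can fill the gap.  */
int
load_then_add (const int *p, int k)
{
  return *p + k;
}
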
+(define_bypass 0 "cortex_a53_alu,cortex_a53_alu_shift" + "cortex_a53_store1,cortex_a53_store2,cortex_a53_store3plus" + "arm_no_early_store_addr_dep") + +(define_bypass 1 "cortex_a53_mul,cortex_a53_udiv,cortex_a53_sdiv,cortex_a53_load1,cortex_a53_load2,cortex_a53_load3plus" + "cortex_a53_store1,cortex_a53_store2,cortex_a53_store3plus" + "arm_no_early_store_addr_dep") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Branches. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Currently models all branches as dual-issuable from either execution +;; slot, which isn't true for all cases. We still need to model indirect +;; branches. + +(define_insn_reservation "cortex_a53_branch" 0 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "branch,call")) + "cortex_a53_slot_any+cortex_a53_branch") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Floating-point arithmetic. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "cortex_a53_fpalu" 4 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "ffariths, fadds, ffarithd, faddd, fcpys, fmuls, f_cvt,\ + fcmps, fcmpd")) + "cortex_a53_slot0+cortex_a53_fpadd_pipe") + +(define_insn_reservation "cortex_a53_fconst" 2 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "fconsts,fconstd")) + "cortex_a53_slot0+cortex_a53_fpadd_pipe") + +(define_insn_reservation "cortex_a53_fpmul" 4 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "fmuls,fmuld")) + "cortex_a53_slot0") + +;; For single-precision multiply-accumulate, the add (accumulate) is issued after +;; the multiply completes. Model that accordingly. + +(define_insn_reservation "cortex_a53_fpmac" 8 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "fmacs,fmacd,ffmas,ffmad")) + "cortex_a53_slot0, nothing*3, cortex_a53_fpadd_pipe") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Floating-point divide/square root instructions. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; fsqrt really takes one cycle less, but that is not modelled. + +(define_insn_reservation "cortex_a53_fdivs" 14 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "fdivs")) + "cortex_a53_slot0, cortex_a53_fp_div_sqrt * 13") + +(define_insn_reservation "cortex_a53_fdivd" 29 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "fdivd")) + "cortex_a53_slot0, cortex_a53_fp_div_sqrt * 28") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; VFP to/from core transfers. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "cortex_a53_r2f" 4 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "r_2_f")) + "cortex_a53_slot0") + +(define_insn_reservation "cortex_a53_f2r" 2 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "f_2_r")) + "cortex_a53_slot0") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; VFP flag transfer. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "cortex_a53_f_flags" 4 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "f_flag")) + "cortex_a53_slot0") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; VFP load/store. 
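The cortex_a53_fpmac reservation above issues the accumulate some cycles after the multiply, and the fdivs/fdivd reservations model the long-latency divide/sqrt unit. A hedged C sketch of source that typically maps onto these classes (whether a fused or chained multiply-add is emitted depends on the selected FPU and contraction settings; the function names are illustrative):

/* Candidate for the fmacs/ffmas class covered by cortex_a53_fpmac.  */
float
mul_then_add (float a, float b, float c)
{
  return a * b + c;
}

/* Occupies the cortex_a53_fp_div_sqrt unit (fdivs reservation).  */
float
scale (float num, float den)
{
  return num / den;
}
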
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "cortex_a53_f_loads" 4 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "f_loads")) + "cortex_a53_slot0") + +(define_insn_reservation "cortex_a53_f_loadd" 5 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "f_loadd")) + "cortex_a53_slot0") + +(define_insn_reservation "cortex_a53_f_stores" 0 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "f_stores")) + "cortex_a53_slot0") + +(define_insn_reservation "cortex_a53_f_stored" 0 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "f_stored")) + "cortex_a53_slot0") + +;; Load-to-use for floating-point values has a penalty of one cycle, +;; i.e. a latency of two. + +(define_bypass 2 "cortex_a53_f_loads" + "cortex_a53_fpalu, cortex_a53_fpmac, cortex_a53_fpmul,\ + cortex_a53_fdivs, cortex_a53_fdivd,\ + cortex_a53_f2r") + +(define_bypass 2 "cortex_a53_f_loadd" + "cortex_a53_fpalu, cortex_a53_fpmac, cortex_a53_fpmul,\ + cortex_a53_fdivs, cortex_a53_fdivd,\ + cortex_a53_f2r") + --- a/src/gcc/config/arm/bpabi.h +++ b/src/gcc/config/arm/bpabi.h @@ -60,6 +60,7 @@ |mcpu=cortex-a7 \ |mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15 \ |mcpu=marvell-pj4 \ + |mcpu=cortex-a53 \ |mcpu=generic-armv7-a \ |march=armv7-m|mcpu=cortex-m3 \ |march=armv7e-m|mcpu=cortex-m4 \ @@ -71,6 +72,7 @@ " %{mbig-endian:%{march=armv7-a|mcpu=cortex-a5 \ |mcpu=cortex-a7 \ |mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15 \ + |mcpu=cortex-a53 \ |mcpu=marvell-pj4 \ |mcpu=generic-armv7-a \ |march=armv7-m|mcpu=cortex-m3 \ --- a/src/gcc/config/arm/marvell-f-iwmmxt.md +++ b/src/gcc/config/arm/marvell-f-iwmmxt.md @@ -63,52 +63,62 @@ ;; An attribute appended to instructions for classification (define_attr "wmmxt_shift" "yes,no" - (if_then_else (eq_attr "wtype" "wror, wsll, wsra, wsrl") + (if_then_else (eq_attr "type" "wmmx_wror, wmmx_wsll, wmmx_wsra, wmmx_wsrl") (const_string "yes") (const_string "no")) ) (define_attr "wmmxt_pack" "yes,no" - (if_then_else (eq_attr "wtype" "waligni, walignr, wmerge, wpack, wshufh, wunpckeh, wunpckih, wunpckel, wunpckil") + (if_then_else (eq_attr "type" "wmmx_waligni, wmmx_walignr, wmmx_wmerge,\ + wmmx_wpack, wmmx_wshufh, wmmx_wunpckeh,\ + wmmx_wunpckih, wmmx_wunpckel, wmmx_wunpckil") (const_string "yes") (const_string "no")) ) (define_attr "wmmxt_mult_c1" "yes,no" - (if_then_else (eq_attr "wtype" "wmac, wmadd, wmiaxy, wmiawxy, wmulw, wqmiaxy, wqmulwm") + (if_then_else (eq_attr "type" "wmmx_wmac, wmmx_wmadd, wmmx_wmiaxy,\ + wmmx_wmiawxy, wmmx_wmulw, wmmx_wqmiaxy,\ + wmmx_wqmulwm") (const_string "yes") (const_string "no")) ) (define_attr "wmmxt_mult_c2" "yes,no" - (if_then_else (eq_attr "wtype" "wmul, wqmulm") + (if_then_else (eq_attr "type" "wmmx_wmul, wmmx_wqmulm") (const_string "yes") (const_string "no")) ) (define_attr "wmmxt_alu_c1" "yes,no" - (if_then_else (eq_attr "wtype" "wabs, wabsdiff, wand, wandn, wmov, wor, wxor") + (if_then_else (eq_attr "type" "wmmx_wabs, wmmx_wabsdiff, wmmx_wand,\ + wmmx_wandn, wmmx_wmov, wmmx_wor, wmmx_wxor") (const_string "yes") (const_string "no")) ) (define_attr "wmmxt_alu_c2" "yes,no" - (if_then_else (eq_attr "wtype" "wacc, wadd, waddsubhx, wavg2, wavg4, wcmpeq, wcmpgt, wmax, wmin, wsub, waddbhus, wsubaddhx") + (if_then_else (eq_attr "type" "wmmx_wacc, wmmx_wadd, wmmx_waddsubhx,\ + wmmx_wavg2, wmmx_wavg4, wmmx_wcmpeq,\ + wmmx_wcmpgt, wmmx_wmax, wmmx_wmin,\ + wmmx_wsub, wmmx_waddbhus, wmmx_wsubaddhx") (const_string "yes") (const_string "no")) ) (define_attr "wmmxt_alu_c3" "yes,no" - (if_then_else (eq_attr "wtype" 
"wsad") + (if_then_else (eq_attr "type" "wmmx_wsad") (const_string "yes") (const_string "no")) ) (define_attr "wmmxt_transfer_c1" "yes,no" - (if_then_else (eq_attr "wtype" "tbcst, tinsr, tmcr, tmcrr") + (if_then_else (eq_attr "type" "wmmx_tbcst, wmmx_tinsr,\ + wmmx_tmcr, wmmx_tmcrr") (const_string "yes") (const_string "no")) ) (define_attr "wmmxt_transfer_c2" "yes,no" - (if_then_else (eq_attr "wtype" "textrm, tmovmsk, tmrc, tmrrc") + (if_then_else (eq_attr "type" "wmmx_textrm, wmmx_tmovmsk,\ + wmmx_tmrc, wmmx_tmrrc") (const_string "yes") (const_string "no")) ) (define_attr "wmmxt_transfer_c3" "yes,no" - (if_then_else (eq_attr "wtype" "tmia, tmiaph, tmiaxy") + (if_then_else (eq_attr "type" "wmmx_tmia, wmmx_tmiaph, wmmx_tmiaxy") (const_string "yes") (const_string "no")) ) @@ -169,11 +179,11 @@ (define_insn_reservation "marvell_f_iwmmxt_wstr" 0 (and (eq_attr "marvell_f_iwmmxt" "yes") - (eq_attr "wtype" "wstr")) + (eq_attr "type" "wmmx_wstr")) "mf_iwmmxt_pipeline") ;There is a forwarding path from MW stage (define_insn_reservation "marvell_f_iwmmxt_wldr" 5 (and (eq_attr "marvell_f_iwmmxt" "yes") - (eq_attr "wtype" "wldr")) + (eq_attr "type" "wmmx_wldr")) "mf_iwmmxt_pipeline") --- a/src/gcc/config/arm/t-mlibs +++ b/src/gcc/config/arm/t-mlibs @@ -0,0 +1,21 @@ +# A set of predefined MULTILIB for different ARM targets. +# Through the configure option --with-multilib-list, user can customize the +# final MULTILIB implementation. + +comma := , +space := +space += + +MULTILIB_OPTIONS = marm +MULTILIB_DIRNAMES = arm +MULTILIB_OPTIONS += march=armv4t +MULTILIB_DIRNAMES += armv4t +MULTILIB_OPTIONS += mfloat-abi=soft +MULTILIB_DIRNAMES += soft + +MULTILIB_EXCEPTIONS = + +MULTILIB_REQUIRED = marm/march=armv4t/mfloat-abi=soft + +MULTILIB_OSDIRNAMES = marm/march.armv4t/mfloat-abi.soft=!arm-linux-gnueabi + --- a/src/gcc/config/arm/iterators.md +++ b/src/gcc/config/arm/iterators.md @@ -201,6 +201,20 @@ (define_int_iterator NEON_VRINT [UNSPEC_NVRINTP UNSPEC_NVRINTZ UNSPEC_NVRINTM UNSPEC_NVRINTX UNSPEC_NVRINTA UNSPEC_NVRINTN]) +(define_int_iterator CRC [UNSPEC_CRC32B UNSPEC_CRC32H UNSPEC_CRC32W + UNSPEC_CRC32CB UNSPEC_CRC32CH UNSPEC_CRC32CW]) + +(define_int_iterator CRYPTO_UNARY [UNSPEC_AESMC UNSPEC_AESIMC]) + +(define_int_iterator CRYPTO_BINARY [UNSPEC_AESD UNSPEC_AESE + UNSPEC_SHA1SU1 UNSPEC_SHA256SU0]) + +(define_int_iterator CRYPTO_TERNARY [UNSPEC_SHA1SU0 UNSPEC_SHA256H + UNSPEC_SHA256H2 UNSPEC_SHA256SU1]) + +(define_int_iterator CRYPTO_SELECTING [UNSPEC_SHA1C UNSPEC_SHA1M + UNSPEC_SHA1P]) + ;;---------------------------------------------------------------------------- ;; Mode attributes ;;---------------------------------------------------------------------------- @@ -500,3 +514,54 @@ (define_int_attr nvrint_variant [(UNSPEC_NVRINTZ "z") (UNSPEC_NVRINTP "p") (UNSPEC_NVRINTA "a") (UNSPEC_NVRINTM "m") (UNSPEC_NVRINTX "x") (UNSPEC_NVRINTN "n")]) + +(define_int_attr crc_variant [(UNSPEC_CRC32B "crc32b") (UNSPEC_CRC32H "crc32h") + (UNSPEC_CRC32W "crc32w") (UNSPEC_CRC32CB "crc32cb") + (UNSPEC_CRC32CH "crc32ch") (UNSPEC_CRC32CW "crc32cw")]) + +(define_int_attr crc_mode [(UNSPEC_CRC32B "QI") (UNSPEC_CRC32H "HI") + (UNSPEC_CRC32W "SI") (UNSPEC_CRC32CB "QI") + (UNSPEC_CRC32CH "HI") (UNSPEC_CRC32CW "SI")]) + +(define_int_attr crypto_pattern [(UNSPEC_SHA1H "sha1h") (UNSPEC_AESMC "aesmc") + (UNSPEC_AESIMC "aesimc") (UNSPEC_AESD "aesd") + (UNSPEC_AESE "aese") (UNSPEC_SHA1SU1 "sha1su1") + (UNSPEC_SHA256SU0 "sha256su0") (UNSPEC_SHA1C "sha1c") + (UNSPEC_SHA1M "sha1m") (UNSPEC_SHA1P "sha1p") + (UNSPEC_SHA1SU0 "sha1su0") 
(UNSPEC_SHA256H "sha256h") + (UNSPEC_SHA256H2 "sha256h2") + (UNSPEC_SHA256SU1 "sha256su1")]) + +(define_int_attr crypto_type + [(UNSPEC_AESE "neon_crypto_aes") (UNSPEC_AESD "neon_crypto_aes") + (UNSPEC_AESMC "neon_crypto_aes") (UNSPEC_AESIMC "neon_crypto_aes") + (UNSPEC_SHA1C "neon_crypto_sha1_slow") (UNSPEC_SHA1P "neon_crypto_sha1_slow") + (UNSPEC_SHA1M "neon_crypto_sha1_slow") (UNSPEC_SHA1SU1 "neon_crypto_sha1_fast") + (UNSPEC_SHA1SU0 "neon_crypto_sha1_xor") (UNSPEC_SHA256H "neon_crypto_sha256_slow") + (UNSPEC_SHA256H2 "neon_crypto_sha256_slow") (UNSPEC_SHA256SU0 "neon_crypto_sha256_fast") + (UNSPEC_SHA256SU1 "neon_crypto_sha256_slow")]) + +(define_int_attr crypto_size_sfx [(UNSPEC_SHA1H "32") (UNSPEC_AESMC "8") + (UNSPEC_AESIMC "8") (UNSPEC_AESD "8") + (UNSPEC_AESE "8") (UNSPEC_SHA1SU1 "32") + (UNSPEC_SHA256SU0 "32") (UNSPEC_SHA1C "32") + (UNSPEC_SHA1M "32") (UNSPEC_SHA1P "32") + (UNSPEC_SHA1SU0 "32") (UNSPEC_SHA256H "32") + (UNSPEC_SHA256H2 "32") (UNSPEC_SHA256SU1 "32")]) + +(define_int_attr crypto_mode [(UNSPEC_SHA1H "V4SI") (UNSPEC_AESMC "V16QI") + (UNSPEC_AESIMC "V16QI") (UNSPEC_AESD "V16QI") + (UNSPEC_AESE "V16QI") (UNSPEC_SHA1SU1 "V4SI") + (UNSPEC_SHA256SU0 "V4SI") (UNSPEC_SHA1C "V4SI") + (UNSPEC_SHA1M "V4SI") (UNSPEC_SHA1P "V4SI") + (UNSPEC_SHA1SU0 "V4SI") (UNSPEC_SHA256H "V4SI") + (UNSPEC_SHA256H2 "V4SI") (UNSPEC_SHA256SU1 "V4SI")]) + +;; Both kinds of return insn. +(define_code_iterator returns [return simple_return]) +(define_code_attr return_str [(return "") (simple_return "simple_")]) +(define_code_attr return_simple_p [(return "false") (simple_return "true")]) +(define_code_attr return_cond_false [(return " && USE_RETURN_INSN (FALSE)") + (simple_return " && use_simple_return_p ()")]) +(define_code_attr return_cond_true [(return " && USE_RETURN_INSN (TRUE)") + (simple_return " && use_simple_return_p ()")]) --- a/src/gcc/config/arm/sync.md +++ b/src/gcc/config/arm/sync.md @@ -65,6 +65,42 @@ (set_attr "conds" "unconditional") (set_attr "predicable" "no")]) +(define_insn "atomic_load" + [(set (match_operand:QHSI 0 "register_operand" "=r") + (unspec_volatile:QHSI + [(match_operand:QHSI 1 "arm_sync_memory_operand" "Q") + (match_operand:SI 2 "const_int_operand")] ;; model + VUNSPEC_LDA))] + "TARGET_HAVE_LDACQ" + { + enum memmodel model = (enum memmodel) INTVAL (operands[2]); + if (model == MEMMODEL_RELAXED + || model == MEMMODEL_CONSUME + || model == MEMMODEL_RELEASE) + return \"ldr\\t%0, %1\"; + else + return \"lda\\t%0, %1\"; + } +) + +(define_insn "atomic_store" + [(set (match_operand:QHSI 0 "memory_operand" "=Q") + (unspec_volatile:QHSI + [(match_operand:QHSI 1 "general_operand" "r") + (match_operand:SI 2 "const_int_operand")] ;; model + VUNSPEC_STL))] + "TARGET_HAVE_LDACQ" + { + enum memmodel model = (enum memmodel) INTVAL (operands[2]); + if (model == MEMMODEL_RELAXED + || model == MEMMODEL_CONSUME + || model == MEMMODEL_ACQUIRE) + return \"str\t%1, %0\"; + else + return \"stl\t%1, %0\"; + } +) + ;; Note that ldrd and vldr are *not* guaranteed to be single-copy atomic, ;; even for a 64-bit aligned address. Instead we use a ldrexd unparied ;; with a store. 
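The new atomic_load/atomic_store patterns above choose between plain ldr/str and the ARMv8 lda/stl forms based on the memory-model operand. A minimal C sketch of code that reaches them through the GCC __atomic built-ins, assuming an ARMv8-A target so that TARGET_HAVE_LDACQ holds (function names are illustrative):

#include <stdint.h>

/* Acquire load and release store map to the lda/stl alternatives;
   relaxed accesses keep the ordinary ldr/str forms.  */
uint32_t
load_acquire (const uint32_t *p)
{
  return __atomic_load_n (p, __ATOMIC_ACQUIRE);
}

void
store_release (uint32_t *p, uint32_t v)
{
  __atomic_store_n (p, v, __ATOMIC_RELEASE);
}
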
@@ -88,7 +124,8 @@ UNSPEC_LL))] "TARGET_HAVE_LDREXD && ARM_DOUBLEWORD_ALIGN" "ldrexd%?\t%0, %H0, %C1" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_expand "atomic_compare_and_swap" [(match_operand:SI 0 "s_register_operand" "") ;; bool out @@ -325,8 +362,20 @@ VUNSPEC_LL)))] "TARGET_HAVE_LDREXBH" "ldrex%?\t%0, %C1" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) +(define_insn "arm_load_acquire_exclusive" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (zero_extend:SI + (unspec_volatile:NARROW + [(match_operand:NARROW 1 "mem_noofs_operand" "Ua")] + VUNSPEC_LAX)))] + "TARGET_HAVE_LDACQ" + "ldaex%?\\t%0, %C1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + (define_insn "arm_load_exclusivesi" [(set (match_operand:SI 0 "s_register_operand" "=r") (unspec_volatile:SI @@ -334,8 +383,19 @@ VUNSPEC_LL))] "TARGET_HAVE_LDREX" "ldrex%?\t%0, %C1" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) +(define_insn "arm_load_acquire_exclusivesi" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (unspec_volatile:SI + [(match_operand:SI 1 "mem_noofs_operand" "Ua")] + VUNSPEC_LAX))] + "TARGET_HAVE_LDACQ" + "ldaex%?\t%0, %C1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + (define_insn "arm_load_exclusivedi" [(set (match_operand:DI 0 "s_register_operand" "=r") (unspec_volatile:DI @@ -343,8 +403,19 @@ VUNSPEC_LL))] "TARGET_HAVE_LDREXD" "ldrexd%?\t%0, %H0, %C1" - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) +(define_insn "arm_load_acquire_exclusivedi" + [(set (match_operand:DI 0 "s_register_operand" "=r") + (unspec_volatile:DI + [(match_operand:DI 1 "mem_noofs_operand" "Ua")] + VUNSPEC_LAX))] + "TARGET_HAVE_LDACQ && ARM_DOUBLEWORD_ALIGN" + "ldaexd%?\t%0, %H0, %C1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + (define_insn "arm_store_exclusive" [(set (match_operand:SI 0 "s_register_operand" "=&r") (unspec_volatile:SI [(const_int 0)] VUNSPEC_SC)) @@ -367,4 +438,35 @@ } return "strex%?\t%0, %2, %C1"; } - [(set_attr "predicable" "yes")]) + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "arm_store_release_exclusivedi" + [(set (match_operand:SI 0 "s_register_operand" "=&r") + (unspec_volatile:SI [(const_int 0)] VUNSPEC_SLX)) + (set (match_operand:DI 1 "mem_noofs_operand" "=Ua") + (unspec_volatile:DI + [(match_operand:DI 2 "s_register_operand" "r")] + VUNSPEC_SLX))] + "TARGET_HAVE_LDACQ && ARM_DOUBLEWORD_ALIGN" + { + rtx value = operands[2]; + /* See comment in arm_store_exclusive above. 
*/ + gcc_assert ((REGNO (value) & 1) == 0 || TARGET_THUMB2); + operands[3] = gen_rtx_REG (SImode, REGNO (value) + 1); + return "stlexd%?\t%0, %2, %3, %C1"; + } + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "arm_store_release_exclusive" + [(set (match_operand:SI 0 "s_register_operand" "=&r") + (unspec_volatile:SI [(const_int 0)] VUNSPEC_SLX)) + (set (match_operand:QHSI 1 "mem_noofs_operand" "=Ua") + (unspec_volatile:QHSI + [(match_operand:QHSI 2 "s_register_operand" "r")] + VUNSPEC_SLX))] + "TARGET_HAVE_LDACQ" + "stlex%?\t%0, %2, %C1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) --- a/src/gcc/config/arm/fa726te.md +++ b/src/gcc/config/arm/fa726te.md @@ -78,7 +78,8 @@ ;; Move instructions. (define_insn_reservation "726te_shift_op" 1 (and (eq_attr "tune" "fa726te") - (eq_attr "insn" "mov,mvn")) + (eq_attr "type" "mov_imm,mov_reg,mov_shift,mov_shift_reg,\ + mvn_imm,mvn_reg,mvn_shift,mvn_shift_reg")) "fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)") ;; ALU operations with no shifted operand will finished in 1 cycle @@ -85,8 +86,7 @@ ;; Other ALU instructions 2 cycles. (define_insn_reservation "726te_alu_op" 1 (and (eq_attr "tune" "fa726te") - (and (eq_attr "type" "alu_reg,simple_alu_imm") - (not (eq_attr "insn" "mov,mvn")))) + (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg")) "fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)") ;; ALU operations with a shift-by-register operand. @@ -95,14 +95,12 @@ ;; it takes 3 cycles. (define_insn_reservation "726te_alu_shift_op" 3 (and (eq_attr "tune" "fa726te") - (and (eq_attr "type" "simple_alu_shift,alu_shift") - (not (eq_attr "insn" "mov,mvn")))) + (eq_attr "type" "extend,arlo_shift")) "fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)") (define_insn_reservation "726te_alu_shift_reg_op" 3 (and (eq_attr "tune" "fa726te") - (and (eq_attr "type" "alu_shift_reg") - (not (eq_attr "insn" "mov,mvn")))) + (eq_attr "type" "arlo_shift_reg")) "fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Multiplication Instructions @@ -115,7 +113,7 @@ (define_insn_reservation "726te_mult_op" 3 (and (eq_attr "tune" "fa726te") - (eq_attr "insn" "smlalxy,mul,mla,muls,mlas,umull,umlal,smull,smlal,\ + (eq_attr "type" "smlalxy,mul,mla,muls,mlas,umull,umlal,smull,smlal,\ umulls,umlals,smulls,smlals,smlawx,smulxy,smlaxy")) "fa726te_issue+fa726te_mac_pipe") --- a/src/gcc/config/arm/neon-testgen.ml +++ b/src/gcc/config/arm/neon-testgen.ml @@ -163,10 +163,13 @@ match List.find (fun feature -> match feature with Requires_feature _ -> true | Requires_arch _ -> true + | Requires_FP_bit 1 -> true | _ -> false) features with Requires_feature "FMA" -> "arm_neonv2" + | Requires_feature "CRYPTO" -> "arm_crypto" | Requires_arch 8 -> "arm_v8_neon" + | Requires_FP_bit 1 -> "arm_neon_fp16" | _ -> assert false with Not_found -> "arm_neon" @@ -298,5 +301,5 @@ (* Program entry point. *) let _ = let directory = if Array.length Sys.argv <> 1 then Sys.argv.(1) else "." in - List.iter (test_intrinsic_group directory) (reinterp @ ops) + List.iter (test_intrinsic_group directory) (reinterp @ reinterpq @ ops) --- a/src/gcc/config/arm/arm.md +++ b/src/gcc/config/arm/arm.md @@ -74,6 +74,15 @@ ; IS_THUMB1 is set to 'yes' iff we are generating Thumb-1 code. (define_attr "is_thumb1" "no,yes" (const (symbol_ref "thumb1_code"))) +; We use this attribute to disable alternatives that can produce 32-bit +; instructions inside an IT-block in Thumb2 state. 
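The acquire/release exclusive patterns added to sync.md above (arm_load_acquire_exclusive* and arm_store_release_exclusive*) are the forms the atomic read-modify-write built-ins can expand to on ARMv8-A targets. A minimal C sketch, again assuming TARGET_HAVE_LDACQ; the function name is illustrative and the exact sequence emitted depends on the target options:

#include <stdint.h>

/* Expands to a load-exclusive/store-exclusive retry loop; on ARMv8-A
   the acquire/release exclusives can stand in for separate barriers.  */
uint32_t
fetch_add_acq_rel (uint32_t *p)
{
  return __atomic_fetch_add (p, 1, __ATOMIC_ACQ_REL);
}
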
ARMv8 deprecates IT blocks +; that contain 32-bit instructions. +(define_attr "enabled_for_depr_it" "no,yes" (const_string "yes")) + +; This attribute is used to disable a predicated alternative when we have +; arm_restrict_it. +(define_attr "predicable_short_it" "no,yes" (const_string "yes")) + ;; Operand number of an input operand that is shifted. Zero if the ;; given instruction does not shift one of its input operands. (define_attr "shift" "" (const_int 0)) @@ -84,6 +93,8 @@ (define_attr "fpu" "none,vfp" (const (symbol_ref "arm_fpu_attr"))) +(define_attr "predicated" "yes,no" (const_string "no")) + ; LENGTH of an instruction (in bytes) (define_attr "length" "" (const_int 4)) @@ -94,7 +105,7 @@ ; for ARM or Thumb-2 with arm_arch6, and nov6 for ARM without ; arm_arch6. This attribute is used to compute attribute "enabled", ; use type "any" to enable an alternative in all cases. -(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,onlya8,neon_onlya8,nota8,neon_nota8,iwmmxt,iwmmxt2" +(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,neon_for_64bits,avoid_neon_for_64bits,iwmmxt,iwmmxt2" (const_string "any")) (define_attr "arch_enabled" "no,yes" @@ -129,24 +140,16 @@ (match_test "TARGET_32BIT && !arm_arch6")) (const_string "yes") - (and (eq_attr "arch" "onlya8") - (eq_attr "tune" "cortexa8")) + (and (eq_attr "arch" "avoid_neon_for_64bits") + (match_test "TARGET_NEON") + (not (match_test "TARGET_PREFER_NEON_64BITS"))) (const_string "yes") - (and (eq_attr "arch" "neon_onlya8") - (eq_attr "tune" "cortexa8") - (match_test "TARGET_NEON")) + (and (eq_attr "arch" "neon_for_64bits") + (match_test "TARGET_NEON") + (match_test "TARGET_PREFER_NEON_64BITS")) (const_string "yes") - (and (eq_attr "arch" "nota8") - (not (eq_attr "tune" "cortexa8"))) - (const_string "yes") - - (and (eq_attr "arch" "neon_nota8") - (not (eq_attr "tune" "cortexa8")) - (match_test "TARGET_NEON")) - (const_string "yes") - (and (eq_attr "arch" "iwmmxt2") (match_test "TARGET_REALLY_IWMMXT2")) (const_string "yes")] @@ -179,6 +182,15 @@ (cond [(eq_attr "insn_enabled" "no") (const_string "no") + (and (eq_attr "predicable_short_it" "no") + (and (eq_attr "predicated" "yes") + (match_test "arm_restrict_it"))) + (const_string "no") + + (and (eq_attr "enabled_for_depr_it" "no") + (match_test "arm_restrict_it")) + (const_string "no") + (eq_attr "arch_enabled" "no") (const_string "no") @@ -214,126 +226,341 @@ (set_attr "length" "4") (set_attr "pool_range" "250")]) -;; The instruction used to implement a particular pattern. This -;; information is used by pipeline descriptions to provide accurate -;; scheduling information. - -(define_attr "insn" - "mov,mvn,smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals,smlawy,smuad,smuadx,smlad,smladx,smusd,smusdx,smlsd,smlsdx,smmul,smmulr,smmla,umaal,smlald,smlsld,clz,mrs,msr,xtab,sdiv,udiv,sat,other" - (const_string "other")) - -; TYPE attribute is used to detect floating point instructions which, if -; running on a co-processor can run in parallel with other, basic instructions -; If write-buffer scheduling is enabled then it can also be used in the -; scheduling of writes. - -; Classification of each insn -; Note: vfp.md has different meanings for some of these, and some further -; types as well. See that file for details. -; simple_alu_imm a simple alu instruction that doesn't hit memory or fp -; regs or have a shifted source operand and has an immediate -; operand. This currently only tracks very basic immediate -; alu operations. 
-; alu_reg any alu instruction that doesn't hit memory or fp -; regs or have a shifted source operand -; and does not have an immediate operand. This is -; also the default -; simple_alu_shift covers UXTH, UXTB, SXTH, SXTB -; alu_shift any data instruction that doesn't hit memory or fp -; regs, but has a source operand shifted by a constant -; alu_shift_reg any data instruction that doesn't hit memory or fp -; regs, but has a source operand shifted by a register value -; mult a multiply instruction -; block blockage insn, this blocks all functional units -; float a floating point arithmetic operation (subject to expansion) -; fdivd DFmode floating point division -; fdivs SFmode floating point division -; f_load[sd] A single/double load from memory. Used for VFP unit. -; f_store[sd] A single/double store to memory. Used for VFP unit. -; f_flag a transfer of co-processor flags to the CPSR -; f_2_r transfer float to core (no memory needed) -; r_2_f transfer core to float -; f_cvt convert floating<->integral -; branch a branch -; call a subroutine call -; load_byte load byte(s) from memory to arm registers -; load1 load 1 word from memory to arm registers -; load2 load 2 words from memory to arm registers -; load3 load 3 words from memory to arm registers -; load4 load 4 words from memory to arm registers -; store store 1 word to memory from arm registers -; store2 store 2 words -; store3 store 3 words -; store4 store 4 (or more) words +; TYPE attribute is used to classify instructions for use in scheduling. ; +; Instruction classification: +; +; arlo_imm any arithmetic or logical instruction that doesn't have +; a shifted operand and has an immediate operand. This +; excludes MOV, MVN and RSB(S) immediate. +; arlo_reg any arithmetic or logical instruction that doesn't have +; a shifted or an immediate operand. This excludes +; MOV and MVN but includes MOVT. This is also the default. +; arlo_shift any arithmetic or logical instruction that has a source +; operand shifted by a constant. This excludes +; simple shifts. +; arlo_shift_reg as arlo_shift, with the shift amount specified in a +; register. +; block blockage insn, this blocks all functional units. +; branch branch. +; call subroutine call. +; clz count leading zeros (CLZ). +; extend extend instruction (SXTB, SXTH, UXTB, UXTH). +; f_2_r transfer from float to core (no memory needed). +; f_cvt conversion between float and integral. +; f_flag transfer of co-processor flags to the CPSR. +; f_load[d,s] double/single load from memory. Used for VFP unit. +; f_minmax[d,s] double/single floating point minimum/maximum. +; f_rint[d,s] double/single floating point rount to integral. +; f_sel[d,s] double/single floating byte select. +; f_store[d,s] double/single store to memory. Used for VFP unit. +; fadd[d,s] double/single floating-point scalar addition. +; fcmp[d,s] double/single floating-point compare. +; fconst[d,s] double/single load immediate. +; fcpys single precision floating point cpy. +; fdiv[d,s] double/single precision floating point division. +; ffarith[d,s] double/single floating point abs/neg/cpy. +; ffma[d,s] double/single floating point fused multiply-accumulate. +; float floating point arithmetic operation. +; fmac[d,s] double/single floating point multiply-accumulate. +; fmul[d,s] double/single floating point multiply. +; load_byte load byte(s) from memory to arm registers. +; load1 load 1 word from memory to arm registers. +; load2 load 2 words from memory to arm registers. +; load3 load 3 words from memory to arm registers. 
+; load4 load 4 words from memory to arm registers. +; mla integer multiply accumulate. +; mlas integer multiply accumulate, flag setting. +; mov_imm simple MOV instruction that moves an immediate to +; register. This includes MOVW, but not MOVT. +; mov_reg simple MOV instruction that moves a register to another +; register. This includes MOVW, but not MOVT. +; mov_shift simple MOV instruction, shifted operand by a constant. +; mov_shift_reg simple MOV instruction, shifted operand by a register. +; mul integer multiply. +; muls integer multiply, flag setting. +; mvn_imm inverting move instruction, immediate. +; mvn_reg inverting move instruction, register. +; mvn_shift inverting move instruction, shifted operand by a constant. +; mvn_shift_reg inverting move instruction, shifted operand by a register. +; r_2_f transfer from core to float. +; sdiv signed division. +; shift simple shift operation (LSL, LSR, ASR, ROR) with an +; immediate. +; shift_reg simple shift by a register. +; smlad signed multiply accumulate dual. +; smladx signed multiply accumulate dual reverse. +; smlal signed multiply accumulate long. +; smlald signed multiply accumulate long dual. +; smlals signed multiply accumulate long, flag setting. +; smlalxy signed multiply accumulate, 16x16-bit, 64-bit accumulate. +; smlawx signed multiply accumulate, 32x16-bit, 32-bit accumulate. +; smlawy signed multiply accumulate wide, 32x16-bit, +; 32-bit accumulate. +; smlaxy signed multiply accumulate, 16x16-bit, 32-bit accumulate. +; smlsd signed multiply subtract dual. +; smlsdx signed multiply subtract dual reverse. +; smlsld signed multiply subtract long dual. +; smmla signed most significant word multiply accumulate. +; smmul signed most significant word multiply. +; smmulr signed most significant word multiply, rounded. +; smuad signed dual multiply add. +; smuadx signed dual multiply add reverse. +; smull signed multiply long. +; smulls signed multiply long, flag setting. +; smulwy signed multiply wide, 32x16-bit, 32-bit accumulate. +; smulxy signed multiply, 16x16-bit, 32-bit accumulate. +; smusd signed dual multiply subtract. +; smusdx signed dual multiply subtract reverse. +; store1 store 1 word to memory from arm registers. +; store2 store 2 words to memory from arm registers. +; store3 store 3 words to memory from arm registers. +; store4 store 4 (or more) words to memory from arm registers. +; udiv unsigned division. +; umaal unsigned multiply accumulate accumulate long. +; umlal unsigned multiply accumulate long. +; umlals unsigned multiply accumulate long, flag setting. +; umull unsigned multiply long. +; umulls unsigned multiply long, flag setting. +; +; The classification below is for instructions used by the Wireless MMX +; Technology. Each attribute value is used to classify an instruction of the +; same name or family. 
+; +; wmmx_tandc +; wmmx_tbcst +; wmmx_textrc +; wmmx_textrm +; wmmx_tinsr +; wmmx_tmcr +; wmmx_tmcrr +; wmmx_tmia +; wmmx_tmiaph +; wmmx_tmiaxy +; wmmx_tmrc +; wmmx_tmrrc +; wmmx_tmovmsk +; wmmx_torc +; wmmx_torvsc +; wmmx_wabs +; wmmx_wdiff +; wmmx_wacc +; wmmx_wadd +; wmmx_waddbhus +; wmmx_waddsubhx +; wmmx_waligni +; wmmx_walignr +; wmmx_wand +; wmmx_wandn +; wmmx_wavg2 +; wmmx_wavg4 +; wmmx_wcmpeq +; wmmx_wcmpgt +; wmmx_wmac +; wmmx_wmadd +; wmmx_wmax +; wmmx_wmerge +; wmmx_wmiawxy +; wmmx_wmiaxy +; wmmx_wmin +; wmmx_wmov +; wmmx_wmul +; wmmx_wmulw +; wmmx_wldr +; wmmx_wor +; wmmx_wpack +; wmmx_wqmiaxy +; wmmx_wqmulm +; wmmx_wqmulwm +; wmmx_wror +; wmmx_wsad +; wmmx_wshufh +; wmmx_wsll +; wmmx_wsra +; wmmx_wsrl +; wmmx_wstr +; wmmx_wsub +; wmmx_wsubaddhx +; wmmx_wunpckeh +; wmmx_wunpckel +; wmmx_wunpckih +; wmmx_wunpckil +; wmmx_wxor (define_attr "type" - "simple_alu_imm,\ - alu_reg,\ - simple_alu_shift,\ - alu_shift,\ - alu_shift_reg,\ - mult,\ + "arlo_imm,\ + arlo_reg,\ + arlo_shift,\ + arlo_shift_reg,\ block,\ - float,\ + branch,\ + call,\ + clz,\ + crc,\ + extend,\ + f_2_r,\ + f_cvt,\ + f_flag,\ + f_loadd,\ + f_loads,\ + f_minmaxd,\ + f_minmaxs,\ + f_rintd,\ + f_rints,\ + f_seld,\ + f_sels,\ + f_stored,\ + f_stores,\ + faddd,\ + fadds,\ + fcmpd,\ + fcmps,\ + fconstd,\ + fconsts,\ + fcpys,\ fdivd,\ fdivs,\ + ffarithd,\ + ffariths,\ + ffmad,\ + ffmas,\ + float,\ + fmacd,\ + fmacs,\ + fmuld,\ fmuls,\ - fmuld,\ - fmacs,\ - fmacd,\ - ffmas,\ - ffmad,\ - f_rints,\ - f_rintd,\ - f_minmaxs,\ - f_minmaxd,\ - f_flag,\ - f_loads,\ - f_loadd,\ - f_stores,\ - f_stored,\ - f_2_r,\ - r_2_f,\ - f_cvt,\ - branch,\ - call,\ load_byte,\ load1,\ load2,\ load3,\ load4,\ + mla,\ + mlas,\ + mov_imm,\ + mov_reg,\ + mov_shift,\ + mov_shift_reg,\ + mul,\ + muls,\ + mvn_imm,\ + mvn_reg,\ + mvn_shift,\ + mvn_shift_reg,\ + r_2_f,\ + sdiv,\ + shift,\ + shift_reg,\ + smlad,\ + smladx,\ + smlal,\ + smlald,\ + smlals,\ + smlalxy,\ + smlawx,\ + smlawy,\ + smlaxy,\ + smlsd,\ + smlsdx,\ + smlsld,\ + smmla,\ + smmul,\ + smmulr,\ + smuad,\ + smuadx,\ + smull,\ + smulls,\ + smulwy,\ + smulxy,\ + smusd,\ + smusdx,\ store1,\ store2,\ store3,\ store4,\ - fconsts,\ - fconstd,\ - fadds,\ - faddd,\ - ffariths,\ - ffarithd,\ - fcmps,\ - fcmpd,\ - fcpys" - (if_then_else - (eq_attr "insn" "smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,\ - umull,umulls,umlal,umlals,smull,smulls,smlal,smlals") - (const_string "mult") - (const_string "alu_reg"))) + udiv,\ + umaal,\ + umlal,\ + umlals,\ + umull,\ + umulls,\ + wmmx_tandc,\ + wmmx_tbcst,\ + wmmx_textrc,\ + wmmx_textrm,\ + wmmx_tinsr,\ + wmmx_tmcr,\ + wmmx_tmcrr,\ + wmmx_tmia,\ + wmmx_tmiaph,\ + wmmx_tmiaxy,\ + wmmx_tmrc,\ + wmmx_tmrrc,\ + wmmx_tmovmsk,\ + wmmx_torc,\ + wmmx_torvsc,\ + wmmx_wabs,\ + wmmx_wabsdiff,\ + wmmx_wacc,\ + wmmx_wadd,\ + wmmx_waddbhus,\ + wmmx_waddsubhx,\ + wmmx_waligni,\ + wmmx_walignr,\ + wmmx_wand,\ + wmmx_wandn,\ + wmmx_wavg2,\ + wmmx_wavg4,\ + wmmx_wcmpeq,\ + wmmx_wcmpgt,\ + wmmx_wmac,\ + wmmx_wmadd,\ + wmmx_wmax,\ + wmmx_wmerge,\ + wmmx_wmiawxy,\ + wmmx_wmiaxy,\ + wmmx_wmin,\ + wmmx_wmov,\ + wmmx_wmul,\ + wmmx_wmulw,\ + wmmx_wldr,\ + wmmx_wor,\ + wmmx_wpack,\ + wmmx_wqmiaxy,\ + wmmx_wqmulm,\ + wmmx_wqmulwm,\ + wmmx_wror,\ + wmmx_wsad,\ + wmmx_wshufh,\ + wmmx_wsll,\ + wmmx_wsra,\ + wmmx_wsrl,\ + wmmx_wstr,\ + wmmx_wsub,\ + wmmx_wsubaddhx,\ + wmmx_wunpckeh,\ + wmmx_wunpckel,\ + wmmx_wunpckih,\ + wmmx_wunpckil,\ + wmmx_wxor" + (const_string "arlo_reg")) +; Is this an (integer side) multiply with a 32-bit (or smaller) result? 
+(define_attr "mul32" "no,yes" + (if_then_else + (eq_attr "type" + "smulxy,smlaxy,smulwy,smlawx,mul,muls,mla,mlas,smlawy,smuad,smuadx,\ + smlad,smladx,smusd,smusdx,smlsd,smlsdx,smmul,smmulr,smmla,smlald,smlsld") + (const_string "yes") + (const_string "no"))) + ; Is this an (integer side) multiply with a 64-bit result? (define_attr "mul64" "no,yes" (if_then_else - (eq_attr "insn" - "smlalxy,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals") + (eq_attr "type" + "smlalxy,umull,umulls,umaal,umlal,umlals,smull,smulls,smlal,smlals") (const_string "yes") (const_string "no"))) -; wtype for WMMX insn scheduling purposes. -(define_attr "wtype" - "none,wor,wxor,wand,wandn,wmov,tmcrr,tmrrc,wldr,wstr,tmcr,tmrc,wadd,wsub,wmul,wmac,wavg2,tinsr,textrm,wshufh,wcmpeq,wcmpgt,wmax,wmin,wpack,wunpckih,wunpckil,wunpckeh,wunpckel,wror,wsra,wsrl,wsll,wmadd,tmia,tmiaph,tmiaxy,tbcst,tmovmsk,wacc,waligni,walignr,tandc,textrc,torc,torvsc,wsad,wabs,wabsdiff,waddsubhx,wsubaddhx,wavg4,wmulw,wqmulm,wqmulwm,waddbhus,wqmiaxy,wmiaxy,wmiawxy,wmerge" (const_string "none")) - ; Load scheduling, set from the arm_ld_sched variable ; initialized by arm_option_override() (define_attr "ldsched" "no,yes" (const (symbol_ref "arm_ld_sched"))) @@ -402,6 +629,13 @@ neon_mrrc,\ neon_ldm_2,\ neon_stm_2,\ + neon_crypto_aes,\ + neon_crypto_sha1_xor,\ + neon_crypto_sha1_fast,\ + neon_crypto_sha1_slow,\ + neon_crypto_sha256_fast,\ + neon_crypto_sha256_slow,\ + neon_mul_d_long,\ none" (const_string "none")) @@ -458,9 +692,19 @@ ; than one on the main cpu execution unit. (define_attr "core_cycles" "single,multi" (if_then_else (eq_attr "type" - "simple_alu_imm,alu_reg,\ - simple_alu_shift,alu_shift,\ - float,fdivd,fdivs") + "arlo_imm, arlo_reg,\ + extend, shift, arlo_shift, float, fdivd, fdivs,\ + wmmx_wor, wmmx_wxor, wmmx_wand, wmmx_wandn, wmmx_wmov, wmmx_tmcrr,\ + wmmx_tmrrc, wmmx_wldr, wmmx_wstr, wmmx_tmcr, wmmx_tmrc, wmmx_wadd,\ + wmmx_wsub, wmmx_wmul, wmmx_wmac, wmmx_wavg2, wmmx_tinsr, wmmx_textrm,\ + wmmx_wshufh, wmmx_wcmpeq, wmmx_wcmpgt, wmmx_wmax, wmmx_wmin, wmmx_wpack,\ + wmmx_wunpckih, wmmx_wunpckil, wmmx_wunpckeh, wmmx_wunpckel, wmmx_wror,\ + wmmx_wsra, wmmx_wsrl, wmmx_wsll, wmmx_wmadd, wmmx_tmia, wmmx_tmiaph,\ + wmmx_tmiaxy, wmmx_tbcst, wmmx_tmovmsk, wmmx_wacc, wmmx_waligni,\ + wmmx_walignr, wmmx_tandc, wmmx_textrc, wmmx_torc, wmmx_torvsc, wmmx_wsad,\ + wmmx_wabs, wmmx_wabsdiff, wmmx_waddsubhx, wmmx_wsubaddhx, wmmx_wavg4,\ + wmmx_wmulw, wmmx_wqmulm, wmmx_wqmulwm, wmmx_waddbhus, wmmx_wqmiaxy,\ + wmmx_wmiaxy, wmmx_wmiawxy, wmmx_wmerge") (const_string "single") (const_string "multi"))) @@ -502,7 +746,7 @@ (define_attr "generic_sched" "yes,no" (const (if_then_else - (ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa7,cortexa8,cortexa9,cortexa15,cortexm4,marvell_pj4") + (ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa7,cortexa8,cortexa9,cortexa15,cortexa53,cortexm4,marvell_pj4") (eq_attr "tune_cortexr4" "yes")) (const_string "no") (const_string "yes")))) @@ -510,7 +754,7 @@ (define_attr "generic_vfp" "yes,no" (const (if_then_else (and (eq_attr "fpu" "vfp") - (eq_attr "tune" "!arm1020e,arm1022e,cortexa5,cortexa7,cortexa8,cortexa9,cortexm4,marvell_pj4") + (eq_attr "tune" "!arm1020e,arm1022e,cortexa5,cortexa7,cortexa8,cortexa9,cortexa53,cortexm4,marvell_pj4") (eq_attr "tune_cortexr4" "no")) (const_string "yes") (const_string "no")))) @@ -531,6 +775,7 @@ (include 
"cortex-a8.md") (include "cortex-a9.md") (include "cortex-a15.md") +(include "cortex-a53.md") (include "cortex-r4.md") (include "cortex-r4f.md") (include "cortex-m4.md") @@ -697,14 +942,17 @@ ;; (plus (reg rN) (reg sp)) into (reg rN). In this case reload will ;; put the duplicated register first, and not try the commutative version. (define_insn_and_split "*arm_addsi3" - [(set (match_operand:SI 0 "s_register_operand" "=rk, r,k, r,r, k, r, k,k,r, k, r") - (plus:SI (match_operand:SI 1 "s_register_operand" "%0, rk,k, r,rk,k, rk,k,r,rk,k, rk") - (match_operand:SI 2 "reg_or_int_operand" "rk, rI,rI,k,Pj,Pj,L, L,L,PJ,PJ,?n")))] + [(set (match_operand:SI 0 "s_register_operand" "=rk,l,l ,l ,r ,k ,r,r ,k ,r ,k,k,r ,k ,r") + (plus:SI (match_operand:SI 1 "s_register_operand" "%0 ,l,0 ,l ,rk,k ,r,rk,k ,rk,k,r,rk,k ,rk") + (match_operand:SI 2 "reg_or_int_operand" "rk ,l,Py,Pd,rI,rI,k,Pj,Pj,L ,L,L,PJ,PJ,?n")))] "TARGET_32BIT" "@ add%?\\t%0, %0, %2 add%?\\t%0, %1, %2 add%?\\t%0, %1, %2 + add%?\\t%0, %1, %2 + add%?\\t%0, %1, %2 + add%?\\t%0, %1, %2 add%?\\t%0, %2, %1 addw%?\\t%0, %1, %2 addw%?\\t%0, %1, %2 @@ -725,12 +973,13 @@ operands[1], 0); DONE; " - [(set_attr "length" "2,4,4,4,4,4,4,4,4,4,4,16") + [(set_attr "length" "2,4,4,4,4,4,4,4,4,4,4,4,4,4,16") (set_attr "predicable" "yes") - (set_attr "arch" "t2,*,*,*,t2,t2,*,*,a,t2,t2,*") + (set_attr "predicable_short_it" "yes,yes,yes,yes,no,no,no,no,no,no,no,no,no,no,no") + (set_attr "arch" "t2,t2,t2,t2,*,*,*,t2,t2,*,*,a,t2,t2,*") (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "") - (const_string "simple_alu_imm") - (const_string "alu_reg"))) + (const_string "arlo_imm") + (const_string "arlo_reg"))) ] ) @@ -811,7 +1060,7 @@ sub%.\\t%0, %1, #%n2 add%.\\t%0, %1, %2" [(set_attr "conds" "set") - (set_attr "type" "simple_alu_imm, simple_alu_imm, *")] + (set_attr "type" "arlo_imm,arlo_imm,*")] ) (define_insn "*addsi3_compare0_scratch" @@ -827,7 +1076,7 @@ cmn%?\\t%0, %1" [(set_attr "conds" "set") (set_attr "predicable" "yes") - (set_attr "type" "simple_alu_imm, simple_alu_imm, *") + (set_attr "type" "arlo_imm,arlo_imm,*") ] ) @@ -834,17 +1083,20 @@ (define_insn "*compare_negsi_si" [(set (reg:CC_Z CC_REGNUM) (compare:CC_Z - (neg:SI (match_operand:SI 0 "s_register_operand" "r")) - (match_operand:SI 1 "s_register_operand" "r")))] + (neg:SI (match_operand:SI 0 "s_register_operand" "l,r")) + (match_operand:SI 1 "s_register_operand" "l,r")))] "TARGET_32BIT" "cmn%?\\t%1, %0" [(set_attr "conds" "set") - (set_attr "predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "arch" "t2,*") + (set_attr "length" "2,4") + (set_attr "predicable_short_it" "yes,no")] ) ;; This is the canonicalization of addsi3_compare0_for_combiner when the ;; addend is a constant. 
-(define_insn "*cmpsi2_addneg" +(define_insn "cmpsi2_addneg" [(set (reg:CC CC_REGNUM) (compare:CC (match_operand:SI 1 "s_register_operand" "r,r") @@ -914,7 +1166,7 @@ sub%.\\t%0, %1, #%n2 add%.\\t%0, %1, %2" [(set_attr "conds" "set") - (set_attr "type" "simple_alu_imm,simple_alu_imm,*")] + (set_attr "type" "arlo_imm,arlo_imm,*")] ) (define_insn "*addsi3_compare_op2" @@ -931,63 +1183,84 @@ add%.\\t%0, %1, %2 sub%.\\t%0, %1, #%n2" [(set_attr "conds" "set") - (set_attr "type" "simple_alu_imm,simple_alu_imm,*")] + (set_attr "type" "arlo_imm,arlo_imm,*")] ) (define_insn "*compare_addsi2_op0" [(set (reg:CC_C CC_REGNUM) - (compare:CC_C - (plus:SI (match_operand:SI 0 "s_register_operand" "r,r,r") - (match_operand:SI 1 "arm_add_operand" "I,L,r")) - (match_dup 0)))] + (compare:CC_C + (plus:SI (match_operand:SI 0 "s_register_operand" "l,l,r,r,r") + (match_operand:SI 1 "arm_add_operand" "Pv,l,I,L,r")) + (match_dup 0)))] "TARGET_32BIT" "@ + cmp%?\\t%0, #%n1 cmn%?\\t%0, %1 + cmn%?\\t%0, %1 cmp%?\\t%0, #%n1 cmn%?\\t%0, %1" [(set_attr "conds" "set") (set_attr "predicable" "yes") - (set_attr "type" "simple_alu_imm,simple_alu_imm,*")] + (set_attr "arch" "t2,t2,*,*,*") + (set_attr "predicable_short_it" "yes,yes,no,no,no") + (set_attr "length" "2,2,4,4,4") + (set_attr "type" "arlo_imm,*,arlo_imm,arlo_imm,*")] ) (define_insn "*compare_addsi2_op1" [(set (reg:CC_C CC_REGNUM) - (compare:CC_C - (plus:SI (match_operand:SI 0 "s_register_operand" "r,r,r") - (match_operand:SI 1 "arm_add_operand" "I,L,r")) - (match_dup 1)))] + (compare:CC_C + (plus:SI (match_operand:SI 0 "s_register_operand" "l,l,r,r,r") + (match_operand:SI 1 "arm_add_operand" "Pv,l,I,L,r")) + (match_dup 1)))] "TARGET_32BIT" "@ + cmp%?\\t%0, #%n1 cmn%?\\t%0, %1 + cmn%?\\t%0, %1 cmp%?\\t%0, #%n1 cmn%?\\t%0, %1" [(set_attr "conds" "set") (set_attr "predicable" "yes") - (set_attr "type" "simple_alu_imm,simple_alu_imm,*")] -) + (set_attr "arch" "t2,t2,*,*,*") + (set_attr "predicable_short_it" "yes,yes,no,no,no") + (set_attr "length" "2,2,4,4,4") + (set_attr "type" + "arlo_imm,*,arlo_imm,arlo_imm,*")] + ) (define_insn "*addsi3_carryin_" - [(set (match_operand:SI 0 "s_register_operand" "=r,r") - (plus:SI (plus:SI (match_operand:SI 1 "s_register_operand" "%r,r") - (match_operand:SI 2 "arm_not_operand" "rI,K")) - (LTUGEU:SI (reg: CC_REGNUM) (const_int 0))))] + [(set (match_operand:SI 0 "s_register_operand" "=l,r,r") + (plus:SI (plus:SI (match_operand:SI 1 "s_register_operand" "%l,r,r") + (match_operand:SI 2 "arm_not_operand" "0,rI,K")) + (LTUGEU:SI (reg: CC_REGNUM) (const_int 0))))] "TARGET_32BIT" "@ adc%?\\t%0, %1, %2 + adc%?\\t%0, %1, %2 sbc%?\\t%0, %1, #%B2" - [(set_attr "conds" "use")] + [(set_attr "conds" "use") + (set_attr "predicable" "yes") + (set_attr "arch" "t2,*,*") + (set_attr "length" "4") + (set_attr "predicable_short_it" "yes,no,no")] ) (define_insn "*addsi3_carryin_alt2_" - [(set (match_operand:SI 0 "s_register_operand" "=r,r") - (plus:SI (plus:SI (LTUGEU:SI (reg: CC_REGNUM) (const_int 0)) - (match_operand:SI 1 "s_register_operand" "%r,r")) - (match_operand:SI 2 "arm_rhs_operand" "rI,K")))] + [(set (match_operand:SI 0 "s_register_operand" "=l,r,r") + (plus:SI (plus:SI (LTUGEU:SI (reg: CC_REGNUM) (const_int 0)) + (match_operand:SI 1 "s_register_operand" "%l,r,r")) + (match_operand:SI 2 "arm_rhs_operand" "l,rI,K")))] "TARGET_32BIT" "@ adc%?\\t%0, %1, %2 + adc%?\\t%0, %1, %2 sbc%?\\t%0, %1, #%B2" - [(set_attr "conds" "use")] + [(set_attr "conds" "use") + (set_attr "predicable" "yes") + (set_attr "arch" "t2,*,*") + (set_attr "length" "4") + 
(set_attr "predicable_short_it" "yes,no,no")] ) (define_insn "*addsi3_carryin_shift_" @@ -1001,9 +1274,11 @@ "TARGET_32BIT" "adc%?\\t%0, %1, %3%S2" [(set_attr "conds" "use") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "") - (const_string "alu_shift") - (const_string "alu_shift_reg")))] + (const_string "arlo_shift") + (const_string "arlo_shift_reg")))] ) (define_insn "*addsi3_carryin_clobercc_" @@ -1017,26 +1292,89 @@ [(set_attr "conds" "set")] ) -(define_expand "incscc" +(define_insn "*subsi3_carryin" [(set (match_operand:SI 0 "s_register_operand" "=r,r") - (plus:SI (match_operator:SI 2 "arm_comparison_operator" - [(match_operand:CC 3 "cc_register" "") (const_int 0)]) - (match_operand:SI 1 "s_register_operand" "0,?r")))] + (minus:SI (minus:SI (match_operand:SI 1 "reg_or_int_operand" "r,I") + (match_operand:SI 2 "s_register_operand" "r,r")) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] "TARGET_32BIT" - "" + "@ + sbc%?\\t%0, %1, %2 + rsc%?\\t%0, %2, %1" + [(set_attr "conds" "use") + (set_attr "arch" "*,a") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) -(define_insn "*arm_incscc" - [(set (match_operand:SI 0 "s_register_operand" "=r,r") - (plus:SI (match_operator:SI 2 "arm_comparison_operator" - [(match_operand:CC 3 "cc_register" "") (const_int 0)]) - (match_operand:SI 1 "s_register_operand" "0,?r")))] +(define_insn "*subsi3_carryin_const" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (minus:SI (plus:SI (match_operand:SI 1 "reg_or_int_operand" "r") + (match_operand:SI 2 "arm_not_operand" "K")) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + "TARGET_32BIT" + "sbc\\t%0, %1, #%B2" + [(set_attr "conds" "use")] +) + +(define_insn "*subsi3_carryin_compare" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "s_register_operand" "r"))) + (set (match_operand:SI 0 "s_register_operand" "=r") + (minus:SI (minus:SI (match_dup 1) + (match_dup 2)) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + "TARGET_32BIT" + "sbcs\\t%0, %1, %2" + [(set_attr "conds" "set")] +) + +(define_insn "*subsi3_carryin_compare_const" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 1 "reg_or_int_operand" "r") + (match_operand:SI 2 "arm_not_operand" "K"))) + (set (match_operand:SI 0 "s_register_operand" "=r") + (minus:SI (plus:SI (match_dup 1) + (match_dup 2)) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + "TARGET_32BIT" + "sbcs\\t%0, %1, #%B2" + [(set_attr "conds" "set")] +) + +(define_insn "*subsi3_carryin_shift" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (minus:SI (minus:SI + (match_operand:SI 1 "s_register_operand" "r") + (match_operator:SI 2 "shift_operator" + [(match_operand:SI 3 "s_register_operand" "r") + (match_operand:SI 4 "reg_or_int_operand" "rM")])) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + "TARGET_32BIT" + "sbc%?\\t%0, %1, %3%S2" + [(set_attr "conds" "use") + (set_attr "predicable" "yes") + (set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "") + (const_string "arlo_shift") + (const_string "arlo_shift_reg")))] +) + +(define_insn "*rsbsi3_carryin_shift" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (minus:SI (minus:SI + (match_operator:SI 2 "shift_operator" + [(match_operand:SI 3 "s_register_operand" "r") + (match_operand:SI 4 "reg_or_int_operand" "rM")]) + (match_operand:SI 1 "s_register_operand" "r")) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] 
"TARGET_ARM" - "@ - add%d2\\t%0, %1, #1 - mov%D2\\t%0, %1\;add%d2\\t%0, %1, #1" + "rsc%?\\t%0, %1, %3%S2" [(set_attr "conds" "use") - (set_attr "length" "4,8")] + (set_attr "predicable" "yes") + (set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "") + (const_string "arlo_shift") + (const_string "arlo_shift_reg")))] ) ; transform ((x << y) - 1) to ~(~(x-1) << y) Where X is a constant. @@ -1087,13 +1425,27 @@ " ) -(define_insn "*arm_subdi3" +(define_insn_and_split "*arm_subdi3" [(set (match_operand:DI 0 "s_register_operand" "=&r,&r,&r") (minus:DI (match_operand:DI 1 "s_register_operand" "0,r,0") (match_operand:DI 2 "s_register_operand" "r,0,0"))) (clobber (reg:CC CC_REGNUM))] "TARGET_32BIT && !TARGET_NEON" - "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2" + "#" ; "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2" + "&& reload_completed" + [(parallel [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))]) + (set (match_dup 3) (minus:SI (minus:SI (match_dup 4) (match_dup 5)) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[5] = gen_highpart (SImode, operands[2]); + operands[2] = gen_lowpart (SImode, operands[2]); + } [(set_attr "conds" "clob") (set_attr "length" "8")] ) @@ -1108,7 +1460,7 @@ [(set_attr "length" "4")] ) -(define_insn "*subdi_di_zesidi" +(define_insn_and_split "*subdi_di_zesidi" [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") (minus:DI (match_operand:DI 1 "s_register_operand" "0,r") (zero_extend:DI @@ -1115,12 +1467,25 @@ (match_operand:SI 2 "s_register_operand" "r,r")))) (clobber (reg:CC CC_REGNUM))] "TARGET_32BIT" - "subs\\t%Q0, %Q1, %2\;sbc\\t%R0, %R1, #0" + "#" ; "subs\\t%Q0, %Q1, %2\;sbc\\t%R0, %R1, #0" + "&& reload_completed" + [(parallel [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))]) + (set (match_dup 3) (minus:SI (plus:SI (match_dup 4) (match_dup 5)) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[5] = GEN_INT (~0); + } [(set_attr "conds" "clob") (set_attr "length" "8")] ) -(define_insn "*subdi_di_sesidi" +(define_insn_and_split "*subdi_di_sesidi" [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") (minus:DI (match_operand:DI 1 "s_register_operand" "0,r") (sign_extend:DI @@ -1127,12 +1492,26 @@ (match_operand:SI 2 "s_register_operand" "r,r")))) (clobber (reg:CC CC_REGNUM))] "TARGET_32BIT" - "subs\\t%Q0, %Q1, %2\;sbc\\t%R0, %R1, %2, asr #31" + "#" ; "subs\\t%Q0, %Q1, %2\;sbc\\t%R0, %R1, %2, asr #31" + "&& reload_completed" + [(parallel [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))]) + (set (match_dup 3) (minus:SI (minus:SI (match_dup 4) + (ashiftrt:SI (match_dup 2) + (const_int 31))) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + } [(set_attr "conds" "clob") 
(set_attr "length" "8")] ) -(define_insn "*subdi_zesidi_di" +(define_insn_and_split "*subdi_zesidi_di" [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") (minus:DI (zero_extend:DI (match_operand:SI 2 "s_register_operand" "r,r")) @@ -1139,12 +1518,26 @@ (match_operand:DI 1 "s_register_operand" "0,r"))) (clobber (reg:CC CC_REGNUM))] "TARGET_ARM" - "rsbs\\t%Q0, %Q1, %2\;rsc\\t%R0, %R1, #0" + "#" ; "rsbs\\t%Q0, %Q1, %2\;rsc\\t%R0, %R1, #0" + ; is equivalent to: + ; "subs\\t%Q0, %2, %Q1\;rsc\\t%R0, %R1, #0" + "&& reload_completed" + [(parallel [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 2) (match_dup 1))) + (set (match_dup 0) (minus:SI (match_dup 2) (match_dup 1)))]) + (set (match_dup 3) (minus:SI (minus:SI (const_int 0) (match_dup 4)) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + } [(set_attr "conds" "clob") (set_attr "length" "8")] ) -(define_insn "*subdi_sesidi_di" +(define_insn_and_split "*subdi_sesidi_di" [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") (minus:DI (sign_extend:DI (match_operand:SI 2 "s_register_operand" "r,r")) @@ -1151,12 +1544,29 @@ (match_operand:DI 1 "s_register_operand" "0,r"))) (clobber (reg:CC CC_REGNUM))] "TARGET_ARM" - "rsbs\\t%Q0, %Q1, %2\;rsc\\t%R0, %R1, %2, asr #31" + "#" ; "rsbs\\t%Q0, %Q1, %2\;rsc\\t%R0, %R1, %2, asr #31" + ; is equivalent to: + ; "subs\\t%Q0, %2, %Q1\;rsc\\t%R0, %R1, %2, asr #31" + "&& reload_completed" + [(parallel [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 2) (match_dup 1))) + (set (match_dup 0) (minus:SI (match_dup 2) (match_dup 1)))]) + (set (match_dup 3) (minus:SI (minus:SI + (ashiftrt:SI (match_dup 2) + (const_int 31)) + (match_dup 4)) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + } [(set_attr "conds" "clob") (set_attr "length" "8")] ) -(define_insn "*subdi_zesidi_zesidi" +(define_insn_and_split "*subdi_zesidi_zesidi" [(set (match_operand:DI 0 "s_register_operand" "=r") (minus:DI (zero_extend:DI (match_operand:SI 1 "s_register_operand" "r")) @@ -1164,7 +1574,17 @@ (match_operand:SI 2 "s_register_operand" "r")))) (clobber (reg:CC CC_REGNUM))] "TARGET_32BIT" - "subs\\t%Q0, %1, %2\;sbc\\t%R0, %1, %1" + "#" ; "subs\\t%Q0, %1, %2\;sbc\\t%R0, %1, %1" + "&& reload_completed" + [(parallel [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))]) + (set (match_dup 3) (minus:SI (minus:SI (match_dup 1) (match_dup 1)) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + } [(set_attr "conds" "clob") (set_attr "length" "8")] ) @@ -1201,12 +1621,16 @@ ; ??? 
Check Thumb-2 split length (define_insn_and_split "*arm_subsi3_insn" - [(set (match_operand:SI 0 "s_register_operand" "=r,r,r,rk,r") - (minus:SI (match_operand:SI 1 "reg_or_int_operand" "rI,r,r,k,?n") - (match_operand:SI 2 "reg_or_int_operand" "r,I,r,r, r")))] + [(set (match_operand:SI 0 "s_register_operand" "=l,l ,l ,l ,r ,r,r,rk,r") + (minus:SI (match_operand:SI 1 "reg_or_int_operand" "l ,0 ,l ,Pz,rI,r,r,k ,?n") + (match_operand:SI 2 "reg_or_int_operand" "l ,Py,Pd,l ,r ,I,r,r ,r")))] "TARGET_32BIT" "@ + sub%?\\t%0, %1, %2 + sub%?\\t%0, %2 + sub%?\\t%0, %1, %2 rsb%?\\t%0, %2, %1 + rsb%?\\t%0, %2, %1 sub%?\\t%0, %1, %2 sub%?\\t%0, %1, %2 sub%?\\t%0, %1, %2 @@ -1219,9 +1643,11 @@ INTVAL (operands[1]), operands[0], operands[2], 0); DONE; " - [(set_attr "length" "4,4,4,4,16") + [(set_attr "length" "4,4,4,4,4,4,4,4,16") + (set_attr "arch" "t2,t2,t2,t2,*,*,*,*,*") (set_attr "predicable" "yes") - (set_attr "type" "*,simple_alu_imm,*,*,*")] + (set_attr "predicable_short_it" "yes,yes,yes,yes,no,no,no,no,no") + (set_attr "type" "*,*,*,*,arlo_imm,arlo_imm,*,*,arlo_imm")] ) (define_peephole2 @@ -1251,10 +1677,10 @@ sub%.\\t%0, %1, %2 rsb%.\\t%0, %2, %1" [(set_attr "conds" "set") - (set_attr "type" "simple_alu_imm,*,*")] + (set_attr "type" "arlo_imm,*,*")] ) -(define_insn "*subsi3_compare" +(define_insn "subsi3_compare" [(set (reg:CC CC_REGNUM) (compare:CC (match_operand:SI 1 "arm_rhs_operand" "r,r,I") (match_operand:SI 2 "arm_rhs_operand" "I,r,r"))) @@ -1266,32 +1692,9 @@ sub%.\\t%0, %1, %2 rsb%.\\t%0, %2, %1" [(set_attr "conds" "set") - (set_attr "type" "simple_alu_imm,*,*")] + (set_attr "type" "arlo_imm,*,*")] ) -(define_expand "decscc" - [(set (match_operand:SI 0 "s_register_operand" "=r,r") - (minus:SI (match_operand:SI 1 "s_register_operand" "0,?r") - (match_operator:SI 2 "arm_comparison_operator" - [(match_operand 3 "cc_register" "") (const_int 0)])))] - "TARGET_32BIT" - "" -) - -(define_insn "*arm_decscc" - [(set (match_operand:SI 0 "s_register_operand" "=r,r") - (minus:SI (match_operand:SI 1 "s_register_operand" "0,?r") - (match_operator:SI 2 "arm_comparison_operator" - [(match_operand 3 "cc_register" "") (const_int 0)])))] - "TARGET_ARM" - "@ - sub%d2\\t%0, %1, #1 - mov%D2\\t%0, %1\;sub%d2\\t%0, %1, #1" - [(set_attr "conds" "use") - (set_attr "length" "*,8") - (set_attr "type" "simple_alu_imm,*")] -) - (define_expand "subsf3" [(set (match_operand:SF 0 "s_register_operand" "") (minus:SF (match_operand:SF 1 "s_register_operand" "") @@ -1311,6 +1714,20 @@ ;; Multiplication insns +(define_expand "mulhi3" + [(set (match_operand:HI 0 "s_register_operand" "") + (mult:HI (match_operand:HI 1 "s_register_operand" "") + (match_operand:HI 2 "s_register_operand" "")))] + "TARGET_DSP_MULTIPLY" + " + { + rtx result = gen_reg_rtx (SImode); + emit_insn (gen_mulhisi3 (result, operands[1], operands[2])); + emit_move_insn (operands[0], gen_lowpart (HImode, result)); + DONE; + }" +) + (define_expand "mulsi3" [(set (match_operand:SI 0 "s_register_operand" "") (mult:SI (match_operand:SI 2 "s_register_operand" "") @@ -1326,18 +1743,21 @@ (match_operand:SI 1 "s_register_operand" "%0,r")))] "TARGET_32BIT && !arm_arch6" "mul%?\\t%0, %2, %1" - [(set_attr "insn" "mul") + [(set_attr "type" "mul") (set_attr "predicable" "yes")] ) (define_insn "*arm_mulsi3_v6" - [(set (match_operand:SI 0 "s_register_operand" "=r") - (mult:SI (match_operand:SI 1 "s_register_operand" "r") - (match_operand:SI 2 "s_register_operand" "r")))] + [(set (match_operand:SI 0 "s_register_operand" "=l,l,r") + (mult:SI (match_operand:SI 1 
"s_register_operand" "0,l,r") + (match_operand:SI 2 "s_register_operand" "l,0,r")))] "TARGET_32BIT && arm_arch6" "mul%?\\t%0, %1, %2" - [(set_attr "insn" "mul") - (set_attr "predicable" "yes")] + [(set_attr "type" "mul") + (set_attr "predicable" "yes") + (set_attr "arch" "t2,t2,*") + (set_attr "length" "4") + (set_attr "predicable_short_it" "yes,yes,no")] ) ; Unfortunately with the Thumb the '&'/'0' trick can fails when operands @@ -1357,7 +1777,7 @@ return \"mul\\t%0, %2\"; " [(set_attr "length" "4,4,2") - (set_attr "insn" "mul")] + (set_attr "type" "muls")] ) (define_insn "*thumb_mulsi3_v6" @@ -1370,7 +1790,7 @@ mul\\t%0, %1 mul\\t%0, %1" [(set_attr "length" "2") - (set_attr "insn" "mul")] + (set_attr "type" "muls")] ) (define_insn "*mulsi3_compare0" @@ -1384,7 +1804,7 @@ "TARGET_ARM && !arm_arch6" "mul%.\\t%0, %2, %1" [(set_attr "conds" "set") - (set_attr "insn" "muls")] + (set_attr "type" "muls")] ) (define_insn "*mulsi3_compare0_v6" @@ -1398,7 +1818,7 @@ "TARGET_ARM && arm_arch6 && optimize_size" "mul%.\\t%0, %2, %1" [(set_attr "conds" "set") - (set_attr "insn" "muls")] + (set_attr "type" "muls")] ) (define_insn "*mulsi_compare0_scratch" @@ -1411,7 +1831,7 @@ "TARGET_ARM && !arm_arch6" "mul%.\\t%0, %2, %1" [(set_attr "conds" "set") - (set_attr "insn" "muls")] + (set_attr "type" "muls")] ) (define_insn "*mulsi_compare0_scratch_v6" @@ -1424,7 +1844,7 @@ "TARGET_ARM && arm_arch6 && optimize_size" "mul%.\\t%0, %2, %1" [(set_attr "conds" "set") - (set_attr "insn" "muls")] + (set_attr "type" "muls")] ) ;; Unnamed templates to match MLA instruction. @@ -1437,7 +1857,7 @@ (match_operand:SI 3 "s_register_operand" "r,r,0,0")))] "TARGET_32BIT && !arm_arch6" "mla%?\\t%0, %2, %1, %3" - [(set_attr "insn" "mla") + [(set_attr "type" "mla") (set_attr "predicable" "yes")] ) @@ -1449,8 +1869,9 @@ (match_operand:SI 3 "s_register_operand" "r")))] "TARGET_32BIT && arm_arch6" "mla%?\\t%0, %2, %1, %3" - [(set_attr "insn" "mla") - (set_attr "predicable" "yes")] + [(set_attr "type" "mla") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_insn "*mulsi3addsi_compare0" @@ -1467,7 +1888,7 @@ "TARGET_ARM && arm_arch6" "mla%.\\t%0, %2, %1, %3" [(set_attr "conds" "set") - (set_attr "insn" "mlas")] + (set_attr "type" "mlas")] ) (define_insn "*mulsi3addsi_compare0_v6" @@ -1484,7 +1905,7 @@ "TARGET_ARM && arm_arch6 && optimize_size" "mla%.\\t%0, %2, %1, %3" [(set_attr "conds" "set") - (set_attr "insn" "mlas")] + (set_attr "type" "mlas")] ) (define_insn "*mulsi3addsi_compare0_scratch" @@ -1499,7 +1920,7 @@ "TARGET_ARM && !arm_arch6" "mla%.\\t%0, %2, %1, %3" [(set_attr "conds" "set") - (set_attr "insn" "mlas")] + (set_attr "type" "mlas")] ) (define_insn "*mulsi3addsi_compare0_scratch_v6" @@ -1514,7 +1935,7 @@ "TARGET_ARM && arm_arch6 && optimize_size" "mla%.\\t%0, %2, %1, %3" [(set_attr "conds" "set") - (set_attr "insn" "mlas")] + (set_attr "type" "mlas")] ) (define_insn "*mulsi3subsi" @@ -1525,8 +1946,9 @@ (match_operand:SI 1 "s_register_operand" "r"))))] "TARGET_32BIT && arm_arch_thumb2" "mls%?\\t%0, %2, %1, %3" - [(set_attr "insn" "mla") - (set_attr "predicable" "yes")] + [(set_attr "type" "mla") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_expand "maddsidi4" @@ -1548,7 +1970,7 @@ (match_operand:DI 1 "s_register_operand" "0")))] "TARGET_32BIT && arm_arch3m && !arm_arch6" "smlal%?\\t%Q0, %R0, %3, %2" - [(set_attr "insn" "smlal") + [(set_attr "type" "smlal") (set_attr "predicable" "yes")] ) @@ -1561,8 +1983,9 @@ (match_operand:DI 1 
"s_register_operand" "0")))] "TARGET_32BIT && arm_arch6" "smlal%?\\t%Q0, %R0, %3, %2" - [(set_attr "insn" "smlal") - (set_attr "predicable" "yes")] + [(set_attr "type" "smlal") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) ;; 32x32->64 widening multiply. @@ -1587,7 +2010,7 @@ (sign_extend:DI (match_operand:SI 2 "s_register_operand" "r"))))] "TARGET_32BIT && arm_arch3m && !arm_arch6" "smull%?\\t%Q0, %R0, %1, %2" - [(set_attr "insn" "smull") + [(set_attr "type" "smull") (set_attr "predicable" "yes")] ) @@ -1598,8 +2021,9 @@ (sign_extend:DI (match_operand:SI 2 "s_register_operand" "r"))))] "TARGET_32BIT && arm_arch6" "smull%?\\t%Q0, %R0, %1, %2" - [(set_attr "insn" "smull") - (set_attr "predicable" "yes")] + [(set_attr "type" "smull") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_expand "umulsidi3" @@ -1618,7 +2042,7 @@ (zero_extend:DI (match_operand:SI 2 "s_register_operand" "r"))))] "TARGET_32BIT && arm_arch3m && !arm_arch6" "umull%?\\t%Q0, %R0, %1, %2" - [(set_attr "insn" "umull") + [(set_attr "type" "umull") (set_attr "predicable" "yes")] ) @@ -1629,8 +2053,9 @@ (zero_extend:DI (match_operand:SI 2 "s_register_operand" "r"))))] "TARGET_32BIT && arm_arch6" "umull%?\\t%Q0, %R0, %1, %2" - [(set_attr "insn" "umull") - (set_attr "predicable" "yes")] + [(set_attr "type" "umull") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_expand "umaddsidi4" @@ -1652,7 +2077,7 @@ (match_operand:DI 1 "s_register_operand" "0")))] "TARGET_32BIT && arm_arch3m && !arm_arch6" "umlal%?\\t%Q0, %R0, %3, %2" - [(set_attr "insn" "umlal") + [(set_attr "type" "umlal") (set_attr "predicable" "yes")] ) @@ -1665,8 +2090,9 @@ (match_operand:DI 1 "s_register_operand" "0")))] "TARGET_32BIT && arm_arch6" "umlal%?\\t%Q0, %R0, %3, %2" - [(set_attr "insn" "umlal") - (set_attr "predicable" "yes")] + [(set_attr "type" "umlal") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_expand "smulsi3_highpart" @@ -1694,7 +2120,7 @@ (clobber (match_scratch:SI 3 "=&r,&r"))] "TARGET_32BIT && arm_arch3m && !arm_arch6" "smull%?\\t%3, %0, %2, %1" - [(set_attr "insn" "smull") + [(set_attr "type" "smull") (set_attr "predicable" "yes")] ) @@ -1709,8 +2135,9 @@ (clobber (match_scratch:SI 3 "=r"))] "TARGET_32BIT && arm_arch6" "smull%?\\t%3, %0, %2, %1" - [(set_attr "insn" "smull") - (set_attr "predicable" "yes")] + [(set_attr "type" "smull") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_expand "umulsi3_highpart" @@ -1738,7 +2165,7 @@ (clobber (match_scratch:SI 3 "=&r,&r"))] "TARGET_32BIT && arm_arch3m && !arm_arch6" "umull%?\\t%3, %0, %2, %1" - [(set_attr "insn" "umull") + [(set_attr "type" "umull") (set_attr "predicable" "yes")] ) @@ -1753,8 +2180,9 @@ (clobber (match_scratch:SI 3 "=r"))] "TARGET_32BIT && arm_arch6" "umull%?\\t%3, %0, %2, %1" - [(set_attr "insn" "umull") - (set_attr "predicable" "yes")] + [(set_attr "type" "umull") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_insn "mulhisi3" @@ -1765,7 +2193,7 @@ (match_operand:HI 2 "s_register_operand" "r"))))] "TARGET_DSP_MULTIPLY" "smulbb%?\\t%0, %1, %2" - [(set_attr "insn" "smulxy") + [(set_attr "type" "smulxy") (set_attr "predicable" "yes")] ) @@ -1778,8 +2206,9 @@ (match_operand:HI 2 "s_register_operand" "r"))))] "TARGET_DSP_MULTIPLY" "smultb%?\\t%0, %1, %2" - [(set_attr "insn" "smulxy") - (set_attr "predicable" "yes")] + [(set_attr "type" "smulxy") + (set_attr "predicable" "yes") + 
(set_attr "predicable_short_it" "no")] ) (define_insn "*mulhisi3bt" @@ -1791,8 +2220,9 @@ (const_int 16))))] "TARGET_DSP_MULTIPLY" "smulbt%?\\t%0, %1, %2" - [(set_attr "insn" "smulxy") - (set_attr "predicable" "yes")] + [(set_attr "type" "smulxy") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_insn "*mulhisi3tt" @@ -1805,8 +2235,9 @@ (const_int 16))))] "TARGET_DSP_MULTIPLY" "smultt%?\\t%0, %1, %2" - [(set_attr "insn" "smulxy") - (set_attr "predicable" "yes")] + [(set_attr "type" "smulxy") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_insn "maddhisi4" @@ -1818,8 +2249,9 @@ (match_operand:SI 3 "s_register_operand" "r")))] "TARGET_DSP_MULTIPLY" "smlabb%?\\t%0, %1, %2, %3" - [(set_attr "insn" "smlaxy") - (set_attr "predicable" "yes")] + [(set_attr "type" "smlaxy") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) ;; Note: there is no maddhisi4ibt because this one is canonical form @@ -1833,8 +2265,9 @@ (match_operand:SI 3 "s_register_operand" "r")))] "TARGET_DSP_MULTIPLY" "smlatb%?\\t%0, %1, %2, %3" - [(set_attr "insn" "smlaxy") - (set_attr "predicable" "yes")] + [(set_attr "type" "smlaxy") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_insn "*maddhisi4tt" @@ -1848,8 +2281,9 @@ (match_operand:SI 3 "s_register_operand" "r")))] "TARGET_DSP_MULTIPLY" "smlatt%?\\t%0, %1, %2, %3" - [(set_attr "insn" "smlaxy") - (set_attr "predicable" "yes")] + [(set_attr "type" "smlaxy") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_insn "maddhidi4" @@ -1856,14 +2290,15 @@ [(set (match_operand:DI 0 "s_register_operand" "=r") (plus:DI (mult:DI (sign_extend:DI - (match_operand:HI 1 "s_register_operand" "r")) + (match_operand:HI 1 "s_register_operand" "r")) (sign_extend:DI (match_operand:HI 2 "s_register_operand" "r"))) (match_operand:DI 3 "s_register_operand" "0")))] "TARGET_DSP_MULTIPLY" "smlalbb%?\\t%Q0, %R0, %1, %2" - [(set_attr "insn" "smlalxy") - (set_attr "predicable" "yes")]) + [(set_attr "type" "smlalxy") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) ;; Note: there is no maddhidi4ibt because this one is canonical form (define_insn "*maddhidi4tb" @@ -1878,8 +2313,9 @@ (match_operand:DI 3 "s_register_operand" "0")))] "TARGET_DSP_MULTIPLY" "smlaltb%?\\t%Q0, %R0, %1, %2" - [(set_attr "insn" "smlalxy") - (set_attr "predicable" "yes")]) + [(set_attr "type" "smlalxy") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*maddhidi4tt" [(set (match_operand:DI 0 "s_register_operand" "=r") @@ -1895,8 +2331,9 @@ (match_operand:DI 3 "s_register_operand" "0")))] "TARGET_DSP_MULTIPLY" "smlaltt%?\\t%Q0, %R0, %1, %2" - [(set_attr "insn" "smlalxy") - (set_attr "predicable" "yes")]) + [(set_attr "type" "smlalxy") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_expand "mulsf3" [(set (match_operand:SF 0 "s_register_operand" "") @@ -2024,13 +2461,49 @@ "" ) -(define_insn "*anddi3_insn" - [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") - (and:DI (match_operand:DI 1 "s_register_operand" "%0,r") - (match_operand:DI 2 "s_register_operand" "r,r")))] - "TARGET_32BIT && !TARGET_IWMMXT && !TARGET_NEON" - "#" - [(set_attr "length" "8")] +(define_insn_and_split "*anddi3_insn" + [(set (match_operand:DI 0 "s_register_operand" "=w,w ,&r,&r,&r,&r,?w,?w") + (and:DI (match_operand:DI 1 "s_register_operand" "%w,0 ,0 ,r ,0 ,r ,w ,0") + (match_operand:DI 2 
"arm_anddi_operand_neon" "w ,DL,r ,r ,De,De,w ,DL")))] + "TARGET_32BIT && !TARGET_IWMMXT" +{ + switch (which_alternative) + { + case 0: /* fall through */ + case 6: return "vand\t%P0, %P1, %P2"; + case 1: /* fall through */ + case 7: return neon_output_logic_immediate ("vand", &operands[2], + DImode, 1, VALID_NEON_QREG_MODE (DImode)); + case 2: + case 3: + case 4: + case 5: /* fall through */ + return "#"; + default: gcc_unreachable (); + } +} + "TARGET_32BIT && !TARGET_IWMMXT && reload_completed + && !(IS_VFP_REGNUM (REGNO (operands[0])))" + [(set (match_dup 3) (match_dup 4)) + (set (match_dup 5) (match_dup 6))] + " + { + operands[3] = gen_lowpart (SImode, operands[0]); + operands[5] = gen_highpart (SImode, operands[0]); + + operands[4] = simplify_gen_binary (AND, SImode, + gen_lowpart (SImode, operands[1]), + gen_lowpart (SImode, operands[2])); + operands[6] = simplify_gen_binary (AND, SImode, + gen_highpart (SImode, operands[1]), + gen_highpart_mode (SImode, DImode, operands[2])); + + }" + [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,*,*,neon_int_1,neon_int_1") + (set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,*, + avoid_neon_for_64bits,avoid_neon_for_64bits") + (set_attr "length" "*,*,8,8,8,8,*,*") + ] ) (define_insn_and_split "*anddi_zesidi_di" @@ -2145,12 +2618,13 @@ ; ??? Check split length for Thumb-2 (define_insn_and_split "*arm_andsi3_insn" - [(set (match_operand:SI 0 "s_register_operand" "=r,r,r,r") - (and:SI (match_operand:SI 1 "s_register_operand" "r,r,r,r") - (match_operand:SI 2 "reg_or_int_operand" "I,K,r,?n")))] + [(set (match_operand:SI 0 "s_register_operand" "=r,l,r,r,r") + (and:SI (match_operand:SI 1 "s_register_operand" "%r,0,r,r,r") + (match_operand:SI 2 "reg_or_int_operand" "I,l,K,r,?n")))] "TARGET_32BIT" "@ and%?\\t%0, %1, %2 + and%?\\t%0, %1, %2 bic%?\\t%0, %1, #%B2 and%?\\t%0, %1, %2 #" @@ -2164,9 +2638,11 @@ INTVAL (operands[2]), operands[0], operands[1], 0); DONE; " - [(set_attr "length" "4,4,4,16") + [(set_attr "length" "4,4,4,4,16") (set_attr "predicable" "yes") - (set_attr "type" "simple_alu_imm,simple_alu_imm,*,simple_alu_imm")] + (set_attr "predicable_short_it" "no,yes,no,no,no") + (set_attr "type" + "arlo_imm,arlo_imm,*,*,arlo_imm")] ) (define_insn "*thumb1_andsi3_insn" @@ -2176,7 +2652,7 @@ "TARGET_THUMB1" "and\\t%0, %2" [(set_attr "length" "2") - (set_attr "type" "simple_alu_imm") + (set_attr "type" "arlo_imm") (set_attr "conds" "set")]) (define_insn "*andsi3_compare0" @@ -2193,7 +2669,7 @@ bic%.\\t%0, %1, #%B2 and%.\\t%0, %1, %2" [(set_attr "conds" "set") - (set_attr "type" "simple_alu_imm,simple_alu_imm,*")] + (set_attr "type" "arlo_imm,arlo_imm,*")] ) (define_insn "*andsi3_compare0_scratch" @@ -2209,7 +2685,7 @@ bic%.\\t%2, %0, #%B1 tst%?\\t%0, %1" [(set_attr "conds" "set") - (set_attr "type" "simple_alu_imm,simple_alu_imm,*")] + (set_attr "type" "arlo_imm,arlo_imm,*")] ) (define_insn "*zeroextractsi_compare0_scratch" @@ -2216,7 +2692,7 @@ [(set (reg:CC_NOOV CC_REGNUM) (compare:CC_NOOV (zero_extract:SI (match_operand:SI 0 "s_register_operand" "r") - (match_operand 1 "const_int_operand" "n") + (match_operand 1 "const_int_operand" "n") (match_operand 2 "const_int_operand" "n")) (const_int 0)))] "TARGET_32BIT @@ -2232,7 +2708,8 @@ " [(set_attr "conds" "set") (set_attr "predicable" "yes") - (set_attr "type" "simple_alu_imm")] + (set_attr "predicable_short_it" "no") + (set_attr "type" "arlo_imm")] ) (define_insn_and_split "*ne_zeroextractsi" @@ -2659,7 +3136,8 @@ "arm_arch_thumb2" "bfc%?\t%0, %2, %1" [(set_attr "length" "4") - (set_attr 
"predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_insn "insv_t2" @@ -2670,7 +3148,8 @@ "arm_arch_thumb2" "bfi%?\t%0, %3, %2, %1" [(set_attr "length" "4") - (set_attr "predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) ; constants for op 2 will never be given to these patterns. @@ -2697,7 +3176,7 @@ [(set_attr "length" "8") (set_attr "predicable" "yes")] ) - + (define_insn_and_split "*anddi_notzesidi_di" [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") (and:DI (not:DI (zero_extend:DI @@ -2722,9 +3201,10 @@ operands[1] = gen_lowpart (SImode, operands[1]); }" [(set_attr "length" "4,8") - (set_attr "predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) - + (define_insn_and_split "*anddi_notsesidi_di" [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") (and:DI (not:DI (sign_extend:DI @@ -2745,9 +3225,10 @@ operands[1] = gen_lowpart (SImode, operands[1]); }" [(set_attr "length" "8") - (set_attr "predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) - + (define_insn "andsi_notsi_si" [(set (match_operand:SI 0 "s_register_operand" "=r") (and:SI (not:SI (match_operand:SI 2 "s_register_operand" "r")) @@ -2754,7 +3235,8 @@ (match_operand:SI 1 "s_register_operand" "r")))] "TARGET_32BIT" "bic%?\\t%0, %1, %2" - [(set_attr "predicable" "yes")] + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_insn "thumb1_bicsi3" @@ -2777,8 +3259,8 @@ [(set_attr "predicable" "yes") (set_attr "shift" "2") (set (attr "type") (if_then_else (match_operand 3 "const_int_operand" "") - (const_string "alu_shift") - (const_string "alu_shift_reg")))] + (const_string "arlo_shift") + (const_string "arlo_shift_reg")))] ) (define_insn "*andsi_notsi_si_compare0" @@ -2814,14 +3296,47 @@ "" ) -(define_insn "*iordi3_insn" - [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") - (ior:DI (match_operand:DI 1 "s_register_operand" "%0,r") - (match_operand:DI 2 "s_register_operand" "r,r")))] - "TARGET_32BIT && !TARGET_IWMMXT && !TARGET_NEON" - "#" - [(set_attr "length" "8") - (set_attr "predicable" "yes")] +(define_insn_and_split "*iordi3_insn" + [(set (match_operand:DI 0 "s_register_operand" "=w,w ,&r,&r,&r,&r,?w,?w") + (ior:DI (match_operand:DI 1 "s_register_operand" "%w,0 ,0 ,r ,0 ,r ,w ,0") + (match_operand:DI 2 "arm_iordi_operand_neon" "w ,Dl,r ,r ,Df,Df,w ,Dl")))] + "TARGET_32BIT && !TARGET_IWMMXT" + { + switch (which_alternative) + { + case 0: /* fall through */ + case 6: return "vorr\t%P0, %P1, %P2"; + case 1: /* fall through */ + case 7: return neon_output_logic_immediate ("vorr", &operands[2], + DImode, 0, VALID_NEON_QREG_MODE (DImode)); + case 2: + case 3: + case 4: + case 5: + return "#"; + default: gcc_unreachable (); + } + } + "TARGET_32BIT && !TARGET_IWMMXT && reload_completed + && !(IS_VFP_REGNUM (REGNO (operands[0])))" + [(set (match_dup 3) (match_dup 4)) + (set (match_dup 5) (match_dup 6))] + " + { + operands[3] = gen_lowpart (SImode, operands[0]); + operands[5] = gen_highpart (SImode, operands[0]); + + operands[4] = simplify_gen_binary (IOR, SImode, + gen_lowpart (SImode, operands[1]), + gen_lowpart (SImode, operands[2])); + operands[6] = simplify_gen_binary (IOR, SImode, + gen_highpart (SImode, operands[1]), + gen_highpart_mode (SImode, DImode, operands[2])); + + }" + [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,*,*,neon_int_1,neon_int_1") + (set_attr "length" "*,*,8,8,8,8,*,*") + (set_attr 
"arch" "neon_for_64bits,neon_for_64bits,*,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")] ) (define_insn "*iordi_zesidi_di" @@ -2834,7 +3349,8 @@ orr%?\\t%Q0, %Q1, %2 #" [(set_attr "length" "4,8") - (set_attr "predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_insn "*iordi_sesidi_di" @@ -2879,12 +3395,13 @@ ) (define_insn_and_split "*iorsi3_insn" - [(set (match_operand:SI 0 "s_register_operand" "=r,r,r,r") - (ior:SI (match_operand:SI 1 "s_register_operand" "%r,r,r,r") - (match_operand:SI 2 "reg_or_int_operand" "I,K,r,?n")))] + [(set (match_operand:SI 0 "s_register_operand" "=r,l,r,r,r") + (ior:SI (match_operand:SI 1 "s_register_operand" "%r,0,r,r,r") + (match_operand:SI 2 "reg_or_int_operand" "I,l,K,r,?n")))] "TARGET_32BIT" "@ orr%?\\t%0, %1, %2 + orr%?\\t%0, %1, %2 orn%?\\t%0, %1, #%B2 orr%?\\t%0, %1, %2 #" @@ -2894,14 +3411,15 @@ || (TARGET_THUMB2 && const_ok_for_arm (~INTVAL (operands[2]))))" [(clobber (const_int 0))] { - arm_split_constant (IOR, SImode, curr_insn, + arm_split_constant (IOR, SImode, curr_insn, INTVAL (operands[2]), operands[0], operands[1], 0); DONE; } - [(set_attr "length" "4,4,4,16") - (set_attr "arch" "32,t2,32,32") + [(set_attr "length" "4,4,4,4,16") + (set_attr "arch" "32,t2,t2,32,32") (set_attr "predicable" "yes") - (set_attr "type" "simple_alu_imm,simple_alu_imm,*,*")] + (set_attr "predicable_short_it" "no,yes,no,no,no") + (set_attr "type" "arlo_imm,*,arlo_imm,*,*")] ) (define_insn "*thumb1_iorsi3_insn" @@ -2936,7 +3454,7 @@ "TARGET_32BIT" "orr%.\\t%0, %1, %2" [(set_attr "conds" "set") - (set_attr "type" "simple_alu_imm,*")] + (set_attr "type" "arlo_imm,*")] ) (define_insn "*iorsi3_compare0_scratch" @@ -2948,25 +3466,55 @@ "TARGET_32BIT" "orr%.\\t%0, %1, %2" [(set_attr "conds" "set") - (set_attr "type" "simple_alu_imm, *")] + (set_attr "type" "arlo_imm,*")] ) (define_expand "xordi3" [(set (match_operand:DI 0 "s_register_operand" "") (xor:DI (match_operand:DI 1 "s_register_operand" "") - (match_operand:DI 2 "s_register_operand" "")))] + (match_operand:DI 2 "arm_xordi_operand" "")))] "TARGET_32BIT" "" ) -(define_insn "*xordi3_insn" - [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") - (xor:DI (match_operand:DI 1 "s_register_operand" "%0,r") - (match_operand:DI 2 "s_register_operand" "r,r")))] - "TARGET_32BIT && !TARGET_IWMMXT && !TARGET_NEON" - "#" - [(set_attr "length" "8") - (set_attr "predicable" "yes")] +(define_insn_and_split "*xordi3_insn" + [(set (match_operand:DI 0 "s_register_operand" "=w,&r,&r,&r,&r,?w") + (xor:DI (match_operand:DI 1 "s_register_operand" "w ,%0,r ,0 ,r ,w") + (match_operand:DI 2 "arm_xordi_operand" "w ,r ,r ,Dg,Dg,w")))] + "TARGET_32BIT && !TARGET_IWMMXT" +{ + switch (which_alternative) + { + case 1: + case 2: + case 3: + case 4: /* fall through */ + return "#"; + case 0: /* fall through */ + case 5: return "veor\t%P0, %P1, %P2"; + default: gcc_unreachable (); + } +} + "TARGET_32BIT && !TARGET_IWMMXT && reload_completed + && !(IS_VFP_REGNUM (REGNO (operands[0])))" + [(set (match_dup 3) (match_dup 4)) + (set (match_dup 5) (match_dup 6))] + " + { + operands[3] = gen_lowpart (SImode, operands[0]); + operands[5] = gen_highpart (SImode, operands[0]); + + operands[4] = simplify_gen_binary (XOR, SImode, + gen_lowpart (SImode, operands[1]), + gen_lowpart (SImode, operands[2])); + operands[6] = simplify_gen_binary (XOR, SImode, + gen_highpart (SImode, operands[1]), + gen_highpart_mode (SImode, DImode, operands[2])); + + }" + [(set_attr "length" "*,8,8,8,8,*") + (set_attr "neon_type" 
"neon_int_1,*,*,*,*,neon_int_1") + (set_attr "arch" "neon_for_64bits,*,*,*,*,avoid_neon_for_64bits")] ) (define_insn "*xordi_zesidi_di" @@ -2979,7 +3527,8 @@ eor%?\\t%Q0, %Q1, %2 #" [(set_attr "length" "4,8") - (set_attr "predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_insn "*xordi_sesidi_di" @@ -3022,13 +3571,14 @@ ) (define_insn_and_split "*arm_xorsi3" - [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") - (xor:SI (match_operand:SI 1 "s_register_operand" "%r,r,r") - (match_operand:SI 2 "reg_or_int_operand" "I,r,?n")))] + [(set (match_operand:SI 0 "s_register_operand" "=r,l,r,r") + (xor:SI (match_operand:SI 1 "s_register_operand" "%r,0,r,r") + (match_operand:SI 2 "reg_or_int_operand" "I,l,r,?n")))] "TARGET_32BIT" "@ eor%?\\t%0, %1, %2 eor%?\\t%0, %1, %2 + eor%?\\t%0, %1, %2 #" "TARGET_32BIT && CONST_INT_P (operands[2]) @@ -3039,9 +3589,10 @@ INTVAL (operands[2]), operands[0], operands[1], 0); DONE; } - [(set_attr "length" "4,4,16") + [(set_attr "length" "4,4,4,16") (set_attr "predicable" "yes") - (set_attr "type" "simple_alu_imm,*,*")] + (set_attr "predicable_short_it" "no,yes,no,no") + (set_attr "type" "arlo_imm,*,*,*")] ) (define_insn "*thumb1_xorsi3_insn" @@ -3052,7 +3603,7 @@ "eor\\t%0, %2" [(set_attr "length" "2") (set_attr "conds" "set") - (set_attr "type" "simple_alu_imm")] + (set_attr "type" "arlo_imm")] ) (define_insn "*xorsi3_compare0" @@ -3065,7 +3616,7 @@ "TARGET_32BIT" "eor%.\\t%0, %1, %2" [(set_attr "conds" "set") - (set_attr "type" "simple_alu_imm,*")] + (set_attr "type" "arlo_imm,*")] ) (define_insn "*xorsi3_compare0_scratch" @@ -3076,7 +3627,7 @@ "TARGET_32BIT" "teq%?\\t%0, %1" [(set_attr "conds" "set") - (set_attr "type" "simple_alu_imm, *")] + (set_attr "type" "arlo_imm,*")] ) ; By splitting (IOR (AND (NOT A) (NOT B)) C) as D = AND (IOR A B) (NOT C), @@ -3096,16 +3647,21 @@ "" ) -(define_insn "*andsi_iorsi3_notsi" +(define_insn_and_split "*andsi_iorsi3_notsi" [(set (match_operand:SI 0 "s_register_operand" "=&r,&r,&r") (and:SI (ior:SI (match_operand:SI 1 "s_register_operand" "%0,r,r") (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI")) (not:SI (match_operand:SI 3 "arm_rhs_operand" "rI,rI,rI"))))] "TARGET_32BIT" - "orr%?\\t%0, %1, %2\;bic%?\\t%0, %0, %3" + "#" ; "orr%?\\t%0, %1, %2\;bic%?\\t%0, %0, %3" + "&& reload_completed" + [(set (match_dup 0) (ior:SI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (and:SI (not:SI (match_dup 3)) (match_dup 0)))] + "" [(set_attr "length" "8") (set_attr "ce_count" "2") - (set_attr "predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) ; ??? 
Are these four splitters still beneficial when the Thumb-2 bitfield @@ -3241,7 +3797,8 @@ (const_int 0)))] "TARGET_32BIT" "bic%?\\t%0, %1, %1, asr #31" - [(set_attr "predicable" "yes")] + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_insn "*smax_m1" @@ -3250,18 +3807,27 @@ (const_int -1)))] "TARGET_32BIT" "orr%?\\t%0, %1, %1, asr #31" - [(set_attr "predicable" "yes")] + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) -(define_insn "*arm_smax_insn" +(define_insn_and_split "*arm_smax_insn" [(set (match_operand:SI 0 "s_register_operand" "=r,r") (smax:SI (match_operand:SI 1 "s_register_operand" "%0,?r") (match_operand:SI 2 "arm_rhs_operand" "rI,rI"))) (clobber (reg:CC CC_REGNUM))] "TARGET_ARM" - "@ - cmp\\t%1, %2\;movlt\\t%0, %2 - cmp\\t%1, %2\;movge\\t%0, %1\;movlt\\t%0, %2" + "#" + ; cmp\\t%1, %2\;movlt\\t%0, %2 + ; cmp\\t%1, %2\;movge\\t%0, %1\;movlt\\t%0, %2" + "TARGET_ARM" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) + (if_then_else:SI (ge:SI (reg:CC CC_REGNUM) (const_int 0)) + (match_dup 1) + (match_dup 2)))] + "" [(set_attr "conds" "clob") (set_attr "length" "8,12")] ) @@ -3290,18 +3856,27 @@ (const_int 0)))] "TARGET_32BIT" "and%?\\t%0, %1, %1, asr #31" - [(set_attr "predicable" "yes")] + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) -(define_insn "*arm_smin_insn" +(define_insn_and_split "*arm_smin_insn" [(set (match_operand:SI 0 "s_register_operand" "=r,r") (smin:SI (match_operand:SI 1 "s_register_operand" "%0,?r") (match_operand:SI 2 "arm_rhs_operand" "rI,rI"))) (clobber (reg:CC CC_REGNUM))] "TARGET_ARM" - "@ - cmp\\t%1, %2\;movge\\t%0, %2 - cmp\\t%1, %2\;movlt\\t%0, %1\;movge\\t%0, %2" + "#" + ; cmp\\t%1, %2\;movge\\t%0, %2 + ; cmp\\t%1, %2\;movlt\\t%0, %1\;movge\\t%0, %2" + "TARGET_ARM" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) + (if_then_else:SI (lt:SI (reg:CC CC_REGNUM) (const_int 0)) + (match_dup 1) + (match_dup 2)))] + "" [(set_attr "conds" "clob") (set_attr "length" "8,12")] ) @@ -3316,16 +3891,24 @@ "" ) -(define_insn "*arm_umaxsi3" +(define_insn_and_split "*arm_umaxsi3" [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") (umax:SI (match_operand:SI 1 "s_register_operand" "0,r,?r") (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI"))) (clobber (reg:CC CC_REGNUM))] "TARGET_ARM" - "@ - cmp\\t%1, %2\;movcc\\t%0, %2 - cmp\\t%1, %2\;movcs\\t%0, %1 - cmp\\t%1, %2\;movcs\\t%0, %1\;movcc\\t%0, %2" + "#" + ; cmp\\t%1, %2\;movcc\\t%0, %2 + ; cmp\\t%1, %2\;movcs\\t%0, %1 + ; cmp\\t%1, %2\;movcs\\t%0, %1\;movcc\\t%0, %2" + "TARGET_ARM" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) + (if_then_else:SI (geu:SI (reg:CC CC_REGNUM) (const_int 0)) + (match_dup 1) + (match_dup 2)))] + "" [(set_attr "conds" "clob") (set_attr "length" "8,8,12")] ) @@ -3340,16 +3923,24 @@ "" ) -(define_insn "*arm_uminsi3" +(define_insn_and_split "*arm_uminsi3" [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") (umin:SI (match_operand:SI 1 "s_register_operand" "0,r,?r") (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI"))) (clobber (reg:CC CC_REGNUM))] "TARGET_ARM" - "@ - cmp\\t%1, %2\;movcs\\t%0, %2 - cmp\\t%1, %2\;movcc\\t%0, %1 - cmp\\t%1, %2\;movcc\\t%0, %1\;movcs\\t%0, %2" + "#" + ; cmp\\t%1, %2\;movcs\\t%0, %2 + ; cmp\\t%1, %2\;movcc\\t%0, %1 + ; cmp\\t%1, %2\;movcc\\t%0, %1\;movcs\\t%0, %2" + "TARGET_ARM" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 1) (match_dup 2))) + 
(set (match_dup 0) + (if_then_else:SI (ltu:SI (reg:CC CC_REGNUM) (const_int 0)) + (match_dup 1) + (match_dup 2)))] + "" [(set_attr "conds" "clob") (set_attr "length" "8,8,12")] ) @@ -3360,7 +3951,7 @@ [(match_operand:SI 1 "s_register_operand" "r") (match_operand:SI 2 "s_register_operand" "r")])) (clobber (reg:CC CC_REGNUM))] - "TARGET_32BIT" + "TARGET_32BIT && optimize_insn_for_size_p()" "* operands[3] = gen_rtx_fmt_ee (minmax_code (operands[3]), SImode, operands[1], operands[2]); @@ -3389,7 +3980,7 @@ (match_operand:SI 3 "arm_rhs_operand" "rI,rI")]) (match_operand:SI 1 "s_register_operand" "0,?r")])) (clobber (reg:CC CC_REGNUM))] - "TARGET_32BIT && !arm_eliminable_register (operands[1])" + "TARGET_32BIT && !arm_eliminable_register (operands[1]) && !arm_restrict_it" "* { enum rtx_code code = GET_CODE (operands[4]); @@ -3423,6 +4014,54 @@ (const_int 12)))] ) +; Reject the frame pointer in operand[1], since reloading this after +; it has been eliminated can cause carnage. +(define_insn_and_split "*minmax_arithsi_non_canon" + [(set (match_operand:SI 0 "s_register_operand" "=Ts,Ts") + (minus:SI + (match_operand:SI 1 "s_register_operand" "0,?Ts") + (match_operator:SI 4 "minmax_operator" + [(match_operand:SI 2 "s_register_operand" "Ts,Ts") + (match_operand:SI 3 "arm_rhs_operand" "TsI,TsI")]))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT && !arm_eliminable_register (operands[1]) + && !(arm_restrict_it && CONST_INT_P (operands[3]))" + "#" + "TARGET_32BIT && !arm_eliminable_register (operands[1]) && reload_completed" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 2) (match_dup 3))) + + (cond_exec (match_op_dup 4 [(reg:CC CC_REGNUM) (const_int 0)]) + (set (match_dup 0) + (minus:SI (match_dup 1) + (match_dup 2)))) + (cond_exec (match_op_dup 5 [(reg:CC CC_REGNUM) (const_int 0)]) + (set (match_dup 0) + (match_dup 6)))] + { + enum machine_mode mode = SELECT_CC_MODE (GET_CODE (operands[1]), + operands[2], operands[3]); + enum rtx_code rc = minmax_code (operands[4]); + operands[4] = gen_rtx_fmt_ee (rc, VOIDmode, + operands[2], operands[3]); + + if (mode == CCFPmode || mode == CCFPEmode) + rc = reverse_condition_maybe_unordered (rc); + else + rc = reverse_condition (rc); + operands[5] = gen_rtx_fmt_ee (rc, SImode, operands[2], operands[3]); + if (CONST_INT_P (operands[3])) + operands[6] = plus_constant (SImode, operands[1], -INTVAL (operands[3])); + else + operands[6] = gen_rtx_MINUS (SImode, operands[1], operands[3]); + } + [(set_attr "conds" "clob") + (set (attr "length") + (if_then_else (eq_attr "is_thumb" "yes") + (const_int 14) + (const_int 12)))] +) + (define_code_iterator SAT [smin smax]) (define_code_iterator SATrev [smin smax]) (define_code_attr SATlo [(smin "1") (smax "2")]) @@ -3449,7 +4088,8 @@ return "usat%?\t%0, %1, %3"; } [(set_attr "predicable" "yes") - (set_attr "insn" "sat")]) + (set_attr "predicable_short_it" "no")] +) (define_insn "*satsi__shift" [(set (match_operand:SI 0 "s_register_operand" "=r") @@ -3474,9 +4114,9 @@ return "usat%?\t%0, %1, %4%S3"; } [(set_attr "predicable" "yes") - (set_attr "insn" "sat") + (set_attr "predicable_short_it" "no") (set_attr "shift" "3") - (set_attr "type" "alu_shift")]) + (set_attr "type" "arlo_shift")]) ;; Shift and rotation insns @@ -3566,6 +4206,7 @@ "TARGET_THUMB1" "lsl\\t%0, %1, %2" [(set_attr "length" "2") + (set_attr "type" "shift,shift_reg") (set_attr "conds" "set")]) (define_expand "ashrdi3" @@ -3623,7 +4264,6 @@ "TARGET_32BIT" "movs\\t%R0, %R1, asr #1\;mov\\t%Q0, %Q1, rrx" [(set_attr "conds" "clob") - (set_attr "insn" "mov") 
(set_attr "length" "8")] ) @@ -3646,6 +4286,7 @@ "TARGET_THUMB1" "asr\\t%0, %1, %2" [(set_attr "length" "2") + (set_attr "type" "shift,shift_reg") (set_attr "conds" "set")]) (define_expand "lshrdi3" @@ -3703,7 +4344,6 @@ "TARGET_32BIT" "movs\\t%R0, %R1, lsr #1\;mov\\t%Q0, %Q1, rrx" [(set_attr "conds" "clob") - (set_attr "insn" "mov") (set_attr "length" "8")] ) @@ -3729,6 +4369,7 @@ "TARGET_THUMB1" "lsr\\t%0, %1, %2" [(set_attr "length" "2") + (set_attr "type" "shift,shift_reg") (set_attr "conds" "set")]) (define_expand "rotlsi3" @@ -3774,51 +4415,52 @@ (match_operand:SI 2 "register_operand" "l")))] "TARGET_THUMB1" "ror\\t%0, %0, %2" - [(set_attr "length" "2")] + [(set_attr "type" "shift_reg") + (set_attr "length" "2")] ) (define_insn "*arm_shiftsi3" - [(set (match_operand:SI 0 "s_register_operand" "=r") + [(set (match_operand:SI 0 "s_register_operand" "=l,r,r") (match_operator:SI 3 "shift_operator" - [(match_operand:SI 1 "s_register_operand" "r") - (match_operand:SI 2 "reg_or_int_operand" "rM")]))] + [(match_operand:SI 1 "s_register_operand" "0,r,r") + (match_operand:SI 2 "reg_or_int_operand" "l,M,r")]))] "TARGET_32BIT" "* return arm_output_shift(operands, 0);" [(set_attr "predicable" "yes") + (set_attr "arch" "t2,*,*") + (set_attr "predicable_short_it" "yes,no,no") + (set_attr "length" "4") (set_attr "shift" "1") - (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "") - (const_string "alu_shift") - (const_string "alu_shift_reg")))] + (set_attr "type" "arlo_shift_reg,arlo_shift,arlo_shift_reg")] ) (define_insn "*shiftsi3_compare0" [(set (reg:CC_NOOV CC_REGNUM) (compare:CC_NOOV (match_operator:SI 3 "shift_operator" - [(match_operand:SI 1 "s_register_operand" "r") - (match_operand:SI 2 "arm_rhs_operand" "rM")]) + [(match_operand:SI 1 "s_register_operand" "r,r") + (match_operand:SI 2 "arm_rhs_operand" "M,r")]) (const_int 0))) - (set (match_operand:SI 0 "s_register_operand" "=r") + (set (match_operand:SI 0 "s_register_operand" "=r,r") (match_op_dup 3 [(match_dup 1) (match_dup 2)]))] "TARGET_32BIT" "* return arm_output_shift(operands, 1);" [(set_attr "conds" "set") (set_attr "shift" "1") - (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "") - (const_string "alu_shift") - (const_string "alu_shift_reg")))] + (set_attr "type" "arlo_shift,arlo_shift_reg")] ) (define_insn "*shiftsi3_compare0_scratch" [(set (reg:CC_NOOV CC_REGNUM) (compare:CC_NOOV (match_operator:SI 3 "shift_operator" - [(match_operand:SI 1 "s_register_operand" "r") - (match_operand:SI 2 "arm_rhs_operand" "rM")]) + [(match_operand:SI 1 "s_register_operand" "r,r") + (match_operand:SI 2 "arm_rhs_operand" "M,r")]) (const_int 0))) - (clobber (match_scratch:SI 0 "=r"))] + (clobber (match_scratch:SI 0 "=r,r"))] "TARGET_32BIT" "* return arm_output_shift(operands, 1);" [(set_attr "conds" "set") - (set_attr "shift" "1")] + (set_attr "shift" "1") + (set_attr "type" "shift,shift_reg")] ) (define_insn "*not_shiftsi" @@ -3829,10 +4471,10 @@ "TARGET_32BIT" "mvn%?\\t%0, %1%S3" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "shift" "1") - (set_attr "insn" "mvn") (set_attr "arch" "32,a") - (set_attr "type" "alu_shift,alu_shift_reg")]) + (set_attr "type" "mvn_shift,mvn_shift_reg")]) (define_insn "*not_shiftsi_compare0" [(set (reg:CC_NOOV CC_REGNUM) @@ -3847,9 +4489,8 @@ "mvn%.\\t%0, %1%S3" [(set_attr "conds" "set") (set_attr "shift" "1") - (set_attr "insn" "mvn") (set_attr "arch" "32,a") - (set_attr "type" "alu_shift,alu_shift_reg")]) + (set_attr "type" 
"mvn_shift,mvn_shift_reg")]) (define_insn "*not_shiftsi_compare0_scratch" [(set (reg:CC_NOOV CC_REGNUM) @@ -3863,9 +4504,8 @@ "mvn%.\\t%0, %1%S3" [(set_attr "conds" "set") (set_attr "shift" "1") - (set_attr "insn" "mvn") (set_attr "arch" "32,a") - (set_attr "type" "alu_shift,alu_shift_reg")]) + (set_attr "type" "mvn_shift,mvn_shift_reg")]) ;; We don't really have extzv, but defining this using shifts helps ;; to reduce register pressure later on. @@ -4042,6 +4682,7 @@ [(set_attr "arch" "t2,any") (set_attr "length" "2,4") (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no") (set_attr "type" "load1")]) (define_insn "unaligned_loadhis" @@ -4054,6 +4695,7 @@ [(set_attr "arch" "t2,any") (set_attr "length" "2,4") (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no") (set_attr "type" "load_byte")]) (define_insn "unaligned_loadhiu" @@ -4066,6 +4708,7 @@ [(set_attr "arch" "t2,any") (set_attr "length" "2,4") (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no") (set_attr "type" "load_byte")]) (define_insn "unaligned_storesi" @@ -4077,6 +4720,7 @@ [(set_attr "arch" "t2,any") (set_attr "length" "2,4") (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no") (set_attr "type" "store1")]) (define_insn "unaligned_storehi" @@ -4088,8 +4732,67 @@ [(set_attr "arch" "t2,any") (set_attr "length" "2,4") (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no") (set_attr "type" "store1")]) +;; Unaligned double-word load and store. +;; Split after reload into two unaligned single-word accesses. +;; It prevents lower_subreg from splitting some other aligned +;; double-word accesses too early. Used for internal memcpy. + +(define_insn_and_split "unaligned_loaddi" + [(set (match_operand:DI 0 "s_register_operand" "=l,r") + (unspec:DI [(match_operand:DI 1 "memory_operand" "o,o")] + UNSPEC_UNALIGNED_LOAD))] + "unaligned_access && TARGET_32BIT" + "#" + "&& reload_completed" + [(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_UNALIGNED_LOAD)) + (set (match_dup 2) (unspec:SI [(match_dup 3)] UNSPEC_UNALIGNED_LOAD))] + { + operands[2] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[3] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + + /* If the first destination register overlaps with the base address, + swap the order in which the loads are emitted. 
*/ + if (reg_overlap_mentioned_p (operands[0], operands[1])) + { + rtx tmp = operands[1]; + operands[1] = operands[3]; + operands[3] = tmp; + tmp = operands[0]; + operands[0] = operands[2]; + operands[2] = tmp; + } + } + [(set_attr "arch" "t2,any") + (set_attr "length" "4,8") + (set_attr "predicable" "yes") + (set_attr "type" "load2")]) + +(define_insn_and_split "unaligned_storedi" + [(set (match_operand:DI 0 "memory_operand" "=o,o") + (unspec:DI [(match_operand:DI 1 "s_register_operand" "l,r")] + UNSPEC_UNALIGNED_STORE))] + "unaligned_access && TARGET_32BIT" + "#" + "&& reload_completed" + [(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_UNALIGNED_STORE)) + (set (match_dup 2) (unspec:SI [(match_dup 3)] UNSPEC_UNALIGNED_STORE))] + { + operands[2] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[3] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + } + [(set_attr "arch" "t2,any") + (set_attr "length" "4,8") + (set_attr "predicable" "yes") + (set_attr "type" "store2")]) + + (define_insn "*extv_reg" [(set (match_operand:SI 0 "s_register_operand" "=r") (sign_extract:SI (match_operand:SI 1 "s_register_operand" "r") @@ -4098,7 +4801,8 @@ "arm_arch_thumb2" "sbfx%?\t%0, %1, %3, %2" [(set_attr "length" "4") - (set_attr "predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_insn "extzv_t2" @@ -4109,7 +4813,8 @@ "arm_arch_thumb2" "ubfx%?\t%0, %1, %3, %2" [(set_attr "length" "4") - (set_attr "predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) @@ -4121,7 +4826,8 @@ "TARGET_IDIV" "sdiv%?\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "insn" "sdiv")] + (set_attr "predicable_short_it" "no") + (set_attr "type" "sdiv")] ) (define_insn "udivsi3" @@ -4131,7 +4837,8 @@ "TARGET_IDIV" "udiv%?\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "insn" "udiv")] + (set_attr "predicable_short_it" "no") + (set_attr "type" "udiv")] ) @@ -4154,12 +4861,24 @@ ;; The constraints here are to prevent a *partial* overlap (where %Q0 == %R1). ;; The first alternative allows the common case of a *full* overlap. 
-(define_insn "*arm_negdi2" +(define_insn_and_split "*arm_negdi2" [(set (match_operand:DI 0 "s_register_operand" "=r,&r") (neg:DI (match_operand:DI 1 "s_register_operand" "0,r"))) (clobber (reg:CC CC_REGNUM))] "TARGET_ARM" - "rsbs\\t%Q0, %Q1, #0\;rsc\\t%R0, %R1, #0" + "#" ; "rsbs\\t%Q0, %Q1, #0\;rsc\\t%R0, %R1, #0" + "&& reload_completed" + [(parallel [(set (reg:CC CC_REGNUM) + (compare:CC (const_int 0) (match_dup 1))) + (set (match_dup 0) (minus:SI (const_int 0) (match_dup 1)))]) + (set (match_dup 2) (minus:SI (minus:SI (const_int 0) (match_dup 3)) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + { + operands[2] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[3] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + } [(set_attr "conds" "clob") (set_attr "length" "8")] ) @@ -4181,11 +4900,14 @@ ) (define_insn "*arm_negsi2" - [(set (match_operand:SI 0 "s_register_operand" "=r") - (neg:SI (match_operand:SI 1 "s_register_operand" "r")))] + [(set (match_operand:SI 0 "s_register_operand" "=l,r") + (neg:SI (match_operand:SI 1 "s_register_operand" "l,r")))] "TARGET_32BIT" "rsb%?\\t%0, %1, #0" - [(set_attr "predicable" "yes")] + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no") + (set_attr "arch" "t2,*") + (set_attr "length" "4")] ) (define_insn "*thumb1_negsi2" @@ -4227,14 +4949,67 @@ operands[2] = gen_rtx_REG (CCmode, CC_REGNUM); ") -(define_insn "*arm_abssi2" +(define_insn_and_split "*arm_abssi2" [(set (match_operand:SI 0 "s_register_operand" "=r,&r") (abs:SI (match_operand:SI 1 "s_register_operand" "0,r"))) (clobber (reg:CC CC_REGNUM))] "TARGET_ARM" - "@ - cmp\\t%0, #0\;rsblt\\t%0, %0, #0 - eor%?\\t%0, %1, %1, asr #31\;sub%?\\t%0, %0, %1, asr #31" + "#" + "&& reload_completed" + [(const_int 0)] + { + /* if (which_alternative == 0) */ + if (REGNO(operands[0]) == REGNO(operands[1])) + { + /* Emit the pattern: + cmp\\t%0, #0\;rsblt\\t%0, %0, #0 + [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 0) (const_int 0))) + (cond_exec (lt:CC (reg:CC CC_REGNUM) (const_int 0)) + (set (match_dup 0) (minus:SI (const_int 0) (match_dup 1))))] + */ + emit_insn (gen_rtx_SET (VOIDmode, + gen_rtx_REG (CCmode, CC_REGNUM), + gen_rtx_COMPARE (CCmode, operands[0], const0_rtx))); + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + (gen_rtx_LT (SImode, + gen_rtx_REG (CCmode, CC_REGNUM), + const0_rtx)), + (gen_rtx_SET (VOIDmode, + operands[0], + (gen_rtx_MINUS (SImode, + const0_rtx, + operands[1])))))); + DONE; + } + else + { + /* Emit the pattern: + alt1: eor%?\\t%0, %1, %1, asr #31\;sub%?\\t%0, %0, %1, asr #31 + [(set (match_dup 0) + (xor:SI (match_dup 1) + (ashiftrt:SI (match_dup 1) (const_int 31)))) + (set (match_dup 0) + (minus:SI (match_dup 0) + (ashiftrt:SI (match_dup 1) (const_int 31))))] + */ + emit_insn (gen_rtx_SET (VOIDmode, + operands[0], + gen_rtx_XOR (SImode, + gen_rtx_ASHIFTRT (SImode, + operands[1], + GEN_INT (31)), + operands[1]))); + emit_insn (gen_rtx_SET (VOIDmode, + operands[0], + gen_rtx_MINUS (SImode, + operands[0], + gen_rtx_ASHIFTRT (SImode, + operands[1], + GEN_INT (31))))); + DONE; + } + } [(set_attr "conds" "clob,*") (set_attr "shift" "1") (set_attr "predicable" "no, yes") @@ -4255,14 +5030,56 @@ [(set_attr "length" "6")] ) -(define_insn "*arm_neg_abssi2" +(define_insn_and_split "*arm_neg_abssi2" [(set (match_operand:SI 0 "s_register_operand" "=r,&r") (neg:SI (abs:SI (match_operand:SI 1 "s_register_operand" "0,r")))) (clobber (reg:CC CC_REGNUM))] "TARGET_ARM" - "@ - cmp\\t%0, 
#0\;rsbgt\\t%0, %0, #0 - eor%?\\t%0, %1, %1, asr #31\;rsb%?\\t%0, %0, %1, asr #31" + "#" + "&& reload_completed" + [(const_int 0)] + { + /* if (which_alternative == 0) */ + if (REGNO (operands[0]) == REGNO (operands[1])) + { + /* Emit the pattern: + cmp\\t%0, #0\;rsbgt\\t%0, %0, #0 + */ + emit_insn (gen_rtx_SET (VOIDmode, + gen_rtx_REG (CCmode, CC_REGNUM), + gen_rtx_COMPARE (CCmode, operands[0], const0_rtx))); + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + gen_rtx_GT (SImode, + gen_rtx_REG (CCmode, CC_REGNUM), + const0_rtx), + gen_rtx_SET (VOIDmode, + operands[0], + (gen_rtx_MINUS (SImode, + const0_rtx, + operands[1]))))); + } + else + { + /* Emit the pattern: + eor%?\\t%0, %1, %1, asr #31\;rsb%?\\t%0, %0, %1, asr #31 + */ + emit_insn (gen_rtx_SET (VOIDmode, + operands[0], + gen_rtx_XOR (SImode, + gen_rtx_ASHIFTRT (SImode, + operands[1], + GEN_INT (31)), + operands[1]))); + emit_insn (gen_rtx_SET (VOIDmode, + operands[0], + gen_rtx_MINUS (SImode, + gen_rtx_ASHIFTRT (SImode, + operands[1], + GEN_INT (31)), + operands[0]))); + } + DONE; + } [(set_attr "conds" "clob,*") (set_attr "shift" "1") (set_attr "predicable" "no, yes") @@ -4330,7 +5147,7 @@ [(set_attr "length" "*,8,8,*") (set_attr "predicable" "no,yes,yes,no") (set_attr "neon_type" "neon_int_1,*,*,neon_int_1") - (set_attr "arch" "neon_nota8,*,*,neon_onlya8")] + (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits")] ) (define_expand "one_cmplsi2" @@ -4341,12 +5158,15 @@ ) (define_insn "*arm_one_cmplsi2" - [(set (match_operand:SI 0 "s_register_operand" "=r") - (not:SI (match_operand:SI 1 "s_register_operand" "r")))] + [(set (match_operand:SI 0 "s_register_operand" "=l,r") + (not:SI (match_operand:SI 1 "s_register_operand" "l,r")))] "TARGET_32BIT" "mvn%?\\t%0, %1" [(set_attr "predicable" "yes") - (set_attr "insn" "mvn")] + (set_attr "predicable_short_it" "yes,no") + (set_attr "arch" "t2,*") + (set_attr "length" "4") + (set_attr "type" "mvn_reg")] ) (define_insn "*thumb1_one_cmplsi2" @@ -4355,7 +5175,7 @@ "TARGET_THUMB1" "mvn\\t%0, %1" [(set_attr "length" "2") - (set_attr "insn" "mvn")] + (set_attr "type" "mvn_reg")] ) (define_insn "*notsi_compare0" @@ -4367,7 +5187,7 @@ "TARGET_32BIT" "mvn%.\\t%0, %1" [(set_attr "conds" "set") - (set_attr "insn" "mvn")] + (set_attr "type" "mvn_reg")] ) (define_insn "*notsi_compare0_scratch" @@ -4378,7 +5198,7 @@ "TARGET_32BIT" "mvn%.\\t%0, %1" [(set_attr "conds" "set") - (set_attr "insn" "mvn")] + (set_attr "type" "mvn_reg")] ) ;; Fixed <--> Floating conversion insns @@ -4498,7 +5318,7 @@ "TARGET_32BIT " "#" [(set_attr "length" "8,4,8,8") - (set_attr "arch" "neon_nota8,*,*,neon_onlya8") + (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits") (set_attr "ce_count" "2") (set_attr "predicable" "yes")] ) @@ -4513,7 +5333,7 @@ (set_attr "ce_count" "2") (set_attr "shift" "1") (set_attr "predicable" "yes") - (set_attr "arch" "neon_nota8,*,a,t,neon_onlya8")] + (set_attr "arch" "neon_for_64bits,*,a,t,avoid_neon_for_64bits")] ) ;; Splits for all extensions to DImode @@ -4639,7 +5459,7 @@ [(if_then_else (eq_attr "is_arch6" "yes") (const_int 2) (const_int 4)) (const_int 4)]) - (set_attr "type" "simple_alu_shift, load_byte")] + (set_attr "type" "extend,load_byte")] ) (define_insn "*arm_zero_extendhisi2" @@ -4649,7 +5469,7 @@ "@ # ldr%(h%)\\t%0, %1" - [(set_attr "type" "alu_shift,load_byte") + [(set_attr "type" "arlo_shift,load_byte") (set_attr "predicable" "yes")] ) @@ -4661,7 +5481,7 @@ uxth%?\\t%0, %1 ldr%(h%)\\t%0, %1" [(set_attr "predicable" "yes") - (set_attr "type" "simple_alu_shift,load_byte")] + 
(set_attr "type" "extend,load_byte")] ) (define_insn "*arm_zero_extendhisi2addsi" @@ -4670,8 +5490,9 @@ (match_operand:SI 2 "s_register_operand" "r")))] "TARGET_INT_SIMD" "uxtah%?\\t%0, %2, %1" - [(set_attr "type" "alu_shift") - (set_attr "predicable" "yes")] + [(set_attr "type" "arlo_shift") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_expand "zero_extendqisi2" @@ -4719,7 +5540,7 @@ # ldrb\\t%0, %1" [(set_attr "length" "4,2") - (set_attr "type" "alu_shift,load_byte") + (set_attr "type" "arlo_shift,load_byte") (set_attr "pool_range" "*,32")] ) @@ -4731,7 +5552,7 @@ uxtb\\t%0, %1 ldrb\\t%0, %1" [(set_attr "length" "2") - (set_attr "type" "simple_alu_shift,load_byte")] + (set_attr "type" "extend,load_byte")] ) (define_insn "*arm_zero_extendqisi2" @@ -4742,7 +5563,7 @@ # ldr%(b%)\\t%0, %1\\t%@ zero_extendqisi2" [(set_attr "length" "8,4") - (set_attr "type" "alu_shift,load_byte") + (set_attr "type" "arlo_shift,load_byte") (set_attr "predicable" "yes")] ) @@ -4753,7 +5574,7 @@ "@ uxtb%(%)\\t%0, %1 ldr%(b%)\\t%0, %1\\t%@ zero_extendqisi2" - [(set_attr "type" "simple_alu_shift,load_byte") + [(set_attr "type" "extend,load_byte") (set_attr "predicable" "yes")] ) @@ -4764,8 +5585,8 @@ "TARGET_INT_SIMD" "uxtab%?\\t%0, %2, %1" [(set_attr "predicable" "yes") - (set_attr "insn" "xtab") - (set_attr "type" "alu_shift")] + (set_attr "predicable_short_it" "no") + (set_attr "type" "arlo_shift")] ) (define_split @@ -4816,7 +5637,8 @@ "TARGET_32BIT" "tst%?\\t%0, #255" [(set_attr "conds" "set") - (set_attr "predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_expand "extendhisi2" @@ -4927,7 +5749,7 @@ [(if_then_else (eq_attr "is_arch6" "yes") (const_int 2) (const_int 4)) (const_int 4)]) - (set_attr "type" "simple_alu_shift,load_byte") + (set_attr "type" "extend,load_byte") (set_attr "pool_range" "*,1018")] ) @@ -4986,7 +5808,7 @@ # ldr%(sh%)\\t%0, %1" [(set_attr "length" "8,4") - (set_attr "type" "alu_shift,load_byte") + (set_attr "type" "arlo_shift,load_byte") (set_attr "predicable" "yes") (set_attr "pool_range" "*,256") (set_attr "neg_pool_range" "*,244")] @@ -5000,8 +5822,9 @@ "@ sxth%?\\t%0, %1 ldr%(sh%)\\t%0, %1" - [(set_attr "type" "simple_alu_shift,load_byte") + [(set_attr "type" "extend,load_byte") (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "pool_range" "*,256") (set_attr "neg_pool_range" "*,244")] ) @@ -5086,7 +5909,7 @@ # ldr%(sb%)\\t%0, %1" [(set_attr "length" "8,4") - (set_attr "type" "alu_shift,load_byte") + (set_attr "type" "arlo_shift,load_byte") (set_attr "predicable" "yes") (set_attr "pool_range" "*,256") (set_attr "neg_pool_range" "*,244")] @@ -5100,7 +5923,7 @@ "@ sxtb%?\\t%0, %1 ldr%(sb%)\\t%0, %1" - [(set_attr "type" "simple_alu_shift,load_byte") + [(set_attr "type" "extend,load_byte") (set_attr "predicable" "yes") (set_attr "pool_range" "*,256") (set_attr "neg_pool_range" "*,244")] @@ -5112,9 +5935,9 @@ (match_operand:SI 2 "s_register_operand" "r")))] "TARGET_INT_SIMD" "sxtab%?\\t%0, %2, %1" - [(set_attr "type" "alu_shift") - (set_attr "insn" "xtab") - (set_attr "predicable" "yes")] + [(set_attr "type" "arlo_shift") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] ) (define_split @@ -5213,7 +6036,7 @@ (const_int 2) (if_then_else (eq_attr "is_arch6" "yes") (const_int 4) (const_int 6))]) - (set_attr "type" "simple_alu_shift,load_byte,load_byte")] + (set_attr "type" "extend,load_byte,load_byte")] ) (define_expand "extendsfdf2" @@ -5313,8 +6136,8 
@@ ) (define_insn "*arm_movdi" - [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, r, m") - (match_operand:DI 1 "di_operand" "rDa,Db,Dc,mi,r"))] + [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, q, m") + (match_operand:DI 1 "di_operand" "rDa,Db,Dc,mi,q"))] "TARGET_32BIT && !(TARGET_HARD_FLOAT && TARGET_VFP) && !TARGET_IWMMXT @@ -5472,8 +6295,7 @@ } }" [(set_attr "length" "4,4,6,2,2,6,4,4") - (set_attr "type" "*,*,*,load2,store2,load2,store2,*") - (set_attr "insn" "*,mov,*,*,*,*,*,mov") + (set_attr "type" "*,mov_reg,*,load2,store2,load2,store2,mov_reg") (set_attr "pool_range" "*,*,*,*,*,1018,*,*")] ) @@ -5570,6 +6392,7 @@ "arm_arch_thumb2" "movt%?\t%0, #:upper16:%c2" [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") (set_attr "length" "4")] ) @@ -5587,8 +6410,7 @@ movw%?\\t%0, %1 ldr%?\\t%0, %1 str%?\\t%1, %0" - [(set_attr "type" "*,simple_alu_imm,simple_alu_imm,simple_alu_imm,load1,store1") - (set_attr "insn" "mov,mov,mvn,mov,*,*") + [(set_attr "type" "mov_reg,mov_imm,mvn_imm,mov_imm,load1,store1") (set_attr "predicable" "yes") (set_attr "pool_range" "*,*,*,*,4096,*") (set_attr "neg_pool_range" "*,*,*,*,4084,*")] @@ -5890,7 +6712,7 @@ cmp%?\\t%0, #0 sub%.\\t%0, %1, #0" [(set_attr "conds" "set") - (set_attr "type" "simple_alu_imm,simple_alu_imm")] + (set_attr "type" "arlo_imm,arlo_imm")] ) ;; Subroutine to store a half word from a register into memory. @@ -6304,14 +7126,13 @@ str%(h%)\\t%1, %0\\t%@ movhi ldr%(h%)\\t%0, %1\\t%@ movhi" [(set_attr "predicable" "yes") - (set_attr "insn" "mov,mvn,*,*") (set_attr "pool_range" "*,*,*,256") (set_attr "neg_pool_range" "*,*,*,244") (set_attr_alternative "type" [(if_then_else (match_operand 1 "const_int_operand" "") - (const_string "simple_alu_imm" ) - (const_string "*")) - (const_string "simple_alu_imm") + (const_string "mov_imm" ) + (const_string "mov_reg")) + (const_string "mvn_imm") (const_string "store1") (const_string "load1")])] ) @@ -6325,8 +7146,7 @@ mov%?\\t%0, %1\\t%@ movhi mvn%?\\t%0, #%B1\\t%@ movhi" [(set_attr "predicable" "yes") - (set_attr "insn" "mov, mov,mvn") - (set_attr "type" "simple_alu_imm,*,simple_alu_imm")] + (set_attr "type" "mov_imm,mov_reg,mvn_imm")] ) (define_expand "thumb_movhi_clobber" @@ -6449,10 +7269,9 @@ " ) - (define_insn "*arm_movqi_insn" - [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,l,Uu,r,m") - (match_operand:QI 1 "general_operand" "r,I,K,Uu,l,m,r"))] + [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,l,r,l,Uu,r,m") + (match_operand:QI 1 "general_operand" "r,r,I,Py,K,Uu,l,m,r"))] "TARGET_32BIT && ( register_operand (operands[0], QImode) || register_operand (operands[1], QImode))" @@ -6459,16 +7278,18 @@ "@ mov%?\\t%0, %1 mov%?\\t%0, %1 + mov%?\\t%0, %1 + mov%?\\t%0, %1 mvn%?\\t%0, #%B1 ldr%(b%)\\t%0, %1 str%(b%)\\t%1, %0 ldr%(b%)\\t%0, %1 str%(b%)\\t%1, %0" - [(set_attr "type" "*,simple_alu_imm,simple_alu_imm,load1, store1, load1, store1") - (set_attr "insn" "mov,mov,mvn,*,*,*,*") + [(set_attr "type" "mov_reg,mov_reg,mov_imm,mov_imm,mvn_imm,load1,store1,load1,store1") (set_attr "predicable" "yes") - (set_attr "arch" "any,any,any,t2,t2,any,any") - (set_attr "length" "4,4,4,2,2,4,4")] + (set_attr "predicable_short_it" "yes,yes,yes,no,no,no,no,no,no") + (set_attr "arch" "t2,any,any,t2,any,t2,t2,any,any") + (set_attr "length" "2,4,4,2,4,2,2,4,4")] ) (define_insn "*thumb1_movqi_insn" @@ -6485,8 +7306,7 @@ mov\\t%0, %1 mov\\t%0, %1" [(set_attr "length" "2") - (set_attr "type" "simple_alu_imm,load1,store1,*,*,simple_alu_imm") - (set_attr "insn" 
"*,*,*,mov,mov,mov") + (set_attr "type" "arlo_imm,load1,store1,mov_reg,mov_imm,mov_imm") (set_attr "pool_range" "*,32,*,*,*,*") (set_attr "conds" "clob,nocond,nocond,nocond,nocond,clob")]) @@ -6515,7 +7335,7 @@ (define_insn "*arm32_movhf" [(set (match_operand:HF 0 "nonimmediate_operand" "=r,m,r,r") (match_operand:HF 1 "general_operand" " m,r,r,F"))] - "TARGET_32BIT && !(TARGET_HARD_FLOAT && TARGET_FP16) + "TARGET_32BIT && !(TARGET_HARD_FLOAT && TARGET_FP16) && !arm_restrict_it && ( s_register_operand (operands[0], HFmode) || s_register_operand (operands[1], HFmode))" "* @@ -6551,8 +7371,7 @@ } " [(set_attr "conds" "unconditional") - (set_attr "type" "load1,store1,*,*") - (set_attr "insn" "*,*,mov,mov") + (set_attr "type" "load1,store1,mov_reg,mov_reg") (set_attr "length" "4,4,4,8") (set_attr "predicable" "yes")] ) @@ -6587,8 +7406,7 @@ } " [(set_attr "length" "2") - (set_attr "type" "*,load1,store1,*,*") - (set_attr "insn" "mov,*,*,mov,mov") + (set_attr "type" "mov_reg,load1,store1,mov_reg,mov_reg") (set_attr "pool_range" "*,1018,*,*,*") (set_attr "conds" "clob,nocond,nocond,nocond,nocond")]) @@ -6642,8 +7460,8 @@ ldr%?\\t%0, %1\\t%@ float str%?\\t%1, %0\\t%@ float" [(set_attr "predicable" "yes") - (set_attr "type" "*,load1,store1") - (set_attr "insn" "mov,*,*") + (set_attr "predicable_short_it" "no") + (set_attr "type" "mov_reg,load1,store1") (set_attr "arm_pool_range" "*,4096,*") (set_attr "thumb2_pool_range" "*,4094,*") (set_attr "arm_neg_pool_range" "*,4084,*") @@ -6666,9 +7484,8 @@ mov\\t%0, %1 mov\\t%0, %1" [(set_attr "length" "2") - (set_attr "type" "*,load1,store1,load1,store1,*,*") + (set_attr "type" "*,load1,store1,load1,store1,mov_reg,mov_reg") (set_attr "pool_range" "*,*,*,1018,*,*,*") - (set_attr "insn" "*,*,*,*,*,mov,mov") (set_attr "conds" "clob,nocond,nocond,nocond,nocond,nocond,nocond")] ) @@ -6738,8 +7555,8 @@ ) (define_insn "*movdf_soft_insn" - [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=r,r,r,r,m") - (match_operand:DF 1 "soft_df_operand" "rDa,Db,Dc,mF,r"))] + [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=r,r,r,q,m") + (match_operand:DF 1 "soft_df_operand" "rDa,Db,Dc,mF,q"))] "TARGET_32BIT && TARGET_SOFT_FLOAT && ( register_operand (operands[0], DFmode) || register_operand (operands[1], DFmode))" @@ -6799,8 +7616,7 @@ } " [(set_attr "length" "4,2,2,6,4,4") - (set_attr "type" "*,load2,store2,load2,store2,*") - (set_attr "insn" "*,*,*,*,*,mov") + (set_attr "type" "*,load2,store2,load2,store2,mov_reg") (set_attr "pool_range" "*,*,*,1018,*,*")] ) @@ -6869,10 +7685,18 @@ (match_operand:BLK 1 "general_operand" "") (match_operand:SI 2 "const_int_operand" "") (match_operand:SI 3 "const_int_operand" "")] - "TARGET_EITHER" + "" " if (TARGET_32BIT) { + if (TARGET_LDRD && current_tune->prefer_ldrd_strd + && !optimize_function_for_size_p (cfun)) + { + if (gen_movmem_ldrd_strd (operands)) + DONE; + FAIL; + } + if (arm_gen_movmemqi (operands)) DONE; FAIL; @@ -7568,7 +8392,7 @@ (set_attr "arch" "t2,t2,any,any") (set_attr "length" "2,2,4,4") (set_attr "predicable" "yes") - (set_attr "type" "*,*,*,simple_alu_imm")] + (set_attr "type" "*,*,*,arlo_imm")] ) (define_insn "*cmpsi_shiftsi" @@ -7582,7 +8406,7 @@ [(set_attr "conds" "set") (set_attr "shift" "1") (set_attr "arch" "32,a") - (set_attr "type" "alu_shift,alu_shift_reg")]) + (set_attr "type" "arlo_shift,arlo_shift_reg")]) (define_insn "*cmpsi_shiftsi_swp" [(set (reg:CC_SWP CC_REGNUM) @@ -7595,7 +8419,7 @@ [(set_attr "conds" "set") (set_attr "shift" "1") (set_attr "arch" "32,a") - (set_attr "type" 
"alu_shift,alu_shift_reg")]) + (set_attr "type" "arlo_shift,arlo_shift_reg")]) (define_insn "*arm_cmpsi_negshiftsi_si" [(set (reg:CC_Z CC_REGNUM) @@ -7608,8 +8432,8 @@ "cmn%?\\t%0, %2%S1" [(set_attr "conds" "set") (set (attr "type") (if_then_else (match_operand 3 "const_int_operand" "") - (const_string "alu_shift") - (const_string "alu_shift_reg"))) + (const_string "arlo_shift") + (const_string "arlo_shift_reg"))) (set_attr "predicable" "yes")] ) @@ -7617,25 +8441,69 @@ ;; if-conversion can not reduce to a conditional compare, so we do ;; that directly. -(define_insn "*arm_cmpdi_insn" +(define_insn_and_split "*arm_cmpdi_insn" [(set (reg:CC_NCV CC_REGNUM) (compare:CC_NCV (match_operand:DI 0 "s_register_operand" "r") (match_operand:DI 1 "arm_di_operand" "rDi"))) (clobber (match_scratch:SI 2 "=r"))] "TARGET_32BIT" - "cmp\\t%Q0, %Q1\;sbcs\\t%2, %R0, %R1" + "#" ; "cmp\\t%Q0, %Q1\;sbcs\\t%2, %R0, %R1" + "&& reload_completed" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 0) (match_dup 1))) + (parallel [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 3) (match_dup 4))) + (set (match_dup 2) + (minus:SI (match_dup 5) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))])] + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + if (CONST_INT_P (operands[1])) + { + operands[4] = GEN_INT (~INTVAL (gen_highpart_mode (SImode, + DImode, + operands[1]))); + operands[5] = gen_rtx_PLUS (SImode, operands[3], operands[4]); + } + else + { + operands[4] = gen_highpart (SImode, operands[1]); + operands[5] = gen_rtx_MINUS (SImode, operands[3], operands[4]); + } + operands[1] = gen_lowpart (SImode, operands[1]); + operands[2] = gen_lowpart (SImode, operands[2]); + } [(set_attr "conds" "set") (set_attr "length" "8")] ) -(define_insn "*arm_cmpdi_unsigned" +(define_insn_and_split "*arm_cmpdi_unsigned" [(set (reg:CC_CZ CC_REGNUM) - (compare:CC_CZ (match_operand:DI 0 "s_register_operand" "r") - (match_operand:DI 1 "arm_di_operand" "rDi")))] + (compare:CC_CZ (match_operand:DI 0 "s_register_operand" "l,r,r") + (match_operand:DI 1 "arm_di_operand" "Py,r,rDi")))] + "TARGET_32BIT" - "cmp\\t%R0, %R1\;it eq\;cmpeq\\t%Q0, %Q1" + "#" ; "cmp\\t%R0, %R1\;it eq\;cmpeq\\t%Q0, %Q1" + "&& reload_completed" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 2) (match_dup 3))) + (cond_exec (eq:SI (reg:CC CC_REGNUM) (const_int 0)) + (set (reg:CC CC_REGNUM) + (compare:CC (match_dup 0) (match_dup 1))))] + { + operands[2] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + if (CONST_INT_P (operands[1])) + operands[3] = gen_highpart_mode (SImode, DImode, operands[1]); + else + operands[3] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + } [(set_attr "conds" "set") - (set_attr "length" "8")] + (set_attr "enabled_for_depr_it" "yes,yes,no") + (set_attr "arch" "t2,t2,*") + (set_attr "length" "6,6,8")] ) (define_insn "*arm_cmpdi_zero" @@ -7758,36 +8626,56 @@ operands[3] = const0_rtx;" ) -(define_insn "*mov_scc" +(define_insn_and_split "*mov_scc" [(set (match_operand:SI 0 "s_register_operand" "=r") (match_operator:SI 1 "arm_comparison_operator" [(match_operand 2 "cc_register" "") (const_int 0)]))] "TARGET_ARM" - "mov%D1\\t%0, #0\;mov%d1\\t%0, #1" + "#" ; "mov%D1\\t%0, #0\;mov%d1\\t%0, #1" + "TARGET_ARM" + [(set (match_dup 0) + (if_then_else:SI (match_dup 1) + (const_int 1) + (const_int 0)))] + "" [(set_attr "conds" "use") - (set_attr "insn" "mov") (set_attr "length" "8")] ) -(define_insn "*mov_negscc" 
+(define_insn_and_split "*mov_negscc" [(set (match_operand:SI 0 "s_register_operand" "=r") (neg:SI (match_operator:SI 1 "arm_comparison_operator" [(match_operand 2 "cc_register" "") (const_int 0)])))] "TARGET_ARM" - "mov%D1\\t%0, #0\;mvn%d1\\t%0, #0" + "#" ; "mov%D1\\t%0, #0\;mvn%d1\\t%0, #0" + "TARGET_ARM" + [(set (match_dup 0) + (if_then_else:SI (match_dup 1) + (match_dup 3) + (const_int 0)))] + { + operands[3] = GEN_INT (~0); + } [(set_attr "conds" "use") - (set_attr "insn" "mov") (set_attr "length" "8")] ) -(define_insn "*mov_notscc" +(define_insn_and_split "*mov_notscc" [(set (match_operand:SI 0 "s_register_operand" "=r") (not:SI (match_operator:SI 1 "arm_comparison_operator" [(match_operand 2 "cc_register" "") (const_int 0)])))] "TARGET_ARM" - "mvn%D1\\t%0, #0\;mvn%d1\\t%0, #1" + "#" ; "mvn%D1\\t%0, #0\;mvn%d1\\t%0, #1" + "TARGET_ARM" + [(set (match_dup 0) + (if_then_else:SI (match_dup 1) + (match_dup 3) + (match_dup 4)))] + { + operands[3] = GEN_INT (~1); + operands[4] = GEN_INT (~0); + } [(set_attr "conds" "use") - (set_attr "insn" "mov") (set_attr "length" "8")] ) @@ -8069,7 +8957,7 @@ (define_expand "movsfcc" [(set (match_operand:SF 0 "s_register_operand" "") - (if_then_else:SF (match_operand 1 "expandable_comparison_operator" "") + (if_then_else:SF (match_operand 1 "arm_cond_move_operator" "") (match_operand:SF 2 "s_register_operand" "") (match_operand:SF 3 "s_register_operand" "")))] "TARGET_32BIT && TARGET_HARD_FLOAT" @@ -8091,7 +8979,7 @@ (define_expand "movdfcc" [(set (match_operand:DF 0 "s_register_operand" "") - (if_then_else:DF (match_operand 1 "expandable_comparison_operator" "") + (if_then_else:DF (match_operand 1 "arm_cond_move_operator" "") (match_operand:DF 2 "s_register_operand" "") (match_operand:DF 3 "s_register_operand" "")))] "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" @@ -8110,7 +8998,40 @@ }" ) -(define_insn "*movsicc_insn" +(define_insn "*cmov" + [(set (match_operand:SDF 0 "s_register_operand" "=") + (if_then_else:SDF (match_operator 1 "arm_vsel_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)]) + (match_operand:SDF 3 "s_register_operand" + "") + (match_operand:SDF 4 "s_register_operand" + "")))] + "TARGET_HARD_FLOAT && TARGET_FPU_ARMV8 " + "* + { + enum arm_cond_code code = maybe_get_arm_condition_code (operands[1]); + switch (code) + { + case ARM_GE: + case ARM_GT: + case ARM_EQ: + case ARM_VS: + return \"vsel%d1.\\t%0, %3, %4\"; + case ARM_LT: + case ARM_LE: + case ARM_NE: + case ARM_VC: + return \"vsel%D1.\\t%0, %4, %3\"; + default: + gcc_unreachable (); + } + return \"\"; + }" + [(set_attr "conds" "use") + (set_attr "type" "f_sel")] +) + +(define_insn_and_split "*movsicc_insn" [(set (match_operand:SI 0 "s_register_operand" "=r,r,r,r,r,r,r,r") (if_then_else:SI (match_operator 3 "arm_comparison_operator" @@ -8123,26 +9044,60 @@ mvn%D3\\t%0, #%B2 mov%d3\\t%0, %1 mvn%d3\\t%0, #%B1 - mov%d3\\t%0, %1\;mov%D3\\t%0, %2 - mov%d3\\t%0, %1\;mvn%D3\\t%0, #%B2 - mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2 - mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2" + # + # + # + #" + ; alt4: mov%d3\\t%0, %1\;mov%D3\\t%0, %2 + ; alt5: mov%d3\\t%0, %1\;mvn%D3\\t%0, #%B2 + ; alt6: mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2 + ; alt7: mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2" + "&& reload_completed" + [(const_int 0)] + { + enum rtx_code rev_code; + enum machine_mode mode; + rtx rev_cond; + + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + operands[3], + gen_rtx_SET (VOIDmode, + operands[0], + operands[1]))); + + rev_code = GET_CODE (operands[3]); + mode = GET_MODE (operands[4]); + 
if (mode == CCFPmode || mode == CCFPEmode) + rev_code = reverse_condition_maybe_unordered (rev_code); + else + rev_code = reverse_condition (rev_code); + + rev_cond = gen_rtx_fmt_ee (rev_code, + VOIDmode, + operands[4], + const0_rtx); + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + rev_cond, + gen_rtx_SET (VOIDmode, + operands[0], + operands[2]))); + DONE; + } [(set_attr "length" "4,4,4,4,8,8,8,8") (set_attr "conds" "use") - (set_attr "insn" "mov,mvn,mov,mvn,mov,mov,mvn,mvn") (set_attr_alternative "type" [(if_then_else (match_operand 2 "const_int_operand" "") - (const_string "simple_alu_imm") - (const_string "*")) - (const_string "simple_alu_imm") + (const_string "mov_imm") + (const_string "mov_reg")) + (const_string "mvn_imm") (if_then_else (match_operand 1 "const_int_operand" "") - (const_string "simple_alu_imm") - (const_string "*")) - (const_string "simple_alu_imm") - (const_string "*") - (const_string "*") - (const_string "*") - (const_string "*")])] + (const_string "mov_imm") + (const_string "mov_reg")) + (const_string "mvn_imm") + (const_string "mov_reg") + (const_string "mov_reg") + (const_string "mov_reg") + (const_string "mov_reg")])] ) (define_insn "*movsfcc_soft_insn" @@ -8156,7 +9111,7 @@ mov%D3\\t%0, %2 mov%d3\\t%0, %1" [(set_attr "conds" "use") - (set_attr "insn" "mov")] + (set_attr "type" "mov_reg")] ) @@ -8255,7 +9210,7 @@ (match_operand 1 "" "")) (use (match_operand 2 "" "")) (clobber (reg:SI LR_REGNUM))] - "TARGET_ARM && arm_arch5" + "TARGET_ARM && arm_arch5 && !SIBLING_CALL_P (insn)" "blx%?\\t%0" [(set_attr "type" "call")] ) @@ -8265,7 +9220,7 @@ (match_operand 1 "" "")) (use (match_operand 2 "" "")) (clobber (reg:SI LR_REGNUM))] - "TARGET_ARM && !arm_arch5" + "TARGET_ARM && !arm_arch5 && !SIBLING_CALL_P (insn)" "* return output_call (operands); " @@ -8284,7 +9239,7 @@ (match_operand 1 "" "")) (use (match_operand 2 "" "")) (clobber (reg:SI LR_REGNUM))] - "TARGET_ARM && !arm_arch5" + "TARGET_ARM && !arm_arch5 && !SIBLING_CALL_P (insn)" "* return output_call_mem (operands); " @@ -8297,7 +9252,7 @@ (match_operand 1 "" "")) (use (match_operand 2 "" "")) (clobber (reg:SI LR_REGNUM))] - "TARGET_THUMB1 && arm_arch5" + "TARGET_THUMB1 && arm_arch5 && !SIBLING_CALL_P (insn)" "blx\\t%0" [(set_attr "length" "2") (set_attr "type" "call")] @@ -8308,7 +9263,7 @@ (match_operand 1 "" "")) (use (match_operand 2 "" "")) (clobber (reg:SI LR_REGNUM))] - "TARGET_THUMB1 && !arm_arch5" + "TARGET_THUMB1 && !arm_arch5 && !SIBLING_CALL_P (insn)" "* { if (!TARGET_CALLER_INTERWORKING) @@ -8367,7 +9322,7 @@ (match_operand 2 "" ""))) (use (match_operand 3 "" "")) (clobber (reg:SI LR_REGNUM))] - "TARGET_ARM && arm_arch5" + "TARGET_ARM && arm_arch5 && !SIBLING_CALL_P (insn)" "blx%?\\t%1" [(set_attr "type" "call")] ) @@ -8378,7 +9333,7 @@ (match_operand 2 "" ""))) (use (match_operand 3 "" "")) (clobber (reg:SI LR_REGNUM))] - "TARGET_ARM && !arm_arch5" + "TARGET_ARM && !arm_arch5 && !SIBLING_CALL_P (insn)" "* return output_call (&operands[1]); " @@ -8394,7 +9349,8 @@ (match_operand 2 "" ""))) (use (match_operand 3 "" "")) (clobber (reg:SI LR_REGNUM))] - "TARGET_ARM && !arm_arch5 && (!CONSTANT_ADDRESS_P (XEXP (operands[1], 0)))" + "TARGET_ARM && !arm_arch5 && (!CONSTANT_ADDRESS_P (XEXP (operands[1], 0))) + && !SIBLING_CALL_P (insn)" "* return output_call_mem (&operands[1]); " @@ -8444,6 +9400,7 @@ (use (match_operand 2 "" "")) (clobber (reg:SI LR_REGNUM))] "TARGET_32BIT + && !SIBLING_CALL_P (insn) && (GET_CODE (operands[0]) == SYMBOL_REF) && !arm_is_long_call_p (SYMBOL_REF_DECL (operands[0]))" "* @@ -8460,6 
+9417,7 @@ (use (match_operand 3 "" "")) (clobber (reg:SI LR_REGNUM))] "TARGET_32BIT + && !SIBLING_CALL_P (insn) && (GET_CODE (operands[1]) == SYMBOL_REF) && !arm_is_long_call_p (SYMBOL_REF_DECL (operands[1]))" "* @@ -8505,6 +9463,10 @@ "TARGET_32BIT" " { + if (!REG_P (XEXP (operands[0], 0)) + && (GET_CODE (XEXP (operands[0], 0)) != SYMBOL_REF)) + XEXP (operands[0], 0) = force_reg (SImode, XEXP (operands[0], 0)); + if (operands[2] == NULL_RTX) operands[2] = const0_rtx; }" @@ -8519,6 +9481,10 @@ "TARGET_32BIT" " { + if (!REG_P (XEXP (operands[1], 0)) && + (GET_CODE (XEXP (operands[1],0)) != SYMBOL_REF)) + XEXP (operands[1], 0) = force_reg (SImode, XEXP (operands[1], 0)); + if (operands[3] == NULL_RTX) operands[3] = const0_rtx; }" @@ -8525,13 +9491,21 @@ ) (define_insn "*sibcall_insn" - [(call (mem:SI (match_operand:SI 0 "" "X")) + [(call (mem:SI (match_operand:SI 0 "call_insn_operand" "Cs, US")) (match_operand 1 "" "")) (return) (use (match_operand 2 "" ""))] - "TARGET_32BIT && GET_CODE (operands[0]) == SYMBOL_REF" + "TARGET_32BIT && SIBLING_CALL_P (insn)" "* - return NEED_PLT_RELOC ? \"b%?\\t%a0(PLT)\" : \"b%?\\t%a0\"; + if (which_alternative == 1) + return NEED_PLT_RELOC ? \"b%?\\t%a0(PLT)\" : \"b%?\\t%a0\"; + else + { + if (arm_arch5 || arm_arch4t) + return \"bx%?\\t%0\\t%@ indirect register sibling call\"; + else + return \"mov%?\\t%|pc, %0\\t%@ indirect register sibling call\"; + } " [(set_attr "type" "call")] ) @@ -8538,28 +9512,36 @@ (define_insn "*sibcall_value_insn" [(set (match_operand 0 "" "") - (call (mem:SI (match_operand:SI 1 "" "X")) + (call (mem:SI (match_operand:SI 1 "call_insn_operand" "Cs,US")) (match_operand 2 "" ""))) (return) (use (match_operand 3 "" ""))] - "TARGET_32BIT && GET_CODE (operands[1]) == SYMBOL_REF" + "TARGET_32BIT && SIBLING_CALL_P (insn)" "* - return NEED_PLT_RELOC ? \"b%?\\t%a1(PLT)\" : \"b%?\\t%a1\"; + if (which_alternative == 1) + return NEED_PLT_RELOC ? 
\"b%?\\t%a1(PLT)\" : \"b%?\\t%a1\"; + else + { + if (arm_arch5 || arm_arch4t) + return \"bx%?\\t%1\"; + else + return \"mov%?\\t%|pc, %1\\t@ indirect sibling call \"; + } " [(set_attr "type" "call")] ) -(define_expand "return" - [(return)] +(define_expand "return" + [(returns)] "(TARGET_ARM || (TARGET_THUMB2 && ARM_FUNC_TYPE (arm_current_func_type ()) == ARM_FT_NORMAL && !IS_STACKALIGN (arm_current_func_type ()))) - && USE_RETURN_INSN (FALSE)" + " " { if (TARGET_THUMB2) { - thumb2_expand_return (); + thumb2_expand_return (); DONE; } } @@ -8584,13 +9566,13 @@ (set_attr "predicable" "yes")] ) -(define_insn "*cond_return" +(define_insn "*cond_return" [(set (pc) (if_then_else (match_operator 0 "arm_comparison_operator" [(match_operand 1 "cc_register" "") (const_int 0)]) - (return) + (returns) (pc)))] - "TARGET_ARM && USE_RETURN_INSN (TRUE)" + "TARGET_ARM " "* { if (arm_ccfsm_state == 2) @@ -8598,7 +9580,8 @@ arm_ccfsm_state += 2; return \"\"; } - return output_return_instruction (operands[0], true, false, false); + return output_return_instruction (operands[0], true, false, + ); }" [(set_attr "conds" "use") (set_attr "length" "12") @@ -8605,13 +9588,13 @@ (set_attr "type" "load1")] ) -(define_insn "*cond_return_inverted" +(define_insn "*cond_return_inverted" [(set (pc) (if_then_else (match_operator 0 "arm_comparison_operator" [(match_operand 1 "cc_register" "") (const_int 0)]) (pc) - (return)))] - "TARGET_ARM && USE_RETURN_INSN (TRUE)" + (returns)))] + "TARGET_ARM " "* { if (arm_ccfsm_state == 2) @@ -8619,7 +9602,8 @@ arm_ccfsm_state += 2; return \"\"; } - return output_return_instruction (operands[0], true, true, false); + return output_return_instruction (operands[0], true, true, + ); }" [(set_attr "conds" "use") (set_attr "length" "12") @@ -8991,7 +9975,7 @@ (if_then_else (match_operand:SI 3 "mult_operator" "") (const_string "no") (const_string "yes"))]) - (set_attr "type" "alu_shift,alu_shift,alu_shift,alu_shift_reg")]) + (set_attr "type" "arlo_shift,arlo_shift,arlo_shift,arlo_shift_reg")]) (define_split [(set (match_operand:SI 0 "s_register_operand" "") @@ -9028,7 +10012,7 @@ [(set_attr "conds" "set") (set_attr "shift" "4") (set_attr "arch" "32,a") - (set_attr "type" "alu_shift,alu_shift_reg")]) + (set_attr "type" "arlo_shift,arlo_shift_reg")]) (define_insn "*arith_shiftsi_compare0_scratch" [(set (reg:CC_NOOV CC_REGNUM) @@ -9045,7 +10029,7 @@ [(set_attr "conds" "set") (set_attr "shift" "4") (set_attr "arch" "32,a") - (set_attr "type" "alu_shift,alu_shift_reg")]) + (set_attr "type" "arlo_shift,arlo_shift_reg")]) (define_insn "*sub_shiftsi" [(set (match_operand:SI 0 "s_register_operand" "=r,r") @@ -9058,7 +10042,7 @@ [(set_attr "predicable" "yes") (set_attr "shift" "3") (set_attr "arch" "32,a") - (set_attr "type" "alu_shift,alu_shift_reg")]) + (set_attr "type" "arlo_shift,arlo_shift_reg")]) (define_insn "*sub_shiftsi_compare0" [(set (reg:CC_NOOV CC_REGNUM) @@ -9076,7 +10060,7 @@ [(set_attr "conds" "set") (set_attr "shift" "3") (set_attr "arch" "32,a") - (set_attr "type" "alu_shift,alu_shift_reg")]) + (set_attr "type" "arlo_shift,arlo_shift_reg")]) (define_insn "*sub_shiftsi_compare0_scratch" [(set (reg:CC_NOOV CC_REGNUM) @@ -9092,30 +10076,67 @@ [(set_attr "conds" "set") (set_attr "shift" "3") (set_attr "arch" "32,a") - (set_attr "type" "alu_shift,alu_shift_reg")]) + (set_attr "type" "arlo_shift,arlo_shift_reg")]) -(define_insn "*and_scc" +(define_insn_and_split "*and_scc" [(set (match_operand:SI 0 "s_register_operand" "=r") (and:SI (match_operator:SI 1 "arm_comparison_operator" - 
[(match_operand 3 "cc_register" "") (const_int 0)]) - (match_operand:SI 2 "s_register_operand" "r")))] + [(match_operand 2 "cc_register" "") (const_int 0)]) + (match_operand:SI 3 "s_register_operand" "r")))] "TARGET_ARM" - "mov%D1\\t%0, #0\;and%d1\\t%0, %2, #1" + "#" ; "mov%D1\\t%0, #0\;and%d1\\t%0, %3, #1" + "&& reload_completed" + [(cond_exec (match_dup 5) (set (match_dup 0) (const_int 0))) + (cond_exec (match_dup 4) (set (match_dup 0) + (and:SI (match_dup 3) (const_int 1))))] + { + enum machine_mode mode = GET_MODE (operands[2]); + enum rtx_code rc = GET_CODE (operands[1]); + + /* Note that operands[4] is the same as operands[1], + but with VOIDmode as the result. */ + operands[4] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx); + if (mode == CCFPmode || mode == CCFPEmode) + rc = reverse_condition_maybe_unordered (rc); + else + rc = reverse_condition (rc); + operands[5] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx); + } [(set_attr "conds" "use") - (set_attr "insn" "mov") + (set_attr "type" "mov_reg") (set_attr "length" "8")] ) -(define_insn "*ior_scc" +(define_insn_and_split "*ior_scc" [(set (match_operand:SI 0 "s_register_operand" "=r,r") - (ior:SI (match_operator:SI 2 "arm_comparison_operator" - [(match_operand 3 "cc_register" "") (const_int 0)]) - (match_operand:SI 1 "s_register_operand" "0,?r")))] + (ior:SI (match_operator:SI 1 "arm_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)]) + (match_operand:SI 3 "s_register_operand" "0,?r")))] "TARGET_ARM" "@ - orr%d2\\t%0, %1, #1 - mov%D2\\t%0, %1\;orr%d2\\t%0, %1, #1" + orr%d1\\t%0, %3, #1 + #" + "&& reload_completed + && REGNO (operands [0]) != REGNO (operands[3])" + ;; && which_alternative == 1 + ; mov%D1\\t%0, %3\;orr%d1\\t%0, %3, #1 + [(cond_exec (match_dup 5) (set (match_dup 0) (match_dup 3))) + (cond_exec (match_dup 4) (set (match_dup 0) + (ior:SI (match_dup 3) (const_int 1))))] + { + enum machine_mode mode = GET_MODE (operands[2]); + enum rtx_code rc = GET_CODE (operands[1]); + + /* Note that operands[4] is the same as operands[1], + but with VOIDmode as the result. */ + operands[4] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx); + if (mode == CCFPmode || mode == CCFPEmode) + rc = reverse_condition_maybe_unordered (rc); + else + rc = reverse_condition (rc); + operands[5] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx); + } [(set_attr "conds" "use") (set_attr "length" "4,8")] ) @@ -9144,6 +10165,16 @@ (eq:SI (match_operand:SI 1 "s_register_operand" "") (const_int 0))) (clobber (reg:CC CC_REGNUM))] + "arm_arch5 && TARGET_32BIT" + [(set (match_dup 0) (clz:SI (match_dup 1))) + (set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 5)))] +) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (eq:SI (match_operand:SI 1 "s_register_operand" "") + (const_int 0))) + (clobber (reg:CC CC_REGNUM))] "TARGET_32BIT && reload_completed" [(parallel [(set (reg:CC CC_REGNUM) @@ -9184,7 +10215,7 @@ (set (match_dup 0) (const_int 1)))]) (define_insn_and_split "*compare_scc" - [(set (match_operand:SI 0 "s_register_operand" "=r,r") + [(set (match_operand:SI 0 "s_register_operand" "=Ts,Ts") (match_operator:SI 1 "arm_comparison_operator" [(match_operand:SI 2 "s_register_operand" "r,r") (match_operand:SI 3 "arm_add_operand" "rI,L")])) @@ -9213,29 +10244,93 @@ ;; Attempt to improve the sequence generated by the compare_scc splitters ;; not to use conditional execution. 
+ +;; Rd = (eq (reg1) (const_int0)) // ARMv5 +;; clz Rd, reg1 +;; lsr Rd, Rd, #5 (define_peephole2 [(set (reg:CC CC_REGNUM) (compare:CC (match_operand:SI 1 "register_operand" "") + (const_int 0))) + (cond_exec (ne (reg:CC CC_REGNUM) (const_int 0)) + (set (match_operand:SI 0 "register_operand" "") (const_int 0))) + (cond_exec (eq (reg:CC CC_REGNUM) (const_int 0)) + (set (match_dup 0) (const_int 1)))] + "arm_arch5 && TARGET_32BIT && peep2_regno_dead_p (3, CC_REGNUM)" + [(set (match_dup 0) (clz:SI (match_dup 1))) + (set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 5)))] +) + +;; Rd = (eq (reg1) (const_int0)) // !ARMv5 +;; negs Rd, reg1 +;; adc Rd, Rd, reg1 +(define_peephole2 + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 1 "register_operand" "") + (const_int 0))) + (cond_exec (ne (reg:CC CC_REGNUM) (const_int 0)) + (set (match_operand:SI 0 "register_operand" "") (const_int 0))) + (cond_exec (eq (reg:CC CC_REGNUM) (const_int 0)) + (set (match_dup 0) (const_int 1))) + (match_scratch:SI 2 "r")] + "TARGET_32BIT && peep2_regno_dead_p (3, CC_REGNUM)" + [(parallel + [(set (reg:CC CC_REGNUM) + (compare:CC (const_int 0) (match_dup 1))) + (set (match_dup 2) (minus:SI (const_int 0) (match_dup 1)))]) + (set (match_dup 0) + (plus:SI (plus:SI (match_dup 1) (match_dup 2)) + (geu:SI (reg:CC CC_REGNUM) (const_int 0))))] +) + +;; Rd = (eq (reg1) (reg2/imm)) // ARMv5 and optimising for speed. +;; sub Rd, Reg1, reg2 +;; clz Rd, Rd +;; lsr Rd, Rd, #5 +(define_peephole2 + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 1 "register_operand" "") (match_operand:SI 2 "arm_rhs_operand" ""))) (cond_exec (ne (reg:CC CC_REGNUM) (const_int 0)) (set (match_operand:SI 0 "register_operand" "") (const_int 0))) (cond_exec (eq (reg:CC CC_REGNUM) (const_int 0)) + (set (match_dup 0) (const_int 1)))] + "arm_arch5 && TARGET_32BIT && peep2_regno_dead_p (3, CC_REGNUM) + && !(TARGET_THUMB2 && optimize_insn_for_size_p ())" + [(set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (clz:SI (match_dup 0))) + (set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 5)))] +) + + +;; Rd = (eq (reg1) (reg2)) // ! ARMv5 or optimising for size. 
+;; sub T1, Reg1, reg2 +;; negs Rd, T1 +;; adc Rd, Rd, T1 +(define_peephole2 + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "arm_rhs_operand" ""))) + (cond_exec (ne (reg:CC CC_REGNUM) (const_int 0)) + (set (match_operand:SI 0 "register_operand" "") (const_int 0))) + (cond_exec (eq (reg:CC CC_REGNUM) (const_int 0)) (set (match_dup 0) (const_int 1))) (match_scratch:SI 3 "r")] - "TARGET_32BIT" - [(parallel - [(set (reg:CC CC_REGNUM) - (compare:CC (match_dup 1) (match_dup 2))) - (set (match_dup 3) (minus:SI (match_dup 1) (match_dup 2)))]) + "TARGET_32BIT && peep2_regno_dead_p (3, CC_REGNUM)" + [(set (match_dup 3) (match_dup 4)) (parallel [(set (reg:CC CC_REGNUM) (compare:CC (const_int 0) (match_dup 3))) (set (match_dup 0) (minus:SI (const_int 0) (match_dup 3)))]) - (parallel - [(set (match_dup 0) - (plus:SI (plus:SI (match_dup 0) (match_dup 3)) - (geu:SI (reg:CC CC_REGNUM) (const_int 0)))) - (clobber (reg:CC CC_REGNUM))])]) + (set (match_dup 0) + (plus:SI (plus:SI (match_dup 0) (match_dup 3)) + (geu:SI (reg:CC CC_REGNUM) (const_int 0))))] + " + if (CONST_INT_P (operands[2])) + operands[4] = plus_constant (SImode, operands[1], -INTVAL (operands[2])); + else + operands[4] = gen_rtx_MINUS (SImode, operands[1], operands[2]); + ") (define_insn "*cond_move" [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") @@ -9262,7 +10357,7 @@ return \"\"; " [(set_attr "conds" "use") - (set_attr "insn" "mov") + (set_attr "type" "mov_reg") (set_attr "length" "4,4,8")] ) @@ -9636,7 +10731,7 @@ ) (define_insn_and_split "*ior_scc_scc" - [(set (match_operand:SI 0 "s_register_operand" "=r") + [(set (match_operand:SI 0 "s_register_operand" "=Ts") (ior:SI (match_operator:SI 3 "arm_comparison_operator" [(match_operand:SI 1 "s_register_operand" "r") (match_operand:SI 2 "arm_add_operand" "rIL")]) @@ -9674,7 +10769,7 @@ [(match_operand:SI 4 "s_register_operand" "r") (match_operand:SI 5 "arm_add_operand" "rIL")])) (const_int 0))) - (set (match_operand:SI 7 "s_register_operand" "=r") + (set (match_operand:SI 7 "s_register_operand" "=Ts") (ior:SI (match_op_dup 3 [(match_dup 1) (match_dup 2)]) (match_op_dup 6 [(match_dup 4) (match_dup 5)])))] "TARGET_32BIT" @@ -9692,7 +10787,7 @@ (set_attr "length" "16")]) (define_insn_and_split "*and_scc_scc" - [(set (match_operand:SI 0 "s_register_operand" "=r") + [(set (match_operand:SI 0 "s_register_operand" "=Ts") (and:SI (match_operator:SI 3 "arm_comparison_operator" [(match_operand:SI 1 "s_register_operand" "r") (match_operand:SI 2 "arm_add_operand" "rIL")]) @@ -9732,7 +10827,7 @@ [(match_operand:SI 4 "s_register_operand" "r") (match_operand:SI 5 "arm_add_operand" "rIL")])) (const_int 0))) - (set (match_operand:SI 7 "s_register_operand" "=r") + (set (match_operand:SI 7 "s_register_operand" "=Ts") (and:SI (match_op_dup 3 [(match_dup 1) (match_dup 2)]) (match_op_dup 6 [(match_dup 4) (match_dup 5)])))] "TARGET_32BIT" @@ -9754,7 +10849,7 @@ ;; need only zero the value if false (if true, then the value is already ;; correct). (define_insn_and_split "*and_scc_scc_nodom" - [(set (match_operand:SI 0 "s_register_operand" "=&r,&r,&r") + [(set (match_operand:SI 0 "s_register_operand" "=&Ts,&Ts,&Ts") (and:SI (match_operator:SI 3 "arm_comparison_operator" [(match_operand:SI 1 "s_register_operand" "r,r,0") (match_operand:SI 2 "arm_add_operand" "rIL,0,rIL")]) @@ -9822,7 +10917,7 @@ "") ;; ??? 
The conditional patterns above need checking for Thumb-2 usefulness -(define_insn "*negscc" +(define_insn_and_split "*negscc" [(set (match_operand:SI 0 "s_register_operand" "=r") (neg:SI (match_operator 3 "arm_comparison_operator" [(match_operand:SI 1 "s_register_operand" "r") @@ -9829,21 +10924,110 @@ (match_operand:SI 2 "arm_rhs_operand" "rI")]))) (clobber (reg:CC CC_REGNUM))] "TARGET_ARM" - "* - if (GET_CODE (operands[3]) == LT && operands[2] == const0_rtx) - return \"mov\\t%0, %1, asr #31\"; + "#" + "&& reload_completed" + [(const_int 0)] + { + rtx cc_reg = gen_rtx_REG (CCmode, CC_REGNUM); - if (GET_CODE (operands[3]) == NE) - return \"subs\\t%0, %1, %2\;mvnne\\t%0, #0\"; + if (GET_CODE (operands[3]) == LT && operands[2] == const0_rtx) + { + /* Emit mov\\t%0, %1, asr #31 */ + emit_insn (gen_rtx_SET (VOIDmode, + operands[0], + gen_rtx_ASHIFTRT (SImode, + operands[1], + GEN_INT (31)))); + DONE; + } + else if (GET_CODE (operands[3]) == NE) + { + /* Emit subs\\t%0, %1, %2\;mvnne\\t%0, #0 */ + if (CONST_INT_P (operands[2])) + emit_insn (gen_cmpsi2_addneg (operands[0], operands[1], operands[2], + GEN_INT (- INTVAL (operands[2])))); + else + emit_insn (gen_subsi3_compare (operands[0], operands[1], operands[2])); - output_asm_insn (\"cmp\\t%1, %2\", operands); - output_asm_insn (\"mov%D3\\t%0, #0\", operands); - return \"mvn%d3\\t%0, #0\"; - " + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + gen_rtx_NE (SImode, + cc_reg, + const0_rtx), + gen_rtx_SET (SImode, + operands[0], + GEN_INT (~0)))); + DONE; + } + else + { + /* Emit: cmp\\t%1, %2\;mov%D3\\t%0, #0\;mvn%d3\\t%0, #0 */ + emit_insn (gen_rtx_SET (VOIDmode, + cc_reg, + gen_rtx_COMPARE (CCmode, operands[1], operands[2]))); + enum rtx_code rc = GET_CODE (operands[3]); + + rc = reverse_condition (rc); + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + gen_rtx_fmt_ee (rc, + VOIDmode, + cc_reg, + const0_rtx), + gen_rtx_SET (VOIDmode, operands[0], const0_rtx))); + rc = GET_CODE (operands[3]); + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + gen_rtx_fmt_ee (rc, + VOIDmode, + cc_reg, + const0_rtx), + gen_rtx_SET (VOIDmode, + operands[0], + GEN_INT (~0)))); + DONE; + } + FAIL; + } [(set_attr "conds" "clob") (set_attr "length" "12")] ) +(define_insn_and_split "movcond_addsi" + [(set (match_operand:SI 0 "s_register_operand" "=r,l,r") + (if_then_else:SI + (match_operator 5 "comparison_operator" + [(plus:SI (match_operand:SI 3 "s_register_operand" "r,r,r") + (match_operand:SI 4 "arm_add_operand" "rIL,rIL,rIL")) + (const_int 0)]) + (match_operand:SI 1 "arm_rhs_operand" "rI,rPy,r") + (match_operand:SI 2 "arm_rhs_operand" "rI,rPy,r"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT" + "#" + "&& reload_completed" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (plus:SI (match_dup 3) + (match_dup 4)) + (const_int 0))) + (set (match_dup 0) (match_dup 1)) + (cond_exec (match_dup 6) + (set (match_dup 0) (match_dup 2)))] + " + { + enum machine_mode mode = SELECT_CC_MODE (GET_CODE (operands[5]), + operands[3], operands[4]); + enum rtx_code rc = GET_CODE (operands[5]); + + operands[6] = gen_rtx_REG (mode, CC_REGNUM); + gcc_assert (!(mode == CCFPmode || mode == CCFPEmode)); + rc = reverse_condition (rc); + + operands[6] = gen_rtx_fmt_ee (rc, VOIDmode, operands[6], const0_rtx); + } + " + [(set_attr "conds" "clob") + (set_attr "enabled_for_depr_it" "no,yes,yes")] +) + (define_insn "movcond" [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") (if_then_else:SI @@ -9944,9 +11128,9 @@ (set_attr "length" "4,4,8,8") (set_attr_alternative "type" [(if_then_else (match_operand 3 
"const_int_operand" "") - (const_string "simple_alu_imm" ) + (const_string "arlo_imm" ) (const_string "*")) - (const_string "simple_alu_imm") + (const_string "arlo_imm") (const_string "*") (const_string "*")])] ) @@ -9986,9 +11170,9 @@ (set_attr "length" "4,4,8,8") (set_attr_alternative "type" [(if_then_else (match_operand 3 "const_int_operand" "") - (const_string "simple_alu_imm" ) + (const_string "arlo_imm" ) (const_string "*")) - (const_string "simple_alu_imm") + (const_string "arlo_imm") (const_string "*") (const_string "*")])] ) @@ -10174,7 +11358,7 @@ mov%d4\\t%0, %1\;mvn%D4\\t%0, %2 mvn%d4\\t%0, #%B1\;mvn%D4\\t%0, %2" [(set_attr "conds" "use") - (set_attr "insn" "mvn") + (set_attr "type" "mvn_reg") (set_attr "length" "4,8,8")] ) @@ -10207,7 +11391,7 @@ mov%D4\\t%0, %1\;mvn%d4\\t%0, %2 mvn%D4\\t%0, #%B1\;mvn%d4\\t%0, %2" [(set_attr "conds" "use") - (set_attr "insn" "mvn") + (set_attr "type" "mvn_reg") (set_attr "length" "4,8,8")] ) @@ -10245,10 +11429,9 @@ [(set_attr "conds" "use") (set_attr "shift" "2") (set_attr "length" "4,8,8") - (set_attr "insn" "mov") (set (attr "type") (if_then_else (match_operand 3 "const_int_operand" "") - (const_string "alu_shift") - (const_string "alu_shift_reg")))] + (const_string "mov_shift") + (const_string "mov_shift_reg")))] ) (define_insn "*ifcompare_move_shift" @@ -10285,10 +11468,9 @@ [(set_attr "conds" "use") (set_attr "shift" "2") (set_attr "length" "4,8,8") - (set_attr "insn" "mov") (set (attr "type") (if_then_else (match_operand 3 "const_int_operand" "") - (const_string "alu_shift") - (const_string "alu_shift_reg")))] + (const_string "mov_shift") + (const_string "mov_shift_reg")))] ) (define_insn "*ifcompare_shift_shift" @@ -10326,12 +11508,11 @@ [(set_attr "conds" "use") (set_attr "shift" "1") (set_attr "length" "8") - (set_attr "insn" "mov") (set (attr "type") (if_then_else (and (match_operand 2 "const_int_operand" "") (match_operand 4 "const_int_operand" "")) - (const_string "alu_shift") - (const_string "alu_shift_reg")))] + (const_string "mov_shift") + (const_string "mov_shift_reg")))] ) (define_insn "*ifcompare_not_arith" @@ -10363,7 +11544,7 @@ "TARGET_ARM" "mvn%d5\\t%0, %1\;%I6%D5\\t%0, %2, %3" [(set_attr "conds" "use") - (set_attr "insn" "mvn") + (set_attr "type" "mvn_reg") (set_attr "length" "8")] ) @@ -10396,7 +11577,7 @@ "TARGET_ARM" "mvn%D5\\t%0, %1\;%I6%d5\\t%0, %2, %3" [(set_attr "conds" "use") - (set_attr "insn" "mvn") + (set_attr "type" "mvn_reg") (set_attr "length" "8")] ) @@ -10844,7 +12025,7 @@ mvn%D4\\t%0, %2 mov%d4\\t%0, %1\;mvn%D4\\t%0, %2" [(set_attr "conds" "use") - (set_attr "insn" "mvn") + (set_attr "type" "mvn_reg") (set_attr "length" "4,8")] ) @@ -11239,7 +12420,7 @@ "TARGET_32BIT && arm_arch5" "clz%?\\t%0, %1" [(set_attr "predicable" "yes") - (set_attr "insn" "clz")]) + (set_attr "type" "clz")]) (define_insn "rbitsi2" [(set (match_operand:SI 0 "s_register_operand" "=r") @@ -11247,7 +12428,7 @@ "TARGET_32BIT && arm_arch_thumb2" "rbit%?\\t%0, %1" [(set_attr "predicable" "yes") - (set_attr "insn" "clz")]) + (set_attr "type" "clz")]) (define_expand "ctzsi2" [(set (match_operand:SI 0 "s_register_operand" "") @@ -11282,6 +12463,7 @@ (const_int 0)])] "TARGET_32BIT" "" +[(set_attr "predicated" "yes")] ) (define_insn "force_register_use" @@ -11401,7 +12583,8 @@ "arm_arch_thumb2" "movt%?\t%0, %L1" [(set_attr "predicable" "yes") - (set_attr "length" "4")] + (set_attr "predicable_short_it" "no") + (set_attr "length" "4")] ) (define_insn "*arm_rev" @@ -11552,7 +12735,8 @@ false, true))" "ldrd%?\t%0, %3, [%1, %2]" [(set_attr 
"type" "load2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb2_ldrd_base" [(set (match_operand:SI 0 "s_register_operand" "=r") @@ -11566,7 +12750,8 @@ operands[1], 0, false, true))" "ldrd%?\t%0, %2, [%1]" [(set_attr "type" "load2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb2_ldrd_base_neg" [(set (match_operand:SI 0 "s_register_operand" "=r") @@ -11580,7 +12765,8 @@ operands[1], -4, false, true))" "ldrd%?\t%0, %2, [%1, #-4]" [(set_attr "type" "load2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb2_strd" [(set (mem:SI (plus:SI (match_operand:SI 0 "s_register_operand" "rk") @@ -11597,7 +12783,8 @@ false, false))" "strd%?\t%2, %4, [%0, %1]" [(set_attr "type" "store2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb2_strd_base" [(set (mem:SI (match_operand:SI 0 "s_register_operand" "rk")) @@ -11611,7 +12798,8 @@ operands[0], 0, false, false))" "strd%?\t%1, %2, [%0]" [(set_attr "type" "store2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) (define_insn "*thumb2_strd_base_neg" [(set (mem:SI (plus:SI (match_operand:SI 0 "s_register_operand" "rk") @@ -11625,9 +12813,24 @@ operands[0], -4, false, false))" "strd%?\t%1, %2, [%0, #-4]" [(set_attr "type" "store2") - (set_attr "predicable" "yes")]) + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) +;; ARMv8 CRC32 instructions. +(define_insn "" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "s_register_operand" "r") + (match_operand: 2 "s_register_operand" "r")] + CRC))] + "TARGET_CRC32" + "\\t%0, %1, %2" + [(set_attr "type" "crc") + (set_attr "conds" "unconditional")] +) +;; Load the load/store double peephole optimizations. 
+(include "ldrdstrd.md") + ;; Load the load/store multiple patterns (include "ldmstm.md") @@ -11661,6 +12864,8 @@ (include "thumb2.md") ;; Neon patterns (include "neon.md") +;; Crypto patterns +(include "crypto.md") ;; Synchronization Primitives (include "sync.md") ;; Fixed-point patterns --- a/src/gcc/config/arm/fmp626.md +++ b/src/gcc/config/arm/fmp626.md @@ -63,12 +63,15 @@ ;; ALU operations (define_insn_reservation "mp626_alu_op" 1 (and (eq_attr "tune" "fmp626") - (eq_attr "type" "alu_reg,simple_alu_imm")) + (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg")) "fmp626_core") (define_insn_reservation "mp626_alu_shift_op" 2 (and (eq_attr "tune" "fmp626") - (eq_attr "type" "simple_alu_shift,alu_shift,alu_shift_reg")) + (eq_attr "type" "extend,arlo_shift,arlo_shift_reg,\ + mov_shift,mov_shift_reg,\ + mvn_shift,mvn_shift_reg")) "fmp626_core") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -77,22 +80,22 @@ (define_insn_reservation "mp626_mult1" 2 (and (eq_attr "tune" "fmp626") - (eq_attr "insn" "smulwy,smlawy,smulxy,smlaxy")) + (eq_attr "type" "smulwy,smlawy,smulxy,smlaxy")) "fmp626_core") (define_insn_reservation "mp626_mult2" 2 (and (eq_attr "tune" "fmp626") - (eq_attr "insn" "mul,mla")) + (eq_attr "type" "mul,mla")) "fmp626_core") (define_insn_reservation "mp626_mult3" 3 (and (eq_attr "tune" "fmp626") - (eq_attr "insn" "muls,mlas,smull,smlal,umull,umlal,smlalxy,smlawx")) + (eq_attr "type" "muls,mlas,smull,smlal,umull,umlal,smlalxy,smlawx")) "fmp626_core*2") (define_insn_reservation "mp626_mult4" 4 (and (eq_attr "tune" "fmp626") - (eq_attr "insn" "smulls,smlals,umulls,umlals")) + (eq_attr "type" "smulls,smlals,umulls,umlals")) "fmp626_core*3") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; --- a/src/gcc/config/arm/crypto.md +++ b/src/gcc/config/arm/crypto.md @@ -0,0 +1,86 @@ +;; ARMv8-A crypto patterns. +;; Copyright (C) 2013-2014 Free Software Foundation, Inc. +;; Contributed by ARM Ltd. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
+ +(define_insn "crypto_" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: [(match_operand: 1 + "register_operand" "w")] + CRYPTO_UNARY))] + "TARGET_CRYPTO" + ".\\t%q0, %q1" + [(set_attr "neon_type" "")] +) + +(define_insn "crypto_" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: [(match_operand: 1 "register_operand" "0") + (match_operand: 2 "register_operand" "w")] + CRYPTO_BINARY))] + "TARGET_CRYPTO" + ".\\t%q0, %q2" + [(set_attr "neon_type" "")] +) + +(define_insn "crypto_" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: [(match_operand: 1 "register_operand" "0") + (match_operand: 2 "register_operand" "w") + (match_operand: 3 "register_operand" "w")] + CRYPTO_TERNARY))] + "TARGET_CRYPTO" + ".\\t%q0, %q2, %q3" + [(set_attr "neon_type" "")] +) + +(define_insn "crypto_sha1h" + [(set (match_operand:V4SI 0 "register_operand" "=w") + (zero_extend:V4SI + (unspec:SI [(vec_select:SI + (match_operand:V4SI 1 "register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]))] + UNSPEC_SHA1H)))] + "TARGET_CRYPTO" + "sha1h.32\\t%q0, %q1" + [(set_attr "neon_type" "neon_crypto_sha1_fast")] +) + +(define_insn "crypto_vmullp64" + [(set (match_operand:TI 0 "register_operand" "=w") + (unspec:TI [(match_operand:DI 1 "register_operand" "w") + (match_operand:DI 2 "register_operand" "w")] + UNSPEC_VMULLP64))] + "TARGET_CRYPTO" + "vmull.p64\\t%q0, %P1, %P2" + [(set_attr "neon_type" "neon_mul_d_long")] +) + +(define_insn "crypto_" + [(set (match_operand:V4SI 0 "register_operand" "=w") + (unspec: + [(match_operand: 1 "register_operand" "0") + (vec_select:SI + (match_operand: 2 "register_operand" "w") + (parallel [(match_operand:SI 4 "immediate_operand" "i")])) + (match_operand: 3 "register_operand" "w")] + CRYPTO_SELECTING))] + "TARGET_CRYPTO" + ".\\t%q0, %q2, %q3" + [(set_attr "neon_type" "")] +) --- a/src/gcc/config/arm/fa526.md +++ b/src/gcc/config/arm/fa526.md @@ -62,12 +62,15 @@ ;; ALU operations (define_insn_reservation "526_alu_op" 1 (and (eq_attr "tune" "fa526") - (eq_attr "type" "alu_reg,simple_alu_imm")) + (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg")) "fa526_core") (define_insn_reservation "526_alu_shift_op" 2 (and (eq_attr "tune" "fa526") - (eq_attr "type" "simple_alu_shift,alu_shift,alu_shift_reg")) + (eq_attr "type" "extend,arlo_shift,arlo_shift_reg,\ + mov_shift,mov_shift_reg,\ + mvn_shift,mvn_shift_reg")) "fa526_core") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -76,12 +79,12 @@ (define_insn_reservation "526_mult1" 2 (and (eq_attr "tune" "fa526") - (eq_attr "insn" "smlalxy,smulxy,smlaxy,smlalxy")) + (eq_attr "type" "smlalxy,smulxy,smlaxy,smlalxy")) "fa526_core") (define_insn_reservation "526_mult2" 5 (and (eq_attr "tune" "fa526") - (eq_attr "insn" "mul,mla,muls,mlas,umull,umlal,smull,smlal,umulls,\ + (eq_attr "type" "mul,mla,muls,mlas,umull,umlal,smull,smlal,umulls,\ umlals,smulls,smlals,smlawx")) "fa526_core*4") --- a/src/gcc/config/arm/arm-generic.md +++ b/src/gcc/config/arm/arm-generic.md @@ -114,7 +114,9 @@ (define_insn_reservation "mult" 16 (and (eq_attr "generic_sched" "yes") - (and (eq_attr "ldsched" "no") (eq_attr "type" "mult"))) + (and (eq_attr "ldsched" "no") + (ior (eq_attr "mul32" "yes") + (eq_attr "mul64" "yes")))) "core*16") (define_insn_reservation "mult_ldsched_strongarm" 3 @@ -122,7 +124,8 @@ (and (eq_attr "ldsched" "yes") (and (eq_attr "tune" "strongarm,strongarm110,strongarm1100,strongarm1110") - (eq_attr "type" "mult")))) + (ior (eq_attr "mul32" 
"yes") + (eq_attr "mul64" "yes"))))) "core*2") (define_insn_reservation "mult_ldsched" 4 @@ -130,13 +133,17 @@ (and (eq_attr "ldsched" "yes") (and (eq_attr "tune" "!strongarm,strongarm110,strongarm1100,strongarm1110") - (eq_attr "type" "mult")))) + (ior (eq_attr "mul32" "yes") + (eq_attr "mul64" "yes"))))) "core*4") (define_insn_reservation "multi_cycle" 32 (and (eq_attr "generic_sched" "yes") (and (eq_attr "core_cycles" "multi") - (eq_attr "type" "!mult,load_byte,load1,load2,load3,load4,store1,store2,store3,store4"))) + (and (eq_attr "type" "!load_byte,load1,load2,load3,load4,\ + store1,store2,store3,store4") + (not (ior (eq_attr "mul32" "yes") + (eq_attr "mul64" "yes")))))) "core*32") (define_insn_reservation "single_cycle" 1 --- a/src/gcc/config/arm/neon-docgen.ml +++ b/src/gcc/config/arm/neon-docgen.ml @@ -329,6 +329,85 @@ "@c This file is generated automatically using gcc/config/arm/neon-docgen.ml"; "@c Please do not edit manually."] +let crypto_doc = +" +@itemize @bullet +@item poly128_t vldrq_p128(poly128_t const *) +@end itemize + +@itemize @bullet +@item void vstrq_p128(poly128_t *, poly128_t) +@end itemize + +@itemize @bullet +@item uint64x1_t vceq_p64 (poly64x1_t, poly64x1_t) +@end itemize + +@itemize @bullet +@item uint64x1_t vtst_p64 (poly64x1_t, poly64x1_t) +@end itemize + +@itemize @bullet +@item uint32_t vsha1h_u32 (uint32_t) +@*@emph{Form of expected instruction(s):} @code{sha1h.32 @var{q0}, @var{q1}} +@end itemize + +@itemize @bullet +@item uint32x4_t vsha1cq_u32 (uint32x4_t, uint32_t, uint32x4_t) +@*@emph{Form of expected instruction(s):} @code{sha1c.32 @var{q0}, @var{q1}, @var{q2}} +@end itemize + +@itemize @bullet +@item uint32x4_t vsha1pq_u32 (uint32x4_t, uint32_t, uint32x4_t) +@*@emph{Form of expected instruction(s):} @code{sha1p.32 @var{q0}, @var{q1}, @var{q2}} +@end itemize + +@itemize @bullet +@item uint32x4_t vsha1mq_u32 (uint32x4_t, uint32_t, uint32x4_t) +@*@emph{Form of expected instruction(s):} @code{sha1m.32 @var{q0}, @var{q1}, @var{q2}} +@end itemize + +@itemize @bullet +@item uint32x4_t vsha1su0q_u32 (uint32x4_t, uint32x4_t, uint32x4_t) +@*@emph{Form of expected instruction(s):} @code{sha1su0.32 @var{q0}, @var{q1}, @var{q2}} +@end itemize + +@itemize @bullet +@item uint32x4_t vsha1su1q_u32 (uint32x4_t, uint32x4_t) +@*@emph{Form of expected instruction(s):} @code{sha1su1.32 @var{q0}, @var{q1}, @var{q2}} +@end itemize + +@itemize @bullet +@item uint32x4_t vsha256hq_u32 (uint32x4_t, uint32x4_t, uint32x4_t) +@*@emph{Form of expected instruction(s):} @code{sha256h.32 @var{q0}, @var{q1}, @var{q2}} +@end itemize + +@itemize @bullet +@item uint32x4_t vsha256h2q_u32 (uint32x4_t, uint32x4_t, uint32x4_t) +@*@emph{Form of expected instruction(s):} @code{sha256h2.32 @var{q0}, @var{q1}, @var{q2}} +@end itemize + +@itemize @bullet +@item uint32x4_t vsha256su0q_u32 (uint32x4_t, uint32x4_t) +@*@emph{Form of expected instruction(s):} @code{sha256su0.32 @var{q0}, @var{q1}} +@end itemize + +@itemize @bullet +@item uint32x4_t vsha256su1q_u32 (uint32x4_t, uint32x4_t, uint32x4_t) +@*@emph{Form of expected instruction(s):} @code{sha256su1.32 @var{q0}, @var{q1}, @var{q2}} +@end itemize + +@itemize @bullet +@item poly128_t vmull_p64 (poly64_t a, poly64_t b) +@*@emph{Form of expected instruction(s):} @code{vmull.p64 @var{q0}, @var{d1}, @var{d2}} +@end itemize + +@itemize @bullet +@item poly128_t vmull_high_p64 (poly64x2_t a, poly64x2_t b) +@*@emph{Form of expected instruction(s):} @code{vmull.p64 @var{q0}, @var{d1}, @var{d2}} +@end itemize +" + (* Program entry point. 
*) let _ = if Array.length Sys.argv <> 2 then @@ -339,6 +418,7 @@ let chan = open_out file in gnu_header chan; List.iter (document_group chan) intrinsic_groups; + Printf.fprintf chan "%s\n" crypto_doc; close_out chan with Sys_error sys -> failwith ("Could not create output file " ^ file ^ ": " ^ sys) --- a/src/gcc/config/arm/iwmmxt2.md +++ b/src/gcc/config/arm/iwmmxt2.md @@ -24,7 +24,7 @@ "TARGET_REALLY_IWMMXT" "wabs%?\\t%0, %1" [(set_attr "predicable" "yes") - (set_attr "wtype" "wabs")] + (set_attr "type" "wmmx_wabs")] ) (define_insn "iwmmxt_wabsdiffb" @@ -37,7 +37,7 @@ "TARGET_REALLY_IWMMXT" "wabsdiffb%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wabsdiff")] + (set_attr "type" "wmmx_wabsdiff")] ) (define_insn "iwmmxt_wabsdiffh" @@ -50,7 +50,7 @@ "TARGET_REALLY_IWMMXT" "wabsdiffh%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wabsdiff")] + (set_attr "type" "wmmx_wabsdiff")] ) (define_insn "iwmmxt_wabsdiffw" @@ -63,7 +63,7 @@ "TARGET_REALLY_IWMMXT" "wabsdiffw%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wabsdiff")] + (set_attr "type" "wmmx_wabsdiff")] ) (define_insn "iwmmxt_waddsubhx" @@ -81,7 +81,7 @@ "TARGET_REALLY_IWMMXT" "waddsubhx%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "waddsubhx")] + (set_attr "type" "wmmx_waddsubhx")] ) (define_insn "iwmmxt_wsubaddhx" @@ -99,7 +99,7 @@ "TARGET_REALLY_IWMMXT" "wsubaddhx%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wsubaddhx")] + (set_attr "type" "wmmx_wsubaddhx")] ) (define_insn "addc3" @@ -111,7 +111,7 @@ "TARGET_REALLY_IWMMXT" "waddc%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wadd")] + (set_attr "type" "wmmx_wadd")] ) (define_insn "iwmmxt_avg4" @@ -143,7 +143,7 @@ "TARGET_REALLY_IWMMXT" "wavg4%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wavg4")] + (set_attr "type" "wmmx_wavg4")] ) (define_insn "iwmmxt_avg4r" @@ -175,7 +175,7 @@ "TARGET_REALLY_IWMMXT" "wavg4r%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wavg4")] + (set_attr "type" "wmmx_wavg4")] ) (define_insn "iwmmxt_wmaddsx" @@ -194,7 +194,7 @@ "TARGET_REALLY_IWMMXT" "wmaddsx%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wmadd")] + (set_attr "type" "wmmx_wmadd")] ) (define_insn "iwmmxt_wmaddux" @@ -213,7 +213,7 @@ "TARGET_REALLY_IWMMXT" "wmaddux%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wmadd")] + (set_attr "type" "wmmx_wmadd")] ) (define_insn "iwmmxt_wmaddsn" @@ -232,7 +232,7 @@ "TARGET_REALLY_IWMMXT" "wmaddsn%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wmadd")] + (set_attr "type" "wmmx_wmadd")] ) (define_insn "iwmmxt_wmaddun" @@ -251,7 +251,7 @@ "TARGET_REALLY_IWMMXT" "wmaddun%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wmadd")] + (set_attr "type" "wmmx_wmadd")] ) (define_insn "iwmmxt_wmulwsm" @@ -265,7 +265,7 @@ "TARGET_REALLY_IWMMXT" "wmulwsm%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wmulw")] + (set_attr "type" "wmmx_wmulw")] ) (define_insn "iwmmxt_wmulwum" @@ -279,7 +279,7 @@ "TARGET_REALLY_IWMMXT" "wmulwum%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wmulw")] + (set_attr "type" "wmmx_wmulw")] ) (define_insn "iwmmxt_wmulsmr" @@ -297,7 +297,7 @@ "TARGET_REALLY_IWMMXT" "wmulsmr%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wmul")] + (set_attr "type" "wmmx_wmul")] ) (define_insn "iwmmxt_wmulumr" @@ -316,7 +316,7 @@ "TARGET_REALLY_IWMMXT" 
"wmulumr%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wmul")] + (set_attr "type" "wmmx_wmul")] ) (define_insn "iwmmxt_wmulwsmr" @@ -333,7 +333,7 @@ "TARGET_REALLY_IWMMXT" "wmulwsmr%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wmul")] + (set_attr "type" "wmmx_wmul")] ) (define_insn "iwmmxt_wmulwumr" @@ -350,7 +350,7 @@ "TARGET_REALLY_IWMMXT" "wmulwumr%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wmulw")] + (set_attr "type" "wmmx_wmulw")] ) (define_insn "iwmmxt_wmulwl" @@ -361,7 +361,7 @@ "TARGET_REALLY_IWMMXT" "wmulwl%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wmulw")] + (set_attr "type" "wmmx_wmulw")] ) (define_insn "iwmmxt_wqmulm" @@ -371,7 +371,7 @@ "TARGET_REALLY_IWMMXT" "wqmulm%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wqmulm")] + (set_attr "type" "wmmx_wqmulm")] ) (define_insn "iwmmxt_wqmulwm" @@ -381,7 +381,7 @@ "TARGET_REALLY_IWMMXT" "wqmulwm%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wqmulwm")] + (set_attr "type" "wmmx_wqmulwm")] ) (define_insn "iwmmxt_wqmulmr" @@ -391,7 +391,7 @@ "TARGET_REALLY_IWMMXT" "wqmulmr%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wqmulm")] + (set_attr "type" "wmmx_wqmulm")] ) (define_insn "iwmmxt_wqmulwmr" @@ -401,7 +401,7 @@ "TARGET_REALLY_IWMMXT" "wqmulwmr%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "wqmulwm")] + (set_attr "type" "wmmx_wqmulwm")] ) (define_insn "iwmmxt_waddbhusm" @@ -417,7 +417,7 @@ "TARGET_REALLY_IWMMXT" "waddbhusm%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "waddbhus")] + (set_attr "type" "wmmx_waddbhus")] ) (define_insn "iwmmxt_waddbhusl" @@ -433,7 +433,7 @@ "TARGET_REALLY_IWMMXT" "waddbhusl%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "wtype" "waddbhus")] + (set_attr "type" "wmmx_waddbhus")] ) (define_insn "iwmmxt_wqmiabb" @@ -446,7 +446,7 @@ "TARGET_REALLY_IWMMXT" "wqmiabb%?\\t%0, %2, %3" [(set_attr "predicable" "yes") - (set_attr "wtype" "wqmiaxy")] + (set_attr "type" "wmmx_wqmiaxy")] ) (define_insn "iwmmxt_wqmiabt" @@ -459,7 +459,7 @@ "TARGET_REALLY_IWMMXT" "wqmiabt%?\\t%0, %2, %3" [(set_attr "predicable" "yes") - (set_attr "wtype" "wqmiaxy")] + (set_attr "type" "wmmx_wqmiaxy")] ) (define_insn "iwmmxt_wqmiatb" @@ -472,7 +472,7 @@ "TARGET_REALLY_IWMMXT" "wqmiatb%?\\t%0, %2, %3" [(set_attr "predicable" "yes") - (set_attr "wtype" "wqmiaxy")] + (set_attr "type" "wmmx_wqmiaxy")] ) (define_insn "iwmmxt_wqmiatt" @@ -485,7 +485,7 @@ "TARGET_REALLY_IWMMXT" "wqmiatt%?\\t%0, %2, %3" [(set_attr "predicable" "yes") - (set_attr "wtype" "wqmiaxy")] + (set_attr "type" "wmmx_wqmiaxy")] ) (define_insn "iwmmxt_wqmiabbn" @@ -498,7 +498,7 @@ "TARGET_REALLY_IWMMXT" "wqmiabbn%?\\t%0, %2, %3" [(set_attr "predicable" "yes") - (set_attr "wtype" "wqmiaxy")] + (set_attr "type" "wmmx_wqmiaxy")] ) (define_insn "iwmmxt_wqmiabtn" @@ -511,7 +511,7 @@ "TARGET_REALLY_IWMMXT" "wqmiabtn%?\\t%0, %2, %3" [(set_attr "predicable" "yes") - (set_attr "wtype" "wqmiaxy")] + (set_attr "type" "wmmx_wqmiaxy")] ) (define_insn "iwmmxt_wqmiatbn" @@ -524,7 +524,7 @@ "TARGET_REALLY_IWMMXT" "wqmiatbn%?\\t%0, %2, %3" [(set_attr "predicable" "yes") - (set_attr "wtype" "wqmiaxy")] + (set_attr "type" "wmmx_wqmiaxy")] ) (define_insn "iwmmxt_wqmiattn" @@ -537,7 +537,7 @@ "TARGET_REALLY_IWMMXT" "wqmiattn%?\\t%0, %2, %3" [(set_attr "predicable" "yes") - (set_attr "wtype" "wqmiaxy")] + (set_attr "type" "wmmx_wqmiaxy")] ) (define_insn "iwmmxt_wmiabb" @@ -561,7 +561,7 
@@ "TARGET_REALLY_IWMMXT" "wmiabb%?\\t%0, %2, %3" [(set_attr "predicable" "yes") - (set_attr "wtype" "wmiaxy")] + (set_attr "type" "wmmx_wmiaxy")] ) (define_insn "iwmmxt_wmiabt" @@ -585,7 +585,7 @@ "TARGET_REALLY_IWMMXT" "wmiabt%?\\t%0, %2, %3" [(set_attr "predicable" "yes") - (set_attr "wtype" "wmiaxy")] + (set_attr "type" "wmmx_wmiaxy")] ) (define_insn "iwmmxt_wmiatb" @@ -609,7 +609,7 @@ "TARGET_REALLY_IWMMXT" "wmiatb%?\\t%0, %2, %3" [(set_attr "predicable" "yes") - (set_attr "wtype" "wmiaxy")] + (set_attr "type" "wmmx_wmiaxy")] ) (define_insn "iwmmxt_wmiatt" @@ -633,7 +633,7 @@ "TARGET_REALLY_IWMMXT" "wmiatt%?\\t%0, %2, %3" [(set_attr "predicable" "yes") - (set_attr "wtype" "wmiaxy")] + (set_attr "type" "wmmx_wmiaxy")] ) (define_insn "iwmmxt_wmiabbn" @@ -657,7 +657,7 @@ "TARGET_REALLY_IWMMXT" "wmiabbn%?\\t%0, %2, %3" [(set_attr "predicable" "yes") - (set_attr "wtype" "wmiaxy")] + (set_attr "type" "wmmx_wmiaxy")] ) (define_insn "iwmmxt_wmiabtn" @@ -681,7 +681,7 @@ "TARGET_REALLY_IWMMXT" "wmiabtn%?\\t%0, %2, %3" [(set_attr "predicable" "yes") - (set_attr "wtype" "wmiaxy")] + (set_attr "type" "wmmx_wmiaxy")] ) (define_insn "iwmmxt_wmiatbn" @@ -705,7 +705,7 @@ "TARGET_REALLY_IWMMXT" "wmiatbn%?\\t%0, %2, %3" [(set_attr "predicable" "yes") - (set_attr "wtype" "wmiaxy")] + (set_attr "type" "wmmx_wmiaxy")] ) (define_insn "iwmmxt_wmiattn" @@ -729,7 +729,7 @@ "TARGET_REALLY_IWMMXT" "wmiattn%?\\t%0, %2, %3" [(set_attr "predicable" "yes") - (set_attr "wtype" "wmiaxy")] + (set_attr "type" "wmmx_wmiaxy")] ) (define_insn "iwmmxt_wmiawbb" @@ -742,7 +742,7 @@ "TARGET_REALLY_IWMMXT" "wmiawbb%?\\t%0, %2, %3" [(set_attr "predicable" "yes") - (set_attr "wtype" "wmiawxy")] + (set_attr "type" "wmmx_wmiawxy")] ) (define_insn "iwmmxt_wmiawbt" @@ -755,7 +755,7 @@ "TARGET_REALLY_IWMMXT" "wmiawbt%?\\t%0, %2, %3" [(set_attr "predicable" "yes") - (set_attr "wtype" "wmiawxy")] + (set_attr "type" "wmmx_wmiawxy")] ) (define_insn "iwmmxt_wmiawtb" @@ -768,7 +768,7 @@ "TARGET_REALLY_IWMMXT" "wmiawtb%?\\t%0, %2, %3" [(set_attr "predicable" "yes") - (set_attr "wtype" "wmiawxy")] + (set_attr "type" "wmmx_wmiawxy")] ) (define_insn "iwmmxt_wmiawtt" @@ -781,7 +781,7 @@ "TARGET_REALLY_IWMMXT" "wmiawtt%?\\t%0, %2, %3" [(set_attr "predicable" "yes") - (set_attr "wtype" "wmiawxy")] + (set_attr "type" "wmmx_wmiawxy")] ) (define_insn "iwmmxt_wmiawbbn" @@ -794,7 +794,7 @@ "TARGET_REALLY_IWMMXT" "wmiawbbn%?\\t%0, %2, %3" [(set_attr "predicable" "yes") - (set_attr "wtype" "wmiawxy")] + (set_attr "type" "wmmx_wmiawxy")] ) (define_insn "iwmmxt_wmiawbtn" @@ -807,7 +807,7 @@ "TARGET_REALLY_IWMMXT" "wmiawbtn%?\\t%0, %2, %3" [(set_attr "predicable" "yes") - (set_attr "wtype" "wmiawxy")] + (set_attr "type" "wmmx_wmiawxy")] ) (define_insn "iwmmxt_wmiawtbn" @@ -820,7 +820,7 @@ "TARGET_REALLY_IWMMXT" "wmiawtbn%?\\t%0, %2, %3" [(set_attr "predicable" "yes") - (set_attr "wtype" "wmiawxy")] + (set_attr "type" "wmmx_wmiawxy")] ) (define_insn "iwmmxt_wmiawttn" @@ -833,7 +833,7 @@ "TARGET_REALLY_IWMMXT" "wmiawttn%?\\t%0, %2, %3" [(set_attr "predicable" "yes") - (set_attr "wtype" "wmiawxy")] + (set_attr "type" "wmmx_wmiawxy")] ) (define_insn "iwmmxt_wmerge" @@ -858,7 +858,7 @@ "TARGET_REALLY_IWMMXT" "wmerge%?\\t%0, %1, %2, %3" [(set_attr "predicable" "yes") - (set_attr "wtype" "wmerge")] + (set_attr "type" "wmmx_wmerge")] ) (define_insn "iwmmxt_tandc3" @@ -868,7 +868,7 @@ "TARGET_REALLY_IWMMXT" "tandc%?\\t r15" [(set_attr "predicable" "yes") - (set_attr "wtype" "tandc")] + (set_attr "type" "wmmx_tandc")] ) (define_insn "iwmmxt_torc3" @@ -878,7 +878,7 @@ 
"TARGET_REALLY_IWMMXT" "torc%?\\t r15" [(set_attr "predicable" "yes") - (set_attr "wtype" "torc")] + (set_attr "type" "wmmx_torc")] ) (define_insn "iwmmxt_torvsc3" @@ -888,7 +888,7 @@ "TARGET_REALLY_IWMMXT" "torvsc%?\\t r15" [(set_attr "predicable" "yes") - (set_attr "wtype" "torvsc")] + (set_attr "type" "wmmx_torvsc")] ) (define_insn "iwmmxt_textrc3" @@ -899,5 +899,5 @@ "TARGET_REALLY_IWMMXT" "textrc%?\\t r15, %0" [(set_attr "predicable" "yes") - (set_attr "wtype" "textrc")] + (set_attr "type" "wmmx_textrc")] ) --- a/src/gcc/config/arm/cortex-a5.md +++ b/src/gcc/config/arm/cortex-a5.md @@ -58,12 +58,15 @@ (define_insn_reservation "cortex_a5_alu" 2 (and (eq_attr "tune" "cortexa5") - (eq_attr "type" "alu_reg,simple_alu_imm")) + (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg")) "cortex_a5_ex1") (define_insn_reservation "cortex_a5_alu_shift" 2 (and (eq_attr "tune" "cortexa5") - (eq_attr "type" "simple_alu_shift,alu_shift,alu_shift_reg")) + (eq_attr "type" "extend,arlo_shift,arlo_shift_reg,\ + mov_shift,mov_shift_reg,\ + mvn_shift,mvn_shift_reg")) "cortex_a5_ex1") ;; Forwarding path for unshifted operands. @@ -80,7 +83,8 @@ (define_insn_reservation "cortex_a5_mul" 2 (and (eq_attr "tune" "cortexa5") - (eq_attr "type" "mult")) + (ior (eq_attr "mul32" "yes") + (eq_attr "mul64" "yes"))) "cortex_a5_ex1") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; --- a/src/gcc/config/arm/fa606te.md +++ b/src/gcc/config/arm/fa606te.md @@ -62,7 +62,10 @@ ;; ALU operations (define_insn_reservation "606te_alu_op" 1 (and (eq_attr "tune" "fa606te") - (eq_attr "type" "alu_reg,simple_alu_imm,simple_alu_shift,alu_shift,alu_shift_reg")) + (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg, + extend,arlo_shift,arlo_shift_reg,\ + mov_imm,mov_reg,mov_shift,mov_shift_reg,\ + mvn_imm,mvn_reg,mvn_shift,mvn_shift_reg")) "fa606te_core") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -71,22 +74,22 @@ (define_insn_reservation "606te_mult1" 2 (and (eq_attr "tune" "fa606te") - (eq_attr "insn" "smlalxy")) + (eq_attr "type" "smlalxy")) "fa606te_core") (define_insn_reservation "606te_mult2" 3 (and (eq_attr "tune" "fa606te") - (eq_attr "insn" "smlaxy,smulxy,smulwy,smlawy")) + (eq_attr "type" "smlaxy,smulxy,smulwy,smlawy")) "fa606te_core*2") (define_insn_reservation "606te_mult3" 4 (and (eq_attr "tune" "fa606te") - (eq_attr "insn" "mul,mla,muls,mlas")) + (eq_attr "type" "mul,mla,muls,mlas")) "fa606te_core*3") (define_insn_reservation "606te_mult4" 5 (and (eq_attr "tune" "fa606te") - (eq_attr "insn" "umull,umlal,smull,smlal,umulls,umlals,smulls,smlals")) + (eq_attr "type" "umull,umlal,smull,smlal,umulls,umlals,smulls,smlals")) "fa606te_core*4") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; --- a/src/gcc/config/arm/cortex-a9.md +++ b/src/gcc/config/arm/cortex-a9.md @@ -80,18 +80,17 @@ ;; which can go down E2 without any problem. (define_insn_reservation "cortex_a9_dp" 2 (and (eq_attr "tune" "cortexa9") - (ior (and (eq_attr "type" "alu_reg,simple_alu_imm") - (eq_attr "neon_type" "none")) - (and (and (eq_attr "type" "alu_shift_reg, simple_alu_shift,alu_shift") - (eq_attr "insn" "mov")) - (eq_attr "neon_type" "none")))) + (and (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg,\ + mov_shift_reg,mov_shift") + (eq_attr "neon_type" "none"))) "cortex_a9_p0_default|cortex_a9_p1_default") ;; An instruction using the shifter will go down E1. 
(define_insn_reservation "cortex_a9_dp_shift" 3 (and (eq_attr "tune" "cortexa9") - (and (eq_attr "type" "alu_shift_reg, simple_alu_shift,alu_shift") - (not (eq_attr "insn" "mov")))) + (eq_attr "type" "arlo_shift_reg,extend,arlo_shift,\ + mvn_shift,mvn_shift_reg")) "cortex_a9_p0_shift | cortex_a9_p1_shift") ;; Loads have a latency of 4 cycles. @@ -130,7 +129,7 @@ ;; We get 16*16 multiply / mac results in 3 cycles. (define_insn_reservation "cortex_a9_mult16" 3 (and (eq_attr "tune" "cortexa9") - (eq_attr "insn" "smulxy")) + (eq_attr "type" "smulxy")) "cortex_a9_mult16") ;; The 16*16 mac is slightly different that it @@ -137,22 +136,22 @@ ;; reserves M1 and M2 in the same cycle. (define_insn_reservation "cortex_a9_mac16" 3 (and (eq_attr "tune" "cortexa9") - (eq_attr "insn" "smlaxy")) + (eq_attr "type" "smlaxy")) "cortex_a9_mac16") (define_insn_reservation "cortex_a9_multiply" 4 (and (eq_attr "tune" "cortexa9") - (eq_attr "insn" "mul,smmul,smmulr")) + (eq_attr "type" "mul,smmul,smmulr")) "cortex_a9_mult") (define_insn_reservation "cortex_a9_mac" 4 (and (eq_attr "tune" "cortexa9") - (eq_attr "insn" "mla,smmla")) + (eq_attr "type" "mla,smmla")) "cortex_a9_mac") (define_insn_reservation "cortex_a9_multiply_long" 5 (and (eq_attr "tune" "cortexa9") - (eq_attr "insn" "smull,umull,smulls,umulls,smlal,smlals,umlal,umlals")) + (eq_attr "type" "smull,umull,smulls,umulls,smlal,smlals,umlal,umlals")) "cortex_a9_mult_long") ;; An instruction with a result in E2 can be forwarded --- a/src/gcc/config/arm/fa626te.md +++ b/src/gcc/config/arm/fa626te.md @@ -68,12 +68,15 @@ ;; ALU operations (define_insn_reservation "626te_alu_op" 1 (and (eq_attr "tune" "fa626,fa626te") - (eq_attr "type" "alu_reg,simple_alu_imm")) + (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg")) "fa626te_core") (define_insn_reservation "626te_alu_shift_op" 2 (and (eq_attr "tune" "fa626,fa626te") - (eq_attr "type" "simple_alu_shift,alu_shift,alu_shift_reg")) + (eq_attr "type" "extend,arlo_shift,arlo_shift_reg,\ + mov_shift,mov_shift_reg,\ + mvn_shift,mvn_shift_reg")) "fa626te_core") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -82,22 +85,22 @@ (define_insn_reservation "626te_mult1" 2 (and (eq_attr "tune" "fa626,fa626te") - (eq_attr "insn" "smulwy,smlawy,smulxy,smlaxy")) + (eq_attr "type" "smulwy,smlawy,smulxy,smlaxy")) "fa626te_core") (define_insn_reservation "626te_mult2" 2 (and (eq_attr "tune" "fa626,fa626te") - (eq_attr "insn" "mul,mla")) + (eq_attr "type" "mul,mla")) "fa626te_core") (define_insn_reservation "626te_mult3" 3 (and (eq_attr "tune" "fa626,fa626te") - (eq_attr "insn" "muls,mlas,smull,smlal,umull,umlal,smlalxy,smlawx")) + (eq_attr "type" "muls,mlas,smull,smlal,umull,umlal,smlalxy,smlawx")) "fa626te_core*2") (define_insn_reservation "626te_mult4" 4 (and (eq_attr "tune" "fa626,fa626te") - (eq_attr "insn" "smulls,smlals,umulls,umlals")) + (eq_attr "type" "smulls,smlals,umulls,umlals")) "fa626te_core*3") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; --- a/src/gcc/config/arm/neon-gen.ml +++ b/src/gcc/config/arm/neon-gen.ml @@ -114,6 +114,7 @@ | T_uint32x4 -> T_int32x4 | T_uint64x1 -> T_int64x1 | T_uint64x2 -> T_int64x2 + | T_poly64x2 -> T_int64x2 (* Cast to types defined by mode in arm.c, not random types pulled in from the header in use. This fixes incompatible pointer errors when compiling with C++. 
*) @@ -121,9 +122,12 @@ | T_uint16 | T_int16 -> T_intHI | T_uint32 | T_int32 -> T_intSI | T_uint64 | T_int64 -> T_intDI + | T_float16 -> T_floatHF | T_float32 -> T_floatSF | T_poly8 -> T_intQI | T_poly16 -> T_intHI + | T_poly64 -> T_intDI + | T_poly128 -> T_intTI | T_arrayof (n, elt) -> T_arrayof (n, signed_ctype elt) | T_ptrto elt -> T_ptrto (signed_ctype elt) | T_const elt -> T_const (signed_ctype elt) @@ -275,8 +279,8 @@ let mode = mode_of_elt elttype shape in string_of_mode mode with MixedMode (dst, src) -> - let dstmode = mode_of_elt dst shape - and srcmode = mode_of_elt src shape in + let dstmode = mode_of_elt ~argpos:0 dst shape + and srcmode = mode_of_elt ~argpos:1 src shape in string_of_mode dstmode ^ string_of_mode srcmode let get_shuffle features = @@ -291,19 +295,24 @@ match List.find (fun feature -> match feature with Requires_feature _ -> true | Requires_arch _ -> true + | Requires_FP_bit _ -> true | _ -> false) features with - Requires_feature feature -> + Requires_feature feature -> Format.printf "#ifdef __ARM_FEATURE_%s@\n" feature | Requires_arch arch -> Format.printf "#if __ARM_ARCH >= %d@\n" arch + | Requires_FP_bit bit -> + Format.printf "#if ((__ARM_FP & 0x%X) != 0)@\n" + (1 lsl bit) | _ -> assert false with Not_found -> assert true let print_feature_test_end features = let feature = - List.exists (function Requires_feature x -> true - | Requires_arch x -> true + List.exists (function Requires_feature _ -> true + | Requires_arch _ -> true + | Requires_FP_bit _ -> true | _ -> false) features in if feature then Format.printf "#endif@\n" @@ -356,79 +365,96 @@ abase : "ARM" base name for the type (i.e. int in int8x8_t). esize : element size. enum : element count. + alevel: architecture level at which available. *) +type fpulevel = CRYPTO | ALL + let deftypes () = let typeinfo = [ (* Doubleword vector types. *) - "__builtin_neon_qi", "int", 8, 8; - "__builtin_neon_hi", "int", 16, 4; - "__builtin_neon_si", "int", 32, 2; - "__builtin_neon_di", "int", 64, 1; - "__builtin_neon_sf", "float", 32, 2; - "__builtin_neon_poly8", "poly", 8, 8; - "__builtin_neon_poly16", "poly", 16, 4; - "__builtin_neon_uqi", "uint", 8, 8; - "__builtin_neon_uhi", "uint", 16, 4; - "__builtin_neon_usi", "uint", 32, 2; - "__builtin_neon_udi", "uint", 64, 1; + "__builtin_neon_qi", "int", 8, 8, ALL; + "__builtin_neon_hi", "int", 16, 4, ALL; + "__builtin_neon_si", "int", 32, 2, ALL; + "__builtin_neon_di", "int", 64, 1, ALL; + "__builtin_neon_hf", "float", 16, 4, ALL; + "__builtin_neon_sf", "float", 32, 2, ALL; + "__builtin_neon_poly8", "poly", 8, 8, ALL; + "__builtin_neon_poly16", "poly", 16, 4, ALL; + "__builtin_neon_poly64", "poly", 64, 1, CRYPTO; + "__builtin_neon_uqi", "uint", 8, 8, ALL; + "__builtin_neon_uhi", "uint", 16, 4, ALL; + "__builtin_neon_usi", "uint", 32, 2, ALL; + "__builtin_neon_udi", "uint", 64, 1, ALL; (* Quadword vector types. 
*) - "__builtin_neon_qi", "int", 8, 16; - "__builtin_neon_hi", "int", 16, 8; - "__builtin_neon_si", "int", 32, 4; - "__builtin_neon_di", "int", 64, 2; - "__builtin_neon_sf", "float", 32, 4; - "__builtin_neon_poly8", "poly", 8, 16; - "__builtin_neon_poly16", "poly", 16, 8; - "__builtin_neon_uqi", "uint", 8, 16; - "__builtin_neon_uhi", "uint", 16, 8; - "__builtin_neon_usi", "uint", 32, 4; - "__builtin_neon_udi", "uint", 64, 2 + "__builtin_neon_qi", "int", 8, 16, ALL; + "__builtin_neon_hi", "int", 16, 8, ALL; + "__builtin_neon_si", "int", 32, 4, ALL; + "__builtin_neon_di", "int", 64, 2, ALL; + "__builtin_neon_sf", "float", 32, 4, ALL; + "__builtin_neon_poly8", "poly", 8, 16, ALL; + "__builtin_neon_poly16", "poly", 16, 8, ALL; + "__builtin_neon_poly64", "poly", 64, 2, CRYPTO; + "__builtin_neon_uqi", "uint", 8, 16, ALL; + "__builtin_neon_uhi", "uint", 16, 8, ALL; + "__builtin_neon_usi", "uint", 32, 4, ALL; + "__builtin_neon_udi", "uint", 64, 2, ALL ] in List.iter - (fun (cbase, abase, esize, enum) -> + (fun (cbase, abase, esize, enum, fpulevel) -> let attr = match enum with 1 -> "" | _ -> Printf.sprintf "\t__attribute__ ((__vector_size__ (%d)))" (esize * enum / 8) in - Format.printf "typedef %s %s%dx%d_t%s;@\n" cbase abase esize enum attr) + if fpulevel == CRYPTO then + Format.printf "#ifdef __ARM_FEATURE_CRYPTO\n"; + Format.printf "typedef %s %s%dx%d_t%s;@\n" cbase abase esize enum attr; + if fpulevel == CRYPTO then + Format.printf "#endif\n";) typeinfo; Format.print_newline (); (* Extra types not in . *) Format.printf "typedef float float32_t;\n"; Format.printf "typedef __builtin_neon_poly8 poly8_t;\n"; - Format.printf "typedef __builtin_neon_poly16 poly16_t;\n" + Format.printf "typedef __builtin_neon_poly16 poly16_t;\n"; + Format.printf "#ifdef __ARM_FEATURE_CRYPTO\n"; + Format.printf "typedef __builtin_neon_poly64 poly64_t;\n"; + Format.printf "typedef __builtin_neon_poly128 poly128_t;\n"; + Format.printf "#endif\n" -(* Output structs containing arrays, for load & store instructions etc. *) +(* Output structs containing arrays, for load & store instructions etc. + poly128_t is deliberately not included here because it has no array types + defined for it. 
*) let arrtypes () = let typeinfo = [ - "int", 8; "int", 16; - "int", 32; "int", 64; - "uint", 8; "uint", 16; - "uint", 32; "uint", 64; - "float", 32; "poly", 8; - "poly", 16 + "int", 8, ALL; "int", 16, ALL; + "int", 32, ALL; "int", 64, ALL; + "uint", 8, ALL; "uint", 16, ALL; + "uint", 32, ALL; "uint", 64, ALL; + "float", 32, ALL; "poly", 8, ALL; + "poly", 16, ALL; "poly", 64, CRYPTO ] in - let writestruct elname elsize regsize arrsize = + let writestruct elname elsize regsize arrsize fpulevel = let elnum = regsize / elsize in let structname = Printf.sprintf "%s%dx%dx%d_t" elname elsize elnum arrsize in let sfmt = start_function () in - Format.printf "typedef struct %s" structname; + Format.printf "%stypedef struct %s" + (if fpulevel == CRYPTO then "#ifdef __ARM_FEATURE_CRYPTO\n" else "") structname; open_braceblock sfmt; Format.printf "%s%dx%d_t val[%d];" elname elsize elnum arrsize; close_braceblock sfmt; - Format.printf " %s;" structname; + Format.printf " %s;%s" structname (if fpulevel == CRYPTO then "\n#endif\n" else ""); end_function sfmt; in for n = 2 to 4 do List.iter - (fun (elname, elsize) -> - writestruct elname elsize 64 n; - writestruct elname elsize 128 n) + (fun (elname, elsize, alevel) -> + writestruct elname elsize 64 n alevel; + writestruct elname elsize 128 n alevel) typeinfo done @@ -484,6 +510,8 @@ print_ops ops; Format.print_newline (); print_ops reinterp; + print_ops reinterpq; + Format.printf "%s" crypto_intrinsics; print_lines [ "#ifdef __cplusplus"; "}"; --- a/src/gcc/config/mips/linux-common.h +++ b/src/gcc/config/mips/linux-common.h @@ -44,7 +44,7 @@ #undef LIB_SPEC #define LIB_SPEC \ LINUX_OR_ANDROID_LD (GNU_USER_TARGET_LIB_SPEC, \ - GNU_USER_TARGET_LIB_SPEC " " ANDROID_LIB_SPEC) + GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC " " ANDROID_LIB_SPEC) #undef STARTFILE_SPEC #define STARTFILE_SPEC \ --- a/src/gcc/tree-vect-slp.c +++ b/src/gcc/tree-vect-slp.c @@ -2192,7 +2192,7 @@ } /* Cost model: check if the vectorization is worthwhile. */ - if (flag_vect_cost_model + if (!unlimited_cost_model () && !vect_bb_vectorization_profitable_p (bb_vinfo)) { if (dump_enabled_p ()) --- a/src/gcc/params.def +++ b/src/gcc/params.def @@ -544,6 +544,11 @@ "Bound on number of runtime checks inserted by the vectorizer's loop versioning for alias check", 10, 0, 0) +DEFPARAM(PARAM_VECT_MAX_PEELING_FOR_ALIGNMENT, + "vect-max-peeling-for-alignment", + "Max number of loop peels to enhancement alignment of data references in a loop", + -1, -1, 64) + DEFPARAM(PARAM_MAX_CSELIB_MEMORY_LOCATIONS, "max-cselib-memory-locations", "The maximum memory locations recorded by cselib", --- a/src/libobjc/ChangeLog.linaro +++ b/src/libobjc/ChangeLog.linaro @@ -0,0 +1,51 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. 
+ +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. --- a/src/libgfortran/ChangeLog.linaro +++ b/src/libgfortran/ChangeLog.linaro @@ -0,0 +1,51 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. --- a/src/libada/ChangeLog.linaro +++ b/src/libada/ChangeLog.linaro @@ -0,0 +1,51 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. --- a/src/libffi/ChangeLog.linaro +++ b/src/libffi/ChangeLog.linaro @@ -0,0 +1,51 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. --- a/src/libssp/ChangeLog.linaro +++ b/src/libssp/ChangeLog.linaro @@ -0,0 +1,51 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. 
+ +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. --- a/src/libcpp/configure +++ b/src/libcpp/configure @@ -7152,9 +7152,7 @@ case $target in aarch64*-*-* | \ alpha*-*-* | \ - arm*-*-*eabi* | \ - arm*-*-rtems* | \ - arm*-*-symbianelf* | \ + arm*-*-* | \ x86_64-*-* | \ ia64-*-* | \ hppa*64*-*-* | \ --- a/src/libcpp/configure.ac +++ b/src/libcpp/configure.ac @@ -184,9 +184,7 @@ case $target in aarch64*-*-* | \ alpha*-*-* | \ - arm*-*-*eabi* | \ - arm*-*-rtems* | \ - arm*-*-symbianelf* | \ + arm*-*-* | \ x86_64-*-* | \ ia64-*-* | \ hppa*64*-*-* | \ --- a/src/libcpp/ChangeLog.linaro +++ b/src/libcpp/ChangeLog.linaro @@ -0,0 +1,59 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-09-05 Yvan Roux + + Backport from trunk r201566. + 2013-08-07 Richard Earnshaw + + * configure.ac: Set need_64bit_hwint for all arm targets. + * configure: Regenerated. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. --- a/src/libcpp/po/ChangeLog.linaro +++ b/src/libcpp/po/ChangeLog.linaro @@ -0,0 +1,51 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. + +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released. --- a/src/fixincludes/ChangeLog.linaro +++ b/src/fixincludes/ChangeLog.linaro @@ -0,0 +1,51 @@ +2014-03-11 Yvan Roux + + GCC Linaro 4.8-2014.03 released. + +2014-02-11 Yvan Roux + + GCC Linaro 4.8-2014.02 released. + +2014-01-17 Christophe Lyon + + GCC Linaro 4.8-2014.01 released. + +2013-12-21 Christophe Lyon + + GCC Linaro 4.8-2013.12 released. + +2013-11-14 Christophe Lyon + + GCC Linaro 4.8-2013.11 released. + +2013-10-15 Christophe Lyon + + GCC Linaro 4.8-2013.10 released. 
+ +2013-09-10 Christophe Lyon + + GCC Linaro 4.8-2013.09 released. + +2013-08-14 Christophe Lyon + + GCC Linaro 4.8-2013.08 released. + +2013-07-19 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.07-1 released. + +2013-07-05 Christophe Lyon + + GCC Linaro 4.8-2013.07 released. + +2013-06-11 Rob Savoye + + GCC Linaro gcc-linaro-4.8-2013.06 released. + +2013-05-14 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.05 released. + +2013-04-09 Matthew Gretton-Dann + + GCC Linaro 4.8-2013.04 released.
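
Illustrative usage of the new ACLE crypto intrinsics whose prototypes appear in the neon-docgen.ml hunk above (vmull_p64, vsha256hq_u32 and friends). This is a sketch only, not part of the diff: it assumes a toolchain built with this series and invoked with something like -march=armv8-a -mfpu=crypto-neon-fp-armv8, and the input constants are arbitrary test values.

  /* crypto_example.c: exercise two of the intrinsics documented above.
     Sketch only; constants are arbitrary, flags may differ per release.  */
  #include <arm_neon.h>
  #include <stdint.h>
  #include <stdio.h>
  #include <string.h>

  int main (void)
  {
    /* 64x64 -> 128 bit carry-less (polynomial) multiply, vmull.p64.  */
    poly64_t a = (poly64_t) 0x87;
    poly64_t b = (poly64_t) 0x131;
    poly128_t prod = vmull_p64 (a, b);

    /* One SHA-256 hash-update step, sha256h.32.  */
    uint32x4_t state_abcd = vdupq_n_u32 (0x6a09e667u);
    uint32x4_t state_efgh = vdupq_n_u32 (0xbb67ae85u);
    uint32x4_t wk         = vdupq_n_u32 (0x428a2f98u);
    uint32x4_t next       = vsha256hq_u32 (state_abcd, state_efgh, wk);

    /* poly128_t is a 128-bit scalar; copy out the low 64 bits to print.  */
    uint64_t lo;
    memcpy (&lo, &prod, sizeof lo);
    printf ("low 64 bits of p64 product: %#llx\n", (unsigned long long) lo);
    printf ("lane 0 of sha256h result:   %#x\n", vgetq_lane_u32 (next, 0));
    return 0;
  }
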
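For reference, the CRYPTO-gated entries added to deftypes () and arrtypes () in the neon-gen.ml hunk make the generated arm_neon.h emit roughly the following fragment (derived from the Format.printf calls shown above; exact whitespace may differ):

  #ifdef __ARM_FEATURE_CRYPTO
  typedef __builtin_neon_poly64 poly64x1_t;
  #endif
  #ifdef __ARM_FEATURE_CRYPTO
  typedef __builtin_neon_poly64 poly64x2_t	__attribute__ ((__vector_size__ (16)));
  #endif
  #ifdef __ARM_FEATURE_CRYPTO
  typedef __builtin_neon_poly64 poly64_t;
  typedef __builtin_neon_poly128 poly128_t;
  #endif
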
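The params.def hunk above also introduces a new --param, vect-max-peeling-for-alignment. A usage sketch (the file name and surrounding options are placeholders, not from the patch):

  gcc -O3 -ftree-vectorize --param vect-max-peeling-for-alignment=4 -c loop.c

Per the DEFPARAM entry, the accepted range is -1 to 64 with a default of -1, i.e. by default the vectorizer chooses how many iterations to peel for alignment on its own.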