Diffstat (limited to 'debian/patches/gcc-linaro.diff')
-rw-r--r-- | debian/patches/gcc-linaro.diff | 48934
1 file changed, 3 insertions, 48931 deletions
diff --git a/debian/patches/gcc-linaro.diff b/debian/patches/gcc-linaro.diff index e7d7c84..b363b1f 100644 --- a/debian/patches/gcc-linaro.diff +++ b/debian/patches/gcc-linaro.diff @@ -1,48934 +1,6 @@ -# DP: Changes for the Linaro 4.9-2015.01 release. +# DP: Changes for the Linaro 5-2015.xx release. -LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@219502 \ - svn://gcc.gnu.org/svn/gcc/branches/linaro/gcc-4_9-branch@219643 \ +LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-5-branch@219502 \ + svn://gcc.gnu.org/svn/gcc/branches/linaro/gcc-5-branch@219643 \ | filterdiff --remove-timestamps --addoldprefix=a/src/ --addnewprefix=b/src/ ---- a/src/libitm/ChangeLog.linaro -+++ b/src/libitm/ChangeLog.linaro -@@ -0,0 +1,68 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-10-03 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213035. -+ 2014-07-24 Richard Henderson <rth@redhat.com> -+ -+ * config/aarch64/sjlj.S (_ITM_beginTransaction): Use post-inc -+ addressing mode in epilogue. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-26 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r210615. -+ 2014-05-19 Richard Henderson <rth@redhat.com> -+ -+ * config/aarch64/sjlj.S: New file. -+ * config/aarch64/target.h: New file. -+ * configure.tgt: Enable aarch64. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/libgomp/ChangeLog.linaro -+++ b/src/libgomp/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. 
-+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/libquadmath/ChangeLog.linaro -+++ b/src/libquadmath/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/libsanitizer/ChangeLog.linaro -+++ b/src/libsanitizer/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/zlib/ChangeLog.linaro -+++ b/src/zlib/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. 
-+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/libstdc++-v3/ChangeLog.linaro -+++ b/src/libstdc++-v3/ChangeLog.linaro -@@ -0,0 +1,70 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r216444. -+ 2014-10-19 Maxim Kuvyrkov <maxim.kuvyrkov@linaro.org> -+ -+ * testsuite/lib/libstdc++.exp (v3-copy-file): New proc split from ... -+ (v3-copy-files): ... this. Update. -+ (check_v3_target_fileio): Fix race on cin_unget-1.txt file. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-10-06 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215101. -+ 2014-09-10 Tony Wang <tony.wang@arm.com> -+ -+ PR target/56846 -+ * libsupc++/eh_personality.cc (PERSONALITY_FUNCTION): -+ Return with CONTINUE_UNWINDING when the state pattern -+ contains: _US_VIRTUAL_UNWIND_FRAME | _US_FORCE_UNWIND -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/libstdc++-v3/testsuite/lib/libstdc++.exp -+++ b/src/libstdc++-v3/testsuite/lib/libstdc++.exp -@@ -63,19 +63,24 @@ - verbose "++ $var is $val" $n - } - -+# Copy file to the target. -+proc v3-copy-file {src dst} { -+ if { [catch { set symlink [file readlink $src] } x] } then { -+ remote_download target $src $dst -+ } else { -+ if { [regexp "^/" "$symlink"] } then { -+ remote_download target $symlink $dst -+ } else { -+ set dirname [file dirname $f] -+ remote_download target $dirname/$symlink $dst -+ } -+ } -+} -+ - # Called by v3-init below. "Static" to this file. 
- proc v3-copy-files {srcfiles} { - foreach f $srcfiles { -- if { [catch { set symlink [file readlink $f] } x] } then { -- remote_download target $f -- } else { -- if { [regexp "^/" "$symlink"] } then { -- remote_download target $symlink -- } else { -- set dirname [file dirname $f] -- remote_download target $dirname/$symlink -- } -- } -+ v3-copy-file $f [file tail $f] - } - } - -@@ -681,8 +686,8 @@ - # the file functions - set src fileio[pid].cc - set exe fileio[pid].x -- set testfile "cin_unget-1.txt" -- v3-copy-files "$srcdir/data/$testfile" -+ set testfile "cin_unget-1.[pid].txt" -+ v3-copy-file "$srcdir/data/cin_unget-1.txt" "$testfile" - - set f [open $src "w"] - puts $f "#include <sys/types.h>" ---- a/src/configure.ac -+++ b/src/configure.ac -@@ -331,7 +331,8 @@ - if test "$is_elf" = "yes"; then - # Check for target supported by gold. - case "${target}" in -- i?86-*-* | x86_64-*-* | sparc*-*-* | powerpc*-*-* | arm*-*-* | tilegx*-*-*) -+ i?86-*-* | x86_64-*-* | sparc*-*-* | powerpc*-*-* | arm*-*-* \ -+ | aarch64*-*-* | tilegx*-*-*) - configdirs="$configdirs gold" - if test x${ENABLE_GOLD} = xdefault; then - default_ld=gold ---- a/src/intl/ChangeLog.linaro -+++ b/src/intl/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/ChangeLog.linaro -+++ b/src/ChangeLog.linaro -@@ -0,0 +1,59 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215865. -+ 2014-10-03 Jing Yu <jingyu@google.com> -+ -+ * configure.ac: Add aarch64 to list of targets that support gold. -+ * configure: Regenerate. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. 
-+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/boehm-gc/ChangeLog.linaro -+++ b/src/boehm-gc/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/include/ChangeLog.linaro -+++ b/src/include/ChangeLog.linaro -@@ -0,0 +1,58 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209649. -+ 2014-04-22 Yufeng Zhang <yufeng.zhang@arm.com> -+ -+ * longlong.h: Merge from glibc. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/include/longlong.h -+++ b/src/include/longlong.h -@@ -1,5 +1,5 @@ - /* longlong.h -- definitions for mixed size 32/64 bit arithmetic. -- Copyright (C) 1991-2013 Free Software Foundation, Inc. -+ Copyright (C) 1991-2014 Free Software Foundation, Inc. - - This file is part of the GNU C Library. 
- -@@ -122,6 +122,22 @@ - #define __AND_CLOBBER_CC , "cc" - #endif /* __GNUC__ < 2 */ - -+#if defined (__aarch64__) -+ -+#if W_TYPE_SIZE == 32 -+#define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X)) -+#define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X)) -+#define COUNT_LEADING_ZEROS_0 32 -+#endif /* W_TYPE_SIZE == 32 */ -+ -+#if W_TYPE_SIZE == 64 -+#define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clzll (X)) -+#define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctzll (X)) -+#define COUNT_LEADING_ZEROS_0 64 -+#endif /* W_TYPE_SIZE == 64 */ -+ -+#endif /* __aarch64__ */ -+ - #if defined (__alpha) && W_TYPE_SIZE == 64 - #define umul_ppmm(ph, pl, m0, m1) \ - do { \ ---- a/src/libiberty/ChangeLog.linaro -+++ b/src/libiberty/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/lto-plugin/ChangeLog.linaro -+++ b/src/lto-plugin/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/contrib/regression/ChangeLog.linaro -+++ b/src/contrib/regression/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. 
-+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/contrib/ChangeLog.linaro -+++ b/src/contrib/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/contrib/reghunt/ChangeLog.linaro -+++ b/src/contrib/reghunt/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. 
---- a/src/libatomic/ChangeLog.linaro -+++ b/src/libatomic/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/config/ChangeLog.linaro -+++ b/src/config/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/libbacktrace/ChangeLog.linaro -+++ b/src/libbacktrace/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. 
-+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/libjava/libltdl/ChangeLog.linaro -+++ b/src/libjava/libltdl/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/libjava/ChangeLog.linaro -+++ b/src/libjava/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/libjava/classpath/ChangeLog.linaro -+++ b/src/libjava/classpath/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. 
-+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/gnattools/ChangeLog.linaro -+++ b/src/gnattools/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/maintainer-scripts/ChangeLog.linaro -+++ b/src/maintainer-scripts/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/configure -+++ b/src/configure -@@ -2971,7 +2971,8 @@ - if test "$is_elf" = "yes"; then - # Check for target supported by gold. 
- case "${target}" in -- i?86-*-* | x86_64-*-* | sparc*-*-* | powerpc*-*-* | arm*-*-* | tilegx*-*-*) -+ i?86-*-* | x86_64-*-* | sparc*-*-* | powerpc*-*-* | arm*-*-* \ -+ | aarch64*-*-* | tilegx*-*-*) - configdirs="$configdirs gold" - if test x${ENABLE_GOLD} = xdefault; then - default_ld=gold ---- a/src/libgcc/config.host -+++ b/src/libgcc/config.host -@@ -316,13 +316,15 @@ - case ${host} in - aarch64*-*-elf) - extra_parts="$extra_parts crtbegin.o crtend.o crti.o crtn.o" -+ extra_parts="$extra_parts crtfastmath.o" - tmake_file="${tmake_file} ${cpu_type}/t-aarch64" -- tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp" -+ tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm" - ;; - aarch64*-*-linux*) -+ extra_parts="$extra_parts crtfastmath.o" - md_unwind_header=aarch64/linux-unwind.h - tmake_file="${tmake_file} ${cpu_type}/t-aarch64" -- tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp" -+ tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm" - ;; - alpha*-*-linux*) - tmake_file="${tmake_file} alpha/t-alpha alpha/t-ieee t-crtfm alpha/t-linux" ---- a/src/libgcc/ChangeLog.linaro -+++ b/src/libgcc/ChangeLog.linaro -@@ -0,0 +1,69 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215013. -+ 2014-09-08 Joseph Myers <joseph@codesourcery.com> -+ -+ * fp-bit.c (pack_d, unpack_d): Remove LARGEST_EXPONENT_IS_NORMAL -+ and ROUND_TOWARDS_ZERO conditionals. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-10-06 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215086. -+ 2014-09-09 Marcus Shawcroft <marcus.shawcroft@arm.com> -+ Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> -+ -+ * config.host (aarch64*): Include crtfastmath.o and -+ t-crtfm. -+ * config/aarch64/crtfastmath.c: New file. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/libgcc/config/aarch64/crtfastmath.c -+++ b/src/libgcc/config/aarch64/crtfastmath.c -@@ -0,0 +1,36 @@ -+/* -+ * Copyright (C) 2014 Free Software Foundation, Inc. -+ * -+ * This file is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License as published by the -+ * Free Software Foundation; either version 3, or (at your option) any -+ * later version. -+ * -+ * This file is distributed in the hope that it will be useful, but -+ * WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU -+ * General Public License for more details. -+ * -+ * Under Section 7 of GPL version 3, you are granted additional -+ * permissions described in the GCC Runtime Library Exception, version -+ * 3.1, as published by the Free Software Foundation. -+ * -+ * You should have received a copy of the GNU General Public License and -+ * a copy of the GCC Runtime Library Exception along with this program; -+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -+ * <http://www.gnu.org/licenses/>. -+ */ -+ -+#define _FPU_FPCR_FZ 0x1000000 -+ -+#define _FPU_SETCW(fpcr) \ -+ { \ -+ __asm__ __volatile__ ("msr fpcr, %0" : : "r" (fpcr)); \ -+ } -+ -+static void __attribute__((constructor)) -+set_fast_math (void) -+{ -+ /* Flush to zero, round to nearest, IEEE exceptions disabled. */ -+ _FPU_SETCW (_FPU_FPCR_FZ); -+} ---- a/src/libgcc/config/arm/bpabi-v6m.S -+++ b/src/libgcc/config/arm/bpabi-v6m.S -@@ -148,7 +148,7 @@ - mov r0, sp - push {r0, lr} - ldr r0, [sp, #8] -- bl SYM(__gnu_uldivmod_helper) -+ bl SYM(__udivmoddi4) - ldr r3, [sp, #4] - mov lr, r3 - add sp, sp, #8 ---- a/src/libgcc/config/arm/bpabi.c -+++ b/src/libgcc/config/arm/bpabi.c -@@ -26,9 +26,6 @@ - extern unsigned long long __udivdi3 (unsigned long long, - unsigned long long); - extern long long __gnu_ldivmod_helper (long long, long long, long long *); --extern unsigned long long __gnu_uldivmod_helper (unsigned long long, -- unsigned long long, -- unsigned long long *); - - - long long -@@ -43,14 +40,3 @@ - return quotient; - } - --unsigned long long --__gnu_uldivmod_helper (unsigned long long a, -- unsigned long long b, -- unsigned long long *remainder) --{ -- unsigned long long quotient; -- -- quotient = __udivdi3 (a, b); -- *remainder = a - b * quotient; -- return quotient; --} ---- a/src/libgcc/config/arm/bpabi.S -+++ b/src/libgcc/config/arm/bpabi.S -@@ -22,6 +22,8 @@ - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - <http://www.gnu.org/licenses/>. */ - -+ .cfi_sections .debug_frame -+ - #ifdef __ARM_EABI__ - /* Some attributes that are common to all routines in this file. */ - /* Tag_ABI_align_needed: This code does not require 8-byte -@@ -120,49 +122,137 @@ - #endif - .endm - -+/* we can use STRD/LDRD on v5TE and later, and any Thumb-2 architecture. */ -+#if (defined(__ARM_EABI__) \ -+ && (defined(__thumb2__) \ -+ || (__ARM_ARCH >= 5 && defined(__TARGET_FEATURE_DSP)))) -+#define CAN_USE_LDRD 1 -+#else -+#define CAN_USE_LDRD 0 -+#endif -+ -+/* set up stack from for call to __udivmoddi4. At the end of the macro the -+ stack is arranged as follows: -+ sp+12 / space for remainder -+ sp+8 \ (written by __udivmoddi4) -+ sp+4 lr -+ sp+0 sp+8 [rp (remainder pointer) argument for __udivmoddi4] -+ -+ */ -+.macro push_for_divide fname -+#if defined(__thumb2__) && CAN_USE_LDRD -+ sub ip, sp, #8 -+ strd ip, lr, [sp, #-16]! -+#else -+ sub sp, sp, #8 -+ do_push {sp, lr} -+#endif -+ .cfi_adjust_cfa_offset 16 -+ .cfi_offset 14, -12 -+.endm -+ -+/* restore stack */ -+.macro pop_for_divide -+ ldr lr, [sp, #4] -+#if CAN_USE_LDRD -+ ldrd r2, r3, [sp, #8] -+ add sp, sp, #16 -+#else -+ add sp, sp, #8 -+ do_pop {r2, r3} -+#endif -+ .cfi_restore 14 -+ .cfi_adjust_cfa_offset 0 -+.endm -+ - #ifdef L_aeabi_ldivmod - -+/* Perform 64 bit signed division. 
-+ Inputs: -+ r0:r1 numerator -+ r2:r3 denominator -+ Outputs: -+ r0:r1 quotient -+ r2:r3 remainder -+ */ - ARM_FUNC_START aeabi_ldivmod -- cfi_start __aeabi_ldivmod, LSYM(Lend_aeabi_ldivmod) -- test_div_by_zero signed -+ .cfi_startproc -+ test_div_by_zero signed - -- sub sp, sp, #8 --#if defined(__thumb2__) -- mov ip, sp -- push {ip, lr} --#else -- do_push {sp, lr} --#endif --98: cfi_push 98b - __aeabi_ldivmod, 0xe, -0xc, 0x10 -- bl SYM(__gnu_ldivmod_helper) __PLT__ -- ldr lr, [sp, #4] -- add sp, sp, #8 -- do_pop {r2, r3} -+ push_for_divide __aeabi_ldivmod -+ cmp xxh, #0 -+ blt 1f -+ cmp yyh, #0 -+ blt 2f -+ /* arguments in (r0:r1), (r2:r3) and *sp */ -+ bl SYM(__udivmoddi4) __PLT__ -+ .cfi_remember_state -+ pop_for_divide - RET -- cfi_end LSYM(Lend_aeabi_ldivmod) -+ -+1: /* xxh:xxl is negative */ -+ .cfi_restore_state -+ negs xxl, xxl -+ sbc xxh, xxh, xxh, lsl #1 /* Thumb-2 has no RSC, so use X - 2X */ -+ cmp yyh, #0 -+ blt 3f -+ /* arguments in (r0:r1), (r2:r3) and *sp */ -+ bl SYM(__udivmoddi4) __PLT__ -+ .cfi_remember_state -+ pop_for_divide -+ negs xxl, xxl -+ sbc xxh, xxh, xxh, lsl #1 /* Thumb-2 has no RSC, so use X - 2X */ -+ negs yyl, yyl -+ sbc yyh, yyh, yyh, lsl #1 /* Thumb-2 has no RSC, so use X - 2X */ -+ RET -+ -+2: /* only yyh:yyl is negative */ -+ .cfi_restore_state -+ negs yyl, yyl -+ sbc yyh, yyh, yyh, lsl #1 /* Thumb-2 has no RSC, so use X - 2X */ -+ /* arguments in (r0:r1), (r2:r3) and *sp */ -+ bl SYM(__udivmoddi4) __PLT__ -+ .cfi_remember_state -+ pop_for_divide -+ negs xxl, xxl -+ sbc xxh, xxh, xxh, lsl #1 /* Thumb-2 has no RSC, so use X - 2X */ -+ RET -+ -+3: /* both xxh:xxl and yyh:yyl are negative */ -+ .cfi_restore_state -+ negs yyl, yyl -+ sbc yyh, yyh, yyh, lsl #1 /* Thumb-2 has no RSC, so use X - 2X */ -+ /* arguments in (r0:r1), (r2:r3) and *sp */ -+ bl SYM(__udivmoddi4) __PLT__ -+ pop_for_divide -+ negs yyl, yyl -+ sbc yyh, yyh, yyh, lsl #1 /* Thumb-2 has no RSC, so use X - 2X */ -+ RET -+ -+ .cfi_endproc - - #endif /* L_aeabi_ldivmod */ - - #ifdef L_aeabi_uldivmod - -+/* Perform 64 bit signed division. -+ Inputs: -+ r0:r1 numerator -+ r2:r3 denominator -+ Outputs: -+ r0:r1 quotient -+ r2:r3 remainder -+ */ - ARM_FUNC_START aeabi_uldivmod -- cfi_start __aeabi_uldivmod, LSYM(Lend_aeabi_uldivmod) -- test_div_by_zero unsigned -+ .cfi_startproc -+ test_div_by_zero unsigned - -- sub sp, sp, #8 --#if defined(__thumb2__) -- mov ip, sp -- push {ip, lr} --#else -- do_push {sp, lr} --#endif --98: cfi_push 98b - __aeabi_uldivmod, 0xe, -0xc, 0x10 -- bl SYM(__gnu_uldivmod_helper) __PLT__ -- ldr lr, [sp, #4] -- add sp, sp, #8 -- do_pop {r2, r3} -+ push_for_divide __aeabi_uldivmod -+ /* arguments in (r0:r1), (r2:r3) and *sp */ -+ bl SYM(__udivmoddi4) __PLT__ -+ pop_for_divide - RET -- cfi_end LSYM(Lend_aeabi_uldivmod) -+ .cfi_endproc - - #endif /* L_aeabi_divmod */ - ---- a/src/libgcc/config/libbid/ChangeLog.linaro -+++ b/src/libgcc/config/libbid/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. 
-+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/libgcc/fp-bit.c -+++ b/src/libgcc/fp-bit.c -@@ -202,17 +202,9 @@ - int sign = src->sign; - int exp = 0; - -- if (LARGEST_EXPONENT_IS_NORMAL (FRAC_NBITS) && (isnan (src) || isinf (src))) -+ if (isnan (src)) - { -- /* We can't represent these values accurately. By using the -- largest possible magnitude, we guarantee that the conversion -- of infinity is at least as big as any finite number. */ - exp = EXPMAX; -- fraction = ((fractype) 1 << FRACBITS) - 1; -- } -- else if (isnan (src)) -- { -- exp = EXPMAX; - /* Restore the NaN's payload. */ - fraction >>= NGARDS; - fraction &= QUIET_NAN - 1; -@@ -291,8 +283,7 @@ - fraction >>= NGARDS; - #endif /* NO_DENORMALS */ - } -- else if (!LARGEST_EXPONENT_IS_NORMAL (FRAC_NBITS) -- && __builtin_expect (src->normal_exp > EXPBIAS, 0)) -+ else if (__builtin_expect (src->normal_exp > EXPBIAS, 0)) - { - exp = EXPMAX; - fraction = 0; -@@ -300,35 +291,25 @@ - else - { - exp = src->normal_exp + EXPBIAS; -- if (!ROUND_TOWARDS_ZERO) -+ /* IF the gard bits are the all zero, but the first, then we're -+ half way between two numbers, choose the one which makes the -+ lsb of the answer 0. */ -+ if ((fraction & GARDMASK) == GARDMSB) - { -- /* IF the gard bits are the all zero, but the first, then we're -- half way between two numbers, choose the one which makes the -- lsb of the answer 0. */ -- if ((fraction & GARDMASK) == GARDMSB) -- { -- if (fraction & (1 << NGARDS)) -- fraction += GARDROUND + 1; -- } -- else -- { -- /* Add a one to the guards to round up */ -- fraction += GARDROUND; -- } -- if (fraction >= IMPLICIT_2) -- { -- fraction >>= 1; -- exp += 1; -- } -+ if (fraction & (1 << NGARDS)) -+ fraction += GARDROUND + 1; - } -- fraction >>= NGARDS; -- -- if (LARGEST_EXPONENT_IS_NORMAL (FRAC_NBITS) && exp > EXPMAX) -+ else - { -- /* Saturate on overflow. */ -- exp = EXPMAX; -- fraction = ((fractype) 1 << FRACBITS) - 1; -+ /* Add a one to the guards to round up */ -+ fraction += GARDROUND; - } -+ if (fraction >= IMPLICIT_2) -+ { -+ fraction >>= 1; -+ exp += 1; -+ } -+ fraction >>= NGARDS; - } - } - -@@ -556,8 +537,7 @@ - dst->fraction.ll = fraction; - } - } -- else if (!LARGEST_EXPONENT_IS_NORMAL (FRAC_NBITS) -- && __builtin_expect (exp == EXPMAX, 0)) -+ else if (__builtin_expect (exp == EXPMAX, 0)) - { - /* Huge exponent*/ - if (fraction == 0) -@@ -915,7 +895,7 @@ - low <<= 1; - } - -- if (!ROUND_TOWARDS_ZERO && (high & GARDMASK) == GARDMSB) -+ if ((high & GARDMASK) == GARDMSB) - { - if (high & (1 << NGARDS)) - { -@@ -1035,7 +1015,7 @@ - numerator *= 2; - } - -- if (!ROUND_TOWARDS_ZERO && (quotient & GARDMASK) == GARDMSB) -+ if ((quotient & GARDMASK) == GARDMSB) - { - if (quotient & (1 << NGARDS)) - { ---- a/src/libdecnumber/ChangeLog.linaro -+++ b/src/libdecnumber/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. 
-+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/gcc/LINARO-VERSION -+++ b/src/gcc/LINARO-VERSION -@@ -0,0 +1 @@ -+4.9-2015.01 ---- a/src/gcc/ira-conflicts.c -+++ b/src/gcc/ira-conflicts.c -@@ -774,6 +774,27 @@ - temp_hard_reg_set); - } - -+ /* Now we deal with paradoxical subreg cases where certain registers -+ cannot be accessed in the widest mode. */ -+ enum machine_mode outer_mode = ALLOCNO_WMODE (a); -+ enum machine_mode inner_mode = ALLOCNO_MODE (a); -+ if (GET_MODE_SIZE (outer_mode) > GET_MODE_SIZE (inner_mode)) -+ { -+ enum reg_class aclass = ALLOCNO_CLASS (a); -+ for (int j = ira_class_hard_regs_num[aclass] - 1; j >= 0; --j) -+ { -+ int inner_regno = ira_class_hard_regs[aclass][j]; -+ int outer_regno = simplify_subreg_regno (inner_regno, -+ inner_mode, 0, -+ outer_mode); -+ if (outer_regno < 0 -+ || !in_hard_reg_set_p (reg_class_contents[aclass], -+ outer_mode, outer_regno)) -+ SET_HARD_REG_BIT (OBJECT_CONFLICT_HARD_REGS (obj), -+ inner_regno); -+ } -+ } -+ - if (ALLOCNO_CALLS_CROSSED_NUM (a) != 0) - { - int regno; ---- a/src/gcc/targhooks.c -+++ b/src/gcc/targhooks.c -@@ -1357,7 +1357,62 @@ - #endif - } - -+/* For hooks which use the MOVE_RATIO macro, this gives the legacy default -+ behaviour. SPEED_P is true if we are compiling for speed. */ -+ -+static unsigned int -+get_move_ratio (bool speed_p ATTRIBUTE_UNUSED) -+{ -+ unsigned int move_ratio; -+#ifdef MOVE_RATIO -+ move_ratio = (unsigned int) MOVE_RATIO (speed_p); -+#else -+#if defined (HAVE_movmemqi) || defined (HAVE_movmemhi) || defined (HAVE_movmemsi) || defined (HAVE_movmemdi) || defined (HAVE_movmemti) -+ move_ratio = 2; -+#else /* No movmem patterns, pick a default. */ -+ move_ratio = ((speed_p) ? 15 : 3); -+#endif -+#endif -+ return move_ratio; -+} -+ -+/* Return TRUE if the move_by_pieces/set_by_pieces infrastructure should be -+ used; return FALSE if the movmem/setmem optab should be expanded, or -+ a call to memcpy emitted. 
*/ -+ - bool -+default_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size, -+ unsigned int alignment, -+ enum by_pieces_operation op, -+ bool speed_p) -+{ -+ unsigned int max_size = 0; -+ unsigned int ratio = 0; -+ -+ switch (op) -+ { -+ case CLEAR_BY_PIECES: -+ max_size = STORE_MAX_PIECES; -+ ratio = CLEAR_RATIO (speed_p); -+ break; -+ case MOVE_BY_PIECES: -+ max_size = MOVE_MAX_PIECES; -+ ratio = get_move_ratio (speed_p); -+ break; -+ case SET_BY_PIECES: -+ max_size = STORE_MAX_PIECES; -+ ratio = SET_RATIO (speed_p); -+ break; -+ case STORE_BY_PIECES: -+ max_size = STORE_MAX_PIECES; -+ ratio = get_move_ratio (speed_p); -+ break; -+ } -+ -+ return move_by_pieces_ninsns (size, alignment, max_size + 1) < ratio; -+} -+ -+bool - default_profile_before_prologue (void) - { - #ifdef PROFILE_BEFORE_PROLOGUE ---- a/src/gcc/targhooks.h -+++ b/src/gcc/targhooks.h -@@ -177,6 +177,11 @@ - extern int default_register_move_cost (enum machine_mode, reg_class_t, - reg_class_t); - -+extern bool default_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT, -+ unsigned int, -+ enum by_pieces_operation, -+ bool); -+ - extern bool default_profile_before_prologue (void); - extern reg_class_t default_preferred_reload_class (rtx, reg_class_t); - extern reg_class_t default_preferred_output_reload_class (rtx, reg_class_t); ---- a/src/gcc/cppbuiltin.c -+++ b/src/gcc/cppbuiltin.c -@@ -53,18 +53,41 @@ - *patchlevel = s_patchlevel; - } - -+/* Parse a LINAROVER version string of the format "M.m-year.month[-spin][~dev]" -+ to create Linaro release number YYYYMM and spin version. */ -+static void -+parse_linarover (int *release, int *spin) -+{ -+ static int s_year = -1, s_month, s_spin; - -+ if (s_year == -1) -+ if (sscanf (LINAROVER, "%*[^-]-%d.%d-%d", &s_year, &s_month, &s_spin) != 3) -+ { -+ sscanf (LINAROVER, "%*[^-]-%d.%d", &s_year, &s_month); -+ s_spin = 0; -+ } -+ -+ if (release) -+ *release = s_year * 100 + s_month; -+ -+ if (spin) -+ *spin = s_spin; -+} -+ - /* Define __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__ and __VERSION__. */ - static void - define__GNUC__ (cpp_reader *pfile) - { -- int major, minor, patchlevel; -+ int major, minor, patchlevel, linaro_release, linaro_spin; - - parse_basever (&major, &minor, &patchlevel); -+ parse_linarover (&linaro_release, &linaro_spin); - cpp_define_formatted (pfile, "__GNUC__=%d", major); - cpp_define_formatted (pfile, "__GNUC_MINOR__=%d", minor); - cpp_define_formatted (pfile, "__GNUC_PATCHLEVEL__=%d", patchlevel); - cpp_define_formatted (pfile, "__VERSION__=\"%s\"", version_string); -+ cpp_define_formatted (pfile, "__LINARO_RELEASE__=%d", linaro_release); -+ cpp_define_formatted (pfile, "__LINARO_SPIN__=%d", linaro_spin); - cpp_define_formatted (pfile, "__ATOMIC_RELAXED=%d", MEMMODEL_RELAXED); - cpp_define_formatted (pfile, "__ATOMIC_SEQ_CST=%d", MEMMODEL_SEQ_CST); - cpp_define_formatted (pfile, "__ATOMIC_ACQUIRE=%d", MEMMODEL_ACQUIRE); ---- a/src/gcc/tree-ssa-threadupdate.c -+++ b/src/gcc/tree-ssa-threadupdate.c -@@ -156,8 +156,9 @@ - bool registering) - { - fprintf (dump_file, -- " %s jump thread: (%d, %d) incoming edge; ", -+ " %s%s jump thread: (%d, %d) incoming edge; ", - (registering ? "Registering" : "Cancelling"), -+ (path[0]->type == EDGE_FSM_THREAD ? " FSM": ""), - path[0]->e->src->index, path[0]->e->dest->index); - - for (unsigned int i = 1; i < path.length (); i++) -@@ -1622,6 +1623,155 @@ - return false; - } - -+/* Verify that the REGION is a Single Entry Multiple Exits region: make sure no -+ edge other than ENTRY is entering the REGION. 
*/ -+ -+DEBUG_FUNCTION void -+verify_seme (edge entry, basic_block *region, unsigned n_region) -+{ -+ bitmap bbs = BITMAP_ALLOC (NULL); -+ -+ for (unsigned i = 0; i < n_region; i++) -+ bitmap_set_bit (bbs, region[i]->index); -+ -+ for (unsigned i = 0; i < n_region; i++) -+ { -+ edge e; -+ edge_iterator ei; -+ basic_block bb = region[i]; -+ -+ /* All predecessors other than ENTRY->src should be in the region. */ -+ for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei)); ei_next (&ei)) -+ if (e != entry) -+ gcc_assert (bitmap_bit_p (bbs, e->src->index)); -+ } -+ -+ BITMAP_FREE (bbs); -+} -+ -+/* Duplicates a Single Entry Multiple Exit REGION (set of N_REGION basic -+ blocks). The ENTRY edge is redirected to the duplicate of the region. If -+ REGION is not a Single Entry region, ignore any incoming edges other than -+ ENTRY: this makes the copied region a Single Entry region. -+ -+ Remove the last conditional statement in the last basic block in the REGION, -+ and create a single fallthru edge pointing to the same destination as the -+ EXIT edge. -+ -+ The new basic blocks are stored to REGION_COPY in the same order as they had -+ in REGION, provided that REGION_COPY is not NULL. -+ -+ Returns false if it is unable to copy the region, true otherwise. */ -+ -+static bool -+duplicate_seme_region (edge entry, edge exit, -+ basic_block *region, unsigned n_region, -+ basic_block *region_copy) -+{ -+ unsigned i; -+ bool free_region_copy = false, copying_header = false; -+ struct loop *loop = entry->dest->loop_father; -+ edge exit_copy; -+ edge redirected; -+ int total_freq = 0, entry_freq = 0; -+ gcov_type total_count = 0, entry_count = 0; -+ -+ if (!can_copy_bbs_p (region, n_region)) -+ return false; -+ -+ /* Some sanity checking. Note that we do not check for all possible -+ missuses of the functions. I.e. if you ask to copy something weird, -+ it will work, but the state of structures probably will not be -+ correct. */ -+ for (i = 0; i < n_region; i++) -+ { -+ /* We do not handle subloops, i.e. all the blocks must belong to the -+ same loop. */ -+ if (region[i]->loop_father != loop) -+ return false; -+ } -+ -+ initialize_original_copy_tables (); -+ -+ if (copying_header) -+ set_loop_copy (loop, loop_outer (loop)); -+ else -+ set_loop_copy (loop, loop); -+ -+ if (!region_copy) -+ { -+ region_copy = XNEWVEC (basic_block, n_region); -+ free_region_copy = true; -+ } -+ -+ if (entry->dest->count) -+ { -+ total_count = entry->dest->count; -+ entry_count = entry->count; -+ /* Fix up corner cases, to avoid division by zero or creation of negative -+ frequencies. */ -+ if (entry_count > total_count) -+ entry_count = total_count; -+ } -+ else -+ { -+ total_freq = entry->dest->frequency; -+ entry_freq = EDGE_FREQUENCY (entry); -+ /* Fix up corner cases, to avoid division by zero or creation of negative -+ frequencies. 
*/ -+ if (total_freq == 0) -+ total_freq = 1; -+ else if (entry_freq > total_freq) -+ entry_freq = total_freq; -+ } -+ -+ copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop, -+ split_edge_bb_loc (entry), 0); -+ if (total_count) -+ { -+ scale_bbs_frequencies_gcov_type (region, n_region, -+ total_count - entry_count, -+ total_count); -+ scale_bbs_frequencies_gcov_type (region_copy, n_region, entry_count, -+ total_count); -+ } -+ else -+ { -+ scale_bbs_frequencies_int (region, n_region, total_freq - entry_freq, -+ total_freq); -+ scale_bbs_frequencies_int (region_copy, n_region, entry_freq, total_freq); -+ } -+ -+#ifdef ENABLE_CHECKING -+ /* Make sure no edge other than ENTRY is entering the copied region. */ -+ verify_seme (entry, region_copy, n_region); -+#endif -+ -+ /* Remove the last branch in the jump thread path. */ -+ remove_ctrl_stmt_and_useless_edges (region_copy[n_region - 1], exit->dest); -+ edge e = make_edge (region_copy[n_region - 1], exit->dest, EDGE_FALLTHRU); -+ -+ if (e) { -+ rescan_loop_exit (e, true, false); -+ e->probability = REG_BR_PROB_BASE; -+ e->count = region_copy[n_region - 1]->count; -+ } -+ -+ /* Redirect the entry and add the phi node arguments. */ -+ redirected = redirect_edge_and_branch (entry, get_bb_copy (entry->dest)); -+ gcc_assert (redirected != NULL); -+ flush_pending_stmts (entry); -+ -+ /* Add the other PHI node arguments. */ -+ add_phi_args_after_copy (region_copy, n_region, NULL); -+ -+ if (free_region_copy) -+ free (region_copy); -+ -+ free_original_copy_tables (); -+ return true; -+} -+ - /* Walk through all blocks and thread incoming edges to the appropriate - outgoing edge for each edge pair recorded in THREADED_EDGES. - -@@ -1651,6 +1801,57 @@ - threaded_blocks = BITMAP_ALLOC (NULL); - memset (&thread_stats, 0, sizeof (thread_stats)); - -+ /* Jump-thread all FSM threads before other jump-threads. */ -+ for (i = 0; i < paths.length ();) -+ { -+ vec<jump_thread_edge *> *path = paths[i]; -+ edge entry = (*path)[0]->e; -+ -+ if ((*path)[0]->type != EDGE_FSM_THREAD -+ /* Do not jump-thread twice from the same block. */ -+ || bitmap_bit_p (threaded_blocks, entry->src->index)) { -+ i++; -+ continue; -+ } -+ -+ unsigned len = path->length (); -+ edge exit = (*path)[len - 1]->e; -+ basic_block *region = XNEWVEC (basic_block, len - 1); -+ -+ for (unsigned int j = 0; j < len - 1; j++) -+ region[j] = (*path)[j]->e->dest; -+ -+ if (duplicate_seme_region (entry, exit, region, len - 1, NULL)) -+ { -+ /* We do not update dominance info. */ -+ free_dominance_info (CDI_DOMINATORS); -+ bitmap_set_bit (threaded_blocks, entry->src->index); -+ retval = true; -+ } -+ -+ delete_jump_thread_path (path); -+ paths.unordered_remove (i); -+ } -+ -+ /* Remove from PATHS all the jump-threads starting with an edge already -+ jump-threaded. */ -+ for (i = 0; i < paths.length ();) -+ { -+ vec<jump_thread_edge *> *path = paths[i]; -+ edge entry = (*path)[0]->e; -+ -+ /* Do not jump-thread twice from the same block. 
*/ -+ if (bitmap_bit_p (threaded_blocks, entry->src->index)) -+ { -+ delete_jump_thread_path (path); -+ paths.unordered_remove (i); -+ } -+ else -+ i++; -+ } -+ -+ bitmap_clear (threaded_blocks); -+ - mark_threaded_blocks (threaded_blocks); - - initialize_original_copy_tables (); ---- a/src/gcc/tree-ssa-threadupdate.h -+++ b/src/gcc/tree-ssa-threadupdate.h -@@ -26,6 +26,7 @@ - enum jump_thread_edge_type - { - EDGE_START_JUMP_THREAD, -+ EDGE_FSM_THREAD, - EDGE_COPY_SRC_BLOCK, - EDGE_COPY_SRC_JOINER_BLOCK, - EDGE_NO_COPY_SRC_BLOCK ---- a/src/gcc/c-family/ChangeLog.linaro -+++ b/src/gcc/c-family/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/gcc/java/ChangeLog.linaro -+++ b/src/gcc/java/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. 
---- a/src/gcc/c/c-parser.c -+++ b/src/gcc/c/c-parser.c -@@ -4210,7 +4210,8 @@ - init.original_type = NULL; - c_parser_error (parser, "expected identifier"); - c_parser_skip_until_found (parser, CPP_COMMA, NULL); -- process_init_element (init, false, braced_init_obstack); -+ process_init_element (input_location, init, false, -+ braced_init_obstack); - return; - } - } -@@ -4342,7 +4343,8 @@ - init.original_type = NULL; - c_parser_error (parser, "expected %<=%>"); - c_parser_skip_until_found (parser, CPP_COMMA, NULL); -- process_init_element (init, false, braced_init_obstack); -+ process_init_element (input_location, init, false, -+ braced_init_obstack); - return; - } - } -@@ -4363,11 +4365,12 @@ - { - struct c_expr init; - gcc_assert (!after || c_dialect_objc ()); -+ location_t loc = c_parser_peek_token (parser)->location; -+ - if (c_parser_next_token_is (parser, CPP_OPEN_BRACE) && !after) - init = c_parser_braced_init (parser, NULL_TREE, true); - else - { -- location_t loc = c_parser_peek_token (parser)->location; - init = c_parser_expr_no_commas (parser, after); - if (init.value != NULL_TREE - && TREE_CODE (init.value) != STRING_CST -@@ -4374,7 +4377,7 @@ - && TREE_CODE (init.value) != COMPOUND_LITERAL_EXPR) - init = convert_lvalue_to_rvalue (loc, init, true, true); - } -- process_init_element (init, false, braced_init_obstack); -+ process_init_element (loc, init, false, braced_init_obstack); - } - - /* Parse a compound statement (possibly a function body) (C90 6.6.2, ---- a/src/gcc/c/c-typeck.c -+++ b/src/gcc/c/c-typeck.c -@@ -102,8 +102,8 @@ - static char *print_spelling (char *); - static void warning_init (int, const char *); - static tree digest_init (location_t, tree, tree, tree, bool, bool, int); --static void output_init_element (tree, tree, bool, tree, tree, int, bool, -- struct obstack *); -+static void output_init_element (location_t, tree, tree, bool, tree, tree, int, -+ bool, struct obstack *); - static void output_pending_init_elements (int, struct obstack *); - static int set_designator (int, struct obstack *); - static void push_range_stack (tree, struct obstack *); -@@ -7187,13 +7187,15 @@ - if ((TREE_CODE (constructor_type) == RECORD_TYPE - || TREE_CODE (constructor_type) == UNION_TYPE) - && constructor_fields == 0) -- process_init_element (pop_init_level (1, braced_init_obstack), -+ process_init_element (input_location, -+ pop_init_level (1, braced_init_obstack), - true, braced_init_obstack); - else if (TREE_CODE (constructor_type) == ARRAY_TYPE - && constructor_max_index - && tree_int_cst_lt (constructor_max_index, - constructor_index)) -- process_init_element (pop_init_level (1, braced_init_obstack), -+ process_init_element (input_location, -+ pop_init_level (1, braced_init_obstack), - true, braced_init_obstack); - else - break; -@@ -7393,10 +7395,9 @@ - /* When we come to an explicit close brace, - pop any inner levels that didn't have explicit braces. */ - while (constructor_stack->implicit) -- { -- process_init_element (pop_init_level (1, braced_init_obstack), -- true, braced_init_obstack); -- } -+ process_init_element (input_location, -+ pop_init_level (1, braced_init_obstack), -+ true, braced_init_obstack); - gcc_assert (!constructor_range_stack); - } - -@@ -7574,10 +7575,9 @@ - /* Designator list starts at the level of closest explicit - braces. 
*/ - while (constructor_stack->implicit) -- { -- process_init_element (pop_init_level (1, braced_init_obstack), -- true, braced_init_obstack); -- } -+ process_init_element (input_location, -+ pop_init_level (1, braced_init_obstack), -+ true, braced_init_obstack); - constructor_designated = 1; - return 0; - } -@@ -8197,9 +8197,9 @@ - existing initializer. */ - - static void --output_init_element (tree value, tree origtype, bool strict_string, tree type, -- tree field, int pending, bool implicit, -- struct obstack * braced_init_obstack) -+output_init_element (location_t loc, tree value, tree origtype, -+ bool strict_string, tree type, tree field, int pending, -+ bool implicit, struct obstack * braced_init_obstack) - { - tree semantic_type = NULL_TREE; - bool maybe_const = true; -@@ -8297,8 +8297,8 @@ - - if (semantic_type) - value = build1 (EXCESS_PRECISION_EXPR, semantic_type, value); -- value = digest_init (input_location, type, value, origtype, npc, -- strict_string, require_constant_value); -+ value = digest_init (loc, type, value, origtype, npc, strict_string, -+ require_constant_value); - if (value == error_mark_node) - { - constructor_erroneous = 1; -@@ -8425,8 +8425,8 @@ - { - if (tree_int_cst_equal (elt->purpose, - constructor_unfilled_index)) -- output_init_element (elt->value, elt->origtype, true, -- TREE_TYPE (constructor_type), -+ output_init_element (input_location, elt->value, elt->origtype, -+ true, TREE_TYPE (constructor_type), - constructor_unfilled_index, 0, false, - braced_init_obstack); - else if (tree_int_cst_lt (constructor_unfilled_index, -@@ -8480,8 +8480,8 @@ - if (tree_int_cst_equal (elt_bitpos, ctor_unfilled_bitpos)) - { - constructor_unfilled_fields = elt->purpose; -- output_init_element (elt->value, elt->origtype, true, -- TREE_TYPE (elt->purpose), -+ output_init_element (input_location, elt->value, elt->origtype, -+ true, TREE_TYPE (elt->purpose), - elt->purpose, 0, false, - braced_init_obstack); - } -@@ -8554,7 +8554,7 @@ - existing initializer. 
*/ - - void --process_init_element (struct c_expr value, bool implicit, -+process_init_element (location_t loc, struct c_expr value, bool implicit, - struct obstack * braced_init_obstack) - { - tree orig_value = value.value; -@@ -8598,7 +8598,7 @@ - if ((TREE_CODE (constructor_type) == RECORD_TYPE - || TREE_CODE (constructor_type) == UNION_TYPE) - && constructor_fields == 0) -- process_init_element (pop_init_level (1, braced_init_obstack), -+ process_init_element (loc, pop_init_level (1, braced_init_obstack), - true, braced_init_obstack); - else if ((TREE_CODE (constructor_type) == ARRAY_TYPE - || TREE_CODE (constructor_type) == VECTOR_TYPE) -@@ -8605,7 +8605,7 @@ - && constructor_max_index - && tree_int_cst_lt (constructor_max_index, - constructor_index)) -- process_init_element (pop_init_level (1, braced_init_obstack), -+ process_init_element (loc, pop_init_level (1, braced_init_obstack), - true, braced_init_obstack); - else - break; -@@ -8683,7 +8683,7 @@ - if (value.value) - { - push_member_name (constructor_fields); -- output_init_element (value.value, value.original_type, -+ output_init_element (loc, value.value, value.original_type, - strict_string, fieldtype, - constructor_fields, 1, implicit, - braced_init_obstack); -@@ -8775,7 +8775,7 @@ - if (value.value) - { - push_member_name (constructor_fields); -- output_init_element (value.value, value.original_type, -+ output_init_element (loc, value.value, value.original_type, - strict_string, fieldtype, - constructor_fields, 1, implicit, - braced_init_obstack); -@@ -8827,7 +8827,7 @@ - if (value.value) - { - push_array_bounds (tree_to_uhwi (constructor_index)); -- output_init_element (value.value, value.original_type, -+ output_init_element (loc, value.value, value.original_type, - strict_string, elttype, - constructor_index, 1, implicit, - braced_init_obstack); -@@ -8862,7 +8862,7 @@ - { - if (TREE_CODE (value.value) == VECTOR_CST) - elttype = TYPE_MAIN_VARIANT (constructor_type); -- output_init_element (value.value, value.original_type, -+ output_init_element (loc, value.value, value.original_type, - strict_string, elttype, - constructor_index, 1, implicit, - braced_init_obstack); -@@ -8891,7 +8891,7 @@ - else - { - if (value.value) -- output_init_element (value.value, value.original_type, -+ output_init_element (loc, value.value, value.original_type, - strict_string, constructor_type, - NULL_TREE, 1, implicit, - braced_init_obstack); -@@ -8910,8 +8910,8 @@ - while (constructor_stack != range_stack->stack) - { - gcc_assert (constructor_stack->implicit); -- process_init_element (pop_init_level (1, -- braced_init_obstack), -+ process_init_element (loc, -+ pop_init_level (1, braced_init_obstack), - true, braced_init_obstack); - } - for (p = range_stack; -@@ -8919,7 +8919,8 @@ - p = p->prev) - { - gcc_assert (constructor_stack->implicit); -- process_init_element (pop_init_level (1, braced_init_obstack), -+ process_init_element (loc, -+ pop_init_level (1, braced_init_obstack), - true, braced_init_obstack); - } - ---- a/src/gcc/c/c-tree.h -+++ b/src/gcc/c/c-tree.h -@@ -612,7 +612,8 @@ - extern struct c_expr pop_init_level (int, struct obstack *); - extern void set_init_index (tree, tree, struct obstack *); - extern void set_init_label (tree, struct obstack *); --extern void process_init_element (struct c_expr, bool, struct obstack *); -+extern void process_init_element (location_t, struct c_expr, bool, -+ struct obstack *); - extern tree build_compound_literal (location_t, tree, tree, bool); - extern void check_compound_literal_type 
(location_t, struct c_type_name *); - extern tree c_start_case (location_t, location_t, tree); ---- a/src/gcc/c/ChangeLog.linaro -+++ b/src/gcc/c/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/gcc/target.def -+++ b/src/gcc/target.def -@@ -3039,6 +3039,43 @@ - int, (enum machine_mode mode, reg_class_t rclass, bool in), - default_memory_move_cost) - -+DEFHOOK -+(use_by_pieces_infrastructure_p, -+ "GCC will attempt several strategies when asked to copy between\n\ -+two areas of memory, or to set, clear or store to memory, for example\n\ -+when copying a @code{struct}. The @code{by_pieces} infrastructure\n\ -+implements such memory operations as a sequence of load, store or move\n\ -+insns. 
Alternate strategies are to expand the\n\ -+@code{movmem} or @code{setmem} optabs, to emit a library call, or to emit\n\ -+unit-by-unit, loop-based operations.\n\ -+\n\ -+This target hook should return true if, for a memory operation with a\n\ -+given @var{size} and @var{alignment}, using the @code{by_pieces}\n\ -+infrastructure is expected to result in better code generation.\n\ -+Both @var{size} and @var{alignment} are measured in terms of storage\n\ -+units.\n\ -+\n\ -+The parameter @var{op} is one of: @code{CLEAR_BY_PIECES},\n\ -+@code{MOVE_BY_PIECES}, @code{SET_BY_PIECES}, @code{STORE_BY_PIECES}.\n\ -+These describe the type of memory operation under consideration.\n\ -+\n\ -+The parameter @var{speed_p} is true if the code is currently being\n\ -+optimized for speed rather than size.\n\ -+\n\ -+Returning true for higher values of @var{size} can improve code generation\n\ -+for speed if the target does not provide an implementation of the\n\ -+@code{movmem} or @code{setmem} standard names, if the @code{movmem} or\n\ -+@code{setmem} implementation would be more expensive than a sequence of\n\ -+insns, or if the overhead of a library call would dominate that of\n\ -+the body of the memory operation.\n\ -+\n\ -+Returning true for higher values of @code{size} may also cause an increase\n\ -+in code size, for example where the number of insns emitted to perform a\n\ -+move would be greater than that of a library call.", -+ bool, (unsigned HOST_WIDE_INT size, unsigned int alignment, -+ enum by_pieces_operation op, bool speed_p), -+ default_use_by_pieces_infrastructure_p) -+ - /* True for MODE if the target expects that registers in this mode will - be allocated to registers in a small register class. The compiler is - allowed to use registers explicitly used in the rtl as spill registers ---- a/src/gcc/optabs.c -+++ b/src/gcc/optabs.c -@@ -4234,7 +4234,7 @@ - y = const0_rtx; - } - -- *pmode = word_mode; -+ *pmode = ret_mode; - prepare_cmp_insn (x, y, comparison, NULL_RTX, unsignedp, methods, - ptest, pmode); - } ---- a/src/gcc/defaults.h -+++ b/src/gcc/defaults.h -@@ -914,14 +914,6 @@ - #define PREFERRED_DEBUGGING_TYPE NO_DEBUG - #endif - --#ifndef LARGEST_EXPONENT_IS_NORMAL --#define LARGEST_EXPONENT_IS_NORMAL(SIZE) 0 --#endif -- --#ifndef ROUND_TOWARDS_ZERO --#define ROUND_TOWARDS_ZERO 0 --#endif -- - #ifndef FLOAT_LIB_COMPARE_RETURNS_BOOL - #define FLOAT_LIB_COMPARE_RETURNS_BOOL(MODE, COMPARISON) false - #endif -@@ -1065,6 +1057,15 @@ - #define MOVE_MAX_PIECES MOVE_MAX - #endif - -+/* STORE_MAX_PIECES is the number of bytes at a time that we can -+ store efficiently. Due to internal GCC limitations, this is -+ MOVE_MAX_PIECES limited by the number of bytes GCC can represent -+ for an immediate constant. */ -+ -+#ifndef STORE_MAX_PIECES -+#define STORE_MAX_PIECES MIN (MOVE_MAX_PIECES, 2 * sizeof (HOST_WIDE_INT)) -+#endif -+ - #ifndef MAX_MOVE_MAX - #define MAX_MOVE_MAX MOVE_MAX - #endif ---- a/src/gcc/target.h -+++ b/src/gcc/target.h -@@ -78,6 +78,17 @@ - SWITCH_TYPE_LINE_END /* Please emit a line terminator. */ - }; - -+/* Types of memory operation understood by the "by_pieces" infrastructure. -+ Used by the TARGET_USE_BY_PIECES_INFRASTRUCTURE_P target hook. */ -+ -+enum by_pieces_operation -+{ -+ CLEAR_BY_PIECES, -+ MOVE_BY_PIECES, -+ SET_BY_PIECES, -+ STORE_BY_PIECES -+}; -+ - typedef int (* print_switch_fn_type) (print_switch_type, const char *); - - /* An example implementation for ELF targets. 
Defined in varasm.c */ ---- a/src/gcc/configure -+++ b/src/gcc/configure -@@ -1686,7 +1686,8 @@ - use sysroot as the system root during the build - --with-sysroot[=DIR] search for usr/lib, usr/include, et al, within DIR - --with-specs=SPECS add SPECS to driver command-line processing -- --with-pkgversion=PKG Use PKG in the version string in place of "GCC" -+ --with-pkgversion=PKG Use PKG in the version string in place of "Linaro -+ GCC `cat $srcdir/LINARO-VERSION`" - --with-bugurl=URL Direct users to URL to report a bug - --with-multilib-list select multilibs (AArch64, SH and x86-64 only) - --with-gnu-ld assume the C compiler uses GNU ld default=no -@@ -7231,7 +7232,7 @@ - *) PKGVERSION="($withval) " ;; - esac - else -- PKGVERSION="(GCC) " -+ PKGVERSION="(Linaro GCC `cat $srcdir/LINARO-VERSION`) " - - fi - -@@ -17936,7 +17937,7 @@ - lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 - lt_status=$lt_dlunknown - cat > conftest.$ac_ext <<_LT_EOF --#line 17939 "configure" -+#line 17940 "configure" - #include "confdefs.h" - - #if HAVE_DLFCN_H -@@ -18042,7 +18043,7 @@ - lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 - lt_status=$lt_dlunknown - cat > conftest.$ac_ext <<_LT_EOF --#line 18045 "configure" -+#line 18046 "configure" - #include "confdefs.h" - - #if HAVE_DLFCN_H ---- a/src/gcc/lra-eliminations.c -+++ b/src/gcc/lra-eliminations.c -@@ -1164,7 +1164,9 @@ - ep->from, ep->to); - /* If after processing RTL we decides that SP can be used as - a result of elimination, it can not be changed. */ -- gcc_assert (ep->to_rtx != stack_pointer_rtx); -+ gcc_assert ((ep->to_rtx != stack_pointer_rtx) -+ || (ep->from < FIRST_PSEUDO_REGISTER -+ && fixed_regs [ep->from])); - /* Mark that is not eliminable anymore. */ - elimination_map[ep->from] = NULL; - for (ep1 = ep + 1; ep1 < ®_eliminate[NUM_ELIMINABLE_REGS]; ep1++) ---- a/src/gcc/objc/ChangeLog.linaro -+++ b/src/gcc/objc/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/gcc/ChangeLog.linaro -+++ b/src/gcc/ChangeLog.linaro -@@ -0,0 +1,3211 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ * LINARO-VERSION: Update. -+ -+2015-01-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ Fix Linaro PR #902 -+ -+ Partial Backport from trunk r211798. 
-+ 2014-06-18 Radovan Obradovic <robradovic@mips.com> -+ Tom de Vries <tom@codesourcery.com> -+ -+ * config/arm/arm.c (arm_emit_call_insn): Add IP and CC clobbers to -+ CALL_INSN_FUNCTION_USAGE. -+ -+ Backport from trunk r209800. -+ 2014-04-25 Tom de Vries <tom@codesourcery.com> -+ -+ * expr.c (clobber_reg_mode): New function. -+ * expr.h (clobber_reg): New function. -+ -+2015-01-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211783. -+ 2014-06-18 Charles Baylis <charles.baylis@linaro.org> -+ -+ * config/arm/arm.c (neon_vector_mem_operand): Allow register -+ POST_MODIFY for neon loads and stores. -+ (arm_print_operand): Output post-index register for neon loads and -+ stores. -+ -+2015-01-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r218451. -+ 2014-12-06 James Greenhalgh <james.greenhalgh@arm.com> -+ Sebastian Pop <s.pop@samsung.com> -+ Brian Rzycki <b.rzycki@samsung.com> -+ -+ PR tree-optimization/54742 -+ * params.def (max-fsm-thread-path-insns, max-fsm-thread-length, -+ max-fsm-thread-paths): New. -+ -+ * doc/invoke.texi (max-fsm-thread-path-insns, max-fsm-thread-length, -+ max-fsm-thread-paths): Documented. -+ -+ * tree-cfg.c (split_edge_bb_loc): Export. -+ * tree-cfg.h (split_edge_bb_loc): Declared extern. -+ -+ * tree-ssa-threadedge.c (simplify_control_stmt_condition): Restore the -+ original value of cond when simplification fails. -+ (fsm_find_thread_path): New. -+ (fsm_find_control_statement_thread_paths): New. -+ (thread_through_normal_block): Call find_control_statement_thread_paths. -+ -+ * tree-ssa-threadupdate.c (dump_jump_thread_path): Pretty print -+ EDGE_FSM_THREAD. -+ (verify_seme): New. -+ (duplicate_seme_region): New. -+ (thread_through_all_blocks): Generate code for EDGE_FSM_THREAD edges -+ calling duplicate_seme_region. -+ -+ * tree-ssa-threadupdate.h (jump_thread_edge_type): Add EDGE_FSM_THREAD. -+ -+2015-01-13 Michael Collison <michael.collison@linaro.org> -+ -+ Backport from trunk r217394. -+ 2014-11-11 Andrew Pinski <apinski@cavium.com> -+ -+ Bug target/61997 -+ * config.gcc (aarch64*-*-*): Set target_gtfiles to include -+ aarch64-builtins.c. -+ * config/aarch64/aarch64-builtins.c: Include gt-aarch64-builtins.h -+ at the end of the file. -+ -+2015-01-13 Michael Collison <michael.collison@linaro.org> -+ -+ Backport from trunk r216267, r216547, r216548, r217072, r217192, r217405, -+ r217406, r217768. -+ 2014-11-19 Renlin Li <renlin.li@arm.com> -+ -+ * config/aarch64/aarch64.h (TARGET_CPU_CPP_BUILTINS): Define __ARM_FP_FAST, -+ __ARM_FEATURE_FMA, __ARM_FP, __ARM_FEATURE_NUMERIC_MAXMIN, __ARM_NEON_FP. -+ -+ 2014-11-12 Tejas Belagod <tejas.belagod@arm.com> -+ -+ * Makefile.in (TEXI_GCC_FILES): Remove arm-acle-intrinsics.texi, -+ arm-neon-intrinsics.texi, aarch64-acle-intrinsics.texi. -+ * doc/aarch64-acle-intrinsics.texi: Remove. -+ * doc/arm-acle-intrinsics.texi: Remove. -+ * doc/arm-neon-intrinsics.texi: Remove. -+ * doc/extend.texi: Consolidate sections AArch64 intrinsics, -+ ARM NEON Intrinsics, ARM ACLE Intrinsics into one ARM C Language -+ Extension section. Add references to public ACLE specification. -+ -+ 2014-11-06 Renlin Li <renlin.li@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_architecture_version): New. -+ (processor): New architecture_version field. -+ (aarch64_override_options): Initialize aarch64_architecture_version. -+ * config/aarch64/aarch64.h (TARGET_CPU_CPP_BUILTINS): Define __ARM_ARCH, -+ __ARM_ARCH_PROFILE, aarch64_arch_name macro. 
-+ -+ 2014-11-04 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> -+ -+ * config/arm/arm.h (TARGET_CPU_CPP_BUILTINS): Fix typo in definition -+ of __ARM_FEATURE_IDIV. -+ -+ 2014-10-22 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/arm/arm.h (TARGET_CPU_CPP_BUILTINS): Add missing '\'. -+ -+ 2014-10-22 Renlin Li <renlin.li@arm.com> -+ -+ * config/arm/arm.h (TARGET_CPU_CPP_BUILTINS): Define -+ __ARM_FEATURE_IDIV__. -+ -+ 2014-10-15 Renlin Li <renlin.li@arm.com> -+ -+ * config/aarch64/aarch64.h (TARGET_CPU_CPP_BUILTINS): Define -+ __ARM_BIG_ENDIAN, __ARM_SIZEOF_MINIMAL_ENUM. Add __ARM_64BIT_STATE, -+ __ARM_ARCH_ISA_A64, __ARM_FEATURE_CLZ, __ARM_FEATURE_IDIV, -+ __ARM_FEATURE_UNALIGNED, __ARM_PCS_AAPCS64, __ARM_SIZEOF_WCHAR_T. -+ -+2015-01-13 Michael Collison <michael.collison@linaro.org> -+ -+ Backport from trunk r211789, r211790, r211791, r211792, r211793, r211794, -+ r211795, r211796, r211797. -+ 2014-06-18 Charles Baylis <charles.baylis@linaro.org> -+ -+ * config/arm/bpabi.c (__gnu_uldivmod_helper): Remove. -+ -+ 2014-06-18 Charles Baylis <charles.baylis@linaro.org> -+ -+ * config/arm/bpabi-v6m.S (__aeabi_uldivmod): Perform division using -+ __udivmoddi4. -+ -+ 2014-06-18 Charles Baylis <charles.baylis@linaro.org> -+ -+ * config/arm/bpabi.S (__aeabi_ldivmod, __aeabi_uldivmod, -+ push_for_divide, pop_for_divide): Use .cfi_* directives for DWARF -+ annotations. Fix DWARF information. -+ -+ 2014-06-18 Charles Baylis <charles.baylis@linaro.org> -+ -+ * config/arm/bpabi.S (__aeabi_ldivmod): Perform division using -+ __udivmoddi4, and fixups for negative operands. -+ -+ 2014-06-18 Charles Baylis <charles.baylis@linaro.org> -+ -+ * config/arm/bpabi.S (__aeabi_ldivmod): Optimise stack manipulation. -+ -+ 2014-06-18 Charles Baylis <charles.baylis@linaro.org> -+ -+ * config/arm/bpabi.S (__aeabi_uldivmod): Perform division using call -+ to __udivmoddi4. -+ -+ 2014-06-18 Charles Baylis <charles.baylis@linaro.org> -+ -+ * config/arm/bpabi.S (__aeabi_uldivmod): Optimise stack pointer -+ manipulation. -+ -+ 2014-06-18 Charles Baylis <charles.baylis@linaro.org> -+ -+ * config/arm/bpabi.S (__aeabi_uldivmod, __aeabi_ldivmod): Add comment -+ describing register usage on function entry and exit. -+ -+ 2014-06-18 Charles Baylis <charles.baylis@linaro.org> -+ -+ * config/arm/bpabi.S (__aeabi_uldivmod): Fix whitespace. -+ (__aeabi_ldivmod): Fix whitespace. -+ -+2015-01-13 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r217593. -+ 2014-11-14 Andrew Pinski <apinski@cavium.com> -+ -+ * config/aarch64/aarch64-cores.def (thunderx): Change the scheduler -+ over to thunderx. -+ * config/aarch64/aarch64.md: Include thunderx.md. -+ (generic_sched): Set to no for thunderx. -+ * config/aarch64/thunderx.md: New file. -+ -+2015-01-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r217717. -+ 2014-11-18 Felix Yang <felix.yang@huawei.com> -+ -+ * config/aarch64/aarch64.c (doloop_end): New pattern. -+ * config/aarch64/aarch64.md (TARGET_CAN_USE_DOLOOP_P): Implement. -+ -+2015-01-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r217661. -+ 2014-11-17 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/aarch64/aarch64-cores.def (cortex-a53): Remove -+ AARCH64_FL_CRYPTO from feature flags. -+ (cortex-a57): Likewise. -+ (cortex-a57.cortex-a53): Likewise. -+ -+2015-01-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r218319. 
-+ 2014-12-03 Andrew Stubbs <ams@codesourcery.com> -+ -+ Revert: -+ -+ 2014-09-17 Andrew Stubbs <ams@codesourcery.com> -+ -+ * config/arm/arm.c (arm_option_override): Reject -mfpu=neon -+ when architecture is older than ARMv7. -+ -+2015-01-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r217691. -+ 2014-11-18 Jiong Wang <jiong.wang@arm.com> -+ -+ * lra-eliminations.c (update_reg_eliminate): Relax gcc_assert for fixed -+ registers. -+ -+2015-01-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215503. -+ 2014-09-23 Wilco Dijkstra <wdijkstr@arm.com> -+ -+ * common/config/aarch64/aarch64-common.c: -+ (default_options aarch_option_optimization_table): -+ Default to -fsched-pressure. -+ -+2015-01-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211132. -+ 2014-06-02 Tom de Vries <tom@codesourcery.com> -+ -+ * config/aarch64/aarch64.c (aarch64_float_const_representable_p): Handle -+ case that x has VOIDmode. -+ -+2015-01-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209620. -+ 2014-04-22 Vidya Praveen <vidyapraveen@arm.com> -+ -+ * aarch64.md (float<GPI:mode><GPF:mode>2): Remove. -+ (floatuns<GPI:mode><GPF:mode>2): Remove. -+ (<optab><fcvt_target><GPF:mode>2): New pattern for equal width float -+ and floatuns conversions. -+ (<optab><fcvt_iesize><GPF:mode>2): New pattern for inequal width float -+ and floatuns conversions. -+ * iterators.md (fcvt_target, FCVT_TARGET): Support SF and DF modes. -+ (w1,w2): New mode attributes for inequal width conversions. -+ -+2015-01-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r217362, r217546. -+ 2014-11-14 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> -+ -+ PR target/63724 -+ * config/aarch64/aarch64.c (aarch64_expand_mov_immediate): Split out -+ numerical immediate handling to... -+ (aarch64_internal_mov_immediate): ...this. New. -+ (aarch64_rtx_costs): Use aarch64_internal_mov_immediate. -+ (aarch64_mov_operand_p): Relax predicate. -+ * config/aarch64/aarch64.md (mov<mode>:GPI): Do not expand CONST_INTs. -+ (*movsi_aarch64): Turn into define_insn_and_split and new alternative -+ for 'n'. -+ (*movdi_aarch64): Likewise. -+ -+ 2014-11-11 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/aarch64/aarch64-simd.md -+ (aarch64_simd_bsl<mode>_internal): Remove float cases, canonicalize. -+ (aarch64_simd_bsl<mode>): Add gen_lowpart expressions where we -+ are punning between float vectors and integer vectors. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ * LINARO-VERSION: Bump version. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ * LINARO-VERSION: Update. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r217079, r217080. -+ 2014-11-04 Alan Lawrence <alan.lawrence@arm.com> -+ -+ config/arm/neon.md (reduc_smin_<mode> *2): Rename to... -+ (reduc_smin_scal_<mode> *2): ...this; extract scalar result. -+ (reduc_smax_<mode> *2): Rename to... -+ (reduc_smax_scal_<mode> *2): ...this; extract scalar result. -+ (reduc_umin_<mode> *2): Rename to... -+ (reduc_umin_scal_<mode> *2): ...this; extract scalar result. -+ (reduc_umax_<mode> *2): Rename to... -+ (reduc_umax_scal_<mode> *2): ...this; extract scalar result. -+ -+ 2014-11-04 Alan Lawrence <alan.lawrence@arm.com> -+ -+ config/arm/neon.md (reduc_plus_*): Rename to... -+ (reduc_plus_scal_*): ...this; reduce to temp and extract scalar result. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Fix Backport from trunk r216524 (committed at r218379). 
-+ Add missing file: config/aarch64/aarch64-cost-tables.h -+ -+ * config/aarch64/aarch64-cost-tables.h: New file. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r217076. -+ 2014-11-04 Michael Collison <michael.collison@linaro.org> -+ -+ * config/aarch64/iterators.md (lconst_atomic): New mode attribute -+ to support constraints for CONST_INT in atomic operations. -+ * config/aarch64/atomics.md -+ (atomic_<atomic_optab><mode>): Use lconst_atomic constraint. -+ (atomic_nand<mode>): Likewise. -+ (atomic_fetch_<atomic_optab><mode>): Likewise. -+ (atomic_fetch_nand<mode>): Likewise. -+ (atomic_<atomic_optab>_fetch<mode>): Likewise. -+ (atomic_nand_fetch<mode>): Likewise. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r217026. -+ 2014-11-03 Zhenqiang Chen <zhenqiang.chen@arm.com> -+ -+ * ifcvt.c (noce_emit_cmove, noce_get_alt_condition, noce_get_condition): -+ Allow CC mode if HAVE_cbranchcc4. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r217014. -+ 2014-11-02 Michael Collison <michael.collison@linaro.org> -+ -+ * config/arm/arm.h (CLZ_DEFINED_VALUE_AT_ZERO) : Update -+ to support vector modes. -+ (CTZ_DEFINED_VALUE_AT_ZERO): Ditto. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r216996, r216998, r216999, r217001, r217002, r217003, -+ r217004, r217742. -+ 2014-11-18 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ PR target/63937 -+ * target.def (use_by_pieces_infrastructure_p): Take unsigned -+ HOST_WIDE_INT as the size parameter. -+ * targhooks.c (default_use_by_pieces_infrastructure_p): Likewise. -+ * targhooks.h (default_use_by_pieces_infrastructure_p): Likewise. -+ * config/arc/arc.c (arc_use_by_pieces_infrastructure_p)): Likewise. -+ * config/mips/mips.c (mips_use_by_pieces_infrastructure_p)): Likewise. -+ * config/s390/s390.c (s390_use_by_pieces_infrastructure_p)): Likewise. -+ * config/sh/sh.c (sh_use_by_pieces_infrastructure_p)): Likewise. -+ * config/aarch64/aarch64.c -+ (aarch64_use_by_pieces_infrastructure_p)): Likewise. -+ * doc/tm.texi: Regenerate. -+ -+ 2014-11-01 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * doc/tm.texi.in (MOVE_BY_PIECES_P): Remove. -+ (CLEAR_BY_PIECES_P): Likewise. -+ (SET_BY_PIECES_P): Likewise. -+ (STORE_BY_PIECES_P): Likewise. -+ * doc/tm.texi: Regenerate. -+ * system.h: Poison MOVE_BY_PIECES_P, CLEAR_BY_PIECES_P, -+ SET_BY_PIECES_P, STORE_BY_PIECES_P. -+ * expr.c (MOVE_BY_PIECES_P): Remove. -+ (CLEAR_BY_PIECES_P): Likewise. -+ (SET_BY_PIECES_P): Likewise. -+ (STORE_BY_PIECES_P): Likewise. -+ (can_move_by_pieces): Rewrite in terms of -+ targetm.use_by_pieces_infrastructure_p. -+ (emit_block_move_hints): Likewise. -+ (can_store_by_pieces): Likewise. -+ (store_by_pieces): Likewise. -+ (clear_storage_hints): Likewise. -+ (emit_push_insn): Likewise. -+ (expand_constructor): Likewise. -+ -+ 2014-11-01 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/aarch64/aarch64.c -+ (aarch64_use_by_pieces_infrastructre_p): New. -+ (TARGET_USE_BY_PIECES_INFRASTRUCTURE): Likewise. -+ * config/aarch64/aarch64.h (STORE_BY_PIECES_P): Delete. -+ -+ 2014-11-01 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/mips/mips.h (MOVE_BY_PIECES_P): Remove. -+ (STORE_BY_PIECES_P): Likewise. -+ * config/mips/mips.c (TARGET_USE_BY_PIECES_INFRASTRUCTURE_P): New. -+ (mips_move_by_pieces_p): Rename to... -+ (mips_use_by_pieces_infrastructure_p): ...this, use new hook -+ parameters, use the default hook implementation as a -+ fall-back. 
-+ -+ 2014-11-01 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/sh/sh.c (TARGET_USE_BY_PIECES_INFRASTRUCTURE_P): New. -+ (sh_use_by_pieces_infrastructure_p): Likewise. -+ * config/sh/sh.h (MOVE_BY_PIECES_P): Remove. -+ (STORE_BY_PIECES_P): Likewise. -+ (SET_BY_PIECES_P): Likewise. -+ -+ 2014-11-01 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/arc/arc.c (TARGET_USE_BY_PIECES_INFRASTRUCTURE_P): New. -+ (arc_use_by_pieces_infrastructure_p): Likewise. -+ * confir/arc/arc.h (MOVE_BY_PIECES_P): Delete. -+ (CAN_MOVE_BY_PIECES): Likewise. -+ -+ 2014-11-01 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/s390/s390.c (s390_use_by_pieces_infrastructure_p): New. -+ (TARGET_USE_BY_PIECES_INFRASTRUCTURE_P): Likewise. -+ * config/s390/s390.h (MOVE_BY_PIECES_P): Remove. -+ (CLEAR_BY_PIECES): Likewise. -+ (SET_BY_PIECES): Likewise. -+ (STORE_BY_PIECES): Likewise. -+ -+ 2014-11-01 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * target.def (use_by_pieces_infrastructure_p): New. -+ * doc/tm.texi.in (MOVE_BY_PIECES_P): Describe that this macro -+ is deprecated. -+ (STORE_BY_PIECES_P): Likewise. -+ (CLEAR_BY_PIECES_P): Likewise. -+ (SET_BY_PIECES_P): Likewise. -+ (TARGET_MOVE_BY_PIECES_PROFITABLE_P): Add hook. -+ * doc/tm.texi: Regenerate. -+ * expr.c (MOVE_BY_PIECES_P): Rewrite in terms of -+ TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. -+ (STORE_BY_PIECES_P): Likewise. -+ (CLEAR_BY_PIECES_P): Likewise. -+ (SET_BY_PIECES_P): Likewise. -+ (STORE_MAX_PIECES): Move to... -+ * defaults.h (STORE_MAX_PIECES): ...here. -+ * targhooks.c (get_move_ratio): New. -+ (default_use_by_pieces_infrastructure_p): Likewise. -+ * targhooks.h (default_use_by_pieces_infrastructure_p): New. -+ * target.h (by_pieces_operation): New. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r216765. -+ 2014-10-27 Jiong Wang <jiong.wang@arm.com> -+ -+ PR target/63442 -+ * optabs.c (prepare_cmp_insn): Use "ret_mode" instead of "word_mode". -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r216630. -+ 2014-10-24 Felix Yang <felix.yang@huawei.com> -+ Jiji Jiang <jiangjiji@huawei.com> -+ -+ PR target/63173 -+ * config/aarch64/arm_neon.h (__LD2R_FUNC): Remove macro. -+ (__LD3R_FUNC): Ditto. -+ (__LD4R_FUNC): Ditto. -+ (vld2_dup_s8, vld2_dup_s16, vld2_dup_s32, vld2_dup_f32, vld2_dup_f64, -+ vld2_dup_u8, vld2_dup_u16, vld2_dup_u32, vld2_dup_p8, vld2_dup_p16 -+ vld2_dup_s64, vld2_dup_u64, vld2q_dup_s8, vld2q_dup_p8, -+ vld2q_dup_s16, vld2q_dup_p16, vld2q_dup_s32, vld2q_dup_s64, -+ vld2q_dup_u8, vld2q_dup_u16, vld2q_dup_u32, vld2q_dup_u64 -+ vld2q_dup_f32, vld2q_dup_f64): Rewrite using builtin functions. -+ (vld3_dup_s64, vld3_dup_u64, vld3_dup_f64, vld3_dup_s8 -+ vld3_dup_p8, vld3_dup_s16, vld3_dup_p16, vld3_dup_s32 -+ vld3_dup_u8, vld3_dup_u16, vld3_dup_u32, vld3_dup_f32 -+ vld3q_dup_s8, vld3q_dup_p8, vld3q_dup_s16, vld3q_dup_p16 -+ vld3q_dup_s32, vld3q_dup_s64, vld3q_dup_u8, vld3q_dup_u16 -+ vld3q_dup_u32, vld3q_dup_u64, vld3q_dup_f32, vld3q_dup_f64): Likewise. -+ (vld4_dup_s64, vld4_dup_u64, vld4_dup_f64, vld4_dup_s8 -+ vld4_dup_p8, vld4_dup_s16, vld4_dup_p16, vld4_dup_s32 -+ vld4_dup_u8, vld4_dup_u16, vld4_dup_u32, vld4_dup_f32 -+ vld4q_dup_s8, vld4q_dup_p8, vld4q_dup_s16, vld4q_dup_p16 -+ vld4q_dup_s32, vld4q_dup_s64, vld4q_dup_u8, vld4q_dup_u16 -+ vld4q_dup_u32, vld4q_dup_u64, vld4q_dup_f32, vld4q_dup_f64): Likewise. -+ * config/aarch64/aarch64.md (define_c_enum "unspec"): Add -+ UNSPEC_LD2_DUP, UNSPEC_LD3_DUP, UNSPEC_LD4_DUP. 
-+ * config/aarch64/aarch64-simd-builtins.def (ld2r, ld3r, ld4r): New -+ builtins. -+ * config/aarch64/aarch64-simd.md (aarch64_simd_ld2r<mode>): New pattern. -+ (aarch64_simd_ld3r<mode>): Likewise. -+ (aarch64_simd_ld4r<mode>): Likewise. -+ (aarch64_ld2r<mode>): New expand. -+ (aarch64_ld3r<mode>): Likewise. -+ (aarch64_ld4r<mode>): Likewise. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r217971. -+ 2014-11-22 Uros Bizjak <ubizjak@gmail.com> -+ -+ * params.def (PARAM_MAX_COMPLETELY_PEELED_INSNS): Increase to 200. -+ * config/i386/i386.c (ix86_option_override_internal): Do not increase -+ PARAM_MAX_COMPLETELY_PEELED_INSNS. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r216524. -+ 2014-10-21 Andrew Pinski <apinski@cavium.com> -+ -+ * doc/invoke.texi (AARCH64/mtune): Document thunderx as an -+ available option also. -+ * config/aarch64/aarch64-cost-tables.h: New file. -+ * config/aarch64/aarch64-cores.def (thunderx): New core. -+ * config/aarch64/aarch64-tune.md: Regenerate. -+ * config/aarch64/aarch64.c: Include aarch64-cost-tables.h instead -+ of config/arm/aarch-cost-tables.h. -+ (thunderx_regmove_cost): New variable. -+ (thunderx_tunings): New variable. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r216336. -+ 2014-10-16 Richard Earnshaw <rearnsha@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_legitimize_address): New function. -+ (TARGET_LEGITIMIZE_ADDRESS): Redefine. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r216253. -+ 2014-10-15 Renlin Li <renlin.li@arm.com> -+ -+ * config/aarch64/aarch64.h (ARM_DEFAULT_PCS, arm_pcs_variant): Delete. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215711. -+ 2014-09-30 Terry Guo <terry.guo@arm.com> -+ -+ * config/arm/arm-cores.def (cortex-m7): New core name. -+ * config/arm/arm-fpus.def (fpv5-sp-d16): New fpu name. -+ (fpv5-d16): Ditto. -+ * config/arm/arm-tables.opt: Regenerated. -+ * config/arm/arm-tune.md: Regenerated. -+ * config/arm/arm.h (TARGET_VFP5): New macro. -+ * config/arm/bpabi.h (BE8_LINK_SPEC): Include cortex-m7. -+ * config/arm/vfp.md (<vrint_pattern><SDF:mode>2, -+ smax<mode>3, smin<mode>3): Enabled for FPU FPv5. -+ * doc/invoke.texi: Document new cpu and fpu names. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215707, r215842. -+ 2014-10-03 David Sherwood <david.sherwood@arm.com> -+ -+ * ira-int.h (ira_allocno): Mark hard_regno as signed. -+ -+ 2014-09-30 David Sherwood <david.sherwood@arm.com> -+ -+ * ira-int.h (ira_allocno): Add "wmode" field. -+ * ira-build.c (create_insn_allocnos): Add new "parent" function -+ parameter. -+ * ira-conflicts.c (ira_build_conflicts): Add conflicts for registers -+ that cannot be accessed in wmode. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215540. -+ 2014-09-24 Zhenqiang Chen <zhenqiang.chen@arm.com> -+ -+ PR rtl-optimization/63210 -+ * ira-color.c (assign_hard_reg): Ignore conflict cost if the -+ HARD_REGNO is not available for CONFLICT_A. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215046. -+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ PR target/61749 -+ * config/aarch64/aarch64-builtins.c (aarch64_types_quadop_qualifiers): -+ Use qualifier_immediate for last operand. Rename to... -+ (aarch64_types_ternop_lane_qualifiers): ... This. -+ (TYPES_QUADOP): Rename to... -+ (TYPES_TERNOP_LANE): ... This. 
-+ (aarch64_simd_expand_args): Return const0_rtx when encountering user -+ error. Change return of 0 to return of NULL_RTX. -+ (aarch64_crc32_expand_builtin): Likewise. -+ (aarch64_expand_builtin): Return NULL_RTX instead of 0. -+ ICE when expanding unknown builtin. -+ * config/aarch64/aarch64-simd-builtins.def (sqdmlal_lane): Use -+ TERNOP_LANE qualifiers. -+ (sqdmlsl_lane): Likewise. -+ (sqdmlal_laneq): Likewise. -+ (sqdmlsl_laneq): Likewise. -+ (sqdmlal2_lane): Likewise. -+ (sqdmlsl2_lane): Likewise. -+ (sqdmlal2_laneq): Likewise. -+ (sqdmlsl2_laneq): Likewise. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215013. -+ 2014-09-08 Joseph Myers <joseph@codesourcery.com> -+ -+ * defaults.h (LARGEST_EXPONENT_IS_NORMAL, ROUND_TOWARDS_ZERO): -+ Remove. -+ * doc/tm.texi.in (ROUND_TOWARDS_ZERO, LARGEST_EXPONENT_IS_NORMAL): -+ Remove. -+ * doc/tm.texi: Regenerate. -+ * system.h (LARGEST_EXPONENT_IS_NORMAL, ROUND_TOWARDS_ZERO): -+ Poison. -+ * config/arm/arm.h (LARGEST_EXPONENT_IS_NORMAL): Remove. -+ * config/cris/cris.h (__make_dp): Remove. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r214952. -+ 2014-09-05 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * config/aarch64/arm_neon.h (__GET_HIGH): New macro. -+ (vget_high_f32, vget_high_f64, vget_high_p8, vget_high_p16, -+ vget_high_s8, vget_high_s16, vget_high_s32, vget_high_s64, -+ vget_high_u8, vget_high_u16, vget_high_u32, vget_high_u64): -+ Remove temporary __asm__ and reimplement. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r214948, r214949. -+ 2014-09-05 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * config/aarch64/aarch64-builtins.c (aarch64_fold_builtin): Remove code -+ handling cmge, cmgt, cmeq, cmtst. -+ -+ * config/aarch64/aarch64-simd-builtins.def (cmeq, cmge, cmgt, cmle, -+ cmlt, cmgeu, cmgtu, cmtst): Remove. -+ -+ * config/aarch64/arm_neon.h (vceq_*, vceqq_*, vceqz_*, vceqzq_*, -+ vcge_*, vcgeq_*, vcgez_*, vcgezq_*, vcgt_*, vcgtq_*, vcgtz_*, -+ vcgtzq_*, vcle_*, vcleq_*, vclez_*, vclezq_*, vclt_*, vcltq_*, -+ vcltz_*, vcltzq_*, vtst_*, vtstq_*): Use gcc vector extensions. -+ -+ 2014-09-05 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * config/aarch64/aarch64-builtins.c (aarch64_types_cmtst_qualifiers, -+ TYPES_TST): Define. -+ (aarch64_fold_builtin): Update pattern for cmtst. -+ -+ * config/aarch64/aarch64-protos.h (aarch64_const_vec_all_same_int_p): -+ Declare. -+ -+ * config/aarch64/aarch64-simd-builtins.def (cmtst): Update qualifiers. -+ -+ * config/aarch64/aarch64-simd.md (aarch64_vcond_internal<mode><mode>): -+ Switch operands, separate out more cases, refactor. -+ -+ (aarch64_cmtst<mode>): Rewrite pattern to match (plus ... -1). -+ -+ * config/aarch64.c (aarch64_const_vec_all_same_int_p): Take single -+ argument; rename old version to... -+ (aarch64_const_vec_all_same_in_range_p): ...this. -+ (aarch64_print_operand, aarch64_simd_shift_imm_p): Follow renaming. -+ -+ * config/aarch64/predicates.md (aarch64_simd_imm_minus_one): Define. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r214008. -+ 2014-08-15 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_expand_mov_immediate): Move -+ one_match > zero_match case to just before simple_sequence. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213382. -+ 2014-07-31 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/aarch64/arm_neon.h (vpadd_<suf><8,16,32,64>): Move to -+ correct alphabetical position. 
-+ (vpaddd_f64): Rewrite using builtins. -+ (vpaddd_s64): Move to correct alphabetical position. -+ (vpaddd_u64): New. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r210735, r215206, r215207, r215208. -+ 2014-09-12 Wilco Dijkstra <wilco.dijkstra@arm.com> -+ -+ * gcc/config/aarch64/aarch64.c (cortexa57_regmove_cost): New cost table -+ for A57. -+ (cortexa53_regmove_cost): New cost table for A53. Increase GP2FP/FP2GP -+ cost to spilling from integer to FP registers. -+ -+ 2014-09-12 Wilco Dijkstra <wilco.dijkstra@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_register_move_cost): Fix Q register -+ move handling. -+ (generic_regmove_cost): Undo raised FP2FP move cost as Q register moves -+ are now handled correctly. -+ -+ 2014-09-12 Wilco Dijkstra <wilco.dijkstra@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_register_move_cost): Add cost -+ handling of CALLER_SAVE_REGS and POINTER_REGS. -+ -+ 2014-05-22 Kugan Vivekanandarajah <kuganv@linaro.org> -+ -+ * config/aarch64/aarch64.c (aarch64_regno_regclass) : Change CORE_REGS -+ to GENERAL_REGS. -+ (aarch64_secondary_reload) : LikeWise. -+ (aarch64_class_max_nregs) : Remove CORE_REGS. -+ * config/aarch64/aarch64.h (enum reg_class) : Remove CORE_REGS. -+ (REG_CLASS_NAMES) : Likewise. -+ (REG_CLASS_CONTENTS) : LikeWise. -+ (INDEX_REG_CLASS) : Change CORE_REGS to GENERAL_REGS. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ * LINARO-VERSION: Bump version. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ * LINARO-VERSION: Update. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ Add Linaro release macros (Linaro only patch.) -+ -+ * Makefile.in (LINAROVER, LINAROVER_C, LINAROVER_S): Define. -+ (CFLAGS-cppbuiltin.o): Add LINAROVER macro definition. -+ (cppbuiltin.o): Depend on $(LINAROVER). -+ * cppbuiltin.c (parse_linarover): New. -+ (define_GNUC__): Define __LINARO_RELEASE__ and __LINARO_SPIN__ macros. -+ -+2014-11-13 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r216229, r216230. -+ 2014-10-14 Andrew Pinski <apinski@cavium.com> -+ -+ * explow.c (convert_memory_address_addr_space): Rename to ... -+ (convert_memory_address_addr_space_1): This. Add in_const argument. -+ Inside a CONST RTL, permute the conversion and addition of constant -+ for zero and sign extended pointers. -+ (convert_memory_address_addr_space): New function. -+ -+ 2014-10-14 Andrew Pinski <apinski@cavium.com> -+ -+ Revert: -+ 2011-08-19 H.J. Lu <hongjiu.lu@intel.com> -+ -+ PR middle-end/49721 -+ * explow.c (convert_memory_address_addr_space): Also permute the -+ conversion and addition of constant for zero-extend. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ * LINARO-VERSION: Bump version. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ * LINARO-VERSION: Update. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ * LINARO-VERSION: Bump version. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ * LINARO-VERSION: Update. -+ -+2014-10-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ Revert: -+ 2014-10-08 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215206, r215207, r215208. -+ 2014-09-12 Wilco Dijkstra <wilco.dijkstra@arm.com> -+ -+ * gcc/config/aarch64/aarch64.c (cortexa57_regmove_cost): New cost table -+ for A57. -+ (cortexa53_regmove_cost): New cost table for A53. Increase GP2FP/FP2GP -+ cost to spilling from integer to FP registers. 
-+ -+ 2014-09-12 Wilco Dijkstra <wilco.dijkstra@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_register_move_cost): Fix Q register -+ move handling. -+ (generic_regmove_cost): Undo raised FP2FP move cost as Q register moves -+ are now handled correctly. -+ -+ 2014-09-12 Wilco Dijkstra <wilco.dijkstra@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_register_move_cost): Add cost -+ handling of CALLER_SAVE_REGS and POINTER_REGS. -+ -+2014-10-08 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r214825, r214826. -+ 2014-09-02 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ PR target/62275 -+ * config/arm/neon.md -+ (neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode> -+ <v_cmp_result>): New pattern. -+ * config/arm/iterators.md (NEON_VCVT): New int iterator. -+ * config/arm/arm_neon_builtins.def (vcvtav2sf, vcvtav4sf, vcvtauv2sf, -+ vcvtauv4sf, vcvtpv2sf, vcvtpv4sf, vcvtpuv2sf, vcvtpuv4sf, vcvtmv2sf, -+ vcvtmv4sf, vcvtmuv2sf, vcvtmuv4sf): New builtin definitions. -+ * config/arm/arm.c (arm_builtin_vectorized_function): Handle -+ BUILT_IN_LROUNDF, BUILT_IN_LFLOORF, BUILT_IN_LCEILF. -+ -+ 2014-09-02 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ PR target/62275 -+ * config/arm/iterators.md (FIXUORS): New code iterator. -+ (VCVT): New int iterator. -+ (su_optab): New code attribute. -+ (su): Likewise. -+ * config/arm/vfp.md (l<vrint_pattern><su_optab><mode>si2): New pattern. -+ -+2014-10-08 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215471. -+ 2014-09-22 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/aarch64/geniterators.sh: New. -+ * config/aarch64/iterators.md (VDQF_DF): New. -+ * config/aarch64/t-aarch64: Generate aarch64-builtin-iterators.h. -+ * config/aarch64/aarch64-builtins.c (BUILTIN_*) Remove. -+ -+2014-10-08 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215206, r215207, r215208. -+ 2014-09-12 Wilco Dijkstra <wilco.dijkstra@arm.com> -+ -+ * gcc/config/aarch64/aarch64.c (cortexa57_regmove_cost): New cost table -+ for A57. -+ (cortexa53_regmove_cost): New cost table for A53. Increase GP2FP/FP2GP -+ cost to spilling from integer to FP registers. -+ -+ 2014-09-12 Wilco Dijkstra <wilco.dijkstra@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_register_move_cost): Fix Q register -+ move handling. -+ (generic_regmove_cost): Undo raised FP2FP move cost as Q register moves -+ are now handled correctly. -+ -+ 2014-09-12 Wilco Dijkstra <wilco.dijkstra@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_register_move_cost): Add cost -+ handling of CALLER_SAVE_REGS and POINTER_REGS. -+ -+2014-10-07 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r214824. -+ 2014-09-02 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/aarch64/predicates.md (aarch64_comparison_operation): -+ New special predicate. -+ * config/aarch64/aarch64.md (*csinc2<mode>_insn): Use -+ aarch64_comparison_operation instead of matching an operator. -+ Update operand numbers. -+ (csinc3<mode>_insn): Likewise. -+ (*csinv3<mode>_insn): Likewise. -+ (*csneg3<mode>_insn): Likewise. -+ (ffs<mode>2): Update gen_csinc3<mode>_insn callsite. -+ * config/aarch64/aarch64.c (aarch64_get_condition_code): -+ Return -1 instead of aborting on invalid condition codes. -+ (aarch64_print_operand): Update aarch64_get_condition_code callsites -+ to assert that the returned condition code is valid. -+ * config/aarch64/aarch64-protos.h (aarch64_get_condition_code): Export. 
-+ -+2014-10-07 Venkataramanan Kumar <venkataramanan.kumar@linaro.org> -+ -+ Backport from trunk r209643, r211881. -+ 2014-06-22 Richard Henderson <rth@redhat.com> -+ -+ PR target/61565 -+ * compare-elim.c (struct comparison): Add eh_note. -+ (find_comparison_dom_walker::before_dom_children): Don't eliminate -+ a redundant comparison in a different EH region. Purge EH edges if -+ necessary. -+ -+ 2014-04-22 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> -+ -+ * config/aarch64/aarch64.c (TARGET_FLAGS_REGNUM): Define. -+ -+2014-10-06 Charles Baylis <charles.baylis@linaro.org> -+ -+ Backport from trunk r214945. -+ 2014-09-05 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args): Replace -+ varargs with pointer parameter. -+ (aarch64_simd_expand_builtin): pass pointer into previous. -+ -+2014-10-06 Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org> -+ -+ Backport from trunk r214944. -+ 2014-09-05 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/cortex-a53.md (cortex_a53_alu_shift): Add alu_ext, -+ alus_ext. -+ -+2014-10-06 Venkataramanan Kumar <venkataramanan.kumar@linaro.org> -+ -+ Backport from trunk r214943. -+ 2014-09-05 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * config/aarch64/aarch64-simd.md (aarch64_rbit<mode>): New pattern. -+ * config/aarch64/aarch64-simd-builtins.def (rbit): New builtin. -+ * config/aarch64/arm_neon.h (vrbit_s8, vrbit_u8, vrbitq_s8, vrbitq_u8): -+ Replace temporary asm with call to builtin. -+ (vrbit_p8, vrbitq_p8): New functions. -+ -+2014-10-06 Michael Collison <michael.collison@linaro.org> -+ -+ Backport from trunk r214886. -+ 2014-09-03 Richard Henderson <rth@redhat.com> -+ -+ * config/aarch64/aarch64.c (aarch64_popwb_single_reg): Remove. -+ (aarch64_popwb_pair_reg): Remove. -+ (aarch64_set_frame_expr): Remove. -+ (aarch64_restore_callee_saves): Add CFI_OPS argument; fill it with -+ the restore ops performed by the insns generated. -+ (aarch64_expand_epilogue): Attach CFI_OPS to the stack deallocation -+ insn. Perform the calls_eh_return addition later; do not attempt to -+ preserve the CFA in that case. Don't use aarch64_set_frame_expr. -+ (aarch64_expand_prologue): Use REG_CFA_ADJUST_CFA directly, or no -+ special markup at all. Load cfun->machine->frame.hard_fp_offset -+ into a local variable. -+ (aarch64_frame_pointer_required): Don't check calls_alloca. -+ -+2014-10-06 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215385. -+ 2014-09-19 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/aarch64/aarch64.md (stack_protect_test_<mode>): Mark -+ scratch register as written. -+ -+2014-10-06 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215346. -+ 2014-09-18 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/neon.md (*movmisalign<mode>_neon_load): Change type -+ to neon_load1_1reg<q>. -+ -+2014-10-06 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215321. -+ 2014-09-17 Andrew Stubbs <ams@codesourcery.com> -+ -+ * config/arm/arm.c (arm_option_override): Reject -mfpu=neon -+ when architecture is older than ARMv7. -+ -+2014-10-06 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215260. -+ 2014-09-14 David Sherwood <david.sherwood@arm.com> -+ -+ * gcc.target/aarch64/vdup_lane_2.c (force_simd): Emit simd mov. -+ -+2014-10-06 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215205. 
-+ 2014-09-12 Wilco Dijkstra <wilco.dijkstra@arm.com> -+ -+ * gcc/ree.c (combine_reaching_defs): Ensure inserted copy don't change -+ the number of hard registers. -+ -+2014-10-06 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215136. -+ 2014-09-10 Xinliang David Li <davidxl@google.com> -+ -+ PR target/63209 -+ * config/arm/arm.md (movcond_addsi): Handle case where source -+ and target operands are the same. -+ -+2014-10-06 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215086. -+ 2014-09-09 Marcus Shawcroft <marcus.shawcroft@arm.com> -+ Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> -+ -+ * config/aarch64/aarch64-elf-raw.h (ENDFILE_SPEC): Add crtfastmath.o. -+ * config/aarch64/aarch64-linux.h (GNU_USER_TARGET_MATH_ENDFILE_SPEC): -+ Define. -+ (ENDFILE_SPEC): Define and use GNU_USER_TARGET_MATH_ENDFILE_SPEC. -+ -+2014-10-06 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215067. -+ 2014-09-09 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/arm/arm.c (NEON_COPYSIGNF): New enum. -+ (arm_init_neon_builtins): Support NEON_COPYSIGNF. -+ (arm_builtin_vectorized_function): Likewise. -+ * config/arm/arm_neon_builtins.def: New macro for copysignf. -+ * config/arm/neon.md (neon_copysignf<mode>): New pattern for vector -+ copysignf. -+ -+2014-10-03 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215050, r215051, r215052, r215053, r215054, -+ r215055, r215056. -+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/arm.md (vfp_pop_multiple_with_writeback): Use vldm -+ mnemonic instead of fldmfdd. -+ * config/arm/arm.c (vfp_output_fstmd): Rename to... -+ (vfp_output_vstmd): ... This. Convert output to UAL syntax. -+ Output vpush when address register is SP. -+ * config/arm/arm-protos.h (vfp_output_fstmd): Rename to... -+ (vfp_output_vstmd): ... This. -+ * config/arm/vfp.md (push_multi_vfp): Update call to -+ vfp_output_vstmd. -+ -+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/vfp.md (*movcc_vfp): Use UAL syntax. -+ -+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/vfp.md (*sqrtsf2_vfp): Use UAL assembly syntax. -+ (*sqrtdf2_vfp): Likewise. -+ (*cmpsf_vfp): Likewise. -+ (*cmpsf_trap_vfp): Likewise. -+ (*cmpdf_vfp): Likewise. -+ (*cmpdf_trap_vfp): Likewise. -+ -+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/vfp.md (*extendsfdf2_vfp): Use UAL assembly syntax. -+ (*truncdfsf2_vfp): Likewise. -+ (*truncsisf2_vfp): Likewise. -+ (*truncsidf2_vfp): Likewise. -+ (fixuns_truncsfsi2): Likewise. -+ (fixuns_truncdfsi2): Likewise. -+ (*floatsisf2_vfp): Likewise. -+ (*floatsidf2_vfp): Likewise. -+ (floatunssisf2): Likewise. -+ (floatunssidf2): Likewise. -+ -+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/vfp.md (*mulsf3_vfp): Use UAL assembly syntax. -+ (*muldf3_vfp): Likewise. -+ (*mulsf3negsf_vfp): Likewise. -+ (*muldf3negdf_vfp): Likewise. -+ (*mulsf3addsf_vfp): Likewise. -+ (*muldf3adddf_vfp): Likewise. -+ (*mulsf3subsf_vfp): Likewise. -+ (*muldf3subdf_vfp): Likewise. -+ (*mulsf3negsfaddsf_vfp): Likewise. -+ (*fmuldf3negdfadddf_vfp): Likewise. -+ (*mulsf3negsfsubsf_vfp): Likewise. -+ (*muldf3negdfsubdf_vfp): Likewise. -+ -+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/vfp.md (*abssf2_vfp): Use UAL assembly syntax. -+ (*absdf2_vfp): Likewise. -+ (*negsf2_vfp): Likewise. -+ (*negdf2_vfp): Likewise. -+ (*addsf3_vfp): Likewise. -+ (*adddf3_vfp): Likewise. -+ (*subsf3_vfp): Likewise. -+ (*subdf3_vfp): Likewise. 
-+ (*divsf3_vfp): Likewise. -+ (*divdf3_vfp): Likewise. -+ -+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/arm.c (output_move_vfp): Use UAL syntax for load/store -+ multiple. -+ (arm_print_operand): Don't convert real values to decimal -+ representation in default case. -+ (fp_immediate_constant): Delete. -+ * config/arm/arm-protos.h (fp_immediate_constant): Likewise. -+ * config/arm/vfp.md (*arm_movsi_vfp): Convert to VFP moves to UAL -+ syntax. -+ (*thumb2_movsi_vfp): Likewise. -+ (*movdi_vfp): Likewise. -+ (*movdi_vfp_cortexa8): Likewise. -+ (*movhf_vfp_neon): Likewise. -+ (*movhf_vfp): Likewise. -+ (*movsf_vfp): Likewise. -+ (*thumb2_movsf_vfp): Likewise. -+ (*movdf_vfp): Likewise. -+ (*thumb2_movdf_vfp): Likewise. -+ (*movsfcc_vfp): Likewise. -+ (*thumb2_movsfcc_vfp): Likewise. -+ (*movdfcc_vfp): Likewise. -+ (*thumb2_movdfcc_vfp): Likewise. -+ -+2014-10-03 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r214959. -+ 2014-09-05 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/cortex-a53.md (cortex_a53_fpalu): Add f_rints, f_rintd, -+ f_minmaxs, f_minmaxd types. -+ -+2014-10-03 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r214947. -+ 2014-09-05 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * config/aarch64/aarch64-builtins.c (enum aarch64_type_qualifiers): -+ Remove qualifier_const_pointer, update comment. -+ -+2014-10-03 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r214940. -+ 2014-09-05 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/aarch64/aarch64.md (sibcall_value_insn): Give operand 1 -+ DImode. -+ -+2014-10-03 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213090. -+ 2014-07-26 Andrew Pinski <apinski@cavium.com> -+ -+ * config/aarch64/aarch64.md (*extr_insv_lower_reg<mode>): Remove + -+ from the read only register. -+ -+2014-09-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ * LINARO-VERSION: Bump version. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ * LINARO-VERSION: Update. -+ -+2014-09-09 Venkataramanan Kumar <venkataramanan.kumar@linaro.org> -+ -+ Backport from trunk r215004. -+ 2014-09-07 Venkataramanan Kumar <venkataramanan.kumar@linaro.org> -+ -+ PR target/63190 -+ * config/aarch64/aarch64.md (stack_protect_test_<mode>) Add register -+ constraint for operand0 and remove write only modifier from operand3. -+ -+2014-09-09 Michael Collison <michael.collison@linaro.org> -+ -+ Backport from trunk r212178 -+ 2014-06-30 Joseph Myers <joseph@codesourcery.com> -+ -+ * var-tracking.c (add_stores): Return instead of asserting if old -+ and new values for conditional store are the same. -+ -+2014-09-03 Yvan Roux <yvan.roux@linaro.org> -+ -+ Revert: -+ 2014-09-03 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213712. -+ 2014-08-07 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/aarch64/aarch64.md (absdi2): Set simd attribute. -+ (aarch64_reload_mov<mode>): Predicate on TARGET_FLOAT. -+ (aarch64_movdi_<mode>high): Likewise. -+ (aarch64_mov<mode>high_di): Likewise. -+ (aarch64_movdi_<mode>low): Likewise. -+ (aarch64_mov<mode>low_di): Likewise. -+ (aarch64_movtilow_tilow): Likewise. -+ Add comment explaining usage of fp,simd attributes and of -+ TARGET_FLOAT and TARGET_SIMD. -+ -+2014-09-03 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213712. -+ 2014-08-07 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/aarch64/aarch64.md (absdi2): Set simd attribute. -+ (aarch64_reload_mov<mode>): Predicate on TARGET_FLOAT. 
-+ (aarch64_movdi_<mode>high): Likewise. -+ (aarch64_mov<mode>high_di): Likewise. -+ (aarch64_movdi_<mode>low): Likewise. -+ (aarch64_mov<mode>low_di): Likewise. -+ (aarch64_movtilow_tilow): Likewise. -+ Add comment explaining usage of fp,simd attributes and of -+ TARGET_FLOAT and TARGET_SIMD. -+ -+2014-09-03 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r214526. -+ 2014-08-26 Joseph Myers <joseph@codesourcery.com> -+ -+ PR target/60606 -+ PR target/61330 -+ * varasm.c (make_decl_rtl): Clear DECL_ASSEMBLER_NAME and -+ DECL_HARD_REGISTER and return for invalid register specifications. -+ * cfgexpand.c (expand_one_var): If expand_one_hard_reg_var clears -+ DECL_HARD_REGISTER, call expand_one_error_var. -+ * config/arm/arm.c (arm_hard_regno_mode_ok): Do not allow -+ CC_REGNUM with non-MODE_CC modes. -+ (arm_regno_class): Return NO_REGS for PC_REGNUM. -+ -+2014-09-03 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r214503. -+ 2014-08-26 Evandro Menezes <e.menezes@samsung.com> -+ -+ * config/arm/aarch64/aarch64.c (generic_addrcost_table): Delete -+ qi cost; add di cost. -+ (cortexa57_addrcost_table): Likewise. -+ -+2014-09-03 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213659. -+ 2014-08-06 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_evpc_dup): Enable for bigendian. -+ (aarch64_expand_vec_perm_const): Check for dup before zip. -+ -+2014-09-02 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213651. -+ 2014-08-06 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_classify_address): Use REG_P and -+ CONST_INT_P instead of GET_CODE and compare. -+ (aarch64_select_cc_mode): Likewise. -+ (aarch64_print_operand): Likewise. -+ (aarch64_rtx_costs): Likewise. -+ (aarch64_simd_valid_immediate): Likewise. -+ (aarch64_simd_check_vect_par_cnst_half): Likewise. -+ (aarch64_simd_emit_pair_result_insn): Likewise. -+ -+2014-08-29 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r212978. -+ 2014-07-24 Andreas Schwab <schwab@suse.de> -+ -+ * lib/target-supports.exp (check_effective_target_arm_nothumb): -+ Also check for __arm__. -+ -+2014-08-29 Christophe Lyon <christophe.lyon@linaro.org> -+ -+ Fix backport from trunk 211440: -+ * config.gcc (aarch64*-*-*): Restore need_64bit_hwint=yes. -+ -+ This is necessary to build aarch64* compilers on i686 host. -+ -+2014-08-26 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213627. -+ 2014-08-05 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/aarch64/aarch64-builtins.c -+ (aarch64_simd_builtin_type_mode): Delete. -+ (v8qi_UP): Remap to V8QImode. -+ (v4hi_UP): Remap to V4HImode. -+ (v2si_UP): Remap to V2SImode. -+ (v2sf_UP): Remap to V2SFmode. -+ (v1df_UP): Remap to V1DFmode. -+ (di_UP): Remap to DImode. -+ (df_UP): Remap to DFmode. -+ (v16qi_UP):V16QImode. -+ (v8hi_UP): Remap to V8HImode. -+ (v4si_UP): Remap to V4SImode. -+ (v4sf_UP): Remap to V4SFmode. -+ (v2di_UP): Remap to V2DImode. -+ (v2df_UP): Remap to V2DFmode. -+ (ti_UP): Remap to TImode. -+ (ei_UP): Remap to EImode. -+ (oi_UP): Remap to OImode. -+ (ci_UP): Map to CImode. -+ (xi_UP): Remap to XImode. -+ (si_UP): Remap to SImode. -+ (sf_UP): Remap to SFmode. -+ (hi_UP): Remap to HImode. -+ (qi_UP): Remap to QImode. -+ (aarch64_simd_builtin_datum): Make mode a machine_mode. -+ (VAR1): Build builtin name. -+ (aarch64_init_simd_builtins): Remove dead code. -+ -+2014-08-26 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213713. 
-+ 2014-08-07 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/arm.md (*cmov<mode>): Set type attribute to fcsel. -+ * config/arm/types.md (f_sels, f_seld): Delete. -+ -+2014-08-26 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213711. -+ 2014-08-07 Ian Bolton <ian.bolton@arm.com> -+ Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_expand_mov_immediate): -+ Use MOVN when one of the half-words is 0xffff. -+ -+2014-08-26 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213632. -+ 2014-08-05 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/cortex-a15.md (cortex_a15_alu_shift): Add crc type -+ to reservation. -+ * config/arm/cortex-a53.md (cortex_a53_alu_shift): Likewise. -+ -+2014-08-26 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213630. -+ 2014-08-05 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/arm.md (clzsi2): Set predicable_short_it attr to no. -+ (rbitsi2): Likewise. -+ (*arm_rev): Set predicable and predicable_short_it attributes. -+ -+2014-08-26 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213557. -+ 2014-08-04 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * doc/md.texi (clrsb): Document. -+ (clz): Change reference to x into operand 1. -+ (ctz): Likewise. -+ (popcount): Likewise. -+ -+2014-08-26 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213551, r213556. -+ 2014-08-04 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> -+ Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * sched-deps.c (try_group_insn): Generalise macro fusion hook usage -+ to any two insns. Update comment. Rename to sched_macro_fuse_insns. -+ (sched_analyze_insn): Update use of try_group_insn to -+ sched_macro_fuse_insns. -+ * config/i386/i386.c (ix86_macro_fusion_pair_p): Reject 2nd -+ arguments that are not conditional jumps. -+ -+2014-08-26 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213490. -+ 2014-08-01 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * config/aarch64/aarch64-simd-builtins.def (dup_lane, get_lane): Delete. -+ -+2014-08-26 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213488. -+ 2014-08-01 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_classify_address): Accept all offset -+ for frame access when strict_p is false. -+ -+2014-08-26 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213485, r213486, r213487. -+ 2014-08-01 Renlin Li <renlin.li@arm.com> -+ Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.c (offset_7bit_signed_scaled_p): Rename to -+ aarch64_offset_7bit_signed_scaled_p, remove static and use it. -+ * config/aarch64/aarch64-protos.h (aarch64_offset_7bit_signed_scaled_p): -+ Declaration. -+ * config/aarch64/predicates.md (aarch64_mem_pair_offset): Define new -+ predicate. -+ * config/aarch64/aarch64.md (loadwb_pair, storewb_pair): Use -+ aarch64_mem_pair_offset. -+ -+ 2014-08-01 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.md (loadwb_pair<GPI:mode>_<P:mode>): Fix -+ offset. -+ (loadwb_pair<GPI:mode>_<P:mode>): Likewise. -+ * config/aarch64/aarch64.c (aarch64_gen_loadwb_pair): Likewise. -+ -+2014-08-26 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213379. -+ 2014-07-31 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/aarch64/aarch64-builtins.c -+ (aarch64_gimple_fold_builtin): Don't fold reduction operations for -+ BYTES_BIG_ENDIAN. 
-+ -+2014-08-26 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213378. -+ 2014-07-31 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_simd_vect_par_cnst_half): Vary -+ the generated mask based on BYTES_BIG_ENDIAN. -+ (aarch64_simd_check_vect_par_cnst_half): New. -+ * config/aarch64/aarch64-protos.h -+ (aarch64_simd_check_vect_par_cnst_half): New. -+ * config/aarch64/predicates.md (vect_par_cnst_hi_half): Refactor -+ the check out to aarch64_simd_check_vect_par_cnst_half. -+ (vect_par_cnst_lo_half): Likewise. -+ * config/aarch64/aarch64-simd.md -+ (aarch64_simd_move_hi_quad_<mode>): Always use vec_par_cnst_lo_half. -+ (move_hi_quad_<mode>): Always generate a low mask. -+ -+2014-08-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r212927, r213304. -+ 2014-07-30 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/arm/arm.c (arm_get_frame_offsets): Adjust condition for -+ Thumb2. -+ -+ 2014-07-23 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/arm/arm.c (arm_get_frame_offsets): If both r3 and other -+ callee-saved registers are available for padding purpose -+ and r3 is not mandatory, then prefer use those callee-saved -+ instead of r3. -+ -+2014-08-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211717, r213692. -+ 2014-08-07 Kugan Vivekanandarajah <kuganv@linaro.org> -+ -+ * config/arm/arm.c (bdesc_2arg): Fix typo. -+ (arm_atomic_assign_expand_fenv): Remove The default implementation. -+ -+ 2014-06-17 Kugan Vivekanandarajah <kuganv@linaro.org> -+ -+ * config/arm/arm.c (arm_atomic_assign_expand_fenv): call -+ default_atomic_assign_expand_fenv for !TARGET_HARD_FLOAT. -+ (arm_init_builtins) : Initialize builtins __builtins_arm_set_fpscr and -+ __builtins_arm_get_fpscr only when TARGET_HARD_FLOAT. -+ * config/arm/vfp.md (set_fpscr): Make pattern conditional on -+ TARGET_HARD_FLOAT. -+ (get_fpscr) : Likewise. -+ -+2014-08-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r212989, r213628. -+ 2014-08-05 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * convert.c (convert_to_integer): Guard transformation to lrint by -+ -fno-math-errno. -+ -+ 2014-07-24 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ PR middle-end/61876 -+ * convert.c (convert_to_integer): Do not convert BUILT_IN_ROUND and cast -+ when flag_errno_math is on. -+ -+2014-08-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ * LINARO-VERSION: Bump version. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ * LINARO-VERSION: Update. -+ -+2014-08-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r212912, r212913. -+ 2014-07-22 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Handle CLRSB, CLZ. -+ (case UNSPEC): Handle UNSPEC_RBIT. -+ -+ 2014-07-22 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/aarch64/aarch64.md: Delete UNSPEC_CLS. -+ (clrsb<mode>2): Use clrsb RTL code instead of UNSPEC_CLS. -+ -+2014-08-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213555. -+ 2014-08-04 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ PR target/61713 -+ * gcc/optabs.c (expand_atomic_test_and_set): Do not try to emit -+ move to subtarget in serial version if result is ignored. -+ -+2014-08-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213376. -+ 2014-07-31 Charles Baylis <charles.baylis@linaro.org> -+ -+ PR target/61948 -+ * config/arm/neon.md (ashldi3_neon): Don't emit arm_ashldi3_1bit unless -+ constraints are satisfied. 
-+ (<shift>di3_neon): Likewise. -+ -+2014-08-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211270, r211271, r211273, r211275, r212943, -+ r212945, r212946, r212947, r212949, r212950, r212951, r212952, r212954, -+ r212955, r212956, r212957, r212958, r212976, r212996, r212997, r212999, -+ r213000. -+ 2014-07-24 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_popwb_single_reg): New function. -+ (aarch64_expand_epilogue): Optimize epilogue when !frame_pointer_needed. -+ -+ 2014-07-24 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_pushwb_single_reg): New function. -+ (aarch64_expand_prologue): Optimize prologue when !frame_pointer_needed. -+ -+ 2014-07-24 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_restore_callee_saves) -+ (aarch64_save_callee_saves): New parameter "skip_wb". -+ (aarch64_expand_prologue, aarch64_expand_epilogue): Update call site. -+ -+ 2014-07-24 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.h (frame): New fields "wb_candidate1" and -+ "wb_candidate2". -+ * config/aarch64/aarch64.c (aarch64_layout_frame): Initialize above. -+ -+ 2014-07-24 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_expand_epilogue): Don't -+ subtract outgoing area size when restoring stack_pointer_rtx. -+ -+ 2014-07-23 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_popwb_pair_reg) -+ (aarch64_gen_loadwb_pair): New helper function. -+ (aarch64_expand_epilogue): Simplify code using new helper functions. -+ * config/aarch64/aarch64.md (loadwb_pair<GPF:mode>_<P:mode>): Define. -+ -+ 2014-07-23 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_pushwb_pair_reg) -+ (aarch64_gen_storewb_pair): New helper function. -+ (aarch64_expand_prologue): Simplify code using new helper functions. -+ * config/aarch64/aarch64.md (storewb_pair<GPF:mode>_<P:mode>): Define. -+ -+ 2014-07-23 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.md: (aarch64_save_or_restore_callee_saves): -+ Rename to aarch64_save_callee_saves, remove restore code. -+ (aarch64_restore_callee_saves): New function. -+ -+ 2014-07-23 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_save_or_restore_fprs): Deleted. -+ (aarch64_save_callee_saves): New function to handle reg save -+ for both core and vectore regs. -+ -+ 2014-07-23 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_gen_load_pair) -+ (aarch64_gen_store_pair): New helper function. -+ (aarch64_save_or_restore_callee_save_registers) -+ (aarch64_save_or_restore_fprs): Use new helper functions. -+ -+ 2014-07-23 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_next_callee_save): New function. -+ (aarch64_save_or_restore_callee_save_registers) -+ (aarch64_save_or_restore_fprs): Use aarch64_next_callee_save. -+ -+ 2014-07-23 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.c -+ (aarch64_save_or_restore_callee_save_registers) -+ (aarch64_save_or_restore_fprs): Hoist calculation of register rtx. -+ -+ 2014-07-23 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.c -+ (aarch64_save_or_restore_callee_save_registers) -+ (aarch64_save_or_restore_fprs): Remove 'increment'. -+ -+ 2014-07-23 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.c -+ (aarch64_save_or_restore_callee_save_registers) -+ (aarch64_save_or_restore_fprs): Use register offset in -+ cfun->machine->frame.reg_offset. 
-+ -+ 2014-07-23 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.c -+ (aarch64_save_or_restore_callee_save_registers) -+ (aarch64_save_or_restore_fprs): Remove base_rtx. -+ -+ 2014-07-23 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.c -+ (aarch64_save_or_restore_callee_save_registers): Rename 'offset' -+ to 'start_offset'. Remove local variable 'start_offset'. -+ -+ 2014-07-23 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_save_or_restore_fprs): Change -+ type to HOST_WIDE_INT. -+ -+ 2014-07-23 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_expand_prologue) -+ (aarch64_save_or_restore_fprs) -+ (aarch64_save_or_restore_callee_save_registers): GNU-Stylize code. -+ -+ 2014-06-05 Marcus Shawcroft <marcus.shawcroft@arm.com> -+ -+ * config/aarch64/aarch64.h (aarch64_frame): Add hard_fp_offset and -+ frame_size. -+ * config/aarch64/aarch64.c (aarch64_layout_frame): Initialize -+ aarch64_frame hard_fp_offset and frame_size. -+ (aarch64_expand_prologue): Use aarch64_frame hard_fp_offset and -+ frame_size; remove original_frame_size. -+ (aarch64_expand_epilogue, aarch64_final_eh_return_addr): Likewise. -+ (aarch64_initial_elimination_offset): Remove frame_size and -+ offset. Use aarch64_frame frame_size. -+ -+ 2014-06-05 Marcus Shawcroft <marcus.shawcroft@arm.com> -+ Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_layout_frame): Correct -+ initialization of R30 offset. Update offset. Iterate core -+ regisers upto X30. Remove X29, X30 specific code. -+ -+ 2014-06-05 Marcus Shawcroft <marcus.shawcroft@arm.com> -+ Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.c (SLOT_NOT_REQUIRED, SLOT_REQUIRED): Define. -+ (aarch64_layout_frame): Use SLOT_NOT_REQUIRED and SLOT_REQUIRED. -+ (aarch64_register_saved_on_entry): Adjust test. -+ -+ 2014-06-05 Marcus Shawcroft <marcus.shawcroft@arm.com> -+ -+ * config/aarch64/aarch64.h (machine_function): Move -+ saved_varargs_size from here... -+ (aarch64_frameGTY): ... to here. -+ -+ * config/aarch64/aarch64.c (aarch64_expand_prologue) -+ (aarch64_expand_epilogue, aarch64_final_eh_return_addr) -+ (aarch64_initial_elimination_offset) -+ (aarch64_setup_incoming_varargs): Adjust location of -+ saved_varargs_size. -+ -+2014-08-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r212753. -+ 2014-07-17 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_frint_unspec_p): New function. -+ (aarch64_rtx_costs): Handle FIX, UNSIGNED_FIX, UNSPEC. -+ -+2014-08-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r212752. -+ 2014-07-17 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/aarch64/arm_neon.h (vmlal_high_lane_s16): Fix type. -+ (vmlal_high_lane_s32): Likewise. -+ (vmlal_high_lane_u16): Likewise. -+ (vmlal_high_lane_u32): Likewise. -+ (vmlsl_high_lane_s16): Likewise. -+ (vmlsl_high_lane_s32): Likewise. -+ (vmlsl_high_lane_u16): Likewise. -+ (vmlsl_high_lane_u32): Likewise. -+ -+2014-08-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r212512. -+ 2014-07-14 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/cortex-a15.md (cortex_a15_alu): Handle clz, rbit. -+ * config/arm/cortex-a5.md (cortex_a5_alu): Likewise. -+ * config/arm/cortex-a53.md (cortex_a53_alu): Likewise. -+ * config/arm/cortex-a7.md (cortex_a7_alu_reg): Likewise. -+ * config/arm/cortex-a9.md (cortex_a9_dp): Likewise. -+ * config/arm/cortex-m4.md (cortex_m4_alu): Likewise. 
-+ * config/arm/cortex-r4.md (cortex_r4_alu): Likewise. -+ -+2014-08-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r212358. -+ 2014-07-08 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/arm.c (cortexa5_extra_costs): New table. -+ (arm_cortex_a5_tune): Use cortexa5_extra_costs. -+ -+2014-08-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r212296. -+ 2014-07-04 Tom de Vries <tom@codesourcery.com> -+ -+ * config/aarch64/aarch64-simd.md -+ (define_insn "vec_unpack_trunc_<mode>"): Fix constraint. -+ -+2014-08-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r212142, r212225. -+ 2014-07-02 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_expand_vec_perm): Delete unused -+ variable i. -+ -+ 2014-06-30 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * config/aarch64/aarch64-simd.md (vec_perm): Enable for bigendian. -+ * config/aarch64/aarch64.c (aarch64_expand_vec_perm): Remove assert -+ against bigendian and adjust indices. -+ -+2014-08-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211779. -+ 2014-06-18 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/arm_neon.h (vadd_f32): Change #ifdef to __FAST_MATH. -+ -+2014-07-30 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211503. -+ 2014-06-12 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * config/aarch64/arm_neon.h (vmlaq_n_f64, vmlsq_n_f64, vrsrtsq_f64, -+ vcge_p8, vcgeq_p8, vcgez_p8, vcgez_u8, vcgez_u16, vcgez_u32, vcgez_u64, -+ vcgezq_p8, vcgezq_u8, vcgezq_u16, vcgezq_u32, vcgezq_u64, vcgezd_u64, -+ vcgt_p8, vcgtq_p8, vcgtz_p8, vcgtz_u8, vcgtz_u16, vcgtz_u32, vcgtz_u64, -+ vcgtzq_p8, vcgtzq_u8, vcgtzq_u16, vcgtzq_u32, vcgtzq_u64, vcgtzd_u64, -+ vcle_p8, vcleq_p8, vclez_p8, vclez_u64, vclezq_p8, vclezd_u64, vclt_p8, -+ vcltq_p8, vcltz_p8, vcltzq_p8, vcltzd_u64): Remove functions as they are -+ not in the spec. -+ -+2014-07-30 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211140. -+ 2014-06-02 Marcus Shawcroft <marcus.shawcroft@arm.com> -+ -+ * config/aarch64/aarch64.md (set_fpcr): Drop ISB after FPCR write. -+ -+2014-07-29 Yvan Roux <yvan.roux@linaro.org> -+ -+ * LINARO-VERSION: Bump version. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ * LINARO-VERSION: Update. -+ -+2014-07-20 Yvan Roux <yvan.roux@linaro.org> -+ -+ Revert: -+ 2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211129. -+ 2014-06-02 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> -+ -+ PR target/61154 -+ * config/arm/arm.h (TARGET_SUPPORTS_WIDE_INT): Define. -+ * config/arm/arm.md (mov64 splitter): Replace const_double_operand -+ with immediate_operand. -+ -+2014-07-19 Yvan Roux <yvan.roux@linaro.org> -+ -+ * LINARO-VERSION: Bump version. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ * LINARO-VERSION: Update. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211887, r211899. -+ 2014-06-23 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/aarch64/aarch64.md (addsi3_aarch64): Set "simd" attr to -+ "yes" where needed. -+ -+ 2014-06-23 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/aarch64/aarch64.md (*addsi3_aarch64): Add alternative in -+ vector registers. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211440. -+ 2014-06-11 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config.gcc (aarch64*-*-*): Add arm_acle.h to extra headers. 
-+ * Makefile.in (TEXI_GCC_FILES): Add aarch64-acle-intrinsics.texi to -+ dependencies. -+ * config/aarch64/aarch64-builtins.c (AARCH64_CRC32_BUILTINS): Define. -+ (aarch64_crc_builtin_datum): New struct. -+ (aarch64_crc_builtin_data): New. -+ (aarch64_init_crc32_builtins): New function. -+ (aarch64_init_builtins): Initialise CRC32 builtins when appropriate. -+ (aarch64_crc32_expand_builtin): New. -+ (aarch64_expand_builtin): Add CRC32 builtin expansion case. -+ * config/aarch64/aarch64.h (TARGET_CPU_CPP_BUILTINS): Define -+ __ARM_FEATURE_CRC32 when appropriate. -+ (TARGET_CRC32): Define. -+ * config/aarch64/aarch64.md (UNSPEC_CRC32B, UNSPEC_CRC32H, -+ UNSPEC_CRC32W, UNSPEC_CRC32X, UNSPEC_CRC32CB, UNSPEC_CRC32CH, -+ UNSPEC_CRC32CW, UNSPEC_CRC32CX): New unspec values. -+ (aarch64_<crc_variant>): New pattern. -+ * config/aarch64/arm_acle.h: New file. -+ * config/aarch64/iterators.md (CRC): New int iterator. -+ (crc_variant, crc_mode): New int attributes. -+ * doc/aarch64-acle-intrinsics.texi: New file. -+ * doc/extend.texi (aarch64): Document aarch64 ACLE intrinsics. -+ Include aarch64-acle-intrinsics.texi. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211174. -+ 2014-06-03 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * config/aarch64/aarch64-simd.md (aarch64_rev<REVERSE:rev-op><mode>): -+ New pattern. -+ * config/aarch64/aarch64.c (aarch64_evpc_rev): New function. -+ (aarch64_expand_vec_perm_const_1): Add call to aarch64_evpc_rev. -+ * config/aarch64/iterators.md (REVERSE): New iterator. -+ (UNSPEC_REV64, UNSPEC_REV32, UNSPEC_REV16): New enum elements. -+ (rev_op): New int_attribute. -+ * config/aarch64/arm_neon.h (vrev16_p8, vrev16_s8, vrev16_u8, -+ vrev16q_p8, vrev16q_s8, vrev16q_u8, vrev32_p8, vrev32_p16, vrev32_s8, -+ vrev32_s16, vrev32_u8, vrev32_u16, vrev32q_p8, vrev32q_p16, vrev32q_s8, -+ vrev32q_s16, vrev32q_u8, vrev32q_u16, vrev64_f32, vrev64_p8, -+ vrev64_p16, vrev64_s8, vrev64_s16, vrev64_s32, vrev64_u8, vrev64_u16, -+ vrev64_u32, vrev64q_f32, vrev64q_p8, vrev64q_p16, vrev64q_s8, -+ vrev64q_s16, vrev64q_s32, vrev64q_u8, vrev64q_u16, vrev64q_u32): -+ Replace temporary __asm__ with __builtin_shuffle. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r210216, r210218, r210219. -+ 2014-05-08 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> -+ -+ * config/arm/arm_neon.h: Update comment. -+ * config/arm/neon-docgen.ml: Delete. -+ * config/arm/neon-gen.ml: Delete. -+ * doc/arm-neon-intrinsics.texi: Update comment. -+ -+ 2014-05-08 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> -+ -+ * config/arm/arm_neon_builtins.def (vadd, vsub): Only define the v2sf -+ and v4sf versions. -+ (vand, vorr, veor, vorn, vbic): Remove. -+ * config/arm/neon.md (neon_vadd, neon_vsub, neon_vadd_unspec): Adjust -+ iterator. -+ (neon_vsub_unspec): Likewise. -+ (neon_vorr, neon_vand, neon_vbic, neon_veor, neon_vorn): Remove. -+ -+ 2014-05-08 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> -+ -+ * config/arm/arm_neon.h (vadd_s8): GNU C implementation -+ (vadd_s16): Likewise. -+ (vadd_s32): Likewise. -+ (vadd_f32): Likewise. -+ (vadd_u8): Likewise. -+ (vadd_u16): Likewise. -+ (vadd_u32): Likewise. -+ (vadd_s64): Likewise. -+ (vadd_u64): Likewise. -+ (vaddq_s8): Likewise. -+ (vaddq_s16): Likewise. -+ (vaddq_s32): Likewise. -+ (vaddq_s64): Likewise. -+ (vaddq_f32): Likewise. -+ (vaddq_u8): Likewise. -+ (vaddq_u16): Likewise. -+ (vaddq_u32): Likewise. -+ (vaddq_u64): Likewise. -+ (vmul_s8): Likewise. -+ (vmul_s16): Likewise. -+ (vmul_s32): Likewise. 
-+ (vmul_f32): Likewise. -+ (vmul_u8): Likewise. -+ (vmul_u16): Likewise. -+ (vmul_u32): Likewise. -+ (vmul_p8): Likewise. -+ (vmulq_s8): Likewise. -+ (vmulq_s16): Likewise. -+ (vmulq_s32): Likewise. -+ (vmulq_f32): Likewise. -+ (vmulq_u8): Likewise. -+ (vmulq_u16): Likewise. -+ (vmulq_u32): Likewise. -+ (vsub_s8): Likewise. -+ (vsub_s16): Likewise. -+ (vsub_s32): Likewise. -+ (vsub_f32): Likewise. -+ (vsub_u8): Likewise. -+ (vsub_u16): Likewise. -+ (vsub_u32): Likewise. -+ (vsub_s64): Likewise. -+ (vsub_u64): Likewise. -+ (vsubq_s8): Likewise. -+ (vsubq_s16): Likewise. -+ (vsubq_s32): Likewise. -+ (vsubq_s64): Likewise. -+ (vsubq_f32): Likewise. -+ (vsubq_u8): Likewise. -+ (vsubq_u16): Likewise. -+ (vsubq_u32): Likewise. -+ (vsubq_u64): Likewise. -+ (vand_s8): Likewise. -+ (vand_s16): Likewise. -+ (vand_s32): Likewise. -+ (vand_u8): Likewise. -+ (vand_u16): Likewise. -+ (vand_u32): Likewise. -+ (vand_s64): Likewise. -+ (vand_u64): Likewise. -+ (vandq_s8): Likewise. -+ (vandq_s16): Likewise. -+ (vandq_s32): Likewise. -+ (vandq_s64): Likewise. -+ (vandq_u8): Likewise. -+ (vandq_u16): Likewise. -+ (vandq_u32): Likewise. -+ (vandq_u64): Likewise. -+ (vorr_s8): Likewise. -+ (vorr_s16): Likewise. -+ (vorr_s32): Likewise. -+ (vorr_u8): Likewise. -+ (vorr_u16): Likewise. -+ (vorr_u32): Likewise. -+ (vorr_s64): Likewise. -+ (vorr_u64): Likewise. -+ (vorrq_s8): Likewise. -+ (vorrq_s16): Likewise. -+ (vorrq_s32): Likewise. -+ (vorrq_s64): Likewise. -+ (vorrq_u8): Likewise. -+ (vorrq_u16): Likewise. -+ (vorrq_u32): Likewise. -+ (vorrq_u64): Likewise. -+ (veor_s8): Likewise. -+ (veor_s16): Likewise. -+ (veor_s32): Likewise. -+ (veor_u8): Likewise. -+ (veor_u16): Likewise. -+ (veor_u32): Likewise. -+ (veor_s64): Likewise. -+ (veor_u64): Likewise. -+ (veorq_s8): Likewise. -+ (veorq_s16): Likewise. -+ (veorq_s32): Likewise. -+ (veorq_s64): Likewise. -+ (veorq_u8): Likewise. -+ (veorq_u16): Likewise. -+ (veorq_u32): Likewise. -+ (veorq_u64): Likewise. -+ (vbic_s8): Likewise. -+ (vbic_s16): Likewise. -+ (vbic_s32): Likewise. -+ (vbic_u8): Likewise. -+ (vbic_u16): Likewise. -+ (vbic_u32): Likewise. -+ (vbic_s64): Likewise. -+ (vbic_u64): Likewise. -+ (vbicq_s8): Likewise. -+ (vbicq_s16): Likewise. -+ (vbicq_s32): Likewise. -+ (vbicq_s64): Likewise. -+ (vbicq_u8): Likewise. -+ (vbicq_u16): Likewise. -+ (vbicq_u32): Likewise. -+ (vbicq_u64): Likewise. -+ (vorn_s8): Likewise. -+ (vorn_s16): Likewise. -+ (vorn_s32): Likewise. -+ (vorn_u8): Likewise. -+ (vorn_u16): Likewise. -+ (vorn_u32): Likewise. -+ (vorn_s64): Likewise. -+ (vorn_u64): Likewise. -+ (vornq_s8): Likewise. -+ (vornq_s16): Likewise. -+ (vornq_s32): Likewise. -+ (vornq_s64): Likewise. -+ (vornq_u8): Likewise. -+ (vornq_u16): Likewise. -+ (vornq_u32): Likewise. -+ (vornq_u64): Likewise. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r210151. -+ 2014-05-07 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * config/aarch64/arm_neon.h (vtrn1_f32, vtrn1_p8, vtrn1_p16, vtrn1_s8, -+ vtrn1_s16, vtrn1_s32, vtrn1_u8, vtrn1_u16, vtrn1_u32, vtrn1q_f32, -+ vtrn1q_f64, vtrn1q_p8, vtrn1q_p16, vtrn1q_s8, vtrn1q_s16, vtrn1q_s32, -+ vtrn1q_s64, vtrn1q_u8, vtrn1q_u16, vtrn1q_u32, vtrn1q_u64, vtrn2_f32, -+ vtrn2_p8, vtrn2_p16, vtrn2_s8, vtrn2_s16, vtrn2_s32, vtrn2_u8, -+ vtrn2_u16, vtrn2_u32, vtrn2q_f32, vtrn2q_f64, vtrn2q_p8, vtrn2q_p16, -+ vtrn2q_s8, vtrn2q_s16, vtrn2q_s32, vtrn2q_s64, vtrn2q_u8, vtrn2q_u16, -+ vtrn2q_u32, vtrn2q_u64): Replace temporary asm with __builtin_shuffle. 
-+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209794. -+ 2014-04-25 Marek Polacek <polacek@redhat.com> -+ -+ PR c/60114 -+ * c-parser.c (c_parser_initelt): Pass input_location to -+ process_init_element. -+ (c_parser_initval): Pass loc to process_init_element. -+ * c-tree.h (process_init_element): Adjust declaration. -+ * c-typeck.c (push_init_level): Pass input_location to -+ process_init_element. -+ (pop_init_level): Likewise. -+ (set_designator): Likewise. -+ (output_init_element): Add location_t parameter. Pass loc to -+ digest_init. -+ (output_pending_init_elements): Pass input_location to -+ output_init_element. -+ (process_init_element): Add location_t parameter. Pass loc to -+ output_init_element. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211771. -+ 2014-06-18 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * genattrtab.c (n_bypassed): New variable. -+ (process_bypasses): Initialise n_bypassed. -+ Count number of bypassed reservations. -+ (make_automaton_attrs): Allocate space for bypassed reservations -+ rather than number of bypasses. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r210861. -+ 2014-05-23 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/predicates.md (aarch64_call_insn_operand): New -+ predicate. -+ * config/aarch64/constraints.md ("Ucs", "Usf"): New constraints. -+ * config/aarch64/aarch64.md (*sibcall_insn, *sibcall_value_insn): -+ Adjust for tailcalling through registers. -+ * config/aarch64/aarch64.h (enum reg_class): New caller save -+ register class. -+ (REG_CLASS_NAMES): Likewise. -+ (REG_CLASS_CONTENTS): Likewise. -+ * config/aarch64/aarch64.c (aarch64_function_ok_for_sibcall): -+ Allow tailcalling without decls. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211314. -+ 2014-06-06 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/aarch64/aarch64-protos.h (aarch64_expand_movmem): New. -+ * config/aarch64/aarch64.c (aarch64_move_pointer): New. -+ (aarch64_progress_pointer): Likewise. -+ (aarch64_copy_one_part_and_move_pointers): Likewise. -+ (aarch64_expand_movmen): Likewise. -+ * config/aarch64/aarch64.h (MOVE_RATIO): Set low. -+ * config/aarch64/aarch64.md (movmem<mode>): New. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211185, 211186. -+ 2014-06-03 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc/config/aarch64/aarch64-builtins.c -+ (aarch64_types_binop_uus_qualifiers, -+ aarch64_types_shift_to_unsigned_qualifiers, -+ aarch64_types_unsigned_shiftacc_qualifiers): Define. -+ * gcc/config/aarch64/aarch64-simd-builtins.def (uqshl, uqrshl, uqadd, -+ uqsub, usqadd, usra_n, ursra_n, uqshrn_n, uqrshrn_n, usri_n, usli_n, -+ sqshlu_n, uqshl_n): Update qualifiers. 
-+ * gcc/config/aarch64/arm_neon.h (vqadd_u8, vqadd_u16, vqadd_u32,
-+ vqadd_u64, vqaddq_u8, vqaddq_u16, vqaddq_u32, vqaddq_u64, vqsub_u8,
-+ vqsub_u16, vqsub_u32, vqsub_u64, vqsubq_u8, vqsubq_u16, vqsubq_u32,
-+ vqsubq_u64, vqaddb_u8, vqaddh_u16, vqadds_u32, vqaddd_u64, vqrshl_u8,
-+ vqrshl_u16, vqrshl_u32, vqrshl_u64, vqrshlq_u8, vqrshlq_u16,
-+ vqrshlq_u32, vqrshlq_u64, vqrshlb_u8, vqrshlh_u16, vqrshls_u32,
-+ vqrshld_u64, vqrshrn_n_u16, vqrshrn_n_u32, vqrshrn_n_u64,
-+ vqrshrnh_n_u16, vqrshrns_n_u32, vqrshrnd_n_u64, vqshl_u8, vqshl_u16,
-+ vqshl_u32, vqshl_u64, vqshlq_u8, vqshlq_u16, vqshlq_u32, vqshlq_u64,
-+ vqshlb_u8, vqshlh_u16, vqshls_u32, vqshld_u64, vqshl_n_u8, vqshl_n_u16,
-+ vqshl_n_u32, vqshl_n_u64, vqshlq_n_u8, vqshlq_n_u16, vqshlq_n_u32,
-+ vqshlq_n_u64, vqshlb_n_u8, vqshlh_n_u16, vqshls_n_u32, vqshld_n_u64,
-+ vqshlu_n_s8, vqshlu_n_s16, vqshlu_n_s32, vqshlu_n_s64, vqshluq_n_s8,
-+ vqshluq_n_s16, vqshluq_n_s32, vqshluq_n_s64, vqshlub_n_s8,
-+ vqshluh_n_s16, vqshlus_n_s32, vqshlud_n_s64, vqshrn_n_u16,
-+ vqshrn_n_u32, vqshrn_n_u64, vqshrnh_n_u16, vqshrns_n_u32,
-+ vqshrnd_n_u64, vqsubb_u8, vqsubh_u16, vqsubs_u32, vqsubd_u64,
-+ vrsra_n_u8, vrsra_n_u16, vrsra_n_u32, vrsra_n_u64, vrsraq_n_u8,
-+ vrsraq_n_u16, vrsraq_n_u32, vrsraq_n_u64, vrsrad_n_u64, vsli_n_u8,
-+ vsli_n_u16, vsli_n_u32,vsli_n_u64, vsliq_n_u8, vsliq_n_u16,
-+ vsliq_n_u32, vsliq_n_u64, vslid_n_u64, vsqadd_u8, vsqadd_u16,
-+ vsqadd_u32, vsqadd_u64, vsqaddq_u8, vsqaddq_u16, vsqaddq_u32,
-+ vsqaddq_u64, vsqaddb_u8, vsqaddh_u16, vsqadds_u32, vsqaddd_u64,
-+ vsra_n_u8, vsra_n_u16, vsra_n_u32, vsra_n_u64, vsraq_n_u8,
-+ vsraq_n_u16, vsraq_n_u32, vsraq_n_u64, vsrad_n_u64, vsri_n_u8,
-+ vsri_n_u16, vsri_n_u32, vsri_n_u64, vsriq_n_u8, vsriq_n_u16,
-+ vsriq_n_u32, vsriq_n_u64, vsrid_n_u64): Remove casts.
-+
-+ 2014-06-03  Alan Lawrence  <alan.lawrence@arm.com>
-+
-+ * gcc/config/aarch64/aarch64-builtins.c
-+ (aarch64_types_binop_ssu_qualifiers): New static data.
-+ (TYPES_BINOP_SSU): Define.
-+ * gcc/config/aarch64/aarch64-simd-builtins.def (suqadd, ushl, urshl,
-+ urshr_n, ushll_n): Use appropriate unsigned qualifiers.
-+ * gcc/config/aarch64/arm_neon.h (vrshl_u8, vrshl_u16, vrshl_u32,
-+ vrshl_u64, vrshlq_u8, vrshlq_u16, vrshlq_u32, vrshlq_u64, vrshld_u64,
-+ vrshr_n_u8, vrshr_n_u16, vrshr_n_u32, vrshr_n_u64, vrshrq_n_u8,
-+ vrshrq_n_u16, vrshrq_n_u32, vrshrq_n_u64, vrshrd_n_u64, vshll_n_u8,
-+ vshll_n_u16, vshll_n_u32, vuqadd_s8, vuqadd_s16, vuqadd_s32,
-+ vuqadd_s64, vuqaddq_s8, vuqaddq_s16, vuqaddq_s32, vuqaddq_s64,
-+ vuqaddb_s8, vuqaddh_s16, vuqadds_s32, vuqaddd_s64): Add signedness
-+ suffix to builtin function name, remove cast.
-+ (vshl_s8, vshl_s16, vshl_s32, vshl_s64, vshl_u8, vshl_u16, vshl_u32,
-+ vshl_u64, vshlq_s8, vshlq_s16, vshlq_s32, vshlq_s64, vshlq_u8,
-+ vshlq_u16, vshlq_u32, vshlq_u64, vshld_s64, vshld_u64): Remove cast.
-+
-+2014-07-16  Yvan Roux  <yvan.roux@linaro.org>
-+
-+ Backport from trunk r211408, 211416.
-+ 2014-06-10  Marcus Shawcroft  <marcus.shawcroft@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_save_or_restore_fprs): Fix
-+ REG_CFA_RESTORE mode.
-+
-+ 2014-06-10  Jiong Wang  <jiong.wang@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_save_or_restore_fprs)
-+ (aarch64_save_or_restore_callee_save_registers): Fix layout.
-+
-+2014-07-16  Yvan Roux  <yvan.roux@linaro.org>
-+
-+ Backport from trunk r211418.
-+ 2014-06-10  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
-+
-+ * config/aarch64/aarch64-simd.md (move_lo_quad_<mode>):
-+ Change second alternative type to f_mcr.
-+ * config/aarch64/aarch64.md (*movsi_aarch64): Change 11th -+ and 12th alternatives' types to f_mcr and f_mrc. -+ (*movdi_aarch64): Same for 12th and 13th alternatives. -+ (*movsf_aarch64): Change 9th alternatives' type to mov_reg. -+ (aarch64_movtilow_tilow): Change type to fmov. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211371. -+ 2014-06-09 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> -+ -+ * config/arm/arm-modes.def: Remove XFmode. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211268. -+ 2014-06-05 Marcus Shawcroft <marcus.shawcroft@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_expand_prologue): Update stack -+ layout comment. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211129. -+ 2014-06-02 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> -+ -+ PR target/61154 -+ * config/arm/arm.h (TARGET_SUPPORTS_WIDE_INT): Define. -+ * config/arm/arm.md (mov64 splitter): Replace const_double_operand -+ with immediate_operand. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211073. -+ 2014-05-30 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/thumb2.md (*thumb2_movhi_insn): Set type of movw -+ to mov_imm. -+ * config/arm/vfp.md (*thumb2_movsi_vfp): Likewise. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211050. -+ 2014-05-29 Richard Earnshaw <rearnsha@arm.com> -+ Richard Sandiford <rdsandiford@googlemail.com> -+ -+ * arm/iterators.md (shiftable_ops): New code iterator. -+ (t2_binop0, arith_shift_insn): New code attributes. -+ * arm/predicates.md (shift_nomul_operator): New predicate. -+ * arm/arm.md (insn_enabled): Delete. -+ (enabled): Remove insn_enabled test. -+ (*arith_shiftsi): Delete. Replace with ... -+ (*<arith_shift_insn>_multsi): ... new pattern. -+ (*<arith_shift_insn>_shiftsi): ... new pattern. -+ * config/arm/arm.c (arm_print_operand): Handle operand format 'b'. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r210996. -+ 2014-05-27 Andrew Pinski <apinski@cavium.com> -+ -+ * config/aarch64/aarch64.md (stack_protect_set_<mode>): -+ Use <w> for the register in assembly template. -+ (stack_protect_test): Use the mode of operands[0] for the -+ result. -+ (stack_protect_test_<mode>): Use <w> for the register -+ in assembly template. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r210967. -+ 2014-05-27 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/neon.md (neon_bswap<mode>): New pattern. -+ * config/arm/arm.c (neon_itype): Add NEON_BSWAP. -+ (arm_init_neon_builtins): Handle NEON_BSWAP. -+ Define required type nodes. -+ (arm_expand_neon_builtin): Handle NEON_BSWAP. -+ (arm_builtin_vectorized_function): Handle BUILTIN_BSWAP builtins. -+ * config/arm/arm_neon_builtins.def (bswap): Define builtins. -+ * config/arm/iterators.md (VDQHSD): New mode iterator. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r210471. -+ 2014-05-15 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/arm.c (arm_option_override): Use the SCHED_PRESSURE_MODEL -+ enum name for PARAM_SCHED_PRESSURE_ALGORITHM. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r210369. -+ 2014-05-13 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/arm.c (neon_itype): Remove NEON_RESULTPAIR. -+ (arm_init_neon_builtins): Remove handling of NEON_RESULTPAIR. -+ Remove associated type declarations and initialisations. 
-+ (arm_expand_neon_builtin): Likewise. -+ (neon_emit_pair_result_insn): Delete. -+ * config/arm/arm_neon_builtins (vtrn, vzip, vuzp): Delete. -+ * config/arm/neon.md (neon_vtrn<mode>): Delete. -+ (neon_vzip<mode>): Likewise. -+ (neon_vuzp<mode>): Likewise. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211058, 211177. -+ 2014-05-29 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * config/aarch64/aarch64-builtins.c (aarch64_types_binopv_qualifiers, -+ TYPES_BINOPV): New static data. -+ * config/aarch64/aarch64-simd-builtins.def (im_lane_bound): New builtin. -+ * config/aarch64/aarch64-simd.md (aarch64_ext, aarch64_im_lane_boundsi): -+ New patterns. -+ * config/aarch64/aarch64.c (aarch64_expand_vec_perm_const_1): Match -+ patterns for EXT. -+ (aarch64_evpc_ext): New function. -+ -+ * config/aarch64/iterators.md (UNSPEC_EXT): New enum element. -+ -+ * config/aarch64/arm_neon.h (vext_f32, vext_f64, vext_p8, vext_p16, -+ vext_s8, vext_s16, vext_s32, vext_s64, vext_u8, vext_u16, vext_u32, -+ vext_u64, vextq_f32, vextq_f64, vextq_p8, vextq_p16, vextq_s8, -+ vextq_s16, vextq_s32, vextq_s64, vextq_u8, vextq_u16, vextq_u32, -+ vextq_u64): Replace __asm with __builtin_shuffle and im_lane_boundsi. -+ -+ 2014-06-03 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_evpc_ext): allow and handle -+ location == 0. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209797. -+ 2014-04-25 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/aarch-common.c (aarch_rev16_shright_mask_imm_p): -+ Use HOST_WIDE_INT_C for mask literal. -+ (aarch_rev16_shleft_mask_imm_p): Likewise. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211148. -+ 2014-06-02 Andrew Pinski <apinski@cavium.com> -+ -+ * config/aarch64/aarch64-linux.h (GLIBC_DYNAMIC_LINKER): -+ /lib/ld-linux32-aarch64.so.1 is used for ILP32. -+ (LINUX_TARGET_LINK_SPEC): Update linker script for ILP32. -+ file whose name depends on -mabi= and -mbig-endian. -+ * config/aarch64/t-aarch64-linux (MULTILIB_OSDIRNAMES): Handle LP64 -+ better and handle ilp32 too. -+ (MULTILIB_OPTIONS): Delete. -+ (MULTILIB_DIRNAMES): Delete. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r210828, r211103. -+ 2014-05-31 Kugan Vivekanandarajah <kuganv@linaro.org> -+ -+ * config/arm/arm.c (TARGET_ATOMIC_ASSIGN_EXPAND_FENV): New define. -+ (arm_builtins) : Add ARM_BUILTIN_GET_FPSCR and ARM_BUILTIN_SET_FPSCR. -+ (bdesc_2arg) : Add description for builtins __builtins_arm_set_fpscr -+ and __builtins_arm_get_fpscr. -+ (arm_init_builtins) : Initialize builtins __builtins_arm_set_fpscr and -+ __builtins_arm_get_fpscr. -+ (arm_expand_builtin) : Expand builtins __builtins_arm_set_fpscr and -+ __builtins_arm_ldfpscr. -+ (arm_atomic_assign_expand_fenv): New function. -+ * config/arm/vfp.md (set_fpscr): New pattern. -+ (get_fpscr) : Likewise. -+ * config/arm/unspecs.md (unspecv): Add VUNSPEC_GET_FPSCR and -+ VUNSPEC_SET_FPSCR. -+ * doc/extend.texi (AARCH64 Built-in Functions) : Document -+ __builtins_arm_set_fpscr, __builtins_arm_get_fpscr. -+ -+ 2014-05-23 Kugan Vivekanandarajah <kuganv@linaro.org> -+ -+ * config/aarch64/aarch64.c (TARGET_ATOMIC_ASSIGN_EXPAND_FENV): New -+ define. -+ * config/aarch64/aarch64-protos.h (aarch64_atomic_assign_expand_fenv): -+ New function declaration. -+ * config/aarch64/aarch64-builtins.c (aarch64_builtins) : Add -+ AARCH64_BUILTIN_GET_FPCR, AARCH64_BUILTIN_SET_FPCR. -+ AARCH64_BUILTIN_GET_FPSR and AARCH64_BUILTIN_SET_FPSR. 
-+ (aarch64_init_builtins) : Initialize builtins -+ __builtins_aarch64_set_fpcr, __builtins_aarch64_get_fpcr. -+ __builtins_aarch64_set_fpsr and __builtins_aarch64_get_fpsr. -+ (aarch64_expand_builtin) : Expand builtins __builtins_aarch64_set_fpcr -+ __builtins_aarch64_get_fpcr, __builtins_aarch64_get_fpsr, -+ and __builtins_aarch64_set_fpsr. -+ (aarch64_atomic_assign_expand_fenv): New function. -+ * config/aarch64/aarch64.md (set_fpcr): New pattern. -+ (get_fpcr) : Likewise. -+ (set_fpsr) : Likewise. -+ (get_fpsr) : Likewise. -+ (unspecv): Add UNSPECV_GET_FPCR and UNSPECV_SET_FPCR, UNSPECV_GET_FPSR -+ and UNSPECV_SET_FPSR. -+ * doc/extend.texi (AARCH64 Built-in Functions) : Document -+ __builtins_aarch64_set_fpcr, __builtins_aarch64_get_fpcr. -+ __builtins_aarch64_set_fpsr and __builtins_aarch64_get_fpsr. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r210355. -+ 2014-05-13 Ian Bolton <ian.bolton@arm.com> -+ -+ * config/aarch64/aarch64-protos.h -+ (aarch64_hard_regno_caller_save_mode): New prototype. -+ * config/aarch64/aarch64.c (aarch64_hard_regno_caller_save_mode): -+ New function. -+ * config/aarch64/aarch64.h (HARD_REGNO_CALLER_SAVE_MODE): New macro. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209943. -+ 2014-04-30 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * config/aarch64/arm_neon.h (vuzp1_f32, vuzp1_p8, vuzp1_p16, vuzp1_s8, -+ vuzp1_s16, vuzp1_s32, vuzp1_u8, vuzp1_u16, vuzp1_u32, vuzp1q_f32, -+ vuzp1q_f64, vuzp1q_p8, vuzp1q_p16, vuzp1q_s8, vuzp1q_s16, vuzp1q_s32, -+ vuzp1q_s64, vuzp1q_u8, vuzp1q_u16, vuzp1q_u32, vuzp1q_u64, vuzp2_f32, -+ vuzp2_p8, vuzp2_p16, vuzp2_s8, vuzp2_s16, vuzp2_s32, vuzp2_u8, -+ vuzp2_u16, vuzp2_u32, vuzp2q_f32, vuzp2q_f64, vuzp2q_p8, vuzp2q_p16, -+ vuzp2q_s8, vuzp2q_s16, vuzp2q_s32, vuzp2q_s64, vuzp2q_u8, vuzp2q_u16, -+ vuzp2q_u32, vuzp2q_u64): Replace temporary asm with __builtin_shuffle. -+ -+2014-06-26 Yvan Roux <yvan.roux@linaro.org> -+ -+ * LINARO-VERSION: Bump version. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ * LINARO-VERSION: Update. -+ -+2014-06-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ Revert: -+ 2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209643. -+ 2014-04-22 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> -+ -+ * config/aarch64/aarch64.c (TARGET_FLAGS_REGNUM): Define. -+ -+2014-06-13 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r210493, 210494, 210495, 210496, 210497, 210498, -+ 210499, 210500, 210501, 210502, 210503, 210504, 210505, 210506, 210507, -+ 210508, 210509, 210510, 210512, 211205, 211206. -+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/aarch64/aarch64-protos.h (scale_addr_mode_cost): New. -+ (cpu_addrcost_table): Use it. -+ * config/aarch64/aarch64.c (generic_addrcost_table): Initialize it. -+ (aarch64_address_cost): Rewrite using aarch64_classify_address, -+ move it. -+ -+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/aarch64/aarch64.c (cortexa57_addrcost_table): New. -+ (cortexa57_vector_cost): Likewise. -+ (cortexa57_tunings): Use them. -+ -+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_rtx_costs_wrapper): New. -+ (TARGET_RTX_COSTS): Call it. 
-+ -+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> -+ Philipp Tomsich <philipp.tomsich@theobroma-systems.com> -+ -+ * config/aarch64/aarch64.c (aarch64_build_constant): Conditionally -+ emit instructions, return number of instructions which would -+ be emitted. -+ (aarch64_add_constant): Update call to aarch64_build_constant. -+ (aarch64_output_mi_thunk): Likewise. -+ (aarch64_rtx_costs): Estimate cost of a CONST_INT, cost -+ a CONST_DOUBLE. -+ -+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> -+ Philipp Tomsich <philipp.tomsich@theobroma-systems.com> -+ -+ * config/aarch64/aarch64.c (aarch64_strip_shift_or_extend): Rename -+ to... -+ (aarch64_strip_extend): ...this, don't strip shifts, check RTX is -+ well formed. -+ (aarch64_rtx_mult_cost): New. -+ (aarch64_rtx_costs): Use it, refactor as appropriate. -+ -+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Set default costs. -+ -+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> -+ Philip Tomsich <philipp.tomsich@theobroma-systems.com> -+ -+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Improve costing -+ for SET RTX. -+ -+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> -+ Philipp Tomsich <philipp.tomsich@theobroma-systems.com> -+ -+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Use address -+ costs when costing loads and stores to memory. -+ -+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> -+ Philipp Tomsich <philipp.tomsich@theobroma-systems.com> -+ -+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Improve cost for -+ logical operations. -+ -+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> -+ Philipp Tomsich <philipp.tomsich@theobroma-systems.com> -+ -+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Cost -+ ZERO_EXTEND and SIGN_EXTEND better. -+ -+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> -+ Philipp Tomsich <philipp.tomsich@theobroma-systems.com> -+ -+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Improve costs for -+ rotates and shifts. -+ -+ 2014-03-16 James Greenhalgh <james.greenhalgh@arm.com> -+ Philipp Tomsich <philipp.tomsich@theobroma-systems.com> -+ -+ * config/aarch64/aarch64.c (aarch64_rtx_arith_op_extract_p): New. -+ (aarch64_rtx_costs): Improve costs for SIGN/ZERO_EXTRACT. -+ -+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> -+ Philipp Tomsich <philipp.tomsich@theobroma-systems.com> -+ -+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Improve costs for -+ DIV/MOD. -+ -+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> -+ Philipp Tomsich <philipp.tomsich@theobroma-systems.com> -+ -+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Cost comparison -+ operators. -+ -+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> -+ Philipp Tomsich <philipp.tomsich@theobroma-systems.com> -+ -+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Cost FMA, -+ FLOAT_EXTEND, FLOAT_TRUNCATE, ABS, SMAX, and SMIN. -+ -+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> -+ Philipp Tomsich <philipp.tomsich@theobroma-systems.com> -+ -+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Cost TRUNCATE. -+ -+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Cost SYMBOL_REF, -+ HIGH, LO_SUM. -+ -+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Handle the case -+ where we were unable to cost an RTX. 
-+ -+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_rtx_mult_cost): Fix FNMUL case. -+ -+ 2014-06-03 Andrew Pinski <apinski@cavium.com> -+ -+ * config/aarch64/aarch64.c (aarch64_if_then_else_costs): New function. -+ (aarch64_rtx_costs): Use aarch64_if_then_else_costs. -+ -+ 2014-06-03 Andrew Pinski <apinski@cavium.com> -+ -+ * config/aarch64/aarch64.c (aarch64_if_then_else_costs): Allow non -+ comparisons for OP0. -+ -+2014-06-13 Yvan Roux <yvan.roux@linaro.org> -+ -+ * LINARO-VERSION: Bump version. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ * LINARO-VERSION: Update. -+ -+2014-06-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211211. -+ 2014-06-04 Bin Cheng <bin.cheng@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_classify_address) -+ (aarch64_legitimize_reload_address): Support full addressing modes -+ for vector modes. -+ * config/aarch64/aarch64.md (mov<mode>, movmisalign<mode>) -+ (*aarch64_simd_mov<mode>, *aarch64_simd_mov<mode>): Relax predicates. -+ -+2014-05-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209906. -+ 2014-04-29 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * config/aarch64/arm_neon.h (vzip1_f32, vzip1_p8, vzip1_p16, vzip1_s8, -+ vzip1_s16, vzip1_s32, vzip1_u8, vzip1_u16, vzip1_u32, vzip1q_f32, -+ vzip1q_f64, vzip1q_p8, vzip1q_p16, vzip1q_s8, vzip1q_s16, vzip1q_s32, -+ vzip1q_s64, vzip1q_u8, vzip1q_u16, vzip1q_u32, vzip1q_u64, vzip2_f32, -+ vzip2_p8, vzip2_p16, vzip2_s8, vzip2_s16, vzip2_s32, vzip2_u8, -+ vzip2_u16, vzip2_u32, vzip2q_f32, vzip2q_f64, vzip2q_p8, vzip2q_p16, -+ vzip2q_s8, vzip2q_s16, vzip2q_s32, vzip2q_s64, vzip2q_u8, vzip2q_u16, -+ vzip2q_u32, vzip2q_u64): Replace inline __asm__ with __builtin_shuffle. -+ -+2014-05-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209897. -+ 2014-04-29 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * calls.c (initialize_argument_information): Always treat -+ PUSH_ARGS_REVERSED as 1, simplify code accordingly. -+ (expand_call): Likewise. -+ (emit_library_call_calue_1): Likewise. -+ * expr.c (PUSH_ARGS_REVERSED): Do not define. -+ (emit_push_insn): Always treat PUSH_ARGS_REVERSED as 1, simplify -+ code accordingly. -+ -+2014-05-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209880. -+ 2014-04-28 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/aarch64/aarch64-builtins.c -+ (aarch64_types_storestruct_lane_qualifiers): New. -+ (TYPES_STORESTRUCT_LANE): Likewise. -+ * config/aarch64/aarch64-simd-builtins.def (st2_lane): New. -+ (st3_lane): Likewise. -+ (st4_lane): Likewise. -+ * config/aarch64/aarch64-simd.md (vec_store_lanesoi_lane<mode>): New. -+ (vec_store_lanesci_lane<mode>): Likewise. -+ (vec_store_lanesxi_lane<mode>): Likewise. -+ (aarch64_st2_lane<VQ:mode>): Likewise. -+ (aarch64_st3_lane<VQ:mode>): Likewise. -+ (aarch64_st4_lane<VQ:mode>): Likewise. -+ * config/aarch64/aarch64.md (unspec): Add UNSPEC_ST{2,3,4}_LANE. -+ * config/aarch64/arm_neon.h -+ (__ST2_LANE_FUNC): Rewrite using builtins, update use points to -+ use new macro arguments. -+ (__ST3_LANE_FUNC): Likewise. -+ (__ST4_LANE_FUNC): Likewise. -+ * config/aarch64/iterators.md (V_TWO_ELEM): New. -+ (V_THREE_ELEM): Likewise. -+ (V_FOUR_ELEM): Likewise. -+ -+2014-05-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209878. -+ 2014-04-28 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/aarch64/aarch64-protos.h (aarch64_modes_tieable_p): New. 
-+ * config/aarch64/aarch64.c -+ (aarch64_cannot_change_mode_class): Weaken conditions. -+ (aarch64_modes_tieable_p): New. -+ * config/aarch64/aarch64.h (MODES_TIEABLE_P): Use it. -+ -+2014-05-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209808. -+ 2014-04-25 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/arm/predicates.md (call_insn_operand): Add long_call check. -+ * config/arm/arm.md (sibcall, sibcall_value): Force the address to -+ reg for long_call. -+ * config/arm/arm.c (arm_function_ok_for_sibcall): Remove long_call -+ restriction. -+ -+2014-05-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209806. -+ 2014-04-25 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/arm.c (arm_cortex_a8_tune): Initialise -+ T16-related fields. -+ -+2014-05-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209742, 209749. -+ 2014-04-24 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_evpc_tbl): Enable for bigendian. -+ -+ 2014-04-24 Tejas Belagod <tejas.belagod@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_evpc_tbl): Reverse order of elements -+ for big-endian. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209736. -+ 2014-04-24 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/aarch64/aarch64-builtins.c -+ (aarch64_builtin_vectorized_function): Handle BUILT_IN_BSWAP16, -+ BUILT_IN_BSWAP32, BUILT_IN_BSWAP64. -+ * config/aarch64/aarch64-simd.md (bswap<mode>): New pattern. -+ * config/aarch64/aarch64-simd-builtins.def: Define vector bswap -+ builtins. -+ * config/aarch64/iterator.md (VDQHSD): New mode iterator. -+ (Vrevsuff): New mode attribute. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209712. -+ 2014-04-23 Venkataramanan Kumar <venkataramanan.kumar@linaro.org> -+ -+ * config/aarch64/aarch64.md (stack_protect_set, stack_protect_test) -+ (stack_protect_set_<mode>, stack_protect_test_<mode>): Add -+ machine descriptions for Stack Smashing Protector. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209711. -+ 2014-04-23 Richard Earnshaw <rearnsha@arm.com> -+ -+ * aarch64.md (<optab>_rol<mode>3): New pattern. -+ (<optab>_rolsi3_uxtw): Likewise. -+ * aarch64.c (aarch64_strip_shift): Handle ROTATE and ROTATERT. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209710. -+ 2014-04-23 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/arm/arm.c (arm_cortex_a57_tune): Initialize all fields. -+ (arm_cortex_a12_tune): Likewise. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209706. -+ 2014-04-23 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Handle BSWAP. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209701, 209702, 209703, 209704, 209705. -+ 2014-04-23 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/arm.md (arm_rev16si2): New pattern. -+ (arm_rev16si2_alt): Likewise. -+ * config/arm/arm.c (arm_new_rtx_costs): Handle rev16 case. -+ -+ 2014-04-23 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ * config/aarch64/aarch64.md (rev16<mode>2): New pattern. -+ (rev16<mode>2_alt): Likewise. -+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Handle rev16 case. -+ * config/arm/aarch-common.c (aarch_rev16_shright_mask_imm_p): New. -+ (aarch_rev16_shleft_mask_imm_p): Likewise. -+ (aarch_rev16_p_1): Likewise. -+ (aarch_rev16_p): Likewise. 
-+ * config/arm/aarch-common-protos.h (aarch_rev16_p): Declare extern. -+ (aarch_rev16_shright_mask_imm_p): Likewise. -+ (aarch_rev16_shleft_mask_imm_p): Likewise. -+ -+ 2014-04-23 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/aarch-common-protos.h (alu_cost_table): Add rev field. -+ * config/arm/aarch-cost-tables.h (generic_extra_costs): Specify -+ rev cost. -+ (cortex_a53_extra_costs): Likewise. -+ (cortex_a57_extra_costs): Likewise. -+ * config/arm/arm.c (cortexa9_extra_costs): Likewise. -+ (cortexa7_extra_costs): Likewise. -+ (cortexa8_extra_costs): Likewise. -+ (cortexa12_extra_costs): Likewise. -+ (cortexa15_extra_costs): Likewise. -+ (v7m_extra_costs): Likewise. -+ (arm_new_rtx_costs): Handle BSWAP. -+ -+ 2013-04-23 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/arm.c (cortexa8_extra_costs): New table. -+ (arm_cortex_a8_tune): New tuning struct. -+ * config/arm/arm-cores.def (cortex-a8): Use cortex_a8 tuning struct. -+ -+ 2014-04-23 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/arm.c (arm_new_rtx_costs): Handle FMA. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209659. -+ 2014-04-22 Richard Henderson <rth@redhat.com> -+ -+ * config/aarch64/aarch64 (addti3, subti3): New expanders. -+ (add<GPI>3_compare0): Remove leading * from name. -+ (add<GPI>3_carryin): Likewise. -+ (sub<GPI>3_compare0): Likewise. -+ (sub<GPI>3_carryin): Likewise. -+ (<su_optab>mulditi3): New expander. -+ (multi3): New expander. -+ (madd<GPI>): Remove leading * from name. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209645. -+ 2014-04-22 Andrew Pinski <apinski@cavium.com> -+ -+ * config/aarch64/aarch64.c (aarch64_load_symref_appropriately): -+ Handle TLS for ILP32. -+ * config/aarch64/aarch64.md (tlsie_small): Rename to ... -+ (tlsie_small_<mode>): this and handle PTR. -+ (tlsie_small_sidi): New pattern. -+ (tlsle_small): Change to an expand to handle ILP32. -+ (tlsle_small_<mode>): New pattern. -+ (tlsdesc_small): Rename to ... -+ (tlsdesc_small_<mode>): this and handle PTR. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209643. -+ 2014-04-22 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> -+ -+ * config/aarch64/aarch64.c (TARGET_FLAGS_REGNUM): Define. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209641, 209642. -+ 2014-04-22 Alex Velenko <Alex.Velenko@arm.com> -+ -+ * config/aarch64/aarch64-builtins.c (TYPES_REINTERP): Removed. -+ (aarch64_types_signed_unsigned_qualifiers): Qualifier added. -+ (aarch64_types_signed_poly_qualifiers): Likewise. -+ (aarch64_types_unsigned_signed_qualifiers): Likewise. -+ (aarch64_types_poly_signed_qualifiers): Likewise. -+ (TYPES_REINTERP_SS): Type macro added. -+ (TYPES_REINTERP_SU): Likewise. -+ (TYPES_REINTERP_SP): Likewise. -+ (TYPES_REINTERP_US): Likewise. -+ (TYPES_REINTERP_PS): Likewise. -+ (aarch64_fold_builtin): New expression folding added. -+ * config/aarch64/aarch64-simd-builtins.def (REINTERP): -+ Declarations removed. -+ (REINTERP_SS): Declarations added. -+ (REINTERP_US): Likewise. -+ (REINTERP_PS): Likewise. -+ (REINTERP_SU): Likewise. -+ (REINTERP_SP): Likewise. -+ * config/aarch64/arm_neon.h (vreinterpret_p8_f64): Implemented. -+ (vreinterpretq_p8_f64): Likewise. -+ (vreinterpret_p16_f64): Likewise. -+ (vreinterpretq_p16_f64): Likewise. -+ (vreinterpret_f32_f64): Likewise. -+ (vreinterpretq_f32_f64): Likewise. -+ (vreinterpret_f64_f32): Likewise. -+ (vreinterpret_f64_p8): Likewise. 
-+ (vreinterpret_f64_p16): Likewise. -+ (vreinterpret_f64_s8): Likewise. -+ (vreinterpret_f64_s16): Likewise. -+ (vreinterpret_f64_s32): Likewise. -+ (vreinterpret_f64_s64): Likewise. -+ (vreinterpret_f64_u8): Likewise. -+ (vreinterpret_f64_u16): Likewise. -+ (vreinterpret_f64_u32): Likewise. -+ (vreinterpret_f64_u64): Likewise. -+ (vreinterpretq_f64_f32): Likewise. -+ (vreinterpretq_f64_p8): Likewise. -+ (vreinterpretq_f64_p16): Likewise. -+ (vreinterpretq_f64_s8): Likewise. -+ (vreinterpretq_f64_s16): Likewise. -+ (vreinterpretq_f64_s32): Likewise. -+ (vreinterpretq_f64_s64): Likewise. -+ (vreinterpretq_f64_u8): Likewise. -+ (vreinterpretq_f64_u16): Likewise. -+ (vreinterpretq_f64_u32): Likewise. -+ (vreinterpretq_f64_u64): Likewise. -+ (vreinterpret_s64_f64): Likewise. -+ (vreinterpretq_s64_f64): Likewise. -+ (vreinterpret_u64_f64): Likewise. -+ (vreinterpretq_u64_f64): Likewise. -+ (vreinterpret_s8_f64): Likewise. -+ (vreinterpretq_s8_f64): Likewise. -+ (vreinterpret_s16_f64): Likewise. -+ (vreinterpretq_s16_f64): Likewise. -+ (vreinterpret_s32_f64): Likewise. -+ (vreinterpretq_s32_f64): Likewise. -+ (vreinterpret_u8_f64): Likewise. -+ (vreinterpretq_u8_f64): Likewise. -+ (vreinterpret_u16_f64): Likewise. -+ (vreinterpretq_u16_f64): Likewise. -+ (vreinterpret_u32_f64): Likewise. -+ (vreinterpretq_u32_f64): Likewise. -+ -+ 2014-04-22 Alex Velenko <Alex.Velenko@arm.com> -+ -+ * config/aarch64/aarch64/aarch64-builtins.c (TYPES_REINTERP): Removed. -+ * config/aarch64/aarch64/aarch64-simd-builtins.def (REINTERP): Removed. -+ (vreinterpret_p8_s8): Likewise. -+ * config/aarch64/aarch64/arm_neon.h (vreinterpret_p8_s8): Uses cast. -+ (vreinterpret_p8_s16): Likewise. -+ (vreinterpret_p8_s32): Likewise. -+ (vreinterpret_p8_s64): Likewise. -+ (vreinterpret_p8_f32): Likewise. -+ (vreinterpret_p8_u8): Likewise. -+ (vreinterpret_p8_u16): Likewise. -+ (vreinterpret_p8_u32): Likewise. -+ (vreinterpret_p8_u64): Likewise. -+ (vreinterpret_p8_p16): Likewise. -+ (vreinterpretq_p8_s8): Likewise. -+ (vreinterpretq_p8_s16): Likewise. -+ (vreinterpretq_p8_s32): Likewise. -+ (vreinterpretq_p8_s64): Likewise. -+ (vreinterpretq_p8_f32): Likewise. -+ (vreinterpretq_p8_u8): Likewise. -+ (vreinterpretq_p8_u16): Likewise. -+ (vreinterpretq_p8_u32): Likewise. -+ (vreinterpretq_p8_u64): Likewise. -+ (vreinterpretq_p8_p16): Likewise. -+ (vreinterpret_p16_s8): Likewise. -+ (vreinterpret_p16_s16): Likewise. -+ (vreinterpret_p16_s32): Likewise. -+ (vreinterpret_p16_s64): Likewise. -+ (vreinterpret_p16_f32): Likewise. -+ (vreinterpret_p16_u8): Likewise. -+ (vreinterpret_p16_u16): Likewise. -+ (vreinterpret_p16_u32): Likewise. -+ (vreinterpret_p16_u64): Likewise. -+ (vreinterpret_p16_p8): Likewise. -+ (vreinterpretq_p16_s8): Likewise. -+ (vreinterpretq_p16_s16): Likewise. -+ (vreinterpretq_p16_s32): Likewise. -+ (vreinterpretq_p16_s64): Likewise. -+ (vreinterpretq_p16_f32): Likewise. -+ (vreinterpretq_p16_u8): Likewise. -+ (vreinterpretq_p16_u16): Likewise. -+ (vreinterpretq_p16_u32): Likewise. -+ (vreinterpretq_p16_u64): Likewise. -+ (vreinterpretq_p16_p8): Likewise. -+ (vreinterpret_f32_s8): Likewise. -+ (vreinterpret_f32_s16): Likewise. -+ (vreinterpret_f32_s32): Likewise. -+ (vreinterpret_f32_s64): Likewise. -+ (vreinterpret_f32_u8): Likewise. -+ (vreinterpret_f32_u16): Likewise. -+ (vreinterpret_f32_u32): Likewise. -+ (vreinterpret_f32_u64): Likewise. -+ (vreinterpret_f32_p8): Likewise. -+ (vreinterpret_f32_p16): Likewise. -+ (vreinterpretq_f32_s8): Likewise. -+ (vreinterpretq_f32_s16): Likewise. 
-+ (vreinterpretq_f32_s32): Likewise. -+ (vreinterpretq_f32_s64): Likewise. -+ (vreinterpretq_f32_u8): Likewise. -+ (vreinterpretq_f32_u16): Likewise. -+ (vreinterpretq_f32_u32): Likewise. -+ (vreinterpretq_f32_u64): Likewise. -+ (vreinterpretq_f32_p8): Likewise. -+ (vreinterpretq_f32_p16): Likewise. -+ (vreinterpret_s64_s8): Likewise. -+ (vreinterpret_s64_s16): Likewise. -+ (vreinterpret_s64_s32): Likewise. -+ (vreinterpret_s64_f32): Likewise. -+ (vreinterpret_s64_u8): Likewise. -+ (vreinterpret_s64_u16): Likewise. -+ (vreinterpret_s64_u32): Likewise. -+ (vreinterpret_s64_u64): Likewise. -+ (vreinterpret_s64_p8): Likewise. -+ (vreinterpret_s64_p16): Likewise. -+ (vreinterpretq_s64_s8): Likewise. -+ (vreinterpretq_s64_s16): Likewise. -+ (vreinterpretq_s64_s32): Likewise. -+ (vreinterpretq_s64_f32): Likewise. -+ (vreinterpretq_s64_u8): Likewise. -+ (vreinterpretq_s64_u16): Likewise. -+ (vreinterpretq_s64_u32): Likewise. -+ (vreinterpretq_s64_u64): Likewise. -+ (vreinterpretq_s64_p8): Likewise. -+ (vreinterpretq_s64_p16): Likewise. -+ (vreinterpret_u64_s8): Likewise. -+ (vreinterpret_u64_s16): Likewise. -+ (vreinterpret_u64_s32): Likewise. -+ (vreinterpret_u64_s64): Likewise. -+ (vreinterpret_u64_f32): Likewise. -+ (vreinterpret_u64_u8): Likewise. -+ (vreinterpret_u64_u16): Likewise. -+ (vreinterpret_u64_u32): Likewise. -+ (vreinterpret_u64_p8): Likewise. -+ (vreinterpret_u64_p16): Likewise. -+ (vreinterpretq_u64_s8): Likewise. -+ (vreinterpretq_u64_s16): Likewise. -+ (vreinterpretq_u64_s32): Likewise. -+ (vreinterpretq_u64_s64): Likewise. -+ (vreinterpretq_u64_f32): Likewise. -+ (vreinterpretq_u64_u8): Likewise. -+ (vreinterpretq_u64_u16): Likewise. -+ (vreinterpretq_u64_u32): Likewise. -+ (vreinterpretq_u64_p8): Likewise. -+ (vreinterpretq_u64_p16): Likewise. -+ (vreinterpret_s8_s16): Likewise. -+ (vreinterpret_s8_s32): Likewise. -+ (vreinterpret_s8_s64): Likewise. -+ (vreinterpret_s8_f32): Likewise. -+ (vreinterpret_s8_u8): Likewise. -+ (vreinterpret_s8_u16): Likewise. -+ (vreinterpret_s8_u32): Likewise. -+ (vreinterpret_s8_u64): Likewise. -+ (vreinterpret_s8_p8): Likewise. -+ (vreinterpret_s8_p16): Likewise. -+ (vreinterpretq_s8_s16): Likewise. -+ (vreinterpretq_s8_s32): Likewise. -+ (vreinterpretq_s8_s64): Likewise. -+ (vreinterpretq_s8_f32): Likewise. -+ (vreinterpretq_s8_u8): Likewise. -+ (vreinterpretq_s8_u16): Likewise. -+ (vreinterpretq_s8_u32): Likewise. -+ (vreinterpretq_s8_u64): Likewise. -+ (vreinterpretq_s8_p8): Likewise. -+ (vreinterpretq_s8_p16): Likewise. -+ (vreinterpret_s16_s8): Likewise. -+ (vreinterpret_s16_s32): Likewise. -+ (vreinterpret_s16_s64): Likewise. -+ (vreinterpret_s16_f32): Likewise. -+ (vreinterpret_s16_u8): Likewise. -+ (vreinterpret_s16_u16): Likewise. -+ (vreinterpret_s16_u32): Likewise. -+ (vreinterpret_s16_u64): Likewise. -+ (vreinterpret_s16_p8): Likewise. -+ (vreinterpret_s16_p16): Likewise. -+ (vreinterpretq_s16_s8): Likewise. -+ (vreinterpretq_s16_s32): Likewise. -+ (vreinterpretq_s16_s64): Likewise. -+ (vreinterpretq_s16_f32): Likewise. -+ (vreinterpretq_s16_u8): Likewise. -+ (vreinterpretq_s16_u16): Likewise. -+ (vreinterpretq_s16_u32): Likewise. -+ (vreinterpretq_s16_u64): Likewise. -+ (vreinterpretq_s16_p8): Likewise. -+ (vreinterpretq_s16_p16): Likewise. -+ (vreinterpret_s32_s8): Likewise. -+ (vreinterpret_s32_s16): Likewise. -+ (vreinterpret_s32_s64): Likewise. -+ (vreinterpret_s32_f32): Likewise. -+ (vreinterpret_s32_u8): Likewise. -+ (vreinterpret_s32_u16): Likewise. -+ (vreinterpret_s32_u32): Likewise. -+ (vreinterpret_s32_u64): Likewise. 
-+ (vreinterpret_s32_p8): Likewise. -+ (vreinterpret_s32_p16): Likewise. -+ (vreinterpretq_s32_s8): Likewise. -+ (vreinterpretq_s32_s16): Likewise. -+ (vreinterpretq_s32_s64): Likewise. -+ (vreinterpretq_s32_f32): Likewise. -+ (vreinterpretq_s32_u8): Likewise. -+ (vreinterpretq_s32_u16): Likewise. -+ (vreinterpretq_s32_u32): Likewise. -+ (vreinterpretq_s32_u64): Likewise. -+ (vreinterpretq_s32_p8): Likewise. -+ (vreinterpretq_s32_p16): Likewise. -+ (vreinterpret_u8_s8): Likewise. -+ (vreinterpret_u8_s16): Likewise. -+ (vreinterpret_u8_s32): Likewise. -+ (vreinterpret_u8_s64): Likewise. -+ (vreinterpret_u8_f32): Likewise. -+ (vreinterpret_u8_u16): Likewise. -+ (vreinterpret_u8_u32): Likewise. -+ (vreinterpret_u8_u64): Likewise. -+ (vreinterpret_u8_p8): Likewise. -+ (vreinterpret_u8_p16): Likewise. -+ (vreinterpretq_u8_s8): Likewise. -+ (vreinterpretq_u8_s16): Likewise. -+ (vreinterpretq_u8_s32): Likewise. -+ (vreinterpretq_u8_s64): Likewise. -+ (vreinterpretq_u8_f32): Likewise. -+ (vreinterpretq_u8_u16): Likewise. -+ (vreinterpretq_u8_u32): Likewise. -+ (vreinterpretq_u8_u64): Likewise. -+ (vreinterpretq_u8_p8): Likewise. -+ (vreinterpretq_u8_p16): Likewise. -+ (vreinterpret_u16_s8): Likewise. -+ (vreinterpret_u16_s16): Likewise. -+ (vreinterpret_u16_s32): Likewise. -+ (vreinterpret_u16_s64): Likewise. -+ (vreinterpret_u16_f32): Likewise. -+ (vreinterpret_u16_u8): Likewise. -+ (vreinterpret_u16_u32): Likewise. -+ (vreinterpret_u16_u64): Likewise. -+ (vreinterpret_u16_p8): Likewise. -+ (vreinterpret_u16_p16): Likewise. -+ (vreinterpretq_u16_s8): Likewise. -+ (vreinterpretq_u16_s16): Likewise. -+ (vreinterpretq_u16_s32): Likewise. -+ (vreinterpretq_u16_s64): Likewise. -+ (vreinterpretq_u16_f32): Likewise. -+ (vreinterpretq_u16_u8): Likewise. -+ (vreinterpretq_u16_u32): Likewise. -+ (vreinterpretq_u16_u64): Likewise. -+ (vreinterpretq_u16_p8): Likewise. -+ (vreinterpretq_u16_p16): Likewise. -+ (vreinterpret_u32_s8): Likewise. -+ (vreinterpret_u32_s16): Likewise. -+ (vreinterpret_u32_s32): Likewise. -+ (vreinterpret_u32_s64): Likewise. -+ (vreinterpret_u32_f32): Likewise. -+ (vreinterpret_u32_u8): Likewise. -+ (vreinterpret_u32_u16): Likewise. -+ (vreinterpret_u32_u64): Likewise. -+ (vreinterpret_u32_p8): Likewise. -+ (vreinterpret_u32_p16): Likewise. -+ (vreinterpretq_u32_s8): Likewise. -+ (vreinterpretq_u32_s16): Likewise. -+ (vreinterpretq_u32_s32): Likewise. -+ (vreinterpretq_u32_s64): Likewise. -+ (vreinterpretq_u32_f32): Likewise. -+ (vreinterpretq_u32_u8): Likewise. -+ (vreinterpretq_u32_u16): Likewise. -+ (vreinterpretq_u32_u64): Likewise. -+ (vreinterpretq_u32_p8): Likewise. -+ (vreinterpretq_u32_p16): Likewise. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209640. -+ 2014-04-22 Alex Velenko <Alex.Velenko@arm.com> -+ -+ * gcc/config/aarch64/aarch64-simd.md (aarch64_s<optab><mode>): -+ Pattern extended. -+ * config/aarch64/aarch64-simd-builtins.def (sqneg): Iterator -+ extended. -+ (sqabs): Likewise. -+ * config/aarch64/arm_neon.h (vqneg_s64): New intrinsic. -+ (vqnegd_s64): Likewise. -+ (vqabs_s64): Likewise. -+ (vqabsd_s64): Likewise. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209627, 209636. -+ 2014-04-22 Renlin <renlin.li@arm.com> -+ Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.h (aarch64_frame): Delete "fp_lr_offset". -+ * config/aarch64/aarch64.c (aarch64_layout_frame) -+ (aarch64_initial_elimination_offset): Likewise. 
-+ -+ 2014-04-22 Marcus Shawcroft <marcus.shawcroft@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_initial_elimination_offset): -+ Fix indentation. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209618. -+ 2014-04-22 Renlin Li <Renlin.Li@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_print_operand_address): Adjust -+ the output asm format. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209617. -+ 2014-04-22 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/aarch64/aarch64-simd.md -+ (aarch64_cm<optab>di): Always split. -+ (*aarch64_cm<optab>di): New. -+ (aarch64_cmtstdi): Always split. -+ (*aarch64_cmtstdi): New. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209615. -+ 2014-04-22 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> -+ -+ * config/arm/arm.c (arm_hard_regno_mode_ok): Loosen -+ restrictions on core registers for DImode values in Thumb2. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209613, r209614. -+ 2014-04-22 Ian Bolton <ian.bolton@arm.com> -+ -+ * config/arm/arm.md (*anddi_notdi_zesidi): New pattern. -+ * config/arm/thumb2.md (*iordi_notdi_zesidi): New pattern. -+ -+ 2014-04-22 Ian Bolton <ian.bolton@arm.com> -+ -+ * config/arm/thumb2.md (*iordi_notdi_di): New pattern. -+ (*iordi_notzesidi_di): Likewise. -+ (*iordi_notsesidi_di): Likewise. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209561. -+ 2014-04-22 Ian Bolton <ian.bolton@arm.com> -+ -+ * config/arm/arm-protos.h (tune_params): New struct members. -+ * config/arm/arm.c: Initialise tune_params per processor. -+ (thumb2_reorg): Suppress conversion from t32 to t16 when optimizing -+ for speed, based on new tune_params. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209559. -+ 2014-04-22 Alex Velenko <Alex.Velenko@arm.com> -+ -+ * config/aarch64/aarch64-builtins.c (BUILTIN_VDQF_DF): Macro -+ added. -+ * config/aarch64/aarch64-simd-builtins.def (frintn): Use added -+ macro. -+ * config/aarch64/aarch64-simd.md (<frint_pattern>): Comment -+ corrected. -+ * config/aarch64/aarch64.md (<frint_pattern>): Likewise. -+ * config/aarch64/arm_neon.h (vrnd_f64): Added. -+ (vrnda_f64): Likewise. -+ (vrndi_f64): Likewise. -+ (vrndm_f64): Likewise. -+ (vrndn_f64): Likewise. -+ (vrndp_f64): Likewise. -+ (vrndx_f64): Likewise. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209419. -+ 2014-04-15 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ PR rtl-optimization/60663 -+ * config/arm/arm.c (arm_new_rtx_costs): Improve ASM_OPERANDS case, -+ avoid 0 cost. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209457. -+ 2014-04-16 Andrew Pinski <apinski@cavium.com> -+ -+ * config/host-linux.c (TRY_EMPTY_VM_SPACE): Change aarch64 ilp32 -+ definition. -+ -+2014-05-19 Yvan Roux <yvan.roux@linaro.org> -+ -+ * LINARO-VERSION: Bump version. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ * LINARO-VERSION: Update. -+ -+2014-05-13 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209889. -+ 2014-04-29 Zhenqiang Chen <zhenqiang.chen@linaro.org> -+ -+ * config/aarch64/aarch64.md (mov<mode>cc): New for GPF. -+ -+2014-05-13 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209556. -+ 2014-04-22 Zhenqiang Chen <zhenqiang.chen@linaro.org> -+ -+ * config/arm/arm.c (arm_print_operand, thumb_exit): Make sure -+ GET_MODE_SIZE argument is enum machine_mode. 
-+ -+2014-04-28 Yvan Roux <yvan.roux@linaro.org> -+ -+ * LINARO-VERSION: Bump version. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. -+ * LINARO-VERSION: New file. -+ * configure.ac: Add Linaro version string. ---- a/src/gcc/testsuite/gcc.target/arm/pr44788.c -+++ b/src/gcc/testsuite/gcc.target/arm/pr44788.c -@@ -2,6 +2,8 @@ - /* { dg-require-effective-target arm_thumb2_ok } */ - /* { dg-options "-Os -fno-strict-aliasing -fPIC -mthumb -march=armv7-a -mfpu=vfp3 -mfloat-abi=softfp" } */ - -+extern void foo (float *); -+ - void joint_decode(float* mlt_buffer1, int t) { - int i; - float decode_buffer[1060]; ---- a/src/gcc/testsuite/gcc.target/arm/vect-rounding-floorf.c -+++ b/src/gcc/testsuite/gcc.target/arm/vect-rounding-floorf.c -@@ -5,8 +5,11 @@ - - #define N 32 - -+float __attribute__((aligned(16))) input[N]; -+float __attribute__((aligned(16))) output[N]; -+ - void --foo (float *output, float *input) -+foo () - { - int i = 0; - /* Vectorizable. */ ---- a/src/gcc/testsuite/gcc.target/arm/vect-lceilf_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/vect-lceilf_1.c -@@ -0,0 +1,21 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_v8_neon_ok } */ -+/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-all" } */ -+/* { dg-add-options arm_v8_neon } */ -+ -+#define N 32 -+ -+float __attribute__((aligned(16))) input[N]; -+int __attribute__((aligned(16))) output[N]; -+ -+void -+foo () -+{ -+ int i = 0; -+ /* Vectorizable. */ -+ for (i = 0; i < N; i++) -+ output[i] = __builtin_lceilf (input[i]); -+} -+ -+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -+/* { dg-final { cleanup-tree-dump "vect" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/vfp-ldmdbs.c -+++ b/src/gcc/testsuite/gcc.target/arm/vfp-ldmdbs.c -@@ -3,7 +3,7 @@ - /* { dg-skip-if "need fp instructions" { *-*-* } { "-mfloat-abi=soft" } { "" } } */ - /* { dg-options "-O2 -mfpu=vfp -mfloat-abi=softfp" } */ - --extern void baz (float); -+extern void bar (float); - - void - foo (float *p, float a, int n) -@@ -13,4 +13,4 @@ - while (n--); - } - --/* { dg-final { scan-assembler "fldmdbs" } } */ -+/* { dg-final { scan-assembler "vldmdb.32" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/pr60606-4.c -+++ b/src/gcc/testsuite/gcc.target/arm/pr60606-4.c -@@ -0,0 +1,9 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O" } */ -+ -+int -+f (void) -+{ -+ register unsigned int r[50] asm ("r1"); /* { dg-error "suitable for a register" } */ -+ return r[1]; -+} ---- a/src/gcc/testsuite/gcc.target/arm/iordi3-opt.c -+++ b/src/gcc/testsuite/gcc.target/arm/iordi3-opt.c -@@ -1,4 +1,4 @@ --/* { dg-do compile } */ -+/* { dg-do compile { target { arm_arm_ok || arm_thumb2_ok} } } */ - /* { dg-options "-O1" } */ - - unsigned long long or64 (unsigned long long input) ---- a/src/gcc/testsuite/gcc.target/arm/pr58784.c -+++ b/src/gcc/testsuite/gcc.target/arm/pr58784.c -@@ -11,6 +11,9 @@ - char stepsRemoved; - ptp_tlv_t tlv[1]; - } ptp_message_announce_t; -+ -+extern void f (ptp_message_announce_t *); -+ - int ptplib_send_announce(int sequenceId, int i) - { - ptp_message_announce_t tx_packet; ---- a/src/gcc/testsuite/gcc.target/arm/iordi_notdi-1.c -+++ b/src/gcc/testsuite/gcc.target/arm/iordi_notdi-1.c -@@ -0,0 +1,65 @@ -+/* { dg-do run } */ -+/* { dg-options "-O2 -fno-inline --save-temps" } */ -+ -+extern void abort (void); -+ -+typedef long long s64int; -+typedef int s32int; -+typedef unsigned long long u64int; -+typedef unsigned int u32int; -+ -+s64int -+iordi_di_notdi (s64int a, 
s64int b) -+{ -+ return (a | ~b); -+} -+ -+s64int -+iordi_di_notzesidi (s64int a, u32int b) -+{ -+ return (a | ~(u64int) b); -+} -+ -+s64int -+iordi_notdi_zesidi (s64int a, u32int b) -+{ -+ return (~a | (u64int) b); -+} -+ -+s64int -+iordi_di_notsesidi (s64int a, s32int b) -+{ -+ return (a | ~(s64int) b); -+} -+ -+int main () -+{ -+ s64int a64 = 0xdeadbeef00000000ll; -+ s64int b64 = 0x000000004f4f0112ll; -+ s64int c64 = 0xdeadbeef000f0000ll; -+ -+ u32int c32 = 0x01124f4f; -+ s32int d32 = 0xabbaface; -+ -+ s64int z = iordi_di_notdi (a64, b64); -+ if (z != 0xffffffffb0b0feedll) -+ abort (); -+ -+ z = iordi_di_notzesidi (a64, c32); -+ if (z != 0xfffffffffeedb0b0ll) -+ abort (); -+ -+ z = iordi_notdi_zesidi (c64, c32); -+ if (z != 0x21524110fff2ffffll) -+ abort (); -+ -+ z = iordi_di_notsesidi (a64, d32); -+ if (z != 0xdeadbeef54450531ll) -+ abort (); -+ -+ return 0; -+} -+ -+/* { dg-final { scan-assembler-times "orn\t" 6 { target arm_thumb2 } } } */ -+ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/vfp-ldmias.c -+++ b/src/gcc/testsuite/gcc.target/arm/vfp-ldmias.c -@@ -3,7 +3,7 @@ - /* { dg-skip-if "need fp instructions" { *-*-* } { "-mfloat-abi=soft" } { "" } } */ - /* { dg-options "-O2 -mfpu=vfp -mfloat-abi=softfp" } */ - --extern void baz (float); -+extern void bar (float); - - void - foo (float *p, float a, int n) -@@ -13,4 +13,4 @@ - while (n--); - } - --/* { dg-final { scan-assembler "fldmias" } } */ -+/* { dg-final { scan-assembler "vldmia.32" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/cold-lc.c -+++ b/src/gcc/testsuite/gcc.target/arm/cold-lc.c -@@ -7,6 +7,7 @@ - struct task_struct *task; - }; - extern struct thread_info *current_thread_info (void); -+extern int show_stack (struct task_struct *, unsigned long *); - - void dump_stack (void) - { ---- a/src/gcc/testsuite/gcc.target/arm/vfp-ldmdbd.c -+++ b/src/gcc/testsuite/gcc.target/arm/vfp-ldmdbd.c -@@ -13,4 +13,4 @@ - while (n--); - } - --/* { dg-final { scan-assembler "fldmdbd" } } */ -+/* { dg-final { scan-assembler "vldmdb.64" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/vfp-stmdbs.c -+++ b/src/gcc/testsuite/gcc.target/arm/vfp-stmdbs.c -@@ -12,4 +12,4 @@ - while (n--); - } - --/* { dg-final { scan-assembler "fstmdbs" } } */ -+/* { dg-final { scan-assembler "vstmdb.32" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/vfp-ldmiad.c -+++ b/src/gcc/testsuite/gcc.target/arm/vfp-ldmiad.c -@@ -13,4 +13,4 @@ - while (n--); - } - --/* { dg-final { scan-assembler "fldmiad" } } */ -+/* { dg-final { scan-assembler "vldmia.64" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/vfp-stmias.c -+++ b/src/gcc/testsuite/gcc.target/arm/vfp-stmias.c -@@ -12,4 +12,4 @@ - while (n--); - } - --/* { dg-final { scan-assembler "fstmias" } } */ -+/* { dg-final { scan-assembler "vstmia.32" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/vfp-stmdbd.c -+++ b/src/gcc/testsuite/gcc.target/arm/vfp-stmdbd.c -@@ -12,4 +12,4 @@ - while (n--); - } - --/* { dg-final { scan-assembler "fstmdbd" } } */ -+/* { dg-final { scan-assembler "vstmdb.64" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/lceil-vcvt_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/lceil-vcvt_1.c -@@ -0,0 +1,21 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_v8_vfp_ok } */ -+/* { dg-options "-O2 -march=armv8-a" } */ -+/* { dg-add-options arm_v8_vfp } */ -+ -+int -+foofloat (float x) -+{ -+ return __builtin_lceilf (x); -+} -+ -+/* { dg-final { scan-assembler-times "vcvtp.s32.f32\ts\[0-9\]+, s\[0-9\]+" 1 } } */ -+ -+ -+int -+foodouble (double x) -+{ -+ 
return __builtin_lceil (x); -+} -+ -+/* { dg-final { scan-assembler-times "vcvtp.s32.f64\ts\[0-9\]+, d\[0-9\]+" 1 } } */ ---- a/src/gcc/testsuite/gcc.target/arm/vfp-stmiad.c -+++ b/src/gcc/testsuite/gcc.target/arm/vfp-stmiad.c -@@ -12,4 +12,4 @@ - while (n--); - } - --/* { dg-final { scan-assembler "fstmiad" } } */ -+/* { dg-final { scan-assembler "vstmia.64" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzips16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzips16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzips16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzips16.x" -+ -+/* { dg-final { scan-assembler-times "vzip\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrns16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrns16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vtrns16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vtrns16.x" -+ -+/* { dg-final { scan-assembler-times "vtrn\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vexts64_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vexts64_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vexts64' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/ext_s64.x" -+ -+/* Don't scan assembler for vext - it can be optimized into a move from r0. */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipu16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipu16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipu16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipu16.x" -+ -+/* { dg-final { scan-assembler-times "vzip\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqs8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqs8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipQs8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipqs8.x" -+ -+/* { dg-final { scan-assembler-times "vzip\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vextQu8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQu8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vextQu8' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/extq_u8.x" -+ -+/* { dg-final { scan-assembler-times "vext\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 15 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnu16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnu16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vtrnu16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vtrnu16.x" -+ -+/* { dg-final { scan-assembler-times "vtrn\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vtrnQs8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vtrnqs8.x" -+ -+/* { dg-final { scan-assembler-times "vtrn\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqf32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqf32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vtrnQf32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vtrnqf32.x" -+ -+/* { dg-final { scan-assembler-times "vtrn\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vextu64_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextu64_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vextu64' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/ext_u64.x" -+ -+/* Don't scan assembler for vext - it can be optimized into a move from r0. */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qp8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qp8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev64q_p8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev64qp8.x" -+ -+/* { dg-final { scan-assembler "vrev64\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqp8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqp8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vuzpQp8' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vuzpqp8.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32p8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32p8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev32p8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev32p8.x" -+ -+/* { dg-final { scan-assembler "vrev32\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vextu8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextu8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vextu8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/ext_u8.x" -+ -+/* { dg-final { scan-assembler-times "vext\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vextQs64_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQs64_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vextQs64' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/extq_s64.x" -+ -+/* { dg-final { scan-assembler-times "vext\.64\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qp16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qp16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev64q_p16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev64qp16.x" -+ -+/* { dg-final { scan-assembler "vrev64\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vtrnQs16' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vtrnqs16.x" -+ -+/* { dg-final { scan-assembler-times "vtrn\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrns8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrns8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vtrns8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vtrns8.x" -+ -+/* { dg-final { scan-assembler-times "vtrn\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qs32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qs32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev64q_s32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev64qs32.x" -+ -+/* { dg-final { scan-assembler "vrev64\.32\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vextQu64_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQu64_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vextQu64' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/extq_u64.x" -+ -+/* { dg-final { scan-assembler-times "vext\.64\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vtrnQu16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vtrnqu16.x" -+ -+/* { dg-final { scan-assembler-times "vtrn\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64s8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64s8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev64s8' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev64s8.x" -+ -+/* { dg-final { scan-assembler "vrev64\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qu32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qu32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev64q_u32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev64qu32.x" -+ -+/* { dg-final { scan-assembler "vrev64\.32\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqp16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqp16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vuzpQp16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vuzpqp16.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vextp16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextp16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vextp16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/ext_p16.x" -+ -+/* { dg-final { scan-assembler-times "vext\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vuzpQs32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vuzpqs32.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vexts32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vexts32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vexts32' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/ext_s32.x" -+ -+/* { dg-final { scan-assembler-times "vext\.32\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vuzpQu32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vuzpqu32.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzps8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzps8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vuzps8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vuzps8.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vextu32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextu32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vextu32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/ext_u32.x" -+ -+/* { dg-final { scan-assembler-times "vext\.32\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32s16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32s16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev32s16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev32s16.x" -+ -+/* { dg-final { scan-assembler "vrev32\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqp8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqp8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipQp8' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipqp8.x" -+ -+/* { dg-final { scan-assembler-times "vzip\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqp8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqp8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vtrnQp8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vtrnqp8.x" -+ -+/* { dg-final { scan-assembler-times "vtrn\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32qs8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32qs8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev32q_s8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev32qs8.x" -+ -+/* { dg-final { scan-assembler "vrev32\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32u16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32u16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev32u16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev32u16.x" -+ -+/* { dg-final { scan-assembler "vrev32\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64p16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64p16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev64p16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev64p16.x" -+ -+/* { dg-final { scan-assembler "vrev64\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64s32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64s32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev64s32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev64s32.x" -+ -+/* { dg-final { scan-assembler "vrev64\.32\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev16qs8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev16qs8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev16q_s8' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev16qs8.x" -+ -+/* { dg-final { scan-assembler "vrev16\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/simd.exp -+++ b/src/gcc/testsuite/gcc.target/arm/simd/simd.exp -@@ -0,0 +1,35 @@ -+# Copyright (C) 1997-2014 Free Software Foundation, Inc. -+ -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 3 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with GCC; see the file COPYING3. If not see -+# <http://www.gnu.org/licenses/>. -+ -+# GCC testsuite that uses the `dg.exp' driver. -+ -+# Exit immediately if this isn't an ARM target. -+if ![istarget arm*-*-*] then { -+ return -+} -+ -+# Load support procs. -+load_lib gcc-dg.exp -+ -+# Initialize `dg'. -+dg-init -+ -+# Main loop. -+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \ -+ "" "" -+ -+# All done. -+dg-finish ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64u32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64u32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev64u32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev64u32.x" -+ -+/* { dg-final { scan-assembler "vrev64\.32\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qu8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qu8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev64q_u8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev64qu8.x" -+ -+/* { dg-final { scan-assembler "vrev64\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpp16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpp16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vuzpp16' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vuzpp16.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzps32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzps32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vuzps32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vuzps32.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpu32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpu32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vuzpu32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vuzpu32.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vextQp16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQp16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vextQp16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/extq_p16.x" -+ -+/* { dg-final { scan-assembler-times "vext\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vextQs32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQs32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vextQs32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/extq_s32.x" -+ -+/* { dg-final { scan-assembler-times "vext\.32\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32qp16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32qp16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev32q_p16' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev32qp16.x" -+ -+/* { dg-final { scan-assembler "vrev32\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqp16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqp16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipQp16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipqp16.x" -+ -+/* { dg-final { scan-assembler-times "vzip\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqs32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqs32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipQs32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipqs32.x" -+ -+/* { dg-final { scan-assembler-times "vzip\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vextQu32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQu32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vextQu32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/extq_u32.x" -+ -+/* { dg-final { scan-assembler-times "vext\.32\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnp8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnp8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vtrnp8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vtrnp8.x" -+ -+/* { dg-final { scan-assembler-times "vtrn\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vuzpQu8' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vuzpqu8.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzips8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzips8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzips8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzips8.x" -+ -+/* { dg-final { scan-assembler-times "vzip\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqu32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqu32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipQu32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipqu32.x" -+ -+/* { dg-final { scan-assembler-times "vzip\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev16s8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev16s8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev16s8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev16s8.x" -+ -+/* { dg-final { scan-assembler "vrev16\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32u8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32u8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev32u8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev32u8.x" -+ -+/* { dg-final { scan-assembler "vrev32\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64p8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64p8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev64p8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev64p8.x" -+ -+/* { dg-final { scan-assembler "vrev64\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpp8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpp8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vuzpp8' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vuzpp8.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipp16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipp16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipp16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipp16.x" -+ -+/* { dg-final { scan-assembler-times "vzip\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzips32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzips32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzips32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzips32.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vextp64_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextp64_1.c -@@ -0,0 +1,26 @@ -+/* Test the `vextp64' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_crypto_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_crypto } */ -+ -+#include "arm_neon.h" -+ -+extern void abort (void); -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly64x1_t in1 = {0}; -+ poly64x1_t in2 = {1}; -+ poly64x1_t actual = vext_p64 (in1, in2, 0); -+ if (actual != in1) -+ abort (); -+ -+ return 0; -+} -+ -+/* Don't scan assembler for vext - it can be optimized into a move from r0. -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32qp8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32qp8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev32q_p8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev32qp8.x" -+ -+/* { dg-final { scan-assembler "vrev32\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnp16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnp16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vtrnp16' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vtrnp16.x" -+ -+/* { dg-final { scan-assembler-times "vtrn\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrns32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrns32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vtrns32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vtrns32.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vextQs8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQs8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vextQs8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/extq_s8.x" -+ -+/* { dg-final { scan-assembler-times "vext\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 15 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev16qp8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev16qp8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev16q_p8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev16qp8.x" -+ -+/* { dg-final { scan-assembler "vrev16\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipu32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipu32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipu32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipu32.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnu32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnu32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vtrnu32' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vtrnu32.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqu8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqu8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipQu8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipqu8.x" -+ -+/* { dg-final { scan-assembler-times "vzip\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vtrnQu8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vtrnqu8.x" -+ -+/* { dg-final { scan-assembler-times "vtrn\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qf32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qf32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev64q_f32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev64qf32.x" -+ -+/* { dg-final { scan-assembler "vrev64\.32\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqf32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqf32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vuzpQf32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vuzpqf32.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipp8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipp8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipp8' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipp8.x" -+ -+/* { dg-final { scan-assembler-times "vzip\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vextf32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextf32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vextf32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/ext_f32.x" -+ -+/* { dg-final { scan-assembler-times "vext\.32\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vextQp64_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQp64_1.c -@@ -0,0 +1,33 @@ -+/* Test the `vextQp64' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_crypto_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_crypto } */ -+ -+#include "arm_neon.h" -+ -+extern void abort (void); -+ -+poly64x2_t -+test_vextq_p64_1 (poly64x2_t a, poly64x2_t b) -+{ -+ return vextq_p64(a, b, 1); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i, off; -+ poly64x2_t in1 = {0, 1}; -+ poly64x2_t in2 = {2, 3}; -+ poly64x2_t actual = test_vextq_p64_1 (in1, in2); -+ for (i = 0; i < 2; i++) -+ if (actual[i] != i + 1) -+ abort (); -+ -+ return 0; -+} -+ -+/* { dg-final { scan-assembler-times "vext\.64\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vexts8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vexts8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vexts8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/ext_s8.x" -+ -+/* { dg-final { scan-assembler-times "vext\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev16p8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev16p8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev16p8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev16p8.x" -+ -+/* { dg-final { scan-assembler "vrev16\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqp16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqp16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vtrnQp16' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vtrnqp16.x" -+ -+/* { dg-final { scan-assembler-times "vtrn\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vtrnQs32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vtrnqs32.x" -+ -+/* { dg-final { scan-assembler-times "vtrn\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vtrnQu32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vtrnqu32.x" -+ -+/* { dg-final { scan-assembler-times "vtrn\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnu8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnu8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vtrnu8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vtrnu8.x" -+ -+/* { dg-final { scan-assembler-times "vtrn\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qs16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qs16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev64q_s16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev64qs16.x" -+ -+/* { dg-final { scan-assembler "vrev64\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64f32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64f32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev64f32' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev64f32.x" -+ -+/* { dg-final { scan-assembler "vrev64\.32\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64u8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64u8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev64u8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev64u8.x" -+ -+/* { dg-final { scan-assembler "vrev64\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qu16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qu16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev64q_u16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev64qu16.x" -+ -+/* { dg-final { scan-assembler "vrev64\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32p16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32p16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev32p16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev32p16.x" -+ -+/* { dg-final { scan-assembler "vrev32\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vextQp8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQp8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vextQp8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/extq_p8.x" -+ -+/* { dg-final { scan-assembler-times "vext\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 15 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpf32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpf32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vuzpf32' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vuzpf32.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vuzpQs16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vuzpqs16.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vexts16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vexts16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vexts16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/ext_s16.x" -+ -+/* { dg-final { scan-assembler-times "vext\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vuzpQu16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vuzpqu16.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpu8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpu8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vuzpu8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vuzpu8.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vextQf32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQf32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vextQf32' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/extq_f32.x" -+ -+/* { dg-final { scan-assembler-times "vext\.32\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vextu16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextu16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vextu16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/ext_u16.x" -+ -+/* { dg-final { scan-assembler-times "vext\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqf32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqf32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipQf32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipqf32.x" -+ -+/* { dg-final { scan-assembler-times "vzip\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32qu8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32qu8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev32q_u8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev32qu8.x" -+ -+/* { dg-final { scan-assembler "vrev32\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64s16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64s16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev64s16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev64s16.x" -+ -+/* { dg-final { scan-assembler "vrev64\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev16qu8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev16qu8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev16q_u8' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev16qu8.x" -+ -+/* { dg-final { scan-assembler "vrev16\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64u16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64u16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev64u16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev64u16.x" -+ -+/* { dg-final { scan-assembler "vrev64\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qs8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qs8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev64q_s8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev64qs8.x" -+ -+/* { dg-final { scan-assembler "vrev64\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vextp8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextp8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vextp8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/ext_p8.x" -+ -+/* { dg-final { scan-assembler-times "vext\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzps16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzps16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vuzps16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vuzps16.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vuzpQs8' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vuzpqs8.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpu16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpu16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vuzpu16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vuzpu16.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vextQs16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQs16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vextQs16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/extq_s16.x" -+ -+/* { dg-final { scan-assembler-times "vext\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32s8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32s8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev32s8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev32s8.x" -+ -+/* { dg-final { scan-assembler "vrev32\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32qs16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32qs16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev32q_s16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev32qs16.x" -+ -+/* { dg-final { scan-assembler "vrev32\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vextQu16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQu16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vextQu16' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/extq_u16.x" -+ -+/* { dg-final { scan-assembler-times "vext\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipf32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipf32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipf32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipf32.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqs16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqs16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipQs16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipqs16.x" -+ -+/* { dg-final { scan-assembler-times "vzip\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnf32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnf32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vtrnf32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vtrnf32.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32qu16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32qu16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev32q_u16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev32qu16.x" -+ -+/* { dg-final { scan-assembler "vrev32\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqu16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqu16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipQu16' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipqu16.x" -+ -+/* { dg-final { scan-assembler-times "vzip\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipu8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipu8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipu8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipu8.x" -+ -+/* { dg-final { scan-assembler-times "vzip\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev16u8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev16u8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev16u8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev16u8.x" -+ -+/* { dg-final { scan-assembler "vrev16\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/vect-lfloorf_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/vect-lfloorf_1.c -@@ -0,0 +1,21 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_v8_neon_ok } */ -+/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-all" } */ -+/* { dg-add-options arm_v8_neon } */ -+ -+#define N 32 -+ -+float __attribute__((aligned(16))) input[N]; -+int __attribute__((aligned(16))) output[N]; -+ -+void -+foo () -+{ -+ int i = 0; -+ /* Vectorizable. */ -+ for (i = 0; i < N; i++) -+ output[i] = __builtin_lfloorf (input[i]); -+} -+ -+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -+/* { dg-final { cleanup-tree-dump "vect" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/pr51835.c -+++ b/src/gcc/testsuite/gcc.target/arm/pr51835.c -@@ -13,5 +13,5 @@ - return (unsigned int)d; - } - --/* { dg-final { scan-assembler-times "fmrrd\[\\t \]+r0,\[\\t \]*r1,\[\\t \]*d0" 2 { target { arm_little_endian } } } } */ --/* { dg-final { scan-assembler-times "fmrrd\[\\t \]+r1,\[\\t \]*r0,\[\\t \]*d0" 2 { target { ! arm_little_endian } } } } */ -+/* { dg-final { scan-assembler-times "vmov\[\\t \]+r0,\[\\t \]*r1,\[\\t \]*d0" 2 { target { arm_little_endian } } } } */ -+/* { dg-final { scan-assembler-times "vmov\[\\t \]+r1,\[\\t \]*r0,\[\\t \]*d0" 2 { target { ! 
arm_little_endian } } } } */ ---- a/src/gcc/testsuite/gcc.target/arm/20031108-1.c -+++ b/src/gcc/testsuite/gcc.target/arm/20031108-1.c -@@ -20,6 +20,9 @@ - - Rec_Pointer Ptr_Glob; - -+extern int Proc_7 (int, int, int *); -+ -+void - Proc_1 (Ptr_Val_Par) - Rec_Pointer Ptr_Val_Par; - { ---- a/src/gcc/testsuite/gcc.target/arm/neon-modes-2.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon-modes-2.c -@@ -11,6 +11,8 @@ - - #define MANY(A) A (0), A (1), A (2), A (3), A (4), A (5) - -+extern void foo (int *, int *); -+ - void - bar (uint32_t *ptr, int y) - { ---- a/src/gcc/testsuite/gcc.target/arm/vect-rounding-roundf.c -+++ b/src/gcc/testsuite/gcc.target/arm/vect-rounding-roundf.c -@@ -5,8 +5,11 @@ - - #define N 32 - -+float __attribute__((aligned(16))) input[N]; -+float __attribute__((aligned(16))) output[N]; -+ - void --foo (float *output, float *input) -+foo () - { - int i = 0; - /* Vectorizable. */ ---- a/src/gcc/testsuite/gcc.target/arm/pr43920-2.c -+++ b/src/gcc/testsuite/gcc.target/arm/pr43920-2.c -@@ -4,6 +4,8 @@ - - #include <stdio.h> - -+extern int lseek(int, long, int); -+ - int getFileStartAndLength (int fd, int *start_, size_t *length_) - { - int start, end; ---- a/src/gcc/testsuite/gcc.target/arm/xordi3-opt.c -+++ b/src/gcc/testsuite/gcc.target/arm/xordi3-opt.c -@@ -1,4 +1,4 @@ --/* { dg-do compile } */ -+/* { dg-do compile { target { arm_arm_ok || arm_thumb2_ok} } } */ - /* { dg-options "-O1" } */ - - unsigned long long xor64 (unsigned long long input) ---- a/src/gcc/testsuite/gcc.target/arm/vect-lroundf_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/vect-lroundf_1.c -@@ -0,0 +1,21 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_v8_neon_ok } */ -+/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-all" } */ -+/* { dg-add-options arm_v8_neon } */ -+ -+#define N 32 -+ -+float __attribute__((aligned(16))) input[N]; -+int __attribute__((aligned(16))) output[N]; -+ -+void -+foo () -+{ -+ int i = 0; -+ /* Vectorizable. */ -+ for (i = 0; i < N; i++) -+ output[i] = __builtin_lroundf (input[i]); -+} -+ -+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -+/* { dg-final { cleanup-tree-dump "vect" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/tail-long-call.c -+++ b/src/gcc/testsuite/gcc.target/arm/tail-long-call.c -@@ -0,0 +1,12 @@ -+/* { dg-skip-if "need at least armv5te" { *-*-* } { "-march=armv[234]*" "-mthumb" } { "" } } */ -+/* { dg-options "-O2 -march=armv5te -marm" } */ -+/* { dg-final { scan-assembler "bx" } } */ -+/* { dg-final { scan-assembler-not "blx" } } */ -+ -+int lcal (int) __attribute__ ((long_call)); -+ -+int -+dec (int a) -+{ -+ return lcal (a); -+} ---- a/src/gcc/testsuite/gcc.target/arm/vect-rounding-btruncf.c -+++ b/src/gcc/testsuite/gcc.target/arm/vect-rounding-btruncf.c -@@ -5,8 +5,11 @@ - - #define N 32 - -+float __attribute__((aligned(16))) input[N]; -+float __attribute__((aligned(16))) output[N]; -+ - void --foo (float *output, float *input) -+foo () - { - int i = 0; - /* Vectorizable. 
*/ ---- a/src/gcc/testsuite/gcc.target/arm/pr61948.c -+++ b/src/gcc/testsuite/gcc.target/arm/pr61948.c -@@ -0,0 +1,16 @@ -+/* PR target/61948 */ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-require-effective-target arm_thumb2_ok } */ -+/* { dg-options "-O2 -mthumb" } */ -+/* { dg-add-options arm_neon } */ -+ -+long long f (long long *c) -+{ -+ long long t = c[0]; -+ asm ("nop" : : : "r0", "r3", "r4", "r5", -+ "r6", "r7", "r8", "r9", -+ "r10", "r11", "r12", "memory"); -+ return t >> 1; -+} -+ ---- a/src/gcc/testsuite/gcc.target/arm/pr51968.c -+++ b/src/gcc/testsuite/gcc.target/arm/pr51968.c -@@ -1,6 +1,6 @@ - /* PR target/51968 */ - /* { dg-do compile } */ --/* { dg-options "-O2 -march=armv7-a -mfloat-abi=softfp -mfpu=neon" } */ -+/* { dg-options "-O2 -Wno-implicit-function-declaration -march=armv7-a -mfloat-abi=softfp -mfpu=neon" } */ - /* { dg-require-effective-target arm_neon_ok } */ - - typedef __builtin_neon_qi int8x8_t __attribute__ ((__vector_size__ (8))); ---- a/src/gcc/testsuite/gcc.target/arm/lround-vcvt_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/lround-vcvt_1.c -@@ -0,0 +1,21 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_v8_vfp_ok } */ -+/* { dg-options "-O2 -march=armv8-a -ffast-math" } */ -+/* { dg-add-options arm_v8_vfp } */ -+ -+int -+foofloat (float x) -+{ -+ return __builtin_lroundf (x); -+} -+ -+/* { dg-final { scan-assembler-times "vcvta.s32.f32\ts\[0-9\]+, s\[0-9\]+" 1 } } */ -+ -+ -+int -+foodouble (double x) -+{ -+ return __builtin_lround (x); -+} -+ -+/* { dg-final { scan-assembler-times "vcvta.s32.f64\ts\[0-9\]+, d\[0-9\]+" 1 } } */ ---- a/src/gcc/testsuite/gcc.target/arm/pr60650.c -+++ b/src/gcc/testsuite/gcc.target/arm/pr60650.c -@@ -20,6 +20,10 @@ - int a, c, d; - long long e; - -+extern int foo1 (struct btrfs_root *, int, int, int); -+extern int foo2 (struct btrfs_root *, int, int); -+ -+int - truncate_one_csum (struct btrfs_root *p1, long long p2, long long p3) - { - int f, g, i = p1->fs_info->sb->s_blocksize_bits; ---- a/src/gcc/testsuite/gcc.target/arm/vfp-1.c -+++ b/src/gcc/testsuite/gcc.target/arm/vfp-1.c -@@ -11,40 +11,40 @@ - - void test_sf() { - /* abssf2_vfp */ -- /* { dg-final { scan-assembler "fabss" } } */ -+ /* { dg-final { scan-assembler "vabs.f32" } } */ - f1 = fabsf (f1); - /* negsf2_vfp */ -- /* { dg-final { scan-assembler "fnegs" } } */ -+ /* { dg-final { scan-assembler "vneg.f32" } } */ - f1 = -f1; - /* addsf3_vfp */ -- /* { dg-final { scan-assembler "fadds" } } */ -+ /* { dg-final { scan-assembler "vadd.f32" } } */ - f1 = f2 + f3; - /* subsf3_vfp */ -- /* { dg-final { scan-assembler "fsubs" } } */ -+ /* { dg-final { scan-assembler "vsub.f32" } } */ - f1 = f2 - f3; - /* divsf3_vfp */ -- /* { dg-final { scan-assembler "fdivs" } } */ -+ /* { dg-final { scan-assembler "vdiv.f32" } } */ - f1 = f2 / f3; - /* mulsf3_vfp */ -- /* { dg-final { scan-assembler "fmuls" } } */ -+ /* { dg-final { scan-assembler "vmul.f32" } } */ - f1 = f2 * f3; - /* mulsf3negsf_vfp */ -- /* { dg-final { scan-assembler "fnmuls" } } */ -+ /* { dg-final { scan-assembler "vnmul.f32" } } */ - f1 = -f2 * f3; - /* mulsf3addsf_vfp */ -- /* { dg-final { scan-assembler "fmacs" } } */ -+ /* { dg-final { scan-assembler "vmla.f32" } } */ - f1 = f2 * f3 + f1; - /* mulsf3subsf_vfp */ -- /* { dg-final { scan-assembler "fmscs" } } */ -+ /* { dg-final { scan-assembler "vnmls.f32" } } */ - f1 = f2 * f3 - f1; - /* mulsf3negsfaddsf_vfp */ -- /* { dg-final { scan-assembler "fnmacs" } } */ -+ /* { dg-final { scan-assembler 
"vmls.f32" } } */ - f1 = f2 - f3 * f1; - /* mulsf3negsfsubsf_vfp */ -- /* { dg-final { scan-assembler "fnmscs" } } */ -+ /* { dg-final { scan-assembler "vnmla.f32" } } */ - f1 = -f2 * f3 - f1; - /* sqrtsf2_vfp */ -- /* { dg-final { scan-assembler "fsqrts" } } */ -+ /* { dg-final { scan-assembler "vsqrt.f32" } } */ - f1 = sqrtf (f1); - } - -@@ -52,40 +52,40 @@ - - void test_df() { - /* absdf2_vfp */ -- /* { dg-final { scan-assembler "fabsd" } } */ -+ /* { dg-final { scan-assembler "vabs.f64" } } */ - d1 = fabs (d1); - /* negdf2_vfp */ -- /* { dg-final { scan-assembler "fnegd" } } */ -+ /* { dg-final { scan-assembler "vneg.f64" } } */ - d1 = -d1; - /* adddf3_vfp */ -- /* { dg-final { scan-assembler "faddd" } } */ -+ /* { dg-final { scan-assembler "vadd.f64" } } */ - d1 = d2 + d3; - /* subdf3_vfp */ -- /* { dg-final { scan-assembler "fsubd" } } */ -+ /* { dg-final { scan-assembler "vsub.f64" } } */ - d1 = d2 - d3; - /* divdf3_vfp */ -- /* { dg-final { scan-assembler "fdivd" } } */ -+ /* { dg-final { scan-assembler "vdiv.f64" } } */ - d1 = d2 / d3; - /* muldf3_vfp */ -- /* { dg-final { scan-assembler "fmuld" } } */ -+ /* { dg-final { scan-assembler "vmul.f64" } } */ - d1 = d2 * d3; - /* muldf3negdf_vfp */ -- /* { dg-final { scan-assembler "fnmuld" } } */ -+ /* { dg-final { scan-assembler "vnmul.f64" } } */ - d1 = -d2 * d3; - /* muldf3adddf_vfp */ -- /* { dg-final { scan-assembler "fmacd" } } */ -+ /* { dg-final { scan-assembler "vmla.f64" } } */ - d1 = d2 * d3 + d1; - /* muldf3subdf_vfp */ -- /* { dg-final { scan-assembler "fmscd" } } */ -+ /* { dg-final { scan-assembler "vnmls.f64" } } */ - d1 = d2 * d3 - d1; - /* muldf3negdfadddf_vfp */ -- /* { dg-final { scan-assembler "fnmacd" } } */ -+ /* { dg-final { scan-assembler "vmls.f64" } } */ - d1 = d2 - d3 * d1; - /* muldf3negdfsubdf_vfp */ -- /* { dg-final { scan-assembler "fnmscd" } } */ -+ /* { dg-final { scan-assembler "vnmla.f64" } } */ - d1 = -d2 * d3 - d1; - /* sqrtdf2_vfp */ -- /* { dg-final { scan-assembler "fsqrtd" } } */ -+ /* { dg-final { scan-assembler "vsqrt.f64" } } */ - d1 = sqrt (d1); - } - -@@ -94,46 +94,46 @@ - - void test_convert () { - /* extendsfdf2_vfp */ -- /* { dg-final { scan-assembler "fcvtds" } } */ -+ /* { dg-final { scan-assembler "vcvt.f64.f32" } } */ - d1 = f1; - /* truncdfsf2_vfp */ -- /* { dg-final { scan-assembler "fcvtsd" } } */ -+ /* { dg-final { scan-assembler "vcvt.f32.f64" } } */ - f1 = d1; - /* truncsisf2_vfp */ -- /* { dg-final { scan-assembler "ftosizs" } } */ -+ /* { dg-final { scan-assembler "vcvt.s32.f32" } } */ - i1 = f1; - /* truncsidf2_vfp */ -- /* { dg-final { scan-assembler "ftosizd" } } */ -+ /* { dg-final { scan-assembler "vcvt.s32.f64" } } */ - i1 = d1; - /* fixuns_truncsfsi2 */ -- /* { dg-final { scan-assembler "ftouizs" } } */ -+ /* { dg-final { scan-assembler "vcvt.u32.f32" } } */ - u1 = f1; - /* fixuns_truncdfsi2 */ -- /* { dg-final { scan-assembler "ftouizd" } } */ -+ /* { dg-final { scan-assembler "vcvt.u32.f64" } } */ - u1 = d1; - /* floatsisf2_vfp */ -- /* { dg-final { scan-assembler "fsitos" } } */ -+ /* { dg-final { scan-assembler "vcvt.f32.s32" } } */ - f1 = i1; - /* floatsidf2_vfp */ -- /* { dg-final { scan-assembler "fsitod" } } */ -+ /* { dg-final { scan-assembler "vcvt.f64.s32" } } */ - d1 = i1; - /* floatunssisf2 */ -- /* { dg-final { scan-assembler "fuitos" } } */ -+ /* { dg-final { scan-assembler "vcvt.f32.u32" } } */ - f1 = u1; - /* floatunssidf2 */ -- /* { dg-final { scan-assembler "fuitod" } } */ -+ /* { dg-final { scan-assembler "vcvt.f64.u32" } } */ - d1 = u1; - } - - 
void test_ldst (float f[], double d[]) { -- /* { dg-final { scan-assembler "flds.+ \\\[r0, #1020\\\]" } } */ -- /* { dg-final { scan-assembler "flds.+ \\\[r\[0-9\], #-1020\\\]" { target { arm32 && { ! arm_thumb2_ok } } } } } */ -+ /* { dg-final { scan-assembler "vldr.32.+ \\\[r0, #1020\\\]" } } */ -+ /* { dg-final { scan-assembler "vldr.32.+ \\\[r\[0-9\], #-1020\\\]" { target { arm32 && { ! arm_thumb2_ok } } } } } */ - /* { dg-final { scan-assembler "add.+ r0, #1024" } } */ -- /* { dg-final { scan-assembler "fsts.+ \\\[r\[0-9\]\\\]\n" } } */ -+ /* { dg-final { scan-assembler "vstr.32.+ \\\[r\[0-9\]\\\]\n" } } */ - f[256] = f[255] + f[-255]; - -- /* { dg-final { scan-assembler "fldd.+ \\\[r1, #1016\\\]" } } */ -- /* { dg-final { scan-assembler "fldd.+ \\\[r\[1-9\], #-1016\\\]" { target { arm32 && { ! arm_thumb2_ok } } } } } */ -- /* { dg-final { scan-assembler "fstd.+ \\\[r1, #256\\\]" } } */ -+ /* { dg-final { scan-assembler "vldr.64.+ \\\[r1, #1016\\\]" } } */ -+ /* { dg-final { scan-assembler "vldr.64.+ \\\[r\[1-9\], #-1016\\\]" { target { arm32 && { ! arm_thumb2_ok } } } } } */ -+ /* { dg-final { scan-assembler "vstr.64.+ \\\[r1, #256\\\]" } } */ - d[32] = d[127] + d[-127]; - } ---- a/src/gcc/testsuite/gcc.target/arm/vect-copysignf.c -+++ b/src/gcc/testsuite/gcc.target/arm/vect-copysignf.c -@@ -0,0 +1,36 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_hw } */ -+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details" } */ -+/* { dg-add-options "arm_neon" } */ -+ -+extern void abort (); -+ -+#define N 16 -+float a[N] = {-0.1f, -3.2f, -6.3f, -9.4f, -+ -12.5f, -15.6f, -18.7f, -21.8f, -+ 24.9f, 27.1f, 30.2f, 33.3f, -+ 36.4f, 39.5f, 42.6f, 45.7f}; -+float b[N] = {-1.2f, 3.4f, -5.6f, 7.8f, -+ -9.0f, 1.0f, -2.0f, 3.0f, -+ -4.0f, -5.0f, 6.0f, 7.0f, -+ -8.0f, -9.0f, 10.0f, 11.0f}; -+float r[N]; -+ -+int -+main (void) -+{ -+ int i; -+ -+ for (i = 0; i < N; i++) -+ r[i] = __builtin_copysignf (a[i], b[i]); -+ -+ /* check results: */ -+ for (i = 0; i < N; i++) -+ if (r[i] != __builtin_copysignf (a[i], b[i])) -+ abort (); -+ -+ return 0; -+} -+ -+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -+/* { dg-final { cleanup-tree-dump "vect" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/rev16.c -+++ b/src/gcc/testsuite/gcc.target/arm/rev16.c -@@ -0,0 +1,35 @@ -+/* { dg-options "-O2" } */ -+/* { dg-do run } */ -+ -+extern void abort (void); -+ -+typedef unsigned int __u32; -+ -+__u32 -+__rev16_32_alt (__u32 x) -+{ -+ return (((__u32)(x) & (__u32)0xff00ff00UL) >> 8) -+ | (((__u32)(x) & (__u32)0x00ff00ffUL) << 8); -+} -+ -+__u32 -+__rev16_32 (__u32 x) -+{ -+ return (((__u32)(x) & (__u32)0x00ff00ffUL) << 8) -+ | (((__u32)(x) & (__u32)0xff00ff00UL) >> 8); -+} -+ -+int -+main (void) -+{ -+ volatile __u32 in32 = 0x12345678; -+ volatile __u32 expected32 = 0x34127856; -+ -+ if (__rev16_32 (in32) != expected32) -+ abort (); -+ -+ if (__rev16_32_alt (in32) != expected32) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/arm/anddi_notdi-1.c -+++ b/src/gcc/testsuite/gcc.target/arm/anddi_notdi-1.c -@@ -0,0 +1,65 @@ -+/* { dg-do run } */ -+/* { dg-options "-O2 -fno-inline --save-temps" } */ -+ -+extern void abort (void); -+ -+typedef long long s64int; -+typedef int s32int; -+typedef unsigned long long u64int; -+typedef unsigned int u32int; -+ -+s64int -+anddi_di_notdi (s64int a, s64int b) -+{ -+ return (a & ~b); -+} -+ -+s64int -+anddi_di_notzesidi (s64int a, u32int b) -+{ -+ return (a & ~(u64int) b); -+} -+ -+s64int -+anddi_notdi_zesidi (s64int a, 
u32int b) -+{ -+ return (~a & (u64int) b); -+} -+ -+s64int -+anddi_di_notsesidi (s64int a, s32int b) -+{ -+ return (a & ~(s64int) b); -+} -+ -+int main () -+{ -+ s64int a64 = 0xdeadbeef0000ffffll; -+ s64int b64 = 0x000000005f470112ll; -+ s64int c64 = 0xdeadbeef300f0000ll; -+ -+ u32int c32 = 0x01124f4f; -+ s32int d32 = 0xabbaface; -+ -+ s64int z = anddi_di_notdi (c64, b64); -+ if (z != 0xdeadbeef20080000ll) -+ abort (); -+ -+ z = anddi_di_notzesidi (a64, c32); -+ if (z != 0xdeadbeef0000b0b0ll) -+ abort (); -+ -+ z = anddi_notdi_zesidi (c64, c32); -+ if (z != 0x0000000001104f4fll) -+ abort (); -+ -+ z = anddi_di_notsesidi (a64, d32); -+ if (z != 0x0000000000000531ll) -+ abort (); -+ -+ return 0; -+} -+ -+/* { dg-final { scan-assembler-times "bic\t" 6 } } */ -+ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/pr63210.c -+++ b/src/gcc/testsuite/gcc.target/arm/pr63210.c -@@ -0,0 +1,12 @@ -+/* { dg-do assemble } */ -+/* { dg-options "-mthumb -Os " } */ -+/* { dg-require-effective-target arm_thumb1_ok } */ -+ -+int foo1 (int c); -+int foo2 (int c); -+ -+int test (int c) -+{ -+ return (foo1 (c) || foo2 (c)); -+} -+/* { dg-final { object-size text <= 28 } } */ ---- a/src/gcc/testsuite/gcc.target/arm/pr60606-2.c -+++ b/src/gcc/testsuite/gcc.target/arm/pr60606-2.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O" } */ -+ -+int -+f (void) -+{ -+ register unsigned pc asm ("pc"); /* { dg-error "not general enough" } */ -+ -+ return pc > 0x12345678; -+} ---- a/src/gcc/testsuite/gcc.target/arm/vect-rounding-ceilf.c -+++ b/src/gcc/testsuite/gcc.target/arm/vect-rounding-ceilf.c -@@ -5,8 +5,11 @@ - - #define N 32 - -+float __attribute__((aligned(16))) input[N]; -+float __attribute__((aligned(16))) output[N]; -+ - void --foo (float *output, float *input) -+foo () - { - int i = 0; - /* Vectorizable. 
*/ ---- a/src/gcc/testsuite/gcc.target/arm/pr60650-2.c -+++ b/src/gcc/testsuite/gcc.target/arm/pr60650-2.c -@@ -4,17 +4,19 @@ - int a, h, j; - long long d, e, i; - int f; -+int - fn1 (void *p1, int p2) - { - switch (p2) - case 8: - { -- register b = *(long long *) p1, c asm ("r2"); -+ register int b = *(long long *) p1, c asm ("r2"); - asm ("%0": "=r" (a), "=r" (c):"r" (b), "r" (0)); - *(long long *) p1 = c; - } - } - -+int - fn2 () - { - int k; -@@ -27,8 +29,8 @@ - case 0: - ( - { -- register l asm ("r4"); -- register m asm ("r0"); -+ register int l asm ("r4"); -+ register int m asm ("r0"); - asm (" .err .endif\n\t": "=r" (h), "=r" (j):"r" (m), - "r" - (l));; ---- a/src/gcc/testsuite/gcc.target/arm/pr55642.c -+++ b/src/gcc/testsuite/gcc.target/arm/pr55642.c -@@ -2,6 +2,8 @@ - /* { dg-do compile } */ - /* { dg-require-effective-target arm_thumb2_ok } */ - -+extern int abs (int); -+ - int - foo (int v) - { ---- a/src/gcc/testsuite/gcc.target/arm/lfloor-vcvt_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/lfloor-vcvt_1.c -@@ -0,0 +1,21 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_v8_vfp_ok } */ -+/* { dg-options "-O2 -march=armv8-a" } */ -+/* { dg-add-options arm_v8_vfp } */ -+ -+int -+foofloat (float x) -+{ -+ return __builtin_lfloorf (x); -+} -+ -+/* { dg-final { scan-assembler-times "vcvtm.s32.f32\ts\[0-9\]+, s\[0-9\]+" 1 } } */ -+ -+ -+int -+foodouble (double x) -+{ -+ return __builtin_lfloor (x); -+} -+ -+/* { dg-final { scan-assembler-times "vcvtm.s32.f64\ts\[0-9\]+, d\[0-9\]+" 1 } } */ ---- a/src/gcc/testsuite/gcc.target/arm/pr60606-3.c -+++ b/src/gcc/testsuite/gcc.target/arm/pr60606-3.c -@@ -0,0 +1,9 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O" } */ -+ -+int -+f (void) -+{ -+ register unsigned int r asm ("cc"); /* { dg-error "not general enough|suitable for data type" } */ -+ return r; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_1.c -@@ -0,0 +1,19 @@ -+/* Verify: -+ * -fomit-frame-pointer. -+ * withoug outgoing. -+ * total frame size <= 256. -+ * number of callee-save reg == 1. -+ * optimized code should use "str !" for stack adjustment. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */ -+ -+#include "test_frame_common.h" -+ -+t_frame_pattern (test1, 200, ) -+t_frame_run (test1) -+ -+/* { dg-final { scan-assembler-times "str\tx30, \\\[sp, -\[0-9\]+\\\]!" 2 } } */ -+/* { dg-final { scan-assembler-times "ldr\tx30, \\\[sp\\\], \[0-9\]+" 3 } } */ -+ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_9.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_9.c -@@ -0,0 +1,17 @@ -+/* Verify: -+ * -fomit-frame-pointer. -+ * with outgoing. -+ * total frame size > 512. -+ area except outgoing <= 512 -+ * number of callee-saved reg = 1. -+ * Split stack adjustment into two subtractions. -+ the first subtractions couldn't be optimized -+ into "str !" as it's > 256. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-O2 -fomit-frame-pointer" } */ -+ -+#include "test_frame_common.h" -+ -+t_frame_pattern_outgoing (test9, 480, , 24, a[8], a[9], a[10]) -+t_frame_run (test9) ---- a/src/gcc/testsuite/gcc.target/aarch64/vldN_lane_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vldN_lane_1.c -@@ -0,0 +1,97 @@ -+/* { dg-do run } */ -+/* { dg-options "-O3 -fno-inline" } */ -+ -+#include <arm_neon.h> -+ -+extern void abort (void); -+ -+#define VARIANTS(VARIANT, STRUCT) \ -+VARIANT (uint8, , 8, _u8, 6, STRUCT) \ -+VARIANT (uint16, , 4, _u16, 3, STRUCT) \ -+VARIANT (uint32, , 2, _u32, 1, STRUCT) \ -+VARIANT (uint64, , 1, _u64, 0, STRUCT) \ -+VARIANT (int8, , 8, _s8, 5, STRUCT) \ -+VARIANT (int16, , 4, _s16, 2, STRUCT) \ -+VARIANT (int32, , 2, _s32, 0, STRUCT) \ -+VARIANT (int64, , 1, _s64, 0, STRUCT) \ -+VARIANT (poly8, , 8, _p8, 7, STRUCT) \ -+VARIANT (poly16, , 4, _p16, 1, STRUCT) \ -+VARIANT (float32, , 2, _f32, 1, STRUCT) \ -+VARIANT (float64, , 1, _f64, 0, STRUCT) \ -+VARIANT (uint8, q, 16, _u8, 14, STRUCT) \ -+VARIANT (uint16, q, 8, _u16, 4, STRUCT) \ -+VARIANT (uint32, q, 4, _u32, 3, STRUCT) \ -+VARIANT (uint64, q, 2, _u64, 0, STRUCT) \ -+VARIANT (int8, q, 16, _s8, 13, STRUCT) \ -+VARIANT (int16, q, 8, _s16, 6, STRUCT) \ -+VARIANT (int32, q, 4, _s32, 2, STRUCT) \ -+VARIANT (int64, q, 2, _s64, 1, STRUCT) \ -+VARIANT (poly8, q, 16, _p8, 12, STRUCT) \ -+VARIANT (poly16, q, 8, _p16, 5, STRUCT) \ -+VARIANT (float32, q, 4, _f32, 1, STRUCT)\ -+VARIANT (float64, q, 2, _f64, 0, STRUCT) -+ -+#define TESTMETH(BASE, Q, ELTS, SUFFIX, LANE, STRUCT) \ -+int \ -+test_vld##STRUCT##Q##_lane##SUFFIX (const BASE##_t *data, \ -+ const BASE##_t *overwrite) \ -+{ \ -+ BASE##x##ELTS##x##STRUCT##_t vectors; \ -+ BASE##_t temp[ELTS]; \ -+ int i,j; \ -+ for (i = 0; i < STRUCT; i++, data += ELTS) \ -+ vectors.val[i] = vld1##Q##SUFFIX (data); \ -+ vectors = vld##STRUCT##Q##_lane##SUFFIX (overwrite, vectors, LANE); \ -+ while (--i >= 0) \ -+ { \ -+ vst1##Q##SUFFIX (temp, vectors.val[i]); \ -+ data -= ELTS; /* Point at value loaded before vldN_lane. */ \ -+ for (j = 0; j < ELTS; j++) \ -+ if (temp[j] != (j == LANE ? overwrite[i] : data[j])) \ -+ return 1; \ -+ } \ -+ return 0; \ -+} -+ -+ -+/* Tests of vld2_dup and vld2q_dup. */ -+VARIANTS (TESTMETH, 2) -+/* Tests of vld3_dup and vld3q_dup. */ -+VARIANTS (TESTMETH, 3) -+/* Tests of vld4_dup and vld4q_dup. */ -+VARIANTS (TESTMETH, 4) -+ -+#define CHECK(BASE, Q, ELTS, SUFFIX, LANE, STRUCT) \ -+ if (test_vld##STRUCT##Q##_lane##SUFFIX ((const BASE##_t *)orig_data, \ -+ BASE##_data) != 0) \ -+ abort (); -+ -+int -+main (int argc, char **argv) -+{ -+ /* Original data for all vector formats. */ -+ uint64_t orig_data[8] = {0x1234567890abcdefULL, 0x13579bdf02468aceULL, -+ 0x012389ab4567cdefULL, 0xfeeddadacafe0431ULL, -+ 0x1032547698badcfeULL, 0xbadbadbadbad0badULL, -+ 0x0102030405060708ULL, 0x0f0e0d0c0b0a0908ULL}; -+ -+ /* Data with which vldN_lane will overwrite some of previous. 
*/ -+ uint8_t uint8_data[4] = { 7, 11, 13, 17 }; -+ uint16_t uint16_data[4] = { 257, 263, 269, 271 }; -+ uint32_t uint32_data[4] = { 65537, 65539, 65543, 65551 }; -+ uint64_t uint64_data[4] = { 0xdeadbeefcafebabeULL, 0x0123456789abcdefULL, -+ 0xfedcba9876543210LL, 0xdeadbabecafebeefLL }; -+ int8_t int8_data[4] = { -1, 3, -5, 7 }; -+ int16_t int16_data[4] = { 257, -259, 261, -263 }; -+ int32_t int32_data[4] = { 123456789, -987654321, -135792468, 975318642 }; -+ int64_t *int64_data = (int64_t *)uint64_data; -+ poly8_t poly8_data[4] = { 0, 7, 13, 18, }; -+ poly16_t poly16_data[4] = { 11111, 2222, 333, 44 }; -+ float32_t float32_data[4] = { 3.14159, 2.718, 1.414, 100.0 }; -+ float64_t float64_data[4] = { 1.010010001, 12345.6789, -9876.54321, 1.618 }; -+ -+ VARIANTS (CHECK, 2); -+ VARIANTS (CHECK, 3); -+ VARIANTS (CHECK, 4); -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/vldN_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vldN_1.c -@@ -0,0 +1,79 @@ -+/* { dg-do run } */ -+/* { dg-options "-O3" } */ -+ -+#include <arm_neon.h> -+ -+extern void abort (void); -+ -+#define TESTMETH(BASE, ELTS, STRUCT, SUFFIX) \ -+int __attribute__ ((noinline)) \ -+test_vld##STRUCT##SUFFIX () \ -+{ \ -+ BASE##_t data[ELTS * STRUCT]; \ -+ BASE##_t temp[ELTS]; \ -+ BASE##x##ELTS##x##STRUCT##_t vectors; \ -+ int i,j; \ -+ for (i = 0; i < STRUCT * ELTS; i++) \ -+ data [i] = (BASE##_t) 2*i + 1; \ -+ asm volatile ("" : : : "memory"); \ -+ vectors = vld##STRUCT##SUFFIX (data); \ -+ for (i = 0; i < STRUCT; i++) \ -+ { \ -+ vst1##SUFFIX (temp, vectors.val[i]); \ -+ asm volatile ("" : : : "memory"); \ -+ for (j = 0; j < ELTS; j++) \ -+ if (temp[j] != data[i + STRUCT*j]) \ -+ return 1; \ -+ } \ -+ return 0; \ -+} -+ -+#define VARIANTS(VARIANT, STRUCT) \ -+VARIANT (uint8, 8, STRUCT, _u8) \ -+VARIANT (uint16, 4, STRUCT, _u16) \ -+VARIANT (uint32, 2, STRUCT, _u32) \ -+VARIANT (uint64, 1, STRUCT, _u64) \ -+VARIANT (int8, 8, STRUCT, _s8) \ -+VARIANT (int16, 4, STRUCT, _s16) \ -+VARIANT (int32, 2, STRUCT, _s32) \ -+VARIANT (int64, 1, STRUCT, _s64) \ -+VARIANT (poly8, 8, STRUCT, _p8) \ -+VARIANT (poly16, 4, STRUCT, _p16) \ -+VARIANT (float32, 2, STRUCT, _f32) \ -+VARIANT (float64, 1, STRUCT, _f64) \ -+VARIANT (uint8, 16, STRUCT, q_u8) \ -+VARIANT (uint16, 8, STRUCT, q_u16) \ -+VARIANT (uint32, 4, STRUCT, q_u32) \ -+VARIANT (uint64, 2, STRUCT, q_u64) \ -+VARIANT (int8, 16, STRUCT, q_s8) \ -+VARIANT (int16, 8, STRUCT, q_s16) \ -+VARIANT (int32, 4, STRUCT, q_s32) \ -+VARIANT (int64, 2, STRUCT, q_s64) \ -+VARIANT (poly8, 16, STRUCT, q_p8) \ -+VARIANT (poly16, 8, STRUCT, q_p16) \ -+VARIANT (float32, 4, STRUCT, q_f32) \ -+VARIANT (float64, 2, STRUCT, q_f64) -+ -+/* Tests of vld2 and vld2q. */ -+VARIANTS (TESTMETH, 2) -+ -+/* Tests of vld3 and vld3q. */ -+VARIANTS (TESTMETH, 3) -+ -+/* Tests of vld4 and vld4q. */ -+VARIANTS (TESTMETH, 4) -+ -+#define CHECK(BASE, ELTS, STRUCT, SUFFIX) \ -+ if (test_vld##STRUCT##SUFFIX () != 0) \ -+ abort (); -+ -+int -+main (int argc, char **argv) -+{ -+ VARIANTS (CHECK, 2) -+ VARIANTS (CHECK, 3) -+ VARIANTS (CHECK, 4) -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/vqabs_s64_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vqabs_s64_1.c -@@ -0,0 +1,54 @@ -+/* Test vqabs_s64 intrinsics work correctly. 
*/ -+/* { dg-do run } */ -+/* { dg-options "--save-temps" } */ -+ -+#include <arm_neon.h> -+ -+extern void abort (void); -+ -+int __attribute__ ((noinline)) -+test_vqabs_s64 (int64x1_t passed, int64_t expected) -+{ -+ return vget_lane_s64 (vqabs_s64 (passed), 0) != expected; -+} -+ -+int __attribute__ ((noinline)) -+test_vqabsd_s64 (int64_t passed, int64_t expected) -+{ -+ return vqabsd_s64 (passed) != expected; -+} -+ -+/* { dg-final { scan-assembler-times "sqabs\\td\[0-9\]+, d\[0-9\]+" 2 } } */ -+ -+int -+main (int argc, char **argv) -+{ -+ /* Basic test. */ -+ if (test_vqabs_s64 (vcreate_s64 (-1), 1)) -+ abort (); -+ if (test_vqabsd_s64 (-1, 1)) -+ abort (); -+ -+ /* Getting absolute value of min int64_t. -+ Note, exact result cannot be represented in int64_t, -+ so max int64_t is expected. */ -+ if (test_vqabs_s64 (vcreate_s64 (0x8000000000000000), 0x7fffffffffffffff)) -+ abort (); -+ if (test_vqabsd_s64 (0x8000000000000000, 0x7fffffffffffffff)) -+ abort (); -+ -+ /* Another input that gets max int64_t. */ -+ if (test_vqabs_s64 (vcreate_s64 (0x8000000000000001), 0x7fffffffffffffff)) -+ abort (); -+ if (test_vqabsd_s64 (0x8000000000000001, 0x7fffffffffffffff)) -+ abort (); -+ -+ /* Checking that large positive numbers stay the same. */ -+ if (test_vqabs_s64 (vcreate_s64 (0x7fffffffffffffff), 0x7fffffffffffffff)) -+ abort (); -+ if (test_vqabsd_s64 (0x7fffffffffffffff, 0x7fffffffffffffff)) -+ abort (); -+ -+ return 0; -+} -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/acle/acle.exp -+++ b/src/gcc/testsuite/gcc.target/aarch64/acle/acle.exp -@@ -0,0 +1,35 @@ -+# Copyright (C) 2014 Free Software Foundation, Inc. -+ -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 3 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with GCC; see the file COPYING3. If not see -+# <http://www.gnu.org/licenses/>. -+ -+# GCC testsuite that uses the `dg.exp' driver. -+ -+# Exit immediately if this isn't an AArch64 target. -+if ![istarget aarch64*-*-*] then { -+ return -+} -+ -+# Load support procs. -+load_lib gcc-dg.exp -+ -+# Initialize `dg'. -+dg-init -+ -+# Main loop. -+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \ -+ "" "" -+ -+# All done. -+dg-finish ---- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32b.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32b.c -@@ -0,0 +1,15 @@ -+/* Test the crc32b ACLE intrinsic. */ -+ -+/* { dg-do assemble } */ -+/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */ -+ -+#include "arm_acle.h" -+ -+uint32_t -+test_crc32b (uint32_t arg0, uint8_t arg1) -+{ -+ return __crc32b (arg0, arg1); -+} -+ -+/* { dg-final { scan-assembler "crc32b\tw..?, w..?, w..?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32d.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32d.c -@@ -0,0 +1,15 @@ -+/* Test the crc32d ACLE intrinsic. 
*/ -+ -+/* { dg-do assemble } */ -+/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */ -+ -+#include "arm_acle.h" -+ -+uint32_t -+test_crc32d (uint32_t arg0, uint64_t arg1) -+{ -+ return __crc32d (arg0, arg1); -+} -+ -+/* { dg-final { scan-assembler "crc32x\tw..?, w..?, x..?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32cb.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32cb.c -@@ -0,0 +1,15 @@ -+/* Test the crc32cb ACLE intrinsic. */ -+ -+/* { dg-do assemble } */ -+/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */ -+ -+#include "arm_acle.h" -+ -+uint32_t -+test_crc32cb (uint32_t arg0, uint8_t arg1) -+{ -+ return __crc32cb (arg0, arg1); -+} -+ -+/* { dg-final { scan-assembler "crc32cb\tw..?, w..?, w..?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32cd.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32cd.c -@@ -0,0 +1,15 @@ -+/* Test the crc32cd ACLE intrinsic. */ -+ -+/* { dg-do assemble } */ -+/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */ -+ -+#include "arm_acle.h" -+ -+uint32_t -+test_crc32cd (uint32_t arg0, uint64_t arg1) -+{ -+ return __crc32cd (arg0, arg1); -+} -+ -+/* { dg-final { scan-assembler "crc32cx\tw..?, w..?, x..?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32w.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32w.c -@@ -0,0 +1,15 @@ -+/* Test the crc32w ACLE intrinsic. */ -+ -+/* { dg-do assemble } */ -+/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */ -+ -+#include "arm_acle.h" -+ -+uint32_t -+test_crc32w (uint32_t arg0, uint32_t arg1) -+{ -+ return __crc32w (arg0, arg1); -+} -+ -+/* { dg-final { scan-assembler "crc32w\tw..?, w..?, w..?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32h.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32h.c -@@ -0,0 +1,15 @@ -+/* Test the crc32h ACLE intrinsic. */ -+ -+/* { dg-do assemble } */ -+/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */ -+ -+#include "arm_acle.h" -+ -+uint32_t -+test_crc32h (uint32_t arg0, uint16_t arg1) -+{ -+ return __crc32h (arg0, arg1); -+} -+ -+/* { dg-final { scan-assembler "crc32h\tw..?, w..?, w..?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32cw.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32cw.c -@@ -0,0 +1,15 @@ -+/* Test the crc32cw ACLE intrinsic. */ -+ -+/* { dg-do assemble } */ -+/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */ -+ -+#include "arm_acle.h" -+ -+uint32_t -+test_crc32cw (uint32_t arg0, uint32_t arg1) -+{ -+ return __crc32cw (arg0, arg1); -+} -+ -+/* { dg-final { scan-assembler "crc32cw\tw..?, w..?, w..?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32ch.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32ch.c -@@ -0,0 +1,15 @@ -+/* Test the crc32ch ACLE intrinsic. 
*/ -+ -+/* { dg-do assemble } */ -+/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */ -+ -+#include "arm_acle.h" -+ -+uint32_t -+test_crc32ch (uint32_t arg0, uint16_t arg1) -+{ -+ return __crc32ch (arg0, arg1); -+} -+ -+/* { dg-final { scan-assembler "crc32ch\tw..?, w..?, w..?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_13.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_13.c -@@ -0,0 +1,18 @@ -+/* Verify: -+ * without outgoing. -+ * total frame size > 512. -+ * number of callee-save reg >= 2. -+ * split the stack adjustment into two substractions, -+ the second could be optimized into "stp !". */ -+ -+/* { dg-do run } */ -+/* { dg-options "-O2 --save-temps" } */ -+ -+#include "test_frame_common.h" -+ -+t_frame_pattern (test13, 700, ) -+t_frame_run (test13) -+ -+/* { dg-final { scan-assembler-times "sub\tsp, sp, #\[0-9\]+" 1 } } */ -+/* { dg-final { scan-assembler-times "stp\tx29, x30, \\\[sp, -\[0-9\]+\\\]!" 2 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_2.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_2.c -@@ -0,0 +1,20 @@ -+/* Verify: -+ * -fomit-frame-pointer. -+ * without outgoing. -+ * total frame size <= 256. -+ * number of callee-save regs >= 2. -+ * optimized code should use "stp !" for stack adjustment. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */ -+ -+#include "test_frame_common.h" -+ -+t_frame_pattern (test2, 200, "x19") -+t_frame_run (test2) -+ -+ -+/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, -\[0-9\]+\\\]!" 1 } } */ -+/* { dg-final { scan-assembler-times "ldp\tx19, x30, \\\[sp\\\], \[0-9\]+" 2 } } */ -+ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/legitimize_stack_var_before_reload_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/legitimize_stack_var_before_reload_1.c -@@ -0,0 +1,21 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fdump-rtl-expand" } */ -+ -+extern void initialize_array (unsigned char *, int); -+ -+int -+test15 (void) -+{ -+ unsigned char a[480]; -+ -+ initialize_array (a, 480); -+ -+ if (a[0] == 0x10) -+ return 1; -+ -+ return 0; -+} -+ -+/* { dg-final { scan-rtl-dump "\\(mem\[^\\n\]*\\(plus\[^\\n\]*virtual-stack-vars" "expand" } } */ -+ -+/* { dg-final { cleanup-rtl-dump "expand" } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/vreinterpret_f64_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vreinterpret_f64_1.c -@@ -0,0 +1,596 @@ -+/* Test vreinterpret_f64_* and vreinterpret_*_f64 intrinsics work correctly. */ -+/* { dg-do run } */ -+/* { dg-options "-O3" } */ -+ -+#include <arm_neon.h> -+ -+extern void abort (void); -+ -+#define ABS(a) __builtin_fabs (a) -+#define ISNAN(a) __builtin_isnan (a) -+ -+#define DOUBLE_EQUALS(a, b, epsilon) \ -+( \ -+ ((a) == (b)) \ -+ || (ISNAN (a) && ISNAN (b)) \ -+ || (ABS (a - b) < epsilon) \ -+) -+ -+/* Pi accurate up to 16 digits. -+ Further digits are a closest binary approximation. */ -+#define PI_F64 3.14159265358979311599796346854 -+/* Hex representation in Double (IEEE754 Double precision 64-bit) is: -+ 0x400921FB54442D18. */ -+ -+/* E accurate up to 16 digits. -+ Further digits are a closest binary approximation. */ -+#define E_F64 2.71828182845904509079559829843 -+/* Hex representation in Double (IEEE754 Double precision 64-bit) is: -+ 0x4005BF0A8B145769. 
*/ -+ -+float32x2_t __attribute__ ((noinline)) -+wrap_vreinterpret_f32_f64 (float64x1_t __a) -+{ -+ return vreinterpret_f32_f64 (__a); -+} -+ -+int __attribute__ ((noinline)) -+test_vreinterpret_f32_f64 () -+{ -+ float64x1_t a; -+ float32x2_t b; -+ float64_t c[1] = { PI_F64 }; -+ /* Values { 0x54442D18, 0x400921FB } reinterpreted as f32. */ -+ float32_t d[2] = { 3.3702805504E12, 2.1426990032196044921875E0 }; -+ float32_t e[2]; -+ int i; -+ -+ a = vld1_f64 (c); -+ b = wrap_vreinterpret_f32_f64 (a); -+ vst1_f32 (e, b); -+ for (i = 0; i < 2; i++) -+ if (!DOUBLE_EQUALS (d[i], e[i], __FLT_EPSILON__)) -+ return 1; -+ return 0; -+}; -+ -+int8x8_t __attribute__ ((noinline)) -+wrap_vreinterpret_s8_f64 (float64x1_t __a) -+{ -+ return vreinterpret_s8_f64 (__a); -+} -+ -+int __attribute__ ((noinline)) -+test_vreinterpret_s8_f64 () -+{ -+ float64x1_t a; -+ int8x8_t b; -+ float64_t c[1] = { PI_F64 }; -+ int8_t d[8] = { 0x18, 0x2D, 0x44, 0x54, 0xFB, 0x21, 0x09, 0x40 }; -+ int8_t e[8]; -+ int i; -+ -+ a = vld1_f64 (c); -+ b = wrap_vreinterpret_s8_f64 (a); -+ vst1_s8 (e, b); -+ for (i = 0; i < 8; i++) -+ if (d[i] != e[i]) -+ return 1; -+ return 0; -+}; -+ -+int16x4_t __attribute__ ((noinline)) -+wrap_vreinterpret_s16_f64 (float64x1_t __a) -+{ -+ return vreinterpret_s16_f64 (__a); -+} -+ -+int __attribute__ ((noinline)) -+test_vreinterpret_s16_f64 () -+{ -+ float64x1_t a; -+ int16x4_t b; -+ float64_t c[1] = { PI_F64 }; -+ int16_t d[4] = { 0x2D18, 0x5444, 0x21FB, 0x4009 }; -+ int16_t e[4]; -+ int i; -+ -+ a = vld1_f64 (c); -+ b = wrap_vreinterpret_s16_f64 (a); -+ vst1_s16 (e, b); -+ for (i = 0; i < 4; i++) -+ if (d[i] != e[i]) -+ return 1; -+ return 0; -+}; -+ -+int32x2_t __attribute__ ((noinline)) -+wrap_vreinterpret_s32_f64 (float64x1_t __a) -+{ -+ return vreinterpret_s32_f64 (__a); -+} -+ -+int __attribute__ ((noinline)) -+test_vreinterpret_s32_f64 () -+{ -+ float64x1_t a; -+ int32x2_t b; -+ float64_t c[1] = { PI_F64 }; -+ int32_t d[2] = { 0x54442D18, 0x400921FB }; -+ int32_t e[2]; -+ int i; -+ -+ a = vld1_f64 (c); -+ b = wrap_vreinterpret_s32_f64 (a); -+ vst1_s32 (e, b); -+ for (i = 0; i < 2; i++) -+ if (d[i] != e[i]) -+ return 1; -+ return 0; -+}; -+ -+int64x1_t __attribute__ ((noinline)) -+wrap_vreinterpret_s64_f64 (float64x1_t __a) -+{ -+ return vreinterpret_s64_f64 (__a); -+} -+ -+int __attribute__ ((noinline)) -+test_vreinterpret_s64_f64 () -+{ -+ float64x1_t a; -+ int64x1_t b; -+ float64_t c[1] = { PI_F64 }; -+ int64_t d[1] = { 0x400921FB54442D18 }; -+ int64_t e[1]; -+ int i; -+ -+ a = vld1_f64 (c); -+ b = wrap_vreinterpret_s64_f64 (a); -+ vst1_s64 (e, b); -+ if (d[0] != e[0]) -+ return 1; -+ return 0; -+}; -+ -+float32x4_t __attribute__ ((noinline)) -+wrap_vreinterpretq_f32_f64 (float64x2_t __a) -+{ -+ return vreinterpretq_f32_f64 (__a); -+} -+ -+int __attribute__ ((noinline)) -+test_vreinterpretq_f32_f64 () -+{ -+ float64x2_t a; -+ float32x4_t b; -+ float64_t c[2] = { PI_F64, E_F64 }; -+ -+ /* Values corresponding to f32 reinterpret of -+ { 0x54442D18, 0x400921FB, 0x8B145769, 0x4005BF0A }. 
*/ -+ float32_t d[4] = { 3.3702805504E12, -+ 2.1426990032196044921875E0, -+ -2.8569523269651966444143014594E-32, -+ 2.089785099029541015625E0 }; -+ float32_t e[4]; -+ int i; -+ -+ a = vld1q_f64 (c); -+ b = wrap_vreinterpretq_f32_f64 (a); -+ vst1q_f32 (e, b); -+ for (i = 0; i < 4; i++) -+ { -+ if (!DOUBLE_EQUALS (d[i], e[i], __FLT_EPSILON__)) -+ return 1; -+ } -+ return 0; -+}; -+ -+int8x16_t __attribute__ ((noinline)) -+wrap_vreinterpretq_s8_f64 (float64x2_t __a) -+{ -+ return vreinterpretq_s8_f64 (__a); -+} -+ -+int __attribute__ ((noinline)) -+test_vreinterpretq_s8_f64 () -+{ -+ float64x2_t a; -+ int8x16_t b; -+ float64_t c[2] = { PI_F64, E_F64 }; -+ int8_t d[16] = { 0x18, 0x2D, 0x44, 0x54, 0xFB, 0x21, 0x09, 0x40, -+ 0x69, 0x57, 0x14, 0x8B, 0x0A, 0xBF, 0x05, 0x40 }; -+ int8_t e[16]; -+ int i; -+ -+ a = vld1q_f64 (c); -+ b = wrap_vreinterpretq_s8_f64 (a); -+ vst1q_s8 (e, b); -+ for (i = 0; i < 16; i++) -+ if (d[i] != e[i]) -+ return 1; -+ return 0; -+}; -+ -+int16x8_t __attribute__ ((noinline)) -+wrap_vreinterpretq_s16_f64 (float64x2_t __a) -+{ -+ return vreinterpretq_s16_f64 (__a); -+} -+ -+int __attribute__ ((noinline)) -+test_vreinterpretq_s16_f64 () -+{ -+ float64x2_t a; -+ int16x8_t b; -+ float64_t c[2] = { PI_F64, E_F64 }; -+ int16_t d[8] = { 0x2D18, 0x5444, 0x21FB, 0x4009, -+ 0x5769, 0x8B14, 0xBF0A, 0x4005 }; -+ int16_t e[8]; -+ int i; -+ -+ a = vld1q_f64 (c); -+ b = wrap_vreinterpretq_s16_f64 (a); -+ vst1q_s16 (e, b); -+ for (i = 0; i < 8; i++) -+ if (d[i] != e[i]) -+ return 1; -+ return 0; -+}; -+ -+int32x4_t __attribute__ ((noinline)) -+wrap_vreinterpretq_s32_f64 (float64x2_t __a) -+{ -+ return vreinterpretq_s32_f64 (__a); -+} -+ -+int __attribute__ ((noinline)) -+test_vreinterpretq_s32_f64 () -+{ -+ float64x2_t a; -+ int32x4_t b; -+ float64_t c[2] = { PI_F64, E_F64 }; -+ int32_t d[4] = { 0x54442D18, 0x400921FB, 0x8B145769, 0x4005BF0A }; -+ int32_t e[4]; -+ int i; -+ -+ a = vld1q_f64 (c); -+ b = wrap_vreinterpretq_s32_f64 (a); -+ vst1q_s32 (e, b); -+ for (i = 0; i < 4; i++) -+ if (d[i] != e[i]) -+ return 1; -+ return 0; -+}; -+ -+int64x2_t __attribute__ ((noinline)) -+wrap_vreinterpretq_s64_f64 (float64x2_t __a) -+{ -+ return vreinterpretq_s64_f64 (__a); -+} -+ -+int __attribute__ ((noinline)) -+test_vreinterpretq_s64_f64 () -+{ -+ float64x2_t a; -+ int64x2_t b; -+ float64_t c[2] = { PI_F64, E_F64 }; -+ int64_t d[2] = { 0x400921FB54442D18, 0x4005BF0A8B145769 }; -+ int64_t e[2]; -+ int i; -+ -+ a = vld1q_f64 (c); -+ b = wrap_vreinterpretq_s64_f64 (a); -+ vst1q_s64 (e, b); -+ for (i = 0; i < 2; i++) -+ if (d[i] != e[i]) -+ return 1; -+ return 0; -+}; -+ -+float64x1_t __attribute__ ((noinline)) -+wrap_vreinterpret_f64_f32 (float32x2_t __a) -+{ -+ return vreinterpret_f64_f32 (__a); -+} -+ -+int __attribute__ ((noinline)) -+test_vreinterpret_f64_f32 () -+{ -+ float32x2_t a; -+ float64x1_t b; -+ /* Values { 0x54442D18, 0x400921FB } reinterpreted as f32. 
*/ -+ float32_t c[2] = { 3.3702805504E12, 2.1426990032196044921875E0 }; -+ float64_t d[1] = { PI_F64 }; -+ float64_t e[1]; -+ int i; -+ -+ a = vld1_f32 (c); -+ b = wrap_vreinterpret_f64_f32 (a); -+ vst1_f64 (e, b); -+ if (!DOUBLE_EQUALS (d[0], e[0], __DBL_EPSILON__)) -+ return 1; -+ return 0; -+}; -+ -+float64x1_t __attribute__ ((noinline)) -+wrap_vreinterpret_f64_s8 (int8x8_t __a) -+{ -+ return vreinterpret_f64_s8 (__a); -+} -+ -+int __attribute__ ((noinline)) -+test_vreinterpret_f64_s8 () -+{ -+ int8x8_t a; -+ float64x1_t b; -+ int8_t c[8] = { 0x18, 0x2D, 0x44, 0x54, 0xFB, 0x21, 0x09, 0x40 }; -+ float64_t d[1] = { PI_F64 }; -+ float64_t e[1]; -+ int i; -+ -+ a = vld1_s8 (c); -+ b = wrap_vreinterpret_f64_s8 (a); -+ vst1_f64 (e, b); -+ if (!DOUBLE_EQUALS (d[0], e[0], __DBL_EPSILON__)) -+ return 1; -+ return 0; -+}; -+ -+float64x1_t __attribute__ ((noinline)) -+wrap_vreinterpret_f64_s16 (int16x4_t __a) -+{ -+ return vreinterpret_f64_s16 (__a); -+} -+ -+int __attribute__ ((noinline)) -+test_vreinterpret_f64_s16 () -+{ -+ int16x4_t a; -+ float64x1_t b; -+ int16_t c[4] = { 0x2D18, 0x5444, 0x21FB, 0x4009 }; -+ float64_t d[1] = { PI_F64 }; -+ float64_t e[1]; -+ int i; -+ -+ a = vld1_s16 (c); -+ b = wrap_vreinterpret_f64_s16 (a); -+ vst1_f64 (e, b); -+ if (!DOUBLE_EQUALS (d[0], e[0], __DBL_EPSILON__)) -+ return 1; -+ return 0; -+}; -+ -+float64x1_t __attribute__ ((noinline)) -+wrap_vreinterpret_f64_s32 (int32x2_t __a) -+{ -+ return vreinterpret_f64_s32 (__a); -+} -+ -+int __attribute__ ((noinline)) -+test_vreinterpret_f64_s32 () -+{ -+ int32x2_t a; -+ float64x1_t b; -+ int32_t c[2] = { 0x54442D18, 0x400921FB }; -+ float64_t d[1] = { PI_F64 }; -+ float64_t e[1]; -+ int i; -+ -+ a = vld1_s32 (c); -+ b = wrap_vreinterpret_f64_s32 (a); -+ vst1_f64 (e, b); -+ if (!DOUBLE_EQUALS (d[0], e[0], __DBL_EPSILON__)) -+ return 1; -+ return 0; -+}; -+ -+float64x1_t __attribute__ ((noinline)) -+wrap_vreinterpret_f64_s64 (int64x1_t __a) -+{ -+ return vreinterpret_f64_s64 (__a); -+} -+ -+int __attribute__ ((noinline)) -+test_vreinterpret_f64_s64 () -+{ -+ int64x1_t a; -+ float64x1_t b; -+ int64_t c[1] = { 0x400921FB54442D18 }; -+ float64_t d[1] = { PI_F64 }; -+ float64_t e[1]; -+ -+ a = vld1_s64 (c); -+ b = wrap_vreinterpret_f64_s64 (a); -+ vst1_f64 (e, b); -+ if (!DOUBLE_EQUALS (d[0], e[0], __DBL_EPSILON__)) -+ return 1; -+ return 0; -+}; -+ -+float64x2_t __attribute__ ((noinline)) -+wrap_vreinterpretq_f64_f32 (float32x4_t __a) -+{ -+ return vreinterpretq_f64_f32 (__a); -+} -+ -+int __attribute__ ((noinline)) -+test_vreinterpretq_f64_f32 () -+{ -+ float32x4_t a; -+ float64x2_t b; -+ /* Values corresponding to f32 reinterpret of -+ { 0x54442D18, 0x400921FB, 0x8B145769, 0x4005BF0A }. 
*/ -+ float32_t c[4] = { 3.3702805504E12, -+ 2.1426990032196044921875E0, -+ -2.8569523269651966444143014594E-32, -+ 2.089785099029541015625E0 }; -+ -+ float64_t d[2] = { PI_F64, E_F64 }; -+ float64_t e[2]; -+ int i; -+ -+ a = vld1q_f32 (c); -+ b = wrap_vreinterpretq_f64_f32 (a); -+ vst1q_f64 (e, b); -+ for (i = 0; i < 2; i++) -+ if (!DOUBLE_EQUALS (d[i], e[i], __DBL_EPSILON__)) -+ return 1; -+ return 0; -+}; -+ -+float64x2_t __attribute__ ((noinline)) -+wrap_vreinterpretq_f64_s8 (int8x16_t __a) -+{ -+ return vreinterpretq_f64_s8 (__a); -+} -+ -+int __attribute__ ((noinline)) -+test_vreinterpretq_f64_s8 () -+{ -+ int8x16_t a; -+ float64x2_t b; -+ int8_t c[16] = { 0x18, 0x2D, 0x44, 0x54, 0xFB, 0x21, 0x09, 0x40, -+ 0x69, 0x57, 0x14, 0x8B, 0x0A, 0xBF, 0x05, 0x40 }; -+ float64_t d[2] = { PI_F64, E_F64 }; -+ float64_t e[2]; -+ int i; -+ -+ a = vld1q_s8 (c); -+ b = wrap_vreinterpretq_f64_s8 (a); -+ vst1q_f64 (e, b); -+ for (i = 0; i < 2; i++) -+ if (!DOUBLE_EQUALS (d[i], e[i], __DBL_EPSILON__)) -+ return 1; -+ return 0; -+}; -+ -+float64x2_t __attribute__ ((noinline)) -+wrap_vreinterpretq_f64_s16 (int16x8_t __a) -+{ -+ return vreinterpretq_f64_s16 (__a); -+} -+ -+int __attribute__ ((noinline)) -+test_vreinterpretq_f64_s16 () -+{ -+ int16x8_t a; -+ float64x2_t b; -+ int16_t c[8] = { 0x2D18, 0x5444, 0x21FB, 0x4009, -+ 0x5769, 0x8B14, 0xBF0A, 0x4005 }; -+ float64_t d[2] = { PI_F64, E_F64 }; -+ float64_t e[2]; -+ int i; -+ -+ a = vld1q_s16 (c); -+ b = wrap_vreinterpretq_f64_s16 (a); -+ vst1q_f64 (e, b); -+ for (i = 0; i < 2; i++) -+ if (!DOUBLE_EQUALS (d[i], e[i], __DBL_EPSILON__)) -+ return 1; -+ return 0; -+}; -+ -+float64x2_t __attribute__ ((noinline)) -+wrap_vreinterpretq_f64_s32 (int32x4_t __a) -+{ -+ return vreinterpretq_f64_s32 (__a); -+} -+ -+int __attribute__ ((noinline)) -+test_vreinterpretq_f64_s32 () -+{ -+ int32x4_t a; -+ float64x2_t b; -+ int32_t c[4] = { 0x54442D18, 0x400921FB, 0x8B145769, 0x4005BF0A }; -+ float64_t d[2] = { PI_F64, E_F64 }; -+ float64_t e[2]; -+ int i; -+ -+ a = vld1q_s32 (c); -+ b = wrap_vreinterpretq_f64_s32 (a); -+ vst1q_f64 (e, b); -+ for (i = 0; i < 2; i++) -+ if (!DOUBLE_EQUALS (d[i], e[i], __DBL_EPSILON__)) -+ return 1; -+ return 0; -+}; -+ -+float64x2_t __attribute__ ((noinline)) -+wrap_vreinterpretq_f64_s64 (int64x2_t __a) -+{ -+ return vreinterpretq_f64_s64 (__a); -+} -+ -+int __attribute__ ((noinline)) -+test_vreinterpretq_f64_s64 () -+{ -+ int64x2_t a; -+ float64x2_t b; -+ int64_t c[2] = { 0x400921FB54442D18, 0x4005BF0A8B145769 }; -+ float64_t d[2] = { PI_F64, E_F64 }; -+ float64_t e[2]; -+ int i; -+ -+ a = vld1q_s64 (c); -+ b = wrap_vreinterpretq_f64_s64 (a); -+ vst1q_f64 (e, b); -+ for (i = 0; i < 2; i++) -+ if (!DOUBLE_EQUALS (d[i], e[i], __DBL_EPSILON__)) -+ return 1; -+ return 0; -+}; -+ -+int -+main (int argc, char **argv) -+{ -+ if (test_vreinterpret_f32_f64 ()) -+ abort (); -+ -+ if (test_vreinterpret_s8_f64 ()) -+ abort (); -+ if (test_vreinterpret_s16_f64 ()) -+ abort (); -+ if (test_vreinterpret_s32_f64 ()) -+ abort (); -+ if (test_vreinterpret_s64_f64 ()) -+ abort (); -+ -+ if (test_vreinterpretq_f32_f64 ()) -+ abort (); -+ -+ if (test_vreinterpretq_s8_f64 ()) -+ abort (); -+ if (test_vreinterpretq_s16_f64 ()) -+ abort (); -+ if (test_vreinterpretq_s32_f64 ()) -+ abort (); -+ if (test_vreinterpretq_s64_f64 ()) -+ abort (); -+ -+ if (test_vreinterpret_f64_f32 ()) -+ abort (); -+ -+ if (test_vreinterpret_f64_s8 ()) -+ abort (); -+ if (test_vreinterpret_f64_s16 ()) -+ abort (); -+ if (test_vreinterpret_f64_s32 ()) -+ abort (); -+ if 
(test_vreinterpret_f64_s64 ()) -+ abort (); -+ -+ if (test_vreinterpretq_f64_f32 ()) -+ abort (); -+ -+ if (test_vreinterpretq_f64_s8 ()) -+ abort (); -+ if (test_vreinterpretq_f64_s16 ()) -+ abort (); -+ if (test_vreinterpretq_f64_s32 ()) -+ abort (); -+ if (test_vreinterpretq_f64_s64 ()) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/test_fp_attribute_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/test_fp_attribute_1.c -@@ -21,6 +21,6 @@ - leaf (); - } - --/* { dg-final { scan-assembler-times "str\tx30, \\\[sp\\\]" 2 } } */ -+/* { dg-final { scan-assembler-times "str\tx30, \\\[sp, -\[0-9\]+\\\]!" 2 } } */ - - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/vect.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/vect.x -@@ -2,6 +2,7 @@ - typedef unsigned int *__restrict__ pRUINT; - typedef long long *__restrict__ pRINT64; - typedef unsigned long long *__restrict__ pRUINT64; -+extern int abs (int j); - - void test_orn (pRUINT a, pRUINT b, pRUINT c) - { ---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_14.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_14.c -@@ -0,0 +1,12 @@ -+/* Verify: -+ * with outgoing. -+ * total frame size > 512. -+ * number of callee-save reg >= 2. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-O2" } */ -+ -+#include "test_frame_common.h" -+ -+t_frame_pattern_outgoing (test14, 700, , 8, a[8]) -+t_frame_run (test14) ---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_3.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_3.c -@@ -0,0 +1,14 @@ -+/* Verify: -+ * -fomit-frame-pointer. -+ * without outgoing. -+ * total frame size <= 512 but > 256. -+ * number of callee-save reg == 1. -+ * we can't use "str !" to optimize stack adjustment. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-O2 -fomit-frame-pointer" } */ -+ -+#include "test_frame_common.h" -+ -+t_frame_pattern (test3, 400, ) -+t_frame_run (test3) ---- a/src/gcc/testsuite/gcc.target/aarch64/pic-constantpool1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/pic-constantpool1.c -@@ -2,10 +2,13 @@ - /* { dg-do compile } */ - - extern int __finite (double __value) __attribute__ ((__nothrow__)) __attribute__ ((__const__)); -+extern int __finitef (float __value) __attribute__ ((__nothrow__)) __attribute__ ((__const__)); -+extern int __signbit (double __value) __attribute__ ((__nothrow__)) __attribute__ ((__const__)); -+extern int __signbitf (float __value) __attribute__ ((__nothrow__)) __attribute__ ((__const__)); - int - __ecvt_r (value, ndigit, decpt, sign, buf, len) - double value; -- int ndigit, *decpt, *sign; -+ int ndigit, *decpt, *sign, len; - char *buf; - { - if ((sizeof (value) == sizeof (float) ? __finitef (value) : __finite (value)) && value != 0.0) ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vpaddd_s64.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vpaddd_s64.c -@@ -0,0 +1,27 @@ -+/* Test the vpaddd_s64 AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3" } */ -+ -+#include "arm_neon.h" -+ -+#define SIZE 6 -+ -+extern void abort (void); -+ -+int64_t in[SIZE] = { -4l, 4l, -2l, 2l, -1l, 1l }; -+ -+int -+main (void) -+{ -+ int i; -+ -+ for (i = 0; i < SIZE / 2; ++i) -+ if (vpaddd_s64 (vld1q_s64 (in + 2 * i)) != 0) -+ abort (); -+ -+ return 0; -+} -+ -+/* { dg-final { scan-assembler "addp\[ \t\]+\[dD\]\[0-9\]+, v\[0-9\].2d+\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s16.x -@@ -0,0 +1,114 @@ -+extern void abort (void); -+ -+int16x8_t -+test_vextq_s16_1 (int16x8_t a, int16x8_t b) -+{ -+ return vextq_s16 (a, b, 1); -+} -+ -+int16x8_t -+test_vextq_s16_2 (int16x8_t a, int16x8_t b) -+{ -+ return vextq_s16 (a, b, 2); -+} -+ -+int16x8_t -+test_vextq_s16_3 (int16x8_t a, int16x8_t b) -+{ -+ return vextq_s16 (a, b, 3); -+} -+ -+int16x8_t -+test_vextq_s16_4 (int16x8_t a, int16x8_t b) -+{ -+ return vextq_s16 (a, b, 4); -+} -+ -+int16x8_t -+test_vextq_s16_5 (int16x8_t a, int16x8_t b) -+{ -+ return vextq_s16 (a, b, 5); -+} -+ -+int16x8_t -+test_vextq_s16_6 (int16x8_t a, int16x8_t b) -+{ -+ return vextq_s16 (a, b, 6); -+} -+ -+int16x8_t -+test_vextq_s16_7 (int16x8_t a, int16x8_t b) -+{ -+ return vextq_s16 (a, b, 7); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i, off; -+ int16_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7}; -+ int16x8_t in1 = vld1q_s16 (arr1); -+ int16_t arr2[] = {8, 9, 10, 11, 12, 13, 14, 15}; -+ int16x8_t in2 = vld1q_s16 (arr2); -+ int16_t exp[8]; -+ int16x8_t expected; -+ int16x8_t actual = test_vextq_s16_1 (in1, in2); -+ -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 1; -+ expected = vld1q_s16 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_s16_2 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 2; -+ expected = vld1q_s16 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_s16_3 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 3; -+ expected = vld1q_s16 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_s16_4 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 4; -+ expected = vld1q_s16 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_s16_5 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 5; -+ expected = vld1q_s16 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_s16_6 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 6; -+ expected = vld1q_s16 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_s16_7 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 7; -+ expected = vld1q_s16 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vpaddd_u64.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vpaddd_u64.c -@@ -0,0 +1,27 @@ -+/* Test the vpaddd_u64 AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3" } */ -+ -+#include "arm_neon.h" -+ -+#define SIZE 6 -+ -+extern void abort (void); -+ -+uint64_t in[SIZE] = { 4ul, 4ul, 2ul, 2ul, 1ul, 1ul }; -+ -+int -+main (void) -+{ -+ int i; -+ -+ for (i = 0; i < SIZE / 2; ++i) -+ if (vpaddd_u64 (vld1q_u64 (in + 2 * i)) != 2 * in[2 * i]) -+ abort (); -+ -+ return 0; -+} -+ -+/* { dg-final { scan-assembler "addp\[ \t\]+\[dD\]\[0-9\]+, v\[0-9\].2d+\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u8.x -@@ -0,0 +1,114 @@ -+extern void abort (void); -+ -+uint8x8_t -+test_vext_u8_1 (uint8x8_t a, uint8x8_t b) -+{ -+ return vext_u8 (a, b, 1); -+} -+ -+uint8x8_t -+test_vext_u8_2 (uint8x8_t a, uint8x8_t b) -+{ -+ return vext_u8 (a, b, 2); -+} -+ -+uint8x8_t -+test_vext_u8_3 (uint8x8_t a, uint8x8_t b) -+{ -+ return vext_u8 (a, b, 3); -+} -+ -+uint8x8_t -+test_vext_u8_4 (uint8x8_t a, uint8x8_t b) -+{ -+ return vext_u8 (a, b, 4); -+} -+ -+uint8x8_t -+test_vext_u8_5 (uint8x8_t a, uint8x8_t b) -+{ -+ return vext_u8 (a, b, 5); -+} -+ -+uint8x8_t -+test_vext_u8_6 (uint8x8_t a, uint8x8_t b) -+{ -+ return vext_u8 (a, b, 6); -+} -+ -+uint8x8_t -+test_vext_u8_7 (uint8x8_t a, uint8x8_t b) -+{ -+ return vext_u8 (a, b, 7); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i, off; -+ uint8_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7}; -+ uint8x8_t in1 = vld1_u8 (arr1); -+ uint8_t arr2[] = {8, 9, 10, 11, 12, 13, 14, 15}; -+ uint8x8_t in2 = vld1_u8 (arr2); -+ uint8_t exp[8]; -+ uint8x8_t expected; -+ uint8x8_t actual = test_vext_u8_1 (in1, in2); -+ -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 1; -+ expected = vld1_u8 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_u8_2 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 2; -+ expected = vld1_u8 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_u8_3 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 3; -+ expected = vld1_u8 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_u8_4 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 4; -+ expected = vld1_u8 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_u8_5 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 5; -+ expected = vld1_u8 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_u8_6 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 6; -+ expected = vld1_u8 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_u8_7 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 7; -+ expected = vld1_u8 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u16.x -@@ -0,0 +1,114 @@ -+extern void abort (void); -+ -+uint16x8_t -+test_vextq_u16_1 (uint16x8_t a, uint16x8_t b) -+{ -+ return vextq_u16 (a, b, 1); -+} -+ -+uint16x8_t -+test_vextq_u16_2 (uint16x8_t a, uint16x8_t b) -+{ -+ return vextq_u16 (a, b, 2); -+} -+ -+uint16x8_t -+test_vextq_u16_3 (uint16x8_t a, uint16x8_t b) -+{ -+ return vextq_u16 (a, b, 3); -+} -+ -+uint16x8_t -+test_vextq_u16_4 (uint16x8_t a, uint16x8_t b) -+{ -+ return vextq_u16 (a, b, 4); -+} 
-+ -+uint16x8_t -+test_vextq_u16_5 (uint16x8_t a, uint16x8_t b) -+{ -+ return vextq_u16 (a, b, 5); -+} -+ -+uint16x8_t -+test_vextq_u16_6 (uint16x8_t a, uint16x8_t b) -+{ -+ return vextq_u16 (a, b, 6); -+} -+ -+uint16x8_t -+test_vextq_u16_7 (uint16x8_t a, uint16x8_t b) -+{ -+ return vextq_u16 (a, b, 7); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i, off; -+ uint16_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7}; -+ uint16x8_t in1 = vld1q_u16 (arr1); -+ uint16_t arr2[] = {8, 9, 10, 11, 12, 13, 14, 15}; -+ uint16x8_t in2 = vld1q_u16 (arr2); -+ uint16_t exp[8]; -+ uint16x8_t expected; -+ uint16x8_t actual = test_vextq_u16_1 (in1, in2); -+ -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 1; -+ expected = vld1q_u16 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_u16_2 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 2; -+ expected = vld1q_u16 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_u16_3 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 3; -+ expected = vld1q_u16 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_u16_4 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 4; -+ expected = vld1q_u16 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_u16_5 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 5; -+ expected = vld1q_u16 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_u16_6 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 6; -+ expected = vld1q_u16 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_u16_7 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 7; -+ expected = vld1q_u16 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzips16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzips16_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzip_s16' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vzips16.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs16.x -@@ -0,0 +1,26 @@ -+extern void abort (void); -+ -+int16x8x2_t -+test_vuzpqs16 (int16x8_t _a, int16x8_t _b) -+{ -+ return vuzpq_s16 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; -+ int16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; -+ int16x8x2_t result = test_vuzpqs16 (vld1q_s16 (first), vld1q_s16 (second)); -+ int16_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15}; -+ int16_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16}; -+ int16x8_t expect1 = vld1q_s16 (exp1); -+ int16x8_t expect2 = vld1q_s16 (exp2); -+ -+ for (i = 0; i < 8; i++) -+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs8_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzipq_s8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vzipqs8.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev64q_p8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev64qp8.x" -+ -+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu16_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vtrn_u16' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vtrnu16.x" -+ -+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu16.x -@@ -0,0 +1,26 @@ -+extern void abort (void); -+ -+uint16x8x2_t -+test_vuzpqu16 (uint16x8_t _a, uint16x8_t _b) -+{ -+ return vuzpq_u16 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; -+ uint16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; -+ uint16x8x2_t result = test_vuzpqu16 (vld1q_u16 (first), vld1q_u16 (second)); -+ uint16_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15}; -+ uint16_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16}; -+ uint16x8_t expect1 = vld1q_u16 (exp1); -+ uint16x8_t expect2 = vld1q_u16 (exp2); -+ -+ for (i = 0; i < 8; i++) -+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu8.x -@@ -0,0 +1,26 @@ -+extern void abort (void); -+ -+uint8x8x2_t -+test_vuzpu8 (uint8x8_t _a, uint8x8_t _b) -+{ -+ return vuzp_u8 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; -+ uint8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; -+ uint8x8x2_t result = test_vuzpu8 (vld1_u8 (first), vld1_u8 (second)); -+ uint8_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15}; -+ uint8_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16}; -+ uint8x8_t expect1 = vld1_u8 (exp1); -+ uint8x8_t expect2 = vld1_u8 (exp2); -+ -+ for (i = 0; i < 8; i++) -+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u16_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vextu16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+#include "ext_u16.x" -+ -+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vextQu8' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+#include "extq_u8.x" -+ -+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#?\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 15 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu8.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+uint8x16_t -+test_vrev64qu8 (uint8x16_t _arg) -+{ -+ return vrev64q_u8 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; -+ uint8x16_t reversed = test_vrev64qu8 (inorder); -+ uint8x16_t expected = {8, 7, 6, 5, 4, 3, 2, 1, 16, 15, 14, 13, 12, 11, 10, 9}; -+ -+ for (i = 0; i < 16; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev32_p8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev32p8.x" -+ -+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps32.x -@@ -0,0 +1,26 @@ -+extern void abort (void); -+ -+int32x2x2_t -+test_vuzps32 (int32x2_t _a, int32x2_t _b) -+{ -+ return vuzp_s32 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int32_t first[] = {1, 2}; -+ int32_t second[] = {3, 4}; -+ int32x2x2_t result = test_vuzps32 (vld1_s32 (first), vld1_s32 (second)); -+ int32_t exp1[] = {1, 3}; -+ int32_t exp2[] = {2, 4}; -+ int32x2_t expect1 = vld1_s32 (exp1); -+ int32x2_t expect2 = vld1_s32 (exp2); -+ -+ for (i = 0; i < 2; i++) -+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s64.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s64.x -@@ -0,0 +1,17 @@ -+extern void abort (void); -+ -+int -+main (int argc, char **argv) -+{ -+ int i, off; -+ int64_t arr1[] = {0}; -+ int64x1_t in1 = vld1_s64 (arr1); -+ int64_t arr2[] = {1}; -+ int64x1_t in2 = vld1_s64 (arr2); -+ int64x1_t actual = vext_s64 (in1, in2, 0); -+ if (actual != in1) -+ abort (); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu32.x -@@ -0,0 +1,26 @@ -+extern void abort (void); -+ -+uint32x2x2_t -+test_vuzpu32 (uint32x2_t _a, uint32x2_t _b) -+{ -+ return vuzp_u32 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint32_t first[] = {1, 2}; -+ uint32_t second[] = {3, 4}; -+ uint32x2x2_t result = test_vuzpu32 (vld1_u32 (first), vld1_u32 (second)); -+ uint32_t exp1[] = {1, 3}; -+ uint32_t exp2[] = {2, 4}; -+ uint32x2_t expect1 = vld1_u32 (exp1); -+ uint32x2_t expect2 = vld1_u32 (exp2); -+ -+ for (i = 0; i < 2; i++) -+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u64.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u64.x -@@ -0,0 +1,17 @@ -+extern void 
abort (void); -+ -+int -+main (int argc, char **argv) -+{ -+ int i, off; -+ uint64_t arr1[] = {0}; -+ uint64x1_t in1 = vld1_u64 (arr1); -+ uint64_t arr2[] = {1}; -+ uint64x1_t in2 = vld1_u64 (arr2); -+ uint64x1_t actual = vext_u64 (in1, in2, 0); -+ if (actual != in1) -+ abort (); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns8_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vtrn_s8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vtrns8.x" -+ -+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs16_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vtrnq_s16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vtrnqs16.x" -+ -+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs32_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev64q_s32' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev64qs32.x" -+ -+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.4s, ?v\[0-9\]+.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev64_s8' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev64s8.x" -+ -+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs16.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+int16x8x2_t -+test_vzipqs16 (int16x8_t _a, int16x8_t _b) -+{ -+ return vzipq_s16 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; -+ int16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; -+ int16x8x2_t result = test_vzipqs16 (vld1q_s16 (first), vld1q_s16 (second)); -+ int16x8_t res1 = result.val[0], res2 = result.val[1]; -+ int16_t exp1[] = {1, 9, 2, 10, 3, 11, 4, 12}; -+ int16_t exp2[] = {5, 13, 6, 14, 7, 15, 8, 16}; -+ int16x8_t expected1 = vld1q_s16 (exp1); -+ int16x8_t expected2 = vld1q_s16 (exp2); -+ -+ for (i = 0; i < 8; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipf32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipf32.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+float32x2x2_t -+test_vzipf32 (float32x2_t _a, float32x2_t _b) -+{ -+ return vzip_f32 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ float32_t first[] = {1, 2}; -+ float32_t second[] = {3, 4}; -+ float32x2x2_t result = test_vzipf32 (vld1_f32 (first), vld1_f32 (second)); -+ float32x2_t res1 = result.val[0], res2 = result.val[1]; -+ float32_t exp1[] = {1, 3}; -+ float32_t exp2[] = {2, 4}; -+ float32x2_t expected1 = vld1_f32 (exp1); -+ float32x2_t expected2 = vld1_f32 (exp2); -+ -+ for (i = 0; i < 2; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu8.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+uint8x8x2_t -+test_vzipu8 (uint8x8_t _a, uint8x8_t _b) -+{ -+ return vzip_u8 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; -+ uint8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; -+ uint8x8x2_t result = test_vzipu8 (vld1_u8 (first), vld1_u8 (second)); -+ uint8x8_t res1 = result.val[0], res2 = result.val[1]; -+ uint8_t exp1[] = {1, 9, 2, 10, 3, 11, 4, 12}; -+ uint8_t exp2[] = {5, 13, 6, 14, 7, 15, 8, 16}; -+ uint8x8_t expected1 = vld1_u8 (exp1); -+ uint8x8_t expected2 = vld1_u8 (exp2); -+ -+ for (i = 0; i < 8; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu16.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+uint16x8x2_t -+test_vzipqu16 (uint16x8_t _a, uint16x8_t _b) -+{ -+ return vzipq_u16 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; -+ uint16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; -+ uint16x8x2_t result = test_vzipqu16 (vld1q_u16 (first), vld1q_u16 (second)); -+ uint16x8_t res1 = result.val[0], res2 = result.val[1]; -+ uint16_t exp1[] = {1, 9, 2, 10, 3, 11, 4, 12}; -+ uint16_t exp2[] = {5, 13, 6, 14, 7, 15, 8, 16}; -+ uint16x8_t expected1 = vld1q_u16 (exp1); -+ uint16x8_t expected2 = 
vld1q_u16 (exp2); -+ -+ for (i = 0; i < 8; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s16_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vextQs16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+#include "extq_s16.x" -+ -+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp16_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vuzpq_p16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vuzpqp16.x" -+ -+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p8.x -@@ -0,0 +1,114 @@ -+extern void abort (void); -+ -+poly8x8_t -+test_vext_p8_1 (poly8x8_t a, poly8x8_t b) -+{ -+ return vext_p8 (a, b, 1); -+} -+ -+poly8x8_t -+test_vext_p8_2 (poly8x8_t a, poly8x8_t b) -+{ -+ return vext_p8 (a, b, 2); -+} -+ -+poly8x8_t -+test_vext_p8_3 (poly8x8_t a, poly8x8_t b) -+{ -+ return vext_p8 (a, b, 3); -+} -+ -+poly8x8_t -+test_vext_p8_4 (poly8x8_t a, poly8x8_t b) -+{ -+ return vext_p8 (a, b, 4); -+} -+ -+poly8x8_t -+test_vext_p8_5 (poly8x8_t a, poly8x8_t b) -+{ -+ return vext_p8 (a, b, 5); -+} -+ -+poly8x8_t -+test_vext_p8_6 (poly8x8_t a, poly8x8_t b) -+{ -+ return vext_p8 (a, b, 6); -+} -+ -+poly8x8_t -+test_vext_p8_7 (poly8x8_t a, poly8x8_t b) -+{ -+ return vext_p8 (a, b, 7); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i, off; -+ poly8_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7}; -+ poly8x8_t in1 = vld1_p8 (arr1); -+ poly8_t arr2[] = {8, 9, 10, 11, 12, 13, 14, 15}; -+ poly8x8_t in2 = vld1_p8 (arr2); -+ poly8_t exp[8]; -+ poly8x8_t expected; -+ poly8x8_t actual = test_vext_p8_1 (in1, in2); -+ -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 1; -+ expected = vld1_p8 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_p8_2 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 2; -+ expected = vld1_p8 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_p8_3 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 3; -+ expected = vld1_p8 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_p8_4 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 4; -+ expected = vld1_p8 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_p8_5 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 5; -+ expected = vld1_p8 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_p8_6 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 6; -+ expected = vld1_p8 (exp); -+ for (i 
= 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_p8_7 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 7; -+ expected = vld1_p8 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu32_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vuzpq_u32' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vuzpqu32.x" -+ -+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s16_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev32_s16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev32s16.x" -+ -+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.4h, ?v\[0-9\]+.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp8.x -@@ -0,0 +1,26 @@ -+extern void abort (void); -+ -+poly8x8x2_t -+test_vuzpp8 (poly8x8_t _a, poly8x8_t _b) -+{ -+ return vuzp_p8 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; -+ poly8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; -+ poly8x8x2_t result = test_vuzpp8 (vld1_p8 (first), vld1_p8 (second)); -+ poly8_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15}; -+ poly8_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16}; -+ poly8x8_t expect1 = vld1_p8 (exp1); -+ poly8x8_t expect2 = vld1_p8 (exp2); -+ -+ for (i = 0; i < 8; i++) -+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp8_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzipq_p8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vzipqp8.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev32q_s8' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev32qs8.x" -+ -+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s32_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev64_s32' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev64s32.x" -+ -+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.2s, ?v\[0-9\]+.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/simd.exp -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/simd.exp -@@ -0,0 +1,45 @@ -+# Specific regression driver for AArch64 SIMD instructions. -+# Copyright (C) 2014 Free Software Foundation, Inc. -+# Contributed by ARM Ltd. -+# -+# This file is part of GCC. -+# -+# GCC is free software; you can redistribute it and/or modify it -+# under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 3, or (at your option) -+# any later version. -+# -+# GCC is distributed in the hope that it will be useful, but -+# WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+# General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with GCC; see the file COPYING3. If not see -+# <http://www.gnu.org/licenses/>. */ -+ -+# GCC testsuite that uses the `dg.exp' driver. -+ -+# Exit immediately if this isn't an AArch64 target. -+if {![istarget aarch64*-*-*] } then { -+ return -+} -+ -+# Load support procs. -+load_lib gcc-dg.exp -+ -+# If a testcase doesn't have special options, use these. -+global DEFAULT_CFLAGS -+if ![info exists DEFAULT_CFLAGS] then { -+ set DEFAULT_CFLAGS " -ansi -pedantic-errors" -+} -+ -+# Initialize `dg'. -+dg-init -+ -+# Main loop. -+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \ -+ "" $DEFAULT_CFLAGS -+ -+# All done. -+dg-finish ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns16.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+int16x4x2_t -+test_vtrns16 (int16x4_t _a, int16x4_t _b) -+{ -+ return vtrn_s16 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int16_t first[] = {1, 2, 3, 4}; -+ int16_t second[] = {5, 6, 7, 8}; -+ int16x4x2_t result = test_vtrns16 (vld1_s16 (first), vld1_s16 (second)); -+ int16x4_t res1 = result.val[0], res2 = result.val[1]; -+ int16_t exp1[] = {1, 5, 3, 7}; -+ int16_t exp2[] = {2, 6, 4, 8}; -+ int16x4_t expected1 = vld1_s16 (exp1); -+ int16x4_t expected2 = vld1_s16 (exp2); -+ -+ for (i = 0; i < 4; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev64q_u8' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev64qu8.x" -+ -+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp8.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+poly8x16_t -+test_vrev64qp8 (poly8x16_t _arg) -+{ -+ return vrev64q_p8 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; -+ poly8x16_t reversed = test_vrev64qp8 (inorder); -+ poly8x16_t expected = {8, 7, 6, 5, 4, 3, 2, 1, 16, 15, 14, 13, 12, 11, 10, 9}; -+ -+ for (i = 0; i < 16; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu16.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+uint16x4x2_t -+test_vtrnu16 (uint16x4_t _a, uint16x4_t _b) -+{ -+ return vtrn_u16 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint16_t first[] = {1, 2, 3, 4}; -+ uint16_t second[] = {5, 6, 7, 8}; -+ uint16x4x2_t result = test_vtrnu16 (vld1_u16 (first), vld1_u16 (second)); -+ uint16x4_t res1 = result.val[0], res2 = result.val[1]; -+ uint16_t exp1[] = {1, 5, 3, 7}; -+ uint16_t exp2[] = {2, 6, 4, 8}; -+ uint16x4_t expected1 = vld1_u16 (exp1); -+ uint16x4_t expected2 = vld1_u16 (exp2); -+ -+ for (i = 0; i < 4; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p16.x -@@ -0,0 +1,58 @@ -+extern void abort (void); -+ -+poly16x4_t -+test_vext_p16_1 (poly16x4_t a, poly16x4_t b) -+{ -+ return vext_p16 (a, b, 1); -+} -+ -+poly16x4_t -+test_vext_p16_2 (poly16x4_t a, poly16x4_t b) -+{ -+ return vext_p16 (a, b, 2); -+} -+ -+poly16x4_t -+test_vext_p16_3 (poly16x4_t a, poly16x4_t b) -+{ -+ return vext_p16 (a, b, 3); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i, off; -+ poly16_t arr1[] = {0, 1, 2, 3}; -+ poly16x4_t in1 = vld1_p16 (arr1); -+ poly16_t arr2[] = {4, 5, 6, 7}; -+ poly16x4_t in2 = vld1_p16 (arr2); -+ poly16_t exp[4]; -+ poly16x4_t expected; -+ poly16x4_t actual = test_vext_p16_1 (in1, in2); -+ -+ for (i = 0; i < 4; i++) -+ exp[i] = i + 1; -+ expected = vld1_p16 (exp); -+ for (i = 0; i < 4; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_p16_2 (in1, in2); -+ for (i = 0; i < 4; i++) -+ exp[i] = i + 2; -+ expected = vld1_p16 (exp); -+ for (i = 0; i < 4; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_p16_3 (in1, in2); -+ for (i = 0; i < 4; i++) -+ exp[i] = i + 3; -+ expected = vld1_p16 (exp); -+ for (i = 0; i < 4; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp16_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vuzp_p16' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vuzpp16.x" -+ -+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu8.x -@@ -0,0 +1,29 @@ -+extern void abort (void); -+ -+uint8x16x2_t -+test_vzipqu8 (uint8x16_t _a, uint8x16_t _b) -+{ -+ return vzipq_u8 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; -+ uint8_t second[] = -+ {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; -+ uint8x16x2_t result = test_vzipqu8 (vld1q_u8 (first), vld1q_u8 (second)); -+ uint8x16_t res1 = result.val[0], res2 = result.val[1]; -+ uint8_t exp1[] = {1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, 8, 24}; -+ uint8_t exp2[] = -+ {9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31, 16, 32}; -+ uint8x16_t expected1 = vld1q_u8 (exp1); -+ uint8x16_t expected2 = vld1q_u8 (exp2); -+ -+ for (i = 0; i < 16; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u64_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u64_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vextu64' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+#include "ext_u64.x" -+ -+/* Do not scan-assembler. An EXT instruction could be emitted, but would merely -+ return its first argument, so it is legitimate to optimize it out. */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu32_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vuzp_u32' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vuzpu32.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp16_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev32q_p16' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev32qp16.x" -+ -+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.8h, ?v\[0-9\]+.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_f32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_f32.x -@@ -0,0 +1,58 @@ -+extern void abort (void); -+ -+float32x4_t -+test_vextq_f32_1 (float32x4_t a, float32x4_t b) -+{ -+ return vextq_f32 (a, b, 1); -+} -+ -+float32x4_t -+test_vextq_f32_2 (float32x4_t a, float32x4_t b) -+{ -+ return vextq_f32 (a, b, 2); -+} -+ -+float32x4_t -+test_vextq_f32_3 (float32x4_t a, float32x4_t b) -+{ -+ return vextq_f32 (a, b, 3); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i, off; -+ float32_t arr1[] = {0, 1, 2, 3}; -+ float32x4_t in1 = vld1q_f32 (arr1); -+ float32_t arr2[] = {4, 5, 6, 7}; -+ float32x4_t in2 = vld1q_f32 (arr2); -+ float32_t exp[4]; -+ float32x4_t expected; -+ float32x4_t actual = test_vextq_f32_1 (in1, in2); -+ -+ for (i = 0; i < 4; i++) -+ exp[i] = i + 1; -+ expected = vld1q_f32 (exp); -+ for (i = 0; i < 4; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_f32_2 (in1, in2); -+ for (i = 0; i < 4; i++) -+ exp[i] = i + 2; -+ expected = vld1q_f32 (exp); -+ for (i = 0; i < 4; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_f32_3 (in1, in2); -+ for (i = 0; i < 4; i++) -+ exp[i] = i + 3; -+ expected = vld1q_f32 (exp); -+ for (i = 0; i < 4; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp16_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzipq_p16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vzipqp16.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp8_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vtrn_p8' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vtrnp8.x" -+ -+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u8.x -@@ -0,0 +1,227 @@ -+extern void abort (void); -+ -+uint8x16_t -+test_vextq_u8_1 (uint8x16_t a, uint8x16_t b) -+{ -+ return vextq_u8 (a, b, 1); -+} -+ -+uint8x16_t -+test_vextq_u8_2 (uint8x16_t a, uint8x16_t b) -+{ -+ return vextq_u8 (a, b, 2); -+} -+ -+uint8x16_t -+test_vextq_u8_3 (uint8x16_t a, uint8x16_t b) -+{ -+ return vextq_u8 (a, b, 3); -+} -+ -+uint8x16_t -+test_vextq_u8_4 (uint8x16_t a, uint8x16_t b) -+{ -+ return vextq_u8 (a, b, 4); -+} -+ -+uint8x16_t -+test_vextq_u8_5 (uint8x16_t a, uint8x16_t b) -+{ -+ return vextq_u8 (a, b, 5); -+} -+ -+uint8x16_t -+test_vextq_u8_6 (uint8x16_t a, uint8x16_t b) -+{ -+ return vextq_u8 (a, b, 6); -+} -+ -+uint8x16_t -+test_vextq_u8_7 (uint8x16_t a, uint8x16_t b) -+{ -+ return vextq_u8 (a, b, 7); -+} -+ -+uint8x16_t -+test_vextq_u8_8 (uint8x16_t a, uint8x16_t b) -+{ -+ return vextq_u8 (a, b, 8); -+} -+ -+uint8x16_t -+test_vextq_u8_9 (uint8x16_t a, uint8x16_t b) -+{ -+ return vextq_u8 (a, b, 9); -+} -+ -+uint8x16_t -+test_vextq_u8_10 (uint8x16_t a, uint8x16_t b) -+{ -+ return vextq_u8 (a, b, 10); -+} -+ -+uint8x16_t -+test_vextq_u8_11 (uint8x16_t a, uint8x16_t b) -+{ -+ return vextq_u8 (a, b, 11); -+} -+ -+uint8x16_t -+test_vextq_u8_12 (uint8x16_t a, uint8x16_t b) -+{ -+ return vextq_u8 (a, b, 12); -+} -+ -+uint8x16_t -+test_vextq_u8_13 (uint8x16_t a, uint8x16_t b) -+{ -+ return vextq_u8 (a, b, 13); -+} -+ -+uint8x16_t -+test_vextq_u8_14 (uint8x16_t a, uint8x16_t b) -+{ -+ return vextq_u8 (a, b, 14); -+} -+ -+uint8x16_t -+test_vextq_u8_15 (uint8x16_t a, uint8x16_t b) -+{ -+ return vextq_u8 (a, b, 15); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint8_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; -+ uint8x16_t in1 = vld1q_u8 (arr1); -+ uint8_t arr2[] = -+ {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; -+ uint8x16_t in2 = vld1q_u8 (arr2); -+ uint8_t exp[16]; -+ uint8x16_t expected; -+ uint8x16_t actual = test_vextq_u8_1 (in1, in2); -+ -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 1; -+ expected = vld1q_u8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_u8_2 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 2; -+ expected = vld1q_u8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_u8_3 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 3; -+ expected = vld1q_u8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_u8_4 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 4; -+ expected = vld1q_u8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_u8_5 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 5; -+ expected = vld1q_u8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_u8_6 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i 
+ 6; -+ expected = vld1q_u8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_u8_7 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 7; -+ expected = vld1q_u8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_u8_8 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 8; -+ expected = vld1q_u8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_u8_9 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 9; -+ expected = vld1q_u8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_u8_10 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 10; -+ expected = vld1q_u8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_u8_11 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 11; -+ expected = vld1q_u8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_u8_12 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 12; -+ expected = vld1q_u8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_u8_13 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 13; -+ expected = vld1q_u8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_u8_14 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 14; -+ expected = vld1q_u8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_u8_15 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 15; -+ expected = vld1q_u8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu32_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzipq_u32' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vzipqu32.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev64_p8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev64p8.x" -+ -+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev32_u8' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev32u8.x" -+ -+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16s8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16s8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev16_s8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev16s8.x" -+ -+/* { dg-final { scan-assembler-times "rev16\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqf32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqf32.x -@@ -0,0 +1,26 @@ -+extern void abort (void); -+ -+float32x4x2_t -+test_vuzpqf32 (float32x4_t _a, float32x4_t _b) -+{ -+ return vuzpq_f32 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ float32_t first[] = {1, 2, 3, 4}; -+ float32_t second[] = {5, 6, 7, 8}; -+ float32x4x2_t result = test_vuzpqf32 (vld1q_f32 (first), vld1q_f32 (second)); -+ float32_t exp1[] = {1, 3, 5, 7}; -+ float32_t exp2[] = {2, 4, 6, 8}; -+ float32x4_t expect1 = vld1q_f32 (exp1); -+ float32x4_t expect2 = vld1q_f32 (exp2); -+ -+ for (i = 0; i < 4; i++) -+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp8.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+poly8x8x2_t -+test_vzipp8 (poly8x8_t _a, poly8x8_t _b) -+{ -+ return vzip_p8 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; -+ poly8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; -+ poly8x8x2_t result = test_vzipp8 (vld1_p8 (first), vld1_p8 (second)); -+ poly8x8_t res1 = result.val[0], res2 = result.val[1]; -+ poly8_t exp1[] = {1, 9, 2, 10, 3, 11, 4, 12}; -+ poly8_t exp2[] = {5, 13, 6, 14, 7, 15, 8, 16}; -+ poly8x8_t expected1 = vld1_p8 (exp1); -+ poly8x8_t expected2 = vld1_p8 (exp2); -+ -+ for (i = 0; i < 8; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs32.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+int32x4x2_t -+test_vtrnqs32 (int32x4_t _a, int32x4_t _b) -+{ -+ return vtrnq_s32 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int32_t first[] = {1, 2, 3, 4}; -+ int32_t second[] = {5, 6, 7, 8}; -+ int32x4x2_t result = test_vtrnqs32 (vld1q_s32 (first), vld1q_s32 (second)); -+ int32x4_t res1 = result.val[0], res2 = result.val[1]; -+ int32_t exp1[] = {1, 5, 3, 7}; -+ int32_t exp2[] = {2, 6, 4, 8}; -+ int32x4_t expected1 = vld1q_s32 (exp1); -+ int32x4_t expected2 = vld1q_s32 (exp2); -+ -+ for (i = 0; i < 4; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/int_comparisons_2.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/int_comparisons_2.c -@@ -0,0 +1,131 @@ -+/* { dg-do run } */ -+/* { dg-options "-O2 -fno-inline" } */ -+/* Stops the test_xxx methods being inlined into main, thus preventing constant -+ propagation. 
*/ -+ -+#include "int_comparisons.x" -+ -+extern void abort (void); -+ -+#define CHECK2(R0, R1) if (res[0] != R0 || res[1] != R1) abort () -+ -+#define TEST2(BASETYPE, SUFFIX, RESTYPE, ST1_SUFFIX) { \ -+ BASETYPE##_t _a[2] = {2, 3}; \ -+ BASETYPE##x2_t a = vld1##SUFFIX (_a); \ -+ BASETYPE##_t _b[2] = {1, 3}; \ -+ BASETYPE##x2_t b = vld1##SUFFIX (_b); \ -+ RESTYPE res[2]; \ -+ vst1##ST1_SUFFIX (res, test_vclt##SUFFIX (a, b)); CHECK2 (0, 0); \ -+ vst1##ST1_SUFFIX (res, test_vclt##SUFFIX (b, a)); CHECK2 (-1, 0); \ -+ vst1##ST1_SUFFIX (res, test_vcle##SUFFIX (a, b)); CHECK2 (0, -1); \ -+ vst1##ST1_SUFFIX (res, test_vcle##SUFFIX (b, a)); CHECK2 (-1, -1); \ -+ vst1##ST1_SUFFIX (res, test_vceq##SUFFIX (a, b)); CHECK2 (0, -1); \ -+ vst1##ST1_SUFFIX (res, test_vcge##SUFFIX (a, b)); CHECK2 (-1, -1); \ -+ vst1##ST1_SUFFIX (res, test_vcge##SUFFIX (b, a)); CHECK2 (0, -1); \ -+ vst1##ST1_SUFFIX (res, test_vcgt##SUFFIX (a, b)); CHECK2 (-1, 0); \ -+ vst1##ST1_SUFFIX (res, test_vcgt##SUFFIX (b, a)); CHECK2 (0, 0); \ -+ vst1##ST1_SUFFIX (res, test_vtst##SUFFIX (a, b)); CHECK2 (0, -1); \ -+ vst1##ST1_SUFFIX (res, test_vtst##SUFFIX (a + 1, b)); CHECK2 (-1, 0); \ -+} -+ -+#define CHECK4(T, R0, R1, R2, R3) \ -+ if (res[0] != (T)R0 || res[1] != (T)R1 \ -+ || res[2] != (T)R2 || res[3] != (T)R3) abort () -+ -+#define TEST4(BASETYPE, SUFFIX, RESTYPE, ST1_SUFFIX) { \ -+ BASETYPE##_t _a[4] = {1, 2, 3, 4}; \ -+ BASETYPE##x4_t a = vld1##SUFFIX (_a); \ -+ BASETYPE##_t _b[4] = {4, 2, 1, 3}; \ -+ BASETYPE##x4_t b = vld1##SUFFIX (_b); \ -+ RESTYPE res[4]; \ -+ vst1##ST1_SUFFIX (res, test_vclt##SUFFIX (a, b)); \ -+ CHECK4 (RESTYPE, -1, 0, 0, 0); \ -+ vst1##ST1_SUFFIX (res, test_vcle##SUFFIX (a, b)); \ -+ CHECK4 (RESTYPE, -1, -1, 0, 0); \ -+ vst1##ST1_SUFFIX (res, test_vceq##SUFFIX (a, b)); \ -+ CHECK4 (RESTYPE, 0, -1, 0, 0); \ -+ vst1##ST1_SUFFIX (res, test_vcge##SUFFIX (a, b)); \ -+ CHECK4 (RESTYPE, 0, -1, -1, -1); \ -+ vst1##ST1_SUFFIX (res, test_vcgt##SUFFIX (a, b)); \ -+ CHECK4 (RESTYPE, 0, 0, -1, -1); \ -+ vst1##ST1_SUFFIX (res, test_vtst##SUFFIX (a, b)); \ -+ CHECK4 (RESTYPE, 0, -1, -1, 0); \ -+} -+ -+#define CHECK8(T, R0, R1, R2, R3, R4, R5, R6, R7) \ -+ if (res[0] != (T)R0 || res[1] != (T)R1 || res[2] != (T)R2 || res[3] != (T)R3 \ -+ || res[4] != (T)R4 || res[5] != (T)R5 || res[6] != (T)R6 \ -+ || res[7] != (T)R7) abort () -+ -+#define TEST8(BASETYPE, SUFFIX, RESTYPE, ST1_SUFFIX) { \ -+ BASETYPE##_t _a[8] = {1, 2, 3, 4, 5, 6, 7, 8}; \ -+ BASETYPE##x8_t a = vld1##SUFFIX (_a); \ -+ BASETYPE##_t _b[8] = {4, 2, 1, 3, 2, 6, 8, 9}; \ -+ BASETYPE##x8_t b = vld1##SUFFIX (_b); \ -+ RESTYPE res[8]; \ -+ vst1##ST1_SUFFIX (res, test_vclt##SUFFIX (a, b)); \ -+ CHECK8 (RESTYPE, -1, 0, 0, 0, 0, 0, -1, -1); \ -+ vst1##ST1_SUFFIX (res, test_vcle##SUFFIX (a, b)); \ -+ CHECK8 (RESTYPE, -1, -1, 0, 0, 0, -1, -1, -1); \ -+ vst1##ST1_SUFFIX (res, test_vceq##SUFFIX (a, b)); \ -+ CHECK8 (RESTYPE, 0, -1, 0, 0, 0, -1, 0, 0); \ -+ vst1##ST1_SUFFIX (res, test_vcge##SUFFIX (a, b)); \ -+ CHECK8 (RESTYPE, 0, -1, -1, -1, -1, -1, 0, 0); \ -+ vst1##ST1_SUFFIX (res, test_vcgt##SUFFIX (a, b)); \ -+ CHECK8 (RESTYPE, 0, 0, -1, -1, -1, 0, 0, 0); \ -+ vst1##ST1_SUFFIX (res, test_vtst##SUFFIX (a, b)); \ -+ CHECK8 (RESTYPE, 0, -1, -1, 0, 0, -1, 0, -1); \ -+} -+ -+/* 16-way tests use same 8 values twice. 
*/ -+#define CHECK16(T, R0, R1, R2, R3, R4, R5, R6, R7) \ -+ if (res[0] != (T)R0 || res[1] != (T)R1 || res[2] != (T)R2 || res[3] != (T)R3 \ -+ || res[4] != (T)R4 || res[5] != (T)R5 || res[6] != (T)R6 \ -+ || res[7] != (T)R7 || res[8] != (T)R0 || res[9] != (T)R1 \ -+ || res[10] != (T)R2 || res[11] != (T)R3 || res[12] != (T)R4 \ -+ || res[13] != (T)R5 || res[14] != (T)R6 || res[15] != (T)R7) abort () -+ -+#define TEST16(BASETYPE, SUFFIX, RESTYPE, ST1_SUFFIX) { \ -+ BASETYPE##_t _a[16] = {1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8}; \ -+ BASETYPE##x16_t a = vld1##SUFFIX (_a); \ -+ BASETYPE##_t _b[16] = {4, 2, 1, 3, 2, 6, 8, 9, 4, 2, 1, 3, 2, 6, 8, 9}; \ -+ BASETYPE##x16_t b = vld1##SUFFIX (_b); \ -+ RESTYPE res[16]; \ -+ vst1##ST1_SUFFIX (res, test_vclt##SUFFIX (a, b)); \ -+ CHECK16 (RESTYPE, -1, 0, 0, 0, 0, 0, -1, -1); \ -+ vst1##ST1_SUFFIX (res, test_vcle##SUFFIX (a, b)); \ -+ CHECK16 (RESTYPE, -1, -1, 0, 0, 0, -1, -1, -1); \ -+ vst1##ST1_SUFFIX (res, test_vceq##SUFFIX (a, b)); \ -+ CHECK16 (RESTYPE, 0, -1, 0, 0, 0, -1, 0, 0); \ -+ vst1##ST1_SUFFIX (res, test_vcge##SUFFIX (a, b)); \ -+ CHECK16 (RESTYPE, 0, -1, -1, -1, -1, -1, 0, 0); \ -+ vst1##ST1_SUFFIX (res, test_vcgt##SUFFIX (a, b)); \ -+ CHECK16 (RESTYPE, 0, 0, -1, -1, -1, 0, 0, 0); \ -+ vst1##ST1_SUFFIX (res, test_vtst##SUFFIX (a, b)); \ -+ CHECK16 (RESTYPE, 0, -1, -1, 0, 0, -1, 0, -1); \ -+} -+ -+int -+main (int argc, char **argv) -+{ -+ TEST2 (int32, _s32, uint32_t, _u32); -+ TEST2 (uint32, _u32, uint32_t, _u32); -+ TEST2 (int64, q_s64, uint64_t, q_u64); -+ TEST2 (uint64, q_u64, uint64_t, q_u64); -+ -+ TEST4 (int16, _s16, uint16_t, _u16); -+ TEST4 (uint16, _u16, uint16_t, _u16); -+ TEST4 (int32, q_s32, uint32_t, q_u32); -+ TEST4 (uint32, q_u32, uint32_t, q_u32); -+ -+ TEST8 (int8, _s8, uint8_t, _u8); -+ TEST8 (uint8, _u8, uint8_t, _u8); -+ TEST8 (int16, q_s16, uint16_t, q_u16); -+ TEST8 (uint16, q_u16, uint16_t, q_u16); -+ -+ TEST16 (int8, q_s8, uint8_t, q_u8); -+ TEST16 (uint8, q_u8, uint8_t, q_u8); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu32.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+uint32x4x2_t -+test_vtrnqu32 (uint32x4_t _a, uint32x4_t _b) -+{ -+ return vtrnq_u32 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint32_t first[] = {1, 2, 3, 4}; -+ uint32_t second[] = {5, 6, 7, 8}; -+ uint32x4x2_t result = test_vtrnqu32 (vld1q_u32 (first), vld1q_u32 (second)); -+ uint32x4_t res1 = result.val[0], res2 = result.val[1]; -+ uint32_t exp1[] = {1, 5, 3, 7}; -+ uint32_t exp2[] = {2, 6, 4, 8}; -+ uint32x4_t expected1 = vld1q_u32 (exp1); -+ uint32x4_t expected2 = vld1q_u32 (exp2); -+ -+ for (i = 0; i < 4; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs32.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+int32x4_t -+test_vrev64qs32 (int32x4_t _arg) -+{ -+ return vrev64q_s32 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int32x4_t inorder = {1, 2, 3, 4}; -+ int32x4_t reversed = test_vrev64qs32 (inorder); -+ int32x4_t expected = {2, 1, 4, 3}; -+ -+ for (i = 0; i < 4; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu8.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+uint8x8x2_t 
-+test_vtrnu8 (uint8x8_t _a, uint8x8_t _b) -+{ -+ return vtrn_u8 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; -+ uint8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; -+ uint8x8x2_t result = test_vtrnu8 (vld1_u8 (first), vld1_u8 (second)); -+ uint8x8_t res1 = result.val[0], res2 = result.val[1]; -+ uint8_t exp1[] = {1, 9, 3, 11, 5, 13, 7, 15}; -+ uint8_t exp2[] = {2, 10, 4, 12, 6, 14, 8, 16}; -+ uint8x8_t expected1 = vld1_u8 (exp1); -+ uint8x8_t expected2 = vld1_u8 (exp2); -+ -+ for (i = 0; i < 8; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu32.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+uint32x4_t -+test_vrev64qu32 (uint32x4_t _arg) -+{ -+ return vrev64q_u32 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint32x4_t inorder = {1, 2, 3, 4}; -+ uint32x4_t reversed = test_vrev64qu32 (inorder); -+ uint32x4_t expected = {2, 1, 4, 3}; -+ -+ for (i = 0; i < 4; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s64_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s64_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vextQs64' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+#include "extq_s64.x" -+ -+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.8\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s8.x -@@ -0,0 +1,114 @@ -+extern void abort (void); -+ -+int8x8_t -+test_vext_s8_1 (int8x8_t a, int8x8_t b) -+{ -+ return vext_s8 (a, b, 1); -+} -+ -+int8x8_t -+test_vext_s8_2 (int8x8_t a, int8x8_t b) -+{ -+ return vext_s8 (a, b, 2); -+} -+ -+int8x8_t -+test_vext_s8_3 (int8x8_t a, int8x8_t b) -+{ -+ return vext_s8 (a, b, 3); -+} -+ -+int8x8_t -+test_vext_s8_4 (int8x8_t a, int8x8_t b) -+{ -+ return vext_s8 (a, b, 4); -+} -+ -+int8x8_t -+test_vext_s8_5 (int8x8_t a, int8x8_t b) -+{ -+ return vext_s8 (a, b, 5); -+} -+ -+int8x8_t -+test_vext_s8_6 (int8x8_t a, int8x8_t b) -+{ -+ return vext_s8 (a, b, 6); -+} -+ -+int8x8_t -+test_vext_s8_7 (int8x8_t a, int8x8_t b) -+{ -+ return vext_s8 (a, b, 7); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i, off; -+ int8_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7}; -+ int8x8_t in1 = vld1_s8 (arr1); -+ int8_t arr2[] = {8, 9, 10, 11, 12, 13, 14, 15}; -+ int8x8_t in2 = vld1_s8 (arr2); -+ int8_t exp[8]; -+ int8x8_t expected; -+ int8x8_t actual = test_vext_s8_1 (in1, in2); -+ -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 1; -+ expected = vld1_s8 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_s8_2 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 2; -+ expected = vld1_s8 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_s8_3 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 3; -+ expected = vld1_s8 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_s8_4 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 4; -+ expected = vld1_s8 (exp); -+ 
for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_s8_5 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 5; -+ expected = vld1_s8 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_s8_6 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 6; -+ expected = vld1_s8 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_s8_7 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 7; -+ expected = vld1_s8 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzips32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzips32_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzip_s32' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vzips32.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp16_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vtrn_p16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vtrnp16.x" -+ -+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev32q_p8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev32qp8.x" -+ -+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu32_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vtrn_u32' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vtrnu32.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps8.x -@@ -0,0 +1,26 @@ -+extern void abort (void); -+ -+int8x8x2_t -+test_vuzps8 (int8x8_t _a, int8x8_t _b) -+{ -+ return vuzp_s8 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; -+ int8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; -+ int8x8x2_t result = test_vuzps8 (vld1_s8 (first), vld1_s8 (second)); -+ int8_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15}; -+ int8_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16}; -+ int8x8_t expect1 = vld1_s8 (exp1); -+ int8x8_t expect2 = vld1_s8 (exp2); -+ -+ for (i = 0; i < 8; i++) -+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu8_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzipq_u8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vzipqu8.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp8.x -@@ -0,0 +1,29 @@ -+extern void abort (void); -+ -+poly8x16x2_t -+test_vzipqp8 (poly8x16_t _a, poly8x16_t _b) -+{ -+ return vzipq_p8 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; -+ poly8_t second[] = -+ {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; -+ poly8x16x2_t result = test_vzipqp8 (vld1q_p8 (first), vld1q_p8 (second)); -+ poly8x16_t res1 = result.val[0], res2 = result.val[1]; -+ poly8_t exp1[] = {1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, 8, 24}; -+ poly8_t exp2[] = -+ {9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31, 16, 32}; -+ poly8x16_t expected1 = vld1q_p8 (exp1); -+ poly8x16_t expected2 = vld1q_p8 (exp2); -+ -+ for (i = 0; i < 16; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p16_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vextp16' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+#include "ext_p16.x" -+ -+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s16.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+int16x4_t -+test_vrev32s16 (int16x4_t _arg) -+{ -+ return vrev32_s16 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int16x4_t inorder = {1, 2, 3, 4}; -+ int16x4_t reversed = test_vrev32s16 (inorder); -+ int16x4_t expected = {2, 1, 4, 3}; -+ -+ for (i = 0; i < 4; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u16.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+uint16x4_t -+test_vrev32u16 (uint16x4_t _arg) -+{ -+ return vrev32_u16 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint16x4_t inorder = {1, 2, 3, 4}; -+ uint16x4_t reversed = test_vrev32u16 (inorder); -+ uint16x4_t expected = {2, 1, 4, 3}; -+ -+ for (i = 0; i < 4; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p16.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+poly16x4_t -+test_vrev64p16 (poly16x4_t _arg) -+{ -+ return vrev64_p16 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly16x4_t inorder = {1, 2, 3, 4}; -+ poly16x4_t reversed = test_vrev64p16 (inorder); -+ poly16x4_t expected = {4, 3, 2, 1}; -+ -+ for (i = 0; i < 4; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qf32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qf32_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev64q_f32' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev64qf32.x" -+ -+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.4s, ?v\[0-9\]+.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqf32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqf32.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+float32x4x2_t -+test_vzipqf32 (float32x4_t _a, float32x4_t _b) -+{ -+ return vzipq_f32 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ float32_t first[] = {1, 2, 3, 4}; -+ float32_t second[] = {5, 6, 7, 8}; -+ float32x4x2_t result = test_vzipqf32 (vld1q_f32 (first), vld1q_f32 (second)); -+ float32x4_t res1 = result.val[0], res2 = result.val[1]; -+ float32_t exp1[] = {1, 5, 2, 6}; -+ float32_t exp2[] = {3, 7, 4, 8}; -+ float32x4_t expected1 = vld1q_f32 (exp1); -+ float32x4_t expected2 = vld1q_f32 (exp2); -+ -+ for (i = 0; i < 4; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u32_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vextu32' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+#include "ext_u32.x" -+ -+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#\[0-9\]+\(?:.4)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p8.x -@@ -0,0 +1,227 @@ -+extern void abort (void); -+ -+poly8x16_t -+test_vextq_p8_1 (poly8x16_t a, poly8x16_t b) -+{ -+ return vextq_p8 (a, b, 1); -+} -+ -+poly8x16_t -+test_vextq_p8_2 (poly8x16_t a, poly8x16_t b) -+{ -+ return vextq_p8 (a, b, 2); -+} -+ -+poly8x16_t -+test_vextq_p8_3 (poly8x16_t a, poly8x16_t b) -+{ -+ return vextq_p8 (a, b, 3); -+} -+ -+poly8x16_t -+test_vextq_p8_4 (poly8x16_t a, poly8x16_t b) -+{ -+ return vextq_p8 (a, b, 4); -+} -+ -+poly8x16_t -+test_vextq_p8_5 (poly8x16_t a, poly8x16_t b) -+{ -+ return vextq_p8 (a, b, 5); -+} -+ -+poly8x16_t -+test_vextq_p8_6 (poly8x16_t a, poly8x16_t b) -+{ -+ return vextq_p8 (a, b, 6); -+} -+ -+poly8x16_t -+test_vextq_p8_7 (poly8x16_t a, poly8x16_t b) -+{ -+ return vextq_p8 (a, b, 7); -+} -+ -+poly8x16_t -+test_vextq_p8_8 (poly8x16_t a, poly8x16_t b) -+{ -+ return vextq_p8 (a, b, 8); -+} -+ -+poly8x16_t -+test_vextq_p8_9 (poly8x16_t a, poly8x16_t b) -+{ -+ return vextq_p8 (a, b, 9); -+} -+ -+poly8x16_t -+test_vextq_p8_10 (poly8x16_t a, poly8x16_t b) -+{ -+ return vextq_p8 (a, b, 10); -+} -+ -+poly8x16_t -+test_vextq_p8_11 (poly8x16_t a, poly8x16_t b) -+{ -+ return vextq_p8 (a, b, 11); -+} -+ -+poly8x16_t -+test_vextq_p8_12 (poly8x16_t a, poly8x16_t b) -+{ -+ return vextq_p8 (a, b, 12); -+} -+ -+poly8x16_t -+test_vextq_p8_13 (poly8x16_t a, poly8x16_t b) -+{ -+ return vextq_p8 (a, b, 13); -+} -+ -+poly8x16_t -+test_vextq_p8_14 (poly8x16_t a, poly8x16_t b) -+{ -+ return vextq_p8 (a, b, 14); -+} -+ -+poly8x16_t -+test_vextq_p8_15 (poly8x16_t a, poly8x16_t b) -+{ -+ return vextq_p8 (a, b, 15); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly8_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; -+ poly8x16_t in1 = vld1q_p8 (arr1); -+ poly8_t arr2[] = -+ {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; -+ poly8x16_t in2 = vld1q_p8 (arr2); -+ poly8_t exp[16]; -+ poly8x16_t expected; -+ poly8x16_t actual = test_vextq_p8_1 (in1, in2); -+ -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 1; -+ expected = vld1q_p8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_p8_2 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 2; -+ expected = vld1q_p8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_p8_3 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 3; -+ expected = vld1q_p8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_p8_4 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 4; -+ expected = vld1q_p8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_p8_5 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 5; -+ expected = vld1q_p8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_p8_6 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 6; -+ expected = vld1q_p8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != 
expected[i]) -+ abort (); -+ -+ actual = test_vextq_p8_7 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 7; -+ expected = vld1q_p8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_p8_8 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 8; -+ expected = vld1q_p8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_p8_9 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 9; -+ expected = vld1q_p8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_p8_10 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 10; -+ expected = vld1q_p8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_p8_11 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 11; -+ expected = vld1q_p8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_p8_12 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 12; -+ expected = vld1q_p8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_p8_13 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 13; -+ expected = vld1q_p8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_p8_14 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 14; -+ expected = vld1q_p8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_p8_15 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 15; -+ expected = vld1q_p8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs8.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+int8x16_t -+test_vrev64qs8 (int8x16_t _arg) -+{ -+ return vrev64q_s8 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; -+ int8x16_t reversed = test_vrev64qs8 (inorder); -+ int8x16_t expected = {8, 7, 6, 5, 4, 3, 2, 1, 16, 15, 14, 13, 12, 11, 10, 9}; -+ -+ for (i = 0; i < 16; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16p8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16p8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev16_p8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev16p8.x" -+ -+/* { dg-final { scan-assembler-times "rev16\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs32_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vtrnq_s32' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vtrnqs32.x" -+ -+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps16.x -@@ -0,0 +1,26 @@ -+extern void abort (void); -+ -+int16x4x2_t -+test_vuzps16 (int16x4_t _a, int16x4_t _b) -+{ -+ return vuzp_s16 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int16_t first[] = {1, 2, 3, 4}; -+ int16_t second[] = {5, 6, 7, 8}; -+ int16x4x2_t result = test_vuzps16 (vld1_s16 (first), vld1_s16 (second)); -+ int16_t exp1[] = {1, 3, 5, 7}; -+ int16_t exp2[] = {2, 4, 6, 8}; -+ int16x4_t expect1 = vld1_s16 (exp1); -+ int16x4_t expect2 = vld1_s16 (exp2); -+ -+ for (i = 0; i < 4; i++) -+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu16.x -@@ -0,0 +1,26 @@ -+extern void abort (void); -+ -+uint16x4x2_t -+test_vuzpu16 (uint16x4_t _a, uint16x4_t _b) -+{ -+ return vuzp_u16 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint16_t first[] = {1, 2, 3, 4}; -+ uint16_t second[] = {5, 6, 7, 8}; -+ uint16x4x2_t result = test_vuzpu16 (vld1_u16 (first), vld1_u16 (second)); -+ uint16_t exp1[] = {1, 3, 5, 7}; -+ uint16_t exp2[] = {2, 4, 6, 8}; -+ uint16x4_t expect1 = vld1_u16 (exp1); -+ uint16x4_t expect2 = vld1_u16 (exp2); -+ -+ for (i = 0; i < 4; i++) -+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu8_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vtrn_u8' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vtrnu8.x" -+ -+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp8.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+poly8x8x2_t -+test_vtrnp8 (poly8x8_t _a, poly8x8_t _b) -+{ -+ return vtrn_p8 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; -+ poly8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; -+ poly8x8x2_t result = test_vtrnp8 (vld1_p8 (first), vld1_p8 (second)); -+ poly8x8_t res1 = result.val[0], res2 = result.val[1]; -+ poly8_t exp1[] = {1, 9, 3, 11, 5, 13, 7, 15}; -+ poly8_t exp2[] = {2, 10, 4, 12, 6, 14, 8, 16}; -+ poly8x8_t expected1 = vld1_p8 (exp1); -+ poly8x8_t expected2 = vld1_p8 (exp2); -+ -+ for (i = 0; i < 8; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs16.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+int16x8_t -+test_vrev32qs16 (int16x8_t _arg) -+{ -+ return vrev32q_s16 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int16x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; -+ int16x8_t reversed = test_vrev32qs16 (inorder); -+ int16x8_t expected = {2, 1, 4, 3, 6, 5, 8, 7}; -+ -+ for (i = 0; i < 8; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64f32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64f32_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev64_f32' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev64f32.x" -+ -+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.2s, ?v\[0-9\]+.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzips8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzips8.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+int8x8x2_t -+test_vzips8 (int8x8_t _a, int8x8_t _b) -+{ -+ return vzip_s8 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; -+ int8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; -+ int8x8x2_t result = test_vzips8 (vld1_s8 (first), vld1_s8 (second)); -+ int8x8_t res1 = result.val[0], res2 = result.val[1]; -+ int8_t exp1[] = {1, 9, 2, 10, 3, 11, 4, 12}; -+ int8_t exp2[] = {5, 13, 6, 14, 7, 15, 8, 16}; -+ int8x8_t expected1 = vld1_s8 (exp1); -+ int8x8_t expected2 = vld1_s8 (exp2); -+ -+ for (i = 0; i < 8; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s32_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vextQs32' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+#include "extq_s32.x" -+ -+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.4)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu16.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+uint16x8_t -+test_vrev32qu16 (uint16x8_t _arg) -+{ -+ return vrev32q_u16 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint16x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; -+ uint16x8_t reversed = test_vrev32qu16 (inorder); -+ uint16x8_t expected = {2, 1, 4, 3, 6, 5, 8, 7}; -+ -+ for (i = 0; i < 8; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu16_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev64q_u16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev64qu16.x" -+ -+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.8h, ?v\[0-9\]+.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev64_u8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev64u8.x" -+ -+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnf32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnf32.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+float32x2x2_t -+test_vtrnf32 (float32x2_t _a, float32x2_t _b) -+{ -+ return vtrn_f32 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ float32_t first[] = {1, 2}; -+ float32_t second[] = {3, 4}; -+ float32x2x2_t result = test_vtrnf32 (vld1_f32 (first), vld1_f32 (second)); -+ float32x2_t res1 = result.val[0], res2 = result.val[1]; -+ float32_t exp1[] = {1, 3}; -+ float32_t exp2[] = {2, 4}; -+ float32x2_t expected1 = vld1_f32 (exp1); -+ float32x2_t expected2 = vld1_f32 (exp2); -+ -+ for (i = 0; i < 2; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vexts8' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+#include "ext_s8.x" -+ -+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#?\[0-9\]+\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16u8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16u8.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+uint8x8_t -+test_vrev16u8 (uint8x8_t _arg) -+{ -+ return vrev16_u8 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; -+ uint8x8_t reversed = test_vrev16u8 (inorder); -+ uint8x8_t expected = {2, 1, 4, 3, 6, 5, 8, 7}; -+ -+ for (i = 0; i < 8; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs16_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vuzpq_s16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vuzpqs16.x" -+ -+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s64.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s64.x -@@ -0,0 +1,30 @@ -+extern void abort (void); -+ -+int64x2_t -+test_vextq_s64_1 (int64x2_t a, int64x2_t b) -+{ -+ return vextq_s64 (a, b, 1); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i, off; -+ int64_t arr1[] = {0, 1}; -+ int64x2_t in1 = vld1q_s64 (arr1); -+ int64_t arr2[] = {2, 3}; -+ int64x2_t in2 = vld1q_s64 (arr2); -+ int64_t exp[2]; -+ int64x2_t expected; -+ int64x2_t actual = test_vextq_s64_1 (in1, in2); -+ -+ for (i = 0; i < 2; i++) -+ exp[i] = i + 1; -+ expected = vld1q_s64 (exp); -+ for (i = 0; i < 2; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp16.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+poly16x4x2_t -+test_vzipp16 (poly16x4_t _a, poly16x4_t _b) -+{ -+ return vzip_p16 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly16_t first[] = {1, 2, 3, 4}; -+ poly16_t second[] = {5, 6, 7, 8}; -+ poly16x4x2_t result = test_vzipp16 (vld1_p16 (first), vld1_p16 (second)); -+ poly16x4_t res1 = result.val[0], res2 = result.val[1]; -+ poly16_t exp1[] = {1, 5, 2, 6}; -+ poly16_t exp2[] = {3, 7, 4, 8}; -+ poly16x4_t expected1 = vld1_p16 (exp1); -+ poly16x4_t expected2 = vld1_p16 (exp2); -+ -+ for (i = 0; i < 4; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u64.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u64.x -@@ -0,0 +1,30 @@ -+extern void abort (void); -+ -+uint64x2_t -+test_vextq_u64_1 (uint64x2_t a, uint64x2_t b) -+{ -+ return vextq_u64 (a, b, 1); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i, off; -+ uint64_t arr1[] = {0, 1}; -+ uint64x2_t in1 = vld1q_u64 (arr1); -+ uint64_t arr2[] = {2, 3}; -+ uint64x2_t in2 = vld1q_u64 
(arr2); -+ uint64_t exp[2]; -+ uint64x2_t expected; -+ uint64x2_t actual = test_vextq_u64_1 (in1, in2); -+ -+ for (i = 0; i < 2; i++) -+ exp[i] = i + 1; -+ expected = vld1q_u64 (exp); -+ for (i = 0; i < 2; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev32q_u8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev32qu8.x" -+ -+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u16_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev64_u16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev64u16.x" -+ -+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.4h, ?v\[0-9\]+.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs8.x -@@ -0,0 +1,29 @@ -+extern void abort (void); -+ -+int8x16x2_t -+test_vzipqs8 (int8x16_t _a, int8x16_t _b) -+{ -+ return vzipq_s8 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; -+ int8_t second[] = -+ {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; -+ int8x16x2_t result = test_vzipqs8 (vld1q_s8 (first), vld1q_s8 (second)); -+ int8x16_t res1 = result.val[0], res2 = result.val[1]; -+ int8_t exp1[] = {1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, 8, 24}; -+ int8_t exp2[] = -+ {9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31, 16, 32}; -+ int8x16_t expected1 = vld1q_s8 (exp1); -+ int8x16_t expected2 = vld1q_s8 (exp2); -+ -+ for (i = 0; i < 16; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu8.x -@@ -0,0 +1,28 @@ -+extern void abort (void); -+ -+uint8x16x2_t -+test_vtrnqu8 (uint8x16_t _a, uint8x16_t _b) -+{ -+ return vtrnq_u8 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; -+ uint8_t second[] = -+ {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; -+ uint8x16x2_t result = test_vtrnqu8 (vld1q_u8 (first), vld1q_u8 (second)); -+ uint8x16_t res1 = result.val[0], res2 = result.val[1]; -+ uint8_t exp1[] = {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31}; -+ uint8_t exp2[] = {2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30, 16, 32}; -+ uint8x16_t expected1 = vld1q_u8 (exp1); -+ uint8x16_t expected2 = vld1q_u8 (exp2); -+ -+ for (i = 0; i < 16; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s32.x -@@ -0,0 +1,30 @@ -+extern void abort (void); -+ -+int32x2_t -+test_vext_s32_1 (int32x2_t a, int32x2_t b) -+{ -+ return vext_s32 (a, b, 1); -+} -+ -+int -+main (int 
argc, char **argv) -+{ -+ int i, off; -+ int32_t arr1[] = {0, 1}; -+ int32x2_t in1 = vld1_s32 (arr1); -+ int32_t arr2[] = {2, 3}; -+ int32x2_t in2 = vld1_s32 (arr2); -+ int32_t exp[2]; -+ int32x2_t expected; -+ int32x2_t actual = test_vext_s32_1 (in1, in2); -+ -+ for (i = 0; i < 2; i++) -+ exp[i] = i + 1; -+ expected = vld1_s32 (exp); -+ for (i = 0; i < 2; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps16_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vuzp_s16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vuzps16.x" -+ -+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u32.x -@@ -0,0 +1,30 @@ -+extern void abort (void); -+ -+uint32x2_t -+test_vext_u32_1 (uint32x2_t a, uint32x2_t b) -+{ -+ return vext_u32 (a, b, 1); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i, off; -+ uint32_t arr1[] = {0, 1}; -+ uint32x2_t in1 = vld1_u32 (arr1); -+ uint32_t arr2[] = {2, 3}; -+ uint32x2_t in2 = vld1_u32 (arr2); -+ uint32_t exp[2]; -+ uint32x2_t expected; -+ uint32x2_t actual = test_vext_u32_1 (in1, in2); -+ -+ for (i = 0; i < 2; i++) -+ exp[i] = i + 1; -+ expected = vld1_u32 (exp); -+ for (i = 0; i < 2; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs8_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vuzpq_s8' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vuzpqs8.x" -+ -+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s8.x -@@ -0,0 +1,227 @@ -+extern void abort (void); -+ -+int8x16_t -+test_vextq_s8_1 (int8x16_t a, int8x16_t b) -+{ -+ return vextq_s8 (a, b, 1); -+} -+ -+int8x16_t -+test_vextq_s8_2 (int8x16_t a, int8x16_t b) -+{ -+ return vextq_s8 (a, b, 2); -+} -+ -+int8x16_t -+test_vextq_s8_3 (int8x16_t a, int8x16_t b) -+{ -+ return vextq_s8 (a, b, 3); -+} -+ -+int8x16_t -+test_vextq_s8_4 (int8x16_t a, int8x16_t b) -+{ -+ return vextq_s8 (a, b, 4); -+} -+ -+int8x16_t -+test_vextq_s8_5 (int8x16_t a, int8x16_t b) -+{ -+ return vextq_s8 (a, b, 5); -+} -+ -+int8x16_t -+test_vextq_s8_6 (int8x16_t a, int8x16_t b) -+{ -+ return vextq_s8 (a, b, 6); -+} -+ -+int8x16_t -+test_vextq_s8_7 (int8x16_t a, int8x16_t b) -+{ -+ return vextq_s8 (a, b, 7); -+} -+ -+int8x16_t -+test_vextq_s8_8 (int8x16_t a, int8x16_t b) -+{ -+ return vextq_s8 (a, b, 8); -+} -+ -+int8x16_t -+test_vextq_s8_9 (int8x16_t a, int8x16_t b) -+{ -+ return vextq_s8 (a, b, 9); -+} -+ -+int8x16_t -+test_vextq_s8_10 (int8x16_t a, int8x16_t b) -+{ -+ return vextq_s8 (a, b, 10); -+} -+ -+int8x16_t -+test_vextq_s8_11 (int8x16_t a, int8x16_t b) -+{ -+ return vextq_s8 (a, b, 11); -+} -+ -+int8x16_t -+test_vextq_s8_12 (int8x16_t a, int8x16_t b) -+{ -+ return vextq_s8 (a, b, 12); -+} -+ -+int8x16_t -+test_vextq_s8_13 (int8x16_t a, int8x16_t b) -+{ -+ return vextq_s8 (a, b, 13); -+} -+ -+int8x16_t -+test_vextq_s8_14 (int8x16_t a, int8x16_t b) -+{ -+ return vextq_s8 (a, b, 14); -+} -+ -+int8x16_t -+test_vextq_s8_15 (int8x16_t a, int8x16_t b) -+{ -+ return vextq_s8 (a, b, 15); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int8_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; -+ int8x16_t in1 = vld1q_s8 (arr1); -+ int8_t arr2[] = -+ {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; -+ int8x16_t in2 = vld1q_s8 (arr2); -+ int8_t exp[16]; -+ int8x16_t expected; -+ int8x16_t actual = test_vextq_s8_1 (in1, in2); -+ -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 1; -+ expected = vld1q_s8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_s8_2 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 2; -+ expected = vld1q_s8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_s8_3 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 3; -+ expected = vld1q_s8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_s8_4 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 4; -+ expected = vld1q_s8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_s8_5 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 5; -+ expected = vld1q_s8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_s8_6 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 6; -+ expected = vld1q_s8 (exp); -+ for (i 
= 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_s8_7 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 7; -+ expected = vld1q_s8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_s8_8 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 8; -+ expected = vld1q_s8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_s8_9 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 9; -+ expected = vld1q_s8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_s8_10 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 10; -+ expected = vld1q_s8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_s8_11 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 11; -+ expected = vld1q_s8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_s8_12 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 12; -+ expected = vld1q_s8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_s8_13 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 13; -+ expected = vld1q_s8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_s8_14 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 14; -+ expected = vld1q_s8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_s8_15 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 15; -+ expected = vld1q_s8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_f64_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_f64_1.c -@@ -0,0 +1,36 @@ -+/* Test the `vextq_f64' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+extern void abort (void); -+#include <stdio.h> -+ -+float64x2_t -+test_vextq_f64_1 (float64x2_t a, float64x2_t b) -+{ -+ return vextq_f64 (a, b, 1); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i, off; -+ float64_t arr1[] = {0, 1}; -+ float64x2_t in1 = vld1q_f64 (arr1); -+ float64_t arr2[] = {2, 3}; -+ float64x2_t in2 = vld1q_f64 (arr2); -+ float64_t exp[] = {1, 2}; -+ float64x2_t expected = vld1q_f64 (exp); -+ float64x2_t actual = test_vextq_f64_1 (in1, in2); -+ -+ for (i = 0; i < 2; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ return 0; -+} -+ -+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.8\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vpaddd_f64.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vpaddd_f64.c -@@ -0,0 +1,27 @@ -+/* Test the vpaddd_f64 AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3" } */ -+ -+#include "arm_neon.h" -+ -+#define SIZE 6 -+ -+extern void abort (void); -+ -+float64_t in[SIZE] = { -4.0, 4.0, -2.0, 2.0, -1.0, 1.0 }; -+ -+int -+main (void) -+{ -+ int i; -+ -+ for (i = 0; i < SIZE / 2; ++i) -+ if (vpaddd_f64 (vld1q_f64 (in + 2 * i)) != 0.0) -+ abort (); -+ -+ return 0; -+} -+ -+/* { dg-final { scan-assembler "faddp\[ \t\]+\[dD\]\[0-9\]+, v\[0-9\].2d+\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs16_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev32q_s16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev32qs16.x" -+ -+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.8h, ?v\[0-9\]+.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs16_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzipq_s16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vzipqs16.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipf32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipf32_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzip_f32' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vzipf32.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16p8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16p8.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+poly8x8_t -+test_vrev16p8 (poly8x8_t _arg) -+{ -+ return vrev16_p8 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; -+ poly8x8_t reversed = test_vrev16p8 (inorder); -+ poly8x8_t expected = {2, 1, 4, 3, 6, 5, 8, 7}; -+ -+ for (i = 0; i < 8; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16u8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16u8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev16_u8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev16u8.x" -+ -+/* { dg-final { scan-assembler-times "rev16\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vextp8' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+#include "ext_p8.x" -+ -+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#?\[0-9\]+\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns8.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+int8x8x2_t -+test_vtrns8 (int8x8_t _a, int8x8_t _b) -+{ -+ return vtrn_s8 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; -+ int8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; -+ int8x8x2_t result = test_vtrns8 (vld1_s8 (first), vld1_s8 (second)); -+ int8x8_t res1 = result.val[0], res2 = result.val[1]; -+ int8_t exp1[] = {1, 9, 3, 11, 5, 13, 7, 15}; -+ int8_t exp2[] = {2, 10, 4, 12, 6, 14, 8, 16}; -+ int8x8_t expected1 = vld1_s8 (exp1); -+ int8x8_t expected2 = vld1_s8 (exp2); -+ -+ for (i = 0; i < 8; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs16.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+int16x8x2_t -+test_vtrnqs16 (int16x8_t _a, int16x8_t _b) -+{ -+ return vtrnq_s16 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; -+ int16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; -+ int16x8x2_t result = test_vtrnqs16 (vld1q_s16 (first), vld1q_s16 (second)); -+ int16x8_t res1 = result.val[0], res2 = result.val[1]; -+ int16_t exp1[] = {1, 9, 3, 11, 5, 13, 7, 15}; -+ int16_t exp2[] = {2, 10, 4, 12, 6, 14, 8, 16}; -+ int16x8_t expected1 = vld1q_s16 (exp1); -+ int16x8_t expected2 = vld1q_s16 (exp2); -+ -+ for (i = 0; i < 8; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu16.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+uint16x8x2_t -+test_vtrnqu16 (uint16x8_t _a, uint16x8_t _b) -+{ -+ return vtrnq_u16 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; -+ uint16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; -+ uint16x8x2_t result = test_vtrnqu16 (vld1q_u16 (first), vld1q_u16 (second)); -+ uint16x8_t res1 = result.val[0], res2 = result.val[1]; -+ uint16_t exp1[] = {1, 9, 3, 11, 5, 13, 7, 15}; -+ uint16_t exp2[] = {2, 10, 4, 12, 6, 14, 8, 16}; -+ uint16x8_t expected1 = vld1q_u16 (exp1); -+ uint16x8_t expected2 = vld1q_u16 (exp2); -+ -+ for (i = 0; i < 8; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p16.x -@@ -0,0 +1,114 @@ -+extern void abort (void); -+ -+poly16x8_t -+test_vextq_p16_1 (poly16x8_t a, poly16x8_t b) -+{ -+ return vextq_p16 (a, b, 1); -+} -+ -+poly16x8_t -+test_vextq_p16_2 (poly16x8_t a, poly16x8_t b) -+{ -+ return vextq_p16 (a, b, 2); -+} -+ -+poly16x8_t -+test_vextq_p16_3 (poly16x8_t a, poly16x8_t b) -+{ -+ return vextq_p16 (a, b, 3); -+} -+ -+poly16x8_t -+test_vextq_p16_4 (poly16x8_t a, poly16x8_t b) -+{ -+ return vextq_p16 (a, b, 4); -+} -+ -+poly16x8_t 
-+test_vextq_p16_5 (poly16x8_t a, poly16x8_t b) -+{ -+ return vextq_p16 (a, b, 5); -+} -+ -+poly16x8_t -+test_vextq_p16_6 (poly16x8_t a, poly16x8_t b) -+{ -+ return vextq_p16 (a, b, 6); -+} -+ -+poly16x8_t -+test_vextq_p16_7 (poly16x8_t a, poly16x8_t b) -+{ -+ return vextq_p16 (a, b, 7); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i, off; -+ poly16_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7}; -+ poly16x8_t in1 = vld1q_p16 (arr1); -+ poly16_t arr2[] = {8, 9, 10, 11, 12, 13, 14, 15}; -+ poly16x8_t in2 = vld1q_p16 (arr2); -+ poly16_t exp[8]; -+ poly16x8_t expected; -+ poly16x8_t actual = test_vextq_p16_1 (in1, in2); -+ -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 1; -+ expected = vld1q_p16 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_p16_2 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 2; -+ expected = vld1q_p16 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_p16_3 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 3; -+ expected = vld1q_p16 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_p16_4 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 4; -+ expected = vld1q_p16 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_p16_5 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 5; -+ expected = vld1q_p16 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_p16_6 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 6; -+ expected = vld1q_p16 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_p16_7 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 7; -+ expected = vld1q_p16 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs16.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+int16x8_t -+test_vrev64qs16 (int16x8_t _arg) -+{ -+ return vrev64q_s16 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int16x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; -+ int16x8_t reversed = test_vrev64qs16 (inorder); -+ int16x8_t expected = {4, 3, 2, 1, 8, 7, 6, 5}; -+ -+ for (i = 0; i < 8; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu16.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+uint16x8_t -+test_vrev64qu16 (uint16x8_t _arg) -+{ -+ return vrev64q_u16 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint16x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; -+ uint16x8_t reversed = test_vrev64qu16 (inorder); -+ uint16x8_t expected = {4, 3, 2, 1, 8, 7, 6, 5}; -+ -+ for (i = 0; i < 8; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u8.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+uint8x8_t -+test_vrev64u8 (uint8x8_t _arg) -+{ -+ return vrev64_u8 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; -+ uint8x8_t reversed = test_vrev64u8 (inorder); -+ uint8x8_t expected = {8, 7, 6, 5, 4, 3, 2, 1}; -+ -+ for (i 
= 0; i < 8; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp16.x -@@ -0,0 +1,26 @@ -+extern void abort (void); -+ -+poly16x8x2_t -+test_vuzpqp16 (poly16x8_t _a, poly16x8_t _b) -+{ -+ return vuzpq_p16 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; -+ poly16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; -+ poly16x8x2_t result = test_vuzpqp16 (vld1q_p16 (first), vld1q_p16 (second)); -+ poly16_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15}; -+ poly16_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16}; -+ poly16x8_t expect1 = vld1q_p16 (exp1); -+ poly16x8_t expect2 = vld1q_p16 (exp2); -+ -+ for (i = 0; i < 8; i++) -+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns16_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vtrn_s16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vtrns16.x" -+ -+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpf32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpf32.x -@@ -0,0 +1,26 @@ -+extern void abort (void); -+ -+float32x2x2_t -+test_vuzpf32 (float32x2_t _a, float32x2_t _b) -+{ -+ return vuzp_f32 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ float32_t first[] = {1, 2}; -+ float32_t second[] = {3, 4}; -+ float32x2x2_t result = test_vuzpf32 (vld1_f32 (first), vld1_f32 (second)); -+ float32_t exp1[] = {1, 3}; -+ float32_t exp2[] = {2, 4}; -+ float32x2_t expect1 = vld1_f32 (exp1); -+ float32x2_t expect2 = vld1_f32 (exp2); -+ -+ for (i = 0; i < 2; i++) -+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu16_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzip_u16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vzipu16.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqf32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqf32_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vtrnq_f32' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vtrnqf32.x" -+ -+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs8_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vtrnq_s8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vtrnqs8.x" -+ -+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp8.x -@@ -0,0 +1,28 @@ -+extern void abort (void); -+ -+poly8x16x2_t -+test_vtrnqp8 (poly8x16_t _a, poly8x16_t _b) -+{ -+ return vtrnq_p8 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; -+ poly8_t second[] = -+ {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; -+ poly8x16x2_t result = test_vtrnqp8 (vld1q_p8 (first), vld1q_p8 (second)); -+ poly8x16_t res1 = result.val[0], res2 = result.val[1]; -+ poly8_t exp1[] = {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31}; -+ poly8_t exp2[] = {2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30, 16, 32}; -+ poly8x16_t expected1 = vld1q_p8 (exp1); -+ poly8x16_t expected2 = vld1q_p8 (exp2); -+ -+ for (i = 0; i < 16; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s32.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+int32x2_t -+test_vrev64s32 (int32x2_t _arg) -+{ -+ return vrev64_s32 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int32x2_t inorder = {1, 2}; -+ int32x2_t reversed = test_vrev64s32 (inorder); -+ int32x2_t expected = {2, 1}; -+ -+ for (i = 0; i < 2; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s16_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vexts16' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+#include "ext_s16.x" -+ -+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu8.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+uint8x16_t -+test_vrev32qu8 (uint8x16_t _arg) -+{ -+ return vrev32q_u8 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; -+ uint8x16_t reversed = test_vrev32qu8 (inorder); -+ uint8x16_t expected = {4, 3, 2, 1, 8, 7, 6, 5, 12, 11, 10, 9, 16, 15, 14, 13}; -+ -+ for (i = 0; i < 16; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u32.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+uint32x2_t -+test_vrev64u32 (uint32x2_t _arg) -+{ -+ return vrev64_u32 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint32x2_t inorder = {1, 2}; -+ uint32x2_t reversed = test_vrev64u32 (inorder); -+ uint32x2_t expected = {2, 1}; -+ -+ for (i = 0; i < 2; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_f32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_f32_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vextQf32' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+#include "extq_f32.x" -+ -+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.4)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qu8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qu8.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+uint8x16_t -+test_vrev16qu8 (uint8x16_t _arg) -+{ -+ return vrev16q_u8 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; -+ uint8x16_t reversed = test_vrev16qu8 (inorder); -+ uint8x16_t expected = {2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15}; -+ -+ for (i = 0; i < 16; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp8_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vuzpq_p8' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vuzpqp8.x" -+ -+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp16_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev64q_p16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev64qp16.x" -+ -+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.8h, ?v\[0-9\]+.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp16.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+poly16x8x2_t -+test_vzipqp16 (poly16x8_t _a, poly16x8_t _b) -+{ -+ return vzipq_p16 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; -+ poly16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; -+ poly16x8x2_t result = test_vzipqp16 (vld1q_p16 (first), vld1q_p16 (second)); -+ poly16x8_t res1 = result.val[0], res2 = result.val[1]; -+ poly16_t exp1[] = {1, 9, 2, 10, 3, 11, 4, 12}; -+ poly16_t exp2[] = {5, 13, 6, 14, 7, 15, 8, 16}; -+ poly16x8_t expected1 = vld1q_p16 (exp1); -+ poly16x8_t expected2 = vld1q_p16 (exp2); -+ -+ for (i = 0; i < 8; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu16_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vtrnq_u16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vtrnqu16.x" -+ -+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu32_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev64q_u32' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev64qu32.x" -+ -+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.4s, ?v\[0-9\]+.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu8.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+uint8x16x2_t -+test_vuzpqu8 (uint8x16_t _a, uint8x16_t _b) -+{ -+ return vuzpq_u8 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; -+ uint8_t second[] = -+ {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; -+ uint8x16x2_t result = test_vuzpqu8 (vld1q_u8 (first), vld1q_u8 (second)); -+ uint8_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}; -+ uint8_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32}; -+ uint8x16_t expect1 = vld1q_u8 (exp1); -+ uint8x16_t expect2 = vld1q_u8 (exp2); -+ -+ for (i = 0; i < 16; i++) -+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p8.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+poly8x8_t -+test_vrev64p8 (poly8x8_t _arg) -+{ -+ return vrev64_p8 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; -+ poly8x8_t reversed = test_vrev64p8 (inorder); -+ poly8x8_t expected = {8, 7, 6, 5, 4, 3, 2, 1}; -+ -+ for (i = 0; i < 8; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u8.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+uint8x8_t -+test_vrev32u8 (uint8x8_t _arg) -+{ -+ return vrev32_u8 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; -+ uint8x8_t reversed = test_vrev32u8 (inorder); -+ uint8x8_t expected = {4, 3, 2, 1, 8, 7, 6, 5}; -+ -+ for (i = 0; i < 8; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16s8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16s8.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+int8x8_t -+test_vrev16s8 (int8x8_t _arg) -+{ -+ return vrev16_s8 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; -+ int8x8_t reversed = test_vrev16s8 (inorder); -+ int8x8_t expected = {2, 1, 4, 3, 6, 5, 8, 7}; -+ -+ for (i = 0; i < 8; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vextu8' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+#include "ext_u8.x" -+ -+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#?\[0-9\]+\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u16_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vextQu16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+#include "extq_u16.x" -+ -+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/int_comparisons.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/int_comparisons.x -@@ -0,0 +1,68 @@ -+/* test_vcXXX wrappers for all the vcXXX (vector compare) and vtst intrinsics -+ in arm_neon.h (excluding the 64x1 variants as these generally produce scalar -+ not vector ops). */ -+#include "arm_neon.h" -+ -+#define DONT_FORCE(X) -+ -+#define FORCE_SIMD(V1) asm volatile ("mov %d0, %1.d[0]" \ -+ : "=w"(V1) \ -+ : "w"(V1) \ -+ : /* No clobbers */); -+ -+#define OP1(SIZE, OP, BASETYPE, SUFFIX, FORCE) uint##SIZE##_t \ -+test_v##OP##SUFFIX (BASETYPE##SIZE##_t a) \ -+{ \ -+ uint##SIZE##_t res; \ -+ FORCE (a); \ -+ res = v##OP##SUFFIX (a); \ -+ FORCE (res); \ -+ return res; \ -+} -+ -+#define OP2(SIZE, OP, BASETYPE, SUFFIX, FORCE) uint##SIZE##_t \ -+test_v##OP##SUFFIX (BASETYPE##SIZE##_t a, BASETYPE##SIZE##_t b) \ -+{ \ -+ uint##SIZE##_t res; \ -+ FORCE (a); \ -+ FORCE (b); \ -+ res = v##OP##SUFFIX (a, b); \ -+ FORCE (res); \ -+ return res; \ -+} -+ -+#define UNSIGNED_OPS(SIZE, BASETYPE, SUFFIX, FORCE) \ -+OP2 (SIZE, tst, BASETYPE, SUFFIX, FORCE) \ -+OP1 (SIZE, ceqz, BASETYPE, SUFFIX, FORCE) \ -+OP2 (SIZE, ceq, BASETYPE, SUFFIX, FORCE) \ -+OP2 (SIZE, cge, BASETYPE, SUFFIX, FORCE) \ -+OP2 (SIZE, cgt, BASETYPE, SUFFIX, FORCE) \ -+OP2 (SIZE, cle, BASETYPE, SUFFIX, FORCE) \ -+OP2 (SIZE, clt, BASETYPE, SUFFIX, FORCE) -+ -+#define ALL_OPS(SIZE, BASETYPE, SUFFIX, FORCE) \ -+OP1 (SIZE, cgez, BASETYPE, SUFFIX, FORCE) \ -+OP1 (SIZE, cgtz, BASETYPE, SUFFIX, FORCE) \ -+OP1 (SIZE, clez, BASETYPE, SUFFIX, FORCE) \ -+OP1 (SIZE, cltz, BASETYPE, SUFFIX, FORCE) \ -+UNSIGNED_OPS (SIZE, BASETYPE, SUFFIX, FORCE) -+ -+ALL_OPS (8x8, int, _s8, DONT_FORCE) -+ALL_OPS (16x4, int, _s16, DONT_FORCE) -+ALL_OPS (32x2, int, _s32, DONT_FORCE) -+ALL_OPS (64x1, int, _s64, DONT_FORCE) -+ALL_OPS (64, int, d_s64, FORCE_SIMD) -+ALL_OPS (8x16, int, q_s8, DONT_FORCE) -+ALL_OPS (16x8, int, q_s16, DONT_FORCE) -+ALL_OPS (32x4, int, q_s32, DONT_FORCE) -+ALL_OPS (64x2, int, q_s64, DONT_FORCE) -+UNSIGNED_OPS (8x8, uint, _u8, DONT_FORCE) -+UNSIGNED_OPS (16x4, uint, _u16, DONT_FORCE) -+UNSIGNED_OPS (32x2, uint, _u32, DONT_FORCE) -+UNSIGNED_OPS (64x1, uint, _u64, DONT_FORCE) -+UNSIGNED_OPS (64, uint, d_u64, FORCE_SIMD) -+UNSIGNED_OPS (8x16, uint, q_u8, DONT_FORCE) -+UNSIGNED_OPS (16x8, uint, q_u16, DONT_FORCE) -+UNSIGNED_OPS (32x4, uint, q_u32, DONT_FORCE) -+UNSIGNED_OPS (64x2, uint, q_u64, DONT_FORCE) -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs32_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vuzpq_s32' AArch64 SIMD 
intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vuzpqs32.x" -+ -+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps8_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vuzp_s8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vuzps8.x" -+ -+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp8_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vtrnq_p8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vtrnqp8.x" -+ -+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p16_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev64_p16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev64p16.x" -+ -+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.4h, ?v\[0-9\]+.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u16_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev32_u16' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev32u16.x" -+ -+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.4h, ?v\[0-9\]+.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp8.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+poly8x16_t -+test_vrev32qp8 (poly8x16_t _arg) -+{ -+ return vrev32q_p8 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; -+ poly8x16_t reversed = test_vrev32qp8 (inorder); -+ poly8x16_t expected = {4, 3, 2, 1, 8, 7, 6, 5, 12, 11, 10, 9, 16, 15, 14, 13}; -+ -+ for (i = 0; i < 16; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qs8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qs8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev16q_s8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev16qs8.x" -+ -+/* { dg-final { scan-assembler-times "rev16\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp16.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+poly16x4x2_t -+test_vtrnp16 (poly16x4_t _a, poly16x4_t _b) -+{ -+ return vtrn_p16 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly16_t first[] = {1, 2, 3, 4}; -+ poly16_t second[] = {5, 6, 7, 8}; -+ poly16x4x2_t result = test_vtrnp16 (vld1_p16 (first), vld1_p16 (second)); -+ poly16x4_t res1 = result.val[0], res2 = result.val[1]; -+ poly16_t exp1[] = {1, 5, 3, 7}; -+ poly16_t exp2[] = {2, 6, 4, 8}; -+ poly16x4_t expected1 = vld1_p16 (exp1); -+ poly16x4_t expected2 = vld1_p16 (exp2); -+ -+ for (i = 0; i < 4; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzips32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzips32.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+int32x2x2_t -+test_vzips32 (int32x2_t _a, int32x2_t _b) -+{ -+ return vzip_s32 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int32_t first[] = {1, 2}; -+ int32_t second[] = {3, 4}; -+ int32x2x2_t result = test_vzips32 (vld1_s32 (first), vld1_s32 (second)); -+ int32x2_t res1 = result.val[0], res2 = result.val[1]; -+ int32_t exp1[] = {1, 3}; -+ int32_t exp2[] = {2, 4}; -+ int32x2_t expected1 = vld1_s32 (exp1); -+ int32x2_t expected2 = vld1_s32 (exp2); -+ -+ for (i = 0; i < 2; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u32_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev64_u32' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev64u32.x" -+ -+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.2s, ?v\[0-9\]+.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qp8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qp8.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+poly8x16_t -+test_vrev16qp8 (poly8x16_t _arg) -+{ -+ return vrev16q_p8 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; -+ poly8x16_t reversed = test_vrev16qp8 (inorder); -+ poly8x16_t expected = {2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15}; -+ -+ for (i = 0; i < 16; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu32.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+uint32x2x2_t -+test_vzipu32 (uint32x2_t _a, uint32x2_t _b) -+{ -+ return vzip_u32 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint32_t first[] = {1, 2}; -+ uint32_t second[] = {3, 4}; -+ uint32x2x2_t result = test_vzipu32 (vld1_u32 (first), vld1_u32 (second)); -+ uint32x2_t res1 = result.val[0], res2 = result.val[1]; -+ uint32_t exp1[] = {1, 3}; -+ uint32_t exp2[] = {2, 4}; -+ uint32x2_t expected1 = vld1_u32 (exp1); -+ uint32x2_t expected2 = vld1_u32 (exp2); -+ -+ for (i = 0; i < 2; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqf32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqf32.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+float32x4x2_t -+test_vtrnqf32 (float32x4_t _a, float32x4_t _b) -+{ -+ return vtrnq_f32 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ float32_t first[] = {1, 2, 3, 4}; -+ float32_t second[] = {5, 6, 7, 8}; -+ float32x4x2_t result = test_vtrnqf32 (vld1q_f32 (first), vld1q_f32 (second)); -+ float32x4_t res1 = result.val[0], res2 = result.val[1]; -+ float32_t exp1[] = {1, 5, 3, 7}; -+ float32_t exp2[] = {2, 6, 4, 8}; -+ float32x4_t expected1 = vld1q_f32 (exp1); -+ float32x4_t expected2 = vld1q_f32 (exp2); -+ -+ for (i = 0; i < 4; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs8.x -@@ -0,0 +1,28 @@ -+extern void abort (void); -+ -+int8x16x2_t -+test_vtrnqs8 (int8x16_t _a, int8x16_t _b) -+{ -+ return vtrnq_s8 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; -+ int8_t second[] = -+ {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; -+ int8x16x2_t result = test_vtrnqs8 (vld1q_s8 (first), vld1q_s8 (second)); -+ int8x16_t res1 = result.val[0], res2 = result.val[1]; -+ int8_t exp1[] = {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31}; -+ int8_t exp2[] = {2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30, 16, 32}; -+ int8x16_t expected1 = vld1q_s8 (exp1); -+ int8x16_t expected2 = vld1q_s8 (exp2); -+ -+ for (i = 0; i < 16; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- 
a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s64_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s64_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vexts64' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+#include "ext_s64.x" -+ -+/* Do not scan-assembler. An EXT instruction could be emitted, but would merely -+ return its first argument, so it is legitimate to optimize it out. */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps32_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vuzp_s32' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vuzps32.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qf32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qf32.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+float32x4_t -+test_vrev64qf32 (float32x4_t _arg) -+{ -+ return vrev64q_f32 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ float32x4_t inorder = {1, 2, 3, 4}; -+ float32x4_t reversed = test_vrev64qf32 (inorder); -+ float32x4_t expected = {2, 1, 4, 3}; -+ -+ for (i = 0; i < 4; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s16.x -@@ -0,0 +1,58 @@ -+extern void abort (void); -+ -+int16x4_t -+test_vext_s16_1 (int16x4_t a, int16x4_t b) -+{ -+ return vext_s16 (a, b, 1); -+} -+ -+int16x4_t -+test_vext_s16_2 (int16x4_t a, int16x4_t b) -+{ -+ return vext_s16 (a, b, 2); -+} -+ -+int16x4_t -+test_vext_s16_3 (int16x4_t a, int16x4_t b) -+{ -+ return vext_s16 (a, b, 3); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i, off; -+ int16_t arr1[] = {0, 1, 2, 3}; -+ int16x4_t in1 = vld1_s16 (arr1); -+ int16_t arr2[] = {4, 5, 6, 7}; -+ int16x4_t in2 = vld1_s16 (arr2); -+ int16_t exp[4]; -+ int16x4_t expected; -+ int16x4_t actual = test_vext_s16_1 (in1, in2); -+ -+ for (i = 0; i < 4; i++) -+ exp[i] = i + 1; -+ expected = vld1_s16 (exp); -+ for (i = 0; i < 4; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_s16_2 (in1, in2); -+ for (i = 0; i < 4; i++) -+ exp[i] = i + 2; -+ expected = vld1_s16 (exp); -+ for (i = 0; i < 4; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_s16_3 (in1, in2); -+ for (i = 0; i < 4; i++) -+ exp[i] = i + 3; -+ expected = vld1_s16 (exp); -+ for (i = 0; i < 4; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u16.x -@@ -0,0 +1,58 @@ -+extern void abort (void); -+ -+uint16x4_t -+test_vext_u16_1 (uint16x4_t a, uint16x4_t b) -+{ -+ return vext_u16 (a, b, 1); -+} -+ -+uint16x4_t -+test_vext_u16_2 (uint16x4_t a, uint16x4_t b) -+{ -+ return vext_u16 (a, b, 2); -+} -+ -+uint16x4_t -+test_vext_u16_3 (uint16x4_t a, uint16x4_t b) -+{ -+ return vext_u16 (a, b, 3); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i, off; -+ 
uint16_t arr1[] = {0, 1, 2, 3}; -+ uint16x4_t in1 = vld1_u16 (arr1); -+ uint16_t arr2[] = {4, 5, 6, 7}; -+ uint16x4_t in2 = vld1_u16 (arr2); -+ uint16_t exp[4]; -+ uint16x4_t expected; -+ uint16x4_t actual = test_vext_u16_1 (in1, in2); -+ -+ for (i = 0; i < 4; i++) -+ exp[i] = i + 1; -+ expected = vld1_u16 (exp); -+ for (i = 0; i < 4; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_u16_2 (in1, in2); -+ for (i = 0; i < 4; i++) -+ exp[i] = i + 2; -+ expected = vld1_u16 (exp); -+ for (i = 0; i < 4; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_u16_3 (in1, in2); -+ for (i = 0; i < 4; i++) -+ exp[i] = i + 3; -+ expected = vld1_u16 (exp); -+ for (i = 0; i < 4; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs32_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzipq_s32' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vzipqs32.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp8.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+poly8x16x2_t -+test_vuzpqp8 (poly8x16_t _a, poly8x16_t _b) -+{ -+ return vuzpq_p8 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; -+ poly8_t second[] = -+ {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; -+ poly8x16x2_t result = test_vuzpqp8 (vld1q_p8 (first), vld1q_p8 (second)); -+ poly8_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}; -+ poly8_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32}; -+ poly8x16_t expect1 = vld1q_p8 (exp1); -+ poly8x16_t expect2 = vld1q_p8 (exp2); -+ -+ for (i = 0; i < 16; i++) -+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu8_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vuzpq_u8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vuzpqu8.x" -+ -+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzips8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzips8_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzip_s8' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vzips8.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p8.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+poly8x8_t -+test_vrev32p8 (poly8x8_t _arg) -+{ -+ return vrev32_p8 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; -+ poly8x8_t reversed = test_vrev32p8 (inorder); -+ poly8x8_t expected = {4, 3, 2, 1, 8, 7, 6, 5}; -+ -+ for (i = 0; i < 8; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s8.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+int8x8_t -+test_vrev64s8 (int8x8_t _arg) -+{ -+ return vrev64_s8 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; -+ int8x8_t reversed = test_vrev64s8 (inorder); -+ int8x8_t expected = {8, 7, 6, 5, 4, 3, 2, 1}; -+ -+ for (i = 0; i < 8; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp8_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vuzp_p8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vuzpp8.x" -+ -+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s32.x -@@ -0,0 +1,58 @@ -+extern void abort (void); -+ -+int32x4_t -+test_vextq_s32_1 (int32x4_t a, int32x4_t b) -+{ -+ return vextq_s32 (a, b, 1); -+} -+ -+int32x4_t -+test_vextq_s32_2 (int32x4_t a, int32x4_t b) -+{ -+ return vextq_s32 (a, b, 2); -+} -+ -+int32x4_t -+test_vextq_s32_3 (int32x4_t a, int32x4_t b) -+{ -+ return vextq_s32 (a, b, 3); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i, off; -+ int32_t arr1[] = {0, 1, 2, 3}; -+ int32x4_t in1 = vld1q_s32 (arr1); -+ int32_t arr2[] = {4, 5, 6, 7}; -+ int32x4_t in2 = vld1q_s32 (arr2); -+ int32_t exp[4]; -+ int32x4_t expected; -+ int32x4_t actual = test_vextq_s32_1 (in1, in2); -+ -+ for (i = 0; i < 4; i++) -+ exp[i] = i + 1; -+ expected = vld1q_s32 (exp); -+ for (i = 0; i < 4; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_s32_2 (in1, in2); -+ for (i = 0; i < 4; i++) -+ exp[i] = i + 2; -+ expected = vld1q_s32 (exp); -+ for (i = 0; i < 4; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_s32_3 (in1, in2); -+ for (i = 0; i < 4; i++) -+ exp[i] = i + 3; -+ expected = vld1q_s32 (exp); -+ for (i = 0; i < 4; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ return 0; -+} -+ ---- 
a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u32.x -@@ -0,0 +1,58 @@ -+extern void abort (void); -+ -+uint32x4_t -+test_vextq_u32_1 (uint32x4_t a, uint32x4_t b) -+{ -+ return vextq_u32 (a, b, 1); -+} -+ -+uint32x4_t -+test_vextq_u32_2 (uint32x4_t a, uint32x4_t b) -+{ -+ return vextq_u32 (a, b, 2); -+} -+ -+uint32x4_t -+test_vextq_u32_3 (uint32x4_t a, uint32x4_t b) -+{ -+ return vextq_u32 (a, b, 3); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i, off; -+ uint32_t arr1[] = {0, 1, 2, 3}; -+ uint32x4_t in1 = vld1q_u32 (arr1); -+ uint32_t arr2[] = {4, 5, 6, 7}; -+ uint32x4_t in2 = vld1q_u32 (arr2); -+ uint32_t exp[4]; -+ uint32x4_t expected; -+ uint32x4_t actual = test_vextq_u32_1 (in1, in2); -+ -+ for (i = 0; i < 4; i++) -+ exp[i] = i + 1; -+ expected = vld1q_u32 (exp); -+ for (i = 0; i < 4; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_u32_2 (in1, in2); -+ for (i = 0; i < 4; i++) -+ exp[i] = i + 2; -+ expected = vld1q_u32 (exp); -+ for (i = 0; i < 4; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_u32_3 (in1, in2); -+ for (i = 0; i < 4; i++) -+ exp[i] = i + 3; -+ expected = vld1q_u32 (exp); -+ for (i = 0; i < 4; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u64_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u64_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vextQu64' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+#include "extq_u64.x" -+ -+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.8\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp16_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzip_p16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vzipp16.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns32_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vtrn_s32' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vtrns32.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qp8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qp8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev16q_p8' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev16qp8.x" -+ -+/* { dg-final { scan-assembler-times "rev16\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs32.x -@@ -0,0 +1,26 @@ -+extern void abort (void); -+ -+int32x4x2_t -+test_vuzpqs32 (int32x4_t _a, int32x4_t _b) -+{ -+ return vuzpq_s32 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int32_t first[] = {1, 2, 3, 4}; -+ int32_t second[] = {5, 6, 7, 8}; -+ int32x4x2_t result = test_vuzpqs32 (vld1q_s32 (first), vld1q_s32 (second)); -+ int32_t exp1[] = {1, 3, 5, 7}; -+ int32_t exp2[] = {2, 4, 6, 8}; -+ int32x4_t expect1 = vld1q_s32 (exp1); -+ int32x4_t expect2 = vld1q_s32 (exp2); -+ -+ for (i = 0; i < 4; i++) -+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu32_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzip_u32' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vzipu32.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p16.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+poly16x4_t -+test_vrev32p16 (poly16x4_t _arg) -+{ -+ return vrev32_p16 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly16x4_t inorder = {1, 2, 3, 4}; -+ poly16x4_t reversed = test_vrev32p16 (inorder); -+ poly16x4_t expected = {2, 1, 4, 3}; -+ -+ for (i = 0; i < 4; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu32.x -@@ -0,0 +1,26 @@ -+extern void abort (void); -+ -+uint32x4x2_t -+test_vuzpqu32 (uint32x4_t _a, uint32x4_t _b) -+{ -+ return vuzpq_u32 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint32_t first[] = {1, 2, 3, 4}; -+ uint32_t second[] = {5, 6, 7, 8}; -+ uint32x4x2_t result = test_vuzpqu32 (vld1q_u32 (first), vld1q_u32 (second)); -+ uint32_t exp1[] = {1, 3, 5, 7}; -+ uint32_t exp2[] = {2, 4, 6, 8}; -+ uint32x4_t expect1 = vld1q_u32 (exp1); -+ uint32x4_t expect2 = vld1q_u32 (exp2); -+ -+ for (i = 0; i < 4; i++) -+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrbit_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrbit_1.c -@@ -0,0 +1,56 @@ -+/* { dg-do run } */ -+/* { dg-options "-O2 --save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+ -+extern void abort (void); -+ -+uint64_t in1 = 0x0123456789abcdefULL; -+uint64_t expected1 = 0x80c4a2e691d5b3f7ULL; -+ -+#define TEST8(BASETYPE, SUFFIX) \ -+void test8_##SUFFIX () \ -+{ \ -+ BASETYPE##8x8_t out = vrbit_##SUFFIX (vcreate_##SUFFIX (in1)); \ -+ uint64_t res = vget_lane_u64 
(vreinterpret_u64_##SUFFIX (out), 0); \ -+ if (res != expected1) abort (); \ -+} -+ -+uint64_t in2 = 0xdeadbeefcafebabeULL; -+uint64_t expected2 = 0x7bb57df7537f5d7dULL; -+ -+#define TEST16(BASETYPE, SUFFIX) \ -+void test16_##SUFFIX () \ -+{ \ -+ BASETYPE##8x16_t in = vcombine_##SUFFIX (vcreate_##SUFFIX (in1), \ -+ vcreate_##SUFFIX (in2)); \ -+ uint64x2_t res = vreinterpretq_u64_##SUFFIX (vrbitq_##SUFFIX (in)); \ -+ uint64_t res1 = vgetq_lane_u64 (res, 0); \ -+ uint64_t res2 = vgetq_lane_u64 (res, 1); \ -+ if (res1 != expected1 || res2 != expected2) abort (); \ -+} -+ -+TEST8 (poly, p8); -+TEST8 (int, s8); -+TEST8 (uint, u8); -+ -+TEST16 (poly, p8); -+TEST16 (int, s8); -+TEST16 (uint, u8); -+ -+int -+main (int argc, char **argv) -+{ -+ test8_p8 (); -+ test8_s8 (); -+ test8_u8 (); -+ test16_p8 (); -+ test16_s8 (); -+ test16_u8 (); -+ return 0; -+} -+ -+/* { dg-final { scan-assembler-times "rbit\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\]" 3 } } */ -+/* { dg-final { scan-assembler-times "rbit\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\]" 3 } } */ -+ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s32_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vexts32' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+#include "ext_s32.x" -+ -+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#\[0-9\]+\(?:.4)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu8_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vtrnq_u8' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vtrnqu8.x" -+ -+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs8.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+int8x16_t -+test_vrev32qs8 (int8x16_t _arg) -+{ -+ return vrev32q_s8 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; -+ int8x16_t reversed = test_vrev32qs8 (inorder); -+ int8x16_t expected = {4, 3, 2, 1, 8, 7, 6, 5, 12, 11, 10, 9, 16, 15, 14, 13}; -+ -+ for (i = 0; i < 16; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qs8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qs8.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+int8x16_t -+test_vrev16qs8 (int8x16_t _arg) -+{ -+ return vrev16q_s8 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; -+ int8x16_t reversed = test_vrev16qs8 (inorder); -+ int8x16_t expected = {2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15}; -+ -+ for (i = 0; i < 16; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s16.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+int16x4_t -+test_vrev64s16 (int16x4_t _arg) -+{ -+ return vrev64_s16 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int16x4_t inorder = {1, 2, 3, 4}; -+ int16x4_t reversed = test_vrev64s16 (inorder); -+ int16x4_t expected = {4, 3, 2, 1}; -+ -+ for (i = 0; i < 4; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vextQs8' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+#include "extq_s8.x" -+ -+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#?\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 15 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u16.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+uint16x4_t -+test_vrev64u16 (uint16x4_t _arg) -+{ -+ return vrev64_u16 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint16x4_t inorder = {1, 2, 3, 4}; -+ uint16x4_t reversed = test_vrev64u16 (inorder); -+ uint16x4_t expected = {4, 3, 2, 1}; -+ -+ for (i = 0; i < 4; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp16.x -@@ -0,0 +1,26 @@ -+extern void abort (void); -+ -+poly16x4x2_t -+test_vuzpp16 (poly16x4_t _a, poly16x4_t _b) -+{ -+ return vuzp_p16 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly16_t first[] = {1, 2, 3, 4}; -+ poly16_t second[] = {5, 6, 7, 8}; -+ poly16x4x2_t result = test_vuzpp16 (vld1_p16 (first), vld1_p16 (second)); -+ poly16_t exp1[] = {1, 3, 5, 7}; -+ poly16_t exp2[] = {2, 4, 6, 8}; -+ poly16x4_t expect1 = vld1_p16 (exp1); -+ poly16x4_t expect2 = vld1_p16 (exp2); -+ -+ for (i = 0; i < 4; i++) -+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqf32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqf32_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vuzpq_f32' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vuzpqf32.x" -+ -+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp8_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzip_p8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vzipp8.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp16_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vtrnq_p16' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vtrnqp16.x" -+ -+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp16.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+poly16x8_t -+test_vrev32qp16 (poly16x8_t _arg) -+{ -+ return vrev32q_p16 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly16x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; -+ poly16x8_t reversed = test_vrev32qp16 (inorder); -+ poly16x8_t expected = {2, 1, 4, 3, 6, 5, 8, 7}; -+ -+ for (i = 0; i < 8; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu32_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vtrnq_u32' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vtrnqu32.x" -+ -+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs8.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+int8x16x2_t -+test_vuzpqs8 (int8x16_t _a, int8x16_t _b) -+{ -+ return vuzpq_s8 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; -+ int8_t second[] = -+ {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; -+ int8x16x2_t result = test_vuzpqs8 (vld1q_s8 (first), vld1q_s8 (second)); -+ int8_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}; -+ int8_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32}; -+ int8x16_t expect1 = vld1q_s8 (exp1); -+ int8x16_t expect2 = vld1q_s8 (exp2); -+ -+ for (i = 0; i < 16; i++) -+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs32.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+int32x4x2_t -+test_vzipqs32 (int32x4_t _a, int32x4_t _b) -+{ -+ return vzipq_s32 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int32_t first[] = {1, 2, 3, 4}; -+ int32_t second[] = {5, 6, 7, 8}; -+ int32x4x2_t result = test_vzipqs32 (vld1q_s32 (first), vld1q_s32 (second)); -+ int32x4_t res1 = result.val[0], res2 = result.val[1]; -+ int32_t exp1[] = {1, 5, 2, 6}; -+ int32_t exp2[] = {3, 7, 4, 8}; -+ int32x4_t expected1 = vld1q_s32 (exp1); -+ int32x4_t expected2 = vld1q_s32 (exp2); -+ -+ for (i = 0; i < 4; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs16_1.c -+++ 
b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs16_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev64q_s16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev64qs16.x" -+ -+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.8h, ?v\[0-9\]+.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s8.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+int8x8_t -+test_vrev32s8 (int8x8_t _arg) -+{ -+ return vrev32_s8 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; -+ int8x8_t reversed = test_vrev32s8 (inorder); -+ int8x8_t expected = {4, 3, 2, 1, 8, 7, 6, 5}; -+ -+ for (i = 0; i < 8; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p16_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vextQp16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+#include "extq_p16.x" -+ -+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu32.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+uint32x4x2_t -+test_vzipqu32 (uint32x4_t _a, uint32x4_t _b) -+{ -+ return vzipq_u32 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint32_t first[] = {1, 2, 3, 4}; -+ uint32_t second[] = {5, 6, 7, 8}; -+ uint32x4x2_t result = test_vzipqu32 (vld1q_u32 (first), vld1q_u32 (second)); -+ uint32x4_t res1 = result.val[0], res2 = result.val[1]; -+ uint32_t exp1[] = {1, 5, 2, 6}; -+ uint32_t exp2[] = {3, 7, 4, 8}; -+ uint32x4_t expected1 = vld1q_u32 (exp1); -+ uint32x4_t expected2 = vld1q_u32 (exp2); -+ -+ for (i = 0; i < 4; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u32_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vextQu32' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+#include "extq_u32.x" -+ -+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.4)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p16_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev32_p16' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev32p16.x" -+ -+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.4h, ?v\[0-9\]+.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_f32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_f32.x -@@ -0,0 +1,30 @@ -+extern void abort (void); -+ -+float32x2_t -+test_vext_f32_1 (float32x2_t a, float32x2_t b) -+{ -+ return vext_f32 (a, b, 1); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i, off; -+ float32_t arr1[] = {0, 1}; -+ float32x2_t in1 = vld1_f32 (arr1); -+ float32_t arr2[] = {2, 3}; -+ float32x2_t in2 = vld1_f32 (arr2); -+ float32_t exp[2]; -+ float32x2_t expected; -+ float32x2_t actual = test_vext_f32_1 (in1, in2); -+ -+ for (i = 0; i < 2; i++) -+ exp[i] = i + 1; -+ expected = vld1_f32 (exp); -+ for (i = 0; i < 2; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_f64_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_f64_1.c -@@ -0,0 +1,25 @@ -+/* Test the `vextf64' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+ -+extern void abort (void); -+ -+int -+main (int argc, char **argv) -+{ -+ int i, off; -+ float64x1_t in1 = {0}; -+ float64x1_t in2 = {1}; -+ float64x1_t actual = vext_f64 (in1, in2, 0); -+ if (actual != in1) -+ abort (); -+ -+ return 0; -+} -+ -+/* Do not scan-assembler. An EXT instruction could be emitted, but would merely -+ return its first argument, so it is legitimate to optimize it out. */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpf32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpf32_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vuzp_f32' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vuzpf32.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu16_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vuzpq_u16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vuzpqu16.x" -+ -+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu8_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vuzp_u8' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vuzpu8.x" -+ -+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqf32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqf32_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzipq_f32' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vzipqf32.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s16_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev64_s16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev64s16.x" -+ -+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.4h, ?v\[0-9\]+.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns32.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+int32x2x2_t -+test_vtrns32 (int32x2_t _a, int32x2_t _b) -+{ -+ return vtrn_s32 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int32_t first[] = {1, 2}; -+ int32_t second[] = {3, 4}; -+ int32x2x2_t result = test_vtrns32 (vld1_s32 (first), vld1_s32 (second)); -+ int32x2_t res1 = result.val[0], res2 = result.val[1]; -+ int32_t exp1[] = {1, 3}; -+ int32_t exp2[] = {2, 4}; -+ int32x2_t expected1 = vld1_s32 (exp1); -+ int32x2_t expected2 = vld1_s32 (exp2); -+ -+ for (i = 0; i < 2; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qu8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qu8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev16q_u8' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev16qu8.x" -+ -+/* { dg-final { scan-assembler-times "rev16\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzips16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzips16.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+int16x4x2_t -+test_vzips16 (int16x4_t _a, int16x4_t _b) -+{ -+ return vzip_s16 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int16_t first[] = {1, 2, 3, 4}; -+ int16_t second[] = {5, 6, 7, 8}; -+ int16x4x2_t result = test_vzips16 (vld1_s16 (first), vld1_s16 (second)); -+ int16x4_t res1 = result.val[0], res2 = result.val[1]; -+ int16_t exp1[] = {1, 5, 2, 6}; -+ int16_t exp2[] = {3, 7, 4, 8}; -+ int16x4_t expected1 = vld1_s16 (exp1); -+ int16x4_t expected2 = vld1_s16 (exp2); -+ -+ for (i = 0; i < 4; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev64q_s8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev64qs8.x" -+ -+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vextQp8' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+#include "extq_p8.x" -+ -+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#?\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 15 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu32.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+uint32x2x2_t -+test_vtrnu32 (uint32x2_t _a, uint32x2_t _b) -+{ -+ return vtrn_u32 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint32_t first[] = {1, 2}; -+ uint32_t second[] = {3, 4}; -+ uint32x2x2_t result = test_vtrnu32 (vld1_u32 (first), vld1_u32 (second)); -+ uint32x2_t res1 = result.val[0], res2 = result.val[1]; -+ uint32_t exp1[] = {1, 3}; -+ uint32_t exp2[] = {2, 4}; -+ uint32x2_t expected1 = vld1_u32 (exp1); -+ uint32x2_t expected2 = vld1_u32 (exp2); -+ -+ for (i = 0; i < 2; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu16.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+uint16x4x2_t -+test_vzipu16 (uint16x4_t _a, uint16x4_t _b) -+{ -+ return vzip_u16 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint16_t first[] = {1, 2, 3, 4}; -+ uint16_t second[] = {5, 6, 7, 8}; -+ uint16x4x2_t result = test_vzipu16 (vld1_u16 (first), vld1_u16 (second)); -+ uint16x4_t res1 = result.val[0], res2 = result.val[1]; -+ uint16_t exp1[] = {1, 5, 2, 6}; -+ uint16_t exp2[] = {3, 7, 4, 8}; -+ uint16x4_t expected1 = vld1_u16 (exp1); -+ uint16x4_t expected2 = vld1_u16 (exp2); -+ -+ for (i = 0; i < 4; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu16_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vuzp_u16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vuzpu16.x" -+ -+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev32_s8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev32s8.x" -+ -+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnf32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnf32_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vtrn_f32' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vtrnf32.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu16_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev32q_u16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev32qu16.x" -+ -+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.8h, ?v\[0-9\]+.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu16_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzipq_u16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vzipqu16.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu8_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzip_u8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vzipu8.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp16.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+poly16x8x2_t -+test_vtrnqp16 (poly16x8_t _a, poly16x8_t _b) -+{ -+ return vtrnq_p16 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; -+ poly16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; -+ poly16x8x2_t result = test_vtrnqp16 (vld1q_p16 (first), vld1q_p16 (second)); -+ poly16x8_t res1 = result.val[0], res2 = result.val[1]; -+ poly16_t exp1[] = {1, 9, 3, 11, 5, 13, 7, 15}; -+ poly16_t exp2[] = {2, 10, 4, 12, 6, 14, 8, 16}; -+ poly16x8_t expected1 = vld1q_p16 (exp1); -+ poly16x8_t expected2 = vld1q_p16 (exp2); -+ -+ for (i = 0; i < 8; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/int_comparisons_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/int_comparisons_1.c -@@ -0,0 +1,47 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O3 -fno-inline" } */ -+ -+/* Scan-assembler test, so, incorporate as little other code as possible. 
*/ -+ -+#include "arm_neon.h" -+#include "int_comparisons.x" -+ -+/* Operations on all 18 integer types: (q?)_[su](8|16|32|64), d_[su]64. -+ (d?)_[us]64 generate regs of form 'd0' rather than e.g. 'v0.2d'. */ -+/* { dg-final { scan-assembler-times "\[ \t\]cmeq\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*#?0" 14 } } */ -+/* { dg-final { scan-assembler-times "\[ \t\]cmeq\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]*#?0" 4 } } */ -+/* { dg-final { scan-assembler-times "\[ \t\]cmeq\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\]" 14 } } */ -+/* { dg-final { scan-assembler-times "\[ \t\]cmeq\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]+d\[0-9\]+" 4 } } */ -+/* { dg-final { scan-assembler-times "\[ \t\]cmtst\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\]" 14 } } */ -+/* { dg-final { scan-assembler-times "\[ \t\]cmtst\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]+d\[0-9\]+" 4 } } */ -+ -+/* vcge + vcle both implemented with cmge (signed) or cmhs (unsigned). */ -+/* { dg-final { scan-assembler-times "\[ \t\]cmge\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\]" 14 } } */ -+/* { dg-final { scan-assembler-times "\[ \t\]cmge\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]+d\[0-9\]+" 4 } } */ -+/* { dg-final { scan-assembler-times "\[ \t\]cmhs\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\]" 14 } } */ -+/* { dg-final { scan-assembler-times "\[ \t\]cmhs\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]+d\[0-9\]+" 4 } } */ -+ -+/* vcgt + vclt both implemented with cmgt (signed) or cmhi (unsigned). */ -+/* { dg-final { scan-assembler-times "\[ \t\]cmgt\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\]" 14 } } */ -+/* { dg-final { scan-assembler-times "\[ \t\]cmgt\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]+d\[0-9\]+" 4 } } */ -+/* { dg-final { scan-assembler-times "\[ \t\]cmhi\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\]" 14 } } */ -+/* { dg-final { scan-assembler-times "\[ \t\]cmhi\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]+d\[0-9\]+" 4 } } */ -+ -+/* Comparisons against immediate zero, on the 8 signed integer types only. */ -+ -+/* { dg-final { scan-assembler-times "\[ \t\]cmge\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*#?0" 7 } } */ -+/* For int64_t and int64x1_t, combine_simplify_rtx failure of -+ https://gcc.gnu.org/ml/gcc/2014-06/msg00253.html -+ prevents generation of cmge....#0, instead producing mvn + sshr. 
*/ -+/* { #dg-final { scan-assembler-times "\[ \t\]cmge\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]*#?0" 2 } } */ -+/* { dg-final { scan-assembler-times "\[ \t\]cmgt\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*#?0" 7 } } */ -+/* { dg-final { scan-assembler-times "\[ \t\]cmgt\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]*#?0" 2 } } */ -+/* { dg-final { scan-assembler-times "\[ \t\]cmle\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*#?0" 7 } } */ -+/* { dg-final { scan-assembler-times "\[ \t\]cmle\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]*#?0" 2 } } */ -+/* { dg-final { scan-assembler-times "\[ \t\]cmlt\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*#?0" 7 } } */ -+/* For int64_t and int64x1_t, cmlt ... #0 and sshr ... #63 are equivalent, -+ so allow either. cmgez issue above results in extra 2 * sshr....63. */ -+/* { dg-final { scan-assembler-times "\[ \t\](?:cmlt|sshr)\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]*#?(?:0|63)" 4 } } */ -+ -+// All should have been compiled into single insns without inverting result: -+/* { dg-final { scan-assembler-not "\[ \t\]not\[ \t\]" } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp16.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+poly16x8_t -+test_vrev64qp16 (poly16x8_t _arg) -+{ -+ return vrev64q_p16 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly16x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; -+ poly16x8_t reversed = test_vrev64qp16 (inorder); -+ poly16x8_t expected = {4, 3, 2, 1, 8, 7, 6, 5}; -+ -+ for (i = 0; i < 8; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_f32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_f32_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vextf32' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+#include "ext_f32.x" -+ -+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#\[0-9\]+\(?:.4)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64f32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64f32.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+float32x2_t -+test_vrev64f32 (float32x2_t _arg) -+{ -+ return vrev64_f32 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ float32x2_t inorder = {1, 2}; -+ float32x2_t reversed = test_vrev64f32 (inorder); -+ float32x2_t expected = {2, 1}; -+ -+ for (i = 0; i < 2; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c -@@ -0,0 +1,430 @@ -+/* Test vdup_lane intrinsics work correctly. 
*/ -+/* { dg-do run } */ -+/* { dg-options "--save-temps -O1" } */ -+ -+#include <arm_neon.h> -+ -+extern void abort (void); -+ -+float32x2_t __attribute__ ((noinline)) -+wrap_vdup_lane_f32_0 (float32x2_t a) -+{ -+ return vdup_lane_f32 (a, 0); -+} -+ -+float32x2_t __attribute__ ((noinline)) -+wrap_vdup_lane_f32_1 (float32x2_t a) -+{ -+ return vdup_lane_f32 (a, 1); -+} -+ -+int __attribute__ ((noinline)) -+test_vdup_lane_f32 () -+{ -+ float32x2_t a; -+ float32x2_t b; -+ int i; -+ float32_t c[2] = { 0.0 , 3.14 }; -+ float32_t d[2]; -+ -+ a = vld1_f32 (c); -+ b = wrap_vdup_lane_f32_0 (a); -+ vst1_f32 (d, b); -+ for (i = 0; i < 2; i++) -+ if (c[0] != d[i]) -+ return 1; -+ -+ b = wrap_vdup_lane_f32_1 (a); -+ vst1_f32 (d, b); -+ for (i = 0; i < 2; i++) -+ if (c[1] != d[i]) -+ return 1; -+ return 0; -+} -+ -+float32x4_t __attribute__ ((noinline)) -+wrap_vdupq_lane_f32_0 (float32x2_t a) -+{ -+ return vdupq_lane_f32 (a, 0); -+} -+ -+float32x4_t __attribute__ ((noinline)) -+wrap_vdupq_lane_f32_1 (float32x2_t a) -+{ -+ return vdupq_lane_f32 (a, 1); -+} -+ -+int __attribute__ ((noinline)) -+test_vdupq_lane_f32 () -+{ -+ float32x2_t a; -+ float32x4_t b; -+ int i; -+ float32_t c[2] = { 0.0 , 3.14 }; -+ float32_t d[4]; -+ -+ a = vld1_f32 (c); -+ b = wrap_vdupq_lane_f32_0 (a); -+ vst1q_f32 (d, b); -+ for (i = 0; i < 4; i++) -+ if (c[0] != d[i]) -+ return 1; -+ -+ b = wrap_vdupq_lane_f32_1 (a); -+ vst1q_f32 (d, b); -+ for (i = 0; i < 4; i++) -+ if (c[1] != d[i]) -+ return 1; -+ return 0; -+} -+ -+int8x8_t __attribute__ ((noinline)) -+wrap_vdup_lane_s8_0 (int8x8_t a) -+{ -+ return vdup_lane_s8 (a, 0); -+} -+ -+int8x8_t __attribute__ ((noinline)) -+wrap_vdup_lane_s8_1 (int8x8_t a) -+{ -+ return vdup_lane_s8 (a, 1); -+} -+ -+int __attribute__ ((noinline)) -+test_vdup_lane_s8 () -+{ -+ int8x8_t a; -+ int8x8_t b; -+ int i; -+ /* Only two first cases are interesting. */ -+ int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; -+ int8_t d[8]; -+ -+ a = vld1_s8 (c); -+ b = wrap_vdup_lane_s8_0 (a); -+ vst1_s8 (d, b); -+ for (i = 0; i < 8; i++) -+ if (c[0] != d[i]) -+ return 1; -+ -+ b = wrap_vdup_lane_s8_1 (a); -+ vst1_s8 (d, b); -+ for (i = 0; i < 8; i++) -+ if (c[1] != d[i]) -+ return 1; -+ return 0; -+} -+ -+int8x16_t __attribute__ ((noinline)) -+wrap_vdupq_lane_s8_0 (int8x8_t a) -+{ -+ return vdupq_lane_s8 (a, 0); -+} -+ -+int8x16_t __attribute__ ((noinline)) -+wrap_vdupq_lane_s8_1 (int8x8_t a) -+{ -+ return vdupq_lane_s8 (a, 1); -+} -+ -+int __attribute__ ((noinline)) -+test_vdupq_lane_s8 () -+{ -+ int8x8_t a; -+ int8x16_t b; -+ int i; -+ /* Only two first cases are interesting. */ -+ int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; -+ int8_t d[16]; -+ -+ a = vld1_s8 (c); -+ b = wrap_vdupq_lane_s8_0 (a); -+ vst1q_s8 (d, b); -+ for (i = 0; i < 16; i++) -+ if (c[0] != d[i]) -+ return 1; -+ -+ b = wrap_vdupq_lane_s8_1 (a); -+ vst1q_s8 (d, b); -+ for (i = 0; i < 16; i++) -+ if (c[1] != d[i]) -+ return 1; -+ return 0; -+} -+ -+int16x4_t __attribute__ ((noinline)) -+wrap_vdup_lane_s16_0 (int16x4_t a) -+{ -+ return vdup_lane_s16 (a, 0); -+} -+ -+int16x4_t __attribute__ ((noinline)) -+wrap_vdup_lane_s16_1 (int16x4_t a) -+{ -+ return vdup_lane_s16 (a, 1); -+} -+ -+int __attribute__ ((noinline)) -+test_vdup_lane_s16 () -+{ -+ int16x4_t a; -+ int16x4_t b; -+ int i; -+ /* Only two first cases are interesting. 
*/ -+ int16_t c[4] = { 0, 1, 2, 3 }; -+ int16_t d[4]; -+ -+ a = vld1_s16 (c); -+ b = wrap_vdup_lane_s16_0 (a); -+ vst1_s16 (d, b); -+ for (i = 0; i < 4; i++) -+ if (c[0] != d[i]) -+ return 1; -+ -+ b = wrap_vdup_lane_s16_1 (a); -+ vst1_s16 (d, b); -+ for (i = 0; i < 4; i++) -+ if (c[1] != d[i]) -+ return 1; -+ return 0; -+} -+ -+int16x8_t __attribute__ ((noinline)) -+wrap_vdupq_lane_s16_0 (int16x4_t a) -+{ -+ return vdupq_lane_s16 (a, 0); -+} -+ -+int16x8_t __attribute__ ((noinline)) -+wrap_vdupq_lane_s16_1 (int16x4_t a) -+{ -+ return vdupq_lane_s16 (a, 1); -+} -+ -+int __attribute__ ((noinline)) -+test_vdupq_lane_s16 () -+{ -+ int16x4_t a; -+ int16x8_t b; -+ int i; -+ /* Only two first cases are interesting. */ -+ int16_t c[4] = { 0, 1, 2, 3 }; -+ int16_t d[8]; -+ -+ a = vld1_s16 (c); -+ b = wrap_vdupq_lane_s16_0 (a); -+ vst1q_s16 (d, b); -+ for (i = 0; i < 8; i++) -+ if (c[0] != d[i]) -+ return 1; -+ -+ b = wrap_vdupq_lane_s16_1 (a); -+ vst1q_s16 (d, b); -+ for (i = 0; i < 8; i++) -+ if (c[1] != d[i]) -+ return 1; -+ return 0; -+} -+ -+int32x2_t __attribute__ ((noinline)) -+wrap_vdup_lane_s32_0 (int32x2_t a) -+{ -+ return vdup_lane_s32 (a, 0); -+} -+ -+int32x2_t __attribute__ ((noinline)) -+wrap_vdup_lane_s32_1 (int32x2_t a) -+{ -+ return vdup_lane_s32 (a, 1); -+} -+ -+int __attribute__ ((noinline)) -+test_vdup_lane_s32 () -+{ -+ int32x2_t a; -+ int32x2_t b; -+ int i; -+ int32_t c[2] = { 0, 1 }; -+ int32_t d[2]; -+ -+ a = vld1_s32 (c); -+ b = wrap_vdup_lane_s32_0 (a); -+ vst1_s32 (d, b); -+ for (i = 0; i < 2; i++) -+ if (c[0] != d[i]) -+ return 1; -+ -+ b = wrap_vdup_lane_s32_1 (a); -+ vst1_s32 (d, b); -+ for (i = 0; i < 2; i++) -+ if (c[1] != d[i]) -+ return 1; -+ return 0; -+} -+ -+int32x4_t __attribute__ ((noinline)) -+wrap_vdupq_lane_s32_0 (int32x2_t a) -+{ -+ return vdupq_lane_s32 (a, 0); -+} -+ -+int32x4_t __attribute__ ((noinline)) -+wrap_vdupq_lane_s32_1 (int32x2_t a) -+{ -+ return vdupq_lane_s32 (a, 1); -+} -+ -+int __attribute__ ((noinline)) -+test_vdupq_lane_s32 () -+{ -+ int32x2_t a; -+ int32x4_t b; -+ int i; -+ int32_t c[2] = { 0, 1 }; -+ int32_t d[4]; -+ -+ a = vld1_s32 (c); -+ b = wrap_vdupq_lane_s32_0 (a); -+ vst1q_s32 (d, b); -+ for (i = 0; i < 4; i++) -+ if (c[0] != d[i]) -+ return 1; -+ -+ b = wrap_vdupq_lane_s32_1 (a); -+ vst1q_s32 (d, b); -+ for (i = 0; i < 4; i++) -+ if (c[1] != d[i]) -+ return 1; -+ return 0; -+} -+ -+int64x1_t __attribute__ ((noinline)) -+wrap_vdup_lane_s64_0 (int64x1_t a) -+{ -+ return vdup_lane_s64 (a, 0); -+} -+ -+int64x1_t __attribute__ ((noinline)) -+wrap_vdup_lane_s64_1 (int64x1_t a) -+{ -+ return vdup_lane_s64 (a, 1); -+} -+ -+int __attribute__ ((noinline)) -+test_vdup_lane_s64 () -+{ -+ int64x1_t a; -+ int64x1_t b; -+ int64_t c[1]; -+ int64_t d[1]; -+ -+ c[0] = 0; -+ a = vld1_s64 (c); -+ b = wrap_vdup_lane_s64_0 (a); -+ vst1_s64 (d, b); -+ if (c[0] != d[0]) -+ return 1; -+ -+ c[0] = 1; -+ a = vld1_s64 (c); -+ b = wrap_vdup_lane_s64_1 (a); -+ vst1_s64 (d, b); -+ if (c[0] != d[0]) -+ return 1; -+ return 0; -+} -+ -+int64x2_t __attribute__ ((noinline)) -+wrap_vdupq_lane_s64_0 (int64x1_t a) -+{ -+ return vdupq_lane_s64 (a, 0); -+} -+ -+int64x2_t __attribute__ ((noinline)) -+wrap_vdupq_lane_s64_1 (int64x1_t a) -+{ -+ return vdupq_lane_s64 (a, 1); -+} -+ -+int __attribute__ ((noinline)) -+test_vdupq_lane_s64 () -+{ -+ int64x1_t a; -+ int64x2_t b; -+ int i; -+ int64_t c[1]; -+ int64_t d[2]; -+ -+ c[0] = 0; -+ a = vld1_s64 (c); -+ b = wrap_vdupq_lane_s64_0 (a); -+ vst1q_s64 (d, b); -+ for (i = 0; i < 2; i++) -+ if (c[0] != d[i]) -+ return 1; -+ -+ 
c[0] = 1; -+ a = vld1_s64 (c); -+ b = wrap_vdupq_lane_s64_1 (a); -+ vst1q_s64 (d, b); -+ for (i = 0; i < 2; i++) -+ if (c[0] != d[i]) -+ return 1; -+ return 0; -+} -+ -+int -+main () -+{ -+ -+ if (test_vdup_lane_f32 ()) -+ abort (); -+ if (test_vdup_lane_s8 ()) -+ abort (); -+ if (test_vdup_lane_s16 ()) -+ abort (); -+ if (test_vdup_lane_s32 ()) -+ abort (); -+ if (test_vdup_lane_s64 ()) -+ abort (); -+ if (test_vdupq_lane_f32 ()) -+ abort (); -+ if (test_vdupq_lane_s8 ()) -+ abort (); -+ if (test_vdupq_lane_s16 ()) -+ abort (); -+ if (test_vdupq_lane_s32 ()) -+ abort (); -+ if (test_vdupq_lane_s64 ()) -+ abort (); -+ -+ return 0; -+} -+ -+/* Asm check for test_vdup_lane_s8. */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, v\[0-9\]+\.b\\\[0\\\]" 1 } } */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, v\[0-9\]+\.b\\\[1\\\]" 1 } } */ -+ -+/* Asm check for test_vdupq_lane_s8. */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, v\[0-9\]+\.b\\\[0\\\]" 1 } } */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, v\[0-9\]+\.b\\\[1\\\]" 1 } } */ -+ -+/* Asm check for test_vdup_lane_s16. */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */ -+/* Asm check for test_vdup_lane_s16. */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, v\[0-9\]+\.h\\\[1\\\]" 1 } } */ -+ -+/* Asm check for test_vdupq_lane_s16. */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */ -+/* Asm check for test_vdupq_lane_s16. */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, v\[0-9\]+\.h\\\[1\\\]" 1 } } */ -+ -+/* Asm check for test_vdup_lane_f32 and test_vdup_lane_s32. */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, v\[0-9\]+\.s\\\[0\\\]" 2 } } */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, v\[0-9\]+\.s\\\[1\\\]" 2 } } */ -+ -+/* Asm check for test_vdupq_lane_f32 and test_vdupq_lane_s32. */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, v\[0-9\]+\.s\\\[0\\\]" 2 } } */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, v\[0-9\]+\.s\\\[1\\\]" 2 } } */ -+ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_15.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_15.c -@@ -0,0 +1,19 @@ -+/* Verify: -+ * with outgoing. -+ * total frame size > 512. -+ area except outgoing <= 512 -+ * number of callee-save reg >= 2. -+ * split the stack adjustment into two substractions, -+ the first could be optimized into "stp !". */ -+ -+/* { dg-do run } */ -+/* { dg-options "-O2 --save-temps" } */ -+ -+#include "test_frame_common.h" -+ -+t_frame_pattern_outgoing (test15, 480, , 8, a[8]) -+t_frame_run (test15) -+ -+/* { dg-final { scan-assembler-times "sub\tsp, sp, #\[0-9\]+" 1 } } */ -+/* { dg-final { scan-assembler-times "stp\tx29, x30, \\\[sp, -\[0-9\]+\\\]!" 3 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/vbslq_u64_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vbslq_u64_1.c -@@ -0,0 +1,17 @@ -+/* Test if a BSL-like instruction can be generated from a C idiom. */ -+/* { dg-do assemble } */ -+/* { dg-options "--save-temps -O3" } */ -+ -+#include <arm_neon.h> -+ -+/* Folds to BIF. 
*/ -+ -+uint32x4_t -+vbslq_dummy_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t mask) -+{ -+ return (mask & a) | (~mask & b); -+} -+ -+/* { dg-final { scan-assembler-times "bif\\tv" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/vdup_n_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vdup_n_1.c -@@ -0,0 +1,619 @@ -+/* Test vdup_lane intrinsics work correctly. */ -+/* { dg-do run } */ -+/* { dg-options "-O1 --save-temps" } */ -+ -+#include <arm_neon.h> -+ -+extern void abort (void); -+ -+float32x2_t __attribute__ ((noinline)) -+wrap_vdup_n_f32 (float32_t a) -+{ -+ return vdup_n_f32 (a); -+} -+ -+int __attribute__ ((noinline)) -+test_vdup_n_f32 () -+{ -+ float32_t a = 1.0; -+ float32x2_t b; -+ float32_t c[2]; -+ int i; -+ -+ b = wrap_vdup_n_f32 (a); -+ vst1_f32 (c, b); -+ for (i = 0; i < 2; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+float32x4_t __attribute__ ((noinline)) -+wrap_vdupq_n_f32 (float32_t a) -+{ -+ return vdupq_n_f32 (a); -+} -+ -+int __attribute__ ((noinline)) -+test_vdupq_n_f32 () -+{ -+ float32_t a = 1.0; -+ float32x4_t b; -+ float32_t c[4]; -+ int i; -+ -+ b = wrap_vdupq_n_f32 (a); -+ vst1q_f32 (c, b); -+ for (i = 0; i < 4; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+float64x1_t __attribute__ ((noinline)) -+wrap_vdup_n_f64 (float64_t a) -+{ -+ return vdup_n_f64 (a); -+} -+ -+int __attribute__ ((noinline)) -+test_vdup_n_f64 () -+{ -+ float64_t a = 1.0; -+ float64x1_t b; -+ float64_t c[1]; -+ int i; -+ -+ b = wrap_vdup_n_f64 (a); -+ vst1_f64 (c, b); -+ for (i = 0; i < 1; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+float64x2_t __attribute__ ((noinline)) -+wrap_vdupq_n_f64 (float64_t a) -+{ -+ return vdupq_n_f64 (a); -+} -+ -+int __attribute__ ((noinline)) -+test_vdupq_n_f64 () -+{ -+ float64_t a = 1.0; -+ float64x2_t b; -+ float64_t c[2]; -+ int i; -+ -+ b = wrap_vdupq_n_f64 (a); -+ vst1q_f64 (c, b); -+ for (i = 0; i < 2; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+poly8x8_t __attribute__ ((noinline)) -+wrap_vdup_n_p8 (poly8_t a) -+{ -+ return vdup_n_p8 (a); -+} -+ -+int __attribute__ ((noinline)) -+test_vdup_n_p8 () -+{ -+ poly8_t a = 1; -+ poly8x8_t b; -+ poly8_t c[8]; -+ int i; -+ -+ b = wrap_vdup_n_p8 (a); -+ vst1_p8 (c, b); -+ for (i = 0; i < 8; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+poly8x16_t __attribute__ ((noinline)) -+wrap_vdupq_n_p8 (poly8_t a) -+{ -+ return vdupq_n_p8 (a); -+} -+ -+int __attribute__ ((noinline)) -+test_vdupq_n_p8 () -+{ -+ poly8_t a = 1; -+ poly8x16_t b; -+ poly8_t c[16]; -+ int i; -+ -+ b = wrap_vdupq_n_p8 (a); -+ vst1q_p8 (c, b); -+ for (i = 0; i < 16; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+int8x8_t __attribute__ ((noinline)) -+wrap_vdup_n_s8 (int8_t a) -+{ -+ return vdup_n_s8 (a); -+} -+ -+int __attribute__ ((noinline)) -+test_vdup_n_s8 () -+{ -+ int8_t a = 1; -+ int8x8_t b; -+ int8_t c[8]; -+ int i; -+ -+ b = wrap_vdup_n_s8 (a); -+ vst1_s8 (c, b); -+ for (i = 0; i < 8; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+int8x16_t __attribute__ ((noinline)) -+wrap_vdupq_n_s8 (int8_t a) -+{ -+ return vdupq_n_s8 (a); -+} -+ -+int __attribute__ ((noinline)) -+test_vdupq_n_s8 () -+{ -+ int8_t a = 1; -+ int8x16_t b; -+ int8_t c[16]; -+ int i; -+ -+ b = wrap_vdupq_n_s8 (a); -+ vst1q_s8 (c, b); -+ for (i = 0; i < 16; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+uint8x8_t __attribute__ ((noinline)) -+wrap_vdup_n_u8 (uint8_t a) -+{ -+ return vdup_n_u8 (a); -+} -+ -+int __attribute__ ((noinline)) 
-+test_vdup_n_u8 () -+{ -+ uint8_t a = 1; -+ uint8x8_t b; -+ uint8_t c[8]; -+ int i; -+ -+ b = wrap_vdup_n_u8 (a); -+ vst1_u8 (c, b); -+ for (i = 0; i < 8; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+uint8x16_t __attribute__ ((noinline)) -+wrap_vdupq_n_u8 (uint8_t a) -+{ -+ return vdupq_n_u8 (a); -+} -+ -+int __attribute__ ((noinline)) -+test_vdupq_n_u8 () -+{ -+ uint8_t a = 1; -+ uint8x16_t b; -+ uint8_t c[16]; -+ int i; -+ -+ b = wrap_vdupq_n_u8 (a); -+ vst1q_u8 (c, b); -+ for (i = 0; i < 16; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+poly16x4_t __attribute__ ((noinline)) -+wrap_vdup_n_p16 (poly16_t a) -+{ -+ return vdup_n_p16 (a); -+} -+ -+int __attribute__ ((noinline)) -+test_vdup_n_p16 () -+{ -+ poly16_t a = 1; -+ poly16x4_t b; -+ poly16_t c[4]; -+ int i; -+ -+ b = wrap_vdup_n_p16 (a); -+ vst1_p16 (c, b); -+ for (i = 0; i < 4; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+poly16x8_t __attribute__ ((noinline)) -+wrap_vdupq_n_p16 (poly16_t a) -+{ -+ return vdupq_n_p16 (a); -+} -+ -+int __attribute__ ((noinline)) -+test_vdupq_n_p16 () -+{ -+ poly16_t a = 1; -+ poly16x8_t b; -+ poly16_t c[8]; -+ int i; -+ -+ b = wrap_vdupq_n_p16 (a); -+ vst1q_p16 (c, b); -+ for (i = 0; i < 8; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+int16x4_t __attribute__ ((noinline)) -+wrap_vdup_n_s16 (int16_t a) -+{ -+ return vdup_n_s16 (a); -+} -+ -+int __attribute__ ((noinline)) -+test_vdup_n_s16 () -+{ -+ int16_t a = 1; -+ int16x4_t b; -+ int16_t c[4]; -+ int i; -+ -+ b = wrap_vdup_n_s16 (a); -+ vst1_s16 (c, b); -+ for (i = 0; i < 4; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+int16x8_t __attribute__ ((noinline)) -+wrap_vdupq_n_s16 (int16_t a) -+{ -+ return vdupq_n_s16 (a); -+} -+ -+int __attribute__ ((noinline)) -+test_vdupq_n_s16 () -+{ -+ int16_t a = 1; -+ int16x8_t b; -+ int16_t c[8]; -+ int i; -+ -+ b = wrap_vdupq_n_s16 (a); -+ vst1q_s16 (c, b); -+ for (i = 0; i < 8; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+uint16x4_t __attribute__ ((noinline)) -+wrap_vdup_n_u16 (uint16_t a) -+{ -+ return vdup_n_u16 (a); -+} -+ -+int __attribute__ ((noinline)) -+test_vdup_n_u16 () -+{ -+ uint16_t a = 1; -+ uint16x4_t b; -+ uint16_t c[4]; -+ int i; -+ -+ b = wrap_vdup_n_u16 (a); -+ vst1_u16 (c, b); -+ for (i = 0; i < 4; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+uint16x8_t __attribute__ ((noinline)) -+wrap_vdupq_n_u16 (uint16_t a) -+{ -+ return vdupq_n_u16 (a); -+} -+ -+int __attribute__ ((noinline)) -+test_vdupq_n_u16 () -+{ -+ uint16_t a = 1; -+ uint16x8_t b; -+ uint16_t c[8]; -+ int i; -+ -+ b = wrap_vdupq_n_u16 (a); -+ vst1q_u16 (c, b); -+ for (i = 0; i < 8; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+int32x2_t __attribute__ ((noinline)) -+wrap_vdup_n_s32 (int32_t a) -+{ -+ return vdup_n_s32 (a); -+} -+ -+int __attribute__ ((noinline)) -+test_vdup_n_s32 () -+{ -+ int32_t a = 1; -+ int32x2_t b; -+ int32_t c[2]; -+ int i; -+ -+ b = wrap_vdup_n_s32 (a); -+ vst1_s32 (c, b); -+ for (i = 0; i < 2; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+int32x4_t __attribute__ ((noinline)) -+wrap_vdupq_n_s32 (int32_t a) -+{ -+ return vdupq_n_s32 (a); -+} -+ -+int __attribute__ ((noinline)) -+test_vdupq_n_s32 () -+{ -+ int32_t a = 1; -+ int32x4_t b; -+ int32_t c[4]; -+ int i; -+ -+ b = wrap_vdupq_n_s32 (a); -+ vst1q_s32 (c, b); -+ for (i = 0; i < 4; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+uint32x2_t __attribute__ ((noinline)) -+wrap_vdup_n_u32 (uint32_t a) -+{ -+ return vdup_n_u32 (a); -+} -+ 
-+int __attribute__ ((noinline)) -+test_vdup_n_u32 () -+{ -+ uint32_t a = 1; -+ uint32x2_t b; -+ uint32_t c[2]; -+ int i; -+ -+ b = wrap_vdup_n_u32 (a); -+ vst1_u32 (c, b); -+ for (i = 0; i < 2; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+uint32x4_t __attribute__ ((noinline)) -+wrap_vdupq_n_u32 (uint32_t a) -+{ -+ return vdupq_n_u32 (a); -+} -+ -+int __attribute__ ((noinline)) -+test_vdupq_n_u32 () -+{ -+ uint32_t a = 1; -+ uint32x4_t b; -+ uint32_t c[4]; -+ int i; -+ -+ b = wrap_vdupq_n_u32 (a); -+ vst1q_u32 (c, b); -+ for (i = 0; i < 4; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+int64x1_t __attribute__ ((noinline)) -+wrap_vdup_n_s64 (int64_t a) -+{ -+ return vdup_n_s64 (a); -+} -+ -+int __attribute__ ((noinline)) -+test_vdup_n_s64 () -+{ -+ int64_t a = 1; -+ int64x1_t b; -+ int64_t c[1]; -+ int i; -+ -+ b = wrap_vdup_n_s64 (a); -+ vst1_s64 (c, b); -+ for (i = 0; i < 1; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+int64x2_t __attribute__ ((noinline)) -+wrap_vdupq_n_s64 (int64_t a) -+{ -+ return vdupq_n_s64 (a); -+} -+ -+int __attribute__ ((noinline)) -+test_vdupq_n_s64 () -+{ -+ int64_t a = 1; -+ int64x2_t b; -+ int64_t c[2]; -+ int i; -+ -+ b = wrap_vdupq_n_s64 (a); -+ vst1q_s64 (c, b); -+ for (i = 0; i < 2; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+uint64x1_t __attribute__ ((noinline)) -+wrap_vdup_n_u64 (uint64_t a) -+{ -+ return vdup_n_u64 (a); -+} -+ -+int __attribute__ ((noinline)) -+test_vdup_n_u64 () -+{ -+ uint64_t a = 1; -+ uint64x1_t b; -+ uint64_t c[1]; -+ int i; -+ -+ b = wrap_vdup_n_u64 (a); -+ vst1_u64 (c, b); -+ for (i = 0; i < 1; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+uint64x2_t __attribute__ ((noinline)) -+wrap_vdupq_n_u64 (uint64_t a) -+{ -+ return vdupq_n_u64 (a); -+} -+ -+int __attribute__ ((noinline)) -+test_vdupq_n_u64 () -+{ -+ uint64_t a = 1; -+ uint64x2_t b; -+ uint64_t c[2]; -+ int i; -+ -+ b = wrap_vdupq_n_u64 (a); -+ vst1q_u64 (c, b); -+ for (i = 0; i < 2; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+int -+main () -+{ -+ if (test_vdup_n_f32 ()) -+ abort (); -+ if (test_vdup_n_f64 ()) -+ abort (); -+ if (test_vdup_n_p8 ()) -+ abort (); -+ if (test_vdup_n_u8 ()) -+ abort (); -+ if (test_vdup_n_s8 ()) -+ abort (); -+ if (test_vdup_n_p16 ()) -+ abort (); -+ if (test_vdup_n_s16 ()) -+ abort (); -+ if (test_vdup_n_u16 ()) -+ abort (); -+ if (test_vdup_n_s32 ()) -+ abort (); -+ if (test_vdup_n_u32 ()) -+ abort (); -+ if (test_vdup_n_s64 ()) -+ abort (); -+ if (test_vdup_n_u64 ()) -+ abort (); -+ if (test_vdupq_n_f32 ()) -+ abort (); -+ if (test_vdupq_n_f64 ()) -+ abort (); -+ if (test_vdupq_n_p8 ()) -+ abort (); -+ if (test_vdupq_n_u8 ()) -+ abort (); -+ if (test_vdupq_n_s8 ()) -+ abort (); -+ if (test_vdupq_n_p16 ()) -+ abort (); -+ if (test_vdupq_n_s16 ()) -+ abort (); -+ if (test_vdupq_n_u16 ()) -+ abort (); -+ if (test_vdupq_n_s32 ()) -+ abort (); -+ if (test_vdupq_n_u32 ()) -+ abort (); -+ if (test_vdupq_n_s64 ()) -+ abort (); -+ if (test_vdupq_n_u64 ()) -+ abort (); -+ return 0; -+} -+ -+/* No asm checks for vdup_n_f32, vdupq_n_f32, vdup_n_f64 and vdupq_n_f64. -+ Cannot force floating point value in general purpose regester. */ -+ -+/* Asm check for test_vdup_n_p8, test_vdup_n_s8, test_vdup_n_u8. */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, w\[0-9\]+" 3 } } */ -+ -+/* Asm check for test_vdupq_n_p8, test_vdupq_n_s8, test_vdupq_n_u8. 
*/ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, w\[0-9\]+" 3 } } */ -+ -+/* Asm check for test_vdup_n_p16, test_vdup_n_s16, test_vdup_n_u16. */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, w\[0-9\]+" 3 } } */ -+ -+/* Asm check for test_vdupq_n_p16, test_vdupq_n_s16, test_vdupq_n_u16. */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, w\[0-9\]+" 3 } } */ -+ -+/* Asm check for test_vdup_n_s32, test_vdup_n_u32. */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, w\[0-9\]+" 2 } } */ -+ -+/* Asm check for test_vdupq_n_s32, test_vdupq_n_u32. */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, w\[0-9\]+" 2 } } */ -+ -+/* Asm check for test_vdup_n_s64, test_vdup_n_u64 are left out. -+ Attempts to make the compiler generate "dup\\td\[0-9\]+, x\[0-9\]+" -+ are not practical. */ -+ -+/* Asm check for test_vdupq_n_s64, test_vdupq_n_u64. */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2d, x\[0-9\]+" 2 } } */ -+ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_4.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_4.c -@@ -0,0 +1,19 @@ -+/* Verify: -+ * -fomit-frame-pointer. -+ * without outgoing. -+ * total frame size <= 512 but > 256. -+ * number of callee-save reg >= 2. -+ * we can use "stp !" to optimize stack adjustment. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */ -+ -+#include "test_frame_common.h" -+ -+t_frame_pattern (test4, 400, "x19") -+t_frame_run (test4) -+ -+/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, -\[0-9\]+\\\]!" 1 } } */ -+/* { dg-final { scan-assembler-times "ldp\tx19, x30, \\\[sp\\\], \[0-9\]+" 2 } } */ -+ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/fcsel_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/fcsel_1.c -@@ -0,0 +1,22 @@ -+/* { dg-do compile } */ -+/* { dg-options " -O2 " } */ -+ -+float -+f_1 (float a, float b, float c, float d) -+{ -+ if (a > 0.0) -+ return c; -+ else -+ return 2.0; -+} -+ -+double -+f_2 (double a, double b, double c, double d) -+{ -+ if (a > b) -+ return c; -+ else -+ return d; -+} -+ -+/* { dg-final { scan-assembler-times "\tfcsel" 2 } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/vect-fp.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fp.c -@@ -8,11 +8,11 @@ - - - #define DEFN_SETV(type) \ -- set_vector_##type (pR##type a, type n) \ -- { \ -- int i; \ -- for (i=0; i<16; i++) \ -- a[i] = n; \ -+ void set_vector_##type (pR##type a, type n) \ -+ { \ -+ int i; \ -+ for (i=0; i<16; i++) \ -+ a[i] = n; \ - } - - #define DEFN_CHECKV(type) \ ---- a/src/gcc/testsuite/gcc.target/aarch64/rev16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/rev16_1.c -@@ -0,0 +1,59 @@ -+/* { dg-options "-O2" } */ -+/* { dg-do run } */ -+ -+extern void abort (void); -+ -+typedef unsigned int __u32; -+ -+__u32 -+__rev16_32_alt (__u32 x) -+{ -+ return (((__u32)(x) & (__u32)0xff00ff00UL) >> 8) -+ | (((__u32)(x) & (__u32)0x00ff00ffUL) << 8); -+} -+ -+__u32 -+__rev16_32 (__u32 x) -+{ -+ return (((__u32)(x) & (__u32)0x00ff00ffUL) << 8) -+ | (((__u32)(x) & (__u32)0xff00ff00UL) >> 8); -+} -+ -+typedef unsigned long long __u64; -+ -+__u64 -+__rev16_64_alt (__u64 x) -+{ -+ return (((__u64)(x) & (__u64)0xff00ff00ff00ff00UL) >> 8) -+ | (((__u64)(x) & (__u64)0x00ff00ff00ff00ffUL) << 8); -+} -+ -+__u64 -+__rev16_64 (__u64 x) -+{ -+ return (((__u64)(x) & (__u64)0x00ff00ff00ff00ffUL) << 8) -+ | (((__u64)(x) & (__u64)0xff00ff00ff00ff00UL) >> 8); -+} 
-+ -+int -+main (void) -+{ -+ volatile __u32 in32 = 0x12345678; -+ volatile __u32 expected32 = 0x34127856; -+ volatile __u64 in64 = 0x1234567890abcdefUL; -+ volatile __u64 expected64 = 0x34127856ab90efcdUL; -+ -+ if (__rev16_32 (in32) != expected32) -+ abort (); -+ -+ if (__rev16_32_alt (in32) != expected32) -+ abort (); -+ -+ if (__rev16_64 (in64) != expected64) -+ abort (); -+ -+ if (__rev16_64_alt (in64) != expected64) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/vget_high_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vget_high_1.c -@@ -0,0 +1,60 @@ -+/* { dg-do run } */ -+/* { dg-options "-O3 -std=c99" } */ -+ -+#include <arm_neon.h> -+ -+extern void abort (void); -+ -+#define VARIANTS(VARIANT) \ -+VARIANT (uint8_t, 8, uint8x8_t, uint8x16_t, u8) \ -+VARIANT (uint16_t, 4, uint16x4_t, uint16x8_t, u16) \ -+VARIANT (uint32_t, 2, uint32x2_t, uint32x4_t, u32) \ -+VARIANT (uint64_t, 1, uint64x1_t, uint64x2_t, u64) \ -+VARIANT (int8_t, 8, int8x8_t, int8x16_t, s8) \ -+VARIANT (int16_t, 4, int16x4_t, int16x8_t, s16) \ -+VARIANT (int32_t, 2, int32x2_t, int32x4_t, s32) \ -+VARIANT (int64_t, 1, int64x1_t, int64x2_t, s64) \ -+VARIANT (float32_t, 2, float32x2_t, float32x4_t, f32) \ -+VARIANT (float64_t, 1, float64x1_t, float64x2_t, f64) -+ -+ -+#define TESTMETH(BASETYPE, NUM64, TYPE64, TYPE128, SUFFIX) \ -+int \ -+test_vget_low_ ##SUFFIX (BASETYPE *data) \ -+{ \ -+ BASETYPE temp [NUM64]; \ -+ TYPE128 vec = vld1q_##SUFFIX (data); \ -+ TYPE64 high = vget_high_##SUFFIX (vec); \ -+ vst1_##SUFFIX (temp, high); \ -+ for (int i = 0; i < NUM64; i++) \ -+ if (temp[i] != data[i + NUM64]) \ -+ return 1; \ -+ return 0; \ -+} -+ -+VARIANTS (TESTMETH) -+ -+#define CHECK(BASETYPE, NUM64, TYPE64, TYPE128, SUFFIX) \ -+ if (test_vget_low_##SUFFIX (BASETYPE ## _ ## data) != 0) \ -+ abort (); -+ -+int -+main (int argc, char **argv) -+{ -+ uint8_t uint8_t_data[16] = -+ { 1, 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47 }; -+ uint16_t uint16_t_data[8] = { 1, 22, 333, 4444, 55555, 6666, 777, 88 }; -+ uint32_t uint32_t_data[4] = { 65537, 11, 70000, 23 }; -+ uint64_t uint64_t_data[2] = { 0xdeadbeefcafebabeULL, 0x0123456789abcdefULL }; -+ int8_t int8_t_data[16] = -+ { -1, -3, -5, -7, 9, -11, -13, 15, -17, -19, 21, -23, 25, 27, -29, -31 }; -+ int16_t int16_t_data[8] = { -17, 19, 3, -999, 44048, 505, 9999, 1000}; -+ int32_t int32_t_data[4] = { 123456789, -987654321, -135792468, 975318642 }; -+ int64_t int64_t_data[2] = {0xfedcba9876543210LL, 0xdeadbabecafebeefLL }; -+ float32_t float32_t_data[4] = { 3.14159, 2.718, 1.414, 100.0 }; -+ float64_t float64_t_data[2] = { 1.01001000100001, 12345.6789 }; -+ -+ VARIANTS (CHECK); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/vldN_dup_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vldN_dup_1.c -@@ -0,0 +1,84 @@ -+/* { dg-do run } */ -+/* { dg-options "-O3 -fno-inline" } */ -+ -+#include <arm_neon.h> -+ -+extern void abort (void); -+ -+#define VARIANTS(VARIANT, STRUCT) \ -+VARIANT (uint8, , 8, _u8, STRUCT) \ -+VARIANT (uint16, , 4, _u16, STRUCT) \ -+VARIANT (uint32, , 2, _u32, STRUCT) \ -+VARIANT (uint64, , 1, _u64, STRUCT) \ -+VARIANT (int8, , 8, _s8, STRUCT) \ -+VARIANT (int16, , 4, _s16, STRUCT) \ -+VARIANT (int32, , 2, _s32, STRUCT) \ -+VARIANT (int64, , 1, _s64, STRUCT) \ -+VARIANT (poly8, , 8, _p8, STRUCT) \ -+VARIANT (poly16, , 4, _p16, STRUCT) \ -+VARIANT (float32, , 2, _f32, STRUCT) \ -+VARIANT (float64, , 1, _f64, STRUCT) \ -+VARIANT (uint8, q, 16, _u8, STRUCT) \ -+VARIANT (uint16, q, 8, _u16, STRUCT) \ -+VARIANT 
(uint32, q, 4, _u32, STRUCT) \ -+VARIANT (uint64, q, 2, _u64, STRUCT) \ -+VARIANT (int8, q, 16, _s8, STRUCT) \ -+VARIANT (int16, q, 8, _s16, STRUCT) \ -+VARIANT (int32, q, 4, _s32, STRUCT) \ -+VARIANT (int64, q, 2, _s64, STRUCT) \ -+VARIANT (poly8, q, 16, _p8, STRUCT) \ -+VARIANT (poly16, q, 8, _p16, STRUCT) \ -+VARIANT (float32, q, 4, _f32, STRUCT) \ -+VARIANT (float64, q, 2, _f64, STRUCT) -+ -+#define TESTMETH(BASE, Q, ELTS, SUFFIX, STRUCT) \ -+int \ -+test_vld##STRUCT##Q##_dup##SUFFIX (const BASE##_t *data) \ -+{ \ -+ BASE##_t temp[ELTS]; \ -+ BASE##x##ELTS##x##STRUCT##_t vectors = \ -+ vld##STRUCT##Q##_dup##SUFFIX (data); \ -+ int i,j; \ -+ for (i = 0; i < STRUCT; i++) \ -+ { \ -+ vst1##Q##SUFFIX (temp, vectors.val[i]); \ -+ for (j = 0; j < ELTS; j++) \ -+ if (temp[j] != data[i]) \ -+ return 1; \ -+ } \ -+ return 0; \ -+} -+ -+/* Tests of vld2_dup and vld2q_dup. */ -+VARIANTS (TESTMETH, 2) -+/* Tests of vld3_dup and vld3q_dup. */ -+VARIANTS (TESTMETH, 3) -+/* Tests of vld4_dup and vld4q_dup. */ -+VARIANTS (TESTMETH, 4) -+ -+#define CHECK(BASE, Q, ELTS, SUFFIX, STRUCT) \ -+ if (test_vld##STRUCT##Q##_dup##SUFFIX (BASE ##_data) != 0) \ -+ abort (); -+ -+int -+main (int argc, char **argv) -+{ -+ uint8_t uint8_data[4] = { 7, 11, 13, 17 }; -+ uint16_t uint16_data[4] = { 257, 263, 269, 271 }; -+ uint32_t uint32_data[4] = { 65537, 65539, 65543, 65551 }; -+ uint64_t uint64_data[4] = { 0xdeadbeefcafebabeULL, 0x0123456789abcdefULL, -+ 0xfedcba9876543210LL, 0xdeadbabecafebeefLL }; -+ int8_t int8_data[4] = { -1, 3, -5, 7 }; -+ int16_t int16_data[4] = { 257, -259, 261, -263 }; -+ int32_t int32_data[4] = { 123456789, -987654321, -135792468, 975318642 }; -+ int64_t *int64_data = (int64_t *)uint64_data; -+ poly8_t poly8_data[4] = { 0, 7, 13, 18, }; -+ poly16_t poly16_data[4] = { 11111, 2222, 333, 44 }; -+ float32_t float32_data[4] = { 3.14159, 2.718, 1.414, 100.0 }; -+ float64_t float64_data[4] = { 1.010010001, 12345.6789, -9876.54321, 1.618 }; -+ -+ VARIANTS (CHECK, 2); -+ VARIANTS (CHECK, 3); -+ VARIANTS (CHECK, 4); -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c -@@ -0,0 +1,343 @@ -+/* Test vdup_lane intrinsics work correctly. 
*/ -+/* { dg-do run } */ -+/* { dg-options "-O1 --save-temps" } */ -+ -+#include <arm_neon.h> -+ -+#define force_simd(V1) asm volatile ("mov %d0, %1.d[0]" \ -+ : "=w"(V1) \ -+ : "w"(V1) \ -+ : /* No clobbers */) -+ -+extern void abort (void); -+ -+float32_t __attribute__ ((noinline)) -+wrap_vdups_lane_f32_0 (float32x2_t dummy, float32x2_t a) -+{ -+ return vdups_lane_f32 (a, 0); -+} -+ -+float32_t __attribute__ ((noinline)) -+wrap_vdups_lane_f32_1 (float32x2_t a) -+{ -+ return vdups_lane_f32 (a, 1); -+} -+ -+int __attribute__ ((noinline)) -+test_vdups_lane_f32 () -+{ -+ float32x2_t a; -+ float32_t b; -+ float32_t c[2] = { 0.0, 1.0 }; -+ -+ a = vld1_f32 (c); -+ b = wrap_vdups_lane_f32_0 (a, a); -+ if (c[0] != b) -+ return 1; -+ b = wrap_vdups_lane_f32_1 (a); -+ if (c[1] != b) -+ return 1; -+ return 0; -+} -+ -+float64_t __attribute__ ((noinline)) -+wrap_vdupd_lane_f64_0 (float64x1_t dummy, float64x1_t a) -+{ -+ return vdupd_lane_f64 (a, 0); -+} -+ -+int __attribute__ ((noinline)) -+test_vdupd_lane_f64 () -+{ -+ float64x1_t a; -+ float64_t b; -+ float64_t c[1] = { 0.0 }; -+ a = vld1_f64 (c); -+ b = wrap_vdupd_lane_f64_0 (a, a); -+ if (c[0] != b) -+ return 1; -+ return 0; -+} -+ -+int8_t __attribute__ ((noinline)) -+wrap_vdupb_lane_s8_0 (int8x8_t dummy, int8x8_t a) -+{ -+ int8_t result = vdupb_lane_s8 (a, 0); -+ force_simd (result); -+ return result; -+} -+ -+int8_t __attribute__ ((noinline)) -+wrap_vdupb_lane_s8_1 (int8x8_t a) -+{ -+ int8_t result = vdupb_lane_s8 (a, 1); -+ force_simd (result); -+ return result; -+} -+ -+int __attribute__ ((noinline)) -+test_vdupb_lane_s8 () -+{ -+ int8x8_t a; -+ int8_t b; -+ int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; -+ -+ a = vld1_s8 (c); -+ b = wrap_vdupb_lane_s8_0 (a, a); -+ if (c[0] != b) -+ return 1; -+ b = wrap_vdupb_lane_s8_1 (a); -+ if (c[1] != b) -+ return 1; -+ -+ return 0; -+} -+ -+uint8_t __attribute__ ((noinline)) -+wrap_vdupb_lane_u8_0 (uint8x8_t dummy, uint8x8_t a) -+{ -+ uint8_t result = vdupb_lane_u8 (a, 0); -+ force_simd (result); -+ return result; -+} -+ -+uint8_t __attribute__ ((noinline)) -+wrap_vdupb_lane_u8_1 (uint8x8_t a) -+{ -+ uint8_t result = vdupb_lane_u8 (a, 1); -+ force_simd (result); -+ return result; -+} -+ -+int __attribute__ ((noinline)) -+test_vdupb_lane_u8 () -+{ -+ uint8x8_t a; -+ uint8_t b; -+ uint8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; -+ -+ a = vld1_u8 (c); -+ b = wrap_vdupb_lane_u8_0 (a, a); -+ if (c[0] != b) -+ return 1; -+ b = wrap_vdupb_lane_u8_1 (a); -+ if (c[1] != b) -+ return 1; -+ return 0; -+} -+ -+int16_t __attribute__ ((noinline)) -+wrap_vduph_lane_s16_0 (int16x4_t dummy, int16x4_t a) -+{ -+ int16_t result = vduph_lane_s16 (a, 0); -+ force_simd (result); -+ return result; -+} -+ -+int16_t __attribute__ ((noinline)) -+wrap_vduph_lane_s16_1 (int16x4_t a) -+{ -+ int16_t result = vduph_lane_s16 (a, 1); -+ force_simd (result); -+ return result; -+} -+ -+int __attribute__ ((noinline)) -+test_vduph_lane_s16 () -+{ -+ int16x4_t a; -+ int16_t b; -+ int16_t c[4] = { 0, 1, 2, 3 }; -+ -+ a = vld1_s16 (c); -+ b = wrap_vduph_lane_s16_0 (a, a); -+ if (c[0] != b) -+ return 1; -+ b = wrap_vduph_lane_s16_1 (a); -+ if (c[1] != b) -+ return 1; -+ return 0; -+} -+ -+uint16_t __attribute__ ((noinline)) -+wrap_vduph_lane_u16_0 (uint16x4_t dummy, uint16x4_t a) -+{ -+ uint16_t result = vduph_lane_u16 (a, 0); -+ force_simd (result); -+ return result; -+} -+ -+uint16_t __attribute__ ((noinline)) -+wrap_vduph_lane_u16_1 (uint16x4_t a) -+{ -+ uint16_t result = vduph_lane_u16 (a, 1); -+ force_simd (result); -+ return result; -+} -+ -+int 
__attribute__ ((noinline)) -+test_vduph_lane_u16 () -+{ -+ uint16x4_t a; -+ uint16_t b; -+ uint16_t c[4] = { 0, 1, 2, 3 }; -+ -+ a = vld1_u16 (c); -+ b = wrap_vduph_lane_u16_0 (a, a); -+ if (c[0] != b) -+ return 1; -+ b = wrap_vduph_lane_u16_1 (a); -+ if (c[1] != b) -+ return 1; -+ return 0; -+} -+ -+int32_t __attribute__ ((noinline)) -+wrap_vdups_lane_s32_0 (int32x2_t dummy, int32x2_t a) -+{ -+ int32_t result = vdups_lane_s32 (a, 0); -+ force_simd (result); -+ return result; -+} -+ -+int32_t __attribute__ ((noinline)) -+wrap_vdups_lane_s32_1 (int32x2_t a) -+{ -+ int32_t result = vdups_lane_s32 (a, 1); -+ force_simd (result); -+ return result; -+} -+ -+int __attribute__ ((noinline)) -+test_vdups_lane_s32 () -+{ -+ int32x2_t a; -+ int32_t b; -+ int32_t c[2] = { 0, 1 }; -+ -+ a = vld1_s32 (c); -+ b = wrap_vdups_lane_s32_0 (vcreate_s32 (0), a); -+ if (c[0] != b) -+ return 1; -+ b = wrap_vdups_lane_s32_1 (a); -+ if (c[1] != b) -+ return 1; -+ return 0; -+} -+ -+uint32_t __attribute__ ((noinline)) -+wrap_vdups_lane_u32_0 (uint32x2_t dummy, uint32x2_t a) -+{ -+ uint32_t result = vdups_lane_u32 (a, 0); -+ force_simd (result); -+ return result; -+} -+ -+uint32_t __attribute__ ((noinline)) -+wrap_vdups_lane_u32_1 (uint32x2_t a) -+{ -+ uint32_t result = vdups_lane_u32 (a, 1); -+ force_simd (result); -+ return result; -+} -+ -+int __attribute__ ((noinline)) -+test_vdups_lane_u32 () -+{ -+ uint32x2_t a; -+ uint32_t b; -+ uint32_t c[2] = { 0, 1 }; -+ a = vld1_u32 (c); -+ b = wrap_vdups_lane_u32_0 (a, a); -+ if (c[0] != b) -+ return 1; -+ b = wrap_vdups_lane_u32_1 (a); -+ if (c[1] != b) -+ return 1; -+ return 0; -+} -+ -+uint64_t __attribute__ ((noinline)) -+wrap_vdupd_lane_u64_0 (uint64x1_t dummy, uint64x1_t a) -+{ -+ return vdupd_lane_u64 (a, 0);; -+} -+ -+int __attribute__ ((noinline)) -+test_vdupd_lane_u64 () -+{ -+ uint64x1_t a; -+ uint64_t b; -+ uint64_t c[1] = { 0 }; -+ -+ a = vld1_u64 (c); -+ b = wrap_vdupd_lane_u64_0 (a, a); -+ if (c[0] != b) -+ return 1; -+ return 0; -+} -+ -+int64_t __attribute__ ((noinline)) -+wrap_vdupd_lane_s64_0 (uint64x1_t dummy, int64x1_t a) -+{ -+ return vdupd_lane_u64 (a, 0); -+} -+ -+int __attribute__ ((noinline)) -+test_vdupd_lane_s64 () -+{ -+ int64x1_t a; -+ int64_t b; -+ int64_t c[1] = { 0 }; -+ -+ a = vld1_s64 (c); -+ b = wrap_vdupd_lane_s64_0 (a, a); -+ if (c[0] != b) -+ return 1; -+ return 0; -+} -+ -+int -+main () -+{ -+ if (test_vdups_lane_f32 ()) -+ abort (); -+ if (test_vdupd_lane_f64 ()) -+ abort (); -+ if (test_vdupb_lane_s8 ()) -+ abort (); -+ if (test_vdupb_lane_u8 ()) -+ abort (); -+ if (test_vduph_lane_s16 ()) -+ abort (); -+ if (test_vduph_lane_u16 ()) -+ abort (); -+ if (test_vdups_lane_s32 ()) -+ abort (); -+ if (test_vdups_lane_u32 ()) -+ abort (); -+ if (test_vdupd_lane_s64 ()) -+ abort (); -+ if (test_vdupd_lane_u64 ()) -+ abort (); -+ return 0; -+} -+ -+/* Asm check for vdupb_lane_s8, vdupb_lane_u8. */ -+/* { dg-final { scan-assembler-not "dup\\tb\[0-9\]+, v\[0-9\]+\.b\\\[0\\\]" } } */ -+/* { dg-final { scan-assembler-times "dup\\tb\[0-9\]+, v\[0-9\]+\.b\\\[1\\\]" 2 } } */ -+ -+/* Asm check for vduph_lane_h16, vduph_lane_h16. */ -+/* { dg-final { scan-assembler-not "dup\\th\[0-9\]+, v\[0-9\]+\.h\\\[0\\\]" } } */ -+/* { dg-final { scan-assembler-times "dup\\th\[0-9\]+, v\[0-9\]+\.h\\\[1\\\]" 2 } } */ -+ -+/* Asm check for vdups_lane_f32, vdups_lane_s32, vdups_lane_u32. */ -+/* Can't generate "dup s<n>, v<m>[0]" for vdups_lane_s32 and vdups_lane_u32. 
*/ -+/* { dg-final { scan-assembler-times "dup\\ts\[0-9\]+, v\[0-9\]+\.s\\\[0\\\]" 1} } */ -+/* { dg-final { scan-assembler-times "dup\\ts\[0-9\]+, v\[0-9\]+\.s\\\[1\\\]" 3 } } */ -+ -+/* Asm check for vdupd_lane_f64, vdupd_lane_s64, vdupd_lane_u64. */ -+/* Attempts to make the compiler generate vdupd are not practical. */ -+/* { dg-final { scan-assembler-not "dup\\td\[0-9\]+, v\[0-9\]+\.d\\\[0\\\]" } } -+ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/vbslq_u64_2.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vbslq_u64_2.c -@@ -0,0 +1,22 @@ -+/* Test vbslq_u64 can be folded. */ -+/* { dg-do assemble } */ -+/* { dg-options "--save-temps -O3" } */ -+#include <arm_neon.h> -+ -+/* Folds to BIC. */ -+ -+int32x4_t -+half_fold_int (uint32x4_t mask) -+{ -+ int32x4_t a = {0, 0, 0, 0}; -+ int32x4_t b = {2, 4, 8, 16}; -+ return vbslq_s32 (mask, a, b); -+} -+ -+/* { dg-final { scan-assembler-not "bsl\\tv" } } */ -+/* { dg-final { scan-assembler-not "bit\\tv" } } */ -+/* { dg-final { scan-assembler-not "bif\\tv" } } */ -+/* { dg-final { scan-assembler "bic\\tv" } } */ -+ -+/* { dg-final { cleanup-saved-temps } } */ -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/vdup_n_2.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vdup_n_2.c -@@ -0,0 +1,28 @@ -+/* { dg-do run } */ -+/* { dg-options "-O2 -fno-inline --save-temps" } */ -+ -+extern void abort (void); -+ -+typedef float float32x2_t __attribute__ ((__vector_size__ ((8)))); -+typedef unsigned int uint32x2_t __attribute__ ((__vector_size__ ((8)))); -+ -+float32x2_t -+test_dup_1 (float32x2_t in) -+{ -+ return __builtin_shuffle (in, (uint32x2_t) {1, 1}); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ float32x2_t test = {2.718, 3.141}; -+ float32x2_t res = test_dup_1 (test); -+ if (res[0] != test[1] || res[1] != test[1]) -+ abort (); -+ return 0; -+} -+ -+/* { dg-final { scan-assembler-times "\[ \t\]*dup\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.s\\\[\[01\]\\\]" 1 } } */ -+/* { dg-final { scan-assembler-not "zip" } } */ -+/* { dg-final { cleanup-saved-temps } } */ -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_5.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_5.c -@@ -0,0 +1,13 @@ -+/* Verify: -+ * -fomit-frame-pointer. -+ * with outgoing. -+ * total frame size <= 512. -+ * one subtraction of the whole frame size. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-O2 -fomit-frame-pointer" } */ -+ -+#include "test_frame_common.h" -+ -+t_frame_pattern_outgoing (test5, 300, "x19", 8, a[8]) -+t_frame_run (test5) ---- a/src/gcc/testsuite/gcc.target/aarch64/vld1-vst1_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vld1-vst1_1.c -@@ -5,48 +5,54 @@ - - extern void abort (void); - --int __attribute__ ((noinline)) --test_vld1_vst1 () --{ -- int8x8_t a; -- int8x8_t b; -- int i = 0; -- int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; -- int8_t d[8]; -- a = vld1_s8 (c); -- asm volatile ("":::"memory"); -- vst1_s8 (d, a); -- asm volatile ("":::"memory"); -- for (; i < 8; i++) -- if (c[i] != d[i]) -- return 1; -- return 0; -+#define TESTMETH(TYPE, NUM, BASETYPE, SUFFIX) \ -+int __attribute__ ((noinline)) \ -+test_vld1_vst1##SUFFIX () \ -+{ \ -+ TYPE vec; \ -+ int i = 0; \ -+ BASETYPE src[NUM]; \ -+ BASETYPE dest[NUM]; \ -+ for (i = 0; i < NUM; i++) \ -+ src[i] = 2*i + 1; \ -+ asm volatile ("":::"memory"); \ -+ vec = vld1 ## SUFFIX (src); \ -+ asm volatile ("":::"memory"); \ -+ vst1 ## SUFFIX (dest, vec); \ -+ asm volatile ("":::"memory"); \ -+ for (i = 0; i < NUM; i++) \ -+ if (src[i] != dest[i]) \ -+ return 1; \ -+ return 0; \ - } - --int __attribute__ ((noinline)) --test_vld1q_vst1q () --{ -- int16x8_t a; -- int16x8_t b; -- int i = 0; -- int16_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; -- int16_t d[8]; -- a = vld1q_s16 (c); -- asm volatile ("":::"memory"); -- vst1q_s16 (d, a); -- asm volatile ("":::"memory"); -- for (; i < 8; i++) -- if (c[i] != d[i]) -- return 1; -- return 0; --} -+#define VARIANTS(THING) \ -+THING (int8x8_t, 8, int8_t, _s8) \ -+THING (uint8x8_t, 8, uint8_t, _u8) \ -+THING (int16x4_t, 4, int16_t, _s16) \ -+THING (uint16x4_t, 4, uint16_t, _u16) \ -+THING (int32x2_t, 2, int32_t, _s32) \ -+THING (uint32x2_t, 2, uint32_t, _u32) \ -+THING (float32x2_t, 2, float32_t, _f32) \ -+THING (int8x16_t, 16, int8_t, q_s8) \ -+THING (uint8x16_t, 16, uint8_t, q_u8) \ -+THING (int16x8_t, 8, int16_t, q_s16) \ -+THING (uint16x8_t, 8, uint16_t, q_u16) \ -+THING (int32x4_t, 4, int32_t, q_s32) \ -+THING (uint32x4_t, 4, uint32_t, q_u32) \ -+THING (int64x2_t, 2, int64_t, q_s64) \ -+THING (uint64x2_t, 2, uint64_t, q_u64) \ -+THING (float64x2_t, 2, float64_t, q_f64) - -+VARIANTS (TESTMETH) -+ -+#define DOTEST(TYPE, NUM, BASETYPE, SUFFIX) \ -+ if (test_vld1_vst1##SUFFIX ()) \ -+ abort (); -+ - int - main () - { -- if (test_vld1_vst1 ()) -- abort (); -- if (test_vld1q_vst1q ()) -- abort (); -+ VARIANTS (DOTEST); - return 0; - } ---- a/src/gcc/testsuite/gcc.target/aarch64/cvtf_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/cvtf_1.c -@@ -0,0 +1,95 @@ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline -O1" } */ -+ -+#define FCVTDEF(ftype,itype) \ -+void \ -+cvt_##itype##_to_##ftype (itype a, ftype b)\ -+{\ -+ ftype c;\ -+ c = (ftype) a;\ -+ if ( (c - b) > 0.00001) abort();\ -+} -+ -+#define force_simd_for_float(v) asm volatile ("mov %s0, %1.s[0]" :"=w" (v) :"w" (v) :) -+#define force_simd_for_double(v) asm volatile ("mov %d0, %1.d[0]" :"=w" (v) :"w" (v) :) -+ -+#define FCVTDEF_SISD(ftype,itype) \ -+void \ -+cvt_##itype##_to_##ftype##_sisd (itype a, ftype b)\ -+{\ -+ ftype c;\ -+ force_simd_for_##ftype(a);\ -+ c = (ftype) a;\ -+ if ( (c - b) > 0.00001) abort();\ -+} -+ -+#define FCVT(ftype,itype,ival,fval) cvt_##itype##_to_##ftype (ival, fval); -+#define FCVT_SISD(ftype,itype,ival,fval) cvt_##itype##_to_##ftype##_sisd (ival, fval); -+ -+typedef int int32_t; -+typedef unsigned int uint32_t; -+typedef long long int int64_t; 
-+typedef unsigned long long int uint64_t; -+ -+extern void abort(); -+ -+FCVTDEF (float, int32_t) -+/* { dg-final { scan-assembler "scvtf\ts\[0-9\]+,\ w\[0-9\]+" } } */ -+FCVTDEF (float, uint32_t) -+/* { dg-final { scan-assembler "ucvtf\ts\[0-9\]+,\ w\[0-9\]+" } } */ -+FCVTDEF (double, int32_t) -+/* "scvtf\td\[0-9\]+,\ w\[0-9\]+" */ -+FCVTDEF (double, uint32_t) -+/* "ucvtf\td\[0-9\]+,\ w\[0-9\]+" */ -+FCVTDEF (float, int64_t) -+/* "scvtf\ts\[0-9\]+,\ x\[0-9\]+" */ -+FCVTDEF (float, uint64_t) -+/* "ucvtf\ts\[0-9\]+,\ x\[0-9\]+" */ -+FCVTDEF (double, int64_t) -+/* { dg-final { scan-assembler "scvtf\td\[0-9\]+,\ x\[0-9\]+" } } */ -+FCVTDEF (double, uint64_t) -+/* { dg-final { scan-assembler "ucvtf\td\[0-9\]+,\ x\[0-9\]+" } } */ -+FCVTDEF_SISD (float, int32_t) -+/* { dg-final { scan-assembler "scvtf\ts\[0-9\]+,\ s\[0-9\]+" } } */ -+FCVTDEF_SISD (double, int64_t) -+/* { dg-final { scan-assembler "scvtf\td\[0-9\]+,\ d\[0-9\]+" } } */ -+FCVTDEF_SISD (float, uint32_t) -+/* { dg-final { scan-assembler "ucvtf\ts\[0-9\]+,\ s\[0-9\]+" } } */ -+FCVTDEF_SISD (double, uint64_t) -+/* { dg-final { scan-assembler "ucvtf\td\[0-9\]+,\ d\[0-9\]+" } } */ -+FCVTDEF_SISD (float, int64_t) -+/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\]+,\ x\[0-9\]+" 2 } } */ -+FCVTDEF_SISD (float, uint64_t) -+/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\]+,\ x\[0-9\]+" 2 } } */ -+FCVTDEF_SISD (double, int32_t) -+/* { dg-final { scan-assembler-times "scvtf\td\[0-9\]+,\ w\[0-9\]+" 2 } } */ -+FCVTDEF_SISD (double, uint32_t) -+/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\]+,\ w\[0-9\]+" 2 } } */ -+ -+int32_t ival = -1234; -+int64_t llival = -13031303L; -+uint32_t uival = 1234; -+uint64_t ullival = 13031303L; -+ -+int main () -+{ -+ float x; -+ double y; -+ -+ FCVT (float, int32_t, ival, -1234.0); -+ FCVT (float, uint32_t, uival, 1234.0); -+ FCVT (float, int64_t, llival, -13031303.0); -+ FCVT (float, uint64_t, ullival, 13031303.0); -+ FCVT (double, int32_t, ival, -1234.0); -+ FCVT (double, uint32_t, uival, 1234.0); -+ FCVT (double, int64_t, llival, -13031303.0); -+ FCVT (double, uint64_t, ullival, 13031303.0); -+ FCVT_SISD (float, int32_t, ival, -1234.0); -+ FCVT_SISD (double, int64_t, llival, -13031303.0); -+ FCVT_SISD (float, uint32_t, uival, 1234.0); -+ FCVT_SISD (double, uint64_t, ullival, 13031303.0); -+ -+ return 0; -+} -+ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/reload-valid-spoff.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/reload-valid-spoff.c -@@ -17,6 +17,11 @@ - }; - typedef struct _IO_FILE FILE; - extern char *fgets (char *__restrict __s, int __n, FILE *__restrict __stream); -+extern void *memset (void *s, int c, size_t n); -+extern void *memcpy (void *dest, const void *src, size_t n); -+extern int fprintf (FILE *stream, const char *format, ...); -+extern char * safe_strncpy (char *dst, const char *src, size_t size); -+extern size_t strlen (const char *s); - extern struct _IO_FILE *stderr; - extern int optind; - struct aftype { ---- a/src/gcc/testsuite/gcc.target/aarch64/tail_indirect_call_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/tail_indirect_call_1.c -@@ -0,0 +1,18 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+typedef void FP (int); -+ -+/* { dg-final { scan-assembler "br" } } */ -+/* { dg-final { scan-assembler-not "blr" } } */ -+void -+f1 (FP fp, int n) -+{ -+ (fp) (n); -+} -+ -+void -+f2 (int n, FP fp) -+{ -+ (fp) (n); -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/vqdml_lane_intrinsics-bad_1.c -+++ 
b/src/gcc/testsuite/gcc.target/aarch64/vqdml_lane_intrinsics-bad_1.c -@@ -0,0 +1,54 @@ -+/* { dg-do compile } */ -+ -+#include "arm_neon.h" -+ -+int32x4_t -+foo (int32x4_t a, int16x4_t b, int16x4_t c, int d) -+{ -+ return vqdmlal_lane_s16 (a, b, c, d); -+} -+ -+int32x4_t -+foo1 (int32x4_t a, int16x4_t b, int16x8_t c, int d) -+{ -+ return vqdmlal_laneq_s16 (a, b, c, d); -+} -+ -+int32x4_t -+foo2 (int32x4_t a, int16x4_t b, int16x4_t c, int d) -+{ -+ return vqdmlsl_lane_s16 (a, b, c, d); -+} -+ -+int32x4_t -+foo3 (int32x4_t a, int16x4_t b, int16x8_t c, int d) -+{ -+ return vqdmlsl_laneq_s16 (a, b, c, d); -+} -+ -+int32x4_t -+foo4 (int32x4_t a, int16x8_t b, int16x4_t c, int d) -+{ -+ return vqdmlal_high_lane_s16 (a, b, c, d); -+} -+ -+int32x4_t -+foo5 (int32x4_t a, int16x8_t b, int16x4_t c, int d) -+{ -+ return vqdmlsl_high_lane_s16 (a, b, c, d); -+} -+ -+int32x4_t -+foo6 (int32x4_t a, int16x8_t b, int16x8_t c, int d) -+{ -+ return vqdmlal_high_laneq_s16 (a, b, c, d); -+} -+ -+int32x4_t -+foo7 (int32x4_t a, int16x8_t b, int16x8_t c, int d) -+{ -+ return vqdmlsl_high_laneq_s16 (a, b, c, d); -+} -+ -+ -+/* { dg-excess-errors "incompatible type for argument" } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_6.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_6.c -@@ -0,0 +1,20 @@ -+/* Verify: -+ * -fomit-frame-pointer. -+ * without outgoing. -+ * total frame size > 512. -+ * number of callee-saved reg == 1. -+ * split stack adjustment into two subtractions. -+ the second subtraction should use "str !". */ -+ -+/* { dg-do run } */ -+/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */ -+ -+#include "test_frame_common.h" -+ -+t_frame_pattern (test6, 700, ) -+t_frame_run (test6) -+ -+/* { dg-final { scan-assembler-times "str\tx30, \\\[sp, -\[0-9\]+\\\]!" 
2 } } */ -+/* { dg-final { scan-assembler-times "ldr\tx30, \\\[sp\\\], \[0-9\]+" 3 } } */ -+ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_common.h -+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_common.h -@@ -0,0 +1,94 @@ -+extern void abort (); -+ -+#define CVT(v) ((unsigned char)(v)) -+ -+static void __attribute__((noinline)) -+check_args_8 (int a0, int a1, int a2, int a3, int a4, int a5, int a6, int a7, -+ int a8) -+{ -+ if (a0 != 0 -+ || a1 != 1 -+ || a2 != 2 -+ || a3 != 3 -+ || a4 != 4 -+ || a5 != 5 -+ || a6 != 6 -+ || a7 != 7 -+ || a8 != 8) -+ abort (); -+} -+ -+static void __attribute__((noinline)) -+check_args_24 (int a0, int a1, int a2, int a3, int a4, int a5, int a6, int a7, -+ int a8, int a9, int a10) -+{ -+ if (a0 != 0 -+ || a1 != 1 -+ || a2 != 2 -+ || a3 != 3 -+ || a4 != 4 -+ || a5 != 5 -+ || a6 != 6 -+ || a7 != 7 -+ || a8 != 8 -+ || a9 != 9 -+ || a10 != 10) -+ abort (); -+} -+ -+void __attribute__ ((noinline)) -+initialize_array (unsigned char *a, int len) -+{ -+ int i; -+ -+ for (i = 0; i < (len / 2); i++) -+ { -+ a[i] = i; -+ a[len - i - 1] = i; -+ } -+ -+ return; -+} -+ -+#define t_frame_pattern(name, local_size, callee_saved)\ -+int \ -+name (void)\ -+{\ -+ unsigned char a[local_size];\ -+ initialize_array (a, local_size); \ -+ __asm__ ("":::callee_saved); \ -+ if (a[0] != a[local_size - 1] \ -+ || a[0] != 0) \ -+ return 0; \ -+ if (a[local_size / 2 - 1] != a[local_size / 2] \ -+ || a[local_size / 2 - 1] != CVT (local_size / 2 - 1)) \ -+ return 0; \ -+ return 1; \ -+} -+ -+#define t_frame_pattern_outgoing(name, local_size, callee_saved, out_going_num, ...)\ -+int \ -+name (void)\ -+{\ -+ unsigned char a[local_size];\ -+ initialize_array (a, local_size); \ -+ __asm__ ("":::callee_saved); \ -+ if (a[0] != a[local_size - 1] \ -+ || a[0] != 0) \ -+ return 0; \ -+ if (a[local_size / 2 - 1] != a[local_size / 2] \ -+ || a[local_size / 2 - 1] != CVT (local_size / 2 - 1)) \ -+ return 0; \ -+ check_args_ ## out_going_num (a[0], a[1], a[2], a[3], a[4], a[5], a[6],\ -+ a[7], __VA_ARGS__); \ -+ return 1; \ -+} -+ -+#define t_frame_run(name) \ -+int \ -+main (int argc, char **argv) \ -+{\ -+ if (!name ())\ -+ abort ();\ -+ return 0;\ -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/vstN_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vstN_1.c -@@ -0,0 +1,76 @@ -+/* { dg-do run } */ -+/* { dg-options "-O3" } */ -+ -+#include <arm_neon.h> -+ -+extern void abort (void); -+ -+#define TESTMETH(BASE, ELTS, STRUCT, SUFFIX) \ -+int __attribute__ ((noinline)) \ -+test_vst##STRUCT##SUFFIX () \ -+{ \ -+ BASE##_t src[ELTS * STRUCT]; \ -+ BASE##_t dest[ELTS * STRUCT]; \ -+ BASE##x##ELTS##x##STRUCT##_t vectors; \ -+ int i,j; \ -+ for (i = 0; i < STRUCT * ELTS; i++) \ -+ src [i] = (BASE##_t) 2*i + 1; \ -+ for (i = 0; i < STRUCT; i++) \ -+ vectors.val[i] = vld1##SUFFIX (&src[i*ELTS]); \ -+ asm volatile ("" : : : "memory"); \ -+ vst##STRUCT##SUFFIX (dest, vectors); \ -+ asm volatile ("" : : : "memory"); \ -+ for (i = 0; i < STRUCT; i++) \ -+ { \ -+ for (j = 0; j < ELTS; j++) \ -+ if (src[i*ELTS + j] != dest[i + STRUCT*j]) \ -+ return 1; \ -+ } \ -+ return 0; \ -+} -+ -+#define VARIANTS(VARIANT, STRUCT) \ -+VARIANT (uint8, 8, STRUCT, _u8) \ -+VARIANT (uint16, 4, STRUCT, _u16) \ -+VARIANT (uint32, 2, STRUCT, _u32) \ -+VARIANT (uint64, 1, STRUCT, _u64) \ -+VARIANT (int8, 8, STRUCT, _s8) \ -+VARIANT (int16, 4, STRUCT, _s16) \ -+VARIANT (int32, 2, STRUCT, _s32) \ -+VARIANT (int64, 1, STRUCT, _s64) \ -+VARIANT (poly8, 8, STRUCT, _p8) \ -+VARIANT 
(poly16, 4, STRUCT, _p16) \ -+VARIANT (float32, 2, STRUCT, _f32) \ -+VARIANT (float64, 1, STRUCT, _f64) \ -+VARIANT (uint8, 16, STRUCT, q_u8) \ -+VARIANT (uint16, 8, STRUCT, q_u16) \ -+VARIANT (uint32, 4, STRUCT, q_u32) \ -+VARIANT (uint64, 2, STRUCT, q_u64) \ -+VARIANT (int8, 16, STRUCT, q_s8) \ -+VARIANT (int16, 8, STRUCT, q_s16) \ -+VARIANT (int32, 4, STRUCT, q_s32) \ -+VARIANT (int64, 2, STRUCT, q_s64) \ -+VARIANT (poly8, 16, STRUCT, q_p8) \ -+VARIANT (poly16, 8, STRUCT, q_p16) \ -+VARIANT (float32, 4, STRUCT, q_f32) \ -+VARIANT (float64, 2, STRUCT, q_f64) -+ -+/* Tests of vst2 and vst2q. */ -+VARIANTS (TESTMETH, 2) -+/* Tests of vst3 and vst3q. */ -+VARIANTS (TESTMETH, 3) -+/* Tests of vst4 and vst4q. */ -+VARIANTS (TESTMETH, 4) -+ -+#define CHECK(BASE, ELTS, STRUCT, SUFFIX) \ -+ if (test_vst##STRUCT##SUFFIX () != 0) \ -+ abort (); -+ -+int -+main (int argc, char **argv) -+{ -+ VARIANTS (CHECK, 2) -+ VARIANTS (CHECK, 3) -+ VARIANTS (CHECK, 4) -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/vect-fmax-fmin.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fmax-fmin.c -@@ -8,11 +8,11 @@ - #include "vect-fmaxv-fminv.x" - - #define DEFN_SETV(type) \ -- set_vector_##type (pR##type a, type n) \ -- { \ -- int i; \ -- for (i=0; i<16; i++) \ -- a[i] = n; \ -+ void set_vector_##type (pR##type a, type n) \ -+ { \ -+ int i; \ -+ for (i=0; i<16; i++) \ -+ a[i] = n; \ - } - - #define DEFN_CHECKV(type) \ ---- a/src/gcc/testsuite/gcc.target/aarch64/scalar_shift_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/scalar_shift_1.c -@@ -193,7 +193,6 @@ - return b; - } - /* { dg-final { scan-assembler "sshr\td\[0-9\]+,\ d\[0-9\]+,\ 63" } } */ --/* { dg-final { scan-assembler "shl\td\[0-9\]+,\ d\[0-9\]+,\ 1" } } */ - - Int32x1 - test_corners_sisd_si (Int32x1 b) -@@ -207,7 +206,6 @@ - return b; - } - /* { dg-final { scan-assembler "sshr\tv\[0-9\]+\.2s,\ v\[0-9\]+\.2s,\ 31" } } */ --/* { dg-final { scan-assembler "shl\tv\[0-9\]+\.2s,\ v\[0-9\]+\.2s,\ 1" } } */ - - - ---- a/src/gcc/testsuite/gcc.target/aarch64/vbslq_f64_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vbslq_f64_1.c -@@ -0,0 +1,21 @@ -+/* Test vbslq_f64 can be folded. */ -+/* { dg-do assemble } */ -+/* { dg-options "--save-temps -O3" } */ -+ -+#include <arm_neon.h> -+ -+/* Folds to ret. */ -+ -+float32x4_t -+fold_me (float32x4_t a, float32x4_t b) -+{ -+ uint32x4_t mask = {-1, -1, -1, -1}; -+ return vbslq_f32 (mask, a, b); -+} -+ -+/* { dg-final { scan-assembler-not "bsl\\tv" } } */ -+/* { dg-final { scan-assembler-not "bit\\tv" } } */ -+/* { dg-final { scan-assembler-not "bif\\tv" } } */ -+ -+/* { dg-final { cleanup-saved-temps } } */ -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/vect-ld1r.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-ld1r.x -@@ -7,7 +7,7 @@ - for (i = 0; i < 8 / sizeof (TYPE); i++) \ - output[i] = *a; \ - } \ -- foo_ ## TYPE ## _q (TYPE *a, TYPE *output) \ -+ void foo_ ## TYPE ## _q (TYPE *a, TYPE *output) \ - { \ - int i; \ - for (i = 0; i < 32 / sizeof (TYPE); i++) \ ---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_10.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_10.c -@@ -0,0 +1,21 @@ -+/* Verify: -+ * -fomit-frame-pointer. -+ * with outgoing. -+ * total frame size > 512. -+ area except outgoing <= 512 -+ * number of callee-saved reg >= 2. -+ * Split stack adjustment into two subtractions. -+ the first subtractions could be optimized into "stp !". 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */ -+ -+#include "test_frame_common.h" -+ -+t_frame_pattern_outgoing (test10, 480, "x19", 24, a[8], a[9], a[10]) -+t_frame_run (test10) -+ -+/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, -\[0-9\]+\\\]!" 1 } } */ -+/* { dg-final { scan-assembler-times "ldp\tx19, x30, \\\[sp\\\], \[0-9\]+" 1 } } */ -+ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/vrnd_f64_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vrnd_f64_1.c -@@ -0,0 +1,105 @@ -+/* Test vrnd_f64 works correctly. */ -+/* { dg-do run } */ -+/* { dg-options "--save-temps" } */ -+ -+#include "arm_neon.h" -+ -+extern void abort (void); -+ -+/* Bit offset to round mode field in FPCR. */ -+#define RMODE_START 22 -+ -+#define FPROUNDING_ZERO 3 -+ -+/* Set RMODE field of FPCR control register -+ to rounding mode passed. */ -+void __inline __attribute__ ((__always_inline__)) -+set_rounding_mode (uint32_t mode) -+{ -+ uint32_t r; -+ -+ /* Read current FPCR. */ -+ asm volatile ("mrs %[r], fpcr" : [r] "=r" (r) : :); -+ -+ /* Clear rmode. */ -+ r &= ~(3 << RMODE_START); -+ /* Calculate desired FPCR. */ -+ r |= mode << RMODE_START; -+ -+ /* Write desired FPCR back. */ -+ asm volatile ("msr fpcr, %[r]" : : [r] "r" (r) :); -+} -+ -+float64x1_t __attribute__ ((noinline)) -+compare_f64 (float64x1_t passed, float64_t expected) -+{ -+ return (__builtin_fabs (vget_lane_f64 (passed, 0) - expected) -+ > __DBL_EPSILON__); -+} -+ -+void __attribute__ ((noinline)) -+run_round_tests (float64x1_t *tests, -+ float64_t expectations[][6]) -+{ -+ int i; -+ -+ for (i = 0; i < 6; i++) -+ { -+ if (compare_f64 (vrnd_f64 (tests[i]), expectations[0][i])) -+ abort (); -+ if (compare_f64 (vrndx_f64 (tests[i]), expectations[1][i])) -+ abort (); -+ if (compare_f64 (vrndp_f64 (tests[i]), expectations[2][i])) -+ abort (); -+ if (compare_f64 (vrndn_f64 (tests[i]), expectations[3][i])) -+ abort (); -+ if (compare_f64 (vrndm_f64 (tests[i]), expectations[4][i])) -+ abort (); -+ if (compare_f64 (vrndi_f64 (tests[i]), expectations[5][i])) -+ abort (); -+ if (compare_f64 (vrnda_f64 (tests[i]), expectations[6][i])) -+ abort (); -+ } -+} -+ -+int -+main (int argc, char **argv) -+{ -+ float64x1_t tests[6] = -+ { -+ vcreate_f64 (0x3FE0000000000000), /* Hex for: 0.5. */ -+ vcreate_f64 (0x3FD999999999999A), /* Hex for: 0.4. */ -+ vcreate_f64 (0x3FE3333333333333), /* Hex for: 0.6. */ -+ vcreate_f64 (0xBFE0000000000000), /* Hex for: -0.5. */ -+ vcreate_f64 (0xBFD999999999999A), /* Hex for: -0.4. */ -+ vcreate_f64 (0xBFE3333333333333), /* Hex for: -0.6. */ -+ }; -+ -+ float64_t expectations[7][6] = -+ { -+ { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }, /* vrnd - round towards zero. */ -+ { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }, /* vrndx - round using FPCR mode. */ -+ { 1.0, 1.0, 1.0, 0.0, 0.0, 0.0 }, /* vrndp - round to plus infinity. */ -+ { 0.0, 0.0, 1.0, 0.0, 0.0, -1.0 }, /* vrndn - round ties to even. */ -+ { 0.0, 0.0, 0.0, -1.0, -1.0, -1.0 }, /* vrndm - round to minus infinity. */ -+ { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }, /* vrndi - round using FPCR mode. */ -+ { 1.0, 0.0, 1.0, -1.0, 0.0, -1.0 }, /* vrnda - round ties away from 0. */ -+ }; -+ -+ /* Set floating point control register -+ to have predictable vrndx and vrndi behaviour. 
*/ -+ set_rounding_mode (FPROUNDING_ZERO); -+ -+ run_round_tests (tests, expectations); -+ -+ return 0; -+} -+ -+/* { dg-final { scan-assembler-times "frintz\\td\[0-9\]+, d\[0-9\]+" 1 } } */ -+/* { dg-final { scan-assembler-times "frintx\\td\[0-9\]+, d\[0-9\]+" 1 } } */ -+/* { dg-final { scan-assembler-times "frintp\\td\[0-9\]+, d\[0-9\]+" 1 } } */ -+/* { dg-final { scan-assembler-times "frintn\\td\[0-9\]+, d\[0-9\]+" 1 } } */ -+/* { dg-final { scan-assembler-times "frintm\\td\[0-9\]+, d\[0-9\]+" 1 } } */ -+/* { dg-final { scan-assembler-times "frinti\\td\[0-9\]+, d\[0-9\]+" 1 } } */ -+/* { dg-final { scan-assembler-times "frinta\\td\[0-9\]+, d\[0-9\]+" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c -@@ -305,13 +305,28 @@ - return res; - } - --/* { dg-final { scan-assembler-times "\\taddp\\td\[0-9\]+, v\[0-9\]+\.2d" 1 } } */ -+/* { dg-final { scan-assembler-times "\\tfaddp\\td\[0-9\]+, v\[0-9\]+\.2d" 1 } } */ - -+float64_t -+test_vpaddd_f64 (float64x2_t a) -+{ -+ return vpaddd_f64 (a); -+} -+ -+/* { dg-final { scan-assembler-times "\\taddp\\td\[0-9\]+, v\[0-9\]+\.2d" 2 } } */ -+ -+int64_t - test_vpaddd_s64 (int64x2_t a) - { - return vpaddd_s64 (a); - } - -+uint64_t -+test_vpaddd_u64 (uint64x2_t a) -+{ -+ return vpaddd_u64 (a); -+} -+ - /* { dg-final { scan-assembler-times "\\tuqadd\\td\[0-9\]+" 1 } } */ - - uint64x1_t ---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_7.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_7.c -@@ -0,0 +1,20 @@ -+/* Verify: -+ * -fomit-frame-pointer. -+ * without outgoing. -+ * total frame size > 512. -+ * number of callee-saved reg == 2. -+ * split stack adjustment into two subtractions. -+ the second subtraction should use "stp !". */ -+ -+/* { dg-do run } */ -+/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */ -+ -+#include "test_frame_common.h" -+ -+t_frame_pattern (test7, 700, "x19") -+t_frame_run (test7) -+ -+/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, -\[0-9\]+\\\]!" 1 } } */ -+/* { dg-final { scan-assembler-times "ldp\tx19, x30, \\\[sp\\\], \[0-9\]+" 2 } } */ -+ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/pic-symrefplus.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/pic-symrefplus.c -@@ -34,6 +34,9 @@ - values []; - }; - extern const struct locale_data _nl_C_LC_TIME __attribute__ ((visibility ("hidden"))); -+extern void *memset (void *s, int c, size_t n); -+extern size_t strlen (const char *s); -+extern int __strncasecmp_l (const char *s1, const char *s2, size_t n, __locale_t locale); - char * - __strptime_internal (rp, fmt, tmp, statep , locale) - const char *rp; -@@ -40,6 +43,7 @@ - const char *fmt; - __locale_t locale; - void *statep; -+ int tmp; - { - struct locale_data *const current = locale->__locales[__LC_TIME]; - const char *rp_backup; -@@ -124,5 +128,9 @@ - } - char * - __strptime_l (buf, format, tm , locale) -+ int buf; -+ int format; -+ int tm; -+ int locale; - { - } ---- a/src/gcc/testsuite/gcc.target/aarch64/vbslq_f64_2.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vbslq_f64_2.c -@@ -0,0 +1,24 @@ -+/* Test vbslq_f64 can be folded. */ -+/* { dg-do assemble } */ -+/* { dg-options "--save-temps -O3" } */ -+ -+#include <arm_neon.h> -+ -+/* Should fold out one half of the BSL, leaving just a BIC. 
*/ -+ -+float32x4_t -+half_fold_me (uint32x4_t mask) -+{ -+ float32x4_t a = {0.0, 0.0, 0.0, 0.0}; -+ float32x4_t b = {2.0, 4.0, 8.0, 16.0}; -+ return vbslq_f32 (mask, a, b); -+ -+} -+ -+/* { dg-final { scan-assembler-not "bsl\\tv" } } */ -+/* { dg-final { scan-assembler-not "bit\\tv" } } */ -+/* { dg-final { scan-assembler-not "bif\\tv" } } */ -+/* { dg-final { scan-assembler "bic\\tv" } } */ -+ -+/* { dg-final { cleanup-saved-temps } } */ -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_11.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_11.c -@@ -0,0 +1,16 @@ -+/* Verify: -+ * without outgoing. -+ * total frame size <= 512. -+ * number of callee-save reg >= 2. -+ * optimized code should use "stp !" for stack adjustment. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-O2 --save-temps" } */ -+ -+#include "test_frame_common.h" -+ -+t_frame_pattern (test11, 400, ) -+t_frame_run (test11) -+ -+/* { dg-final { scan-assembler-times "stp\tx29, x30, \\\[sp, -\[0-9\]+\\\]!" 2 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/vqneg_s64_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vqneg_s64_1.c -@@ -0,0 +1,47 @@ -+/* Test vqneg_s64 intrinsics work correctly. */ -+/* { dg-do run } */ -+/* { dg-options "--save-temps" } */ -+ -+#include <arm_neon.h> -+ -+extern void abort (void); -+ -+int __attribute__ ((noinline)) -+test_vqneg_s64 (int64x1_t passed, int64_t expected) -+{ -+ return vget_lane_s64 (vqneg_s64 (passed), 0) != expected; -+} -+ -+int __attribute__ ((noinline)) -+test_vqnegd_s64 (int64_t passed, int64_t expected) -+{ -+ return vqnegd_s64 (passed) != expected; -+} -+ -+/* { dg-final { scan-assembler-times "sqneg\\td\[0-9\]+, d\[0-9\]+" 2 } } */ -+ -+int -+main (int argc, char **argv) -+{ -+ /* Basic test. */ -+ if (test_vqneg_s64 (vcreate_s64 (-1), 1)) -+ abort (); -+ if (test_vqnegd_s64 (-1, 1)) -+ abort (); -+ -+ /* Negating max int64_t. */ -+ if (test_vqneg_s64 (vcreate_s64 (0x7fffffffffffffff), 0x8000000000000001)) -+ abort (); -+ if (test_vqnegd_s64 (0x7fffffffffffffff, 0x8000000000000001)) -+ abort (); -+ -+ /* Negating min int64_t. -+ Note, exact negation cannot be represented as int64_t. 
*/ -+ if (test_vqneg_s64 (vcreate_s64 (0x8000000000000000), 0x7fffffffffffffff)) -+ abort (); -+ if (test_vqnegd_s64 (0x8000000000000000, 0x7fffffffffffffff)) -+ abort (); -+ -+ return 0; -+} -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/vget_low_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vget_low_1.c -@@ -0,0 +1,60 @@ -+/* { dg-do run } */ -+/* { dg-options "-O3 -std=c99" } */ -+ -+#include <arm_neon.h> -+ -+extern void abort (void); -+ -+#define VARIANTS(VARIANT) \ -+VARIANT (uint8_t, 8, uint8x8_t, uint8x16_t, u8) \ -+VARIANT (uint16_t, 4, uint16x4_t, uint16x8_t, u16) \ -+VARIANT (uint32_t, 2, uint32x2_t, uint32x4_t, u32) \ -+VARIANT (uint64_t, 1, uint64x1_t, uint64x2_t, u64) \ -+VARIANT (int8_t, 8, int8x8_t, int8x16_t, s8) \ -+VARIANT (int16_t, 4, int16x4_t, int16x8_t, s16) \ -+VARIANT (int32_t, 2, int32x2_t, int32x4_t, s32) \ -+VARIANT (int64_t, 1, int64x1_t, int64x2_t, s64) \ -+VARIANT (float32_t, 2, float32x2_t, float32x4_t, f32) \ -+VARIANT (float64_t, 1, float64x1_t, float64x2_t, f64) -+ -+ -+#define TESTMETH(BASETYPE, NUM64, TYPE64, TYPE128, SUFFIX) \ -+int \ -+test_vget_low_ ##SUFFIX (BASETYPE *data) \ -+{ \ -+ BASETYPE temp [NUM64]; \ -+ TYPE128 vec = vld1q_##SUFFIX (data); \ -+ TYPE64 low = vget_low_##SUFFIX (vec); \ -+ vst1_##SUFFIX (temp, low); \ -+ for (int i = 0; i < NUM64; i++) \ -+ if (temp[i] != data[i]) \ -+ return 1; \ -+ return 0; \ -+} -+ -+VARIANTS (TESTMETH) -+ -+#define CHECK(BASETYPE, NUM64, TYPE64, TYPE128, SUFFIX) \ -+ if (test_vget_low_##SUFFIX (BASETYPE ## _ ## data) != 0) \ -+ abort (); -+ -+int -+main (int argc, char **argv) -+{ -+ uint8_t uint8_t_data[16] = -+ { 1, 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47 }; -+ uint16_t uint16_t_data[8] = { 1, 22, 333, 4444, 55555, 6666, 777, 88 }; -+ uint32_t uint32_t_data[4] = { 65537, 11, 70000, 23 }; -+ uint64_t uint64_t_data[2] = { 0xdeadbeefcafebabeULL, 0x0123456789abcdefULL }; -+ int8_t int8_t_data[16] = -+ { -1, -3, -5, -7, 9, -11, -13, 15, -17, -19, 21, -23, 25, 27, -29, -31 }; -+ int16_t int16_t_data[8] = { -17, 19, 3, -999, 44048, 505, 9999, 1000}; -+ int32_t int32_t_data[4] = { 123456789, -987654321, -135792468, 975318642 }; -+ int64_t int64_t_data[2] = {0xfedcba9876543210LL, 0xdeadbabecafebeefLL }; -+ float32_t float32_t_data[4] = { 3.14159, 2.718, 1.414, 100.0 }; -+ float64_t float64_t_data[2] = { 1.01001000100001, 12345.6789 }; -+ -+ VARIANTS (CHECK); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_8.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_8.c -@@ -0,0 +1,18 @@ -+/* Verify: -+ * -fomit-frame-pointer. -+ * with outgoing. -+ * total frame size bigger than 512. -+ * number of callee-saved reg == 1. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */ -+ -+#include "test_frame_common.h" -+ -+t_frame_pattern_outgoing (test8, 700, , 8, a[8]) -+t_frame_run (test8) -+ -+/* { dg-final { scan-assembler-times "str\tx30, \\\[sp, -\[0-9\]+\\\]!" 
3 } } */ -+/* { dg-final { scan-assembler-times "ldr\tx30, \\\[sp\\\], \[0-9\]+" 3 } } */ -+ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/vset_lane_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vset_lane_1.c -@@ -0,0 +1,85 @@ -+/* { dg-do run } */ -+/* { dg-options "-O3 -fno-inline" } */ -+ -+#include <arm_neon.h> -+ -+extern void abort (void); -+ -+#define VARIANTS(VARIANT) \ -+VARIANT (uint8_t, , 8, uint8x8_t, _u8, 5) \ -+VARIANT (uint16_t, , 4, uint16x4_t, _u16, 3) \ -+VARIANT (uint32_t, , 2, uint32x2_t, _u32, 1) \ -+VARIANT (uint64_t, , 1, uint64x1_t, _u64, 0) \ -+VARIANT (int8_t, , 8, int8x8_t, _s8, 6) \ -+VARIANT (int16_t, , 4, int16x4_t, _s16, 2) \ -+VARIANT (int32_t, , 2, int32x2_t, _s32, 0) \ -+VARIANT (int64_t, , 1, int64x1_t, _s64, 0) \ -+VARIANT (poly8_t, , 8, poly8x8_t, _p8, 6) \ -+VARIANT (poly16_t, , 4, poly16x4_t, _p16, 2) \ -+VARIANT (float32_t, , 2, float32x2_t, _f32, 1) \ -+VARIANT (float64_t, , 1, float64x1_t, _f64, 0) \ -+VARIANT (uint8_t, q, 16, uint8x16_t, _u8, 11) \ -+VARIANT (uint16_t, q, 8, uint16x8_t, _u16, 7) \ -+VARIANT (uint32_t, q, 4, uint32x4_t, _u32, 2) \ -+VARIANT (uint64_t, q, 2, uint64x2_t, _u64, 1) \ -+VARIANT (int8_t, q, 16, int8x16_t, _s8, 13) \ -+VARIANT (int16_t, q, 8, int16x8_t, _s16, 5) \ -+VARIANT (int32_t, q, 4, int32x4_t, _s32, 3) \ -+VARIANT (int64_t, q, 2, int64x2_t, _s64, 0) \ -+VARIANT (poly8_t, q, 16, poly8x16_t, _p8, 14) \ -+VARIANT (poly16_t, q, 8, poly16x8_t, _p16, 6) \ -+VARIANT (float32_t, q, 4, float32x4_t, _f32, 2) \ -+VARIANT (float64_t, q, 2, float64x2_t, _f64, 1) -+ -+#define TESTMETH(BASETYPE, Q, NUM, TYPE, SUFFIX, INDEX) \ -+int \ -+test_vset_lane ##Q##SUFFIX (BASETYPE *data) \ -+{ \ -+ BASETYPE temp [NUM]; \ -+ TYPE vec = vld1##Q##SUFFIX (data); \ -+ TYPE vec2; \ -+ BASETYPE changed = data[INDEX] - INDEX; \ -+ int check; \ -+ vec = vset##Q##_lane##SUFFIX (changed, vec, INDEX); \ -+ asm volatile ("orr %0.16b, %1.16b, %1.16b" \ -+ : "=w"(vec2) : "w" (vec) : ); \ -+ vst1##Q##SUFFIX (temp, vec2); \ -+ for (check = 0; check < NUM; check++) \ -+ { \ -+ BASETYPE desired = data[check]; \ -+ if (check==INDEX) desired = changed; \ -+ if (temp[check] != desired) \ -+ return 1; \ -+ } \ -+ return 0; \ -+} -+ -+VARIANTS (TESTMETH) -+ -+#define CHECK(BASETYPE, Q, NUM, TYPE, SUFFIX, INDEX) \ -+ if (test_vset_lane##Q##SUFFIX (BASETYPE ## _ ## data) != 0) \ -+ abort (); -+ -+int -+main (int argc, char **argv) -+{ -+ uint8_t uint8_t_data[16] = -+ { 1, 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47 }; -+ uint16_t uint16_t_data[8] = { 1, 22, 333, 4444, 55555, 6666, 777, 88 }; -+ uint32_t uint32_t_data[4] = { 65537, 11, 70000, 23 }; -+ uint64_t uint64_t_data[2] = { 0xdeadbeefcafebabeULL, 0x0123456789abcdefULL }; -+ int8_t int8_t_data[16] = -+ { -1, -3, -5, -7, 9, -11, -13, 15, -17, -19, 21, -23, 25, 27, -29, -31 }; -+ int16_t int16_t_data[8] = { -17, 19, 3, -999, 44048, 505, 9999, 1000}; -+ int32_t int32_t_data[4] = { 123456789, -987654321, -135792468, 975318642 }; -+ int64_t int64_t_data[2] = {0xfedcba9876543210LL, 0xdeadbabecafebeefLL }; -+ poly8_t poly8_t_data[16] = -+ { 0, 7, 13, 18, 22, 25, 27, 28, 29, 31, 34, 38, 43, 49, 56, 64 }; -+ poly16_t poly16_t_data[8] = { 11111, 2222, 333, 44, 5, 65432, 54321, 43210 }; -+ float32_t float32_t_data[4] = { 3.14159, 2.718, 1.414, 100.0 }; -+ float64_t float64_t_data[2] = { 1.01001000100001, 12345.6789 }; -+ -+ VARIANTS (CHECK); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_12.c -+++ 
b/src/gcc/testsuite/gcc.target/aarch64/test_frame_12.c -@@ -0,0 +1,19 @@ -+/* Verify: -+ * with outgoing. -+ * total frame size <= 512. -+ * number of callee-save reg >= 2. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-O2 --save-temps" } */ -+ -+#include "test_frame_common.h" -+ -+t_frame_pattern_outgoing (test12, 400, , 8, a[8]) -+t_frame_run (test12) -+ -+/* { dg-final { scan-assembler-times "sub\tsp, sp, #\[0-9\]+" 1 } } */ -+ -+/* Check epilogue using write-back. */ -+/* { dg-final { scan-assembler-times "ldp\tx29, x30, \\\[sp\\\], \[0-9\]+" 3 } } */ -+ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/lib/gcc.exp -+++ b/src/gcc/testsuite/lib/gcc.exp -@@ -126,7 +126,9 @@ - global GCC_UNDER_TEST - global TOOL_OPTIONS - global TEST_ALWAYS_FLAGS -- -+ global flags_to_postpone -+ global board_info -+ - if {[target_info needs_status_wrapper] != "" && \ - [target_info needs_status_wrapper] != "0" && \ - [info exists gluefile] } { -@@ -162,8 +164,26 @@ - set options [concat "{additional_flags=$TOOL_OPTIONS}" $options] - } - -+ # bind_pic_locally adds -fpie/-fPIE flags to flags_to_postpone and it is -+ # appended here to multilib_flags as it can be overridden by the latter -+ # if it was added earlier. After the target_compile, multilib_flags is -+ # restored to its orignal content. -+ set tboard [target_info name] -+ if {[board_info $tboard exists multilib_flags]} { -+ set orig_multilib_flags "[board_info [target_info name] multilib_flags]" -+ append board_info($tboard,multilib_flags) " $flags_to_postpone" -+ } -+ - lappend options "timeout=[timeout_value]" - lappend options "compiler=$GCC_UNDER_TEST" - set options [dg-additional-files-options $options $source] -- return [target_compile $source $dest $type $options] -+ set return_val [target_compile $source $dest $type $options] -+ -+ if {[board_info $tboard exists multilib_flags]} { -+ set board_info($tboard,multilib_flags) $orig_multilib_flags -+ set flags_to_postpone "" -+ } -+ -+ return $return_val - } -+ ---- a/src/gcc/testsuite/lib/g++.exp -+++ b/src/gcc/testsuite/lib/g++.exp -@@ -288,6 +288,8 @@ - global gluefile wrap_flags - global ALWAYS_CXXFLAGS - global GXX_UNDER_TEST -+ global flags_to_postpone -+ global board_info - - if { [target_info needs_status_wrapper] != "" && [info exists gluefile] } { - lappend options "libs=${gluefile}" -@@ -313,10 +315,25 @@ - exec rm -f $rponame - } - -+ # bind_pic_locally adds -fpie/-fPIE flags to flags_to_postpone and it is -+ # appended here to multilib_flags as it can be overridden by the latter -+ # if it was added earlier. After the target_compile, multilib_flags is -+ # restored to its orignal content. -+ set tboard [target_info name] -+ if {[board_info $tboard exists multilib_flags]} { -+ set orig_multilib_flags "[board_info [target_info name] multilib_flags]" -+ append board_info($tboard,multilib_flags) " $flags_to_postpone" -+ } -+ - set options [dg-additional-files-options $options $source] - - set result [target_compile $source $dest $type $options] - -+ if {[board_info $tboard exists multilib_flags]} { -+ set board_info($tboard,multilib_flags) $orig_multilib_flags -+ set flags_to_postpone "" -+ } -+ - return $result - } - ---- a/src/gcc/testsuite/lib/wrapper.exp -+++ b/src/gcc/testsuite/lib/wrapper.exp -@@ -34,9 +34,11 @@ - # became true for dejagnu-1.4.4. The set of warnings and code - # that gcc objects on may change, so just make sure -w is always - # passed to turn off all warnings. 
-+ unset_currtarget_info wrap_compile_flags - set_currtarget_info wrap_compile_flags \ - "$saved_wrap_compile_flags -w $flags" - set result [build_wrapper $filename] -+ unset_currtarget_info wrap_compile_flags - set_currtarget_info wrap_compile_flags "$saved_wrap_compile_flags" - if { $result != "" } { - set gluefile [lindex $result 0] ---- a/src/gcc/testsuite/lib/compat.exp -+++ b/src/gcc/testsuite/lib/compat.exp -@@ -134,7 +134,6 @@ - "$options"] - if ![${tool}_check_compile "$testcase $testname link" "" \ - $dest $comp_output] then { -- unresolved "$testcase $testname execute $optstr" - return - } - ---- a/src/gcc/testsuite/lib/gcc-defs.exp -+++ b/src/gcc/testsuite/lib/gcc-defs.exp -@@ -54,14 +54,19 @@ - if { [info proc ${tool}-dg-prune] != "" } { - global target_triplet - set gcc_output [${tool}-dg-prune $target_triplet $gcc_output] -+ if [string match "*::unsupported::*" $gcc_output] then { -+ regsub -- "::unsupported::" $gcc_output "" gcc_output -+ unsupported "$testcase: $gcc_output" -+ return 0 -+ } -+ } else { -+ set unsupported_message [${tool}_check_unsupported_p $gcc_output] -+ if { $unsupported_message != "" } { -+ unsupported "$testcase: $unsupported_message" -+ return 0 -+ } - } - -- set unsupported_message [${tool}_check_unsupported_p $gcc_output] -- if { $unsupported_message != "" } { -- unsupported "$testcase: $unsupported_message" -- return 0 -- } -- - # remove any leftover LF/CR to make sure any output is legit - regsub -all -- "\[\r\n\]*" $gcc_output "" gcc_output - ---- a/src/gcc/testsuite/lib/gfortran.exp -+++ b/src/gcc/testsuite/lib/gfortran.exp -@@ -234,6 +234,8 @@ - global gluefile wrap_flags - global ALWAYS_GFORTRANFLAGS - global GFORTRAN_UNDER_TEST -+ global flags_to_postpone -+ global board_info - - if { [target_info needs_status_wrapper] != "" && [info exists gluefile] } { - lappend options "libs=${gluefile}" -@@ -240,10 +242,27 @@ - lappend options "ldflags=${wrap_flags}" - } - -+ # bind_pic_locally adds -fpie/-fPIE flags to flags_to_postpone and it is -+ # appended here to multilib_flags as it can be overridden by the latter -+ # if it was added earlier. After the target_compile, multilib_flags is -+ # restored to its orignal content. 
-+ set tboard [target_info name] -+ if {[board_info $tboard exists multilib_flags]} { -+ set orig_multilib_flags "[board_info [target_info name] multilib_flags]" -+ append board_info($tboard,multilib_flags) " $flags_to_postpone" -+ } -+ - lappend options "compiler=$GFORTRAN_UNDER_TEST" - lappend options "timeout=[timeout_value]" - - set options [concat "$ALWAYS_GFORTRANFLAGS" $options] - set options [dg-additional-files-options $options $source] -- return [target_compile $source $dest $type $options] -+ set return_val [target_compile $source $dest $type $options] -+ -+ if {[board_info $tboard exists multilib_flags]} { -+ set board_info($tboard,multilib_flags) $orig_multilib_flags -+ set flags_to_postpone "" -+ } -+ -+ return $return_val - } ---- a/src/gcc/testsuite/lib/target-supports.exp -+++ b/src/gcc/testsuite/lib/target-supports.exp -@@ -2261,7 +2261,7 @@ - }] - } - --# Return 1 is this is an arm target using 32-bit instructions -+# Return 1 if this is an arm target using 32-bit instructions - proc check_effective_target_arm32 { } { - return [check_no_compiler_messages arm32 assembly { - #if !defined(__arm__) || (defined(__thumb__) && !defined(__thumb2__)) -@@ -2270,10 +2270,10 @@ - }] - } - --# Return 1 is this is an arm target not using Thumb -+# Return 1 if this is an arm target not using Thumb - proc check_effective_target_arm_nothumb { } { - return [check_no_compiler_messages arm_nothumb assembly { -- #if (defined(__thumb__) || defined(__thumb2__)) -+ #if !defined(__arm__) || (defined(__thumb__) || defined(__thumb2__)) - #error FOO - #endif - }] -@@ -2394,6 +2394,7 @@ - foreach flags {"" "-mfloat-abi=softfp" "-mfpu=crypto-neon-fp-armv8" "-mfpu=crypto-neon-fp-armv8 -mfloat-abi=softfp"} { - if { [check_no_compiler_messages_nocache arm_crypto_ok object { - #include "arm_neon.h" -+ extern uint8x16_t vaeseq_u8 (uint8x16_t, uint8x16_t); - uint8x16_t - foo (uint8x16_t a, uint8x16_t b) - { -@@ -2538,6 +2539,7 @@ - "-mfpu=neon-fp16 -mfloat-abi=softfp"} { - if { [check_no_compiler_messages_nocache arm_neon_fp_16_ok object { - #include "arm_neon.h" -+ extern float16x4_t vcvt_f16_f32 (float32x4_t); - float16x4_t - foo (float32x4_t arg) - { -@@ -2613,6 +2615,7 @@ - foreach flags {"" "-mfloat-abi=softfp" "-mfpu=neon-vfpv4" "-mfpu=neon-vfpv4 -mfloat-abi=softfp"} { - if { [check_no_compiler_messages_nocache arm_neonv2_ok object { - #include "arm_neon.h" -+ extern float32x2_t vfma_f32 (float32x2_t, float32x2_t, float32x2_t); - float32x2_t - foo (float32x2_t a, float32x2_t b, float32x2_t c) - { -@@ -3324,6 +3327,43 @@ - return $et_vect_shift_saved - } - -+proc check_effective_target_whole_vector_shift { } { -+ if { [istarget x86_64-*-*] -+ || [istarget ia64-*-*] -+ || ([check_effective_target_arm32] -+ && [check_effective_target_arm_little_endian]) -+ || ([istarget mips*-*-*] -+ && [check_effective_target_mips_loongson]) } { -+ set answer 1 -+ } else { -+ set answer 0 -+ } -+ -+ verbose "check_effective_target_vect_long: returning $answer" 2 -+ return $answer -+} -+ -+# Return 1 if the target supports vector bswap operations. 
-+ -+proc check_effective_target_vect_bswap { } { -+ global et_vect_bswap_saved -+ -+ if [info exists et_vect_bswap_saved] { -+ verbose "check_effective_target_vect_bswap: using cached result" 2 -+ } else { -+ set et_vect_bswap_saved 0 -+ if { [istarget aarch64*-*-*] -+ || ([istarget arm*-*-*] -+ && [check_effective_target_arm_neon]) -+ } { -+ set et_vect_bswap_saved 1 -+ } -+ } -+ -+ verbose "check_effective_target_vect_bswap: returning $et_vect_bswap_saved" 2 -+ return $et_vect_bswap_saved -+} -+ - # Return 1 if the target supports hardware vector shift operation for char. - - proc check_effective_target_vect_shift_char { } { -@@ -3522,8 +3562,7 @@ - } else { - set et_vect_perm_saved 0 - if { [is-effective-target arm_neon_ok] -- || ([istarget aarch64*-*-*] -- && [is-effective-target aarch64_little_endian]) -+ || [istarget aarch64*-*-*] - || [istarget powerpc*-*-*] - || [istarget spu-*-*] - || [istarget i?86-*-*] -@@ -5206,16 +5245,26 @@ - return $flags - } - -+if {![info exists flags_to_postpone]} { -+ set flags_to_postpone "" -+} -+ - # Add to FLAGS the flags needed to enable functions to bind locally - # when using pic/PIC passes in the testsuite. -+proc add_options_for_bind_pic_locally { flags } { -+ global flags_to_postpone - --proc add_options_for_bind_pic_locally { flags } { -+ # Instead of returning 'flags' with the -fPIE or -fpie appended, we save it -+ # in 'flags_to_postpone' and append it later in gcc_target_compile procedure in -+ # order to make sure that the multilib_flags doesn't override this. -+ - if {[check_no_compiler_messages using_pic2 assembly { - #if __PIC__ != 2 - #error FOO - #endif - }]} { -- return "$flags -fPIE" -+ set flags_to_postpone "-fPIE" -+ return $flags - } - if {[check_no_compiler_messages using_pic1 assembly { - #if __PIC__ != 1 -@@ -5222,9 +5271,9 @@ - #error FOO - #endif - }]} { -- return "$flags -fpie" -+ set flags_to_postpone "-fpie" -+ return $flags - } -- - return $flags - } - ---- a/src/gcc/testsuite/ChangeLog.linaro -+++ b/src/gcc/testsuite/ChangeLog.linaro -@@ -0,0 +1,1031 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2015-01-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r218451. -+ 2014-12-06 James Greenhalgh <james.greenhalgh@arm.com> -+ Sebastian Pop <s.pop@samsung.com> -+ Brian Rzycki <b.rzycki@samsung.com> -+ -+ PR tree-optimization/54742 -+ * gcc.dg/tree-ssa/ssa-dom-thread-6.c: New test. -+ * gcc.dg/tree-ssa/ssa-dom-thread-7.c: New test. -+ -+2015-01-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211075. -+ 2014-04-30 Alan Lawrence <alan.lawrence@arm.com> -+ -+ gcc.target/arm/simd/vrev16p8_1.c: New file. -+ gcc.target/arm/simd/vrev16qp8_1.c: New file. -+ gcc.target/arm/simd/vrev16qs8_1.c: New file. -+ gcc.target/arm/simd/vrev16qu8_1.c: New file. -+ gcc.target/arm/simd/vrev16s8_1.c: New file. -+ gcc.target/arm/simd/vrev16u8_1.c: New file. -+ gcc.target/arm/simd/vrev32p16_1.c: New file. -+ gcc.target/arm/simd/vrev32p8_1.c: New file. -+ gcc.target/arm/simd/vrev32qp16_1.c: New file. -+ gcc.target/arm/simd/vrev32qp8_1.c: New file. -+ gcc.target/arm/simd/vrev32qs16_1.c: New file. -+ gcc.target/arm/simd/vrev32qs8_1.c: New file. -+ gcc.target/arm/simd/vrev32qu16_1.c: New file. -+ gcc.target/arm/simd/vrev32qu8_1.c: New file. -+ gcc.target/arm/simd/vrev32s16_1.c: New file. -+ gcc.target/arm/simd/vrev32s8_1.c: New file. -+ gcc.target/arm/simd/vrev32u16_1.c: New file. -+ gcc.target/arm/simd/vrev32u8_1.c: New file. -+ gcc.target/arm/simd/vrev64f32_1.c: New file. 
-+ gcc.target/arm/simd/vrev64p16_1.c: New file. -+ gcc.target/arm/simd/vrev64p8_1.c: New file. -+ gcc.target/arm/simd/vrev64qf32_1.c: New file. -+ gcc.target/arm/simd/vrev64qp16_1.c: New file. -+ gcc.target/arm/simd/vrev64qp8_1.c: New file. -+ gcc.target/arm/simd/vrev64qs16_1.c: New file. -+ gcc.target/arm/simd/vrev64qs32_1.c: New file. -+ gcc.target/arm/simd/vrev64qs8_1.c: New file. -+ gcc.target/arm/simd/vrev64qu16_1.c: New file. -+ gcc.target/arm/simd/vrev64qu32_1.c: New file. -+ gcc.target/arm/simd/vrev64qu8_1.c: New file. -+ gcc.target/arm/simd/vrev64s16_1.c: New file. -+ gcc.target/arm/simd/vrev64s32_1.c: New file. -+ gcc.target/arm/simd/vrev64s8_1.c: New file. -+ gcc.target/arm/simd/vrev64u16_1.c: New file. -+ gcc.target/arm/simd/vrev64u32_1.c: New file. -+ gcc.target/arm/simd/vrev64u8_1.c: New file. -+ -+2015-01-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209620. -+ 2014-04-22 Vidya Praveen <vidyapraveen@arm.com> -+ -+ * gcc.target/aarch64/cvtf_1.c: New. -+ -+2015-01-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r217362. -+ 2014-11-11 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * gcc.target/aarch64/vbslq_f64_1.c: New. -+ * gcc.target/aarch64/vbslq_f64_2.c: Likewise. -+ * gcc.target/aarch64/vbslq_u64_1.c: Likewise. -+ * gcc.target/aarch64/vbslq_u64_2.c: Likewise. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r217742. -+ 2014-11-18 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ PR target/63937 -+ * gcc.dg/memset-2.c: New. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r216638. -+ 2014-10-24 Christophe Lyon <christophe.lyon@linaro.org> -+ -+ * lib/wrapper.exp ({tool}_maybe_build_wrapper): Clear -+ wrap_compile_flags before setting it. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r216544. -+ 2014-10-22 Jiong Wang <jiong.wang@arm.com> -+ -+ * gcc.target/aarch64/pic-constantpool1.c: Add explicit declaration. -+ * gcc.target/aarch64/pic-symrefplus.c: Likewise. -+ * gcc.target/aarch64/reload-valid-spoff.c: Likewise. -+ * gcc.target/aarch64/vect.x: Likewise. -+ * gcc.target/aarch64/vect-ld1r.x: Add return type. -+ * gcc.target/aarch64/vect-fmax-fmin.c: Likewise. -+ * gcc.target/aarch64/vect-fp.c: Likewise. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r216543. -+ 2014-10-22 Jiong Wang <jiong.wang@arm.com> -+ -+ * lib/compat.exp (compat-run): Remove "unresolved". -+ * lib/gcc-defs.exp (${tools}_check_compile): Update code logic for -+ unsupported testcase. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r216517. -+ 2014-10-21 Jiong Wang <jiong.wang@arm.com> -+ -+ * gcc.target/arm/20031108-1.c (Proc_7): Add explicit declaration. -+ (Proc_1): Add return type. -+ * gcc.target/arm/cold-lc.c (show_stack): Add explict declaration. -+ * gcc.target/arm/neon-modes-2.c (foo): Likewise. -+ * gcc.target/arm/pr43920-2.c (lseek): Likewise. -+ * gcc.target/arm/pr44788.c (foo): Likewise. -+ * gcc.target/arm/pr55642.c (abs): Likewise. -+ * gcc.target/arm/pr58784.c (f): Likewise. -+ * gcc.target/arm/pr60650.c (foo1, foo2): Likewise. -+ * gcc.target/arm/vfp-ldmdbs.c (bar): Likewise. -+ * gcc.target/arm/vfp-ldmias.c (bar): Likewise. -+ * gcc.target/arm/pr60650-2.c (fn1, fn2): Add return type and add type -+ for local variables. -+ * lib/target-supports.exp -+ (check_effective_target_arm_crypto_ok_nocache): Add declaration for -+ vaeseq_u8. 
-+ (check_effective_target_arm_neon_fp16_ok_nocache): Add declaration for -+ vcvt_f16_f32. -+ (check_effective_target_arm_neonv2_ok_nocache): Add declaration for -+ vfma_f32. -+ * gcc.target/arm/pr51968.c: Add -Wno-implicit-function-declaration. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215071. -+ 2014-09-09 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.target/aarch64/simd/int_comparisons_1.c: Tighten regexp. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215540. -+ 2014-09-24 Zhenqiang Chen <zhenqiang.chen@arm.com> -+ -+ * gcc.target/arm/pr63210.c: New test. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215475. -+ 2014-09-22 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.dg/vect/vect-reduc-or_1.c: New test. -+ * gcc.dg/vect/vect-reduc-or_2.c: Likewise. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215473. -+ 2014-09-22 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * lib/target-supports.exp (check_effective_target_whole_vector_shift): -+ New. -+ -+ * gcc.dg/vect/vect-reduc-mul_1.c: New test. -+ * gcc.dg/vect/vect-reduc-mul_2.c: New test. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215177. -+ 2014-09-11 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.target/aarch64/vset_lane_1.c: New test. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215129. -+ 2014-09-10 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.target/aarch64/vstN_1.c: New test. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215126. -+ 2014-09-10 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.target/aarch64/vldN_lane_1.c: New test. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215078. -+ 2014-09-09 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.target/aarch64/vldN_dup_1.c: New test. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215077. -+ 2014-09-09 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.target/aarch64/vld1-vst1_1.c: Rewrite to test all variants. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215072. -+ 2014-09-09 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.target/aarch64/vldN_1.c: New test. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215047. -+ 2014-09-09 Tony Wang <tony.wang@arm.com> -+ -+ * gcc.target/arm/xordi3-opt.c: Disable this -+ test case for thumb1 target. -+ * gcc.target/arm/iordi3-opt.c: Ditto. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215046. -+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ PR target/61749 -+ * gcc.target/aarch64/vqdml_lane_intrinsics-bad_1.c: New test. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r214950. -+ 2014-09-05 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.target/aarch64/vget_high_1.c: New test. -+ * gcc.target/aarch64/vget_low_1.c: Likewise. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r214948. -+ 2014-09-05 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.target/aarch64/simd/int_comparisons.x: New file. -+ * gcc.target/aarch64/simd/int_comparisons_1.c: New test. -+ * gcc.target/aarch64/simd/int_comparisons_2.c: Ditto. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213382. 
-+ 2014-07-31 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * gcc.target/aarch64/scalar_intrinsics.c (test_vpaddd_f64): New. -+ (test_vpaddd_s64): Likewise. -+ (test_vpaddd_s64): Likewise. -+ * gcc.target/aarch64/simd/vpaddd_f64: New. -+ * gcc.target/aarch64/simd/vpaddd_s64: New. -+ * gcc.target/aarch64/simd/vpaddd_u64: New. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-10-08 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r214825, r214826, r215085. -+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * gcc.target/arm/vect-lceilf_1.c: Make input and output arrays global -+ and 16-byte aligned. -+ * gcc.target/arm/vect-lfloorf_1.c: Likewise. -+ * gcc.target/arm/vect-lroundf_1.c: Likewise. -+ * gcc.target/arm/vect-rounding-btruncf.c: Likewise. -+ * gcc.target/arm/vect-rounding-ceilf.c: Likewise. -+ * gcc.target/arm/vect-rounding-floorf.c: Likewise. -+ * gcc.target/arm/vect-rounding-roundf.c: Likewise. -+ -+ 2014-09-02 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ PR target/62275 -+ * gcc.target/arm/vect-lceilf_1.c: New test. -+ * gcc.target/arm/vect-lfloorf_1.c: Likewise. -+ * gcc.target/arm/vect-lroundf_1.c: Likewise. -+ -+ 2014-09-02 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ PR target/62275 -+ * gcc.target/arm/lceil-vcvt_1.c: New test. -+ * gcc.target/arm/lfloor-vcvt_1.c: Likewise. -+ * gcc.target/arm/lround-vcvt_1.c: Likewise. -+ -+2014-10-06 Venkataramanan Kumar <venkataramanan.kumar@linaro.org> -+ -+ Backport from trunk r214943. -+ 2014-09-05 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.target/aarch64/simd/vrbit_1.c: New test. -+ -+2014-10-06 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215385. -+ 2014-09-19 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * gcc.dg/ssp-3.c: New. -+ * gcc.dg/ssp-4.c: Likewise. -+ -+2014-10-06 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215136. -+ 2014-09-10 Xinliang David Li <davidxl@google.com> -+ -+ PR target/63209 -+ * gcc.c-torture/execute/pr63209.c: New test. -+ -+2014-10-06 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215067. -+ 2014-09-09 Jiong Wang <jiong.wang@arm.com> -+ -+ * gcc.target/arm/vect-copysignf.c: New testcase. -+ -+2014-10-03 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215050, r215051, r215052, r215053, r215054. -+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * gcc.target/arm/vfp-1.c: Updated expected assembly. -+ -+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * gcc.target/arm/vfp-1.c: Updated expected assembly. -+ -+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * gcc.target/arm/vfp-1.c: Updated expected assembly. -+ -+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * gcc.target/arm/vfp-1.c: Updated expected assembly. -+ -+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * gcc.target/arm/pr51835.c: Update expected assembly. -+ * gcc.target/arm/vfp-1.c: Likewise. -+ * gcc.target/arm/vfp-ldmdbd.c: Likewise. -+ * gcc.target/arm/vfp-ldmdbs.c: Likewise. -+ * gcc.target/arm/vfp-ldmiad.c: Likewise. -+ * gcc.target/arm/vfp-ldmias.c: Likewise. -+ * gcc.target/arm/vfp-stmdbd.c: Likewise. -+ * gcc.target/arm/vfp-stmdbs.c: Likewise. -+ * gcc.target/arm/vfp-stmiad.c: Likewise. -+ * gcc.target/arm/vfp-stmias.c: Likewise. 
-+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-09-03 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r214526. -+ 2014-08-26 Joseph Myers <joseph@codesourcery.com> -+ -+ PR target/60606 -+ PR target/61330 -+ * gcc.dg/torture/pr60606-1.c, gcc.target/arm/pr60606-2.c, -+ gcc.target/arm/pr60606-3.c, gcc.target/arm/pr60606-4.c: New tests. -+ -+2014-09-03 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213659. -+ 2014-08-06 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.target/aarch64/vdup_n_2.c: New test. -+ -+2014-08-26 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213701. -+ 2014-08-07 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * gcc.dg/pr61756.c: Remove arm-specific dg-options. -+ -+2014-08-26 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213488, r213489. -+ 2014-08-01 Jiong Wang <jiong.wang@arm.com> -+ -+ * gcc.target/aarch64/legitimize_stack_var_before_reload_1.c: New -+ testcase. -+ -+2014-08-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r212927. -+ 2014-07-23 Jiong Wang <jiong.wang@arm.com> -+ -+ * gcc.dg/ira-shrinkwrap-prep-1.c (target): Add arm_nothumb. -+ * gcc.dg/ira-shrinkwrap-prep-2.c (target): Likewise. -+ * gcc.dg/pr10474.c (target): Likewise. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-08-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213555. -+ 2014-08-04 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ PR target/61713 -+ * gcc.dg/pr61756.c: New test. -+ -+2014-08-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213376. -+ 2014-07-31 Charles Baylis <charles.baylis@linaro.org> -+ -+ PR target/61948 -+ * gcc.target/arm/pr61948.c: New test case. -+ -+2014-08-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r212959, r212976, r212999, r213000. -+ 2014-07-24 Jiong Wang <jiong.wang@arm.com> -+ -+ * gcc.target/aarch64/test_frame_1.c: Match optimized instruction -+ sequences. -+ * gcc.target/aarch64/test_frame_2.c: Likewise. -+ * gcc.target/aarch64/test_frame_4.c: Likewise. -+ * gcc.target/aarch64/test_frame_6.c: Likewise. -+ * gcc.target/aarch64/test_frame_7.c: Likewise. -+ * gcc.target/aarch64/test_frame_8.c: Likewise. -+ * gcc.target/aarch64/test_frame_10.c: Likewise. -+ -+ 2014-07-24 Jiong Wang <jiong.wang@arm.com> -+ -+ * gcc.target/aarch64/test_frame_1.c: Match optimized instruction -+ sequences. -+ * gcc.target/aarch64/test_frame_10.c: Likewise. -+ * gcc.target/aarch64/test_frame_2.c: Likewise. -+ * gcc.target/aarch64/test_frame_4.c: Likewise. -+ * gcc.target/aarch64/test_frame_6.c: Likewise. -+ * gcc.target/aarch64/test_frame_7.c: Likewise. -+ * gcc.target/aarch64/test_frame_8.c: Likewise. -+ * gcc.target/aarch64/test_fp_attribute_1.c: Likewise. -+ -+ 2014-07-24 Jiong Wang <jiong.wang@arm.com> -+ -+ * gcc.target/aarch64/test_frame_12.c: Match optimized instruction -+ sequences. -+ -+ 2014-07-23 Jiong Wang <jiong.wang@arm.com> -+ -+ * gcc.target/aarch64/test_frame_common.h: New file. -+ * gcc.target/aarch64/test_frame_1.c: Likewise. -+ * gcc.target/aarch64/test_frame_2.c: Likewise. -+ * gcc.target/aarch64/test_frame_3.c: Likewise. -+ * gcc.target/aarch64/test_frame_4.c: Likewise. -+ * gcc.target/aarch64/test_frame_5.c: Likewise. -+ * gcc.target/aarch64/test_frame_6.c: Likewise. -+ * gcc.target/aarch64/test_frame_7.c: Likewise. -+ * gcc.target/aarch64/test_frame_8.c: Likewise. -+ * gcc.target/aarch64/test_frame_9.c: Likewise. 
-+ * gcc.target/aarch64/test_frame_10.c: Likewise. -+ * gcc.target/aarch64/test_frame_11.c: Likewise. -+ * gcc.target/aarch64/test_frame_12.c: Likewise. -+ * gcc.target/aarch64/test_frame_13.c: Likewise. -+ * gcc.target/aarch64/test_frame_14.c: Likewise. -+ * gcc.target/aarch64/test_frame_15.c: Likewise. -+ -+2014-08-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r212023, r212024. -+ 2014-06-26 Vidya Praveen <vidyapraveen@arm.com> -+ -+ * gcc.dg/inline-22.c: Add bind_pic_locally. -+ * gcc.dg/inline_4.c: Ditto. -+ * gcc.dg/fail_always_inline.c: Ditto. -+ * g++.dg/ipa/devirt-25.C: Ditto. -+ -+ 2014-06-26 Vidya Praveen <vidyapraveen@arm.com> -+ -+ * lib/target-support.exp (bind_pic_locally): Save the flags to -+ 'flags_to_postpone' instead of appending to 'flags'. -+ * lib/gcc.exp (gcc_target_compile): Append board_info's multilib_flags -+ with flags_to_postpone and revert after target_compile. -+ * lib/g++.exp (g++_target_compile): Ditto. -+ * lib/gfortran.exp (gfortran_target_compile): Ditto. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211887. -+ 2014-06-23 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * gcc.target/aarch64/scalar_shift_1.c: Fix expected assembler. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211441. -+ 2014-06-11 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * gcc.target/aarch64/acle/acle.exp: New. -+ * gcc.target/aarch64/acle/crc32b.c: New test. -+ * gcc.target/aarch64/acle/crc32cb.c: Likewise. -+ * gcc.target/aarch64/acle/crc32cd.c: Likewise. -+ * gcc.target/aarch64/acle/crc32ch.c: Likewise. -+ * gcc.target/aarch64/acle/crc32cw.c: Likewise. -+ * gcc.target/aarch64/acle/crc32d.c: Likewise. -+ * gcc.target/aarch64/acle/crc32h.c: Likewise. -+ * gcc.target/aarch64/acle/crc32w.c: Likewise. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r210153. -+ 2014-05-07 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.target/aarch64/simd/vrev16p8_1.c: New file. -+ * gcc.target/aarch64/simd/vrev16p8.x: New file. -+ * gcc.target/aarch64/simd/vrev16qp8_1.c: New file. -+ * gcc.target/aarch64/simd/vrev16qp8.x: New file. -+ * gcc.target/aarch64/simd/vrev16qs8_1.c: New file. -+ * gcc.target/aarch64/simd/vrev16qs8.x: New file. -+ * gcc.target/aarch64/simd/vrev16qu8_1.c: New file. -+ * gcc.target/aarch64/simd/vrev16qu8.x: New file. -+ * gcc.target/aarch64/simd/vrev16s8_1.c: New file. -+ * gcc.target/aarch64/simd/vrev16s8.x: New file. -+ * gcc.target/aarch64/simd/vrev16u8_1.c: New file. -+ * gcc.target/aarch64/simd/vrev16u8.x: New file. -+ * gcc.target/aarch64/simd/vrev32p16_1.c: New file. -+ * gcc.target/aarch64/simd/vrev32p16.x: New file. -+ * gcc.target/aarch64/simd/vrev32p8_1.c: New file. -+ * gcc.target/aarch64/simd/vrev32p8.x: New file. -+ * gcc.target/aarch64/simd/vrev32qp16_1.c: New file. -+ * gcc.target/aarch64/simd/vrev32qp16.x: New file. -+ * gcc.target/aarch64/simd/vrev32qp8_1.c: New file. -+ * gcc.target/aarch64/simd/vrev32qp8.x: New file. -+ * gcc.target/aarch64/simd/vrev32qs16_1.c: New file. -+ * gcc.target/aarch64/simd/vrev32qs16.x: New file. -+ * gcc.target/aarch64/simd/vrev32qs8_1.c: New file. -+ * gcc.target/aarch64/simd/vrev32qs8.x: New file. -+ * gcc.target/aarch64/simd/vrev32qu16_1.c: New file. -+ * gcc.target/aarch64/simd/vrev32qu16.x: New file. 
-+ * gcc.target/aarch64/simd/vrev32qu8_1.c: New file. -+ * gcc.target/aarch64/simd/vrev32qu8.x: New file. -+ * gcc.target/aarch64/simd/vrev32s16_1.c: New file. -+ * gcc.target/aarch64/simd/vrev32s16.x: New file. -+ * gcc.target/aarch64/simd/vrev32s8_1.c: New file. -+ * gcc.target/aarch64/simd/vrev32s8.x: New file. -+ * gcc.target/aarch64/simd/vrev32u16_1.c: New file. -+ * gcc.target/aarch64/simd/vrev32u16.x: New file. -+ * gcc.target/aarch64/simd/vrev32u8_1.c: New file. -+ * gcc.target/aarch64/simd/vrev32u8.x: New file. -+ * gcc.target/aarch64/simd/vrev64f32_1.c: New file. -+ * gcc.target/aarch64/simd/vrev64f32.x: New file. -+ * gcc.target/aarch64/simd/vrev64p16_1.c: New file. -+ * gcc.target/aarch64/simd/vrev64p16.x: New file. -+ * gcc.target/aarch64/simd/vrev64p8_1.c: New file. -+ * gcc.target/aarch64/simd/vrev64p8.x: New file. -+ * gcc.target/aarch64/simd/vrev64qf32_1.c: New file. -+ * gcc.target/aarch64/simd/vrev64qf32.x: New file. -+ * gcc.target/aarch64/simd/vrev64qp16_1.c: New file. -+ * gcc.target/aarch64/simd/vrev64qp16.x: New file. -+ * gcc.target/aarch64/simd/vrev64qp8_1.c: New file. -+ * gcc.target/aarch64/simd/vrev64qp8.x: New file. -+ * gcc.target/aarch64/simd/vrev64qs16_1.c: New file. -+ * gcc.target/aarch64/simd/vrev64qs16.x: New file. -+ * gcc.target/aarch64/simd/vrev64qs32_1.c: New file. -+ * gcc.target/aarch64/simd/vrev64qs32.x: New file. -+ * gcc.target/aarch64/simd/vrev64qs8_1.c: New file. -+ * gcc.target/aarch64/simd/vrev64qs8.x: New file. -+ * gcc.target/aarch64/simd/vrev64qu16_1.c: New file. -+ * gcc.target/aarch64/simd/vrev64qu16.x: New file. -+ * gcc.target/aarch64/simd/vrev64qu32_1.c: New file. -+ * gcc.target/aarch64/simd/vrev64qu32.x: New file. -+ * gcc.target/aarch64/simd/vrev64qu8_1.c: New file. -+ * gcc.target/aarch64/simd/vrev64qu8.x: New file. -+ * gcc.target/aarch64/simd/vrev64s16_1.c: New file. -+ * gcc.target/aarch64/simd/vrev64s16.x: New file. -+ * gcc.target/aarch64/simd/vrev64s32_1.c: New file. -+ * gcc.target/aarch64/simd/vrev64s32.x: New file. -+ * gcc.target/aarch64/simd/vrev64s8_1.c: New file. -+ * gcc.target/aarch64/simd/vrev64s8.x: New file. -+ * gcc.target/aarch64/simd/vrev64u16_1.c: New file. -+ * gcc.target/aarch64/simd/vrev64u16.x: New file. -+ * gcc.target/aarch64/simd/vrev64u32_1.c: New file. -+ * gcc.target/aarch64/simd/vrev64u32.x: New file. -+ * gcc.target/aarch64/simd/vrev64u8_1.c: New file. -+ * gcc.target/aarch64/simd/vrev64u8.x: New file. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r210148, r210151, r210422. -+ 2014-05-14 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.target/arm/simd/vtrnqf32_1.c: New file. -+ * gcc.target/arm/simd/vtrnqp16_1.c: New file. -+ * gcc.target/arm/simd/vtrnqp8_1.c: New file. -+ * gcc.target/arm/simd/vtrnqs16_1.c: New file. -+ * gcc.target/arm/simd/vtrnqs32_1.c: New file. -+ * gcc.target/arm/simd/vtrnqs8_1.c: New file. -+ * gcc.target/arm/simd/vtrnqu16_1.c: New file. -+ * gcc.target/arm/simd/vtrnqu32_1.c: New file. -+ * gcc.target/arm/simd/vtrnqu8_1.c: New file. -+ * gcc.target/arm/simd/vtrnf32_1.c: New file. -+ * gcc.target/arm/simd/vtrnp16_1.c: New file. -+ * gcc.target/arm/simd/vtrnp8_1.c: New file. -+ * gcc.target/arm/simd/vtrns16_1.c: New file. -+ * gcc.target/arm/simd/vtrns32_1.c: New file. -+ * gcc.target/arm/simd/vtrns8_1.c: New file. -+ * gcc.target/arm/simd/vtrnu16_1.c: New file. -+ * gcc.target/arm/simd/vtrnu32_1.c: New file. -+ * gcc.target/arm/simd/vtrnu8_1.c: New file. 
-+ -+ 2014-05-07 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.target/aarch64/vtrns32.c: Expect zip[12] insn rather than trn[12]. -+ * gcc.target/aarch64/vtrnu32.c: Likewise. -+ * gcc.target/aarch64/vtrnf32.c: Likewise. -+ -+ 2014-05-07 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.target/aarch64/simd/vtrnf32_1.c: New file. -+ * gcc.target/aarch64/simd/vtrnf32.x: New file. -+ * gcc.target/aarch64/simd/vtrnp16_1.c: New file. -+ * gcc.target/aarch64/simd/vtrnp16.x: New file. -+ * gcc.target/aarch64/simd/vtrnp8_1.c: New file. -+ * gcc.target/aarch64/simd/vtrnp8.x: New file. -+ * gcc.target/aarch64/simd/vtrnqf32_1.c: New file. -+ * gcc.target/aarch64/simd/vtrnqf32.x: New file. -+ * gcc.target/aarch64/simd/vtrnqp16_1.c: New file. -+ * gcc.target/aarch64/simd/vtrnqp16.x: New file. -+ * gcc.target/aarch64/simd/vtrnqp8_1.c: New file. -+ * gcc.target/aarch64/simd/vtrnqp8.x: New file. -+ * gcc.target/aarch64/simd/vtrnqs16_1.c: New file. -+ * gcc.target/aarch64/simd/vtrnqs16.x: New file. -+ * gcc.target/aarch64/simd/vtrnqs32_1.c: New file. -+ * gcc.target/aarch64/simd/vtrnqs32.x: New file. -+ * gcc.target/aarch64/simd/vtrnqs8_1.c: New file. -+ * gcc.target/aarch64/simd/vtrnqs8.x: New file. -+ * gcc.target/aarch64/simd/vtrnqu16_1.c: New file. -+ * gcc.target/aarch64/simd/vtrnqu16.x: New file. -+ * gcc.target/aarch64/simd/vtrnqu32_1.c: New file. -+ * gcc.target/aarch64/simd/vtrnqu32.x: New file. -+ * gcc.target/aarch64/simd/vtrnqu8_1.c: New file. -+ * gcc.target/aarch64/simd/vtrnqu8.x: New file. -+ * gcc.target/aarch64/simd/vtrns16_1.c: New file. -+ * gcc.target/aarch64/simd/vtrns16.x: New file. -+ * gcc.target/aarch64/simd/vtrns32_1.c: New file. -+ * gcc.target/aarch64/simd/vtrns32.x: New file. -+ * gcc.target/aarch64/simd/vtrns8_1.c: New file. -+ * gcc.target/aarch64/simd/vtrns8.x: New file. -+ * gcc.target/aarch64/simd/vtrnu16_1.c: New file. -+ * gcc.target/aarch64/simd/vtrnu16.x: New file. -+ * gcc.target/aarch64/simd/vtrnu32_1.c: New file. -+ * gcc.target/aarch64/simd/vtrnu32.x: New file. -+ * gcc.target/aarch64/simd/vtrnu8_1.c: New file. -+ * gcc.target/aarch64/simd/vtrnu8.x: New file. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209794, 209858. -+ 2014-04-25 Marek Polacek <polacek@redhat.com> -+ -+ PR c/60114 -+ * gcc.dg/pr60114.c: New test. -+ -+ 2014-04-28 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ PR c/60983 -+ * gcc.dg/pr60114.c: Use signed chars. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r210861. -+ 2014-05-23 Jiong Wang <jiong.wang@arm.com> -+ -+ * gcc.target/aarch64/tail_indirect_call_1.c: New. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211314. -+ 2014-06-06 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * gcc.dg/tree-ssa/pr42585.c: Skip for AArch64. -+ * gcc.dg/tree-ssa/sra-12.c: Likewise. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r210967. -+ 2014-05-27 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * lib/target-supports.exp (check_effective_target_vect_bswap): -+ Specify arm*-*-* support. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r210152, 211059. -+ 2014-05-29 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.target/arm/simd/vextQf32_1.c: New file. -+ * gcc.target/arm/simd/vextQp16_1.c: New file. -+ * gcc.target/arm/simd/vextQp8_1.c: New file. -+ * gcc.target/arm/simd/vextQs16_1.c: New file. -+ * gcc.target/arm/simd/vextQs32_1.c: New file. -+ * gcc.target/arm/simd/vextQs64_1.c: New file. 
-+ * gcc.target/arm/simd/vextQs8_1.c: New file. -+ * gcc.target/arm/simd/vextQu16_1.c: New file. -+ * gcc.target/arm/simd/vextQu32_1.c: New file. -+ * gcc.target/arm/simd/vextQu64_1.c: New file. -+ * gcc.target/arm/simd/vextQu8_1.c: New file. -+ * gcc.target/arm/simd/vextQp64_1.c: New file. -+ * gcc.target/arm/simd/vextf32_1.c: New file. -+ * gcc.target/arm/simd/vextp16_1.c: New file. -+ * gcc.target/arm/simd/vextp8_1.c: New file. -+ * gcc.target/arm/simd/vexts16_1.c: New file. -+ * gcc.target/arm/simd/vexts32_1.c: New file. -+ * gcc.target/arm/simd/vexts64_1.c: New file. -+ * gcc.target/arm/simd/vexts8_1.c: New file. -+ * gcc.target/arm/simd/vextu16_1.c: New file. -+ * gcc.target/arm/simd/vextu32_1.c: New file. -+ * gcc.target/arm/simd/vextu64_1.c: New file. -+ * gcc.target/arm/simd/vextu8_1.c: New file. -+ * gcc.target/arm/simd/vextp64_1.c: New file. -+ -+ 2014-05-07 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.target/aarch64/simd/ext_f32.x: New file. -+ * gcc.target/aarch64/simd/ext_f32_1.c: New file. -+ * gcc.target/aarch64/simd/ext_p16.x: New file. -+ * gcc.target/aarch64/simd/ext_p16_1.c: New file. -+ * gcc.target/aarch64/simd/ext_p8.x: New file. -+ * gcc.target/aarch64/simd/ext_p8_1.c: New file. -+ * gcc.target/aarch64/simd/ext_s16.x: New file. -+ * gcc.target/aarch64/simd/ext_s16_1.c: New file. -+ * gcc.target/aarch64/simd/ext_s32.x: New file. -+ * gcc.target/aarch64/simd/ext_s32_1.c: New file. -+ * gcc.target/aarch64/simd/ext_s64.x: New file. -+ * gcc.target/aarch64/simd/ext_s64_1.c: New file. -+ * gcc.target/aarch64/simd/ext_s8.x: New file. -+ * gcc.target/aarch64/simd/ext_s8_1.c: New file. -+ * gcc.target/aarch64/simd/ext_u16.x: New file. -+ * gcc.target/aarch64/simd/ext_u16_1.c: New file. -+ * gcc.target/aarch64/simd/ext_u32.x: New file. -+ * gcc.target/aarch64/simd/ext_u32_1.c: New file. -+ * gcc.target/aarch64/simd/ext_u64.x: New file. -+ * gcc.target/aarch64/simd/ext_u64_1.c: New file. -+ * gcc.target/aarch64/simd/ext_u8.x: New file. -+ * gcc.target/aarch64/simd/ext_u8_1.c: New file. -+ * gcc.target/aarch64/simd/ext_f64.c: New file. -+ * gcc.target/aarch64/simd/extq_f32.x: New file. -+ * gcc.target/aarch64/simd/extq_f32_1.c: New file. -+ * gcc.target/aarch64/simd/extq_p16.x: New file. -+ * gcc.target/aarch64/simd/extq_p16_1.c: New file. -+ * gcc.target/aarch64/simd/extq_p8.x: New file. -+ * gcc.target/aarch64/simd/extq_p8_1.c: New file. -+ * gcc.target/aarch64/simd/extq_s16.x: New file. -+ * gcc.target/aarch64/simd/extq_s16_1.c: New file. -+ * gcc.target/aarch64/simd/extq_s32.x: New file. -+ * gcc.target/aarch64/simd/extq_s32_1.c: New file. -+ * gcc.target/aarch64/simd/extq_s64.x: New file. -+ * gcc.target/aarch64/simd/extq_s64_1.c: New file. -+ * gcc.target/aarch64/simd/extq_s8.x: New file. -+ * gcc.target/aarch64/simd/extq_s8_1.c: New file. -+ * gcc.target/aarch64/simd/extq_u16.x: New file. -+ * gcc.target/aarch64/simd/extq_u16_1.c: New file. -+ * gcc.target/aarch64/simd/extq_u32.x: New file. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209940, r209943, r209947. -+ 2014-04-30 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.target/arm/simd/vuzpqf32_1.c: New file. -+ * gcc.target/arm/simd/vuzpqp16_1.c: New file. -+ * gcc.target/arm/simd/vuzpqp8_1.c: New file. -+ * gcc.target/arm/simd/vuzpqs16_1.c: New file. -+ * gcc.target/arm/simd/vuzpqs32_1.c: New file. -+ * gcc.target/arm/simd/vuzpqs8_1.c: New file. -+ * gcc.target/arm/simd/vuzpqu16_1.c: New file. -+ * gcc.target/arm/simd/vuzpqu32_1.c: New file. 
-+ * gcc.target/arm/simd/vuzpqu8_1.c: New file. -+ * gcc.target/arm/simd/vuzpf32_1.c: New file. -+ * gcc.target/arm/simd/vuzpp16_1.c: New file. -+ * gcc.target/arm/simd/vuzpp8_1.c: New file. -+ * gcc.target/arm/simd/vuzps16_1.c: New file. -+ * gcc.target/arm/simd/vuzps32_1.c: New file. -+ * gcc.target/arm/simd/vuzps8_1.c: New file. -+ * gcc.target/arm/simd/vuzpu16_1.c: New file. -+ * gcc.target/arm/simd/vuzpu32_1.c: New file. -+ * gcc.target/arm/simd/vuzpu8_1.c: New file. -+ -+ 2014-04-30 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.target/aarch64/vuzps32_1.c: Expect zip1/2 insn rather than uzp1/2. -+ * gcc.target/aarch64/vuzpu32_1.c: Likewise. -+ * gcc.target/aarch64/vuzpf32_1.c: Likewise. -+ -+ 2014-04-30 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.target/aarch64/simd/vuzpf32_1.c: New file. -+ * gcc.target/aarch64/simd/vuzpf32.x: New file. -+ * gcc.target/aarch64/simd/vuzpp16_1.c: New file. -+ * gcc.target/aarch64/simd/vuzpp16.x: New file. -+ * gcc.target/aarch64/simd/vuzpp8_1.c: New file. -+ * gcc.target/aarch64/simd/vuzpp8.x: New file. -+ * gcc.target/aarch64/simd/vuzpqf32_1.c: New file. -+ * gcc.target/aarch64/simd/vuzpqf32.x: New file. -+ * gcc.target/aarch64/simd/vuzpqp16_1.c: New file. -+ * gcc.target/aarch64/simd/vuzpqp16.x: New file. -+ * gcc.target/aarch64/simd/vuzpqp8_1.c: New file. -+ * gcc.target/aarch64/simd/vuzpqp8.x: New file. -+ * gcc.target/aarch64/simd/vuzpqs16_1.c: New file. -+ * gcc.target/aarch64/simd/vuzpqs16.x: New file. -+ * gcc.target/aarch64/simd/vuzpqs32_1.c: New file. -+ * gcc.target/aarch64/simd/vuzpqs32.x: New file. -+ * gcc.target/aarch64/simd/vuzpqs8_1.c: New file. -+ * gcc.target/aarch64/simd/vuzpqs8.x: New file. -+ * gcc.target/aarch64/simd/vuzpqu16_1.c: New file. -+ * gcc.target/aarch64/simd/vuzpqu16.x: New file. -+ * gcc.target/aarch64/simd/vuzpqu32_1.c: New file. -+ * gcc.target/aarch64/simd/vuzpqu32.x: New file. -+ * gcc.target/aarch64/simd/vuzpqu8_1.c: New file. -+ * gcc.target/aarch64/simd/vuzpqu8.x: New file. -+ * gcc.target/aarch64/simd/vuzps16_1.c: New file. -+ * gcc.target/aarch64/simd/vuzps16.x: New file. -+ * gcc.target/aarch64/simd/vuzps32_1.c: New file. -+ * gcc.target/aarch64/simd/vuzps32.x: New file. -+ * gcc.target/aarch64/simd/vuzps8_1.c: New file. -+ * gcc.target/aarch64/simd/vuzps8.x: New file. -+ * gcc.target/aarch64/simd/vuzpu16_1.c: New file. -+ * gcc.target/aarch64/simd/vuzpu16.x: New file. -+ * gcc.target/aarch64/simd/vuzpu32_1.c: New file. -+ * gcc.target/aarch64/simd/vuzpu32.x: New file. -+ * gcc.target/aarch64/simd/vuzpu8_1.c: New file. -+ * gcc.target/aarch64/simd/vuzpu8.x: New file. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-13 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211206. -+ 2014-06-03 Andrew Pinski <apinski@cavium.com> -+ -+ * gcc.c-torture/compile/20140528-1.c: New testcase. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209908. -+ 2013-04-29 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.target/arm/simd/simd.exp: New file. -+ * gcc.target/arm/simd/vzipqf32_1.c: New file. -+ * gcc.target/arm/simd/vzipqp16_1.c: New file. -+ * gcc.target/arm/simd/vzipqp8_1.c: New file. -+ * gcc.target/arm/simd/vzipqs16_1.c: New file. -+ * gcc.target/arm/simd/vzipqs32_1.c: New file. -+ * gcc.target/arm/simd/vzipqs8_1.c: New file. -+ * gcc.target/arm/simd/vzipqu16_1.c: New file. -+ * gcc.target/arm/simd/vzipqu32_1.c: New file. 
-+ * gcc.target/arm/simd/vzipqu8_1.c: New file. -+ * gcc.target/arm/simd/vzipf32_1.c: New file. -+ * gcc.target/arm/simd/vzipp16_1.c: New file. -+ * gcc.target/arm/simd/vzipp8_1.c: New file. -+ * gcc.target/arm/simd/vzips16_1.c: New file. -+ * gcc.target/arm/simd/vzips32_1.c: New file. -+ * gcc.target/arm/simd/vzips8_1.c: New file. -+ * gcc.target/arm/simd/vzipu16_1.c: New file. -+ * gcc.target/arm/simd/vzipu32_1.c: New file. -+ * gcc.target/arm/simd/vzipu8_1.c: New file. -+ -+2014-05-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209893. -+ 2014-04-29 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.target/aarch64/simd/simd.exp: New file. -+ * gcc.target/aarch64/simd/vzipf32_1.c: New file. -+ * gcc.target/aarch64/simd/vzipf32.x: New file. -+ * gcc.target/aarch64/simd/vzipp16_1.c: New file. -+ * gcc.target/aarch64/simd/vzipp16.x: New file. -+ * gcc.target/aarch64/simd/vzipp8_1.c: New file. -+ * gcc.target/aarch64/simd/vzipp8.x: New file. -+ * gcc.target/aarch64/simd/vzipqf32_1.c: New file. -+ * gcc.target/aarch64/simd/vzipqf32.x: New file. -+ * gcc.target/aarch64/simd/vzipqp16_1.c: New file. -+ * gcc.target/aarch64/simd/vzipqp16.x: New file. -+ * gcc.target/aarch64/simd/vzipqp8_1.c: New file. -+ * gcc.target/aarch64/simd/vzipqp8.x: New file. -+ * gcc.target/aarch64/simd/vzipqs16_1.c: New file. -+ * gcc.target/aarch64/simd/vzipqs16.x: New file. -+ * gcc.target/aarch64/simd/vzipqs32_1.c: New file. -+ * gcc.target/aarch64/simd/vzipqs32.x: New file. -+ * gcc.target/aarch64/simd/vzipqs8_1.c: New file. -+ * gcc.target/aarch64/simd/vzipqs8.x: New file. -+ * gcc.target/aarch64/simd/vzipqu16_1.c: New file. -+ * gcc.target/aarch64/simd/vzipqu16.x: New file. -+ * gcc.target/aarch64/simd/vzipqu32_1.c: New file. -+ * gcc.target/aarch64/simd/vzipqu32.x: New file. -+ * gcc.target/aarch64/simd/vzipqu8_1.c: New file. -+ * gcc.target/aarch64/simd/vzipqu8.x: New file. -+ * gcc.target/aarch64/simd/vzips16_1.c: New file. -+ * gcc.target/aarch64/simd/vzips16.x: New file. -+ * gcc.target/aarch64/simd/vzips32_1.c: New file. -+ * gcc.target/aarch64/simd/vzips32.x: New file. -+ * gcc.target/aarch64/simd/vzips8_1.c: New file. -+ * gcc.target/aarch64/simd/vzips8.x: New file. -+ * gcc.target/aarch64/simd/vzipu16_1.c: New file. -+ * gcc.target/aarch64/simd/vzipu16.x: New file. -+ * gcc.target/aarch64/simd/vzipu32_1.c: New file. -+ * gcc.target/aarch64/simd/vzipu32.x: New file. -+ * gcc.target/aarch64/simd/vzipu8_1.c: New file. -+ * gcc.target/aarch64/simd/vzipu8.x: New file. -+ -+2014-05-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209808. -+ 2014-04-25 Jiong Wang <jiong.wang@arm.com> -+ -+ * gcc.target/arm/tail-long-call.c: New test. -+ -+2014-05-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209749. -+ 2014-04-24 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * lib/target-supports.exp (check_effective_target_vect_perm): Return -+ true for aarch64_be. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209736. -+ 2014-04-24 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * lib/target-supports.exp (check_effective_target_vect_bswap): New. -+ * gcc.dg/vect/vect-bswap16: New test. -+ * gcc.dg/vect/vect-bswap32: Likewise. -+ * gcc.dg/vect/vect-bswap64: Likewise. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209713. -+ 2014-04-23 Alex Velenko <Alex.Velenko@arm.com> -+ -+ * gcc.target/aarch64/vdup_lane_1.c: New testcase. -+ * gcc.target/aarch64/vdup_lane_2.c: New testcase. -+ * gcc.target/aarch64/vdup_n_1.c: New testcase. 
-+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209704, 209705. -+ 2014-04-23 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * gcc.target/arm/rev16.c: New test. -+ -+ 2014-04-23 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * gcc.target/aarch64/rev16_1.c: New test. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209642. -+ 2014-04-22 Alex Velenko <Alex.Velenko@arm.com> -+ -+ * gcc.target/aarch64/vreinterpret_f64_1.c: New. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209640. -+ 2014-04-22 Alex Velenko <Alex.Velenko@arm.com> -+ -+ * gcc.target/aarch64/vqneg_s64_1.c: New testcase. -+ * gcc.target/aarch64/vqabs_s64_1.c: New testcase. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209613, 209614. -+ 2014-04-22 Ian Bolton <ian.bolton@arm.com> -+ -+ * gcc.target/arm/anddi_notdi-1.c: New test. -+ * gcc.target/arm/iordi_notdi-1.c: New test case. -+ -+ 2014-04-22 Ian Bolton <ian.bolton@arm.com> -+ -+ * gcc.target/arm/iordi_notdi-1.c: New test. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209559. -+ 2014-04-22 Alex Velenko <Alex.Velenko@arm.com> -+ -+ * gcc.target/aarch64/vrnd_f64_1.c : New file. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-05-13 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209889. -+ 2014-04-29 Zhenqiang Chen <zhenqiang.chen@linaro.org> -+ -+ * gcc.target/aarch64/fcsel_1.c: New test case. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/gcc/testsuite/gcc.c-torture/compile/20140528-1.c -+++ b/src/gcc/testsuite/gcc.c-torture/compile/20140528-1.c -@@ -0,0 +1,9 @@ -+unsigned f(unsigned flags, unsigned capabilities) -+{ -+ unsigned gfp_mask; -+ unsigned gfp_notmask = 0; -+ gfp_mask = flags & ((1 << 25) - 1); -+ if (!(capabilities & 0x00000001)) -+ gfp_mask |= 0x1000000u; -+ return (gfp_mask & ~gfp_notmask); -+} ---- a/src/gcc/testsuite/gcc.dg/fail_always_inline.c -+++ b/src/gcc/testsuite/gcc.dg/fail_always_inline.c -@@ -1,4 +1,5 @@ - /* { dg-do compile } */ -+/* { dg-add-options bind_pic_locally } */ - - extern __attribute__ ((always_inline)) void - bar() { } /* { dg-warning "function might not be inlinable" } */ ---- a/src/gcc/testsuite/gcc.dg/ira-shrinkwrap-prep-1.c -+++ b/src/gcc/testsuite/gcc.dg/ira-shrinkwrap-prep-1.c -@@ -1,4 +1,4 @@ --/* { dg-do compile { target { { x86_64-*-* && lp64 } || { powerpc*-*-* && lp64 } } } } */ -+/* { dg-do compile { target { { x86_64-*-* && lp64 } || { { powerpc*-*-* && lp64 } || arm_nothumb } } } } */ - /* { dg-options "-O3 -fdump-rtl-ira -fdump-rtl-pro_and_epilogue" } */ - - long __attribute__((noinline, noclone)) ---- a/src/gcc/testsuite/gcc.dg/pr10474.c -+++ b/src/gcc/testsuite/gcc.dg/pr10474.c -@@ -1,4 +1,4 @@ --/* { dg-do compile { target { { x86_64-*-* && lp64 } || { powerpc*-*-* && lp64 } } } } */ -+/* { dg-do compile { target { { x86_64-*-* && lp64 } || { { powerpc*-*-* && lp64 } || arm_nothumb } } } } */ - /* { dg-options "-O3 -fdump-rtl-pro_and_epilogue" } */ - - void f(int *i) ---- a/src/gcc/testsuite/gcc.dg/ssp-4.c -+++ b/src/gcc/testsuite/gcc.dg/ssp-4.c -@@ -0,0 +1,18 @@ -+/* { dg-do assemble } */ -+/* { dg-options "-fstack-protector-strong -O1 -frename-registers" } */ -+/* { dg-require-effective-target fstack_protector } */ -+ -+typedef unsigned int uint32_t; -+struct ctx -+{ -+ uint32_t A; -+}; -+ -+void * -+buffer_copy (const struct ctx *ctx, void *resbuf) -+{ -+ uint32_t 
buffer[4]; -+ buffer[0] = (ctx->A); -+ __builtin_memcpy (resbuf, buffer, sizeof (buffer)); -+ return resbuf; -+} ---- a/src/gcc/testsuite/gcc.dg/ira-shrinkwrap-prep-2.c -+++ b/src/gcc/testsuite/gcc.dg/ira-shrinkwrap-prep-2.c -@@ -1,4 +1,4 @@ --/* { dg-do compile { target { { x86_64-*-* && lp64 } || { powerpc*-*-* && lp64 } } } } */ -+/* { dg-do compile { target { { x86_64-*-* && lp64 } || { { powerpc*-*-* && lp64 } || arm_nothumb } } } } */ - /* { dg-options "-O3 -fdump-rtl-ira -fdump-rtl-pro_and_epilogue" } */ - - long __attribute__((noinline, noclone)) ---- a/src/gcc/testsuite/gcc.dg/inline-22.c -+++ b/src/gcc/testsuite/gcc.dg/inline-22.c -@@ -1,5 +1,6 @@ - /* { dg-do compile } */ - /* { dg-options "-funit-at-a-time -Wno-attributes" } */ -+/* { dg-add-options bind_pic_locally } */ - /* Verify we can inline without a complete prototype and with promoted - arguments. See also PR32492. */ - __attribute__((always_inline)) void f1() {} ---- a/src/gcc/testsuite/gcc.dg/memset-2.c -+++ b/src/gcc/testsuite/gcc.dg/memset-2.c -@@ -0,0 +1,11 @@ -+/* PR target/63937 */ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O2" } */ -+ -+void -+foo (char *p) -+{ -+ p = __builtin_assume_aligned (p, 64); -+ __builtin_memset (p, 0, 0x100000001ULL); -+} -+ ---- a/src/gcc/testsuite/gcc.dg/inline_4.c -+++ b/src/gcc/testsuite/gcc.dg/inline_4.c -@@ -1,5 +1,6 @@ - /* { dg-do compile } */ - /* { dg-options "-O2 -fdump-tree-optimized -fdisable-tree-einline=foo2 -fdisable-ipa-inline -Wno-attributes" } */ -+/* { dg-add-options bind_pic_locally } */ - int g; - __attribute__((always_inline)) void bar (void) - { ---- a/src/gcc/testsuite/gcc.dg/torture/pr60606-1.c -+++ b/src/gcc/testsuite/gcc.dg/torture/pr60606-1.c -@@ -0,0 +1,9 @@ -+/* { dg-do compile } */ -+/* { dg-options "-ffat-lto-objects" } */ -+ -+int -+f (void) -+{ -+ register unsigned int r asm ("no-such-register"); /* { dg-error "invalid register name" } */ -+ return r; -+} ---- a/src/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-6.c -+++ b/src/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-6.c -@@ -0,0 +1,43 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fdump-tree-dom1-details" } */ -+/* { dg-final { scan-tree-dump-times "FSM" 6 "dom1" } } */ -+/* { dg-final { cleanup-tree-dump "dom1" } } */ -+ -+int sum0, sum1, sum2, sum3; -+int foo (char *s, char **ret) -+{ -+ int state=0; -+ char c; -+ -+ for (; *s && state != 4; s++) -+ { -+ c = *s; -+ if (c == '*') -+ { -+ s++; -+ break; -+ } -+ switch (state) -+ { -+ case 0: -+ if (c == '+') -+ state = 1; -+ else if (c != '-') -+ sum0+=c; -+ break; -+ case 1: -+ if (c == '+') -+ state = 2; -+ else if (c == '-') -+ state = 0; -+ else -+ sum1+=c; -+ break; -+ default: -+ break; -+ } -+ -+ } -+ *ret = s; -+ return state; -+} ---- a/src/gcc/testsuite/gcc.dg/tree-ssa/pr42585.c -+++ b/src/gcc/testsuite/gcc.dg/tree-ssa/pr42585.c -@@ -35,6 +35,6 @@ - /* Whether the structs are totally scalarized or not depends on the - MOVE_RATIO macro definition in the back end. The scalarization will - not take place when using small values for MOVE_RATIO. */ --/* { dg-final { scan-tree-dump-times "struct _fat_ptr _ans" 0 "optimized" { target { ! "arm*-*-* avr-*-* nds32*-*-* powerpc*-*-* s390*-*-* sh*-*-*" } } } } */ --/* { dg-final { scan-tree-dump-times "struct _fat_ptr _T2" 0 "optimized" { target { ! "arm*-*-* avr-*-* nds32*-*-* powerpc*-*-* s390*-*-* sh*-*-*" } } } } */ -+/* { dg-final { scan-tree-dump-times "struct _fat_ptr _ans" 0 "optimized" { target { ! 
"aarch64*-*-* arm*-*-* avr-*-* nds32*-*-* powerpc*-*-* s390*-*-* sh*-*-*" } } } } */ -+/* { dg-final { scan-tree-dump-times "struct _fat_ptr _T2" 0 "optimized" { target { ! "aarch64*-*-* arm*-*-* avr-*-* nds32*-*-* powerpc*-*-* s390*-*-* sh*-*-*" } } } } */ - /* { dg-final { cleanup-tree-dump "optimized" } } */ ---- a/src/gcc/testsuite/gcc.dg/tree-ssa/sra-12.c -+++ b/src/gcc/testsuite/gcc.dg/tree-ssa/sra-12.c -@@ -21,5 +21,5 @@ - *p = l; - } - --/* { dg-final { scan-tree-dump-times "l;" 0 "release_ssa" { target { ! "avr*-*-* nds32*-*-*" } } } } */ -+/* { dg-final { scan-tree-dump-times "l;" 0 "release_ssa" { target { ! "aarch64*-*-* avr*-*-* nds32*-*-*" } } } } */ - /* { dg-final { cleanup-tree-dump "release_ssa" } } */ ---- a/src/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c -+++ b/src/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c -@@ -0,0 +1,127 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fdump-tree-dom1-details" } */ -+/* { dg-final { scan-tree-dump-times "FSM" 19 "dom1" } } */ -+/* { dg-final { cleanup-tree-dump "dom1" } } */ -+ -+enum STATE { -+ S0=0, -+ SI, -+ S1, -+ S2, -+ S3, -+ S4, -+ S5, -+ S6 -+}; -+ -+int bar (enum STATE s); -+ -+enum STATE foo (unsigned char **y, unsigned *c) -+{ -+ unsigned char *x = *y; -+ unsigned char n; -+ enum STATE s = S0; -+ -+ for( ; *x && s != SI; x++ ) -+ { -+ n = *x; -+ if (n == 'x') -+ { -+ x++; -+ break; -+ } -+ switch(s) -+ { -+ case S0: -+ if(bar(n)) -+ s = S3; -+ else if( n == 'a' || n == 'b' ) -+ s = S1; -+ else if( n == 'c' ) -+ s = S4; -+ else -+ { -+ s = SI; -+ c[SI]++; -+ } -+ c[S0]++; -+ break; -+ case S1: -+ if(bar(n)) -+ { -+ s = S3; -+ c[S1]++; -+ } -+ else if( n == 'c' ) -+ { -+ s = S4; -+ c[S1]++; -+ } -+ else -+ { -+ s = SI; -+ c[S1]++; -+ } -+ break; -+ case S3: -+ if( n == 'c' ) -+ { -+ s = S4; -+ c[S3]++; -+ } -+ else if(!bar(n)) -+ { -+ s = SI; -+ c[S3]++; -+ } -+ break; -+ case S4: -+ if( n == 'E' || n == 'e' ) -+ { -+ s = S2; -+ c[S4]++; -+ } -+ else if(!bar(n)) -+ { -+ s = SI; -+ c[S4]++; -+ } -+ break; -+ case S2: -+ if( n == 'a' || n == 'b' ) -+ { -+ s = S5; -+ c[S2]++; -+ } -+ else -+ { -+ s = SI; -+ c[S2]++; -+ } -+ break; -+ case S5: -+ if(bar(n)) -+ { -+ s = S6; -+ c[S5]++; -+ } -+ else -+ { -+ s = SI; -+ c[S5]++; -+ } -+ break; -+ case S6: -+ if(!bar(n)) -+ { -+ s = SI; -+ c[SI]++; -+ } -+ break; -+ default: -+ break; -+ } -+ } -+ *y=x; -+ return s; -+} ---- a/src/gcc/testsuite/gcc.dg/pr60114.c -+++ b/src/gcc/testsuite/gcc.dg/pr60114.c -@@ -0,0 +1,31 @@ -+/* PR c/60114 */ -+/* { dg-do compile } */ -+/* { dg-options "-Wconversion" } */ -+ -+struct S { int n, u[2]; }; -+const signed char z[] = { -+ [0] = 0x100, /* { dg-warning "9:overflow in implicit constant conversion" } */ -+ [2] = 0x101, /* { dg-warning "9:overflow in implicit constant conversion" } */ -+}; -+int A[] = { -+ 0, 0x80000000, /* { dg-warning "16:conversion of unsigned constant value to negative integer" } */ -+ 0xA, 0x80000000, /* { dg-warning "18:conversion of unsigned constant value to negative integer" } */ -+ 0xA, 0xA, 0x80000000 /* { dg-warning "23:conversion of unsigned constant value to negative integer" } */ -+ }; -+int *p = (int []) { 0x80000000 }; /* { dg-warning "21:conversion of unsigned constant value to negative integer" } */ -+union { int k; } u = { .k = 0x80000000 }; /* { dg-warning "29:conversion of unsigned constant value to negative integer" } */ -+typedef int H[]; -+void -+foo (void) -+{ -+ signed char a[][3] = { { 0x100, /* { dg-warning "28:overflow in implicit constant conversion" } */ -+ 1, 0x100 }, /* { dg-warning 
"24:overflow in implicit constant conversion" } */ -+ { '\0', 0x100, '\0' } /* { dg-warning "27:overflow in implicit constant conversion" } */ -+ }; -+ (const signed char []) { 0x100 }; /* { dg-warning "28:overflow in implicit constant conversion" } */ -+ (const float []) { 1e0, 1e1, 1e100 }; /* { dg-warning "32:conversion" } */ -+ struct S s1 = { 0x80000000 }; /* { dg-warning "19:conversion of unsigned constant value to negative integer" } */ -+ struct S s2 = { .n = 0x80000000 }; /* { dg-warning "24:conversion of unsigned constant value to negative integer" } */ -+ struct S s3 = { .u[1] = 0x80000000 }; /* { dg-warning "27:conversion of unsigned constant value to negative integer" } */ -+ H h = { 1, 2, 0x80000000 }; /* { dg-warning "17:conversion of unsigned constant value to negative integer" } */ -+} ---- a/src/gcc/testsuite/gcc.dg/vect/vect-reduc-mul_1.c -+++ b/src/gcc/testsuite/gcc.dg/vect/vect-reduc-mul_1.c -@@ -0,0 +1,36 @@ -+/* { dg-require-effective-target vect_int_mult } */ -+/* { dg-require-effective-target whole_vector_shift } */ -+ -+/* Write a reduction loop to be reduced using vector shifts. */ -+ -+extern void abort(void); -+ -+unsigned char in[16]; -+ -+int -+main (unsigned char argc, char **argv) -+{ -+ unsigned char i = 0; -+ unsigned char sum = 1; -+ -+ for (i = 0; i < 16; i++) -+ in[i] = i + i + 1; -+ -+ /* Prevent constant propagation of the entire loop below. */ -+ asm volatile ("" : : : "memory"); -+ -+ for (i = 0; i < 16; i++) -+ sum *= in[i]; -+ -+ if (sum != 33) -+ { -+ __builtin_printf("Failed %d\n", sum); -+ abort(); -+ } -+ -+ return 0; -+} -+ -+/* { dg-final { scan-tree-dump "Reduce using vector shifts" "vect" } } */ -+/* { dg-final { cleanup-tree-dump "vect" } } */ -+ ---- a/src/gcc/testsuite/gcc.dg/vect/vect-reduc-mul_2.c -+++ b/src/gcc/testsuite/gcc.dg/vect/vect-reduc-mul_2.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target vect_int_mult } */ -+/* { dg-require-effective-target whole_vector_shift } */ -+ -+/* Write a reduction loop to be reduced using vector shifts and folded. */ -+ -+extern void abort(void); -+ -+int -+main (unsigned char argc, char **argv) -+{ -+ unsigned char in[16]; -+ unsigned char i = 0; -+ unsigned char sum = 1; -+ -+ for (i = 0; i < 16; i++) -+ in[i] = i + i + 1; -+ -+ for (i = 0; i < 16; i++) -+ sum *= in[i]; -+ -+ if (sum != 33) -+ { -+ __builtin_printf("Failed %d\n", sum); -+ abort(); -+ } -+ -+ return 0; -+} -+ -+/* { dg-final { scan-tree-dump "Reduce using vector shifts" "vect" } } */ -+/* { dg-final { cleanup-tree-dump "vect" } } */ -+ ---- a/src/gcc/testsuite/gcc.dg/vect/vect-reduc-or_1.c -+++ b/src/gcc/testsuite/gcc.dg/vect/vect-reduc-or_1.c -@@ -0,0 +1,35 @@ -+/* { dg-require-effective-target whole_vector_shift } */ -+ -+/* Write a reduction loop to be reduced using vector shifts. */ -+ -+extern void abort(void); -+ -+unsigned char in[16] __attribute__((__aligned__(16))); -+ -+int -+main (unsigned char argc, char **argv) -+{ -+ unsigned char i = 0; -+ unsigned char sum = 1; -+ -+ for (i = 0; i < 16; i++) -+ in[i] = (i + i + 1) & 0xfd; -+ -+ /* Prevent constant propagation of the entire loop below. 
*/ -+ asm volatile ("" : : : "memory"); -+ -+ for (i = 0; i < 16; i++) -+ sum |= in[i]; -+ -+ if (sum != 29) -+ { -+ __builtin_printf("Failed %d\n", sum); -+ abort(); -+ } -+ -+ return 0; -+} -+ -+/* { dg-final { scan-tree-dump "Reduce using vector shifts" "vect" } } */ -+/* { dg-final { cleanup-tree-dump "vect" } } */ -+ ---- a/src/gcc/testsuite/gcc.dg/vect/vect-bswap32.c -+++ b/src/gcc/testsuite/gcc.dg/vect/vect-bswap32.c -@@ -0,0 +1,44 @@ -+/* { dg-require-effective-target vect_bswap } */ -+ -+#include "tree-vect.h" -+ -+#define N 128 -+ -+volatile int y = 0; -+ -+static inline void -+vfoo32 (unsigned int* a) -+{ -+ int i = 0; -+ for (i = 0; i < N; ++i) -+ a[i] = __builtin_bswap32 (a[i]); -+} -+ -+int -+main (void) -+{ -+ unsigned int arr[N]; -+ unsigned int expect[N]; -+ int i; -+ -+ for (i = 0; i < N; ++i) -+ { -+ arr[i] = i; -+ expect[i] = __builtin_bswap32 (i); -+ if (y) /* Avoid vectorisation. */ -+ abort (); -+ } -+ -+ vfoo32 (arr); -+ -+ for (i = 0; i < N; ++i) -+ { -+ if (arr[i] != expect[i]) -+ abort (); -+ } -+ -+ return 0; -+} -+ -+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -+/* { dg-final { cleanup-tree-dump "vect" } } */ ---- a/src/gcc/testsuite/gcc.dg/vect/vect-reduc-or_2.c -+++ b/src/gcc/testsuite/gcc.dg/vect/vect-reduc-or_2.c -@@ -0,0 +1,31 @@ -+/* { dg-require-effective-target whole_vector_shift } */ -+ -+/* Write a reduction loop to be reduced using vector shifts and folded. */ -+ -+extern void abort(void); -+ -+int -+main (unsigned char argc, char **argv) -+{ -+ unsigned char in[16] __attribute__((aligned(16))); -+ unsigned char i = 0; -+ unsigned char sum = 1; -+ -+ for (i = 0; i < 16; i++) -+ in[i] = (i + i + 1) & 0xfd; -+ -+ for (i = 0; i < 16; i++) -+ sum |= in[i]; -+ -+ if (sum != 29) -+ { -+ __builtin_printf("Failed %d\n", sum); -+ abort(); -+ } -+ -+ return 0; -+} -+ -+/* { dg-final { scan-tree-dump "Reduce using vector shifts" "vect" } } */ -+/* { dg-final { cleanup-tree-dump "vect" } } */ -+ ---- a/src/gcc/testsuite/gcc.dg/vect/vect-bswap16.c -+++ b/src/gcc/testsuite/gcc.dg/vect/vect-bswap16.c -@@ -0,0 +1,44 @@ -+/* { dg-require-effective-target vect_bswap } */ -+ -+#include "tree-vect.h" -+ -+#define N 128 -+ -+volatile int y = 0; -+ -+static inline void -+vfoo16 (unsigned short int* a) -+{ -+ int i = 0; -+ for (i = 0; i < N; ++i) -+ a[i] = __builtin_bswap16 (a[i]); -+} -+ -+int -+main (void) -+{ -+ unsigned short arr[N]; -+ unsigned short expect[N]; -+ int i; -+ -+ for (i = 0; i < N; ++i) -+ { -+ arr[i] = i; -+ expect[i] = __builtin_bswap16 (i); -+ if (y) /* Avoid vectorisation. */ -+ abort (); -+ } -+ -+ vfoo16 (arr); -+ -+ for (i = 0; i < N; ++i) -+ { -+ if (arr[i] != expect[i]) -+ abort (); -+ } -+ -+ return 0; -+} -+ -+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -+/* { dg-final { cleanup-tree-dump "vect" } } */ ---- a/src/gcc/testsuite/gcc.dg/vect/vect-bswap64.c -+++ b/src/gcc/testsuite/gcc.dg/vect/vect-bswap64.c -@@ -0,0 +1,44 @@ -+/* { dg-require-effective-target vect_bswap } */ -+ -+#include "tree-vect.h" -+ -+#define N 128 -+ -+volatile int y = 0; -+ -+static inline void -+vfoo64 (unsigned long long* a) -+{ -+ int i = 0; -+ for (i = 0; i < N; ++i) -+ a[i] = __builtin_bswap64 (a[i]); -+} -+ -+int -+main (void) -+{ -+ unsigned long long arr[N]; -+ unsigned long long expect[N]; -+ int i; -+ -+ for (i = 0; i < N; ++i) -+ { -+ arr[i] = i; -+ expect[i] = __builtin_bswap64 (i); -+ if (y) /* Avoid vectorisation. 
*/ -+ abort (); -+ } -+ -+ vfoo64 (arr); -+ -+ for (i = 0; i < N; ++i) -+ { -+ if (arr[i] != expect[i]) -+ abort (); -+ } -+ -+ return 0; -+} -+ -+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -+/* { dg-final { cleanup-tree-dump "vect" } } */ ---- a/src/gcc/testsuite/gcc.dg/ssp-3.c -+++ b/src/gcc/testsuite/gcc.dg/ssp-3.c -@@ -0,0 +1,16 @@ -+/* { dg-do assemble } */ -+/* { dg-options "-fstack-protector-strong -O1 -frename-registers" } */ -+/* { dg-require-effective-target fstack_protector } */ -+ -+extern int bar (const char *s, int *argc); -+extern int baz (const char *s); -+ -+char -+foo (const char *s) -+{ -+ int argc; -+ int ret; -+ if ( !bar (s, &argc)) -+ ret = baz (s); -+ return *s; -+} ---- a/src/gcc/testsuite/g++.dg/ipa/devirt-25.C -+++ b/src/gcc/testsuite/g++.dg/ipa/devirt-25.C -@@ -1,5 +1,6 @@ - /* { dg-do compile } */ - /* { dg-options "-O3 -fdump-ipa-cp" } */ -+/* { dg-add-options bind_pic_locally } */ - - class ert_RefCounter { - protected: ---- a/src/gcc/objcp/ChangeLog.linaro -+++ b/src/gcc/objcp/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/gcc/cp/ChangeLog.linaro -+++ b/src/gcc/cp/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. 
---- a/src/gcc/compare-elim.c -+++ b/src/gcc/compare-elim.c -@@ -100,6 +100,9 @@ - constants. */ - rtx in_a, in_b; - -+ /* The REG_EH_REGION of the comparison. */ -+ rtx eh_note; -+ - /* Information about how this comparison is used. */ - struct comparison_use uses[MAX_CMP_USE]; - -@@ -262,6 +265,7 @@ - struct comparison *last_cmp; - rtx insn, next, last_clobber; - bool last_cmp_valid; -+ bool need_purge = false; - bitmap killed; - - killed = BITMAP_ALLOC (NULL); -@@ -303,44 +307,60 @@ - if (src) - { - enum machine_mode src_mode = GET_MODE (src); -+ rtx eh_note = NULL; - -- /* Eliminate a compare that's redundant with the previous. */ -- if (last_cmp_valid -- && rtx_equal_p (last_cmp->in_a, XEXP (src, 0)) -- && rtx_equal_p (last_cmp->in_b, XEXP (src, 1))) -- { -- rtx flags, x; -- enum machine_mode new_mode -- = targetm.cc_modes_compatible (last_cmp->orig_mode, src_mode); -+ if (flag_non_call_exceptions) -+ eh_note = find_reg_note (insn, REG_EH_REGION, NULL); - -- /* New mode is incompatible with the previous compare mode. */ -- if (new_mode == VOIDmode) -- continue; -+ if (!last_cmp_valid) -+ goto dont_delete; - -- if (new_mode != last_cmp->orig_mode) -- { -- flags = gen_rtx_REG (src_mode, targetm.flags_regnum); -+ /* Take care that it's in the same EH region. */ -+ if (flag_non_call_exceptions -+ && !rtx_equal_p (eh_note, last_cmp->eh_note)) -+ goto dont_delete; - -- /* Generate new comparison for substitution. */ -- x = gen_rtx_COMPARE (new_mode, XEXP (src, 0), XEXP (src, 1)); -- x = gen_rtx_SET (VOIDmode, flags, x); -+ /* Make sure the compare is redundant with the previous. */ -+ if (!rtx_equal_p (last_cmp->in_a, XEXP (src, 0)) -+ || !rtx_equal_p (last_cmp->in_b, XEXP (src, 1))) -+ goto dont_delete; - -- if (!validate_change (last_cmp->insn, -- &PATTERN (last_cmp->insn), x, false)) -- continue; -+ /* New mode must be compatible with the previous compare mode. */ -+ { -+ enum machine_mode new_mode -+ = targetm.cc_modes_compatible (last_cmp->orig_mode, src_mode); -+ if (new_mode == VOIDmode) -+ goto dont_delete; - -- last_cmp->orig_mode = new_mode; -- } -+ if (new_mode != last_cmp->orig_mode) -+ { -+ rtx x, flags = gen_rtx_REG (src_mode, targetm.flags_regnum); - -- delete_insn (insn); -- continue; -- } -+ /* Generate new comparison for substitution. */ -+ x = gen_rtx_COMPARE (new_mode, XEXP (src, 0), XEXP (src, 1)); -+ x = gen_rtx_SET (VOIDmode, flags, x); - -+ if (!validate_change (last_cmp->insn, -+ &PATTERN (last_cmp->insn), x, false)) -+ goto dont_delete; -+ -+ last_cmp->orig_mode = new_mode; -+ } -+ } -+ -+ /* All tests and substitutions succeeded! */ -+ if (eh_note) -+ need_purge = true; -+ delete_insn (insn); -+ continue; -+ -+ dont_delete: - last_cmp = XCNEW (struct comparison); - last_cmp->insn = insn; - last_cmp->prev_clobber = last_clobber; - last_cmp->in_a = XEXP (src, 0); - last_cmp->in_b = XEXP (src, 1); -+ last_cmp->eh_note = eh_note; - last_cmp->orig_mode = src_mode; - all_compares.safe_push (last_cmp); - -@@ -404,6 +424,11 @@ - } - } - } -+ -+ /* If we deleted a compare with a REG_EH_REGION note, we may need to -+ remove EH edges. */ -+ if (need_purge) -+ purge_dead_edges (bb); - } - - /* Find all comparisons in the function. */ ---- a/src/gcc/ira-int.h -+++ b/src/gcc/ira-int.h -@@ -281,6 +281,9 @@ - /* Mode of the allocno which is the mode of the corresponding - pseudo-register. */ - ENUM_BITFIELD (machine_mode) mode : 8; -+ /* Widest mode of the allocno which in at least one case could be -+ for paradoxical subregs where wmode > mode. 
*/ -+ ENUM_BITFIELD (machine_mode) wmode : 8; - /* Register class which should be used for allocation for given - allocno. NO_REGS means that we should use memory. */ - ENUM_BITFIELD (reg_class) aclass : 16; -@@ -313,7 +316,7 @@ - number (0, ...) - 2. Value -1 is used for allocnos spilled by the - reload (at this point pseudo-register has only one allocno) which - did not get stack slot yet. */ -- short int hard_regno; -+ signed int hard_regno : 16; - /* Allocnos with the same regno are linked by the following member. - Allocnos corresponding to inner loops are first in the list (it - corresponds to depth-first traverse of the loops). */ -@@ -430,6 +433,7 @@ - #define ALLOCNO_BAD_SPILL_P(A) ((A)->bad_spill_p) - #define ALLOCNO_ASSIGNED_P(A) ((A)->assigned_p) - #define ALLOCNO_MODE(A) ((A)->mode) -+#define ALLOCNO_WMODE(A) ((A)->wmode) - #define ALLOCNO_PREFS(A) ((A)->allocno_prefs) - #define ALLOCNO_COPIES(A) ((A)->allocno_copies) - #define ALLOCNO_HARD_REG_COSTS(A) ((A)->hard_reg_costs) ---- a/src/gcc/ira-color.c -+++ b/src/gcc/ira-color.c -@@ -1711,6 +1711,7 @@ - { - ira_allocno_t conflict_a = OBJECT_ALLOCNO (conflict_obj); - enum reg_class conflict_aclass; -+ allocno_color_data_t data = ALLOCNO_COLOR_DATA (conflict_a); - - /* Reload can give another class so we need to check all - allocnos. */ -@@ -1782,7 +1783,12 @@ - hard_regno = ira_class_hard_regs[aclass][j]; - ira_assert (hard_regno >= 0); - k = ira_class_hard_reg_index[conflict_aclass][hard_regno]; -- if (k < 0) -+ if (k < 0 -+ /* If HARD_REGNO is not available for CONFLICT_A, -+ the conflict would be ignored, since HARD_REGNO -+ will never be assigned to CONFLICT_A. */ -+ || !TEST_HARD_REG_BIT (data->profitable_hard_regs, -+ hard_regno)) - continue; - full_costs[j] -= conflict_costs[k]; - } ---- a/src/gcc/ifcvt.c -+++ b/src/gcc/ifcvt.c -@@ -1432,10 +1432,17 @@ - end_sequence (); - } - -- /* Don't even try if the comparison operands are weird. */ -+ /* Don't even try if the comparison operands are weird -+ except that the target supports cbranchcc4. */ - if (! general_operand (cmp_a, GET_MODE (cmp_a)) - || ! general_operand (cmp_b, GET_MODE (cmp_b))) -- return NULL_RTX; -+ { -+#if HAVE_cbranchcc4 -+ if (GET_MODE_CLASS (GET_MODE (cmp_a)) != MODE_CC -+ || cmp_b != const0_rtx) -+#endif -+ return NULL_RTX; -+ } - - #if HAVE_conditional_move - unsignedp = (code == LTU || code == GEU -@@ -1753,7 +1760,12 @@ - { - rtx cond, set, insn; - int reverse; -+ int allow_cc_mode = false; -+#if HAVE_cbranchcc4 -+ allow_cc_mode = true; -+#endif - -+ - /* If target is already mentioned in the known condition, return it. */ - if (reg_mentioned_p (target, if_info->cond)) - { -@@ -1874,7 +1886,7 @@ - } - - cond = canonicalize_condition (if_info->jump, cond, reverse, -- earliest, target, false, true); -+ earliest, target, allow_cc_mode, true); - if (! cond || ! reg_mentioned_p (target, cond)) - return NULL; - -@@ -2325,6 +2337,10 @@ - { - rtx cond, set, tmp; - bool reverse; -+ int allow_cc_mode = false; -+#if HAVE_cbranchcc4 -+ allow_cc_mode = true; -+#endif - - if (! any_condjump_p (jump)) - return NULL_RTX; -@@ -2361,7 +2377,7 @@ - /* Otherwise, fall back on canonicalize_condition to do the dirty - work of manipulating MODE_CC values and COMPARE rtx codes. */ - tmp = canonicalize_condition (jump, cond, reverse, earliest, -- NULL_RTX, false, true); -+ NULL_RTX, allow_cc_mode, true); - - /* We don't handle side-effects in the condition, like handling - REG_INC notes and making sure no duplicate conditions are emitted. 
*/ ---- a/src/gcc/expr.c -+++ b/src/gcc/expr.c -@@ -68,22 +68,6 @@ - #include "tree-ssa-address.h" - #include "cfgexpand.h" - --/* Decide whether a function's arguments should be processed -- from first to last or from last to first. -- -- They should if the stack and args grow in opposite directions, but -- only if we have push insns. */ -- --#ifdef PUSH_ROUNDING -- --#ifndef PUSH_ARGS_REVERSED --#if defined (STACK_GROWS_DOWNWARD) != defined (ARGS_GROW_DOWNWARD) --#define PUSH_ARGS_REVERSED /* If it's last to first. */ --#endif --#endif -- --#endif -- - #ifndef STACK_PUSH_CODE - #ifdef STACK_GROWS_DOWNWARD - #define STACK_PUSH_CODE PRE_DEC -@@ -172,37 +156,6 @@ - static rtx const_vector_from_tree (tree); - static void write_complex_part (rtx, rtx, bool); - --/* This macro is used to determine whether move_by_pieces should be called -- to perform a structure copy. */ --#ifndef MOVE_BY_PIECES_P --#define MOVE_BY_PIECES_P(SIZE, ALIGN) \ -- (move_by_pieces_ninsns (SIZE, ALIGN, MOVE_MAX_PIECES + 1) \ -- < (unsigned int) MOVE_RATIO (optimize_insn_for_speed_p ())) --#endif -- --/* This macro is used to determine whether clear_by_pieces should be -- called to clear storage. */ --#ifndef CLEAR_BY_PIECES_P --#define CLEAR_BY_PIECES_P(SIZE, ALIGN) \ -- (move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES + 1) \ -- < (unsigned int) CLEAR_RATIO (optimize_insn_for_speed_p ())) --#endif -- --/* This macro is used to determine whether store_by_pieces should be -- called to "memset" storage with byte values other than zero. */ --#ifndef SET_BY_PIECES_P --#define SET_BY_PIECES_P(SIZE, ALIGN) \ -- (move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES + 1) \ -- < (unsigned int) SET_RATIO (optimize_insn_for_speed_p ())) --#endif -- --/* This macro is used to determine whether store_by_pieces should be -- called to "memcpy" storage when the source is a constant string. */ --#ifndef STORE_BY_PIECES_P --#define STORE_BY_PIECES_P(SIZE, ALIGN) \ -- (move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES + 1) \ -- < (unsigned int) MOVE_RATIO (optimize_insn_for_speed_p ())) --#endif - - /* This is run to set up which modes can be used - directly in memory and to initialize the block move optab. It is run -@@ -843,22 +796,16 @@ - return mode; - } - --/* STORE_MAX_PIECES is the number of bytes at a time that we can -- store efficiently. Due to internal GCC limitations, this is -- MOVE_MAX_PIECES limited by the number of bytes GCC can represent -- for an immediate constant. */ -- --#define STORE_MAX_PIECES MIN (MOVE_MAX_PIECES, 2 * sizeof (HOST_WIDE_INT)) -- - /* Determine whether the LEN bytes can be moved by using several move - instructions. Return nonzero if a call to move_by_pieces should - succeed. 
*/ - - int --can_move_by_pieces (unsigned HOST_WIDE_INT len ATTRIBUTE_UNUSED, -- unsigned int align ATTRIBUTE_UNUSED) -+can_move_by_pieces (unsigned HOST_WIDE_INT len, -+ unsigned int align) - { -- return MOVE_BY_PIECES_P (len, align); -+ return targetm.use_by_pieces_infrastructure_p (len, align, MOVE_BY_PIECES, -+ optimize_insn_for_speed_p ()); - } - - /* Generate several move instructions to copy LEN bytes from block FROM to -@@ -1195,7 +1142,7 @@ - set_mem_size (y, INTVAL (size)); - } - -- if (CONST_INT_P (size) && MOVE_BY_PIECES_P (INTVAL (size), align)) -+ if (CONST_INT_P (size) && can_move_by_pieces (INTVAL (size), align)) - move_by_pieces (x, y, INTVAL (size), align, 0); - else if (emit_block_move_via_movmem (x, y, size, align, - expected_align, expected_size, -@@ -2396,6 +2343,18 @@ - = gen_rtx_EXPR_LIST (mode, gen_rtx_USE (VOIDmode, reg), *call_fusage); - } - -+/* Add a CLOBBER expression for REG to the (possibly empty) list pointed -+ to by CALL_FUSAGE. REG must denote a hard register. */ -+ -+void -+clobber_reg_mode (rtx *call_fusage, rtx reg, enum machine_mode mode) -+{ -+ gcc_assert (REG_P (reg) && REGNO (reg) < FIRST_PSEUDO_REGISTER); -+ -+ *call_fusage -+ = gen_rtx_EXPR_LIST (mode, gen_rtx_CLOBBER (VOIDmode, reg), *call_fusage); -+} -+ - /* Add USE expressions to *CALL_FUSAGE for each of NREGS consecutive regs, - starting at REGNO. All of these registers must be hard registers. */ - -@@ -2498,9 +2457,11 @@ - if (len == 0) - return 1; - -- if (! (memsetp -- ? SET_BY_PIECES_P (len, align) -- : STORE_BY_PIECES_P (len, align))) -+ if (!targetm.use_by_pieces_infrastructure_p (len, align, -+ memsetp -+ ? SET_BY_PIECES -+ : STORE_BY_PIECES, -+ optimize_insn_for_speed_p ())) - return 0; - - align = alignment_for_piecewise_move (STORE_MAX_PIECES, align); -@@ -2576,9 +2537,13 @@ - return to; - } - -- gcc_assert (memsetp -- ? SET_BY_PIECES_P (len, align) -- : STORE_BY_PIECES_P (len, align)); -+ gcc_assert (targetm.use_by_pieces_infrastructure_p -+ (len, align, -+ memsetp -+ ? SET_BY_PIECES -+ : STORE_BY_PIECES, -+ optimize_insn_for_speed_p ())); -+ - data.constfun = constfun; - data.constfundata = constfundata; - data.len = len; -@@ -2815,7 +2780,9 @@ - align = MEM_ALIGN (object); - - if (CONST_INT_P (size) -- && CLEAR_BY_PIECES_P (INTVAL (size), align)) -+ && targetm.use_by_pieces_infrastructure_p (INTVAL (size), align, -+ CLEAR_BY_PIECES, -+ optimize_insn_for_speed_p ())) - clear_by_pieces (object, INTVAL (size), align); - else if (set_storage_via_setmem (object, size, const0_rtx, align, - expected_align, expected_size, -@@ -4221,7 +4188,7 @@ - && CONST_INT_P (size) - && skip == 0 - && MEM_ALIGN (xinner) >= align -- && (MOVE_BY_PIECES_P ((unsigned) INTVAL (size) - used, align)) -+ && can_move_by_pieces ((unsigned) INTVAL (size) - used, align) - /* Here we avoid the case of a structure whose weak alignment - forces many pushes of a small amount of data, - and such small pushes do rounding that causes trouble. */ -@@ -4353,11 +4320,7 @@ - /* Loop over all the words allocated on the stack for this arg. */ - /* We can do it by words, because any scalar bigger than a word - has a size a multiple of a word. */ --#ifndef PUSH_ARGS_REVERSED -- for (i = not_stack; i < size; i++) --#else - for (i = size - 1; i >= not_stack; i--) --#endif - if (i >= not_stack + offset) - emit_push_insn (operand_subword_force (x, i, mode), - word_mode, NULL_TREE, NULL_RTX, align, 0, NULL_RTX, -@@ -7838,7 +7801,7 @@ - && ! 
(target != 0 && safe_from_p (target, exp, 1))) - || TREE_ADDRESSABLE (exp) - || (tree_fits_uhwi_p (TYPE_SIZE_UNIT (type)) -- && (! MOVE_BY_PIECES_P -+ && (! can_move_by_pieces - (tree_to_uhwi (TYPE_SIZE_UNIT (type)), - TYPE_ALIGN (type))) - && ! mostly_zeros_p (exp)))) ---- a/src/gcc/expr.h -+++ b/src/gcc/expr.h -@@ -346,6 +346,7 @@ - /* Mark REG as holding a parameter for the next CALL_INSN. - Mode is TYPE_MODE of the non-promoted parameter, or VOIDmode. */ - extern void use_reg_mode (rtx *, rtx, enum machine_mode); -+extern void clobber_reg_mode (rtx *, rtx, enum machine_mode); - - extern rtx copy_blkmode_to_reg (enum machine_mode, tree); - -@@ -356,6 +357,13 @@ - use_reg_mode (fusage, reg, VOIDmode); - } - -+/* Mark REG as clobbered by the call with FUSAGE as CALL_INSN_FUNCTION_USAGE. */ -+static inline void -+clobber_reg (rtx *fusage, rtx reg) -+{ -+ clobber_reg_mode (fusage, reg, VOIDmode); -+} -+ - /* Mark NREGS consecutive regs, starting at REGNO, as holding parameters - for the next CALL_INSN. */ - extern void use_regs (rtx *, int, int); ---- a/src/gcc/go/ChangeLog.linaro -+++ b/src/gcc/go/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/gcc/genattrtab.c -+++ b/src/gcc/genattrtab.c -@@ -4765,6 +4765,7 @@ - - static struct bypass_list *all_bypasses; - static size_t n_bypasses; -+static size_t n_bypassed; - - static void - gen_bypass_1 (const char *s, size_t len) -@@ -4810,12 +4811,18 @@ - struct bypass_list *b; - struct insn_reserv *r; - -+ n_bypassed = 0; -+ - /* The reservation list is likely to be much longer than the bypass - list. */ - for (r = all_insn_reservs; r; r = r->next) - for (b = all_bypasses; b; b = b->next) - if (fnmatch (b->pattern, r->name, 0) == 0) -- r->bypassed = true; -+ { -+ n_bypassed++; -+ r->bypassed = true; -+ break; -+ } - } - - /* Check that attribute NAME is used in define_insn_reservation condition -@@ -5074,7 +5081,7 @@ - process_bypasses (); - - byps_exp = rtx_alloc (COND); -- XVEC (byps_exp, 0) = rtvec_alloc (n_bypasses * 2); -+ XVEC (byps_exp, 0) = rtvec_alloc (n_bypassed * 2); - XEXP (byps_exp, 1) = make_numeric_value (0); - for (decl = all_insn_reservs, i = 0; - decl; ---- a/src/gcc/ada/ChangeLog.linaro -+++ b/src/gcc/ada/ChangeLog.linaro -@@ -0,0 +1,95 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. 
-+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-05-13 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209653,209866,209871. -+ -+ 2014-04-28 Richard Henderson <rth@redhat.com> -+ -+ * gcc-interface/Makefile.in: Support aarch64-linux. -+ -+ 2014-04-28 Eric Botcazou <ebotcazou@adacore.com> -+ -+ * exp_dbug.ads (Get_External_Name): Add 'False' default to Has_Suffix, -+ add 'Suffix' parameter and adjust comment. -+ (Get_External_Name_With_Suffix): Delete. -+ * exp_dbug.adb (Get_External_Name_With_Suffix): Merge into... -+ (Get_External_Name): ...here. Add 'False' default to Has_Suffix, add -+ 'Suffix' parameter. -+ (Get_Encoded_Name): Remove 2nd argument in call to Get_External_Name. -+ Call Get_External_Name instead of Get_External_Name_With_Suffix. -+ (Get_Secondary_DT_External_Name): Likewise. -+ * exp_cg.adb (Write_Call_Info): Likewise. -+ * exp_disp.adb (Export_DT): Likewise. -+ (Import_DT): Likewise. -+ * comperr.ads (Compiler_Abort): Remove Code parameter and add From_GCC -+ parameter with False default. -+ * comperr.adb (Compiler_Abort): Likewise. Adjust accordingly. -+ * types.h (Fat_Pointer): Rename into... -+ (String_Pointer): ...this. Add comment on interfacing rules. -+ * fe.h (Compiler_Abort): Adjust for above renaming. -+ (Error_Msg_N): Likewise. -+ (Error_Msg_NE): Likewise. -+ (Get_External_Name): Likewise. Add third parameter. -+ (Get_External_Name_With_Suffix): Delete. -+ * gcc-interface/decl.c (STDCALL_PREFIX): Define. -+ (create_concat_name): Adjust call to Get_External_Name, remove call to -+ Get_External_Name_With_Suffix, use STDCALL_PREFIX, adjust for renaming. -+ * gcc-interface/trans.c (post_error): Likewise. -+ (post_error_ne): Likewise. -+ * gcc-interface/misc.c (internal_error_function): Likewise. -+ -+ 2014-04-22 Richard Henderson <rth@redhat.com> -+ -+ * init.c [__linux__] (HAVE_GNAT_ALTERNATE_STACK): New define. -+ (__gnat_alternate_stack): Enable for all linux except ia64. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/gcc/common/config/aarch64/aarch64-common.c -+++ b/src/gcc/common/config/aarch64/aarch64-common.c -@@ -44,6 +44,8 @@ - { - /* Enable section anchors by default at -O1 or higher. */ - { OPT_LEVELS_1_PLUS, OPT_fsection_anchors, NULL, 1 }, -+ /* Enable -fsched-pressure by default when optimizing. */ -+ { OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 }, - /* Enable redundant extension instructions removal at -O2 and higher. 
*/ - { OPT_LEVELS_2_PLUS, OPT_free, NULL, 1 }, - { OPT_LEVELS_NONE, 0, NULL, 0 } ---- a/src/gcc/fortran/ChangeLog.linaro -+++ b/src/gcc/fortran/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/gcc/configure.ac -+++ b/src/gcc/configure.ac -@@ -809,7 +809,7 @@ - ) - AC_SUBST(CONFIGURE_SPECS) - --ACX_PKGVERSION([GCC]) -+ACX_PKGVERSION([Linaro GCC `cat $srcdir/LINARO-VERSION`]) - ACX_BUGURL([http://gcc.gnu.org/bugs.html]) - - # Sanity check enable_languages in case someone does not run the toplevel ---- a/src/gcc/ira-build.c -+++ b/src/gcc/ira-build.c -@@ -523,6 +523,7 @@ - ALLOCNO_BAD_SPILL_P (a) = false; - ALLOCNO_ASSIGNED_P (a) = false; - ALLOCNO_MODE (a) = (regno < 0 ? VOIDmode : PSEUDO_REGNO_MODE (regno)); -+ ALLOCNO_WMODE (a) = ALLOCNO_MODE (a); - ALLOCNO_PREFS (a) = NULL; - ALLOCNO_COPIES (a) = NULL; - ALLOCNO_HARD_REG_COSTS (a) = NULL; -@@ -892,6 +893,7 @@ - parent = ALLOCNO_LOOP_TREE_NODE (a)->parent; - cap = ira_create_allocno (ALLOCNO_REGNO (a), true, parent); - ALLOCNO_MODE (cap) = ALLOCNO_MODE (a); -+ ALLOCNO_WMODE (cap) = ALLOCNO_WMODE (a); - aclass = ALLOCNO_CLASS (a); - ira_set_allocno_class (cap, aclass); - ira_create_allocno_objects (cap); -@@ -1856,9 +1858,9 @@ - - /* This recursive function creates allocnos corresponding to - pseudo-registers containing in X. True OUTPUT_P means that X is -- a lvalue. */ -+ an lvalue. PARENT corresponds to the parent expression of X. 
*/ - static void --create_insn_allocnos (rtx x, bool output_p) -+create_insn_allocnos (rtx x, rtx outer, bool output_p) - { - int i, j; - const char *fmt; -@@ -1873,7 +1875,15 @@ - ira_allocno_t a; - - if ((a = ira_curr_regno_allocno_map[regno]) == NULL) -- a = ira_create_allocno (regno, false, ira_curr_loop_tree_node); -+ { -+ a = ira_create_allocno (regno, false, ira_curr_loop_tree_node); -+ if (outer != NULL && GET_CODE (outer) == SUBREG) -+ { -+ enum machine_mode wmode = GET_MODE (outer); -+ if (GET_MODE_SIZE (wmode) > GET_MODE_SIZE (ALLOCNO_WMODE (a))) -+ ALLOCNO_WMODE (a) = wmode; -+ } -+ } - - ALLOCNO_NREFS (a)++; - ALLOCNO_FREQ (a) += REG_FREQ_FROM_BB (curr_bb); -@@ -1884,25 +1894,25 @@ - } - else if (code == SET) - { -- create_insn_allocnos (SET_DEST (x), true); -- create_insn_allocnos (SET_SRC (x), false); -+ create_insn_allocnos (SET_DEST (x), NULL, true); -+ create_insn_allocnos (SET_SRC (x), NULL, false); - return; - } - else if (code == CLOBBER) - { -- create_insn_allocnos (XEXP (x, 0), true); -+ create_insn_allocnos (XEXP (x, 0), NULL, true); - return; - } - else if (code == MEM) - { -- create_insn_allocnos (XEXP (x, 0), false); -+ create_insn_allocnos (XEXP (x, 0), NULL, false); - return; - } - else if (code == PRE_DEC || code == POST_DEC || code == PRE_INC || - code == POST_INC || code == POST_MODIFY || code == PRE_MODIFY) - { -- create_insn_allocnos (XEXP (x, 0), true); -- create_insn_allocnos (XEXP (x, 0), false); -+ create_insn_allocnos (XEXP (x, 0), NULL, true); -+ create_insn_allocnos (XEXP (x, 0), NULL, false); - return; - } - -@@ -1910,10 +1920,10 @@ - for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) - { - if (fmt[i] == 'e') -- create_insn_allocnos (XEXP (x, i), output_p); -+ create_insn_allocnos (XEXP (x, i), x, output_p); - else if (fmt[i] == 'E') - for (j = 0; j < XVECLEN (x, i); j++) -- create_insn_allocnos (XVECEXP (x, i, j), output_p); -+ create_insn_allocnos (XVECEXP (x, i, j), x, output_p); - } - } - -@@ -1932,7 +1942,7 @@ - ira_assert (bb != NULL); - FOR_BB_INSNS_REVERSE (bb, insn) - if (NONDEBUG_INSN_P (insn)) -- create_insn_allocnos (PATTERN (insn), false); -+ create_insn_allocnos (PATTERN (insn), NULL, false); - /* It might be a allocno living through from one subloop to - another. */ - EXECUTE_IF_SET_IN_REG_SET (df_get_live_in (bb), FIRST_PSEUDO_REGISTER, i, bi) ---- a/src/gcc/calls.c -+++ b/src/gcc/calls.c -@@ -1104,8 +1104,6 @@ - { - CUMULATIVE_ARGS *args_so_far_pnt = get_cumulative_args (args_so_far); - location_t loc = EXPR_LOCATION (exp); -- /* 1 if scanning parms front to back, -1 if scanning back to front. */ -- int inc; - - /* Count arg position in order args appear. */ - int argpos; -@@ -1116,22 +1114,9 @@ - args_size->var = 0; - - /* In this loop, we consider args in the order they are written. -- We fill up ARGS from the front or from the back if necessary -- so that in any case the first arg to be pushed ends up at the front. */ -+ We fill up ARGS from the back. */ - -- if (PUSH_ARGS_REVERSED) -- { -- i = num_actuals - 1, inc = -1; -- /* In this case, must reverse order of args -- so that we compute and push the last arg first. */ -- } -- else -- { -- i = 0, inc = 1; -- } -- -- /* First fill in the actual arguments in the ARGS array, splitting -- complex arguments if necessary. 
*/ -+ i = num_actuals - 1; - { - int j = i; - call_expr_arg_iterator iter; -@@ -1140,7 +1125,7 @@ - if (struct_value_addr_value) - { - args[j].tree_value = struct_value_addr_value; -- j += inc; -+ j--; - } - FOR_EACH_CALL_EXPR_ARG (arg, iter, exp) - { -@@ -1152,17 +1137,17 @@ - { - tree subtype = TREE_TYPE (argtype); - args[j].tree_value = build1 (REALPART_EXPR, subtype, arg); -- j += inc; -+ j--; - args[j].tree_value = build1 (IMAGPART_EXPR, subtype, arg); - } - else - args[j].tree_value = arg; -- j += inc; -+ j--; - } - } - - /* I counts args in order (to be) pushed; ARGPOS counts in order written. */ -- for (argpos = 0; argpos < num_actuals; i += inc, argpos++) -+ for (argpos = 0; argpos < num_actuals; i--, argpos++) - { - tree type = TREE_TYPE (args[i].tree_value); - int unsignedp; -@@ -2952,9 +2937,8 @@ - - compute_argument_addresses (args, argblock, num_actuals); - -- /* If we push args individually in reverse order, perform stack alignment -- before the first push (the last arg). */ -- if (PUSH_ARGS_REVERSED && argblock == 0 -+ /* Perform stack alignment before the first push (the last arg). */ -+ if (argblock == 0 - && adjusted_args_size.constant > reg_parm_stack_space - && adjusted_args_size.constant != unadjusted_args_size) - { -@@ -3097,12 +3081,6 @@ - sibcall_failure = 1; - } - -- /* If we pushed args in forward order, perform stack alignment -- after pushing the last arg. */ -- if (!PUSH_ARGS_REVERSED && argblock == 0) -- anti_adjust_stack (GEN_INT (adjusted_args_size.constant -- - unadjusted_args_size)); -- - /* If register arguments require space on the stack and stack space - was not preallocated, allocate stack space here for arguments - passed in registers. */ -@@ -3152,8 +3130,7 @@ - if (pass == 1 && (return_flags & ERF_RETURNS_ARG)) - { - int arg_nr = return_flags & ERF_RETURN_ARG_MASK; -- if (PUSH_ARGS_REVERSED) -- arg_nr = num_actuals - arg_nr - 1; -+ arg_nr = num_actuals - arg_nr - 1; - if (arg_nr >= 0 - && arg_nr < num_actuals - && args[arg_nr].reg -@@ -3597,7 +3574,6 @@ - isn't present here, so we default to native calling abi here. */ - tree fndecl ATTRIBUTE_UNUSED = NULL_TREE; /* library calls default to host calling abi ? */ - tree fntype ATTRIBUTE_UNUSED = NULL_TREE; /* library calls default to host calling abi ? */ -- int inc; - int count; - rtx argblock = 0; - CUMULATIVE_ARGS args_so_far_v; -@@ -3946,22 +3922,13 @@ - argblock = push_block (GEN_INT (args_size.constant), 0, 0); - } - -- /* If we push args individually in reverse order, perform stack alignment -+ /* We push args individually in reverse order, perform stack alignment - before the first push (the last arg). */ -- if (argblock == 0 && PUSH_ARGS_REVERSED) -+ if (argblock == 0) - anti_adjust_stack (GEN_INT (args_size.constant - - original_args_size.constant)); - -- if (PUSH_ARGS_REVERSED) -- { -- inc = -1; -- argnum = nargs - 1; -- } -- else -- { -- inc = 1; -- argnum = 0; -- } -+ argnum = nargs - 1; - - #ifdef REG_PARM_STACK_SPACE - if (ACCUMULATE_OUTGOING_ARGS) -@@ -3978,7 +3945,7 @@ - - /* ARGNUM indexes the ARGVEC array in the order in which the arguments - are to be pushed. */ -- for (count = 0; count < nargs; count++, argnum += inc) -+ for (count = 0; count < nargs; count++, argnum--) - { - enum machine_mode mode = argvec[argnum].mode; - rtx val = argvec[argnum].value; -@@ -4080,17 +4047,8 @@ - } - } - -- /* If we pushed args in forward order, perform stack alignment -- after pushing the last arg. 
*/ -- if (argblock == 0 && !PUSH_ARGS_REVERSED) -- anti_adjust_stack (GEN_INT (args_size.constant -- - original_args_size.constant)); -+ argnum = nargs - 1; - -- if (PUSH_ARGS_REVERSED) -- argnum = nargs - 1; -- else -- argnum = 0; -- - fun = prepare_call_address (NULL, fun, NULL, &call_fusage, 0, 0); - - /* Now load any reg parms into their regs. */ -@@ -4097,7 +4055,7 @@ - - /* ARGNUM indexes the ARGVEC array in the order in which the arguments - are to be pushed. */ -- for (count = 0; count < nargs; count++, argnum += inc) -+ for (count = 0; count < nargs; count++, argnum--) - { - enum machine_mode mode = argvec[argnum].mode; - rtx val = argvec[argnum].value; ---- a/src/gcc/cfgexpand.c -+++ b/src/gcc/cfgexpand.c -@@ -1292,7 +1292,12 @@ - else if (TREE_CODE (var) == VAR_DECL && DECL_HARD_REGISTER (var)) - { - if (really_expand) -- expand_one_hard_reg_var (var); -+ { -+ expand_one_hard_reg_var (var); -+ if (!DECL_HARD_REGISTER (var)) -+ /* Invalid register specification. */ -+ expand_one_error_var (var); -+ } - } - else if (use_register_for_decl (var)) - { ---- a/src/gcc/explow.c -+++ b/src/gcc/explow.c -@@ -329,11 +329,13 @@ - an address in the address space's address mode, or vice versa (TO_MODE says - which way). We take advantage of the fact that pointers are not allowed to - overflow by commuting arithmetic operations over conversions so that address -- arithmetic insns can be used. */ -+ arithmetic insns can be used. IN_CONST is true if this conversion is inside -+ a CONST. */ - --rtx --convert_memory_address_addr_space (enum machine_mode to_mode ATTRIBUTE_UNUSED, -- rtx x, addr_space_t as ATTRIBUTE_UNUSED) -+static rtx -+convert_memory_address_addr_space_1 (enum machine_mode to_mode ATTRIBUTE_UNUSED, -+ rtx x, addr_space_t as ATTRIBUTE_UNUSED, -+ bool in_const) - { - #ifndef POINTERS_EXTEND_UNSIGNED - gcc_assert (GET_MODE (x) == to_mode || GET_MODE (x) == VOIDmode); -@@ -389,32 +391,29 @@ - - case CONST: - return gen_rtx_CONST (to_mode, -- convert_memory_address_addr_space -- (to_mode, XEXP (x, 0), as)); -+ convert_memory_address_addr_space_1 -+ (to_mode, XEXP (x, 0), as, true)); - break; - - case PLUS: - case MULT: -- /* FIXME: For addition, we used to permute the conversion and -- addition operation only if one operand is a constant and -- converting the constant does not change it or if one operand -- is a constant and we are using a ptr_extend instruction -- (POINTERS_EXTEND_UNSIGNED < 0) even if the resulting address -- may overflow/underflow. We relax the condition to include -- zero-extend (POINTERS_EXTEND_UNSIGNED > 0) since the other -- parts of the compiler depend on it. See PR 49721. -- -+ /* For addition we can safely permute the conversion and addition -+ operation if one operand is a constant and converting the constant -+ does not change it or if one operand is a constant and we are -+ using a ptr_extend instruction (POINTERS_EXTEND_UNSIGNED < 0). - We can always safely permute them if we are making the address -- narrower. */ -+ narrower. Inside a CONST RTL, this is safe for both pointers -+ zero or sign extended as pointers cannot wrap. 
*/ - if (GET_MODE_SIZE (to_mode) < GET_MODE_SIZE (from_mode) - || (GET_CODE (x) == PLUS - && CONST_INT_P (XEXP (x, 1)) -- && (POINTERS_EXTEND_UNSIGNED != 0 -- || XEXP (x, 1) == convert_memory_address_addr_space -- (to_mode, XEXP (x, 1), as)))) -+ && ((in_const && POINTERS_EXTEND_UNSIGNED != 0) -+ || XEXP (x, 1) == convert_memory_address_addr_space_1 -+ (to_mode, XEXP (x, 1), as, in_const) -+ || POINTERS_EXTEND_UNSIGNED < 0))) - return gen_rtx_fmt_ee (GET_CODE (x), to_mode, -- convert_memory_address_addr_space -- (to_mode, XEXP (x, 0), as), -+ convert_memory_address_addr_space_1 -+ (to_mode, XEXP (x, 0), as, in_const), - XEXP (x, 1)); - break; - -@@ -426,6 +425,18 @@ - x, POINTERS_EXTEND_UNSIGNED); - #endif /* defined(POINTERS_EXTEND_UNSIGNED) */ - } -+ -+/* Given X, a memory address in address space AS' pointer mode, convert it to -+ an address in the address space's address mode, or vice versa (TO_MODE says -+ which way). We take advantage of the fact that pointers are not allowed to -+ overflow by commuting arithmetic operations over conversions so that address -+ arithmetic insns can be used. */ -+ -+rtx -+convert_memory_address_addr_space (enum machine_mode to_mode, rtx x, addr_space_t as) -+{ -+ return convert_memory_address_addr_space_1 (to_mode, x, as, false); -+} - - /* Return something equivalent to X but valid as a memory address for something - of mode MODE in the named address space AS. When X is not itself valid, ---- a/src/gcc/lto/ChangeLog.linaro -+++ b/src/gcc/lto/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/gcc/po/ChangeLog.linaro -+++ b/src/gcc/po/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. 
-+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/gcc/varasm.c -+++ b/src/gcc/varasm.c -@@ -1335,6 +1335,11 @@ - /* As a register variable, it has no section. */ - return; - } -+ /* Avoid internal errors from invalid register -+ specifications. */ -+ SET_DECL_ASSEMBLER_NAME (decl, NULL_TREE); -+ DECL_HARD_REGISTER (decl) = 0; -+ return; - } - /* Now handle ordinary static variables and functions (in memory). - Also handle vars declared register invalidly. */ ---- a/src/gcc/sched-deps.c -+++ b/src/gcc/sched-deps.c -@@ -2828,35 +2828,42 @@ - sched_deps_info->finish_rhs (); - } - --/* Try to group comparison and the following conditional jump INSN if -- they're already adjacent. This is to prevent scheduler from scheduling -- them apart. */ -+/* Try to group two fuseable insns together to prevent scheduler -+ from scheduling them apart. */ - - static void --try_group_insn (rtx insn) -+sched_macro_fuse_insns (rtx insn) - { -- unsigned int condreg1, condreg2; -- rtx cc_reg_1; - rtx prev; - -- if (!any_condjump_p (insn)) -- return; -+ if (any_condjump_p (insn)) -+ { -+ unsigned int condreg1, condreg2; -+ rtx cc_reg_1; -+ targetm.fixed_condition_code_regs (&condreg1, &condreg2); -+ cc_reg_1 = gen_rtx_REG (CCmode, condreg1); -+ prev = prev_nonnote_nondebug_insn (insn); -+ if (!reg_referenced_p (cc_reg_1, PATTERN (insn)) -+ || !prev -+ || !modified_in_p (cc_reg_1, prev)) -+ return; -+ } -+ else -+ { -+ rtx insn_set = single_set (insn); - -- targetm.fixed_condition_code_regs (&condreg1, &condreg2); -- cc_reg_1 = gen_rtx_REG (CCmode, condreg1); -- prev = prev_nonnote_nondebug_insn (insn); -- if (!reg_referenced_p (cc_reg_1, PATTERN (insn)) -- || !prev -- || !modified_in_p (cc_reg_1, prev)) -- return; -+ prev = prev_nonnote_nondebug_insn (insn); -+ if (!prev -+ || !insn_set -+ || !single_set (prev) -+ || !modified_in_p (SET_DEST (insn_set), prev)) -+ return; - -- /* Different microarchitectures support macro fusions for different -- combinations of insn pairs. */ -- if (!targetm.sched.macro_fusion_pair_p -- || !targetm.sched.macro_fusion_pair_p (prev, insn)) -- return; -+ } - -- SCHED_GROUP_P (insn) = 1; -+ if (targetm.sched.macro_fusion_pair_p (prev, insn)) -+ SCHED_GROUP_P (insn) = 1; -+ - } - - /* Analyze an INSN with pattern X to find all dependencies. */ -@@ -2885,7 +2892,7 @@ - /* Group compare and branch insns for macro-fusion. 
*/ - if (targetm.sched.macro_fusion_p - && targetm.sched.macro_fusion_p ()) -- try_group_insn (insn); -+ sched_macro_fuse_insns (insn); - - if (may_trap_p (x)) - /* Avoid moving trapping instructions across function calls that might ---- a/src/gcc/var-tracking.c -+++ b/src/gcc/var-tracking.c -@@ -5997,7 +5997,8 @@ - { - cselib_val *oval = cselib_lookup (oloc, GET_MODE (oloc), 0, VOIDmode); - -- gcc_assert (oval != v); -+ if (oval == v) -+ return; - gcc_assert (REG_P (oloc) || MEM_P (oloc)); - - if (oval && !cselib_preserved_value_p (oval)) ---- a/src/gcc/system.h -+++ b/src/gcc/system.h -@@ -830,7 +830,8 @@ - CAN_DEBUG_WITHOUT_FP UNLIKELY_EXECUTED_TEXT_SECTION_NAME \ - HOT_TEXT_SECTION_NAME LEGITIMATE_CONSTANT_P ALWAYS_STRIP_DOTDOT \ - OUTPUT_ADDR_CONST_EXTRA SMALL_REGISTER_CLASSES ASM_OUTPUT_IDENT \ -- ASM_BYTE_OP MEMBER_TYPE_FORCES_BLK -+ ASM_BYTE_OP MEMBER_TYPE_FORCES_BLK CLEAR_BY_PIECES_P \ -+ MOVE_BY_PIECES_P SET_BY_PIECES_P STORE_BY_PIECES_P - - /* Target macros only used for code built for the target, that have - moved to libgcc-tm.h or have never been present elsewhere. */ -@@ -912,7 +913,8 @@ - USE_COMMON_FOR_ONE_ONLY IFCVT_EXTRA_FIELDS IFCVT_INIT_EXTRA_FIELDS \ - CASE_USE_BIT_TESTS FIXUNS_TRUNC_LIKE_FIX_TRUNC \ - GO_IF_MODE_DEPENDENT_ADDRESS DELAY_SLOTS_FOR_EPILOGUE \ -- ELIGIBLE_FOR_EPILOGUE_DELAY TARGET_C99_FUNCTIONS TARGET_HAS_SINCOS -+ ELIGIBLE_FOR_EPILOGUE_DELAY TARGET_C99_FUNCTIONS TARGET_HAS_SINCOS \ -+ LARGEST_EXPONENT_IS_NORNAL ROUND_TOWARDS_ZERO - - /* Hooks that are no longer used. */ - #pragma GCC poison LANG_HOOKS_FUNCTION_MARK LANG_HOOKS_FUNCTION_FREE \ ---- a/src/gcc/config.gcc -+++ b/src/gcc/config.gcc -@@ -312,8 +312,9 @@ - aarch64*-*-*) - cpu_type=aarch64 - need_64bit_hwint=yes -- extra_headers="arm_neon.h" -+ extra_headers="arm_neon.h arm_acle.h" - extra_objs="aarch64-builtins.o aarch-common.o" -+ target_gtfiles="\$(srcdir)/config/aarch64/aarch64-builtins.c" - target_has_targetm_common=yes - ;; - alpha*-*-*) ---- a/src/gcc/Makefile.in -+++ b/src/gcc/Makefile.in -@@ -814,10 +814,12 @@ - DEVPHASE := $(srcdir)/DEV-PHASE # experimental, prerelease, "" - DATESTAMP := $(srcdir)/DATESTAMP # YYYYMMDD or empty - REVISION := $(srcdir)/REVISION # [BRANCH revision XXXXXX] -+LINAROVER := $(srcdir)/LINARO-VERSION # M.x-YYYY.MM[-S][~dev] - - BASEVER_c := $(shell cat $(BASEVER)) - DEVPHASE_c := $(shell cat $(DEVPHASE)) - DATESTAMP_c := $(shell cat $(DATESTAMP)) -+LINAROVER_c := $(shell cat $(LINAROVER)) - - ifeq (,$(wildcard $(REVISION))) - REVISION_c := -@@ -838,6 +840,7 @@ - DATESTAMP_s := "\"$(if $(DEVPHASE_c), $(DATESTAMP_c))\"" - PKGVERSION_s:= "\"@PKGVERSION@\"" - BUGURL_s := "\"@REPORT_BUGS_TO@\"" -+LINAROVER_s := "\"$(LINAROVER_c)\"" - - PKGVERSION := @PKGVERSION@ - BUGURL_TEXI := @REPORT_BUGS_TEXI@ -@@ -2542,8 +2545,9 @@ - -DSTANDARD_EXEC_PREFIX=\"$(libdir)/gcc/\" \ - @TARGET_SYSTEM_ROOT_DEFINE@ - --CFLAGS-cppbuiltin.o += $(PREPROCESSOR_DEFINES) -DBASEVER=$(BASEVER_s) --cppbuiltin.o: $(BASEVER) -+CFLAGS-cppbuiltin.o += $(PREPROCESSOR_DEFINES) -DBASEVER=$(BASEVER_s) \ -+ -DLINAROVER=$(LINAROVER_s) -+cppbuiltin.o: $(BASEVER) $(LINAROVER) - - CFLAGS-cppdefault.o += $(PREPROCESSOR_DEFINES) - -@@ -2799,8 +2803,7 @@ - gcov.texi trouble.texi bugreport.texi service.texi \ - contribute.texi compat.texi funding.texi gnu.texi gpl_v3.texi \ - fdl.texi contrib.texi cppenv.texi cppopts.texi avr-mmcu.texi \ -- implement-c.texi implement-cxx.texi arm-neon-intrinsics.texi \ -- arm-acle-intrinsics.texi -+ implement-c.texi implement-cxx.texi - - # we explicitly use $(srcdir)/doc/tm.texi 
here to avoid confusion with - # the generated tm.texi; the latter might have a more recent timestamp, ---- a/src/gcc/tree-cfg.c -+++ b/src/gcc/tree-cfg.c -@@ -2594,7 +2594,7 @@ - near its "logical" location. This is of most help to humans looking - at debugging dumps. */ - --static basic_block -+basic_block - split_edge_bb_loc (edge edge_in) - { - basic_block dest = edge_in->dest; ---- a/src/gcc/tree-cfg.h -+++ b/src/gcc/tree-cfg.h -@@ -62,6 +62,7 @@ - extern tree gimple_block_label (basic_block); - extern void add_phi_args_after_copy_bb (basic_block); - extern void add_phi_args_after_copy (basic_block *, unsigned, edge); -+extern basic_block split_edge_bb_loc (edge); - extern bool gimple_duplicate_sese_region (edge, edge, basic_block *, unsigned, - basic_block *, bool); - extern bool gimple_duplicate_sese_tail (edge, edge, basic_block *, unsigned, ---- a/src/gcc/ree.c -+++ b/src/gcc/ree.c -@@ -794,6 +794,14 @@ - if (!SCALAR_INT_MODE_P (GET_MODE (SET_DEST (PATTERN (cand->insn))))) - return false; - -+ enum machine_mode dst_mode = GET_MODE (SET_DEST (PATTERN (cand->insn))); -+ rtx src_reg = get_extended_src_reg (SET_SRC (PATTERN (cand->insn))); -+ -+ /* Ensure the number of hard registers of the copy match. */ -+ if (HARD_REGNO_NREGS (REGNO (src_reg), dst_mode) -+ != HARD_REGNO_NREGS (REGNO (src_reg), GET_MODE (src_reg))) -+ return false; -+ - /* There's only one reaching def. */ - rtx def_insn = state->defs_list[0]; - -@@ -843,7 +851,7 @@ - start_sequence (); - rtx pat = PATTERN (cand->insn); - rtx new_dst = gen_rtx_REG (GET_MODE (SET_DEST (pat)), -- REGNO (XEXP (SET_SRC (pat), 0))); -+ REGNO (get_extended_src_reg (SET_SRC (pat)))); - rtx new_src = gen_rtx_REG (GET_MODE (SET_DEST (pat)), - REGNO (SET_DEST (pat))); - emit_move_insn (new_dst, new_src); ---- a/src/gcc/config/s390/s390.c -+++ b/src/gcc/config/s390/s390.c -@@ -12066,6 +12066,18 @@ - register_pass (&insert_pass_s390_early_mach); - } - -+/* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */ -+ -+static bool -+s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size, -+ unsigned int align ATTRIBUTE_UNUSED, -+ enum by_pieces_operation op ATTRIBUTE_UNUSED, -+ bool speed_p ATTRIBUTE_UNUSED) -+{ -+ return (size == 1 || size == 2 -+ || size == 4 || (TARGET_ZARCH && size == 8)); -+} -+ - /* Initialize GCC target structure. */ - - #undef TARGET_ASM_ALIGNED_HI_OP -@@ -12248,6 +12260,10 @@ - #undef TARGET_SET_UP_BY_PROLOGUE - #define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue - -+#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P -+#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \ -+ s390_use_by_pieces_infrastructure_p -+ - struct gcc_target targetm = TARGET_INITIALIZER; - - #include "gt-s390.h" ---- a/src/gcc/config/s390/s390.h -+++ b/src/gcc/config/s390/s390.h -@@ -752,24 +752,6 @@ - #define MOVE_MAX_PIECES (TARGET_ZARCH ? 8 : 4) - #define MAX_MOVE_MAX 16 - --/* Determine whether to use move_by_pieces or block move insn. */ --#define MOVE_BY_PIECES_P(SIZE, ALIGN) \ -- ( (SIZE) == 1 || (SIZE) == 2 || (SIZE) == 4 \ -- || (TARGET_ZARCH && (SIZE) == 8) ) -- --/* Determine whether to use clear_by_pieces or block clear insn. */ --#define CLEAR_BY_PIECES_P(SIZE, ALIGN) \ -- ( (SIZE) == 1 || (SIZE) == 2 || (SIZE) == 4 \ -- || (TARGET_ZARCH && (SIZE) == 8) ) -- --/* This macro is used to determine whether store_by_pieces should be -- called to "memcpy" storage when the source is a constant string. 
*/ --#define STORE_BY_PIECES_P(SIZE, ALIGN) MOVE_BY_PIECES_P (SIZE, ALIGN) -- --/* Likewise to decide whether to "memset" storage with byte values -- other than zero. */ --#define SET_BY_PIECES_P(SIZE, ALIGN) STORE_BY_PIECES_P (SIZE, ALIGN) -- - /* Don't perform CSE on function addresses. */ - #define NO_FUNCTION_CSE - ---- a/src/gcc/config/i386/i386.c -+++ b/src/gcc/config/i386/i386.c -@@ -25796,6 +25796,9 @@ - rtx compare_set = NULL_RTX, test_if, cond; - rtx alu_set = NULL_RTX, addr = NULL_RTX; - -+ if (!any_condjump_p (condjmp)) -+ return false; -+ - if (get_attr_type (condgen) != TYPE_TEST - && get_attr_type (condgen) != TYPE_ICMP - && get_attr_type (condgen) != TYPE_INCDEC ---- a/src/gcc/config/sh/sh.c -+++ b/src/gcc/config/sh/sh.c -@@ -317,6 +317,10 @@ - static bool sh_legitimate_constant_p (enum machine_mode, rtx); - static int mov_insn_size (enum machine_mode, bool); - static int mov_insn_alignment_mask (enum machine_mode, bool); -+static bool sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT, -+ unsigned int, -+ enum by_pieces_operation, -+ bool); - static bool sequence_insn_p (rtx); - static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool); - static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&, -@@ -601,6 +605,10 @@ - #undef TARGET_FIXED_CONDITION_CODE_REGS - #define TARGET_FIXED_CONDITION_CODE_REGS sh_fixed_condition_code_regs - -+#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P -+#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \ -+ sh_use_by_pieces_infrastructure_p -+ - /* Machine-specific symbol_ref flags. */ - #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0) - -@@ -13533,4 +13541,27 @@ - return NULL_RTX; - } - -+/* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */ -+ -+static bool -+sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size, -+ unsigned int align, -+ enum by_pieces_operation op, -+ bool speed_p) -+{ -+ switch (op) -+ { -+ case MOVE_BY_PIECES: -+ return move_by_pieces_ninsns (size, align, MOVE_MAX_PIECES + 1) -+ < (!speed_p ? 2 : (align >= 32) ? 16 : 2); -+ case STORE_BY_PIECES: -+ case SET_BY_PIECES: -+ return move_by_pieces_ninsns (size, align, STORE_MAX_PIECES + 1) -+ < (!speed_p ? 2 : (align >= 32) ? 16 : 2); -+ default: -+ return default_use_by_pieces_infrastructure_p (size, align, -+ op, speed_p); -+ } -+} -+ - #include "gt-sh.h" ---- a/src/gcc/config/sh/sh.h -+++ b/src/gcc/config/sh/sh.h -@@ -1584,16 +1584,6 @@ - #define USE_STORE_PRE_DECREMENT(mode) ((mode == SImode || mode == DImode) \ - ? 0 : TARGET_SH1) - --#define MOVE_BY_PIECES_P(SIZE, ALIGN) \ -- (move_by_pieces_ninsns (SIZE, ALIGN, MOVE_MAX_PIECES + 1) \ -- < (optimize_size ? 2 : ((ALIGN >= 32) ? 16 : 2))) -- --#define STORE_BY_PIECES_P(SIZE, ALIGN) \ -- (move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES + 1) \ -- < (optimize_size ? 2 : ((ALIGN >= 32) ? 16 : 2))) -- --#define SET_BY_PIECES_P(SIZE, ALIGN) STORE_BY_PIECES_P(SIZE, ALIGN) -- - /* Macros to check register numbers against specific register classes. */ - - /* These assume that REGNO is a hard or pseudo reg number. 
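The s390 and sh hunks above replace the compile-time *_BY_PIECES_P target macros with the TARGET_USE_BY_PIECES_INFRASTRUCTURE_P hook, which receives the size, alignment, operation kind and a speed/size flag, so each backend keeps its old heuristic in a single function instead of four macros. As an illustration only (not part of gcc-linaro.diff), the size test the s390 hook encodes can be restated as a small standalone C function; use_by_pieces_ok and target_zarch are hypothetical names standing in for the hook and the TARGET_ZARCH flag, and the alignment/operation/speed arguments are omitted here because the s390 version ignores them.

/* Illustrative sketch, not patch content: the s390 by-pieces size check as plain C.  */
#include <stdbool.h>
#include <stdio.h>

static bool
use_by_pieces_ok (unsigned long long size, bool target_zarch)
{
  /* 1-, 2- and 4-byte blocks are moved/cleared by pieces; 8 bytes only on z/Architecture.  */
  return size == 1 || size == 2 || size == 4 || (target_zarch && size == 8);
}

int
main (void)
{
  /* An 8-byte block is rejected without z/Architecture support and accepted with it.  */
  printf ("%d %d\n", use_by_pieces_ok (8, false), use_by_pieces_ok (8, true));
  return 0;
}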
---- a/src/gcc/config/host-linux.c -+++ b/src/gcc/config/host-linux.c -@@ -86,6 +86,8 @@ - # define TRY_EMPTY_VM_SPACE 0x60000000 - #elif defined(__mc68000__) - # define TRY_EMPTY_VM_SPACE 0x40000000 -+#elif defined(__aarch64__) && defined(__ILP32__) -+# define TRY_EMPTY_VM_SPACE 0x60000000 - #elif defined(__aarch64__) - # define TRY_EMPTY_VM_SPACE 0x1000000000 - #elif defined(__ARM_EABI__) ---- a/src/gcc/config/cris/cris.h -+++ b/src/gcc/config/cris/cris.h -@@ -80,15 +80,7 @@ - /* Which CPU version this is. The parsed and adjusted cris_cpu_str. */ - extern int cris_cpu_version; - --/* Changing the order used to be necessary to put the fourth __make_dp -- argument (a DImode parameter) in registers, to fit with the libfunc -- parameter passing scheme used for intrinsic functions. FIXME: Check -- performance. */ --#ifdef IN_LIBGCC2 --#define __make_dp(a,b,c,d) __cris_make_dp(d,a,b,c) --#endif - -- - /* Node: Driver */ - - /* Also provide canonical vN definitions when user specifies an alias. */ ---- a/src/gcc/config/aarch64/geniterators.sh -+++ b/src/gcc/config/aarch64/geniterators.sh -@@ -0,0 +1,45 @@ -+#!/bin/sh -+# -+# Copyright (C) 2014 Free Software Foundation, Inc. -+# Contributed by ARM Ltd. -+# -+# This file is part of GCC. -+# -+# GCC is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 3, or (at your option) -+# any later version. -+# -+# GCC is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with GCC; see the file COPYING3. If not see -+# <http://www.gnu.org/licenses/>. -+ -+# Generate aarch64-builtin-iterators.h, a file containing a series of -+# BUILTIN_<ITERATOR> macros, which expand to VAR<N> Macros covering the -+# same set of modes as the iterator in iterators.md -+ -+echo "/* -*- buffer-read-only: t -*- */" -+echo "/* Generated automatically by geniterators.sh from iterators.md. */" -+echo "#ifndef GCC_AARCH64_ITERATORS_H" -+echo "#define GCC_AARCH64_ITERATORS_H" -+ -+# Strip newlines, create records marked ITERATOR, and strip junk (anything -+# which does not have a matching brace because it contains characters we -+# don't want to or can't handle (e.g P, PTR iterators change depending on -+# Pmode and ptr_mode). -+cat $1 | tr "\n" " " \ -+ | sed 's/(define_mode_iterator \([A-Za-z0-9_]*\) \([]\[A-Z0-9 \t]*\)/\n#define BUILTIN_\1(T, N, MAP) \\ \2\n/g' \ -+ | grep '#define [A-Z0-9_(), \\]* \[[A-Z0-9[:space:]]*]' \ -+ | sed 's/\t//g' \ -+ | sed 's/ \+/ /g' \ -+ | sed 's/ \[\([A-Z0-9 ]*\)]/\n\L\1/' \ -+ | awk ' BEGIN { FS = " " ; OFS = ", "} \ -+ /#/ { print } \ -+ ! /#/ { $1 = $1 ; printf " VAR%d (T, N, MAP, %s)\n", NF, $0 }' -+ -+echo "#endif /* GCC_AARCH64_ITERATORS_H */" ---- a/src/gcc/config/aarch64/aarch64-simd.md -+++ b/src/gcc/config/aarch64/aarch64-simd.md -@@ -19,8 +19,8 @@ - ;; <http://www.gnu.org/licenses/>. 
- - (define_expand "mov<mode>" -- [(set (match_operand:VALL 0 "aarch64_simd_nonimmediate_operand" "") -- (match_operand:VALL 1 "aarch64_simd_general_operand" ""))] -+ [(set (match_operand:VALL 0 "nonimmediate_operand" "") -+ (match_operand:VALL 1 "general_operand" ""))] - "TARGET_SIMD" - " - if (GET_CODE (operands[0]) == MEM) -@@ -29,8 +29,8 @@ - ) - - (define_expand "movmisalign<mode>" -- [(set (match_operand:VALL 0 "aarch64_simd_nonimmediate_operand" "") -- (match_operand:VALL 1 "aarch64_simd_general_operand" ""))] -+ [(set (match_operand:VALL 0 "nonimmediate_operand" "") -+ (match_operand:VALL 1 "general_operand" ""))] - "TARGET_SIMD" - { - /* This pattern is not permitted to fail during expansion: if both arguments -@@ -91,9 +91,9 @@ - ) - - (define_insn "*aarch64_simd_mov<mode>" -- [(set (match_operand:VD 0 "aarch64_simd_nonimmediate_operand" -+ [(set (match_operand:VD 0 "nonimmediate_operand" - "=w, m, w, ?r, ?w, ?r, w") -- (match_operand:VD 1 "aarch64_simd_general_operand" -+ (match_operand:VD 1 "general_operand" - "m, w, w, w, r, r, Dn"))] - "TARGET_SIMD - && (register_operand (operands[0], <MODE>mode) -@@ -119,9 +119,9 @@ - ) - - (define_insn "*aarch64_simd_mov<mode>" -- [(set (match_operand:VQ 0 "aarch64_simd_nonimmediate_operand" -+ [(set (match_operand:VQ 0 "nonimmediate_operand" - "=w, m, w, ?r, ?w, ?r, w") -- (match_operand:VQ 1 "aarch64_simd_general_operand" -+ (match_operand:VQ 1 "general_operand" - "m, w, w, w, r, r, Dn"))] - "TARGET_SIMD - && (register_operand (operands[0], <MODE>mode) -@@ -286,6 +286,23 @@ - [(set_attr "type" "neon_mul_<Vetype><q>")] - ) - -+(define_insn "bswap<mode>" -+ [(set (match_operand:VDQHSD 0 "register_operand" "=w") -+ (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))] -+ "TARGET_SIMD" -+ "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>" -+ [(set_attr "type" "neon_rev<q>")] -+) -+ -+(define_insn "aarch64_rbit<mode>" -+ [(set (match_operand:VB 0 "register_operand" "=w") -+ (unspec:VB [(match_operand:VB 1 "register_operand" "w")] -+ UNSPEC_RBIT))] -+ "TARGET_SIMD" -+ "rbit\\t%0.<Vbtype>, %1.<Vbtype>" -+ [(set_attr "type" "neon_rbit")] -+) -+ - (define_insn "*aarch64_mul3_elt<mode>" - [(set (match_operand:VMUL 0 "register_operand" "=w") - (mult:VMUL -@@ -954,7 +971,7 @@ - dup\\t%d0, %1.d[0] - fmov\\t%d0, %1 - dup\\t%d0, %1" -- [(set_attr "type" "neon_dup<q>,fmov,neon_dup<q>") -+ [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>") - (set_attr "simd" "yes,*,yes") - (set_attr "fp" "*,yes,*") - (set_attr "length" "4")] -@@ -1046,7 +1063,7 @@ - (match_operand:<VHALF> 1 "register_operand" "w,r") - (vec_select:<VHALF> - (match_dup 0) -- (match_operand:VQ 2 "vect_par_cnst_hi_half" ""))))] -+ (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))] - "TARGET_SIMD && BYTES_BIG_ENDIAN" - "@ - ins\\t%0.d[1], %1.d[0] -@@ -1059,7 +1076,7 @@ - (match_operand:<VHALF> 1 "register_operand" "")] - "TARGET_SIMD" - { -- rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, BYTES_BIG_ENDIAN); -+ rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false); - if (BYTES_BIG_ENDIAN) - emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0], - operands[1], p)); -@@ -1099,7 +1116,7 @@ - ;; For quads. 
- - (define_insn "vec_pack_trunc_<mode>" -- [(set (match_operand:<VNARROWQ2> 0 "register_operand" "+&w") -+ [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w") - (vec_concat:<VNARROWQ2> - (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")) - (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))] -@@ -1541,7 +1558,7 @@ - ) - - ;; Vector versions of the floating-point frint patterns. --;; Expands to btrunc, ceil, floor, nearbyint, rint, round. -+;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn. - (define_insn "<frint_pattern><mode>2" - [(set (match_operand:VDQF 0 "register_operand" "=w") - (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")] -@@ -1853,15 +1870,15 @@ - ;; bif op0, op1, mask - - (define_insn "aarch64_simd_bsl<mode>_internal" -- [(set (match_operand:VALLDIF 0 "register_operand" "=w,w,w") -- (ior:VALLDIF -- (and:VALLDIF -- (match_operand:<V_cmp_result> 1 "register_operand" " 0,w,w") -- (match_operand:VALLDIF 2 "register_operand" " w,w,0")) -- (and:VALLDIF -+ [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w") -+ (ior:VSDQ_I_DI -+ (and:VSDQ_I_DI - (not:<V_cmp_result> -- (match_dup:<V_cmp_result> 1)) -- (match_operand:VALLDIF 3 "register_operand" " w,0,w")) -+ (match_operand:<V_cmp_result> 1 "register_operand" " 0,w,w")) -+ (match_operand:VSDQ_I_DI 3 "register_operand" " w,0,w")) -+ (and:VSDQ_I_DI -+ (match_dup:<V_cmp_result> 1) -+ (match_operand:VSDQ_I_DI 2 "register_operand" " w,w,0")) - ))] - "TARGET_SIMD" - "@ -@@ -1879,9 +1896,21 @@ - "TARGET_SIMD" - { - /* We can't alias operands together if they have different modes. */ -+ rtx tmp = operands[0]; -+ if (FLOAT_MODE_P (<MODE>mode)) -+ { -+ operands[2] = gen_lowpart (<V_cmp_result>mode, operands[2]); -+ operands[3] = gen_lowpart (<V_cmp_result>mode, operands[3]); -+ tmp = gen_reg_rtx (<V_cmp_result>mode); -+ } - operands[1] = gen_lowpart (<V_cmp_result>mode, operands[1]); -- emit_insn (gen_aarch64_simd_bsl<mode>_internal (operands[0], operands[1], -- operands[2], operands[3])); -+ emit_insn (gen_aarch64_simd_bsl<v_cmp_result>_internal (tmp, -+ operands[1], -+ operands[2], -+ operands[3])); -+ if (tmp != operands[0]) -+ emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp)); -+ - DONE; - }) - -@@ -1895,58 +1924,94 @@ - (match_operand:VDQ 2 "nonmemory_operand")))] - "TARGET_SIMD" - { -- int inverse = 0, has_zero_imm_form = 0; - rtx op1 = operands[1]; - rtx op2 = operands[2]; - rtx mask = gen_reg_rtx (<MODE>mode); -+ enum rtx_code code = GET_CODE (operands[3]); - -- switch (GET_CODE (operands[3])) -+ /* Switching OP1 and OP2 is necessary for NE (to output a cmeq insn), -+ and desirable for other comparisons if it results in FOO ? -1 : 0 -+ (this allows direct use of the comparison result without a bsl). */ -+ if (code == NE -+ || (code != EQ -+ && op1 == CONST0_RTX (<V_cmp_result>mode) -+ && op2 == CONSTM1_RTX (<V_cmp_result>mode))) - { -+ op1 = operands[2]; -+ op2 = operands[1]; -+ switch (code) -+ { -+ case LE: code = GT; break; -+ case LT: code = GE; break; -+ case GE: code = LT; break; -+ case GT: code = LE; break; -+ /* No case EQ. */ -+ case NE: code = EQ; break; -+ case LTU: code = GEU; break; -+ case LEU: code = GTU; break; -+ case GTU: code = LEU; break; -+ case GEU: code = LTU; break; -+ default: gcc_unreachable (); -+ } -+ } -+ -+ /* Make sure we can handle the last operand. */ -+ switch (code) -+ { -+ case NE: -+ /* Normalized to EQ above. */ -+ gcc_unreachable (); -+ - case LE: - case LT: -- case NE: -- inverse = 1; -- /* Fall through. 
*/ - case GE: - case GT: - case EQ: -- has_zero_imm_form = 1; -- break; -- case LEU: -- case LTU: -- inverse = 1; -- break; -+ /* These instructions have a form taking an immediate zero. */ -+ if (operands[5] == CONST0_RTX (<MODE>mode)) -+ break; -+ /* Fall through, as may need to load into register. */ - default: -+ if (!REG_P (operands[5])) -+ operands[5] = force_reg (<MODE>mode, operands[5]); - break; - } - -- if (!REG_P (operands[5]) -- && (operands[5] != CONST0_RTX (<MODE>mode) || !has_zero_imm_form)) -- operands[5] = force_reg (<MODE>mode, operands[5]); -- -- switch (GET_CODE (operands[3])) -+ switch (code) - { - case LT: -+ emit_insn (gen_aarch64_cmlt<mode> (mask, operands[4], operands[5])); -+ break; -+ - case GE: - emit_insn (gen_aarch64_cmge<mode> (mask, operands[4], operands[5])); - break; - - case LE: -+ emit_insn (gen_aarch64_cmle<mode> (mask, operands[4], operands[5])); -+ break; -+ - case GT: - emit_insn (gen_aarch64_cmgt<mode> (mask, operands[4], operands[5])); - break; - - case LTU: -+ emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[5], operands[4])); -+ break; -+ - case GEU: - emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[4], operands[5])); - break; - - case LEU: -+ emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[5], operands[4])); -+ break; -+ - case GTU: - emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[4], operands[5])); - break; - -- case NE: -+ /* NE has been normalized to EQ above. */ - case EQ: - emit_insn (gen_aarch64_cmeq<mode> (mask, operands[4], operands[5])); - break; -@@ -1955,12 +2020,6 @@ - gcc_unreachable (); - } - -- if (inverse) -- { -- op1 = operands[2]; -- op2 = operands[1]; -- } -- - /* If we have (a = (b CMP c) ? -1 : 0); - Then we can simply move the generated mask. */ - -@@ -2348,6 +2407,15 @@ - DONE; - }) - -+(define_expand "aarch64_reinterpretdf<mode>" -+ [(match_operand:DF 0 "register_operand" "") -+ (match_operand:VD_RE 1 "register_operand" "")] -+ "TARGET_SIMD" -+{ -+ aarch64_simd_reinterpret (operands[0], operands[1]); -+ DONE; -+}) -+ - (define_expand "aarch64_reinterpretv16qi<mode>" - [(match_operand:V16QI 0 "register_operand" "") - (match_operand:VQ 1 "register_operand" "")] -@@ -2734,9 +2802,9 @@ - ;; <su>q<absneg> - - (define_insn "aarch64_s<optab><mode>" -- [(set (match_operand:VSDQ_I_BHSI 0 "register_operand" "=w") -- (UNQOPS:VSDQ_I_BHSI -- (match_operand:VSDQ_I_BHSI 1 "register_operand" "w")))] -+ [(set (match_operand:VSDQ_I 0 "register_operand" "=w") -+ (UNQOPS:VSDQ_I -+ (match_operand:VSDQ_I 1 "register_operand" "w")))] - "TARGET_SIMD" - "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>" - [(set_attr "type" "neon_<optab><q>")] -@@ -3788,26 +3856,46 @@ - ))) - (clobber (reg:CC CC_REGNUM))] - "TARGET_SIMD" -- "@ -- cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2> -- cm<optab>\t%d0, %d1, #0 -- #" -- "reload_completed -- /* We need to prevent the split from -- happening in the 'w' constraint cases. 
*/ -- && GP_REGNUM_P (REGNO (operands[0])) -- && GP_REGNUM_P (REGNO (operands[1]))" -- [(const_int 0)] -+ "#" -+ "reload_completed" -+ [(set (match_operand:DI 0 "register_operand") -+ (neg:DI -+ (COMPARISONS:DI -+ (match_operand:DI 1 "register_operand") -+ (match_operand:DI 2 "aarch64_simd_reg_or_zero") -+ )))] - { -- enum machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]); -- rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]); -- rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]); -- emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); -- DONE; -+ /* If we are in the general purpose register file, -+ we split to a sequence of comparison and store. */ -+ if (GP_REGNUM_P (REGNO (operands[0])) -+ && GP_REGNUM_P (REGNO (operands[1]))) -+ { -+ enum machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]); -+ rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]); -+ rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]); -+ emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); -+ DONE; -+ } -+ /* Otherwise, we expand to a similar pattern which does not -+ clobber CC_REGNUM. */ - } - [(set_attr "type" "neon_compare, neon_compare_zero, multiple")] - ) - -+(define_insn "*aarch64_cm<optab>di" -+ [(set (match_operand:DI 0 "register_operand" "=w,w") -+ (neg:DI -+ (COMPARISONS:DI -+ (match_operand:DI 1 "register_operand" "w,w") -+ (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz") -+ )))] -+ "TARGET_SIMD && reload_completed" -+ "@ -+ cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2> -+ cm<optab>\t%d0, %d1, #0" -+ [(set_attr "type" "neon_compare, neon_compare_zero")] -+) -+ - ;; cm(hs|hi) - - (define_insn "aarch64_cm<optab><mode>" -@@ -3831,35 +3919,62 @@ - ))) - (clobber (reg:CC CC_REGNUM))] - "TARGET_SIMD" -- "@ -- cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2> -- #" -- "reload_completed -- /* We need to prevent the split from -- happening in the 'w' constraint cases. */ -- && GP_REGNUM_P (REGNO (operands[0])) -- && GP_REGNUM_P (REGNO (operands[1]))" -- [(const_int 0)] -+ "#" -+ "reload_completed" -+ [(set (match_operand:DI 0 "register_operand") -+ (neg:DI -+ (UCOMPARISONS:DI -+ (match_operand:DI 1 "register_operand") -+ (match_operand:DI 2 "aarch64_simd_reg_or_zero") -+ )))] - { -- enum machine_mode mode = CCmode; -- rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]); -- rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]); -- emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); -- DONE; -+ /* If we are in the general purpose register file, -+ we split to a sequence of comparison and store. */ -+ if (GP_REGNUM_P (REGNO (operands[0])) -+ && GP_REGNUM_P (REGNO (operands[1]))) -+ { -+ enum machine_mode mode = CCmode; -+ rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]); -+ rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]); -+ emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); -+ DONE; -+ } -+ /* Otherwise, we expand to a similar pattern which does not -+ clobber CC_REGNUM. 
*/ - } -- [(set_attr "type" "neon_compare, neon_compare_zero")] -+ [(set_attr "type" "neon_compare,multiple")] - ) - -+(define_insn "*aarch64_cm<optab>di" -+ [(set (match_operand:DI 0 "register_operand" "=w") -+ (neg:DI -+ (UCOMPARISONS:DI -+ (match_operand:DI 1 "register_operand" "w") -+ (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w") -+ )))] -+ "TARGET_SIMD && reload_completed" -+ "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>" -+ [(set_attr "type" "neon_compare")] -+) -+ - ;; cmtst - -+;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst, -+;; we don't have any insns using ne, and aarch64_vcond_internal outputs -+;; not (neg (eq (and x y) 0)) -+;; which is rewritten by simplify_rtx as -+;; plus (eq (and x y) 0) -1. -+ - (define_insn "aarch64_cmtst<mode>" - [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w") -- (neg:<V_cmp_result> -- (ne:<V_cmp_result> -+ (plus:<V_cmp_result> -+ (eq:<V_cmp_result> - (and:VDQ - (match_operand:VDQ 1 "register_operand" "w") - (match_operand:VDQ 2 "register_operand" "w")) -- (vec_duplicate:<V_cmp_result> (const_int 0)))))] -+ (match_operand:VDQ 3 "aarch64_simd_imm_zero")) -+ (match_operand:<V_cmp_result> 4 "aarch64_simd_imm_minus_one"))) -+ ] - "TARGET_SIMD" - "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" - [(set_attr "type" "neon_tst<q>")] -@@ -3875,23 +3990,44 @@ - (const_int 0)))) - (clobber (reg:CC CC_REGNUM))] - "TARGET_SIMD" -- "@ -- cmtst\t%d0, %d1, %d2 -- #" -- "reload_completed -- /* We need to prevent the split from -- happening in the 'w' constraint cases. */ -- && GP_REGNUM_P (REGNO (operands[0])) -- && GP_REGNUM_P (REGNO (operands[1]))" -- [(const_int 0)] -+ "#" -+ "reload_completed" -+ [(set (match_operand:DI 0 "register_operand") -+ (neg:DI -+ (ne:DI -+ (and:DI -+ (match_operand:DI 1 "register_operand") -+ (match_operand:DI 2 "register_operand")) -+ (const_int 0))))] - { -- rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]); -- enum machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx); -- rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx); -- rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx); -- emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); -- DONE; -+ /* If we are in the general purpose register file, -+ we split to a sequence of comparison and store. */ -+ if (GP_REGNUM_P (REGNO (operands[0])) -+ && GP_REGNUM_P (REGNO (operands[1]))) -+ { -+ rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]); -+ enum machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx); -+ rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx); -+ rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx); -+ emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); -+ DONE; -+ } -+ /* Otherwise, we expand to a similar pattern which does not -+ clobber CC_REGNUM. 
*/ - } -+ [(set_attr "type" "neon_tst,multiple")] -+) -+ -+(define_insn "*aarch64_cmtstdi" -+ [(set (match_operand:DI 0 "register_operand" "=w") -+ (neg:DI -+ (ne:DI -+ (and:DI -+ (match_operand:DI 1 "register_operand" "w") -+ (match_operand:DI 2 "register_operand" "w")) -+ (const_int 0))))] -+ "TARGET_SIMD" -+ "cmtst\t%d0, %d1, %d2" - [(set_attr "type" "neon_tst")] - ) - -@@ -3972,6 +4108,16 @@ - [(set_attr "type" "neon_load2_2reg<q>")] - ) - -+(define_insn "aarch64_simd_ld2r<mode>" -+ [(set (match_operand:OI 0 "register_operand" "=w") -+ (unspec:OI [(match_operand:<V_TWO_ELEM> 1 "aarch64_simd_struct_operand" "Utv") -+ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ] -+ UNSPEC_LD2_DUP))] -+ "TARGET_SIMD" -+ "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1" -+ [(set_attr "type" "neon_load2_all_lanes<q>")] -+) -+ - (define_insn "vec_store_lanesoi<mode>" - [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv") - (unspec:OI [(match_operand:OI 1 "register_operand" "w") -@@ -3982,6 +4128,17 @@ - [(set_attr "type" "neon_store2_2reg<q>")] - ) - -+(define_insn "vec_store_lanesoi_lane<mode>" -+ [(set (match_operand:<V_TWO_ELEM> 0 "aarch64_simd_struct_operand" "=Utv") -+ (unspec:<V_TWO_ELEM> [(match_operand:OI 1 "register_operand" "w") -+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) -+ (match_operand:SI 2 "immediate_operand" "i")] -+ UNSPEC_ST2_LANE))] -+ "TARGET_SIMD" -+ "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0" -+ [(set_attr "type" "neon_store3_one_lane<q>")] -+) -+ - (define_insn "vec_load_lanesci<mode>" - [(set (match_operand:CI 0 "register_operand" "=w") - (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv") -@@ -3992,6 +4149,16 @@ - [(set_attr "type" "neon_load3_3reg<q>")] - ) - -+(define_insn "aarch64_simd_ld3r<mode>" -+ [(set (match_operand:CI 0 "register_operand" "=w") -+ (unspec:CI [(match_operand:<V_THREE_ELEM> 1 "aarch64_simd_struct_operand" "Utv") -+ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ] -+ UNSPEC_LD3_DUP))] -+ "TARGET_SIMD" -+ "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1" -+ [(set_attr "type" "neon_load3_all_lanes<q>")] -+) -+ - (define_insn "vec_store_lanesci<mode>" - [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv") - (unspec:CI [(match_operand:CI 1 "register_operand" "w") -@@ -4002,6 +4169,17 @@ - [(set_attr "type" "neon_store3_3reg<q>")] - ) - -+(define_insn "vec_store_lanesci_lane<mode>" -+ [(set (match_operand:<V_THREE_ELEM> 0 "aarch64_simd_struct_operand" "=Utv") -+ (unspec:<V_THREE_ELEM> [(match_operand:CI 1 "register_operand" "w") -+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) -+ (match_operand:SI 2 "immediate_operand" "i")] -+ UNSPEC_ST3_LANE))] -+ "TARGET_SIMD" -+ "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0" -+ [(set_attr "type" "neon_store3_one_lane<q>")] -+) -+ - (define_insn "vec_load_lanesxi<mode>" - [(set (match_operand:XI 0 "register_operand" "=w") - (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv") -@@ -4012,6 +4190,16 @@ - [(set_attr "type" "neon_load4_4reg<q>")] - ) - -+(define_insn "aarch64_simd_ld4r<mode>" -+ [(set (match_operand:XI 0 "register_operand" "=w") -+ (unspec:XI [(match_operand:<V_FOUR_ELEM> 1 "aarch64_simd_struct_operand" "Utv") -+ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ] -+ UNSPEC_LD4_DUP))] -+ "TARGET_SIMD" -+ "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1" -+ [(set_attr "type" "neon_load4_all_lanes<q>")] -+) -+ - (define_insn "vec_store_lanesxi<mode>" - [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv") - (unspec:XI [(match_operand:XI 1 
"register_operand" "w") -@@ -4022,6 +4210,17 @@ - [(set_attr "type" "neon_store4_4reg<q>")] - ) - -+(define_insn "vec_store_lanesxi_lane<mode>" -+ [(set (match_operand:<V_FOUR_ELEM> 0 "aarch64_simd_struct_operand" "=Utv") -+ (unspec:<V_FOUR_ELEM> [(match_operand:XI 1 "register_operand" "w") -+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) -+ (match_operand:SI 2 "immediate_operand" "i")] -+ UNSPEC_ST4_LANE))] -+ "TARGET_SIMD" -+ "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0" -+ [(set_attr "type" "neon_store4_one_lane<q>")] -+) -+ - ;; Reload patterns for AdvSIMD register list operands. - - (define_expand "mov<mode>" -@@ -4141,6 +4340,45 @@ - aarch64_simd_disambiguate_copy (operands, dest, src, 4); - }) - -+(define_expand "aarch64_ld2r<mode>" -+ [(match_operand:OI 0 "register_operand" "=w") -+ (match_operand:DI 1 "register_operand" "w") -+ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] -+ "TARGET_SIMD" -+{ -+ enum machine_mode mode = <V_TWO_ELEM>mode; -+ rtx mem = gen_rtx_MEM (mode, operands[1]); -+ -+ emit_insn (gen_aarch64_simd_ld2r<mode> (operands[0], mem)); -+ DONE; -+}) -+ -+(define_expand "aarch64_ld3r<mode>" -+ [(match_operand:CI 0 "register_operand" "=w") -+ (match_operand:DI 1 "register_operand" "w") -+ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] -+ "TARGET_SIMD" -+{ -+ enum machine_mode mode = <V_THREE_ELEM>mode; -+ rtx mem = gen_rtx_MEM (mode, operands[1]); -+ -+ emit_insn (gen_aarch64_simd_ld3r<mode> (operands[0], mem)); -+ DONE; -+}) -+ -+(define_expand "aarch64_ld4r<mode>" -+ [(match_operand:XI 0 "register_operand" "=w") -+ (match_operand:DI 1 "register_operand" "w") -+ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] -+ "TARGET_SIMD" -+{ -+ enum machine_mode mode = <V_FOUR_ELEM>mode; -+ rtx mem = gen_rtx_MEM (mode, operands[1]); -+ -+ emit_insn (gen_aarch64_simd_ld4r<mode> (operands[0],mem)); -+ DONE; -+}) -+ - (define_insn "aarch64_ld2<mode>_dreg" - [(set (match_operand:OI 0 "register_operand" "=w") - (subreg:OI -@@ -4375,7 +4613,7 @@ - (match_operand:VB 1 "register_operand") - (match_operand:VB 2 "register_operand") - (match_operand:VB 3 "register_operand")] -- "TARGET_SIMD && !BYTES_BIG_ENDIAN" -+ "TARGET_SIMD" - { - aarch64_expand_vec_perm (operands[0], operands[1], - operands[2], operands[3]); -@@ -4430,6 +4668,44 @@ - [(set_attr "type" "neon_permute<q>")] - ) - -+;; Note immediate (third) operand is lane index not byte index. -+(define_insn "aarch64_ext<mode>" -+ [(set (match_operand:VALL 0 "register_operand" "=w") -+ (unspec:VALL [(match_operand:VALL 1 "register_operand" "w") -+ (match_operand:VALL 2 "register_operand" "w") -+ (match_operand:SI 3 "immediate_operand" "i")] -+ UNSPEC_EXT))] -+ "TARGET_SIMD" -+{ -+ operands[3] = GEN_INT (INTVAL (operands[3]) -+ * GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode))); -+ return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3"; -+} -+ [(set_attr "type" "neon_ext<q>")] -+) -+ -+;; This exists solely to check the arguments to the corresponding __builtin. -+;; Used where we want an error for out-of-range indices which would otherwise -+;; be silently wrapped (e.g. the mask to a __builtin_shuffle). 
-+(define_expand "aarch64_im_lane_boundsi" -+ [(match_operand:SI 0 "immediate_operand" "i") -+ (match_operand:SI 1 "immediate_operand" "i")] -+ "TARGET_SIMD" -+{ -+ aarch64_simd_lane_bounds (operands[0], 0, INTVAL (operands[1])); -+ DONE; -+} -+) -+ -+(define_insn "aarch64_rev<REVERSE:rev_op><mode>" -+ [(set (match_operand:VALL 0 "register_operand" "=w") -+ (unspec:VALL [(match_operand:VALL 1 "register_operand" "w")] -+ REVERSE))] -+ "TARGET_SIMD" -+ "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>" -+ [(set_attr "type" "neon_rev<q>")] -+) -+ - (define_insn "aarch64_st2<mode>_dreg" - [(set (match_operand:TI 0 "aarch64_simd_struct_operand" "=Utv") - (unspec:TI [(match_operand:OI 1 "register_operand" "w") -@@ -4516,6 +4792,57 @@ - DONE; - }) - -+(define_expand "aarch64_st2_lane<VQ:mode>" -+ [(match_operand:DI 0 "register_operand" "r") -+ (match_operand:OI 1 "register_operand" "w") -+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) -+ (match_operand:SI 2 "immediate_operand")] -+ "TARGET_SIMD" -+{ -+ enum machine_mode mode = <V_TWO_ELEM>mode; -+ rtx mem = gen_rtx_MEM (mode, operands[0]); -+ operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); -+ -+ emit_insn (gen_vec_store_lanesoi_lane<VQ:mode> (mem, -+ operands[1], -+ operands[2])); -+ DONE; -+}) -+ -+(define_expand "aarch64_st3_lane<VQ:mode>" -+ [(match_operand:DI 0 "register_operand" "r") -+ (match_operand:CI 1 "register_operand" "w") -+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) -+ (match_operand:SI 2 "immediate_operand")] -+ "TARGET_SIMD" -+{ -+ enum machine_mode mode = <V_THREE_ELEM>mode; -+ rtx mem = gen_rtx_MEM (mode, operands[0]); -+ operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); -+ -+ emit_insn (gen_vec_store_lanesci_lane<VQ:mode> (mem, -+ operands[1], -+ operands[2])); -+ DONE; -+}) -+ -+(define_expand "aarch64_st4_lane<VQ:mode>" -+ [(match_operand:DI 0 "register_operand" "r") -+ (match_operand:XI 1 "register_operand" "w") -+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) -+ (match_operand:SI 2 "immediate_operand")] -+ "TARGET_SIMD" -+{ -+ enum machine_mode mode = <V_FOUR_ELEM>mode; -+ rtx mem = gen_rtx_MEM (mode, operands[0]); -+ operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); -+ -+ emit_insn (gen_vec_store_lanesxi_lane<VQ:mode> (mem, -+ operands[1], -+ operands[2])); -+ DONE; -+}) -+ - (define_expand "aarch64_st1<VALL:mode>" - [(match_operand:DI 0 "register_operand") - (match_operand:VALL 1 "register_operand")] ---- a/src/gcc/config/aarch64/predicates.md -+++ b/src/gcc/config/aarch64/predicates.md -@@ -26,6 +26,10 @@ - && GET_MODE_CLASS (GET_MODE (op)) == MODE_CC")))) - ) - -+(define_predicate "aarch64_call_insn_operand" -+ (ior (match_code "symbol_ref") -+ (match_operand 0 "register_operand"))) -+ - (define_predicate "aarch64_simd_register" - (and (match_code "reg") - (ior (match_test "REGNO_REG_CLASS (REGNO (op)) == FP_LO_REGS") -@@ -119,6 +123,10 @@ - (match_test "INTVAL (op) != 0 - && (unsigned) exact_log2 (INTVAL (op)) < 64"))) - -+(define_predicate "aarch64_mem_pair_offset" -+ (and (match_code "const_int") -+ (match_test "aarch64_offset_7bit_signed_scaled_p (mode, INTVAL (op))"))) -+ - (define_predicate "aarch64_mem_pair_operand" - (and (match_code "mem") - (match_test "aarch64_legitimate_address_p (mode, XEXP (op, 0), PARALLEL, -@@ -194,6 +202,18 @@ - (define_special_predicate "aarch64_comparison_operator" - (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu,unordered,ordered,unlt,unle,unge,ungt")) - -+(define_special_predicate 
"aarch64_comparison_operation" -+ (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu,unordered,ordered,unlt,unle,unge,ungt") -+{ -+ if (XEXP (op, 1) != const0_rtx) -+ return false; -+ rtx op0 = XEXP (op, 0); -+ if (!REG_P (op0) || REGNO (op0) != CC_REGNUM) -+ return false; -+ return aarch64_get_condition_code (op) >= 0; -+}) -+ -+ - ;; True if the operand is memory reference suitable for a load/store exclusive. - (define_predicate "aarch64_sync_memory_operand" - (and (match_operand 0 "memory_operand") -@@ -203,62 +223,15 @@ - (define_special_predicate "vect_par_cnst_hi_half" - (match_code "parallel") - { -- HOST_WIDE_INT count = XVECLEN (op, 0); -- int nunits = GET_MODE_NUNITS (mode); -- int i; -- -- if (count < 1 -- || count != nunits / 2) -- return false; -- -- if (!VECTOR_MODE_P (mode)) -- return false; -- -- for (i = 0; i < count; i++) -- { -- rtx elt = XVECEXP (op, 0, i); -- int val; -- -- if (GET_CODE (elt) != CONST_INT) -- return false; -- -- val = INTVAL (elt); -- if (val != (nunits / 2) + i) -- return false; -- } -- return true; -+ return aarch64_simd_check_vect_par_cnst_half (op, mode, true); - }) - - (define_special_predicate "vect_par_cnst_lo_half" - (match_code "parallel") - { -- HOST_WIDE_INT count = XVECLEN (op, 0); -- int nunits = GET_MODE_NUNITS (mode); -- int i; -- -- if (count < 1 -- || count != nunits / 2) -- return false; -- -- if (!VECTOR_MODE_P (mode)) -- return false; -- -- for (i = 0; i < count; i++) -- { -- rtx elt = XVECEXP (op, 0, i); -- int val; -- -- if (GET_CODE (elt) != CONST_INT) -- return false; -- -- val = INTVAL (elt); -- if (val != i) -- return false; -- } -- return true; -+ return aarch64_simd_check_vect_par_cnst_half (op, mode, false); - }) - -- - (define_special_predicate "aarch64_simd_lshift_imm" - (match_code "const_vector") - { -@@ -300,3 +273,9 @@ - { - return aarch64_simd_imm_zero_p (op, mode); - }) -+ -+(define_special_predicate "aarch64_simd_imm_minus_one" -+ (match_code "const_vector") -+{ -+ return aarch64_const_vec_all_same_int_p (op, -1); -+}) ---- a/src/gcc/config/aarch64/arm_neon.h -+++ b/src/gcc/config/aarch64/arm_neon.h -@@ -2113,29 +2113,26 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vqadd_u8 (uint8x8_t __a, uint8x8_t __b) - { -- return (uint8x8_t) __builtin_aarch64_uqaddv8qi ((int8x8_t) __a, -- (int8x8_t) __b); -+ return __builtin_aarch64_uqaddv8qi_uuu (__a, __b); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vqadd_u16 (uint16x4_t __a, uint16x4_t __b) - { -- return (uint16x4_t) __builtin_aarch64_uqaddv4hi ((int16x4_t) __a, -- (int16x4_t) __b); -+ return __builtin_aarch64_uqaddv4hi_uuu (__a, __b); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vqadd_u32 (uint32x2_t __a, uint32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_uqaddv2si ((int32x2_t) __a, -- (int32x2_t) __b); -+ return __builtin_aarch64_uqaddv2si_uuu (__a, __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vqadd_u64 (uint64x1_t __a, uint64x1_t __b) - { -- return (uint64x1_t) __builtin_aarch64_uqadddi ((int64x1_t) __a, -- (int64x1_t) __b); -+ return (uint64x1_t) __builtin_aarch64_uqadddi_uuu ((uint64_t) __a, -+ (uint64_t) __b); - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -@@ -2165,29 +2162,25 @@ - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vqaddq_u8 (uint8x16_t __a, uint8x16_t __b) - { -- return (uint8x16_t) __builtin_aarch64_uqaddv16qi 
((int8x16_t) __a, -- (int8x16_t) __b); -+ return __builtin_aarch64_uqaddv16qi_uuu (__a, __b); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vqaddq_u16 (uint16x8_t __a, uint16x8_t __b) - { -- return (uint16x8_t) __builtin_aarch64_uqaddv8hi ((int16x8_t) __a, -- (int16x8_t) __b); -+ return __builtin_aarch64_uqaddv8hi_uuu (__a, __b); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vqaddq_u32 (uint32x4_t __a, uint32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_uqaddv4si ((int32x4_t) __a, -- (int32x4_t) __b); -+ return __builtin_aarch64_uqaddv4si_uuu (__a, __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vqaddq_u64 (uint64x2_t __a, uint64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_uqaddv2di ((int64x2_t) __a, -- (int64x2_t) __b); -+ return __builtin_aarch64_uqaddv2di_uuu (__a, __b); - } - - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -@@ -2217,29 +2210,26 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vqsub_u8 (uint8x8_t __a, uint8x8_t __b) - { -- return (uint8x8_t) __builtin_aarch64_uqsubv8qi ((int8x8_t) __a, -- (int8x8_t) __b); -+ return __builtin_aarch64_uqsubv8qi_uuu (__a, __b); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vqsub_u16 (uint16x4_t __a, uint16x4_t __b) - { -- return (uint16x4_t) __builtin_aarch64_uqsubv4hi ((int16x4_t) __a, -- (int16x4_t) __b); -+ return __builtin_aarch64_uqsubv4hi_uuu (__a, __b); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vqsub_u32 (uint32x2_t __a, uint32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_uqsubv2si ((int32x2_t) __a, -- (int32x2_t) __b); -+ return __builtin_aarch64_uqsubv2si_uuu (__a, __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vqsub_u64 (uint64x1_t __a, uint64x1_t __b) - { -- return (uint64x1_t) __builtin_aarch64_uqsubdi ((int64x1_t) __a, -- (int64x1_t) __b); -+ return (uint64x1_t) __builtin_aarch64_uqsubdi_uuu ((uint64_t) __a, -+ (uint64_t) __b); - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -@@ -2269,29 +2259,25 @@ - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vqsubq_u8 (uint8x16_t __a, uint8x16_t __b) - { -- return (uint8x16_t) __builtin_aarch64_uqsubv16qi ((int8x16_t) __a, -- (int8x16_t) __b); -+ return __builtin_aarch64_uqsubv16qi_uuu (__a, __b); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vqsubq_u16 (uint16x8_t __a, uint16x8_t __b) - { -- return (uint16x8_t) __builtin_aarch64_uqsubv8hi ((int16x8_t) __a, -- (int16x8_t) __b); -+ return __builtin_aarch64_uqsubv8hi_uuu (__a, __b); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vqsubq_u32 (uint32x4_t __a, uint32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_uqsubv4si ((int32x4_t) __a, -- (int32x4_t) __b); -+ return __builtin_aarch64_uqsubv4si_uuu (__a, __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vqsubq_u64 (uint64x2_t __a, uint64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_uqsubv2di ((int64x2_t) __a, -- (int64x2_t) __b); -+ return __builtin_aarch64_uqsubv2di_uuu (__a, __b); - } - - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -@@ -2312,6 +2298,12 @@ - return (int32x2_t) __builtin_aarch64_sqnegv2si (__a); 
- } - -+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+vqneg_s64 (int64x1_t __a) -+{ -+ return __builtin_aarch64_sqnegdi (__a); -+} -+ - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vqnegq_s8 (int8x16_t __a) - { -@@ -2348,6 +2340,12 @@ - return (int32x2_t) __builtin_aarch64_sqabsv2si (__a); - } - -+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+vqabs_s64 (int64x1_t __a) -+{ -+ return __builtin_aarch64_sqabsdi (__a); -+} -+ - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vqabsq_s8 (int8x16_t __a) - { -@@ -2637,1352 +2635,1587 @@ - /* vreinterpret */ - - __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+vreinterpret_p8_f64 (float64x1_t __a) -+{ -+ return __builtin_aarch64_reinterpretv8qidf_ps (__a); -+} -+ -+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) - vreinterpret_p8_s8 (int8x8_t __a) - { -- return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv8qi (__a); -+ return (poly8x8_t) __a; - } - - __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) - vreinterpret_p8_s16 (int16x4_t __a) - { -- return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a); -+ return (poly8x8_t) __a; - } - - __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) - vreinterpret_p8_s32 (int32x2_t __a) - { -- return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a); -+ return (poly8x8_t) __a; - } - - __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) - vreinterpret_p8_s64 (int64x1_t __a) - { -- return (poly8x8_t) __builtin_aarch64_reinterpretv8qidi (__a); -+ return (poly8x8_t) __a; - } - - __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) - vreinterpret_p8_f32 (float32x2_t __a) - { -- return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a); -+ return (poly8x8_t) __a; - } - - __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) - vreinterpret_p8_u8 (uint8x8_t __a) - { -- return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a); -+ return (poly8x8_t) __a; - } - - __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) - vreinterpret_p8_u16 (uint16x4_t __a) - { -- return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a); -+ return (poly8x8_t) __a; - } - - __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) - vreinterpret_p8_u32 (uint32x2_t __a) - { -- return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a); -+ return (poly8x8_t) __a; - } - - __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) - vreinterpret_p8_u64 (uint64x1_t __a) - { -- return (poly8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a); -+ return (poly8x8_t) __a; - } - - __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) - vreinterpret_p8_p16 (poly16x4_t __a) - { -- return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a); -+ return (poly8x8_t) __a; - } - - __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+vreinterpretq_p8_f64 (float64x2_t __a) -+{ -+ return (poly8x16_t) __a; -+} -+ -+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_p8_s8 (int8x16_t __a) - { -- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv16qi (__a); -+ return (poly8x16_t) __a; - } - - __extension__ static __inline poly8x16_t 
__attribute__ ((__always_inline__)) - vreinterpretq_p8_s16 (int16x8_t __a) - { -- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a); -+ return (poly8x16_t) __a; - } - - __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_p8_s32 (int32x4_t __a) - { -- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a); -+ return (poly8x16_t) __a; - } - - __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_p8_s64 (int64x2_t __a) - { -- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a); -+ return (poly8x16_t) __a; - } - - __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_p8_f32 (float32x4_t __a) - { -- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a); -+ return (poly8x16_t) __a; - } - - __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_p8_u8 (uint8x16_t __a) - { -- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t) -- __a); -+ return (poly8x16_t) __a; - } - - __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_p8_u16 (uint16x8_t __a) - { -- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) -- __a); -+ return (poly8x16_t) __a; - } - - __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_p8_u32 (uint32x4_t __a) - { -- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t) -- __a); -+ return (poly8x16_t) __a; - } - - __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_p8_u64 (uint64x2_t __a) - { -- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t) -- __a); -+ return (poly8x16_t) __a; - } - - __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_p8_p16 (poly16x8_t __a) - { -- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) -- __a); -+ return (poly8x16_t) __a; - } - - __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+vreinterpret_p16_f64 (float64x1_t __a) -+{ -+ return __builtin_aarch64_reinterpretv4hidf_ps (__a); -+} -+ -+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) - vreinterpret_p16_s8 (int8x8_t __a) - { -- return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a); -+ return (poly16x4_t) __a; - } - - __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) - vreinterpret_p16_s16 (int16x4_t __a) - { -- return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv4hi (__a); -+ return (poly16x4_t) __a; - } - - __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) - vreinterpret_p16_s32 (int32x2_t __a) - { -- return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a); -+ return (poly16x4_t) __a; - } - - __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) - vreinterpret_p16_s64 (int64x1_t __a) - { -- return (poly16x4_t) __builtin_aarch64_reinterpretv4hidi (__a); -+ return (poly16x4_t) __a; - } - - __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) - vreinterpret_p16_f32 (float32x2_t __a) - { -- return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a); -+ return (poly16x4_t) __a; - } - - __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) - vreinterpret_p16_u8 (uint8x8_t __a) - { -- return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi 
((int8x8_t) __a); -+ return (poly16x4_t) __a; - } - - __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) - vreinterpret_p16_u16 (uint16x4_t __a) - { -- return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a); -+ return (poly16x4_t) __a; - } - - __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) - vreinterpret_p16_u32 (uint32x2_t __a) - { -- return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a); -+ return (poly16x4_t) __a; - } - - __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) - vreinterpret_p16_u64 (uint64x1_t __a) - { -- return (poly16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a); -+ return (poly16x4_t) __a; - } - - __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) - vreinterpret_p16_p8 (poly8x8_t __a) - { -- return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a); -+ return (poly16x4_t) __a; - } - - __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+vreinterpretq_p16_f64 (float64x2_t __a) -+{ -+ return (poly16x8_t) __a; -+} -+ -+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_p16_s8 (int8x16_t __a) - { -- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a); -+ return (poly16x8_t) __a; - } - - __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_p16_s16 (int16x8_t __a) - { -- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv8hi (__a); -+ return (poly16x8_t) __a; - } - - __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_p16_s32 (int32x4_t __a) - { -- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a); -+ return (poly16x8_t) __a; - } - - __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_p16_s64 (int64x2_t __a) - { -- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a); -+ return (poly16x8_t) __a; - } - - __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_p16_f32 (float32x4_t __a) - { -- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a); -+ return (poly16x8_t) __a; - } - - __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_p16_u8 (uint8x16_t __a) - { -- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) -- __a); -+ return (poly16x8_t) __a; - } - - __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_p16_u16 (uint16x8_t __a) - { -- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a); -+ return (poly16x8_t) __a; - } - - __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_p16_u32 (uint32x4_t __a) - { -- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a); -+ return (poly16x8_t) __a; - } - - __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_p16_u64 (uint64x2_t __a) - { -- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a); -+ return (poly16x8_t) __a; - } - - __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_p16_p8 (poly8x16_t __a) - { -- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) -- __a); -+ return (poly16x8_t) __a; - } - - __extension__ static __inline float32x2_t __attribute__ 
((__always_inline__)) -+vreinterpret_f32_f64 (float64x1_t __a) -+{ -+ return __builtin_aarch64_reinterpretv2sfdf (__a); -+} -+ -+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) - vreinterpret_f32_s8 (int8x8_t __a) - { -- return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi (__a); -+ return (float32x2_t) __a; - } - - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) - vreinterpret_f32_s16 (int16x4_t __a) - { -- return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi (__a); -+ return (float32x2_t) __a; - } - - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) - vreinterpret_f32_s32 (int32x2_t __a) - { -- return (float32x2_t) __builtin_aarch64_reinterpretv2sfv2si (__a); -+ return (float32x2_t) __a; - } - - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) - vreinterpret_f32_s64 (int64x1_t __a) - { -- return (float32x2_t) __builtin_aarch64_reinterpretv2sfdi (__a); -+ return (float32x2_t) __a; - } - - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) - vreinterpret_f32_u8 (uint8x8_t __a) - { -- return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi ((int8x8_t) __a); -+ return (float32x2_t) __a; - } - - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) - vreinterpret_f32_u16 (uint16x4_t __a) - { -- return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi ((int16x4_t) -- __a); -+ return (float32x2_t) __a; - } - - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) - vreinterpret_f32_u32 (uint32x2_t __a) - { -- return (float32x2_t) __builtin_aarch64_reinterpretv2sfv2si ((int32x2_t) -- __a); -+ return (float32x2_t) __a; - } - - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) - vreinterpret_f32_u64 (uint64x1_t __a) - { -- return (float32x2_t) __builtin_aarch64_reinterpretv2sfdi ((int64x1_t) __a); -+ return (float32x2_t) __a; - } - - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) - vreinterpret_f32_p8 (poly8x8_t __a) - { -- return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi ((int8x8_t) __a); -+ return (float32x2_t) __a; - } - - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) - vreinterpret_f32_p16 (poly16x4_t __a) - { -- return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi ((int16x4_t) -- __a); -+ return (float32x2_t) __a; - } - - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+vreinterpretq_f32_f64 (float64x2_t __a) -+{ -+ return (float32x4_t) __a; -+} -+ -+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_f32_s8 (int8x16_t __a) - { -- return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi (__a); -+ return (float32x4_t) __a; - } - - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_f32_s16 (int16x8_t __a) - { -- return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi (__a); -+ return (float32x4_t) __a; - } - - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_f32_s32 (int32x4_t __a) - { -- return (float32x4_t) __builtin_aarch64_reinterpretv4sfv4si (__a); -+ return (float32x4_t) __a; - } - - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_f32_s64 (int64x2_t __a) - { -- return (float32x4_t) __builtin_aarch64_reinterpretv4sfv2di (__a); -+ return (float32x4_t) __a; - } - - 
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_f32_u8 (uint8x16_t __a) - { -- return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi ((int8x16_t) -- __a); -+ return (float32x4_t) __a; - } - - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_f32_u16 (uint16x8_t __a) - { -- return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi ((int16x8_t) -- __a); -+ return (float32x4_t) __a; - } - - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_f32_u32 (uint32x4_t __a) - { -- return (float32x4_t) __builtin_aarch64_reinterpretv4sfv4si ((int32x4_t) -- __a); -+ return (float32x4_t) __a; - } - - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_f32_u64 (uint64x2_t __a) - { -- return (float32x4_t) __builtin_aarch64_reinterpretv4sfv2di ((int64x2_t) -- __a); -+ return (float32x4_t) __a; - } - - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_f32_p8 (poly8x16_t __a) - { -- return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi ((int8x16_t) -- __a); -+ return (float32x4_t) __a; - } - - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_f32_p16 (poly16x8_t __a) - { -- return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi ((int16x8_t) -- __a); -+ return (float32x4_t) __a; - } - -+__extension__ static __inline float64x1_t __attribute__((__always_inline__)) -+vreinterpret_f64_f32 (float32x2_t __a) -+{ -+ return __builtin_aarch64_reinterpretdfv2sf (__a); -+} -+ -+__extension__ static __inline float64x1_t __attribute__((__always_inline__)) -+vreinterpret_f64_p8 (poly8x8_t __a) -+{ -+ return __builtin_aarch64_reinterpretdfv8qi_sp (__a); -+} -+ -+__extension__ static __inline float64x1_t __attribute__((__always_inline__)) -+vreinterpret_f64_p16 (poly16x4_t __a) -+{ -+ return __builtin_aarch64_reinterpretdfv4hi_sp (__a); -+} -+ -+__extension__ static __inline float64x1_t __attribute__((__always_inline__)) -+vreinterpret_f64_s8 (int8x8_t __a) -+{ -+ return __builtin_aarch64_reinterpretdfv8qi (__a); -+} -+ -+__extension__ static __inline float64x1_t __attribute__((__always_inline__)) -+vreinterpret_f64_s16 (int16x4_t __a) -+{ -+ return __builtin_aarch64_reinterpretdfv4hi (__a); -+} -+ -+__extension__ static __inline float64x1_t __attribute__((__always_inline__)) -+vreinterpret_f64_s32 (int32x2_t __a) -+{ -+ return __builtin_aarch64_reinterpretdfv2si (__a); -+} -+ -+__extension__ static __inline float64x1_t __attribute__((__always_inline__)) -+vreinterpret_f64_s64 (int64x1_t __a) -+{ -+ return __builtin_aarch64_createdf ((uint64_t) vget_lane_s64 (__a, 0)); -+} -+ -+__extension__ static __inline float64x1_t __attribute__((__always_inline__)) -+vreinterpret_f64_u8 (uint8x8_t __a) -+{ -+ return __builtin_aarch64_reinterpretdfv8qi_su (__a); -+} -+ -+__extension__ static __inline float64x1_t __attribute__((__always_inline__)) -+vreinterpret_f64_u16 (uint16x4_t __a) -+{ -+ return __builtin_aarch64_reinterpretdfv4hi_su (__a); -+} -+ -+__extension__ static __inline float64x1_t __attribute__((__always_inline__)) -+vreinterpret_f64_u32 (uint32x2_t __a) -+{ -+ return __builtin_aarch64_reinterpretdfv2si_su (__a); -+} -+ -+__extension__ static __inline float64x1_t __attribute__((__always_inline__)) -+vreinterpret_f64_u64 (uint64x1_t __a) -+{ -+ return __builtin_aarch64_createdf (vget_lane_u64 (__a, 0)); -+} -+ -+__extension__ static __inline 
float64x2_t __attribute__((__always_inline__)) -+vreinterpretq_f64_f32 (float32x4_t __a) -+{ -+ return (float64x2_t) __a; -+} -+ -+__extension__ static __inline float64x2_t __attribute__((__always_inline__)) -+vreinterpretq_f64_p8 (poly8x16_t __a) -+{ -+ return (float64x2_t) __a; -+} -+ -+__extension__ static __inline float64x2_t __attribute__((__always_inline__)) -+vreinterpretq_f64_p16 (poly16x8_t __a) -+{ -+ return (float64x2_t) __a; -+} -+ -+__extension__ static __inline float64x2_t __attribute__((__always_inline__)) -+vreinterpretq_f64_s8 (int8x16_t __a) -+{ -+ return (float64x2_t) __a; -+} -+ -+__extension__ static __inline float64x2_t __attribute__((__always_inline__)) -+vreinterpretq_f64_s16 (int16x8_t __a) -+{ -+ return (float64x2_t) __a; -+} -+ -+__extension__ static __inline float64x2_t __attribute__((__always_inline__)) -+vreinterpretq_f64_s32 (int32x4_t __a) -+{ -+ return (float64x2_t) __a; -+} -+ -+__extension__ static __inline float64x2_t __attribute__((__always_inline__)) -+vreinterpretq_f64_s64 (int64x2_t __a) -+{ -+ return (float64x2_t) __a; -+} -+ -+__extension__ static __inline float64x2_t __attribute__((__always_inline__)) -+vreinterpretq_f64_u8 (uint8x16_t __a) -+{ -+ return (float64x2_t) __a; -+} -+ -+__extension__ static __inline float64x2_t __attribute__((__always_inline__)) -+vreinterpretq_f64_u16 (uint16x8_t __a) -+{ -+ return (float64x2_t) __a; -+} -+ -+__extension__ static __inline float64x2_t __attribute__((__always_inline__)) -+vreinterpretq_f64_u32 (uint32x4_t __a) -+{ -+ return (float64x2_t) __a; -+} -+ -+__extension__ static __inline float64x2_t __attribute__((__always_inline__)) -+vreinterpretq_f64_u64 (uint64x2_t __a) -+{ -+ return (float64x2_t) __a; -+} -+ - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+vreinterpret_s64_f64 (float64x1_t __a) -+{ -+ return __builtin_aarch64_reinterpretdidf (__a); -+} -+ -+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vreinterpret_s64_s8 (int8x8_t __a) - { -- return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi (__a); -+ return (int64x1_t) __a; - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vreinterpret_s64_s16 (int16x4_t __a) - { -- return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi (__a); -+ return (int64x1_t) __a; - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vreinterpret_s64_s32 (int32x2_t __a) - { -- return (int64x1_t) __builtin_aarch64_reinterpretdiv2si (__a); -+ return (int64x1_t) __a; - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vreinterpret_s64_f32 (float32x2_t __a) - { -- return (int64x1_t) __builtin_aarch64_reinterpretdiv2sf (__a); -+ return (int64x1_t) __a; - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vreinterpret_s64_u8 (uint8x8_t __a) - { -- return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a); -+ return (int64x1_t) __a; - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vreinterpret_s64_u16 (uint16x4_t __a) - { -- return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a); -+ return (int64x1_t) __a; - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vreinterpret_s64_u32 (uint32x2_t __a) - { -- return (int64x1_t) __builtin_aarch64_reinterpretdiv2si ((int32x2_t) __a); -+ return (int64x1_t) __a; - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - 
vreinterpret_s64_u64 (uint64x1_t __a) - { -- return (int64x1_t) __builtin_aarch64_reinterpretdidi ((int64x1_t) __a); -+ return (int64x1_t) __a; - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vreinterpret_s64_p8 (poly8x8_t __a) - { -- return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a); -+ return (int64x1_t) __a; - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vreinterpret_s64_p16 (poly16x4_t __a) - { -- return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a); -+ return (int64x1_t) __a; - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+vreinterpretq_s64_f64 (float64x2_t __a) -+{ -+ return (int64x2_t) __a; -+} -+ -+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_s64_s8 (int8x16_t __a) - { -- return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi (__a); -+ return (int64x2_t) __a; - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_s64_s16 (int16x8_t __a) - { -- return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi (__a); -+ return (int64x2_t) __a; - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_s64_s32 (int32x4_t __a) - { -- return (int64x2_t) __builtin_aarch64_reinterpretv2div4si (__a); -+ return (int64x2_t) __a; - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_s64_f32 (float32x4_t __a) - { -- return (int64x2_t) __builtin_aarch64_reinterpretv2div4sf (__a); -+ return (int64x2_t) __a; - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_s64_u8 (uint8x16_t __a) - { -- return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) __a); -+ return (int64x2_t) __a; - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_s64_u16 (uint16x8_t __a) - { -- return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a); -+ return (int64x2_t) __a; - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_s64_u32 (uint32x4_t __a) - { -- return (int64x2_t) __builtin_aarch64_reinterpretv2div4si ((int32x4_t) __a); -+ return (int64x2_t) __a; - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_s64_u64 (uint64x2_t __a) - { -- return (int64x2_t) __builtin_aarch64_reinterpretv2div2di ((int64x2_t) __a); -+ return (int64x2_t) __a; - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_s64_p8 (poly8x16_t __a) - { -- return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) __a); -+ return (int64x2_t) __a; - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_s64_p16 (poly16x8_t __a) - { -- return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a); -+ return (int64x2_t) __a; - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+vreinterpret_u64_f64 (float64x1_t __a) -+{ -+ return __builtin_aarch64_reinterpretdidf_us (__a); -+} -+ -+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vreinterpret_u64_s8 (int8x8_t __a) - { -- return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi (__a); -+ return (uint64x1_t) __a; - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - 
vreinterpret_u64_s16 (int16x4_t __a) - { -- return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi (__a); -+ return (uint64x1_t) __a; - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vreinterpret_u64_s32 (int32x2_t __a) - { -- return (uint64x1_t) __builtin_aarch64_reinterpretdiv2si (__a); -+ return (uint64x1_t) __a; - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vreinterpret_u64_s64 (int64x1_t __a) - { -- return (uint64x1_t) __builtin_aarch64_reinterpretdidi (__a); -+ return (uint64x1_t) __a; - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vreinterpret_u64_f32 (float32x2_t __a) - { -- return (uint64x1_t) __builtin_aarch64_reinterpretdiv2sf (__a); -+ return (uint64x1_t) __a; - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vreinterpret_u64_u8 (uint8x8_t __a) - { -- return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a); -+ return (uint64x1_t) __a; - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vreinterpret_u64_u16 (uint16x4_t __a) - { -- return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a); -+ return (uint64x1_t) __a; - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vreinterpret_u64_u32 (uint32x2_t __a) - { -- return (uint64x1_t) __builtin_aarch64_reinterpretdiv2si ((int32x2_t) __a); -+ return (uint64x1_t) __a; - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vreinterpret_u64_p8 (poly8x8_t __a) - { -- return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a); -+ return (uint64x1_t) __a; - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vreinterpret_u64_p16 (poly16x4_t __a) - { -- return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a); -+ return (uint64x1_t) __a; - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+vreinterpretq_u64_f64 (float64x2_t __a) -+{ -+ return (uint64x2_t) __a; -+} -+ -+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_u64_s8 (int8x16_t __a) - { -- return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi (__a); -+ return (uint64x2_t) __a; - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_u64_s16 (int16x8_t __a) - { -- return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi (__a); -+ return (uint64x2_t) __a; - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_u64_s32 (int32x4_t __a) - { -- return (uint64x2_t) __builtin_aarch64_reinterpretv2div4si (__a); -+ return (uint64x2_t) __a; - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_u64_s64 (int64x2_t __a) - { -- return (uint64x2_t) __builtin_aarch64_reinterpretv2div2di (__a); -+ return (uint64x2_t) __a; - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_u64_f32 (float32x4_t __a) - { -- return (uint64x2_t) __builtin_aarch64_reinterpretv2div4sf (__a); -+ return (uint64x2_t) __a; - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_u64_u8 (uint8x16_t __a) - { -- return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) -- __a); -+ return (uint64x2_t) __a; - } - - __extension__ static __inline uint64x2_t 
__attribute__ ((__always_inline__)) - vreinterpretq_u64_u16 (uint16x8_t __a) - { -- return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a); -+ return (uint64x2_t) __a; - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_u64_u32 (uint32x4_t __a) - { -- return (uint64x2_t) __builtin_aarch64_reinterpretv2div4si ((int32x4_t) __a); -+ return (uint64x2_t) __a; - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_u64_p8 (poly8x16_t __a) - { -- return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) -- __a); -+ return (uint64x2_t) __a; - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_u64_p16 (poly16x8_t __a) - { -- return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a); -+ return (uint64x2_t) __a; - } - - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+vreinterpret_s8_f64 (float64x1_t __a) -+{ -+ return __builtin_aarch64_reinterpretv8qidf (__a); -+} -+ -+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vreinterpret_s8_s16 (int16x4_t __a) - { -- return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a); -+ return (int8x8_t) __a; - } - - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vreinterpret_s8_s32 (int32x2_t __a) - { -- return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a); -+ return (int8x8_t) __a; - } - - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vreinterpret_s8_s64 (int64x1_t __a) - { -- return (int8x8_t) __builtin_aarch64_reinterpretv8qidi (__a); -+ return (int8x8_t) __a; - } - - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vreinterpret_s8_f32 (float32x2_t __a) - { -- return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a); -+ return (int8x8_t) __a; - } - - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vreinterpret_s8_u8 (uint8x8_t __a) - { -- return (int8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a); -+ return (int8x8_t) __a; - } - - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vreinterpret_s8_u16 (uint16x4_t __a) - { -- return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a); -+ return (int8x8_t) __a; - } - - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vreinterpret_s8_u32 (uint32x2_t __a) - { -- return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a); -+ return (int8x8_t) __a; - } - - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vreinterpret_s8_u64 (uint64x1_t __a) - { -- return (int8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a); -+ return (int8x8_t) __a; - } - - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vreinterpret_s8_p8 (poly8x8_t __a) - { -- return (int8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a); -+ return (int8x8_t) __a; - } - - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vreinterpret_s8_p16 (poly16x4_t __a) - { -- return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a); -+ return (int8x8_t) __a; - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+vreinterpretq_s8_f64 (float64x2_t __a) -+{ -+ return (int8x16_t) __a; -+} -+ -+__extension__ static __inline int8x16_t __attribute__ 
((__always_inline__)) - vreinterpretq_s8_s16 (int16x8_t __a) - { -- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a); -+ return (int8x16_t) __a; - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_s8_s32 (int32x4_t __a) - { -- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a); -+ return (int8x16_t) __a; - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_s8_s64 (int64x2_t __a) - { -- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a); -+ return (int8x16_t) __a; - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_s8_f32 (float32x4_t __a) - { -- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a); -+ return (int8x16_t) __a; - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_s8_u8 (uint8x16_t __a) - { -- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t) -- __a); -+ return (int8x16_t) __a; - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_s8_u16 (uint16x8_t __a) - { -- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) __a); -+ return (int8x16_t) __a; - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_s8_u32 (uint32x4_t __a) - { -- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t) __a); -+ return (int8x16_t) __a; - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_s8_u64 (uint64x2_t __a) - { -- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t) __a); -+ return (int8x16_t) __a; - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_s8_p8 (poly8x16_t __a) - { -- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t) -- __a); -+ return (int8x16_t) __a; - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_s8_p16 (poly16x8_t __a) - { -- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) __a); -+ return (int8x16_t) __a; - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+vreinterpret_s16_f64 (float64x1_t __a) -+{ -+ return __builtin_aarch64_reinterpretv4hidf (__a); -+} -+ -+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vreinterpret_s16_s8 (int8x8_t __a) - { -- return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a); -+ return (int16x4_t) __a; - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vreinterpret_s16_s32 (int32x2_t __a) - { -- return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a); -+ return (int16x4_t) __a; - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vreinterpret_s16_s64 (int64x1_t __a) - { -- return (int16x4_t) __builtin_aarch64_reinterpretv4hidi (__a); -+ return (int16x4_t) __a; - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vreinterpret_s16_f32 (float32x2_t __a) - { -- return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a); -+ return (int16x4_t) __a; - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vreinterpret_s16_u8 (uint8x8_t __a) - { -- return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a); -+ return (int16x4_t) __a; - } - - 
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vreinterpret_s16_u16 (uint16x4_t __a) - { -- return (int16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a); -+ return (int16x4_t) __a; - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vreinterpret_s16_u32 (uint32x2_t __a) - { -- return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a); -+ return (int16x4_t) __a; - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vreinterpret_s16_u64 (uint64x1_t __a) - { -- return (int16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a); -+ return (int16x4_t) __a; - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vreinterpret_s16_p8 (poly8x8_t __a) - { -- return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a); -+ return (int16x4_t) __a; - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vreinterpret_s16_p16 (poly16x4_t __a) - { -- return (int16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a); -+ return (int16x4_t) __a; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+vreinterpretq_s16_f64 (float64x2_t __a) -+{ -+ return (int16x8_t) __a; -+} -+ -+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_s16_s8 (int8x16_t __a) - { -- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a); -+ return (int16x8_t) __a; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_s16_s32 (int32x4_t __a) - { -- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a); -+ return (int16x8_t) __a; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_s16_s64 (int64x2_t __a) - { -- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a); -+ return (int16x8_t) __a; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_s16_f32 (float32x4_t __a) - { -- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a); -+ return (int16x8_t) __a; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_s16_u8 (uint8x16_t __a) - { -- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) __a); -+ return (int16x8_t) __a; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_s16_u16 (uint16x8_t __a) - { -- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a); -+ return (int16x8_t) __a; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_s16_u32 (uint32x4_t __a) - { -- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a); -+ return (int16x8_t) __a; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_s16_u64 (uint64x2_t __a) - { -- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a); -+ return (int16x8_t) __a; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_s16_p8 (poly8x16_t __a) - { -- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) __a); -+ return (int16x8_t) __a; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_s16_p16 (poly16x8_t __a) - { -- return (int16x8_t) 
__builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a); -+ return (int16x8_t) __a; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+vreinterpret_s32_f64 (float64x1_t __a) -+{ -+ return __builtin_aarch64_reinterpretv2sidf (__a); -+} -+ -+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vreinterpret_s32_s8 (int8x8_t __a) - { -- return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi (__a); -+ return (int32x2_t) __a; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vreinterpret_s32_s16 (int16x4_t __a) - { -- return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi (__a); -+ return (int32x2_t) __a; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vreinterpret_s32_s64 (int64x1_t __a) - { -- return (int32x2_t) __builtin_aarch64_reinterpretv2sidi (__a); -+ return (int32x2_t) __a; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vreinterpret_s32_f32 (float32x2_t __a) - { -- return (int32x2_t) __builtin_aarch64_reinterpretv2siv2sf (__a); -+ return (int32x2_t) __a; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vreinterpret_s32_u8 (uint8x8_t __a) - { -- return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a); -+ return (int32x2_t) __a; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vreinterpret_s32_u16 (uint16x4_t __a) - { -- return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a); -+ return (int32x2_t) __a; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vreinterpret_s32_u32 (uint32x2_t __a) - { -- return (int32x2_t) __builtin_aarch64_reinterpretv2siv2si ((int32x2_t) __a); -+ return (int32x2_t) __a; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vreinterpret_s32_u64 (uint64x1_t __a) - { -- return (int32x2_t) __builtin_aarch64_reinterpretv2sidi ((int64x1_t) __a); -+ return (int32x2_t) __a; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vreinterpret_s32_p8 (poly8x8_t __a) - { -- return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a); -+ return (int32x2_t) __a; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vreinterpret_s32_p16 (poly16x4_t __a) - { -- return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a); -+ return (int32x2_t) __a; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+vreinterpretq_s32_f64 (float64x2_t __a) -+{ -+ return (int32x4_t) __a; -+} -+ -+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_s32_s8 (int8x16_t __a) - { -- return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi (__a); -+ return (int32x4_t) __a; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_s32_s16 (int16x8_t __a) - { -- return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi (__a); -+ return (int32x4_t) __a; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_s32_s64 (int64x2_t __a) - { -- return (int32x4_t) __builtin_aarch64_reinterpretv4siv2di (__a); -+ return (int32x4_t) __a; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_s32_f32 (float32x4_t __a) - { -- return (int32x4_t) 
__builtin_aarch64_reinterpretv4siv4sf (__a); -+ return (int32x4_t) __a; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_s32_u8 (uint8x16_t __a) - { -- return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) __a); -+ return (int32x4_t) __a; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_s32_u16 (uint16x8_t __a) - { -- return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a); -+ return (int32x4_t) __a; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_s32_u32 (uint32x4_t __a) - { -- return (int32x4_t) __builtin_aarch64_reinterpretv4siv4si ((int32x4_t) __a); -+ return (int32x4_t) __a; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_s32_u64 (uint64x2_t __a) - { -- return (int32x4_t) __builtin_aarch64_reinterpretv4siv2di ((int64x2_t) __a); -+ return (int32x4_t) __a; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_s32_p8 (poly8x16_t __a) - { -- return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) __a); -+ return (int32x4_t) __a; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_s32_p16 (poly16x8_t __a) - { -- return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a); -+ return (int32x4_t) __a; - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+vreinterpret_u8_f64 (float64x1_t __a) -+{ -+ return __builtin_aarch64_reinterpretv8qidf_us (__a); -+} -+ -+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vreinterpret_u8_s8 (int8x8_t __a) - { -- return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv8qi (__a); -+ return (uint8x8_t) __a; - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vreinterpret_u8_s16 (int16x4_t __a) - { -- return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a); -+ return (uint8x8_t) __a; - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vreinterpret_u8_s32 (int32x2_t __a) - { -- return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a); -+ return (uint8x8_t) __a; - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vreinterpret_u8_s64 (int64x1_t __a) - { -- return (uint8x8_t) __builtin_aarch64_reinterpretv8qidi (__a); -+ return (uint8x8_t) __a; - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vreinterpret_u8_f32 (float32x2_t __a) - { -- return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a); -+ return (uint8x8_t) __a; - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vreinterpret_u8_u16 (uint16x4_t __a) - { -- return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a); -+ return (uint8x8_t) __a; - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vreinterpret_u8_u32 (uint32x2_t __a) - { -- return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a); -+ return (uint8x8_t) __a; - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vreinterpret_u8_u64 (uint64x1_t __a) - { -- return (uint8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a); -+ return (uint8x8_t) __a; - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - 
vreinterpret_u8_p8 (poly8x8_t __a) - { -- return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a); -+ return (uint8x8_t) __a; - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vreinterpret_u8_p16 (poly16x4_t __a) - { -- return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a); -+ return (uint8x8_t) __a; - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+vreinterpretq_u8_f64 (float64x2_t __a) -+{ -+ return (uint8x16_t) __a; -+} -+ -+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_u8_s8 (int8x16_t __a) - { -- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv16qi (__a); -+ return (uint8x16_t) __a; - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_u8_s16 (int16x8_t __a) - { -- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a); -+ return (uint8x16_t) __a; - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_u8_s32 (int32x4_t __a) - { -- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a); -+ return (uint8x16_t) __a; - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_u8_s64 (int64x2_t __a) - { -- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a); -+ return (uint8x16_t) __a; - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_u8_f32 (float32x4_t __a) - { -- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a); -+ return (uint8x16_t) __a; - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_u8_u16 (uint16x8_t __a) - { -- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) -- __a); -+ return (uint8x16_t) __a; - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_u8_u32 (uint32x4_t __a) - { -- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t) -- __a); -+ return (uint8x16_t) __a; - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_u8_u64 (uint64x2_t __a) - { -- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t) -- __a); -+ return (uint8x16_t) __a; - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_u8_p8 (poly8x16_t __a) - { -- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t) -- __a); -+ return (uint8x16_t) __a; - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_u8_p16 (poly16x8_t __a) - { -- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) -- __a); -+ return (uint8x16_t) __a; - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+vreinterpret_u16_f64 (float64x1_t __a) -+{ -+ return __builtin_aarch64_reinterpretv4hidf_us (__a); -+} -+ -+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vreinterpret_u16_s8 (int8x8_t __a) - { -- return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a); -+ return (uint16x4_t) __a; - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vreinterpret_u16_s16 (int16x4_t __a) - { -- return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv4hi (__a); -+ return (uint16x4_t) __a; - } - - __extension__ static __inline uint16x4_t 
__attribute__ ((__always_inline__)) - vreinterpret_u16_s32 (int32x2_t __a) - { -- return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a); -+ return (uint16x4_t) __a; - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vreinterpret_u16_s64 (int64x1_t __a) - { -- return (uint16x4_t) __builtin_aarch64_reinterpretv4hidi (__a); -+ return (uint16x4_t) __a; - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vreinterpret_u16_f32 (float32x2_t __a) - { -- return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a); -+ return (uint16x4_t) __a; - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vreinterpret_u16_u8 (uint8x8_t __a) - { -- return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a); -+ return (uint16x4_t) __a; - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vreinterpret_u16_u32 (uint32x2_t __a) - { -- return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a); -+ return (uint16x4_t) __a; - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vreinterpret_u16_u64 (uint64x1_t __a) - { -- return (uint16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a); -+ return (uint16x4_t) __a; - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vreinterpret_u16_p8 (poly8x8_t __a) - { -- return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a); -+ return (uint16x4_t) __a; - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vreinterpret_u16_p16 (poly16x4_t __a) - { -- return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a); -+ return (uint16x4_t) __a; - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+vreinterpretq_u16_f64 (float64x2_t __a) -+{ -+ return (uint16x8_t) __a; -+} -+ -+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_u16_s8 (int8x16_t __a) - { -- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a); -+ return (uint16x8_t) __a; - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_u16_s16 (int16x8_t __a) - { -- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv8hi (__a); -+ return (uint16x8_t) __a; - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_u16_s32 (int32x4_t __a) - { -- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a); -+ return (uint16x8_t) __a; - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_u16_s64 (int64x2_t __a) - { -- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a); -+ return (uint16x8_t) __a; - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_u16_f32 (float32x4_t __a) - { -- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a); -+ return (uint16x8_t) __a; - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_u16_u8 (uint8x16_t __a) - { -- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) -- __a); -+ return (uint16x8_t) __a; - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_u16_u32 (uint32x4_t __a) - { -- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a); -+ return 
(uint16x8_t) __a; - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_u16_u64 (uint64x2_t __a) - { -- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a); -+ return (uint16x8_t) __a; - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_u16_p8 (poly8x16_t __a) - { -- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) -- __a); -+ return (uint16x8_t) __a; - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_u16_p16 (poly16x8_t __a) - { -- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a); -+ return (uint16x8_t) __a; - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+vreinterpret_u32_f64 (float64x1_t __a) -+{ -+ return __builtin_aarch64_reinterpretv2sidf_us (__a); -+} -+ -+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vreinterpret_u32_s8 (int8x8_t __a) - { -- return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi (__a); -+ return (uint32x2_t) __a; - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vreinterpret_u32_s16 (int16x4_t __a) - { -- return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi (__a); -+ return (uint32x2_t) __a; - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vreinterpret_u32_s32 (int32x2_t __a) - { -- return (uint32x2_t) __builtin_aarch64_reinterpretv2siv2si (__a); -+ return (uint32x2_t) __a; - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vreinterpret_u32_s64 (int64x1_t __a) - { -- return (uint32x2_t) __builtin_aarch64_reinterpretv2sidi (__a); -+ return (uint32x2_t) __a; - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vreinterpret_u32_f32 (float32x2_t __a) - { -- return (uint32x2_t) __builtin_aarch64_reinterpretv2siv2sf (__a); -+ return (uint32x2_t) __a; - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vreinterpret_u32_u8 (uint8x8_t __a) - { -- return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a); -+ return (uint32x2_t) __a; - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vreinterpret_u32_u16 (uint16x4_t __a) - { -- return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a); -+ return (uint32x2_t) __a; - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vreinterpret_u32_u64 (uint64x1_t __a) - { -- return (uint32x2_t) __builtin_aarch64_reinterpretv2sidi ((int64x1_t) __a); -+ return (uint32x2_t) __a; - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vreinterpret_u32_p8 (poly8x8_t __a) - { -- return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a); -+ return (uint32x2_t) __a; - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vreinterpret_u32_p16 (poly16x4_t __a) - { -- return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a); -+ return (uint32x2_t) __a; - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+vreinterpretq_u32_f64 (float64x2_t __a) -+{ -+ return (uint32x4_t) __a; -+} -+ -+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_u32_s8 (int8x16_t __a) - { -- return (uint32x4_t) 
__builtin_aarch64_reinterpretv4siv16qi (__a); -+ return (uint32x4_t) __a; - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_u32_s16 (int16x8_t __a) - { -- return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi (__a); -+ return (uint32x4_t) __a; - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_u32_s32 (int32x4_t __a) - { -- return (uint32x4_t) __builtin_aarch64_reinterpretv4siv4si (__a); -+ return (uint32x4_t) __a; - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_u32_s64 (int64x2_t __a) - { -- return (uint32x4_t) __builtin_aarch64_reinterpretv4siv2di (__a); -+ return (uint32x4_t) __a; - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_u32_f32 (float32x4_t __a) - { -- return (uint32x4_t) __builtin_aarch64_reinterpretv4siv4sf (__a); -+ return (uint32x4_t) __a; - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_u32_u8 (uint8x16_t __a) - { -- return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) -- __a); -+ return (uint32x4_t) __a; - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_u32_u16 (uint16x8_t __a) - { -- return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a); -+ return (uint32x4_t) __a; - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_u32_u64 (uint64x2_t __a) - { -- return (uint32x4_t) __builtin_aarch64_reinterpretv4siv2di ((int64x2_t) __a); -+ return (uint32x4_t) __a; - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_u32_p8 (poly8x16_t __a) - { -- return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) -- __a); -+ return (uint32x4_t) __a; - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_u32_p16 (poly16x8_t __a) - { -- return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a); -+ return (uint32x4_t) __a; - } - - #define __GET_LOW(__TYPE) \ -@@ -4064,6 +4297,85 @@ - - #undef __GET_LOW - -+#define __GET_HIGH(__TYPE) \ -+ uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a); \ -+ uint64x1_t hi = vcreate_u64 (vgetq_lane_u64 (tmp, 1)); \ -+ return vreinterpret_##__TYPE##_u64 (hi); -+ -+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+vget_high_f32 (float32x4_t __a) -+{ -+ __GET_HIGH (f32); -+} -+ -+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -+vget_high_f64 (float64x2_t __a) -+{ -+ __GET_HIGH (f64); -+} -+ -+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+vget_high_p8 (poly8x16_t __a) -+{ -+ __GET_HIGH (p8); -+} -+ -+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+vget_high_p16 (poly16x8_t __a) -+{ -+ __GET_HIGH (p16); -+} -+ -+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+vget_high_s8 (int8x16_t __a) -+{ -+ __GET_HIGH (s8); -+} -+ -+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+vget_high_s16 (int16x8_t __a) -+{ -+ __GET_HIGH (s16); -+} -+ -+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+vget_high_s32 (int32x4_t __a) -+{ -+ __GET_HIGH (s32); -+} -+ -+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+vget_high_s64 (int64x2_t 
__a) -+{ -+ __GET_HIGH (s64); -+} -+ -+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+vget_high_u8 (uint8x16_t __a) -+{ -+ __GET_HIGH (u8); -+} -+ -+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+vget_high_u16 (uint16x8_t __a) -+{ -+ __GET_HIGH (u16); -+} -+ -+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+vget_high_u32 (uint32x4_t __a) -+{ -+ __GET_HIGH (u32); -+} -+ -+#undef __GET_HIGH -+ -+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+vget_high_u64 (uint64x2_t __a) -+{ -+ return vcreate_u64 (vgetq_lane_u64 (__a, 1)); -+} -+ - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vcombine_s8 (int8x8_t __a, int8x8_t __b) - { -@@ -5408,318 +5720,6 @@ - return result; - } - --#define vext_f32(a, b, c) \ -- __extension__ \ -- ({ \ -- float32x2_t b_ = (b); \ -- float32x2_t a_ = (a); \ -- float32x2_t result; \ -- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vext_f64(a, b, c) \ -- __extension__ \ -- ({ \ -- float64x1_t b_ = (b); \ -- float64x1_t a_ = (a); \ -- float64x1_t result; \ -- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vext_p8(a, b, c) \ -- __extension__ \ -- ({ \ -- poly8x8_t b_ = (b); \ -- poly8x8_t a_ = (a); \ -- poly8x8_t result; \ -- __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vext_p16(a, b, c) \ -- __extension__ \ -- ({ \ -- poly16x4_t b_ = (b); \ -- poly16x4_t a_ = (a); \ -- poly16x4_t result; \ -- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vext_s8(a, b, c) \ -- __extension__ \ -- ({ \ -- int8x8_t b_ = (b); \ -- int8x8_t a_ = (a); \ -- int8x8_t result; \ -- __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vext_s16(a, b, c) \ -- __extension__ \ -- ({ \ -- int16x4_t b_ = (b); \ -- int16x4_t a_ = (a); \ -- int16x4_t result; \ -- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vext_s32(a, b, c) \ -- __extension__ \ -- ({ \ -- int32x2_t b_ = (b); \ -- int32x2_t a_ = (a); \ -- int32x2_t result; \ -- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vext_s64(a, b, c) \ -- __extension__ \ -- ({ \ -- int64x1_t b_ = (b); \ -- int64x1_t a_ = (a); \ -- int64x1_t result; \ -- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vext_u8(a, b, c) \ -- __extension__ \ -- ({ \ -- uint8x8_t b_ = (b); \ -- uint8x8_t a_ = (a); \ -- uint8x8_t result; \ -- __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vext_u16(a, b, c) \ -- __extension__ \ -- ({ \ -- uint16x4_t b_ = (b); \ -- uint16x4_t a_ = (a); \ -- uint16x4_t result; \ -- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), 
"i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vext_u32(a, b, c) \ -- __extension__ \ -- ({ \ -- uint32x2_t b_ = (b); \ -- uint32x2_t a_ = (a); \ -- uint32x2_t result; \ -- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vext_u64(a, b, c) \ -- __extension__ \ -- ({ \ -- uint64x1_t b_ = (b); \ -- uint64x1_t a_ = (a); \ -- uint64x1_t result; \ -- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vextq_f32(a, b, c) \ -- __extension__ \ -- ({ \ -- float32x4_t b_ = (b); \ -- float32x4_t a_ = (a); \ -- float32x4_t result; \ -- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vextq_f64(a, b, c) \ -- __extension__ \ -- ({ \ -- float64x2_t b_ = (b); \ -- float64x2_t a_ = (a); \ -- float64x2_t result; \ -- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vextq_p8(a, b, c) \ -- __extension__ \ -- ({ \ -- poly8x16_t b_ = (b); \ -- poly8x16_t a_ = (a); \ -- poly8x16_t result; \ -- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vextq_p16(a, b, c) \ -- __extension__ \ -- ({ \ -- poly16x8_t b_ = (b); \ -- poly16x8_t a_ = (a); \ -- poly16x8_t result; \ -- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vextq_s8(a, b, c) \ -- __extension__ \ -- ({ \ -- int8x16_t b_ = (b); \ -- int8x16_t a_ = (a); \ -- int8x16_t result; \ -- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vextq_s16(a, b, c) \ -- __extension__ \ -- ({ \ -- int16x8_t b_ = (b); \ -- int16x8_t a_ = (a); \ -- int16x8_t result; \ -- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vextq_s32(a, b, c) \ -- __extension__ \ -- ({ \ -- int32x4_t b_ = (b); \ -- int32x4_t a_ = (a); \ -- int32x4_t result; \ -- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vextq_s64(a, b, c) \ -- __extension__ \ -- ({ \ -- int64x2_t b_ = (b); \ -- int64x2_t a_ = (a); \ -- int64x2_t result; \ -- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vextq_u8(a, b, c) \ -- __extension__ \ -- ({ \ -- uint8x16_t b_ = (b); \ -- uint8x16_t a_ = (a); \ -- uint8x16_t result; \ -- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vextq_u16(a, b, c) \ -- __extension__ \ -- ({ \ -- uint16x8_t b_ = (b); \ -- uint16x8_t a_ = (a); \ -- uint16x8_t result; \ -- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vextq_u32(a, b, c) \ -- __extension__ \ -- ({ \ -- uint32x4_t b_ = (b); \ -- uint32x4_t 
a_ = (a); \ -- uint32x4_t result; \ -- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vextq_u64(a, b, c) \ -- __extension__ \ -- ({ \ -- uint64x2_t b_ = (b); \ -- uint64x2_t a_ = (a); \ -- uint64x2_t result; \ -- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) - vfma_f32 (float32x2_t a, float32x2_t b, float32x2_t c) - { -@@ -5819,139 +5819,7 @@ - return result; - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vget_high_f32 (float32x4_t a) --{ -- float32x2_t result; -- __asm__ ("ins %0.d[0], %1.d[1]" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) --vget_high_f64 (float64x2_t a) --{ -- float64x1_t result; -- __asm__ ("ins %0.d[0], %1.d[1]" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vget_high_p8 (poly8x16_t a) --{ -- poly8x8_t result; -- __asm__ ("ins %0.d[0], %1.d[1]" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) --vget_high_p16 (poly16x8_t a) --{ -- poly16x4_t result; -- __asm__ ("ins %0.d[0], %1.d[1]" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vget_high_s8 (int8x16_t a) --{ -- int8x8_t result; -- __asm__ ("ins %0.d[0], %1.d[1]" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vget_high_s16 (int16x8_t a) --{ -- int16x4_t result; -- __asm__ ("ins %0.d[0], %1.d[1]" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vget_high_s32 (int32x4_t a) --{ -- int32x2_t result; -- __asm__ ("ins %0.d[0], %1.d[1]" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) --vget_high_s64 (int64x2_t a) --{ -- int64x1_t result; -- __asm__ ("ins %0.d[0], %1.d[1]" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vget_high_u8 (uint8x16_t a) --{ -- uint8x8_t result; -- __asm__ ("ins %0.d[0], %1.d[1]" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vget_high_u16 (uint16x8_t a) --{ -- uint16x4_t result; -- __asm__ ("ins %0.d[0], %1.d[1]" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vget_high_u32 (uint32x4_t a) --{ -- uint32x2_t result; -- __asm__ ("ins %0.d[0], %1.d[1]" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vget_high_u64 (uint64x2_t a) --{ -- uint64x1_t result; -- __asm__ 
("ins %0.d[0], %1.d[1]" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vhsub_s8 (int8x8_t a, int8x8_t b) - { - int8x8_t result; -@@ -6784,7 +6652,7 @@ - #define vmlal_high_lane_s16(a, b, c, d) \ - __extension__ \ - ({ \ -- int16x8_t c_ = (c); \ -+ int16x4_t c_ = (c); \ - int16x8_t b_ = (b); \ - int32x4_t a_ = (a); \ - int32x4_t result; \ -@@ -6798,7 +6666,7 @@ - #define vmlal_high_lane_s32(a, b, c, d) \ - __extension__ \ - ({ \ -- int32x4_t c_ = (c); \ -+ int32x2_t c_ = (c); \ - int32x4_t b_ = (b); \ - int64x2_t a_ = (a); \ - int64x2_t result; \ -@@ -6812,7 +6680,7 @@ - #define vmlal_high_lane_u16(a, b, c, d) \ - __extension__ \ - ({ \ -- uint16x8_t c_ = (c); \ -+ uint16x4_t c_ = (c); \ - uint16x8_t b_ = (b); \ - uint32x4_t a_ = (a); \ - uint32x4_t result; \ -@@ -6826,7 +6694,7 @@ - #define vmlal_high_lane_u32(a, b, c, d) \ - __extension__ \ - ({ \ -- uint32x4_t c_ = (c); \ -+ uint32x2_t c_ = (c); \ - uint32x4_t b_ = (b); \ - uint64x2_t a_ = (a); \ - uint64x2_t result; \ -@@ -7237,18 +7105,6 @@ - return result; - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vmlaq_n_f64 (float64x2_t a, float64x2_t b, float64_t c) --{ -- float64x2_t result; -- float64x2_t t1; -- __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fadd %0.2d, %0.2d, %1.2d" -- : "=w"(result), "=w"(t1) -- : "0"(a), "w"(b), "w"(c) -- : /* No clobbers */); -- return result; --} -- - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vmlaq_n_s16 (int16x8_t a, int16x8_t b, int16_t c) - { -@@ -7484,7 +7340,7 @@ - #define vmlsl_high_lane_s16(a, b, c, d) \ - __extension__ \ - ({ \ -- int16x8_t c_ = (c); \ -+ int16x4_t c_ = (c); \ - int16x8_t b_ = (b); \ - int32x4_t a_ = (a); \ - int32x4_t result; \ -@@ -7498,7 +7354,7 @@ - #define vmlsl_high_lane_s32(a, b, c, d) \ - __extension__ \ - ({ \ -- int32x4_t c_ = (c); \ -+ int32x2_t c_ = (c); \ - int32x4_t b_ = (b); \ - int64x2_t a_ = (a); \ - int64x2_t result; \ -@@ -7512,7 +7368,7 @@ - #define vmlsl_high_lane_u16(a, b, c, d) \ - __extension__ \ - ({ \ -- uint16x8_t c_ = (c); \ -+ uint16x4_t c_ = (c); \ - uint16x8_t b_ = (b); \ - uint32x4_t a_ = (a); \ - uint32x4_t result; \ -@@ -7526,7 +7382,7 @@ - #define vmlsl_high_lane_u32(a, b, c, d) \ - __extension__ \ - ({ \ -- uint32x4_t c_ = (c); \ -+ uint32x2_t c_ = (c); \ - uint32x4_t b_ = (b); \ - uint64x2_t a_ = (a); \ - uint64x2_t result; \ -@@ -7937,18 +7793,6 @@ - return result; - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vmlsq_n_f64 (float64x2_t a, float64x2_t b, float64_t c) --{ -- float64x2_t result; -- float64x2_t t1; -- __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fsub %0.2d, %0.2d, %1.2d" -- : "=w"(result), "=w"(t1) -- : "0"(a), "w"(b), "x"(c) -- : /* No clobbers */); -- return result; --} -- - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vmlsq_n_s16 (int16x8_t a, int16x8_t b, int16_t c) - { -@@ -9312,57 +9156,7 @@ - return result; - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vpadd_s8 (int8x8_t __a, int8x8_t __b) --{ -- return __builtin_aarch64_addpv8qi (__a, __b); --} -- - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vpadd_s16 (int16x4_t __a, int16x4_t __b) --{ -- return __builtin_aarch64_addpv4hi (__a, __b); --} -- --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vpadd_s32 (int32x2_t __a, int32x2_t 
__b) --{ -- return __builtin_aarch64_addpv2si (__a, __b); --} -- --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vpadd_u8 (uint8x8_t __a, uint8x8_t __b) --{ -- return (uint8x8_t) __builtin_aarch64_addpv8qi ((int8x8_t) __a, -- (int8x8_t) __b); --} -- --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vpadd_u16 (uint16x4_t __a, uint16x4_t __b) --{ -- return (uint16x4_t) __builtin_aarch64_addpv4hi ((int16x4_t) __a, -- (int16x4_t) __b); --} -- --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vpadd_u32 (uint32x2_t __a, uint32x2_t __b) --{ -- return (uint32x2_t) __builtin_aarch64_addpv2si ((int32x2_t) __a, -- (int32x2_t) __b); --} -- --__extension__ static __inline float64_t __attribute__ ((__always_inline__)) --vpaddd_f64 (float64x2_t a) --{ -- float64_t result; -- __asm__ ("faddp %d0,%1.2d" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vpaddl_s8 (int8x8_t a) - { - int16x4_t result; -@@ -10556,50 +10350,6 @@ - result; \ - }) - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vrbit_s8 (int8x8_t a) --{ -- int8x8_t result; -- __asm__ ("rbit %0.8b,%1.8b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vrbit_u8 (uint8x8_t a) --{ -- uint8x8_t result; -- __asm__ ("rbit %0.8b,%1.8b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vrbitq_s8 (int8x16_t a) --{ -- int8x16_t result; -- __asm__ ("rbit %0.16b,%1.16b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vrbitq_u8 (uint8x16_t a) --{ -- uint8x16_t result; -- __asm__ ("rbit %0.16b,%1.16b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vrecpe_u32 (uint32x2_t a) - { -@@ -10622,402 +10372,6 @@ - return result; - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vrev16_p8 (poly8x8_t a) --{ -- poly8x8_t result; -- __asm__ ("rev16 %0.8b,%1.8b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vrev16_s8 (int8x8_t a) --{ -- int8x8_t result; -- __asm__ ("rev16 %0.8b,%1.8b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vrev16_u8 (uint8x8_t a) --{ -- uint8x8_t result; -- __asm__ ("rev16 %0.8b,%1.8b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vrev16q_p8 (poly8x16_t a) --{ -- poly8x16_t result; -- __asm__ ("rev16 %0.16b,%1.16b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vrev16q_s8 (int8x16_t a) --{ -- int8x16_t result; -- __asm__ ("rev16 %0.16b,%1.16b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint8x16_t __attribute__ 
((__always_inline__)) --vrev16q_u8 (uint8x16_t a) --{ -- uint8x16_t result; -- __asm__ ("rev16 %0.16b,%1.16b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vrev32_p8 (poly8x8_t a) --{ -- poly8x8_t result; -- __asm__ ("rev32 %0.8b,%1.8b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) --vrev32_p16 (poly16x4_t a) --{ -- poly16x4_t result; -- __asm__ ("rev32 %0.4h,%1.4h" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vrev32_s8 (int8x8_t a) --{ -- int8x8_t result; -- __asm__ ("rev32 %0.8b,%1.8b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vrev32_s16 (int16x4_t a) --{ -- int16x4_t result; -- __asm__ ("rev32 %0.4h,%1.4h" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vrev32_u8 (uint8x8_t a) --{ -- uint8x8_t result; -- __asm__ ("rev32 %0.8b,%1.8b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vrev32_u16 (uint16x4_t a) --{ -- uint16x4_t result; -- __asm__ ("rev32 %0.4h,%1.4h" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vrev32q_p8 (poly8x16_t a) --{ -- poly8x16_t result; -- __asm__ ("rev32 %0.16b,%1.16b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) --vrev32q_p16 (poly16x8_t a) --{ -- poly16x8_t result; -- __asm__ ("rev32 %0.8h,%1.8h" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vrev32q_s8 (int8x16_t a) --{ -- int8x16_t result; -- __asm__ ("rev32 %0.16b,%1.16b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vrev32q_s16 (int16x8_t a) --{ -- int16x8_t result; -- __asm__ ("rev32 %0.8h,%1.8h" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vrev32q_u8 (uint8x16_t a) --{ -- uint8x16_t result; -- __asm__ ("rev32 %0.16b,%1.16b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vrev32q_u16 (uint16x8_t a) --{ -- uint16x8_t result; -- __asm__ ("rev32 %0.8h,%1.8h" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vrev64_f32 (float32x2_t a) --{ -- float32x2_t result; -- __asm__ ("rev64 %0.2s,%1.2s" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vrev64_p8 (poly8x8_t a) --{ -- poly8x8_t result; -- __asm__ ("rev64 %0.8b,%1.8b" -- : "=w"(result) -- : "w"(a) 
-- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) --vrev64_p16 (poly16x4_t a) --{ -- poly16x4_t result; -- __asm__ ("rev64 %0.4h,%1.4h" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vrev64_s8 (int8x8_t a) --{ -- int8x8_t result; -- __asm__ ("rev64 %0.8b,%1.8b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vrev64_s16 (int16x4_t a) --{ -- int16x4_t result; -- __asm__ ("rev64 %0.4h,%1.4h" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vrev64_s32 (int32x2_t a) --{ -- int32x2_t result; -- __asm__ ("rev64 %0.2s,%1.2s" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vrev64_u8 (uint8x8_t a) --{ -- uint8x8_t result; -- __asm__ ("rev64 %0.8b,%1.8b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vrev64_u16 (uint16x4_t a) --{ -- uint16x4_t result; -- __asm__ ("rev64 %0.4h,%1.4h" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vrev64_u32 (uint32x2_t a) --{ -- uint32x2_t result; -- __asm__ ("rev64 %0.2s,%1.2s" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vrev64q_f32 (float32x4_t a) --{ -- float32x4_t result; -- __asm__ ("rev64 %0.4s,%1.4s" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vrev64q_p8 (poly8x16_t a) --{ -- poly8x16_t result; -- __asm__ ("rev64 %0.16b,%1.16b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) --vrev64q_p16 (poly16x8_t a) --{ -- poly16x8_t result; -- __asm__ ("rev64 %0.8h,%1.8h" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vrev64q_s8 (int8x16_t a) --{ -- int8x16_t result; -- __asm__ ("rev64 %0.16b,%1.16b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vrev64q_s16 (int16x8_t a) --{ -- int16x8_t result; -- __asm__ ("rev64 %0.8h,%1.8h" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vrev64q_s32 (int32x4_t a) --{ -- int32x4_t result; -- __asm__ ("rev64 %0.4s,%1.4s" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vrev64q_u8 (uint8x16_t a) --{ -- uint8x16_t result; -- __asm__ ("rev64 %0.16b,%1.16b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 
--vrev64q_u16 (uint16x8_t a) --{ -- uint16x8_t result; -- __asm__ ("rev64 %0.8h,%1.8h" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vrev64q_u32 (uint32x4_t a) --{ -- uint32x4_t result; -- __asm__ ("rev64 %0.4s,%1.4s" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- - #define vrshrn_high_n_s16(a, b, c) \ - __extension__ \ - ({ \ -@@ -11323,17 +10677,6 @@ - return result; - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vrsrtsq_f64 (float64x2_t a, float64x2_t b) --{ -- float64x2_t result; -- __asm__ ("frsqrts %0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vrsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c) - { -@@ -12441,469 +11784,7 @@ - return result; - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vtrn1_f32 (float32x2_t a, float32x2_t b) --{ -- float32x2_t result; -- __asm__ ("trn1 %0.2s,%1.2s,%2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vtrn1_p8 (poly8x8_t a, poly8x8_t b) --{ -- poly8x8_t result; -- __asm__ ("trn1 %0.8b,%1.8b,%2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) --vtrn1_p16 (poly16x4_t a, poly16x4_t b) --{ -- poly16x4_t result; -- __asm__ ("trn1 %0.4h,%1.4h,%2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vtrn1_s8 (int8x8_t a, int8x8_t b) --{ -- int8x8_t result; -- __asm__ ("trn1 %0.8b,%1.8b,%2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vtrn1_s16 (int16x4_t a, int16x4_t b) --{ -- int16x4_t result; -- __asm__ ("trn1 %0.4h,%1.4h,%2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vtrn1_s32 (int32x2_t a, int32x2_t b) --{ -- int32x2_t result; -- __asm__ ("trn1 %0.2s,%1.2s,%2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vtrn1_u8 (uint8x8_t a, uint8x8_t b) --{ -- uint8x8_t result; -- __asm__ ("trn1 %0.8b,%1.8b,%2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vtrn1_u16 (uint16x4_t a, uint16x4_t b) --{ -- uint16x4_t result; -- __asm__ ("trn1 %0.4h,%1.4h,%2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vtrn1_u32 (uint32x2_t a, uint32x2_t b) --{ -- uint32x2_t result; -- __asm__ ("trn1 %0.2s,%1.2s,%2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vtrn1q_f32 (float32x4_t a, float32x4_t b) --{ -- float32x4_t 
result; -- __asm__ ("trn1 %0.4s,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vtrn1q_f64 (float64x2_t a, float64x2_t b) --{ -- float64x2_t result; -- __asm__ ("trn1 %0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vtrn1q_p8 (poly8x16_t a, poly8x16_t b) --{ -- poly8x16_t result; -- __asm__ ("trn1 %0.16b,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) --vtrn1q_p16 (poly16x8_t a, poly16x8_t b) --{ -- poly16x8_t result; -- __asm__ ("trn1 %0.8h,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vtrn1q_s8 (int8x16_t a, int8x16_t b) --{ -- int8x16_t result; -- __asm__ ("trn1 %0.16b,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vtrn1q_s16 (int16x8_t a, int16x8_t b) --{ -- int16x8_t result; -- __asm__ ("trn1 %0.8h,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vtrn1q_s32 (int32x4_t a, int32x4_t b) --{ -- int32x4_t result; -- __asm__ ("trn1 %0.4s,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vtrn1q_s64 (int64x2_t a, int64x2_t b) --{ -- int64x2_t result; -- __asm__ ("trn1 %0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vtrn1q_u8 (uint8x16_t a, uint8x16_t b) --{ -- uint8x16_t result; -- __asm__ ("trn1 %0.16b,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vtrn1q_u16 (uint16x8_t a, uint16x8_t b) --{ -- uint16x8_t result; -- __asm__ ("trn1 %0.8h,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vtrn1q_u32 (uint32x4_t a, uint32x4_t b) --{ -- uint32x4_t result; -- __asm__ ("trn1 %0.4s,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vtrn1q_u64 (uint64x2_t a, uint64x2_t b) --{ -- uint64x2_t result; -- __asm__ ("trn1 %0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vtrn2_f32 (float32x2_t a, float32x2_t b) --{ -- float32x2_t result; -- __asm__ ("trn2 %0.2s,%1.2s,%2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vtrn2_p8 (poly8x8_t a, poly8x8_t b) --{ -- poly8x8_t result; -- __asm__ ("trn2 
%0.8b,%1.8b,%2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) --vtrn2_p16 (poly16x4_t a, poly16x4_t b) --{ -- poly16x4_t result; -- __asm__ ("trn2 %0.4h,%1.4h,%2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vtrn2_s8 (int8x8_t a, int8x8_t b) --{ -- int8x8_t result; -- __asm__ ("trn2 %0.8b,%1.8b,%2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vtrn2_s16 (int16x4_t a, int16x4_t b) --{ -- int16x4_t result; -- __asm__ ("trn2 %0.4h,%1.4h,%2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vtrn2_s32 (int32x2_t a, int32x2_t b) --{ -- int32x2_t result; -- __asm__ ("trn2 %0.2s,%1.2s,%2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vtrn2_u8 (uint8x8_t a, uint8x8_t b) --{ -- uint8x8_t result; -- __asm__ ("trn2 %0.8b,%1.8b,%2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vtrn2_u16 (uint16x4_t a, uint16x4_t b) --{ -- uint16x4_t result; -- __asm__ ("trn2 %0.4h,%1.4h,%2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vtrn2_u32 (uint32x2_t a, uint32x2_t b) --{ -- uint32x2_t result; -- __asm__ ("trn2 %0.2s,%1.2s,%2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vtrn2q_f32 (float32x4_t a, float32x4_t b) --{ -- float32x4_t result; -- __asm__ ("trn2 %0.4s,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vtrn2q_f64 (float64x2_t a, float64x2_t b) --{ -- float64x2_t result; -- __asm__ ("trn2 %0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vtrn2q_p8 (poly8x16_t a, poly8x16_t b) --{ -- poly8x16_t result; -- __asm__ ("trn2 %0.16b,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) --vtrn2q_p16 (poly16x8_t a, poly16x8_t b) --{ -- poly16x8_t result; -- __asm__ ("trn2 %0.8h,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vtrn2q_s8 (int8x16_t a, int8x16_t b) --{ -- int8x16_t result; -- __asm__ ("trn2 %0.16b,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vtrn2q_s16 (int16x8_t a, int16x8_t b) --{ -- int16x8_t result; -- __asm__ ("trn2 %0.8h,%1.8h,%2.8h" -- : "=w"(result) -- : 
"w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vtrn2q_s32 (int32x4_t a, int32x4_t b) --{ -- int32x4_t result; -- __asm__ ("trn2 %0.4s,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vtrn2q_s64 (int64x2_t a, int64x2_t b) --{ -- int64x2_t result; -- __asm__ ("trn2 %0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vtrn2q_u8 (uint8x16_t a, uint8x16_t b) --{ -- uint8x16_t result; -- __asm__ ("trn2 %0.16b,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vtrn2q_u16 (uint16x8_t a, uint16x8_t b) --{ -- uint16x8_t result; -- __asm__ ("trn2 %0.8h,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vtrn2q_u32 (uint32x4_t a, uint32x4_t b) --{ -- uint32x4_t result; -- __asm__ ("trn2 %0.4s,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vtrn2q_u64 (uint64x2_t a, uint64x2_t b) --{ -- uint64x2_t result; -- __asm__ ("trn2 %0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vtst_p8 (poly8x8_t a, poly8x8_t b) - { - uint8x8_t result; -@@ -12946,930 +11827,7 @@ - : /* No clobbers */); - return result; - } --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vuzp1_f32 (float32x2_t a, float32x2_t b) --{ -- float32x2_t result; -- __asm__ ("uzp1 %0.2s,%1.2s,%2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vuzp1_p8 (poly8x8_t a, poly8x8_t b) --{ -- poly8x8_t result; -- __asm__ ("uzp1 %0.8b,%1.8b,%2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) --vuzp1_p16 (poly16x4_t a, poly16x4_t b) --{ -- poly16x4_t result; -- __asm__ ("uzp1 %0.4h,%1.4h,%2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vuzp1_s8 (int8x8_t a, int8x8_t b) --{ -- int8x8_t result; -- __asm__ ("uzp1 %0.8b,%1.8b,%2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vuzp1_s16 (int16x4_t a, int16x4_t b) --{ -- int16x4_t result; -- __asm__ ("uzp1 %0.4h,%1.4h,%2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vuzp1_s32 (int32x2_t a, int32x2_t b) --{ -- int32x2_t result; -- __asm__ ("uzp1 %0.2s,%1.2s,%2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint8x8_t 
__attribute__ ((__always_inline__)) --vuzp1_u8 (uint8x8_t a, uint8x8_t b) --{ -- uint8x8_t result; -- __asm__ ("uzp1 %0.8b,%1.8b,%2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vuzp1_u16 (uint16x4_t a, uint16x4_t b) --{ -- uint16x4_t result; -- __asm__ ("uzp1 %0.4h,%1.4h,%2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vuzp1_u32 (uint32x2_t a, uint32x2_t b) --{ -- uint32x2_t result; -- __asm__ ("uzp1 %0.2s,%1.2s,%2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vuzp1q_f32 (float32x4_t a, float32x4_t b) --{ -- float32x4_t result; -- __asm__ ("uzp1 %0.4s,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vuzp1q_f64 (float64x2_t a, float64x2_t b) --{ -- float64x2_t result; -- __asm__ ("uzp1 %0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vuzp1q_p8 (poly8x16_t a, poly8x16_t b) --{ -- poly8x16_t result; -- __asm__ ("uzp1 %0.16b,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) --vuzp1q_p16 (poly16x8_t a, poly16x8_t b) --{ -- poly16x8_t result; -- __asm__ ("uzp1 %0.8h,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vuzp1q_s8 (int8x16_t a, int8x16_t b) --{ -- int8x16_t result; -- __asm__ ("uzp1 %0.16b,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vuzp1q_s16 (int16x8_t a, int16x8_t b) --{ -- int16x8_t result; -- __asm__ ("uzp1 %0.8h,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vuzp1q_s32 (int32x4_t a, int32x4_t b) --{ -- int32x4_t result; -- __asm__ ("uzp1 %0.4s,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vuzp1q_s64 (int64x2_t a, int64x2_t b) --{ -- int64x2_t result; -- __asm__ ("uzp1 %0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vuzp1q_u8 (uint8x16_t a, uint8x16_t b) --{ -- uint8x16_t result; -- __asm__ ("uzp1 %0.16b,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vuzp1q_u16 (uint16x8_t a, uint16x8_t b) --{ -- uint16x8_t result; -- __asm__ ("uzp1 %0.8h,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint32x4_t __attribute__ 
((__always_inline__)) --vuzp1q_u32 (uint32x4_t a, uint32x4_t b) --{ -- uint32x4_t result; -- __asm__ ("uzp1 %0.4s,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vuzp1q_u64 (uint64x2_t a, uint64x2_t b) --{ -- uint64x2_t result; -- __asm__ ("uzp1 %0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vuzp2_f32 (float32x2_t a, float32x2_t b) --{ -- float32x2_t result; -- __asm__ ("uzp2 %0.2s,%1.2s,%2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vuzp2_p8 (poly8x8_t a, poly8x8_t b) --{ -- poly8x8_t result; -- __asm__ ("uzp2 %0.8b,%1.8b,%2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) --vuzp2_p16 (poly16x4_t a, poly16x4_t b) --{ -- poly16x4_t result; -- __asm__ ("uzp2 %0.4h,%1.4h,%2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vuzp2_s8 (int8x8_t a, int8x8_t b) --{ -- int8x8_t result; -- __asm__ ("uzp2 %0.8b,%1.8b,%2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vuzp2_s16 (int16x4_t a, int16x4_t b) --{ -- int16x4_t result; -- __asm__ ("uzp2 %0.4h,%1.4h,%2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vuzp2_s32 (int32x2_t a, int32x2_t b) --{ -- int32x2_t result; -- __asm__ ("uzp2 %0.2s,%1.2s,%2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vuzp2_u8 (uint8x8_t a, uint8x8_t b) --{ -- uint8x8_t result; -- __asm__ ("uzp2 %0.8b,%1.8b,%2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vuzp2_u16 (uint16x4_t a, uint16x4_t b) --{ -- uint16x4_t result; -- __asm__ ("uzp2 %0.4h,%1.4h,%2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vuzp2_u32 (uint32x2_t a, uint32x2_t b) --{ -- uint32x2_t result; -- __asm__ ("uzp2 %0.2s,%1.2s,%2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vuzp2q_f32 (float32x4_t a, float32x4_t b) --{ -- float32x4_t result; -- __asm__ ("uzp2 %0.4s,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vuzp2q_f64 (float64x2_t a, float64x2_t b) --{ -- float64x2_t result; -- __asm__ ("uzp2 %0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vuzp2q_p8 
(poly8x16_t a, poly8x16_t b) --{ -- poly8x16_t result; -- __asm__ ("uzp2 %0.16b,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) --vuzp2q_p16 (poly16x8_t a, poly16x8_t b) --{ -- poly16x8_t result; -- __asm__ ("uzp2 %0.8h,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vuzp2q_s8 (int8x16_t a, int8x16_t b) --{ -- int8x16_t result; -- __asm__ ("uzp2 %0.16b,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vuzp2q_s16 (int16x8_t a, int16x8_t b) --{ -- int16x8_t result; -- __asm__ ("uzp2 %0.8h,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vuzp2q_s32 (int32x4_t a, int32x4_t b) --{ -- int32x4_t result; -- __asm__ ("uzp2 %0.4s,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vuzp2q_s64 (int64x2_t a, int64x2_t b) --{ -- int64x2_t result; -- __asm__ ("uzp2 %0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vuzp2q_u8 (uint8x16_t a, uint8x16_t b) --{ -- uint8x16_t result; -- __asm__ ("uzp2 %0.16b,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vuzp2q_u16 (uint16x8_t a, uint16x8_t b) --{ -- uint16x8_t result; -- __asm__ ("uzp2 %0.8h,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vuzp2q_u32 (uint32x4_t a, uint32x4_t b) --{ -- uint32x4_t result; -- __asm__ ("uzp2 %0.4s,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vuzp2q_u64 (uint64x2_t a, uint64x2_t b) --{ -- uint64x2_t result; -- __asm__ ("uzp2 %0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vzip1_f32 (float32x2_t a, float32x2_t b) --{ -- float32x2_t result; -- __asm__ ("zip1 %0.2s,%1.2s,%2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vzip1_p8 (poly8x8_t a, poly8x8_t b) --{ -- poly8x8_t result; -- __asm__ ("zip1 %0.8b,%1.8b,%2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) --vzip1_p16 (poly16x4_t a, poly16x4_t b) --{ -- poly16x4_t result; -- __asm__ ("zip1 %0.4h,%1.4h,%2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vzip1_s8 (int8x8_t a, int8x8_t b) --{ -- 
int8x8_t result; -- __asm__ ("zip1 %0.8b,%1.8b,%2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vzip1_s16 (int16x4_t a, int16x4_t b) --{ -- int16x4_t result; -- __asm__ ("zip1 %0.4h,%1.4h,%2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vzip1_s32 (int32x2_t a, int32x2_t b) --{ -- int32x2_t result; -- __asm__ ("zip1 %0.2s,%1.2s,%2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vzip1_u8 (uint8x8_t a, uint8x8_t b) --{ -- uint8x8_t result; -- __asm__ ("zip1 %0.8b,%1.8b,%2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vzip1_u16 (uint16x4_t a, uint16x4_t b) --{ -- uint16x4_t result; -- __asm__ ("zip1 %0.4h,%1.4h,%2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vzip1_u32 (uint32x2_t a, uint32x2_t b) --{ -- uint32x2_t result; -- __asm__ ("zip1 %0.2s,%1.2s,%2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vzip1q_f32 (float32x4_t a, float32x4_t b) --{ -- float32x4_t result; -- __asm__ ("zip1 %0.4s,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vzip1q_f64 (float64x2_t a, float64x2_t b) --{ -- float64x2_t result; -- __asm__ ("zip1 %0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vzip1q_p8 (poly8x16_t a, poly8x16_t b) --{ -- poly8x16_t result; -- __asm__ ("zip1 %0.16b,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) --vzip1q_p16 (poly16x8_t a, poly16x8_t b) --{ -- poly16x8_t result; -- __asm__ ("zip1 %0.8h,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vzip1q_s8 (int8x16_t a, int8x16_t b) --{ -- int8x16_t result; -- __asm__ ("zip1 %0.16b,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vzip1q_s16 (int16x8_t a, int16x8_t b) --{ -- int16x8_t result; -- __asm__ ("zip1 %0.8h,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vzip1q_s32 (int32x4_t a, int32x4_t b) --{ -- int32x4_t result; -- __asm__ ("zip1 %0.4s,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vzip1q_s64 (int64x2_t a, int64x2_t b) --{ -- int64x2_t result; -- __asm__ ("zip1 
%0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vzip1q_u8 (uint8x16_t a, uint8x16_t b) --{ -- uint8x16_t result; -- __asm__ ("zip1 %0.16b,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vzip1q_u16 (uint16x8_t a, uint16x8_t b) --{ -- uint16x8_t result; -- __asm__ ("zip1 %0.8h,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vzip1q_u32 (uint32x4_t a, uint32x4_t b) --{ -- uint32x4_t result; -- __asm__ ("zip1 %0.4s,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vzip1q_u64 (uint64x2_t a, uint64x2_t b) --{ -- uint64x2_t result; -- __asm__ ("zip1 %0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vzip2_f32 (float32x2_t a, float32x2_t b) --{ -- float32x2_t result; -- __asm__ ("zip2 %0.2s,%1.2s,%2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vzip2_p8 (poly8x8_t a, poly8x8_t b) --{ -- poly8x8_t result; -- __asm__ ("zip2 %0.8b,%1.8b,%2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) --vzip2_p16 (poly16x4_t a, poly16x4_t b) --{ -- poly16x4_t result; -- __asm__ ("zip2 %0.4h,%1.4h,%2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vzip2_s8 (int8x8_t a, int8x8_t b) --{ -- int8x8_t result; -- __asm__ ("zip2 %0.8b,%1.8b,%2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vzip2_s16 (int16x4_t a, int16x4_t b) --{ -- int16x4_t result; -- __asm__ ("zip2 %0.4h,%1.4h,%2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vzip2_s32 (int32x2_t a, int32x2_t b) --{ -- int32x2_t result; -- __asm__ ("zip2 %0.2s,%1.2s,%2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vzip2_u8 (uint8x8_t a, uint8x8_t b) --{ -- uint8x8_t result; -- __asm__ ("zip2 %0.8b,%1.8b,%2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vzip2_u16 (uint16x4_t a, uint16x4_t b) --{ -- uint16x4_t result; -- __asm__ ("zip2 %0.4h,%1.4h,%2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vzip2_u32 (uint32x2_t a, uint32x2_t b) --{ -- uint32x2_t result; -- __asm__ ("zip2 %0.2s,%1.2s,%2.2s" -- : "=w"(result) -- : 
"w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vzip2q_f32 (float32x4_t a, float32x4_t b) --{ -- float32x4_t result; -- __asm__ ("zip2 %0.4s,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vzip2q_f64 (float64x2_t a, float64x2_t b) --{ -- float64x2_t result; -- __asm__ ("zip2 %0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vzip2q_p8 (poly8x16_t a, poly8x16_t b) --{ -- poly8x16_t result; -- __asm__ ("zip2 %0.16b,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) --vzip2q_p16 (poly16x8_t a, poly16x8_t b) --{ -- poly16x8_t result; -- __asm__ ("zip2 %0.8h,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vzip2q_s8 (int8x16_t a, int8x16_t b) --{ -- int8x16_t result; -- __asm__ ("zip2 %0.16b,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vzip2q_s16 (int16x8_t a, int16x8_t b) --{ -- int16x8_t result; -- __asm__ ("zip2 %0.8h,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vzip2q_s32 (int32x4_t a, int32x4_t b) --{ -- int32x4_t result; -- __asm__ ("zip2 %0.4s,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vzip2q_s64 (int64x2_t a, int64x2_t b) --{ -- int64x2_t result; -- __asm__ ("zip2 %0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vzip2q_u8 (uint8x16_t a, uint8x16_t b) --{ -- uint8x16_t result; -- __asm__ ("zip2 %0.16b,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vzip2q_u16 (uint16x8_t a, uint16x8_t b) --{ -- uint16x8_t result; -- __asm__ ("zip2 %0.8h,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vzip2q_u32 (uint32x4_t a, uint32x4_t b) --{ -- uint32x4_t result; -- __asm__ ("zip2 %0.4s,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vzip2q_u64 (uint64x2_t a, uint64x2_t b) --{ -- uint64x2_t result; -- __asm__ ("zip2 %0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- - /* End of temporary inline asm implementations. */ - - /* Start of temporary inline asm for vldn, vstn and friends. 
*/ -@@ -13953,46 +11911,6 @@ - __STRUCTN (float, 64, 4) - #undef __STRUCTN - --#define __LD2R_FUNC(rettype, structtype, ptrtype, \ -- regsuffix, funcsuffix, Q) \ -- __extension__ static __inline rettype \ -- __attribute__ ((__always_inline__)) \ -- vld2 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \ -- { \ -- rettype result; \ -- __asm__ ("ld2r {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \ -- "st1 {v16." #regsuffix ", v17." #regsuffix "}, %0\n\t" \ -- : "=Q"(result) \ -- : "Q"(*(const structtype *)ptr) \ -- : "memory", "v16", "v17"); \ -- return result; \ -- } -- --__LD2R_FUNC (float32x2x2_t, float32x2_t, float32_t, 2s, f32,) --__LD2R_FUNC (float64x1x2_t, float64x2_t, float64_t, 1d, f64,) --__LD2R_FUNC (poly8x8x2_t, poly8x2_t, poly8_t, 8b, p8,) --__LD2R_FUNC (poly16x4x2_t, poly16x2_t, poly16_t, 4h, p16,) --__LD2R_FUNC (int8x8x2_t, int8x2_t, int8_t, 8b, s8,) --__LD2R_FUNC (int16x4x2_t, int16x2_t, int16_t, 4h, s16,) --__LD2R_FUNC (int32x2x2_t, int32x2_t, int32_t, 2s, s32,) --__LD2R_FUNC (int64x1x2_t, int64x2_t, int64_t, 1d, s64,) --__LD2R_FUNC (uint8x8x2_t, uint8x2_t, uint8_t, 8b, u8,) --__LD2R_FUNC (uint16x4x2_t, uint16x2_t, uint16_t, 4h, u16,) --__LD2R_FUNC (uint32x2x2_t, uint32x2_t, uint32_t, 2s, u32,) --__LD2R_FUNC (uint64x1x2_t, uint64x2_t, uint64_t, 1d, u64,) --__LD2R_FUNC (float32x4x2_t, float32x2_t, float32_t, 4s, f32, q) --__LD2R_FUNC (float64x2x2_t, float64x2_t, float64_t, 2d, f64, q) --__LD2R_FUNC (poly8x16x2_t, poly8x2_t, poly8_t, 16b, p8, q) --__LD2R_FUNC (poly16x8x2_t, poly16x2_t, poly16_t, 8h, p16, q) --__LD2R_FUNC (int8x16x2_t, int8x2_t, int8_t, 16b, s8, q) --__LD2R_FUNC (int16x8x2_t, int16x2_t, int16_t, 8h, s16, q) --__LD2R_FUNC (int32x4x2_t, int32x2_t, int32_t, 4s, s32, q) --__LD2R_FUNC (int64x2x2_t, int64x2_t, int64_t, 2d, s64, q) --__LD2R_FUNC (uint8x16x2_t, uint8x2_t, uint8_t, 16b, u8, q) --__LD2R_FUNC (uint16x8x2_t, uint16x2_t, uint16_t, 8h, u16, q) --__LD2R_FUNC (uint32x4x2_t, uint32x2_t, uint32_t, 4s, u32, q) --__LD2R_FUNC (uint64x2x2_t, uint64x2_t, uint64_t, 2d, u64, q) -- - #define __LD2_LANE_FUNC(rettype, ptrtype, regsuffix, \ - lnsuffix, funcsuffix, Q) \ - __extension__ static __inline rettype \ -@@ -14035,46 +11953,6 @@ - __LD2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q) - __LD2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q) - --#define __LD3R_FUNC(rettype, structtype, ptrtype, \ -- regsuffix, funcsuffix, Q) \ -- __extension__ static __inline rettype \ -- __attribute__ ((__always_inline__)) \ -- vld3 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \ -- { \ -- rettype result; \ -- __asm__ ("ld3r {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \ -- "st1 {v16." #regsuffix " - v18." 
#regsuffix "}, %0\n\t" \ -- : "=Q"(result) \ -- : "Q"(*(const structtype *)ptr) \ -- : "memory", "v16", "v17", "v18"); \ -- return result; \ -- } -- --__LD3R_FUNC (float32x2x3_t, float32x3_t, float32_t, 2s, f32,) --__LD3R_FUNC (float64x1x3_t, float64x3_t, float64_t, 1d, f64,) --__LD3R_FUNC (poly8x8x3_t, poly8x3_t, poly8_t, 8b, p8,) --__LD3R_FUNC (poly16x4x3_t, poly16x3_t, poly16_t, 4h, p16,) --__LD3R_FUNC (int8x8x3_t, int8x3_t, int8_t, 8b, s8,) --__LD3R_FUNC (int16x4x3_t, int16x3_t, int16_t, 4h, s16,) --__LD3R_FUNC (int32x2x3_t, int32x3_t, int32_t, 2s, s32,) --__LD3R_FUNC (int64x1x3_t, int64x3_t, int64_t, 1d, s64,) --__LD3R_FUNC (uint8x8x3_t, uint8x3_t, uint8_t, 8b, u8,) --__LD3R_FUNC (uint16x4x3_t, uint16x3_t, uint16_t, 4h, u16,) --__LD3R_FUNC (uint32x2x3_t, uint32x3_t, uint32_t, 2s, u32,) --__LD3R_FUNC (uint64x1x3_t, uint64x3_t, uint64_t, 1d, u64,) --__LD3R_FUNC (float32x4x3_t, float32x3_t, float32_t, 4s, f32, q) --__LD3R_FUNC (float64x2x3_t, float64x3_t, float64_t, 2d, f64, q) --__LD3R_FUNC (poly8x16x3_t, poly8x3_t, poly8_t, 16b, p8, q) --__LD3R_FUNC (poly16x8x3_t, poly16x3_t, poly16_t, 8h, p16, q) --__LD3R_FUNC (int8x16x3_t, int8x3_t, int8_t, 16b, s8, q) --__LD3R_FUNC (int16x8x3_t, int16x3_t, int16_t, 8h, s16, q) --__LD3R_FUNC (int32x4x3_t, int32x3_t, int32_t, 4s, s32, q) --__LD3R_FUNC (int64x2x3_t, int64x3_t, int64_t, 2d, s64, q) --__LD3R_FUNC (uint8x16x3_t, uint8x3_t, uint8_t, 16b, u8, q) --__LD3R_FUNC (uint16x8x3_t, uint16x3_t, uint16_t, 8h, u16, q) --__LD3R_FUNC (uint32x4x3_t, uint32x3_t, uint32_t, 4s, u32, q) --__LD3R_FUNC (uint64x2x3_t, uint64x3_t, uint64_t, 2d, u64, q) -- - #define __LD3_LANE_FUNC(rettype, ptrtype, regsuffix, \ - lnsuffix, funcsuffix, Q) \ - __extension__ static __inline rettype \ -@@ -14117,46 +11995,6 @@ - __LD3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q) - __LD3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q) - --#define __LD4R_FUNC(rettype, structtype, ptrtype, \ -- regsuffix, funcsuffix, Q) \ -- __extension__ static __inline rettype \ -- __attribute__ ((__always_inline__)) \ -- vld4 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \ -- { \ -- rettype result; \ -- __asm__ ("ld4r {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \ -- "st1 {v16." #regsuffix " - v19." 
#regsuffix "}, %0\n\t" \ -- : "=Q"(result) \ -- : "Q"(*(const structtype *)ptr) \ -- : "memory", "v16", "v17", "v18", "v19"); \ -- return result; \ -- } -- --__LD4R_FUNC (float32x2x4_t, float32x4_t, float32_t, 2s, f32,) --__LD4R_FUNC (float64x1x4_t, float64x4_t, float64_t, 1d, f64,) --__LD4R_FUNC (poly8x8x4_t, poly8x4_t, poly8_t, 8b, p8,) --__LD4R_FUNC (poly16x4x4_t, poly16x4_t, poly16_t, 4h, p16,) --__LD4R_FUNC (int8x8x4_t, int8x4_t, int8_t, 8b, s8,) --__LD4R_FUNC (int16x4x4_t, int16x4_t, int16_t, 4h, s16,) --__LD4R_FUNC (int32x2x4_t, int32x4_t, int32_t, 2s, s32,) --__LD4R_FUNC (int64x1x4_t, int64x4_t, int64_t, 1d, s64,) --__LD4R_FUNC (uint8x8x4_t, uint8x4_t, uint8_t, 8b, u8,) --__LD4R_FUNC (uint16x4x4_t, uint16x4_t, uint16_t, 4h, u16,) --__LD4R_FUNC (uint32x2x4_t, uint32x4_t, uint32_t, 2s, u32,) --__LD4R_FUNC (uint64x1x4_t, uint64x4_t, uint64_t, 1d, u64,) --__LD4R_FUNC (float32x4x4_t, float32x4_t, float32_t, 4s, f32, q) --__LD4R_FUNC (float64x2x4_t, float64x4_t, float64_t, 2d, f64, q) --__LD4R_FUNC (poly8x16x4_t, poly8x4_t, poly8_t, 16b, p8, q) --__LD4R_FUNC (poly16x8x4_t, poly16x4_t, poly16_t, 8h, p16, q) --__LD4R_FUNC (int8x16x4_t, int8x4_t, int8_t, 16b, s8, q) --__LD4R_FUNC (int16x8x4_t, int16x4_t, int16_t, 8h, s16, q) --__LD4R_FUNC (int32x4x4_t, int32x4_t, int32_t, 4s, s32, q) --__LD4R_FUNC (int64x2x4_t, int64x4_t, int64_t, 2d, s64, q) --__LD4R_FUNC (uint8x16x4_t, uint8x4_t, uint8_t, 16b, u8, q) --__LD4R_FUNC (uint16x8x4_t, uint16x4_t, uint16_t, 8h, u16, q) --__LD4R_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, 4s, u32, q) --__LD4R_FUNC (uint64x2x4_t, uint64x4_t, uint64_t, 2d, u64, q) -- - #define __LD4_LANE_FUNC(rettype, ptrtype, regsuffix, \ - lnsuffix, funcsuffix, Q) \ - __extension__ static __inline rettype \ -@@ -14199,132 +12037,225 @@ - __LD4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q) - __LD4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q) - --#define __ST2_LANE_FUNC(intype, ptrtype, regsuffix, \ -- lnsuffix, funcsuffix, Q) \ -- typedef struct { ptrtype __x[2]; } __ST2_LANE_STRUCTURE_##intype; \ -- __extension__ static __inline void \ -- __attribute__ ((__always_inline__)) \ -- vst2 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \ -- intype b, const int c) \ -- { \ -- __ST2_LANE_STRUCTURE_##intype *__p = \ -- (__ST2_LANE_STRUCTURE_##intype *)ptr; \ -- __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \ -- "st2 {v16." #lnsuffix ", v17." 
#lnsuffix "}[%2], %0\n\t" \ -- : "=Q"(*__p) \ -- : "Q"(b), "i"(c) \ -- : "v16", "v17"); \ -- } -+#define __ST2_LANE_FUNC(intype, largetype, ptrtype, \ -+ mode, ptr_mode, funcsuffix, signedtype) \ -+__extension__ static __inline void \ -+__attribute__ ((__always_inline__)) \ -+vst2_lane_ ## funcsuffix (ptrtype *__ptr, \ -+ intype __b, const int __c) \ -+{ \ -+ __builtin_aarch64_simd_oi __o; \ -+ largetype __temp; \ -+ __temp.val[0] \ -+ = vcombine_##funcsuffix (__b.val[0], \ -+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -+ __temp.val[1] \ -+ = vcombine_##funcsuffix (__b.val[1], \ -+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -+ __o = __builtin_aarch64_set_qregoi##mode (__o, \ -+ (signedtype) __temp.val[0], 0); \ -+ __o = __builtin_aarch64_set_qregoi##mode (__o, \ -+ (signedtype) __temp.val[1], 1); \ -+ __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ -+ __ptr, __o, __c); \ -+} - --__ST2_LANE_FUNC (int8x8x2_t, int8_t, 8b, b, s8,) --__ST2_LANE_FUNC (float32x2x2_t, float32_t, 2s, s, f32,) --__ST2_LANE_FUNC (float64x1x2_t, float64_t, 1d, d, f64,) --__ST2_LANE_FUNC (poly8x8x2_t, poly8_t, 8b, b, p8,) --__ST2_LANE_FUNC (poly16x4x2_t, poly16_t, 4h, h, p16,) --__ST2_LANE_FUNC (int16x4x2_t, int16_t, 4h, h, s16,) --__ST2_LANE_FUNC (int32x2x2_t, int32_t, 2s, s, s32,) --__ST2_LANE_FUNC (int64x1x2_t, int64_t, 1d, d, s64,) --__ST2_LANE_FUNC (uint8x8x2_t, uint8_t, 8b, b, u8,) --__ST2_LANE_FUNC (uint16x4x2_t, uint16_t, 4h, h, u16,) --__ST2_LANE_FUNC (uint32x2x2_t, uint32_t, 2s, s, u32,) --__ST2_LANE_FUNC (uint64x1x2_t, uint64_t, 1d, d, u64,) --__ST2_LANE_FUNC (float32x4x2_t, float32_t, 4s, s, f32, q) --__ST2_LANE_FUNC (float64x2x2_t, float64_t, 2d, d, f64, q) --__ST2_LANE_FUNC (poly8x16x2_t, poly8_t, 16b, b, p8, q) --__ST2_LANE_FUNC (poly16x8x2_t, poly16_t, 8h, h, p16, q) --__ST2_LANE_FUNC (int8x16x2_t, int8_t, 16b, b, s8, q) --__ST2_LANE_FUNC (int16x8x2_t, int16_t, 8h, h, s16, q) --__ST2_LANE_FUNC (int32x4x2_t, int32_t, 4s, s, s32, q) --__ST2_LANE_FUNC (int64x2x2_t, int64_t, 2d, d, s64, q) --__ST2_LANE_FUNC (uint8x16x2_t, uint8_t, 16b, b, u8, q) --__ST2_LANE_FUNC (uint16x8x2_t, uint16_t, 8h, h, u16, q) --__ST2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q) --__ST2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q) -+__ST2_LANE_FUNC (float32x2x2_t, float32x4x2_t, float32_t, v4sf, sf, f32, -+ float32x4_t) -+__ST2_LANE_FUNC (float64x1x2_t, float64x2x2_t, float64_t, v2df, df, f64, -+ float64x2_t) -+__ST2_LANE_FUNC (poly8x8x2_t, poly8x16x2_t, poly8_t, v16qi, qi, p8, int8x16_t) -+__ST2_LANE_FUNC (poly16x4x2_t, poly16x8x2_t, poly16_t, v8hi, hi, p16, -+ int16x8_t) -+__ST2_LANE_FUNC (int8x8x2_t, int8x16x2_t, int8_t, v16qi, qi, s8, int8x16_t) -+__ST2_LANE_FUNC (int16x4x2_t, int16x8x2_t, int16_t, v8hi, hi, s16, int16x8_t) -+__ST2_LANE_FUNC (int32x2x2_t, int32x4x2_t, int32_t, v4si, si, s32, int32x4_t) -+__ST2_LANE_FUNC (int64x1x2_t, int64x2x2_t, int64_t, v2di, di, s64, int64x2_t) -+__ST2_LANE_FUNC (uint8x8x2_t, uint8x16x2_t, uint8_t, v16qi, qi, u8, int8x16_t) -+__ST2_LANE_FUNC (uint16x4x2_t, uint16x8x2_t, uint16_t, v8hi, hi, u16, -+ int16x8_t) -+__ST2_LANE_FUNC (uint32x2x2_t, uint32x4x2_t, uint32_t, v4si, si, u32, -+ int32x4_t) -+__ST2_LANE_FUNC (uint64x1x2_t, uint64x2x2_t, uint64_t, v2di, di, u64, -+ int64x2_t) - --#define __ST3_LANE_FUNC(intype, ptrtype, regsuffix, \ -- lnsuffix, funcsuffix, Q) \ -- typedef struct { ptrtype __x[3]; } __ST3_LANE_STRUCTURE_##intype; \ -- __extension__ static __inline void \ -- __attribute__ ((__always_inline__)) \ -- vst3 ## Q ## _lane_ ## 
funcsuffix (ptrtype *ptr, \ -- intype b, const int c) \ -- { \ -- __ST3_LANE_STRUCTURE_##intype *__p = \ -- (__ST3_LANE_STRUCTURE_##intype *)ptr; \ -- __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \ -- "st3 {v16." #lnsuffix " - v18." #lnsuffix "}[%2], %0\n\t" \ -- : "=Q"(*__p) \ -- : "Q"(b), "i"(c) \ -- : "v16", "v17", "v18"); \ -- } -+#undef __ST2_LANE_FUNC -+#define __ST2_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \ -+__extension__ static __inline void \ -+__attribute__ ((__always_inline__)) \ -+vst2q_lane_ ## funcsuffix (ptrtype *__ptr, \ -+ intype __b, const int __c) \ -+{ \ -+ union { intype __i; \ -+ __builtin_aarch64_simd_oi __o; } __temp = { __b }; \ -+ __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ -+ __ptr, __temp.__o, __c); \ -+} - --__ST3_LANE_FUNC (int8x8x3_t, int8_t, 8b, b, s8,) --__ST3_LANE_FUNC (float32x2x3_t, float32_t, 2s, s, f32,) --__ST3_LANE_FUNC (float64x1x3_t, float64_t, 1d, d, f64,) --__ST3_LANE_FUNC (poly8x8x3_t, poly8_t, 8b, b, p8,) --__ST3_LANE_FUNC (poly16x4x3_t, poly16_t, 4h, h, p16,) --__ST3_LANE_FUNC (int16x4x3_t, int16_t, 4h, h, s16,) --__ST3_LANE_FUNC (int32x2x3_t, int32_t, 2s, s, s32,) --__ST3_LANE_FUNC (int64x1x3_t, int64_t, 1d, d, s64,) --__ST3_LANE_FUNC (uint8x8x3_t, uint8_t, 8b, b, u8,) --__ST3_LANE_FUNC (uint16x4x3_t, uint16_t, 4h, h, u16,) --__ST3_LANE_FUNC (uint32x2x3_t, uint32_t, 2s, s, u32,) --__ST3_LANE_FUNC (uint64x1x3_t, uint64_t, 1d, d, u64,) --__ST3_LANE_FUNC (float32x4x3_t, float32_t, 4s, s, f32, q) --__ST3_LANE_FUNC (float64x2x3_t, float64_t, 2d, d, f64, q) --__ST3_LANE_FUNC (poly8x16x3_t, poly8_t, 16b, b, p8, q) --__ST3_LANE_FUNC (poly16x8x3_t, poly16_t, 8h, h, p16, q) --__ST3_LANE_FUNC (int8x16x3_t, int8_t, 16b, b, s8, q) --__ST3_LANE_FUNC (int16x8x3_t, int16_t, 8h, h, s16, q) --__ST3_LANE_FUNC (int32x4x3_t, int32_t, 4s, s, s32, q) --__ST3_LANE_FUNC (int64x2x3_t, int64_t, 2d, d, s64, q) --__ST3_LANE_FUNC (uint8x16x3_t, uint8_t, 16b, b, u8, q) --__ST3_LANE_FUNC (uint16x8x3_t, uint16_t, 8h, h, u16, q) --__ST3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q) --__ST3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q) -+__ST2_LANE_FUNC (float32x4x2_t, float32_t, v4sf, sf, f32) -+__ST2_LANE_FUNC (float64x2x2_t, float64_t, v2df, df, f64) -+__ST2_LANE_FUNC (poly8x16x2_t, poly8_t, v16qi, qi, p8) -+__ST2_LANE_FUNC (poly16x8x2_t, poly16_t, v8hi, hi, p16) -+__ST2_LANE_FUNC (int8x16x2_t, int8_t, v16qi, qi, s8) -+__ST2_LANE_FUNC (int16x8x2_t, int16_t, v8hi, hi, s16) -+__ST2_LANE_FUNC (int32x4x2_t, int32_t, v4si, si, s32) -+__ST2_LANE_FUNC (int64x2x2_t, int64_t, v2di, di, s64) -+__ST2_LANE_FUNC (uint8x16x2_t, uint8_t, v16qi, qi, u8) -+__ST2_LANE_FUNC (uint16x8x2_t, uint16_t, v8hi, hi, u16) -+__ST2_LANE_FUNC (uint32x4x2_t, uint32_t, v4si, si, u32) -+__ST2_LANE_FUNC (uint64x2x2_t, uint64_t, v2di, di, u64) - --#define __ST4_LANE_FUNC(intype, ptrtype, regsuffix, \ -- lnsuffix, funcsuffix, Q) \ -- typedef struct { ptrtype __x[4]; } __ST4_LANE_STRUCTURE_##intype; \ -- __extension__ static __inline void \ -- __attribute__ ((__always_inline__)) \ -- vst4 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \ -- intype b, const int c) \ -- { \ -- __ST4_LANE_STRUCTURE_##intype *__p = \ -- (__ST4_LANE_STRUCTURE_##intype *)ptr; \ -- __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \ -- "st4 {v16." #lnsuffix " - v19." 
#lnsuffix "}[%2], %0\n\t" \ -- : "=Q"(*__p) \ -- : "Q"(b), "i"(c) \ -- : "v16", "v17", "v18", "v19"); \ -- } -+#define __ST3_LANE_FUNC(intype, largetype, ptrtype, \ -+ mode, ptr_mode, funcsuffix, signedtype) \ -+__extension__ static __inline void \ -+__attribute__ ((__always_inline__)) \ -+vst3_lane_ ## funcsuffix (ptrtype *__ptr, \ -+ intype __b, const int __c) \ -+{ \ -+ __builtin_aarch64_simd_ci __o; \ -+ largetype __temp; \ -+ __temp.val[0] \ -+ = vcombine_##funcsuffix (__b.val[0], \ -+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -+ __temp.val[1] \ -+ = vcombine_##funcsuffix (__b.val[1], \ -+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -+ __temp.val[2] \ -+ = vcombine_##funcsuffix (__b.val[2], \ -+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -+ __o = __builtin_aarch64_set_qregci##mode (__o, \ -+ (signedtype) __temp.val[0], 0); \ -+ __o = __builtin_aarch64_set_qregci##mode (__o, \ -+ (signedtype) __temp.val[1], 1); \ -+ __o = __builtin_aarch64_set_qregci##mode (__o, \ -+ (signedtype) __temp.val[2], 2); \ -+ __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ -+ __ptr, __o, __c); \ -+} - --__ST4_LANE_FUNC (int8x8x4_t, int8_t, 8b, b, s8,) --__ST4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,) --__ST4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,) --__ST4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,) --__ST4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,) --__ST4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,) --__ST4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,) --__ST4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,) --__ST4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,) --__ST4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,) --__ST4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,) --__ST4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,) --__ST4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q) --__ST4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q) --__ST4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q) --__ST4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q) --__ST4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q) --__ST4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q) --__ST4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q) --__ST4_LANE_FUNC (int64x2x4_t, int64_t, 2d, d, s64, q) --__ST4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q) --__ST4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q) --__ST4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q) --__ST4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q) -+__ST3_LANE_FUNC (float32x2x3_t, float32x4x3_t, float32_t, v4sf, sf, f32, -+ float32x4_t) -+__ST3_LANE_FUNC (float64x1x3_t, float64x2x3_t, float64_t, v2df, df, f64, -+ float64x2_t) -+__ST3_LANE_FUNC (poly8x8x3_t, poly8x16x3_t, poly8_t, v16qi, qi, p8, int8x16_t) -+__ST3_LANE_FUNC (poly16x4x3_t, poly16x8x3_t, poly16_t, v8hi, hi, p16, -+ int16x8_t) -+__ST3_LANE_FUNC (int8x8x3_t, int8x16x3_t, int8_t, v16qi, qi, s8, int8x16_t) -+__ST3_LANE_FUNC (int16x4x3_t, int16x8x3_t, int16_t, v8hi, hi, s16, int16x8_t) -+__ST3_LANE_FUNC (int32x2x3_t, int32x4x3_t, int32_t, v4si, si, s32, int32x4_t) -+__ST3_LANE_FUNC (int64x1x3_t, int64x2x3_t, int64_t, v2di, di, s64, int64x2_t) -+__ST3_LANE_FUNC (uint8x8x3_t, uint8x16x3_t, uint8_t, v16qi, qi, u8, int8x16_t) -+__ST3_LANE_FUNC (uint16x4x3_t, uint16x8x3_t, uint16_t, v8hi, hi, u16, -+ int16x8_t) -+__ST3_LANE_FUNC (uint32x2x3_t, uint32x4x3_t, uint32_t, v4si, si, u32, -+ int32x4_t) -+__ST3_LANE_FUNC (uint64x1x3_t, uint64x2x3_t, uint64_t, v2di, di, u64, -+ int64x2_t) - -+#undef __ST3_LANE_FUNC -+#define 
__ST3_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \ -+__extension__ static __inline void \ -+__attribute__ ((__always_inline__)) \ -+vst3q_lane_ ## funcsuffix (ptrtype *__ptr, \ -+ intype __b, const int __c) \ -+{ \ -+ union { intype __i; \ -+ __builtin_aarch64_simd_ci __o; } __temp = { __b }; \ -+ __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ -+ __ptr, __temp.__o, __c); \ -+} -+ -+__ST3_LANE_FUNC (float32x4x3_t, float32_t, v4sf, sf, f32) -+__ST3_LANE_FUNC (float64x2x3_t, float64_t, v2df, df, f64) -+__ST3_LANE_FUNC (poly8x16x3_t, poly8_t, v16qi, qi, p8) -+__ST3_LANE_FUNC (poly16x8x3_t, poly16_t, v8hi, hi, p16) -+__ST3_LANE_FUNC (int8x16x3_t, int8_t, v16qi, qi, s8) -+__ST3_LANE_FUNC (int16x8x3_t, int16_t, v8hi, hi, s16) -+__ST3_LANE_FUNC (int32x4x3_t, int32_t, v4si, si, s32) -+__ST3_LANE_FUNC (int64x2x3_t, int64_t, v2di, di, s64) -+__ST3_LANE_FUNC (uint8x16x3_t, uint8_t, v16qi, qi, u8) -+__ST3_LANE_FUNC (uint16x8x3_t, uint16_t, v8hi, hi, u16) -+__ST3_LANE_FUNC (uint32x4x3_t, uint32_t, v4si, si, u32) -+__ST3_LANE_FUNC (uint64x2x3_t, uint64_t, v2di, di, u64) -+ -+#define __ST4_LANE_FUNC(intype, largetype, ptrtype, \ -+ mode, ptr_mode, funcsuffix, signedtype) \ -+__extension__ static __inline void \ -+__attribute__ ((__always_inline__)) \ -+vst4_lane_ ## funcsuffix (ptrtype *__ptr, \ -+ intype __b, const int __c) \ -+{ \ -+ __builtin_aarch64_simd_xi __o; \ -+ largetype __temp; \ -+ __temp.val[0] \ -+ = vcombine_##funcsuffix (__b.val[0], \ -+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -+ __temp.val[1] \ -+ = vcombine_##funcsuffix (__b.val[1], \ -+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -+ __temp.val[2] \ -+ = vcombine_##funcsuffix (__b.val[2], \ -+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -+ __temp.val[3] \ -+ = vcombine_##funcsuffix (__b.val[3], \ -+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -+ __o = __builtin_aarch64_set_qregxi##mode (__o, \ -+ (signedtype) __temp.val[0], 0); \ -+ __o = __builtin_aarch64_set_qregxi##mode (__o, \ -+ (signedtype) __temp.val[1], 1); \ -+ __o = __builtin_aarch64_set_qregxi##mode (__o, \ -+ (signedtype) __temp.val[2], 2); \ -+ __o = __builtin_aarch64_set_qregxi##mode (__o, \ -+ (signedtype) __temp.val[3], 3); \ -+ __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ -+ __ptr, __o, __c); \ -+} -+ -+__ST4_LANE_FUNC (float32x2x4_t, float32x4x4_t, float32_t, v4sf, sf, f32, -+ float32x4_t) -+__ST4_LANE_FUNC (float64x1x4_t, float64x2x4_t, float64_t, v2df, df, f64, -+ float64x2_t) -+__ST4_LANE_FUNC (poly8x8x4_t, poly8x16x4_t, poly8_t, v16qi, qi, p8, int8x16_t) -+__ST4_LANE_FUNC (poly16x4x4_t, poly16x8x4_t, poly16_t, v8hi, hi, p16, -+ int16x8_t) -+__ST4_LANE_FUNC (int8x8x4_t, int8x16x4_t, int8_t, v16qi, qi, s8, int8x16_t) -+__ST4_LANE_FUNC (int16x4x4_t, int16x8x4_t, int16_t, v8hi, hi, s16, int16x8_t) -+__ST4_LANE_FUNC (int32x2x4_t, int32x4x4_t, int32_t, v4si, si, s32, int32x4_t) -+__ST4_LANE_FUNC (int64x1x4_t, int64x2x4_t, int64_t, v2di, di, s64, int64x2_t) -+__ST4_LANE_FUNC (uint8x8x4_t, uint8x16x4_t, uint8_t, v16qi, qi, u8, int8x16_t) -+__ST4_LANE_FUNC (uint16x4x4_t, uint16x8x4_t, uint16_t, v8hi, hi, u16, -+ int16x8_t) -+__ST4_LANE_FUNC (uint32x2x4_t, uint32x4x4_t, uint32_t, v4si, si, u32, -+ int32x4_t) -+__ST4_LANE_FUNC (uint64x1x4_t, uint64x2x4_t, uint64_t, v2di, di, u64, -+ int64x2_t) -+ -+#undef __ST4_LANE_FUNC -+#define __ST4_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \ -+__extension__ static __inline void \ -+__attribute__ ((__always_inline__)) \ 
-+vst4q_lane_ ## funcsuffix (ptrtype *__ptr, \ -+ intype __b, const int __c) \ -+{ \ -+ union { intype __i; \ -+ __builtin_aarch64_simd_xi __o; } __temp = { __b }; \ -+ __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ -+ __ptr, __temp.__o, __c); \ -+} -+ -+__ST4_LANE_FUNC (float32x4x4_t, float32_t, v4sf, sf, f32) -+__ST4_LANE_FUNC (float64x2x4_t, float64_t, v2df, df, f64) -+__ST4_LANE_FUNC (poly8x16x4_t, poly8_t, v16qi, qi, p8) -+__ST4_LANE_FUNC (poly16x8x4_t, poly16_t, v8hi, hi, p16) -+__ST4_LANE_FUNC (int8x16x4_t, int8_t, v16qi, qi, s8) -+__ST4_LANE_FUNC (int16x8x4_t, int16_t, v8hi, hi, s16) -+__ST4_LANE_FUNC (int32x4x4_t, int32_t, v4si, si, s32) -+__ST4_LANE_FUNC (int64x2x4_t, int64_t, v2di, di, s64) -+__ST4_LANE_FUNC (uint8x16x4_t, uint8_t, v16qi, qi, u8) -+__ST4_LANE_FUNC (uint16x8x4_t, uint16_t, v8hi, hi, u16) -+__ST4_LANE_FUNC (uint32x4x4_t, uint32_t, v4si, si, u32) -+__ST4_LANE_FUNC (uint64x2x4_t, uint64_t, v2di, di, u64) -+ - __extension__ static __inline int64_t __attribute__ ((__always_inline__)) - vaddlv_s32 (int32x2_t a) - { -@@ -14341,12 +12272,6 @@ - return result; - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) --vpaddd_s64 (int64x2_t __a) --{ -- return __builtin_aarch64_addpdi (__a); --} -- - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vqdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c) - { -@@ -15706,7 +13631,7 @@ - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vceq_f32 (float32x2_t __a, float32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b); -+ return (uint32x2_t) (__a == __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -15718,26 +13643,25 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vceq_p8 (poly8x8_t __a, poly8x8_t __b) - { -- return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a, -- (int8x8_t) __b); -+ return (uint8x8_t) (__a == __b); - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vceq_s8 (int8x8_t __a, int8x8_t __b) - { -- return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b); -+ return (uint8x8_t) (__a == __b); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vceq_s16 (int16x4_t __a, int16x4_t __b) - { -- return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b); -+ return (uint16x4_t) (__a == __b); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vceq_s32 (int32x2_t __a, int32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b); -+ return (uint32x2_t) (__a == __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -15749,22 +13673,19 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vceq_u8 (uint8x8_t __a, uint8x8_t __b) - { -- return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a, -- (int8x8_t) __b); -+ return (__a == __b); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vceq_u16 (uint16x4_t __a, uint16x4_t __b) - { -- return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a, -- (int16x4_t) __b); -+ return (__a == __b); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vceq_u32 (uint32x2_t __a, uint32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a, -- (int32x2_t) __b); -+ return 
(__a == __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -15776,72 +13697,67 @@ - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vceqq_f32 (float32x4_t __a, float32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b); -+ return (uint32x4_t) (__a == __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vceqq_f64 (float64x2_t __a, float64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b); -+ return (uint64x2_t) (__a == __b); - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vceqq_p8 (poly8x16_t __a, poly8x16_t __b) - { -- return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a, -- (int8x16_t) __b); -+ return (uint8x16_t) (__a == __b); - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vceqq_s8 (int8x16_t __a, int8x16_t __b) - { -- return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b); -+ return (uint8x16_t) (__a == __b); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vceqq_s16 (int16x8_t __a, int16x8_t __b) - { -- return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b); -+ return (uint16x8_t) (__a == __b); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vceqq_s32 (int32x4_t __a, int32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b); -+ return (uint32x4_t) (__a == __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vceqq_s64 (int64x2_t __a, int64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b); -+ return (uint64x2_t) (__a == __b); - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vceqq_u8 (uint8x16_t __a, uint8x16_t __b) - { -- return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a, -- (int8x16_t) __b); -+ return (__a == __b); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vceqq_u16 (uint16x8_t __a, uint16x8_t __b) - { -- return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a, -- (int16x8_t) __b); -+ return (__a == __b); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vceqq_u32 (uint32x4_t __a, uint32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a, -- (int32x4_t) __b); -+ return (__a == __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vceqq_u64 (uint64x2_t __a, uint64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a, -- (int64x2_t) __b); -+ return (__a == __b); - } - - /* vceq - scalar. 
*/ -@@ -15875,8 +13791,7 @@ - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vceqz_f32 (float32x2_t __a) - { -- float32x2_t __b = {0.0f, 0.0f}; -- return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b); -+ return (uint32x2_t) (__a == 0.0f); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -15888,30 +13803,25 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vceqz_p8 (poly8x8_t __a) - { -- poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a, -- (int8x8_t) __b); -+ return (uint8x8_t) (__a == 0); - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vceqz_s8 (int8x8_t __a) - { -- int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b); -+ return (uint8x8_t) (__a == 0); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vceqz_s16 (int16x4_t __a) - { -- int16x4_t __b = {0, 0, 0, 0}; -- return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b); -+ return (uint16x4_t) (__a == 0); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vceqz_s32 (int32x2_t __a) - { -- int32x2_t __b = {0, 0}; -- return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b); -+ return (uint32x2_t) (__a == 0); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -15923,25 +13833,19 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vceqz_u8 (uint8x8_t __a) - { -- uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a, -- (int8x8_t) __b); -+ return (__a == 0); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vceqz_u16 (uint16x4_t __a) - { -- uint16x4_t __b = {0, 0, 0, 0}; -- return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a, -- (int16x4_t) __b); -+ return (__a == 0); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vceqz_u32 (uint32x2_t __a) - { -- uint32x2_t __b = {0, 0}; -- return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a, -- (int32x2_t) __b); -+ return (__a == 0); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -15953,86 +13857,67 @@ - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vceqzq_f32 (float32x4_t __a) - { -- float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f}; -- return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b); -+ return (uint32x4_t) (__a == 0.0f); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vceqzq_f64 (float64x2_t __a) - { -- float64x2_t __b = {0.0, 0.0}; -- return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b); -+ return (uint64x2_t) (__a == 0.0f); - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vceqzq_p8 (poly8x16_t __a) - { -- poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, -- 0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a, -- (int8x16_t) __b); -+ return (uint8x16_t) (__a == 0); - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vceqzq_s8 (int8x16_t __a) - { -- int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, -- 0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b); -+ return (uint8x16_t) (__a == 0); - } - - __extension__ static 
__inline uint16x8_t __attribute__ ((__always_inline__)) - vceqzq_s16 (int16x8_t __a) - { -- int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b); -+ return (uint16x8_t) (__a == 0); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vceqzq_s32 (int32x4_t __a) - { -- int32x4_t __b = {0, 0, 0, 0}; -- return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b); -+ return (uint32x4_t) (__a == 0); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vceqzq_s64 (int64x2_t __a) - { -- int64x2_t __b = {0, 0}; -- return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b); -+ return (uint64x2_t) (__a == __AARCH64_INT64_C (0)); - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vceqzq_u8 (uint8x16_t __a) - { -- uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, -- 0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a, -- (int8x16_t) __b); -+ return (__a == 0); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vceqzq_u16 (uint16x8_t __a) - { -- uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a, -- (int16x8_t) __b); -+ return (__a == 0); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vceqzq_u32 (uint32x4_t __a) - { -- uint32x4_t __b = {0, 0, 0, 0}; -- return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a, -- (int32x4_t) __b); -+ return (__a == 0); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vceqzq_u64 (uint64x2_t __a) - { -- uint64x2_t __b = {0, 0}; -- return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a, -- (int64x2_t) __b); -+ return (__a == __AARCH64_UINT64_C (0)); - } - - /* vceqz - scalar. 
*/ -@@ -16066,7 +13951,7 @@ - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vcge_f32 (float32x2_t __a, float32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b); -+ return (uint32x2_t) (__a >= __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -16076,28 +13961,21 @@ - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vcge_p8 (poly8x8_t __a, poly8x8_t __b) --{ -- return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a, -- (int8x8_t) __b); --} -- --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vcge_s8 (int8x8_t __a, int8x8_t __b) - { -- return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b); -+ return (uint8x8_t) (__a >= __b); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vcge_s16 (int16x4_t __a, int16x4_t __b) - { -- return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b); -+ return (uint16x4_t) (__a >= __b); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vcge_s32 (int32x2_t __a, int32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b); -+ return (uint32x2_t) (__a >= __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -16109,22 +13987,19 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vcge_u8 (uint8x8_t __a, uint8x8_t __b) - { -- return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a, -- (int8x8_t) __b); -+ return (__a >= __b); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vcge_u16 (uint16x4_t __a, uint16x4_t __b) - { -- return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a, -- (int16x4_t) __b); -+ return (__a >= __b); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vcge_u32 (uint32x2_t __a, uint32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a, -- (int32x2_t) __b); -+ return (__a >= __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -16136,72 +14011,61 @@ - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vcgeq_f32 (float32x4_t __a, float32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b); -+ return (uint32x4_t) (__a >= __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vcgeq_f64 (float64x2_t __a, float64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b); -+ return (uint64x2_t) (__a >= __b); - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vcgeq_p8 (poly8x16_t __a, poly8x16_t __b) --{ -- return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a, -- (int8x16_t) __b); --} -- --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vcgeq_s8 (int8x16_t __a, int8x16_t __b) - { -- return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b); -+ return (uint8x16_t) (__a >= __b); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vcgeq_s16 (int16x8_t __a, int16x8_t __b) - { -- return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b); -+ return (uint16x8_t) (__a >= __b); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vcgeq_s32 (int32x4_t __a, int32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_cmgev4si 
(__a, __b); -+ return (uint32x4_t) (__a >= __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vcgeq_s64 (int64x2_t __a, int64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b); -+ return (uint64x2_t) (__a >= __b); - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vcgeq_u8 (uint8x16_t __a, uint8x16_t __b) - { -- return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a, -- (int8x16_t) __b); -+ return (__a >= __b); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vcgeq_u16 (uint16x8_t __a, uint16x8_t __b) - { -- return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a, -- (int16x8_t) __b); -+ return (__a >= __b); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vcgeq_u32 (uint32x4_t __a, uint32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a, -- (int32x4_t) __b); -+ return (__a >= __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vcgeq_u64 (uint64x2_t __a, uint64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a, -- (int64x2_t) __b); -+ return (__a >= __b); - } - - /* vcge - scalar. */ -@@ -16235,8 +14099,7 @@ - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vcgez_f32 (float32x2_t __a) - { -- float32x2_t __b = {0.0f, 0.0f}; -- return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b); -+ return (uint32x2_t) (__a >= 0.0f); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -16246,32 +14109,21 @@ - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vcgez_p8 (poly8x8_t __a) --{ -- poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a, -- (int8x8_t) __b); --} -- --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vcgez_s8 (int8x8_t __a) - { -- int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b); -+ return (uint8x8_t) (__a >= 0); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vcgez_s16 (int16x4_t __a) - { -- int16x4_t __b = {0, 0, 0, 0}; -- return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b); -+ return (uint16x4_t) (__a >= 0); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vcgez_s32 (int32x2_t __a) - { -- int32x2_t __b = {0, 0}; -- return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b); -+ return (uint32x2_t) (__a >= 0); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -16280,121 +14132,42 @@ - return __a >= 0ll ? 
-1ll : 0ll; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vcgez_u8 (uint8x8_t __a) --{ -- uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a, -- (int8x8_t) __b); --} -- --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vcgez_u16 (uint16x4_t __a) --{ -- uint16x4_t __b = {0, 0, 0, 0}; -- return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a, -- (int16x4_t) __b); --} -- --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vcgez_u32 (uint32x2_t __a) --{ -- uint32x2_t __b = {0, 0}; -- return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a, -- (int32x2_t) __b); --} -- --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vcgez_u64 (uint64x1_t __a) --{ -- return __a >= 0ll ? -1ll : 0ll; --} -- - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vcgezq_f32 (float32x4_t __a) - { -- float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f}; -- return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b); -+ return (uint32x4_t) (__a >= 0.0f); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vcgezq_f64 (float64x2_t __a) - { -- float64x2_t __b = {0.0, 0.0}; -- return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b); -+ return (uint64x2_t) (__a >= 0.0); - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vcgezq_p8 (poly8x16_t __a) --{ -- poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, -- 0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a, -- (int8x16_t) __b); --} -- --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vcgezq_s8 (int8x16_t __a) - { -- int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, -- 0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b); -+ return (uint8x16_t) (__a >= 0); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vcgezq_s16 (int16x8_t __a) - { -- int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b); -+ return (uint16x8_t) (__a >= 0); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vcgezq_s32 (int32x4_t __a) - { -- int32x4_t __b = {0, 0, 0, 0}; -- return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b); -+ return (uint32x4_t) (__a >= 0); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vcgezq_s64 (int64x2_t __a) - { -- int64x2_t __b = {0, 0}; -- return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b); -+ return (uint64x2_t) (__a >= __AARCH64_INT64_C (0)); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vcgezq_u8 (uint8x16_t __a) --{ -- uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, -- 0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a, -- (int8x16_t) __b); --} -- --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vcgezq_u16 (uint16x8_t __a) --{ -- uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a, -- (int16x8_t) __b); --} -- --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vcgezq_u32 (uint32x4_t __a) --{ -- uint32x4_t __b = {0, 0, 0, 0}; -- return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a, -- (int32x4_t) __b); --} -- 
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vcgezq_u64 (uint64x2_t __a) --{ -- uint64x2_t __b = {0, 0}; -- return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a, -- (int64x2_t) __b); --} -- - /* vcgez - scalar. */ - - __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -@@ -16409,12 +14182,6 @@ - return __a >= 0 ? -1ll : 0ll; - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vcgezd_u64 (int64x1_t __a) --{ -- return __a >= 0 ? -1ll : 0ll; --} -- - __extension__ static __inline uint64_t __attribute__ ((__always_inline__)) - vcgezd_f64 (float64_t __a) - { -@@ -16426,7 +14193,7 @@ - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vcgt_f32 (float32x2_t __a, float32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b); -+ return (uint32x2_t) (__a > __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -16436,28 +14203,21 @@ - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vcgt_p8 (poly8x8_t __a, poly8x8_t __b) --{ -- return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a, -- (int8x8_t) __b); --} -- --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vcgt_s8 (int8x8_t __a, int8x8_t __b) - { -- return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b); -+ return (uint8x8_t) (__a > __b); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vcgt_s16 (int16x4_t __a, int16x4_t __b) - { -- return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b); -+ return (uint16x4_t) (__a > __b); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vcgt_s32 (int32x2_t __a, int32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b); -+ return (uint32x2_t) (__a > __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -16469,22 +14229,19 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vcgt_u8 (uint8x8_t __a, uint8x8_t __b) - { -- return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a, -- (int8x8_t) __b); -+ return (__a > __b); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vcgt_u16 (uint16x4_t __a, uint16x4_t __b) - { -- return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a, -- (int16x4_t) __b); -+ return (__a > __b); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vcgt_u32 (uint32x2_t __a, uint32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a, -- (int32x2_t) __b); -+ return (__a > __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -16496,72 +14253,61 @@ - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vcgtq_f32 (float32x4_t __a, float32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b); -+ return (uint32x4_t) (__a > __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vcgtq_f64 (float64x2_t __a, float64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b); -+ return (uint64x2_t) (__a > __b); - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vcgtq_p8 (poly8x16_t __a, poly8x16_t __b) --{ -- return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a, -- 
(int8x16_t) __b); --} -- --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vcgtq_s8 (int8x16_t __a, int8x16_t __b) - { -- return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b); -+ return (uint8x16_t) (__a > __b); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vcgtq_s16 (int16x8_t __a, int16x8_t __b) - { -- return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b); -+ return (uint16x8_t) (__a > __b); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vcgtq_s32 (int32x4_t __a, int32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b); -+ return (uint32x4_t) (__a > __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vcgtq_s64 (int64x2_t __a, int64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b); -+ return (uint64x2_t) (__a > __b); - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vcgtq_u8 (uint8x16_t __a, uint8x16_t __b) - { -- return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a, -- (int8x16_t) __b); -+ return (__a > __b); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vcgtq_u16 (uint16x8_t __a, uint16x8_t __b) - { -- return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a, -- (int16x8_t) __b); -+ return (__a > __b); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vcgtq_u32 (uint32x4_t __a, uint32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a, -- (int32x4_t) __b); -+ return (__a > __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vcgtq_u64 (uint64x2_t __a, uint64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a, -- (int64x2_t) __b); -+ return (__a > __b); - } - - /* vcgt - scalar. */ -@@ -16595,8 +14341,7 @@ - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vcgtz_f32 (float32x2_t __a) - { -- float32x2_t __b = {0.0f, 0.0f}; -- return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b); -+ return (uint32x2_t) (__a > 0.0f); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -16606,32 +14351,21 @@ - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vcgtz_p8 (poly8x8_t __a) --{ -- poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a, -- (int8x8_t) __b); --} -- --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vcgtz_s8 (int8x8_t __a) - { -- int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b); -+ return (uint8x8_t) (__a > 0); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vcgtz_s16 (int16x4_t __a) - { -- int16x4_t __b = {0, 0, 0, 0}; -- return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b); -+ return (uint16x4_t) (__a > 0); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vcgtz_s32 (int32x2_t __a) - { -- int32x2_t __b = {0, 0}; -- return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b); -+ return (uint32x2_t) (__a > 0); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -16640,121 +14374,42 @@ - return __a > 0ll ? 
-1ll : 0ll; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vcgtz_u8 (uint8x8_t __a) --{ -- uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a, -- (int8x8_t) __b); --} -- --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vcgtz_u16 (uint16x4_t __a) --{ -- uint16x4_t __b = {0, 0, 0, 0}; -- return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a, -- (int16x4_t) __b); --} -- --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vcgtz_u32 (uint32x2_t __a) --{ -- uint32x2_t __b = {0, 0}; -- return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a, -- (int32x2_t) __b); --} -- --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vcgtz_u64 (uint64x1_t __a) --{ -- return __a > 0ll ? -1ll : 0ll; --} -- - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vcgtzq_f32 (float32x4_t __a) - { -- float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f}; -- return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b); -+ return (uint32x4_t) (__a > 0.0f); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vcgtzq_f64 (float64x2_t __a) - { -- float64x2_t __b = {0.0, 0.0}; -- return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b); -+ return (uint64x2_t) (__a > 0.0); - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vcgtzq_p8 (poly8x16_t __a) --{ -- poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, -- 0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a, -- (int8x16_t) __b); --} -- --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vcgtzq_s8 (int8x16_t __a) - { -- int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, -- 0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b); -+ return (uint8x16_t) (__a > 0); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vcgtzq_s16 (int16x8_t __a) - { -- int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b); -+ return (uint16x8_t) (__a > 0); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vcgtzq_s32 (int32x4_t __a) - { -- int32x4_t __b = {0, 0, 0, 0}; -- return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b); -+ return (uint32x4_t) (__a > 0); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vcgtzq_s64 (int64x2_t __a) - { -- int64x2_t __b = {0, 0}; -- return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b); -+ return (uint64x2_t) (__a > __AARCH64_INT64_C (0)); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vcgtzq_u8 (uint8x16_t __a) --{ -- uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, -- 0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a, -- (int8x16_t) __b); --} -- --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vcgtzq_u16 (uint16x8_t __a) --{ -- uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a, -- (int16x8_t) __b); --} -- --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vcgtzq_u32 (uint32x4_t __a) --{ -- uint32x4_t __b = {0, 0, 0, 0}; -- return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a, -- (int32x4_t) __b); --} -- --__extension__ 
static __inline uint64x2_t __attribute__ ((__always_inline__)) --vcgtzq_u64 (uint64x2_t __a) --{ -- uint64x2_t __b = {0, 0}; -- return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a, -- (int64x2_t) __b); --} -- - /* vcgtz - scalar. */ - - __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -@@ -16769,12 +14424,6 @@ - return __a > 0 ? -1ll : 0ll; - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vcgtzd_u64 (int64x1_t __a) --{ -- return __a > 0 ? -1ll : 0ll; --} -- - __extension__ static __inline uint64_t __attribute__ ((__always_inline__)) - vcgtzd_f64 (float64_t __a) - { -@@ -16786,7 +14435,7 @@ - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vcle_f32 (float32x2_t __a, float32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_cmgev2sf (__b, __a); -+ return (uint32x2_t) (__a <= __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -16796,28 +14445,21 @@ - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vcle_p8 (poly8x8_t __a, poly8x8_t __b) --{ -- return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __b, -- (int8x8_t) __a); --} -- --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vcle_s8 (int8x8_t __a, int8x8_t __b) - { -- return (uint8x8_t) __builtin_aarch64_cmgev8qi (__b, __a); -+ return (uint8x8_t) (__a <= __b); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vcle_s16 (int16x4_t __a, int16x4_t __b) - { -- return (uint16x4_t) __builtin_aarch64_cmgev4hi (__b, __a); -+ return (uint16x4_t) (__a <= __b); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vcle_s32 (int32x2_t __a, int32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_cmgev2si (__b, __a); -+ return (uint32x2_t) (__a <= __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -16829,22 +14471,19 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vcle_u8 (uint8x8_t __a, uint8x8_t __b) - { -- return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __b, -- (int8x8_t) __a); -+ return (__a <= __b); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vcle_u16 (uint16x4_t __a, uint16x4_t __b) - { -- return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __b, -- (int16x4_t) __a); -+ return (__a <= __b); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vcle_u32 (uint32x2_t __a, uint32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __b, -- (int32x2_t) __a); -+ return (__a <= __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -16856,72 +14495,61 @@ - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vcleq_f32 (float32x4_t __a, float32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_cmgev4sf (__b, __a); -+ return (uint32x4_t) (__a <= __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vcleq_f64 (float64x2_t __a, float64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_cmgev2df (__b, __a); -+ return (uint64x2_t) (__a <= __b); - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vcleq_p8 (poly8x16_t __a, poly8x16_t __b) --{ -- return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __b, -- (int8x16_t) __a); 
--} -- --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vcleq_s8 (int8x16_t __a, int8x16_t __b) - { -- return (uint8x16_t) __builtin_aarch64_cmgev16qi (__b, __a); -+ return (uint8x16_t) (__a <= __b); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vcleq_s16 (int16x8_t __a, int16x8_t __b) - { -- return (uint16x8_t) __builtin_aarch64_cmgev8hi (__b, __a); -+ return (uint16x8_t) (__a <= __b); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vcleq_s32 (int32x4_t __a, int32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_cmgev4si (__b, __a); -+ return (uint32x4_t) (__a <= __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vcleq_s64 (int64x2_t __a, int64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_cmgev2di (__b, __a); -+ return (uint64x2_t) (__a <= __b); - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vcleq_u8 (uint8x16_t __a, uint8x16_t __b) - { -- return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __b, -- (int8x16_t) __a); -+ return (__a <= __b); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vcleq_u16 (uint16x8_t __a, uint16x8_t __b) - { -- return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __b, -- (int16x8_t) __a); -+ return (__a <= __b); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vcleq_u32 (uint32x4_t __a, uint32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __b, -- (int32x4_t) __a); -+ return (__a <= __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vcleq_u64 (uint64x2_t __a, uint64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __b, -- (int64x2_t) __a); -+ return (__a <= __b); - } - - /* vcle - scalar. */ -@@ -16955,8 +14583,7 @@ - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vclez_f32 (float32x2_t __a) - { -- float32x2_t __b = {0.0f, 0.0f}; -- return (uint32x2_t) __builtin_aarch64_cmlev2sf (__a, __b); -+ return (uint32x2_t) (__a <= 0.0f); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -16966,32 +14593,21 @@ - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vclez_p8 (poly8x8_t __a) --{ -- poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x8_t) __builtin_aarch64_cmlev8qi ((int8x8_t) __a, -- (int8x8_t) __b); --} -- --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vclez_s8 (int8x8_t __a) - { -- int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x8_t) __builtin_aarch64_cmlev8qi (__a, __b); -+ return (uint8x8_t) (__a <= 0); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vclez_s16 (int16x4_t __a) - { -- int16x4_t __b = {0, 0, 0, 0}; -- return (uint16x4_t) __builtin_aarch64_cmlev4hi (__a, __b); -+ return (uint16x4_t) (__a <= 0); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vclez_s32 (int32x2_t __a) - { -- int32x2_t __b = {0, 0}; -- return (uint32x2_t) __builtin_aarch64_cmlev2si (__a, __b); -+ return (uint32x2_t) (__a <= 0); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -17000,62 +14616,40 @@ - return __a <= 0ll ? 
-1ll : 0ll; - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vclez_u64 (uint64x1_t __a) --{ -- return __a <= 0ll ? -1ll : 0ll; --} -- - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vclezq_f32 (float32x4_t __a) - { -- float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f}; -- return (uint32x4_t) __builtin_aarch64_cmlev4sf (__a, __b); -+ return (uint32x4_t) (__a <= 0.0f); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vclezq_f64 (float64x2_t __a) - { -- float64x2_t __b = {0.0, 0.0}; -- return (uint64x2_t) __builtin_aarch64_cmlev2df (__a, __b); -+ return (uint64x2_t) (__a <= 0.0); - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vclezq_p8 (poly8x16_t __a) --{ -- poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, -- 0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x16_t) __builtin_aarch64_cmlev16qi ((int8x16_t) __a, -- (int8x16_t) __b); --} -- --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vclezq_s8 (int8x16_t __a) - { -- int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, -- 0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x16_t) __builtin_aarch64_cmlev16qi (__a, __b); -+ return (uint8x16_t) (__a <= 0); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vclezq_s16 (int16x8_t __a) - { -- int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint16x8_t) __builtin_aarch64_cmlev8hi (__a, __b); -+ return (uint16x8_t) (__a <= 0); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vclezq_s32 (int32x4_t __a) - { -- int32x4_t __b = {0, 0, 0, 0}; -- return (uint32x4_t) __builtin_aarch64_cmlev4si (__a, __b); -+ return (uint32x4_t) (__a <= 0); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vclezq_s64 (int64x2_t __a) - { -- int64x2_t __b = {0, 0}; -- return (uint64x2_t) __builtin_aarch64_cmlev2di (__a, __b); -+ return (uint64x2_t) (__a <= __AARCH64_INT64_C (0)); - } - - /* vclez - scalar. */ -@@ -17072,12 +14666,6 @@ - return __a <= 0 ? -1ll : 0ll; - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vclezd_u64 (int64x1_t __a) --{ -- return __a <= 0 ? 
-1ll : 0ll; --} -- - __extension__ static __inline uint64_t __attribute__ ((__always_inline__)) - vclezd_f64 (float64_t __a) - { -@@ -17089,7 +14677,7 @@ - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vclt_f32 (float32x2_t __a, float32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__b, __a); -+ return (uint32x2_t) (__a < __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -17099,28 +14687,21 @@ - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vclt_p8 (poly8x8_t __a, poly8x8_t __b) --{ -- return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __b, -- (int8x8_t) __a); --} -- --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vclt_s8 (int8x8_t __a, int8x8_t __b) - { -- return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__b, __a); -+ return (uint8x8_t) (__a < __b); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vclt_s16 (int16x4_t __a, int16x4_t __b) - { -- return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__b, __a); -+ return (uint16x4_t) (__a < __b); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vclt_s32 (int32x2_t __a, int32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_cmgtv2si (__b, __a); -+ return (uint32x2_t) (__a < __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -17132,22 +14713,19 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vclt_u8 (uint8x8_t __a, uint8x8_t __b) - { -- return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __b, -- (int8x8_t) __a); -+ return (__a < __b); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vclt_u16 (uint16x4_t __a, uint16x4_t __b) - { -- return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __b, -- (int16x4_t) __a); -+ return (__a < __b); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vclt_u32 (uint32x2_t __a, uint32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __b, -- (int32x2_t) __a); -+ return (__a < __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -17159,72 +14737,61 @@ - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vcltq_f32 (float32x4_t __a, float32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__b, __a); -+ return (uint32x4_t) (__a < __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vcltq_f64 (float64x2_t __a, float64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_cmgtv2df (__b, __a); -+ return (uint64x2_t) (__a < __b); - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vcltq_p8 (poly8x16_t __a, poly8x16_t __b) --{ -- return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __b, -- (int8x16_t) __a); --} -- --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vcltq_s8 (int8x16_t __a, int8x16_t __b) - { -- return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__b, __a); -+ return (uint8x16_t) (__a < __b); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vcltq_s16 (int16x8_t __a, int16x8_t __b) - { -- return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__b, __a); -+ return (uint16x8_t) (__a < __b); - } - - __extension__ static __inline uint32x4_t __attribute__ 
((__always_inline__)) - vcltq_s32 (int32x4_t __a, int32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_cmgtv4si (__b, __a); -+ return (uint32x4_t) (__a < __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vcltq_s64 (int64x2_t __a, int64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_cmgtv2di (__b, __a); -+ return (uint64x2_t) (__a < __b); - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vcltq_u8 (uint8x16_t __a, uint8x16_t __b) - { -- return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __b, -- (int8x16_t) __a); -+ return (__a < __b); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vcltq_u16 (uint16x8_t __a, uint16x8_t __b) - { -- return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __b, -- (int16x8_t) __a); -+ return (__a < __b); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vcltq_u32 (uint32x4_t __a, uint32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __b, -- (int32x4_t) __a); -+ return (__a < __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vcltq_u64 (uint64x2_t __a, uint64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __b, -- (int64x2_t) __a); -+ return (__a < __b); - } - - /* vclt - scalar. */ -@@ -17258,8 +14825,7 @@ - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vcltz_f32 (float32x2_t __a) - { -- float32x2_t __b = {0.0f, 0.0f}; -- return (uint32x2_t) __builtin_aarch64_cmltv2sf (__a, __b); -+ return (uint32x2_t) (__a < 0.0f); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -17269,32 +14835,21 @@ - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vcltz_p8 (poly8x8_t __a) --{ -- poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x8_t) __builtin_aarch64_cmltv8qi ((int8x8_t) __a, -- (int8x8_t) __b); --} -- --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vcltz_s8 (int8x8_t __a) - { -- int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x8_t) __builtin_aarch64_cmltv8qi (__a, __b); -+ return (uint8x8_t) (__a < 0); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vcltz_s16 (int16x4_t __a) - { -- int16x4_t __b = {0, 0, 0, 0}; -- return (uint16x4_t) __builtin_aarch64_cmltv4hi (__a, __b); -+ return (uint16x4_t) (__a < 0); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vcltz_s32 (int32x2_t __a) - { -- int32x2_t __b = {0, 0}; -- return (uint32x2_t) __builtin_aarch64_cmltv2si (__a, __b); -+ return (uint32x2_t) (__a < 0); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -17306,53 +14861,37 @@ - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vcltzq_f32 (float32x4_t __a) - { -- float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f}; -- return (uint32x4_t) __builtin_aarch64_cmltv4sf (__a, __b); -+ return (uint32x4_t) (__a < 0.0f); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vcltzq_f64 (float64x2_t __a) - { -- float64x2_t __b = {0.0, 0.0}; -- return (uint64x2_t) __builtin_aarch64_cmltv2df (__a, __b); -+ return (uint64x2_t) (__a < 0.0); - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vcltzq_p8 (poly8x16_t __a) --{ -- poly8x16_t __b = {0, 0, 0, 
0, 0, 0, 0, 0, -- 0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x16_t) __builtin_aarch64_cmltv16qi ((int8x16_t) __a, -- (int8x16_t) __b); --} -- --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vcltzq_s8 (int8x16_t __a) - { -- int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, -- 0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x16_t) __builtin_aarch64_cmltv16qi (__a, __b); -+ return (uint8x16_t) (__a < 0); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vcltzq_s16 (int16x8_t __a) - { -- int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint16x8_t) __builtin_aarch64_cmltv8hi (__a, __b); -+ return (uint16x8_t) (__a < 0); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vcltzq_s32 (int32x4_t __a) - { -- int32x4_t __b = {0, 0, 0, 0}; -- return (uint32x4_t) __builtin_aarch64_cmltv4si (__a, __b); -+ return (uint32x4_t) (__a < 0); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vcltzq_s64 (int64x2_t __a) - { -- int64x2_t __b = {0, 0}; -- return (uint64x2_t) __builtin_aarch64_cmltv2di (__a, __b); -+ return (uint64x2_t) (__a < __AARCH64_INT64_C (0)); - } - - /* vcltz - scalar. */ -@@ -17369,12 +14908,6 @@ - return __a < 0 ? -1ll : 0ll; - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vcltzd_u64 (int64x1_t __a) --{ -- return __a < 0 ? -1ll : 0ll; --} -- - __extension__ static __inline uint64_t __attribute__ ((__always_inline__)) - vcltzd_f64 (float64_t __a) - { -@@ -18483,6 +16016,292 @@ - return __aarch64_vgetq_lane_u64 (__a, __b); - } - -+/* vext */ -+ -+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+vext_f32 (float32x2_t __a, float32x2_t __b, __const int __c) -+{ -+ __builtin_aarch64_im_lane_boundsi (__c, 2); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1}); -+#endif -+} -+ -+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -+vext_f64 (float64x1_t __a, float64x1_t __b, __const int __c) -+{ -+ /* The only possible index to the assembler instruction returns element 0. 
*/ -+ __builtin_aarch64_im_lane_boundsi (__c, 1); -+ return __a; -+} -+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+vext_p8 (poly8x8_t __a, poly8x8_t __b, __const int __c) -+{ -+ __builtin_aarch64_im_lane_boundsi (__c, 8); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint8x8_t) -+ {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); -+#endif -+} -+ -+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+vext_p16 (poly16x4_t __a, poly16x4_t __b, __const int __c) -+{ -+ __builtin_aarch64_im_lane_boundsi (__c, 4); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, -+ (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3}); -+#endif -+} -+ -+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+vext_s8 (int8x8_t __a, int8x8_t __b, __const int __c) -+{ -+ __builtin_aarch64_im_lane_boundsi (__c, 8); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint8x8_t) -+ {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); -+#endif -+} -+ -+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+vext_s16 (int16x4_t __a, int16x4_t __b, __const int __c) -+{ -+ __builtin_aarch64_im_lane_boundsi (__c, 4); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, -+ (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3}); -+#endif -+} -+ -+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+vext_s32 (int32x2_t __a, int32x2_t __b, __const int __c) -+{ -+ __builtin_aarch64_im_lane_boundsi (__c, 2); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1}); -+#endif -+} -+ -+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+vext_s64 (int64x1_t __a, int64x1_t __b, __const int __c) -+{ -+ /* The only possible index to the assembler instruction returns element 0. 
*/ -+ __builtin_aarch64_im_lane_boundsi (__c, 1); -+ return __a; -+} -+ -+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+vext_u8 (uint8x8_t __a, uint8x8_t __b, __const int __c) -+{ -+ __builtin_aarch64_im_lane_boundsi (__c, 8); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint8x8_t) -+ {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); -+#endif -+} -+ -+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+vext_u16 (uint16x4_t __a, uint16x4_t __b, __const int __c) -+{ -+ __builtin_aarch64_im_lane_boundsi (__c, 4); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, -+ (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3}); -+#endif -+} -+ -+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+vext_u32 (uint32x2_t __a, uint32x2_t __b, __const int __c) -+{ -+ __builtin_aarch64_im_lane_boundsi (__c, 2); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1}); -+#endif -+} -+ -+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+vext_u64 (uint64x1_t __a, uint64x1_t __b, __const int __c) -+{ -+ /* The only possible index to the assembler instruction returns element 0. */ -+ __builtin_aarch64_im_lane_boundsi (__c, 1); -+ return __a; -+} -+ -+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+vextq_f32 (float32x4_t __a, float32x4_t __b, __const int __c) -+{ -+ __builtin_aarch64_im_lane_boundsi (__c, 4); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, -+ (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3}); -+#endif -+} -+ -+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -+vextq_f64 (float64x2_t __a, float64x2_t __b, __const int __c) -+{ -+ __builtin_aarch64_im_lane_boundsi (__c, 2); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1}); -+#endif -+} -+ -+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+vextq_p8 (poly8x16_t __a, poly8x16_t __b, __const int __c) -+{ -+ __builtin_aarch64_im_lane_boundsi (__c, 16); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint8x16_t) -+ {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c, -+ 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, -+ __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15}); -+#endif -+} -+ -+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+vextq_p16 (poly16x8_t __a, poly16x8_t __b, __const int __c) -+{ -+ __builtin_aarch64_im_lane_boundsi (__c, 8); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint16x8_t) -+ {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); -+#endif -+} -+ -+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) 
-+vextq_s8 (int8x16_t __a, int8x16_t __b, __const int __c) -+{ -+ __builtin_aarch64_im_lane_boundsi (__c, 16); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint8x16_t) -+ {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c, -+ 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, -+ __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15}); -+#endif -+} -+ -+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+vextq_s16 (int16x8_t __a, int16x8_t __b, __const int __c) -+{ -+ __builtin_aarch64_im_lane_boundsi (__c, 8); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint16x8_t) -+ {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); -+#endif -+} -+ -+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+vextq_s32 (int32x4_t __a, int32x4_t __b, __const int __c) -+{ -+ __builtin_aarch64_im_lane_boundsi (__c, 4); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, -+ (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3}); -+#endif -+} -+ -+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+vextq_s64 (int64x2_t __a, int64x2_t __b, __const int __c) -+{ -+ __builtin_aarch64_im_lane_boundsi (__c, 2); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1}); -+#endif -+} -+ -+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+vextq_u8 (uint8x16_t __a, uint8x16_t __b, __const int __c) -+{ -+ __builtin_aarch64_im_lane_boundsi (__c, 16); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint8x16_t) -+ {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c, -+ 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, -+ __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15}); -+#endif -+} -+ -+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+vextq_u16 (uint16x8_t __a, uint16x8_t __b, __const int __c) -+{ -+ __builtin_aarch64_im_lane_boundsi (__c, 8); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint16x8_t) -+ {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); -+#endif -+} -+ -+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+vextq_u32 (uint32x4_t __a, uint32x4_t __b, __const int __c) -+{ -+ __builtin_aarch64_im_lane_boundsi (__c, 4); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, -+ (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3}); -+#endif -+} -+ -+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+vextq_u64 (uint64x2_t __a, uint64x2_t __b, __const int __c) -+{ -+ __builtin_aarch64_im_lane_boundsi (__c, 2); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c}); -+#else -+ return 
__builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1}); -+#endif -+} -+ - /* vfma_lane */ - - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -@@ -19712,6 +17531,872 @@ - return ret; - } - -+/* vldn_dup */ -+ -+__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__)) -+vld2_dup_s8 (const int8_t * __a) -+{ -+ int8x8x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); -+ ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__)) -+vld2_dup_s16 (const int16_t * __a) -+{ -+ int16x4x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); -+ ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__)) -+vld2_dup_s32 (const int32_t * __a) -+{ -+ int32x2x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv2si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0); -+ ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) -+vld2_dup_f32 (const float32_t * __a) -+{ -+ float32x2x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv2sf ((const __builtin_aarch64_simd_sf *) __a); -+ ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0); -+ ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline float64x1x2_t __attribute__ ((__always_inline__)) -+vld2_dup_f64 (const float64_t * __a) -+{ -+ float64x1x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rdf ((const __builtin_aarch64_simd_df *) __a); -+ ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 0)}; -+ ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 1)}; -+ return ret; -+} -+ -+__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__)) -+vld2_dup_u8 (const uint8_t * __a) -+{ -+ uint8x8x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); -+ ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__)) -+vld2_dup_u16 (const uint16_t * __a) -+{ -+ uint16x4x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); -+ ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__)) -+vld2_dup_u32 (const uint32_t * __a) -+{ -+ uint32x2x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv2si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0); -+ ret.val[1] = (uint32x2_t) 
__builtin_aarch64_get_dregoiv2si (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__)) -+vld2_dup_p8 (const poly8_t * __a) -+{ -+ poly8x8x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); -+ ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__)) -+vld2_dup_p16 (const poly16_t * __a) -+{ -+ poly16x4x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); -+ ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__)) -+vld2_dup_s64 (const int64_t * __a) -+{ -+ int64x1x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rdi ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0); -+ ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__)) -+vld2_dup_u64 (const uint64_t * __a) -+{ -+ uint64x1x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rdi ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0); -+ ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__)) -+vld2q_dup_s8 (const int8_t * __a) -+{ -+ int8x16x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); -+ ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__)) -+vld2q_dup_p8 (const poly8_t * __a) -+{ -+ poly8x16x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); -+ ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__)) -+vld2q_dup_s16 (const int16_t * __a) -+{ -+ int16x8x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); -+ ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__)) -+vld2q_dup_p16 (const poly16_t * __a) -+{ -+ poly16x8x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); -+ ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__)) -+vld2q_dup_s32 (const int32_t * __a) -+{ -+ 
int32x4x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv4si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0); -+ ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline int64x2x2_t __attribute__ ((__always_inline__)) -+vld2q_dup_s64 (const int64_t * __a) -+{ -+ int64x2x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0); -+ ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__)) -+vld2q_dup_u8 (const uint8_t * __a) -+{ -+ uint8x16x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); -+ ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__)) -+vld2q_dup_u16 (const uint16_t * __a) -+{ -+ uint16x8x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); -+ ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__)) -+vld2q_dup_u32 (const uint32_t * __a) -+{ -+ uint32x4x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv4si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0); -+ ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline uint64x2x2_t __attribute__ ((__always_inline__)) -+vld2q_dup_u64 (const uint64_t * __a) -+{ -+ uint64x2x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0); -+ ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__)) -+vld2q_dup_f32 (const float32_t * __a) -+{ -+ float32x4x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv4sf ((const __builtin_aarch64_simd_sf *) __a); -+ ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0); -+ ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline float64x2x2_t __attribute__ ((__always_inline__)) -+vld2q_dup_f64 (const float64_t * __a) -+{ -+ float64x2x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv2df ((const __builtin_aarch64_simd_df *) __a); -+ ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0); -+ ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__)) -+vld3_dup_s64 (const int64_t * __a) -+{ -+ int64x1x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rdi ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (int64x1_t) 
__builtin_aarch64_get_dregcidi (__o, 0); -+ ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1); -+ ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2); -+ return ret; -+} -+ -+__extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__)) -+vld3_dup_u64 (const uint64_t * __a) -+{ -+ uint64x1x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rdi ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0); -+ ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1); -+ ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2); -+ return ret; -+} -+ -+__extension__ static __inline float64x1x3_t __attribute__ ((__always_inline__)) -+vld3_dup_f64 (const float64_t * __a) -+{ -+ float64x1x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rdf ((const __builtin_aarch64_simd_df *) __a); -+ ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 0)}; -+ ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 1)}; -+ ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 2)}; -+ return ret; -+} -+ -+__extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__)) -+vld3_dup_s8 (const int8_t * __a) -+{ -+ int8x8x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0); -+ ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1); -+ ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); -+ return ret; -+} -+ -+__extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__)) -+vld3_dup_p8 (const poly8_t * __a) -+{ -+ poly8x8x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0); -+ ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1); -+ ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); -+ return ret; -+} -+ -+__extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__)) -+vld3_dup_s16 (const int16_t * __a) -+{ -+ int16x4x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); -+ ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); -+ ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2); -+ return ret; -+} -+ -+__extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__)) -+vld3_dup_p16 (const poly16_t * __a) -+{ -+ poly16x4x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); -+ ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); -+ ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2); -+ return ret; -+} -+ -+__extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__)) -+vld3_dup_s32 (const int32_t * __a) -+{ -+ int32x2x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv2si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0); -+ ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1); -+ ret.val[2] = 
(int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2); -+ return ret; -+} -+ -+__extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__)) -+vld3_dup_u8 (const uint8_t * __a) -+{ -+ uint8x8x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0); -+ ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1); -+ ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); -+ return ret; -+} -+ -+__extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__)) -+vld3_dup_u16 (const uint16_t * __a) -+{ -+ uint16x4x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); -+ ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); -+ ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2); -+ return ret; -+} -+ -+__extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__)) -+vld3_dup_u32 (const uint32_t * __a) -+{ -+ uint32x2x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv2si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0); -+ ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1); -+ ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2); -+ return ret; -+} -+ -+__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__)) -+vld3_dup_f32 (const float32_t * __a) -+{ -+ float32x2x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv2sf ((const __builtin_aarch64_simd_sf *) __a); -+ ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0); -+ ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1); -+ ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2); -+ return ret; -+} -+ -+__extension__ static __inline int8x16x3_t __attribute__ ((__always_inline__)) -+vld3q_dup_s8 (const int8_t * __a) -+{ -+ int8x16x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); -+ ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); -+ ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); -+ return ret; -+} -+ -+__extension__ static __inline poly8x16x3_t __attribute__ ((__always_inline__)) -+vld3q_dup_p8 (const poly8_t * __a) -+{ -+ poly8x16x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); -+ ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); -+ ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); -+ return ret; -+} -+ -+__extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__)) -+vld3q_dup_s16 (const int16_t * __a) -+{ -+ int16x8x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); -+ ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); -+ ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); -+ return ret; -+} -+ -+__extension__ 
static __inline poly16x8x3_t __attribute__ ((__always_inline__)) -+vld3q_dup_p16 (const poly16_t * __a) -+{ -+ poly16x8x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); -+ ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); -+ ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); -+ return ret; -+} -+ -+__extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__)) -+vld3q_dup_s32 (const int32_t * __a) -+{ -+ int32x4x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv4si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0); -+ ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1); -+ ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2); -+ return ret; -+} -+ -+__extension__ static __inline int64x2x3_t __attribute__ ((__always_inline__)) -+vld3q_dup_s64 (const int64_t * __a) -+{ -+ int64x2x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0); -+ ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1); -+ ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2); -+ return ret; -+} -+ -+__extension__ static __inline uint8x16x3_t __attribute__ ((__always_inline__)) -+vld3q_dup_u8 (const uint8_t * __a) -+{ -+ uint8x16x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); -+ ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); -+ ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); -+ return ret; -+} -+ -+__extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__)) -+vld3q_dup_u16 (const uint16_t * __a) -+{ -+ uint16x8x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); -+ ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); -+ ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); -+ return ret; -+} -+ -+__extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__)) -+vld3q_dup_u32 (const uint32_t * __a) -+{ -+ uint32x4x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv4si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0); -+ ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1); -+ ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2); -+ return ret; -+} -+ -+__extension__ static __inline uint64x2x3_t __attribute__ ((__always_inline__)) -+vld3q_dup_u64 (const uint64_t * __a) -+{ -+ uint64x2x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0); -+ ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1); -+ ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2); -+ return ret; -+} -+ -+__extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__)) -+vld3q_dup_f32 (const 
float32_t * __a) -+{ -+ float32x4x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv4sf ((const __builtin_aarch64_simd_sf *) __a); -+ ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0); -+ ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1); -+ ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2); -+ return ret; -+} -+ -+__extension__ static __inline float64x2x3_t __attribute__ ((__always_inline__)) -+vld3q_dup_f64 (const float64_t * __a) -+{ -+ float64x2x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv2df ((const __builtin_aarch64_simd_df *) __a); -+ ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0); -+ ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1); -+ ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2); -+ return ret; -+} -+ -+__extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__)) -+vld4_dup_s64 (const int64_t * __a) -+{ -+ int64x1x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rdi ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0); -+ ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1); -+ ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2); -+ ret.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3); -+ return ret; -+} -+ -+__extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__)) -+vld4_dup_u64 (const uint64_t * __a) -+{ -+ uint64x1x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rdi ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0); -+ ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1); -+ ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2); -+ ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3); -+ return ret; -+} -+ -+__extension__ static __inline float64x1x4_t __attribute__ ((__always_inline__)) -+vld4_dup_f64 (const float64_t * __a) -+{ -+ float64x1x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rdf ((const __builtin_aarch64_simd_df *) __a); -+ ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 0)}; -+ ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 1)}; -+ ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 2)}; -+ ret.val[3] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 3)}; -+ return ret; -+} -+ -+__extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__)) -+vld4_dup_s8 (const int8_t * __a) -+{ -+ int8x8x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0); -+ ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1); -+ ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2); -+ ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3); -+ return ret; -+} -+ -+__extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__)) -+vld4_dup_p8 (const poly8_t * __a) -+{ -+ poly8x8x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0); -+ ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1); -+ ret.val[2] = (poly8x8_t) 
__builtin_aarch64_get_dregxiv8qi (__o, 2); -+ ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3); -+ return ret; -+} -+ -+__extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__)) -+vld4_dup_s16 (const int16_t * __a) -+{ -+ int16x4x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0); -+ ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1); -+ ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2); -+ ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3); -+ return ret; -+} -+ -+__extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__)) -+vld4_dup_p16 (const poly16_t * __a) -+{ -+ poly16x4x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0); -+ ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1); -+ ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2); -+ ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3); -+ return ret; -+} -+ -+__extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__)) -+vld4_dup_s32 (const int32_t * __a) -+{ -+ int32x2x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv2si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0); -+ ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1); -+ ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2); -+ ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3); -+ return ret; -+} -+ -+__extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__)) -+vld4_dup_u8 (const uint8_t * __a) -+{ -+ uint8x8x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0); -+ ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1); -+ ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2); -+ ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3); -+ return ret; -+} -+ -+__extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__)) -+vld4_dup_u16 (const uint16_t * __a) -+{ -+ uint16x4x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0); -+ ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1); -+ ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2); -+ ret.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3); -+ return ret; -+} -+ -+__extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__)) -+vld4_dup_u32 (const uint32_t * __a) -+{ -+ uint32x2x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv2si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0); -+ ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1); -+ ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2); -+ ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3); -+ return ret; -+} -+ -+__extension__ static __inline 
float32x2x4_t __attribute__ ((__always_inline__)) -+vld4_dup_f32 (const float32_t * __a) -+{ -+ float32x2x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv2sf ((const __builtin_aarch64_simd_sf *) __a); -+ ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0); -+ ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 1); -+ ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2); -+ ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3); -+ return ret; -+} -+ -+__extension__ static __inline int8x16x4_t __attribute__ ((__always_inline__)) -+vld4q_dup_s8 (const int8_t * __a) -+{ -+ int8x16x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); -+ ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); -+ ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); -+ ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); -+ return ret; -+} -+ -+__extension__ static __inline poly8x16x4_t __attribute__ ((__always_inline__)) -+vld4q_dup_p8 (const poly8_t * __a) -+{ -+ poly8x16x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); -+ ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); -+ ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); -+ ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); -+ return ret; -+} -+ -+__extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__)) -+vld4q_dup_s16 (const int16_t * __a) -+{ -+ int16x8x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0); -+ ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1); -+ ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); -+ ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3); -+ return ret; -+} -+ -+__extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__)) -+vld4q_dup_p16 (const poly16_t * __a) -+{ -+ poly16x8x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0); -+ ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1); -+ ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); -+ ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3); -+ return ret; -+} -+ -+__extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__)) -+vld4q_dup_s32 (const int32_t * __a) -+{ -+ int32x4x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv4si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0); -+ ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1); -+ ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2); -+ ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3); -+ return ret; -+} -+ -+__extension__ static __inline int64x2x4_t __attribute__ ((__always_inline__)) -+vld4q_dup_s64 (const int64_t * __a) -+{ -+ int64x2x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = 
__builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0); -+ ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1); -+ ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2); -+ ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3); -+ return ret; -+} -+ -+__extension__ static __inline uint8x16x4_t __attribute__ ((__always_inline__)) -+vld4q_dup_u8 (const uint8_t * __a) -+{ -+ uint8x16x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); -+ ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); -+ ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); -+ ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); -+ return ret; -+} -+ -+__extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__)) -+vld4q_dup_u16 (const uint16_t * __a) -+{ -+ uint16x8x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0); -+ ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1); -+ ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); -+ ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3); -+ return ret; -+} -+ -+__extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__)) -+vld4q_dup_u32 (const uint32_t * __a) -+{ -+ uint32x4x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv4si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0); -+ ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1); -+ ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2); -+ ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3); -+ return ret; -+} -+ -+__extension__ static __inline uint64x2x4_t __attribute__ ((__always_inline__)) -+vld4q_dup_u64 (const uint64_t * __a) -+{ -+ uint64x2x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0); -+ ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1); -+ ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2); -+ ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3); -+ return ret; -+} -+ -+__extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__)) -+vld4q_dup_f32 (const float32_t * __a) -+{ -+ float32x4x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv4sf ((const __builtin_aarch64_simd_sf *) __a); -+ ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0); -+ ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1); -+ ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2); -+ ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3); -+ return ret; -+} -+ -+__extension__ static __inline float64x2x4_t __attribute__ ((__always_inline__)) -+vld4q_dup_f64 (const float64_t * __a) -+{ -+ float64x2x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv2df ((const __builtin_aarch64_simd_df *) __a); -+ ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df 
(__o, 0); -+ ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1); -+ ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2); -+ ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3); -+ return ret; -+} -+ - /* vmax */ - - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -@@ -20911,6 +19596,65 @@ - return -__a; - } - -+/* vpadd */ -+ -+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+vpadd_s8 (int8x8_t __a, int8x8_t __b) -+{ -+ return __builtin_aarch64_addpv8qi (__a, __b); -+} -+ -+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+vpadd_s16 (int16x4_t __a, int16x4_t __b) -+{ -+ return __builtin_aarch64_addpv4hi (__a, __b); -+} -+ -+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+vpadd_s32 (int32x2_t __a, int32x2_t __b) -+{ -+ return __builtin_aarch64_addpv2si (__a, __b); -+} -+ -+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+vpadd_u8 (uint8x8_t __a, uint8x8_t __b) -+{ -+ return (uint8x8_t) __builtin_aarch64_addpv8qi ((int8x8_t) __a, -+ (int8x8_t) __b); -+} -+ -+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+vpadd_u16 (uint16x4_t __a, uint16x4_t __b) -+{ -+ return (uint16x4_t) __builtin_aarch64_addpv4hi ((int16x4_t) __a, -+ (int16x4_t) __b); -+} -+ -+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+vpadd_u32 (uint32x2_t __a, uint32x2_t __b) -+{ -+ return (uint32x2_t) __builtin_aarch64_addpv2si ((int32x2_t) __a, -+ (int32x2_t) __b); -+} -+ -+__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -+vpaddd_f64 (float64x2_t __a) -+{ -+ return vgetq_lane_f64 (__builtin_aarch64_reduc_splus_v2df (__a), 0); -+} -+ -+__extension__ static __inline int64_t __attribute__ ((__always_inline__)) -+vpaddd_s64 (int64x2_t __a) -+{ -+ return __builtin_aarch64_addpdi (__a); -+} -+ -+__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) -+vpaddd_u64 (uint64x2_t __a) -+{ -+ return __builtin_aarch64_addpdi ((int64x2_t) __a); -+} -+ - /* vqabs */ - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -@@ -20937,6 +19681,12 @@ - return (int32_t) __builtin_aarch64_sqabssi (__a); - } - -+__extension__ static __inline int64_t __attribute__ ((__always_inline__)) -+vqabsd_s64 (int64_t __a) -+{ -+ return __builtin_aarch64_sqabsdi (__a); -+} -+ - /* vqadd */ - - __extension__ static __inline int8_t __attribute__ ((__always_inline__)) -@@ -20966,25 +19716,26 @@ - __extension__ static __inline uint8_t __attribute__ ((__always_inline__)) - vqaddb_u8 (uint8_t __a, uint8_t __b) - { -- return (uint8_t) __builtin_aarch64_uqaddqi (__a, __b); -+ return (uint8_t) __builtin_aarch64_uqaddqi_uuu (__a, __b); - } - - __extension__ static __inline uint16_t __attribute__ ((__always_inline__)) - vqaddh_u16 (uint16_t __a, uint16_t __b) - { -- return (uint16_t) __builtin_aarch64_uqaddhi (__a, __b); -+ return (uint16_t) __builtin_aarch64_uqaddhi_uuu (__a, __b); - } - - __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) - vqadds_u32 (uint32_t __a, uint32_t __b) - { -- return (uint32_t) __builtin_aarch64_uqaddsi (__a, __b); -+ return (uint32_t) __builtin_aarch64_uqaddsi_uuu (__a, __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vqaddd_u64 (uint64x1_t __a, uint64x1_t __b) - { -- return (uint64x1_t) __builtin_aarch64_uqadddi (__a, __b); -+ return (uint64x1_t) 
__builtin_aarch64_uqadddi_uuu ((uint64_t) __a, -+ (uint64_t) __b); - } - - /* vqdmlal */ -@@ -21549,6 +20300,12 @@ - return (int32_t) __builtin_aarch64_sqnegsi (__a); - } - -+__extension__ static __inline int64_t __attribute__ ((__always_inline__)) -+vqnegd_s64 (int64_t __a) -+{ -+ return __builtin_aarch64_sqnegdi (__a); -+} -+ - /* vqrdmulh */ - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -@@ -21628,25 +20385,25 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vqrshl_u8 (uint8x8_t __a, int8x8_t __b) - { -- return (uint8x8_t) __builtin_aarch64_uqrshlv8qi ((int8x8_t) __a, __b); -+ return __builtin_aarch64_uqrshlv8qi_uus ( __a, __b); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vqrshl_u16 (uint16x4_t __a, int16x4_t __b) - { -- return (uint16x4_t) __builtin_aarch64_uqrshlv4hi ((int16x4_t) __a, __b); -+ return __builtin_aarch64_uqrshlv4hi_uus ( __a, __b); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vqrshl_u32 (uint32x2_t __a, int32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_uqrshlv2si ((int32x2_t) __a, __b); -+ return __builtin_aarch64_uqrshlv2si_uus ( __a, __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vqrshl_u64 (uint64x1_t __a, int64x1_t __b) - { -- return (uint64x1_t) __builtin_aarch64_uqrshldi ((int64x1_t) __a, __b); -+ return __builtin_aarch64_uqrshldi_uus ( __a, __b); - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -@@ -21676,25 +20433,25 @@ - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vqrshlq_u8 (uint8x16_t __a, int8x16_t __b) - { -- return (uint8x16_t) __builtin_aarch64_uqrshlv16qi ((int8x16_t) __a, __b); -+ return __builtin_aarch64_uqrshlv16qi_uus ( __a, __b); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vqrshlq_u16 (uint16x8_t __a, int16x8_t __b) - { -- return (uint16x8_t) __builtin_aarch64_uqrshlv8hi ((int16x8_t) __a, __b); -+ return __builtin_aarch64_uqrshlv8hi_uus ( __a, __b); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vqrshlq_u32 (uint32x4_t __a, int32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_uqrshlv4si ((int32x4_t) __a, __b); -+ return __builtin_aarch64_uqrshlv4si_uus ( __a, __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vqrshlq_u64 (uint64x2_t __a, int64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_uqrshlv2di ((int64x2_t) __a, __b); -+ return __builtin_aarch64_uqrshlv2di_uus ( __a, __b); - } - - __extension__ static __inline int8_t __attribute__ ((__always_inline__)) -@@ -21724,25 +20481,25 @@ - __extension__ static __inline uint8_t __attribute__ ((__always_inline__)) - vqrshlb_u8 (uint8_t __a, uint8_t __b) - { -- return (uint8_t) __builtin_aarch64_uqrshlqi (__a, __b); -+ return __builtin_aarch64_uqrshlqi_uus (__a, __b); - } - - __extension__ static __inline uint16_t __attribute__ ((__always_inline__)) - vqrshlh_u16 (uint16_t __a, uint16_t __b) - { -- return (uint16_t) __builtin_aarch64_uqrshlhi (__a, __b); -+ return __builtin_aarch64_uqrshlhi_uus (__a, __b); - } - - __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) - vqrshls_u32 (uint32_t __a, uint32_t __b) - { -- return (uint32_t) __builtin_aarch64_uqrshlsi (__a, __b); -+ return __builtin_aarch64_uqrshlsi_uus (__a, __b); - } - - __extension__ static __inline uint64x1_t 
__attribute__ ((__always_inline__)) - vqrshld_u64 (uint64x1_t __a, uint64x1_t __b) - { -- return (uint64x1_t) __builtin_aarch64_uqrshldi (__a, __b); -+ return __builtin_aarch64_uqrshldi_uus (__a, __b); - } - - /* vqrshrn */ -@@ -21768,19 +20525,19 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vqrshrn_n_u16 (uint16x8_t __a, const int __b) - { -- return (uint8x8_t) __builtin_aarch64_uqrshrn_nv8hi ((int16x8_t) __a, __b); -+ return __builtin_aarch64_uqrshrn_nv8hi_uus ( __a, __b); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vqrshrn_n_u32 (uint32x4_t __a, const int __b) - { -- return (uint16x4_t) __builtin_aarch64_uqrshrn_nv4si ((int32x4_t) __a, __b); -+ return __builtin_aarch64_uqrshrn_nv4si_uus ( __a, __b); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vqrshrn_n_u64 (uint64x2_t __a, const int __b) - { -- return (uint32x2_t) __builtin_aarch64_uqrshrn_nv2di ((int64x2_t) __a, __b); -+ return __builtin_aarch64_uqrshrn_nv2di_uus ( __a, __b); - } - - __extension__ static __inline int8_t __attribute__ ((__always_inline__)) -@@ -21804,19 +20561,19 @@ - __extension__ static __inline uint8_t __attribute__ ((__always_inline__)) - vqrshrnh_n_u16 (uint16_t __a, const int __b) - { -- return (uint8_t) __builtin_aarch64_uqrshrn_nhi (__a, __b); -+ return __builtin_aarch64_uqrshrn_nhi_uus (__a, __b); - } - - __extension__ static __inline uint16_t __attribute__ ((__always_inline__)) - vqrshrns_n_u32 (uint32_t __a, const int __b) - { -- return (uint16_t) __builtin_aarch64_uqrshrn_nsi (__a, __b); -+ return __builtin_aarch64_uqrshrn_nsi_uus (__a, __b); - } - - __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) - vqrshrnd_n_u64 (uint64x1_t __a, const int __b) - { -- return (uint32_t) __builtin_aarch64_uqrshrn_ndi (__a, __b); -+ return __builtin_aarch64_uqrshrn_ndi_uus (__a, __b); - } - - /* vqrshrun */ -@@ -21886,25 +20643,25 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vqshl_u8 (uint8x8_t __a, int8x8_t __b) - { -- return (uint8x8_t) __builtin_aarch64_uqshlv8qi ((int8x8_t) __a, __b); -+ return __builtin_aarch64_uqshlv8qi_uus ( __a, __b); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vqshl_u16 (uint16x4_t __a, int16x4_t __b) - { -- return (uint16x4_t) __builtin_aarch64_uqshlv4hi ((int16x4_t) __a, __b); -+ return __builtin_aarch64_uqshlv4hi_uus ( __a, __b); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vqshl_u32 (uint32x2_t __a, int32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_uqshlv2si ((int32x2_t) __a, __b); -+ return __builtin_aarch64_uqshlv2si_uus ( __a, __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vqshl_u64 (uint64x1_t __a, int64x1_t __b) - { -- return (uint64x1_t) __builtin_aarch64_uqshldi ((int64x1_t) __a, __b); -+ return __builtin_aarch64_uqshldi_uus ( __a, __b); - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -@@ -21934,25 +20691,25 @@ - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vqshlq_u8 (uint8x16_t __a, int8x16_t __b) - { -- return (uint8x16_t) __builtin_aarch64_uqshlv16qi ((int8x16_t) __a, __b); -+ return __builtin_aarch64_uqshlv16qi_uus ( __a, __b); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vqshlq_u16 (uint16x8_t __a, int16x8_t __b) - { -- return (uint16x8_t) 
__builtin_aarch64_uqshlv8hi ((int16x8_t) __a, __b); -+ return __builtin_aarch64_uqshlv8hi_uus ( __a, __b); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vqshlq_u32 (uint32x4_t __a, int32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_uqshlv4si ((int32x4_t) __a, __b); -+ return __builtin_aarch64_uqshlv4si_uus ( __a, __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vqshlq_u64 (uint64x2_t __a, int64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_uqshlv2di ((int64x2_t) __a, __b); -+ return __builtin_aarch64_uqshlv2di_uus ( __a, __b); - } - - __extension__ static __inline int8_t __attribute__ ((__always_inline__)) -@@ -21982,25 +20739,25 @@ - __extension__ static __inline uint8_t __attribute__ ((__always_inline__)) - vqshlb_u8 (uint8_t __a, uint8_t __b) - { -- return (uint8_t) __builtin_aarch64_uqshlqi (__a, __b); -+ return __builtin_aarch64_uqshlqi_uus (__a, __b); - } - - __extension__ static __inline uint16_t __attribute__ ((__always_inline__)) - vqshlh_u16 (uint16_t __a, uint16_t __b) - { -- return (uint16_t) __builtin_aarch64_uqshlhi (__a, __b); -+ return __builtin_aarch64_uqshlhi_uus (__a, __b); - } - - __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) - vqshls_u32 (uint32_t __a, uint32_t __b) - { -- return (uint32_t) __builtin_aarch64_uqshlsi (__a, __b); -+ return __builtin_aarch64_uqshlsi_uus (__a, __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vqshld_u64 (uint64x1_t __a, uint64x1_t __b) - { -- return (uint64x1_t) __builtin_aarch64_uqshldi (__a, __b); -+ return __builtin_aarch64_uqshldi_uus (__a, __b); - } - - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -@@ -22030,25 +20787,25 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vqshl_n_u8 (uint8x8_t __a, const int __b) - { -- return (uint8x8_t) __builtin_aarch64_uqshl_nv8qi ((int8x8_t) __a, __b); -+ return __builtin_aarch64_uqshl_nv8qi_uus (__a, __b); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vqshl_n_u16 (uint16x4_t __a, const int __b) - { -- return (uint16x4_t) __builtin_aarch64_uqshl_nv4hi ((int16x4_t) __a, __b); -+ return __builtin_aarch64_uqshl_nv4hi_uus (__a, __b); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vqshl_n_u32 (uint32x2_t __a, const int __b) - { -- return (uint32x2_t) __builtin_aarch64_uqshl_nv2si ((int32x2_t) __a, __b); -+ return __builtin_aarch64_uqshl_nv2si_uus (__a, __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vqshl_n_u64 (uint64x1_t __a, const int __b) - { -- return (uint64x1_t) __builtin_aarch64_uqshl_ndi ((int64x1_t) __a, __b); -+ return __builtin_aarch64_uqshl_ndi_uus (__a, __b); - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -@@ -22078,25 +20835,25 @@ - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vqshlq_n_u8 (uint8x16_t __a, const int __b) - { -- return (uint8x16_t) __builtin_aarch64_uqshl_nv16qi ((int8x16_t) __a, __b); -+ return __builtin_aarch64_uqshl_nv16qi_uus (__a, __b); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vqshlq_n_u16 (uint16x8_t __a, const int __b) - { -- return (uint16x8_t) __builtin_aarch64_uqshl_nv8hi ((int16x8_t) __a, __b); -+ return __builtin_aarch64_uqshl_nv8hi_uus (__a, __b); - } - - __extension__ static __inline 
uint32x4_t __attribute__ ((__always_inline__)) - vqshlq_n_u32 (uint32x4_t __a, const int __b) - { -- return (uint32x4_t) __builtin_aarch64_uqshl_nv4si ((int32x4_t) __a, __b); -+ return __builtin_aarch64_uqshl_nv4si_uus (__a, __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vqshlq_n_u64 (uint64x2_t __a, const int __b) - { -- return (uint64x2_t) __builtin_aarch64_uqshl_nv2di ((int64x2_t) __a, __b); -+ return __builtin_aarch64_uqshl_nv2di_uus (__a, __b); - } - - __extension__ static __inline int8_t __attribute__ ((__always_inline__)) -@@ -22126,25 +20883,25 @@ - __extension__ static __inline uint8_t __attribute__ ((__always_inline__)) - vqshlb_n_u8 (uint8_t __a, const int __b) - { -- return (uint8_t) __builtin_aarch64_uqshl_nqi (__a, __b); -+ return __builtin_aarch64_uqshl_nqi_uus (__a, __b); - } - - __extension__ static __inline uint16_t __attribute__ ((__always_inline__)) - vqshlh_n_u16 (uint16_t __a, const int __b) - { -- return (uint16_t) __builtin_aarch64_uqshl_nhi (__a, __b); -+ return __builtin_aarch64_uqshl_nhi_uus (__a, __b); - } - - __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) - vqshls_n_u32 (uint32_t __a, const int __b) - { -- return (uint32_t) __builtin_aarch64_uqshl_nsi (__a, __b); -+ return __builtin_aarch64_uqshl_nsi_uus (__a, __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vqshld_n_u64 (uint64x1_t __a, const int __b) - { -- return (uint64x1_t) __builtin_aarch64_uqshl_ndi (__a, __b); -+ return __builtin_aarch64_uqshl_ndi_uus (__a, __b); - } - - /* vqshlu */ -@@ -22152,73 +20909,73 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vqshlu_n_s8 (int8x8_t __a, const int __b) - { -- return (uint8x8_t) __builtin_aarch64_sqshlu_nv8qi (__a, __b); -+ return __builtin_aarch64_sqshlu_nv8qi_uss (__a, __b); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vqshlu_n_s16 (int16x4_t __a, const int __b) - { -- return (uint16x4_t) __builtin_aarch64_sqshlu_nv4hi (__a, __b); -+ return __builtin_aarch64_sqshlu_nv4hi_uss (__a, __b); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vqshlu_n_s32 (int32x2_t __a, const int __b) - { -- return (uint32x2_t) __builtin_aarch64_sqshlu_nv2si (__a, __b); -+ return __builtin_aarch64_sqshlu_nv2si_uss (__a, __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vqshlu_n_s64 (int64x1_t __a, const int __b) - { -- return (uint64x1_t) __builtin_aarch64_sqshlu_ndi (__a, __b); -+ return __builtin_aarch64_sqshlu_ndi_uss (__a, __b); - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vqshluq_n_s8 (int8x16_t __a, const int __b) - { -- return (uint8x16_t) __builtin_aarch64_sqshlu_nv16qi (__a, __b); -+ return __builtin_aarch64_sqshlu_nv16qi_uss (__a, __b); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vqshluq_n_s16 (int16x8_t __a, const int __b) - { -- return (uint16x8_t) __builtin_aarch64_sqshlu_nv8hi (__a, __b); -+ return __builtin_aarch64_sqshlu_nv8hi_uss (__a, __b); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vqshluq_n_s32 (int32x4_t __a, const int __b) - { -- return (uint32x4_t) __builtin_aarch64_sqshlu_nv4si (__a, __b); -+ return __builtin_aarch64_sqshlu_nv4si_uss (__a, __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vqshluq_n_s64 (int64x2_t __a, 
const int __b) - { -- return (uint64x2_t) __builtin_aarch64_sqshlu_nv2di (__a, __b); -+ return __builtin_aarch64_sqshlu_nv2di_uss (__a, __b); - } - - __extension__ static __inline int8_t __attribute__ ((__always_inline__)) - vqshlub_n_s8 (int8_t __a, const int __b) - { -- return (int8_t) __builtin_aarch64_sqshlu_nqi (__a, __b); -+ return (int8_t) __builtin_aarch64_sqshlu_nqi_uss (__a, __b); - } - - __extension__ static __inline int16_t __attribute__ ((__always_inline__)) - vqshluh_n_s16 (int16_t __a, const int __b) - { -- return (int16_t) __builtin_aarch64_sqshlu_nhi (__a, __b); -+ return (int16_t) __builtin_aarch64_sqshlu_nhi_uss (__a, __b); - } - - __extension__ static __inline int32_t __attribute__ ((__always_inline__)) - vqshlus_n_s32 (int32_t __a, const int __b) - { -- return (int32_t) __builtin_aarch64_sqshlu_nsi (__a, __b); -+ return (int32_t) __builtin_aarch64_sqshlu_nsi_uss (__a, __b); - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vqshlud_n_s64 (int64x1_t __a, const int __b) - { -- return (int64x1_t) __builtin_aarch64_sqshlu_ndi (__a, __b); -+ return (int64x1_t) __builtin_aarch64_sqshlu_ndi_uss (__a, __b); - } - - /* vqshrn */ -@@ -22244,19 +21001,19 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vqshrn_n_u16 (uint16x8_t __a, const int __b) - { -- return (uint8x8_t) __builtin_aarch64_uqshrn_nv8hi ((int16x8_t) __a, __b); -+ return __builtin_aarch64_uqshrn_nv8hi_uus ( __a, __b); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vqshrn_n_u32 (uint32x4_t __a, const int __b) - { -- return (uint16x4_t) __builtin_aarch64_uqshrn_nv4si ((int32x4_t) __a, __b); -+ return __builtin_aarch64_uqshrn_nv4si_uus ( __a, __b); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vqshrn_n_u64 (uint64x2_t __a, const int __b) - { -- return (uint32x2_t) __builtin_aarch64_uqshrn_nv2di ((int64x2_t) __a, __b); -+ return __builtin_aarch64_uqshrn_nv2di_uus ( __a, __b); - } - - __extension__ static __inline int8_t __attribute__ ((__always_inline__)) -@@ -22280,19 +21037,19 @@ - __extension__ static __inline uint8_t __attribute__ ((__always_inline__)) - vqshrnh_n_u16 (uint16_t __a, const int __b) - { -- return (uint8_t) __builtin_aarch64_uqshrn_nhi (__a, __b); -+ return __builtin_aarch64_uqshrn_nhi_uus (__a, __b); - } - - __extension__ static __inline uint16_t __attribute__ ((__always_inline__)) - vqshrns_n_u32 (uint32_t __a, const int __b) - { -- return (uint16_t) __builtin_aarch64_uqshrn_nsi (__a, __b); -+ return __builtin_aarch64_uqshrn_nsi_uus (__a, __b); - } - - __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) - vqshrnd_n_u64 (uint64x1_t __a, const int __b) - { -- return (uint32_t) __builtin_aarch64_uqshrn_ndi (__a, __b); -+ return __builtin_aarch64_uqshrn_ndi_uus (__a, __b); - } - - /* vqshrun */ -@@ -22362,27 +21119,66 @@ - __extension__ static __inline uint8_t __attribute__ ((__always_inline__)) - vqsubb_u8 (uint8_t __a, uint8_t __b) - { -- return (uint8_t) __builtin_aarch64_uqsubqi (__a, __b); -+ return (uint8_t) __builtin_aarch64_uqsubqi_uuu (__a, __b); - } - - __extension__ static __inline uint16_t __attribute__ ((__always_inline__)) - vqsubh_u16 (uint16_t __a, uint16_t __b) - { -- return (uint16_t) __builtin_aarch64_uqsubhi (__a, __b); -+ return (uint16_t) __builtin_aarch64_uqsubhi_uuu (__a, __b); - } - - __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) - vqsubs_u32 (uint32_t __a, uint32_t __b) - { -- 
return (uint32_t) __builtin_aarch64_uqsubsi (__a, __b); -+ return (uint32_t) __builtin_aarch64_uqsubsi_uuu (__a, __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vqsubd_u64 (uint64x1_t __a, uint64x1_t __b) - { -- return (uint64x1_t) __builtin_aarch64_uqsubdi (__a, __b); -+ return (uint64x1_t) __builtin_aarch64_uqsubdi_uuu ((uint64_t) __a, -+ (uint64_t) __b); - } - -+/* vrbit */ -+ -+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+vrbit_p8 (poly8x8_t __a) -+{ -+ return (poly8x8_t) __builtin_aarch64_rbitv8qi ((int8x8_t) __a); -+} -+ -+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+vrbit_s8 (int8x8_t __a) -+{ -+ return __builtin_aarch64_rbitv8qi (__a); -+} -+ -+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+vrbit_u8 (uint8x8_t __a) -+{ -+ return (uint8x8_t) __builtin_aarch64_rbitv8qi ((int8x8_t) __a); -+} -+ -+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+vrbitq_p8 (poly8x16_t __a) -+{ -+ return (poly8x16_t) __builtin_aarch64_rbitv16qi ((int8x16_t)__a); -+} -+ -+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+vrbitq_s8 (int8x16_t __a) -+{ -+ return __builtin_aarch64_rbitv16qi (__a); -+} -+ -+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+vrbitq_u8 (uint8x16_t __a) -+{ -+ return (uint8x16_t) __builtin_aarch64_rbitv16qi ((int8x16_t) __a); -+} -+ - /* vrecpe */ - - __extension__ static __inline float32_t __attribute__ ((__always_inline__)) -@@ -22461,6 +21257,234 @@ - return __builtin_aarch64_frecpxdf (__a); - } - -+ -+/* vrev */ -+ -+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+vrev16_p8 (poly8x8_t a) -+{ -+ return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); -+} -+ -+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+vrev16_s8 (int8x8_t a) -+{ -+ return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); -+} -+ -+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+vrev16_u8 (uint8x8_t a) -+{ -+ return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); -+} -+ -+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+vrev16q_p8 (poly8x16_t a) -+{ -+ return __builtin_shuffle (a, -+ (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }); -+} -+ -+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+vrev16q_s8 (int8x16_t a) -+{ -+ return __builtin_shuffle (a, -+ (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }); -+} -+ -+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+vrev16q_u8 (uint8x16_t a) -+{ -+ return __builtin_shuffle (a, -+ (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }); -+} -+ -+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+vrev32_p8 (poly8x8_t a) -+{ -+ return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); -+} -+ -+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+vrev32_p16 (poly16x4_t a) -+{ -+ return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 }); -+} -+ -+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+vrev32_s8 (int8x8_t a) -+{ -+ return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); -+} -+ -+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 
-+vrev32_s16 (int16x4_t a) -+{ -+ return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 }); -+} -+ -+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+vrev32_u8 (uint8x8_t a) -+{ -+ return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); -+} -+ -+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+vrev32_u16 (uint16x4_t a) -+{ -+ return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 }); -+} -+ -+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+vrev32q_p8 (poly8x16_t a) -+{ -+ return __builtin_shuffle (a, -+ (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }); -+} -+ -+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+vrev32q_p16 (poly16x8_t a) -+{ -+ return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); -+} -+ -+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+vrev32q_s8 (int8x16_t a) -+{ -+ return __builtin_shuffle (a, -+ (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }); -+} -+ -+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+vrev32q_s16 (int16x8_t a) -+{ -+ return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); -+} -+ -+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+vrev32q_u8 (uint8x16_t a) -+{ -+ return __builtin_shuffle (a, -+ (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }); -+} -+ -+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+vrev32q_u16 (uint16x8_t a) -+{ -+ return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); -+} -+ -+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+vrev64_f32 (float32x2_t a) -+{ -+ return __builtin_shuffle (a, (uint32x2_t) { 1, 0 }); -+} -+ -+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+vrev64_p8 (poly8x8_t a) -+{ -+ return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); -+} -+ -+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+vrev64_p16 (poly16x4_t a) -+{ -+ return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 }); -+} -+ -+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+vrev64_s8 (int8x8_t a) -+{ -+ return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); -+} -+ -+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+vrev64_s16 (int16x4_t a) -+{ -+ return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 }); -+} -+ -+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+vrev64_s32 (int32x2_t a) -+{ -+ return __builtin_shuffle (a, (uint32x2_t) { 1, 0 }); -+} -+ -+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+vrev64_u8 (uint8x8_t a) -+{ -+ return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); -+} -+ -+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+vrev64_u16 (uint16x4_t a) -+{ -+ return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 }); -+} -+ -+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+vrev64_u32 (uint32x2_t a) -+{ -+ return __builtin_shuffle (a, (uint32x2_t) { 1, 0 }); -+} -+ -+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+vrev64q_f32 (float32x4_t a) -+{ -+ return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 }); -+} -+ -+__extension__ static __inline 
poly8x16_t __attribute__ ((__always_inline__)) -+vrev64q_p8 (poly8x16_t a) -+{ -+ return __builtin_shuffle (a, -+ (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }); -+} -+ -+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+vrev64q_p16 (poly16x8_t a) -+{ -+ return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); -+} -+ -+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+vrev64q_s8 (int8x16_t a) -+{ -+ return __builtin_shuffle (a, -+ (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }); -+} -+ -+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+vrev64q_s16 (int16x8_t a) -+{ -+ return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); -+} -+ -+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+vrev64q_s32 (int32x4_t a) -+{ -+ return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 }); -+} -+ -+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+vrev64q_u8 (uint8x16_t a) -+{ -+ return __builtin_shuffle (a, -+ (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }); -+} -+ -+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+vrev64q_u16 (uint16x8_t a) -+{ -+ return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); -+} -+ -+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+vrev64q_u32 (uint32x4_t a) -+{ -+ return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 }); -+} -+ - /* vrnd */ - - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -@@ -22469,6 +21493,12 @@ - return __builtin_aarch64_btruncv2sf (__a); - } - -+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -+vrnd_f64 (float64x1_t __a) -+{ -+ return vset_lane_f64 (__builtin_trunc (vget_lane_f64 (__a, 0)), __a, 0); -+} -+ - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vrndq_f32 (float32x4_t __a) - { -@@ -22489,6 +21519,12 @@ - return __builtin_aarch64_roundv2sf (__a); - } - -+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -+vrnda_f64 (float64x1_t __a) -+{ -+ return vset_lane_f64 (__builtin_round (vget_lane_f64 (__a, 0)), __a, 0); -+} -+ - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vrndaq_f32 (float32x4_t __a) - { -@@ -22509,6 +21545,12 @@ - return __builtin_aarch64_nearbyintv2sf (__a); - } - -+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -+vrndi_f64 (float64x1_t __a) -+{ -+ return vset_lane_f64 (__builtin_nearbyint (vget_lane_f64 (__a, 0)), __a, 0); -+} -+ - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vrndiq_f32 (float32x4_t __a) - { -@@ -22529,6 +21571,12 @@ - return __builtin_aarch64_floorv2sf (__a); - } - -+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -+vrndm_f64 (float64x1_t __a) -+{ -+ return vset_lane_f64 (__builtin_floor (vget_lane_f64 (__a, 0)), __a, 0); -+} -+ - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vrndmq_f32 (float32x4_t __a) - { -@@ -22548,6 +21596,13 @@ - { - return __builtin_aarch64_frintnv2sf (__a); - } -+ -+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -+vrndn_f64 (float64x1_t __a) -+{ -+ return __builtin_aarch64_frintndf (__a); -+} -+ - __extension__ static __inline float32x4_t __attribute__ 
((__always_inline__)) - vrndnq_f32 (float32x4_t __a) - { -@@ -22568,6 +21623,12 @@ - return __builtin_aarch64_ceilv2sf (__a); - } - -+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -+vrndp_f64 (float64x1_t __a) -+{ -+ return vset_lane_f64 (__builtin_ceil (vget_lane_f64 (__a, 0)), __a, 0); -+} -+ - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vrndpq_f32 (float32x4_t __a) - { -@@ -22588,6 +21649,12 @@ - return __builtin_aarch64_rintv2sf (__a); - } - -+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -+vrndx_f64 (float64x1_t __a) -+{ -+ return vset_lane_f64 (__builtin_rint (vget_lane_f64 (__a, 0)), __a, 0); -+} -+ - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vrndxq_f32 (float32x4_t __a) - { -@@ -22629,25 +21696,25 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vrshl_u8 (uint8x8_t __a, int8x8_t __b) - { -- return (uint8x8_t) __builtin_aarch64_urshlv8qi ((int8x8_t) __a, __b); -+ return __builtin_aarch64_urshlv8qi_uus (__a, __b); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vrshl_u16 (uint16x4_t __a, int16x4_t __b) - { -- return (uint16x4_t) __builtin_aarch64_urshlv4hi ((int16x4_t) __a, __b); -+ return __builtin_aarch64_urshlv4hi_uus (__a, __b); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vrshl_u32 (uint32x2_t __a, int32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_urshlv2si ((int32x2_t) __a, __b); -+ return __builtin_aarch64_urshlv2si_uus (__a, __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vrshl_u64 (uint64x1_t __a, int64x1_t __b) - { -- return (uint64x1_t) __builtin_aarch64_urshldi ((int64x1_t) __a, __b); -+ return __builtin_aarch64_urshldi_uus (__a, __b); - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -@@ -22677,25 +21744,25 @@ - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vrshlq_u8 (uint8x16_t __a, int8x16_t __b) - { -- return (uint8x16_t) __builtin_aarch64_urshlv16qi ((int8x16_t) __a, __b); -+ return __builtin_aarch64_urshlv16qi_uus (__a, __b); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vrshlq_u16 (uint16x8_t __a, int16x8_t __b) - { -- return (uint16x8_t) __builtin_aarch64_urshlv8hi ((int16x8_t) __a, __b); -+ return __builtin_aarch64_urshlv8hi_uus (__a, __b); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vrshlq_u32 (uint32x4_t __a, int32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_urshlv4si ((int32x4_t) __a, __b); -+ return __builtin_aarch64_urshlv4si_uus (__a, __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vrshlq_u64 (uint64x2_t __a, int64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_urshlv2di ((int64x2_t) __a, __b); -+ return __builtin_aarch64_urshlv2di_uus (__a, __b); - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -@@ -22707,7 +21774,7 @@ - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vrshld_u64 (uint64x1_t __a, uint64x1_t __b) - { -- return (uint64x1_t) __builtin_aarch64_urshldi (__a, __b); -+ return __builtin_aarch64_urshldi_uus (__a, __b); - } - - /* vrshr */ -@@ -22739,25 +21806,25 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vrshr_n_u8 (uint8x8_t __a, 
const int __b) - { -- return (uint8x8_t) __builtin_aarch64_urshr_nv8qi ((int8x8_t) __a, __b); -+ return __builtin_aarch64_urshr_nv8qi_uus (__a, __b); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vrshr_n_u16 (uint16x4_t __a, const int __b) - { -- return (uint16x4_t) __builtin_aarch64_urshr_nv4hi ((int16x4_t) __a, __b); -+ return __builtin_aarch64_urshr_nv4hi_uus (__a, __b); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vrshr_n_u32 (uint32x2_t __a, const int __b) - { -- return (uint32x2_t) __builtin_aarch64_urshr_nv2si ((int32x2_t) __a, __b); -+ return __builtin_aarch64_urshr_nv2si_uus (__a, __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vrshr_n_u64 (uint64x1_t __a, const int __b) - { -- return (uint64x1_t) __builtin_aarch64_urshr_ndi ((int64x1_t) __a, __b); -+ return __builtin_aarch64_urshr_ndi_uus (__a, __b); - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -@@ -22787,25 +21854,25 @@ - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vrshrq_n_u8 (uint8x16_t __a, const int __b) - { -- return (uint8x16_t) __builtin_aarch64_urshr_nv16qi ((int8x16_t) __a, __b); -+ return __builtin_aarch64_urshr_nv16qi_uus (__a, __b); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vrshrq_n_u16 (uint16x8_t __a, const int __b) - { -- return (uint16x8_t) __builtin_aarch64_urshr_nv8hi ((int16x8_t) __a, __b); -+ return __builtin_aarch64_urshr_nv8hi_uus (__a, __b); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vrshrq_n_u32 (uint32x4_t __a, const int __b) - { -- return (uint32x4_t) __builtin_aarch64_urshr_nv4si ((int32x4_t) __a, __b); -+ return __builtin_aarch64_urshr_nv4si_uus (__a, __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vrshrq_n_u64 (uint64x2_t __a, const int __b) - { -- return (uint64x2_t) __builtin_aarch64_urshr_nv2di ((int64x2_t) __a, __b); -+ return __builtin_aarch64_urshr_nv2di_uus (__a, __b); - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -@@ -22817,7 +21884,7 @@ - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vrshrd_n_u64 (uint64x1_t __a, const int __b) - { -- return (uint64x1_t) __builtin_aarch64_urshr_ndi (__a, __b); -+ return __builtin_aarch64_urshr_ndi_uus (__a, __b); - } - - /* vrsra */ -@@ -22849,29 +21916,25 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vrsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) - { -- return (uint8x8_t) __builtin_aarch64_ursra_nv8qi ((int8x8_t) __a, -- (int8x8_t) __b, __c); -+ return __builtin_aarch64_ursra_nv8qi_uuus (__a, __b, __c); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vrsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) - { -- return (uint16x4_t) __builtin_aarch64_ursra_nv4hi ((int16x4_t) __a, -- (int16x4_t) __b, __c); -+ return __builtin_aarch64_ursra_nv4hi_uuus (__a, __b, __c); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vrsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) - { -- return (uint32x2_t) __builtin_aarch64_ursra_nv2si ((int32x2_t) __a, -- (int32x2_t) __b, __c); -+ return __builtin_aarch64_ursra_nv2si_uuus (__a, __b, __c); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vrsra_n_u64 
(uint64x1_t __a, uint64x1_t __b, const int __c) - { -- return (uint64x1_t) __builtin_aarch64_ursra_ndi ((int64x1_t) __a, -- (int64x1_t) __b, __c); -+ return __builtin_aarch64_ursra_ndi_uuus (__a, __b, __c); - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -@@ -22901,29 +21964,25 @@ - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vrsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) - { -- return (uint8x16_t) __builtin_aarch64_ursra_nv16qi ((int8x16_t) __a, -- (int8x16_t) __b, __c); -+ return __builtin_aarch64_ursra_nv16qi_uuus (__a, __b, __c); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vrsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) - { -- return (uint16x8_t) __builtin_aarch64_ursra_nv8hi ((int16x8_t) __a, -- (int16x8_t) __b, __c); -+ return __builtin_aarch64_ursra_nv8hi_uuus (__a, __b, __c); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vrsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) - { -- return (uint32x4_t) __builtin_aarch64_ursra_nv4si ((int32x4_t) __a, -- (int32x4_t) __b, __c); -+ return __builtin_aarch64_ursra_nv4si_uuus (__a, __b, __c); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) - { -- return (uint64x2_t) __builtin_aarch64_ursra_nv2di ((int64x2_t) __a, -- (int64x2_t) __b, __c); -+ return __builtin_aarch64_ursra_nv2di_uuus (__a, __b, __c); - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -@@ -22935,7 +21994,7 @@ - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vrsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) - { -- return (uint64x1_t) __builtin_aarch64_ursra_ndi (__a, __b, __c); -+ return __builtin_aarch64_ursra_ndi_uuus (__a, __b, __c); - } - - #ifdef __ARM_FEATURE_CRYPTO -@@ -23128,109 +22187,109 @@ - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vshl_s8 (int8x8_t __a, int8x8_t __b) - { -- return (int8x8_t) __builtin_aarch64_sshlv8qi (__a, __b); -+ return __builtin_aarch64_sshlv8qi (__a, __b); - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vshl_s16 (int16x4_t __a, int16x4_t __b) - { -- return (int16x4_t) __builtin_aarch64_sshlv4hi (__a, __b); -+ return __builtin_aarch64_sshlv4hi (__a, __b); - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vshl_s32 (int32x2_t __a, int32x2_t __b) - { -- return (int32x2_t) __builtin_aarch64_sshlv2si (__a, __b); -+ return __builtin_aarch64_sshlv2si (__a, __b); - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vshl_s64 (int64x1_t __a, int64x1_t __b) - { -- return (int64x1_t) __builtin_aarch64_sshldi (__a, __b); -+ return __builtin_aarch64_sshldi (__a, __b); - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vshl_u8 (uint8x8_t __a, int8x8_t __b) - { -- return (uint8x8_t) __builtin_aarch64_ushlv8qi ((int8x8_t) __a, __b); -+ return __builtin_aarch64_ushlv8qi_uus (__a, __b); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vshl_u16 (uint16x4_t __a, int16x4_t __b) - { -- return (uint16x4_t) __builtin_aarch64_ushlv4hi ((int16x4_t) __a, __b); -+ return __builtin_aarch64_ushlv4hi_uus (__a, __b); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - 
vshl_u32 (uint32x2_t __a, int32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_ushlv2si ((int32x2_t) __a, __b); -+ return __builtin_aarch64_ushlv2si_uus (__a, __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vshl_u64 (uint64x1_t __a, int64x1_t __b) - { -- return (uint64x1_t) __builtin_aarch64_ushldi ((int64x1_t) __a, __b); -+ return __builtin_aarch64_ushldi_uus (__a, __b); - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vshlq_s8 (int8x16_t __a, int8x16_t __b) - { -- return (int8x16_t) __builtin_aarch64_sshlv16qi (__a, __b); -+ return __builtin_aarch64_sshlv16qi (__a, __b); - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vshlq_s16 (int16x8_t __a, int16x8_t __b) - { -- return (int16x8_t) __builtin_aarch64_sshlv8hi (__a, __b); -+ return __builtin_aarch64_sshlv8hi (__a, __b); - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vshlq_s32 (int32x4_t __a, int32x4_t __b) - { -- return (int32x4_t) __builtin_aarch64_sshlv4si (__a, __b); -+ return __builtin_aarch64_sshlv4si (__a, __b); - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vshlq_s64 (int64x2_t __a, int64x2_t __b) - { -- return (int64x2_t) __builtin_aarch64_sshlv2di (__a, __b); -+ return __builtin_aarch64_sshlv2di (__a, __b); - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vshlq_u8 (uint8x16_t __a, int8x16_t __b) - { -- return (uint8x16_t) __builtin_aarch64_ushlv16qi ((int8x16_t) __a, __b); -+ return __builtin_aarch64_ushlv16qi_uus (__a, __b); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vshlq_u16 (uint16x8_t __a, int16x8_t __b) - { -- return (uint16x8_t) __builtin_aarch64_ushlv8hi ((int16x8_t) __a, __b); -+ return __builtin_aarch64_ushlv8hi_uus (__a, __b); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vshlq_u32 (uint32x4_t __a, int32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_ushlv4si ((int32x4_t) __a, __b); -+ return __builtin_aarch64_ushlv4si_uus (__a, __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vshlq_u64 (uint64x2_t __a, int64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_ushlv2di ((int64x2_t) __a, __b); -+ return __builtin_aarch64_ushlv2di_uus (__a, __b); - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vshld_s64 (int64x1_t __a, int64x1_t __b) - { -- return (int64x1_t) __builtin_aarch64_sshldi (__a, __b); -+ return __builtin_aarch64_sshldi (__a, __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vshld_u64 (uint64x1_t __a, uint64x1_t __b) - { -- return (uint64x1_t) __builtin_aarch64_ushldi (__a, __b); -+ return __builtin_aarch64_ushldi_uus (__a, __b); - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -@@ -23290,19 +22349,19 @@ - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vshll_n_u8 (uint8x8_t __a, const int __b) - { -- return (uint16x8_t) __builtin_aarch64_ushll_nv8qi ((int8x8_t) __a, __b); -+ return __builtin_aarch64_ushll_nv8qi_uus (__a, __b); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vshll_n_u16 (uint16x4_t __a, const int __b) - { -- return (uint32x4_t) __builtin_aarch64_ushll_nv4hi ((int16x4_t) __a, __b); -+ return __builtin_aarch64_ushll_nv4hi_uus (__a, __b); - 
} - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vshll_n_u32 (uint32x2_t __a, const int __b) - { -- return (uint64x2_t) __builtin_aarch64_ushll_nv2si ((int32x2_t) __a, __b); -+ return __builtin_aarch64_ushll_nv2si_uus (__a, __b); - } - - /* vshr */ -@@ -23444,29 +22503,25 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vsli_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) - { -- return (uint8x8_t) __builtin_aarch64_usli_nv8qi ((int8x8_t) __a, -- (int8x8_t) __b, __c); -+ return __builtin_aarch64_usli_nv8qi_uuus (__a, __b, __c); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vsli_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) - { -- return (uint16x4_t) __builtin_aarch64_usli_nv4hi ((int16x4_t) __a, -- (int16x4_t) __b, __c); -+ return __builtin_aarch64_usli_nv4hi_uuus (__a, __b, __c); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vsli_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) - { -- return (uint32x2_t) __builtin_aarch64_usli_nv2si ((int32x2_t) __a, -- (int32x2_t) __b, __c); -+ return __builtin_aarch64_usli_nv2si_uuus (__a, __b, __c); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vsli_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) - { -- return (uint64x1_t) __builtin_aarch64_usli_ndi ((int64x1_t) __a, -- (int64x1_t) __b, __c); -+ return __builtin_aarch64_usli_ndi_uuus (__a, __b, __c); - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -@@ -23496,29 +22551,25 @@ - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) - { -- return (uint8x16_t) __builtin_aarch64_usli_nv16qi ((int8x16_t) __a, -- (int8x16_t) __b, __c); -+ return __builtin_aarch64_usli_nv16qi_uuus (__a, __b, __c); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) - { -- return (uint16x8_t) __builtin_aarch64_usli_nv8hi ((int16x8_t) __a, -- (int16x8_t) __b, __c); -+ return __builtin_aarch64_usli_nv8hi_uuus (__a, __b, __c); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) - { -- return (uint32x4_t) __builtin_aarch64_usli_nv4si ((int32x4_t) __a, -- (int32x4_t) __b, __c); -+ return __builtin_aarch64_usli_nv4si_uuus (__a, __b, __c); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) - { -- return (uint64x2_t) __builtin_aarch64_usli_nv2di ((int64x2_t) __a, -- (int64x2_t) __b, __c); -+ return __builtin_aarch64_usli_nv2di_uuus (__a, __b, __c); - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -@@ -23530,7 +22581,7 @@ - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vslid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) - { -- return (uint64x1_t) __builtin_aarch64_usli_ndi (__a, __b, __c); -+ return __builtin_aarch64_usli_ndi_uuus (__a, __b, __c); - } - - /* vsqadd */ -@@ -23538,80 +22589,73 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vsqadd_u8 (uint8x8_t __a, int8x8_t __b) - { -- return (uint8x8_t) __builtin_aarch64_usqaddv8qi ((int8x8_t) __a, -- (int8x8_t) __b); -+ return __builtin_aarch64_usqaddv8qi_uus (__a, 
__b); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vsqadd_u16 (uint16x4_t __a, int16x4_t __b) - { -- return (uint16x4_t) __builtin_aarch64_usqaddv4hi ((int16x4_t) __a, -- (int16x4_t) __b); -+ return __builtin_aarch64_usqaddv4hi_uus (__a, __b); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vsqadd_u32 (uint32x2_t __a, int32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_usqaddv2si ((int32x2_t) __a, -- (int32x2_t) __b); -+ return __builtin_aarch64_usqaddv2si_uus (__a, __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vsqadd_u64 (uint64x1_t __a, int64x1_t __b) - { -- return (uint64x1_t) __builtin_aarch64_usqadddi ((int64x1_t) __a, __b); -+ return __builtin_aarch64_usqadddi_uus (__a, __b); - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vsqaddq_u8 (uint8x16_t __a, int8x16_t __b) - { -- return (uint8x16_t) __builtin_aarch64_usqaddv16qi ((int8x16_t) __a, -- (int8x16_t) __b); -+ return __builtin_aarch64_usqaddv16qi_uus (__a, __b); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vsqaddq_u16 (uint16x8_t __a, int16x8_t __b) - { -- return (uint16x8_t) __builtin_aarch64_usqaddv8hi ((int16x8_t) __a, -- (int16x8_t) __b); -+ return __builtin_aarch64_usqaddv8hi_uus (__a, __b); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vsqaddq_u32 (uint32x4_t __a, int32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_usqaddv4si ((int32x4_t) __a, -- (int32x4_t) __b); -+ return __builtin_aarch64_usqaddv4si_uus (__a, __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vsqaddq_u64 (uint64x2_t __a, int64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_usqaddv2di ((int64x2_t) __a, -- (int64x2_t) __b); -+ return __builtin_aarch64_usqaddv2di_uus (__a, __b); - } - - __extension__ static __inline uint8_t __attribute__ ((__always_inline__)) - vsqaddb_u8 (uint8_t __a, int8_t __b) - { -- return (uint8_t) __builtin_aarch64_usqaddqi ((int8_t) __a, __b); -+ return __builtin_aarch64_usqaddqi_uus (__a, __b); - } - - __extension__ static __inline uint16_t __attribute__ ((__always_inline__)) - vsqaddh_u16 (uint16_t __a, int16_t __b) - { -- return (uint16_t) __builtin_aarch64_usqaddhi ((int16_t) __a, __b); -+ return __builtin_aarch64_usqaddhi_uus (__a, __b); - } - - __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) - vsqadds_u32 (uint32_t __a, int32_t __b) - { -- return (uint32_t) __builtin_aarch64_usqaddsi ((int32_t) __a, __b); -+ return __builtin_aarch64_usqaddsi_uus (__a, __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vsqaddd_u64 (uint64x1_t __a, int64x1_t __b) - { -- return (uint64x1_t) __builtin_aarch64_usqadddi ((int64x1_t) __a, __b); -+ return __builtin_aarch64_usqadddi_uus (__a, __b); - } - - /* vsqrt */ -@@ -23662,29 +22706,25 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) - { -- return (uint8x8_t) __builtin_aarch64_usra_nv8qi ((int8x8_t) __a, -- (int8x8_t) __b, __c); -+ return __builtin_aarch64_usra_nv8qi_uuus (__a, __b, __c); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) - { -- return (uint16x4_t) __builtin_aarch64_usra_nv4hi ((int16x4_t) __a, -- (int16x4_t) __b, __c); -+ 
return __builtin_aarch64_usra_nv4hi_uuus (__a, __b, __c); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) - { -- return (uint32x2_t) __builtin_aarch64_usra_nv2si ((int32x2_t) __a, -- (int32x2_t) __b, __c); -+ return __builtin_aarch64_usra_nv2si_uuus (__a, __b, __c); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) - { -- return (uint64x1_t) __builtin_aarch64_usra_ndi ((int64x1_t) __a, -- (int64x1_t) __b, __c); -+ return __builtin_aarch64_usra_ndi_uuus (__a, __b, __c); - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -@@ -23714,29 +22754,25 @@ - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) - { -- return (uint8x16_t) __builtin_aarch64_usra_nv16qi ((int8x16_t) __a, -- (int8x16_t) __b, __c); -+ return __builtin_aarch64_usra_nv16qi_uuus (__a, __b, __c); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) - { -- return (uint16x8_t) __builtin_aarch64_usra_nv8hi ((int16x8_t) __a, -- (int16x8_t) __b, __c); -+ return __builtin_aarch64_usra_nv8hi_uuus (__a, __b, __c); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) - { -- return (uint32x4_t) __builtin_aarch64_usra_nv4si ((int32x4_t) __a, -- (int32x4_t) __b, __c); -+ return __builtin_aarch64_usra_nv4si_uuus (__a, __b, __c); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) - { -- return (uint64x2_t) __builtin_aarch64_usra_nv2di ((int64x2_t) __a, -- (int64x2_t) __b, __c); -+ return __builtin_aarch64_usra_nv2di_uuus (__a, __b, __c); - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -@@ -23748,7 +22784,7 @@ - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) - { -- return (uint64x1_t) __builtin_aarch64_usra_ndi (__a, __b, __c); -+ return __builtin_aarch64_usra_ndi_uuus (__a, __b, __c); - } - - /* vsri */ -@@ -23780,29 +22816,25 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) - { -- return (uint8x8_t) __builtin_aarch64_usri_nv8qi ((int8x8_t) __a, -- (int8x8_t) __b, __c); -+ return __builtin_aarch64_usri_nv8qi_uuus (__a, __b, __c); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) - { -- return (uint16x4_t) __builtin_aarch64_usri_nv4hi ((int16x4_t) __a, -- (int16x4_t) __b, __c); -+ return __builtin_aarch64_usri_nv4hi_uuus (__a, __b, __c); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) - { -- return (uint32x2_t) __builtin_aarch64_usri_nv2si ((int32x2_t) __a, -- (int32x2_t) __b, __c); -+ return __builtin_aarch64_usri_nv2si_uuus (__a, __b, __c); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) - { -- return (uint64x1_t) __builtin_aarch64_usri_ndi ((int64x1_t) 
__a, -- (int64x1_t) __b, __c); -+ return __builtin_aarch64_usri_ndi_uuus (__a, __b, __c); - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -@@ -23832,29 +22864,25 @@ - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) - { -- return (uint8x16_t) __builtin_aarch64_usri_nv16qi ((int8x16_t) __a, -- (int8x16_t) __b, __c); -+ return __builtin_aarch64_usri_nv16qi_uuus (__a, __b, __c); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) - { -- return (uint16x8_t) __builtin_aarch64_usri_nv8hi ((int16x8_t) __a, -- (int16x8_t) __b, __c); -+ return __builtin_aarch64_usri_nv8hi_uuus (__a, __b, __c); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) - { -- return (uint32x4_t) __builtin_aarch64_usri_nv4si ((int32x4_t) __a, -- (int32x4_t) __b, __c); -+ return __builtin_aarch64_usri_nv4si_uuus (__a, __b, __c); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) - { -- return (uint64x2_t) __builtin_aarch64_usri_nv2di ((int64x2_t) __a, -- (int64x2_t) __b, __c); -+ return __builtin_aarch64_usri_nv2di_uuus (__a, __b, __c); - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -@@ -23866,7 +22894,7 @@ - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vsrid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) - { -- return (uint64x1_t) __builtin_aarch64_usri_ndi (__a, __b, __c); -+ return __builtin_aarch64_usri_ndi_uuus (__a, __b, __c); - } - - /* vst1 */ -@@ -24970,6 +23998,438 @@ - - /* vtrn */ - -+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+vtrn1_f32 (float32x2_t __a, float32x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); -+#endif -+} -+ -+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+vtrn1_p8 (poly8x8_t __a, poly8x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); -+#endif -+} -+ -+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+vtrn1_p16 (poly16x4_t __a, poly16x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6}); -+#endif -+} -+ -+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+vtrn1_s8 (int8x8_t __a, int8x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); -+#endif -+} -+ -+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+vtrn1_s16 (int16x4_t __a, int16x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6}); -+#endif -+} -+ -+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+vtrn1_s32 (int32x2_t __a, 
int32x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); -+#endif -+} -+ -+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+vtrn1_u8 (uint8x8_t __a, uint8x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); -+#endif -+} -+ -+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+vtrn1_u16 (uint16x4_t __a, uint16x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6}); -+#endif -+} -+ -+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+vtrn1_u32 (uint32x2_t __a, uint32x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); -+#endif -+} -+ -+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+vtrn1q_f32 (float32x4_t __a, float32x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6}); -+#endif -+} -+ -+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -+vtrn1q_f64 (float64x2_t __a, float64x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); -+#endif -+} -+ -+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+vtrn1q_p8 (poly8x16_t __a, poly8x16_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30}); -+#endif -+} -+ -+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+vtrn1q_p16 (poly16x8_t __a, poly16x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); -+#endif -+} -+ -+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+vtrn1q_s8 (int8x16_t __a, int8x16_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30}); -+#endif -+} -+ -+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+vtrn1q_s16 (int16x8_t __a, int16x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); -+#endif -+} -+ -+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+vtrn1q_s32 (int32x4_t __a, int32x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6}); -+#endif -+} -+ 
-+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+vtrn1q_s64 (int64x2_t __a, int64x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); -+#endif -+} -+ -+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+vtrn1q_u8 (uint8x16_t __a, uint8x16_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30}); -+#endif -+} -+ -+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+vtrn1q_u16 (uint16x8_t __a, uint16x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); -+#endif -+} -+ -+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+vtrn1q_u32 (uint32x4_t __a, uint32x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6}); -+#endif -+} -+ -+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+vtrn1q_u64 (uint64x2_t __a, uint64x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); -+#endif -+} -+ -+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+vtrn2_f32 (float32x2_t __a, float32x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); -+#endif -+} -+ -+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+vtrn2_p8 (poly8x8_t __a, poly8x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); -+#endif -+} -+ -+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+vtrn2_p16 (poly16x4_t __a, poly16x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7}); -+#endif -+} -+ -+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+vtrn2_s8 (int8x8_t __a, int8x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); -+#endif -+} -+ -+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+vtrn2_s16 (int16x4_t __a, int16x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7}); -+#endif -+} -+ -+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+vtrn2_s32 (int32x2_t __a, int32x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); -+#endif -+} -+ -+__extension__ static 
__inline uint8x8_t __attribute__ ((__always_inline__)) -+vtrn2_u8 (uint8x8_t __a, uint8x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); -+#endif -+} -+ -+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+vtrn2_u16 (uint16x4_t __a, uint16x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7}); -+#endif -+} -+ -+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+vtrn2_u32 (uint32x2_t __a, uint32x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); -+#endif -+} -+ -+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+vtrn2q_f32 (float32x4_t __a, float32x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7}); -+#endif -+} -+ -+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -+vtrn2q_f64 (float64x2_t __a, float64x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); -+#endif -+} -+ -+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+vtrn2q_p8 (poly8x16_t __a, poly8x16_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31}); -+#endif -+} -+ -+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+vtrn2q_p16 (poly16x8_t __a, poly16x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); -+#endif -+} -+ -+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+vtrn2q_s8 (int8x16_t __a, int8x16_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31}); -+#endif -+} -+ -+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+vtrn2q_s16 (int16x8_t __a, int16x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); -+#endif -+} -+ -+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+vtrn2q_s32 (int32x4_t __a, int32x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7}); -+#endif -+} -+ -+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+vtrn2q_s64 (int64x2_t __a, int64x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); -+#else -+ 
return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); -+#endif -+} -+ -+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+vtrn2q_u8 (uint8x16_t __a, uint8x16_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31}); -+#endif -+} -+ -+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+vtrn2q_u16 (uint16x8_t __a, uint16x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); -+#endif -+} -+ -+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+vtrn2q_u32 (uint32x4_t __a, uint32x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7}); -+#endif -+} -+ -+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+vtrn2q_u64 (uint64x2_t __a, uint64x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); -+#endif -+} -+ - __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) - vtrn_f32 (float32x2_t a, float32x2_t b) - { -@@ -25083,19 +24543,19 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vtst_s8 (int8x8_t __a, int8x8_t __b) - { -- return (uint8x8_t) __builtin_aarch64_cmtstv8qi (__a, __b); -+ return (uint8x8_t) ((__a & __b) != 0); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vtst_s16 (int16x4_t __a, int16x4_t __b) - { -- return (uint16x4_t) __builtin_aarch64_cmtstv4hi (__a, __b); -+ return (uint16x4_t) ((__a & __b) != 0); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vtst_s32 (int32x2_t __a, int32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_cmtstv2si (__a, __b); -+ return (uint32x2_t) ((__a & __b) != 0); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -25107,22 +24567,19 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vtst_u8 (uint8x8_t __a, uint8x8_t __b) - { -- return (uint8x8_t) __builtin_aarch64_cmtstv8qi ((int8x8_t) __a, -- (int8x8_t) __b); -+ return ((__a & __b) != 0); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vtst_u16 (uint16x4_t __a, uint16x4_t __b) - { -- return (uint16x4_t) __builtin_aarch64_cmtstv4hi ((int16x4_t) __a, -- (int16x4_t) __b); -+ return ((__a & __b) != 0); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vtst_u32 (uint32x2_t __a, uint32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_cmtstv2si ((int32x2_t) __a, -- (int32x2_t) __b); -+ return ((__a & __b) != 0); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -25134,53 +24591,49 @@ - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vtstq_s8 (int8x16_t __a, int8x16_t __b) - { -- return (uint8x16_t) __builtin_aarch64_cmtstv16qi (__a, __b); -+ return (uint8x16_t) ((__a & __b) != 0); - } - - __extension__ static __inline uint16x8_t __attribute__ 
((__always_inline__)) - vtstq_s16 (int16x8_t __a, int16x8_t __b) - { -- return (uint16x8_t) __builtin_aarch64_cmtstv8hi (__a, __b); -+ return (uint16x8_t) ((__a & __b) != 0); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vtstq_s32 (int32x4_t __a, int32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_cmtstv4si (__a, __b); -+ return (uint32x4_t) ((__a & __b) != 0); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vtstq_s64 (int64x2_t __a, int64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_cmtstv2di (__a, __b); -+ return (uint64x2_t) ((__a & __b) != __AARCH64_INT64_C (0)); - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vtstq_u8 (uint8x16_t __a, uint8x16_t __b) - { -- return (uint8x16_t) __builtin_aarch64_cmtstv16qi ((int8x16_t) __a, -- (int8x16_t) __b); -+ return ((__a & __b) != 0); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vtstq_u16 (uint16x8_t __a, uint16x8_t __b) - { -- return (uint16x8_t) __builtin_aarch64_cmtstv8hi ((int16x8_t) __a, -- (int16x8_t) __b); -+ return ((__a & __b) != 0); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vtstq_u32 (uint32x4_t __a, uint32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_cmtstv4si ((int32x4_t) __a, -- (int32x4_t) __b); -+ return ((__a & __b) != 0); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vtstq_u64 (uint64x2_t __a, uint64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_cmtstv2di ((int64x2_t) __a, -- (int64x2_t) __b); -+ return ((__a & __b) != __AARCH64_UINT64_C (0)); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -25200,73 +24653,73 @@ - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vuqadd_s8 (int8x8_t __a, uint8x8_t __b) - { -- return (int8x8_t) __builtin_aarch64_suqaddv8qi (__a, (int8x8_t) __b); -+ return __builtin_aarch64_suqaddv8qi_ssu (__a, __b); - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vuqadd_s16 (int16x4_t __a, uint16x4_t __b) - { -- return (int16x4_t) __builtin_aarch64_suqaddv4hi (__a, (int16x4_t) __b); -+ return __builtin_aarch64_suqaddv4hi_ssu (__a, __b); - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vuqadd_s32 (int32x2_t __a, uint32x2_t __b) - { -- return (int32x2_t) __builtin_aarch64_suqaddv2si (__a, (int32x2_t) __b); -+ return __builtin_aarch64_suqaddv2si_ssu (__a, __b); - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vuqadd_s64 (int64x1_t __a, uint64x1_t __b) - { -- return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b); -+ return __builtin_aarch64_suqadddi_ssu (__a, __b); - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vuqaddq_s8 (int8x16_t __a, uint8x16_t __b) - { -- return (int8x16_t) __builtin_aarch64_suqaddv16qi (__a, (int8x16_t) __b); -+ return __builtin_aarch64_suqaddv16qi_ssu (__a, __b); - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vuqaddq_s16 (int16x8_t __a, uint16x8_t __b) - { -- return (int16x8_t) __builtin_aarch64_suqaddv8hi (__a, (int16x8_t) __b); -+ return __builtin_aarch64_suqaddv8hi_ssu (__a, __b); - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vuqaddq_s32 (int32x4_t __a, uint32x4_t __b) - { -- return (int32x4_t) 
__builtin_aarch64_suqaddv4si (__a, (int32x4_t) __b); -+ return __builtin_aarch64_suqaddv4si_ssu (__a, __b); - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vuqaddq_s64 (int64x2_t __a, uint64x2_t __b) - { -- return (int64x2_t) __builtin_aarch64_suqaddv2di (__a, (int64x2_t) __b); -+ return __builtin_aarch64_suqaddv2di_ssu (__a, __b); - } - - __extension__ static __inline int8_t __attribute__ ((__always_inline__)) - vuqaddb_s8 (int8_t __a, uint8_t __b) - { -- return (int8_t) __builtin_aarch64_suqaddqi (__a, (int8_t) __b); -+ return __builtin_aarch64_suqaddqi_ssu (__a, __b); - } - - __extension__ static __inline int16_t __attribute__ ((__always_inline__)) - vuqaddh_s16 (int16_t __a, uint16_t __b) - { -- return (int16_t) __builtin_aarch64_suqaddhi (__a, (int16_t) __b); -+ return __builtin_aarch64_suqaddhi_ssu (__a, __b); - } - - __extension__ static __inline int32_t __attribute__ ((__always_inline__)) - vuqadds_s32 (int32_t __a, uint32_t __b) - { -- return (int32_t) __builtin_aarch64_suqaddsi (__a, (int32_t) __b); -+ return __builtin_aarch64_suqaddsi_ssu (__a, __b); - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vuqaddd_s64 (int64x1_t __a, uint64x1_t __b) - { -- return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b); -+ return __builtin_aarch64_suqadddi_ssu (__a, __b); - } - - #define __DEFINTERLEAVE(op, rettype, intype, funcsuffix, Q) \ -@@ -25300,10 +24753,880 @@ - - /* vuzp */ - -+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+vuzp1_f32 (float32x2_t __a, float32x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); -+#endif -+} -+ -+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+vuzp1_p8 (poly8x8_t __a, poly8x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); -+#endif -+} -+ -+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+vuzp1_p16 (poly16x4_t __a, poly16x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6}); -+#endif -+} -+ -+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+vuzp1_s8 (int8x8_t __a, int8x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); -+#endif -+} -+ -+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+vuzp1_s16 (int16x4_t __a, int16x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6}); -+#endif -+} -+ -+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+vuzp1_s32 (int32x2_t __a, int32x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); -+#endif -+} -+ -+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+vuzp1_u8 (uint8x8_t __a, uint8x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, 
(uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); -+#endif -+} -+ -+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+vuzp1_u16 (uint16x4_t __a, uint16x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6}); -+#endif -+} -+ -+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+vuzp1_u32 (uint32x2_t __a, uint32x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); -+#endif -+} -+ -+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+vuzp1q_f32 (float32x4_t __a, float32x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6}); -+#endif -+} -+ -+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -+vuzp1q_f64 (float64x2_t __a, float64x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); -+#endif -+} -+ -+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+vuzp1q_p8 (poly8x16_t __a, poly8x16_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}); -+#endif -+} -+ -+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+vuzp1q_p16 (poly16x8_t __a, poly16x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); -+#endif -+} -+ -+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+vuzp1q_s8 (int8x16_t __a, int8x16_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}); -+#endif -+} -+ -+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+vuzp1q_s16 (int16x8_t __a, int16x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); -+#endif -+} -+ -+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+vuzp1q_s32 (int32x4_t __a, int32x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6}); -+#endif -+} -+ -+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+vuzp1q_s64 (int64x2_t __a, int64x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); -+#endif -+} -+ -+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+vuzp1q_u8 
(uint8x16_t __a, uint8x16_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}); -+#endif -+} -+ -+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+vuzp1q_u16 (uint16x8_t __a, uint16x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); -+#endif -+} -+ -+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+vuzp1q_u32 (uint32x4_t __a, uint32x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6}); -+#endif -+} -+ -+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+vuzp1q_u64 (uint64x2_t __a, uint64x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); -+#endif -+} -+ -+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+vuzp2_f32 (float32x2_t __a, float32x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); -+#endif -+} -+ -+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+vuzp2_p8 (poly8x8_t __a, poly8x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); -+#endif -+} -+ -+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+vuzp2_p16 (poly16x4_t __a, poly16x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7}); -+#endif -+} -+ -+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+vuzp2_s8 (int8x8_t __a, int8x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); -+#endif -+} -+ -+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+vuzp2_s16 (int16x4_t __a, int16x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7}); -+#endif -+} -+ -+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+vuzp2_s32 (int32x2_t __a, int32x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); -+#endif -+} -+ -+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+vuzp2_u8 (uint8x8_t __a, uint8x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); -+#endif -+} -+ -+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 
-+vuzp2_u16 (uint16x4_t __a, uint16x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7}); -+#endif -+} -+ -+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+vuzp2_u32 (uint32x2_t __a, uint32x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); -+#endif -+} -+ -+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+vuzp2q_f32 (float32x4_t __a, float32x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7}); -+#endif -+} -+ -+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -+vuzp2q_f64 (float64x2_t __a, float64x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); -+#endif -+} -+ -+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+vuzp2q_p8 (poly8x16_t __a, poly8x16_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}); -+#endif -+} -+ -+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+vuzp2q_p16 (poly16x8_t __a, poly16x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); -+#endif -+} -+ -+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+vuzp2q_s8 (int8x16_t __a, int8x16_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}); -+#endif -+} -+ -+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+vuzp2q_s16 (int16x8_t __a, int16x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); -+#endif -+} -+ -+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+vuzp2q_s32 (int32x4_t __a, int32x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7}); -+#endif -+} -+ -+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+vuzp2q_s64 (int64x2_t __a, int64x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); -+#endif -+} -+ -+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+vuzp2q_u8 (uint8x16_t __a, uint8x16_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14}); -+#else -+ return __builtin_shuffle (__a, __b, 
(uint8x16_t) -+ {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}); -+#endif -+} -+ -+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+vuzp2q_u16 (uint16x8_t __a, uint16x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); -+#endif -+} -+ -+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+vuzp2q_u32 (uint32x4_t __a, uint32x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7}); -+#endif -+} -+ -+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+vuzp2q_u64 (uint64x2_t __a, uint64x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); -+#endif -+} -+ - __INTERLEAVE_LIST (uzp) - - /* vzip */ - -+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+vzip1_f32 (float32x2_t __a, float32x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); -+#endif -+} -+ -+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+vzip1_p8 (poly8x8_t __a, poly8x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); -+#endif -+} -+ -+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+vzip1_p16 (poly16x4_t __a, poly16x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5}); -+#endif -+} -+ -+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+vzip1_s8 (int8x8_t __a, int8x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); -+#endif -+} -+ -+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+vzip1_s16 (int16x4_t __a, int16x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5}); -+#endif -+} -+ -+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+vzip1_s32 (int32x2_t __a, int32x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); -+#endif -+} -+ -+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+vzip1_u8 (uint8x8_t __a, uint8x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); -+#endif -+} -+ -+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+vzip1_u16 (uint16x4_t __a, uint16x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3}); -+#else -+ return __builtin_shuffle (__a, __b, 
(uint16x4_t) {0, 4, 1, 5}); -+#endif -+} -+ -+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+vzip1_u32 (uint32x2_t __a, uint32x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); -+#endif -+} -+ -+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+vzip1q_f32 (float32x4_t __a, float32x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5}); -+#endif -+} -+ -+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -+vzip1q_f64 (float64x2_t __a, float64x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); -+#endif -+} -+ -+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+vzip1q_p8 (poly8x16_t __a, poly8x16_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}); -+#endif -+} -+ -+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+vzip1q_p16 (poly16x8_t __a, poly16x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) -+ {12, 4, 13, 5, 14, 6, 15, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); -+#endif -+} -+ -+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+vzip1q_s8 (int8x16_t __a, int8x16_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}); -+#endif -+} -+ -+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+vzip1q_s16 (int16x8_t __a, int16x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) -+ {12, 4, 13, 5, 14, 6, 15, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); -+#endif -+} -+ -+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+vzip1q_s32 (int32x4_t __a, int32x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5}); -+#endif -+} -+ -+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+vzip1q_s64 (int64x2_t __a, int64x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); -+#endif -+} -+ -+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+vzip1q_u8 (uint8x16_t __a, uint8x16_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}); -+#endif -+} -+ -+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 
-+vzip1q_u16 (uint16x8_t __a, uint16x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) -+ {12, 4, 13, 5, 14, 6, 15, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); -+#endif -+} -+ -+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+vzip1q_u32 (uint32x4_t __a, uint32x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5}); -+#endif -+} -+ -+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+vzip1q_u64 (uint64x2_t __a, uint64x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); -+#endif -+} -+ -+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+vzip2_f32 (float32x2_t __a, float32x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); -+#endif -+} -+ -+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+vzip2_p8 (poly8x8_t __a, poly8x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15}); -+#endif -+} -+ -+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+vzip2_p16 (poly16x4_t __a, poly16x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7}); -+#endif -+} -+ -+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+vzip2_s8 (int8x8_t __a, int8x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15}); -+#endif -+} -+ -+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+vzip2_s16 (int16x4_t __a, int16x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7}); -+#endif -+} -+ -+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+vzip2_s32 (int32x2_t __a, int32x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); -+#endif -+} -+ -+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+vzip2_u8 (uint8x8_t __a, uint8x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15}); -+#endif -+} -+ -+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+vzip2_u16 (uint16x4_t __a, uint16x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7}); -+#endif -+} -+ -+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+vzip2_u32 (uint32x2_t __a, uint32x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return 
__builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); -+#endif -+} -+ -+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+vzip2q_f32 (float32x4_t __a, float32x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7}); -+#endif -+} -+ -+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -+vzip2q_f64 (float64x2_t __a, float64x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); -+#endif -+} -+ -+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+vzip2q_p8 (poly8x16_t __a, poly8x16_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}); -+#endif -+} -+ -+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+vzip2q_p16 (poly16x8_t __a, poly16x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) -+ {4, 12, 5, 13, 6, 14, 7, 15}); -+#endif -+} -+ -+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+vzip2q_s8 (int8x16_t __a, int8x16_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}); -+#endif -+} -+ -+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+vzip2q_s16 (int16x8_t __a, int16x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) -+ {4, 12, 5, 13, 6, 14, 7, 15}); -+#endif -+} -+ -+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+vzip2q_s32 (int32x4_t __a, int32x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7}); -+#endif -+} -+ -+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+vzip2q_s64 (int64x2_t __a, int64x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); -+#endif -+} -+ -+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+vzip2q_u8 (uint8x16_t __a, uint8x16_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}); -+#endif -+} -+ -+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+vzip2q_u16 (uint16x8_t __a, uint16x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) -+ 
{4, 12, 5, 13, 6, 14, 7, 15}); -+#endif -+} -+ -+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+vzip2q_u32 (uint32x4_t __a, uint32x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7}); -+#endif -+} -+ -+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+vzip2q_u64 (uint64x2_t __a, uint64x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); -+#endif -+} -+ - __INTERLEAVE_LIST (zip) - - #undef __INTERLEAVE_LIST ---- a/src/gcc/config/aarch64/t-aarch64-linux -+++ b/src/gcc/config/aarch64/t-aarch64-linux -@@ -22,10 +22,7 @@ - LIB1ASMFUNCS = _aarch64_sync_cache_range - - AARCH_BE = $(if $(findstring TARGET_BIG_ENDIAN_DEFAULT=1, $(tm_defines)),_be) --MULTILIB_OSDIRNAMES = .=../lib64$(call if_multiarch,:aarch64$(AARCH_BE)-linux-gnu) -+MULTILIB_OSDIRNAMES = mabi.lp64=../lib64$(call if_multiarch,:aarch64$(AARCH_BE)-linux-gnu) - MULTIARCH_DIRNAME = $(call if_multiarch,aarch64$(AARCH_BE)-linux-gnu) - --# Disable the multilib for linux-gnu targets for the time being; focus --# on the baremetal targets. --MULTILIB_OPTIONS = --MULTILIB_DIRNAMES = -+MULTILIB_OSDIRNAMES += mabi.ilp32=../libilp32 ---- a/src/gcc/config/aarch64/aarch64.md -+++ b/src/gcc/config/aarch64/aarch64.md -@@ -67,7 +67,14 @@ - - (define_c_enum "unspec" [ - UNSPEC_CASESI -- UNSPEC_CLS -+ UNSPEC_CRC32B -+ UNSPEC_CRC32CB -+ UNSPEC_CRC32CH -+ UNSPEC_CRC32CW -+ UNSPEC_CRC32CX -+ UNSPEC_CRC32H -+ UNSPEC_CRC32W -+ UNSPEC_CRC32X - UNSPEC_FRECPE - UNSPEC_FRECPS - UNSPEC_FRECPX -@@ -83,8 +90,11 @@ - UNSPEC_GOTTINYPIC - UNSPEC_LD1 - UNSPEC_LD2 -+ UNSPEC_LD2_DUP - UNSPEC_LD3 -+ UNSPEC_LD3_DUP - UNSPEC_LD4 -+ UNSPEC_LD4_DUP - UNSPEC_MB - UNSPEC_NOP - UNSPEC_PRLG_STK -@@ -98,15 +108,24 @@ - UNSPEC_ST2 - UNSPEC_ST3 - UNSPEC_ST4 -+ UNSPEC_ST2_LANE -+ UNSPEC_ST3_LANE -+ UNSPEC_ST4_LANE - UNSPEC_TLS - UNSPEC_TLSDESC - UNSPEC_USHL_2S - UNSPEC_USHR64 - UNSPEC_VSTRUCTDUMMY -+ UNSPEC_SP_SET -+ UNSPEC_SP_TEST - ]) - - (define_c_enum "unspecv" [ - UNSPECV_EH_RETURN ; Represent EH_RETURN -+ UNSPECV_GET_FPCR ; Represent fetch of FPCR content. -+ UNSPECV_SET_FPCR ; Represent assign of FPCR content. -+ UNSPECV_GET_FPSR ; Represent fetch of FPSR content. -+ UNSPECV_SET_FPSR ; Represent assign of FPSR content. 
- ] - ) - -@@ -159,7 +178,7 @@ - - (define_attr "generic_sched" "yes,no" - (const (if_then_else -- (eq_attr "tune" "cortexa53,cortexa15") -+ (eq_attr "tune" "cortexa53,cortexa15,thunderx") - (const_string "no") - (const_string "yes")))) - -@@ -166,6 +185,7 @@ - ;; Scheduling - (include "../arm/cortex-a53.md") - (include "../arm/cortex-a15.md") -+(include "thunderx.md") - - ;; ------------------------------------------------------------------- - ;; Jumps and other miscellaneous insns -@@ -514,6 +534,10 @@ - (use (match_operand 2 "" ""))])] - "" - { -+ if (!REG_P (XEXP (operands[0], 0)) -+ && (GET_CODE (XEXP (operands[0], 0)) != SYMBOL_REF)) -+ XEXP (operands[0], 0) = force_reg (Pmode, XEXP (operands[0], 0)); -+ - if (operands[2] == NULL_RTX) - operands[2] = const0_rtx; - } -@@ -527,6 +551,10 @@ - (use (match_operand 3 "" ""))])] - "" - { -+ if (!REG_P (XEXP (operands[1], 0)) -+ && (GET_CODE (XEXP (operands[1], 0)) != SYMBOL_REF)) -+ XEXP (operands[1], 0) = force_reg (Pmode, XEXP (operands[1], 0)); -+ - if (operands[3] == NULL_RTX) - operands[3] = const0_rtx; - } -@@ -533,25 +561,29 @@ - ) - - (define_insn "*sibcall_insn" -- [(call (mem:DI (match_operand:DI 0 "" "X")) -+ [(call (mem:DI (match_operand:DI 0 "aarch64_call_insn_operand" "Ucs, Usf")) - (match_operand 1 "" "")) - (return) - (use (match_operand 2 "" ""))] -- "GET_CODE (operands[0]) == SYMBOL_REF" -- "b\\t%a0" -- [(set_attr "type" "branch")] -- -+ "SIBLING_CALL_P (insn)" -+ "@ -+ br\\t%0 -+ b\\t%a0" -+ [(set_attr "type" "branch, branch")] - ) - - (define_insn "*sibcall_value_insn" - [(set (match_operand 0 "" "") -- (call (mem:DI (match_operand 1 "" "X")) -+ (call (mem:DI -+ (match_operand:DI 1 "aarch64_call_insn_operand" "Ucs, Usf")) - (match_operand 2 "" ""))) - (return) - (use (match_operand 3 "" ""))] -- "GET_CODE (operands[1]) == SYMBOL_REF" -- "b\\t%a1" -- [(set_attr "type" "branch")] -+ "SIBLING_CALL_P (insn)" -+ "@ -+ br\\t%1 -+ b\\t%a1" -+ [(set_attr "type" "branch, branch")] - ) - - ;; Call subroutine returning any type. -@@ -641,17 +673,20 @@ - if (GET_CODE (operands[0]) == MEM && operands[1] != const0_rtx) - operands[1] = force_reg (<MODE>mode, operands[1]); - -- if (CONSTANT_P (operands[1])) -- { -- aarch64_expand_mov_immediate (operands[0], operands[1]); -- DONE; -- } -+ /* FIXME: RR we still need to fix up what we are doing with -+ symbol_refs and other types of constants. 
*/ -+ if (CONSTANT_P (operands[1]) -+ && !CONST_INT_P (operands[1])) -+ { -+ aarch64_expand_mov_immediate (operands[0], operands[1]); -+ DONE; -+ } - " - ) - --(define_insn "*movsi_aarch64" -- [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,*w,m, m,r,r ,*w, r,*w") -- (match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,m, m,rZ,*w,S,Ush,rZ,*w,*w"))] -+(define_insn_and_split "*movsi_aarch64" -+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m, m,r,r ,*w, r,*w") -+ (match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,n,m, m,rZ,*w,S,Ush,rZ,*w,*w"))] - "(register_operand (operands[0], SImode) - || aarch64_reg_or_zero (operands[1], SImode))" - "@ -@@ -659,6 +694,7 @@ - mov\\t%w0, %w1 - mov\\t%w0, %w1 - mov\\t%w0, %1 -+ # - ldr\\t%w0, %1 - ldr\\t%s0, %1 - str\\t%w1, %0 -@@ -668,14 +704,20 @@ - fmov\\t%s0, %w1 - fmov\\t%w0, %s1 - fmov\\t%s0, %s1" -- [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,load1,load1,store1,store1,\ -- adr,adr,fmov,fmov,fmov") -- (set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes")] -+ "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode)" -+ [(const_int 0)] -+ "{ -+ aarch64_expand_mov_immediate (operands[0], operands[1]); -+ DONE; -+ }" -+ [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,\ -+ adr,adr,f_mcr,f_mrc,fmov") -+ (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes")] - ) - --(define_insn "*movdi_aarch64" -- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,*w,m, m,r,r, *w, r,*w,w") -- (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,m, m,rZ,*w,S,Ush,rZ,*w,*w,Dd"))] -+(define_insn_and_split "*movdi_aarch64" -+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m, m,r,r, *w, r,*w,w") -+ (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,n,m, m,rZ,*w,S,Ush,rZ,*w,*w,Dd"))] - "(register_operand (operands[0], DImode) - || aarch64_reg_or_zero (operands[1], DImode))" - "@ -@@ -683,6 +725,7 @@ - mov\\t%0, %x1 - mov\\t%x0, %1 - mov\\t%x0, %1 -+ # - ldr\\t%x0, %1 - ldr\\t%d0, %1 - str\\t%x1, %0 -@@ -693,10 +736,16 @@ - fmov\\t%x0, %d1 - fmov\\t%d0, %d1 - movi\\t%d0, %1" -- [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,load1,load1,store1,store1,\ -- adr,adr,fmov,fmov,fmov,fmov") -- (set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*") -- (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,yes")] -+ "(CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), DImode))" -+ [(const_int 0)] -+ "{ -+ aarch64_expand_mov_immediate (operands[0], operands[1]); -+ DONE; -+ }" -+ [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,\ -+ adr,adr,f_mcr,f_mrc,fmov,fmov") -+ (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*") -+ (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")] - ) - - (define_insn "insv_imm<mode>" -@@ -789,7 +838,7 @@ - str\\t%w1, %0 - mov\\t%w0, %w1" - [(set_attr "type" "f_mcr,f_mrc,fmov,fconsts,\ -- f_loads,f_stores,f_loads,f_stores,fmov")] -+ f_loads,f_stores,f_loads,f_stores,mov_reg")] - ) - - (define_insn "*movdf_aarch64" -@@ -863,6 +912,24 @@ - } - ) - -+;; 0 is dst -+;; 1 is src -+;; 2 is size of move in bytes -+;; 3 is alignment -+ -+(define_expand "movmemdi" -+ [(match_operand:BLK 0 "memory_operand") -+ (match_operand:BLK 1 "memory_operand") -+ (match_operand:DI 2 "immediate_operand") -+ (match_operand:DI 3 "immediate_operand")] -+ "!STRICT_ALIGNMENT" -+{ -+ if (aarch64_expand_movmem (operands)) -+ DONE; -+ FAIL; -+} -+) -+ - ;; Operands 1 and 3 are tied together by the final condition; 
so we allow - ;; fairly lax checking on the second memory operation. - (define_insn "load_pair<mode>" -@@ -923,31 +990,45 @@ - [(set_attr "type" "neon_store1_2reg<q>")] - ) - --;; Load pair with writeback. This is primarily used in function epilogues --;; when restoring [fp,lr] -+;; Load pair with post-index writeback. This is primarily used in function -+;; epilogues. - (define_insn "loadwb_pair<GPI:mode>_<P:mode>" - [(parallel - [(set (match_operand:P 0 "register_operand" "=k") - (plus:P (match_operand:P 1 "register_operand" "0") -- (match_operand:P 4 "const_int_operand" "n"))) -+ (match_operand:P 4 "aarch64_mem_pair_offset" "n"))) - (set (match_operand:GPI 2 "register_operand" "=r") -- (mem:GPI (plus:P (match_dup 1) -- (match_dup 4)))) -+ (mem:GPI (match_dup 1))) - (set (match_operand:GPI 3 "register_operand" "=r") - (mem:GPI (plus:P (match_dup 1) - (match_operand:P 5 "const_int_operand" "n"))))])] -- "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (<GPI:MODE>mode)" -+ "INTVAL (operands[5]) == GET_MODE_SIZE (<GPI:MODE>mode)" - "ldp\\t%<w>2, %<w>3, [%1], %4" - [(set_attr "type" "load2")] - ) - --;; Store pair with writeback. This is primarily used in function prologues --;; when saving [fp,lr] -+(define_insn "loadwb_pair<GPF:mode>_<P:mode>" -+ [(parallel -+ [(set (match_operand:P 0 "register_operand" "=k") -+ (plus:P (match_operand:P 1 "register_operand" "0") -+ (match_operand:P 4 "aarch64_mem_pair_offset" "n"))) -+ (set (match_operand:GPF 2 "register_operand" "=w") -+ (mem:GPF (match_dup 1))) -+ (set (match_operand:GPF 3 "register_operand" "=w") -+ (mem:GPF (plus:P (match_dup 1) -+ (match_operand:P 5 "const_int_operand" "n"))))])] -+ "INTVAL (operands[5]) == GET_MODE_SIZE (<GPF:MODE>mode)" -+ "ldp\\t%<w>2, %<w>3, [%1], %4" -+ [(set_attr "type" "neon_load1_2reg")] -+) -+ -+;; Store pair with pre-index writeback. This is primarily used in function -+;; prologues. - (define_insn "storewb_pair<GPI:mode>_<P:mode>" - [(parallel - [(set (match_operand:P 0 "register_operand" "=&k") - (plus:P (match_operand:P 1 "register_operand" "0") -- (match_operand:P 4 "const_int_operand" "n"))) -+ (match_operand:P 4 "aarch64_mem_pair_offset" "n"))) - (set (mem:GPI (plus:P (match_dup 0) - (match_dup 4))) - (match_operand:GPI 2 "register_operand" "r")) -@@ -959,6 +1040,22 @@ - [(set_attr "type" "store2")] - ) - -+(define_insn "storewb_pair<GPF:mode>_<P:mode>" -+ [(parallel -+ [(set (match_operand:P 0 "register_operand" "=&k") -+ (plus:P (match_operand:P 1 "register_operand" "0") -+ (match_operand:P 4 "aarch64_mem_pair_offset" "n"))) -+ (set (mem:GPF (plus:P (match_dup 0) -+ (match_dup 4))) -+ (match_operand:GPF 2 "register_operand" "w")) -+ (set (mem:GPF (plus:P (match_dup 0) -+ (match_operand:P 5 "const_int_operand" "n"))) -+ (match_operand:GPF 3 "register_operand" "w"))])] -+ "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (<GPF:MODE>mode)" -+ "stp\\t%<w>2, %<w>3, [%0, %4]!" 
-+ [(set_attr "type" "neon_store1_2reg<q>")] -+) -+ - ;; ------------------------------------------------------------------- - ;; Sign/Zero extension - ;; ------------------------------------------------------------------- -@@ -1063,16 +1160,18 @@ - - (define_insn "*addsi3_aarch64" - [(set -- (match_operand:SI 0 "register_operand" "=rk,rk,rk") -+ (match_operand:SI 0 "register_operand" "=rk,rk,w,rk") - (plus:SI -- (match_operand:SI 1 "register_operand" "%rk,rk,rk") -- (match_operand:SI 2 "aarch64_plus_operand" "I,r,J")))] -+ (match_operand:SI 1 "register_operand" "%rk,rk,w,rk") -+ (match_operand:SI 2 "aarch64_plus_operand" "I,r,w,J")))] - "" - "@ - add\\t%w0, %w1, %2 - add\\t%w0, %w1, %w2 -+ add\\t%0.2s, %1.2s, %2.2s - sub\\t%w0, %w1, #%n2" -- [(set_attr "type" "alu_imm,alu_reg,alu_imm")] -+ [(set_attr "type" "alu_imm,alu_reg,neon_add,alu_imm") -+ (set_attr "simd" "*,*,yes,*")] - ) - - ;; zero_extend version of above -@@ -1106,7 +1205,26 @@ - (set_attr "simd" "*,*,*,yes")] - ) - --(define_insn "*add<mode>3_compare0" -+(define_expand "addti3" -+ [(set (match_operand:TI 0 "register_operand" "") -+ (plus:TI (match_operand:TI 1 "register_operand" "") -+ (match_operand:TI 2 "register_operand" "")))] -+ "" -+{ -+ rtx low = gen_reg_rtx (DImode); -+ emit_insn (gen_adddi3_compare0 (low, gen_lowpart (DImode, operands[1]), -+ gen_lowpart (DImode, operands[2]))); -+ -+ rtx high = gen_reg_rtx (DImode); -+ emit_insn (gen_adddi3_carryin (high, gen_highpart (DImode, operands[1]), -+ gen_highpart (DImode, operands[2]))); -+ -+ emit_move_insn (gen_lowpart (DImode, operands[0]), low); -+ emit_move_insn (gen_highpart (DImode, operands[0]), high); -+ DONE; -+}) -+ -+(define_insn "add<mode>3_compare0" - [(set (reg:CC_NZ CC_REGNUM) - (compare:CC_NZ - (plus:GPI (match_operand:GPI 1 "register_operand" "%r,r,r") -@@ -1390,7 +1508,7 @@ - [(set_attr "type" "alu_ext")] - ) - --(define_insn "*add<mode>3_carryin" -+(define_insn "add<mode>3_carryin" - [(set - (match_operand:GPI 0 "register_operand" "=r") - (plus:GPI (geu:GPI (reg:CC CC_REGNUM) (const_int 0)) -@@ -1558,8 +1676,26 @@ - (set_attr "simd" "*,yes")] - ) - -+(define_expand "subti3" -+ [(set (match_operand:TI 0 "register_operand" "") -+ (minus:TI (match_operand:TI 1 "register_operand" "") -+ (match_operand:TI 2 "register_operand" "")))] -+ "" -+{ -+ rtx low = gen_reg_rtx (DImode); -+ emit_insn (gen_subdi3_compare0 (low, gen_lowpart (DImode, operands[1]), -+ gen_lowpart (DImode, operands[2]))); - --(define_insn "*sub<mode>3_compare0" -+ rtx high = gen_reg_rtx (DImode); -+ emit_insn (gen_subdi3_carryin (high, gen_highpart (DImode, operands[1]), -+ gen_highpart (DImode, operands[2]))); -+ -+ emit_move_insn (gen_lowpart (DImode, operands[0]), low); -+ emit_move_insn (gen_highpart (DImode, operands[0]), high); -+ DONE; -+}) -+ -+(define_insn "sub<mode>3_compare0" - [(set (reg:CC_NZ CC_REGNUM) - (compare:CC_NZ (minus:GPI (match_operand:GPI 1 "register_operand" "r") - (match_operand:GPI 2 "register_operand" "r")) -@@ -1706,7 +1842,7 @@ - [(set_attr "type" "alu_ext")] - ) - --(define_insn "*sub<mode>3_carryin" -+(define_insn "sub<mode>3_carryin" - [(set - (match_operand:GPI 0 "register_operand" "=r") - (minus:GPI (minus:GPI -@@ -1935,7 +2071,7 @@ - [(set_attr "type" "mul")] - ) - --(define_insn "*madd<mode>" -+(define_insn "madd<mode>" - [(set (match_operand:GPI 0 "register_operand" "=r") - (plus:GPI (mult:GPI (match_operand:GPI 1 "register_operand" "r") - (match_operand:GPI 2 "register_operand" "r")) -@@ -2045,6 +2181,48 @@ - [(set_attr "type" "<su>mull")] - ) - 
-+(define_expand "<su_optab>mulditi3" -+ [(set (match_operand:TI 0 "register_operand") -+ (mult:TI (ANY_EXTEND:TI (match_operand:DI 1 "register_operand")) -+ (ANY_EXTEND:TI (match_operand:DI 2 "register_operand"))))] -+ "" -+{ -+ rtx low = gen_reg_rtx (DImode); -+ emit_insn (gen_muldi3 (low, operands[1], operands[2])); -+ -+ rtx high = gen_reg_rtx (DImode); -+ emit_insn (gen_<su>muldi3_highpart (high, operands[1], operands[2])); -+ -+ emit_move_insn (gen_lowpart (DImode, operands[0]), low); -+ emit_move_insn (gen_highpart (DImode, operands[0]), high); -+ DONE; -+}) -+ -+;; The default expansion of multi3 using umuldi3_highpart will perform -+;; the additions in an order that fails to combine into two madd insns. -+(define_expand "multi3" -+ [(set (match_operand:TI 0 "register_operand") -+ (mult:TI (match_operand:TI 1 "register_operand") -+ (match_operand:TI 2 "register_operand")))] -+ "" -+{ -+ rtx l0 = gen_reg_rtx (DImode); -+ rtx l1 = gen_lowpart (DImode, operands[1]); -+ rtx l2 = gen_lowpart (DImode, operands[2]); -+ rtx h0 = gen_reg_rtx (DImode); -+ rtx h1 = gen_highpart (DImode, operands[1]); -+ rtx h2 = gen_highpart (DImode, operands[2]); -+ -+ emit_insn (gen_muldi3 (l0, l1, l2)); -+ emit_insn (gen_umuldi3_highpart (h0, l1, l2)); -+ emit_insn (gen_madddi (h0, h1, l2, h0)); -+ emit_insn (gen_madddi (h0, l1, h2, h0)); -+ -+ emit_move_insn (gen_lowpart (DImode, operands[0]), l0); -+ emit_move_insn (gen_highpart (DImode, operands[0]), h0); -+ DONE; -+}) -+ - (define_insn "<su>muldi3_highpart" - [(set (match_operand:DI 0 "register_operand" "=r") - (truncate:DI -@@ -2345,11 +2523,46 @@ - } - ) - -+(define_expand "mov<mode>cc" -+ [(set (match_operand:GPF 0 "register_operand" "") -+ (if_then_else:GPF (match_operand 1 "aarch64_comparison_operator" "") -+ (match_operand:GPF 2 "register_operand" "") -+ (match_operand:GPF 3 "register_operand" "")))] -+ "" -+ { -+ rtx ccreg; -+ enum rtx_code code = GET_CODE (operands[1]); -+ -+ if (code == UNEQ || code == LTGT) -+ FAIL; -+ -+ ccreg = aarch64_gen_compare_reg (code, XEXP (operands[1], 0), -+ XEXP (operands[1], 1)); -+ operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx); -+ } -+) -+ -+ -+;; CRC32 instructions. 
-+(define_insn "aarch64_<crc_variant>" -+ [(set (match_operand:SI 0 "register_operand" "=r") -+ (unspec:SI [(match_operand:SI 1 "register_operand" "r") -+ (match_operand:<crc_mode> 2 "register_operand" "r")] -+ CRC))] -+ "TARGET_CRC32" -+ { -+ if (GET_MODE_BITSIZE (GET_MODE (operands[2])) >= 64) -+ return "<crc_variant>\\t%w0, %w1, %x2"; -+ else -+ return "<crc_variant>\\t%w0, %w1, %w2"; -+ } -+ [(set_attr "type" "crc")] -+) -+ - (define_insn "*csinc2<mode>_insn" - [(set (match_operand:GPI 0 "register_operand" "=r") -- (plus:GPI (match_operator:GPI 2 "aarch64_comparison_operator" -- [(match_operand:CC 3 "cc_register" "") (const_int 0)]) -- (match_operand:GPI 1 "register_operand" "r")))] -+ (plus:GPI (match_operand 2 "aarch64_comparison_operation" "") -+ (match_operand:GPI 1 "register_operand" "r")))] - "" - "csinc\\t%<w>0, %<w>1, %<w>1, %M2" - [(set_attr "type" "csel")] -@@ -2358,13 +2571,12 @@ - (define_insn "csinc3<mode>_insn" - [(set (match_operand:GPI 0 "register_operand" "=r") - (if_then_else:GPI -- (match_operator:GPI 1 "aarch64_comparison_operator" -- [(match_operand:CC 2 "cc_register" "") (const_int 0)]) -- (plus:GPI (match_operand:GPI 3 "register_operand" "r") -+ (match_operand 1 "aarch64_comparison_operation" "") -+ (plus:GPI (match_operand:GPI 2 "register_operand" "r") - (const_int 1)) -- (match_operand:GPI 4 "aarch64_reg_or_zero" "rZ")))] -+ (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")))] - "" -- "csinc\\t%<w>0, %<w>4, %<w>3, %M1" -+ "csinc\\t%<w>0, %<w>3, %<w>2, %M1" - [(set_attr "type" "csel")] - ) - -@@ -2371,12 +2583,11 @@ - (define_insn "*csinv3<mode>_insn" - [(set (match_operand:GPI 0 "register_operand" "=r") - (if_then_else:GPI -- (match_operator:GPI 1 "aarch64_comparison_operator" -- [(match_operand:CC 2 "cc_register" "") (const_int 0)]) -- (not:GPI (match_operand:GPI 3 "register_operand" "r")) -- (match_operand:GPI 4 "aarch64_reg_or_zero" "rZ")))] -+ (match_operand 1 "aarch64_comparison_operation" "") -+ (not:GPI (match_operand:GPI 2 "register_operand" "r")) -+ (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")))] - "" -- "csinv\\t%<w>0, %<w>4, %<w>3, %M1" -+ "csinv\\t%<w>0, %<w>3, %<w>2, %M1" - [(set_attr "type" "csel")] - ) - -@@ -2383,12 +2594,11 @@ - (define_insn "*csneg3<mode>_insn" - [(set (match_operand:GPI 0 "register_operand" "=r") - (if_then_else:GPI -- (match_operator:GPI 1 "aarch64_comparison_operator" -- [(match_operand:CC 2 "cc_register" "") (const_int 0)]) -- (neg:GPI (match_operand:GPI 3 "register_operand" "r")) -- (match_operand:GPI 4 "aarch64_reg_or_zero" "rZ")))] -+ (match_operand 1 "aarch64_comparison_operation" "") -+ (neg:GPI (match_operand:GPI 2 "register_operand" "r")) -+ (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")))] - "" -- "csneg\\t%<w>0, %<w>4, %<w>3, %M1" -+ "csneg\\t%<w>0, %<w>3, %<w>2, %M1" - [(set_attr "type" "csel")] - ) - -@@ -2486,7 +2696,18 @@ - [(set_attr "type" "logic_shift_imm")] - ) - --;; zero_extend version of above -+(define_insn "*<optab>_rol<mode>3" -+ [(set (match_operand:GPI 0 "register_operand" "=r") -+ (LOGICAL:GPI (rotate:GPI -+ (match_operand:GPI 1 "register_operand" "r") -+ (match_operand:QI 2 "aarch64_shift_imm_<mode>" "n")) -+ (match_operand:GPI 3 "register_operand" "r")))] -+ "" -+ "<logical>\\t%<w>0, %<w>3, %<w>1, ror (<sizen> - %2)" -+ [(set_attr "type" "logic_shift_imm")] -+) -+ -+;; zero_extend versions of above - (define_insn "*<LOGICAL:optab>_<SHIFT:optab>si3_uxtw" - [(set (match_operand:DI 0 "register_operand" "=r") - (zero_extend:DI -@@ -2499,6 +2720,18 @@ - [(set_attr "type" "logic_shift_imm")] - ) 
- -+(define_insn "*<optab>_rolsi3_uxtw" -+ [(set (match_operand:DI 0 "register_operand" "=r") -+ (zero_extend:DI -+ (LOGICAL:SI (rotate:SI -+ (match_operand:SI 1 "register_operand" "r") -+ (match_operand:QI 2 "aarch64_shift_imm_si" "n")) -+ (match_operand:SI 3 "register_operand" "r"))))] -+ "" -+ "<logical>\\t%w0, %w3, %w1, ror (32 - %2)" -+ [(set_attr "type" "logic_shift_imm")] -+) -+ - (define_insn "one_cmpl<mode>2" - [(set (match_operand:GPI 0 "register_operand" "=r") - (not:GPI (match_operand:GPI 1 "register_operand" "r")))] -@@ -2622,7 +2855,7 @@ - - emit_insn (gen_rbit<mode>2 (operands[0], operands[1])); - emit_insn (gen_clz<mode>2 (operands[0], operands[0])); -- emit_insn (gen_csinc3<mode>_insn (operands[0], x, ccreg, operands[0], const0_rtx)); -+ emit_insn (gen_csinc3<mode>_insn (operands[0], x, operands[0], const0_rtx)); - DONE; - } - ) -@@ -2629,7 +2862,7 @@ - - (define_insn "clrsb<mode>2" - [(set (match_operand:GPI 0 "register_operand" "=r") -- (unspec:GPI [(match_operand:GPI 1 "register_operand" "r")] UNSPEC_CLS))] -+ (clrsb:GPI (match_operand:GPI 1 "register_operand" "r")))] - "" - "cls\\t%<w>0, %<w>1" - [(set_attr "type" "clz")] -@@ -3125,7 +3358,7 @@ - [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r") - (match_operand 1 "const_int_operand" "n") - (const_int 0)) -- (zero_extract:GPI (match_operand:GPI 2 "register_operand" "+r") -+ (zero_extract:GPI (match_operand:GPI 2 "register_operand" "r") - (match_dup 1) - (match_operand 3 "const_int_operand" "n")))] - "!(UINTVAL (operands[1]) == 0 -@@ -3180,6 +3413,38 @@ - [(set_attr "type" "rev")] - ) - -+;; There are no canonicalisation rules for the position of the lshiftrt, ashift -+;; operations within an IOR/AND RTX, therefore we have two patterns matching -+;; each valid permutation. -+ -+(define_insn "rev16<mode>2" -+ [(set (match_operand:GPI 0 "register_operand" "=r") -+ (ior:GPI (and:GPI (ashift:GPI (match_operand:GPI 1 "register_operand" "r") -+ (const_int 8)) -+ (match_operand:GPI 3 "const_int_operand" "n")) -+ (and:GPI (lshiftrt:GPI (match_dup 1) -+ (const_int 8)) -+ (match_operand:GPI 2 "const_int_operand" "n"))))] -+ "aarch_rev16_shleft_mask_imm_p (operands[3], <MODE>mode) -+ && aarch_rev16_shright_mask_imm_p (operands[2], <MODE>mode)" -+ "rev16\\t%<w>0, %<w>1" -+ [(set_attr "type" "rev")] -+) -+ -+(define_insn "rev16<mode>2_alt" -+ [(set (match_operand:GPI 0 "register_operand" "=r") -+ (ior:GPI (and:GPI (lshiftrt:GPI (match_operand:GPI 1 "register_operand" "r") -+ (const_int 8)) -+ (match_operand:GPI 2 "const_int_operand" "n")) -+ (and:GPI (ashift:GPI (match_dup 1) -+ (const_int 8)) -+ (match_operand:GPI 3 "const_int_operand" "n"))))] -+ "aarch_rev16_shleft_mask_imm_p (operands[3], <MODE>mode) -+ && aarch_rev16_shright_mask_imm_p (operands[2], <MODE>mode)" -+ "rev16\\t%<w>0, %<w>1" -+ [(set_attr "type" "rev")] -+) -+ - ;; zero_extend version of above - (define_insn "*bswapsi2_uxtw" - [(set (match_operand:DI 0 "register_operand" "=r") -@@ -3194,7 +3459,7 @@ - ;; ------------------------------------------------------------------- - - ;; frint floating-point round to integral standard patterns. --;; Expands to btrunc, ceil, floor, nearbyint, rint, round. -+;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn. 
- - (define_insn "<frint_pattern><mode>2" - [(set (match_operand:GPF 0 "register_operand" "=w") -@@ -3305,20 +3570,24 @@ - [(set_attr "type" "f_cvtf2i")] - ) - --(define_insn "float<GPI:mode><GPF:mode>2" -- [(set (match_operand:GPF 0 "register_operand" "=w") -- (float:GPF (match_operand:GPI 1 "register_operand" "r")))] -- "TARGET_FLOAT" -- "scvtf\\t%<GPF:s>0, %<GPI:w>1" -- [(set_attr "type" "f_cvti2f")] -+(define_insn "<optab><fcvt_target><GPF:mode>2" -+ [(set (match_operand:GPF 0 "register_operand" "=w,w") -+ (FLOATUORS:GPF (match_operand:<FCVT_TARGET> 1 "register_operand" "w,r")))] -+ "" -+ "@ -+ <su_optab>cvtf\t%<GPF:s>0, %<s>1 -+ <su_optab>cvtf\t%<GPF:s>0, %<w1>1" -+ [(set_attr "simd" "yes,no") -+ (set_attr "fp" "no,yes") -+ (set_attr "type" "neon_int_to_fp_<Vetype>,f_cvti2f")] - ) - --(define_insn "floatuns<GPI:mode><GPF:mode>2" -+(define_insn "<optab><fcvt_iesize><GPF:mode>2" - [(set (match_operand:GPF 0 "register_operand" "=w") -- (unsigned_float:GPF (match_operand:GPI 1 "register_operand" "r")))] -+ (FLOATUORS:GPF (match_operand:<FCVT_IESIZE> 1 "register_operand" "r")))] - "TARGET_FLOAT" -- "ucvtf\\t%<GPF:s>0, %<GPI:w>1" -- [(set_attr "type" "f_cvt")] -+ "<su_optab>cvtf\t%<GPF:s>0, %<w2>1" -+ [(set_attr "type" "f_cvti2f")] - ) - - ;; ------------------------------------------------------------------- -@@ -3490,7 +3759,7 @@ - (truncate:DI (match_operand:TI 1 "register_operand" "w"))))] - "reload_completed || reload_in_progress" - "fmov\\t%d0, %d1" -- [(set_attr "type" "f_mcr") -+ [(set_attr "type" "fmov") - (set_attr "length" "4") - ]) - -@@ -3588,36 +3857,63 @@ - [(set_attr "type" "call") - (set_attr "length" "16")]) - --(define_insn "tlsie_small" -- [(set (match_operand:DI 0 "register_operand" "=r") -- (unspec:DI [(match_operand:DI 1 "aarch64_tls_ie_symref" "S")] -+(define_insn "tlsie_small_<mode>" -+ [(set (match_operand:PTR 0 "register_operand" "=r") -+ (unspec:PTR [(match_operand 1 "aarch64_tls_ie_symref" "S")] - UNSPEC_GOTSMALLTLS))] - "" -- "adrp\\t%0, %A1\;ldr\\t%0, [%0, #%L1]" -+ "adrp\\t%0, %A1\;ldr\\t%<w>0, [%0, #%L1]" - [(set_attr "type" "load1") - (set_attr "length" "8")] - ) - --(define_insn "tlsle_small" -+(define_insn "tlsie_small_sidi" - [(set (match_operand:DI 0 "register_operand" "=r") -- (unspec:DI [(match_operand:DI 1 "register_operand" "r") -- (match_operand:DI 2 "aarch64_tls_le_symref" "S")] -+ (zero_extend:DI -+ (unspec:SI [(match_operand 1 "aarch64_tls_ie_symref" "S")] -+ UNSPEC_GOTSMALLTLS)))] -+ "" -+ "adrp\\t%0, %A1\;ldr\\t%w0, [%0, #%L1]" -+ [(set_attr "type" "load1") -+ (set_attr "length" "8")] -+) -+ -+(define_expand "tlsle_small" -+ [(set (match_operand 0 "register_operand" "=r") -+ (unspec [(match_operand 1 "register_operand" "r") -+ (match_operand 2 "aarch64_tls_le_symref" "S")] -+ UNSPEC_GOTSMALLTLS))] -+ "" -+{ -+ enum machine_mode mode = GET_MODE (operands[0]); -+ emit_insn ((mode == DImode -+ ? 
gen_tlsle_small_di -+ : gen_tlsle_small_si) (operands[0], -+ operands[1], -+ operands[2])); -+ DONE; -+}) -+ -+(define_insn "tlsle_small_<mode>" -+ [(set (match_operand:P 0 "register_operand" "=r") -+ (unspec:P [(match_operand:P 1 "register_operand" "r") -+ (match_operand 2 "aarch64_tls_le_symref" "S")] - UNSPEC_GOTSMALLTLS))] - "" -- "add\\t%0, %1, #%G2\;add\\t%0, %0, #%L2" -+ "add\\t%<w>0, %<w>1, #%G2\;add\\t%<w>0, %<w>0, #%L2" - [(set_attr "type" "alu_reg") - (set_attr "length" "8")] - ) - --(define_insn "tlsdesc_small" -- [(set (reg:DI R0_REGNUM) -- (unspec:DI [(match_operand:DI 0 "aarch64_valid_symref" "S")] -+(define_insn "tlsdesc_small_<mode>" -+ [(set (reg:PTR R0_REGNUM) -+ (unspec:PTR [(match_operand 0 "aarch64_valid_symref" "S")] - UNSPEC_TLSDESC)) - (clobber (reg:DI LR_REGNUM)) - (clobber (reg:CC CC_REGNUM)) - (clobber (match_scratch:DI 1 "=r"))] - "TARGET_TLS_DESC" -- "adrp\\tx0, %A0\;ldr\\t%1, [x0, #%L0]\;add\\tx0, x0, %L0\;.tlsdesccall\\t%0\;blr\\t%1" -+ "adrp\\tx0, %A0\;ldr\\t%<w>1, [x0, #%L0]\;add\\t<w>0, <w>0, %L0\;.tlsdesccall\\t%0\;blr\\t%1" - [(set_attr "type" "call") - (set_attr "length" "16")]) - -@@ -3642,6 +3938,135 @@ - DONE; - }) - -+;; Named patterns for stack smashing protection. -+(define_expand "stack_protect_set" -+ [(match_operand 0 "memory_operand") -+ (match_operand 1 "memory_operand")] -+ "" -+{ -+ enum machine_mode mode = GET_MODE (operands[0]); -+ -+ emit_insn ((mode == DImode -+ ? gen_stack_protect_set_di -+ : gen_stack_protect_set_si) (operands[0], operands[1])); -+ DONE; -+}) -+ -+(define_insn "stack_protect_set_<mode>" -+ [(set (match_operand:PTR 0 "memory_operand" "=m") -+ (unspec:PTR [(match_operand:PTR 1 "memory_operand" "m")] -+ UNSPEC_SP_SET)) -+ (set (match_scratch:PTR 2 "=&r") (const_int 0))] -+ "" -+ "ldr\\t%<w>2, %1\;str\\t%<w>2, %0\;mov\t%<w>2,0" -+ [(set_attr "length" "12") -+ (set_attr "type" "multiple")]) -+ -+(define_expand "stack_protect_test" -+ [(match_operand 0 "memory_operand") -+ (match_operand 1 "memory_operand") -+ (match_operand 2)] -+ "" -+{ -+ rtx result; -+ enum machine_mode mode = GET_MODE (operands[0]); -+ -+ result = gen_reg_rtx(mode); -+ -+ emit_insn ((mode == DImode -+ ? gen_stack_protect_test_di -+ : gen_stack_protect_test_si) (result, -+ operands[0], -+ operands[1])); -+ -+ if (mode == DImode) -+ emit_jump_insn (gen_cbranchdi4 (gen_rtx_EQ (VOIDmode, result, const0_rtx), -+ result, const0_rtx, operands[2])); -+ else -+ emit_jump_insn (gen_cbranchsi4 (gen_rtx_EQ (VOIDmode, result, const0_rtx), -+ result, const0_rtx, operands[2])); -+ DONE; -+}) -+ -+(define_insn "stack_protect_test_<mode>" -+ [(set (match_operand:PTR 0 "register_operand" "=r") -+ (unspec:PTR [(match_operand:PTR 1 "memory_operand" "m") -+ (match_operand:PTR 2 "memory_operand" "m")] -+ UNSPEC_SP_TEST)) -+ (clobber (match_scratch:PTR 3 "=&r"))] -+ "" -+ "ldr\t%<w>3, %x1\;ldr\t%<w>0, %x2\;eor\t%<w>0, %<w>3, %<w>0" -+ [(set_attr "length" "12") -+ (set_attr "type" "multiple")]) -+ -+;; Write Floating-point Control Register. -+(define_insn "set_fpcr" -+ [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] UNSPECV_SET_FPCR)] -+ "" -+ "msr\\tfpcr, %0" -+ [(set_attr "type" "mrs")]) -+ -+;; Read Floating-point Control Register. -+(define_insn "get_fpcr" -+ [(set (match_operand:SI 0 "register_operand" "=r") -+ (unspec_volatile:SI [(const_int 0)] UNSPECV_GET_FPCR))] -+ "" -+ "mrs\\t%0, fpcr" -+ [(set_attr "type" "mrs")]) -+ -+;; Write Floating-point Status Register. 
-+(define_insn "set_fpsr" -+ [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] UNSPECV_SET_FPSR)] -+ "" -+ "msr\\tfpsr, %0" -+ [(set_attr "type" "mrs")]) -+ -+;; Read Floating-point Status Register. -+(define_insn "get_fpsr" -+ [(set (match_operand:SI 0 "register_operand" "=r") -+ (unspec_volatile:SI [(const_int 0)] UNSPECV_GET_FPSR))] -+ "" -+ "mrs\\t%0, fpsr" -+ [(set_attr "type" "mrs")]) -+ -+ -+;; Define the subtract-one-and-jump insns so loop.c -+;; knows what to generate. -+(define_expand "doloop_end" -+ [(use (match_operand 0 "" "")) ; loop pseudo -+ (use (match_operand 1 "" ""))] ; label -+ "optimize > 0 && flag_modulo_sched" -+{ -+ rtx s0; -+ rtx bcomp; -+ rtx loc_ref; -+ rtx cc_reg; -+ rtx insn; -+ rtx cmp; -+ -+ /* Currently SMS relies on the do-loop pattern to recognize loops -+ where (1) the control part consists of all insns defining and/or -+ using a certain 'count' register and (2) the loop count can be -+ adjusted by modifying this register prior to the loop. -+ ??? The possible introduction of a new block to initialize the -+ new IV can potentially affect branch optimizations. */ -+ -+ if (GET_MODE (operands[0]) != DImode) -+ FAIL; -+ -+ s0 = operands [0]; -+ insn = emit_insn (gen_adddi3_compare0 (s0, s0, GEN_INT (-1))); -+ -+ cmp = XVECEXP (PATTERN (insn), 0, 0); -+ cc_reg = SET_DEST (cmp); -+ bcomp = gen_rtx_NE (VOIDmode, cc_reg, const0_rtx); -+ loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands [1]); -+ emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, -+ gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp, -+ loc_ref, pc_rtx))); -+ DONE; -+}) -+ - ;; AdvSIMD Stuff - (include "aarch64-simd.md") - ---- a/src/gcc/config/aarch64/t-aarch64 -+++ b/src/gcc/config/aarch64/t-aarch64 -@@ -31,10 +31,17 @@ - $(SYSTEM_H) coretypes.h $(TM_H) \ - $(RTL_H) $(TREE_H) expr.h $(TM_P_H) $(RECOG_H) langhooks.h \ - $(DIAGNOSTIC_CORE_H) $(OPTABS_H) \ -- $(srcdir)/config/aarch64/aarch64-simd-builtins.def -+ $(srcdir)/config/aarch64/aarch64-simd-builtins.def \ -+ aarch64-builtin-iterators.h - $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ - $(srcdir)/config/aarch64/aarch64-builtins.c - -+aarch64-builtin-iterators.h: $(srcdir)/config/aarch64/geniterators.sh \ -+ $(srcdir)/config/aarch64/iterators.md -+ $(SHELL) $(srcdir)/config/aarch64/geniterators.sh \ -+ $(srcdir)/config/aarch64/iterators.md > \ -+ aarch64-builtin-iterators.h -+ - aarch-common.o: $(srcdir)/config/arm/aarch-common.c $(CONFIG_H) $(SYSTEM_H) \ - coretypes.h $(TM_H) $(TM_P_H) $(RTL_H) $(TREE_H) output.h $(C_COMMON_H) - $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ ---- a/src/gcc/config/aarch64/arm_acle.h -+++ b/src/gcc/config/aarch64/arm_acle.h -@@ -0,0 +1,90 @@ -+/* AArch64 Non-NEON ACLE intrinsics include file. -+ -+ Copyright (C) 2014 Free Software Foundation, Inc. -+ Contributed by ARM Ltd. -+ -+ This file is part of GCC. -+ -+ GCC is free software; you can redistribute it and/or modify it -+ under the terms of the GNU General Public License as published -+ by the Free Software Foundation; either version 3, or (at your -+ option) any later version. -+ -+ GCC is distributed in the hope that it will be useful, but WITHOUT -+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public -+ License for more details. -+ -+ Under Section 7 of GPL version 3, you are granted additional -+ permissions described in the GCC Runtime Library Exception, version -+ 3.1, as published by the Free Software Foundation. 
-+ -+ You should have received a copy of the GNU General Public License and -+ a copy of the GCC Runtime Library Exception along with this program; -+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -+ <http://www.gnu.org/licenses/>. */ -+ -+#ifndef _GCC_ARM_ACLE_H -+#define _GCC_ARM_ACLE_H -+ -+#include <stdint.h> -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+#ifdef __ARM_FEATURE_CRC32 -+__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -+__crc32b (uint32_t __a, uint8_t __b) -+{ -+ return __builtin_aarch64_crc32b (__a, __b); -+} -+ -+__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -+__crc32cb (uint32_t __a, uint8_t __b) -+{ -+ return __builtin_aarch64_crc32cb (__a, __b); -+} -+ -+__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -+__crc32ch (uint32_t __a, uint16_t __b) -+{ -+ return __builtin_aarch64_crc32ch (__a, __b); -+} -+ -+__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -+__crc32cw (uint32_t __a, uint32_t __b) -+{ -+ return __builtin_aarch64_crc32cw (__a, __b); -+} -+ -+__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -+__crc32cd (uint32_t __a, uint64_t __b) -+{ -+ return __builtin_aarch64_crc32cx (__a, __b); -+} -+ -+__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -+__crc32h (uint32_t __a, uint16_t __b) -+{ -+ return __builtin_aarch64_crc32h (__a, __b); -+} -+ -+__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -+__crc32w (uint32_t __a, uint32_t __b) -+{ -+ return __builtin_aarch64_crc32w (__a, __b); -+} -+ -+__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -+__crc32d (uint32_t __a, uint64_t __b) -+{ -+ return __builtin_aarch64_crc32x (__a, __b); -+} -+ -+#endif -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif ---- a/src/gcc/config/aarch64/aarch64-cost-tables.h -+++ b/src/gcc/config/aarch64/aarch64-cost-tables.h -@@ -0,0 +1,131 @@ -+/* RTX cost tables for AArch64. -+ -+ Copyright (C) 2014 Free Software Foundation, Inc. -+ -+ This file is part of GCC. -+ -+ GCC is free software; you can redistribute it and/or modify it -+ under the terms of the GNU General Public License as published -+ by the Free Software Foundation; either version 3, or (at your -+ option) any later version. -+ -+ GCC is distributed in the hope that it will be useful, but WITHOUT -+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public -+ License for more details. -+ -+ You should have received a copy of the GNU General Public License -+ along with GCC; see the file COPYING3. If not see -+ <http://www.gnu.org/licenses/>. */ -+ -+#ifndef GCC_AARCH64_COST_TABLES_H -+#define GCC_AARCH64_COST_TABLES_H -+ -+#include "config/arm/aarch-cost-tables.h" -+ -+/* ThunderX does not have implement AArch32. */ -+const struct cpu_cost_table thunderx_extra_costs = -+{ -+ /* ALU */ -+ { -+ 0, /* Arith. */ -+ 0, /* Logical. */ -+ 0, /* Shift. */ -+ 0, /* Shift_reg. */ -+ COSTS_N_INSNS (1), /* Arith_shift. */ -+ COSTS_N_INSNS (1), /* Arith_shift_reg. */ -+ COSTS_N_INSNS (1), /* UNUSED: Log_shift. */ -+ COSTS_N_INSNS (1), /* UNUSED: Log_shift_reg. */ -+ 0, /* Extend. */ -+ COSTS_N_INSNS (1), /* Extend_arith. */ -+ 0, /* Bfi. */ -+ 0, /* Bfx. */ -+ COSTS_N_INSNS (5), /* Clz. */ -+ 0, /* rev. */ -+ 0, /* UNUSED: non_exec. */ -+ false /* UNUSED: non_exec_costs_exec. 
*/ -+ }, -+ { -+ /* MULT SImode */ -+ { -+ COSTS_N_INSNS (3), /* Simple. */ -+ 0, /* Flag_setting. */ -+ 0, /* Extend. */ -+ 0, /* Add. */ -+ COSTS_N_INSNS (1), /* Extend_add. */ -+ COSTS_N_INSNS (21) /* Idiv. */ -+ }, -+ /* MULT DImode */ -+ { -+ COSTS_N_INSNS (3), /* Simple. */ -+ 0, /* Flag_setting. */ -+ 0, /* Extend. */ -+ 0, /* Add. */ -+ COSTS_N_INSNS (1), /* Extend_add. */ -+ COSTS_N_INSNS (37) /* Idiv. */ -+ }, -+ }, -+ /* LD/ST */ -+ { -+ COSTS_N_INSNS (2), /* Load. */ -+ COSTS_N_INSNS (2), /* Load_sign_extend. */ -+ COSTS_N_INSNS (2), /* Ldrd. */ -+ 0, /* N/A: Ldm_1st. */ -+ 0, /* N/A: Ldm_regs_per_insn_1st. */ -+ 0, /* N/A: Ldm_regs_per_insn_subsequent. */ -+ COSTS_N_INSNS (3), /* Loadf. */ -+ COSTS_N_INSNS (3), /* Loadd. */ -+ 0, /* N/A: Load_unaligned. */ -+ 0, /* Store. */ -+ 0, /* Strd. */ -+ 0, /* N/A: Stm_1st. */ -+ 0, /* N/A: Stm_regs_per_insn_1st. */ -+ 0, /* N/A: Stm_regs_per_insn_subsequent. */ -+ 0, /* Storef. */ -+ 0, /* Stored. */ -+ COSTS_N_INSNS (1) /* Store_unaligned. */ -+ }, -+ { -+ /* FP SFmode */ -+ { -+ COSTS_N_INSNS (11), /* Div. */ -+ COSTS_N_INSNS (5), /* Mult. */ -+ COSTS_N_INSNS (5), /* Mult_addsub. */ -+ COSTS_N_INSNS (5), /* Fma. */ -+ COSTS_N_INSNS (3), /* Addsub. */ -+ 0, /* Fpconst. */ -+ COSTS_N_INSNS (1), /* Neg. */ -+ 0, /* Compare. */ -+ COSTS_N_INSNS (5), /* Widen. */ -+ COSTS_N_INSNS (5), /* Narrow. */ -+ COSTS_N_INSNS (5), /* Toint. */ -+ COSTS_N_INSNS (5), /* Fromint. */ -+ COSTS_N_INSNS (1) /* Roundint. */ -+ }, -+ /* FP DFmode */ -+ { -+ COSTS_N_INSNS (21), /* Div. */ -+ COSTS_N_INSNS (5), /* Mult. */ -+ COSTS_N_INSNS (5), /* Mult_addsub. */ -+ COSTS_N_INSNS (5), /* Fma. */ -+ COSTS_N_INSNS (3), /* Addsub. */ -+ 0, /* Fpconst. */ -+ COSTS_N_INSNS (1), /* Neg. */ -+ 0, /* Compare. */ -+ COSTS_N_INSNS (5), /* Widen. */ -+ COSTS_N_INSNS (5), /* Narrow. */ -+ COSTS_N_INSNS (5), /* Toint. */ -+ COSTS_N_INSNS (5), /* Fromint. */ -+ COSTS_N_INSNS (1) /* Roundint. */ -+ } -+ }, -+ /* Vector */ -+ { -+ COSTS_N_INSNS (1) /* Alu. */ -+ } -+}; -+ -+ -+ -+#endif -+ ---- a/src/gcc/config/aarch64/aarch64-cores.def -+++ b/src/gcc/config/aarch64/aarch64-cores.def -@@ -34,9 +34,10 @@ - - /* V8 Architecture Processors. */ - --AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8, AARCH64_FL_FPSIMD | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa53) --AARCH64_CORE("cortex-a57", cortexa15, cortexa15, 8, AARCH64_FL_FPSIMD | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57) -+AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8, AARCH64_FL_FPSIMD | AARCH64_FL_CRC, cortexa53) -+AARCH64_CORE("cortex-a57", cortexa15, cortexa15, 8, AARCH64_FL_FPSIMD | AARCH64_FL_CRC, cortexa57) -+AARCH64_CORE("thunderx", thunderx, thunderx, 8, AARCH64_FL_FPSIMD | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx) - - /* V8 big.LITTLE implementations. 
*/ - --AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8, AARCH64_FL_FPSIMD | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57) -+AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8, AARCH64_FL_FPSIMD | AARCH64_FL_CRC, cortexa57) ---- a/src/gcc/config/aarch64/atomics.md -+++ b/src/gcc/config/aarch64/atomics.md -@@ -119,7 +119,7 @@ - [(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "+Q") - (unspec_volatile:ALLI - [(atomic_op:ALLI (match_dup 0) -- (match_operand:ALLI 1 "<atomic_op_operand>" "rn")) -+ (match_operand:ALLI 1 "<atomic_op_operand>" "r<lconst_atomic>")) - (match_operand:SI 2 "const_int_operand")] ;; model - UNSPECV_ATOMIC_OP)) - (clobber (reg:CC CC_REGNUM)) -@@ -141,7 +141,7 @@ - (unspec_volatile:ALLI - [(not:ALLI - (and:ALLI (match_dup 0) -- (match_operand:ALLI 1 "aarch64_logical_operand" "rn"))) -+ (match_operand:ALLI 1 "aarch64_logical_operand" "r<lconst_atomic>"))) - (match_operand:SI 2 "const_int_operand")] ;; model - UNSPECV_ATOMIC_OP)) - (clobber (reg:CC CC_REGNUM)) -@@ -164,7 +164,7 @@ - (set (match_dup 1) - (unspec_volatile:ALLI - [(atomic_op:ALLI (match_dup 1) -- (match_operand:ALLI 2 "<atomic_op_operand>" "rn")) -+ (match_operand:ALLI 2 "<atomic_op_operand>" "r<lconst_atomic>")) - (match_operand:SI 3 "const_int_operand")] ;; model - UNSPECV_ATOMIC_OP)) - (clobber (reg:CC CC_REGNUM)) -@@ -188,7 +188,7 @@ - (unspec_volatile:ALLI - [(not:ALLI - (and:ALLI (match_dup 1) -- (match_operand:ALLI 2 "aarch64_logical_operand" "rn"))) -+ (match_operand:ALLI 2 "aarch64_logical_operand" "r<lconst_atomic>"))) - (match_operand:SI 3 "const_int_operand")] ;; model - UNSPECV_ATOMIC_OP)) - (clobber (reg:CC CC_REGNUM)) -@@ -209,7 +209,7 @@ - [(set (match_operand:ALLI 0 "register_operand" "=&r") - (atomic_op:ALLI - (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q") -- (match_operand:ALLI 2 "<atomic_op_operand>" "rn"))) -+ (match_operand:ALLI 2 "<atomic_op_operand>" "r<lconst_atomic>"))) - (set (match_dup 1) - (unspec_volatile:ALLI - [(match_dup 1) (match_dup 2) -@@ -233,7 +233,7 @@ - (not:ALLI - (and:ALLI - (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q") -- (match_operand:ALLI 2 "aarch64_logical_operand" "rn")))) -+ (match_operand:ALLI 2 "aarch64_logical_operand" "r<lconst_atomic>")))) - (set (match_dup 1) - (unspec_volatile:ALLI - [(match_dup 1) (match_dup 2) ---- a/src/gcc/config/aarch64/aarch64-tune.md -+++ b/src/gcc/config/aarch64/aarch64-tune.md -@@ -1,5 +1,5 @@ - ;; -*- buffer-read-only: t -*- - ;; Generated automatically by gentune.sh from aarch64-cores.def - (define_attr "tune" -- "cortexa53,cortexa15,cortexa57cortexa53" -+ "cortexa53,cortexa15,thunderx,cortexa57cortexa53" - (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) ---- a/src/gcc/config/aarch64/aarch64-builtins.c -+++ b/src/gcc/config/aarch64/aarch64-builtins.c -@@ -47,52 +47,27 @@ - #include "gimple.h" - #include "gimple-iterator.h" - --enum aarch64_simd_builtin_type_mode --{ -- T_V8QI, -- T_V4HI, -- T_V2SI, -- T_V2SF, -- T_DI, -- T_DF, -- T_V16QI, -- T_V8HI, -- T_V4SI, -- T_V4SF, -- T_V2DI, -- T_V2DF, -- T_TI, -- T_EI, -- T_OI, -- T_XI, -- T_SI, -- T_SF, -- T_HI, -- T_QI, -- T_MAX --}; -- --#define v8qi_UP T_V8QI --#define v4hi_UP T_V4HI --#define v2si_UP T_V2SI --#define v2sf_UP T_V2SF --#define di_UP T_DI --#define df_UP T_DF --#define v16qi_UP T_V16QI --#define v8hi_UP T_V8HI --#define v4si_UP T_V4SI --#define v4sf_UP T_V4SF --#define v2di_UP T_V2DI --#define v2df_UP T_V2DF --#define ti_UP T_TI --#define ei_UP T_EI --#define oi_UP T_OI --#define xi_UP 
T_XI --#define si_UP T_SI --#define sf_UP T_SF --#define hi_UP T_HI --#define qi_UP T_QI -- -+#define v8qi_UP V8QImode -+#define v4hi_UP V4HImode -+#define v2si_UP V2SImode -+#define v2sf_UP V2SFmode -+#define di_UP DImode -+#define df_UP DFmode -+#define v16qi_UP V16QImode -+#define v8hi_UP V8HImode -+#define v4si_UP V4SImode -+#define v4sf_UP V4SFmode -+#define v2di_UP V2DImode -+#define v2df_UP V2DFmode -+#define ti_UP TImode -+#define ei_UP EImode -+#define oi_UP OImode -+#define ci_UP CImode -+#define xi_UP XImode -+#define si_UP SImode -+#define sf_UP SFmode -+#define hi_UP HImode -+#define qi_UP QImode - #define UP(X) X##_UP - - #define SIMD_MAX_BUILTIN_ARGS 5 -@@ -107,8 +82,6 @@ - qualifier_const = 0x2, /* 1 << 1 */ - /* T *foo. */ - qualifier_pointer = 0x4, /* 1 << 2 */ -- /* const T *foo. */ -- qualifier_const_pointer = 0x6, /* qualifier_const | qualifier_pointer */ - /* Used when expanding arguments if an operand could - be an immediate. */ - qualifier_immediate = 0x8, /* 1 << 3 */ -@@ -123,7 +96,7 @@ - qualifier_map_mode = 0x80, /* 1 << 7 */ - /* qualifier_pointer | qualifier_map_mode */ - qualifier_pointer_map_mode = 0x84, -- /* qualifier_const_pointer | qualifier_map_mode */ -+ /* qualifier_const | qualifier_pointer | qualifier_map_mode */ - qualifier_const_pointer_map_mode = 0x86, - /* Polynomial types. */ - qualifier_poly = 0x100 -@@ -132,7 +105,7 @@ - typedef struct - { - const char *name; -- enum aarch64_simd_builtin_type_mode mode; -+ enum machine_mode mode; - const enum insn_code code; - unsigned int fcode; - enum aarch64_type_qualifiers *qualifiers; -@@ -147,16 +120,49 @@ - = { qualifier_unsigned, qualifier_unsigned }; - #define TYPES_UNOPU (aarch64_types_unopu_qualifiers) - #define TYPES_CREATE (aarch64_types_unop_qualifiers) --#define TYPES_REINTERP (aarch64_types_unop_qualifiers) -+#define TYPES_REINTERP_SS (aarch64_types_unop_qualifiers) - static enum aarch64_type_qualifiers -+aarch64_types_unop_su_qualifiers[SIMD_MAX_BUILTIN_ARGS] -+ = { qualifier_none, qualifier_unsigned }; -+#define TYPES_REINTERP_SU (aarch64_types_unop_su_qualifiers) -+static enum aarch64_type_qualifiers -+aarch64_types_unop_sp_qualifiers[SIMD_MAX_BUILTIN_ARGS] -+ = { qualifier_none, qualifier_poly }; -+#define TYPES_REINTERP_SP (aarch64_types_unop_sp_qualifiers) -+static enum aarch64_type_qualifiers -+aarch64_types_unop_us_qualifiers[SIMD_MAX_BUILTIN_ARGS] -+ = { qualifier_unsigned, qualifier_none }; -+#define TYPES_REINTERP_US (aarch64_types_unop_us_qualifiers) -+static enum aarch64_type_qualifiers -+aarch64_types_unop_ps_qualifiers[SIMD_MAX_BUILTIN_ARGS] -+ = { qualifier_poly, qualifier_none }; -+#define TYPES_REINTERP_PS (aarch64_types_unop_ps_qualifiers) -+static enum aarch64_type_qualifiers - aarch64_types_binop_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_none, qualifier_none, qualifier_maybe_immediate }; - #define TYPES_BINOP (aarch64_types_binop_qualifiers) - static enum aarch64_type_qualifiers -+aarch64_types_cmtst_qualifiers[SIMD_MAX_BUILTIN_ARGS] -+ = { qualifier_none, qualifier_none, qualifier_none, -+ qualifier_internal, qualifier_internal }; -+#define TYPES_TST (aarch64_types_cmtst_qualifiers) -+static enum aarch64_type_qualifiers -+aarch64_types_binopv_qualifiers[SIMD_MAX_BUILTIN_ARGS] -+ = { qualifier_void, qualifier_none, qualifier_none }; -+#define TYPES_BINOPV (aarch64_types_binopv_qualifiers) -+static enum aarch64_type_qualifiers - aarch64_types_binopu_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned }; - #define 
TYPES_BINOPU (aarch64_types_binopu_qualifiers) - static enum aarch64_type_qualifiers -+aarch64_types_binop_uus_qualifiers[SIMD_MAX_BUILTIN_ARGS] -+ = { qualifier_unsigned, qualifier_unsigned, qualifier_none }; -+#define TYPES_BINOP_UUS (aarch64_types_binop_uus_qualifiers) -+static enum aarch64_type_qualifiers -+aarch64_types_binop_ssu_qualifiers[SIMD_MAX_BUILTIN_ARGS] -+ = { qualifier_none, qualifier_none, qualifier_unsigned }; -+#define TYPES_BINOP_SSU (aarch64_types_binop_ssu_qualifiers) -+static enum aarch64_type_qualifiers - aarch64_types_binopp_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_poly, qualifier_poly, qualifier_poly }; - #define TYPES_BINOPP (aarch64_types_binopp_qualifiers) -@@ -172,10 +178,10 @@ - #define TYPES_TERNOPU (aarch64_types_ternopu_qualifiers) - - static enum aarch64_type_qualifiers --aarch64_types_quadop_qualifiers[SIMD_MAX_BUILTIN_ARGS] -+aarch64_types_ternop_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_none, qualifier_none, qualifier_none, -- qualifier_none, qualifier_none }; --#define TYPES_QUADOP (aarch64_types_quadop_qualifiers) -+ qualifier_none, qualifier_immediate }; -+#define TYPES_TERNOP_LANE (aarch64_types_ternop_lane_qualifiers) - - static enum aarch64_type_qualifiers - aarch64_types_getlane_qualifiers[SIMD_MAX_BUILTIN_ARGS] -@@ -183,9 +189,14 @@ - #define TYPES_GETLANE (aarch64_types_getlane_qualifiers) - #define TYPES_SHIFTIMM (aarch64_types_getlane_qualifiers) - static enum aarch64_type_qualifiers -+aarch64_types_shift_to_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS] -+ = { qualifier_unsigned, qualifier_none, qualifier_immediate }; -+#define TYPES_SHIFTIMM_USS (aarch64_types_shift_to_unsigned_qualifiers) -+static enum aarch64_type_qualifiers - aarch64_types_unsigned_shift_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate }; - #define TYPES_USHIFTIMM (aarch64_types_unsigned_shift_qualifiers) -+ - static enum aarch64_type_qualifiers - aarch64_types_setlane_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate }; -@@ -194,6 +205,13 @@ - #define TYPES_SHIFTACC (aarch64_types_setlane_qualifiers) - - static enum aarch64_type_qualifiers -+aarch64_types_unsigned_shiftacc_qualifiers[SIMD_MAX_BUILTIN_ARGS] -+ = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned, -+ qualifier_immediate }; -+#define TYPES_USHIFTACC (aarch64_types_unsigned_shiftacc_qualifiers) -+ -+ -+static enum aarch64_type_qualifiers - aarch64_types_combine_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_none, qualifier_none, qualifier_none }; - #define TYPES_COMBINE (aarch64_types_combine_qualifiers) -@@ -230,6 +248,11 @@ - = { qualifier_void, qualifier_pointer_map_mode, qualifier_none }; - #define TYPES_STORE1 (aarch64_types_store1_qualifiers) - #define TYPES_STORESTRUCT (aarch64_types_store1_qualifiers) -+static enum aarch64_type_qualifiers -+aarch64_types_storestruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS] -+ = { qualifier_void, qualifier_pointer_map_mode, -+ qualifier_none, qualifier_none }; -+#define TYPES_STORESTRUCT_LANE (aarch64_types_storestruct_lane_qualifiers) - - #define CF0(N, X) CODE_FOR_aarch64_##N##X - #define CF1(N, X) CODE_FOR_##N##X##1 -@@ -239,7 +262,7 @@ - #define CF10(N, X) CODE_FOR_##N##X - - #define VAR1(T, N, MAP, A) \ -- {#N, UP (A), CF##MAP (N, A), 0, TYPES_##T}, -+ {#N #A, UP (A), CF##MAP (N, A), 0, TYPES_##T}, - #define VAR2(T, N, MAP, A, B) \ - VAR1 (T, N, MAP, A) \ - VAR1 (T, N, MAP, B) -@@ -274,96 +297,34 @@ - VAR11 (T, N, MAP, A, B, 
C, D, E, F, G, H, I, J, K) \ - VAR1 (T, N, MAP, L) - --/* BUILTIN_<ITERATOR> macros should expand to cover the same range of -- modes as is given for each define_mode_iterator in -- config/aarch64/iterators.md. */ -+#include "aarch64-builtin-iterators.h" - --#define BUILTIN_DX(T, N, MAP) \ -- VAR2 (T, N, MAP, di, df) --#define BUILTIN_GPF(T, N, MAP) \ -- VAR2 (T, N, MAP, sf, df) --#define BUILTIN_SDQ_I(T, N, MAP) \ -- VAR4 (T, N, MAP, qi, hi, si, di) --#define BUILTIN_SD_HSI(T, N, MAP) \ -- VAR2 (T, N, MAP, hi, si) --#define BUILTIN_V2F(T, N, MAP) \ -- VAR2 (T, N, MAP, v2sf, v2df) --#define BUILTIN_VALL(T, N, MAP) \ -- VAR10 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, \ -- v4si, v2di, v2sf, v4sf, v2df) --#define BUILTIN_VALLDI(T, N, MAP) \ -- VAR11 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, \ -- v4si, v2di, v2sf, v4sf, v2df, di) --#define BUILTIN_VALLDIF(T, N, MAP) \ -- VAR12 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, \ -- v4si, v2di, v2sf, v4sf, v2df, di, df) --#define BUILTIN_VB(T, N, MAP) \ -- VAR2 (T, N, MAP, v8qi, v16qi) --#define BUILTIN_VD(T, N, MAP) \ -- VAR4 (T, N, MAP, v8qi, v4hi, v2si, v2sf) --#define BUILTIN_VDC(T, N, MAP) \ -- VAR6 (T, N, MAP, v8qi, v4hi, v2si, v2sf, di, df) --#define BUILTIN_VDIC(T, N, MAP) \ -- VAR3 (T, N, MAP, v8qi, v4hi, v2si) --#define BUILTIN_VDN(T, N, MAP) \ -- VAR3 (T, N, MAP, v4hi, v2si, di) --#define BUILTIN_VDQ(T, N, MAP) \ -- VAR7 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di) --#define BUILTIN_VDQF(T, N, MAP) \ -- VAR3 (T, N, MAP, v2sf, v4sf, v2df) --#define BUILTIN_VDQH(T, N, MAP) \ -- VAR2 (T, N, MAP, v4hi, v8hi) --#define BUILTIN_VDQHS(T, N, MAP) \ -- VAR4 (T, N, MAP, v4hi, v8hi, v2si, v4si) --#define BUILTIN_VDQIF(T, N, MAP) \ -- VAR9 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2sf, v4sf, v2df) --#define BUILTIN_VDQM(T, N, MAP) \ -- VAR6 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si) --#define BUILTIN_VDQV(T, N, MAP) \ -- VAR5 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v4si) --#define BUILTIN_VDQQH(T, N, MAP) \ -- VAR4 (T, N, MAP, v8qi, v16qi, v4hi, v8hi) --#define BUILTIN_VDQ_BHSI(T, N, MAP) \ -- VAR6 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si) --#define BUILTIN_VDQ_I(T, N, MAP) \ -- VAR7 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di) --#define BUILTIN_VDW(T, N, MAP) \ -- VAR3 (T, N, MAP, v8qi, v4hi, v2si) --#define BUILTIN_VD_BHSI(T, N, MAP) \ -- VAR3 (T, N, MAP, v8qi, v4hi, v2si) --#define BUILTIN_VD_HSI(T, N, MAP) \ -- VAR2 (T, N, MAP, v4hi, v2si) --#define BUILTIN_VD_RE(T, N, MAP) \ -- VAR6 (T, N, MAP, v8qi, v4hi, v2si, v2sf, di, df) --#define BUILTIN_VQ(T, N, MAP) \ -- VAR6 (T, N, MAP, v16qi, v8hi, v4si, v2di, v4sf, v2df) --#define BUILTIN_VQN(T, N, MAP) \ -- VAR3 (T, N, MAP, v8hi, v4si, v2di) --#define BUILTIN_VQW(T, N, MAP) \ -- VAR3 (T, N, MAP, v16qi, v8hi, v4si) --#define BUILTIN_VQ_HSI(T, N, MAP) \ -- VAR2 (T, N, MAP, v8hi, v4si) --#define BUILTIN_VQ_S(T, N, MAP) \ -- VAR6 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si) --#define BUILTIN_VSDQ_HSI(T, N, MAP) \ -- VAR6 (T, N, MAP, v4hi, v8hi, v2si, v4si, hi, si) --#define BUILTIN_VSDQ_I(T, N, MAP) \ -- VAR11 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, qi, hi, si, di) --#define BUILTIN_VSDQ_I_BHSI(T, N, MAP) \ -- VAR10 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, qi, hi, si) --#define BUILTIN_VSDQ_I_DI(T, N, MAP) \ -- VAR8 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, di) --#define BUILTIN_VSD_HSI(T, N, MAP) \ -- VAR4 (T, N, MAP, v4hi, v2si, hi, si) --#define BUILTIN_VSQN_HSDI(T, N, MAP) \ -- VAR6 (T, N, MAP, v8hi, v4si, v2di, hi, 
si, di) --#define BUILTIN_VSTRUCT(T, N, MAP) \ -- VAR3 (T, N, MAP, oi, ci, xi) -- - static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = { - #include "aarch64-simd-builtins.def" - }; - -+/* There's only 8 CRC32 builtins. Probably not worth their own .def file. */ -+#define AARCH64_CRC32_BUILTINS \ -+ CRC32_BUILTIN (crc32b, QI) \ -+ CRC32_BUILTIN (crc32h, HI) \ -+ CRC32_BUILTIN (crc32w, SI) \ -+ CRC32_BUILTIN (crc32x, DI) \ -+ CRC32_BUILTIN (crc32cb, QI) \ -+ CRC32_BUILTIN (crc32ch, HI) \ -+ CRC32_BUILTIN (crc32cw, SI) \ -+ CRC32_BUILTIN (crc32cx, DI) -+ -+typedef struct -+{ -+ const char *name; -+ enum machine_mode mode; -+ const enum insn_code icode; -+ unsigned int fcode; -+} aarch64_crc_builtin_datum; -+ -+#define CRC32_BUILTIN(N, M) \ -+ AARCH64_BUILTIN_##N, -+ - #undef VAR1 - #define VAR1(T, N, MAP, A) \ - AARCH64_SIMD_BUILTIN_##T##_##N##A, -@@ -371,13 +332,32 @@ - enum aarch64_builtins - { - AARCH64_BUILTIN_MIN, -+ -+ AARCH64_BUILTIN_GET_FPCR, -+ AARCH64_BUILTIN_SET_FPCR, -+ AARCH64_BUILTIN_GET_FPSR, -+ AARCH64_BUILTIN_SET_FPSR, -+ - AARCH64_SIMD_BUILTIN_BASE, - #include "aarch64-simd-builtins.def" - AARCH64_SIMD_BUILTIN_MAX = AARCH64_SIMD_BUILTIN_BASE - + ARRAY_SIZE (aarch64_simd_builtin_data), -+ AARCH64_CRC32_BUILTIN_BASE, -+ AARCH64_CRC32_BUILTINS -+ AARCH64_CRC32_BUILTIN_MAX, - AARCH64_BUILTIN_MAX - }; - -+#undef CRC32_BUILTIN -+#define CRC32_BUILTIN(N, M) \ -+ {"__builtin_aarch64_"#N, M##mode, CODE_FOR_aarch64_##N, AARCH64_BUILTIN_##N}, -+ -+static aarch64_crc_builtin_datum aarch64_crc_builtin_data[] = { -+ AARCH64_CRC32_BUILTINS -+}; -+ -+#undef CRC32_BUILTIN -+ - static GTY(()) tree aarch64_builtin_decls[AARCH64_BUILTIN_MAX]; - - #define NUM_DREG_TYPES 6 -@@ -639,25 +619,10 @@ - bool print_type_signature_p = false; - char type_signature[SIMD_MAX_BUILTIN_ARGS] = { 0 }; - aarch64_simd_builtin_datum *d = &aarch64_simd_builtin_data[i]; -- const char *const modenames[] = -- { -- "v8qi", "v4hi", "v2si", "v2sf", "di", "df", -- "v16qi", "v8hi", "v4si", "v4sf", "v2di", "v2df", -- "ti", "ei", "oi", "xi", "si", "sf", "hi", "qi" -- }; -- const enum machine_mode modes[] = -- { -- V8QImode, V4HImode, V2SImode, V2SFmode, DImode, DFmode, -- V16QImode, V8HImode, V4SImode, V4SFmode, V2DImode, -- V2DFmode, TImode, EImode, OImode, XImode, SImode, -- SFmode, HImode, QImode -- }; - char namebuf[60]; - tree ftype = NULL; - tree fndecl = NULL; - -- gcc_assert (ARRAY_SIZE (modenames) == T_MAX); -- - d->fcode = fcode; - - /* We must track two variables here. op_num is -@@ -705,7 +670,7 @@ - /* Some builtins have different user-facing types - for certain arguments, encoded in d->mode. */ - if (qualifiers & qualifier_map_mode) -- op_mode = modes[d->mode]; -+ op_mode = d->mode; - - /* For pointers, we want a pointer to the basic type - of the vector. 
*/ -@@ -737,11 +702,11 @@ - gcc_assert (ftype != NULL); - - if (print_type_signature_p) -- snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s%s_%s", -- d->name, modenames[d->mode], type_signature); -+ snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s_%s", -+ d->name, type_signature); - else -- snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s%s", -- d->name, modenames[d->mode]); -+ snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s", -+ d->name); - - fndecl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, - NULL, NULL_TREE); -@@ -749,11 +714,49 @@ - } - } - -+static void -+aarch64_init_crc32_builtins () -+{ -+ tree usi_type = aarch64_build_unsigned_type (SImode); -+ unsigned int i = 0; -+ -+ for (i = 0; i < ARRAY_SIZE (aarch64_crc_builtin_data); ++i) -+ { -+ aarch64_crc_builtin_datum* d = &aarch64_crc_builtin_data[i]; -+ tree argtype = aarch64_build_unsigned_type (d->mode); -+ tree ftype = build_function_type_list (usi_type, usi_type, argtype, NULL_TREE); -+ tree fndecl = add_builtin_function (d->name, ftype, d->fcode, -+ BUILT_IN_MD, NULL, NULL_TREE); -+ -+ aarch64_builtin_decls[d->fcode] = fndecl; -+ } -+} -+ - void - aarch64_init_builtins (void) - { -+ tree ftype_set_fpr -+ = build_function_type_list (void_type_node, unsigned_type_node, NULL); -+ tree ftype_get_fpr -+ = build_function_type_list (unsigned_type_node, NULL); -+ -+ aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR] -+ = add_builtin_function ("__builtin_aarch64_get_fpcr", ftype_get_fpr, -+ AARCH64_BUILTIN_GET_FPCR, BUILT_IN_MD, NULL, NULL_TREE); -+ aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR] -+ = add_builtin_function ("__builtin_aarch64_set_fpcr", ftype_set_fpr, -+ AARCH64_BUILTIN_SET_FPCR, BUILT_IN_MD, NULL, NULL_TREE); -+ aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR] -+ = add_builtin_function ("__builtin_aarch64_get_fpsr", ftype_get_fpr, -+ AARCH64_BUILTIN_GET_FPSR, BUILT_IN_MD, NULL, NULL_TREE); -+ aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR] -+ = add_builtin_function ("__builtin_aarch64_set_fpsr", ftype_set_fpr, -+ AARCH64_BUILTIN_SET_FPSR, BUILT_IN_MD, NULL, NULL_TREE); -+ - if (TARGET_SIMD) - aarch64_init_simd_builtins (); -+ if (TARGET_CRC32) -+ aarch64_init_crc32_builtins (); - } - - tree -@@ -774,9 +777,8 @@ - - static rtx - aarch64_simd_expand_args (rtx target, int icode, int have_retval, -- tree exp, ...) -+ tree exp, builtin_simd_arg *args) - { -- va_list ap; - rtx pat; - tree arg[SIMD_MAX_BUILTIN_ARGS]; - rtx op[SIMD_MAX_BUILTIN_ARGS]; -@@ -790,11 +792,9 @@ - || !(*insn_data[icode].operand[0].predicate) (target, tmode))) - target = gen_reg_rtx (tmode); - -- va_start (ap, exp); -- - for (;;) - { -- builtin_simd_arg thisarg = (builtin_simd_arg) va_arg (ap, int); -+ builtin_simd_arg thisarg = args[argc]; - - if (thisarg == SIMD_ARG_STOP) - break; -@@ -818,8 +818,11 @@ - case SIMD_ARG_CONSTANT: - if (!(*insn_data[icode].operand[argc + have_retval].predicate) - (op[argc], mode[argc])) -+ { - error_at (EXPR_LOCATION (exp), "incompatible type for argument %d, " - "expected %<const int%>", argc + 1); -+ return const0_rtx; -+ } - break; - - case SIMD_ARG_STOP: -@@ -830,8 +833,6 @@ - } - } - -- va_end (ap); -- - if (have_retval) - switch (argc) - { -@@ -886,7 +887,7 @@ - } - - if (!pat) -- return 0; -+ return NULL_RTX; - - emit_insn (pat); - -@@ -945,14 +946,45 @@ - /* The interface to aarch64_simd_expand_args expects a 0 if - the function is void, and a 1 if it is not. 
*/ - return aarch64_simd_expand_args -- (target, icode, !is_void, exp, -- args[1], -- args[2], -- args[3], -- args[4], -- SIMD_ARG_STOP); -+ (target, icode, !is_void, exp, &args[1]); - } - -+rtx -+aarch64_crc32_expand_builtin (int fcode, tree exp, rtx target) -+{ -+ rtx pat; -+ aarch64_crc_builtin_datum *d -+ = &aarch64_crc_builtin_data[fcode - (AARCH64_CRC32_BUILTIN_BASE + 1)]; -+ enum insn_code icode = d->icode; -+ tree arg0 = CALL_EXPR_ARG (exp, 0); -+ tree arg1 = CALL_EXPR_ARG (exp, 1); -+ rtx op0 = expand_normal (arg0); -+ rtx op1 = expand_normal (arg1); -+ enum machine_mode tmode = insn_data[icode].operand[0].mode; -+ enum machine_mode mode0 = insn_data[icode].operand[1].mode; -+ enum machine_mode mode1 = insn_data[icode].operand[2].mode; -+ -+ if (! target -+ || GET_MODE (target) != tmode -+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) -+ target = gen_reg_rtx (tmode); -+ -+ gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode) -+ && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)); -+ -+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) -+ op0 = copy_to_mode_reg (mode0, op0); -+ if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) -+ op1 = copy_to_mode_reg (mode1, op1); -+ -+ pat = GEN_FCN (icode) (target, op0, op1); -+ if (!pat) -+ return NULL_RTX; -+ -+ emit_insn (pat); -+ return target; -+} -+ - /* Expand an expression EXP that calls a built-in function, - with result going to TARGET if that's convenient. */ - rtx -@@ -964,11 +996,43 @@ - { - tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); - int fcode = DECL_FUNCTION_CODE (fndecl); -+ int icode; -+ rtx pat, op0; -+ tree arg0; - -- if (fcode >= AARCH64_SIMD_BUILTIN_BASE) -+ switch (fcode) -+ { -+ case AARCH64_BUILTIN_GET_FPCR: -+ case AARCH64_BUILTIN_SET_FPCR: -+ case AARCH64_BUILTIN_GET_FPSR: -+ case AARCH64_BUILTIN_SET_FPSR: -+ if ((fcode == AARCH64_BUILTIN_GET_FPCR) -+ || (fcode == AARCH64_BUILTIN_GET_FPSR)) -+ { -+ icode = (fcode == AARCH64_BUILTIN_GET_FPSR) ? -+ CODE_FOR_get_fpsr : CODE_FOR_get_fpcr; -+ target = gen_reg_rtx (SImode); -+ pat = GEN_FCN (icode) (target); -+ } -+ else -+ { -+ target = NULL_RTX; -+ icode = (fcode == AARCH64_BUILTIN_SET_FPSR) ? 
-+ CODE_FOR_set_fpsr : CODE_FOR_set_fpcr; -+ arg0 = CALL_EXPR_ARG (exp, 0); -+ op0 = expand_normal (arg0); -+ pat = GEN_FCN (icode) (op0); -+ } -+ emit_insn (pat); -+ return target; -+ } -+ -+ if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <= AARCH64_SIMD_BUILTIN_MAX) - return aarch64_simd_expand_builtin (fcode, exp, target); -+ else if (fcode >= AARCH64_CRC32_BUILTIN_BASE && fcode <= AARCH64_CRC32_BUILTIN_MAX) -+ return aarch64_crc32_expand_builtin (fcode, exp, target); - -- return NULL_RTX; -+ gcc_unreachable (); - } - - tree -@@ -1086,7 +1150,29 @@ - - return aarch64_builtin_decls[builtin]; - } -- -+ case BUILT_IN_BSWAP16: -+#undef AARCH64_CHECK_BUILTIN_MODE -+#define AARCH64_CHECK_BUILTIN_MODE(C, N) \ -+ (out_mode == N##Imode && out_n == C \ -+ && in_mode == N##Imode && in_n == C) -+ if (AARCH64_CHECK_BUILTIN_MODE (4, H)) -+ return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv4hi]; -+ else if (AARCH64_CHECK_BUILTIN_MODE (8, H)) -+ return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv8hi]; -+ else -+ return NULL_TREE; -+ case BUILT_IN_BSWAP32: -+ if (AARCH64_CHECK_BUILTIN_MODE (2, S)) -+ return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv2si]; -+ else if (AARCH64_CHECK_BUILTIN_MODE (4, S)) -+ return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv4si]; -+ else -+ return NULL_TREE; -+ case BUILT_IN_BSWAP64: -+ if (AARCH64_CHECK_BUILTIN_MODE (2, D)) -+ return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv2di]; -+ else -+ return NULL_TREE; - default: - return NULL_TREE; - } -@@ -1111,22 +1197,25 @@ - BUILTIN_VALLDI (UNOP, abs, 2) - return fold_build1 (ABS_EXPR, type, args[0]); - break; -- BUILTIN_VALLDI (BINOP, cmge, 0) -- return fold_build2 (GE_EXPR, type, args[0], args[1]); -- break; -- BUILTIN_VALLDI (BINOP, cmgt, 0) -- return fold_build2 (GT_EXPR, type, args[0], args[1]); -- break; -- BUILTIN_VALLDI (BINOP, cmeq, 0) -- return fold_build2 (EQ_EXPR, type, args[0], args[1]); -- break; -- BUILTIN_VSDQ_I_DI (BINOP, cmtst, 0) -- { -- tree and_node = fold_build2 (BIT_AND_EXPR, type, args[0], args[1]); -- tree vec_zero_node = build_zero_cst (type); -- return fold_build2 (NE_EXPR, type, and_node, vec_zero_node); -- break; -- } -+ VAR1 (REINTERP_SS, reinterpretdi, 0, df) -+ VAR1 (REINTERP_SS, reinterpretv8qi, 0, df) -+ VAR1 (REINTERP_SS, reinterpretv4hi, 0, df) -+ VAR1 (REINTERP_SS, reinterpretv2si, 0, df) -+ VAR1 (REINTERP_SS, reinterpretv2sf, 0, df) -+ BUILTIN_VD (REINTERP_SS, reinterpretdf, 0) -+ BUILTIN_VD (REINTERP_SU, reinterpretdf, 0) -+ VAR1 (REINTERP_US, reinterpretdi, 0, df) -+ VAR1 (REINTERP_US, reinterpretv8qi, 0, df) -+ VAR1 (REINTERP_US, reinterpretv4hi, 0, df) -+ VAR1 (REINTERP_US, reinterpretv2si, 0, df) -+ VAR1 (REINTERP_US, reinterpretv2sf, 0, df) -+ BUILTIN_VD (REINTERP_SP, reinterpretdf, 0) -+ VAR1 (REINTERP_PS, reinterpretdi, 0, df) -+ VAR1 (REINTERP_PS, reinterpretv8qi, 0, df) -+ VAR1 (REINTERP_PS, reinterpretv4hi, 0, df) -+ VAR1 (REINTERP_PS, reinterpretv2si, 0, df) -+ VAR1 (REINTERP_PS, reinterpretv2sf, 0, df) -+ return fold_build1 (VIEW_CONVERT_EXPR, type, args[0]); - VAR1 (UNOP, floatv2si, 2, v2sf) - VAR1 (UNOP, floatv4si, 2, v4sf) - VAR1 (UNOP, floatv2di, 2, v2df) -@@ -1146,6 +1235,20 @@ - tree call = gimple_call_fn (stmt); - tree fndecl; - gimple new_stmt = NULL; -+ -+ /* The operations folded below are reduction operations. These are -+ defined to leave their result in the 0'th element (from the perspective -+ of GCC). 
The architectural instruction we are folding will leave the -+ result in the 0'th element (from the perspective of the architecture). -+ For big-endian systems, these perspectives are not aligned. -+ -+ It is therefore wrong to perform this fold on big-endian. There -+ are some tricks we could play with shuffling, but the mid-end is -+ inconsistent in the way it treats reduction operations, so we will -+ end up in difficulty. Until we fix the ambiguity - just bail out. */ -+ if (BYTES_BIG_ENDIAN) -+ return false; -+ - if (call) - { - fndecl = gimple_call_fndecl (stmt); -@@ -1196,43 +1299,108 @@ - return changed; - } - -+void -+aarch64_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) -+{ -+ const unsigned AARCH64_FE_INVALID = 1; -+ const unsigned AARCH64_FE_DIVBYZERO = 2; -+ const unsigned AARCH64_FE_OVERFLOW = 4; -+ const unsigned AARCH64_FE_UNDERFLOW = 8; -+ const unsigned AARCH64_FE_INEXACT = 16; -+ const unsigned HOST_WIDE_INT AARCH64_FE_ALL_EXCEPT = (AARCH64_FE_INVALID -+ | AARCH64_FE_DIVBYZERO -+ | AARCH64_FE_OVERFLOW -+ | AARCH64_FE_UNDERFLOW -+ | AARCH64_FE_INEXACT); -+ const unsigned HOST_WIDE_INT AARCH64_FE_EXCEPT_SHIFT = 8; -+ tree fenv_cr, fenv_sr, get_fpcr, set_fpcr, mask_cr, mask_sr; -+ tree ld_fenv_cr, ld_fenv_sr, masked_fenv_cr, masked_fenv_sr, hold_fnclex_cr; -+ tree hold_fnclex_sr, new_fenv_var, reload_fenv, restore_fnenv, get_fpsr, set_fpsr; -+ tree update_call, atomic_feraiseexcept, hold_fnclex, masked_fenv, ld_fenv; -+ -+ /* Generate the equivalence of : -+ unsigned int fenv_cr; -+ fenv_cr = __builtin_aarch64_get_fpcr (); -+ -+ unsigned int fenv_sr; -+ fenv_sr = __builtin_aarch64_get_fpsr (); -+ -+ Now set all exceptions to non-stop -+ unsigned int mask_cr -+ = ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT); -+ unsigned int masked_cr; -+ masked_cr = fenv_cr & mask_cr; -+ -+ And clear all exception flags -+ unsigned int maske_sr = ~AARCH64_FE_ALL_EXCEPT; -+ unsigned int masked_cr; -+ masked_sr = fenv_sr & mask_sr; -+ -+ __builtin_aarch64_set_cr (masked_cr); -+ __builtin_aarch64_set_sr (masked_sr); */ -+ -+ fenv_cr = create_tmp_var (unsigned_type_node, NULL); -+ fenv_sr = create_tmp_var (unsigned_type_node, NULL); -+ -+ get_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR]; -+ set_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR]; -+ get_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR]; -+ set_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR]; -+ -+ mask_cr = build_int_cst (unsigned_type_node, -+ ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT)); -+ mask_sr = build_int_cst (unsigned_type_node, -+ ~(AARCH64_FE_ALL_EXCEPT)); -+ -+ ld_fenv_cr = build2 (MODIFY_EXPR, unsigned_type_node, -+ fenv_cr, build_call_expr (get_fpcr, 0)); -+ ld_fenv_sr = build2 (MODIFY_EXPR, unsigned_type_node, -+ fenv_sr, build_call_expr (get_fpsr, 0)); -+ -+ masked_fenv_cr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_cr, mask_cr); -+ masked_fenv_sr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_sr, mask_sr); -+ -+ hold_fnclex_cr = build_call_expr (set_fpcr, 1, masked_fenv_cr); -+ hold_fnclex_sr = build_call_expr (set_fpsr, 1, masked_fenv_sr); -+ -+ hold_fnclex = build2 (COMPOUND_EXPR, void_type_node, hold_fnclex_cr, -+ hold_fnclex_sr); -+ masked_fenv = build2 (COMPOUND_EXPR, void_type_node, masked_fenv_cr, -+ masked_fenv_sr); -+ ld_fenv = build2 (COMPOUND_EXPR, void_type_node, ld_fenv_cr, ld_fenv_sr); -+ -+ *hold = build2 (COMPOUND_EXPR, void_type_node, -+ build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv), -+ hold_fnclex); -+ -+ /* Store 
the value of masked_fenv to clear the exceptions: -+ __builtin_aarch64_set_fpsr (masked_fenv_sr); */ -+ -+ *clear = build_call_expr (set_fpsr, 1, masked_fenv_sr); -+ -+ /* Generate the equivalent of : -+ unsigned int new_fenv_var; -+ new_fenv_var = __builtin_aarch64_get_fpsr (); -+ -+ __builtin_aarch64_set_fpsr (fenv_sr); -+ -+ __atomic_feraiseexcept (new_fenv_var); */ -+ -+ new_fenv_var = create_tmp_var (unsigned_type_node, NULL); -+ reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, -+ new_fenv_var, build_call_expr (get_fpsr, 0)); -+ restore_fnenv = build_call_expr (set_fpsr, 1, fenv_sr); -+ atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT); -+ update_call = build_call_expr (atomic_feraiseexcept, 1, -+ fold_convert (integer_type_node, new_fenv_var)); -+ *update = build2 (COMPOUND_EXPR, void_type_node, -+ build2 (COMPOUND_EXPR, void_type_node, -+ reload_fenv, restore_fnenv), update_call); -+} -+ -+ - #undef AARCH64_CHECK_BUILTIN_MODE - #undef AARCH64_FIND_FRINT_VARIANT --#undef BUILTIN_DX --#undef BUILTIN_SDQ_I --#undef BUILTIN_SD_HSI --#undef BUILTIN_V2F --#undef BUILTIN_VALL --#undef BUILTIN_VB --#undef BUILTIN_VD --#undef BUILTIN_VDC --#undef BUILTIN_VDIC --#undef BUILTIN_VDN --#undef BUILTIN_VDQ --#undef BUILTIN_VDQF --#undef BUILTIN_VDQH --#undef BUILTIN_VDQHS --#undef BUILTIN_VDQIF --#undef BUILTIN_VDQM --#undef BUILTIN_VDQV --#undef BUILTIN_VDQ_BHSI --#undef BUILTIN_VDQ_I --#undef BUILTIN_VDW --#undef BUILTIN_VD_BHSI --#undef BUILTIN_VD_HSI --#undef BUILTIN_VD_RE --#undef BUILTIN_VQ --#undef BUILTIN_VQN --#undef BUILTIN_VQW --#undef BUILTIN_VQ_HSI --#undef BUILTIN_VQ_S --#undef BUILTIN_VSDQ_HSI --#undef BUILTIN_VSDQ_I --#undef BUILTIN_VSDQ_I_BHSI --#undef BUILTIN_VSDQ_I_DI --#undef BUILTIN_VSD_HSI --#undef BUILTIN_VSQN_HSDI --#undef BUILTIN_VSTRUCT - #undef CF0 - #undef CF1 - #undef CF2 -@@ -1251,3 +1419,4 @@ - #undef VAR10 - #undef VAR11 - -+#include "gt-aarch64-builtins.h" ---- a/src/gcc/config/aarch64/thunderx.md -+++ b/src/gcc/config/aarch64/thunderx.md -@@ -0,0 +1,260 @@ -+;; Cavium ThunderX pipeline description -+;; Copyright (C) 2014 Free Software Foundation, Inc. -+;; -+;; Written by Andrew Pinski <apinski@cavium.com> -+ -+;; This file is part of GCC. -+ -+;; GCC is free software; you can redistribute it and/or modify -+;; it under the terms of the GNU General Public License as published by -+;; the Free Software Foundation; either version 3, or (at your option) -+;; any later version. -+ -+;; GCC is distributed in the hope that it will be useful, -+;; but WITHOUT ANY WARRANTY; without even the implied warranty of -+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+;; GNU General Public License for more details. -+ -+;; You should have received a copy of the GNU General Public License -+;; along with GCC; see the file COPYING3. If not see -+;; <http://www.gnu.org/licenses/>. -+;; Copyright (C) 2004, 2005, 2006 Cavium Networks. -+ -+ -+;; Thunder is a dual-issue processor that can issue all instructions on -+;; pipe0 and a subset on pipe1. 
-+ -+ -+(define_automaton "thunderx_main, thunderx_mult, thunderx_divide, thunderx_simd") -+ -+(define_cpu_unit "thunderx_pipe0" "thunderx_main") -+(define_cpu_unit "thunderx_pipe1" "thunderx_main") -+(define_cpu_unit "thunderx_mult" "thunderx_mult") -+(define_cpu_unit "thunderx_divide" "thunderx_divide") -+(define_cpu_unit "thunderx_simd" "thunderx_simd") -+ -+(define_insn_reservation "thunderx_add" 1 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "adc_imm,adc_reg,adr,alu_imm,alu_reg,alus_imm,alus_reg,extend,logic_imm,logic_reg,logics_imm,logics_reg,mov_imm,mov_reg")) -+ "thunderx_pipe0 | thunderx_pipe1") -+ -+(define_insn_reservation "thunderx_shift" 1 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "bfm,extend,shift_imm,shift_reg")) -+ "thunderx_pipe0 | thunderx_pipe1") -+ -+ -+;; Arthimentic instructions with an extra shift or extend is two cycles. -+;; FIXME: This needs more attributes on aarch64 than what is currently there; -+;; this is conserative for now. -+;; Except this is not correct as this is only for !(LSL && shift by 0/1/2/3) -+;; Except this is not correct as this is only for !(zero extend) -+ -+(define_insn_reservation "thunderx_arith_shift" 2 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "alu_ext,alu_shift_imm,alu_shift_reg,alus_ext,logic_shift_imm,logic_shift_reg,logics_shift_imm,logics_shift_reg,alus_shift_imm")) -+ "thunderx_pipe0 | thunderx_pipe1") -+ -+(define_insn_reservation "thunderx_csel" 2 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "csel")) -+ "thunderx_pipe0 | thunderx_pipe1") -+ -+;; Multiply and mulitply accumulate and count leading zeros can only happen on pipe 1 -+ -+(define_insn_reservation "thunderx_mul" 4 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "mul,muls,mla,mlas,clz,smull,umull,smlal,umlal")) -+ "thunderx_pipe1 + thunderx_mult") -+ -+;; Multiply high instructions take an extra cycle and cause the muliply unit to -+;; be busy for an extra cycle. 
-+ -+;(define_insn_reservation "thunderx_mul_high" 5 -+; (and (eq_attr "tune" "thunderx") -+; (eq_attr "type" "smull,umull")) -+; "thunderx_pipe1 + thunderx_mult") -+ -+(define_insn_reservation "thunderx_div32" 22 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "udiv,sdiv")) -+ "thunderx_pipe1 + thunderx_divide, thunderx_divide * 21") -+ -+;(define_insn_reservation "thunderx_div64" 38 -+; (and (eq_attr "tune" "thunderx") -+; (eq_attr "type" "udiv,sdiv") -+; (eq_attr "mode" "DI")) -+; "thunderx_pipe1 + thunderx_divide, thunderx_divide * 34") -+ -+;; Stores take one cycle in pipe 0 -+(define_insn_reservation "thunderx_store" 1 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "store1")) -+ "thunderx_pipe0") -+ -+;; Store pair are single issued -+(define_insn_reservation "thunderx_storepair" 1 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "store2")) -+ "thunderx_pipe0 + thunderx_pipe1") -+ -+ -+;; loads (and load pairs) from L1 take 3 cycles in pipe 0 -+(define_insn_reservation "thunderx_load" 3 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "load1, load2")) -+ "thunderx_pipe0") -+ -+(define_insn_reservation "thunderx_brj" 1 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "branch,trap,call")) -+ "thunderx_pipe1") -+ -+;; FPU -+ -+(define_insn_reservation "thunderx_fadd" 4 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "faddd,fadds")) -+ "thunderx_pipe1") -+ -+(define_insn_reservation "thunderx_fconst" 1 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "fconsts,fconstd")) -+ "thunderx_pipe1") -+ -+;; Moves between fp are 2 cycles including min/max/select/abs/neg -+(define_insn_reservation "thunderx_fmov" 2 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "fmov,f_minmaxs,f_minmaxd,fcsel,ffarithd,ffariths")) -+ "thunderx_pipe1") -+ -+(define_insn_reservation "thunderx_fmovgpr" 2 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "f_mrc, f_mcr")) -+ "thunderx_pipe1") -+ -+(define_insn_reservation "thunderx_fmul" 6 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "fmacs,fmacd,fmuls,fmuld")) -+ "thunderx_pipe1") -+ -+(define_insn_reservation "thunderx_fdivs" 12 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "fdivs")) -+ "thunderx_pipe1 + thunderx_divide, thunderx_divide*8") -+ -+(define_insn_reservation "thunderx_fdivd" 22 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "fdivd")) -+ "thunderx_pipe1 + thunderx_divide, thunderx_divide*18") -+ -+(define_insn_reservation "thunderx_fsqrts" 17 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "fsqrts")) -+ "thunderx_pipe1 + thunderx_divide, thunderx_divide*13") -+ -+(define_insn_reservation "thunderx_fsqrtd" 28 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "fsqrtd")) -+ "thunderx_pipe1 + thunderx_divide, thunderx_divide*31") -+ -+;; The rounding conversion inside fp is 4 cycles -+(define_insn_reservation "thunderx_frint" 4 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "f_rints,f_rintd")) -+ "thunderx_pipe1") -+ -+;; Float to integer with a move from int to/from float is 6 cycles -+(define_insn_reservation "thunderx_f_cvt" 6 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "f_cvt,f_cvtf2i,f_cvti2f")) -+ "thunderx_pipe1") -+ -+;; FP/SIMD load/stores happen in pipe 0 -+;; 64bit Loads register/pairs are 4 cycles from L1 -+(define_insn_reservation "thunderx_64simd_fp_load" 4 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "f_loadd,f_loads,neon_load1_1reg,\ -+ neon_load1_1reg_q,neon_load1_2reg")) -+ "thunderx_pipe0") -+ -+;; 128bit load pair is 
singled issue and 4 cycles from L1 -+(define_insn_reservation "thunderx_128simd_pair_load" 4 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "neon_load1_2reg_q")) -+ "thunderx_pipe0+thunderx_pipe1") -+ -+;; FP/SIMD Stores takes one cycle in pipe 0 -+(define_insn_reservation "thunderx_simd_fp_store" 1 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "f_stored,f_stores,neon_store1_1reg,neon_store1_1reg_q")) -+ "thunderx_pipe0") -+ -+;; 64bit neon store pairs are single issue for one cycle -+(define_insn_reservation "thunderx_64neon_storepair" 1 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "neon_store1_2reg")) -+ "thunderx_pipe0 + thunderx_pipe1") -+ -+;; 128bit neon store pair are single issued for two cycles -+(define_insn_reservation "thunderx_128neon_storepair" 2 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "neon_store1_2reg_q")) -+ "(thunderx_pipe0 + thunderx_pipe1)*2") -+ -+ -+;; SIMD/NEON (q forms take an extra cycle) -+ -+;; Thunder simd move instruction types - 2/3 cycles -+(define_insn_reservation "thunderx_neon_move" 2 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "neon_logic, neon_bsl, neon_fp_compare_s, \ -+ neon_fp_compare_d, neon_move")) -+ "thunderx_pipe1 + thunderx_simd") -+ -+(define_insn_reservation "thunderx_neon_move_q" 3 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "neon_logic_q, neon_bsl_q, neon_fp_compare_s_q, \ -+ neon_fp_compare_d_q, neon_move_q")) -+ "thunderx_pipe1 + thunderx_simd, thunderx_simd") -+ -+ -+;; Thunder simd simple/add instruction types - 4/5 cycles -+ -+(define_insn_reservation "thunderx_neon_add" 4 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "neon_reduc_add, neon_reduc_minmax, neon_fp_reduc_add_s, \ -+ neon_fp_reduc_add_d, neon_fp_to_int_s, neon_fp_to_int_d, \ -+ neon_add_halve, neon_sub_halve, neon_qadd, neon_compare, \ -+ neon_compare_zero, neon_minmax, neon_abd, neon_add, neon_sub, \ -+ neon_fp_minmax_s, neon_fp_minmax_d, neon_reduc_add, neon_cls, \ -+ neon_qabs, neon_qneg, neon_fp_addsub_s, neon_fp_addsub_d")) -+ "thunderx_pipe1 + thunderx_simd") -+ -+;; BIG NOTE: neon_add_long/neon_sub_long don't have a q form which is incorrect -+ -+(define_insn_reservation "thunderx_neon_add_q" 5 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "neon_reduc_add_q, neon_reduc_minmax_q, neon_fp_reduc_add_s_q, \ -+ neon_fp_reduc_add_d_q, neon_fp_to_int_s_q, neon_fp_to_int_d_q, \ -+ neon_add_halve_q, neon_sub_halve_q, neon_qadd_q, neon_compare_q, \ -+ neon_compare_zero_q, neon_minmax_q, neon_abd_q, neon_add_q, neon_sub_q, \ -+ neon_fp_minmax_s_q, neon_fp_minmax_d_q, neon_reduc_add_q, neon_cls_q, \ -+ neon_qabs_q, neon_qneg_q, neon_fp_addsub_s_q, neon_fp_addsub_d_q, \ -+ neon_add_long, neon_sub_long")) -+ "thunderx_pipe1 + thunderx_simd, thunderx_simd") -+ -+ -+;; Thunder 128bit SIMD reads the upper halve in cycle 2 and writes in the last cycle -+(define_bypass 2 "thunderx_neon_move_q" "thunderx_neon_move_q, thunderx_neon_add_q") -+(define_bypass 4 "thunderx_neon_add_q" "thunderx_neon_move_q, thunderx_neon_add_q") -+ -+;; Assume both pipes are needed for unknown and multiple-instruction -+;; patterns. -+ -+(define_insn_reservation "thunderx_unknown" 1 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "untyped,multiple")) -+ "thunderx_pipe0 + thunderx_pipe1") -+ -+ ---- a/src/gcc/config/aarch64/aarch64-protos.h -+++ b/src/gcc/config/aarch64/aarch64-protos.h -@@ -108,9 +108,22 @@ - cost models and vectors for address cost calculations, register - move costs and memory move costs. 
*/ - -+/* Scaled addressing modes can vary cost depending on the mode of the -+ value to be loaded/stored. QImode values cannot use scaled -+ addressing modes. */ -+ -+struct scale_addr_mode_cost -+{ -+ const int hi; -+ const int si; -+ const int di; -+ const int ti; -+}; -+ - /* Additional cost for addresses. */ - struct cpu_addrcost_table - { -+ const struct scale_addr_mode_cost addr_scale_costs; - const int pre_modify; - const int post_modify; - const int register_offset; -@@ -160,6 +173,7 @@ - }; - - HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned); -+int aarch64_get_condition_code (rtx); - bool aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode); - bool aarch64_cannot_change_mode_class (enum machine_mode, - enum machine_mode, -@@ -166,7 +180,9 @@ - enum reg_class); - enum aarch64_symbol_type - aarch64_classify_symbolic_expression (rtx, enum aarch64_symbol_context); -+bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT); - bool aarch64_constant_address_p (rtx); -+bool aarch64_expand_movmem (rtx *); - bool aarch64_float_const_zero_rtx_p (rtx); - bool aarch64_function_arg_regno_p (unsigned); - bool aarch64_gen_movmemqi (rtx *); -@@ -175,9 +191,12 @@ - bool aarch64_is_long_call_p (rtx); - bool aarch64_label_mentioned_p (rtx); - bool aarch64_legitimate_pic_operand_p (rtx); -+bool aarch64_modes_tieable_p (enum machine_mode mode1, -+ enum machine_mode mode2); - bool aarch64_move_imm (HOST_WIDE_INT, enum machine_mode); - bool aarch64_mov_operand_p (rtx, enum aarch64_symbol_context, - enum machine_mode); -+bool aarch64_offset_7bit_signed_scaled_p (enum machine_mode, HOST_WIDE_INT); - char *aarch64_output_scalar_simd_mov_immediate (rtx, enum machine_mode); - char *aarch64_output_simd_mov_immediate (rtx, enum machine_mode, unsigned); - bool aarch64_pad_arg_upward (enum machine_mode, const_tree); -@@ -184,6 +203,8 @@ - bool aarch64_pad_reg_upward (enum machine_mode, const_tree, bool); - bool aarch64_regno_ok_for_base_p (int, bool); - bool aarch64_regno_ok_for_index_p (int, bool); -+bool aarch64_simd_check_vect_par_cnst_half (rtx op, enum machine_mode mode, -+ bool high); - bool aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode); - bool aarch64_simd_imm_zero_p (rtx, enum machine_mode); - bool aarch64_simd_scalar_immediate_valid_for_move (rtx, enum machine_mode); -@@ -200,6 +221,8 @@ - enum aarch64_symbol_type aarch64_classify_tls_symbol (rtx); - enum reg_class aarch64_regno_regclass (unsigned); - int aarch64_asm_preferred_eh_data_format (int, int); -+enum machine_mode aarch64_hard_regno_caller_save_mode (unsigned, unsigned, -+ enum machine_mode); - int aarch64_hard_regno_mode_ok (unsigned, enum machine_mode); - int aarch64_hard_regno_nregs (unsigned, enum machine_mode); - int aarch64_simd_attr_length_move (rtx); -@@ -291,4 +314,5 @@ - extern void aarch64_final_prescan_insn (rtx); - extern bool - aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel); -+void aarch64_atomic_assign_expand_fenv (tree *, tree *, tree *); - #endif /* GCC_AARCH64_PROTOS_H */ ---- a/src/gcc/config/aarch64/aarch64-simd-builtins.def -+++ b/src/gcc/config/aarch64/aarch64-simd-builtins.def -@@ -47,36 +47,44 @@ - VAR1 (UNOP, addp, 0, di) - BUILTIN_VDQ_BHSI (UNOP, clz, 2) - -- BUILTIN_VALL (GETLANE, get_lane, 0) -- VAR1 (GETLANE, get_lane, 0, di) - BUILTIN_VALL (GETLANE, be_checked_get_lane, 0) - -- BUILTIN_VD_RE (REINTERP, reinterpretdi, 0) -- BUILTIN_VDC (REINTERP, reinterpretv8qi, 0) -- BUILTIN_VDC (REINTERP, reinterpretv4hi, 0) -- BUILTIN_VDC (REINTERP, 
reinterpretv2si, 0) -- BUILTIN_VDC (REINTERP, reinterpretv2sf, 0) -- BUILTIN_VQ (REINTERP, reinterpretv16qi, 0) -- BUILTIN_VQ (REINTERP, reinterpretv8hi, 0) -- BUILTIN_VQ (REINTERP, reinterpretv4si, 0) -- BUILTIN_VQ (REINTERP, reinterpretv4sf, 0) -- BUILTIN_VQ (REINTERP, reinterpretv2di, 0) -- BUILTIN_VQ (REINTERP, reinterpretv2df, 0) -+ VAR1 (REINTERP_SS, reinterpretdi, 0, df) -+ VAR1 (REINTERP_SS, reinterpretv8qi, 0, df) -+ VAR1 (REINTERP_SS, reinterpretv4hi, 0, df) -+ VAR1 (REINTERP_SS, reinterpretv2si, 0, df) -+ VAR1 (REINTERP_SS, reinterpretv2sf, 0, df) -+ BUILTIN_VD (REINTERP_SS, reinterpretdf, 0) - -- BUILTIN_VDQ_I (BINOP, dup_lane, 0) -+ BUILTIN_VD (REINTERP_SU, reinterpretdf, 0) -+ -+ VAR1 (REINTERP_US, reinterpretdi, 0, df) -+ VAR1 (REINTERP_US, reinterpretv8qi, 0, df) -+ VAR1 (REINTERP_US, reinterpretv4hi, 0, df) -+ VAR1 (REINTERP_US, reinterpretv2si, 0, df) -+ VAR1 (REINTERP_US, reinterpretv2sf, 0, df) -+ -+ BUILTIN_VD (REINTERP_SP, reinterpretdf, 0) -+ -+ VAR1 (REINTERP_PS, reinterpretdi, 0, df) -+ VAR1 (REINTERP_PS, reinterpretv8qi, 0, df) -+ VAR1 (REINTERP_PS, reinterpretv4hi, 0, df) -+ VAR1 (REINTERP_PS, reinterpretv2si, 0, df) -+ VAR1 (REINTERP_PS, reinterpretv2sf, 0, df) -+ - /* Implemented by aarch64_<sur>q<r>shl<mode>. */ - BUILTIN_VSDQ_I (BINOP, sqshl, 0) -- BUILTIN_VSDQ_I (BINOP, uqshl, 0) -+ BUILTIN_VSDQ_I (BINOP_UUS, uqshl, 0) - BUILTIN_VSDQ_I (BINOP, sqrshl, 0) -- BUILTIN_VSDQ_I (BINOP, uqrshl, 0) -+ BUILTIN_VSDQ_I (BINOP_UUS, uqrshl, 0) - /* Implemented by aarch64_<su_optab><optab><mode>. */ - BUILTIN_VSDQ_I (BINOP, sqadd, 0) -- BUILTIN_VSDQ_I (BINOP, uqadd, 0) -+ BUILTIN_VSDQ_I (BINOPU, uqadd, 0) - BUILTIN_VSDQ_I (BINOP, sqsub, 0) -- BUILTIN_VSDQ_I (BINOP, uqsub, 0) -+ BUILTIN_VSDQ_I (BINOPU, uqsub, 0) - /* Implemented by aarch64_<sur>qadd<mode>. */ -- BUILTIN_VSDQ_I (BINOP, suqadd, 0) -- BUILTIN_VSDQ_I (BINOP, usqadd, 0) -+ BUILTIN_VSDQ_I (BINOP_SSU, suqadd, 0) -+ BUILTIN_VSDQ_I (BINOP_UUS, usqadd, 0) - - /* Implemented by aarch64_get_dreg<VSTRUCT:mode><VDC:mode>. */ - BUILTIN_VDC (GETLANE, get_dregoi, 0) -@@ -98,6 +106,10 @@ - BUILTIN_VQ (LOADSTRUCT, ld2, 0) - BUILTIN_VQ (LOADSTRUCT, ld3, 0) - BUILTIN_VQ (LOADSTRUCT, ld4, 0) -+ /* Implemented by aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>. */ -+ BUILTIN_VALLDIF (LOADSTRUCT, ld2r, 0) -+ BUILTIN_VALLDIF (LOADSTRUCT, ld3r, 0) -+ BUILTIN_VALLDIF (LOADSTRUCT, ld4r, 0) - /* Implemented by aarch64_st<VSTRUCT:nregs><VDC:mode>. */ - BUILTIN_VDC (STORESTRUCT, st2, 0) - BUILTIN_VDC (STORESTRUCT, st3, 0) -@@ -107,6 +119,10 @@ - BUILTIN_VQ (STORESTRUCT, st3, 0) - BUILTIN_VQ (STORESTRUCT, st4, 0) - -+ BUILTIN_VQ (STORESTRUCT_LANE, st2_lane, 0) -+ BUILTIN_VQ (STORESTRUCT_LANE, st3_lane, 0) -+ BUILTIN_VQ (STORESTRUCT_LANE, st4_lane, 0) -+ - BUILTIN_VQW (BINOP, saddl2, 0) - BUILTIN_VQW (BINOP, uaddl2, 0) - BUILTIN_VQW (BINOP, ssubl2, 0) -@@ -142,19 +158,19 @@ - BUILTIN_VSQN_HSDI (UNOP, sqmovn, 0) - BUILTIN_VSQN_HSDI (UNOP, uqmovn, 0) - /* Implemented by aarch64_s<optab><mode>. 
*/ -- BUILTIN_VSDQ_I_BHSI (UNOP, sqabs, 0) -- BUILTIN_VSDQ_I_BHSI (UNOP, sqneg, 0) -+ BUILTIN_VSDQ_I (UNOP, sqabs, 0) -+ BUILTIN_VSDQ_I (UNOP, sqneg, 0) - -- BUILTIN_VSD_HSI (QUADOP, sqdmlal_lane, 0) -- BUILTIN_VSD_HSI (QUADOP, sqdmlsl_lane, 0) -- BUILTIN_VSD_HSI (QUADOP, sqdmlal_laneq, 0) -- BUILTIN_VSD_HSI (QUADOP, sqdmlsl_laneq, 0) -+ BUILTIN_VSD_HSI (TERNOP_LANE, sqdmlal_lane, 0) -+ BUILTIN_VSD_HSI (TERNOP_LANE, sqdmlsl_lane, 0) -+ BUILTIN_VSD_HSI (TERNOP_LANE, sqdmlal_laneq, 0) -+ BUILTIN_VSD_HSI (TERNOP_LANE, sqdmlsl_laneq, 0) - BUILTIN_VQ_HSI (TERNOP, sqdmlal2, 0) - BUILTIN_VQ_HSI (TERNOP, sqdmlsl2, 0) -- BUILTIN_VQ_HSI (QUADOP, sqdmlal2_lane, 0) -- BUILTIN_VQ_HSI (QUADOP, sqdmlsl2_lane, 0) -- BUILTIN_VQ_HSI (QUADOP, sqdmlal2_laneq, 0) -- BUILTIN_VQ_HSI (QUADOP, sqdmlsl2_laneq, 0) -+ BUILTIN_VQ_HSI (TERNOP_LANE, sqdmlal2_lane, 0) -+ BUILTIN_VQ_HSI (TERNOP_LANE, sqdmlsl2_lane, 0) -+ BUILTIN_VQ_HSI (TERNOP_LANE, sqdmlal2_laneq, 0) -+ BUILTIN_VQ_HSI (TERNOP_LANE, sqdmlsl2_laneq, 0) - BUILTIN_VQ_HSI (TERNOP, sqdmlal2_n, 0) - BUILTIN_VQ_HSI (TERNOP, sqdmlsl2_n, 0) - /* Implemented by aarch64_sqdml<SBINQOPS:as>l<mode>. */ -@@ -186,9 +202,9 @@ - BUILTIN_VSDQ_I_DI (BINOP, ashl, 3) - /* Implemented by aarch64_<sur>shl<mode>. */ - BUILTIN_VSDQ_I_DI (BINOP, sshl, 0) -- BUILTIN_VSDQ_I_DI (BINOP, ushl, 0) -+ BUILTIN_VSDQ_I_DI (BINOP_UUS, ushl, 0) - BUILTIN_VSDQ_I_DI (BINOP, srshl, 0) -- BUILTIN_VSDQ_I_DI (BINOP, urshl, 0) -+ BUILTIN_VSDQ_I_DI (BINOP_UUS, urshl, 0) - - BUILTIN_VDQ_I (SHIFTIMM, ashr, 3) - VAR1 (SHIFTIMM, ashr_simd, 0, di) -@@ -196,15 +212,15 @@ - VAR1 (USHIFTIMM, lshr_simd, 0, di) - /* Implemented by aarch64_<sur>shr_n<mode>. */ - BUILTIN_VSDQ_I_DI (SHIFTIMM, srshr_n, 0) -- BUILTIN_VSDQ_I_DI (SHIFTIMM, urshr_n, 0) -+ BUILTIN_VSDQ_I_DI (USHIFTIMM, urshr_n, 0) - /* Implemented by aarch64_<sur>sra_n<mode>. */ - BUILTIN_VSDQ_I_DI (SHIFTACC, ssra_n, 0) -- BUILTIN_VSDQ_I_DI (SHIFTACC, usra_n, 0) -+ BUILTIN_VSDQ_I_DI (USHIFTACC, usra_n, 0) - BUILTIN_VSDQ_I_DI (SHIFTACC, srsra_n, 0) -- BUILTIN_VSDQ_I_DI (SHIFTACC, ursra_n, 0) -+ BUILTIN_VSDQ_I_DI (USHIFTACC, ursra_n, 0) - /* Implemented by aarch64_<sur>shll_n<mode>. */ - BUILTIN_VDW (SHIFTIMM, sshll_n, 0) -- BUILTIN_VDW (SHIFTIMM, ushll_n, 0) -+ BUILTIN_VDW (USHIFTIMM, ushll_n, 0) - /* Implemented by aarch64_<sur>shll2_n<mode>. */ - BUILTIN_VQW (SHIFTIMM, sshll2_n, 0) - BUILTIN_VQW (SHIFTIMM, ushll2_n, 0) -@@ -212,30 +228,19 @@ - BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrun_n, 0) - BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrun_n, 0) - BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrn_n, 0) -- BUILTIN_VSQN_HSDI (SHIFTIMM, uqshrn_n, 0) -+ BUILTIN_VSQN_HSDI (USHIFTIMM, uqshrn_n, 0) - BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrn_n, 0) -- BUILTIN_VSQN_HSDI (SHIFTIMM, uqrshrn_n, 0) -+ BUILTIN_VSQN_HSDI (USHIFTIMM, uqrshrn_n, 0) - /* Implemented by aarch64_<sur>s<lr>i_n<mode>. */ - BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssri_n, 0) -- BUILTIN_VSDQ_I_DI (SHIFTINSERT, usri_n, 0) -+ BUILTIN_VSDQ_I_DI (USHIFTACC, usri_n, 0) - BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssli_n, 0) -- BUILTIN_VSDQ_I_DI (SHIFTINSERT, usli_n, 0) -+ BUILTIN_VSDQ_I_DI (USHIFTACC, usli_n, 0) - /* Implemented by aarch64_<sur>qshl<u>_n<mode>. */ -- BUILTIN_VSDQ_I (SHIFTIMM, sqshlu_n, 0) -+ BUILTIN_VSDQ_I (SHIFTIMM_USS, sqshlu_n, 0) - BUILTIN_VSDQ_I (SHIFTIMM, sqshl_n, 0) -- BUILTIN_VSDQ_I (SHIFTIMM, uqshl_n, 0) -+ BUILTIN_VSDQ_I (USHIFTIMM, uqshl_n, 0) - -- /* Implemented by aarch64_cm<cmp><mode>. 
*/ -- BUILTIN_VALLDI (BINOP, cmeq, 0) -- BUILTIN_VALLDI (BINOP, cmge, 0) -- BUILTIN_VALLDI (BINOP, cmgt, 0) -- BUILTIN_VALLDI (BINOP, cmle, 0) -- BUILTIN_VALLDI (BINOP, cmlt, 0) -- /* Implemented by aarch64_cm<cmp><mode>. */ -- BUILTIN_VSDQ_I_DI (BINOP, cmgeu, 0) -- BUILTIN_VSDQ_I_DI (BINOP, cmgtu, 0) -- BUILTIN_VSDQ_I_DI (BINOP, cmtst, 0) -- - /* Implemented by reduc_<sur>plus_<mode>. */ - BUILTIN_VALL (UNOP, reduc_splus_, 10) - BUILTIN_VDQ (UNOP, reduc_uplus_, 10) -@@ -265,7 +270,7 @@ - BUILTIN_VDQF (UNOP, nearbyint, 2) - BUILTIN_VDQF (UNOP, rint, 2) - BUILTIN_VDQF (UNOP, round, 2) -- BUILTIN_VDQF (UNOP, frintn, 2) -+ BUILTIN_VDQF_DF (UNOP, frintn, 2) - - /* Implemented by l<fcvt_pattern><su_optab><VQDF:mode><vcvt_target>2. */ - VAR1 (UNOP, lbtruncv2sf, 2, v2si) -@@ -330,6 +335,10 @@ - VAR1 (UNOP, floatunsv4si, 2, v4sf) - VAR1 (UNOP, floatunsv2di, 2, v2df) - -+ VAR5 (UNOPU, bswap, 10, v4hi, v8hi, v2si, v4si, v2di) -+ -+ BUILTIN_VB (UNOP, rbit, 0) -+ - /* Implemented by - aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>. */ - BUILTIN_VALL (BINOP, zip1, 0) -@@ -393,3 +402,6 @@ - /* Implemented by aarch64_crypto_pmull<mode>. */ - VAR1 (BINOPP, crypto_pmull, 0, di) - VAR1 (BINOPP, crypto_pmull, 0, v2di) -+ -+ /* Meta-op to check lane bounds of immediate in aarch64_expand_builtin. */ -+ VAR1 (BINOPV, im_lane_bound, 0, si) ---- a/src/gcc/config/aarch64/constraints.md -+++ b/src/gcc/config/aarch64/constraints.md -@@ -21,6 +21,9 @@ - (define_register_constraint "k" "STACK_REG" - "@internal The stack register.") - -+(define_register_constraint "Ucs" "CALLER_SAVE_REGS" -+ "@internal The caller save registers.") -+ - (define_register_constraint "w" "FP_REGS" - "Floating point and SIMD vector registers.") - -@@ -92,6 +95,10 @@ - (and (match_code "const_int") - (match_test "(unsigned HOST_WIDE_INT) ival < 64"))) - -+(define_constraint "Usf" -+ "@internal Usf is a symbol reference." -+ (match_code "symbol_ref")) -+ - (define_constraint "UsM" - "@internal - A constraint that matches the immediate constant -1." ---- a/src/gcc/config/aarch64/aarch64.c -+++ b/src/gcc/config/aarch64/aarch64.c -@@ -62,7 +62,8 @@ - #include "dwarf2.h" - #include "cfgloop.h" - #include "tree-vectorizer.h" --#include "config/arm/aarch-cost-tables.h" -+#include "aarch64-cost-tables.h" -+#include "dumpfile.h" - - /* Defined for convenience. */ - #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT) -@@ -136,12 +137,13 @@ - static void aarch64_override_options_after_change (void); - static bool aarch64_vector_mode_supported_p (enum machine_mode); - static unsigned bit_count (unsigned HOST_WIDE_INT); --static bool aarch64_const_vec_all_same_int_p (rtx, -- HOST_WIDE_INT, HOST_WIDE_INT); -- - static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode, - const unsigned char *sel); -+static int aarch64_address_cost (rtx, enum machine_mode, addr_space_t, bool); - -+/* Major revision number of the ARM Architecture implemented by the target. */ -+unsigned aarch64_architecture_version; -+ - /* The processor for which instructions should be scheduled. 
*/ - enum aarch64_processor aarch64_tune = cortexa53; - -@@ -171,6 +173,15 @@ - #endif - static const struct cpu_addrcost_table generic_addrcost_table = - { -+#if HAVE_DESIGNATED_INITIALIZERS -+ .addr_scale_costs = -+#endif -+ { -+ NAMED_PARAM (hi, 0), -+ NAMED_PARAM (si, 0), -+ NAMED_PARAM (di, 0), -+ NAMED_PARAM (ti, 0), -+ }, - NAMED_PARAM (pre_modify, 0), - NAMED_PARAM (post_modify, 0), - NAMED_PARAM (register_offset, 0), -@@ -181,14 +192,60 @@ - #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 - __extension__ - #endif -+static const struct cpu_addrcost_table cortexa57_addrcost_table = -+{ -+#if HAVE_DESIGNATED_INITIALIZERS -+ .addr_scale_costs = -+#endif -+ { -+ NAMED_PARAM (hi, 1), -+ NAMED_PARAM (si, 0), -+ NAMED_PARAM (di, 0), -+ NAMED_PARAM (ti, 1), -+ }, -+ NAMED_PARAM (pre_modify, 0), -+ NAMED_PARAM (post_modify, 0), -+ NAMED_PARAM (register_offset, 0), -+ NAMED_PARAM (register_extend, 0), -+ NAMED_PARAM (imm_offset, 0), -+}; -+ -+#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 -+__extension__ -+#endif - static const struct cpu_regmove_cost generic_regmove_cost = - { - NAMED_PARAM (GP2GP, 1), - NAMED_PARAM (GP2FP, 2), - NAMED_PARAM (FP2GP, 2), -- /* We currently do not provide direct support for TFmode Q->Q move. -- Therefore we need to raise the cost above 2 in order to have -- reload handle the situation. */ -+ NAMED_PARAM (FP2FP, 2) -+}; -+ -+static const struct cpu_regmove_cost cortexa57_regmove_cost = -+{ -+ NAMED_PARAM (GP2GP, 1), -+ /* Avoid the use of slow int<->fp moves for spilling by setting -+ their cost higher than memmov_cost. */ -+ NAMED_PARAM (GP2FP, 5), -+ NAMED_PARAM (FP2GP, 5), -+ NAMED_PARAM (FP2FP, 2) -+}; -+ -+static const struct cpu_regmove_cost cortexa53_regmove_cost = -+{ -+ NAMED_PARAM (GP2GP, 1), -+ /* Avoid the use of slow int<->fp moves for spilling by setting -+ their cost higher than memmov_cost. */ -+ NAMED_PARAM (GP2FP, 5), -+ NAMED_PARAM (FP2GP, 5), -+ NAMED_PARAM (FP2FP, 2) -+}; -+ -+static const struct cpu_regmove_cost thunderx_regmove_cost = -+{ -+ NAMED_PARAM (GP2GP, 2), -+ NAMED_PARAM (GP2FP, 2), -+ NAMED_PARAM (FP2GP, 6), - NAMED_PARAM (FP2FP, 4) - }; - -@@ -212,9 +269,29 @@ - NAMED_PARAM (cond_not_taken_branch_cost, 1) - }; - -+/* Generic costs for vector insn classes. 
*/ - #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 - __extension__ - #endif -+static const struct cpu_vector_cost cortexa57_vector_cost = -+{ -+ NAMED_PARAM (scalar_stmt_cost, 1), -+ NAMED_PARAM (scalar_load_cost, 4), -+ NAMED_PARAM (scalar_store_cost, 1), -+ NAMED_PARAM (vec_stmt_cost, 3), -+ NAMED_PARAM (vec_to_scalar_cost, 8), -+ NAMED_PARAM (scalar_to_vec_cost, 8), -+ NAMED_PARAM (vec_align_load_cost, 5), -+ NAMED_PARAM (vec_unalign_load_cost, 5), -+ NAMED_PARAM (vec_unalign_store_cost, 1), -+ NAMED_PARAM (vec_store_cost, 1), -+ NAMED_PARAM (cond_taken_branch_cost, 1), -+ NAMED_PARAM (cond_not_taken_branch_cost, 1) -+}; -+ -+#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 -+__extension__ -+#endif - static const struct tune_params generic_tunings = - { - &cortexa57_extra_costs, -@@ -229,7 +306,7 @@ - { - &cortexa53_extra_costs, - &generic_addrcost_table, -- &generic_regmove_cost, -+ &cortexa53_regmove_cost, - &generic_vector_cost, - NAMED_PARAM (memmov_cost, 4), - NAMED_PARAM (issue_rate, 2) -@@ -238,13 +315,23 @@ - static const struct tune_params cortexa57_tunings = - { - &cortexa57_extra_costs, -- &generic_addrcost_table, -- &generic_regmove_cost, -- &generic_vector_cost, -+ &cortexa57_addrcost_table, -+ &cortexa57_regmove_cost, -+ &cortexa57_vector_cost, - NAMED_PARAM (memmov_cost, 4), - NAMED_PARAM (issue_rate, 3) - }; - -+static const struct tune_params thunderx_tunings = -+{ -+ &thunderx_extra_costs, -+ &generic_addrcost_table, -+ &thunderx_regmove_cost, -+ &generic_vector_cost, -+ NAMED_PARAM (memmov_cost, 6), -+ NAMED_PARAM (issue_rate, 2) -+}; -+ - /* A processor implementing AArch64. */ - struct processor - { -@@ -251,6 +338,7 @@ - const char *const name; - enum aarch64_processor core; - const char *arch; -+ unsigned architecture_version; - const unsigned long flags; - const struct tune_params *const tune; - }; -@@ -259,11 +347,13 @@ - static const struct processor all_cores[] = - { - #define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \ -- {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings}, -+ {NAME, IDENT, #ARCH, ARCH,\ -+ FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings}, - #include "aarch64-cores.def" - #undef AARCH64_CORE -- {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings}, -- {NULL, aarch64_none, NULL, 0, NULL} -+ {"generic", cortexa53, "8", 8,\ -+ AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings}, -+ {NULL, aarch64_none, NULL, 0, 0, NULL} - }; - - /* Architectures implementing AArch64. */ -@@ -270,10 +360,10 @@ - static const struct processor all_architectures[] = - { - #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \ -- {NAME, CORE, #ARCH, FLAGS, NULL}, -+ {NAME, CORE, #ARCH, ARCH, FLAGS, NULL}, - #include "aarch64-arches.def" - #undef AARCH64_ARCH -- {NULL, aarch64_none, NULL, 0, NULL} -+ {NULL, aarch64_none, NULL, 0, 0, NULL} - }; - - /* Target specification. These are populated as commandline arguments -@@ -424,6 +514,24 @@ - return 0; - } - -+/* Implement HARD_REGNO_CALLER_SAVE_MODE. */ -+enum machine_mode -+aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs, -+ enum machine_mode mode) -+{ -+ /* Handle modes that fit within single registers. */ -+ if (nregs == 1 && GET_MODE_SIZE (mode) <= 16) -+ { -+ if (GET_MODE_SIZE (mode) >= 4) -+ return mode; -+ else -+ return SImode; -+ } -+ /* Fall back to generic for multi-reg and very large modes. 
*/ -+ else -+ return choose_hard_reg_mode (regno, nregs, false); -+} -+ - /* Return true if calls to DECL should be treated as - long-calls (ie called via a register). */ - static bool -@@ -444,7 +552,7 @@ - represent an expression that matches an extend operation. The - operands represent the paramters from - -- (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)). */ -+ (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */ - bool - aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm, - rtx extract_imm) -@@ -636,12 +744,24 @@ - - case SYMBOL_SMALL_TLSDESC: - { -- rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM); -+ enum machine_mode mode = GET_MODE (dest); -+ rtx x0 = gen_rtx_REG (mode, R0_REGNUM); - rtx tp; - -- emit_insn (gen_tlsdesc_small (imm)); -+ gcc_assert (mode == Pmode || mode == ptr_mode); -+ -+ /* In ILP32, the got entry is always of SImode size. Unlike -+ small GOT, the dest is fixed at reg 0. */ -+ if (TARGET_ILP32) -+ emit_insn (gen_tlsdesc_small_si (imm)); -+ else -+ emit_insn (gen_tlsdesc_small_di (imm)); - tp = aarch64_load_tp (NULL); -- emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0))); -+ -+ if (mode != Pmode) -+ tp = gen_lowpart (mode, tp); -+ -+ emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, x0))); - set_unique_reg_note (get_last_insn (), REG_EQUIV, imm); - return; - } -@@ -648,10 +768,34 @@ - - case SYMBOL_SMALL_GOTTPREL: - { -- rtx tmp_reg = gen_reg_rtx (Pmode); -+ /* In ILP32, the mode of dest can be either SImode or DImode, -+ while the got entry is always of SImode size. The mode of -+ dest depends on how dest is used: if dest is assigned to a -+ pointer (e.g. in the memory), it has SImode; it may have -+ DImode if dest is dereferenced to access the memeory. -+ This is why we have to handle three different tlsie_small -+ patterns here (two patterns for ILP32). */ -+ enum machine_mode mode = GET_MODE (dest); -+ rtx tmp_reg = gen_reg_rtx (mode); - rtx tp = aarch64_load_tp (NULL); -- emit_insn (gen_tlsie_small (tmp_reg, imm)); -- emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg))); -+ -+ if (mode == ptr_mode) -+ { -+ if (mode == DImode) -+ emit_insn (gen_tlsie_small_di (tmp_reg, imm)); -+ else -+ { -+ emit_insn (gen_tlsie_small_si (tmp_reg, imm)); -+ tp = gen_lowpart (mode, tp); -+ } -+ } -+ else -+ { -+ gcc_assert (mode == Pmode); -+ emit_insn (gen_tlsie_small_sidi (tmp_reg, imm)); -+ } -+ -+ emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, tmp_reg))); - set_unique_reg_note (get_last_insn (), REG_EQUIV, imm); - return; - } -@@ -889,10 +1033,10 @@ - return plus_constant (mode, reg, offset); - } - --void --aarch64_expand_mov_immediate (rtx dest, rtx imm) -+static int -+aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate, -+ machine_mode mode) - { -- enum machine_mode mode = GET_MODE (dest); - unsigned HOST_WIDE_INT mask; - int i; - bool first; -@@ -899,86 +1043,15 @@ - unsigned HOST_WIDE_INT val; - bool subtargets; - rtx subtarget; -- int one_match, zero_match; -+ int one_match, zero_match, first_not_ffff_match; -+ int num_insns = 0; - -- gcc_assert (mode == SImode || mode == DImode); -- -- /* Check on what type of symbol it is. */ -- if (GET_CODE (imm) == SYMBOL_REF -- || GET_CODE (imm) == LABEL_REF -- || GET_CODE (imm) == CONST) -- { -- rtx mem, base, offset; -- enum aarch64_symbol_type sty; -- -- /* If we have (const (plus symbol offset)), separate out the offset -- before we start classifying the symbol. 
*/ -- split_const (imm, &base, &offset); -- -- sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR); -- switch (sty) -- { -- case SYMBOL_FORCE_TO_MEM: -- if (offset != const0_rtx -- && targetm.cannot_force_const_mem (mode, imm)) -- { -- gcc_assert (can_create_pseudo_p ()); -- base = aarch64_force_temporary (mode, dest, base); -- base = aarch64_add_offset (mode, NULL, base, INTVAL (offset)); -- aarch64_emit_move (dest, base); -- return; -- } -- mem = force_const_mem (ptr_mode, imm); -- gcc_assert (mem); -- if (mode != ptr_mode) -- mem = gen_rtx_ZERO_EXTEND (mode, mem); -- emit_insn (gen_rtx_SET (VOIDmode, dest, mem)); -- return; -- -- case SYMBOL_SMALL_TLSGD: -- case SYMBOL_SMALL_TLSDESC: -- case SYMBOL_SMALL_GOTTPREL: -- case SYMBOL_SMALL_GOT: -- case SYMBOL_TINY_GOT: -- if (offset != const0_rtx) -- { -- gcc_assert(can_create_pseudo_p ()); -- base = aarch64_force_temporary (mode, dest, base); -- base = aarch64_add_offset (mode, NULL, base, INTVAL (offset)); -- aarch64_emit_move (dest, base); -- return; -- } -- /* FALLTHRU */ -- -- case SYMBOL_SMALL_TPREL: -- case SYMBOL_SMALL_ABSOLUTE: -- case SYMBOL_TINY_ABSOLUTE: -- aarch64_load_symref_appropriately (dest, imm, sty); -- return; -- -- default: -- gcc_unreachable (); -- } -- } -- - if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode)) - { -- emit_insn (gen_rtx_SET (VOIDmode, dest, imm)); -- return; -- } -- -- if (!CONST_INT_P (imm)) -- { -- if (GET_CODE (imm) == HIGH) -+ if (generate) - emit_insn (gen_rtx_SET (VOIDmode, dest, imm)); -- else -- { -- rtx mem = force_const_mem (mode, imm); -- gcc_assert (mem); -- emit_insn (gen_rtx_SET (VOIDmode, dest, mem)); -- } -- -- return; -+ num_insns++; -+ return num_insns; - } - - if (mode == SImode) -@@ -986,10 +1059,15 @@ - /* We know we can't do this in 1 insn, and we must be able to do it - in two; so don't mess around looking for sequences that don't buy - us anything. */ -- emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff))); -- emit_insn (gen_insv_immsi (dest, GEN_INT (16), -- GEN_INT ((INTVAL (imm) >> 16) & 0xffff))); -- return; -+ if (generate) -+ { -+ emit_insn (gen_rtx_SET (VOIDmode, dest, -+ GEN_INT (INTVAL (imm) & 0xffff))); -+ emit_insn (gen_insv_immsi (dest, GEN_INT (16), -+ GEN_INT ((INTVAL (imm) >> 16) & 0xffff))); -+ } -+ num_insns += 2; -+ return num_insns; - } - - /* Remaining cases are all for DImode. */ -@@ -1000,29 +1078,34 @@ - one_match = 0; - zero_match = 0; - mask = 0xffff; -+ first_not_ffff_match = -1; - - for (i = 0; i < 64; i += 16, mask <<= 16) - { -- if ((val & mask) == 0) -- zero_match++; -- else if ((val & mask) == mask) -+ if ((val & mask) == mask) - one_match++; -+ else -+ { -+ if (first_not_ffff_match < 0) -+ first_not_ffff_match = i; -+ if ((val & mask) == 0) -+ zero_match++; -+ } - } - - if (one_match == 2) - { -- mask = 0xffff; -- for (i = 0; i < 64; i += 16, mask <<= 16) -+ /* Set one of the quarters and then insert back into result. 
*/ -+ mask = 0xffffll << first_not_ffff_match; -+ if (generate) - { -- if ((val & mask) != mask) -- { -- emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask))); -- emit_insn (gen_insv_immdi (dest, GEN_INT (i), -- GEN_INT ((val >> i) & 0xffff))); -- return; -- } -+ emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask))); -+ emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match), -+ GEN_INT ((val >> first_not_ffff_match) -+ & 0xffff))); - } -- gcc_unreachable (); -+ num_insns += 2; -+ return num_insns; - } - - if (zero_match == 2) -@@ -1035,42 +1118,55 @@ - - if (aarch64_uimm12_shift (val - (val & mask))) - { -- subtarget = subtargets ? gen_reg_rtx (DImode) : dest; -- -- emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask))); -- emit_insn (gen_adddi3 (dest, subtarget, -- GEN_INT (val - (val & mask)))); -- return; -+ if (generate) -+ { -+ subtarget = subtargets ? gen_reg_rtx (DImode) : dest; -+ emit_insn (gen_rtx_SET (VOIDmode, subtarget, -+ GEN_INT (val & mask))); -+ emit_insn (gen_adddi3 (dest, subtarget, -+ GEN_INT (val - (val & mask)))); -+ } -+ num_insns += 2; -+ return num_insns; - } - else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask)))) - { -- subtarget = subtargets ? gen_reg_rtx (DImode) : dest; -- -- emit_insn (gen_rtx_SET (VOIDmode, subtarget, -- GEN_INT ((val + comp) & mask))); -- emit_insn (gen_adddi3 (dest, subtarget, -- GEN_INT (val - ((val + comp) & mask)))); -- return; -+ if (generate) -+ { -+ subtarget = subtargets ? gen_reg_rtx (DImode) : dest; -+ emit_insn (gen_rtx_SET (VOIDmode, subtarget, -+ GEN_INT ((val + comp) & mask))); -+ emit_insn (gen_adddi3 (dest, subtarget, -+ GEN_INT (val - ((val + comp) & mask)))); -+ } -+ num_insns += 2; -+ return num_insns; - } - else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask))) - { -- subtarget = subtargets ? gen_reg_rtx (DImode) : dest; -- -- emit_insn (gen_rtx_SET (VOIDmode, subtarget, -- GEN_INT ((val - comp) | ~mask))); -- emit_insn (gen_adddi3 (dest, subtarget, -- GEN_INT (val - ((val - comp) | ~mask)))); -- return; -+ if (generate) -+ { -+ subtarget = subtargets ? gen_reg_rtx (DImode) : dest; -+ emit_insn (gen_rtx_SET (VOIDmode, subtarget, -+ GEN_INT ((val - comp) | ~mask))); -+ emit_insn (gen_adddi3 (dest, subtarget, -+ GEN_INT (val - ((val - comp) | ~mask)))); -+ } -+ num_insns += 2; -+ return num_insns; - } - else if (aarch64_uimm12_shift (-(val - (val | ~mask)))) - { -- subtarget = subtargets ? gen_reg_rtx (DImode) : dest; -- -- emit_insn (gen_rtx_SET (VOIDmode, subtarget, -- GEN_INT (val | ~mask))); -- emit_insn (gen_adddi3 (dest, subtarget, -- GEN_INT (val - (val | ~mask)))); -- return; -+ if (generate) -+ { -+ subtarget = subtargets ? gen_reg_rtx (DImode) : dest; -+ emit_insn (gen_rtx_SET (VOIDmode, subtarget, -+ GEN_INT (val | ~mask))); -+ emit_insn (gen_adddi3 (dest, subtarget, -+ GEN_INT (val - (val | ~mask)))); -+ } -+ num_insns += 2; -+ return num_insns; - } - } - -@@ -1084,12 +1180,16 @@ - if (aarch64_uimm12_shift (val - aarch64_bitmasks[i]) - || aarch64_uimm12_shift (-val + aarch64_bitmasks[i])) - { -- subtarget = subtargets ? gen_reg_rtx (DImode) : dest; -- emit_insn (gen_rtx_SET (VOIDmode, subtarget, -- GEN_INT (aarch64_bitmasks[i]))); -- emit_insn (gen_adddi3 (dest, subtarget, -- GEN_INT (val - aarch64_bitmasks[i]))); -- return; -+ if (generate) -+ { -+ subtarget = subtargets ? 
gen_reg_rtx (DImode) : dest; -+ emit_insn (gen_rtx_SET (VOIDmode, subtarget, -+ GEN_INT (aarch64_bitmasks[i]))); -+ emit_insn (gen_adddi3 (dest, subtarget, -+ GEN_INT (val - aarch64_bitmasks[i]))); -+ } -+ num_insns += 2; -+ return num_insns; - } - - for (j = 0; j < 64; j += 16, mask <<= 16) -@@ -1096,11 +1196,15 @@ - { - if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask)) - { -- emit_insn (gen_rtx_SET (VOIDmode, dest, -- GEN_INT (aarch64_bitmasks[i]))); -- emit_insn (gen_insv_immdi (dest, GEN_INT (j), -- GEN_INT ((val >> j) & 0xffff))); -- return; -+ if (generate) -+ { -+ emit_insn (gen_rtx_SET (VOIDmode, dest, -+ GEN_INT (aarch64_bitmasks[i]))); -+ emit_insn (gen_insv_immdi (dest, GEN_INT (j), -+ GEN_INT ((val >> j) & 0xffff))); -+ } -+ num_insns += 2; -+ return num_insns; - } - } - } -@@ -1115,12 +1219,16 @@ - for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++) - if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j])) - { -- subtarget = subtargets ? gen_reg_rtx (mode) : dest; -- emit_insn (gen_rtx_SET (VOIDmode, subtarget, -- GEN_INT (aarch64_bitmasks[i]))); -- emit_insn (gen_iordi3 (dest, subtarget, -- GEN_INT (aarch64_bitmasks[j]))); -- return; -+ if (generate) -+ { -+ subtarget = subtargets ? gen_reg_rtx (mode) : dest; -+ emit_insn (gen_rtx_SET (VOIDmode, subtarget, -+ GEN_INT (aarch64_bitmasks[i]))); -+ emit_insn (gen_iordi3 (dest, subtarget, -+ GEN_INT (aarch64_bitmasks[j]))); -+ } -+ num_insns += 2; -+ return num_insns; - } - } - else if ((val & aarch64_bitmasks[i]) == val) -@@ -1130,17 +1238,44 @@ - for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++) - if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i])) - { -- -- subtarget = subtargets ? gen_reg_rtx (mode) : dest; -- emit_insn (gen_rtx_SET (VOIDmode, subtarget, -- GEN_INT (aarch64_bitmasks[j]))); -- emit_insn (gen_anddi3 (dest, subtarget, -- GEN_INT (aarch64_bitmasks[i]))); -- return; -+ if (generate) -+ { -+ subtarget = subtargets ? gen_reg_rtx (mode) : dest; -+ emit_insn (gen_rtx_SET (VOIDmode, subtarget, -+ GEN_INT (aarch64_bitmasks[j]))); -+ emit_insn (gen_anddi3 (dest, subtarget, -+ GEN_INT (aarch64_bitmasks[i]))); -+ } -+ num_insns += 2; -+ return num_insns; - } - } - } - -+ if (one_match > zero_match) -+ { -+ /* Set either first three quarters or all but the third. */ -+ mask = 0xffffll << (16 - first_not_ffff_match); -+ if (generate) -+ emit_insn (gen_rtx_SET (VOIDmode, dest, -+ GEN_INT (val | mask | 0xffffffff00000000ull))); -+ num_insns ++; -+ -+ /* Now insert other two quarters. */ -+ for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1); -+ i < 64; i += 16, mask <<= 16) -+ { -+ if ((val & mask) != mask) -+ { -+ if (generate) -+ emit_insn (gen_insv_immdi (dest, GEN_INT (i), -+ GEN_INT ((val >> i) & 0xffff))); -+ num_insns ++; -+ } -+ } -+ return num_insns; -+ } -+ - simple_sequence: - first = true; - mask = 0xffff; -@@ -1150,30 +1285,113 @@ - { - if (first) - { -- emit_insn (gen_rtx_SET (VOIDmode, dest, -- GEN_INT (val & mask))); -+ if (generate) -+ emit_insn (gen_rtx_SET (VOIDmode, dest, -+ GEN_INT (val & mask))); -+ num_insns ++; - first = false; - } - else -- emit_insn (gen_insv_immdi (dest, GEN_INT (i), -- GEN_INT ((val >> i) & 0xffff))); -+ { -+ if (generate) -+ emit_insn (gen_insv_immdi (dest, GEN_INT (i), -+ GEN_INT ((val >> i) & 0xffff))); -+ num_insns ++; -+ } - } - } -+ -+ return num_insns; - } - --static bool --aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) -+ -+void -+aarch64_expand_mov_immediate (rtx dest, rtx imm) - { -- /* Indirect calls are not currently supported. 
*/ -- if (decl == NULL) -- return false; -+ machine_mode mode = GET_MODE (dest); - -- /* Cannot tail-call to long-calls, since these are outside of the -- range of a branch instruction (we could handle this if we added -- support for indirect tail-calls. */ -- if (aarch64_decl_is_long_call_p (decl)) -- return false; -+ gcc_assert (mode == SImode || mode == DImode); - -+ /* Check on what type of symbol it is. */ -+ if (GET_CODE (imm) == SYMBOL_REF -+ || GET_CODE (imm) == LABEL_REF -+ || GET_CODE (imm) == CONST) -+ { -+ rtx mem, base, offset; -+ enum aarch64_symbol_type sty; -+ -+ /* If we have (const (plus symbol offset)), separate out the offset -+ before we start classifying the symbol. */ -+ split_const (imm, &base, &offset); -+ -+ sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR); -+ switch (sty) -+ { -+ case SYMBOL_FORCE_TO_MEM: -+ if (offset != const0_rtx -+ && targetm.cannot_force_const_mem (mode, imm)) -+ { -+ gcc_assert (can_create_pseudo_p ()); -+ base = aarch64_force_temporary (mode, dest, base); -+ base = aarch64_add_offset (mode, NULL, base, INTVAL (offset)); -+ aarch64_emit_move (dest, base); -+ return; -+ } -+ mem = force_const_mem (ptr_mode, imm); -+ gcc_assert (mem); -+ if (mode != ptr_mode) -+ mem = gen_rtx_ZERO_EXTEND (mode, mem); -+ emit_insn (gen_rtx_SET (VOIDmode, dest, mem)); -+ return; -+ -+ case SYMBOL_SMALL_TLSGD: -+ case SYMBOL_SMALL_TLSDESC: -+ case SYMBOL_SMALL_GOTTPREL: -+ case SYMBOL_SMALL_GOT: -+ case SYMBOL_TINY_GOT: -+ if (offset != const0_rtx) -+ { -+ gcc_assert(can_create_pseudo_p ()); -+ base = aarch64_force_temporary (mode, dest, base); -+ base = aarch64_add_offset (mode, NULL, base, INTVAL (offset)); -+ aarch64_emit_move (dest, base); -+ return; -+ } -+ /* FALLTHRU */ -+ -+ case SYMBOL_SMALL_TPREL: -+ case SYMBOL_SMALL_ABSOLUTE: -+ case SYMBOL_TINY_ABSOLUTE: -+ aarch64_load_symref_appropriately (dest, imm, sty); -+ return; -+ -+ default: -+ gcc_unreachable (); -+ } -+ } -+ -+ if (!CONST_INT_P (imm)) -+ { -+ if (GET_CODE (imm) == HIGH) -+ emit_insn (gen_rtx_SET (VOIDmode, dest, imm)); -+ else -+ { -+ rtx mem = force_const_mem (mode, imm); -+ gcc_assert (mem); -+ emit_insn (gen_rtx_SET (VOIDmode, dest, mem)); -+ } -+ -+ return; -+ } -+ -+ aarch64_internal_mov_immediate (dest, imm, true, GET_MODE (dest)); -+} -+ -+static bool -+aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED, -+ tree exp ATTRIBUTE_UNUSED) -+{ -+ /* Currently, always true. */ - return true; - } - -@@ -1688,11 +1906,6 @@ - static bool - aarch64_frame_pointer_required (void) - { -- /* If the function contains dynamic stack allocations, we need to -- use the frame pointer to access the static parts of the frame. */ -- if (cfun->calls_alloca) -- return true; -- - /* In aarch64_override_options_after_change - flag_omit_leaf_frame_pointer turns off the frame pointer by - default. Turn it back on now if we've not got a leaf -@@ -1716,268 +1929,312 @@ - if (reload_completed && cfun->machine->frame.laid_out) - return; - -- cfun->machine->frame.fp_lr_offset = 0; -+#define SLOT_NOT_REQUIRED (-2) -+#define SLOT_REQUIRED (-1) - -+ cfun->machine->frame.wb_candidate1 = FIRST_PSEUDO_REGISTER; -+ cfun->machine->frame.wb_candidate2 = FIRST_PSEUDO_REGISTER; -+ - /* First mark all the registers that really need to be saved... 
*/ - for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++) -- cfun->machine->frame.reg_offset[regno] = -1; -+ cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED; - - for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) -- cfun->machine->frame.reg_offset[regno] = -1; -+ cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED; - - /* ... that includes the eh data registers (if needed)... */ - if (crtl->calls_eh_return) - for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++) -- cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0; -+ cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] -+ = SLOT_REQUIRED; - - /* ... and any callee saved register that dataflow says is live. */ - for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++) - if (df_regs_ever_live_p (regno) - && !call_used_regs[regno]) -- cfun->machine->frame.reg_offset[regno] = 0; -+ cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED; - - for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) - if (df_regs_ever_live_p (regno) - && !call_used_regs[regno]) -- cfun->machine->frame.reg_offset[regno] = 0; -+ cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED; - - if (frame_pointer_needed) - { -- cfun->machine->frame.reg_offset[R30_REGNUM] = 0; -+ /* FP and LR are placed in the linkage record. */ - cfun->machine->frame.reg_offset[R29_REGNUM] = 0; -+ cfun->machine->frame.wb_candidate1 = R29_REGNUM; -+ cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD; -+ cfun->machine->frame.wb_candidate2 = R30_REGNUM; - cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD; -+ offset += 2 * UNITS_PER_WORD; - } - - /* Now assign stack slots for them. */ -- for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++) -- if (cfun->machine->frame.reg_offset[regno] != -1) -+ for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++) -+ if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED) - { - cfun->machine->frame.reg_offset[regno] = offset; -+ if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER) -+ cfun->machine->frame.wb_candidate1 = regno; -+ else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER) -+ cfun->machine->frame.wb_candidate2 = regno; - offset += UNITS_PER_WORD; - } - - for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) -- if (cfun->machine->frame.reg_offset[regno] != -1) -+ if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED) - { - cfun->machine->frame.reg_offset[regno] = offset; -+ if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER) -+ cfun->machine->frame.wb_candidate1 = regno; -+ else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER -+ && cfun->machine->frame.wb_candidate1 >= V0_REGNUM) -+ cfun->machine->frame.wb_candidate2 = regno; - offset += UNITS_PER_WORD; - } - -- if (frame_pointer_needed) -- { -- cfun->machine->frame.reg_offset[R29_REGNUM] = offset; -- offset += UNITS_PER_WORD; -- cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD; -- } -- -- if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1) -- { -- cfun->machine->frame.reg_offset[R30_REGNUM] = offset; -- offset += UNITS_PER_WORD; -- cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD; -- } -- - cfun->machine->frame.padding0 = - (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset); - offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT); - - cfun->machine->frame.saved_regs_size = offset; -+ -+ cfun->machine->frame.hard_fp_offset -+ = AARCH64_ROUND_UP (cfun->machine->frame.saved_varargs_size -+ + get_frame_size () -+ + 
cfun->machine->frame.saved_regs_size, -+ STACK_BOUNDARY / BITS_PER_UNIT); -+ -+ cfun->machine->frame.frame_size -+ = AARCH64_ROUND_UP (cfun->machine->frame.hard_fp_offset -+ + crtl->outgoing_args_size, -+ STACK_BOUNDARY / BITS_PER_UNIT); -+ - cfun->machine->frame.laid_out = true; - } - --/* Make the last instruction frame-related and note that it performs -- the operation described by FRAME_PATTERN. */ -+static bool -+aarch64_register_saved_on_entry (int regno) -+{ -+ return cfun->machine->frame.reg_offset[regno] >= 0; -+} - -+static unsigned -+aarch64_next_callee_save (unsigned regno, unsigned limit) -+{ -+ while (regno <= limit && !aarch64_register_saved_on_entry (regno)) -+ regno ++; -+ return regno; -+} -+ - static void --aarch64_set_frame_expr (rtx frame_pattern) -+aarch64_pushwb_single_reg (enum machine_mode mode, unsigned regno, -+ HOST_WIDE_INT adjustment) -+ { -+ rtx base_rtx = stack_pointer_rtx; -+ rtx insn, reg, mem; -+ -+ reg = gen_rtx_REG (mode, regno); -+ mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx, -+ plus_constant (Pmode, base_rtx, -adjustment)); -+ mem = gen_rtx_MEM (mode, mem); -+ -+ insn = emit_move_insn (mem, reg); -+ RTX_FRAME_RELATED_P (insn) = 1; -+} -+ -+static rtx -+aarch64_gen_storewb_pair (enum machine_mode mode, rtx base, rtx reg, rtx reg2, -+ HOST_WIDE_INT adjustment) - { -+ switch (mode) -+ { -+ case DImode: -+ return gen_storewb_pairdi_di (base, base, reg, reg2, -+ GEN_INT (-adjustment), -+ GEN_INT (UNITS_PER_WORD - adjustment)); -+ case DFmode: -+ return gen_storewb_pairdf_di (base, base, reg, reg2, -+ GEN_INT (-adjustment), -+ GEN_INT (UNITS_PER_WORD - adjustment)); -+ default: -+ gcc_unreachable (); -+ } -+} -+ -+static void -+aarch64_pushwb_pair_reg (enum machine_mode mode, unsigned regno1, -+ unsigned regno2, HOST_WIDE_INT adjustment) -+{ - rtx insn; -+ rtx reg1 = gen_rtx_REG (mode, regno1); -+ rtx reg2 = gen_rtx_REG (mode, regno2); - -- insn = get_last_insn (); -+ insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1, -+ reg2, adjustment)); -+ RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1; -+ -+ RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; - RTX_FRAME_RELATED_P (insn) = 1; -- RTX_FRAME_RELATED_P (frame_pattern) = 1; -- REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR, -- frame_pattern, -- REG_NOTES (insn)); - } - --static bool --aarch64_register_saved_on_entry (int regno) -+static rtx -+aarch64_gen_loadwb_pair (enum machine_mode mode, rtx base, rtx reg, rtx reg2, -+ HOST_WIDE_INT adjustment) - { -- return cfun->machine->frame.reg_offset[regno] != -1; -+ switch (mode) -+ { -+ case DImode: -+ return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment), -+ GEN_INT (UNITS_PER_WORD)); -+ case DFmode: -+ return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment), -+ GEN_INT (UNITS_PER_WORD)); -+ default: -+ gcc_unreachable (); -+ } - } - -+static rtx -+aarch64_gen_store_pair (enum machine_mode mode, rtx mem1, rtx reg1, rtx mem2, -+ rtx reg2) -+{ -+ switch (mode) -+ { -+ case DImode: -+ return gen_store_pairdi (mem1, reg1, mem2, reg2); - --static void --aarch64_save_or_restore_fprs (int start_offset, int increment, -- bool restore, rtx base_rtx) -+ case DFmode: -+ return gen_store_pairdf (mem1, reg1, mem2, reg2); - -+ default: -+ gcc_unreachable (); -+ } -+} -+ -+static rtx -+aarch64_gen_load_pair (enum machine_mode mode, rtx reg1, rtx mem1, rtx reg2, -+ rtx mem2) - { -+ switch (mode) -+ { -+ case DImode: -+ return gen_load_pairdi (reg1, mem1, reg2, mem2); -+ -+ case DFmode: -+ 
return gen_load_pairdf (reg1, mem1, reg2, mem2); -+ -+ default: -+ gcc_unreachable (); -+ } -+} -+ -+ -+static void -+aarch64_save_callee_saves (enum machine_mode mode, HOST_WIDE_INT start_offset, -+ unsigned start, unsigned limit, bool skip_wb) -+{ -+ rtx insn; -+ rtx (*gen_mem_ref) (enum machine_mode, rtx) = (frame_pointer_needed -+ ? gen_frame_mem : gen_rtx_MEM); - unsigned regno; - unsigned regno2; -- rtx insn; -- rtx (*gen_mem_ref)(enum machine_mode, rtx) -- = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM; - -- -- for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) -+ for (regno = aarch64_next_callee_save (start, limit); -+ regno <= limit; -+ regno = aarch64_next_callee_save (regno + 1, limit)) - { -- if (aarch64_register_saved_on_entry (regno)) -- { -- rtx mem; -- mem = gen_mem_ref (DFmode, -- plus_constant (Pmode, -- base_rtx, -- start_offset)); -+ rtx reg, mem; -+ HOST_WIDE_INT offset; - -- for (regno2 = regno + 1; -- regno2 <= V31_REGNUM -- && !aarch64_register_saved_on_entry (regno2); -- regno2++) -- { -- /* Empty loop. */ -- } -- if (regno2 <= V31_REGNUM && -- aarch64_register_saved_on_entry (regno2)) -- { -- rtx mem2; -- /* Next highest register to be saved. */ -- mem2 = gen_mem_ref (DFmode, -- plus_constant -- (Pmode, -- base_rtx, -- start_offset + increment)); -- if (restore == false) -- { -- insn = emit_insn -- ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno), -- mem2, gen_rtx_REG (DFmode, regno2))); -+ if (skip_wb -+ && (regno == cfun->machine->frame.wb_candidate1 -+ || regno == cfun->machine->frame.wb_candidate2)) -+ continue; - -- } -- else -- { -- insn = emit_insn -- ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem, -- gen_rtx_REG (DFmode, regno2), mem2)); -+ reg = gen_rtx_REG (mode, regno); -+ offset = start_offset + cfun->machine->frame.reg_offset[regno]; -+ mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx, -+ offset)); - -- add_reg_note (insn, REG_CFA_RESTORE, -- gen_rtx_REG (DFmode, regno)); -- add_reg_note (insn, REG_CFA_RESTORE, -- gen_rtx_REG (DFmode, regno2)); -- } -+ regno2 = aarch64_next_callee_save (regno + 1, limit); - -- /* The first part of a frame-related parallel insn -- is always assumed to be relevant to the frame -- calculations; subsequent parts, are only -- frame-related if explicitly marked. */ -- RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; -- regno = regno2; -- start_offset += increment * 2; -- } -- else -- { -- if (restore == false) -- insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno)); -- else -- { -- insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem); -- add_reg_note (insn, REG_CFA_RESTORE, -- gen_rtx_REG (DImode, regno)); -- } -- start_offset += increment; -- } -- RTX_FRAME_RELATED_P (insn) = 1; -+ if (regno2 <= limit -+ && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD) -+ == cfun->machine->frame.reg_offset[regno2])) -+ -+ { -+ rtx reg2 = gen_rtx_REG (mode, regno2); -+ rtx mem2; -+ -+ offset = start_offset + cfun->machine->frame.reg_offset[regno2]; -+ mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx, -+ offset)); -+ insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2, -+ reg2)); -+ -+ /* The first part of a frame-related parallel insn is -+ always assumed to be relevant to the frame -+ calculations; subsequent parts, are only -+ frame-related if explicitly marked. 
*/ -+ RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; -+ regno = regno2; - } -+ else -+ insn = emit_move_insn (mem, reg); -+ -+ RTX_FRAME_RELATED_P (insn) = 1; - } -- - } - -- --/* offset from the stack pointer of where the saves and -- restore's have to happen. */ - static void --aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset, -- bool restore) -+aarch64_restore_callee_saves (enum machine_mode mode, -+ HOST_WIDE_INT start_offset, unsigned start, -+ unsigned limit, bool skip_wb, rtx *cfi_ops) - { -- rtx insn; - rtx base_rtx = stack_pointer_rtx; -- HOST_WIDE_INT start_offset = offset; -- HOST_WIDE_INT increment = UNITS_PER_WORD; -- rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM; -- unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM; -+ rtx (*gen_mem_ref) (enum machine_mode, rtx) = (frame_pointer_needed -+ ? gen_frame_mem : gen_rtx_MEM); - unsigned regno; - unsigned regno2; -+ HOST_WIDE_INT offset; - -- for (regno = R0_REGNUM; regno <= limit; regno++) -+ for (regno = aarch64_next_callee_save (start, limit); -+ regno <= limit; -+ regno = aarch64_next_callee_save (regno + 1, limit)) - { -- if (aarch64_register_saved_on_entry (regno)) -- { -- rtx mem; -- mem = gen_mem_ref (Pmode, -- plus_constant (Pmode, -- base_rtx, -- start_offset)); -+ rtx reg, mem; - -- for (regno2 = regno + 1; -- regno2 <= limit -- && !aarch64_register_saved_on_entry (regno2); -- regno2++) -- { -- /* Empty loop. */ -- } -- if (regno2 <= limit && -- aarch64_register_saved_on_entry (regno2)) -- { -- rtx mem2; -- /* Next highest register to be saved. */ -- mem2 = gen_mem_ref (Pmode, -- plus_constant -- (Pmode, -- base_rtx, -- start_offset + increment)); -- if (restore == false) -- { -- insn = emit_insn -- ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno), -- mem2, gen_rtx_REG (DImode, regno2))); -+ if (skip_wb -+ && (regno == cfun->machine->frame.wb_candidate1 -+ || regno == cfun->machine->frame.wb_candidate2)) -+ continue; - -- } -- else -- { -- insn = emit_insn -- ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem, -- gen_rtx_REG (DImode, regno2), mem2)); -+ reg = gen_rtx_REG (mode, regno); -+ offset = start_offset + cfun->machine->frame.reg_offset[regno]; -+ mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset)); - -- add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno)); -- add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2)); -- } -+ regno2 = aarch64_next_callee_save (regno + 1, limit); - -- /* The first part of a frame-related parallel insn -- is always assumed to be relevant to the frame -- calculations; subsequent parts, are only -- frame-related if explicitly marked. 
*/ -- RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, -- 1)) = 1; -- regno = regno2; -- start_offset += increment * 2; -- } -- else -- { -- if (restore == false) -- insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno)); -- else -- { -- insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem); -- add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno)); -- } -- start_offset += increment; -- } -- RTX_FRAME_RELATED_P (insn) = 1; -+ if (regno2 <= limit -+ && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD) -+ == cfun->machine->frame.reg_offset[regno2])) -+ { -+ rtx reg2 = gen_rtx_REG (mode, regno2); -+ rtx mem2; -+ -+ offset = start_offset + cfun->machine->frame.reg_offset[regno2]; -+ mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset)); -+ emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2)); -+ -+ *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops); -+ regno = regno2; - } -+ else -+ emit_move_insn (reg, mem); -+ *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops); - } -- -- aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx); -- - } - - /* AArch64 stack frames generated by this compiler look like: -@@ -1986,37 +2243,35 @@ - | | - | incoming stack arguments | - | | -- +-------------------------------+ <-- arg_pointer_rtx -- | | -+ +-------------------------------+ -+ | | <-- incoming stack pointer (aligned) - | callee-allocated save area | - | for register varargs | - | | -- +-------------------------------+ <-- frame_pointer_rtx -+ +-------------------------------+ -+ | local variables | <-- frame_pointer_rtx - | | -- | local variables | -- | | - +-------------------------------+ - | padding0 | \ - +-------------------------------+ | -- | | | -- | | | - | callee-saved registers | | frame.saved_regs_size -- | | | - +-------------------------------+ | - | LR' | | - +-------------------------------+ | -- | FP' | / -- P +-------------------------------+ <-- hard_frame_pointer_rtx -+ | FP' | / <- hard_frame_pointer_rtx (aligned) -+ +-------------------------------+ - | dynamic allocation | - +-------------------------------+ -- | | -- | outgoing stack arguments | -- | | -- +-------------------------------+ <-- stack_pointer_rtx -+ | padding | -+ +-------------------------------+ -+ | outgoing stack arguments | <-- arg_pointer -+ | | -+ +-------------------------------+ -+ | | <-- stack_pointer_rtx (aligned) - -- Dynamic stack allocations such as alloca insert data at point P. -- They decrease stack_pointer_rtx but leave frame_pointer_rtx and -- hard_frame_pointer_rtx unchanged. */ -+ Dynamic stack allocations via alloca() decrease stack_pointer_rtx -+ but leave frame_pointer_rtx and hard_frame_pointer_rtx -+ unchanged. */ - - /* Generate the prologue instructions for entry into a function. - Establish the stack frame by decreasing the stack pointer with a -@@ -2034,27 +2289,20 @@ - - sub sp, sp, <final_adjustment_if_any> - */ -- HOST_WIDE_INT original_frame_size; /* local variables + vararg save */ - HOST_WIDE_INT frame_size, offset; -- HOST_WIDE_INT fp_offset; /* FP offset from SP */ -+ HOST_WIDE_INT fp_offset; /* Offset from hard FP to SP. 
*/ -+ HOST_WIDE_INT hard_fp_offset; - rtx insn; - - aarch64_layout_frame (); -- original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size; -- gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg) -- && (cfun->stdarg || !cfun->machine->saved_varargs_size)); -- frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size -- + crtl->outgoing_args_size); -- offset = frame_size = AARCH64_ROUND_UP (frame_size, -- STACK_BOUNDARY / BITS_PER_UNIT); - -+ offset = frame_size = cfun->machine->frame.frame_size; -+ hard_fp_offset = cfun->machine->frame.hard_fp_offset; -+ fp_offset = frame_size - hard_fp_offset; -+ - if (flag_stack_usage_info) - current_function_static_stack_size = frame_size; - -- fp_offset = (offset -- - original_frame_size -- - cfun->machine->frame.saved_regs_size); -- - /* Store pairs and load pairs have a range only -512 to 504. */ - if (offset >= 512) - { -@@ -2064,7 +2312,7 @@ - register area. This will allow the pre-index write-back - store pair instructions to be used for setting up the stack frame - efficiently. */ -- offset = original_frame_size + cfun->machine->frame.saved_regs_size; -+ offset = hard_fp_offset; - if (offset >= 512) - offset = cfun->machine->frame.saved_regs_size; - -@@ -2075,29 +2323,29 @@ - { - rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM); - emit_move_insn (op0, GEN_INT (-frame_size)); -- emit_insn (gen_add2_insn (stack_pointer_rtx, op0)); -- aarch64_set_frame_expr (gen_rtx_SET -- (Pmode, stack_pointer_rtx, -- plus_constant (Pmode, -- stack_pointer_rtx, -- -frame_size))); -+ insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0)); -+ -+ add_reg_note (insn, REG_CFA_ADJUST_CFA, -+ gen_rtx_SET (VOIDmode, stack_pointer_rtx, -+ plus_constant (Pmode, stack_pointer_rtx, -+ -frame_size))); -+ RTX_FRAME_RELATED_P (insn) = 1; - } - else if (frame_size > 0) - { -- if ((frame_size & 0xfff) != frame_size) -+ int hi_ofs = frame_size & 0xfff000; -+ int lo_ofs = frame_size & 0x000fff; -+ -+ if (hi_ofs) - { - insn = emit_insn (gen_add2_insn -- (stack_pointer_rtx, -- GEN_INT (-(frame_size -- & ~(HOST_WIDE_INT)0xfff)))); -+ (stack_pointer_rtx, GEN_INT (-hi_ofs))); - RTX_FRAME_RELATED_P (insn) = 1; - } -- if ((frame_size & 0xfff) != 0) -+ if (lo_ofs) - { - insn = emit_insn (gen_add2_insn -- (stack_pointer_rtx, -- GEN_INT (-(frame_size -- & (HOST_WIDE_INT)0xfff)))); -+ (stack_pointer_rtx, GEN_INT (-lo_ofs))); - RTX_FRAME_RELATED_P (insn) = 1; - } - } -@@ -2107,12 +2355,11 @@ - - if (offset > 0) - { -- /* Save the frame pointer and lr if the frame pointer is needed -- first. Make the frame pointer point to the location of the -- old frame pointer on the stack. 
*/ -+ bool skip_wb = false; -+ - if (frame_pointer_needed) - { -- rtx mem_fp, mem_lr; -+ skip_wb = true; - - if (fp_offset) - { -@@ -2119,67 +2366,52 @@ - insn = emit_insn (gen_add2_insn (stack_pointer_rtx, - GEN_INT (-offset))); - RTX_FRAME_RELATED_P (insn) = 1; -- aarch64_set_frame_expr (gen_rtx_SET -- (Pmode, stack_pointer_rtx, -- gen_rtx_MINUS (Pmode, -- stack_pointer_rtx, -- GEN_INT (offset)))); -- mem_fp = gen_frame_mem (DImode, -- plus_constant (Pmode, -- stack_pointer_rtx, -- fp_offset)); -- mem_lr = gen_frame_mem (DImode, -- plus_constant (Pmode, -- stack_pointer_rtx, -- fp_offset -- + UNITS_PER_WORD)); -- insn = emit_insn (gen_store_pairdi (mem_fp, -- hard_frame_pointer_rtx, -- mem_lr, -- gen_rtx_REG (DImode, -- LR_REGNUM))); -+ -+ aarch64_save_callee_saves (DImode, fp_offset, R29_REGNUM, -+ R30_REGNUM, false); - } - else -- { -- insn = emit_insn (gen_storewb_pairdi_di -- (stack_pointer_rtx, stack_pointer_rtx, -- hard_frame_pointer_rtx, -- gen_rtx_REG (DImode, LR_REGNUM), -- GEN_INT (-offset), -- GEN_INT (GET_MODE_SIZE (DImode) - offset))); -- RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1; -- } -+ aarch64_pushwb_pair_reg (DImode, R29_REGNUM, R30_REGNUM, offset); - -- /* The first part of a frame-related parallel insn is always -- assumed to be relevant to the frame calculations; -- subsequent parts, are only frame-related if explicitly -- marked. */ -- RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; -- RTX_FRAME_RELATED_P (insn) = 1; -- - /* Set up frame pointer to point to the location of the - previous frame pointer on the stack. */ - insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx, - stack_pointer_rtx, - GEN_INT (fp_offset))); -- aarch64_set_frame_expr (gen_rtx_SET -- (Pmode, hard_frame_pointer_rtx, -- plus_constant (Pmode, -- stack_pointer_rtx, -- fp_offset))); - RTX_FRAME_RELATED_P (insn) = 1; -- insn = emit_insn (gen_stack_tie (stack_pointer_rtx, -- hard_frame_pointer_rtx)); -+ emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx)); - } - else - { -- insn = emit_insn (gen_add2_insn (stack_pointer_rtx, -- GEN_INT (-offset))); -- RTX_FRAME_RELATED_P (insn) = 1; -+ unsigned reg1 = cfun->machine->frame.wb_candidate1; -+ unsigned reg2 = cfun->machine->frame.wb_candidate2; -+ -+ if (fp_offset -+ || reg1 == FIRST_PSEUDO_REGISTER -+ || (reg2 == FIRST_PSEUDO_REGISTER -+ && offset >= 256)) -+ { -+ insn = emit_insn (gen_add2_insn (stack_pointer_rtx, -+ GEN_INT (-offset))); -+ RTX_FRAME_RELATED_P (insn) = 1; -+ } -+ else -+ { -+ enum machine_mode mode1 = (reg1 <= R30_REGNUM) ? 
DImode : DFmode; -+ -+ skip_wb = true; -+ -+ if (reg2 == FIRST_PSEUDO_REGISTER) -+ aarch64_pushwb_single_reg (mode1, reg1, offset); -+ else -+ aarch64_pushwb_pair_reg (mode1, reg1, reg2, offset); -+ } - } - -- aarch64_save_or_restore_callee_save_registers -- (fp_offset + cfun->machine->frame.hardfp_offset, 0); -+ aarch64_save_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM, -+ skip_wb); -+ aarch64_save_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM, -+ skip_wb); - } - - /* when offset >= 512, -@@ -2200,28 +2432,21 @@ - void - aarch64_expand_epilogue (bool for_sibcall) - { -- HOST_WIDE_INT original_frame_size, frame_size, offset; -+ HOST_WIDE_INT frame_size, offset; - HOST_WIDE_INT fp_offset; -+ HOST_WIDE_INT hard_fp_offset; - rtx insn; -- rtx cfa_reg; - - aarch64_layout_frame (); -- original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size; -- frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size -- + crtl->outgoing_args_size); -- offset = frame_size = AARCH64_ROUND_UP (frame_size, -- STACK_BOUNDARY / BITS_PER_UNIT); - -- fp_offset = (offset -- - original_frame_size -- - cfun->machine->frame.saved_regs_size); -+ offset = frame_size = cfun->machine->frame.frame_size; -+ hard_fp_offset = cfun->machine->frame.hard_fp_offset; -+ fp_offset = frame_size - hard_fp_offset; - -- cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx; -- - /* Store pairs and load pairs have a range only -512 to 504. */ - if (offset >= 512) - { -- offset = original_frame_size + cfun->machine->frame.saved_regs_size; -+ offset = hard_fp_offset; - if (offset >= 512) - offset = cfun->machine->frame.saved_regs_size; - -@@ -2247,72 +2472,51 @@ - { - insn = emit_insn (gen_add3_insn (stack_pointer_rtx, - hard_frame_pointer_rtx, -- GEN_INT (- fp_offset))); -- RTX_FRAME_RELATED_P (insn) = 1; -- /* As SP is set to (FP - fp_offset), according to the rules in -- dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated -- from the value of SP from now on. */ -- cfa_reg = stack_pointer_rtx; -+ GEN_INT (0))); -+ offset = offset - fp_offset; - } - -- aarch64_save_or_restore_callee_save_registers -- (fp_offset + cfun->machine->frame.hardfp_offset, 1); -- -- /* Restore the frame pointer and lr if the frame pointer is needed. */ - if (offset > 0) - { -+ unsigned reg1 = cfun->machine->frame.wb_candidate1; -+ unsigned reg2 = cfun->machine->frame.wb_candidate2; -+ bool skip_wb = true; -+ rtx cfi_ops = NULL; -+ - if (frame_pointer_needed) -+ fp_offset = 0; -+ else if (fp_offset -+ || reg1 == FIRST_PSEUDO_REGISTER -+ || (reg2 == FIRST_PSEUDO_REGISTER -+ && offset >= 256)) -+ skip_wb = false; -+ -+ aarch64_restore_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM, -+ skip_wb, &cfi_ops); -+ aarch64_restore_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM, -+ skip_wb, &cfi_ops); -+ -+ if (skip_wb) - { -- rtx mem_fp, mem_lr; -+ enum machine_mode mode1 = (reg1 <= R30_REGNUM) ? 
DImode : DFmode; -+ rtx rreg1 = gen_rtx_REG (mode1, reg1); - -- if (fp_offset) -+ cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg1, cfi_ops); -+ if (reg2 == FIRST_PSEUDO_REGISTER) - { -- mem_fp = gen_frame_mem (DImode, -- plus_constant (Pmode, -- stack_pointer_rtx, -- fp_offset)); -- mem_lr = gen_frame_mem (DImode, -- plus_constant (Pmode, -- stack_pointer_rtx, -- fp_offset -- + UNITS_PER_WORD)); -- insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx, -- mem_fp, -- gen_rtx_REG (DImode, -- LR_REGNUM), -- mem_lr)); -+ rtx mem = plus_constant (Pmode, stack_pointer_rtx, offset); -+ mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem); -+ mem = gen_rtx_MEM (mode1, mem); -+ insn = emit_move_insn (rreg1, mem); - } - else - { -- insn = emit_insn (gen_loadwb_pairdi_di -- (stack_pointer_rtx, -- stack_pointer_rtx, -- hard_frame_pointer_rtx, -- gen_rtx_REG (DImode, LR_REGNUM), -- GEN_INT (offset), -- GEN_INT (GET_MODE_SIZE (DImode) + offset))); -- RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1; -- add_reg_note (insn, REG_CFA_ADJUST_CFA, -- (gen_rtx_SET (Pmode, stack_pointer_rtx, -- plus_constant (Pmode, cfa_reg, -- offset)))); -- } -+ rtx rreg2 = gen_rtx_REG (mode1, reg2); - -- /* The first part of a frame-related parallel insn -- is always assumed to be relevant to the frame -- calculations; subsequent parts, are only -- frame-related if explicitly marked. */ -- RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; -- RTX_FRAME_RELATED_P (insn) = 1; -- add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx); -- add_reg_note (insn, REG_CFA_RESTORE, -- gen_rtx_REG (DImode, LR_REGNUM)); -- -- if (fp_offset) -- { -- insn = emit_insn (gen_add2_insn (stack_pointer_rtx, -- GEN_INT (offset))); -- RTX_FRAME_RELATED_P (insn) = 1; -+ cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg2, cfi_ops); -+ insn = emit_insn (aarch64_gen_loadwb_pair -+ (mode1, stack_pointer_rtx, rreg1, -+ rreg2, offset)); - } - } - else -@@ -2319,79 +2523,57 @@ - { - insn = emit_insn (gen_add2_insn (stack_pointer_rtx, - GEN_INT (offset))); -- RTX_FRAME_RELATED_P (insn) = 1; - } -- } - -- /* Stack adjustment for exception handler. */ -- if (crtl->calls_eh_return) -- { -- /* We need to unwind the stack by the offset computed by -- EH_RETURN_STACKADJ_RTX. However, at this point the CFA is -- based on SP. Ideally we would update the SP and define the -- CFA along the lines of: -- -- SP = SP + EH_RETURN_STACKADJ_RTX -- (regnote CFA = SP - EH_RETURN_STACKADJ_RTX) -- -- However the dwarf emitter only understands a constant -- register offset. -- -- The solution chosen here is to use the otherwise unused IP0 -- as a temporary register to hold the current SP value. The -- CFA is described using IP0 then SP is modified. */ -- -- rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM); -- -- insn = emit_move_insn (ip0, stack_pointer_rtx); -- add_reg_note (insn, REG_CFA_DEF_CFA, ip0); -+ /* Reset the CFA to be SP + FRAME_SIZE. */ -+ rtx new_cfa = stack_pointer_rtx; -+ if (frame_size > 0) -+ new_cfa = plus_constant (Pmode, new_cfa, frame_size); -+ cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops); -+ REG_NOTES (insn) = cfi_ops; - RTX_FRAME_RELATED_P (insn) = 1; -- -- emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX)); -- -- /* Ensure the assignment to IP0 does not get optimized away. 
*/ -- emit_use (ip0); - } - -- if (frame_size > -1) -+ if (frame_size > 0) - { - if (frame_size >= 0x1000000) - { - rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM); - emit_move_insn (op0, GEN_INT (frame_size)); -- emit_insn (gen_add2_insn (stack_pointer_rtx, op0)); -- aarch64_set_frame_expr (gen_rtx_SET -- (Pmode, stack_pointer_rtx, -- plus_constant (Pmode, -- stack_pointer_rtx, -- frame_size))); -+ insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0)); - } -- else if (frame_size > 0) -+ else - { -- if ((frame_size & 0xfff) != 0) -+ int hi_ofs = frame_size & 0xfff000; -+ int lo_ofs = frame_size & 0x000fff; -+ -+ if (hi_ofs && lo_ofs) - { - insn = emit_insn (gen_add2_insn -- (stack_pointer_rtx, -- GEN_INT ((frame_size -- & (HOST_WIDE_INT) 0xfff)))); -+ (stack_pointer_rtx, GEN_INT (hi_ofs))); - RTX_FRAME_RELATED_P (insn) = 1; -+ frame_size = lo_ofs; - } -- if ((frame_size & 0xfff) != frame_size) -- { -- insn = emit_insn (gen_add2_insn -- (stack_pointer_rtx, -- GEN_INT ((frame_size -- & ~ (HOST_WIDE_INT) 0xfff)))); -- RTX_FRAME_RELATED_P (insn) = 1; -- } -+ insn = emit_insn (gen_add2_insn -+ (stack_pointer_rtx, GEN_INT (frame_size))); - } - -- aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx, -- plus_constant (Pmode, -- stack_pointer_rtx, -- offset))); -+ /* Reset the CFA to be SP + 0. */ -+ add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx); -+ RTX_FRAME_RELATED_P (insn) = 1; - } - -+ /* Stack adjustment for exception handler. */ -+ if (crtl->calls_eh_return) -+ { -+ /* We need to unwind the stack by the offset computed by -+ EH_RETURN_STACKADJ_RTX. We have already reset the CFA -+ to be SP; letting the CFA move during this adjustment -+ is just as correct as retaining the CFA from the body -+ of the function. Therefore, do nothing special. */ -+ emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX)); -+ } -+ - emit_use (gen_rtx_REG (DImode, LR_REGNUM)); - if (!for_sibcall) - emit_jump_insn (ret_rtx); -@@ -2403,17 +2585,13 @@ - rtx - aarch64_final_eh_return_addr (void) - { -- HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset; -+ HOST_WIDE_INT fp_offset; -+ - aarch64_layout_frame (); -- original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size; -- frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size -- + crtl->outgoing_args_size); -- offset = frame_size = AARCH64_ROUND_UP (frame_size, -- STACK_BOUNDARY / BITS_PER_UNIT); -- fp_offset = offset -- - original_frame_size -- - cfun->machine->frame.saved_regs_size; - -+ fp_offset = cfun->machine->frame.frame_size -+ - cfun->machine->frame.hard_fp_offset; -+ - if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0) - return gen_rtx_REG (DImode, LR_REGNUM); - -@@ -2449,12 +2627,22 @@ - - 2 * UNITS_PER_WORD)); - } - --/* Output code to build up a constant in a register. */ --static void --aarch64_build_constant (int regnum, HOST_WIDE_INT val) -+/* Possibly output code to build up a constant in a register. For -+ the benefit of the costs infrastructure, returns the number of -+ instructions which would be emitted. GENERATE inhibits or -+ enables code generation. */ -+ -+static int -+aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate) - { -+ int insns = 0; -+ - if (aarch64_bitmask_imm (val, DImode)) -- emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val)); -+ { -+ if (generate) -+ emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val)); -+ insns = 1; -+ } - else - { - int i; -@@ -2485,15 +2673,19 @@ - the same. 
*/ - if (ncount < zcount) - { -- emit_move_insn (gen_rtx_REG (Pmode, regnum), -- GEN_INT (val | ~(HOST_WIDE_INT) 0xffff)); -+ if (generate) -+ emit_move_insn (gen_rtx_REG (Pmode, regnum), -+ GEN_INT (val | ~(HOST_WIDE_INT) 0xffff)); - tval = 0xffff; -+ insns++; - } - else - { -- emit_move_insn (gen_rtx_REG (Pmode, regnum), -- GEN_INT (val & 0xffff)); -+ if (generate) -+ emit_move_insn (gen_rtx_REG (Pmode, regnum), -+ GEN_INT (val & 0xffff)); - tval = 0; -+ insns++; - } - - val >>= 16; -@@ -2501,11 +2693,17 @@ - for (i = 16; i < 64; i += 16) - { - if ((val & 0xffff) != tval) -- emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum), -- GEN_INT (i), GEN_INT (val & 0xffff))); -+ { -+ if (generate) -+ emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum), -+ GEN_INT (i), -+ GEN_INT (val & 0xffff))); -+ insns++; -+ } - val >>= 16; - } - } -+ return insns; - } - - static void -@@ -2520,7 +2718,7 @@ - - if (mdelta >= 4096 * 4096) - { -- aarch64_build_constant (scratchreg, delta); -+ (void) aarch64_build_constant (scratchreg, delta, true); - emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx)); - } - else if (mdelta > 0) -@@ -2594,7 +2792,7 @@ - addr = plus_constant (Pmode, temp0, vcall_offset); - else - { -- aarch64_build_constant (IP1_REGNUM, vcall_offset); -+ (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true); - addr = gen_rtx_PLUS (Pmode, temp0, temp1); - } - -@@ -3011,8 +3209,8 @@ - return false; - } - --static inline bool --offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset) -+bool -+aarch64_offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset) - { - return (offset >= -64 * GET_MODE_SIZE (mode) - && offset < 64 * GET_MODE_SIZE (mode) -@@ -3046,11 +3244,11 @@ - enum rtx_code code = GET_CODE (x); - rtx op0, op1; - bool allow_reg_index_p = -- outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16; -- -+ outer_code != PARALLEL && (GET_MODE_SIZE (mode) != 16 -+ || aarch64_vector_mode_supported_p (mode)); - /* Don't support anything other than POST_INC or REG addressing for - AdvSIMD. */ -- if (aarch64_vector_mode_p (mode) -+ if (aarch64_vect_struct_mode_p (mode) - && (code != POST_INC && code != REG)) - return false; - -@@ -3066,6 +3264,21 @@ - case PLUS: - op0 = XEXP (x, 0); - op1 = XEXP (x, 1); -+ -+ if (! strict_p -+ && REG_P (op0) -+ && (op0 == virtual_stack_vars_rtx -+ || op0 == frame_pointer_rtx -+ || op0 == arg_pointer_rtx) -+ && CONST_INT_P (op1)) -+ { -+ info->type = ADDRESS_REG_IMM; -+ info->base = op0; -+ info->offset = op1; -+ -+ return true; -+ } -+ - if (GET_MODE_SIZE (mode) != 0 - && CONST_INT_P (op1) - && aarch64_base_register_rtx_p (op0, strict_p)) -@@ -3084,12 +3297,12 @@ - We conservatively require an offset representable in either mode. - */ - if (mode == TImode || mode == TFmode) -- return (offset_7bit_signed_scaled_p (mode, offset) -+ return (aarch64_offset_7bit_signed_scaled_p (mode, offset) - && offset_9bit_signed_unscaled_p (mode, offset)); - - if (outer_code == PARALLEL) - return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8) -- && offset_7bit_signed_scaled_p (mode, offset)); -+ && aarch64_offset_7bit_signed_scaled_p (mode, offset)); - else - return (offset_9bit_signed_unscaled_p (mode, offset) - || offset_12bit_unsigned_scaled_p (mode, offset)); -@@ -3144,12 +3357,12 @@ - We conservatively require an offset representable in either mode. 
- */ - if (mode == TImode || mode == TFmode) -- return (offset_7bit_signed_scaled_p (mode, offset) -+ return (aarch64_offset_7bit_signed_scaled_p (mode, offset) - && offset_9bit_signed_unscaled_p (mode, offset)); - - if (outer_code == PARALLEL) - return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8) -- && offset_7bit_signed_scaled_p (mode, offset)); -+ && aarch64_offset_7bit_signed_scaled_p (mode, offset)); - else - return offset_9bit_signed_unscaled_p (mode, offset); - } -@@ -3333,7 +3546,7 @@ - the comparison will have to be swapped when we emit the assembly - code. */ - if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode) -- && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG) -+ && (REG_P (y) || GET_CODE (y) == SUBREG) - && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT - || GET_CODE (x) == LSHIFTRT - || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)) -@@ -3342,7 +3555,7 @@ - /* Similarly for a negated operand, but we can only do this for - equalities. */ - if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode) -- && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG) -+ && (REG_P (y) || GET_CODE (y) == SUBREG) - && (code == EQ || code == NE) - && GET_CODE (x) == NEG) - return CC_Zmode; -@@ -3359,7 +3572,7 @@ - return CCmode; - } - --static unsigned -+int - aarch64_get_condition_code (rtx x) - { - enum machine_mode mode = GET_MODE (XEXP (x, 0)); -@@ -3386,7 +3599,7 @@ - case UNLE: return AARCH64_LE; - case UNGT: return AARCH64_HI; - case UNGE: return AARCH64_PL; -- default: gcc_unreachable (); -+ default: return -1; - } - break; - -@@ -3403,7 +3616,7 @@ - case GTU: return AARCH64_HI; - case LEU: return AARCH64_LS; - case LTU: return AARCH64_CC; -- default: gcc_unreachable (); -+ default: return -1; - } - break; - -@@ -3422,7 +3635,7 @@ - case GTU: return AARCH64_CC; - case LEU: return AARCH64_CS; - case LTU: return AARCH64_HI; -- default: gcc_unreachable (); -+ default: return -1; - } - break; - -@@ -3433,7 +3646,7 @@ - case EQ: return AARCH64_EQ; - case GE: return AARCH64_PL; - case LT: return AARCH64_MI; -- default: gcc_unreachable (); -+ default: return -1; - } - break; - -@@ -3442,16 +3655,46 @@ - { - case NE: return AARCH64_NE; - case EQ: return AARCH64_EQ; -- default: gcc_unreachable (); -+ default: return -1; - } - break; - - default: -- gcc_unreachable (); -+ return -1; - break; - } - } - -+bool -+aarch64_const_vec_all_same_in_range_p (rtx x, -+ HOST_WIDE_INT minval, -+ HOST_WIDE_INT maxval) -+{ -+ HOST_WIDE_INT firstval; -+ int count, i; -+ -+ if (GET_CODE (x) != CONST_VECTOR -+ || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT) -+ return false; -+ -+ firstval = INTVAL (CONST_VECTOR_ELT (x, 0)); -+ if (firstval < minval || firstval > maxval) -+ return false; -+ -+ count = CONST_VECTOR_NUNITS (x); -+ for (i = 1; i < count; i++) -+ if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval) -+ return false; -+ -+ return true; -+} -+ -+bool -+aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val) -+{ -+ return aarch64_const_vec_all_same_in_range_p (x, val, val); -+} -+ - static unsigned - bit_count (unsigned HOST_WIDE_INT value) - { -@@ -3502,7 +3745,7 @@ - { - int n; - -- if (GET_CODE (x) != CONST_INT -+ if (!CONST_INT_P (x) - || (n = exact_log2 (INTVAL (x) & ~7)) <= 0) - { - output_operand_lossage ("invalid operand for '%%%c'", code); -@@ -3532,7 +3775,7 @@ - int n; - - /* Print N such that 2^N == X. 
*/ -- if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0) -+ if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0) - { - output_operand_lossage ("invalid operand for '%%%c'", code); - return; -@@ -3544,7 +3787,7 @@ - - case 'P': - /* Print the number of non-zero bits in X (a const_int). */ -- if (GET_CODE (x) != CONST_INT) -+ if (!CONST_INT_P (x)) - { - output_operand_lossage ("invalid operand for '%%%c'", code); - return; -@@ -3555,7 +3798,7 @@ - - case 'H': - /* Print the higher numbered register of a pair (TImode) of regs. */ -- if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1)) -+ if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1)) - { - output_operand_lossage ("invalid operand for '%%%c'", code); - return; -@@ -3565,39 +3808,48 @@ - break; - - case 'm': -- /* Print a condition (eq, ne, etc). */ -+ { -+ int cond_code; -+ /* Print a condition (eq, ne, etc). */ - -- /* CONST_TRUE_RTX means always -- that's the default. */ -- if (x == const_true_rtx) -- return; -- -- if (!COMPARISON_P (x)) -- { -- output_operand_lossage ("invalid operand for '%%%c'", code); -+ /* CONST_TRUE_RTX means always -- that's the default. */ -+ if (x == const_true_rtx) - return; -- } - -- fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f); -+ if (!COMPARISON_P (x)) -+ { -+ output_operand_lossage ("invalid operand for '%%%c'", code); -+ return; -+ } -+ -+ cond_code = aarch64_get_condition_code (x); -+ gcc_assert (cond_code >= 0); -+ fputs (aarch64_condition_codes[cond_code], f); -+ } - break; - - case 'M': -- /* Print the inverse of a condition (eq <-> ne, etc). */ -+ { -+ int cond_code; -+ /* Print the inverse of a condition (eq <-> ne, etc). */ - -- /* CONST_TRUE_RTX means never -- that's the default. */ -- if (x == const_true_rtx) -- { -- fputs ("nv", f); -- return; -- } -+ /* CONST_TRUE_RTX means never -- that's the default. */ -+ if (x == const_true_rtx) -+ { -+ fputs ("nv", f); -+ return; -+ } - -- if (!COMPARISON_P (x)) -- { -- output_operand_lossage ("invalid operand for '%%%c'", code); -- return; -- } -- -- fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE -- (aarch64_get_condition_code (x))], f); -+ if (!COMPARISON_P (x)) -+ { -+ output_operand_lossage ("invalid operand for '%%%c'", code); -+ return; -+ } -+ cond_code = aarch64_get_condition_code (x); -+ gcc_assert (cond_code >= 0); -+ fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE -+ (cond_code)], f); -+ } - break; - - case 'b': -@@ -3629,7 +3881,7 @@ - - case 'X': - /* Print bottom 16 bits of integer constant in hex. 
*/ -- if (GET_CODE (x) != CONST_INT) -+ if (!CONST_INT_P (x)) - { - output_operand_lossage ("invalid operand for '%%%c'", code); - return; -@@ -3694,9 +3946,10 @@ - case CONST_VECTOR: - if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT) - { -- gcc_assert (aarch64_const_vec_all_same_int_p (x, -- HOST_WIDE_INT_MIN, -- HOST_WIDE_INT_MAX)); -+ gcc_assert ( -+ aarch64_const_vec_all_same_in_range_p (x, -+ HOST_WIDE_INT_MIN, -+ HOST_WIDE_INT_MAX)); - asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0))); - } - else if (aarch64_simd_imm_zero_p (x, GET_MODE (x))) -@@ -3839,34 +4092,34 @@ - if (addr.offset == const0_rtx) - asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]); - else -- asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)], -+ asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)], - INTVAL (addr.offset)); - return; - - case ADDRESS_REG_REG: - if (addr.shift == 0) -- asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)], -+ asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)], - reg_names [REGNO (addr.offset)]); - else -- asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)], -+ asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)], - reg_names [REGNO (addr.offset)], addr.shift); - return; - - case ADDRESS_REG_UXTW: - if (addr.shift == 0) -- asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)], -+ asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)], - REGNO (addr.offset) - R0_REGNUM); - else -- asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)], -+ asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)], - REGNO (addr.offset) - R0_REGNUM, addr.shift); - return; - - case ADDRESS_REG_SXTW: - if (addr.shift == 0) -- asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)], -+ asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)], - REGNO (addr.offset) - R0_REGNUM); - else -- asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)], -+ asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)], - REGNO (addr.offset) - R0_REGNUM, addr.shift); - return; - -@@ -3874,27 +4127,27 @@ - switch (GET_CODE (x)) - { - case PRE_INC: -- asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)], -+ asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)], - GET_MODE_SIZE (aarch64_memory_reference_mode)); - return; - case POST_INC: -- asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)], -+ asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)], - GET_MODE_SIZE (aarch64_memory_reference_mode)); - return; - case PRE_DEC: -- asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)], -+ asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)], - GET_MODE_SIZE (aarch64_memory_reference_mode)); - return; - case POST_DEC: -- asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)], -+ asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)], - GET_MODE_SIZE (aarch64_memory_reference_mode)); - return; - case PRE_MODIFY: -- asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)], -+ asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)], - INTVAL (addr.offset)); - return; - case POST_MODIFY: -- asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)], -+ asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)], - INTVAL (addr.offset)); - return; - default: -@@ -3903,7 +4156,7 @@ - break; - - case ADDRESS_LO_SUM: -- asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]); -+ asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]); - 
output_addr_const (f, addr.offset); - asm_fprintf (f, "]"); - return; -@@ -3954,7 +4207,7 @@ - aarch64_regno_regclass (unsigned regno) - { - if (GP_REGNUM_P (regno)) -- return CORE_REGS; -+ return GENERAL_REGS; - - if (regno == SP_REGNUM) - return STACK_REG; -@@ -3969,6 +4222,47 @@ - return NO_REGS; - } - -+static rtx -+aarch64_legitimize_address (rtx x, rtx /* orig_x */, enum machine_mode mode) -+{ -+ /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask), -+ where mask is selected by alignment and size of the offset. -+ We try to pick as large a range for the offset as possible to -+ maximize the chance of a CSE. However, for aligned addresses -+ we limit the range to 4k so that structures with different sized -+ elements are likely to use the same base. */ -+ -+ if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))) -+ { -+ HOST_WIDE_INT offset = INTVAL (XEXP (x, 1)); -+ HOST_WIDE_INT base_offset; -+ -+ /* Does it look like we'll need a load/store-pair operation? */ -+ if (GET_MODE_SIZE (mode) > 16 -+ || mode == TImode) -+ base_offset = ((offset + 64 * GET_MODE_SIZE (mode)) -+ & ~((128 * GET_MODE_SIZE (mode)) - 1)); -+ /* For offsets aren't a multiple of the access size, the limit is -+ -256...255. */ -+ else if (offset & (GET_MODE_SIZE (mode) - 1)) -+ base_offset = (offset + 0x100) & ~0x1ff; -+ else -+ base_offset = offset & ~0xfff; -+ -+ if (base_offset == 0) -+ return x; -+ -+ offset -= base_offset; -+ rtx base_reg = gen_reg_rtx (Pmode); -+ rtx val = force_operand (plus_constant (Pmode, XEXP (x, 0), base_offset), -+ NULL_RTX); -+ emit_move_insn (base_reg, val); -+ x = plus_constant (Pmode, base_reg, offset); -+ } -+ -+ return x; -+} -+ - /* Try a machine-dependent way of reloading an illegitimate address - operand. If we find one, push the reload and return the new rtx. */ - -@@ -3980,8 +4274,8 @@ - { - rtx x = *x_p; - -- /* Do not allow mem (plus (reg, const)) if vector mode. */ -- if (aarch64_vector_mode_p (mode) -+ /* Do not allow mem (plus (reg, const)) if vector struct mode. */ -+ if (aarch64_vect_struct_mode_p (mode) - && GET_CODE (x) == PLUS - && REG_P (XEXP (x, 0)) - && CONST_INT_P (XEXP (x, 1))) -@@ -4105,12 +4399,12 @@ - /* A TFmode or TImode memory access should be handled via an FP_REGS - because AArch64 has richer addressing modes for LDR/STR instructions - than LDP/STP instructions. 
*/ -- if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS -+ if (!TARGET_GENERAL_REGS_ONLY && rclass == GENERAL_REGS - && GET_MODE_SIZE (mode) == 16 && MEM_P (x)) - return FP_REGS; - - if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x)) -- return CORE_REGS; -+ return GENERAL_REGS; - - return NO_REGS; - } -@@ -4142,43 +4436,28 @@ - HOST_WIDE_INT - aarch64_initial_elimination_offset (unsigned from, unsigned to) - { -- HOST_WIDE_INT frame_size; -- HOST_WIDE_INT offset; -- - aarch64_layout_frame (); -- frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size -- + crtl->outgoing_args_size -- + cfun->machine->saved_varargs_size); - -- frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT); -- offset = frame_size; -+ if (to == HARD_FRAME_POINTER_REGNUM) -+ { -+ if (from == ARG_POINTER_REGNUM) -+ return cfun->machine->frame.frame_size - crtl->outgoing_args_size; - -- if (to == HARD_FRAME_POINTER_REGNUM) -- { -- if (from == ARG_POINTER_REGNUM) -- return offset - crtl->outgoing_args_size; -+ if (from == FRAME_POINTER_REGNUM) -+ return (cfun->machine->frame.hard_fp_offset -+ - cfun->machine->frame.saved_varargs_size); -+ } - -- if (from == FRAME_POINTER_REGNUM) -- return cfun->machine->frame.saved_regs_size + get_frame_size (); -- } -+ if (to == STACK_POINTER_REGNUM) -+ { -+ if (from == FRAME_POINTER_REGNUM) -+ return (cfun->machine->frame.frame_size -+ - cfun->machine->frame.saved_varargs_size); -+ } - -- if (to == STACK_POINTER_REGNUM) -- { -- if (from == FRAME_POINTER_REGNUM) -- { -- HOST_WIDE_INT elim = crtl->outgoing_args_size -- + cfun->machine->frame.saved_regs_size -- + get_frame_size () -- - cfun->machine->frame.fp_lr_offset; -- elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT); -- return elim; -- } -- } -- -- return offset; -+ return cfun->machine->frame.frame_size; - } - -- - /* Implement RETURN_ADDR_RTX. We do not support moving back to a - previous frame. */ - -@@ -4242,7 +4521,7 @@ - { - switch (regclass) - { -- case CORE_REGS: -+ case CALLER_SAVE_REGS: - case POINTER_REGS: - case GENERAL_REGS: - case ALL_REGS: -@@ -4443,9 +4722,13 @@ - { - rtx op = x; - -+ /* We accept both ROTATERT and ROTATE: since the RHS must be a constant -+ we can convert both to ROR during final output. */ - if ((GET_CODE (op) == ASHIFT - || GET_CODE (op) == ASHIFTRT -- || GET_CODE (op) == LSHIFTRT) -+ || GET_CODE (op) == LSHIFTRT -+ || GET_CODE (op) == ROTATERT -+ || GET_CODE (op) == ROTATE) - && CONST_INT_P (XEXP (op, 1))) - return XEXP (op, 0); - -@@ -4457,12 +4740,12 @@ - return x; - } - --/* Helper function for rtx cost calculation. Strip a shift or extend -+/* Helper function for rtx cost calculation. Strip an extend - expression from X. Returns the inner operand if successful, or the - original expression on failure. We deal with a number of possible - canonicalization variations here. */ - static rtx --aarch64_strip_shift_or_extend (rtx x) -+aarch64_strip_extend (rtx x) - { - rtx op = x; - -@@ -4469,6 +4752,7 @@ - /* Zero and sign extraction of a widened value. */ - if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT) - && XEXP (op, 2) == const0_rtx -+ && GET_CODE (XEXP (op, 0)) == MULT - && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1), - XEXP (op, 1))) - return XEXP (XEXP (op, 0), 0); -@@ -4497,9 +4781,335 @@ - if (op != x) - return op; - -- return aarch64_strip_shift (x); -+ return x; - } - -+/* Helper function for rtx cost calculation. 
Calculate the cost of -+ a MULT, which may be part of a multiply-accumulate rtx. Return -+ the calculated cost of the expression, recursing manually in to -+ operands where needed. */ -+ -+static int -+aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed) -+{ -+ rtx op0, op1; -+ const struct cpu_cost_table *extra_cost -+ = aarch64_tune_params->insn_extra_cost; -+ int cost = 0; -+ bool maybe_fma = (outer == PLUS || outer == MINUS); -+ enum machine_mode mode = GET_MODE (x); -+ -+ gcc_checking_assert (code == MULT); -+ -+ op0 = XEXP (x, 0); -+ op1 = XEXP (x, 1); -+ -+ if (VECTOR_MODE_P (mode)) -+ mode = GET_MODE_INNER (mode); -+ -+ /* Integer multiply/fma. */ -+ if (GET_MODE_CLASS (mode) == MODE_INT) -+ { -+ /* The multiply will be canonicalized as a shift, cost it as such. */ -+ if (CONST_INT_P (op1) -+ && exact_log2 (INTVAL (op1)) > 0) -+ { -+ if (speed) -+ { -+ if (maybe_fma) -+ /* ADD (shifted register). */ -+ cost += extra_cost->alu.arith_shift; -+ else -+ /* LSL (immediate). */ -+ cost += extra_cost->alu.shift; -+ } -+ -+ cost += rtx_cost (op0, GET_CODE (op0), 0, speed); -+ -+ return cost; -+ } -+ -+ /* Integer multiplies or FMAs have zero/sign extending variants. */ -+ if ((GET_CODE (op0) == ZERO_EXTEND -+ && GET_CODE (op1) == ZERO_EXTEND) -+ || (GET_CODE (op0) == SIGN_EXTEND -+ && GET_CODE (op1) == SIGN_EXTEND)) -+ { -+ cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed) -+ + rtx_cost (XEXP (op1, 0), MULT, 1, speed); -+ -+ if (speed) -+ { -+ if (maybe_fma) -+ /* MADD/SMADDL/UMADDL. */ -+ cost += extra_cost->mult[0].extend_add; -+ else -+ /* MUL/SMULL/UMULL. */ -+ cost += extra_cost->mult[0].extend; -+ } -+ -+ return cost; -+ } -+ -+ /* This is either an integer multiply or an FMA. In both cases -+ we want to recurse and cost the operands. */ -+ cost += rtx_cost (op0, MULT, 0, speed) -+ + rtx_cost (op1, MULT, 1, speed); -+ -+ if (speed) -+ { -+ if (maybe_fma) -+ /* MADD. */ -+ cost += extra_cost->mult[mode == DImode].add; -+ else -+ /* MUL. */ -+ cost += extra_cost->mult[mode == DImode].simple; -+ } -+ -+ return cost; -+ } -+ else -+ { -+ if (speed) -+ { -+ /* Floating-point FMA/FMUL can also support negations of the -+ operands. */ -+ if (GET_CODE (op0) == NEG) -+ op0 = XEXP (op0, 0); -+ if (GET_CODE (op1) == NEG) -+ op1 = XEXP (op1, 0); -+ -+ if (maybe_fma) -+ /* FMADD/FNMADD/FNMSUB/FMSUB. */ -+ cost += extra_cost->fp[mode == DFmode].fma; -+ else -+ /* FMUL/FNMUL. */ -+ cost += extra_cost->fp[mode == DFmode].mult; -+ } -+ -+ cost += rtx_cost (op0, MULT, 0, speed) -+ + rtx_cost (op1, MULT, 1, speed); -+ return cost; -+ } -+} -+ -+static int -+aarch64_address_cost (rtx x, -+ enum machine_mode mode, -+ addr_space_t as ATTRIBUTE_UNUSED, -+ bool speed) -+{ -+ enum rtx_code c = GET_CODE (x); -+ const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost; -+ struct aarch64_address_info info; -+ int cost = 0; -+ info.shift = 0; -+ -+ if (!aarch64_classify_address (&info, x, mode, c, false)) -+ { -+ if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF) -+ { -+ /* This is a CONST or SYMBOL ref which will be split -+ in a different way depending on the code model in use. -+ Cost it through the generic infrastructure. */ -+ int cost_symbol_ref = rtx_cost (x, MEM, 1, speed); -+ /* Divide through by the cost of one instruction to -+ bring it to the same units as the address costs. */ -+ cost_symbol_ref /= COSTS_N_INSNS (1); -+ /* The cost is then the cost of preparing the address, -+ followed by an immediate (possibly 0) offset. 
*/ -+ return cost_symbol_ref + addr_cost->imm_offset; -+ } -+ else -+ { -+ /* This is most likely a jump table from a case -+ statement. */ -+ return addr_cost->register_offset; -+ } -+ } -+ -+ switch (info.type) -+ { -+ case ADDRESS_LO_SUM: -+ case ADDRESS_SYMBOLIC: -+ case ADDRESS_REG_IMM: -+ cost += addr_cost->imm_offset; -+ break; -+ -+ case ADDRESS_REG_WB: -+ if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY) -+ cost += addr_cost->pre_modify; -+ else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY) -+ cost += addr_cost->post_modify; -+ else -+ gcc_unreachable (); -+ -+ break; -+ -+ case ADDRESS_REG_REG: -+ cost += addr_cost->register_offset; -+ break; -+ -+ case ADDRESS_REG_UXTW: -+ case ADDRESS_REG_SXTW: -+ cost += addr_cost->register_extend; -+ break; -+ -+ default: -+ gcc_unreachable (); -+ } -+ -+ -+ if (info.shift > 0) -+ { -+ /* For the sake of calculating the cost of the shifted register -+ component, we can treat same sized modes in the same way. */ -+ switch (GET_MODE_BITSIZE (mode)) -+ { -+ case 16: -+ cost += addr_cost->addr_scale_costs.hi; -+ break; -+ -+ case 32: -+ cost += addr_cost->addr_scale_costs.si; -+ break; -+ -+ case 64: -+ cost += addr_cost->addr_scale_costs.di; -+ break; -+ -+ /* We can't tell, or this is a 128-bit vector. */ -+ default: -+ cost += addr_cost->addr_scale_costs.ti; -+ break; -+ } -+ } -+ -+ return cost; -+} -+ -+/* Return true if the RTX X in mode MODE is a zero or sign extract -+ usable in an ADD or SUB (extended register) instruction. */ -+static bool -+aarch64_rtx_arith_op_extract_p (rtx x, enum machine_mode mode) -+{ -+ /* Catch add with a sign extract. -+ This is add_<optab><mode>_multp2. */ -+ if (GET_CODE (x) == SIGN_EXTRACT -+ || GET_CODE (x) == ZERO_EXTRACT) -+ { -+ rtx op0 = XEXP (x, 0); -+ rtx op1 = XEXP (x, 1); -+ rtx op2 = XEXP (x, 2); -+ -+ if (GET_CODE (op0) == MULT -+ && CONST_INT_P (op1) -+ && op2 == const0_rtx -+ && CONST_INT_P (XEXP (op0, 1)) -+ && aarch64_is_extend_from_extract (mode, -+ XEXP (op0, 1), -+ op1)) -+ { -+ return true; -+ } -+ } -+ -+ return false; -+} -+ -+static bool -+aarch64_frint_unspec_p (unsigned int u) -+{ -+ switch (u) -+ { -+ case UNSPEC_FRINTZ: -+ case UNSPEC_FRINTP: -+ case UNSPEC_FRINTM: -+ case UNSPEC_FRINTA: -+ case UNSPEC_FRINTN: -+ case UNSPEC_FRINTX: -+ case UNSPEC_FRINTI: -+ return true; -+ -+ default: -+ return false; -+ } -+} -+ -+/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)), -+ storing it in *COST. Result is true if the total cost of the operation -+ has now been calculated. */ -+static bool -+aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed) -+{ -+ rtx inner; -+ rtx comparator; -+ enum rtx_code cmpcode; -+ -+ if (COMPARISON_P (op0)) -+ { -+ inner = XEXP (op0, 0); -+ comparator = XEXP (op0, 1); -+ cmpcode = GET_CODE (op0); -+ } -+ else -+ { -+ inner = op0; -+ comparator = const0_rtx; -+ cmpcode = NE; -+ } -+ -+ if (GET_CODE (op1) == PC || GET_CODE (op2) == PC) -+ { -+ /* Conditional branch. */ -+ if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC) -+ return true; -+ else -+ { -+ if (cmpcode == NE || cmpcode == EQ) -+ { -+ if (comparator == const0_rtx) -+ { -+ /* TBZ/TBNZ/CBZ/CBNZ. */ -+ if (GET_CODE (inner) == ZERO_EXTRACT) -+ /* TBZ/TBNZ. */ -+ *cost += rtx_cost (XEXP (inner, 0), ZERO_EXTRACT, -+ 0, speed); -+ else -+ /* CBZ/CBNZ. */ -+ *cost += rtx_cost (inner, cmpcode, 0, speed); -+ -+ return true; -+ } -+ } -+ else if (cmpcode == LT || cmpcode == GE) -+ { -+ /* TBZ/TBNZ. 
*/ -+ if (comparator == const0_rtx) -+ return true; -+ } -+ } -+ } -+ else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC) -+ { -+ /* It's a conditional operation based on the status flags, -+ so it must be some flavor of CSEL. */ -+ -+ /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */ -+ if (GET_CODE (op1) == NEG -+ || GET_CODE (op1) == NOT -+ || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx)) -+ op1 = XEXP (op1, 0); -+ -+ *cost += rtx_cost (op1, IF_THEN_ELSE, 1, speed); -+ *cost += rtx_cost (op2, IF_THEN_ELSE, 2, speed); -+ return true; -+ } -+ -+ /* We don't know what this is, cost all operands. */ -+ return false; -+} -+ - /* Calculate the cost of calculating X, storing it in *COST. Result - is true if the total cost of the operation has now been calculated. */ - static bool -@@ -4506,13 +5116,31 @@ - aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, - int param ATTRIBUTE_UNUSED, int *cost, bool speed) - { -- rtx op0, op1; -+ rtx op0, op1, op2; - const struct cpu_cost_table *extra_cost - = aarch64_tune_params->insn_extra_cost; -+ enum machine_mode mode = GET_MODE (x); - -+ /* By default, assume that everything has equivalent cost to the -+ cheapest instruction. Any additional costs are applied as a delta -+ above this default. */ -+ *cost = COSTS_N_INSNS (1); -+ -+ /* TODO: The cost infrastructure currently does not handle -+ vector operations. Assume that all vector operations -+ are equally expensive. */ -+ if (VECTOR_MODE_P (mode)) -+ { -+ if (speed) -+ *cost += extra_cost->vect.alu; -+ return true; -+ } -+ - switch (code) - { - case SET: -+ /* The cost depends entirely on the operands to SET. */ -+ *cost = 0; - op0 = SET_DEST (x); - op1 = SET_SRC (x); - -@@ -4520,52 +5148,194 @@ - { - case MEM: - if (speed) -- *cost += extra_cost->ldst.store; -+ { -+ rtx address = XEXP (op0, 0); -+ if (GET_MODE_CLASS (mode) == MODE_INT) -+ *cost += extra_cost->ldst.store; -+ else if (mode == SFmode) -+ *cost += extra_cost->ldst.storef; -+ else if (mode == DFmode) -+ *cost += extra_cost->ldst.stored; - -- if (op1 != const0_rtx) -- *cost += rtx_cost (op1, SET, 1, speed); -+ *cost += -+ COSTS_N_INSNS (aarch64_address_cost (address, mode, -+ 0, speed)); -+ } -+ -+ *cost += rtx_cost (op1, SET, 1, speed); - return true; - - case SUBREG: - if (! REG_P (SUBREG_REG (op0))) - *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed); -+ - /* Fall through. */ - case REG: -- /* Cost is just the cost of the RHS of the set. */ -- *cost += rtx_cost (op1, SET, 1, true); -+ /* const0_rtx is in general free, but we will use an -+ instruction to set a register to 0. */ -+ if (REG_P (op1) || op1 == const0_rtx) -+ { -+ /* The cost is 1 per register copied. */ -+ int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1) -+ / UNITS_PER_WORD; -+ *cost = COSTS_N_INSNS (n_minus_1 + 1); -+ } -+ else -+ /* Cost is just the cost of the RHS of the set. */ -+ *cost += rtx_cost (op1, SET, 1, speed); - return true; - -- case ZERO_EXTRACT: /* Bit-field insertion. */ -+ case ZERO_EXTRACT: - case SIGN_EXTRACT: -- /* Strip any redundant widening of the RHS to meet the width of -- the target. */ -+ /* Bit-field insertion. Strip any redundant widening of -+ the RHS to meet the width of the target. 
*/ - if (GET_CODE (op1) == SUBREG) - op1 = SUBREG_REG (op1); - if ((GET_CODE (op1) == ZERO_EXTEND - || GET_CODE (op1) == SIGN_EXTEND) -- && GET_CODE (XEXP (op0, 1)) == CONST_INT -+ && CONST_INT_P (XEXP (op0, 1)) - && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0))) - >= INTVAL (XEXP (op0, 1)))) - op1 = XEXP (op1, 0); -- *cost += rtx_cost (op1, SET, 1, speed); -+ -+ if (CONST_INT_P (op1)) -+ { -+ /* MOV immediate is assumed to always be cheap. */ -+ *cost = COSTS_N_INSNS (1); -+ } -+ else -+ { -+ /* BFM. */ -+ if (speed) -+ *cost += extra_cost->alu.bfi; -+ *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed); -+ } -+ - return true; - - default: -- break; -+ /* We can't make sense of this, assume default cost. */ -+ *cost = COSTS_N_INSNS (1); -+ return false; - } - return false; - -+ case CONST_INT: -+ /* If an instruction can incorporate a constant within the -+ instruction, the instruction's expression avoids calling -+ rtx_cost() on the constant. If rtx_cost() is called on a -+ constant, then it is usually because the constant must be -+ moved into a register by one or more instructions. -+ -+ The exception is constant 0, which can be expressed -+ as XZR/WZR and is therefore free. The exception to this is -+ if we have (set (reg) (const0_rtx)) in which case we must cost -+ the move. However, we can catch that when we cost the SET, so -+ we don't need to consider that here. */ -+ if (x == const0_rtx) -+ *cost = 0; -+ else -+ { -+ /* To an approximation, building any other constant is -+ proportionally expensive to the number of instructions -+ required to build that constant. This is true whether we -+ are compiling for SPEED or otherwise. */ -+ *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate -+ (NULL_RTX, x, false, mode)); -+ } -+ return true; -+ -+ case CONST_DOUBLE: -+ if (speed) -+ { -+ /* mov[df,sf]_aarch64. */ -+ if (aarch64_float_const_representable_p (x)) -+ /* FMOV (scalar immediate). */ -+ *cost += extra_cost->fp[mode == DFmode].fpconst; -+ else if (!aarch64_float_const_zero_rtx_p (x)) -+ { -+ /* This will be a load from memory. */ -+ if (mode == DFmode) -+ *cost += extra_cost->ldst.loadd; -+ else -+ *cost += extra_cost->ldst.loadf; -+ } -+ else -+ /* Otherwise this is +0.0. We get this using MOVI d0, #0 -+ or MOV v0.s[0], wzr - neither of which are modeled by the -+ cost tables. Just use the default cost. */ -+ { -+ } -+ } -+ -+ return true; -+ - case MEM: - if (speed) -- *cost += extra_cost->ldst.load; -+ { -+ /* For loads we want the base cost of a load, plus an -+ approximation for the additional cost of the addressing -+ mode. */ -+ rtx address = XEXP (x, 0); -+ if (GET_MODE_CLASS (mode) == MODE_INT) -+ *cost += extra_cost->ldst.load; -+ else if (mode == SFmode) -+ *cost += extra_cost->ldst.loadf; -+ else if (mode == DFmode) -+ *cost += extra_cost->ldst.loadd; - -+ *cost += -+ COSTS_N_INSNS (aarch64_address_cost (address, mode, -+ 0, speed)); -+ } -+ - return true; - - case NEG: -- op0 = CONST0_RTX (GET_MODE (x)); -- op1 = XEXP (x, 0); -- goto cost_minus; -+ op0 = XEXP (x, 0); - -+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) -+ { -+ if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE -+ || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE) -+ { -+ /* CSETM. */ -+ *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed); -+ return true; -+ } -+ -+ /* Cost this as SUB wzr, X. 
*/ -+ op0 = CONST0_RTX (GET_MODE (x)); -+ op1 = XEXP (x, 0); -+ goto cost_minus; -+ } -+ -+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) -+ { -+ /* Support (neg(fma...)) as a single instruction only if -+ sign of zeros is unimportant. This matches the decision -+ making in aarch64.md. */ -+ if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0))) -+ { -+ /* FNMADD. */ -+ *cost = rtx_cost (op0, NEG, 0, speed); -+ return true; -+ } -+ if (speed) -+ /* FNEG. */ -+ *cost += extra_cost->fp[mode == DFmode].neg; -+ return false; -+ } -+ -+ return false; -+ -+ case CLRSB: -+ case CLZ: -+ if (speed) -+ *cost += extra_cost->alu.clz; -+ -+ return false; -+ - case COMPARE: - op0 = XEXP (x, 0); - op1 = XEXP (x, 1); -@@ -4577,96 +5347,228 @@ - goto cost_logic; - } - -- /* Comparisons can work if the order is swapped. -- Canonicalization puts the more complex operation first, but -- we want it in op1. */ -- if (! (REG_P (op0) -- || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0))))) -- { -- op0 = XEXP (x, 1); -- op1 = XEXP (x, 0); -- } -- goto cost_minus; -+ if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT) -+ { -+ /* TODO: A write to the CC flags possibly costs extra, this -+ needs encoding in the cost tables. */ - -+ /* CC_ZESWPmode supports zero extend for free. */ -+ if (GET_MODE (x) == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND) -+ op0 = XEXP (op0, 0); -+ -+ /* ANDS. */ -+ if (GET_CODE (op0) == AND) -+ { -+ x = op0; -+ goto cost_logic; -+ } -+ -+ if (GET_CODE (op0) == PLUS) -+ { -+ /* ADDS (and CMN alias). */ -+ x = op0; -+ goto cost_plus; -+ } -+ -+ if (GET_CODE (op0) == MINUS) -+ { -+ /* SUBS. */ -+ x = op0; -+ goto cost_minus; -+ } -+ -+ if (GET_CODE (op1) == NEG) -+ { -+ /* CMN. */ -+ if (speed) -+ *cost += extra_cost->alu.arith; -+ -+ *cost += rtx_cost (op0, COMPARE, 0, speed); -+ *cost += rtx_cost (XEXP (op1, 0), NEG, 1, speed); -+ return true; -+ } -+ -+ /* CMP. -+ -+ Compare can freely swap the order of operands, and -+ canonicalization puts the more complex operation first. -+ But the integer MINUS logic expects the shift/extend -+ operation in op1. */ -+ if (! (REG_P (op0) -+ || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0))))) -+ { -+ op0 = XEXP (x, 1); -+ op1 = XEXP (x, 0); -+ } -+ goto cost_minus; -+ } -+ -+ if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT) -+ { -+ /* FCMP. */ -+ if (speed) -+ *cost += extra_cost->fp[mode == DFmode].compare; -+ -+ if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1)) -+ { -+ /* FCMP supports constant 0.0 for no extra cost. */ -+ return true; -+ } -+ return false; -+ } -+ -+ return false; -+ - case MINUS: -- op0 = XEXP (x, 0); -- op1 = XEXP (x, 1); -+ { -+ op0 = XEXP (x, 0); -+ op1 = XEXP (x, 1); - -- cost_minus: -- if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT -- || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC -- && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)) -- { -- if (op0 != const0_rtx) -+cost_minus: -+ /* Detect valid immediates. */ -+ if ((GET_MODE_CLASS (mode) == MODE_INT -+ || (GET_MODE_CLASS (mode) == MODE_CC -+ && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)) -+ && CONST_INT_P (op1) -+ && aarch64_uimm12_shift (INTVAL (op1))) -+ { - *cost += rtx_cost (op0, MINUS, 0, speed); - -- if (CONST_INT_P (op1)) -- { -- if (!aarch64_uimm12_shift (INTVAL (op1))) -- *cost += rtx_cost (op1, MINUS, 1, speed); -- } -- else -- { -- op1 = aarch64_strip_shift_or_extend (op1); -- *cost += rtx_cost (op1, MINUS, 1, speed); -- } -- return true; -- } -+ if (speed) -+ /* SUB(S) (immediate). 
*/ -+ *cost += extra_cost->alu.arith; -+ return true; - -- return false; -+ } - -+ /* Look for SUB (extended register). */ -+ if (aarch64_rtx_arith_op_extract_p (op1, mode)) -+ { -+ if (speed) -+ *cost += extra_cost->alu.arith_shift; -+ -+ *cost += rtx_cost (XEXP (XEXP (op1, 0), 0), -+ (enum rtx_code) GET_CODE (op1), -+ 0, speed); -+ return true; -+ } -+ -+ rtx new_op1 = aarch64_strip_extend (op1); -+ -+ /* Cost this as an FMA-alike operation. */ -+ if ((GET_CODE (new_op1) == MULT -+ || GET_CODE (new_op1) == ASHIFT) -+ && code != COMPARE) -+ { -+ *cost += aarch64_rtx_mult_cost (new_op1, MULT, -+ (enum rtx_code) code, -+ speed); -+ *cost += rtx_cost (op0, MINUS, 0, speed); -+ return true; -+ } -+ -+ *cost += rtx_cost (new_op1, MINUS, 1, speed); -+ -+ if (speed) -+ { -+ if (GET_MODE_CLASS (mode) == MODE_INT) -+ /* SUB(S). */ -+ *cost += extra_cost->alu.arith; -+ else if (GET_MODE_CLASS (mode) == MODE_FLOAT) -+ /* FSUB. */ -+ *cost += extra_cost->fp[mode == DFmode].addsub; -+ } -+ return true; -+ } -+ - case PLUS: -- op0 = XEXP (x, 0); -- op1 = XEXP (x, 1); -+ { -+ rtx new_op0; - -- if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) -- { -- if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1))) -- { -- *cost += rtx_cost (op0, PLUS, 0, speed); -- } -- else -- { -- rtx new_op0 = aarch64_strip_shift_or_extend (op0); -+ op0 = XEXP (x, 0); -+ op1 = XEXP (x, 1); - -- if (new_op0 == op0 -- && GET_CODE (op0) == MULT) -- { -- if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND -- && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND) -- || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND -- && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND)) -- { -- *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0, -- speed) -- + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1, -- speed) -- + rtx_cost (op1, PLUS, 1, speed)); -- if (speed) -- *cost += -- extra_cost->mult[GET_MODE (x) == DImode].extend_add; -- return true; -- } -+cost_plus: -+ if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE -+ || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE) -+ { -+ /* CSINC. */ -+ *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed); -+ *cost += rtx_cost (op1, PLUS, 1, speed); -+ return true; -+ } - -- *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed) -- + rtx_cost (XEXP (op0, 1), MULT, 1, speed) -- + rtx_cost (op1, PLUS, 1, speed)); -+ if (GET_MODE_CLASS (mode) == MODE_INT -+ && CONST_INT_P (op1) -+ && aarch64_uimm12_shift (INTVAL (op1))) -+ { -+ *cost += rtx_cost (op0, PLUS, 0, speed); - -- if (speed) -- *cost += extra_cost->mult[GET_MODE (x) == DImode].add; -+ if (speed) -+ /* ADD (immediate). */ -+ *cost += extra_cost->alu.arith; -+ return true; -+ } - -- return true; -- } -+ /* Look for ADD (extended register). */ -+ if (aarch64_rtx_arith_op_extract_p (op0, mode)) -+ { -+ if (speed) -+ *cost += extra_cost->alu.arith_shift; - -- *cost += (rtx_cost (new_op0, PLUS, 0, speed) -- + rtx_cost (op1, PLUS, 1, speed)); -- } -- return true; -- } -+ *cost += rtx_cost (XEXP (XEXP (op0, 0), 0), -+ (enum rtx_code) GET_CODE (op0), -+ 0, speed); -+ return true; -+ } - -+ /* Strip any extend, leave shifts behind as we will -+ cost them through mult_cost. */ -+ new_op0 = aarch64_strip_extend (op0); -+ -+ if (GET_CODE (new_op0) == MULT -+ || GET_CODE (new_op0) == ASHIFT) -+ { -+ *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS, -+ speed); -+ *cost += rtx_cost (op1, PLUS, 1, speed); -+ return true; -+ } -+ -+ *cost += (rtx_cost (new_op0, PLUS, 0, speed) -+ + rtx_cost (op1, PLUS, 1, speed)); -+ -+ if (speed) -+ { -+ if (GET_MODE_CLASS (mode) == MODE_INT) -+ /* ADD. 
*/ -+ *cost += extra_cost->alu.arith; -+ else if (GET_MODE_CLASS (mode) == MODE_FLOAT) -+ /* FADD. */ -+ *cost += extra_cost->fp[mode == DFmode].addsub; -+ } -+ return true; -+ } -+ -+ case BSWAP: -+ *cost = COSTS_N_INSNS (1); -+ -+ if (speed) -+ *cost += extra_cost->alu.rev; -+ - return false; - - case IOR: -+ if (aarch_rev16_p (x)) -+ { -+ *cost = COSTS_N_INSNS (1); -+ -+ if (speed) -+ *cost += extra_cost->alu.rev; -+ -+ return true; -+ } -+ /* Fall through. */ - case XOR: - case AND: - cost_logic: -@@ -4673,117 +5575,252 @@ - op0 = XEXP (x, 0); - op1 = XEXP (x, 1); - -+ if (code == AND -+ && GET_CODE (op0) == MULT -+ && CONST_INT_P (XEXP (op0, 1)) -+ && CONST_INT_P (op1) -+ && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))), -+ INTVAL (op1)) != 0) -+ { -+ /* This is a UBFM/SBFM. */ -+ *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed); -+ if (speed) -+ *cost += extra_cost->alu.bfx; -+ return true; -+ } -+ - if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) - { -+ /* We possibly get the immediate for free, this is not -+ modelled. */ - if (CONST_INT_P (op1) - && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x))) - { -- *cost += rtx_cost (op0, AND, 0, speed); -+ *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed); -+ -+ if (speed) -+ *cost += extra_cost->alu.logical; -+ -+ return true; - } - else - { -+ rtx new_op0 = op0; -+ -+ /* Handle ORN, EON, or BIC. */ - if (GET_CODE (op0) == NOT) - op0 = XEXP (op0, 0); -- op0 = aarch64_strip_shift (op0); -- *cost += (rtx_cost (op0, AND, 0, speed) -- + rtx_cost (op1, AND, 1, speed)); -+ -+ new_op0 = aarch64_strip_shift (op0); -+ -+ /* If we had a shift on op0 then this is a logical-shift- -+ by-register/immediate operation. Otherwise, this is just -+ a logical operation. */ -+ if (speed) -+ { -+ if (new_op0 != op0) -+ { -+ /* Shift by immediate. */ -+ if (CONST_INT_P (XEXP (op0, 1))) -+ *cost += extra_cost->alu.log_shift; -+ else -+ *cost += extra_cost->alu.log_shift_reg; -+ } -+ else -+ *cost += extra_cost->alu.logical; -+ } -+ -+ /* In both cases we want to cost both operands. */ -+ *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed) -+ + rtx_cost (op1, (enum rtx_code) code, 1, speed); -+ -+ return true; - } -- return true; - } - return false; - -+ case NOT: -+ /* MVN. */ -+ if (speed) -+ *cost += extra_cost->alu.logical; -+ -+ /* The logical instruction could have the shifted register form, -+ but the cost is the same if the shift is processed as a separate -+ instruction, so we don't bother with it here. */ -+ return false; -+ - case ZERO_EXTEND: -- if ((GET_MODE (x) == DImode -- && GET_MODE (XEXP (x, 0)) == SImode) -- || GET_CODE (XEXP (x, 0)) == MEM) -+ -+ op0 = XEXP (x, 0); -+ /* If a value is written in SI mode, then zero extended to DI -+ mode, the operation will in general be free as a write to -+ a 'w' register implicitly zeroes the upper bits of an 'x' -+ register. However, if this is -+ -+ (set (reg) (zero_extend (reg))) -+ -+ we must cost the explicit register move. */ -+ if (mode == DImode -+ && GET_MODE (op0) == SImode -+ && outer == SET) - { -- *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed); -+ int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed); -+ -+ if (!op_cost && speed) -+ /* MOV. */ -+ *cost += extra_cost->alu.extend; -+ else -+ /* Free, the cost is that of the SI mode operation. */ -+ *cost = op_cost; -+ - return true; - } -+ else if (MEM_P (XEXP (x, 0))) -+ { -+ /* All loads can zero extend to any size for free. 
*/ -+ *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed); -+ return true; -+ } -+ -+ /* UXTB/UXTH. */ -+ if (speed) -+ *cost += extra_cost->alu.extend; -+ - return false; - - case SIGN_EXTEND: -- if (GET_CODE (XEXP (x, 0)) == MEM) -+ if (MEM_P (XEXP (x, 0))) - { -- *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed); -+ /* LDRSH. */ -+ if (speed) -+ { -+ rtx address = XEXP (XEXP (x, 0), 0); -+ *cost += extra_cost->ldst.load_sign_extend; -+ -+ *cost += -+ COSTS_N_INSNS (aarch64_address_cost (address, mode, -+ 0, speed)); -+ } - return true; - } -+ -+ if (speed) -+ *cost += extra_cost->alu.extend; - return false; - -+ case ASHIFT: -+ op0 = XEXP (x, 0); -+ op1 = XEXP (x, 1); -+ -+ if (CONST_INT_P (op1)) -+ { -+ /* LSL (immediate), UBMF, UBFIZ and friends. These are all -+ aliases. */ -+ if (speed) -+ *cost += extra_cost->alu.shift; -+ -+ /* We can incorporate zero/sign extend for free. */ -+ if (GET_CODE (op0) == ZERO_EXTEND -+ || GET_CODE (op0) == SIGN_EXTEND) -+ op0 = XEXP (op0, 0); -+ -+ *cost += rtx_cost (op0, ASHIFT, 0, speed); -+ return true; -+ } -+ else -+ { -+ /* LSLV. */ -+ if (speed) -+ *cost += extra_cost->alu.shift_reg; -+ -+ return false; /* All arguments need to be in registers. */ -+ } -+ - case ROTATE: -- if (!CONST_INT_P (XEXP (x, 1))) -- *cost += COSTS_N_INSNS (2); -- /* Fall through. */ - case ROTATERT: - case LSHIFTRT: -- case ASHIFT: - case ASHIFTRT: -+ op0 = XEXP (x, 0); -+ op1 = XEXP (x, 1); - -- /* Shifting by a register often takes an extra cycle. */ -- if (speed && !CONST_INT_P (XEXP (x, 1))) -- *cost += extra_cost->alu.arith_shift_reg; -+ if (CONST_INT_P (op1)) -+ { -+ /* ASR (immediate) and friends. */ -+ if (speed) -+ *cost += extra_cost->alu.shift; - -- *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed); -+ *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed); -+ return true; -+ } -+ else -+ { -+ -+ /* ASR (register) and friends. */ -+ if (speed) -+ *cost += extra_cost->alu.shift_reg; -+ -+ return false; /* All arguments need to be in registers. */ -+ } -+ -+ case SYMBOL_REF: -+ -+ if (aarch64_cmodel == AARCH64_CMODEL_LARGE) -+ { -+ /* LDR. */ -+ if (speed) -+ *cost += extra_cost->ldst.load; -+ } -+ else if (aarch64_cmodel == AARCH64_CMODEL_SMALL -+ || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC) -+ { -+ /* ADRP, followed by ADD. */ -+ *cost += COSTS_N_INSNS (1); -+ if (speed) -+ *cost += 2 * extra_cost->alu.arith; -+ } -+ else if (aarch64_cmodel == AARCH64_CMODEL_TINY -+ || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC) -+ { -+ /* ADR. */ -+ if (speed) -+ *cost += extra_cost->alu.arith; -+ } -+ -+ if (flag_pic) -+ { -+ /* One extra load instruction, after accessing the GOT. */ -+ *cost += COSTS_N_INSNS (1); -+ if (speed) -+ *cost += extra_cost->ldst.load; -+ } - return true; - - case HIGH: -- if (!CONSTANT_P (XEXP (x, 0))) -- *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed); -- return true; -- - case LO_SUM: -- if (!CONSTANT_P (XEXP (x, 1))) -- *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed); -- *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed); -+ /* ADRP/ADD (immediate). */ -+ if (speed) -+ *cost += extra_cost->alu.arith; - return true; - - case ZERO_EXTRACT: - case SIGN_EXTRACT: -- *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed); -+ /* UBFX/SBFX. */ -+ if (speed) -+ *cost += extra_cost->alu.bfx; -+ -+ /* We can trust that the immediates used will be correct (there -+ are no by-register forms), so we need only cost op0. 
*/ -+ *cost += rtx_cost (XEXP (x, 0), (enum rtx_code) code, 0, speed); - return true; - - case MULT: -- op0 = XEXP (x, 0); -- op1 = XEXP (x, 1); -+ *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed); -+ /* aarch64_rtx_mult_cost always handles recursion to its -+ operands. */ -+ return true; - -- *cost = COSTS_N_INSNS (1); -- if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) -- { -- if (CONST_INT_P (op1) -- && exact_log2 (INTVAL (op1)) > 0) -- { -- *cost += rtx_cost (op0, ASHIFT, 0, speed); -- return true; -- } -- -- if ((GET_CODE (op0) == ZERO_EXTEND -- && GET_CODE (op1) == ZERO_EXTEND) -- || (GET_CODE (op0) == SIGN_EXTEND -- && GET_CODE (op1) == SIGN_EXTEND)) -- { -- *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed) -- + rtx_cost (XEXP (op1, 0), MULT, 1, speed)); -- if (speed) -- *cost += extra_cost->mult[GET_MODE (x) == DImode].extend; -- return true; -- } -- -- if (speed) -- *cost += extra_cost->mult[GET_MODE (x) == DImode].simple; -- } -- else if (speed) -- { -- if (GET_MODE (x) == DFmode) -- *cost += extra_cost->fp[1].mult; -- else if (GET_MODE (x) == SFmode) -- *cost += extra_cost->fp[0].mult; -- } -- -- return false; /* All arguments need to be in registers. */ -- - case MOD: - case UMOD: -- *cost = COSTS_N_INSNS (2); - if (speed) - { - if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) -@@ -4800,53 +5837,222 @@ - - case DIV: - case UDIV: -- *cost = COSTS_N_INSNS (1); -+ case SQRT: - if (speed) - { -- if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) -- *cost += extra_cost->mult[GET_MODE (x) == DImode].idiv; -- else if (GET_MODE (x) == DFmode) -- *cost += extra_cost->fp[1].div; -- else if (GET_MODE (x) == SFmode) -- *cost += extra_cost->fp[0].div; -+ if (GET_MODE_CLASS (mode) == MODE_INT) -+ /* There is no integer SQRT, so only DIV and UDIV can get -+ here. */ -+ *cost += extra_cost->mult[mode == DImode].idiv; -+ else -+ *cost += extra_cost->fp[mode == DFmode].div; - } - return false; /* All arguments need to be in registers. */ - -+ case IF_THEN_ELSE: -+ return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1), -+ XEXP (x, 2), cost, speed); -+ -+ case EQ: -+ case NE: -+ case GT: -+ case GTU: -+ case LT: -+ case LTU: -+ case GE: -+ case GEU: -+ case LE: -+ case LEU: -+ -+ return false; /* All arguments must be in registers. */ -+ -+ case FMA: -+ op0 = XEXP (x, 0); -+ op1 = XEXP (x, 1); -+ op2 = XEXP (x, 2); -+ -+ if (speed) -+ *cost += extra_cost->fp[mode == DFmode].fma; -+ -+ /* FMSUB, FNMADD, and FNMSUB are free. */ -+ if (GET_CODE (op0) == NEG) -+ op0 = XEXP (op0, 0); -+ -+ if (GET_CODE (op2) == NEG) -+ op2 = XEXP (op2, 0); -+ -+ /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1, -+ and the by-element operand as operand 0. */ -+ if (GET_CODE (op1) == NEG) -+ op1 = XEXP (op1, 0); -+ -+ /* Catch vector-by-element operations. The by-element operand can -+ either be (vec_duplicate (vec_select (x))) or just -+ (vec_select (x)), depending on whether we are multiplying by -+ a vector or a scalar. -+ -+ Canonicalization is not very good in these cases, FMA4 will put the -+ by-element operand as operand 0, FNMA4 will have it as operand 1. */ -+ if (GET_CODE (op0) == VEC_DUPLICATE) -+ op0 = XEXP (op0, 0); -+ else if (GET_CODE (op1) == VEC_DUPLICATE) -+ op1 = XEXP (op1, 0); -+ -+ if (GET_CODE (op0) == VEC_SELECT) -+ op0 = XEXP (op0, 0); -+ else if (GET_CODE (op1) == VEC_SELECT) -+ op1 = XEXP (op1, 0); -+ -+ /* If the remaining parameters are not registers, -+ get the cost to put them into registers. 
*/ -+ *cost += rtx_cost (op0, FMA, 0, speed); -+ *cost += rtx_cost (op1, FMA, 1, speed); -+ *cost += rtx_cost (op2, FMA, 2, speed); -+ return true; -+ -+ case FLOAT_EXTEND: -+ if (speed) -+ *cost += extra_cost->fp[mode == DFmode].widen; -+ return false; -+ -+ case FLOAT_TRUNCATE: -+ if (speed) -+ *cost += extra_cost->fp[mode == DFmode].narrow; -+ return false; -+ -+ case FIX: -+ case UNSIGNED_FIX: -+ x = XEXP (x, 0); -+ /* Strip the rounding part. They will all be implemented -+ by the fcvt* family of instructions anyway. */ -+ if (GET_CODE (x) == UNSPEC) -+ { -+ unsigned int uns_code = XINT (x, 1); -+ -+ if (uns_code == UNSPEC_FRINTA -+ || uns_code == UNSPEC_FRINTM -+ || uns_code == UNSPEC_FRINTN -+ || uns_code == UNSPEC_FRINTP -+ || uns_code == UNSPEC_FRINTZ) -+ x = XVECEXP (x, 0, 0); -+ } -+ -+ if (speed) -+ *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint; -+ -+ *cost += rtx_cost (x, (enum rtx_code) code, 0, speed); -+ return true; -+ -+ case ABS: -+ if (GET_MODE_CLASS (mode) == MODE_FLOAT) -+ { -+ /* FABS and FNEG are analogous. */ -+ if (speed) -+ *cost += extra_cost->fp[mode == DFmode].neg; -+ } -+ else -+ { -+ /* Integer ABS will either be split to -+ two arithmetic instructions, or will be an ABS -+ (scalar), which we don't model. */ -+ *cost = COSTS_N_INSNS (2); -+ if (speed) -+ *cost += 2 * extra_cost->alu.arith; -+ } -+ return false; -+ -+ case SMAX: -+ case SMIN: -+ if (speed) -+ { -+ /* FMAXNM/FMINNM/FMAX/FMIN. -+ TODO: This may not be accurate for all implementations, but -+ we do not model this in the cost tables. */ -+ *cost += extra_cost->fp[mode == DFmode].addsub; -+ } -+ return false; -+ -+ case UNSPEC: -+ /* The floating point round to integer frint* instructions. */ -+ if (aarch64_frint_unspec_p (XINT (x, 1))) -+ { -+ if (speed) -+ *cost += extra_cost->fp[mode == DFmode].roundint; -+ -+ return false; -+ } -+ -+ if (XINT (x, 1) == UNSPEC_RBIT) -+ { -+ if (speed) -+ *cost += extra_cost->alu.rev; -+ -+ return false; -+ } -+ break; -+ -+ case TRUNCATE: -+ -+ /* Decompose <su>muldi3_highpart. */ -+ if (/* (truncate:DI */ -+ mode == DImode -+ /* (lshiftrt:TI */ -+ && GET_MODE (XEXP (x, 0)) == TImode -+ && GET_CODE (XEXP (x, 0)) == LSHIFTRT -+ /* (mult:TI */ -+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT -+ /* (ANY_EXTEND:TI (reg:DI)) -+ (ANY_EXTEND:TI (reg:DI))) */ -+ && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND -+ && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND) -+ || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND -+ && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)) -+ && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode -+ && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode -+ /* (const_int 64) */ -+ && CONST_INT_P (XEXP (XEXP (x, 0), 1)) -+ && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64) -+ { -+ /* UMULH/SMULH. */ -+ if (speed) -+ *cost += extra_cost->mult[mode == DImode].extend; -+ *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0), -+ MULT, 0, speed); -+ *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0), -+ MULT, 1, speed); -+ return true; -+ } -+ -+ /* Fall through. */ - default: - break; - } -- return false; -+ -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ fprintf (dump_file, -+ "\nFailed to cost RTX. 
Assuming default cost.\n"); -+ -+ return true; - } - --static int --aarch64_address_cost (rtx x ATTRIBUTE_UNUSED, -- enum machine_mode mode ATTRIBUTE_UNUSED, -- addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED) -+/* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost -+ calculated for X. This cost is stored in *COST. Returns true -+ if the total cost of X was calculated. */ -+static bool -+aarch64_rtx_costs_wrapper (rtx x, int code, int outer, -+ int param, int *cost, bool speed) - { -- enum rtx_code c = GET_CODE (x); -- const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost; -+ bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed); - -- if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY) -- return addr_cost->pre_modify; -- -- if (c == POST_INC || c == POST_DEC || c == POST_MODIFY) -- return addr_cost->post_modify; -- -- if (c == PLUS) -+ if (dump_file && (dump_flags & TDF_DETAILS)) - { -- if (GET_CODE (XEXP (x, 1)) == CONST_INT) -- return addr_cost->imm_offset; -- else if (GET_CODE (XEXP (x, 0)) == MULT -- || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND -- || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND) -- return addr_cost->register_extend; -- -- return addr_cost->register_offset; -+ print_rtl_single (dump_file, x); -+ fprintf (dump_file, "\n%s cost: %d (%s)\n", -+ speed ? "Hot" : "Cold", -+ *cost, result ? "final" : "partial"); - } -- else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF) -- return addr_cost->imm_offset; - -- return 0; -+ return result; - } - - static int -@@ -4858,6 +6064,13 @@ - const struct cpu_regmove_cost *regmove_cost - = aarch64_tune_params->regmove_cost; - -+ /* Caller save and pointer regs are equivalent to GENERAL_REGS. */ -+ if (to == CALLER_SAVE_REGS || to == POINTER_REGS) -+ to = GENERAL_REGS; -+ -+ if (from == CALLER_SAVE_REGS || from == POINTER_REGS) -+ from = GENERAL_REGS; -+ - /* Moving between GPR and stack cost is the same as GP2GP. */ - if ((from == GENERAL_REGS && to == STACK_REG) - || (to == GENERAL_REGS && from == STACK_REG)) -@@ -4880,7 +6093,7 @@ - secondary reload. A general register is used as a scratch to move - the upper DI value and the lower DI value is moved directly, - hence the cost is the sum of three moves. */ -- if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 128) -+ if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 16) - return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP; - - return regmove_cost->FP2FP; -@@ -5253,6 +6466,7 @@ - aarch64_tune_flags = selected_tune->flags; - aarch64_tune = selected_tune->core; - aarch64_tune_params = selected_tune->tune; -+ aarch64_architecture_version = selected_cpu->architecture_version; - - if (aarch64_fix_a53_err835769 == 2) - { -@@ -5998,7 +7212,7 @@ - - /* We don't save the size into *PRETEND_SIZE because we want to avoid - any complication of having crtl->args.pretend_args_size changed. 
*/ -- cfun->machine->saved_varargs_size -+ cfun->machine->frame.saved_varargs_size - = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD, - STACK_BOUNDARY / BITS_PER_UNIT) - + vr_saved * UNITS_PER_VREG); -@@ -6685,7 +7899,7 @@ - unsigned HOST_WIDE_INT elpart; - unsigned int part, parts; - -- if (GET_CODE (el) == CONST_INT) -+ if (CONST_INT_P (el)) - { - elpart = INTVAL (el); - parts = 1; -@@ -6816,30 +8030,6 @@ - #undef CHECK - } - --static bool --aarch64_const_vec_all_same_int_p (rtx x, -- HOST_WIDE_INT minval, -- HOST_WIDE_INT maxval) --{ -- HOST_WIDE_INT firstval; -- int count, i; -- -- if (GET_CODE (x) != CONST_VECTOR -- || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT) -- return false; -- -- firstval = INTVAL (CONST_VECTOR_ELT (x, 0)); -- if (firstval < minval || firstval > maxval) -- return false; -- -- count = CONST_VECTOR_NUNITS (x); -- for (i = 1; i < count; i++) -- if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval) -- return false; -- -- return true; --} -- - /* Check of immediate shift constants are within range. */ - bool - aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left) -@@ -6846,9 +8036,9 @@ - { - int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT; - if (left) -- return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1); -+ return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1); - else -- return aarch64_const_vec_all_same_int_p (x, 1, bit_width); -+ return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width); - } - - /* Return true if X is a uniform vector where all elements -@@ -6886,7 +8076,7 @@ - && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0)))) - return true; - -- if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode)) -+ if (CONST_INT_P (x)) - return true; - - if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x)) -@@ -6923,17 +8113,43 @@ - return aarch64_simd_valid_immediate (op_v, vmode, false, NULL); - } - --/* Construct and return a PARALLEL RTX vector. */ -+/* Construct and return a PARALLEL RTX vector with elements numbering the -+ lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of -+ the vector - from the perspective of the architecture. This does not -+ line up with GCC's perspective on lane numbers, so we end up with -+ different masks depending on our target endian-ness. The diagram -+ below may help. We must draw the distinction when building masks -+ which select one half of the vector. An instruction selecting -+ architectural low-lanes for a big-endian target, must be described using -+ a mask selecting GCC high-lanes. -+ -+ Big-Endian Little-Endian -+ -+GCC 0 1 2 3 3 2 1 0 -+ | x | x | x | x | | x | x | x | x | -+Architecture 3 2 1 0 3 2 1 0 -+ -+Low Mask: { 2, 3 } { 0, 1 } -+High Mask: { 0, 1 } { 2, 3 } -+*/ -+ - rtx - aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high) - { - int nunits = GET_MODE_NUNITS (mode); - rtvec v = rtvec_alloc (nunits / 2); -- int base = high ? nunits / 2 : 0; -+ int high_base = nunits / 2; -+ int low_base = 0; -+ int base; - rtx t1; - int i; - -- for (i=0; i < nunits / 2; i++) -+ if (BYTES_BIG_ENDIAN) -+ base = high ? low_base : high_base; -+ else -+ base = high ? 
high_base : low_base; -+ -+ for (i = 0; i < nunits / 2; i++) - RTVEC_ELT (v, i) = GEN_INT (base + i); - - t1 = gen_rtx_PARALLEL (mode, v); -@@ -6940,6 +8156,38 @@ - return t1; - } - -+/* Check OP for validity as a PARALLEL RTX vector with elements -+ numbering the lanes of either the high (HIGH == TRUE) or low lanes, -+ from the perspective of the architecture. See the diagram above -+ aarch64_simd_vect_par_cnst_half for more details. */ -+ -+bool -+aarch64_simd_check_vect_par_cnst_half (rtx op, enum machine_mode mode, -+ bool high) -+{ -+ rtx ideal = aarch64_simd_vect_par_cnst_half (mode, high); -+ HOST_WIDE_INT count_op = XVECLEN (op, 0); -+ HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0); -+ int i = 0; -+ -+ if (!VECTOR_MODE_P (mode)) -+ return false; -+ -+ if (count_op != count_ideal) -+ return false; -+ -+ for (i = 0; i < count_ideal; i++) -+ { -+ rtx elt_op = XVECEXP (op, 0, i); -+ rtx elt_ideal = XVECEXP (ideal, 0, i); -+ -+ if (!CONST_INT_P (elt_op) -+ || INTVAL (elt_ideal) != INTVAL (elt_op)) -+ return false; -+ } -+ return true; -+} -+ - /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and - HIGH (exclusive). */ - void -@@ -6946,7 +8194,7 @@ - aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high) - { - HOST_WIDE_INT lane; -- gcc_assert (GET_CODE (operand) == CONST_INT); -+ gcc_assert (CONST_INT_P (operand)); - lane = INTVAL (operand); - - if (lane < low || lane >= high) -@@ -6956,7 +8204,7 @@ - void - aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high) - { -- gcc_assert (GET_CODE (operand) == CONST_INT); -+ gcc_assert (CONST_INT_P (operand)); - HOST_WIDE_INT lane = INTVAL (operand); - - if (lane < low || lane >= high) -@@ -6994,7 +8242,7 @@ - aarch64_simd_mem_operand_p (rtx op) - { - return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC -- || GET_CODE (XEXP (op, 0)) == REG); -+ || REG_P (XEXP (op, 0))); - } - - /* Set up OPERANDS for a register copy from SRC to DEST, taking care -@@ -7647,6 +8895,9 @@ - if (!CONST_DOUBLE_P (x)) - return false; - -+ if (GET_MODE (x) == VOIDmode) -+ return false; -+ - REAL_VALUE_FROM_CONST_DOUBLE (r, x); - - /* We cannot represent infinities, NaNs or +/-zero. We won't -@@ -7899,20 +9150,26 @@ - aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel) - { - enum machine_mode vmode = GET_MODE (target); -- unsigned int i, nelt = GET_MODE_NUNITS (vmode); -+ unsigned int nelt = GET_MODE_NUNITS (vmode); - bool one_vector_p = rtx_equal_p (op0, op1); -- rtx rmask[MAX_VECT_LEN], mask; -+ rtx mask; - -- gcc_checking_assert (!BYTES_BIG_ENDIAN); -- - /* The TBL instruction does not use a modulo index, so we must take care - of that ourselves. */ -- mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1); -- for (i = 0; i < nelt; ++i) -- rmask[i] = mask; -- mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask)); -+ mask = aarch64_simd_gen_const_vector_dup (vmode, -+ one_vector_p ? nelt - 1 : 2 * nelt - 1); - sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN); - -+ /* For big-endian, we also need to reverse the index within the vector -+ (but not which vector). */ -+ if (BYTES_BIG_ENDIAN) -+ { -+ /* If one_vector_p, mask is a vector of (nelt - 1)'s already. 
*/ -+ if (!one_vector_p) -+ mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1); -+ sel = expand_simple_binop (vmode, XOR, sel, mask, -+ NULL, 0, OPTAB_LIB_WIDEN); -+ } - aarch64_expand_vec_perm_1 (target, op0, op1, sel); - } - -@@ -8171,7 +9428,145 @@ - return true; - } - -+/* Recognize patterns for the EXT insn. */ -+ - static bool -+aarch64_evpc_ext (struct expand_vec_perm_d *d) -+{ -+ unsigned int i, nelt = d->nelt; -+ rtx (*gen) (rtx, rtx, rtx, rtx); -+ rtx offset; -+ -+ unsigned int location = d->perm[0]; /* Always < nelt. */ -+ -+ /* Check if the extracted indices are increasing by one. */ -+ for (i = 1; i < nelt; i++) -+ { -+ unsigned int required = location + i; -+ if (d->one_vector_p) -+ { -+ /* We'll pass the same vector in twice, so allow indices to wrap. */ -+ required &= (nelt - 1); -+ } -+ if (d->perm[i] != required) -+ return false; -+ } -+ -+ switch (d->vmode) -+ { -+ case V16QImode: gen = gen_aarch64_extv16qi; break; -+ case V8QImode: gen = gen_aarch64_extv8qi; break; -+ case V4HImode: gen = gen_aarch64_extv4hi; break; -+ case V8HImode: gen = gen_aarch64_extv8hi; break; -+ case V2SImode: gen = gen_aarch64_extv2si; break; -+ case V4SImode: gen = gen_aarch64_extv4si; break; -+ case V2SFmode: gen = gen_aarch64_extv2sf; break; -+ case V4SFmode: gen = gen_aarch64_extv4sf; break; -+ case V2DImode: gen = gen_aarch64_extv2di; break; -+ case V2DFmode: gen = gen_aarch64_extv2df; break; -+ default: -+ return false; -+ } -+ -+ /* Success! */ -+ if (d->testing_p) -+ return true; -+ -+ /* The case where (location == 0) is a no-op for both big- and little-endian, -+ and is removed by the mid-end at optimization levels -O1 and higher. */ -+ -+ if (BYTES_BIG_ENDIAN && (location != 0)) -+ { -+ /* After setup, we want the high elements of the first vector (stored -+ at the LSB end of the register), and the low elements of the second -+ vector (stored at the MSB end of the register). So swap. */ -+ rtx temp = d->op0; -+ d->op0 = d->op1; -+ d->op1 = temp; -+ /* location != 0 (above), so safe to assume (nelt - location) < nelt. */ -+ location = nelt - location; -+ } -+ -+ offset = GEN_INT (location); -+ emit_insn (gen (d->target, d->op0, d->op1, offset)); -+ return true; -+} -+ -+/* Recognize patterns for the REV insns. 
*/ -+ -+static bool -+aarch64_evpc_rev (struct expand_vec_perm_d *d) -+{ -+ unsigned int i, j, diff, nelt = d->nelt; -+ rtx (*gen) (rtx, rtx); -+ -+ if (!d->one_vector_p) -+ return false; -+ -+ diff = d->perm[0]; -+ switch (diff) -+ { -+ case 7: -+ switch (d->vmode) -+ { -+ case V16QImode: gen = gen_aarch64_rev64v16qi; break; -+ case V8QImode: gen = gen_aarch64_rev64v8qi; break; -+ default: -+ return false; -+ } -+ break; -+ case 3: -+ switch (d->vmode) -+ { -+ case V16QImode: gen = gen_aarch64_rev32v16qi; break; -+ case V8QImode: gen = gen_aarch64_rev32v8qi; break; -+ case V8HImode: gen = gen_aarch64_rev64v8hi; break; -+ case V4HImode: gen = gen_aarch64_rev64v4hi; break; -+ default: -+ return false; -+ } -+ break; -+ case 1: -+ switch (d->vmode) -+ { -+ case V16QImode: gen = gen_aarch64_rev16v16qi; break; -+ case V8QImode: gen = gen_aarch64_rev16v8qi; break; -+ case V8HImode: gen = gen_aarch64_rev32v8hi; break; -+ case V4HImode: gen = gen_aarch64_rev32v4hi; break; -+ case V4SImode: gen = gen_aarch64_rev64v4si; break; -+ case V2SImode: gen = gen_aarch64_rev64v2si; break; -+ case V4SFmode: gen = gen_aarch64_rev64v4sf; break; -+ case V2SFmode: gen = gen_aarch64_rev64v2sf; break; -+ default: -+ return false; -+ } -+ break; -+ default: -+ return false; -+ } -+ -+ for (i = 0; i < nelt ; i += diff + 1) -+ for (j = 0; j <= diff; j += 1) -+ { -+ /* This is guaranteed to be true as the value of diff -+ is 7, 3, 1 and we should have enough elements in the -+ queue to generate this. Getting a vector mask with a -+ value of diff other than these values implies that -+ something is wrong by the time we get here. */ -+ gcc_assert (i + j < nelt); -+ if (d->perm[i + j] != i + diff - j) -+ return false; -+ } -+ -+ /* Success! */ -+ if (d->testing_p) -+ return true; -+ -+ emit_insn (gen (d->target, d->op0)); -+ return true; -+} -+ -+static bool - aarch64_evpc_dup (struct expand_vec_perm_d *d) - { - rtx (*gen) (rtx, rtx, rtx); -@@ -8181,10 +9576,6 @@ - unsigned int i, elt, nelt = d->nelt; - rtx lane; - -- /* TODO: This may not be big-endian safe. */ -- if (BYTES_BIG_ENDIAN) -- return false; -- - elt = d->perm[0]; - for (i = 1; i < nelt; i++) - { -@@ -8198,7 +9589,7 @@ - use d->op0 and need not do any extra arithmetic to get the - correct lane number. */ - in0 = d->op0; -- lane = GEN_INT (elt); -+ lane = GEN_INT (elt); /* The pattern corrects for big-endian. */ - - switch (vmode) - { -@@ -8227,11 +9618,6 @@ - enum machine_mode vmode = d->vmode; - unsigned int i, nelt = d->nelt; - -- /* TODO: ARM's TBL indexing is little-endian. In order to handle GCC's -- numbering of elements for big-endian, we must reverse the order. */ -- if (BYTES_BIG_ENDIAN) -- return false; -- - if (d->testing_p) - return true; - -@@ -8242,7 +9628,15 @@ - return false; - - for (i = 0; i < nelt; ++i) -- rperm[i] = GEN_INT (d->perm[i]); -+ { -+ int nunits = GET_MODE_NUNITS (vmode); -+ -+ /* If big-endian and two vectors we end up with a weird mixed-endian -+ mode on NEON. Reverse the index within each word but not the word -+ itself. */ -+ rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? 
d->perm[i] ^ (nunits - 1) -+ : d->perm[i]); -+ } - sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm)); - sel = force_reg (vmode, sel); - -@@ -8271,14 +9665,18 @@ - - if (TARGET_SIMD) - { -- if (aarch64_evpc_zip (d)) -+ if (aarch64_evpc_rev (d)) - return true; -+ else if (aarch64_evpc_ext (d)) -+ return true; -+ else if (aarch64_evpc_dup (d)) -+ return true; -+ else if (aarch64_evpc_zip (d)) -+ return true; - else if (aarch64_evpc_uzp (d)) - return true; - else if (aarch64_evpc_trn (d)) - return true; -- else if (aarch64_evpc_dup (d)) -- return true; - return aarch64_evpc_tbl (d); - } - return false; -@@ -8397,7 +9795,8 @@ - /* Limited combinations of subregs are safe on FPREGs. Particularly, - 1. Vector Mode to Scalar mode where 1 unit of the vector is accessed. - 2. Scalar to Scalar for integer modes or same size float modes. -- 3. Vector to Vector modes. */ -+ 3. Vector to Vector modes. -+ 4. On little-endian only, Vector-Structure to Vector modes. */ - if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to)) - { - if (aarch64_vector_mode_supported_p (from) -@@ -8413,11 +9812,215 @@ - if (aarch64_vector_mode_supported_p (from) - && aarch64_vector_mode_supported_p (to)) - return false; -+ -+ /* Within an vector structure straddling multiple vector registers -+ we are in a mixed-endian representation. As such, we can't -+ easily change modes for BYTES_BIG_ENDIAN. Otherwise, we can -+ switch between vectors and vector structures cheaply. */ -+ if (!BYTES_BIG_ENDIAN) -+ if ((aarch64_vector_mode_supported_p (from) -+ && aarch64_vect_struct_mode_p (to)) -+ || (aarch64_vector_mode_supported_p (to) -+ && aarch64_vect_struct_mode_p (from))) -+ return false; - } - - return true; - } - -+/* Implement MODES_TIEABLE_P. */ -+ -+bool -+aarch64_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2) -+{ -+ if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2)) -+ return true; -+ -+ /* We specifically want to allow elements of "structure" modes to -+ be tieable to the structure. This more general condition allows -+ other rarer situations too. */ -+ if (TARGET_SIMD -+ && aarch64_vector_mode_p (mode1) -+ && aarch64_vector_mode_p (mode2)) -+ return true; -+ -+ return false; -+} -+ -+/* Return a new RTX holding the result of moving POINTER forward by -+ AMOUNT bytes. */ -+ -+static rtx -+aarch64_move_pointer (rtx pointer, int amount) -+{ -+ rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount); -+ -+ return adjust_automodify_address (pointer, GET_MODE (pointer), -+ next, amount); -+} -+ -+/* Return a new RTX holding the result of moving POINTER forward by the -+ size of the mode it points to. */ -+ -+static rtx -+aarch64_progress_pointer (rtx pointer) -+{ -+ HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer)); -+ -+ return aarch64_move_pointer (pointer, amount); -+} -+ -+/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by -+ MODE bytes. */ -+ -+static void -+aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst, -+ enum machine_mode mode) -+{ -+ rtx reg = gen_reg_rtx (mode); -+ -+ /* "Cast" the pointers to the correct mode. */ -+ *src = adjust_address (*src, mode, 0); -+ *dst = adjust_address (*dst, mode, 0); -+ /* Emit the memcpy. */ -+ emit_move_insn (reg, *src); -+ emit_move_insn (*dst, reg); -+ /* Move the pointers forward. */ -+ *src = aarch64_progress_pointer (*src); -+ *dst = aarch64_progress_pointer (*dst); -+} -+ -+/* Expand movmem, as if from a __builtin_memcpy. Return true if -+ we succeed, otherwise return false. 
*/ -+ -+bool -+aarch64_expand_movmem (rtx *operands) -+{ -+ unsigned int n; -+ rtx dst = operands[0]; -+ rtx src = operands[1]; -+ rtx base; -+ bool speed_p = !optimize_function_for_size_p (cfun); -+ -+ /* When optimizing for size, give a better estimate of the length of a -+ memcpy call, but use the default otherwise. */ -+ unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2; -+ -+ /* We can't do anything smart if the amount to copy is not constant. */ -+ if (!CONST_INT_P (operands[2])) -+ return false; -+ -+ n = UINTVAL (operands[2]); -+ -+ /* Try to keep the number of instructions low. For cases below 16 bytes we -+ need to make at most two moves. For cases above 16 bytes it will be one -+ move for each 16 byte chunk, then at most two additional moves. */ -+ if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions) -+ return false; -+ -+ base = copy_to_mode_reg (Pmode, XEXP (dst, 0)); -+ dst = adjust_automodify_address (dst, VOIDmode, base, 0); -+ -+ base = copy_to_mode_reg (Pmode, XEXP (src, 0)); -+ src = adjust_automodify_address (src, VOIDmode, base, 0); -+ -+ /* Simple cases. Copy 0-3 bytes, as (if applicable) a 2-byte, then a -+ 1-byte chunk. */ -+ if (n < 4) -+ { -+ if (n >= 2) -+ { -+ aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode); -+ n -= 2; -+ } -+ -+ if (n == 1) -+ aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode); -+ -+ return true; -+ } -+ -+ /* Copy 4-8 bytes. First a 4-byte chunk, then (if applicable) a second -+ 4-byte chunk, partially overlapping with the previously copied chunk. */ -+ if (n < 8) -+ { -+ aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode); -+ n -= 4; -+ if (n > 0) -+ { -+ int move = n - 4; -+ -+ src = aarch64_move_pointer (src, move); -+ dst = aarch64_move_pointer (dst, move); -+ aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode); -+ } -+ return true; -+ } -+ -+ /* Copy more than 8 bytes. Copy chunks of 16 bytes until we run out of -+ them, then (if applicable) an 8-byte chunk. */ -+ while (n >= 8) -+ { -+ if (n / 16) -+ { -+ aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode); -+ n -= 16; -+ } -+ else -+ { -+ aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode); -+ n -= 8; -+ } -+ } -+ -+ /* Finish the final bytes of the copy. We can always do this in one -+ instruction. We either copy the exact amount we need, or partially -+ overlap with the previous chunk we copied and copy 8-bytes. */ -+ if (n == 0) -+ return true; -+ else if (n == 1) -+ aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode); -+ else if (n == 2) -+ aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode); -+ else if (n == 4) -+ aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode); -+ else -+ { -+ if (n == 3) -+ { -+ src = aarch64_move_pointer (src, -1); -+ dst = aarch64_move_pointer (dst, -1); -+ aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode); -+ } -+ else -+ { -+ int move = n - 8; -+ -+ src = aarch64_move_pointer (src, move); -+ dst = aarch64_move_pointer (dst, move); -+ aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode); -+ } -+ } -+ -+ return true; -+} -+ -+static bool -+aarch64_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size, -+ unsigned int align, -+ enum by_pieces_operation op, -+ bool speed_p) -+{ -+ /* STORE_BY_PIECES can be used when copying a constant string, but -+ in that case each 64-bit chunk takes 5 insns instead of 2 (LDR/STR). 
-+ For now we always fail this and let the move_by_pieces code copy -+ the string from read-only memory. */ -+ if (op == STORE_BY_PIECES) -+ return false; -+ -+ return default_use_by_pieces_infrastructure_p (size, align, op, speed_p); -+} -+ - #undef TARGET_ADDRESS_COST - #define TARGET_ADDRESS_COST aarch64_address_cost - -@@ -8588,7 +10191,7 @@ - #define TARGET_RETURN_IN_MSB aarch64_return_in_msb - - #undef TARGET_RTX_COSTS --#define TARGET_RTX_COSTS aarch64_rtx_costs -+#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper - - #undef TARGET_SCHED_ISSUE_RATE - #define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate -@@ -8626,6 +10229,10 @@ - #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \ - aarch64_autovectorize_vector_sizes - -+#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV -+#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \ -+ aarch64_atomic_assign_expand_fenv -+ - /* Section anchor support. */ - - #undef TARGET_MIN_ANCHOR_OFFSET -@@ -8654,6 +10261,19 @@ - #undef TARGET_FIXED_CONDITION_CODE_REGS - #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs - -+#undef TARGET_FLAGS_REGNUM -+#define TARGET_FLAGS_REGNUM CC_REGNUM -+ -+#undef TARGET_LEGITIMIZE_ADDRESS -+#define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address -+ -+#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P -+#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \ -+ aarch64_use_by_pieces_infrastructure_p -+ -+#undef TARGET_CAN_USE_DOLOOP_P -+#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost -+ - struct gcc_target targetm = TARGET_INITIALIZER; - - #include "gt-aarch64.h" ---- a/src/gcc/config/aarch64/aarch64-elf-raw.h -+++ b/src/gcc/config/aarch64/aarch64-elf-raw.h -@@ -23,7 +23,9 @@ - #define GCC_AARCH64_ELF_RAW_H - - #define STARTFILE_SPEC " crti%O%s crtbegin%O%s crt0%O%s" --#define ENDFILE_SPEC " crtend%O%s crtn%O%s" -+#define ENDFILE_SPEC \ -+ " crtend%O%s crtn%O%s " \ -+ "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}" - - #ifdef TARGET_FIX_ERR_A53_835769_DEFAULT - #define CA53_ERR_835769_SPEC \ ---- a/src/gcc/config/aarch64/aarch64-linux.h -+++ b/src/gcc/config/aarch64/aarch64-linux.h -@@ -21,7 +21,7 @@ - #ifndef GCC_AARCH64_LINUX_H - #define GCC_AARCH64_LINUX_H - --#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux-aarch64%{mbig-endian:_be}.so.1" -+#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux-aarch64%{mbig-endian:_be}%{mabi=ilp32:_ilp32}.so.1" - - #define CPP_SPEC "%{pthread:-D_REENTRANT}" - -@@ -33,7 +33,7 @@ - -dynamic-linker " GNU_USER_DYNAMIC_LINKER " \ - -X \ - %{mbig-endian:-EB} %{mlittle-endian:-EL} \ -- -maarch64linux%{mbig-endian:b}" -+ -maarch64linux%{mabi=ilp32:32}%{mbig-endian:b}" - - #ifdef TARGET_FIX_ERR_A53_835769_DEFAULT - #define CA53_ERR_835769_SPEC \ -@@ -46,6 +46,14 @@ - #define LINK_SPEC LINUX_TARGET_LINK_SPEC \ - CA53_ERR_835769_SPEC - -+#define GNU_USER_TARGET_MATHFILE_SPEC \ -+ "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}" -+ -+#undef ENDFILE_SPEC -+#define ENDFILE_SPEC \ -+ GNU_USER_TARGET_MATHFILE_SPEC " " \ -+ GNU_USER_TARGET_ENDFILE_SPEC -+ - #define TARGET_OS_CPP_BUILTINS() \ - do \ - { \ ---- a/src/gcc/config/aarch64/iterators.md -+++ b/src/gcc/config/aarch64/iterators.md -@@ -95,6 +95,9 @@ - ;; Vector Float modes. - (define_mode_iterator VDQF [V2SF V4SF V2DF]) - -+;; Vector Float modes, and DF. -+(define_mode_iterator VDQF_DF [V2SF V4SF V2DF DF]) -+ - ;; Vector single Float modes. - (define_mode_iterator VDQSF [V2SF V4SF]) - -@@ -156,6 +159,9 @@ - ;; Vector modes for H and S types. 
- (define_mode_iterator VDQHS [V4HI V8HI V2SI V4SI]) - -+;; Vector modes for H, S and D types. -+(define_mode_iterator VDQHSD [V4HI V8HI V2SI V4SI V2DI]) -+ - ;; Vector modes for Q, H and S types. - (define_mode_iterator VDQQHS [V8QI V16QI V4HI V8HI V2SI V4SI]) - -@@ -273,6 +279,10 @@ - UNSPEC_UZP2 ; Used in vector permute patterns. - UNSPEC_TRN1 ; Used in vector permute patterns. - UNSPEC_TRN2 ; Used in vector permute patterns. -+ UNSPEC_EXT ; Used in aarch64-simd.md. -+ UNSPEC_REV64 ; Used in vector reverse patterns (permute). -+ UNSPEC_REV32 ; Used in vector reverse patterns (permute). -+ UNSPEC_REV16 ; Used in vector reverse patterns (permute). - UNSPEC_AESE ; Used in aarch64-simd.md. - UNSPEC_AESD ; Used in aarch64-simd.md. - UNSPEC_AESMC ; Used in aarch64-simd.md. -@@ -299,6 +309,10 @@ - ;; 32-bit version and "%x0" in the 64-bit version. - (define_mode_attr w [(QI "w") (HI "w") (SI "w") (DI "x") (SF "s") (DF "d")]) - -+;; For inequal width int to float conversion -+(define_mode_attr w1 [(SF "w") (DF "x")]) -+(define_mode_attr w2 [(SF "x") (DF "w")]) -+ - ;; For constraints used in scalar immediate vector moves - (define_mode_attr hq [(HI "h") (QI "q")]) - -@@ -348,6 +362,9 @@ - ;; Attribute to describe constants acceptable in logical operations - (define_mode_attr lconst [(SI "K") (DI "L")]) - -+;; Attribute to describe constants acceptable in atomic logical operations -+(define_mode_attr lconst_atomic [(QI "K") (HI "K") (SI "K") (DI "L")]) -+ - ;; Map a mode to a specific constraint character. - (define_mode_attr cmode [(QI "q") (HI "h") (SI "s") (DI "d")]) - -@@ -358,6 +375,9 @@ - (V2DI "2d") (V2SF "2s") - (V4SF "4s") (V2DF "2d")]) - -+(define_mode_attr Vrevsuff [(V4HI "16") (V8HI "16") (V2SI "32") -+ (V4SI "32") (V2DI "64")]) -+ - (define_mode_attr Vmtype [(V8QI ".8b") (V16QI ".16b") - (V4HI ".4h") (V8HI ".8h") - (V2SI ".2s") (V4SI ".4s") -@@ -552,13 +572,43 @@ - - (define_mode_attr VSTRUCT_DREG [(OI "TI") (CI "EI") (XI "OI")]) - -+;; Mode of pair of elements for each vector mode, to define transfer -+;; size for structure lane/dup loads and stores. -+(define_mode_attr V_TWO_ELEM [(V8QI "HI") (V16QI "HI") -+ (V4HI "SI") (V8HI "SI") -+ (V2SI "V2SI") (V4SI "V2SI") -+ (DI "V2DI") (V2DI "V2DI") -+ (V2SF "V2SF") (V4SF "V2SF") -+ (DF "V2DI") (V2DF "V2DI")]) -+ -+;; Similar, for three elements. -+(define_mode_attr V_THREE_ELEM [(V8QI "BLK") (V16QI "BLK") -+ (V4HI "BLK") (V8HI "BLK") -+ (V2SI "BLK") (V4SI "BLK") -+ (DI "EI") (V2DI "EI") -+ (V2SF "BLK") (V4SF "BLK") -+ (DF "EI") (V2DF "EI")]) -+ -+;; Similar, for four elements. 
-+(define_mode_attr V_FOUR_ELEM [(V8QI "SI") (V16QI "SI") -+ (V4HI "V4HI") (V8HI "V4HI") -+ (V2SI "V4SI") (V4SI "V4SI") -+ (DI "OI") (V2DI "OI") -+ (V2SF "V4SF") (V4SF "V4SF") -+ (DF "OI") (V2DF "OI")]) -+ -+ - ;; Mode for atomic operation suffixes - (define_mode_attr atomic_sfx - [(QI "b") (HI "h") (SI "") (DI "")]) - --(define_mode_attr fcvt_target [(V2DF "v2di") (V4SF "v4si") (V2SF "v2si")]) --(define_mode_attr FCVT_TARGET [(V2DF "V2DI") (V4SF "V4SI") (V2SF "V2SI")]) -+(define_mode_attr fcvt_target [(V2DF "v2di") (V4SF "v4si") (V2SF "v2si") (SF "si") (DF "di")]) -+(define_mode_attr FCVT_TARGET [(V2DF "V2DI") (V4SF "V4SI") (V2SF "V2SI") (SF "SI") (DF "DI")]) - -+;; for the inequal width integer to fp conversions -+(define_mode_attr fcvt_iesize [(SF "di") (DF "si")]) -+(define_mode_attr FCVT_IESIZE [(SF "DI") (DF "SI")]) -+ - (define_mode_attr VSWAP_WIDTH [(V8QI "V16QI") (V16QI "V8QI") - (V4HI "V8HI") (V8HI "V4HI") - (V2SI "V4SI") (V4SI "V2SI") -@@ -853,6 +903,8 @@ - UNSPEC_TRN1 UNSPEC_TRN2 - UNSPEC_UZP1 UNSPEC_UZP2]) - -+(define_int_iterator REVERSE [UNSPEC_REV64 UNSPEC_REV32 UNSPEC_REV16]) -+ - (define_int_iterator FRINT [UNSPEC_FRINTZ UNSPEC_FRINTP UNSPEC_FRINTM - UNSPEC_FRINTN UNSPEC_FRINTI UNSPEC_FRINTX - UNSPEC_FRINTA]) -@@ -862,6 +914,10 @@ - - (define_int_iterator FRECP [UNSPEC_FRECPE UNSPEC_FRECPX]) - -+(define_int_iterator CRC [UNSPEC_CRC32B UNSPEC_CRC32H UNSPEC_CRC32W -+ UNSPEC_CRC32X UNSPEC_CRC32CB UNSPEC_CRC32CH -+ UNSPEC_CRC32CW UNSPEC_CRC32CX]) -+ - (define_int_iterator CRYPTO_AES [UNSPEC_AESE UNSPEC_AESD]) - (define_int_iterator CRYPTO_AESMC [UNSPEC_AESMC UNSPEC_AESIMC]) - -@@ -980,6 +1036,10 @@ - (UNSPEC_TRN1 "trn") (UNSPEC_TRN2 "trn") - (UNSPEC_UZP1 "uzp") (UNSPEC_UZP2 "uzp")]) - -+; op code for REV instructions (size within which elements are reversed). 
-+(define_int_attr rev_op [(UNSPEC_REV64 "64") (UNSPEC_REV32 "32") -+ (UNSPEC_REV16 "16")]) -+ - (define_int_attr perm_hilo [(UNSPEC_ZIP1 "1") (UNSPEC_ZIP2 "2") - (UNSPEC_TRN1 "1") (UNSPEC_TRN2 "2") - (UNSPEC_UZP1 "1") (UNSPEC_UZP2 "2")]) -@@ -986,6 +1046,16 @@ - - (define_int_attr frecp_suffix [(UNSPEC_FRECPE "e") (UNSPEC_FRECPX "x")]) - -+(define_int_attr crc_variant [(UNSPEC_CRC32B "crc32b") (UNSPEC_CRC32H "crc32h") -+ (UNSPEC_CRC32W "crc32w") (UNSPEC_CRC32X "crc32x") -+ (UNSPEC_CRC32CB "crc32cb") (UNSPEC_CRC32CH "crc32ch") -+ (UNSPEC_CRC32CW "crc32cw") (UNSPEC_CRC32CX "crc32cx")]) -+ -+(define_int_attr crc_mode [(UNSPEC_CRC32B "QI") (UNSPEC_CRC32H "HI") -+ (UNSPEC_CRC32W "SI") (UNSPEC_CRC32X "DI") -+ (UNSPEC_CRC32CB "QI") (UNSPEC_CRC32CH "HI") -+ (UNSPEC_CRC32CW "SI") (UNSPEC_CRC32CX "DI")]) -+ - (define_int_attr aes_op [(UNSPEC_AESE "e") (UNSPEC_AESD "d")]) - (define_int_attr aesmc_op [(UNSPEC_AESMC "mc") (UNSPEC_AESIMC "imc")]) - ---- a/src/gcc/config/aarch64/aarch64.h -+++ b/src/gcc/config/aarch64/aarch64.h -@@ -26,14 +26,48 @@ - #define TARGET_CPU_CPP_BUILTINS() \ - do \ - { \ -- builtin_define ("__aarch64__"); \ -+ builtin_define ("__aarch64__"); \ -+ builtin_define ("__ARM_64BIT_STATE"); \ -+ builtin_define_with_int_value \ -+ ("__ARM_ARCH", aarch64_architecture_version); \ -+ cpp_define_formatted \ -+ (parse_in, "__ARM_ARCH_%dA", aarch64_architecture_version); \ -+ builtin_define ("__ARM_ARCH_ISA_A64"); \ -+ builtin_define_with_int_value \ -+ ("__ARM_ARCH_PROFILE", 'A'); \ -+ builtin_define ("__ARM_FEATURE_CLZ"); \ -+ builtin_define ("__ARM_FEATURE_IDIV"); \ -+ builtin_define ("__ARM_FEATURE_UNALIGNED"); \ -+ if (flag_unsafe_math_optimizations) \ -+ builtin_define ("__ARM_FP_FAST"); \ -+ builtin_define ("__ARM_PCS_AAPCS64"); \ -+ builtin_define_with_int_value \ -+ ("__ARM_SIZEOF_WCHAR_T", WCHAR_TYPE_SIZE / 8); \ -+ builtin_define_with_int_value \ -+ ("__ARM_SIZEOF_MINIMAL_ENUM", \ -+ flag_short_enums? 1 : 4); \ - if (TARGET_BIG_END) \ -- builtin_define ("__AARCH64EB__"); \ -+ { \ -+ builtin_define ("__AARCH64EB__"); \ -+ builtin_define ("__ARM_BIG_ENDIAN"); \ -+ } \ - else \ - builtin_define ("__AARCH64EL__"); \ - \ -- if (TARGET_SIMD) \ -- builtin_define ("__ARM_NEON"); \ -+ if (TARGET_FLOAT) \ -+ { \ -+ builtin_define ("__ARM_FEATURE_FMA"); \ -+ builtin_define_with_int_value ("__ARM_FP", 0x0C); \ -+ } \ -+ if (TARGET_SIMD) \ -+ { \ -+ builtin_define ("__ARM_FEATURE_NUMERIC_MAXMIN"); \ -+ builtin_define ("__ARM_NEON"); \ -+ builtin_define_with_int_value ("__ARM_NEON_FP", 0x0C);\ -+ } \ -+ \ -+ if (TARGET_CRC32) \ -+ builtin_define ("__ARM_FEATURE_CRC32"); \ - \ - switch (aarch64_cmodel) \ - { \ -@@ -155,6 +189,8 @@ - - #define PCC_BITFIELD_TYPE_MATTERS 1 - -+/* Major revision number of the ARM Architecture implemented by the target. */ -+extern unsigned aarch64_architecture_version; - - /* Instruction tuning/selection flags. */ - -@@ -188,6 +224,9 @@ - /* Crypto is an optional extension to AdvSIMD. */ - #define TARGET_CRYPTO (TARGET_SIMD && AARCH64_ISA_CRYPTO) - -+/* CRC instructions that can be enabled through +crc arch extension. */ -+#define TARGET_CRC32 (AARCH64_ISA_CRC) -+ - /* Standard register usage. 
*/ - - /* 31 64-bit general purpose registers R0-R30: -@@ -365,8 +404,7 @@ - - #define HARD_REGNO_MODE_OK(REGNO, MODE) aarch64_hard_regno_mode_ok (REGNO, MODE) - --#define MODES_TIEABLE_P(MODE1, MODE2) \ -- (GET_MODE_CLASS (MODE1) == GET_MODE_CLASS (MODE2)) -+#define MODES_TIEABLE_P(MODE1, MODE2) aarch64_modes_tieable_p (MODE1, MODE2) - - #define DWARF2_UNWIND_INFO 1 - -@@ -409,7 +447,7 @@ - enum reg_class - { - NO_REGS, -- CORE_REGS, -+ CALLER_SAVE_REGS, - GENERAL_REGS, - STACK_REG, - POINTER_REGS, -@@ -424,7 +462,7 @@ - #define REG_CLASS_NAMES \ - { \ - "NO_REGS", \ -- "CORE_REGS", \ -+ "CALLER_SAVE_REGS", \ - "GENERAL_REGS", \ - "STACK_REG", \ - "POINTER_REGS", \ -@@ -436,7 +474,7 @@ - #define REG_CLASS_CONTENTS \ - { \ - { 0x00000000, 0x00000000, 0x00000000 }, /* NO_REGS */ \ -- { 0x7fffffff, 0x00000000, 0x00000003 }, /* CORE_REGS */ \ -+ { 0x0007ffff, 0x00000000, 0x00000000 }, /* CALLER_SAVE_REGS */ \ - { 0x7fffffff, 0x00000000, 0x00000003 }, /* GENERAL_REGS */ \ - { 0x80000000, 0x00000000, 0x00000000 }, /* STACK_REG */ \ - { 0xffffffff, 0x00000000, 0x00000003 }, /* POINTER_REGS */ \ -@@ -447,7 +485,7 @@ - - #define REGNO_REG_CLASS(REGNO) aarch64_regno_regclass (REGNO) - --#define INDEX_REG_CLASS CORE_REGS -+#define INDEX_REG_CLASS GENERAL_REGS - #define BASE_REG_CLASS POINTER_REGS - - /* Register pairs used to eliminate unneeded registers that point into -@@ -524,13 +562,33 @@ - struct GTY (()) aarch64_frame - { - HOST_WIDE_INT reg_offset[FIRST_PSEUDO_REGISTER]; -+ -+ /* The number of extra stack bytes taken up by register varargs. -+ This area is allocated by the callee at the very top of the -+ frame. This value is rounded up to a multiple of -+ STACK_BOUNDARY. */ -+ HOST_WIDE_INT saved_varargs_size; -+ - HOST_WIDE_INT saved_regs_size; - /* Padding if needed after the all the callee save registers have - been saved. */ - HOST_WIDE_INT padding0; - HOST_WIDE_INT hardfp_offset; /* HARD_FRAME_POINTER_REGNUM */ -- HOST_WIDE_INT fp_lr_offset; /* Space needed for saving fp and/or lr */ - -+ /* Offset from the base of the frame (incomming SP) to the -+ hard_frame_pointer. This value is always a multiple of -+ STACK_BOUNDARY. */ -+ HOST_WIDE_INT hard_fp_offset; -+ -+ /* The size of the frame. This value is the offset from base of the -+ * frame (incomming SP) to the stack_pointer. This value is always -+ * a multiple of STACK_BOUNDARY. */ -+ -+ unsigned wb_candidate1; -+ unsigned wb_candidate2; -+ -+ HOST_WIDE_INT frame_size; -+ - bool laid_out; - }; - -@@ -537,11 +595,6 @@ - typedef struct GTY (()) machine_function - { - struct aarch64_frame frame; -- -- /* The number of extra stack bytes taken up by register varargs. -- This area is allocated by the callee at the very top of the frame. */ -- HOST_WIDE_INT saved_varargs_size; -- - } machine_function; - #endif - -@@ -565,11 +618,7 @@ - }; - - --extern enum arm_pcs arm_pcs_variant; - --#ifndef ARM_DEFAULT_PCS --#define ARM_DEFAULT_PCS ARM_PCS_AAPCS64 --#endif - - /* We can't use enum machine_mode inside a generator file because it - hasn't been created yet; we shouldn't be using any code that -@@ -670,12 +719,14 @@ - /* The base cost overhead of a memcpy call, for MOVE_RATIO and friends. */ - #define AARCH64_CALL_RATIO 8 - --/* When optimizing for size, give a better estimate of the length of a memcpy -- call, but use the default otherwise. But move_by_pieces_ninsns() counts -- memory-to-memory moves, and we'll have to generate a load & store for each, -- so halve the value to take that into account. 
*/ -+/* MOVE_RATIO dictates when we will use the move_by_pieces infrastructure. -+ move_by_pieces will continually copy the largest safe chunks. So a -+ 7-byte copy is a 4-byte + 2-byte + byte copy. This proves inefficient -+ for both size and speed of copy, so we will instead use the "movmem" -+ standard name to implement the copy. This logic does not apply when -+ targeting -mstrict-align, so keep a sensible default in that case. */ - #define MOVE_RATIO(speed) \ -- (((speed) ? 15 : AARCH64_CALL_RATIO) / 2) -+ (!STRICT_ALIGNMENT ? 2 : (((speed) ? 15 : AARCH64_CALL_RATIO) / 2)) - - /* For CLEAR_RATIO, when optimizing for size, give a better estimate - of the length of a memset call, but use the default otherwise. */ -@@ -688,12 +739,6 @@ - #define SET_RATIO(speed) \ - ((speed) ? 15 : AARCH64_CALL_RATIO - 2) - --/* STORE_BY_PIECES_P can be used when copying a constant string, but -- in that case each 64-bit chunk takes 5 insns instead of 2 (LDR/STR). -- For now we always fail this and let the move_by_pieces code copy -- the string from read-only memory. */ --#define STORE_BY_PIECES_P(SIZE, ALIGN) 0 -- - /* Disable auto-increment in move_by_pieces et al. Use of auto-increment is - rarely a good idea in straight-line code since it adds an extra address - dependency between each instruction. Better to use incrementing offsets. */ -@@ -835,6 +880,11 @@ - - #define SHIFT_COUNT_TRUNCATED !TARGET_SIMD - -+/* Choose appropriate mode for caller saves, so we do the minimum -+ required size of load/store. */ -+#define HARD_REGNO_CALLER_SAVE_MODE(REGNO, NREGS, MODE) \ -+ aarch64_hard_regno_caller_save_mode ((REGNO), (NREGS), (MODE)) -+ - /* Callee only saves lower 64-bits of a 128-bit register. Tell the - compiler the callee clobbers the top 64-bits when restoring the - bottom 64-bits. */ ---- a/src/gcc/config/arc/arc.c -+++ b/src/gcc/config/arc/arc.c -@@ -398,6 +398,11 @@ - - static bool arc_frame_pointer_required (void); - -+static bool arc_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT, -+ unsigned int, -+ enum by_pieces_operation op, -+ bool); -+ - /* Implements target hook vector_mode_supported_p. */ - - static bool -@@ -512,6 +517,10 @@ - #undef TARGET_DELEGITIMIZE_ADDRESS - #define TARGET_DELEGITIMIZE_ADDRESS arc_delegitimize_address - -+#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P -+#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \ -+ arc_use_by_pieces_infrastructure_p -+ - /* Usually, we will be able to scale anchor offsets. - When this fails, we want LEGITIMIZE_ADDRESS to kick in. */ - #undef TARGET_MIN_ANCHOR_OFFSET -@@ -9355,6 +9364,21 @@ - return false; - } - -+/* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */ -+ -+static bool -+arc_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size, -+ unsigned int align, -+ enum by_pieces_operation op, -+ bool speed_p) -+{ -+ /* Let the movmem expander handle small block moves. */ -+ if (op == MOVE_BY_PIECES) -+ return false; -+ -+ return default_use_by_pieces_infrastructure_p (size, align, op, speed_p); -+} -+ - struct gcc_target targetm = TARGET_INITIALIZER; - - #include "gt-arc.h" ---- a/src/gcc/config/arc/arc.h -+++ b/src/gcc/config/arc/arc.h -@@ -1553,12 +1553,6 @@ - in one reasonably fast instruction. */ - #define MOVE_MAX 4 - --/* Let the movmem expander handle small block moves. 
*/ --#define MOVE_BY_PIECES_P(LEN, ALIGN) 0 --#define CAN_MOVE_BY_PIECES(SIZE, ALIGN) \ -- (move_by_pieces_ninsns (SIZE, ALIGN, MOVE_MAX_PIECES + 1) \ -- < (unsigned int) MOVE_RATIO (!optimize_size)) -- - /* Undo the effects of the movmem pattern presence on STORE_BY_PIECES_P . */ - #define MOVE_RATIO(SPEED) ((SPEED) ? 15 : 3) - ---- a/src/gcc/config/arm/aarch-cost-tables.h -+++ b/src/gcc/config/arm/aarch-cost-tables.h -@@ -39,6 +39,7 @@ - 0, /* bfi. */ - 0, /* bfx. */ - 0, /* clz. */ -+ 0, /* rev. */ - COSTS_N_INSNS (1), /* non_exec. */ - false /* non_exec_costs_exec. */ - }, -@@ -139,6 +140,7 @@ - COSTS_N_INSNS (1), /* bfi. */ - COSTS_N_INSNS (1), /* bfx. */ - 0, /* clz. */ -+ 0, /* rev. */ - 0, /* non_exec. */ - true /* non_exec_costs_exec. */ - }, -@@ -239,6 +241,7 @@ - COSTS_N_INSNS (1), /* bfi. */ - 0, /* bfx. */ - 0, /* clz. */ -+ 0, /* rev. */ - 0, /* non_exec. */ - true /* non_exec_costs_exec. */ - }, ---- a/src/gcc/config/arm/cortex-a15.md -+++ b/src/gcc/config/arm/cortex-a15.md -@@ -64,7 +64,7 @@ - (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ - alu_reg,alus_reg,logic_reg,logics_reg,\ - adc_imm,adcs_imm,adc_reg,adcs_reg,\ -- adr,bfm,rev,\ -+ adr,bfm,clz,rbit,rev,\ - shift_imm,shift_reg,\ - mov_imm,mov_reg,\ - mvn_imm,mvn_reg,\ -@@ -72,11 +72,14 @@ - "ca15_issue1,(ca15_sx1,ca15_sx1_alu)|(ca15_sx2,ca15_sx2_alu)") - - ;; ALU ops with immediate shift -+;; crc is also included here so that appropriate scheduling of CRC32 ARMv8-A -+;; instructions can be performed when tuning for the Cortex-A57 since that -+;; core reuses the Cortex-A15 pipeline description for the moment. - (define_insn_reservation "cortex_a15_alu_shift" 3 - (and (eq_attr "tune" "cortexa15") - (eq_attr "type" "extend,\ - alu_shift_imm,alus_shift_imm,\ -- logic_shift_imm,logics_shift_imm,\ -+ crc,logic_shift_imm,logics_shift_imm,\ - mov_shift,mvn_shift")) - "ca15_issue1,(ca15_sx1,ca15_sx1+ca15_sx1_shf,ca15_sx1_alu)\ - |(ca15_sx2,ca15_sx2+ca15_sx2_shf,ca15_sx2_alu)") ---- a/src/gcc/config/arm/arm-tables.opt -+++ b/src/gcc/config/arm/arm-tables.opt -@@ -274,6 +274,9 @@ - Enum(processor_type) String(cortex-r7) Value(cortexr7) - - EnumValue -+Enum(processor_type) String(cortex-m7) Value(cortexm7) -+ -+EnumValue - Enum(processor_type) String(cortex-m4) Value(cortexm4) - - EnumValue -@@ -423,17 +426,23 @@ - Enum(arm_fpu) String(fpv4-sp-d16) Value(11) - - EnumValue --Enum(arm_fpu) String(neon-vfpv4) Value(12) -+Enum(arm_fpu) String(fpv5-sp-d16) Value(12) - - EnumValue --Enum(arm_fpu) String(fp-armv8) Value(13) -+Enum(arm_fpu) String(fpv5-d16) Value(13) - - EnumValue --Enum(arm_fpu) String(neon-fp-armv8) Value(14) -+Enum(arm_fpu) String(neon-vfpv4) Value(14) - - EnumValue --Enum(arm_fpu) String(crypto-neon-fp-armv8) Value(15) -+Enum(arm_fpu) String(fp-armv8) Value(15) - - EnumValue --Enum(arm_fpu) String(vfp3) Value(16) -+Enum(arm_fpu) String(neon-fp-armv8) Value(16) - -+EnumValue -+Enum(arm_fpu) String(crypto-neon-fp-armv8) Value(17) -+ -+EnumValue -+Enum(arm_fpu) String(vfp3) Value(18) -+ ---- a/src/gcc/config/arm/thumb2.md -+++ b/src/gcc/config/arm/thumb2.md -@@ -329,7 +329,7 @@ - movw%?\\t%0, %L1\\t%@ movhi - str%(h%)\\t%1, %0\\t%@ movhi - ldr%(h%)\\t%0, %1\\t%@ movhi" -- [(set_attr "type" "mov_reg,mov_imm,mov_imm,mov_reg,store1,load1") -+ [(set_attr "type" "mov_reg,mov_imm,mov_imm,mov_imm,store1,load1") - (set_attr "predicable" "yes") - (set_attr "predicable_short_it" "yes,no,yes,no,no,no") - (set_attr "length" "2,4,2,4,4,4") -@@ -1370,6 +1370,103 @@ - (set_attr "type" "alu_reg")] - ) - -+; Constants for op 2 
will never be given to these patterns. -+(define_insn_and_split "*iordi_notdi_di" -+ [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") -+ (ior:DI (not:DI (match_operand:DI 1 "s_register_operand" "0,r")) -+ (match_operand:DI 2 "s_register_operand" "r,0")))] -+ "TARGET_THUMB2" -+ "#" -+ "TARGET_THUMB2 && reload_completed" -+ [(set (match_dup 0) (ior:SI (not:SI (match_dup 1)) (match_dup 2))) -+ (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))] -+ " -+ { -+ operands[3] = gen_highpart (SImode, operands[0]); -+ operands[0] = gen_lowpart (SImode, operands[0]); -+ operands[4] = gen_highpart (SImode, operands[1]); -+ operands[1] = gen_lowpart (SImode, operands[1]); -+ operands[5] = gen_highpart (SImode, operands[2]); -+ operands[2] = gen_lowpart (SImode, operands[2]); -+ }" -+ [(set_attr "length" "8") -+ (set_attr "predicable" "yes") -+ (set_attr "predicable_short_it" "no") -+ (set_attr "type" "multiple")] -+) -+ -+(define_insn_and_split "*iordi_notzesidi_di" -+ [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") -+ (ior:DI (not:DI (zero_extend:DI -+ (match_operand:SI 2 "s_register_operand" "r,r"))) -+ (match_operand:DI 1 "s_register_operand" "0,?r")))] -+ "TARGET_THUMB2" -+ "#" -+ ; (not (zero_extend...)) means operand0 will always be 0xffffffff -+ "TARGET_THUMB2 && reload_completed" -+ [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1))) -+ (set (match_dup 3) (const_int -1))] -+ " -+ { -+ operands[3] = gen_highpart (SImode, operands[0]); -+ operands[0] = gen_lowpart (SImode, operands[0]); -+ operands[1] = gen_lowpart (SImode, operands[1]); -+ }" -+ [(set_attr "length" "4,8") -+ (set_attr "predicable" "yes") -+ (set_attr "predicable_short_it" "no") -+ (set_attr "type" "multiple")] -+) -+ -+(define_insn_and_split "*iordi_notdi_zesidi" -+ [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") -+ (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "0,?r")) -+ (zero_extend:DI -+ (match_operand:SI 1 "s_register_operand" "r,r"))))] -+ "TARGET_THUMB2" -+ "#" -+ "TARGET_THUMB2 && reload_completed" -+ [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1))) -+ (set (match_dup 3) (not:SI (match_dup 4)))] -+ " -+ { -+ operands[3] = gen_highpart (SImode, operands[0]); -+ operands[0] = gen_lowpart (SImode, operands[0]); -+ operands[1] = gen_lowpart (SImode, operands[1]); -+ operands[4] = gen_highpart (SImode, operands[2]); -+ operands[2] = gen_lowpart (SImode, operands[2]); -+ }" -+ [(set_attr "length" "8") -+ (set_attr "predicable" "yes") -+ (set_attr "predicable_short_it" "no") -+ (set_attr "type" "multiple")] -+) -+ -+(define_insn_and_split "*iordi_notsesidi_di" -+ [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") -+ (ior:DI (not:DI (sign_extend:DI -+ (match_operand:SI 2 "s_register_operand" "r,r"))) -+ (match_operand:DI 1 "s_register_operand" "0,r")))] -+ "TARGET_THUMB2" -+ "#" -+ "TARGET_THUMB2 && reload_completed" -+ [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1))) -+ (set (match_dup 3) (ior:SI (not:SI -+ (ashiftrt:SI (match_dup 2) (const_int 31))) -+ (match_dup 4)))] -+ " -+ { -+ operands[3] = gen_highpart (SImode, operands[0]); -+ operands[0] = gen_lowpart (SImode, operands[0]); -+ operands[4] = gen_highpart (SImode, operands[1]); -+ operands[1] = gen_lowpart (SImode, operands[1]); -+ }" -+ [(set_attr "length" "8") -+ (set_attr "predicable" "yes") -+ (set_attr "predicable_short_it" "no") -+ (set_attr "type" "multiple")] -+) -+ - (define_insn "*orsi_notsi_si" - [(set (match_operand:SI 0 "s_register_operand" "=r") - 
(ior:SI (not:SI (match_operand:SI 2 "s_register_operand" "r")) ---- a/src/gcc/config/arm/arm.c -+++ b/src/gcc/config/arm/arm.c -@@ -50,6 +50,7 @@ - #include "except.h" - #include "tm_p.h" - #include "target.h" -+#include "sched-int.h" - #include "target-def.h" - #include "debug.h" - #include "langhooks.h" -@@ -59,6 +60,7 @@ - #include "params.h" - #include "opts.h" - #include "dumpfile.h" -+#include "gimple-expr.h" - - /* Forward definitions of types. */ - typedef struct minipool_node Mnode; -@@ -93,6 +95,7 @@ - static bool thumb_force_lr_save (void); - static unsigned arm_size_return_regs (void); - static bool arm_assemble_integer (rtx, unsigned int, int); -+static void arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update); - static void arm_print_operand (FILE *, rtx, int); - static void arm_print_operand_address (FILE *, rtx); - static bool arm_print_operand_punct_valid_p (unsigned char code); -@@ -584,6 +587,9 @@ - #undef TARGET_MANGLE_TYPE - #define TARGET_MANGLE_TYPE arm_mangle_type - -+#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV -+#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv -+ - #undef TARGET_BUILD_BUILTIN_VA_LIST - #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list - #undef TARGET_EXPAND_BUILTIN_VA_START -@@ -985,6 +991,7 @@ - COSTS_N_INSNS (1), /* bfi. */ - COSTS_N_INSNS (1), /* bfx. */ - 0, /* clz. */ -+ 0, /* rev. */ - 0, /* non_exec. */ - true /* non_exec_costs_exec. */ - }, -@@ -1068,7 +1075,210 @@ - } - }; - -+const struct cpu_cost_table cortexa8_extra_costs = -+{ -+ /* ALU */ -+ { -+ 0, /* arith. */ -+ 0, /* logical. */ -+ COSTS_N_INSNS (1), /* shift. */ -+ 0, /* shift_reg. */ -+ COSTS_N_INSNS (1), /* arith_shift. */ -+ 0, /* arith_shift_reg. */ -+ COSTS_N_INSNS (1), /* log_shift. */ -+ 0, /* log_shift_reg. */ -+ 0, /* extend. */ -+ 0, /* extend_arith. */ -+ 0, /* bfi. */ -+ 0, /* bfx. */ -+ 0, /* clz. */ -+ 0, /* rev. */ -+ 0, /* non_exec. */ -+ true /* non_exec_costs_exec. */ -+ }, -+ { -+ /* MULT SImode */ -+ { -+ COSTS_N_INSNS (1), /* simple. */ -+ COSTS_N_INSNS (1), /* flag_setting. */ -+ COSTS_N_INSNS (1), /* extend. */ -+ COSTS_N_INSNS (1), /* add. */ -+ COSTS_N_INSNS (1), /* extend_add. */ -+ COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */ -+ }, -+ /* MULT DImode */ -+ { -+ 0, /* simple (N/A). */ -+ 0, /* flag_setting (N/A). */ -+ COSTS_N_INSNS (2), /* extend. */ -+ 0, /* add (N/A). */ -+ COSTS_N_INSNS (2), /* extend_add. */ -+ 0 /* idiv (N/A). */ -+ } -+ }, -+ /* LD/ST */ -+ { -+ COSTS_N_INSNS (1), /* load. */ -+ COSTS_N_INSNS (1), /* load_sign_extend. */ -+ COSTS_N_INSNS (1), /* ldrd. */ -+ COSTS_N_INSNS (1), /* ldm_1st. */ -+ 1, /* ldm_regs_per_insn_1st. */ -+ 2, /* ldm_regs_per_insn_subsequent. */ -+ COSTS_N_INSNS (1), /* loadf. */ -+ COSTS_N_INSNS (1), /* loadd. */ -+ COSTS_N_INSNS (1), /* load_unaligned. */ -+ COSTS_N_INSNS (1), /* store. */ -+ COSTS_N_INSNS (1), /* strd. */ -+ COSTS_N_INSNS (1), /* stm_1st. */ -+ 1, /* stm_regs_per_insn_1st. */ -+ 2, /* stm_regs_per_insn_subsequent. */ -+ COSTS_N_INSNS (1), /* storef. */ -+ COSTS_N_INSNS (1), /* stored. */ -+ COSTS_N_INSNS (1) /* store_unaligned. */ -+ }, -+ { -+ /* FP SFmode */ -+ { -+ COSTS_N_INSNS (36), /* div. */ -+ COSTS_N_INSNS (11), /* mult. */ -+ COSTS_N_INSNS (20), /* mult_addsub. */ -+ COSTS_N_INSNS (30), /* fma. */ -+ COSTS_N_INSNS (9), /* addsub. */ -+ COSTS_N_INSNS (3), /* fpconst. */ -+ COSTS_N_INSNS (3), /* neg. */ -+ COSTS_N_INSNS (6), /* compare. */ -+ COSTS_N_INSNS (4), /* widen. */ -+ COSTS_N_INSNS (4), /* narrow. 
*/ -+ COSTS_N_INSNS (8), /* toint. */ -+ COSTS_N_INSNS (8), /* fromint. */ -+ COSTS_N_INSNS (8) /* roundint. */ -+ }, -+ /* FP DFmode */ -+ { -+ COSTS_N_INSNS (64), /* div. */ -+ COSTS_N_INSNS (16), /* mult. */ -+ COSTS_N_INSNS (25), /* mult_addsub. */ -+ COSTS_N_INSNS (30), /* fma. */ -+ COSTS_N_INSNS (9), /* addsub. */ -+ COSTS_N_INSNS (3), /* fpconst. */ -+ COSTS_N_INSNS (3), /* neg. */ -+ COSTS_N_INSNS (6), /* compare. */ -+ COSTS_N_INSNS (6), /* widen. */ -+ COSTS_N_INSNS (6), /* narrow. */ -+ COSTS_N_INSNS (8), /* toint. */ -+ COSTS_N_INSNS (8), /* fromint. */ -+ COSTS_N_INSNS (8) /* roundint. */ -+ } -+ }, -+ /* Vector */ -+ { -+ COSTS_N_INSNS (1) /* alu. */ -+ } -+}; - -+const struct cpu_cost_table cortexa5_extra_costs = -+{ -+ /* ALU */ -+ { -+ 0, /* arith. */ -+ 0, /* logical. */ -+ COSTS_N_INSNS (1), /* shift. */ -+ COSTS_N_INSNS (1), /* shift_reg. */ -+ COSTS_N_INSNS (1), /* arith_shift. */ -+ COSTS_N_INSNS (1), /* arith_shift_reg. */ -+ COSTS_N_INSNS (1), /* log_shift. */ -+ COSTS_N_INSNS (1), /* log_shift_reg. */ -+ COSTS_N_INSNS (1), /* extend. */ -+ COSTS_N_INSNS (1), /* extend_arith. */ -+ COSTS_N_INSNS (1), /* bfi. */ -+ COSTS_N_INSNS (1), /* bfx. */ -+ COSTS_N_INSNS (1), /* clz. */ -+ COSTS_N_INSNS (1), /* rev. */ -+ 0, /* non_exec. */ -+ true /* non_exec_costs_exec. */ -+ }, -+ -+ { -+ /* MULT SImode */ -+ { -+ 0, /* simple. */ -+ COSTS_N_INSNS (1), /* flag_setting. */ -+ COSTS_N_INSNS (1), /* extend. */ -+ COSTS_N_INSNS (1), /* add. */ -+ COSTS_N_INSNS (1), /* extend_add. */ -+ COSTS_N_INSNS (7) /* idiv. */ -+ }, -+ /* MULT DImode */ -+ { -+ 0, /* simple (N/A). */ -+ 0, /* flag_setting (N/A). */ -+ COSTS_N_INSNS (1), /* extend. */ -+ 0, /* add. */ -+ COSTS_N_INSNS (2), /* extend_add. */ -+ 0 /* idiv (N/A). */ -+ } -+ }, -+ /* LD/ST */ -+ { -+ COSTS_N_INSNS (1), /* load. */ -+ COSTS_N_INSNS (1), /* load_sign_extend. */ -+ COSTS_N_INSNS (6), /* ldrd. */ -+ COSTS_N_INSNS (1), /* ldm_1st. */ -+ 1, /* ldm_regs_per_insn_1st. */ -+ 2, /* ldm_regs_per_insn_subsequent. */ -+ COSTS_N_INSNS (2), /* loadf. */ -+ COSTS_N_INSNS (4), /* loadd. */ -+ COSTS_N_INSNS (1), /* load_unaligned. */ -+ COSTS_N_INSNS (1), /* store. */ -+ COSTS_N_INSNS (3), /* strd. */ -+ COSTS_N_INSNS (1), /* stm_1st. */ -+ 1, /* stm_regs_per_insn_1st. */ -+ 2, /* stm_regs_per_insn_subsequent. */ -+ COSTS_N_INSNS (2), /* storef. */ -+ COSTS_N_INSNS (2), /* stored. */ -+ COSTS_N_INSNS (1) /* store_unaligned. */ -+ }, -+ { -+ /* FP SFmode */ -+ { -+ COSTS_N_INSNS (15), /* div. */ -+ COSTS_N_INSNS (3), /* mult. */ -+ COSTS_N_INSNS (7), /* mult_addsub. */ -+ COSTS_N_INSNS (7), /* fma. */ -+ COSTS_N_INSNS (3), /* addsub. */ -+ COSTS_N_INSNS (3), /* fpconst. */ -+ COSTS_N_INSNS (3), /* neg. */ -+ COSTS_N_INSNS (3), /* compare. */ -+ COSTS_N_INSNS (3), /* widen. */ -+ COSTS_N_INSNS (3), /* narrow. */ -+ COSTS_N_INSNS (3), /* toint. */ -+ COSTS_N_INSNS (3), /* fromint. */ -+ COSTS_N_INSNS (3) /* roundint. */ -+ }, -+ /* FP DFmode */ -+ { -+ COSTS_N_INSNS (30), /* div. */ -+ COSTS_N_INSNS (6), /* mult. */ -+ COSTS_N_INSNS (10), /* mult_addsub. */ -+ COSTS_N_INSNS (7), /* fma. */ -+ COSTS_N_INSNS (3), /* addsub. */ -+ COSTS_N_INSNS (3), /* fpconst. */ -+ COSTS_N_INSNS (3), /* neg. */ -+ COSTS_N_INSNS (3), /* compare. */ -+ COSTS_N_INSNS (3), /* widen. */ -+ COSTS_N_INSNS (3), /* narrow. */ -+ COSTS_N_INSNS (3), /* toint. */ -+ COSTS_N_INSNS (3), /* fromint. */ -+ COSTS_N_INSNS (3) /* roundint. */ -+ } -+ }, -+ /* Vector */ -+ { -+ COSTS_N_INSNS (1) /* alu. 
*/ -+ } -+}; -+ -+ - const struct cpu_cost_table cortexa7_extra_costs = - { - /* ALU */ -@@ -1086,6 +1296,7 @@ - COSTS_N_INSNS (1), /* bfi. */ - COSTS_N_INSNS (1), /* bfx. */ - COSTS_N_INSNS (1), /* clz. */ -+ COSTS_N_INSNS (1), /* rev. */ - 0, /* non_exec. */ - true /* non_exec_costs_exec. */ - }, -@@ -1187,6 +1398,7 @@ - 0, /* bfi. */ - COSTS_N_INSNS (1), /* bfx. */ - COSTS_N_INSNS (1), /* clz. */ -+ COSTS_N_INSNS (1), /* rev. */ - 0, /* non_exec. */ - true /* non_exec_costs_exec. */ - }, -@@ -1287,6 +1499,7 @@ - COSTS_N_INSNS (1), /* bfi. */ - 0, /* bfx. */ - 0, /* clz. */ -+ 0, /* rev. */ - 0, /* non_exec. */ - true /* non_exec_costs_exec. */ - }, -@@ -1387,6 +1600,7 @@ - 0, /* bfi. */ - 0, /* bfx. */ - 0, /* clz. */ -+ 0, /* rev. */ - COSTS_N_INSNS (1), /* non_exec. */ - false /* non_exec_costs_exec. */ - }, -@@ -1483,7 +1697,8 @@ - false, /* Prefer LDRD/STRD. */ - {true, true}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ - }; - - const struct tune_params arm_fastmul_tune = -@@ -1499,7 +1714,8 @@ - false, /* Prefer LDRD/STRD. */ - {true, true}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ - }; - - /* StrongARM has early execution of branches, so a sequence that is worth -@@ -1518,7 +1734,8 @@ - false, /* Prefer LDRD/STRD. */ - {true, true}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ - }; - - const struct tune_params arm_xscale_tune = -@@ -1534,7 +1751,8 @@ - false, /* Prefer LDRD/STRD. */ - {true, true}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ - }; - - const struct tune_params arm_9e_tune = -@@ -1550,7 +1768,8 @@ - false, /* Prefer LDRD/STRD. */ - {true, true}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ - }; - - const struct tune_params arm_v6t2_tune = -@@ -1566,7 +1785,8 @@ - false, /* Prefer LDRD/STRD. */ - {true, true}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ - }; - - /* Generic Cortex tuning. Use more specific tunings if appropriate. */ -@@ -1583,9 +1803,27 @@ - false, /* Prefer LDRD/STRD. */ - {true, true}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ - }; - -+const struct tune_params arm_cortex_a8_tune = -+{ -+ arm_9e_rtx_costs, -+ &cortexa8_extra_costs, -+ NULL, /* Sched adj cost. */ -+ 1, /* Constant limit. */ -+ 5, /* Max cond insns. */ -+ ARM_PREFETCH_NOT_BENEFICIAL, -+ false, /* Prefer constant pool. 
*/ -+ arm_default_branch_cost, -+ false, /* Prefer LDRD/STRD. */ -+ {true, true}, /* Prefer non short circuit. */ -+ &arm_default_vec_cost, /* Vectorizer costs. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ -+}; -+ - const struct tune_params arm_cortex_a7_tune = - { - arm_9e_rtx_costs, -@@ -1599,7 +1837,8 @@ - false, /* Prefer LDRD/STRD. */ - {true, true}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ - }; - - const struct tune_params arm_cortex_a15_tune = -@@ -1615,7 +1854,8 @@ - true, /* Prefer LDRD/STRD. */ - {true, true}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ true, true /* Prefer 32-bit encodings. */ - }; - - const struct tune_params arm_cortex_a53_tune = -@@ -1631,7 +1871,8 @@ - false, /* Prefer LDRD/STRD. */ - {true, true}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ - }; - - const struct tune_params arm_cortex_a57_tune = -@@ -1647,7 +1888,8 @@ - true, /* Prefer LDRD/STRD. */ - {true, true}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ true, true /* Prefer 32-bit encodings. */ - }; - - /* Branches can be dual-issued on Cortex-A5, so conditional execution is -@@ -1656,7 +1898,7 @@ - const struct tune_params arm_cortex_a5_tune = - { - arm_9e_rtx_costs, -- NULL, -+ &cortexa5_extra_costs, - NULL, /* Sched adj cost. */ - 1, /* Constant limit. */ - 1, /* Max cond insns. */ -@@ -1666,7 +1908,8 @@ - false, /* Prefer LDRD/STRD. */ - {false, false}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ - }; - - const struct tune_params arm_cortex_a9_tune = -@@ -1682,7 +1925,8 @@ - false, /* Prefer LDRD/STRD. */ - {true, true}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ - }; - - const struct tune_params arm_cortex_a12_tune = -@@ -1698,7 +1942,8 @@ - true, /* Prefer LDRD/STRD. */ - {true, true}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ - }; - - /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single -@@ -1721,7 +1966,8 @@ - false, /* Prefer LDRD/STRD. */ - {false, false}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ - }; - - /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than -@@ -1739,7 +1985,8 @@ - false, /* Prefer LDRD/STRD. */ - {false, false}, /* Prefer non short circuit. 
*/ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ - }; - - const struct tune_params arm_fa726te_tune = -@@ -1755,7 +2002,8 @@ - false, /* Prefer LDRD/STRD. */ - {true, true}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ - }; - - -@@ -2806,7 +3054,7 @@ - prefer_neon_for_64bits = true; - - /* Use the alternative scheduling-pressure algorithm by default. */ -- maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2, -+ maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL, - global_options.x_param_values, - global_options_set.x_param_values); - -@@ -6079,11 +6327,6 @@ - if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl)) - return false; - -- /* Cannot tail-call to long calls, since these are out of range of -- a branch instruction. */ -- if (decl && arm_is_long_call_p (decl)) -- return false; -- - /* If we are interworking and the function is not declared static - then we can't tail-call it unless we know that it exists in this - compilation unit (since it might be a Thumb routine). */ -@@ -9337,6 +9580,47 @@ - *cost = LIBCALL_COST (2); - return false; - -+ case BSWAP: -+ if (arm_arch6) -+ { -+ if (mode == SImode) -+ { -+ *cost = COSTS_N_INSNS (1); -+ if (speed_p) -+ *cost += extra_cost->alu.rev; -+ -+ return false; -+ } -+ } -+ else -+ { -+ /* No rev instruction available. Look at arm_legacy_rev -+ and thumb_legacy_rev for the form of RTL used then. */ -+ if (TARGET_THUMB) -+ { -+ *cost = COSTS_N_INSNS (10); -+ -+ if (speed_p) -+ { -+ *cost += 6 * extra_cost->alu.shift; -+ *cost += 3 * extra_cost->alu.logical; -+ } -+ } -+ else -+ { -+ *cost = COSTS_N_INSNS (5); -+ -+ if (speed_p) -+ { -+ *cost += 2 * extra_cost->alu.shift; -+ *cost += extra_cost->alu.arith_shift; -+ *cost += 2 * extra_cost->alu.logical; -+ } -+ } -+ return true; -+ } -+ return false; -+ - case MINUS: - if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT - && (mode == SFmode || !TARGET_VFP_SINGLE)) -@@ -9719,8 +10003,17 @@ - /* Vector mode? */ - *cost = LIBCALL_COST (2); - return false; -+ case IOR: -+ if (mode == SImode && arm_arch6 && aarch_rev16_p (x)) -+ { -+ *cost = COSTS_N_INSNS (1); -+ if (speed_p) -+ *cost += extra_cost->alu.rev; - -- case AND: case XOR: case IOR: -+ return true; -+ } -+ /* Fall through. */ -+ case AND: case XOR: - if (mode == SImode) - { - enum rtx_code subcode = GET_CODE (XEXP (x, 0)); -@@ -10619,6 +10912,36 @@ - *cost = LIBCALL_COST (1); - return false; - -+ case FMA: -+ if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA) -+ { -+ rtx op0 = XEXP (x, 0); -+ rtx op1 = XEXP (x, 1); -+ rtx op2 = XEXP (x, 2); -+ -+ *cost = COSTS_N_INSNS (1); -+ -+ /* vfms or vfnma. */ -+ if (GET_CODE (op0) == NEG) -+ op0 = XEXP (op0, 0); -+ -+ /* vfnms or vfnma. */ -+ if (GET_CODE (op2) == NEG) -+ op2 = XEXP (op2, 0); -+ -+ *cost += rtx_cost (op0, FMA, 0, speed_p); -+ *cost += rtx_cost (op1, FMA, 1, speed_p); -+ *cost += rtx_cost (op2, FMA, 2, speed_p); -+ -+ if (speed_p) -+ *cost += extra_cost->fp[mode ==DFmode].fma; -+ -+ return true; -+ } -+ -+ *cost = LIBCALL_COST (3); -+ return false; -+ - case FIX: - case UNSIGNED_FIX: - if (TARGET_HARD_FLOAT) -@@ -10669,10 +10992,16 @@ - return true; - - case ASM_OPERANDS: -- /* Just a guess. 
Cost one insn per input. */ -- *cost = COSTS_N_INSNS (ASM_OPERANDS_INPUT_LENGTH (x)); -- return true; -+ { -+ /* Just a guess. Guess number of instructions in the asm -+ plus one insn per input. Always a minimum of COSTS_N_INSNS (1) -+ though (see PR60663). */ -+ int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x))); -+ int num_operands = ASM_OPERANDS_INPUT_LENGTH (x); - -+ *cost = COSTS_N_INSNS (asm_length + num_operands); -+ return true; -+ } - default: - if (mode != VOIDmode) - *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode)); -@@ -12566,7 +12895,11 @@ - || (type == 0 && GET_CODE (ind) == PRE_DEC)) - return arm_address_register_rtx_p (XEXP (ind, 0), 0); - -- /* FIXME: vld1 allows register post-modify. */ -+ /* Allow post-increment by register for VLDn */ -+ if (type == 2 && GET_CODE (ind) == POST_MODIFY -+ && GET_CODE (XEXP (ind, 1)) == PLUS -+ && REG_P (XEXP (XEXP (ind, 1), 1))) -+ return true; - - /* Match: - (plus (reg) -@@ -16787,9 +17120,20 @@ - compute_bb_for_insn (); - df_analyze (); - -+ enum Convert_Action {SKIP, CONV, SWAP_CONV}; -+ - FOR_EACH_BB_FN (bb, cfun) - { -+ if (current_tune->disparage_flag_setting_t16_encodings -+ && optimize_bb_for_speed_p (bb)) -+ continue; -+ - rtx insn; -+ Convert_Action action = SKIP; -+ Convert_Action action_for_partial_flag_setting -+ = (current_tune->disparage_partial_flag_setting_t16_encodings -+ && optimize_bb_for_speed_p (bb)) -+ ? SKIP : CONV; - - COPY_REG_SET (&live, DF_LR_OUT (bb)); - df_simulate_initialize_backwards (bb, &live); -@@ -16799,7 +17143,7 @@ - && !REGNO_REG_SET_P (&live, CC_REGNUM) - && GET_CODE (PATTERN (insn)) == SET) - { -- enum {SKIP, CONV, SWAP_CONV} action = SKIP; -+ action = SKIP; - rtx pat = PATTERN (insn); - rtx dst = XEXP (pat, 0); - rtx src = XEXP (pat, 1); -@@ -16880,10 +17224,11 @@ - /* ANDS <Rdn>,<Rm> */ - if (rtx_equal_p (dst, op0) - && low_register_operand (op1, SImode)) -- action = CONV; -+ action = action_for_partial_flag_setting; - else if (rtx_equal_p (dst, op1) - && low_register_operand (op0, SImode)) -- action = SWAP_CONV; -+ action = action_for_partial_flag_setting == SKIP -+ ? SKIP : SWAP_CONV; - break; - - case ASHIFTRT: -@@ -16894,7 +17239,7 @@ - /* LSLS <Rdn>,<Rm> */ - if (rtx_equal_p (dst, op0) - && low_register_operand (op1, SImode)) -- action = CONV; -+ action = action_for_partial_flag_setting; - /* ASRS <Rd>,<Rm>,#<imm5> */ - /* LSRS <Rd>,<Rm>,#<imm5> */ - /* LSLS <Rd>,<Rm>,#<imm5> */ -@@ -16901,7 +17246,7 @@ - else if (low_register_operand (op0, SImode) - && CONST_INT_P (op1) - && IN_RANGE (INTVAL (op1), 0, 31)) -- action = CONV; -+ action = action_for_partial_flag_setting; - break; - - case ROTATERT: -@@ -16908,12 +17253,16 @@ - /* RORS <Rdn>,<Rm> */ - if (rtx_equal_p (dst, op0) - && low_register_operand (op1, SImode)) -- action = CONV; -+ action = action_for_partial_flag_setting; - break; - - case NOT: -+ /* MVNS <Rd>,<Rm> */ -+ if (low_register_operand (op0, SImode)) -+ action = action_for_partial_flag_setting; -+ break; -+ - case NEG: -- /* MVNS <Rd>,<Rm> */ - /* NEGS <Rd>,<Rm> (a.k.a RSBS) */ - if (low_register_operand (op0, SImode)) - action = CONV; -@@ -16923,7 +17272,7 @@ - /* MOVS <Rd>,#<imm8> */ - if (CONST_INT_P (src) - && IN_RANGE (INTVAL (src), 0, 255)) -- action = CONV; -+ action = action_for_partial_flag_setting; - break; - - case REG: -@@ -17144,24 +17493,7 @@ - - /* Routines to output assembly language. */ - --/* If the rtx is the correct value then return the string of the number. 
-- In this way we can ensure that valid double constants are generated even -- when cross compiling. */ --const char * --fp_immediate_constant (rtx x) --{ -- REAL_VALUE_TYPE r; -- -- if (!fp_consts_inited) -- init_fp_table (); -- -- REAL_VALUE_FROM_CONST_DOUBLE (r, x); -- -- gcc_assert (REAL_VALUES_EQUAL (r, value_fp0)); -- return "0"; --} -- --/* As for fp_immediate_constant, but value is passed directly, not in rtx. */ -+/* Return string representation of passed in real value. */ - static const char * - fp_const_from_val (REAL_VALUE_TYPE *r) - { -@@ -17252,14 +17584,22 @@ - /* Output the assembly for a store multiple. */ - - const char * --vfp_output_fstmd (rtx * operands) -+vfp_output_vstmd (rtx * operands) - { - char pattern[100]; - int p; - int base; - int i; -+ rtx addr_reg = REG_P (XEXP (operands[0], 0)) -+ ? XEXP (operands[0], 0) -+ : XEXP (XEXP (operands[0], 0), 0); -+ bool push_p = REGNO (addr_reg) == SP_REGNUM; - -- strcpy (pattern, "fstmfdd%?\t%m0!, {%P1"); -+ if (push_p) -+ strcpy (pattern, "vpush%?.64\t{%P1"); -+ else -+ strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1"); -+ - p = strlen (pattern); - - gcc_assert (REG_P (operands[1])); -@@ -17387,6 +17727,15 @@ - require_pic_register (); - use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg); - } -+ -+ if (TARGET_AAPCS_BASED) -+ { -+ /* For AAPCS, IP and CC can be clobbered by veneers inserted by the -+ linker. */ -+ rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn); -+ clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM)); -+ clobber_reg (fusage, gen_rtx_REG (word_mode, CC_REGNUM)); -+ } - } - - /* Output a 'call' insn. */ -@@ -18066,19 +18415,19 @@ - switch (GET_CODE (addr)) - { - case PRE_DEC: -- templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s"; -+ templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s"; - ops[0] = XEXP (addr, 0); - ops[1] = reg; - break; - - case POST_INC: -- templ = "f%smia%c%%?\t%%0!, {%%%s1}%s"; -+ templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s"; - ops[0] = XEXP (addr, 0); - ops[1] = reg; - break; - - default: -- templ = "f%s%c%%?\t%%%s0, %%1%s"; -+ templ = "v%sr%%?.%s\t%%%s0, %%1%s"; - ops[0] = reg; - ops[1] = mem; - break; -@@ -18086,7 +18435,7 @@ - - sprintf (buff, templ, - load ? "ld" : "st", -- dp ? 'd' : 's', -+ dp ? "64" : "32", - dp ? "P" : "", - integer_p ? "\t%@ int" : ""); - output_asm_insn (buff, ops); -@@ -20426,6 +20775,18 @@ - { - int reg = -1; - -+ /* Register r3 is caller-saved. Normally it does not need to be -+ saved on entry by the prologue. However if we choose to save -+ it for padding then we may confuse the compiler into thinking -+ a prologue sequence is required when in fact it is not. This -+ will occur when shrink-wrapping if r3 is used as a scratch -+ register and there are no other callee-saved writes. -+ -+ This situation can be avoided when other callee-saved registers -+ are available and r3 is not mandatory if we choose a callee-saved -+ register for padding. */ -+ bool prefer_callee_reg_p = false; -+ - /* If it is safe to use r3, then do so. This sometimes - generates better code on Thumb-2 by avoiding the need to - use 32-bit push/pop instructions. */ -@@ -20432,24 +20793,29 @@ - if (! any_sibcall_could_use_r3 () - && arm_size_return_regs () <= 12 - && (offsets->saved_regs_mask & (1 << 3)) == 0 -- && (TARGET_THUMB2 -+ && (TARGET_THUMB2 - || !(TARGET_LDRD && current_tune->prefer_ldrd_strd))) - { - reg = 3; -+ if (!TARGET_THUMB2) -+ prefer_callee_reg_p = true; - } -- else -- for (i = 4; i <= (TARGET_THUMB1 ? 
LAST_LO_REGNUM : 11); i++) -- { -- /* Avoid fixed registers; they may be changed at -- arbitrary times so it's unsafe to restore them -- during the epilogue. */ -- if (!fixed_regs[i] -- && (offsets->saved_regs_mask & (1 << i)) == 0) -- { -- reg = i; -- break; -- } -- } -+ if (reg == -1 -+ || prefer_callee_reg_p) -+ { -+ for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++) -+ { -+ /* Avoid fixed registers; they may be changed at -+ arbitrary times so it's unsafe to restore them -+ during the epilogue. */ -+ if (!fixed_regs[i] -+ && (offsets->saved_regs_mask & (1 << i)) == 0) -+ { -+ reg = i; -+ break; -+ } -+ } -+ } - - if (reg != -1) - { -@@ -21039,7 +21405,15 @@ - } - - --/* If CODE is 'd', then the X is a condition operand and the instruction -+/* Globally reserved letters: acln -+ Puncutation letters currently used: @_|?().!# -+ Lower case letters currently used: bcdefhimpqtvwxyz -+ Upper case letters currently used: ABCDFGHJKLMNOPQRSTU -+ Letters previously used, but now deprecated/obsolete: sVWXYZ. -+ -+ Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P. -+ -+ If CODE is 'd', then the X is a condition operand and the instruction - should only be executed if the condition is true. - if CODE is 'D', then the X is a condition operand and the instruction - should only be executed if the condition is false: however, if the mode -@@ -21179,6 +21553,19 @@ - } - return; - -+ case 'b': -+ /* Print the log2 of a CONST_INT. */ -+ { -+ HOST_WIDE_INT val; -+ -+ if (!CONST_INT_P (x) -+ || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0) -+ output_operand_lossage ("Unsupported operand for code '%c'", code); -+ else -+ fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val); -+ } -+ return; -+ - case 'L': - /* The low 16 bits of an immediate constant. */ - fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff); -@@ -21421,7 +21808,7 @@ - register. */ - case 'p': - { -- int mode = GET_MODE (x); -+ enum machine_mode mode = GET_MODE (x); - int regno; - - if (GET_MODE_SIZE (mode) != 8 || !REG_P (x)) -@@ -21445,7 +21832,7 @@ - case 'P': - case 'q': - { -- int mode = GET_MODE (x); -+ enum machine_mode mode = GET_MODE (x); - int is_quad = (code == 'q'); - int regno; - -@@ -21481,7 +21868,7 @@ - case 'e': - case 'f': - { -- int mode = GET_MODE (x); -+ enum machine_mode mode = GET_MODE (x); - int regno; - - if ((GET_MODE_SIZE (mode) != 16 -@@ -21563,6 +21950,7 @@ - { - rtx addr; - bool postinc = FALSE; -+ rtx postinc_reg = NULL; - unsigned align, memsize, align_bits; - - gcc_assert (MEM_P (x)); -@@ -21572,6 +21960,11 @@ - postinc = 1; - addr = XEXP (addr, 0); - } -+ if (GET_CODE (addr) == POST_MODIFY) -+ { -+ postinc_reg = XEXP( XEXP (addr, 1), 1); -+ addr = XEXP (addr, 0); -+ } - asm_fprintf (stream, "[%r", REGNO (addr)); - - /* We know the alignment of this access, so we can emit a hint in the -@@ -21597,6 +21990,8 @@ - - if (postinc) - fputs("!", stream); -+ if (postinc_reg) -+ asm_fprintf (stream, ", %r", REGNO (postinc_reg)); - } - return; - -@@ -21614,7 +22009,7 @@ - /* Translate an S register number into a D register number and element index. */ - case 'y': - { -- int mode = GET_MODE (x); -+ enum machine_mode mode = GET_MODE (x); - int regno; - - if (GET_MODE_SIZE (mode) != 4 || !REG_P (x)) -@@ -21648,7 +22043,7 @@ - number into a D register number and element index. 
*/ - case 'z': - { -- int mode = GET_MODE (x); -+ enum machine_mode mode = GET_MODE (x); - int regno; - - if (GET_MODE_SIZE (mode) != 2 || !REG_P (x)) -@@ -21688,15 +22083,12 @@ - break; - - case CONST_DOUBLE: -- if (TARGET_NEON) -- { -- char fpstr[20]; -- real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x), -- sizeof (fpstr), 0, 1); -- fprintf (stream, "#%s", fpstr); -- } -- else -- fprintf (stream, "#%s", fp_immediate_constant (x)); -+ { -+ char fpstr[20]; -+ real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x), -+ sizeof (fpstr), 0, 1); -+ fprintf (stream, "#%s", fpstr); -+ } - break; - - default: -@@ -22564,6 +22956,9 @@ - || (TARGET_HARD_FLOAT && TARGET_VFP - && regno == VFPCC_REGNUM)); - -+ if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC) -+ return false; -+ - if (TARGET_THUMB1) - /* For the Thumb we only allow values bigger than SImode in - registers 0 - 6, so that there is always a second low -@@ -22609,13 +23004,20 @@ - } - - /* We allow almost any value to be stored in the general registers. -- Restrict doubleword quantities to even register pairs so that we can -- use ldrd. Do not allow very large Neon structure opaque modes in -- general registers; they would use too many. */ -+ Restrict doubleword quantities to even register pairs in ARM state -+ so that we can use ldrd. Do not allow very large Neon structure -+ opaque modes in general registers; they would use too many. */ - if (regno <= LAST_ARM_REGNUM) -- return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0) -- && ARM_NUM_REGS (mode) <= 4; -+ { -+ if (ARM_NUM_REGS (mode) > 4) -+ return FALSE; - -+ if (TARGET_THUMB2) -+ return TRUE; -+ -+ return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0); -+ } -+ - if (regno == FRAME_POINTER_REGNUM - || regno == ARG_POINTER_REGNUM) - /* We only allow integers in the fake hard registers. 
*/ -@@ -22653,6 +23055,9 @@ - enum reg_class - arm_regno_class (int regno) - { -+ if (regno == PC_REGNUM) -+ return NO_REGS; -+ - if (TARGET_THUMB1) - { - if (regno == STACK_POINTER_REGNUM) -@@ -22826,10 +23231,12 @@ - NEON_BINOP, - NEON_TERNOP, - NEON_UNOP, -+ NEON_BSWAP, - NEON_GETLANE, - NEON_SETLANE, - NEON_CREATE, - NEON_RINT, -+ NEON_COPYSIGNF, - NEON_DUP, - NEON_DUPLANE, - NEON_COMBINE, -@@ -22847,7 +23254,6 @@ - NEON_FLOAT_NARROW, - NEON_FIXCONV, - NEON_SELECT, -- NEON_RESULTPAIR, - NEON_REINTERP, - NEON_VTBL, - NEON_VTBX, -@@ -23216,6 +23622,9 @@ - ARM_BUILTIN_CRC32CH, - ARM_BUILTIN_CRC32CW, - -+ ARM_BUILTIN_GET_FPSCR, -+ ARM_BUILTIN_SET_FPSCR, -+ - #undef CRYPTO1 - #undef CRYPTO2 - #undef CRYPTO3 -@@ -23293,14 +23702,19 @@ - - tree V8QI_type_node; - tree V4HI_type_node; -+ tree V4UHI_type_node; - tree V4HF_type_node; - tree V2SI_type_node; -+ tree V2USI_type_node; - tree V2SF_type_node; - tree V16QI_type_node; - tree V8HI_type_node; -+ tree V8UHI_type_node; - tree V4SI_type_node; -+ tree V4USI_type_node; - tree V4SF_type_node; - tree V2DI_type_node; -+ tree V2UDI_type_node; - - tree intUQI_type_node; - tree intUHI_type_node; -@@ -23312,27 +23726,6 @@ - tree intCI_type_node; - tree intXI_type_node; - -- tree V8QI_pointer_node; -- tree V4HI_pointer_node; -- tree V2SI_pointer_node; -- tree V2SF_pointer_node; -- tree V16QI_pointer_node; -- tree V8HI_pointer_node; -- tree V4SI_pointer_node; -- tree V4SF_pointer_node; -- tree V2DI_pointer_node; -- -- tree void_ftype_pv8qi_v8qi_v8qi; -- tree void_ftype_pv4hi_v4hi_v4hi; -- tree void_ftype_pv2si_v2si_v2si; -- tree void_ftype_pv2sf_v2sf_v2sf; -- tree void_ftype_pdi_di_di; -- tree void_ftype_pv16qi_v16qi_v16qi; -- tree void_ftype_pv8hi_v8hi_v8hi; -- tree void_ftype_pv4si_v4si_v4si; -- tree void_ftype_pv4sf_v4sf_v4sf; -- tree void_ftype_pv2di_v2di_v2di; -- - tree reinterp_ftype_dreg[NUM_DREG_TYPES][NUM_DREG_TYPES]; - tree reinterp_ftype_qreg[NUM_QREG_TYPES][NUM_QREG_TYPES]; - tree dreg_types[NUM_DREG_TYPES], qreg_types[NUM_QREG_TYPES]; -@@ -23396,6 +23789,12 @@ - const_intDI_pointer_node = build_pointer_type (const_intDI_node); - const_float_pointer_node = build_pointer_type (const_float_node); - -+ /* Unsigned integer types for various mode sizes. */ -+ intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode)); -+ intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode)); -+ intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode)); -+ intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode)); -+ neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode)); - /* Now create vector types based on our NEON element types. */ - /* 64-bit vectors. */ - V8QI_type_node = -@@ -23402,10 +23801,14 @@ - build_vector_type_for_mode (neon_intQI_type_node, V8QImode); - V4HI_type_node = - build_vector_type_for_mode (neon_intHI_type_node, V4HImode); -+ V4UHI_type_node = -+ build_vector_type_for_mode (intUHI_type_node, V4HImode); - V4HF_type_node = - build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode); - V2SI_type_node = - build_vector_type_for_mode (neon_intSI_type_node, V2SImode); -+ V2USI_type_node = -+ build_vector_type_for_mode (intUSI_type_node, V2SImode); - V2SF_type_node = - build_vector_type_for_mode (neon_float_type_node, V2SFmode); - /* 128-bit vectors. 
*/ -@@ -23413,21 +23816,20 @@ - build_vector_type_for_mode (neon_intQI_type_node, V16QImode); - V8HI_type_node = - build_vector_type_for_mode (neon_intHI_type_node, V8HImode); -+ V8UHI_type_node = -+ build_vector_type_for_mode (intUHI_type_node, V8HImode); - V4SI_type_node = - build_vector_type_for_mode (neon_intSI_type_node, V4SImode); -+ V4USI_type_node = -+ build_vector_type_for_mode (intUSI_type_node, V4SImode); - V4SF_type_node = - build_vector_type_for_mode (neon_float_type_node, V4SFmode); - V2DI_type_node = - build_vector_type_for_mode (neon_intDI_type_node, V2DImode); -+ V2UDI_type_node = -+ build_vector_type_for_mode (intUDI_type_node, V2DImode); - -- /* Unsigned integer types for various mode sizes. */ -- intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode)); -- intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode)); -- intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode)); -- intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode)); -- neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode)); - -- - (*lang_hooks.types.register_builtin_type) (intUQI_type_node, - "__builtin_neon_uqi"); - (*lang_hooks.types.register_builtin_type) (intUHI_type_node, -@@ -23458,53 +23860,8 @@ - (*lang_hooks.types.register_builtin_type) (intXI_type_node, - "__builtin_neon_xi"); - -- /* Pointers to vector types. */ -- V8QI_pointer_node = build_pointer_type (V8QI_type_node); -- V4HI_pointer_node = build_pointer_type (V4HI_type_node); -- V2SI_pointer_node = build_pointer_type (V2SI_type_node); -- V2SF_pointer_node = build_pointer_type (V2SF_type_node); -- V16QI_pointer_node = build_pointer_type (V16QI_type_node); -- V8HI_pointer_node = build_pointer_type (V8HI_type_node); -- V4SI_pointer_node = build_pointer_type (V4SI_type_node); -- V4SF_pointer_node = build_pointer_type (V4SF_type_node); -- V2DI_pointer_node = build_pointer_type (V2DI_type_node); -- -- /* Operations which return results as pairs. 
*/ -- void_ftype_pv8qi_v8qi_v8qi = -- build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node, -- V8QI_type_node, NULL); -- void_ftype_pv4hi_v4hi_v4hi = -- build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node, -- V4HI_type_node, NULL); -- void_ftype_pv2si_v2si_v2si = -- build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node, -- V2SI_type_node, NULL); -- void_ftype_pv2sf_v2sf_v2sf = -- build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node, -- V2SF_type_node, NULL); -- void_ftype_pdi_di_di = -- build_function_type_list (void_type_node, intDI_pointer_node, -- neon_intDI_type_node, neon_intDI_type_node, NULL); -- void_ftype_pv16qi_v16qi_v16qi = -- build_function_type_list (void_type_node, V16QI_pointer_node, -- V16QI_type_node, V16QI_type_node, NULL); -- void_ftype_pv8hi_v8hi_v8hi = -- build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node, -- V8HI_type_node, NULL); -- void_ftype_pv4si_v4si_v4si = -- build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node, -- V4SI_type_node, NULL); -- void_ftype_pv4sf_v4sf_v4sf = -- build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node, -- V4SF_type_node, NULL); -- void_ftype_pv2di_v2di_v2di = -- build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node, -- V2DI_type_node, NULL); -- - if (TARGET_CRYPTO && TARGET_HARD_FLOAT) - { -- tree V4USI_type_node = -- build_vector_type_for_mode (intUSI_type_node, V4SImode); - - tree V16UQI_type_node = - build_vector_type_for_mode (intUQI_type_node, V16QImode); -@@ -23790,25 +24147,6 @@ - } - break; - -- case NEON_RESULTPAIR: -- { -- switch (insn_data[d->code].operand[1].mode) -- { -- case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break; -- case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break; -- case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break; -- case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break; -- case DImode: ftype = void_ftype_pdi_di_di; break; -- case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break; -- case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break; -- case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break; -- case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break; -- case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break; -- default: gcc_unreachable (); -- } -- } -- break; -- - case NEON_REINTERP: - { - /* We iterate over NUM_DREG_TYPES doubleword types, -@@ -23868,6 +24206,47 @@ - ftype = build_function_type_list (return_type, eltype, NULL); - break; - } -+ case NEON_BSWAP: -+ { -+ tree eltype = NULL_TREE; -+ switch (insn_data[d->code].operand[1].mode) -+ { -+ case V4HImode: -+ eltype = V4UHI_type_node; -+ break; -+ case V8HImode: -+ eltype = V8UHI_type_node; -+ break; -+ case V2SImode: -+ eltype = V2USI_type_node; -+ break; -+ case V4SImode: -+ eltype = V4USI_type_node; -+ break; -+ case V2DImode: -+ eltype = V2UDI_type_node; -+ break; -+ default: gcc_unreachable (); -+ } -+ ftype = build_function_type_list (eltype, eltype, NULL); -+ break; -+ } -+ case NEON_COPYSIGNF: -+ { -+ tree eltype = NULL_TREE; -+ switch (insn_data[d->code].operand[1].mode) -+ { -+ case V2SFmode: -+ eltype = V2SF_type_node; -+ break; -+ case V4SFmode: -+ eltype = V4SF_type_node; -+ break; -+ default: gcc_unreachable (); -+ } -+ ftype = build_function_type_list (eltype, eltype, NULL); -+ break; -+ } - default: - gcc_unreachable (); - } -@@ -24014,6 +24393,15 @@ - IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ) - IWMMXT_BUILTIN2 
(iwmmxt_wmacsz, WMACSZ) - -+ -+#define FP_BUILTIN(L, U) \ -+ {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \ -+ UNKNOWN, 0}, -+ -+ FP_BUILTIN (get_fpscr, GET_FPSCR) -+ FP_BUILTIN (set_fpscr, SET_FPSCR) -+#undef FP_BUILTIN -+ - #define CRC32_BUILTIN(L, U) \ - {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \ - UNKNOWN, 0}, -@@ -24528,6 +24916,21 @@ - - if (TARGET_CRC32) - arm_init_crc32_builtins (); -+ -+ if (TARGET_VFP && TARGET_HARD_FLOAT) -+ { -+ tree ftype_set_fpscr -+ = build_function_type_list (void_type_node, unsigned_type_node, NULL); -+ tree ftype_get_fpscr -+ = build_function_type_list (unsigned_type_node, NULL); -+ -+ arm_builtin_decls[ARM_BUILTIN_GET_FPSCR] -+ = add_builtin_function ("__builtin_arm_ldfscr", ftype_get_fpscr, -+ ARM_BUILTIN_GET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE); -+ arm_builtin_decls[ARM_BUILTIN_SET_FPSCR] -+ = add_builtin_function ("__builtin_arm_stfscr", ftype_set_fpscr, -+ ARM_BUILTIN_SET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE); -+ } - } - - /* Return the ARM builtin for CODE. */ -@@ -25042,20 +25445,17 @@ - case NEON_SPLIT: - case NEON_FLOAT_WIDEN: - case NEON_FLOAT_NARROW: -+ case NEON_BSWAP: - case NEON_REINTERP: - return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, - NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); - -+ case NEON_COPYSIGNF: - case NEON_COMBINE: - case NEON_VTBL: - return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, - NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); - -- case NEON_RESULTPAIR: -- return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode, -- NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, -- NEON_ARG_STOP); -- - case NEON_LANEMUL: - case NEON_LANEMULL: - case NEON_LANEMULH: -@@ -25117,24 +25517,6 @@ - emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src)); - } - --/* Emit code to place a Neon pair result in memory locations (with equal -- registers). */ --void --neon_emit_pair_result_insn (enum machine_mode mode, -- rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr, -- rtx op1, rtx op2) --{ -- rtx mem = gen_rtx_MEM (mode, destaddr); -- rtx tmp1 = gen_reg_rtx (mode); -- rtx tmp2 = gen_reg_rtx (mode); -- -- emit_insn (intfn (tmp1, op1, op2, tmp2)); -- -- emit_move_insn (mem, tmp1); -- mem = adjust_address (mem, mode, GET_MODE_SIZE (mode)); -- emit_move_insn (mem, tmp2); --} -- - /* Set up OPERANDS for a register copy from SRC to DEST, taking care - not to early-clobber SRC registers in the process. 
- -@@ -25255,6 +25637,25 @@ - - switch (fcode) - { -+ case ARM_BUILTIN_GET_FPSCR: -+ case ARM_BUILTIN_SET_FPSCR: -+ if (fcode == ARM_BUILTIN_GET_FPSCR) -+ { -+ icode = CODE_FOR_get_fpscr; -+ target = gen_reg_rtx (SImode); -+ pat = GEN_FCN (icode) (target); -+ } -+ else -+ { -+ target = NULL_RTX; -+ icode = CODE_FOR_set_fpscr; -+ arg0 = CALL_EXPR_ARG (exp, 0); -+ op0 = expand_normal (arg0); -+ pat = GEN_FCN (icode) (op0); -+ } -+ emit_insn (pat); -+ return target; -+ - case ARM_BUILTIN_TEXTRMSB: - case ARM_BUILTIN_TEXTRMUB: - case ARM_BUILTIN_TEXTRMSH: -@@ -25888,7 +26289,7 @@ - int pops_needed; - unsigned available; - unsigned required; -- int mode; -+ enum machine_mode mode; - int size; - int restore_a4 = FALSE; - -@@ -29555,10 +29956,10 @@ - { - enum machine_mode in_mode, out_mode; - int in_n, out_n; -+ bool out_unsigned_p = TYPE_UNSIGNED (type_out); - - if (TREE_CODE (type_out) != VECTOR_TYPE -- || TREE_CODE (type_in) != VECTOR_TYPE -- || !(TARGET_NEON && TARGET_FPU_ARMV8 && flag_unsafe_math_optimizations)) -+ || TREE_CODE (type_in) != VECTOR_TYPE) - return NULL_TREE; - - out_mode = TYPE_MODE (TREE_TYPE (type_out)); -@@ -29570,7 +29971,13 @@ - decl of the vectorized builtin for the appropriate vector mode. - NULL_TREE is returned if no such builtin is available. */ - #undef ARM_CHECK_BUILTIN_MODE --#define ARM_CHECK_BUILTIN_MODE(C) \ -+#define ARM_CHECK_BUILTIN_MODE(C) \ -+ (TARGET_NEON && TARGET_FPU_ARMV8 \ -+ && flag_unsafe_math_optimizations \ -+ && ARM_CHECK_BUILTIN_MODE_1 (C)) -+ -+#undef ARM_CHECK_BUILTIN_MODE_1 -+#define ARM_CHECK_BUILTIN_MODE_1(C) \ - (out_mode == SFmode && out_n == C \ - && in_mode == SFmode && in_n == C) - -@@ -29595,6 +30002,67 @@ - return ARM_FIND_VRINT_VARIANT (vrintz); - case BUILT_IN_ROUNDF: - return ARM_FIND_VRINT_VARIANT (vrinta); -+#undef ARM_CHECK_BUILTIN_MODE_1 -+#define ARM_CHECK_BUILTIN_MODE_1(C) \ -+ (out_mode == SImode && out_n == C \ -+ && in_mode == SFmode && in_n == C) -+ -+#define ARM_FIND_VCVT_VARIANT(N) \ -+ (ARM_CHECK_BUILTIN_MODE (2) \ -+ ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sfv2si, false) \ -+ : (ARM_CHECK_BUILTIN_MODE (4) \ -+ ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sfv4si, false) \ -+ : NULL_TREE)) -+ -+#define ARM_FIND_VCVTU_VARIANT(N) \ -+ (ARM_CHECK_BUILTIN_MODE (2) \ -+ ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv2sfv2si, false) \ -+ : (ARM_CHECK_BUILTIN_MODE (4) \ -+ ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv4sfv4si, false) \ -+ : NULL_TREE)) -+ case BUILT_IN_LROUNDF: -+ return out_unsigned_p -+ ? ARM_FIND_VCVTU_VARIANT (vcvta) -+ : ARM_FIND_VCVT_VARIANT (vcvta); -+ case BUILT_IN_LCEILF: -+ return out_unsigned_p -+ ? ARM_FIND_VCVTU_VARIANT (vcvtp) -+ : ARM_FIND_VCVT_VARIANT (vcvtp); -+ case BUILT_IN_LFLOORF: -+ return out_unsigned_p -+ ? 
ARM_FIND_VCVTU_VARIANT (vcvtm) -+ : ARM_FIND_VCVT_VARIANT (vcvtm); -+#undef ARM_CHECK_BUILTIN_MODE -+#define ARM_CHECK_BUILTIN_MODE(C, N) \ -+ (out_mode == N##mode && out_n == C \ -+ && in_mode == N##mode && in_n == C) -+ case BUILT_IN_BSWAP16: -+ if (ARM_CHECK_BUILTIN_MODE (4, HI)) -+ return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4hi, false); -+ else if (ARM_CHECK_BUILTIN_MODE (8, HI)) -+ return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv8hi, false); -+ else -+ return NULL_TREE; -+ case BUILT_IN_BSWAP32: -+ if (ARM_CHECK_BUILTIN_MODE (2, SI)) -+ return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2si, false); -+ else if (ARM_CHECK_BUILTIN_MODE (4, SI)) -+ return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4si, false); -+ else -+ return NULL_TREE; -+ case BUILT_IN_BSWAP64: -+ if (ARM_CHECK_BUILTIN_MODE (2, DI)) -+ return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2di, false); -+ else -+ return NULL_TREE; -+ case BUILT_IN_COPYSIGNF: -+ if (ARM_CHECK_BUILTIN_MODE (2, SF)) -+ return arm_builtin_decl (ARM_BUILTIN_NEON_copysignfv2sf, false); -+ else if (ARM_CHECK_BUILTIN_MODE (4, SF)) -+ return arm_builtin_decl (ARM_BUILTIN_NEON_copysignfv4sf, false); -+ else -+ return NULL_TREE; -+ - default: - return NULL_TREE; - } -@@ -29601,9 +30069,12 @@ - } - return NULL_TREE; - } -+#undef ARM_FIND_VCVT_VARIANT -+#undef ARM_FIND_VCVTU_VARIANT - #undef ARM_CHECK_BUILTIN_MODE - #undef ARM_FIND_VRINT_VARIANT - -+ - /* The AAPCS sets the maximum alignment of a vector to 64 bits. */ - static HOST_WIDE_INT - arm_vector_alignment (const_tree type) -@@ -31174,6 +31645,75 @@ - return false; - } - -+static void -+arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) -+{ -+ const unsigned ARM_FE_INVALID = 1; -+ const unsigned ARM_FE_DIVBYZERO = 2; -+ const unsigned ARM_FE_OVERFLOW = 4; -+ const unsigned ARM_FE_UNDERFLOW = 8; -+ const unsigned ARM_FE_INEXACT = 16; -+ const unsigned HOST_WIDE_INT ARM_FE_ALL_EXCEPT = (ARM_FE_INVALID -+ | ARM_FE_DIVBYZERO -+ | ARM_FE_OVERFLOW -+ | ARM_FE_UNDERFLOW -+ | ARM_FE_INEXACT); -+ const unsigned HOST_WIDE_INT ARM_FE_EXCEPT_SHIFT = 8; -+ tree fenv_var, get_fpscr, set_fpscr, mask, ld_fenv, masked_fenv; -+ tree new_fenv_var, reload_fenv, restore_fnenv; -+ tree update_call, atomic_feraiseexcept, hold_fnclex; -+ -+ if (!TARGET_VFP || !TARGET_HARD_FLOAT) -+ return; -+ -+ /* Generate the equivalent of : -+ unsigned int fenv_var; -+ fenv_var = __builtin_arm_get_fpscr (); -+ -+ unsigned int masked_fenv; -+ masked_fenv = fenv_var & mask; -+ -+ __builtin_arm_set_fpscr (masked_fenv); */ -+ -+ fenv_var = create_tmp_var (unsigned_type_node, NULL); -+ get_fpscr = arm_builtin_decls[ARM_BUILTIN_GET_FPSCR]; -+ set_fpscr = arm_builtin_decls[ARM_BUILTIN_SET_FPSCR]; -+ mask = build_int_cst (unsigned_type_node, -+ ~((ARM_FE_ALL_EXCEPT << ARM_FE_EXCEPT_SHIFT) -+ | ARM_FE_ALL_EXCEPT)); -+ ld_fenv = build2 (MODIFY_EXPR, unsigned_type_node, -+ fenv_var, build_call_expr (get_fpscr, 0)); -+ masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask); -+ hold_fnclex = build_call_expr (set_fpscr, 1, masked_fenv); -+ *hold = build2 (COMPOUND_EXPR, void_type_node, -+ build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv), -+ hold_fnclex); -+ -+ /* Store the value of masked_fenv to clear the exceptions: -+ __builtin_arm_set_fpscr (masked_fenv); */ -+ -+ *clear = build_call_expr (set_fpscr, 1, masked_fenv); -+ -+ /* Generate the equivalent of : -+ unsigned int new_fenv_var; -+ new_fenv_var = __builtin_arm_get_fpscr (); -+ -+ __builtin_arm_set_fpscr (fenv_var); -+ -+ 
__atomic_feraiseexcept (new_fenv_var); */ -+ -+ new_fenv_var = create_tmp_var (unsigned_type_node, NULL); -+ reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, new_fenv_var, -+ build_call_expr (get_fpscr, 0)); -+ restore_fnenv = build_call_expr (set_fpscr, 1, fenv_var); -+ atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT); -+ update_call = build_call_expr (atomic_feraiseexcept, 1, -+ fold_convert (integer_type_node, new_fenv_var)); -+ *update = build2 (COMPOUND_EXPR, void_type_node, -+ build2 (COMPOUND_EXPR, void_type_node, -+ reload_fenv, restore_fnenv), update_call); -+} -+ - /* return TRUE if x is a reference to a value in a constant pool */ - extern bool - arm_is_constant_pool_ref (rtx x) ---- a/src/gcc/config/arm/arm.h -+++ b/src/gcc/config/arm/arm.h -@@ -166,7 +166,10 @@ - builtin_define ("__ARM_EABI__"); \ - } \ - if (TARGET_IDIV) \ -- builtin_define ("__ARM_ARCH_EXT_IDIV__"); \ -+ { \ -+ builtin_define ("__ARM_ARCH_EXT_IDIV__"); \ -+ builtin_define ("__ARM_FEATURE_IDIV"); \ -+ } \ - } while (0) - - #include "config/arm/arm-opts.h" -@@ -298,6 +301,9 @@ - /* FPU supports VFPv3 instructions. */ - #define TARGET_VFP3 (TARGET_VFP && arm_fpu_desc->rev >= 3) - -+/* FPU supports FPv5 instructions. */ -+#define TARGET_VFP5 (TARGET_VFP && arm_fpu_desc->rev >= 5) -+ - /* FPU only supports VFP single-precision instructions. */ - #define TARGET_VFP_SINGLE (TARGET_VFP && arm_fpu_desc->regs == VFP_REG_SINGLE) - -@@ -442,9 +448,6 @@ - #define TARGET_DEFAULT_FLOAT_ABI ARM_FLOAT_ABI_SOFT - #endif - --#define LARGEST_EXPONENT_IS_NORMAL(bits) \ -- ((bits) == 16 && arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE) -- - #ifndef ARM_DEFAULT_ABI - #define ARM_DEFAULT_ABI ARM_ABI_APCS - #endif ---- a/src/gcc/config/arm/unspecs.md -+++ b/src/gcc/config/arm/unspecs.md -@@ -143,6 +143,8 @@ - VUNSPEC_SLX ; Represent a store-register-release-exclusive. - VUNSPEC_LDA ; Represent a store-register-acquire. - VUNSPEC_STL ; Represent a store-register-release. -+ VUNSPEC_GET_FPSCR ; Represent fetch of FPSCR content. -+ VUNSPEC_SET_FPSCR ; Represent assign of FPSCR content. - ]) - - ;; Enumerators for NEON unspecs. ---- a/src/gcc/config/arm/cortex-m4.md -+++ b/src/gcc/config/arm/cortex-m4.md -@@ -34,7 +34,7 @@ - (ior (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ - alu_reg,alus_reg,logic_reg,logics_reg,\ - adc_imm,adcs_imm,adc_reg,adcs_reg,\ -- adr,bfm,rev,\ -+ adr,bfm,clz,rbit,rev,\ - shift_imm,shift_reg,extend,\ - alu_shift_imm,alus_shift_imm,\ - logic_shift_imm,logics_shift_imm,\ ---- a/src/gcc/config/arm/arm-modes.def -+++ b/src/gcc/config/arm/arm-modes.def -@@ -21,9 +21,6 @@ - along with GCC; see the file COPYING3. If not see - <http://www.gnu.org/licenses/>. */ - --/* Extended precision floating point. -- FIXME What format is this? 
*/ --FLOAT_MODE (XF, 12, 0); - - /* Half-precision floating point */ - FLOAT_MODE (HF, 2, 0); ---- a/src/gcc/config/arm/arm-cores.def -+++ b/src/gcc/config/arm/arm-cores.def -@@ -141,7 +141,7 @@ - ARM_CORE("generic-armv7-a", genericv7a, genericv7a, 7A, FL_LDSCHED, cortex) - ARM_CORE("cortex-a5", cortexa5, cortexa5, 7A, FL_LDSCHED, cortex_a5) - ARM_CORE("cortex-a7", cortexa7, cortexa7, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a7) --ARM_CORE("cortex-a8", cortexa8, cortexa8, 7A, FL_LDSCHED, cortex) -+ARM_CORE("cortex-a8", cortexa8, cortexa8, 7A, FL_LDSCHED, cortex_a8) - ARM_CORE("cortex-a9", cortexa9, cortexa9, 7A, FL_LDSCHED, cortex_a9) - ARM_CORE("cortex-a12", cortexa12, cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a12) - ARM_CORE("cortex-a15", cortexa15, cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a15) -@@ -149,6 +149,7 @@ - ARM_CORE("cortex-r4f", cortexr4f, cortexr4f, 7R, FL_LDSCHED, cortex) - ARM_CORE("cortex-r5", cortexr5, cortexr5, 7R, FL_LDSCHED | FL_ARM_DIV, cortex) - ARM_CORE("cortex-r7", cortexr7, cortexr7, 7R, FL_LDSCHED | FL_ARM_DIV, cortex) -+ARM_CORE("cortex-m7", cortexm7, cortexm7, 7EM, FL_LDSCHED, v7m) - ARM_CORE("cortex-m4", cortexm4, cortexm4, 7EM, FL_LDSCHED, v7m) - ARM_CORE("cortex-m3", cortexm3, cortexm3, 7M, FL_LDSCHED, v7m) - ARM_CORE("marvell-pj4", marvell_pj4, marvell_pj4, 7A, FL_LDSCHED, 9e) ---- a/src/gcc/config/arm/cortex-r4.md -+++ b/src/gcc/config/arm/cortex-r4.md -@@ -81,7 +81,7 @@ - (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ - alu_reg,alus_reg,logic_reg,logics_reg,\ - adc_imm,adcs_imm,adc_reg,adcs_reg,\ -- adr,bfm,rev,\ -+ adr,bfm,clz,rbit,rev,\ - shift_imm,shift_reg,mvn_imm,mvn_reg")) - "cortex_r4_alu") - ---- a/src/gcc/config/arm/arm-tune.md -+++ b/src/gcc/config/arm/arm-tune.md -@@ -28,7 +28,8 @@ - genericv7a,cortexa5,cortexa7, - cortexa8,cortexa9,cortexa12, - cortexa15,cortexr4,cortexr4f, -- cortexr5,cortexr7,cortexm4, -- cortexm3,marvell_pj4,cortexa15cortexa7, -- cortexa53,cortexa57,cortexa57cortexa53" -+ cortexr5,cortexr7,cortexm7, -+ cortexm4,cortexm3,marvell_pj4, -+ cortexa15cortexa7,cortexa53,cortexa57, -+ cortexa57cortexa53" - (const (symbol_ref "((enum attr_tune) arm_tune)"))) ---- a/src/gcc/config/arm/arm-protos.h -+++ b/src/gcc/config/arm/arm-protos.h -@@ -126,7 +126,6 @@ - extern int arm_const_double_inline_cost (rtx); - extern bool arm_const_double_by_parts (rtx); - extern bool arm_const_double_by_immediates (rtx); --extern const char *fp_immediate_constant (rtx); - extern void arm_emit_call_insn (rtx, rtx); - extern const char *output_call (rtx *); - extern const char *output_call_mem (rtx *); -@@ -150,7 +149,7 @@ - extern int arm_emit_vector_const (FILE *, rtx); - extern void arm_emit_fp16_const (rtx c); - extern const char * arm_output_load_gr (rtx *); --extern const char *vfp_output_fstmd (rtx *); -+extern const char *vfp_output_vstmd (rtx *); - extern void arm_output_multireg_pop (rtx *, bool, rtx, bool, bool); - extern void arm_set_return_address (rtx, rtx); - extern int arm_eliminable_register (rtx); -@@ -273,6 +272,11 @@ - const struct cpu_vec_costs* vec_costs; - /* Prefer Neon for 64-bit bitops. */ - bool prefer_neon_for_64bits; -+ /* Prefer 32-bit encoding instead of flag-setting 16-bit encoding. */ -+ bool disparage_flag_setting_t16_encodings; -+ /* Prefer 32-bit encoding instead of 16-bit encoding where subset of flags -+ would be set. 
*/ -+ bool disparage_partial_flag_setting_t16_encodings; - }; - - extern const struct tune_params *current_tune; ---- a/src/gcc/config/arm/vfp.md -+++ b/src/gcc/config/arm/vfp.md -@@ -41,11 +41,11 @@ - case 5: - return \"str%?\\t%1, %0\"; - case 6: -- return \"fmsr%?\\t%0, %1\\t%@ int\"; -+ return \"vmov%?\\t%0, %1\\t%@ int\"; - case 7: -- return \"fmrs%?\\t%0, %1\\t%@ int\"; -+ return \"vmov%?\\t%0, %1\\t%@ int\"; - case 8: -- return \"fcpys%?\\t%0, %1\\t%@ int\"; -+ return \"vmov%?.f32\\t%0, %1\\t%@ int\"; - case 9: case 10: - return output_move_vfp (operands); - default: -@@ -87,11 +87,11 @@ - case 8: - return \"str%?\\t%1, %0\"; - case 9: -- return \"fmsr%?\\t%0, %1\\t%@ int\"; -+ return \"vmov%?\\t%0, %1\\t%@ int\"; - case 10: -- return \"fmrs%?\\t%0, %1\\t%@ int\"; -+ return \"vmov%?\\t%0, %1\\t%@ int\"; - case 11: -- return \"fcpys%?\\t%0, %1\\t%@ int\"; -+ return \"vmov%?.f32\\t%0, %1\\t%@ int\"; - case 12: case 13: - return output_move_vfp (operands); - default: -@@ -100,7 +100,7 @@ - " - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no,no,no,no,no,no") -- (set_attr "type" "mov_reg,mov_reg,mov_reg,mvn_reg,mov_reg,load1,load1,store1,store1,f_mcr,f_mrc,fmov,f_loads,f_stores") -+ (set_attr "type" "mov_reg,mov_reg,mov_reg,mvn_reg,mov_imm,load1,load1,store1,store1,f_mcr,f_mrc,fmov,f_loads,f_stores") - (set_attr "length" "2,4,2,4,4,4,4,4,4,4,4,4,4,4") - (set_attr "pool_range" "*,*,*,*,*,1018,4094,*,*,*,*,*,1018,*") - (set_attr "neg_pool_range" "*,*,*,*,*, 0, 0,*,*,*,*,*,1008,*")] -@@ -130,14 +130,14 @@ - case 6: - return output_move_double (operands, true, NULL); - case 7: -- return \"fmdrr%?\\t%P0, %Q1, %R1\\t%@ int\"; -+ return \"vmov%?\\t%P0, %Q1, %R1\\t%@ int\"; - case 8: -- return \"fmrrd%?\\t%Q0, %R0, %P1\\t%@ int\"; -+ return \"vmov%?\\t%Q0, %R0, %P1\\t%@ int\"; - case 9: - if (TARGET_VFP_SINGLE) -- return \"fcpys%?\\t%0, %1\\t%@ int\;fcpys%?\\t%p0, %p1\\t%@ int\"; -+ return \"vmov%?.f32\\t%0, %1\\t%@ int\;vmov%?.f32\\t%p0, %p1\\t%@ int\"; - else -- return \"fcpyd%?\\t%P0, %P1\\t%@ int\"; -+ return \"vmov%?.f64\\t%P0, %P1\\t%@ int\"; - case 10: case 11: - return output_move_vfp (operands); - default: -@@ -181,11 +181,11 @@ - case 6: - return output_move_double (operands, true, NULL); - case 7: -- return \"fmdrr%?\\t%P0, %Q1, %R1\\t%@ int\"; -+ return \"vmov%?\\t%P0, %Q1, %R1\\t%@ int\"; - case 8: -- return \"fmrrd%?\\t%Q0, %R0, %P1\\t%@ int\"; -+ return \"vmov%?\\t%Q0, %R0, %P1\\t%@ int\"; - case 9: -- return \"fcpyd%?\\t%P0, %P1\\t%@ int\"; -+ return \"vmov%?.f64\\t%P0, %P1\\t%@ int\"; - case 10: case 11: - return output_move_vfp (operands); - default: -@@ -229,13 +229,13 @@ - case 3: /* memory from ARM register */ - return \"strh\\t%1, %0\\t%@ __fp16\"; - case 4: /* S register from S register */ -- return \"fcpys\\t%0, %1\"; -+ return \"vmov.f32\\t%0, %1\"; - case 5: /* ARM register from ARM register */ - return \"mov\\t%0, %1\\t%@ __fp16\"; - case 6: /* S register from ARM register */ -- return \"fmsr\\t%0, %1\"; -+ return \"vmov\\t%0, %1\"; - case 7: /* ARM register from S register */ -- return \"fmrs\\t%0, %1\"; -+ return \"vmov\\t%0, %1\"; - case 8: /* ARM register from constant */ - { - REAL_VALUE_TYPE r; -@@ -280,13 +280,13 @@ - case 1: /* memory from ARM register */ - return \"strh\\t%1, %0\\t%@ __fp16\"; - case 2: /* S register from S register */ -- return \"fcpys\\t%0, %1\"; -+ return \"vmov.f32\\t%0, %1\"; - case 3: /* ARM register from ARM register */ - return \"mov\\t%0, %1\\t%@ __fp16\"; - case 4: /* S register from ARM 
register */ -- return \"fmsr\\t%0, %1\"; -+ return \"vmov\\t%0, %1\"; - case 5: /* ARM register from S register */ -- return \"fmrs\\t%0, %1\"; -+ return \"vmov\\t%0, %1\"; - case 6: /* ARM register from constant */ - { - REAL_VALUE_TYPE r; -@@ -322,7 +322,7 @@ - - (define_insn "*movsf_vfp" - [(set (match_operand:SF 0 "nonimmediate_operand" "=t,?r,t ,t ,Uv,r ,m,t,r") -- (match_operand:SF 1 "general_operand" " ?r,t,Dv,UvE,t, mE,r,t,r"))] -+ (match_operand:SF 1 "general_operand" " ?r,t,Dv,UvE,t, mE,r,t,r"))] - "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP - && ( s_register_operand (operands[0], SFmode) - || s_register_operand (operands[1], SFmode))" -@@ -330,11 +330,11 @@ - switch (which_alternative) - { - case 0: -- return \"fmsr%?\\t%0, %1\"; -+ return \"vmov%?\\t%0, %1\"; - case 1: -- return \"fmrs%?\\t%0, %1\"; -+ return \"vmov%?\\t%0, %1\"; - case 2: -- return \"fconsts%?\\t%0, #%G1\"; -+ return \"vmov%?.f32\\t%0, %1\"; - case 3: case 4: - return output_move_vfp (operands); - case 5: -@@ -342,7 +342,7 @@ - case 6: - return \"str%?\\t%1, %0\\t%@ float\"; - case 7: -- return \"fcpys%?\\t%0, %1\"; -+ return \"vmov%?.f32\\t%0, %1\"; - case 8: - return \"mov%?\\t%0, %1\\t%@ float\"; - default: -@@ -366,11 +366,11 @@ - switch (which_alternative) - { - case 0: -- return \"fmsr%?\\t%0, %1\"; -+ return \"vmov%?\\t%0, %1\"; - case 1: -- return \"fmrs%?\\t%0, %1\"; -+ return \"vmov%?\\t%0, %1\"; - case 2: -- return \"fconsts%?\\t%0, #%G1\"; -+ return \"vmov%?.f32\\t%0, %1\"; - case 3: case 4: - return output_move_vfp (operands); - case 5: -@@ -378,7 +378,7 @@ - case 6: - return \"str%?\\t%1, %0\\t%@ float\"; - case 7: -- return \"fcpys%?\\t%0, %1\"; -+ return \"vmov%?.f32\\t%0, %1\"; - case 8: - return \"mov%?\\t%0, %1\\t%@ float\"; - default: -@@ -406,12 +406,12 @@ - switch (which_alternative) - { - case 0: -- return \"fmdrr%?\\t%P0, %Q1, %R1\"; -+ return \"vmov%?\\t%P0, %Q1, %R1\"; - case 1: -- return \"fmrrd%?\\t%Q0, %R0, %P1\"; -+ return \"vmov%?\\t%Q0, %R0, %P1\"; - case 2: - gcc_assert (TARGET_VFP_DOUBLE); -- return \"fconstd%?\\t%P0, #%G1\"; -+ return \"vmov%?.f64\\t%P0, %1\"; - case 3: case 4: - return output_move_vfp (operands); - case 5: case 6: -@@ -418,9 +418,9 @@ - return output_move_double (operands, true, NULL); - case 7: - if (TARGET_VFP_SINGLE) -- return \"fcpys%?\\t%0, %1\;fcpys%?\\t%p0, %p1\"; -+ return \"vmov%?.f32\\t%0, %1\;vmov%?.f32\\t%p0, %p1\"; - else -- return \"fcpyd%?\\t%P0, %P1\"; -+ return \"vmov%?.f64\\t%P0, %P1\"; - case 8: - return \"#\"; - default: -@@ -453,12 +453,12 @@ - switch (which_alternative) - { - case 0: -- return \"fmdrr%?\\t%P0, %Q1, %R1\"; -+ return \"vmov%?\\t%P0, %Q1, %R1\"; - case 1: -- return \"fmrrd%?\\t%Q0, %R0, %P1\"; -+ return \"vmov%?\\t%Q0, %R0, %P1\"; - case 2: - gcc_assert (TARGET_VFP_DOUBLE); -- return \"fconstd%?\\t%P0, #%G1\"; -+ return \"vmov%?.f64\\t%P0, %1\"; - case 3: case 4: - return output_move_vfp (operands); - case 5: case 6: case 8: -@@ -465,9 +465,9 @@ - return output_move_double (operands, true, NULL); - case 7: - if (TARGET_VFP_SINGLE) -- return \"fcpys%?\\t%0, %1\;fcpys%?\\t%p0, %p1\"; -+ return \"vmov%?.f32\\t%0, %1\;vmov%?.f32\\t%p0, %p1\"; - else -- return \"fcpyd%?\\t%P0, %P1\"; -+ return \"vmov%?.f64\\t%P0, %P1\"; - default: - abort (); - } -@@ -498,15 +498,15 @@ - (match_operand:SF 2 "s_register_operand" "t,0,t,?r,0,?r,t,0,t")))] - "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP" - "@ -- fcpys%D3\\t%0, %2 -- fcpys%d3\\t%0, %1 -- fcpys%D3\\t%0, %2\;fcpys%d3\\t%0, %1 -- fmsr%D3\\t%0, %2 -- fmsr%d3\\t%0, %1 -- 
fmsr%D3\\t%0, %2\;fmsr%d3\\t%0, %1 -- fmrs%D3\\t%0, %2 -- fmrs%d3\\t%0, %1 -- fmrs%D3\\t%0, %2\;fmrs%d3\\t%0, %1" -+ vmov%D3.f32\\t%0, %2 -+ vmov%d3.f32\\t%0, %1 -+ vmov%D3.f32\\t%0, %2\;vmov%d3.f32\\t%0, %1 -+ vmov%D3\\t%0, %2 -+ vmov%d3\\t%0, %1 -+ vmov%D3\\t%0, %2\;vmov%d3\\t%0, %1 -+ vmov%D3\\t%0, %2 -+ vmov%d3\\t%0, %1 -+ vmov%D3\\t%0, %2\;vmov%d3\\t%0, %1" - [(set_attr "conds" "use") - (set_attr "length" "4,4,8,4,4,8,4,4,8") - (set_attr "type" "fmov,fmov,fmov,f_mcr,f_mcr,f_mcr,f_mrc,f_mrc,f_mrc")] -@@ -521,15 +521,15 @@ - (match_operand:SF 2 "s_register_operand" "t,0,t,?r,0,?r,t,0,t")))] - "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP && !arm_restrict_it" - "@ -- it\\t%D3\;fcpys%D3\\t%0, %2 -- it\\t%d3\;fcpys%d3\\t%0, %1 -- ite\\t%D3\;fcpys%D3\\t%0, %2\;fcpys%d3\\t%0, %1 -- it\\t%D3\;fmsr%D3\\t%0, %2 -- it\\t%d3\;fmsr%d3\\t%0, %1 -- ite\\t%D3\;fmsr%D3\\t%0, %2\;fmsr%d3\\t%0, %1 -- it\\t%D3\;fmrs%D3\\t%0, %2 -- it\\t%d3\;fmrs%d3\\t%0, %1 -- ite\\t%D3\;fmrs%D3\\t%0, %2\;fmrs%d3\\t%0, %1" -+ it\\t%D3\;vmov%D3.f32\\t%0, %2 -+ it\\t%d3\;vmov%d3.f32\\t%0, %1 -+ ite\\t%D3\;vmov%D3.f32\\t%0, %2\;vmov%d3.f32\\t%0, %1 -+ it\\t%D3\;vmov%D3\\t%0, %2 -+ it\\t%d3\;vmov%d3\\t%0, %1 -+ ite\\t%D3\;vmov%D3\\t%0, %2\;vmov%d3\\t%0, %1 -+ it\\t%D3\;vmov%D3\\t%0, %2 -+ it\\t%d3\;vmov%d3\\t%0, %1 -+ ite\\t%D3\;vmov%D3\\t%0, %2\;vmov%d3\\t%0, %1" - [(set_attr "conds" "use") - (set_attr "length" "6,6,10,6,6,10,6,6,10") - (set_attr "type" "fmov,fmov,fmov,f_mcr,f_mcr,f_mcr,f_mrc,f_mrc,f_mrc")] -@@ -544,15 +544,15 @@ - (match_operand:DF 2 "s_register_operand" "w,0,w,?r,0,?r,w,0,w")))] - "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" - "@ -- fcpyd%D3\\t%P0, %P2 -- fcpyd%d3\\t%P0, %P1 -- fcpyd%D3\\t%P0, %P2\;fcpyd%d3\\t%P0, %P1 -- fmdrr%D3\\t%P0, %Q2, %R2 -- fmdrr%d3\\t%P0, %Q1, %R1 -- fmdrr%D3\\t%P0, %Q2, %R2\;fmdrr%d3\\t%P0, %Q1, %R1 -- fmrrd%D3\\t%Q0, %R0, %P2 -- fmrrd%d3\\t%Q0, %R0, %P1 -- fmrrd%D3\\t%Q0, %R0, %P2\;fmrrd%d3\\t%Q0, %R0, %P1" -+ vmov%D3.f64\\t%P0, %P2 -+ vmov%d3.f64\\t%P0, %P1 -+ vmov%D3.f64\\t%P0, %P2\;vmov%d3.f64\\t%P0, %P1 -+ vmov%D3\\t%P0, %Q2, %R2 -+ vmov%d3\\t%P0, %Q1, %R1 -+ vmov%D3\\t%P0, %Q2, %R2\;vmov%d3\\t%P0, %Q1, %R1 -+ vmov%D3\\t%Q0, %R0, %P2 -+ vmov%d3\\t%Q0, %R0, %P1 -+ vmov%D3\\t%Q0, %R0, %P2\;vmov%d3\\t%Q0, %R0, %P1" - [(set_attr "conds" "use") - (set_attr "length" "4,4,8,4,4,8,4,4,8") - (set_attr "type" "ffarithd,ffarithd,ffarithd,f_mcr,f_mcr,f_mcr,f_mrrc,f_mrrc,f_mrrc")] -@@ -567,15 +567,15 @@ - (match_operand:DF 2 "s_register_operand" "w,0,w,?r,0,?r,w,0,w")))] - "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE && !arm_restrict_it" - "@ -- it\\t%D3\;fcpyd%D3\\t%P0, %P2 -- it\\t%d3\;fcpyd%d3\\t%P0, %P1 -- ite\\t%D3\;fcpyd%D3\\t%P0, %P2\;fcpyd%d3\\t%P0, %P1 -- it\t%D3\;fmdrr%D3\\t%P0, %Q2, %R2 -- it\t%d3\;fmdrr%d3\\t%P0, %Q1, %R1 -- ite\\t%D3\;fmdrr%D3\\t%P0, %Q2, %R2\;fmdrr%d3\\t%P0, %Q1, %R1 -- it\t%D3\;fmrrd%D3\\t%Q0, %R0, %P2 -- it\t%d3\;fmrrd%d3\\t%Q0, %R0, %P1 -- ite\\t%D3\;fmrrd%D3\\t%Q0, %R0, %P2\;fmrrd%d3\\t%Q0, %R0, %P1" -+ it\\t%D3\;vmov%D3.f64\\t%P0, %P2 -+ it\\t%d3\;vmov%d3.f64\\t%P0, %P1 -+ ite\\t%D3\;vmov%D3.f64\\t%P0, %P2\;vmov%d3.f64\\t%P0, %P1 -+ it\t%D3\;vmov%D3\\t%P0, %Q2, %R2 -+ it\t%d3\;vmov%d3\\t%P0, %Q1, %R1 -+ ite\\t%D3\;vmov%D3\\t%P0, %Q2, %R2\;vmov%d3\\t%P0, %Q1, %R1 -+ it\t%D3\;vmov%D3\\t%Q0, %R0, %P2 -+ it\t%d3\;vmov%d3\\t%Q0, %R0, %P1 -+ ite\\t%D3\;vmov%D3\\t%Q0, %R0, %P2\;vmov%d3\\t%Q0, %R0, %P1" - [(set_attr "conds" "use") - (set_attr "length" "6,6,10,6,6,10,6,6,10") - (set_attr "type" 
"ffarithd,ffarithd,ffarithd,f_mcr,f_mcr,f_mcrr,f_mrrc,f_mrrc,f_mrrc")] -@@ -588,7 +588,7 @@ - [(set (match_operand:SF 0 "s_register_operand" "=t") - (abs:SF (match_operand:SF 1 "s_register_operand" "t")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -- "fabss%?\\t%0, %1" -+ "vabs%?.f32\\t%0, %1" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "ffariths")] -@@ -598,7 +598,7 @@ - [(set (match_operand:DF 0 "s_register_operand" "=w") - (abs:DF (match_operand:DF 1 "s_register_operand" "w")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" -- "fabsd%?\\t%P0, %P1" -+ "vabs%?.f64\\t%P0, %P1" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "ffarithd")] -@@ -609,7 +609,7 @@ - (neg:SF (match_operand:SF 1 "s_register_operand" "t,r")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" - "@ -- fnegs%?\\t%0, %1 -+ vneg%?.f32\\t%0, %1 - eor%?\\t%0, %1, #-2147483648" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") -@@ -621,7 +621,7 @@ - (neg:DF (match_operand:DF 1 "s_register_operand" "w,0,r")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" - "@ -- fnegd%?\\t%P0, %P1 -+ vneg%?.f64\\t%P0, %P1 - # - #" - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE && reload_completed -@@ -671,7 +671,7 @@ - (plus:SF (match_operand:SF 1 "s_register_operand" "t") - (match_operand:SF 2 "s_register_operand" "t")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -- "fadds%?\\t%0, %1, %2" -+ "vadd%?.f32\\t%0, %1, %2" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "fadds")] -@@ -682,7 +682,7 @@ - (plus:DF (match_operand:DF 1 "s_register_operand" "w") - (match_operand:DF 2 "s_register_operand" "w")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" -- "faddd%?\\t%P0, %P1, %P2" -+ "vadd%?.f64\\t%P0, %P1, %P2" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "faddd")] -@@ -694,7 +694,7 @@ - (minus:SF (match_operand:SF 1 "s_register_operand" "t") - (match_operand:SF 2 "s_register_operand" "t")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -- "fsubs%?\\t%0, %1, %2" -+ "vsub%?.f32\\t%0, %1, %2" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "fadds")] -@@ -705,7 +705,7 @@ - (minus:DF (match_operand:DF 1 "s_register_operand" "w") - (match_operand:DF 2 "s_register_operand" "w")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" -- "fsubd%?\\t%P0, %P1, %P2" -+ "vsub%?.f64\\t%P0, %P1, %P2" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "faddd")] -@@ -719,7 +719,7 @@ - (div:SF (match_operand:SF 1 "s_register_operand" "t") - (match_operand:SF 2 "s_register_operand" "t")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -- "fdivs%?\\t%0, %1, %2" -+ "vdiv%?.f32\\t%0, %1, %2" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "fdivs")] -@@ -730,7 +730,7 @@ - (div:DF (match_operand:DF 1 "s_register_operand" "w") - (match_operand:DF 2 "s_register_operand" "w")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" -- "fdivd%?\\t%P0, %P1, %P2" -+ "vdiv%?.f64\\t%P0, %P1, %P2" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "fdivd")] -@@ -744,7 +744,7 @@ - (mult:SF (match_operand:SF 1 "s_register_operand" "t") - (match_operand:SF 2 "s_register_operand" "t")))] - "TARGET_32BIT && 
TARGET_HARD_FLOAT && TARGET_VFP" -- "fmuls%?\\t%0, %1, %2" -+ "vmul%?.f32\\t%0, %1, %2" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "fmuls")] -@@ -755,7 +755,7 @@ - (mult:DF (match_operand:DF 1 "s_register_operand" "w") - (match_operand:DF 2 "s_register_operand" "w")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" -- "fmuld%?\\t%P0, %P1, %P2" -+ "vmul%?.f64\\t%P0, %P1, %P2" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "fmuld")] -@@ -766,7 +766,7 @@ - (mult:SF (neg:SF (match_operand:SF 1 "s_register_operand" "t")) - (match_operand:SF 2 "s_register_operand" "t")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -- "fnmuls%?\\t%0, %1, %2" -+ "vnmul%?.f32\\t%0, %1, %2" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "fmuls")] -@@ -777,7 +777,7 @@ - (mult:DF (neg:DF (match_operand:DF 1 "s_register_operand" "w")) - (match_operand:DF 2 "s_register_operand" "w")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" -- "fnmuld%?\\t%P0, %P1, %P2" -+ "vnmul%?.f64\\t%P0, %P1, %P2" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "fmuld")] -@@ -793,7 +793,7 @@ - (match_operand:SF 3 "s_register_operand" "t")) - (match_operand:SF 1 "s_register_operand" "0")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -- "fmacs%?\\t%0, %2, %3" -+ "vmla%?.f32\\t%0, %2, %3" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "fmacs")] -@@ -805,7 +805,7 @@ - (match_operand:DF 3 "s_register_operand" "w")) - (match_operand:DF 1 "s_register_operand" "0")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" -- "fmacd%?\\t%P0, %P2, %P3" -+ "vmla%?.f64\\t%P0, %P2, %P3" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "fmacd")] -@@ -818,7 +818,7 @@ - (match_operand:SF 3 "s_register_operand" "t")) - (match_operand:SF 1 "s_register_operand" "0")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -- "fmscs%?\\t%0, %2, %3" -+ "vnmls%?.f32\\t%0, %2, %3" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "fmacs")] -@@ -830,7 +830,7 @@ - (match_operand:DF 3 "s_register_operand" "w")) - (match_operand:DF 1 "s_register_operand" "0")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" -- "fmscd%?\\t%P0, %P2, %P3" -+ "vnmls%?.f64\\t%P0, %P2, %P3" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "fmacd")] -@@ -843,7 +843,7 @@ - (mult:SF (match_operand:SF 2 "s_register_operand" "t") - (match_operand:SF 3 "s_register_operand" "t"))))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -- "fnmacs%?\\t%0, %2, %3" -+ "vmls%?.f32\\t%0, %2, %3" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "fmacs")] -@@ -855,7 +855,7 @@ - (mult:DF (match_operand:DF 2 "s_register_operand" "w") - (match_operand:DF 3 "s_register_operand" "w"))))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" -- "fnmacd%?\\t%P0, %P2, %P3" -+ "vmls%?.f64\\t%P0, %P2, %P3" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "fmacd")] -@@ -870,7 +870,7 @@ - (match_operand:SF 3 "s_register_operand" "t")) - (match_operand:SF 1 "s_register_operand" "0")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -- "fnmscs%?\\t%0, %2, %3" -+ "vnmla%?.f32\\t%0, %2, %3" - [(set_attr "predicable" 
"yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "fmacs")] -@@ -883,7 +883,7 @@ - (match_operand:DF 3 "s_register_operand" "w")) - (match_operand:DF 1 "s_register_operand" "0")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" -- "fnmscd%?\\t%P0, %P2, %P3" -+ "vnmla%?.f64\\t%P0, %P2, %P3" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "fmacd")] -@@ -948,7 +948,7 @@ - [(set (match_operand:DF 0 "s_register_operand" "=w") - (float_extend:DF (match_operand:SF 1 "s_register_operand" "t")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" -- "fcvtds%?\\t%P0, %1" -+ "vcvt%?.f64.f32\\t%P0, %1" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "f_cvt")] -@@ -958,7 +958,7 @@ - [(set (match_operand:SF 0 "s_register_operand" "=t") - (float_truncate:SF (match_operand:DF 1 "s_register_operand" "w")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" -- "fcvtsd%?\\t%0, %P1" -+ "vcvt%?.f32.f64\\t%0, %P1" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "f_cvt")] -@@ -988,7 +988,7 @@ - [(set (match_operand:SI 0 "s_register_operand" "=t") - (fix:SI (fix:SF (match_operand:SF 1 "s_register_operand" "t"))))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -- "ftosizs%?\\t%0, %1" -+ "vcvt%?.s32.f32\\t%0, %1" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "f_cvtf2i")] -@@ -998,7 +998,7 @@ - [(set (match_operand:SI 0 "s_register_operand" "=t") - (fix:SI (fix:DF (match_operand:DF 1 "s_register_operand" "w"))))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" -- "ftosizd%?\\t%0, %P1" -+ "vcvt%?.s32.f64\\t%0, %P1" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "f_cvtf2i")] -@@ -1009,7 +1009,7 @@ - [(set (match_operand:SI 0 "s_register_operand" "=t") - (unsigned_fix:SI (fix:SF (match_operand:SF 1 "s_register_operand" "t"))))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -- "ftouizs%?\\t%0, %1" -+ "vcvt%?.u32.f32\\t%0, %1" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "f_cvtf2i")] -@@ -1019,7 +1019,7 @@ - [(set (match_operand:SI 0 "s_register_operand" "=t") - (unsigned_fix:SI (fix:DF (match_operand:DF 1 "s_register_operand" "t"))))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" -- "ftouizd%?\\t%0, %P1" -+ "vcvt%?.u32.f64\\t%0, %P1" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "f_cvtf2i")] -@@ -1030,7 +1030,7 @@ - [(set (match_operand:SF 0 "s_register_operand" "=t") - (float:SF (match_operand:SI 1 "s_register_operand" "t")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -- "fsitos%?\\t%0, %1" -+ "vcvt%?.f32.s32\\t%0, %1" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "f_cvti2f")] -@@ -1040,7 +1040,7 @@ - [(set (match_operand:DF 0 "s_register_operand" "=w") - (float:DF (match_operand:SI 1 "s_register_operand" "t")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" -- "fsitod%?\\t%P0, %1" -+ "vcvt%?.f64.s32\\t%P0, %1" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "f_cvti2f")] -@@ -1051,7 +1051,7 @@ - [(set (match_operand:SF 0 "s_register_operand" "=t") - (unsigned_float:SF (match_operand:SI 1 "s_register_operand" "t")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -- "fuitos%?\\t%0, %1" -+ "vcvt%?.f32.u32\\t%0, %1" 
- [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "f_cvti2f")] -@@ -1061,7 +1061,7 @@ - [(set (match_operand:DF 0 "s_register_operand" "=w") - (unsigned_float:DF (match_operand:SI 1 "s_register_operand" "t")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" -- "fuitod%?\\t%P0, %1" -+ "vcvt%?.f64.u32\\t%P0, %1" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "f_cvti2f")] -@@ -1074,7 +1074,7 @@ - [(set (match_operand:SF 0 "s_register_operand" "=t") - (sqrt:SF (match_operand:SF 1 "s_register_operand" "t")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -- "fsqrts%?\\t%0, %1" -+ "vsqrt%?.f32\\t%0, %1" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "fsqrts")] -@@ -1084,7 +1084,7 @@ - [(set (match_operand:DF 0 "s_register_operand" "=w") - (sqrt:DF (match_operand:DF 1 "s_register_operand" "w")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" -- "fsqrtd%?\\t%P0, %P1" -+ "vsqrt%?.f64\\t%P0, %P1" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "fsqrtd")] -@@ -1097,7 +1097,7 @@ - [(set (reg CC_REGNUM) - (reg VFPCC_REGNUM))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -- "fmstat%?" -+ "vmrs%?\\tAPSR_nzcv, FPSCR" - [(set_attr "conds" "set") - (set_attr "type" "f_flag")] - ) -@@ -1165,6 +1165,9 @@ - - ;; Comparison patterns - -+;; In the compare with FP zero case the ARM Architecture Reference Manual -+;; specifies the immediate to be #0.0. However, some buggy assemblers only -+;; accept #0. We don't want to autodetect broken assemblers, so output #0. - (define_insn "*cmpsf_vfp" - [(set (reg:CCFP VFPCC_REGNUM) - (compare:CCFP (match_operand:SF 0 "s_register_operand" "t,t") -@@ -1171,8 +1174,8 @@ - (match_operand:SF 1 "vfp_compare_operand" "t,G")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" - "@ -- fcmps%?\\t%0, %1 -- fcmpzs%?\\t%0" -+ vcmp%?.f32\\t%0, %1 -+ vcmp%?.f32\\t%0, #0" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "fcmps")] -@@ -1184,8 +1187,8 @@ - (match_operand:SF 1 "vfp_compare_operand" "t,G")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" - "@ -- fcmpes%?\\t%0, %1 -- fcmpezs%?\\t%0" -+ vcmpe%?.f32\\t%0, %1 -+ vcmpe%?.f32\\t%0, #0" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "fcmps")] -@@ -1197,8 +1200,8 @@ - (match_operand:DF 1 "vfp_compare_operand" "w,G")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" - "@ -- fcmpd%?\\t%P0, %P1 -- fcmpzd%?\\t%P0" -+ vcmp%?.f64\\t%P0, %P1 -+ vcmp%?.f64\\t%P0, #0" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "fcmpd")] -@@ -1210,8 +1213,8 @@ - (match_operand:DF 1 "vfp_compare_operand" "w,G")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" - "@ -- fcmped%?\\t%P0, %P1 -- fcmpezd%?\\t%P0" -+ vcmpe%?.f64\\t%P0, %P1 -+ vcmpe%?.f64\\t%P0, #0" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "fcmpd")] -@@ -1272,7 +1275,7 @@ - (unspec:BLK [(match_operand:DF 1 "vfp_register_operand" "")] - UNSPEC_PUSH_MULT))])] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -- "* return vfp_output_fstmd (operands);" -+ "* return vfp_output_vstmd (operands);" - [(set_attr "type" "f_stored")] - ) - -@@ -1285,7 +1288,7 @@ - (unspec:SDF [(match_operand:SDF 1 - "register_operand" "<F_constraint>")] - VRINT))] -- "TARGET_HARD_FLOAT && 
TARGET_FPU_ARMV8 <vfp_double_cond>" -+ "TARGET_HARD_FLOAT && TARGET_VFP5 <vfp_double_cond>" - "vrint<vrint_variant>%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1" - [(set_attr "predicable" "<vrint_predicable>") - (set_attr "predicable_short_it" "no") -@@ -1293,6 +1296,18 @@ - (set_attr "conds" "<vrint_conds>")] - ) - -+;; Implements the lround, lfloor and lceil optabs. -+(define_insn "l<vrint_pattern><su_optab><mode>si2" -+ [(set (match_operand:SI 0 "register_operand" "=t") -+ (FIXUORS:SI (unspec:SDF -+ [(match_operand:SDF 1 -+ "register_operand" "<F_constraint>")] VCVT)))] -+ "TARGET_HARD_FLOAT && TARGET_FPU_ARMV8 <vfp_double_cond>" -+ "vcvt<vrint_variant>%?.<su>32.<V_if_elem>\\t%0, %<V_reg>1" -+ [(set_attr "predicable" "no") -+ (set_attr "type" "f_cvtf2i")] -+) -+ - ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL. - ;; The 'smax' and 'smin' RTL standard pattern names do not specify which - ;; operand will be returned when both operands are zero (i.e. they may not -@@ -1304,7 +1319,7 @@ - [(set (match_operand:SDF 0 "register_operand" "=<F_constraint>") - (smax:SDF (match_operand:SDF 1 "register_operand" "<F_constraint>") - (match_operand:SDF 2 "register_operand" "<F_constraint>")))] -- "TARGET_HARD_FLOAT && TARGET_FPU_ARMV8 <vfp_double_cond>" -+ "TARGET_HARD_FLOAT && TARGET_VFP5 <vfp_double_cond>" - "vmaxnm.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set_attr "type" "f_minmax<vfp_type>") - (set_attr "conds" "unconditional")] -@@ -1314,12 +1329,28 @@ - [(set (match_operand:SDF 0 "register_operand" "=<F_constraint>") - (smin:SDF (match_operand:SDF 1 "register_operand" "<F_constraint>") - (match_operand:SDF 2 "register_operand" "<F_constraint>")))] -- "TARGET_HARD_FLOAT && TARGET_FPU_ARMV8 <vfp_double_cond>" -+ "TARGET_HARD_FLOAT && TARGET_VFP5 <vfp_double_cond>" - "vminnm.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set_attr "type" "f_minmax<vfp_type>") - (set_attr "conds" "unconditional")] - ) - -+;; Write Floating-point Status and Control Register. -+(define_insn "set_fpscr" -+ [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] VUNSPEC_SET_FPSCR)] -+ "TARGET_VFP && TARGET_HARD_FLOAT" -+ "mcr\\tp10, 7, %0, cr1, cr0, 0\\t @SET_FPSCR" -+ [(set_attr "type" "mrs")]) -+ -+;; Read Floating-point Status and Control Register. 
-+(define_insn "get_fpscr" -+ [(set (match_operand:SI 0 "register_operand" "=r") -+ (unspec_volatile:SI [(const_int 0)] VUNSPEC_GET_FPSCR))] -+ "TARGET_VFP && TARGET_HARD_FLOAT" -+ "mrc\\tp10, 7, %0, cr1, cr0, 0\\t @GET_FPSCR" -+ [(set_attr "type" "mrs")]) -+ -+ - ;; Unimplemented insns: - ;; fldm* - ;; fstm* ---- a/src/gcc/config/arm/neon.md -+++ b/src/gcc/config/arm/neon.md -@@ -296,7 +296,7 @@ - UNSPEC_MISALIGNED_ACCESS))] - "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access" - "vld1.<V_sz_elem>\t{%q0}, %A1" -- [(set_attr "type" "neon_store1_1reg<q>")]) -+ [(set_attr "type" "neon_load1_1reg<q>")]) - - (define_insn "vec_set<mode>_internal" - [(set (match_operand:VD 0 "s_register_operand" "=w,w") -@@ -629,6 +629,17 @@ - [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")] - ) - -+(define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>" -+ [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w") -+ (FIXUORS:<V_cmp_result> (unspec:VCVTF -+ [(match_operand:VCVTF 1 "register_operand" "w")] -+ NEON_VCVT)))] -+ "TARGET_NEON && TARGET_FPU_ARMV8" -+ "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1" -+ [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>") -+ (set_attr "predicable" "no")] -+) -+ - (define_insn "ior<mode>3" - [(set (match_operand:VDQ 0 "s_register_operand" "=w,w") - (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0") -@@ -1041,7 +1052,9 @@ - } - else - { -- if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1) -+ if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1 -+ && (!reg_overlap_mentioned_p (operands[0], operands[1]) -+ || REGNO (operands[0]) == REGNO (operands[1]))) - /* This clobbers CC. */ - emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1])); - else -@@ -1141,7 +1154,9 @@ - } - else - { -- if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1) -+ if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1 -+ && (!reg_overlap_mentioned_p (operands[0], operands[1]) -+ || REGNO (operands[0]) == REGNO (operands[1]))) - /* This clobbers CC. */ - emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1])); - else -@@ -1334,33 +1349,47 @@ - - ;; Reduction operations - --(define_expand "reduc_splus_<mode>" -- [(match_operand:VD 0 "s_register_operand" "") -+(define_expand "reduc_plus_scal_<mode>" -+ [(match_operand:<V_elem> 0 "nonimmediate_operand" "") - (match_operand:VD 1 "s_register_operand" "")] - "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" - { -- neon_pairwise_reduce (operands[0], operands[1], <MODE>mode, -+ rtx vec = gen_reg_rtx (<MODE>mode); -+ neon_pairwise_reduce (vec, operands[1], <MODE>mode, - &gen_neon_vpadd_internal<mode>); -+ /* The same result is actually computed into every element. 
*/ -+ emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx)); - DONE; - }) - --(define_expand "reduc_splus_<mode>" -- [(match_operand:VQ 0 "s_register_operand" "") -+(define_expand "reduc_plus_scal_<mode>" -+ [(match_operand:<V_elem> 0 "nonimmediate_operand" "") - (match_operand:VQ 1 "s_register_operand" "")] - "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations) - && !BYTES_BIG_ENDIAN" - { - rtx step1 = gen_reg_rtx (<V_HALF>mode); -- rtx res_d = gen_reg_rtx (<V_HALF>mode); - - emit_insn (gen_quad_halves_plus<mode> (step1, operands[1])); -- emit_insn (gen_reduc_splus_<V_half> (res_d, step1)); -- emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d)); -+ emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1)); - - DONE; - }) - --(define_insn "reduc_splus_v2di" -+(define_expand "reduc_plus_scal_v2di" -+ [(match_operand:DI 0 "nonimmediate_operand" "=w") -+ (match_operand:V2DI 1 "s_register_operand" "")] -+ "TARGET_NEON && !BYTES_BIG_ENDIAN" -+{ -+ rtx vec = gen_reg_rtx (V2DImode); -+ -+ emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1])); -+ emit_insn (gen_vec_extractv2di (operands[0], vec, const0_rtx)); -+ -+ DONE; -+}) -+ -+(define_insn "arm_reduc_plus_internal_v2di" - [(set (match_operand:V2DI 0 "s_register_operand" "=w") - (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")] - UNSPEC_VPADD))] -@@ -1369,115 +1398,109 @@ - [(set_attr "type" "neon_add_q")] - ) - --;; NEON does not distinguish between signed and unsigned addition except on --;; widening operations. --(define_expand "reduc_uplus_<mode>" -- [(match_operand:VDQI 0 "s_register_operand" "") -- (match_operand:VDQI 1 "s_register_operand" "")] -- "TARGET_NEON && (<Is_d_reg> || !BYTES_BIG_ENDIAN)" --{ -- emit_insn (gen_reduc_splus_<mode> (operands[0], operands[1])); -- DONE; --}) -- --(define_expand "reduc_smin_<mode>" -- [(match_operand:VD 0 "s_register_operand" "") -+(define_expand "reduc_smin_scal_<mode>" -+ [(match_operand:<V_elem> 0 "nonimmediate_operand" "") - (match_operand:VD 1 "s_register_operand" "")] - "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" - { -- neon_pairwise_reduce (operands[0], operands[1], <MODE>mode, -+ rtx vec = gen_reg_rtx (<MODE>mode); -+ -+ neon_pairwise_reduce (vec, operands[1], <MODE>mode, - &gen_neon_vpsmin<mode>); -+ /* The result is computed into every element of the vector. 
*/ -+ emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx)); - DONE; - }) - --(define_expand "reduc_smin_<mode>" -- [(match_operand:VQ 0 "s_register_operand" "") -+(define_expand "reduc_smin_scal_<mode>" -+ [(match_operand:<V_elem> 0 "nonimmediate_operand" "") - (match_operand:VQ 1 "s_register_operand" "")] - "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations) - && !BYTES_BIG_ENDIAN" - { - rtx step1 = gen_reg_rtx (<V_HALF>mode); -- rtx res_d = gen_reg_rtx (<V_HALF>mode); - - emit_insn (gen_quad_halves_smin<mode> (step1, operands[1])); -- emit_insn (gen_reduc_smin_<V_half> (res_d, step1)); -- emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d)); -+ emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1)); - - DONE; - }) - --(define_expand "reduc_smax_<mode>" -- [(match_operand:VD 0 "s_register_operand" "") -+(define_expand "reduc_smax_scal_<mode>" -+ [(match_operand:<V_elem> 0 "nonimmediate_operand" "") - (match_operand:VD 1 "s_register_operand" "")] - "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" - { -- neon_pairwise_reduce (operands[0], operands[1], <MODE>mode, -+ rtx vec = gen_reg_rtx (<MODE>mode); -+ neon_pairwise_reduce (vec, operands[1], <MODE>mode, - &gen_neon_vpsmax<mode>); -+ /* The result is computed into every element of the vector. */ -+ emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx)); - DONE; - }) - --(define_expand "reduc_smax_<mode>" -- [(match_operand:VQ 0 "s_register_operand" "") -+(define_expand "reduc_smax_scal_<mode>" -+ [(match_operand:<V_elem> 0 "nonimmediate_operand" "") - (match_operand:VQ 1 "s_register_operand" "")] - "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations) - && !BYTES_BIG_ENDIAN" - { - rtx step1 = gen_reg_rtx (<V_HALF>mode); -- rtx res_d = gen_reg_rtx (<V_HALF>mode); - - emit_insn (gen_quad_halves_smax<mode> (step1, operands[1])); -- emit_insn (gen_reduc_smax_<V_half> (res_d, step1)); -- emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d)); -+ emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1)); - - DONE; - }) - --(define_expand "reduc_umin_<mode>" -- [(match_operand:VDI 0 "s_register_operand" "") -+(define_expand "reduc_umin_scal_<mode>" -+ [(match_operand:<V_elem> 0 "nonimmediate_operand" "") - (match_operand:VDI 1 "s_register_operand" "")] - "TARGET_NEON" - { -- neon_pairwise_reduce (operands[0], operands[1], <MODE>mode, -+ rtx vec = gen_reg_rtx (<MODE>mode); -+ neon_pairwise_reduce (vec, operands[1], <MODE>mode, - &gen_neon_vpumin<mode>); -+ /* The result is computed into every element of the vector. 
*/ -+ emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx)); - DONE; - }) - --(define_expand "reduc_umin_<mode>" -- [(match_operand:VQI 0 "s_register_operand" "") -+(define_expand "reduc_umin_scal_<mode>" -+ [(match_operand:<V_elem> 0 "nonimmediate_operand" "") - (match_operand:VQI 1 "s_register_operand" "")] - "TARGET_NEON && !BYTES_BIG_ENDIAN" - { - rtx step1 = gen_reg_rtx (<V_HALF>mode); -- rtx res_d = gen_reg_rtx (<V_HALF>mode); - - emit_insn (gen_quad_halves_umin<mode> (step1, operands[1])); -- emit_insn (gen_reduc_umin_<V_half> (res_d, step1)); -- emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d)); -+ emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1)); - - DONE; - }) - --(define_expand "reduc_umax_<mode>" -- [(match_operand:VDI 0 "s_register_operand" "") -+(define_expand "reduc_umax_scal_<mode>" -+ [(match_operand:<V_elem> 0 "nonimmediate_operand" "") - (match_operand:VDI 1 "s_register_operand" "")] - "TARGET_NEON" - { -- neon_pairwise_reduce (operands[0], operands[1], <MODE>mode, -+ rtx vec = gen_reg_rtx (<MODE>mode); -+ neon_pairwise_reduce (vec, operands[1], <MODE>mode, - &gen_neon_vpumax<mode>); -+ /* The result is computed into every element of the vector. */ -+ emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx)); - DONE; - }) - --(define_expand "reduc_umax_<mode>" -- [(match_operand:VQI 0 "s_register_operand" "") -+(define_expand "reduc_umax_scal_<mode>" -+ [(match_operand:<V_elem> 0 "nonimmediate_operand" "") - (match_operand:VQI 1 "s_register_operand" "")] - "TARGET_NEON && !BYTES_BIG_ENDIAN" - { - rtx step1 = gen_reg_rtx (<V_HALF>mode); -- rtx res_d = gen_reg_rtx (<V_HALF>mode); - - emit_insn (gen_quad_halves_umax<mode> (step1, operands[1])); -- emit_insn (gen_reduc_umax_<V_half> (res_d, step1)); -- emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d)); -+ emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1)); - - DONE; - }) -@@ -1842,9 +1865,9 @@ - ; good for plain vadd, vaddq. - - (define_expand "neon_vadd<mode>" -- [(match_operand:VDQX 0 "s_register_operand" "=w") -- (match_operand:VDQX 1 "s_register_operand" "w") -- (match_operand:VDQX 2 "s_register_operand" "w") -+ [(match_operand:VCVTF 0 "s_register_operand" "=w") -+ (match_operand:VCVTF 1 "s_register_operand" "w") -+ (match_operand:VCVTF 2 "s_register_operand" "w") - (match_operand:SI 3 "immediate_operand" "i")] - "TARGET_NEON" - { -@@ -1869,9 +1892,9 @@ - ; Used for intrinsics when flag_unsafe_math_optimizations is false. - - (define_insn "neon_vadd<mode>_unspec" -- [(set (match_operand:VDQX 0 "s_register_operand" "=w") -- (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w") -- (match_operand:VDQX 2 "s_register_operand" "w")] -+ [(set (match_operand:VCVTF 0 "s_register_operand" "=w") -+ (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") -+ (match_operand:VCVTF 2 "s_register_operand" "w")] - UNSPEC_VADD))] - "TARGET_NEON" - "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" -@@ -2132,9 +2155,9 @@ - ) - - (define_expand "neon_vsub<mode>" -- [(match_operand:VDQX 0 "s_register_operand" "=w") -- (match_operand:VDQX 1 "s_register_operand" "w") -- (match_operand:VDQX 2 "s_register_operand" "w") -+ [(match_operand:VCVTF 0 "s_register_operand" "=w") -+ (match_operand:VCVTF 1 "s_register_operand" "w") -+ (match_operand:VCVTF 2 "s_register_operand" "w") - (match_operand:SI 3 "immediate_operand" "i")] - "TARGET_NEON" - { -@@ -2149,9 +2172,9 @@ - ; Used for intrinsics when flag_unsafe_math_optimizations is false. 
- - (define_insn "neon_vsub<mode>_unspec" -- [(set (match_operand:VDQX 0 "s_register_operand" "=w") -- (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w") -- (match_operand:VDQX 2 "s_register_operand" "w")] -+ [(set (match_operand:VCVTF 0 "s_register_operand" "=w") -+ (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") -+ (match_operand:VCVTF 2 "s_register_operand" "w")] - UNSPEC_VSUB))] - "TARGET_NEON" - "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" -@@ -2547,6 +2570,14 @@ - [(set_attr "type" "neon_qabs<q>")] - ) - -+(define_insn "neon_bswap<mode>" -+ [(set (match_operand:VDQHSD 0 "register_operand" "=w") -+ (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))] -+ "TARGET_NEON" -+ "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1" -+ [(set_attr "type" "neon_rev<q>")] -+) -+ - (define_expand "neon_vneg<mode>" - [(match_operand:VDQW 0 "s_register_operand" "") - (match_operand:VDQW 1 "s_register_operand" "") -@@ -2557,6 +2588,33 @@ - DONE; - }) - -+(define_expand "neon_copysignf<mode>" -+ [(match_operand:VCVTF 0 "register_operand") -+ (match_operand:VCVTF 1 "register_operand") -+ (match_operand:VCVTF 2 "register_operand")] -+ "TARGET_NEON" -+ "{ -+ rtx v_bitmask_cast; -+ rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode); -+ int i, n_elt = GET_MODE_NUNITS (<MODE>mode); -+ rtvec v = rtvec_alloc (n_elt); -+ -+ /* Create bitmask for vector select. */ -+ for (i = 0; i < n_elt; ++i) -+ RTVEC_ELT (v, i) = GEN_INT (0x80000000); -+ -+ emit_move_insn (v_bitmask, -+ gen_rtx_CONST_VECTOR (<VCVTF:V_cmp_result>mode, v)); -+ emit_move_insn (operands[0], operands[2]); -+ v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask, -+ <VCVTF:V_cmp_result>mode, 0); -+ emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0], -+ operands[1])); -+ -+ DONE; -+ }" -+) -+ - (define_insn "neon_vqneg<mode>" - [(set (match_operand:VDQIW 0 "s_register_operand" "=w") - (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w") -@@ -4140,17 +4198,6 @@ - [(set_attr "type" "neon_permute<q>")] - ) - --(define_expand "neon_vtrn<mode>" -- [(match_operand:SI 0 "s_register_operand" "r") -- (match_operand:VDQW 1 "s_register_operand" "w") -- (match_operand:VDQW 2 "s_register_operand" "w")] -- "TARGET_NEON" --{ -- neon_emit_pair_result_insn (<MODE>mode, gen_neon_vtrn<mode>_internal, -- operands[0], operands[1], operands[2]); -- DONE; --}) -- - (define_expand "neon_vzip<mode>_internal" - [(parallel - [(set (match_operand:VDQW 0 "s_register_operand" "") -@@ -4177,17 +4224,6 @@ - [(set_attr "type" "neon_zip<q>")] - ) - --(define_expand "neon_vzip<mode>" -- [(match_operand:SI 0 "s_register_operand" "r") -- (match_operand:VDQW 1 "s_register_operand" "w") -- (match_operand:VDQW 2 "s_register_operand" "w")] -- "TARGET_NEON" --{ -- neon_emit_pair_result_insn (<MODE>mode, gen_neon_vzip<mode>_internal, -- operands[0], operands[1], operands[2]); -- DONE; --}) -- - (define_expand "neon_vuzp<mode>_internal" - [(parallel - [(set (match_operand:VDQW 0 "s_register_operand" "") -@@ -4214,17 +4250,6 @@ - [(set_attr "type" "neon_zip<q>")] - ) - --(define_expand "neon_vuzp<mode>" -- [(match_operand:SI 0 "s_register_operand" "r") -- (match_operand:VDQW 1 "s_register_operand" "w") -- (match_operand:VDQW 2 "s_register_operand" "w")] -- "TARGET_NEON" --{ -- neon_emit_pair_result_insn (<MODE>mode, gen_neon_vuzp<mode>_internal, -- operands[0], operands[1], operands[2]); -- DONE; --}) -- - (define_expand "neon_vreinterpretv8qi<mode>" - [(match_operand:V8QI 0 "s_register_operand" "") - 
(match_operand:VDX 1 "s_register_operand" "")] -@@ -5357,61 +5382,6 @@ - [(set_attr "type" "neon_store4_4reg<q>")] - ) - --(define_expand "neon_vand<mode>" -- [(match_operand:VDQX 0 "s_register_operand" "") -- (match_operand:VDQX 1 "s_register_operand" "") -- (match_operand:VDQX 2 "neon_inv_logic_op2" "") -- (match_operand:SI 3 "immediate_operand" "")] -- "TARGET_NEON" --{ -- emit_insn (gen_and<mode>3 (operands[0], operands[1], operands[2])); -- DONE; --}) -- --(define_expand "neon_vorr<mode>" -- [(match_operand:VDQX 0 "s_register_operand" "") -- (match_operand:VDQX 1 "s_register_operand" "") -- (match_operand:VDQX 2 "neon_logic_op2" "") -- (match_operand:SI 3 "immediate_operand" "")] -- "TARGET_NEON" --{ -- emit_insn (gen_ior<mode>3 (operands[0], operands[1], operands[2])); -- DONE; --}) -- --(define_expand "neon_veor<mode>" -- [(match_operand:VDQX 0 "s_register_operand" "") -- (match_operand:VDQX 1 "s_register_operand" "") -- (match_operand:VDQX 2 "s_register_operand" "") -- (match_operand:SI 3 "immediate_operand" "")] -- "TARGET_NEON" --{ -- emit_insn (gen_xor<mode>3 (operands[0], operands[1], operands[2])); -- DONE; --}) -- --(define_expand "neon_vbic<mode>" -- [(match_operand:VDQX 0 "s_register_operand" "") -- (match_operand:VDQX 1 "s_register_operand" "") -- (match_operand:VDQX 2 "neon_logic_op2" "") -- (match_operand:SI 3 "immediate_operand" "")] -- "TARGET_NEON" --{ -- emit_insn (gen_bic<mode>3_neon (operands[0], operands[1], operands[2])); -- DONE; --}) -- --(define_expand "neon_vorn<mode>" -- [(match_operand:VDQX 0 "s_register_operand" "") -- (match_operand:VDQX 1 "s_register_operand" "") -- (match_operand:VDQX 2 "neon_inv_logic_op2" "") -- (match_operand:SI 3 "immediate_operand" "")] -- "TARGET_NEON" --{ -- emit_insn (gen_orn<mode>3_neon (operands[0], operands[1], operands[2])); -- DONE; --}) -- - (define_insn "neon_vec_unpack<US>_lo_<mode>" - [(set (match_operand:<V_unpack> 0 "register_operand" "=w") - (SE:<V_unpack> (vec_select:<V_HALF> ---- a/src/gcc/config/arm/types.md -+++ b/src/gcc/config/arm/types.md -@@ -66,7 +66,6 @@ - ; f_mrc transfer vfp to arm reg. - ; f_mrrc transfer vfp to two arm regs. - ; f_rint[d,s] double/single floating point rount to integral. --; f_sel[d,s] double/single floating byte select. - ; f_store[d,s] double/single store to memory. Used for VFP unit. - ; fadd[d,s] double/single floating-point scalar addition. - ; fcmp[d,s] double/single floating-point compare. -@@ -571,8 +570,6 @@ - f_mrrc,\ - f_rintd,\ - f_rints,\ -- f_seld,\ -- f_sels,\ - f_stored,\ - f_stores,\ - faddd,\ ---- a/src/gcc/config/arm/arm_neon_builtins.def -+++ b/src/gcc/config/arm/arm_neon_builtins.def -@@ -18,8 +18,7 @@ - along with GCC; see the file COPYING3. If not see - <http://www.gnu.org/licenses/>. 
*/ - --VAR10 (BINOP, vadd, -- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), -+VAR2 (BINOP, vadd, v2sf, v4sf), - VAR3 (BINOP, vaddl, v8qi, v4hi, v2si), - VAR3 (BINOP, vaddw, v8qi, v4hi, v2si), - VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si), -@@ -54,7 +53,7 @@ - VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), - VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si), - VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), --VAR10 (BINOP, vsub, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), -+VAR2 (BINOP, vsub, v2sf, v4sf), - VAR3 (BINOP, vsubl, v8qi, v4hi, v2si), - VAR3 (BINOP, vsubw, v8qi, v4hi, v2si), - VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), -@@ -89,6 +88,7 @@ - VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si), - VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si), - VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si), -+VAR5 (BSWAP, bswap, v4hi, v8hi, v2si, v4si, v2di), - VAR2 (UNOP, vcnt, v8qi, v16qi), - VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf), - VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf), -@@ -135,6 +135,7 @@ - VAR1 (FLOAT_NARROW, vcvtv4hf, v4sf), - VAR10 (SELECT, vbsl, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), -+VAR2 (COPYSIGNF, copysignf, v2sf, v4sf), - VAR2 (RINT, vrintn, v2sf, v4sf), - VAR2 (RINT, vrinta, v2sf, v4sf), - VAR2 (RINT, vrintp, v2sf, v4sf), -@@ -141,6 +142,18 @@ - VAR2 (RINT, vrintm, v2sf, v4sf), - VAR2 (RINT, vrintz, v2sf, v4sf), - VAR2 (RINT, vrintx, v2sf, v4sf), -+VAR1 (RINT, vcvtav2sf, v2si), -+VAR1 (RINT, vcvtav4sf, v4si), -+VAR1 (RINT, vcvtauv2sf, v2si), -+VAR1 (RINT, vcvtauv4sf, v4si), -+VAR1 (RINT, vcvtpv2sf, v2si), -+VAR1 (RINT, vcvtpv4sf, v4si), -+VAR1 (RINT, vcvtpuv2sf, v2si), -+VAR1 (RINT, vcvtpuv4sf, v4si), -+VAR1 (RINT, vcvtmv2sf, v2si), -+VAR1 (RINT, vcvtmv4sf, v4si), -+VAR1 (RINT, vcvtmuv2sf, v2si), -+VAR1 (RINT, vcvtmuv4sf, v4si), - VAR1 (VTBL, vtbl1, v8qi), - VAR1 (VTBL, vtbl2, v8qi), - VAR1 (VTBL, vtbl3, v8qi), -@@ -149,9 +162,6 @@ - VAR1 (VTBX, vtbx2, v8qi), - VAR1 (VTBX, vtbx3, v8qi), - VAR1 (VTBX, vtbx4, v8qi), --VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), --VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), --VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), - VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di), - VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di), - VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di), -@@ -199,14 +209,4 @@ - VAR9 (STORESTRUCT, vst4, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf), - VAR7 (STORESTRUCTLANE, vst4_lane, -- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf), --VAR10 (LOGICBINOP, vand, -- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), --VAR10 (LOGICBINOP, vorr, -- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), --VAR10 (BINOP, veor, -- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), --VAR10 (LOGICBINOP, vbic, -- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), --VAR10 (LOGICBINOP, vorn, -- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) -+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) ---- a/src/gcc/config/arm/cortex-a7.md -+++ b/src/gcc/config/arm/cortex-a7.md -@@ -137,7 +137,7 @@ - (and (eq_attr "tune" "cortexa7") - (eq_attr "type" "alu_reg,alus_reg,logic_reg,logics_reg,\ - adc_imm,adcs_imm,adc_reg,adcs_reg,\ -- bfm,rev,\ -+ bfm,clz,rbit,rev,\ - shift_imm,shift_reg,mov_reg,mvn_reg")) - "cortex_a7_ex1") - 
---- a/src/gcc/config/arm/aarch-common-protos.h -+++ b/src/gcc/config/arm/aarch-common-protos.h -@@ -24,6 +24,9 @@ - #define GCC_AARCH_COMMON_PROTOS_H - - extern int aarch_crypto_can_dual_issue (rtx, rtx); -+extern bool aarch_rev16_p (rtx); -+extern bool aarch_rev16_shleft_mask_imm_p (rtx, enum machine_mode); -+extern bool aarch_rev16_shright_mask_imm_p (rtx, enum machine_mode); - extern int arm_early_load_addr_dep (rtx, rtx); - extern int arm_early_store_addr_dep (rtx, rtx); - extern int arm_mac_accumulator_is_mul_result (rtx, rtx); -@@ -54,6 +57,7 @@ - const int bfi; /* Bit-field insert. */ - const int bfx; /* Bit-field extraction. */ - const int clz; /* Count Leading Zeros. */ -+ const int rev; /* Reverse bits/bytes. */ - const int non_exec; /* Extra cost when not executing insn. */ - const bool non_exec_costs_exec; /* True if non-execution must add the exec - cost. */ ---- a/src/gcc/config/arm/predicates.md -+++ b/src/gcc/config/arm/predicates.md -@@ -291,6 +291,15 @@ - || ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32"))) - (match_test "mode == GET_MODE (op)"))) - -+(define_special_predicate "shift_nomul_operator" -+ (and (ior (and (match_code "rotate") -+ (match_test "CONST_INT_P (XEXP (op, 1)) -+ && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32")) -+ (and (match_code "ashift,ashiftrt,lshiftrt,rotatert") -+ (match_test "!CONST_INT_P (XEXP (op, 1)) -+ || ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32"))) -+ (match_test "mode == GET_MODE (op)"))) -+ - ;; True for shift operators which can be used with saturation instructions. - (define_special_predicate "sat_shift_operator" - (and (ior (and (match_code "mult") -@@ -681,5 +690,6 @@ - (match_code "reg" "0"))) - - (define_predicate "call_insn_operand" -- (ior (match_code "symbol_ref") -+ (ior (and (match_code "symbol_ref") -+ (match_test "!arm_is_long_call_p (SYMBOL_REF_DECL (op))")) - (match_operand 0 "s_register_operand"))) ---- a/src/gcc/config/arm/arm_neon.h -+++ b/src/gcc/config/arm/arm_neon.h -@@ -452,114 +452,121 @@ - } poly64x2x4_t; - #endif - -- -- -+/* vadd */ - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vadd_s8 (int8x8_t __a, int8x8_t __b) - { -- return (int8x8_t)__builtin_neon_vaddv8qi (__a, __b, 1); -+ return __a + __b; - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vadd_s16 (int16x4_t __a, int16x4_t __b) - { -- return (int16x4_t)__builtin_neon_vaddv4hi (__a, __b, 1); -+ return __a + __b; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vadd_s32 (int32x2_t __a, int32x2_t __b) - { -- return (int32x2_t)__builtin_neon_vaddv2si (__a, __b, 1); -+ return __a + __b; - } - - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) - vadd_f32 (float32x2_t __a, float32x2_t __b) - { -- return (float32x2_t)__builtin_neon_vaddv2sf (__a, __b, 3); -+#ifdef __FAST_MATH -+ return __a + __b; -+#else -+ return (float32x2_t) __builtin_neon_vaddv2sf (__a, __b, 3); -+#endif - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vadd_u8 (uint8x8_t __a, uint8x8_t __b) - { -- return (uint8x8_t)__builtin_neon_vaddv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); -+ return __a + __b; - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vadd_u16 (uint16x4_t __a, uint16x4_t __b) - { -- return (uint16x4_t)__builtin_neon_vaddv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); -+ return __a + __b; - } - - __extension__ static __inline uint32x2_t __attribute__ 
((__always_inline__)) - vadd_u32 (uint32x2_t __a, uint32x2_t __b) - { -- return (uint32x2_t)__builtin_neon_vaddv2si ((int32x2_t) __a, (int32x2_t) __b, 0); -+ return __a + __b; - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vadd_s64 (int64x1_t __a, int64x1_t __b) - { -- return (int64x1_t)__builtin_neon_vadddi (__a, __b, 1); -+ return __a + __b; - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vadd_u64 (uint64x1_t __a, uint64x1_t __b) - { -- return (uint64x1_t)__builtin_neon_vadddi ((int64x1_t) __a, (int64x1_t) __b, 0); -+ return __a + __b; - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vaddq_s8 (int8x16_t __a, int8x16_t __b) - { -- return (int8x16_t)__builtin_neon_vaddv16qi (__a, __b, 1); -+ return __a + __b; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vaddq_s16 (int16x8_t __a, int16x8_t __b) - { -- return (int16x8_t)__builtin_neon_vaddv8hi (__a, __b, 1); -+ return __a + __b; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vaddq_s32 (int32x4_t __a, int32x4_t __b) - { -- return (int32x4_t)__builtin_neon_vaddv4si (__a, __b, 1); -+ return __a + __b; - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vaddq_s64 (int64x2_t __a, int64x2_t __b) - { -- return (int64x2_t)__builtin_neon_vaddv2di (__a, __b, 1); -+ return __a + __b; - } - - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vaddq_f32 (float32x4_t __a, float32x4_t __b) - { -- return (float32x4_t)__builtin_neon_vaddv4sf (__a, __b, 3); -+#ifdef __FAST_MATH -+ return __a + __b; -+#else -+ return (float32x4_t) __builtin_neon_vaddv4sf (__a, __b, 3); -+#endif - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vaddq_u8 (uint8x16_t __a, uint8x16_t __b) - { -- return (uint8x16_t)__builtin_neon_vaddv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); -+ return __a + __b; - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vaddq_u16 (uint16x8_t __a, uint16x8_t __b) - { -- return (uint16x8_t)__builtin_neon_vaddv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); -+ return __a + __b; - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vaddq_u32 (uint32x4_t __a, uint32x4_t __b) - { -- return (uint32x4_t)__builtin_neon_vaddv4si ((int32x4_t) __a, (int32x4_t) __b, 0); -+ return __a + __b; - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vaddq_u64 (uint64x2_t __a, uint64x2_t __b) - { -- return (uint64x2_t)__builtin_neon_vaddv2di ((int64x2_t) __a, (int64x2_t) __b, 0); -+ return __a + __b; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -@@ -949,93 +956,102 @@ - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vmul_s8 (int8x8_t __a, int8x8_t __b) - { -- return (int8x8_t)__builtin_neon_vmulv8qi (__a, __b, 1); -+ return __a * __b; - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vmul_s16 (int16x4_t __a, int16x4_t __b) - { -- return (int16x4_t)__builtin_neon_vmulv4hi (__a, __b, 1); -+ return __a * __b; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vmul_s32 (int32x2_t __a, int32x2_t __b) - { -- return (int32x2_t)__builtin_neon_vmulv2si (__a, __b, 1); -+ return __a * __b; - } - - __extension__ static __inline float32x2_t __attribute__ 
((__always_inline__)) - vmul_f32 (float32x2_t __a, float32x2_t __b) - { -- return (float32x2_t)__builtin_neon_vmulv2sf (__a, __b, 3); -+#ifdef __FAST_MATH -+ return __a * __b; -+#else -+ return (float32x2_t) __builtin_neon_vmulv2sf (__a, __b, 3); -+#endif -+ - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vmul_u8 (uint8x8_t __a, uint8x8_t __b) - { -- return (uint8x8_t)__builtin_neon_vmulv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); -+ return __a * __b; - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vmul_u16 (uint16x4_t __a, uint16x4_t __b) - { -- return (uint16x4_t)__builtin_neon_vmulv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); -+ return __a * __b; - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vmul_u32 (uint32x2_t __a, uint32x2_t __b) - { -- return (uint32x2_t)__builtin_neon_vmulv2si ((int32x2_t) __a, (int32x2_t) __b, 0); -+ return __a * __b; - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vmul_p8 (poly8x8_t __a, poly8x8_t __b) --{ -- return (poly8x8_t)__builtin_neon_vmulv8qi ((int8x8_t) __a, (int8x8_t) __b, 2); --} -- - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vmulq_s8 (int8x16_t __a, int8x16_t __b) - { -- return (int8x16_t)__builtin_neon_vmulv16qi (__a, __b, 1); -+ return __a * __b; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vmulq_s16 (int16x8_t __a, int16x8_t __b) - { -- return (int16x8_t)__builtin_neon_vmulv8hi (__a, __b, 1); -+ return __a * __b; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vmulq_s32 (int32x4_t __a, int32x4_t __b) - { -- return (int32x4_t)__builtin_neon_vmulv4si (__a, __b, 1); -+ return __a * __b; - } - - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vmulq_f32 (float32x4_t __a, float32x4_t __b) - { -- return (float32x4_t)__builtin_neon_vmulv4sf (__a, __b, 3); -+#ifdef __FAST_MATH -+ return __a * __b; -+#else -+ return (float32x4_t) __builtin_neon_vmulv4sf (__a, __b, 3); -+#endif - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vmulq_u8 (uint8x16_t __a, uint8x16_t __b) - { -- return (uint8x16_t)__builtin_neon_vmulv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); -+ return __a * __b; - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vmulq_u16 (uint16x8_t __a, uint16x8_t __b) - { -- return (uint16x8_t)__builtin_neon_vmulv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); -+ return __a * __b; - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vmulq_u32 (uint32x4_t __a, uint32x4_t __b) - { -- return (uint32x4_t)__builtin_neon_vmulv4si ((int32x4_t) __a, (int32x4_t) __b, 0); -+ return __a * __b; - } - -+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+vmul_p8 (poly8x8_t __a, poly8x8_t __b) -+{ -+ return (poly8x8_t)__builtin_neon_vmulv8qi ((int8x8_t) __a, (int8x8_t) __b, 2); -+} -+ - __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) - vmulq_p8 (poly8x16_t __a, poly8x16_t __b) - { -@@ -1520,112 +1536,121 @@ - } - - #endif -+ - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vsub_s8 (int8x8_t __a, int8x8_t __b) - { -- return (int8x8_t)__builtin_neon_vsubv8qi (__a, __b, 1); -+ return __a - __b; - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vsub_s16 (int16x4_t 
__a, int16x4_t __b) - { -- return (int16x4_t)__builtin_neon_vsubv4hi (__a, __b, 1); -+ return __a - __b; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vsub_s32 (int32x2_t __a, int32x2_t __b) - { -- return (int32x2_t)__builtin_neon_vsubv2si (__a, __b, 1); -+ return __a - __b; - } - - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) - vsub_f32 (float32x2_t __a, float32x2_t __b) - { -- return (float32x2_t)__builtin_neon_vsubv2sf (__a, __b, 3); -+#ifdef __FAST_MATH -+ return __a - __b; -+#else -+ return (float32x2_t) __builtin_neon_vsubv2sf (__a, __b, 3); -+#endif - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vsub_u8 (uint8x8_t __a, uint8x8_t __b) - { -- return (uint8x8_t)__builtin_neon_vsubv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); -+ return __a - __b; - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vsub_u16 (uint16x4_t __a, uint16x4_t __b) - { -- return (uint16x4_t)__builtin_neon_vsubv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); -+ return __a - __b; - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vsub_u32 (uint32x2_t __a, uint32x2_t __b) - { -- return (uint32x2_t)__builtin_neon_vsubv2si ((int32x2_t) __a, (int32x2_t) __b, 0); -+ return __a - __b; - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vsub_s64 (int64x1_t __a, int64x1_t __b) - { -- return (int64x1_t)__builtin_neon_vsubdi (__a, __b, 1); -+ return __a - __b; - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vsub_u64 (uint64x1_t __a, uint64x1_t __b) - { -- return (uint64x1_t)__builtin_neon_vsubdi ((int64x1_t) __a, (int64x1_t) __b, 0); -+ return __a - __b; - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vsubq_s8 (int8x16_t __a, int8x16_t __b) - { -- return (int8x16_t)__builtin_neon_vsubv16qi (__a, __b, 1); -+ return __a - __b; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vsubq_s16 (int16x8_t __a, int16x8_t __b) - { -- return (int16x8_t)__builtin_neon_vsubv8hi (__a, __b, 1); -+ return __a - __b; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vsubq_s32 (int32x4_t __a, int32x4_t __b) - { -- return (int32x4_t)__builtin_neon_vsubv4si (__a, __b, 1); -+ return __a - __b; - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vsubq_s64 (int64x2_t __a, int64x2_t __b) - { -- return (int64x2_t)__builtin_neon_vsubv2di (__a, __b, 1); -+ return __a - __b; - } - - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vsubq_f32 (float32x4_t __a, float32x4_t __b) - { -- return (float32x4_t)__builtin_neon_vsubv4sf (__a, __b, 3); -+#ifdef __FAST_MATH -+ return __a - __b; -+#else -+ return (float32x4_t) __builtin_neon_vsubv4sf (__a, __b, 3); -+#endif - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vsubq_u8 (uint8x16_t __a, uint8x16_t __b) - { -- return (uint8x16_t)__builtin_neon_vsubv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); -+ return __a - __b; - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vsubq_u16 (uint16x8_t __a, uint16x8_t __b) - { -- return (uint16x8_t)__builtin_neon_vsubv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); -+ return __a - __b; - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vsubq_u32 (uint32x4_t __a, 
uint32x4_t __b) - { -- return (uint32x4_t)__builtin_neon_vsubv4si ((int32x4_t) __a, (int32x4_t) __b, 0); -+ return __a - __b; - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vsubq_u64 (uint64x2_t __a, uint64x2_t __b) - { -- return (uint64x2_t)__builtin_neon_vsubv2di ((int64x2_t) __a, (int64x2_t) __b, 0); -+ return __a - __b; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -@@ -11295,484 +11320,483 @@ - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vand_s8 (int8x8_t __a, int8x8_t __b) - { -- return (int8x8_t)__builtin_neon_vandv8qi (__a, __b, 1); -+ return __a & __b; - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vand_s16 (int16x4_t __a, int16x4_t __b) - { -- return (int16x4_t)__builtin_neon_vandv4hi (__a, __b, 1); -+ return __a & __b; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vand_s32 (int32x2_t __a, int32x2_t __b) - { -- return (int32x2_t)__builtin_neon_vandv2si (__a, __b, 1); -+ return __a & __b; - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vand_u8 (uint8x8_t __a, uint8x8_t __b) - { -- return (uint8x8_t)__builtin_neon_vandv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); -+ return __a & __b; - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vand_u16 (uint16x4_t __a, uint16x4_t __b) - { -- return (uint16x4_t)__builtin_neon_vandv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); -+ return __a & __b; - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vand_u32 (uint32x2_t __a, uint32x2_t __b) - { -- return (uint32x2_t)__builtin_neon_vandv2si ((int32x2_t) __a, (int32x2_t) __b, 0); -+ return __a & __b; - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vand_s64 (int64x1_t __a, int64x1_t __b) - { -- return (int64x1_t)__builtin_neon_vanddi (__a, __b, 1); -+ return __a & __b; - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vand_u64 (uint64x1_t __a, uint64x1_t __b) - { -- return (uint64x1_t)__builtin_neon_vanddi ((int64x1_t) __a, (int64x1_t) __b, 0); -+ return __a & __b; - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vandq_s8 (int8x16_t __a, int8x16_t __b) - { -- return (int8x16_t)__builtin_neon_vandv16qi (__a, __b, 1); -+ return __a & __b; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vandq_s16 (int16x8_t __a, int16x8_t __b) - { -- return (int16x8_t)__builtin_neon_vandv8hi (__a, __b, 1); -+ return __a & __b; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vandq_s32 (int32x4_t __a, int32x4_t __b) - { -- return (int32x4_t)__builtin_neon_vandv4si (__a, __b, 1); -+ return __a & __b; - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vandq_s64 (int64x2_t __a, int64x2_t __b) - { -- return (int64x2_t)__builtin_neon_vandv2di (__a, __b, 1); -+ return __a & __b; - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vandq_u8 (uint8x16_t __a, uint8x16_t __b) - { -- return (uint8x16_t)__builtin_neon_vandv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); -+ return __a & __b; - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vandq_u16 (uint16x8_t __a, uint16x8_t __b) - { -- return (uint16x8_t)__builtin_neon_vandv8hi ((int16x8_t) __a, (int16x8_t) 
__b, 0); -+ return __a & __b; - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vandq_u32 (uint32x4_t __a, uint32x4_t __b) - { -- return (uint32x4_t)__builtin_neon_vandv4si ((int32x4_t) __a, (int32x4_t) __b, 0); -+ return __a & __b; - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vandq_u64 (uint64x2_t __a, uint64x2_t __b) - { -- return (uint64x2_t)__builtin_neon_vandv2di ((int64x2_t) __a, (int64x2_t) __b, 0); -+ return __a & __b; - } - - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vorr_s8 (int8x8_t __a, int8x8_t __b) - { -- return (int8x8_t)__builtin_neon_vorrv8qi (__a, __b, 1); -+ return __a | __b; - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vorr_s16 (int16x4_t __a, int16x4_t __b) - { -- return (int16x4_t)__builtin_neon_vorrv4hi (__a, __b, 1); -+ return __a | __b; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vorr_s32 (int32x2_t __a, int32x2_t __b) - { -- return (int32x2_t)__builtin_neon_vorrv2si (__a, __b, 1); -+ return __a | __b; - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vorr_u8 (uint8x8_t __a, uint8x8_t __b) - { -- return (uint8x8_t)__builtin_neon_vorrv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); -+ return __a | __b; - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vorr_u16 (uint16x4_t __a, uint16x4_t __b) - { -- return (uint16x4_t)__builtin_neon_vorrv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); -+ return __a | __b; - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vorr_u32 (uint32x2_t __a, uint32x2_t __b) - { -- return (uint32x2_t)__builtin_neon_vorrv2si ((int32x2_t) __a, (int32x2_t) __b, 0); -+ return __a | __b; - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vorr_s64 (int64x1_t __a, int64x1_t __b) - { -- return (int64x1_t)__builtin_neon_vorrdi (__a, __b, 1); -+ return __a | __b; - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vorr_u64 (uint64x1_t __a, uint64x1_t __b) - { -- return (uint64x1_t)__builtin_neon_vorrdi ((int64x1_t) __a, (int64x1_t) __b, 0); -+ return __a | __b; - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vorrq_s8 (int8x16_t __a, int8x16_t __b) - { -- return (int8x16_t)__builtin_neon_vorrv16qi (__a, __b, 1); -+ return __a | __b; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vorrq_s16 (int16x8_t __a, int16x8_t __b) - { -- return (int16x8_t)__builtin_neon_vorrv8hi (__a, __b, 1); -+ return __a | __b; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vorrq_s32 (int32x4_t __a, int32x4_t __b) - { -- return (int32x4_t)__builtin_neon_vorrv4si (__a, __b, 1); -+ return __a | __b; - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vorrq_s64 (int64x2_t __a, int64x2_t __b) - { -- return (int64x2_t)__builtin_neon_vorrv2di (__a, __b, 1); -+ return __a | __b; - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vorrq_u8 (uint8x16_t __a, uint8x16_t __b) - { -- return (uint8x16_t)__builtin_neon_vorrv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); -+ return __a | __b; - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vorrq_u16 (uint16x8_t __a, uint16x8_t __b) - { -- return 
(uint16x8_t)__builtin_neon_vorrv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); -+ return __a | __b; - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vorrq_u32 (uint32x4_t __a, uint32x4_t __b) - { -- return (uint32x4_t)__builtin_neon_vorrv4si ((int32x4_t) __a, (int32x4_t) __b, 0); -+ return __a | __b; - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vorrq_u64 (uint64x2_t __a, uint64x2_t __b) - { -- return (uint64x2_t)__builtin_neon_vorrv2di ((int64x2_t) __a, (int64x2_t) __b, 0); -+ return __a | __b; - } - - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - veor_s8 (int8x8_t __a, int8x8_t __b) - { -- return (int8x8_t)__builtin_neon_veorv8qi (__a, __b, 1); -+ return __a ^ __b; - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - veor_s16 (int16x4_t __a, int16x4_t __b) - { -- return (int16x4_t)__builtin_neon_veorv4hi (__a, __b, 1); -+ return __a ^ __b; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - veor_s32 (int32x2_t __a, int32x2_t __b) - { -- return (int32x2_t)__builtin_neon_veorv2si (__a, __b, 1); -+ return __a ^ __b; - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - veor_u8 (uint8x8_t __a, uint8x8_t __b) - { -- return (uint8x8_t)__builtin_neon_veorv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); -+ return __a ^ __b; - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - veor_u16 (uint16x4_t __a, uint16x4_t __b) - { -- return (uint16x4_t)__builtin_neon_veorv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); -+ return __a ^ __b; - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - veor_u32 (uint32x2_t __a, uint32x2_t __b) - { -- return (uint32x2_t)__builtin_neon_veorv2si ((int32x2_t) __a, (int32x2_t) __b, 0); -+ return __a ^ __b; - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - veor_s64 (int64x1_t __a, int64x1_t __b) - { -- return (int64x1_t)__builtin_neon_veordi (__a, __b, 1); -+ return __a ^ __b; - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - veor_u64 (uint64x1_t __a, uint64x1_t __b) - { -- return (uint64x1_t)__builtin_neon_veordi ((int64x1_t) __a, (int64x1_t) __b, 0); -+ return __a ^ __b; - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - veorq_s8 (int8x16_t __a, int8x16_t __b) - { -- return (int8x16_t)__builtin_neon_veorv16qi (__a, __b, 1); -+ return __a ^ __b; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - veorq_s16 (int16x8_t __a, int16x8_t __b) - { -- return (int16x8_t)__builtin_neon_veorv8hi (__a, __b, 1); -+ return __a ^ __b; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - veorq_s32 (int32x4_t __a, int32x4_t __b) - { -- return (int32x4_t)__builtin_neon_veorv4si (__a, __b, 1); -+ return __a ^ __b; - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - veorq_s64 (int64x2_t __a, int64x2_t __b) - { -- return (int64x2_t)__builtin_neon_veorv2di (__a, __b, 1); -+ return __a ^ __b; - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - veorq_u8 (uint8x16_t __a, uint8x16_t __b) - { -- return (uint8x16_t)__builtin_neon_veorv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); -+ return __a ^ __b; - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - veorq_u16 
(uint16x8_t __a, uint16x8_t __b) - { -- return (uint16x8_t)__builtin_neon_veorv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); -+ return __a ^ __b; - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - veorq_u32 (uint32x4_t __a, uint32x4_t __b) - { -- return (uint32x4_t)__builtin_neon_veorv4si ((int32x4_t) __a, (int32x4_t) __b, 0); -+ return __a ^ __b; - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - veorq_u64 (uint64x2_t __a, uint64x2_t __b) - { -- return (uint64x2_t)__builtin_neon_veorv2di ((int64x2_t) __a, (int64x2_t) __b, 0); -+ return __a ^ __b; - } - - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vbic_s8 (int8x8_t __a, int8x8_t __b) - { -- return (int8x8_t)__builtin_neon_vbicv8qi (__a, __b, 1); -+ return __a & ~__b; - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vbic_s16 (int16x4_t __a, int16x4_t __b) - { -- return (int16x4_t)__builtin_neon_vbicv4hi (__a, __b, 1); -+ return __a & ~__b; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vbic_s32 (int32x2_t __a, int32x2_t __b) - { -- return (int32x2_t)__builtin_neon_vbicv2si (__a, __b, 1); -+ return __a & ~__b; - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vbic_u8 (uint8x8_t __a, uint8x8_t __b) - { -- return (uint8x8_t)__builtin_neon_vbicv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); -+ return __a & ~__b; - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vbic_u16 (uint16x4_t __a, uint16x4_t __b) - { -- return (uint16x4_t)__builtin_neon_vbicv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); -+ return __a & ~__b; - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vbic_u32 (uint32x2_t __a, uint32x2_t __b) - { -- return (uint32x2_t)__builtin_neon_vbicv2si ((int32x2_t) __a, (int32x2_t) __b, 0); -+ return __a & ~__b; - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vbic_s64 (int64x1_t __a, int64x1_t __b) - { -- return (int64x1_t)__builtin_neon_vbicdi (__a, __b, 1); -+ return __a & ~__b; - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vbic_u64 (uint64x1_t __a, uint64x1_t __b) - { -- return (uint64x1_t)__builtin_neon_vbicdi ((int64x1_t) __a, (int64x1_t) __b, 0); -+ return __a & ~__b; - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vbicq_s8 (int8x16_t __a, int8x16_t __b) - { -- return (int8x16_t)__builtin_neon_vbicv16qi (__a, __b, 1); -+ return __a & ~__b; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vbicq_s16 (int16x8_t __a, int16x8_t __b) - { -- return (int16x8_t)__builtin_neon_vbicv8hi (__a, __b, 1); -+ return __a & ~__b; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vbicq_s32 (int32x4_t __a, int32x4_t __b) - { -- return (int32x4_t)__builtin_neon_vbicv4si (__a, __b, 1); -+ return __a & ~__b; - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vbicq_s64 (int64x2_t __a, int64x2_t __b) - { -- return (int64x2_t)__builtin_neon_vbicv2di (__a, __b, 1); -+ return __a & ~__b; - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vbicq_u8 (uint8x16_t __a, uint8x16_t __b) - { -- return (uint8x16_t)__builtin_neon_vbicv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); -+ return __a & ~__b; - } - - __extension__ static __inline 
uint16x8_t __attribute__ ((__always_inline__)) - vbicq_u16 (uint16x8_t __a, uint16x8_t __b) - { -- return (uint16x8_t)__builtin_neon_vbicv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); -+ return __a & ~__b; - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vbicq_u32 (uint32x4_t __a, uint32x4_t __b) - { -- return (uint32x4_t)__builtin_neon_vbicv4si ((int32x4_t) __a, (int32x4_t) __b, 0); -+ return __a & ~__b; - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vbicq_u64 (uint64x2_t __a, uint64x2_t __b) - { -- return (uint64x2_t)__builtin_neon_vbicv2di ((int64x2_t) __a, (int64x2_t) __b, 0); -+ return __a & ~__b; - } - - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vorn_s8 (int8x8_t __a, int8x8_t __b) - { -- return (int8x8_t)__builtin_neon_vornv8qi (__a, __b, 1); -+ return __a | ~__b; - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vorn_s16 (int16x4_t __a, int16x4_t __b) - { -- return (int16x4_t)__builtin_neon_vornv4hi (__a, __b, 1); -+ return __a | ~__b; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vorn_s32 (int32x2_t __a, int32x2_t __b) - { -- return (int32x2_t)__builtin_neon_vornv2si (__a, __b, 1); -+ return __a | ~__b; - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vorn_u8 (uint8x8_t __a, uint8x8_t __b) - { -- return (uint8x8_t)__builtin_neon_vornv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); -+ return __a | ~__b; - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vorn_u16 (uint16x4_t __a, uint16x4_t __b) - { -- return (uint16x4_t)__builtin_neon_vornv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); -+ return __a | ~__b; - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vorn_u32 (uint32x2_t __a, uint32x2_t __b) - { -- return (uint32x2_t)__builtin_neon_vornv2si ((int32x2_t) __a, (int32x2_t) __b, 0); -+ return __a | ~__b; - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vorn_s64 (int64x1_t __a, int64x1_t __b) - { -- return (int64x1_t)__builtin_neon_vorndi (__a, __b, 1); -+ return __a | ~__b; - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vorn_u64 (uint64x1_t __a, uint64x1_t __b) - { -- return (uint64x1_t)__builtin_neon_vorndi ((int64x1_t) __a, (int64x1_t) __b, 0); -+ return __a | ~__b; - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vornq_s8 (int8x16_t __a, int8x16_t __b) - { -- return (int8x16_t)__builtin_neon_vornv16qi (__a, __b, 1); -+ return __a | ~__b; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vornq_s16 (int16x8_t __a, int16x8_t __b) - { -- return (int16x8_t)__builtin_neon_vornv8hi (__a, __b, 1); -+ return __a | ~__b; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vornq_s32 (int32x4_t __a, int32x4_t __b) - { -- return (int32x4_t)__builtin_neon_vornv4si (__a, __b, 1); -+ return __a | ~__b; - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vornq_s64 (int64x2_t __a, int64x2_t __b) - { -- return (int64x2_t)__builtin_neon_vornv2di (__a, __b, 1); -+ return __a | ~__b; - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vornq_u8 (uint8x16_t __a, uint8x16_t __b) - { -- return (uint8x16_t)__builtin_neon_vornv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); -+ 
return __a | ~__b; - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vornq_u16 (uint16x8_t __a, uint16x8_t __b) - { -- return (uint16x8_t)__builtin_neon_vornv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); -+ return __a | ~__b; - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vornq_u32 (uint32x4_t __a, uint32x4_t __b) - { -- return (uint32x4_t)__builtin_neon_vornv4si ((int32x4_t) __a, (int32x4_t) __b, 0); -+ return __a | ~__b; - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vornq_u64 (uint64x2_t __a, uint64x2_t __b) - { -- return (uint64x2_t)__builtin_neon_vornv2di ((int64x2_t) __a, (int64x2_t) __b, 0); -+ return __a | ~__b; - } - -- - __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) - vreinterpret_p8_p16 (poly16x4_t __a) - { ---- a/src/gcc/config/arm/aarch-common.c -+++ b/src/gcc/config/arm/aarch-common.c -@@ -191,6 +191,83 @@ - return 0; - } - -+bool -+aarch_rev16_shright_mask_imm_p (rtx val, enum machine_mode mode) -+{ -+ return CONST_INT_P (val) -+ && INTVAL (val) -+ == trunc_int_for_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff), -+ mode); -+} -+ -+bool -+aarch_rev16_shleft_mask_imm_p (rtx val, enum machine_mode mode) -+{ -+ return CONST_INT_P (val) -+ && INTVAL (val) -+ == trunc_int_for_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff00), -+ mode); -+} -+ -+ -+static bool -+aarch_rev16_p_1 (rtx lhs, rtx rhs, enum machine_mode mode) -+{ -+ if (GET_CODE (lhs) == AND -+ && GET_CODE (XEXP (lhs, 0)) == ASHIFT -+ && CONST_INT_P (XEXP (XEXP (lhs, 0), 1)) -+ && INTVAL (XEXP (XEXP (lhs, 0), 1)) == 8 -+ && REG_P (XEXP (XEXP (lhs, 0), 0)) -+ && CONST_INT_P (XEXP (lhs, 1)) -+ && GET_CODE (rhs) == AND -+ && GET_CODE (XEXP (rhs, 0)) == LSHIFTRT -+ && REG_P (XEXP (XEXP (rhs, 0), 0)) -+ && CONST_INT_P (XEXP (XEXP (rhs, 0), 1)) -+ && INTVAL (XEXP (XEXP (rhs, 0), 1)) == 8 -+ && CONST_INT_P (XEXP (rhs, 1)) -+ && REGNO (XEXP (XEXP (rhs, 0), 0)) == REGNO (XEXP (XEXP (lhs, 0), 0))) -+ -+ { -+ rtx lhs_mask = XEXP (lhs, 1); -+ rtx rhs_mask = XEXP (rhs, 1); -+ -+ return aarch_rev16_shright_mask_imm_p (rhs_mask, mode) -+ && aarch_rev16_shleft_mask_imm_p (lhs_mask, mode); -+ } -+ -+ return false; -+} -+ -+/* Recognise a sequence of bitwise operations corresponding to a rev16 operation. -+ These will be of the form: -+ ((x >> 8) & 0x00ff00ff) -+ | ((x << 8) & 0xff00ff00) -+ for SImode and with similar but wider bitmasks for DImode. -+ The two sub-expressions of the IOR can appear on either side so check both -+ permutations with the help of aarch_rev16_p_1 above. */ -+ -+bool -+aarch_rev16_p (rtx x) -+{ -+ rtx left_sub_rtx, right_sub_rtx; -+ bool is_rev = false; -+ -+ if (GET_CODE (x) != IOR) -+ return false; -+ -+ left_sub_rtx = XEXP (x, 0); -+ right_sub_rtx = XEXP (x, 1); -+ -+ /* There are no canonicalisation rules for the position of the two shifts -+ involved in a rev, so try both permutations. */ -+ is_rev = aarch_rev16_p_1 (left_sub_rtx, right_sub_rtx, GET_MODE (x)); -+ -+ if (!is_rev) -+ is_rev = aarch_rev16_p_1 (right_sub_rtx, left_sub_rtx, GET_MODE (x)); -+ -+ return is_rev; -+} -+ - /* Return nonzero if the CONSUMER instruction (a load) does need - PRODUCER's value to calculate the address. 
*/ - int ---- a/src/gcc/config/arm/arm-fpus.def -+++ b/src/gcc/config/arm/arm-fpus.def -@@ -37,6 +37,8 @@ - ARM_FPU("vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, false, true, false) - ARM_FPU("vfpv4-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_D16, false, true, false) - ARM_FPU("fpv4-sp-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_SINGLE, false, true, false) -+ARM_FPU("fpv5-sp-d16", ARM_FP_MODEL_VFP, 5, VFP_REG_SINGLE, false, true, false) -+ARM_FPU("fpv5-d16", ARM_FP_MODEL_VFP, 5, VFP_REG_D16, false, true, false) - ARM_FPU("neon-vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, true, true, false) - ARM_FPU("fp-armv8", ARM_FP_MODEL_VFP, 8, VFP_REG_D32, false, true, false) - ARM_FPU("neon-fp-armv8",ARM_FP_MODEL_VFP, 8, VFP_REG_D32, true, true, false) ---- a/src/gcc/config/arm/cortex-a53.md -+++ b/src/gcc/config/arm/cortex-a53.md -@@ -75,7 +75,7 @@ - (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ - alu_reg,alus_reg,logic_reg,logics_reg,\ - adc_imm,adcs_imm,adc_reg,adcs_reg,\ -- adr,bfm,csel,rev,\ -+ adr,bfm,csel,clz,rbit,rev,\ - shift_imm,shift_reg,\ - mov_imm,mov_reg,mvn_imm,mvn_reg,\ - mrs,multiple,no_insn")) -@@ -84,8 +84,8 @@ - (define_insn_reservation "cortex_a53_alu_shift" 2 - (and (eq_attr "tune" "cortexa53") - (eq_attr "type" "alu_shift_imm,alus_shift_imm,\ -- logic_shift_imm,logics_shift_imm,\ -- alu_shift_reg,alus_shift_reg,\ -+ crc,logic_shift_imm,logics_shift_imm,\ -+ alu_ext,alus_ext,alu_shift_reg,alus_shift_reg,\ - logic_shift_reg,logics_shift_reg,\ - extend,mov_shift,mov_shift_reg,\ - mvn_shift,mvn_shift_reg")) -@@ -216,7 +216,8 @@ - (and (eq_attr "tune" "cortexa53") - (eq_attr "type" "ffariths, fadds, ffarithd, faddd, fmov, fmuls,\ - f_cvt,f_cvtf2i,f_cvti2f,\ -- fcmps, fcmpd, fcsel")) -+ fcmps, fcmpd, fcsel, f_rints, f_rintd, f_minmaxs,\ -+ f_minmaxd")) - "cortex_a53_slot0+cortex_a53_fpadd_pipe") - - (define_insn_reservation "cortex_a53_fconst" 2 ---- a/src/gcc/config/arm/bpabi.h -+++ b/src/gcc/config/arm/bpabi.h -@@ -73,7 +73,7 @@ - |mcpu=generic-armv7-a \ - |march=armv7ve \ - |march=armv7-m|mcpu=cortex-m3 \ -- |march=armv7e-m|mcpu=cortex-m4 \ -+ |march=armv7e-m|mcpu=cortex-m4|mcpu=cortex-m7 \ - |march=armv6-m|mcpu=cortex-m0 \ - |march=armv8-a \ - :%{!r:--be8}}}" -@@ -91,7 +91,7 @@ - |mcpu=generic-armv7-a \ - |march=armv7ve \ - |march=armv7-m|mcpu=cortex-m3 \ -- |march=armv7e-m|mcpu=cortex-m4 \ -+ |march=armv7e-m|mcpu=cortex-m4|mcpu=cortex-m7 \ - |march=armv6-m|mcpu=cortex-m0 \ - |march=armv8-a \ - :%{!r:--be8}}}" ---- a/src/gcc/config/arm/iterators.md -+++ b/src/gcc/config/arm/iterators.md -@@ -116,6 +116,9 @@ - ;; Vector modes including 64-bit integer elements, but no floats. - (define_mode_iterator VDQIX [V8QI V16QI V4HI V8HI V2SI V4SI DI V2DI]) - -+;; Vector modes for H, S and D types. -+(define_mode_iterator VDQHSD [V4HI V8HI V2SI V4SI V2DI]) -+ - ;; Vector modes for float->int conversions. - (define_mode_iterator VCVTF [V2SF V4SF]) - -@@ -191,6 +194,23 @@ - ;; Right shifts - (define_code_iterator rshifts [ashiftrt lshiftrt]) - -+;; Iterator for integer conversions -+(define_code_iterator FIXUORS [fix unsigned_fix]) -+ -+;; Binary operators whose second operand can be shifted. -+(define_code_iterator shiftable_ops [plus minus ior xor and]) -+ -+;; plus and minus are the only shiftable_ops for which Thumb2 allows -+;; a stack pointer opoerand. The minus operation is a candidate for an rsub -+;; and hence only plus is supported. 
-+(define_code_attr t2_binop0 -+ [(plus "rk") (minus "r") (ior "r") (xor "r") (and "r")]) -+ -+;; The instruction to use when a shiftable_ops has a shift operation as -+;; its first operand. -+(define_code_attr arith_shift_insn -+ [(plus "add") (minus "rsb") (ior "orr") (xor "eor") (and "and")]) -+ - ;;---------------------------------------------------------------------------- - ;; Int iterators - ;;---------------------------------------------------------------------------- -@@ -198,9 +218,13 @@ - (define_int_iterator VRINT [UNSPEC_VRINTZ UNSPEC_VRINTP UNSPEC_VRINTM - UNSPEC_VRINTR UNSPEC_VRINTX UNSPEC_VRINTA]) - -+(define_int_iterator VCVT [UNSPEC_VRINTP UNSPEC_VRINTM UNSPEC_VRINTA]) -+ - (define_int_iterator NEON_VRINT [UNSPEC_NVRINTP UNSPEC_NVRINTZ UNSPEC_NVRINTM - UNSPEC_NVRINTX UNSPEC_NVRINTA UNSPEC_NVRINTN]) - -+(define_int_iterator NEON_VCVT [UNSPEC_NVRINTP UNSPEC_NVRINTM UNSPEC_NVRINTA]) -+ - (define_int_iterator CRC [UNSPEC_CRC32B UNSPEC_CRC32H UNSPEC_CRC32W - UNSPEC_CRC32CB UNSPEC_CRC32CH UNSPEC_CRC32CW]) - -@@ -502,6 +526,13 @@ - ;; Assembler mnemonics for signedness of widening operations. - (define_code_attr US [(sign_extend "s") (zero_extend "u")]) - -+;; Signedness suffix for float->fixed conversions. Empty for signed -+;; conversion. -+(define_code_attr su_optab [(fix "") (unsigned_fix "u")]) -+ -+;; Sign prefix to use in instruction type suffixes, i.e. s32, u32. -+(define_code_attr su [(fix "s") (unsigned_fix "u")]) -+ - ;; Right shifts - (define_code_attr shift [(ashiftrt "ashr") (lshiftrt "lshr")]) - (define_code_attr shifttype [(ashiftrt "signed") (lshiftrt "unsigned")]) ---- a/src/gcc/config/arm/arm.md -+++ b/src/gcc/config/arm/arm.md -@@ -205,17 +205,9 @@ - (const_string "yes")] - (const_string "no"))) - --; Allows an insn to disable certain alternatives for reasons other than --; arch support. --(define_attr "insn_enabled" "no,yes" -- (const_string "yes")) -- - ; Enable all alternatives that are both arch_enabled and insn_enabled. 
- (define_attr "enabled" "no,yes" -- (cond [(eq_attr "insn_enabled" "no") -- (const_string "no") -- -- (and (eq_attr "predicable_short_it" "no") -+ (cond [(and (eq_attr "predicable_short_it" "no") - (and (eq_attr "predicated" "yes") - (match_test "arm_restrict_it"))) - (const_string "no") -@@ -2868,6 +2860,28 @@ - (set_attr "type" "multiple")] - ) - -+(define_insn_and_split "*anddi_notdi_zesidi" -+ [(set (match_operand:DI 0 "s_register_operand" "=r") -+ (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "r")) -+ (zero_extend:DI -+ (match_operand:SI 1 "s_register_operand" "r"))))] -+ "TARGET_32BIT" -+ "#" -+ "TARGET_32BIT && reload_completed" -+ [(set (match_dup 0) (and:SI (not:SI (match_dup 2)) (match_dup 1))) -+ (set (match_dup 3) (const_int 0))] -+ " -+ { -+ operands[3] = gen_highpart (SImode, operands[0]); -+ operands[0] = gen_lowpart (SImode, operands[0]); -+ operands[2] = gen_lowpart (SImode, operands[2]); -+ }" -+ [(set_attr "length" "8") -+ (set_attr "predicable" "yes") -+ (set_attr "predicable_short_it" "no") -+ (set_attr "type" "multiple")] -+) -+ - (define_insn_and_split "*anddi_notsesidi_di" - [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") - (and:DI (not:DI (sign_extend:DI -@@ -8906,7 +8920,7 @@ - return \"\"; - }" - [(set_attr "conds" "use") -- (set_attr "type" "f_sel<vfp_type>")] -+ (set_attr "type" "fcsel")] - ) - - (define_insn_and_split "*movsicc_insn" -@@ -9343,8 +9357,10 @@ - "TARGET_32BIT" - " - { -- if (!REG_P (XEXP (operands[0], 0)) -- && (GET_CODE (XEXP (operands[0], 0)) != SYMBOL_REF)) -+ if ((!REG_P (XEXP (operands[0], 0)) -+ && GET_CODE (XEXP (operands[0], 0)) != SYMBOL_REF) -+ || (GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF -+ && arm_is_long_call_p (SYMBOL_REF_DECL (XEXP (operands[0], 0))))) - XEXP (operands[0], 0) = force_reg (SImode, XEXP (operands[0], 0)); - - if (operands[2] == NULL_RTX) -@@ -9361,8 +9377,10 @@ - "TARGET_32BIT" - " - { -- if (!REG_P (XEXP (operands[1], 0)) && -- (GET_CODE (XEXP (operands[1],0)) != SYMBOL_REF)) -+ if ((!REG_P (XEXP (operands[1], 0)) -+ && GET_CODE (XEXP (operands[1], 0)) != SYMBOL_REF) -+ || (GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF -+ && arm_is_long_call_p (SYMBOL_REF_DECL (XEXP (operands[1], 0))))) - XEXP (operands[1], 0) = force_reg (SImode, XEXP (operands[1], 0)); - - if (operands[3] == NULL_RTX) -@@ -9848,39 +9866,35 @@ - - ;; Patterns to allow combination of arithmetic, cond code and shifts - --(define_insn "*arith_shiftsi" -- [(set (match_operand:SI 0 "s_register_operand" "=r,r,r,r") -- (match_operator:SI 1 "shiftable_operator" -- [(match_operator:SI 3 "shift_operator" -- [(match_operand:SI 4 "s_register_operand" "r,r,r,r") -- (match_operand:SI 5 "shift_amount_operand" "M,M,M,r")]) -- (match_operand:SI 2 "s_register_operand" "rk,rk,r,rk")]))] -+(define_insn "*<arith_shift_insn>_multsi" -+ [(set (match_operand:SI 0 "s_register_operand" "=r,r") -+ (shiftable_ops:SI -+ (mult:SI (match_operand:SI 2 "s_register_operand" "r,r") -+ (match_operand:SI 3 "power_of_two_operand" "")) -+ (match_operand:SI 1 "s_register_operand" "rk,<t2_binop0>")))] - "TARGET_32BIT" -- "%i1%?\\t%0, %2, %4%S3" -+ "<arith_shift_insn>%?\\t%0, %1, %2, lsl %b3" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "shift" "4") -- (set_attr "arch" "a,t2,t2,a") -- ;; Thumb2 doesn't allow the stack pointer to be used for -- ;; operand1 for all operations other than add and sub. In this case -- ;; the minus operation is a candidate for an rsub and hence needs -- ;; to be disabled. 
-- ;; We have to make sure to disable the fourth alternative if -- ;; the shift_operator is MULT, since otherwise the insn will -- ;; also match a multiply_accumulate pattern and validate_change -- ;; will allow a replacement of the constant with a register -- ;; despite the checks done in shift_operator. -- (set_attr_alternative "insn_enabled" -- [(const_string "yes") -- (if_then_else -- (match_operand:SI 1 "add_operator" "") -- (const_string "yes") (const_string "no")) -- (const_string "yes") -- (if_then_else -- (match_operand:SI 3 "mult_operator" "") -- (const_string "no") (const_string "yes"))]) -- (set_attr "type" "alu_shift_imm,alu_shift_imm,alu_shift_imm,alu_shift_reg")]) -+ (set_attr "arch" "a,t2") -+ (set_attr "type" "alu_shift_imm")]) - -+(define_insn "*<arith_shift_insn>_shiftsi" -+ [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") -+ (shiftable_ops:SI -+ (match_operator:SI 2 "shift_nomul_operator" -+ [(match_operand:SI 3 "s_register_operand" "r,r,r") -+ (match_operand:SI 4 "shift_amount_operand" "M,M,r")]) -+ (match_operand:SI 1 "s_register_operand" "rk,<t2_binop0>,rk")))] -+ "TARGET_32BIT && GET_CODE (operands[3]) != MULT" -+ "<arith_shift_insn>%?\\t%0, %1, %3%S2" -+ [(set_attr "predicable" "yes") -+ (set_attr "predicable_short_it" "no") -+ (set_attr "shift" "4") -+ (set_attr "arch" "a,t2,a") -+ (set_attr "type" "alu_shift_imm,alu_shift_imm,alu_shift_reg")]) -+ - (define_split - [(set (match_operand:SI 0 "s_register_operand" "") - (match_operator:SI 1 "shiftable_operator" -@@ -12169,7 +12183,7 @@ - int num_regs = XVECLEN (operands[0], 0); - char pattern[100]; - rtx op_list[2]; -- strcpy (pattern, \"fldmfdd\\t\"); -+ strcpy (pattern, \"vldm\\t\"); - strcat (pattern, reg_names[REGNO (SET_DEST (XVECEXP (operands[0], 0, 0)))]); - strcat (pattern, \"!, {\"); - op_list[0] = XEXP (XVECEXP (operands[0], 0, 1), 0); -@@ -12373,6 +12387,7 @@ - "TARGET_32BIT && arm_arch5" - "clz%?\\t%0, %1" - [(set_attr "predicable" "yes") -+ (set_attr "predicable_short_it" "no") - (set_attr "type" "clz")]) - - (define_insn "rbitsi2" -@@ -12381,6 +12396,7 @@ - "TARGET_32BIT && arm_arch_thumb2" - "rbit%?\\t%0, %1" - [(set_attr "predicable" "yes") -+ (set_attr "predicable_short_it" "no") - (set_attr "type" "clz")]) - - (define_expand "ctzsi2" -@@ -12556,6 +12572,8 @@ - rev%?\t%0, %1" - [(set_attr "arch" "t1,t2,32") - (set_attr "length" "2,2,4") -+ (set_attr "predicable" "no,yes,yes") -+ (set_attr "predicable_short_it" "no") - (set_attr "type" "rev")] - ) - -@@ -12673,6 +12691,44 @@ - (set_attr "type" "rev")] - ) - -+;; There are no canonicalisation rules for the position of the lshiftrt, ashift -+;; operations within an IOR/AND RTX, therefore we have two patterns matching -+;; each valid permutation. 
-+ -+(define_insn "arm_rev16si2" -+ [(set (match_operand:SI 0 "register_operand" "=l,l,r") -+ (ior:SI (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "l,l,r") -+ (const_int 8)) -+ (match_operand:SI 3 "const_int_operand" "n,n,n")) -+ (and:SI (lshiftrt:SI (match_dup 1) -+ (const_int 8)) -+ (match_operand:SI 2 "const_int_operand" "n,n,n"))))] -+ "arm_arch6 -+ && aarch_rev16_shleft_mask_imm_p (operands[3], SImode) -+ && aarch_rev16_shright_mask_imm_p (operands[2], SImode)" -+ "rev16\\t%0, %1" -+ [(set_attr "arch" "t1,t2,32") -+ (set_attr "length" "2,2,4") -+ (set_attr "type" "rev")] -+) -+ -+(define_insn "arm_rev16si2_alt" -+ [(set (match_operand:SI 0 "register_operand" "=l,l,r") -+ (ior:SI (and:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" "l,l,r") -+ (const_int 8)) -+ (match_operand:SI 2 "const_int_operand" "n,n,n")) -+ (and:SI (ashift:SI (match_dup 1) -+ (const_int 8)) -+ (match_operand:SI 3 "const_int_operand" "n,n,n"))))] -+ "arm_arch6 -+ && aarch_rev16_shleft_mask_imm_p (operands[3], SImode) -+ && aarch_rev16_shright_mask_imm_p (operands[2], SImode)" -+ "rev16\\t%0, %1" -+ [(set_attr "arch" "t1,t2,32") -+ (set_attr "length" "2,2,4") -+ (set_attr "type" "rev")] -+) -+ - (define_expand "bswaphi2" - [(set (match_operand:HI 0 "s_register_operand" "=r") - (bswap:HI (match_operand:HI 1 "s_register_operand" "r")))] ---- a/src/gcc/config/arm/cortex-a5.md -+++ b/src/gcc/config/arm/cortex-a5.md -@@ -61,7 +61,7 @@ - (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ - alu_reg,alus_reg,logic_reg,logics_reg,\ - adc_imm,adcs_imm,adc_reg,adcs_reg,\ -- adr,bfm,rev,\ -+ adr,bfm,clz,rbit,rev,\ - shift_imm,shift_reg,\ - mov_imm,mov_reg,mvn_imm,mvn_reg,\ - mrs,multiple,no_insn")) ---- a/src/gcc/config/arm/cortex-a9.md -+++ b/src/gcc/config/arm/cortex-a9.md -@@ -83,7 +83,7 @@ - (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ - alu_reg,alus_reg,logic_reg,logics_reg,\ - adc_imm,adcs_imm,adc_reg,adcs_reg,\ -- adr,bfm,rev,\ -+ adr,bfm,clz,rbit,rev,\ - shift_imm,shift_reg,\ - mov_imm,mov_reg,mvn_imm,mvn_reg,\ - mov_shift_reg,mov_shift,\ ---- a/src/gcc/config/mips/mips.c -+++ b/src/gcc/config/mips/mips.c -@@ -7197,12 +7197,17 @@ - emit_insn (gen_slt_sf (dest, fp2, fp1)); - } - --/* Implement MOVE_BY_PIECES_P. */ -+/* Implement TARGET_USE_MOVE_BY_PIECES_INFRASTRUCTURE_P. */ - - bool --mips_move_by_pieces_p (unsigned HOST_WIDE_INT size, unsigned int align) -+mips_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size, -+ unsigned int align, -+ enum by_pieces_operation op, -+ bool speed_p) - { -- if (HAVE_movmemsi) -+ if (op == STORE_BY_PIECES) -+ return mips_store_by_pieces_p (size, align); -+ if (op == MOVE_BY_PIECES && HAVE_movmemsi) - { - /* movmemsi is meant to generate code that is at least as good as - move_by_pieces. However, movmemsi effectively uses a by-pieces -@@ -7219,13 +7224,12 @@ - return size < UNITS_PER_WORD; - return size <= MIPS_MAX_MOVE_BYTES_STRAIGHT; - } -- /* The default value. If this becomes a target hook, we should -- call the default definition instead. */ -- return (move_by_pieces_ninsns (size, align, MOVE_MAX_PIECES + 1) -- < (unsigned int) MOVE_RATIO (optimize_insn_for_speed_p ())); -+ -+ return default_use_by_pieces_infrastructure_p (size, align, op, speed_p); - } - --/* Implement STORE_BY_PIECES_P. */ -+/* Implement a handler for STORE_BY_PIECES operations -+ for TARGET_USE_MOVE_BY_PIECES_INFRASTRUCTURE_P. 
*/ - - bool - mips_store_by_pieces_p (unsigned HOST_WIDE_INT size, unsigned int align) -@@ -19134,6 +19138,10 @@ - #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV - #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV mips_atomic_assign_expand_fenv - -+#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P -+#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \ -+ mips_use_by_pieces_infrastructure_p -+ - struct gcc_target targetm = TARGET_INITIALIZER; - - #include "gt-mips.h" ---- a/src/gcc/config/mips/mips.h -+++ b/src/gcc/config/mips/mips.h -@@ -2867,9 +2867,6 @@ - ? MIPS_MAX_MOVE_BYTES_STRAIGHT / MOVE_MAX \ - : MIPS_CALL_RATIO / 2) - --#define MOVE_BY_PIECES_P(SIZE, ALIGN) \ -- mips_move_by_pieces_p (SIZE, ALIGN) -- - /* For CLEAR_RATIO, when optimizing for size, give a better estimate - of the length of a memset call, but use the default otherwise. */ - -@@ -2882,9 +2879,6 @@ - - #define SET_RATIO(speed) \ - ((speed) ? 15 : MIPS_CALL_RATIO - 2) -- --#define STORE_BY_PIECES_P(SIZE, ALIGN) \ -- mips_store_by_pieces_p (SIZE, ALIGN) - - /* Since the bits of the _init and _fini function is spread across - many object files, each potentially with its own GP, we must assume ---- a/src/gcc/params.def -+++ b/src/gcc/params.def -@@ -303,7 +303,7 @@ - DEFPARAM(PARAM_MAX_COMPLETELY_PEELED_INSNS, - "max-completely-peeled-insns", - "The maximum number of insns of a completely peeled loop", -- 100, 0, 0) -+ 200, 0, 0) - /* The maximum number of peelings of a single loop that is peeled completely. */ - DEFPARAM(PARAM_MAX_COMPLETELY_PEEL_TIMES, - "max-completely-peel-times", -@@ -1095,6 +1095,21 @@ - "Maximum number of nested calls to search for control dependencies " - "during uninitialized variable analysis", - 1000, 1, 0) -+ -+DEFPARAM (PARAM_MAX_FSM_THREAD_PATH_INSNS, -+ "max-fsm-thread-path-insns", -+ "Maximum number of instructions to copy when duplicating blocks on a finite state automaton jump thread path", -+ 100, 1, 999999) -+ -+DEFPARAM (PARAM_MAX_FSM_THREAD_LENGTH, -+ "max-fsm-thread-length", -+ "Maximum number of basic blocks on a finite state automaton jump thread path", -+ 10, 1, 999999) -+ -+DEFPARAM (PARAM_MAX_FSM_THREAD_PATHS, -+ "max-fsm-thread-paths", -+ "Maximum number of new jump thread paths to create for a finite state automaton", -+ 50, 1, 999999) - /* - - Local variables: ---- a/src/gcc/tree-ssa-threadedge.c -+++ b/src/gcc/tree-ssa-threadedge.c -@@ -617,6 +617,7 @@ - rather than use a relational operator. These are simpler to handle. */ - if (TREE_CODE (cond) == SSA_NAME) - { -+ tree original_lhs = cond; - cached_lhs = cond; - - /* Get the variable's current value from the equivalence chains. -@@ -638,6 +639,12 @@ - pass specific callback to try and simplify it further. */ - if (cached_lhs && ! is_gimple_min_invariant (cached_lhs)) - cached_lhs = (*simplify) (stmt, stmt); -+ -+ /* We couldn't find an invariant. But, callers of this -+ function may be able to do something useful with the -+ unmodified destination. */ -+ if (!cached_lhs) -+ cached_lhs = original_lhs; - } - else - cached_lhs = NULL; -@@ -897,6 +904,248 @@ - return false; - } - -+/* Return true if the CFG contains at least one path from START_BB to END_BB. -+ When a path is found, record in PATH the blocks from END_BB to START_BB. -+ VISITED_BBS is used to make sure we don't fall into an infinite loop. Bound -+ the recursion to basic blocks belonging to LOOP. 
*/ -+ -+static bool -+fsm_find_thread_path (basic_block start_bb, basic_block end_bb, -+ vec<basic_block, va_gc> *&path, -+ pointer_set_t *visited_bbs, loop_p loop) -+{ -+ if (loop != start_bb->loop_father) -+ return false; -+ -+ if (start_bb == end_bb) -+ { -+ vec_safe_push (path, start_bb); -+ return true; -+ } -+ -+ if (!pointer_set_insert (visited_bbs, start_bb)) -+ { -+ edge e; -+ edge_iterator ei; -+ FOR_EACH_EDGE (e, ei, start_bb->succs) -+ if (fsm_find_thread_path (e->dest, end_bb, path, visited_bbs, loop)) -+ { -+ vec_safe_push (path, start_bb); -+ return true; -+ } -+ } -+ -+ return false; -+} -+ -+static int max_threaded_paths; -+ -+/* We trace the value of the variable EXPR back through any phi nodes looking -+ for places where it gets a constant value and save the path. Stop after -+ having recorded MAX_PATHS jump threading paths. */ -+ -+static void -+fsm_find_control_statement_thread_paths (tree expr, -+ pointer_set_t *visited_phis, -+ vec<basic_block, va_gc> *&path) -+{ -+ tree var = SSA_NAME_VAR (expr); -+ gimple def_stmt = SSA_NAME_DEF_STMT (expr); -+ basic_block var_bb = gimple_bb (def_stmt); -+ -+ if (var == NULL || var_bb == NULL) -+ return; -+ -+ /* For the moment we assume that an SSA chain only contains phi nodes, and -+ eventually one of the phi arguments will be an integer constant. In the -+ future, this could be extended to also handle simple assignments of -+ arithmetic operations. */ -+ if (gimple_code (def_stmt) != GIMPLE_PHI) -+ return; -+ -+ /* Avoid infinite recursion. */ -+ if (pointer_set_insert (visited_phis, def_stmt)) -+ return; -+ -+ int next_path_length = 0; -+ basic_block last_bb_in_path = path->last (); -+ -+ /* Following the chain of SSA_NAME definitions, we jumped from a definition in -+ LAST_BB_IN_PATH to a definition in VAR_BB. When these basic blocks are -+ different, append to PATH the blocks from LAST_BB_IN_PATH to VAR_BB. */ -+ if (var_bb != last_bb_in_path) -+ { -+ edge e; -+ int e_count = 0; -+ edge_iterator ei; -+ vec<basic_block, va_gc> *next_path; -+ vec_alloc (next_path, n_basic_blocks_for_fn (cfun)); -+ -+ FOR_EACH_EDGE (e, ei, last_bb_in_path->preds) -+ { -+ pointer_set_t *visited_bbs = pointer_set_create (); -+ -+ if (fsm_find_thread_path (var_bb, e->src, next_path, visited_bbs, -+ e->src->loop_father)) -+ ++e_count; -+ -+ pointer_set_destroy (visited_bbs); -+ -+ /* If there is more than one path, stop. */ -+ if (e_count > 1) -+ { -+ vec_free (next_path); -+ return; -+ } -+ } -+ -+ /* Stop if we have not found a path: this could occur when the recursion -+ is stopped by one of the bounds. */ -+ if (e_count == 0) -+ { -+ vec_free (next_path); -+ return; -+ } -+ -+ /* Append all the nodes from NEXT_PATH to PATH. */ -+ vec_safe_splice (path, next_path); -+ next_path_length = next_path->length (); -+ vec_free (next_path); -+ } -+ -+ gcc_assert (path->last () == var_bb); -+ -+ /* Iterate over the arguments of PHI. */ -+ unsigned int i; -+ for (i = 0; i < gimple_phi_num_args (def_stmt); i++) -+ { -+ tree arg = gimple_phi_arg_def (def_stmt, i); -+ basic_block bbi = gimple_phi_arg_edge (def_stmt, i)->src; -+ -+ /* Skip edges pointing outside the current loop. */ -+ if (!arg || var_bb->loop_father != bbi->loop_father) -+ continue; -+ -+ if (TREE_CODE (arg) == SSA_NAME) -+ { -+ vec_safe_push (path, bbi); -+ /* Recursively follow SSA_NAMEs looking for a constant definition. 
*/ -+ fsm_find_control_statement_thread_paths (arg, visited_phis, path); -+ path->pop (); -+ continue; -+ } -+ -+ if (TREE_CODE (arg) != INTEGER_CST) -+ continue; -+ -+ int path_length = path->length (); -+ /* A path with less than 2 basic blocks should not be jump-threaded. */ -+ if (path_length < 2) -+ continue; -+ -+ if (path_length > PARAM_VALUE (PARAM_MAX_FSM_THREAD_LENGTH)) -+ { -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ fprintf (dump_file, "FSM jump-thread path not considered: " -+ "the number of basic blocks on the path " -+ "exceeds PARAM_MAX_FSM_THREAD_LENGTH.\n"); -+ continue; -+ } -+ -+ if (max_threaded_paths <= 0) -+ { -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ fprintf (dump_file, "FSM jump-thread path not considered: " -+ "the number of previously recorded FSM paths to thread " -+ "exceeds PARAM_MAX_FSM_THREAD_PATHS.\n"); -+ continue; -+ } -+ -+ /* Add BBI to the path. */ -+ vec_safe_push (path, bbi); -+ ++path_length; -+ -+ int n_insns = 0; -+ gimple_stmt_iterator gsi; -+ int j; -+ loop_p loop = (*path)[0]->loop_father; -+ bool path_crosses_loops = false; -+ -+ /* Count the number of instructions on the path: as these instructions -+ will have to be duplicated, we will not record the path if there are -+ too many instructions on the path. Also check that all the blocks in -+ the path belong to a single loop. */ -+ for (j = 1; j < path_length - 1; j++) -+ { -+ basic_block bb = (*path)[j]; -+ -+ if (bb->loop_father != loop) -+ { -+ path_crosses_loops = true; -+ break; -+ } -+ -+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) -+ { -+ gimple stmt = gsi_stmt (gsi); -+ /* Do not count empty statements and labels. */ -+ if (gimple_code (stmt) != GIMPLE_NOP -+ && gimple_code (stmt) != GIMPLE_LABEL -+ && !is_gimple_debug (stmt)) -+ ++n_insns; -+ } -+ } -+ -+ if (path_crosses_loops) -+ { -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ fprintf (dump_file, "FSM jump-thread path not considered: " -+ "the path crosses loops.\n"); -+ path->pop (); -+ continue; -+ } -+ -+ if (n_insns >= PARAM_VALUE (PARAM_MAX_FSM_THREAD_PATH_INSNS)) -+ { -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ fprintf (dump_file, "FSM jump-thread path not considered: " -+ "the number of instructions on the path " -+ "exceeds PARAM_MAX_FSM_THREAD_PATH_INSNS.\n"); -+ path->pop (); -+ continue; -+ } -+ -+ vec<jump_thread_edge *> *jump_thread_path -+ = new vec<jump_thread_edge *> (); -+ -+ /* Record the edges between the blocks in PATH. */ -+ for (j = 0; j < path_length - 1; j++) -+ { -+ edge e = find_edge ((*path)[path_length - j - 1], -+ (*path)[path_length - j - 2]); -+ gcc_assert (e); -+ jump_thread_edge *x = new jump_thread_edge (e, EDGE_FSM_THREAD); -+ jump_thread_path->safe_push (x); -+ } -+ -+ /* Add the edge taken when the control variable has value ARG. */ -+ edge taken_edge = find_taken_edge ((*path)[0], arg); -+ jump_thread_edge *x -+ = new jump_thread_edge (taken_edge, EDGE_NO_COPY_SRC_BLOCK); -+ jump_thread_path->safe_push (x); -+ -+ register_jump_thread (jump_thread_path); -+ --max_threaded_paths; -+ -+ /* Remove BBI from the path. */ -+ path->pop (); -+ } -+ -+ /* Remove all the nodes that we added from NEXT_PATH. */ -+ if (next_path_length) -+ vec_safe_truncate (path, (path->length () - next_path_length)); -+} -+ - /* We are exiting E->src, see if E->dest ends with a conditional - jump which has a known value when reached via E. 
- -@@ -982,7 +1231,10 @@ - cond = simplify_control_stmt_condition (e, stmt, dummy_cond, simplify, - handle_dominating_asserts); - -- if (cond && is_gimple_min_invariant (cond)) -+ if (!cond) -+ return 0; -+ -+ if (is_gimple_min_invariant (cond)) - { - edge taken_edge = find_taken_edge (e->dest, cond); - basic_block dest = (taken_edge ? taken_edge->dest : NULL); -@@ -1028,6 +1280,27 @@ - backedge_seen_p); - return 1; - } -+ -+ if (!flag_expensive_optimizations -+ || optimize_function_for_size_p (cfun) -+ || TREE_CODE (cond) != SSA_NAME -+ || e->dest->loop_father != e->src->loop_father -+ || loop_depth (e->dest->loop_father) == 0) -+ return 0; -+ -+ /* When COND cannot be simplified, try to find paths from a control -+ statement back through the PHI nodes which would affect that control -+ statement. */ -+ vec<basic_block, va_gc> *bb_path; -+ vec_alloc (bb_path, n_basic_blocks_for_fn (cfun)); -+ vec_safe_push (bb_path, e->dest); -+ pointer_set_t *visited_phis = pointer_set_create (); -+ -+ max_threaded_paths = PARAM_VALUE (PARAM_MAX_FSM_THREAD_PATHS); -+ fsm_find_control_statement_thread_paths (cond, visited_phis, bb_path); -+ -+ pointer_set_destroy (visited_phis); -+ vec_free (bb_path); - } - return 0; - } ---- a/src/gcc/convert.c -+++ b/src/gcc/convert.c -@@ -471,8 +471,8 @@ - break; - - CASE_FLT_FN (BUILT_IN_ROUND): -- /* Only convert in ISO C99 mode. */ -- if (!targetm.libc_has_function (function_c99_misc)) -+ /* Only convert in ISO C99 mode and with -fno-math-errno. */ -+ if (!targetm.libc_has_function (function_c99_misc) || flag_errno_math) - break; - if (outprec < TYPE_PRECISION (integer_type_node) - || (outprec == TYPE_PRECISION (integer_type_node) -@@ -492,8 +492,8 @@ - break; - /* ... Fall through ... */ - CASE_FLT_FN (BUILT_IN_RINT): -- /* Only convert in ISO C99 mode. */ -- if (!targetm.libc_has_function (function_c99_misc)) -+ /* Only convert in ISO C99 mode and with -fno-math-errno. */ -+ if (!targetm.libc_has_function (function_c99_misc) || flag_errno_math) - break; - if (outprec < TYPE_PRECISION (integer_type_node) - || (outprec == TYPE_PRECISION (integer_type_node) ---- a/src/libobjc/ChangeLog.linaro -+++ b/src/libobjc/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/libvtv/ChangeLog.linaro -+++ b/src/libvtv/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. 
-+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/libgfortran/configure -+++ b/src/libgfortran/configure -@@ -25941,7 +25941,7 @@ - # test is copied from libgomp, and modified to not link in -lrt as - # libgfortran calls clock_gettime via a weak reference if it's found - # in librt. --if test $ac_cv_func_clock_gettime = no; then -+if test "$ac_cv_func_clock_gettime" = no; then - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for clock_gettime in -lrt" >&5 - $as_echo_n "checking for clock_gettime in -lrt... " >&6; } - if test "${ac_cv_lib_rt_clock_gettime+set}" = set; then : ---- a/src/libgfortran/configure.ac -+++ b/src/libgfortran/configure.ac -@@ -511,7 +511,7 @@ - # test is copied from libgomp, and modified to not link in -lrt as - # libgfortran calls clock_gettime via a weak reference if it's found - # in librt. --if test $ac_cv_func_clock_gettime = no; then -+if test "$ac_cv_func_clock_gettime" = no; then - AC_CHECK_LIB(rt, clock_gettime, - [AC_DEFINE(HAVE_CLOCK_GETTIME_LIBRT, 1, - [Define to 1 if you have the `clock_gettime' function in librt.])]) ---- a/src/libgfortran/ChangeLog.linaro -+++ b/src/libgfortran/ChangeLog.linaro -@@ -0,0 +1,59 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209747. -+ 2014-04-24 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * configure.ac: Quote usage of ac_cv_func_clock_gettime in if test. -+ * configure: Regenerate. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. 
-+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/libada/ChangeLog.linaro -+++ b/src/libada/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/libffi/ChangeLog.linaro -+++ b/src/libffi/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/libssp/ChangeLog.linaro -+++ b/src/libssp/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. 
-+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/libcilkrts/ChangeLog.linaro -+++ b/src/libcilkrts/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/libcpp/ChangeLog.linaro -+++ b/src/libcpp/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/libcpp/po/ChangeLog.linaro -+++ b/src/libcpp/po/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. 
-+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/fixincludes/ChangeLog.linaro -+++ b/src/fixincludes/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. |