Diffstat (limited to 'debian/patches/gcc-linaro.diff')
-rw-r--r--  debian/patches/gcc-linaro.diff | 48934
 1 file changed, 3 insertions(+), 48931 deletions(-)
diff --git a/debian/patches/gcc-linaro.diff b/debian/patches/gcc-linaro.diff
index e7d7c84..b363b1f 100644
--- a/debian/patches/gcc-linaro.diff
+++ b/debian/patches/gcc-linaro.diff
@@ -1,48934 +1,6 @@
-# DP: Changes for the Linaro 4.9-2015.01 release.
+# DP: Changes for the Linaro 5-2015.xx release.
-LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@219502 \
- svn://gcc.gnu.org/svn/gcc/branches/linaro/gcc-4_9-branch@219643 \
+LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-5-branch@219502 \
+ svn://gcc.gnu.org/svn/gcc/branches/linaro/gcc-5-branch@219643 \
| filterdiff --remove-timestamps --addoldprefix=a/src/ --addnewprefix=b/src/
---- a/src/libitm/ChangeLog.linaro
-+++ b/src/libitm/ChangeLog.linaro
-@@ -0,0 +1,68 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-10-03 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r213035.
-+ 2014-07-24 Richard Henderson <rth@redhat.com>
-+
-+ * config/aarch64/sjlj.S (_ITM_beginTransaction): Use post-inc
-+ addressing mode in epilogue.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-26 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r210615.
-+ 2014-05-19 Richard Henderson <rth@redhat.com>
-+
-+ * config/aarch64/sjlj.S: New file.
-+ * config/aarch64/target.h: New file.
-+ * configure.tgt: Enable aarch64.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/libgomp/ChangeLog.linaro
-+++ b/src/libgomp/ChangeLog.linaro
-@@ -0,0 +1,51 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/libquadmath/ChangeLog.linaro
-+++ b/src/libquadmath/ChangeLog.linaro
-@@ -0,0 +1,51 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/libsanitizer/ChangeLog.linaro
-+++ b/src/libsanitizer/ChangeLog.linaro
-@@ -0,0 +1,51 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/zlib/ChangeLog.linaro
-+++ b/src/zlib/ChangeLog.linaro
-@@ -0,0 +1,51 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/libstdc++-v3/ChangeLog.linaro
-+++ b/src/libstdc++-v3/ChangeLog.linaro
-@@ -0,0 +1,70 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r216444.
-+ 2014-10-19 Maxim Kuvyrkov <maxim.kuvyrkov@linaro.org>
-+
-+ * testsuite/lib/libstdc++.exp (v3-copy-file): New proc split from ...
-+ (v3-copy-files): ... this. Update.
-+ (check_v3_target_fileio): Fix race on cin_unget-1.txt file.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-10-06 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r215101.
-+ 2014-09-10 Tony Wang <tony.wang@arm.com>
-+
-+ PR target/56846
-+ * libsupc++/eh_personality.cc (PERSONALITY_FUNCTION):
-+ Return with CONTINUE_UNWINDING when the state pattern
-+ contains: _US_VIRTUAL_UNWIND_FRAME | _US_FORCE_UNWIND
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/libstdc++-v3/testsuite/lib/libstdc++.exp
-+++ b/src/libstdc++-v3/testsuite/lib/libstdc++.exp
-@@ -63,19 +63,24 @@
- verbose "++ $var is $val" $n
- }
-
-+# Copy file to the target.
-+proc v3-copy-file {src dst} {
-+ if { [catch { set symlink [file readlink $src] } x] } then {
-+ remote_download target $src $dst
-+ } else {
-+ if { [regexp "^/" "$symlink"] } then {
-+ remote_download target $symlink $dst
-+ } else {
-+ set dirname [file dirname $f]
-+ remote_download target $dirname/$symlink $dst
-+ }
-+ }
-+}
-+
- # Called by v3-init below. "Static" to this file.
- proc v3-copy-files {srcfiles} {
- foreach f $srcfiles {
-- if { [catch { set symlink [file readlink $f] } x] } then {
-- remote_download target $f
-- } else {
-- if { [regexp "^/" "$symlink"] } then {
-- remote_download target $symlink
-- } else {
-- set dirname [file dirname $f]
-- remote_download target $dirname/$symlink
-- }
-- }
-+ v3-copy-file $f [file tail $f]
- }
- }
-
-@@ -681,8 +686,8 @@
- # the file functions
- set src fileio[pid].cc
- set exe fileio[pid].x
-- set testfile "cin_unget-1.txt"
-- v3-copy-files "$srcdir/data/$testfile"
-+ set testfile "cin_unget-1.[pid].txt"
-+ v3-copy-file "$srcdir/data/cin_unget-1.txt" "$testfile"
-
- set f [open $src "w"]
- puts $f "#include <sys/types.h>"
---- a/src/configure.ac
-+++ b/src/configure.ac
-@@ -331,7 +331,8 @@
- if test "$is_elf" = "yes"; then
- # Check for target supported by gold.
- case "${target}" in
-- i?86-*-* | x86_64-*-* | sparc*-*-* | powerpc*-*-* | arm*-*-* | tilegx*-*-*)
-+ i?86-*-* | x86_64-*-* | sparc*-*-* | powerpc*-*-* | arm*-*-* \
-+ | aarch64*-*-* | tilegx*-*-*)
- configdirs="$configdirs gold"
- if test x${ENABLE_GOLD} = xdefault; then
- default_ld=gold
---- a/src/intl/ChangeLog.linaro
-+++ b/src/intl/ChangeLog.linaro
-@@ -0,0 +1,51 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/ChangeLog.linaro
-+++ b/src/ChangeLog.linaro
-@@ -0,0 +1,59 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r215865.
-+ 2014-10-03 Jing Yu <jingyu@google.com>
-+
-+ * configure.ac: Add aarch64 to list of targets that support gold.
-+ * configure: Regenerate.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/boehm-gc/ChangeLog.linaro
-+++ b/src/boehm-gc/ChangeLog.linaro
-@@ -0,0 +1,51 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/include/ChangeLog.linaro
-+++ b/src/include/ChangeLog.linaro
-@@ -0,0 +1,58 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-23 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209649.
-+ 2014-04-22 Yufeng Zhang <yufeng.zhang@arm.com>
-+
-+ * longlong.h: Merge from glibc.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/include/longlong.h
-+++ b/src/include/longlong.h
-@@ -1,5 +1,5 @@
- /* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
-- Copyright (C) 1991-2013 Free Software Foundation, Inc.
-+ Copyright (C) 1991-2014 Free Software Foundation, Inc.
-
- This file is part of the GNU C Library.
-
-@@ -122,6 +122,22 @@
- #define __AND_CLOBBER_CC , "cc"
- #endif /* __GNUC__ < 2 */
-
-+#if defined (__aarch64__)
-+
-+#if W_TYPE_SIZE == 32
-+#define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X))
-+#define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X))
-+#define COUNT_LEADING_ZEROS_0 32
-+#endif /* W_TYPE_SIZE == 32 */
-+
-+#if W_TYPE_SIZE == 64
-+#define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clzll (X))
-+#define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctzll (X))
-+#define COUNT_LEADING_ZEROS_0 64
-+#endif /* W_TYPE_SIZE == 64 */
-+
-+#endif /* __aarch64__ */
-+
- #if defined (__alpha) && W_TYPE_SIZE == 64
- #define umul_ppmm(ph, pl, m0, m1) \
- do { \
---- a/src/libiberty/ChangeLog.linaro
-+++ b/src/libiberty/ChangeLog.linaro
-@@ -0,0 +1,51 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/lto-plugin/ChangeLog.linaro
-+++ b/src/lto-plugin/ChangeLog.linaro
-@@ -0,0 +1,51 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/contrib/regression/ChangeLog.linaro
-+++ b/src/contrib/regression/ChangeLog.linaro
-@@ -0,0 +1,51 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/contrib/ChangeLog.linaro
-+++ b/src/contrib/ChangeLog.linaro
-@@ -0,0 +1,51 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/contrib/reghunt/ChangeLog.linaro
-+++ b/src/contrib/reghunt/ChangeLog.linaro
-@@ -0,0 +1,51 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/libatomic/ChangeLog.linaro
-+++ b/src/libatomic/ChangeLog.linaro
-@@ -0,0 +1,51 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/config/ChangeLog.linaro
-+++ b/src/config/ChangeLog.linaro
-@@ -0,0 +1,51 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/libbacktrace/ChangeLog.linaro
-+++ b/src/libbacktrace/ChangeLog.linaro
-@@ -0,0 +1,51 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/libjava/libltdl/ChangeLog.linaro
-+++ b/src/libjava/libltdl/ChangeLog.linaro
-@@ -0,0 +1,51 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/libjava/ChangeLog.linaro
-+++ b/src/libjava/ChangeLog.linaro
-@@ -0,0 +1,51 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/libjava/classpath/ChangeLog.linaro
-+++ b/src/libjava/classpath/ChangeLog.linaro
-@@ -0,0 +1,51 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/gnattools/ChangeLog.linaro
-+++ b/src/gnattools/ChangeLog.linaro
-@@ -0,0 +1,51 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/maintainer-scripts/ChangeLog.linaro
-+++ b/src/maintainer-scripts/ChangeLog.linaro
-@@ -0,0 +1,51 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/configure
-+++ b/src/configure
-@@ -2971,7 +2971,8 @@
- if test "$is_elf" = "yes"; then
- # Check for target supported by gold.
- case "${target}" in
-- i?86-*-* | x86_64-*-* | sparc*-*-* | powerpc*-*-* | arm*-*-* | tilegx*-*-*)
-+ i?86-*-* | x86_64-*-* | sparc*-*-* | powerpc*-*-* | arm*-*-* \
-+ | aarch64*-*-* | tilegx*-*-*)
- configdirs="$configdirs gold"
- if test x${ENABLE_GOLD} = xdefault; then
- default_ld=gold
---- a/src/libgcc/config.host
-+++ b/src/libgcc/config.host
-@@ -316,13 +316,15 @@
- case ${host} in
- aarch64*-*-elf)
- extra_parts="$extra_parts crtbegin.o crtend.o crti.o crtn.o"
-+ extra_parts="$extra_parts crtfastmath.o"
- tmake_file="${tmake_file} ${cpu_type}/t-aarch64"
-- tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp"
-+ tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm"
- ;;
- aarch64*-*-linux*)
-+ extra_parts="$extra_parts crtfastmath.o"
- md_unwind_header=aarch64/linux-unwind.h
- tmake_file="${tmake_file} ${cpu_type}/t-aarch64"
-- tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp"
-+ tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm"
- ;;
- alpha*-*-linux*)
- tmake_file="${tmake_file} alpha/t-alpha alpha/t-ieee t-crtfm alpha/t-linux"
---- a/src/libgcc/ChangeLog.linaro
-+++ b/src/libgcc/ChangeLog.linaro
-@@ -0,0 +1,69 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r215013.
-+ 2014-09-08 Joseph Myers <joseph@codesourcery.com>
-+
-+ * fp-bit.c (pack_d, unpack_d): Remove LARGEST_EXPONENT_IS_NORMAL
-+ and ROUND_TOWARDS_ZERO conditionals.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-10-06 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r215086.
-+ 2014-09-09 Marcus Shawcroft <marcus.shawcroft@arm.com>
-+ Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
-+
-+ * config.host (aarch64*): Include crtfastmath.o and
-+ t-crtfm.
-+ * config/aarch64/crtfastmath.c: New file.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/libgcc/config/aarch64/crtfastmath.c
-+++ b/src/libgcc/config/aarch64/crtfastmath.c
-@@ -0,0 +1,36 @@
-+/*
-+ * Copyright (C) 2014 Free Software Foundation, Inc.
-+ *
-+ * This file is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License as published by the
-+ * Free Software Foundation; either version 3, or (at your option) any
-+ * later version.
-+ *
-+ * This file is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ * General Public License for more details.
-+ *
-+ * Under Section 7 of GPL version 3, you are granted additional
-+ * permissions described in the GCC Runtime Library Exception, version
-+ * 3.1, as published by the Free Software Foundation.
-+ *
-+ * You should have received a copy of the GNU General Public License and
-+ * a copy of the GCC Runtime Library Exception along with this program;
-+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
-+ * <http://www.gnu.org/licenses/>.
-+ */
-+
-+#define _FPU_FPCR_FZ 0x1000000
-+
-+#define _FPU_SETCW(fpcr) \
-+ { \
-+ __asm__ __volatile__ ("msr fpcr, %0" : : "r" (fpcr)); \
-+ }
-+
-+static void __attribute__((constructor))
-+set_fast_math (void)
-+{
-+ /* Flush to zero, round to nearest, IEEE exceptions disabled. */
-+ _FPU_SETCW (_FPU_FPCR_FZ);
-+}
---- a/src/libgcc/config/arm/bpabi-v6m.S
-+++ b/src/libgcc/config/arm/bpabi-v6m.S
-@@ -148,7 +148,7 @@
- mov r0, sp
- push {r0, lr}
- ldr r0, [sp, #8]
-- bl SYM(__gnu_uldivmod_helper)
-+ bl SYM(__udivmoddi4)
- ldr r3, [sp, #4]
- mov lr, r3
- add sp, sp, #8
---- a/src/libgcc/config/arm/bpabi.c
-+++ b/src/libgcc/config/arm/bpabi.c
-@@ -26,9 +26,6 @@
- extern unsigned long long __udivdi3 (unsigned long long,
- unsigned long long);
- extern long long __gnu_ldivmod_helper (long long, long long, long long *);
--extern unsigned long long __gnu_uldivmod_helper (unsigned long long,
-- unsigned long long,
-- unsigned long long *);
-
-
- long long
-@@ -43,14 +40,3 @@
- return quotient;
- }
-
--unsigned long long
--__gnu_uldivmod_helper (unsigned long long a,
-- unsigned long long b,
-- unsigned long long *remainder)
--{
-- unsigned long long quotient;
--
-- quotient = __udivdi3 (a, b);
-- *remainder = a - b * quotient;
-- return quotient;
--}
---- a/src/libgcc/config/arm/bpabi.S
-+++ b/src/libgcc/config/arm/bpabi.S
-@@ -22,6 +22,8 @@
- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- <http://www.gnu.org/licenses/>. */
-
-+ .cfi_sections .debug_frame
-+
- #ifdef __ARM_EABI__
- /* Some attributes that are common to all routines in this file. */
- /* Tag_ABI_align_needed: This code does not require 8-byte
-@@ -120,49 +122,137 @@
- #endif
- .endm
-
-+/* we can use STRD/LDRD on v5TE and later, and any Thumb-2 architecture. */
-+#if (defined(__ARM_EABI__) \
-+ && (defined(__thumb2__) \
-+ || (__ARM_ARCH >= 5 && defined(__TARGET_FEATURE_DSP))))
-+#define CAN_USE_LDRD 1
-+#else
-+#define CAN_USE_LDRD 0
-+#endif
-+
-+/* set up stack from for call to __udivmoddi4. At the end of the macro the
-+ stack is arranged as follows:
-+ sp+12 / space for remainder
-+ sp+8 \ (written by __udivmoddi4)
-+ sp+4 lr
-+ sp+0 sp+8 [rp (remainder pointer) argument for __udivmoddi4]
-+
-+ */
-+.macro push_for_divide fname
-+#if defined(__thumb2__) && CAN_USE_LDRD
-+ sub ip, sp, #8
-+ strd ip, lr, [sp, #-16]!
-+#else
-+ sub sp, sp, #8
-+ do_push {sp, lr}
-+#endif
-+ .cfi_adjust_cfa_offset 16
-+ .cfi_offset 14, -12
-+.endm
-+
-+/* restore stack */
-+.macro pop_for_divide
-+ ldr lr, [sp, #4]
-+#if CAN_USE_LDRD
-+ ldrd r2, r3, [sp, #8]
-+ add sp, sp, #16
-+#else
-+ add sp, sp, #8
-+ do_pop {r2, r3}
-+#endif
-+ .cfi_restore 14
-+ .cfi_adjust_cfa_offset 0
-+.endm
-+
- #ifdef L_aeabi_ldivmod
-
-+/* Perform 64 bit signed division.
-+ Inputs:
-+ r0:r1 numerator
-+ r2:r3 denominator
-+ Outputs:
-+ r0:r1 quotient
-+ r2:r3 remainder
-+ */
- ARM_FUNC_START aeabi_ldivmod
-- cfi_start __aeabi_ldivmod, LSYM(Lend_aeabi_ldivmod)
-- test_div_by_zero signed
-+ .cfi_startproc
-+ test_div_by_zero signed
-
-- sub sp, sp, #8
--#if defined(__thumb2__)
-- mov ip, sp
-- push {ip, lr}
--#else
-- do_push {sp, lr}
--#endif
--98: cfi_push 98b - __aeabi_ldivmod, 0xe, -0xc, 0x10
-- bl SYM(__gnu_ldivmod_helper) __PLT__
-- ldr lr, [sp, #4]
-- add sp, sp, #8
-- do_pop {r2, r3}
-+ push_for_divide __aeabi_ldivmod
-+ cmp xxh, #0
-+ blt 1f
-+ cmp yyh, #0
-+ blt 2f
-+ /* arguments in (r0:r1), (r2:r3) and *sp */
-+ bl SYM(__udivmoddi4) __PLT__
-+ .cfi_remember_state
-+ pop_for_divide
- RET
-- cfi_end LSYM(Lend_aeabi_ldivmod)
-+
-+1: /* xxh:xxl is negative */
-+ .cfi_restore_state
-+ negs xxl, xxl
-+ sbc xxh, xxh, xxh, lsl #1 /* Thumb-2 has no RSC, so use X - 2X */
-+ cmp yyh, #0
-+ blt 3f
-+ /* arguments in (r0:r1), (r2:r3) and *sp */
-+ bl SYM(__udivmoddi4) __PLT__
-+ .cfi_remember_state
-+ pop_for_divide
-+ negs xxl, xxl
-+ sbc xxh, xxh, xxh, lsl #1 /* Thumb-2 has no RSC, so use X - 2X */
-+ negs yyl, yyl
-+ sbc yyh, yyh, yyh, lsl #1 /* Thumb-2 has no RSC, so use X - 2X */
-+ RET
-+
-+2: /* only yyh:yyl is negative */
-+ .cfi_restore_state
-+ negs yyl, yyl
-+ sbc yyh, yyh, yyh, lsl #1 /* Thumb-2 has no RSC, so use X - 2X */
-+ /* arguments in (r0:r1), (r2:r3) and *sp */
-+ bl SYM(__udivmoddi4) __PLT__
-+ .cfi_remember_state
-+ pop_for_divide
-+ negs xxl, xxl
-+ sbc xxh, xxh, xxh, lsl #1 /* Thumb-2 has no RSC, so use X - 2X */
-+ RET
-+
-+3: /* both xxh:xxl and yyh:yyl are negative */
-+ .cfi_restore_state
-+ negs yyl, yyl
-+ sbc yyh, yyh, yyh, lsl #1 /* Thumb-2 has no RSC, so use X - 2X */
-+ /* arguments in (r0:r1), (r2:r3) and *sp */
-+ bl SYM(__udivmoddi4) __PLT__
-+ pop_for_divide
-+ negs yyl, yyl
-+ sbc yyh, yyh, yyh, lsl #1 /* Thumb-2 has no RSC, so use X - 2X */
-+ RET
-+
-+ .cfi_endproc
-
- #endif /* L_aeabi_ldivmod */
-
- #ifdef L_aeabi_uldivmod
-
-+/* Perform 64 bit signed division.
-+ Inputs:
-+ r0:r1 numerator
-+ r2:r3 denominator
-+ Outputs:
-+ r0:r1 quotient
-+ r2:r3 remainder
-+ */
- ARM_FUNC_START aeabi_uldivmod
-- cfi_start __aeabi_uldivmod, LSYM(Lend_aeabi_uldivmod)
-- test_div_by_zero unsigned
-+ .cfi_startproc
-+ test_div_by_zero unsigned
-
-- sub sp, sp, #8
--#if defined(__thumb2__)
-- mov ip, sp
-- push {ip, lr}
--#else
-- do_push {sp, lr}
--#endif
--98: cfi_push 98b - __aeabi_uldivmod, 0xe, -0xc, 0x10
-- bl SYM(__gnu_uldivmod_helper) __PLT__
-- ldr lr, [sp, #4]
-- add sp, sp, #8
-- do_pop {r2, r3}
-+ push_for_divide __aeabi_uldivmod
-+ /* arguments in (r0:r1), (r2:r3) and *sp */
-+ bl SYM(__udivmoddi4) __PLT__
-+ pop_for_divide
- RET
-- cfi_end LSYM(Lend_aeabi_uldivmod)
-+ .cfi_endproc
-
- #endif /* L_aeabi_divmod */
-
---- a/src/libgcc/config/libbid/ChangeLog.linaro
-+++ b/src/libgcc/config/libbid/ChangeLog.linaro
-@@ -0,0 +1,51 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/libgcc/fp-bit.c
-+++ b/src/libgcc/fp-bit.c
-@@ -202,17 +202,9 @@
- int sign = src->sign;
- int exp = 0;
-
-- if (LARGEST_EXPONENT_IS_NORMAL (FRAC_NBITS) && (isnan (src) || isinf (src)))
-+ if (isnan (src))
- {
-- /* We can't represent these values accurately. By using the
-- largest possible magnitude, we guarantee that the conversion
-- of infinity is at least as big as any finite number. */
- exp = EXPMAX;
-- fraction = ((fractype) 1 << FRACBITS) - 1;
-- }
-- else if (isnan (src))
-- {
-- exp = EXPMAX;
- /* Restore the NaN's payload. */
- fraction >>= NGARDS;
- fraction &= QUIET_NAN - 1;
-@@ -291,8 +283,7 @@
- fraction >>= NGARDS;
- #endif /* NO_DENORMALS */
- }
-- else if (!LARGEST_EXPONENT_IS_NORMAL (FRAC_NBITS)
-- && __builtin_expect (src->normal_exp > EXPBIAS, 0))
-+ else if (__builtin_expect (src->normal_exp > EXPBIAS, 0))
- {
- exp = EXPMAX;
- fraction = 0;
-@@ -300,35 +291,25 @@
- else
- {
- exp = src->normal_exp + EXPBIAS;
-- if (!ROUND_TOWARDS_ZERO)
-+ /* IF the gard bits are the all zero, but the first, then we're
-+ half way between two numbers, choose the one which makes the
-+ lsb of the answer 0. */
-+ if ((fraction & GARDMASK) == GARDMSB)
- {
-- /* IF the gard bits are the all zero, but the first, then we're
-- half way between two numbers, choose the one which makes the
-- lsb of the answer 0. */
-- if ((fraction & GARDMASK) == GARDMSB)
-- {
-- if (fraction & (1 << NGARDS))
-- fraction += GARDROUND + 1;
-- }
-- else
-- {
-- /* Add a one to the guards to round up */
-- fraction += GARDROUND;
-- }
-- if (fraction >= IMPLICIT_2)
-- {
-- fraction >>= 1;
-- exp += 1;
-- }
-+ if (fraction & (1 << NGARDS))
-+ fraction += GARDROUND + 1;
- }
-- fraction >>= NGARDS;
--
-- if (LARGEST_EXPONENT_IS_NORMAL (FRAC_NBITS) && exp > EXPMAX)
-+ else
- {
-- /* Saturate on overflow. */
-- exp = EXPMAX;
-- fraction = ((fractype) 1 << FRACBITS) - 1;
-+ /* Add a one to the guards to round up */
-+ fraction += GARDROUND;
- }
-+ if (fraction >= IMPLICIT_2)
-+ {
-+ fraction >>= 1;
-+ exp += 1;
-+ }
-+ fraction >>= NGARDS;
- }
- }
-
-@@ -556,8 +537,7 @@
- dst->fraction.ll = fraction;
- }
- }
-- else if (!LARGEST_EXPONENT_IS_NORMAL (FRAC_NBITS)
-- && __builtin_expect (exp == EXPMAX, 0))
-+ else if (__builtin_expect (exp == EXPMAX, 0))
- {
- /* Huge exponent*/
- if (fraction == 0)
-@@ -915,7 +895,7 @@
- low <<= 1;
- }
-
-- if (!ROUND_TOWARDS_ZERO && (high & GARDMASK) == GARDMSB)
-+ if ((high & GARDMASK) == GARDMSB)
- {
- if (high & (1 << NGARDS))
- {
-@@ -1035,7 +1015,7 @@
- numerator *= 2;
- }
-
-- if (!ROUND_TOWARDS_ZERO && (quotient & GARDMASK) == GARDMSB)
-+ if ((quotient & GARDMASK) == GARDMSB)
- {
- if (quotient & (1 << NGARDS))
- {
---- a/src/libdecnumber/ChangeLog.linaro
-+++ b/src/libdecnumber/ChangeLog.linaro
-@@ -0,0 +1,51 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/gcc/LINARO-VERSION
-+++ b/src/gcc/LINARO-VERSION
-@@ -0,0 +1 @@
-+4.9-2015.01
---- a/src/gcc/ira-conflicts.c
-+++ b/src/gcc/ira-conflicts.c
-@@ -774,6 +774,27 @@
- temp_hard_reg_set);
- }
-
-+ /* Now we deal with paradoxical subreg cases where certain registers
-+ cannot be accessed in the widest mode. */
-+ enum machine_mode outer_mode = ALLOCNO_WMODE (a);
-+ enum machine_mode inner_mode = ALLOCNO_MODE (a);
-+ if (GET_MODE_SIZE (outer_mode) > GET_MODE_SIZE (inner_mode))
-+ {
-+ enum reg_class aclass = ALLOCNO_CLASS (a);
-+ for (int j = ira_class_hard_regs_num[aclass] - 1; j >= 0; --j)
-+ {
-+ int inner_regno = ira_class_hard_regs[aclass][j];
-+ int outer_regno = simplify_subreg_regno (inner_regno,
-+ inner_mode, 0,
-+ outer_mode);
-+ if (outer_regno < 0
-+ || !in_hard_reg_set_p (reg_class_contents[aclass],
-+ outer_mode, outer_regno))
-+ SET_HARD_REG_BIT (OBJECT_CONFLICT_HARD_REGS (obj),
-+ inner_regno);
-+ }
-+ }
-+
- if (ALLOCNO_CALLS_CROSSED_NUM (a) != 0)
- {
- int regno;
---- a/src/gcc/targhooks.c
-+++ b/src/gcc/targhooks.c
-@@ -1357,7 +1357,62 @@
- #endif
- }
-
-+/* For hooks which use the MOVE_RATIO macro, this gives the legacy default
-+ behaviour. SPEED_P is true if we are compiling for speed. */
-+
-+static unsigned int
-+get_move_ratio (bool speed_p ATTRIBUTE_UNUSED)
-+{
-+ unsigned int move_ratio;
-+#ifdef MOVE_RATIO
-+ move_ratio = (unsigned int) MOVE_RATIO (speed_p);
-+#else
-+#if defined (HAVE_movmemqi) || defined (HAVE_movmemhi) || defined (HAVE_movmemsi) || defined (HAVE_movmemdi) || defined (HAVE_movmemti)
-+ move_ratio = 2;
-+#else /* No movmem patterns, pick a default. */
-+ move_ratio = ((speed_p) ? 15 : 3);
-+#endif
-+#endif
-+ return move_ratio;
-+}
-+
-+/* Return TRUE if the move_by_pieces/set_by_pieces infrastructure should be
-+ used; return FALSE if the movmem/setmem optab should be expanded, or
-+ a call to memcpy emitted. */
-+
- bool
-+default_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
-+ unsigned int alignment,
-+ enum by_pieces_operation op,
-+ bool speed_p)
-+{
-+ unsigned int max_size = 0;
-+ unsigned int ratio = 0;
-+
-+ switch (op)
-+ {
-+ case CLEAR_BY_PIECES:
-+ max_size = STORE_MAX_PIECES;
-+ ratio = CLEAR_RATIO (speed_p);
-+ break;
-+ case MOVE_BY_PIECES:
-+ max_size = MOVE_MAX_PIECES;
-+ ratio = get_move_ratio (speed_p);
-+ break;
-+ case SET_BY_PIECES:
-+ max_size = STORE_MAX_PIECES;
-+ ratio = SET_RATIO (speed_p);
-+ break;
-+ case STORE_BY_PIECES:
-+ max_size = STORE_MAX_PIECES;
-+ ratio = get_move_ratio (speed_p);
-+ break;
-+ }
-+
-+ return move_by_pieces_ninsns (size, alignment, max_size + 1) < ratio;
-+}
-+
-+bool
- default_profile_before_prologue (void)
- {
- #ifdef PROFILE_BEFORE_PROLOGUE
---- a/src/gcc/targhooks.h
-+++ b/src/gcc/targhooks.h
-@@ -177,6 +177,11 @@
- extern int default_register_move_cost (enum machine_mode, reg_class_t,
- reg_class_t);
-
-+extern bool default_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
-+ unsigned int,
-+ enum by_pieces_operation,
-+ bool);
-+
- extern bool default_profile_before_prologue (void);
- extern reg_class_t default_preferred_reload_class (rtx, reg_class_t);
- extern reg_class_t default_preferred_output_reload_class (rtx, reg_class_t);
---- a/src/gcc/cppbuiltin.c
-+++ b/src/gcc/cppbuiltin.c
-@@ -53,18 +53,41 @@
- *patchlevel = s_patchlevel;
- }
-
-+/* Parse a LINAROVER version string of the format "M.m-year.month[-spin][~dev]"
-+ to create Linaro release number YYYYMM and spin version. */
-+static void
-+parse_linarover (int *release, int *spin)
-+{
-+ static int s_year = -1, s_month, s_spin;
-
-+ if (s_year == -1)
-+ if (sscanf (LINAROVER, "%*[^-]-%d.%d-%d", &s_year, &s_month, &s_spin) != 3)
-+ {
-+ sscanf (LINAROVER, "%*[^-]-%d.%d", &s_year, &s_month);
-+ s_spin = 0;
-+ }
-+
-+ if (release)
-+ *release = s_year * 100 + s_month;
-+
-+ if (spin)
-+ *spin = s_spin;
-+}
-+
- /* Define __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__ and __VERSION__. */
- static void
- define__GNUC__ (cpp_reader *pfile)
- {
-- int major, minor, patchlevel;
-+ int major, minor, patchlevel, linaro_release, linaro_spin;
-
- parse_basever (&major, &minor, &patchlevel);
-+ parse_linarover (&linaro_release, &linaro_spin);
- cpp_define_formatted (pfile, "__GNUC__=%d", major);
- cpp_define_formatted (pfile, "__GNUC_MINOR__=%d", minor);
- cpp_define_formatted (pfile, "__GNUC_PATCHLEVEL__=%d", patchlevel);
- cpp_define_formatted (pfile, "__VERSION__=\"%s\"", version_string);
-+ cpp_define_formatted (pfile, "__LINARO_RELEASE__=%d", linaro_release);
-+ cpp_define_formatted (pfile, "__LINARO_SPIN__=%d", linaro_spin);
- cpp_define_formatted (pfile, "__ATOMIC_RELAXED=%d", MEMMODEL_RELAXED);
- cpp_define_formatted (pfile, "__ATOMIC_SEQ_CST=%d", MEMMODEL_SEQ_CST);
- cpp_define_formatted (pfile, "__ATOMIC_ACQUIRE=%d", MEMMODEL_ACQUIRE);
---- a/src/gcc/tree-ssa-threadupdate.c
-+++ b/src/gcc/tree-ssa-threadupdate.c
-@@ -156,8 +156,9 @@
- bool registering)
- {
- fprintf (dump_file,
-- " %s jump thread: (%d, %d) incoming edge; ",
-+ " %s%s jump thread: (%d, %d) incoming edge; ",
- (registering ? "Registering" : "Cancelling"),
-+ (path[0]->type == EDGE_FSM_THREAD ? " FSM": ""),
- path[0]->e->src->index, path[0]->e->dest->index);
-
- for (unsigned int i = 1; i < path.length (); i++)
-@@ -1622,6 +1623,155 @@
- return false;
- }
-
-+/* Verify that the REGION is a Single Entry Multiple Exits region: make sure no
-+ edge other than ENTRY is entering the REGION. */
-+
-+DEBUG_FUNCTION void
-+verify_seme (edge entry, basic_block *region, unsigned n_region)
-+{
-+ bitmap bbs = BITMAP_ALLOC (NULL);
-+
-+ for (unsigned i = 0; i < n_region; i++)
-+ bitmap_set_bit (bbs, region[i]->index);
-+
-+ for (unsigned i = 0; i < n_region; i++)
-+ {
-+ edge e;
-+ edge_iterator ei;
-+ basic_block bb = region[i];
-+
-+ /* All predecessors other than ENTRY->src should be in the region. */
-+ for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei)); ei_next (&ei))
-+ if (e != entry)
-+ gcc_assert (bitmap_bit_p (bbs, e->src->index));
-+ }
-+
-+ BITMAP_FREE (bbs);
-+}
-+
-+/* Duplicates a Single Entry Multiple Exit REGION (set of N_REGION basic
-+ blocks). The ENTRY edge is redirected to the duplicate of the region. If
-+ REGION is not a Single Entry region, ignore any incoming edges other than
-+ ENTRY: this makes the copied region a Single Entry region.
-+
-+ Remove the last conditional statement in the last basic block in the REGION,
-+ and create a single fallthru edge pointing to the same destination as the
-+ EXIT edge.
-+
-+ The new basic blocks are stored to REGION_COPY in the same order as they had
-+ in REGION, provided that REGION_COPY is not NULL.
-+
-+ Returns false if it is unable to copy the region, true otherwise. */
-+
-+static bool
-+duplicate_seme_region (edge entry, edge exit,
-+ basic_block *region, unsigned n_region,
-+ basic_block *region_copy)
-+{
-+ unsigned i;
-+ bool free_region_copy = false, copying_header = false;
-+ struct loop *loop = entry->dest->loop_father;
-+ edge exit_copy;
-+ edge redirected;
-+ int total_freq = 0, entry_freq = 0;
-+ gcov_type total_count = 0, entry_count = 0;
-+
-+ if (!can_copy_bbs_p (region, n_region))
-+ return false;
-+
-+ /* Some sanity checking. Note that we do not check for all possible
-+ missuses of the functions. I.e. if you ask to copy something weird,
-+ it will work, but the state of structures probably will not be
-+ correct. */
-+ for (i = 0; i < n_region; i++)
-+ {
-+ /* We do not handle subloops, i.e. all the blocks must belong to the
-+ same loop. */
-+ if (region[i]->loop_father != loop)
-+ return false;
-+ }
-+
-+ initialize_original_copy_tables ();
-+
-+ if (copying_header)
-+ set_loop_copy (loop, loop_outer (loop));
-+ else
-+ set_loop_copy (loop, loop);
-+
-+ if (!region_copy)
-+ {
-+ region_copy = XNEWVEC (basic_block, n_region);
-+ free_region_copy = true;
-+ }
-+
-+ if (entry->dest->count)
-+ {
-+ total_count = entry->dest->count;
-+ entry_count = entry->count;
-+ /* Fix up corner cases, to avoid division by zero or creation of negative
-+ frequencies. */
-+ if (entry_count > total_count)
-+ entry_count = total_count;
-+ }
-+ else
-+ {
-+ total_freq = entry->dest->frequency;
-+ entry_freq = EDGE_FREQUENCY (entry);
-+ /* Fix up corner cases, to avoid division by zero or creation of negative
-+ frequencies. */
-+ if (total_freq == 0)
-+ total_freq = 1;
-+ else if (entry_freq > total_freq)
-+ entry_freq = total_freq;
-+ }
-+
-+ copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop,
-+ split_edge_bb_loc (entry), 0);
-+ if (total_count)
-+ {
-+ scale_bbs_frequencies_gcov_type (region, n_region,
-+ total_count - entry_count,
-+ total_count);
-+ scale_bbs_frequencies_gcov_type (region_copy, n_region, entry_count,
-+ total_count);
-+ }
-+ else
-+ {
-+ scale_bbs_frequencies_int (region, n_region, total_freq - entry_freq,
-+ total_freq);
-+ scale_bbs_frequencies_int (region_copy, n_region, entry_freq, total_freq);
-+ }
-+
-+#ifdef ENABLE_CHECKING
-+ /* Make sure no edge other than ENTRY is entering the copied region. */
-+ verify_seme (entry, region_copy, n_region);
-+#endif
-+
-+ /* Remove the last branch in the jump thread path. */
-+ remove_ctrl_stmt_and_useless_edges (region_copy[n_region - 1], exit->dest);
-+ edge e = make_edge (region_copy[n_region - 1], exit->dest, EDGE_FALLTHRU);
-+
-+ if (e) {
-+ rescan_loop_exit (e, true, false);
-+ e->probability = REG_BR_PROB_BASE;
-+ e->count = region_copy[n_region - 1]->count;
-+ }
-+
-+ /* Redirect the entry and add the phi node arguments. */
-+ redirected = redirect_edge_and_branch (entry, get_bb_copy (entry->dest));
-+ gcc_assert (redirected != NULL);
-+ flush_pending_stmts (entry);
-+
-+ /* Add the other PHI node arguments. */
-+ add_phi_args_after_copy (region_copy, n_region, NULL);
-+
-+ if (free_region_copy)
-+ free (region_copy);
-+
-+ free_original_copy_tables ();
-+ return true;
-+}
-+
- /* Walk through all blocks and thread incoming edges to the appropriate
- outgoing edge for each edge pair recorded in THREADED_EDGES.
-
-@@ -1651,6 +1801,57 @@
- threaded_blocks = BITMAP_ALLOC (NULL);
- memset (&thread_stats, 0, sizeof (thread_stats));
-
-+ /* Jump-thread all FSM threads before other jump-threads. */
-+ for (i = 0; i < paths.length ();)
-+ {
-+ vec<jump_thread_edge *> *path = paths[i];
-+ edge entry = (*path)[0]->e;
-+
-+ if ((*path)[0]->type != EDGE_FSM_THREAD
-+ /* Do not jump-thread twice from the same block. */
-+ || bitmap_bit_p (threaded_blocks, entry->src->index)) {
-+ i++;
-+ continue;
-+ }
-+
-+ unsigned len = path->length ();
-+ edge exit = (*path)[len - 1]->e;
-+ basic_block *region = XNEWVEC (basic_block, len - 1);
-+
-+ for (unsigned int j = 0; j < len - 1; j++)
-+ region[j] = (*path)[j]->e->dest;
-+
-+ if (duplicate_seme_region (entry, exit, region, len - 1, NULL))
-+ {
-+ /* We do not update dominance info. */
-+ free_dominance_info (CDI_DOMINATORS);
-+ bitmap_set_bit (threaded_blocks, entry->src->index);
-+ retval = true;
-+ }
-+
-+ delete_jump_thread_path (path);
-+ paths.unordered_remove (i);
-+ }
-+
-+ /* Remove from PATHS all the jump-threads starting with an edge already
-+ jump-threaded. */
-+ for (i = 0; i < paths.length ();)
-+ {
-+ vec<jump_thread_edge *> *path = paths[i];
-+ edge entry = (*path)[0]->e;
-+
-+ /* Do not jump-thread twice from the same block. */
-+ if (bitmap_bit_p (threaded_blocks, entry->src->index))
-+ {
-+ delete_jump_thread_path (path);
-+ paths.unordered_remove (i);
-+ }
-+ else
-+ i++;
-+ }
-+
-+ bitmap_clear (threaded_blocks);
-+
- mark_threaded_blocks (threaded_blocks);
-
- initialize_original_copy_tables ();
---- a/src/gcc/tree-ssa-threadupdate.h
-+++ b/src/gcc/tree-ssa-threadupdate.h
-@@ -26,6 +26,7 @@
- enum jump_thread_edge_type
- {
- EDGE_START_JUMP_THREAD,
-+ EDGE_FSM_THREAD,
- EDGE_COPY_SRC_BLOCK,
- EDGE_COPY_SRC_JOINER_BLOCK,
- EDGE_NO_COPY_SRC_BLOCK
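Note (not part of the patch): the tree-ssa-threadupdate.c/.h hunks above are the backport of FSM jump threading (PR tree-optimization/54742): a jump-thread path of type EDGE_FSM_THREAD is realized by duplicating the single-entry multiple-exits region along the path and dropping its final conditional. A minimal illustrative example of the kind of code this targets follows; the function is invented here and does not come from the patch.

/* Illustrative only: a switch-based state machine loop.  Each case
   assigns a compile-time-known next state, so the FSM threader can
   duplicate the path through the loop backedge and branch directly to
   the next case instead of re-evaluating the switch every iteration.  */
int
run_fsm (const char *s)
{
  int state = 0, count = 0;
  for (; *s; s++)
    switch (state)
      {
      case 0:
        state = (*s == 'a') ? 1 : 0;   /* next state is a constant */
        break;
      case 1:
        state = (*s == 'b') ? 2 : 0;
        break;
      case 2:
        count++;
        state = 0;
        break;
      }
  return count;
}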
---- a/src/gcc/c-family/ChangeLog.linaro
-+++ b/src/gcc/c-family/ChangeLog.linaro
-@@ -0,0 +1,51 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/gcc/java/ChangeLog.linaro
-+++ b/src/gcc/java/ChangeLog.linaro
-@@ -0,0 +1,51 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/gcc/c/c-parser.c
-+++ b/src/gcc/c/c-parser.c
-@@ -4210,7 +4210,8 @@
- init.original_type = NULL;
- c_parser_error (parser, "expected identifier");
- c_parser_skip_until_found (parser, CPP_COMMA, NULL);
-- process_init_element (init, false, braced_init_obstack);
-+ process_init_element (input_location, init, false,
-+ braced_init_obstack);
- return;
- }
- }
-@@ -4342,7 +4343,8 @@
- init.original_type = NULL;
- c_parser_error (parser, "expected %<=%>");
- c_parser_skip_until_found (parser, CPP_COMMA, NULL);
-- process_init_element (init, false, braced_init_obstack);
-+ process_init_element (input_location, init, false,
-+ braced_init_obstack);
- return;
- }
- }
-@@ -4363,11 +4365,12 @@
- {
- struct c_expr init;
- gcc_assert (!after || c_dialect_objc ());
-+ location_t loc = c_parser_peek_token (parser)->location;
-+
- if (c_parser_next_token_is (parser, CPP_OPEN_BRACE) && !after)
- init = c_parser_braced_init (parser, NULL_TREE, true);
- else
- {
-- location_t loc = c_parser_peek_token (parser)->location;
- init = c_parser_expr_no_commas (parser, after);
- if (init.value != NULL_TREE
- && TREE_CODE (init.value) != STRING_CST
-@@ -4374,7 +4377,7 @@
- && TREE_CODE (init.value) != COMPOUND_LITERAL_EXPR)
- init = convert_lvalue_to_rvalue (loc, init, true, true);
- }
-- process_init_element (init, false, braced_init_obstack);
-+ process_init_element (loc, init, false, braced_init_obstack);
- }
-
- /* Parse a compound statement (possibly a function body) (C90 6.6.2,
---- a/src/gcc/c/c-typeck.c
-+++ b/src/gcc/c/c-typeck.c
-@@ -102,8 +102,8 @@
- static char *print_spelling (char *);
- static void warning_init (int, const char *);
- static tree digest_init (location_t, tree, tree, tree, bool, bool, int);
--static void output_init_element (tree, tree, bool, tree, tree, int, bool,
-- struct obstack *);
-+static void output_init_element (location_t, tree, tree, bool, tree, tree, int,
-+ bool, struct obstack *);
- static void output_pending_init_elements (int, struct obstack *);
- static int set_designator (int, struct obstack *);
- static void push_range_stack (tree, struct obstack *);
-@@ -7187,13 +7187,15 @@
- if ((TREE_CODE (constructor_type) == RECORD_TYPE
- || TREE_CODE (constructor_type) == UNION_TYPE)
- && constructor_fields == 0)
-- process_init_element (pop_init_level (1, braced_init_obstack),
-+ process_init_element (input_location,
-+ pop_init_level (1, braced_init_obstack),
- true, braced_init_obstack);
- else if (TREE_CODE (constructor_type) == ARRAY_TYPE
- && constructor_max_index
- && tree_int_cst_lt (constructor_max_index,
- constructor_index))
-- process_init_element (pop_init_level (1, braced_init_obstack),
-+ process_init_element (input_location,
-+ pop_init_level (1, braced_init_obstack),
- true, braced_init_obstack);
- else
- break;
-@@ -7393,10 +7395,9 @@
- /* When we come to an explicit close brace,
- pop any inner levels that didn't have explicit braces. */
- while (constructor_stack->implicit)
-- {
-- process_init_element (pop_init_level (1, braced_init_obstack),
-- true, braced_init_obstack);
-- }
-+ process_init_element (input_location,
-+ pop_init_level (1, braced_init_obstack),
-+ true, braced_init_obstack);
- gcc_assert (!constructor_range_stack);
- }
-
-@@ -7574,10 +7575,9 @@
- /* Designator list starts at the level of closest explicit
- braces. */
- while (constructor_stack->implicit)
-- {
-- process_init_element (pop_init_level (1, braced_init_obstack),
-- true, braced_init_obstack);
-- }
-+ process_init_element (input_location,
-+ pop_init_level (1, braced_init_obstack),
-+ true, braced_init_obstack);
- constructor_designated = 1;
- return 0;
- }
-@@ -8197,9 +8197,9 @@
- existing initializer. */
-
- static void
--output_init_element (tree value, tree origtype, bool strict_string, tree type,
-- tree field, int pending, bool implicit,
-- struct obstack * braced_init_obstack)
-+output_init_element (location_t loc, tree value, tree origtype,
-+ bool strict_string, tree type, tree field, int pending,
-+ bool implicit, struct obstack * braced_init_obstack)
- {
- tree semantic_type = NULL_TREE;
- bool maybe_const = true;
-@@ -8297,8 +8297,8 @@
-
- if (semantic_type)
- value = build1 (EXCESS_PRECISION_EXPR, semantic_type, value);
-- value = digest_init (input_location, type, value, origtype, npc,
-- strict_string, require_constant_value);
-+ value = digest_init (loc, type, value, origtype, npc, strict_string,
-+ require_constant_value);
- if (value == error_mark_node)
- {
- constructor_erroneous = 1;
-@@ -8425,8 +8425,8 @@
- {
- if (tree_int_cst_equal (elt->purpose,
- constructor_unfilled_index))
-- output_init_element (elt->value, elt->origtype, true,
-- TREE_TYPE (constructor_type),
-+ output_init_element (input_location, elt->value, elt->origtype,
-+ true, TREE_TYPE (constructor_type),
- constructor_unfilled_index, 0, false,
- braced_init_obstack);
- else if (tree_int_cst_lt (constructor_unfilled_index,
-@@ -8480,8 +8480,8 @@
- if (tree_int_cst_equal (elt_bitpos, ctor_unfilled_bitpos))
- {
- constructor_unfilled_fields = elt->purpose;
-- output_init_element (elt->value, elt->origtype, true,
-- TREE_TYPE (elt->purpose),
-+ output_init_element (input_location, elt->value, elt->origtype,
-+ true, TREE_TYPE (elt->purpose),
- elt->purpose, 0, false,
- braced_init_obstack);
- }
-@@ -8554,7 +8554,7 @@
- existing initializer. */
-
- void
--process_init_element (struct c_expr value, bool implicit,
-+process_init_element (location_t loc, struct c_expr value, bool implicit,
- struct obstack * braced_init_obstack)
- {
- tree orig_value = value.value;
-@@ -8598,7 +8598,7 @@
- if ((TREE_CODE (constructor_type) == RECORD_TYPE
- || TREE_CODE (constructor_type) == UNION_TYPE)
- && constructor_fields == 0)
-- process_init_element (pop_init_level (1, braced_init_obstack),
-+ process_init_element (loc, pop_init_level (1, braced_init_obstack),
- true, braced_init_obstack);
- else if ((TREE_CODE (constructor_type) == ARRAY_TYPE
- || TREE_CODE (constructor_type) == VECTOR_TYPE)
-@@ -8605,7 +8605,7 @@
- && constructor_max_index
- && tree_int_cst_lt (constructor_max_index,
- constructor_index))
-- process_init_element (pop_init_level (1, braced_init_obstack),
-+ process_init_element (loc, pop_init_level (1, braced_init_obstack),
- true, braced_init_obstack);
- else
- break;
-@@ -8683,7 +8683,7 @@
- if (value.value)
- {
- push_member_name (constructor_fields);
-- output_init_element (value.value, value.original_type,
-+ output_init_element (loc, value.value, value.original_type,
- strict_string, fieldtype,
- constructor_fields, 1, implicit,
- braced_init_obstack);
-@@ -8775,7 +8775,7 @@
- if (value.value)
- {
- push_member_name (constructor_fields);
-- output_init_element (value.value, value.original_type,
-+ output_init_element (loc, value.value, value.original_type,
- strict_string, fieldtype,
- constructor_fields, 1, implicit,
- braced_init_obstack);
-@@ -8827,7 +8827,7 @@
- if (value.value)
- {
- push_array_bounds (tree_to_uhwi (constructor_index));
-- output_init_element (value.value, value.original_type,
-+ output_init_element (loc, value.value, value.original_type,
- strict_string, elttype,
- constructor_index, 1, implicit,
- braced_init_obstack);
-@@ -8862,7 +8862,7 @@
- {
- if (TREE_CODE (value.value) == VECTOR_CST)
- elttype = TYPE_MAIN_VARIANT (constructor_type);
-- output_init_element (value.value, value.original_type,
-+ output_init_element (loc, value.value, value.original_type,
- strict_string, elttype,
- constructor_index, 1, implicit,
- braced_init_obstack);
-@@ -8891,7 +8891,7 @@
- else
- {
- if (value.value)
-- output_init_element (value.value, value.original_type,
-+ output_init_element (loc, value.value, value.original_type,
- strict_string, constructor_type,
- NULL_TREE, 1, implicit,
- braced_init_obstack);
-@@ -8910,8 +8910,8 @@
- while (constructor_stack != range_stack->stack)
- {
- gcc_assert (constructor_stack->implicit);
-- process_init_element (pop_init_level (1,
-- braced_init_obstack),
-+ process_init_element (loc,
-+ pop_init_level (1, braced_init_obstack),
- true, braced_init_obstack);
- }
- for (p = range_stack;
-@@ -8919,7 +8919,8 @@
- p = p->prev)
- {
- gcc_assert (constructor_stack->implicit);
-- process_init_element (pop_init_level (1, braced_init_obstack),
-+ process_init_element (loc,
-+ pop_init_level (1, braced_init_obstack),
- true, braced_init_obstack);
- }
-
---- a/src/gcc/c/c-tree.h
-+++ b/src/gcc/c/c-tree.h
-@@ -612,7 +612,8 @@
- extern struct c_expr pop_init_level (int, struct obstack *);
- extern void set_init_index (tree, tree, struct obstack *);
- extern void set_init_label (tree, struct obstack *);
--extern void process_init_element (struct c_expr, bool, struct obstack *);
-+extern void process_init_element (location_t, struct c_expr, bool,
-+ struct obstack *);
- extern tree build_compound_literal (location_t, tree, tree, bool);
- extern void check_compound_literal_type (location_t, struct c_type_name *);
- extern tree c_start_case (location_t, location_t, tree);
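Note (not part of the patch): the c-parser.c, c-typeck.c and c-tree.h hunks above thread an explicit location_t through process_init_element and output_init_element down to digest_init, so diagnostics issued while digesting an initializer element can use the location of that element rather than whatever input_location happens to be. A small illustrative snippet of the class of code involved follows; which diagnostics actually pick up the new location depends on the caller, so this is only a sketch.

/* Illustrative only: an initializer element that triggers a diagnostic.
   With the element's location available, the report can point at the
   offending element rather than at a generic parser position.  */
int a[2] = { 1, 2, 3 };   /* warning: excess elements in array initializer */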
---- a/src/gcc/c/ChangeLog.linaro
-+++ b/src/gcc/c/ChangeLog.linaro
-@@ -0,0 +1,51 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/gcc/target.def
-+++ b/src/gcc/target.def
-@@ -3039,6 +3039,43 @@
- int, (enum machine_mode mode, reg_class_t rclass, bool in),
- default_memory_move_cost)
-
-+DEFHOOK
-+(use_by_pieces_infrastructure_p,
-+ "GCC will attempt several strategies when asked to copy between\n\
-+two areas of memory, or to set, clear or store to memory, for example\n\
-+when copying a @code{struct}. The @code{by_pieces} infrastructure\n\
-+implements such memory operations as a sequence of load, store or move\n\
-+insns. Alternate strategies are to expand the\n\
-+@code{movmem} or @code{setmem} optabs, to emit a library call, or to emit\n\
-+unit-by-unit, loop-based operations.\n\
-+\n\
-+This target hook should return true if, for a memory operation with a\n\
-+given @var{size} and @var{alignment}, using the @code{by_pieces}\n\
-+infrastructure is expected to result in better code generation.\n\
-+Both @var{size} and @var{alignment} are measured in terms of storage\n\
-+units.\n\
-+\n\
-+The parameter @var{op} is one of: @code{CLEAR_BY_PIECES},\n\
-+@code{MOVE_BY_PIECES}, @code{SET_BY_PIECES}, @code{STORE_BY_PIECES}.\n\
-+These describe the type of memory operation under consideration.\n\
-+\n\
-+The parameter @var{speed_p} is true if the code is currently being\n\
-+optimized for speed rather than size.\n\
-+\n\
-+Returning true for higher values of @var{size} can improve code generation\n\
-+for speed if the target does not provide an implementation of the\n\
-+@code{movmem} or @code{setmem} standard names, if the @code{movmem} or\n\
-+@code{setmem} implementation would be more expensive than a sequence of\n\
-+insns, or if the overhead of a library call would dominate that of\n\
-+the body of the memory operation.\n\
-+\n\
-+Returning true for higher values of @code{size} may also cause an increase\n\
-+in code size, for example where the number of insns emitted to perform a\n\
-+move would be greater than that of a library call.",
-+ bool, (unsigned HOST_WIDE_INT size, unsigned int alignment,
-+ enum by_pieces_operation op, bool speed_p),
-+ default_use_by_pieces_infrastructure_p)
-+
- /* True for MODE if the target expects that registers in this mode will
- be allocated to registers in a small register class. The compiler is
- allowed to use registers explicitly used in the rtl as spill registers
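Note (not part of the patch): the target.def hunk above documents the consolidated TARGET_USE_BY_PIECES_INFRASTRUCTURE_P hook. A sketch of how a backend might override it follows; the signature and the default_use_by_pieces_infrastructure_p fallback are taken from the hunk itself, while the function name and the 16-byte threshold are invented for illustration.

/* Sketch only: a hypothetical backend implementation of the hook.  */
static bool
example_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
                                        unsigned int align,
                                        enum by_pieces_operation op,
                                        bool speed_p)
{
  /* Prefer inline immediate stores for small blocks when optimizing
     for speed; defer to the default heuristic otherwise.  */
  if (op == STORE_BY_PIECES && speed_p)
    return size <= 16;

  return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
}

#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
  example_use_by_pieces_infrastructure_p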
---- a/src/gcc/optabs.c
-+++ b/src/gcc/optabs.c
-@@ -4234,7 +4234,7 @@
- y = const0_rtx;
- }
-
-- *pmode = word_mode;
-+ *pmode = ret_mode;
- prepare_cmp_insn (x, y, comparison, NULL_RTX, unsignedp, methods,
- ptest, pmode);
- }
---- a/src/gcc/defaults.h
-+++ b/src/gcc/defaults.h
-@@ -914,14 +914,6 @@
- #define PREFERRED_DEBUGGING_TYPE NO_DEBUG
- #endif
-
--#ifndef LARGEST_EXPONENT_IS_NORMAL
--#define LARGEST_EXPONENT_IS_NORMAL(SIZE) 0
--#endif
--
--#ifndef ROUND_TOWARDS_ZERO
--#define ROUND_TOWARDS_ZERO 0
--#endif
--
- #ifndef FLOAT_LIB_COMPARE_RETURNS_BOOL
- #define FLOAT_LIB_COMPARE_RETURNS_BOOL(MODE, COMPARISON) false
- #endif
-@@ -1065,6 +1057,15 @@
- #define MOVE_MAX_PIECES MOVE_MAX
- #endif
-
-+/* STORE_MAX_PIECES is the number of bytes at a time that we can
-+ store efficiently. Due to internal GCC limitations, this is
-+ MOVE_MAX_PIECES limited by the number of bytes GCC can represent
-+ for an immediate constant. */
-+
-+#ifndef STORE_MAX_PIECES
-+#define STORE_MAX_PIECES MIN (MOVE_MAX_PIECES, 2 * sizeof (HOST_WIDE_INT))
-+#endif
-+
- #ifndef MAX_MOVE_MAX
- #define MAX_MOVE_MAX MOVE_MAX
- #endif
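Note (not part of the patch): a worked example of the STORE_MAX_PIECES default added above, under assumed values.

/* Assuming sizeof (HOST_WIDE_INT) == 8 on the host and a target where
   MOVE_MAX_PIECES is 8:
     STORE_MAX_PIECES = MIN (8, 2 * 8) = 8
   so store_by_pieces may emit immediate stores of at most 8 bytes.  */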
---- a/src/gcc/target.h
-+++ b/src/gcc/target.h
-@@ -78,6 +78,17 @@
- SWITCH_TYPE_LINE_END /* Please emit a line terminator. */
- };
-
-+/* Types of memory operation understood by the "by_pieces" infrastructure.
-+ Used by the TARGET_USE_BY_PIECES_INFRASTRUCTURE_P target hook. */
-+
-+enum by_pieces_operation
-+{
-+ CLEAR_BY_PIECES,
-+ MOVE_BY_PIECES,
-+ SET_BY_PIECES,
-+ STORE_BY_PIECES
-+};
-+
- typedef int (* print_switch_fn_type) (print_switch_type, const char *);
-
- /* An example implementation for ELF targets. Defined in varasm.c */
---- a/src/gcc/configure
-+++ b/src/gcc/configure
-@@ -1686,7 +1686,8 @@
- use sysroot as the system root during the build
- --with-sysroot[=DIR] search for usr/lib, usr/include, et al, within DIR
- --with-specs=SPECS add SPECS to driver command-line processing
-- --with-pkgversion=PKG Use PKG in the version string in place of "GCC"
-+ --with-pkgversion=PKG Use PKG in the version string in place of "Linaro
-+ GCC `cat $srcdir/LINARO-VERSION`"
- --with-bugurl=URL Direct users to URL to report a bug
- --with-multilib-list select multilibs (AArch64, SH and x86-64 only)
- --with-gnu-ld assume the C compiler uses GNU ld default=no
-@@ -7231,7 +7232,7 @@
- *) PKGVERSION="($withval) " ;;
- esac
- else
-- PKGVERSION="(GCC) "
-+ PKGVERSION="(Linaro GCC `cat $srcdir/LINARO-VERSION`) "
-
- fi
-
-@@ -17936,7 +17937,7 @@
- lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
- lt_status=$lt_dlunknown
- cat > conftest.$ac_ext <<_LT_EOF
--#line 17939 "configure"
-+#line 17940 "configure"
- #include "confdefs.h"
-
- #if HAVE_DLFCN_H
-@@ -18042,7 +18043,7 @@
- lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
- lt_status=$lt_dlunknown
- cat > conftest.$ac_ext <<_LT_EOF
--#line 18045 "configure"
-+#line 18046 "configure"
- #include "confdefs.h"
-
- #if HAVE_DLFCN_H
---- a/src/gcc/lra-eliminations.c
-+++ b/src/gcc/lra-eliminations.c
-@@ -1164,7 +1164,9 @@
- ep->from, ep->to);
- /* If after processing RTL we decides that SP can be used as
- a result of elimination, it can not be changed. */
-- gcc_assert (ep->to_rtx != stack_pointer_rtx);
-+ gcc_assert ((ep->to_rtx != stack_pointer_rtx)
-+ || (ep->from < FIRST_PSEUDO_REGISTER
-+ && fixed_regs [ep->from]));
- /* Mark that is not eliminable anymore. */
- elimination_map[ep->from] = NULL;
- for (ep1 = ep + 1; ep1 < &reg_eliminate[NUM_ELIMINABLE_REGS]; ep1++)
---- a/src/gcc/objc/ChangeLog.linaro
-+++ b/src/gcc/objc/ChangeLog.linaro
-@@ -0,0 +1,51 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/gcc/ChangeLog.linaro
-+++ b/src/gcc/ChangeLog.linaro
-@@ -0,0 +1,3211 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+ * LINARO-VERSION: Update.
-+
-+2015-01-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Fix Linaro PR #902
-+
-+ Partial Backport from trunk r211798.
-+ 2014-06-18 Radovan Obradovic <robradovic@mips.com>
-+ Tom de Vries <tom@codesourcery.com>
-+
-+ * config/arm/arm.c (arm_emit_call_insn): Add IP and CC clobbers to
-+ CALL_INSN_FUNCTION_USAGE.
-+
-+ Backport from trunk r209800.
-+ 2014-04-25 Tom de Vries <tom@codesourcery.com>
-+
-+ * expr.c (clobber_reg_mode): New function.
-+ * expr.h (clobber_reg): New function.
-+
-+2015-01-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r211783.
-+ 2014-06-18 Charles Baylis <charles.baylis@linaro.org>
-+
-+ * config/arm/arm.c (neon_vector_mem_operand): Allow register
-+ POST_MODIFY for neon loads and stores.
-+ (arm_print_operand): Output post-index register for neon loads and
-+ stores.
-+
-+2015-01-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r218451.
-+ 2014-12-06 James Greenhalgh <james.greenhalgh@arm.com>
-+ Sebastian Pop <s.pop@samsung.com>
-+ Brian Rzycki <b.rzycki@samsung.com>
-+
-+ PR tree-optimization/54742
-+ * params.def (max-fsm-thread-path-insns, max-fsm-thread-length,
-+ max-fsm-thread-paths): New.
-+
-+ * doc/invoke.texi (max-fsm-thread-path-insns, max-fsm-thread-length,
-+ max-fsm-thread-paths): Documented.
-+
-+ * tree-cfg.c (split_edge_bb_loc): Export.
-+ * tree-cfg.h (split_edge_bb_loc): Declared extern.
-+
-+ * tree-ssa-threadedge.c (simplify_control_stmt_condition): Restore the
-+ original value of cond when simplification fails.
-+ (fsm_find_thread_path): New.
-+ (fsm_find_control_statement_thread_paths): New.
-+ (thread_through_normal_block): Call find_control_statement_thread_paths.
-+
-+ * tree-ssa-threadupdate.c (dump_jump_thread_path): Pretty print
-+ EDGE_FSM_THREAD.
-+ (verify_seme): New.
-+ (duplicate_seme_region): New.
-+ (thread_through_all_blocks): Generate code for EDGE_FSM_THREAD edges
-+ calling duplicate_seme_region.
-+
-+ * tree-ssa-threadupdate.h (jump_thread_edge_type): Add EDGE_FSM_THREAD.
-+
-+2015-01-13 Michael Collison <michael.collison@linaro.org>
-+
-+ Backport from trunk r217394.
-+ 2014-11-11 Andrew Pinski <apinski@cavium.com>
-+
-+ Bug target/61997
-+ * config.gcc (aarch64*-*-*): Set target_gtfiles to include
-+ aarch64-builtins.c.
-+ * config/aarch64/aarch64-builtins.c: Include gt-aarch64-builtins.h
-+ at the end of the file.
-+
-+2015-01-13 Michael Collison <michael.collison@linaro.org>
-+
-+ Backport from trunk r216267, r216547, r216548, r217072, r217192, r217405,
-+ r217406, r217768.
-+ 2014-11-19 Renlin Li <renlin.li@arm.com>
-+
-+ * config/aarch64/aarch64.h (TARGET_CPU_CPP_BUILTINS): Define __ARM_FP_FAST,
-+ __ARM_FEATURE_FMA, __ARM_FP, __ARM_FEATURE_NUMERIC_MAXMIN, __ARM_NEON_FP.
-+
-+ 2014-11-12 Tejas Belagod <tejas.belagod@arm.com>
-+
-+ * Makefile.in (TEXI_GCC_FILES): Remove arm-acle-intrinsics.texi,
-+ arm-neon-intrinsics.texi, aarch64-acle-intrinsics.texi.
-+ * doc/aarch64-acle-intrinsics.texi: Remove.
-+ * doc/arm-acle-intrinsics.texi: Remove.
-+ * doc/arm-neon-intrinsics.texi: Remove.
-+ * doc/extend.texi: Consolidate sections AArch64 intrinsics,
-+ ARM NEON Intrinsics, ARM ACLE Intrinsics into one ARM C Language
-+ Extension section. Add references to public ACLE specification.
-+
-+ 2014-11-06 Renlin Li <renlin.li@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_architecture_version): New.
-+ (processor): New architecture_version field.
-+ (aarch64_override_options): Initialize aarch64_architecture_version.
-+ * config/aarch64/aarch64.h (TARGET_CPU_CPP_BUILTINS): Define __ARM_ARCH,
-+ __ARM_ARCH_PROFILE, aarch64_arch_name macro.
-+
-+ 2014-11-04 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
-+
-+ * config/arm/arm.h (TARGET_CPU_CPP_BUILTINS): Fix typo in definition
-+ of __ARM_FEATURE_IDIV.
-+
-+ 2014-10-22 Jiong Wang <jiong.wang@arm.com>
-+
-+ * config/arm/arm.h (TARGET_CPU_CPP_BUILTINS): Add missing '\'.
-+
-+ 2014-10-22 Renlin Li <renlin.li@arm.com>
-+
-+ * config/arm/arm.h (TARGET_CPU_CPP_BUILTINS): Define
-+ __ARM_FEATURE_IDIV__.
-+
-+ 2014-10-15 Renlin Li <renlin.li@arm.com>
-+
-+ * config/aarch64/aarch64.h (TARGET_CPU_CPP_BUILTINS): Define
-+ __ARM_BIG_ENDIAN, __ARM_SIZEOF_MINIMAL_ENUM. Add __ARM_64BIT_STATE,
-+ __ARM_ARCH_ISA_A64, __ARM_FEATURE_CLZ, __ARM_FEATURE_IDIV,
-+ __ARM_FEATURE_UNALIGNED, __ARM_PCS_AAPCS64, __ARM_SIZEOF_WCHAR_T.
-+
-+2015-01-13 Michael Collison <michael.collison@linaro.org>
-+
-+ Backport from trunk r211789, r211790, r211791, r211792, r211793, r211794,
-+ r211795, r211796, r211797.
-+ 2014-06-18 Charles Baylis <charles.baylis@linaro.org>
-+
-+ * config/arm/bpabi.c (__gnu_uldivmod_helper): Remove.
-+
-+ 2014-06-18 Charles Baylis <charles.baylis@linaro.org>
-+
-+ * config/arm/bpabi-v6m.S (__aeabi_uldivmod): Perform division using
-+ __udivmoddi4.
-+
-+ 2014-06-18 Charles Baylis <charles.baylis@linaro.org>
-+
-+ * config/arm/bpabi.S (__aeabi_ldivmod, __aeabi_uldivmod,
-+ push_for_divide, pop_for_divide): Use .cfi_* directives for DWARF
-+ annotations. Fix DWARF information.
-+
-+ 2014-06-18 Charles Baylis <charles.baylis@linaro.org>
-+
-+ * config/arm/bpabi.S (__aeabi_ldivmod): Perform division using
-+ __udivmoddi4, and fixups for negative operands.
-+
-+ 2014-06-18 Charles Baylis <charles.baylis@linaro.org>
-+
-+ * config/arm/bpabi.S (__aeabi_ldivmod): Optimise stack manipulation.
-+
-+ 2014-06-18 Charles Baylis <charles.baylis@linaro.org>
-+
-+ * config/arm/bpabi.S (__aeabi_uldivmod): Perform division using call
-+ to __udivmoddi4.
-+
-+ 2014-06-18 Charles Baylis <charles.baylis@linaro.org>
-+
-+ * config/arm/bpabi.S (__aeabi_uldivmod): Optimise stack pointer
-+ manipulation.
-+
-+ 2014-06-18 Charles Baylis <charles.baylis@linaro.org>
-+
-+ * config/arm/bpabi.S (__aeabi_uldivmod, __aeabi_ldivmod): Add comment
-+ describing register usage on function entry and exit.
-+
-+ 2014-06-18 Charles Baylis <charles.baylis@linaro.org>
-+
-+ * config/arm/bpabi.S (__aeabi_uldivmod): Fix whitespace.
-+ (__aeabi_ldivmod): Fix whitespace.
-+
-+2015-01-13 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r217593.
-+ 2014-11-14 Andrew Pinski <apinski@cavium.com>
-+
-+ * config/aarch64/aarch64-cores.def (thunderx): Change the scheduler
-+ over to thunderx.
-+ * config/aarch64/aarch64.md: Include thunderx.md.
-+ (generic_sched): Set to no for thunderx.
-+ * config/aarch64/thunderx.md: New file.
-+
-+2015-01-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r217717.
-+ 2014-11-18 Felix Yang <felix.yang@huawei.com>
-+
-+ * config/aarch64/aarch64.c (doloop_end): New pattern.
-+ * config/aarch64/aarch64.md (TARGET_CAN_USE_DOLOOP_P): Implement.
-+
-+2015-01-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r217661.
-+ 2014-11-17 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * config/aarch64/aarch64-cores.def (cortex-a53): Remove
-+ AARCH64_FL_CRYPTO from feature flags.
-+ (cortex-a57): Likewise.
-+ (cortex-a57.cortex-a53): Likewise.
-+
-+2015-01-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r218319.
-+ 2014-12-03 Andrew Stubbs <ams@codesourcery.com>
-+
-+ Revert:
-+
-+ 2014-09-17 Andrew Stubbs <ams@codesourcery.com>
-+
-+ * config/arm/arm.c (arm_option_override): Reject -mfpu=neon
-+ when architecture is older than ARMv7.
-+
-+2015-01-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r217691.
-+ 2014-11-18 Jiong Wang <jiong.wang@arm.com>
-+
-+ * lra-eliminations.c (update_reg_eliminate): Relax gcc_assert for fixed
-+ registers.
-+
-+2015-01-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r215503.
-+ 2014-09-23 Wilco Dijkstra <wdijkstr@arm.com>
-+
-+ * common/config/aarch64/aarch64-common.c:
-+ (default_options aarch_option_optimization_table):
-+ Default to -fsched-pressure.
-+
-+2015-01-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r211132.
-+ 2014-06-02 Tom de Vries <tom@codesourcery.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_float_const_representable_p): Handle
-+ case that x has VOIDmode.
-+
-+2015-01-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209620.
-+ 2014-04-22 Vidya Praveen <vidyapraveen@arm.com>
-+
-+ * aarch64.md (float<GPI:mode><GPF:mode>2): Remove.
-+ (floatuns<GPI:mode><GPF:mode>2): Remove.
-+ (<optab><fcvt_target><GPF:mode>2): New pattern for equal width float
-+ and floatuns conversions.
-+ (<optab><fcvt_iesize><GPF:mode>2): New pattern for inequal width float
-+ and floatuns conversions.
-+ * iterators.md (fcvt_target, FCVT_TARGET): Support SF and DF modes.
-+ (w1,w2): New mode attributes for inequal width conversions.
-+
-+2015-01-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r217362, r217546.
-+ 2014-11-14 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
-+
-+ PR target/63724
-+ * config/aarch64/aarch64.c (aarch64_expand_mov_immediate): Split out
-+ numerical immediate handling to...
-+ (aarch64_internal_mov_immediate): ...this. New.
-+ (aarch64_rtx_costs): Use aarch64_internal_mov_immediate.
-+ (aarch64_mov_operand_p): Relax predicate.
-+ * config/aarch64/aarch64.md (mov<mode>:GPI): Do not expand CONST_INTs.
-+ (*movsi_aarch64): Turn into define_insn_and_split and new alternative
-+ for 'n'.
-+ (*movdi_aarch64): Likewise.
-+
-+ 2014-11-11 James Greenhalgh <james.greenhalgh@arm.com>
-+
-+ * config/aarch64/aarch64-simd.md
-+ (aarch64_simd_bsl<mode>_internal): Remove float cases, canonicalize.
-+ (aarch64_simd_bsl<mode>): Add gen_lowpart expressions where we
-+ are punning between float vectors and integer vectors.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ * LINARO-VERSION: Bump version.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+ * LINARO-VERSION: Update.
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r217079, r217080.
-+ 2014-11-04 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ config/arm/neon.md (reduc_smin_<mode> *2): Rename to...
-+ (reduc_smin_scal_<mode> *2): ...this; extract scalar result.
-+ (reduc_smax_<mode> *2): Rename to...
-+ (reduc_smax_scal_<mode> *2): ...this; extract scalar result.
-+ (reduc_umin_<mode> *2): Rename to...
-+ (reduc_umin_scal_<mode> *2): ...this; extract scalar result.
-+ (reduc_umax_<mode> *2): Rename to...
-+ (reduc_umax_scal_<mode> *2): ...this; extract scalar result.
-+
-+ 2014-11-04 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ config/arm/neon.md (reduc_plus_*): Rename to...
-+ (reduc_plus_scal_*): ...this; reduce to temp and extract scalar result.
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Fix Backport from trunk r216524 (committed at r218379).
-+ Add missing file: config/aarch64/aarch64-cost-tables.h
-+
-+ * config/aarch64/aarch64-cost-tables.h: New file.
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r217076.
-+ 2014-11-04 Michael Collison <michael.collison@linaro.org>
-+
-+ * config/aarch64/iterators.md (lconst_atomic): New mode attribute
-+ to support constraints for CONST_INT in atomic operations.
-+ * config/aarch64/atomics.md
-+ (atomic_<atomic_optab><mode>): Use lconst_atomic constraint.
-+ (atomic_nand<mode>): Likewise.
-+ (atomic_fetch_<atomic_optab><mode>): Likewise.
-+ (atomic_fetch_nand<mode>): Likewise.
-+ (atomic_<atomic_optab>_fetch<mode>): Likewise.
-+ (atomic_nand_fetch<mode>): Likewise.
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r217026.
-+ 2014-11-03 Zhenqiang Chen <zhenqiang.chen@arm.com>
-+
-+ * ifcvt.c (noce_emit_cmove, noce_get_alt_condition, noce_get_condition):
-+ Allow CC mode if HAVE_cbranchcc4.
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r217014.
-+ 2014-11-02 Michael Collison <michael.collison@linaro.org>
-+
-+ * config/arm/arm.h (CLZ_DEFINED_VALUE_AT_ZERO) : Update
-+ to support vector modes.
-+ (CTZ_DEFINED_VALUE_AT_ZERO): Ditto.
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r216996, r216998, r216999, r217001, r217002, r217003,
-+ r217004, r217742.
-+ 2014-11-18 James Greenhalgh <james.greenhalgh@arm.com>
-+
-+ PR target/63937
-+ * target.def (use_by_pieces_infrastructure_p): Take unsigned
-+ HOST_WIDE_INT as the size parameter.
-+ * targhooks.c (default_use_by_pieces_infrastructure_p): Likewise.
-+ * targhooks.h (default_use_by_pieces_infrastructure_p): Likewise.
-+ * config/arc/arc.c (arc_use_by_pieces_infrastructure_p)): Likewise.
-+ * config/mips/mips.c (mips_use_by_pieces_infrastructure_p)): Likewise.
-+ * config/s390/s390.c (s390_use_by_pieces_infrastructure_p)): Likewise.
-+ * config/sh/sh.c (sh_use_by_pieces_infrastructure_p)): Likewise.
-+ * config/aarch64/aarch64.c
-+ (aarch64_use_by_pieces_infrastructure_p)): Likewise.
-+ * doc/tm.texi: Regenerate.
-+
-+ 2014-11-01 James Greenhalgh <james.greenhalgh@arm.com>
-+
-+ * doc/tm.texi.in (MOVE_BY_PIECES_P): Remove.
-+ (CLEAR_BY_PIECES_P): Likewise.
-+ (SET_BY_PIECES_P): Likewise.
-+ (STORE_BY_PIECES_P): Likewise.
-+ * doc/tm.texi: Regenerate.
-+ * system.h: Poison MOVE_BY_PIECES_P, CLEAR_BY_PIECES_P,
-+ SET_BY_PIECES_P, STORE_BY_PIECES_P.
-+ * expr.c (MOVE_BY_PIECES_P): Remove.
-+ (CLEAR_BY_PIECES_P): Likewise.
-+ (SET_BY_PIECES_P): Likewise.
-+ (STORE_BY_PIECES_P): Likewise.
-+ (can_move_by_pieces): Rewrite in terms of
-+ targetm.use_by_pieces_infrastructure_p.
-+ (emit_block_move_hints): Likewise.
-+ (can_store_by_pieces): Likewise.
-+ (store_by_pieces): Likewise.
-+ (clear_storage_hints): Likewise.
-+ (emit_push_insn): Likewise.
-+ (expand_constructor): Likewise.
-+
-+ 2014-11-01 James Greenhalgh <james.greenhalgh@arm.com>
-+
-+ * config/aarch64/aarch64.c
-+ (aarch64_use_by_pieces_infrastructre_p): New.
-+ (TARGET_USE_BY_PIECES_INFRASTRUCTURE): Likewise.
-+ * config/aarch64/aarch64.h (STORE_BY_PIECES_P): Delete.
-+
-+ 2014-11-01 James Greenhalgh <james.greenhalgh@arm.com>
-+
-+ * config/mips/mips.h (MOVE_BY_PIECES_P): Remove.
-+ (STORE_BY_PIECES_P): Likewise.
-+ * config/mips/mips.c (TARGET_USE_BY_PIECES_INFRASTRUCTURE_P): New.
-+ (mips_move_by_pieces_p): Rename to...
-+ (mips_use_by_pieces_infrastructure_p): ...this, use new hook
-+ parameters, use the default hook implementation as a
-+ fall-back.
-+
-+ 2014-11-01 James Greenhalgh <james.greenhalgh@arm.com>
-+
-+ * config/sh/sh.c (TARGET_USE_BY_PIECES_INFRASTRUCTURE_P): New.
-+ (sh_use_by_pieces_infrastructure_p): Likewise.
-+ * config/sh/sh.h (MOVE_BY_PIECES_P): Remove.
-+ (STORE_BY_PIECES_P): Likewise.
-+ (SET_BY_PIECES_P): Likewise.
-+
-+ 2014-11-01 James Greenhalgh <james.greenhalgh@arm.com>
-+
-+ * config/arc/arc.c (TARGET_USE_BY_PIECES_INFRASTRUCTURE_P): New.
-+ (arc_use_by_pieces_infrastructure_p): Likewise.
-+ * confir/arc/arc.h (MOVE_BY_PIECES_P): Delete.
-+ (CAN_MOVE_BY_PIECES): Likewise.
-+
-+ 2014-11-01 James Greenhalgh <james.greenhalgh@arm.com>
-+
-+ * config/s390/s390.c (s390_use_by_pieces_infrastructure_p): New.
-+ (TARGET_USE_BY_PIECES_INFRASTRUCTURE_P): Likewise.
-+ * config/s390/s390.h (MOVE_BY_PIECES_P): Remove.
-+ (CLEAR_BY_PIECES): Likewise.
-+ (SET_BY_PIECES): Likewise.
-+ (STORE_BY_PIECES): Likewise.
-+
-+ 2014-11-01 James Greenhalgh <james.greenhalgh@arm.com>
-+
-+ * target.def (use_by_pieces_infrastructure_p): New.
-+ * doc/tm.texi.in (MOVE_BY_PIECES_P): Describe that this macro
-+ is deprecated.
-+ (STORE_BY_PIECES_P): Likewise.
-+ (CLEAR_BY_PIECES_P): Likewise.
-+ (SET_BY_PIECES_P): Likewise.
-+ (TARGET_MOVE_BY_PIECES_PROFITABLE_P): Add hook.
-+ * doc/tm.texi: Regenerate.
-+ * expr.c (MOVE_BY_PIECES_P): Rewrite in terms of
-+ TARGET_USE_BY_PIECES_INFRASTRUCTURE_P.
-+ (STORE_BY_PIECES_P): Likewise.
-+ (CLEAR_BY_PIECES_P): Likewise.
-+ (SET_BY_PIECES_P): Likewise.
-+ (STORE_MAX_PIECES): Move to...
-+ * defaults.h (STORE_MAX_PIECES): ...here.
-+ * targhooks.c (get_move_ratio): New.
-+ (default_use_by_pieces_infrastructure_p): Likewise.
-+ * targhooks.h (default_use_by_pieces_infrastructure_p): New.
-+ * target.h (by_pieces_operation): New.
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r216765.
-+ 2014-10-27 Jiong Wang <jiong.wang@arm.com>
-+
-+ PR target/63442
-+ * optabs.c (prepare_cmp_insn): Use "ret_mode" instead of "word_mode".
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r216630.
-+ 2014-10-24 Felix Yang <felix.yang@huawei.com>
-+ Jiji Jiang <jiangjiji@huawei.com>
-+
-+ PR target/63173
-+ * config/aarch64/arm_neon.h (__LD2R_FUNC): Remove macro.
-+ (__LD3R_FUNC): Ditto.
-+ (__LD4R_FUNC): Ditto.
-+ (vld2_dup_s8, vld2_dup_s16, vld2_dup_s32, vld2_dup_f32, vld2_dup_f64,
-+ vld2_dup_u8, vld2_dup_u16, vld2_dup_u32, vld2_dup_p8, vld2_dup_p16
-+ vld2_dup_s64, vld2_dup_u64, vld2q_dup_s8, vld2q_dup_p8,
-+ vld2q_dup_s16, vld2q_dup_p16, vld2q_dup_s32, vld2q_dup_s64,
-+ vld2q_dup_u8, vld2q_dup_u16, vld2q_dup_u32, vld2q_dup_u64
-+ vld2q_dup_f32, vld2q_dup_f64): Rewrite using builtin functions.
-+ (vld3_dup_s64, vld3_dup_u64, vld3_dup_f64, vld3_dup_s8
-+ vld3_dup_p8, vld3_dup_s16, vld3_dup_p16, vld3_dup_s32
-+ vld3_dup_u8, vld3_dup_u16, vld3_dup_u32, vld3_dup_f32
-+ vld3q_dup_s8, vld3q_dup_p8, vld3q_dup_s16, vld3q_dup_p16
-+ vld3q_dup_s32, vld3q_dup_s64, vld3q_dup_u8, vld3q_dup_u16
-+ vld3q_dup_u32, vld3q_dup_u64, vld3q_dup_f32, vld3q_dup_f64): Likewise.
-+ (vld4_dup_s64, vld4_dup_u64, vld4_dup_f64, vld4_dup_s8
-+ vld4_dup_p8, vld4_dup_s16, vld4_dup_p16, vld4_dup_s32
-+ vld4_dup_u8, vld4_dup_u16, vld4_dup_u32, vld4_dup_f32
-+ vld4q_dup_s8, vld4q_dup_p8, vld4q_dup_s16, vld4q_dup_p16
-+ vld4q_dup_s32, vld4q_dup_s64, vld4q_dup_u8, vld4q_dup_u16
-+ vld4q_dup_u32, vld4q_dup_u64, vld4q_dup_f32, vld4q_dup_f64): Likewise.
-+ * config/aarch64/aarch64.md (define_c_enum "unspec"): Add
-+ UNSPEC_LD2_DUP, UNSPEC_LD3_DUP, UNSPEC_LD4_DUP.
-+ * config/aarch64/aarch64-simd-builtins.def (ld2r, ld3r, ld4r): New
-+ builtins.
-+ * config/aarch64/aarch64-simd.md (aarch64_simd_ld2r<mode>): New pattern.
-+ (aarch64_simd_ld3r<mode>): Likewise.
-+ (aarch64_simd_ld4r<mode>): Likewise.
-+ (aarch64_ld2r<mode>): New expand.
-+ (aarch64_ld3r<mode>): Likewise.
-+ (aarch64_ld4r<mode>): Likewise.
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r217971.
-+ 2014-11-22 Uros Bizjak <ubizjak@gmail.com>
-+
-+ * params.def (PARAM_MAX_COMPLETELY_PEELED_INSNS): Increase to 200.
-+ * config/i386/i386.c (ix86_option_override_internal): Do not increase
-+ PARAM_MAX_COMPLETELY_PEELED_INSNS.
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r216524.
-+ 2014-10-21 Andrew Pinski <apinski@cavium.com>
-+
-+ * doc/invoke.texi (AARCH64/mtune): Document thunderx as an
-+ available option also.
-+ * config/aarch64/aarch64-cost-tables.h: New file.
-+ * config/aarch64/aarch64-cores.def (thunderx): New core.
-+ * config/aarch64/aarch64-tune.md: Regenerate.
-+ * config/aarch64/aarch64.c: Include aarch64-cost-tables.h instead
-+ of config/arm/aarch-cost-tables.h.
-+ (thunderx_regmove_cost): New variable.
-+ (thunderx_tunings): New variable.
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r216336.
-+ 2014-10-16 Richard Earnshaw <rearnsha@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_legitimize_address): New function.
-+ (TARGET_LEGITIMIZE_ADDRESS): Redefine.
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r216253.
-+ 2014-10-15 Renlin Li <renlin.li@arm.com>
-+
-+ * config/aarch64/aarch64.h (ARM_DEFAULT_PCS, arm_pcs_variant): Delete.
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r215711.
-+ 2014-09-30 Terry Guo <terry.guo@arm.com>
-+
-+ * config/arm/arm-cores.def (cortex-m7): New core name.
-+ * config/arm/arm-fpus.def (fpv5-sp-d16): New fpu name.
-+ (fpv5-d16): Ditto.
-+ * config/arm/arm-tables.opt: Regenerated.
-+ * config/arm/arm-tune.md: Regenerated.
-+ * config/arm/arm.h (TARGET_VFP5): New macro.
-+ * config/arm/bpabi.h (BE8_LINK_SPEC): Include cortex-m7.
-+ * config/arm/vfp.md (<vrint_pattern><SDF:mode>2,
-+ smax<mode>3, smin<mode>3): Enabled for FPU FPv5.
-+ * doc/invoke.texi: Document new cpu and fpu names.
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r215707, r215842.
-+ 2014-10-03 David Sherwood <david.sherwood@arm.com>
-+
-+ * ira-int.h (ira_allocno): Mark hard_regno as signed.
-+
-+ 2014-09-30 David Sherwood <david.sherwood@arm.com>
-+
-+ * ira-int.h (ira_allocno): Add "wmode" field.
-+ * ira-build.c (create_insn_allocnos): Add new "parent" function
-+ parameter.
-+ * ira-conflicts.c (ira_build_conflicts): Add conflicts for registers
-+ that cannot be accessed in wmode.
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r215540.
-+ 2014-09-24 Zhenqiang Chen <zhenqiang.chen@arm.com>
-+
-+ PR rtl-optimization/63210
-+ * ira-color.c (assign_hard_reg): Ignore conflict cost if the
-+ HARD_REGNO is not available for CONFLICT_A.
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r215046.
-+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ PR target/61749
-+ * config/aarch64/aarch64-builtins.c (aarch64_types_quadop_qualifiers):
-+ Use qualifier_immediate for last operand. Rename to...
-+ (aarch64_types_ternop_lane_qualifiers): ... This.
-+ (TYPES_QUADOP): Rename to...
-+ (TYPES_TERNOP_LANE): ... This.
-+ (aarch64_simd_expand_args): Return const0_rtx when encountering user
-+ error. Change return of 0 to return of NULL_RTX.
-+ (aarch64_crc32_expand_builtin): Likewise.
-+ (aarch64_expand_builtin): Return NULL_RTX instead of 0.
-+ ICE when expanding unknown builtin.
-+ * config/aarch64/aarch64-simd-builtins.def (sqdmlal_lane): Use
-+ TERNOP_LANE qualifiers.
-+ (sqdmlsl_lane): Likewise.
-+ (sqdmlal_laneq): Likewise.
-+ (sqdmlsl_laneq): Likewise.
-+ (sqdmlal2_lane): Likewise.
-+ (sqdmlsl2_lane): Likewise.
-+ (sqdmlal2_laneq): Likewise.
-+ (sqdmlsl2_laneq): Likewise.
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r215013.
-+ 2014-09-08 Joseph Myers <joseph@codesourcery.com>
-+
-+ * defaults.h (LARGEST_EXPONENT_IS_NORMAL, ROUND_TOWARDS_ZERO):
-+ Remove.
-+ * doc/tm.texi.in (ROUND_TOWARDS_ZERO, LARGEST_EXPONENT_IS_NORMAL):
-+ Remove.
-+ * doc/tm.texi: Regenerate.
-+ * system.h (LARGEST_EXPONENT_IS_NORMAL, ROUND_TOWARDS_ZERO):
-+ Poison.
-+ * config/arm/arm.h (LARGEST_EXPONENT_IS_NORMAL): Remove.
-+ * config/cris/cris.h (__make_dp): Remove.
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r214952.
-+ 2014-09-05 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ * config/aarch64/arm_neon.h (__GET_HIGH): New macro.
-+ (vget_high_f32, vget_high_f64, vget_high_p8, vget_high_p16,
-+ vget_high_s8, vget_high_s16, vget_high_s32, vget_high_s64,
-+ vget_high_u8, vget_high_u16, vget_high_u32, vget_high_u64):
-+ Remove temporary __asm__ and reimplement.
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r214948, r214949.
-+ 2014-09-05 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ * config/aarch64/aarch64-builtins.c (aarch64_fold_builtin): Remove code
-+ handling cmge, cmgt, cmeq, cmtst.
-+
-+ * config/aarch64/aarch64-simd-builtins.def (cmeq, cmge, cmgt, cmle,
-+ cmlt, cmgeu, cmgtu, cmtst): Remove.
-+
-+ * config/aarch64/arm_neon.h (vceq_*, vceqq_*, vceqz_*, vceqzq_*,
-+ vcge_*, vcgeq_*, vcgez_*, vcgezq_*, vcgt_*, vcgtq_*, vcgtz_*,
-+ vcgtzq_*, vcle_*, vcleq_*, vclez_*, vclezq_*, vclt_*, vcltq_*,
-+ vcltz_*, vcltzq_*, vtst_*, vtstq_*): Use gcc vector extensions.
-+
-+ 2014-09-05 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ * config/aarch64/aarch64-builtins.c (aarch64_types_cmtst_qualifiers,
-+ TYPES_TST): Define.
-+ (aarch64_fold_builtin): Update pattern for cmtst.
-+
-+ * config/aarch64/aarch64-protos.h (aarch64_const_vec_all_same_int_p):
-+ Declare.
-+
-+ * config/aarch64/aarch64-simd-builtins.def (cmtst): Update qualifiers.
-+
-+ * config/aarch64/aarch64-simd.md (aarch64_vcond_internal<mode><mode>):
-+ Switch operands, separate out more cases, refactor.
-+
-+ (aarch64_cmtst<mode>): Rewrite pattern to match (plus ... -1).
-+
-+ * config/aarch64.c (aarch64_const_vec_all_same_int_p): Take single
-+ argument; rename old version to...
-+ (aarch64_const_vec_all_same_in_range_p): ...this.
-+ (aarch64_print_operand, aarch64_simd_shift_imm_p): Follow renaming.
-+
-+ * config/aarch64/predicates.md (aarch64_simd_imm_minus_one): Define.
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r214008.
-+ 2014-08-15 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_expand_mov_immediate): Move
-+ one_match > zero_match case to just before simple_sequence.
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r213382.
-+ 2014-07-31 James Greenhalgh <james.greenhalgh@arm.com>
-+
-+ * config/aarch64/arm_neon.h (vpadd_<suf><8,16,32,64>): Move to
-+ correct alphabetical position.
-+ (vpaddd_f64): Rewrite using builtins.
-+ (vpaddd_s64): Move to correct alphabetical position.
-+ (vpaddd_u64): New.
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r210735, r215206, r215207, r215208.
-+ 2014-09-12 Wilco Dijkstra <wilco.dijkstra@arm.com>
-+
-+ * gcc/config/aarch64/aarch64.c (cortexa57_regmove_cost): New cost table
-+ for A57.
-+ (cortexa53_regmove_cost): New cost table for A53. Increase GP2FP/FP2GP
-+ cost to spilling from integer to FP registers.
-+
-+ 2014-09-12 Wilco Dijkstra <wilco.dijkstra@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_register_move_cost): Fix Q register
-+ move handling.
-+ (generic_regmove_cost): Undo raised FP2FP move cost as Q register moves
-+ are now handled correctly.
-+
-+ 2014-09-12 Wilco Dijkstra <wilco.dijkstra@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_register_move_cost): Add cost
-+ handling of CALLER_SAVE_REGS and POINTER_REGS.
-+
-+ 2014-05-22 Kugan Vivekanandarajah <kuganv@linaro.org>
-+
-+ * config/aarch64/aarch64.c (aarch64_regno_regclass) : Change CORE_REGS
-+ to GENERAL_REGS.
-+ (aarch64_secondary_reload) : LikeWise.
-+ (aarch64_class_max_nregs) : Remove CORE_REGS.
-+ * config/aarch64/aarch64.h (enum reg_class) : Remove CORE_REGS.
-+ (REG_CLASS_NAMES) : Likewise.
-+ (REG_CLASS_CONTENTS) : LikeWise.
-+ (INDEX_REG_CLASS) : Change CORE_REGS to GENERAL_REGS.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ * LINARO-VERSION: Bump version.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+ * LINARO-VERSION: Update.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Add Linaro release macros (Linaro only patch.)
-+
-+ * Makefile.in (LINAROVER, LINAROVER_C, LINAROVER_S): Define.
-+ (CFLAGS-cppbuiltin.o): Add LINAROVER macro definition.
-+ (cppbuiltin.o): Depend on $(LINAROVER).
-+ * cppbuiltin.c (parse_linarover): New.
-+ (define_GNUC__): Define __LINARO_RELEASE__ and __LINARO_SPIN__ macros.
-+
-+2014-11-13 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r216229, r216230.
-+ 2014-10-14 Andrew Pinski <apinski@cavium.com>
-+
-+ * explow.c (convert_memory_address_addr_space): Rename to ...
-+ (convert_memory_address_addr_space_1): This. Add in_const argument.
-+ Inside a CONST RTL, permute the conversion and addition of constant
-+ for zero and sign extended pointers.
-+ (convert_memory_address_addr_space): New function.
-+
-+ 2014-10-14 Andrew Pinski <apinski@cavium.com>
-+
-+ Revert:
-+ 2011-08-19 H.J. Lu <hongjiu.lu@intel.com>
-+
-+ PR middle-end/49721
-+ * explow.c (convert_memory_address_addr_space): Also permute the
-+ conversion and addition of constant for zero-extend.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ * LINARO-VERSION: Bump version.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+ * LINARO-VERSION: Update.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ * LINARO-VERSION: Bump version.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+ * LINARO-VERSION: Update.
-+
-+2014-10-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Revert:
-+ 2014-10-08 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r215206, r215207, r215208.
-+ 2014-09-12 Wilco Dijkstra <wilco.dijkstra@arm.com>
-+
-+ * gcc/config/aarch64/aarch64.c (cortexa57_regmove_cost): New cost table
-+ for A57.
-+ (cortexa53_regmove_cost): New cost table for A53. Increase GP2FP/FP2GP
-+ cost to spilling from integer to FP registers.
-+
-+ 2014-09-12 Wilco Dijkstra <wilco.dijkstra@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_register_move_cost): Fix Q register
-+ move handling.
-+ (generic_regmove_cost): Undo raised FP2FP move cost as Q register moves
-+ are now handled correctly.
-+
-+ 2014-09-12 Wilco Dijkstra <wilco.dijkstra@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_register_move_cost): Add cost
-+ handling of CALLER_SAVE_REGS and POINTER_REGS.
-+
-+2014-10-08 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r214825, r214826.
-+ 2014-09-02 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ PR target/62275
-+ * config/arm/neon.md
-+ (neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode>
-+ <v_cmp_result>): New pattern.
-+ * config/arm/iterators.md (NEON_VCVT): New int iterator.
-+ * config/arm/arm_neon_builtins.def (vcvtav2sf, vcvtav4sf, vcvtauv2sf,
-+ vcvtauv4sf, vcvtpv2sf, vcvtpv4sf, vcvtpuv2sf, vcvtpuv4sf, vcvtmv2sf,
-+ vcvtmv4sf, vcvtmuv2sf, vcvtmuv4sf): New builtin definitions.
-+ * config/arm/arm.c (arm_builtin_vectorized_function): Handle
-+ BUILT_IN_LROUNDF, BUILT_IN_LFLOORF, BUILT_IN_LCEILF.
-+
-+ 2014-09-02 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ PR target/62275
-+ * config/arm/iterators.md (FIXUORS): New code iterator.
-+ (VCVT): New int iterator.
-+ (su_optab): New code attribute.
-+ (su): Likewise.
-+ * config/arm/vfp.md (l<vrint_pattern><su_optab><mode>si2): New pattern.
-+
-+2014-10-08 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r215471.
-+ 2014-09-22 James Greenhalgh <james.greenhalgh@arm.com>
-+
-+ * config/aarch64/geniterators.sh: New.
-+ * config/aarch64/iterators.md (VDQF_DF): New.
-+ * config/aarch64/t-aarch64: Generate aarch64-builtin-iterators.h.
-+ * config/aarch64/aarch64-builtins.c (BUILTIN_*) Remove.
-+
-+2014-10-08 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r215206, r215207, r215208.
-+ 2014-09-12 Wilco Dijkstra <wilco.dijkstra@arm.com>
-+
-+ * gcc/config/aarch64/aarch64.c (cortexa57_regmove_cost): New cost table
-+ for A57.
-+ (cortexa53_regmove_cost): New cost table for A53. Increase GP2FP/FP2GP
-+ cost to spilling from integer to FP registers.
-+
-+ 2014-09-12 Wilco Dijkstra <wilco.dijkstra@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_register_move_cost): Fix Q register
-+ move handling.
-+ (generic_regmove_cost): Undo raised FP2FP move cost as Q register moves
-+ are now handled correctly.
-+
-+ 2014-09-12 Wilco Dijkstra <wilco.dijkstra@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_register_move_cost): Add cost
-+ handling of CALLER_SAVE_REGS and POINTER_REGS.
-+
-+2014-10-07 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r214824.
-+ 2014-09-02 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * config/aarch64/predicates.md (aarch64_comparison_operation):
-+ New special predicate.
-+ * config/aarch64/aarch64.md (*csinc2<mode>_insn): Use
-+ aarch64_comparison_operation instead of matching an operator.
-+ Update operand numbers.
-+ (csinc3<mode>_insn): Likewise.
-+ (*csinv3<mode>_insn): Likewise.
-+ (*csneg3<mode>_insn): Likewise.
-+ (ffs<mode>2): Update gen_csinc3<mode>_insn callsite.
-+ * config/aarch64/aarch64.c (aarch64_get_condition_code):
-+ Return -1 instead of aborting on invalid condition codes.
-+ (aarch64_print_operand): Update aarch64_get_condition_code callsites
-+ to assert that the returned condition code is valid.
-+ * config/aarch64/aarch64-protos.h (aarch64_get_condition_code): Export.
-+
-+2014-10-07 Venkataramanan Kumar <venkataramanan.kumar@linaro.org>
-+
-+ Backport from trunk r209643, r211881.
-+ 2014-06-22 Richard Henderson <rth@redhat.com>
-+
-+ PR target/61565
-+ * compare-elim.c (struct comparison): Add eh_note.
-+ (find_comparison_dom_walker::before_dom_children): Don't eliminate
-+ a redundant comparison in a different EH region. Purge EH edges if
-+ necessary.
-+
-+ 2014-04-22 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
-+
-+ * config/aarch64/aarch64.c (TARGET_FLAGS_REGNUM): Define.
-+
-+2014-10-06 Charles Baylis <charles.baylis@linaro.org>
-+
-+ Backport from trunk r214945.
-+ 2014-09-05 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ * config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args): Replace
-+ varargs with pointer parameter.
-+ (aarch64_simd_expand_builtin): pass pointer into previous.
-+
-+2014-10-06 Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>
-+
-+ Backport from trunk r214944.
-+ 2014-09-05 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * config/arm/cortex-a53.md (cortex_a53_alu_shift): Add alu_ext,
-+ alus_ext.
-+
-+2014-10-06 Venkataramanan Kumar <venkataramanan.kumar@linaro.org>
-+
-+ Backport from trunk r214943.
-+ 2014-09-05 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ * config/aarch64/aarch64-simd.md (aarch64_rbit<mode>): New pattern.
-+ * config/aarch64/aarch64-simd-builtins.def (rbit): New builtin.
-+ * config/aarch64/arm_neon.h (vrbit_s8, vrbit_u8, vrbitq_s8, vrbitq_u8):
-+ Replace temporary asm with call to builtin.
-+ (vrbit_p8, vrbitq_p8): New functions.
-+
-+2014-10-06 Michael Collison <michael.collison@linaro.org>
-+
-+ Backport from trunk r214886.
-+ 2014-09-03 Richard Henderson <rth@redhat.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_popwb_single_reg): Remove.
-+ (aarch64_popwb_pair_reg): Remove.
-+ (aarch64_set_frame_expr): Remove.
-+ (aarch64_restore_callee_saves): Add CFI_OPS argument; fill it with
-+ the restore ops performed by the insns generated.
-+ (aarch64_expand_epilogue): Attach CFI_OPS to the stack deallocation
-+ insn. Perform the calls_eh_return addition later; do not attempt to
-+ preserve the CFA in that case. Don't use aarch64_set_frame_expr.
-+ (aarch64_expand_prologue): Use REG_CFA_ADJUST_CFA directly, or no
-+ special markup at all. Load cfun->machine->frame.hard_fp_offset
-+ into a local variable.
-+ (aarch64_frame_pointer_required): Don't check calls_alloca.
-+
-+2014-10-06 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r215385.
-+ 2014-09-19 James Greenhalgh <james.greenhalgh@arm.com>
-+
-+ * config/aarch64/aarch64.md (stack_protect_test_<mode>): Mark
-+ scratch register as written.
-+
-+2014-10-06 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r215346.
-+ 2014-09-18 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * config/arm/neon.md (*movmisalign<mode>_neon_load): Change type
-+ to neon_load1_1reg<q>.
-+
-+2014-10-06 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r215321.
-+ 2014-09-17 Andrew Stubbs <ams@codesourcery.com>
-+
-+ * config/arm/arm.c (arm_option_override): Reject -mfpu=neon
-+ when architecture is older than ARMv7.
-+
-+2014-10-06 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r215260.
-+ 2014-09-14 David Sherwood <david.sherwood@arm.com>
-+
-+ * gcc.target/aarch64/vdup_lane_2.c (force_simd): Emit simd mov.
-+
-+2014-10-06 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r215205.
-+ 2014-09-12 Wilco Dijkstra <wilco.dijkstra@arm.com>
-+
-+ * gcc/ree.c (combine_reaching_defs): Ensure the inserted copy doesn't
-+ change the number of hard registers.
-+
-+2014-10-06 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r215136.
-+ 2014-09-10 Xinliang David Li <davidxl@google.com>
-+
-+ PR target/63209
-+ * config/arm/arm.md (movcond_addsi): Handle case where source
-+ and target operands are the same.
-+
-+2014-10-06 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r215086.
-+ 2014-09-09 Marcus Shawcroft <marcus.shawcroft@arm.com>
-+ Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
-+
-+ * config/aarch64/aarch64-elf-raw.h (ENDFILE_SPEC): Add crtfastmath.o.
-+ * config/aarch64/aarch64-linux.h (GNU_USER_TARGET_MATH_ENDFILE_SPEC):
-+ Define.
-+ (ENDFILE_SPEC): Define and use GNU_USER_TARGET_MATH_ENDFILE_SPEC.
-+
-+2014-10-06 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r215067.
-+ 2014-09-09 Jiong Wang <jiong.wang@arm.com>
-+
-+ * config/arm/arm.c (NEON_COPYSIGNF): New enum.
-+ (arm_init_neon_builtins): Support NEON_COPYSIGNF.
-+ (arm_builtin_vectorized_function): Likewise.
-+ * config/arm/arm_neon_builtins.def: New macro for copysignf.
-+ * config/arm/neon.md (neon_copysignf<mode>): New pattern for vector
-+ copysignf.
-+
-+2014-10-03 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r215050, r215051, r215052, r215053, r215054,
-+ r215055, r215056.
-+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * config/arm/arm.md (vfp_pop_multiple_with_writeback): Use vldm
-+ mnemonic instead of fldmfdd.
-+ * config/arm/arm.c (vfp_output_fstmd): Rename to...
-+ (vfp_output_vstmd): ... This. Convert output to UAL syntax.
-+ Output vpush when address register is SP.
-+ * config/arm/arm-protos.h (vfp_output_fstmd): Rename to...
-+ (vfp_output_vstmd): ... This.
-+ * config/arm/vfp.md (push_multi_vfp): Update call to
-+ vfp_output_vstmd.
-+
-+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * config/arm/vfp.md (*movcc_vfp): Use UAL syntax.
-+
-+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * config/arm/vfp.md (*sqrtsf2_vfp): Use UAL assembly syntax.
-+ (*sqrtdf2_vfp): Likewise.
-+ (*cmpsf_vfp): Likewise.
-+ (*cmpsf_trap_vfp): Likewise.
-+ (*cmpdf_vfp): Likewise.
-+ (*cmpdf_trap_vfp): Likewise.
-+
-+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * config/arm/vfp.md (*extendsfdf2_vfp): Use UAL assembly syntax.
-+ (*truncdfsf2_vfp): Likewise.
-+ (*truncsisf2_vfp): Likewise.
-+ (*truncsidf2_vfp): Likewise.
-+ (fixuns_truncsfsi2): Likewise.
-+ (fixuns_truncdfsi2): Likewise.
-+ (*floatsisf2_vfp): Likewise.
-+ (*floatsidf2_vfp): Likewise.
-+ (floatunssisf2): Likewise.
-+ (floatunssidf2): Likewise.
-+
-+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * config/arm/vfp.md (*mulsf3_vfp): Use UAL assembly syntax.
-+ (*muldf3_vfp): Likewise.
-+ (*mulsf3negsf_vfp): Likewise.
-+ (*muldf3negdf_vfp): Likewise.
-+ (*mulsf3addsf_vfp): Likewise.
-+ (*muldf3adddf_vfp): Likewise.
-+ (*mulsf3subsf_vfp): Likewise.
-+ (*muldf3subdf_vfp): Likewise.
-+ (*mulsf3negsfaddsf_vfp): Likewise.
-+ (*fmuldf3negdfadddf_vfp): Likewise.
-+ (*mulsf3negsfsubsf_vfp): Likewise.
-+ (*muldf3negdfsubdf_vfp): Likewise.
-+
-+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * config/arm/vfp.md (*abssf2_vfp): Use UAL assembly syntax.
-+ (*absdf2_vfp): Likewise.
-+ (*negsf2_vfp): Likewise.
-+ (*negdf2_vfp): Likewise.
-+ (*addsf3_vfp): Likewise.
-+ (*adddf3_vfp): Likewise.
-+ (*subsf3_vfp): Likewise.
-+ (*subdf3_vfp): Likewise.
-+ (*divsf3_vfp): Likewise.
-+ (*divdf3_vfp): Likewise.
-+
-+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * config/arm/arm.c (output_move_vfp): Use UAL syntax for load/store
-+ multiple.
-+ (arm_print_operand): Don't convert real values to decimal
-+ representation in default case.
-+ (fp_immediate_constant): Delete.
-+ * config/arm/arm-protos.h (fp_immediate_constant): Likewise.
-+ * config/arm/vfp.md (*arm_movsi_vfp): Convert VFP moves to UAL
-+ syntax.
-+ (*thumb2_movsi_vfp): Likewise.
-+ (*movdi_vfp): Likewise.
-+ (*movdi_vfp_cortexa8): Likewise.
-+ (*movhf_vfp_neon): Likewise.
-+ (*movhf_vfp): Likewise.
-+ (*movsf_vfp): Likewise.
-+ (*thumb2_movsf_vfp): Likewise.
-+ (*movdf_vfp): Likewise.
-+ (*thumb2_movdf_vfp): Likewise.
-+ (*movsfcc_vfp): Likewise.
-+ (*thumb2_movsfcc_vfp): Likewise.
-+ (*movdfcc_vfp): Likewise.
-+ (*thumb2_movdfcc_vfp): Likewise.
-+
-+2014-10-03 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r214959.
-+ 2014-09-05 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * config/arm/cortex-a53.md (cortex_a53_fpalu): Add f_rints, f_rintd,
-+ f_minmaxs, f_minmaxd types.
-+
-+2014-10-03 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r214947.
-+ 2014-09-05 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ * config/aarch64/aarch64-builtins.c (enum aarch64_type_qualifiers):
-+ Remove qualifier_const_pointer, update comment.
-+
-+2014-10-03 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r214940.
-+ 2014-09-05 James Greenhalgh <james.greenhalgh@arm.com>
-+
-+ * config/aarch64/aarch64.md (sibcall_value_insn): Give operand 1
-+ DImode.
-+
-+2014-10-03 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r213090.
-+ 2014-07-26 Andrew Pinski <apinski@cavium.com>
-+
-+ * config/aarch64/aarch64.md (*extr_insv_lower_reg<mode>): Remove +
-+ from the read only register.
-+
-+2014-09-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ * LINARO-VERSION: Bump version.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+ * LINARO-VERSION: Update.
-+
-+2014-09-09 Venkataramanan Kumar <venkataramanan.kumar@linaro.org>
-+
-+ Backport from trunk r215004.
-+ 2014-09-07 Venkataramanan Kumar <venkataramanan.kumar@linaro.org>
-+
-+ PR target/63190
-+ * config/aarch64/aarch64.md (stack_protect_test_<mode>): Add register
-+ constraint for operand0 and remove write-only modifier from operand3.
-+
-+2014-09-09 Michael Collison <michael.collison@linaro.org>
-+
-+ Backport from trunk r212178
-+ 2014-06-30 Joseph Myers <joseph@codesourcery.com>
-+
-+ * var-tracking.c (add_stores): Return instead of asserting if old
-+ and new values for conditional store are the same.
-+
-+2014-09-03 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Revert:
-+ 2014-09-03 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r213712.
-+ 2014-08-07 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * config/aarch64/aarch64.md (absdi2): Set simd attribute.
-+ (aarch64_reload_mov<mode>): Predicate on TARGET_FLOAT.
-+ (aarch64_movdi_<mode>high): Likewise.
-+ (aarch64_mov<mode>high_di): Likewise.
-+ (aarch64_movdi_<mode>low): Likewise.
-+ (aarch64_mov<mode>low_di): Likewise.
-+ (aarch64_movtilow_tilow): Likewise.
-+ Add comment explaining usage of fp,simd attributes and of
-+ TARGET_FLOAT and TARGET_SIMD.
-+
-+2014-09-03 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r213712.
-+ 2014-08-07 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * config/aarch64/aarch64.md (absdi2): Set simd attribute.
-+ (aarch64_reload_mov<mode>): Predicate on TARGET_FLOAT.
-+ (aarch64_movdi_<mode>high): Likewise.
-+ (aarch64_mov<mode>high_di): Likewise.
-+ (aarch64_movdi_<mode>low): Likewise.
-+ (aarch64_mov<mode>low_di): Likewise.
-+ (aarch64_movtilow_tilow): Likewise.
-+ Add comment explaining usage of fp,simd attributes and of
-+ TARGET_FLOAT and TARGET_SIMD.
-+
-+2014-09-03 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r214526.
-+ 2014-08-26 Joseph Myers <joseph@codesourcery.com>
-+
-+ PR target/60606
-+ PR target/61330
-+ * varasm.c (make_decl_rtl): Clear DECL_ASSEMBLER_NAME and
-+ DECL_HARD_REGISTER and return for invalid register specifications.
-+ * cfgexpand.c (expand_one_var): If expand_one_hard_reg_var clears
-+ DECL_HARD_REGISTER, call expand_one_error_var.
-+ * config/arm/arm.c (arm_hard_regno_mode_ok): Do not allow
-+ CC_REGNUM with non-MODE_CC modes.
-+ (arm_regno_class): Return NO_REGS for PC_REGNUM.
-+
-+2014-09-03 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r214503.
-+ 2014-08-26 Evandro Menezes <e.menezes@samsung.com>
-+
-+ * config/aarch64/aarch64.c (generic_addrcost_table): Delete
-+ qi cost; add di cost.
-+ (cortexa57_addrcost_table): Likewise.
-+
-+2014-09-03 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r213659.
-+ 2014-08-06 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_evpc_dup): Enable for bigendian.
-+ (aarch64_expand_vec_perm_const): Check for dup before zip.
-+
-+2014-09-02 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r213651.
-+ 2014-08-06 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_classify_address): Use REG_P and
-+ CONST_INT_P instead of GET_CODE and compare.
-+ (aarch64_select_cc_mode): Likewise.
-+ (aarch64_print_operand): Likewise.
-+ (aarch64_rtx_costs): Likewise.
-+ (aarch64_simd_valid_immediate): Likewise.
-+ (aarch64_simd_check_vect_par_cnst_half): Likewise.
-+ (aarch64_simd_emit_pair_result_insn): Likewise.
-+
-+2014-08-29 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r212978.
-+ 2014-07-24 Andreas Schwab <schwab@suse.de>
-+
-+ * lib/target-supports.exp (check_effective_target_arm_nothumb):
-+ Also check for __arm__.
-+
-+2014-08-29 Christophe Lyon <christophe.lyon@linaro.org>
-+
-+ Fix backport from trunk 211440:
-+ * config.gcc (aarch64*-*-*): Restore need_64bit_hwint=yes.
-+
-+ This is necessary to build aarch64* compilers on an i686 host.
-+
-+2014-08-26 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r213627.
-+ 2014-08-05 James Greenhalgh <james.greenhalgh@arm.com>
-+
-+ * config/aarch64/aarch64-builtins.c
-+ (aarch64_simd_builtin_type_mode): Delete.
-+ (v8qi_UP): Remap to V8QImode.
-+ (v4hi_UP): Remap to V4HImode.
-+ (v2si_UP): Remap to V2SImode.
-+ (v2sf_UP): Remap to V2SFmode.
-+ (v1df_UP): Remap to V1DFmode.
-+ (di_UP): Remap to DImode.
-+ (df_UP): Remap to DFmode.
-+ (v16qi_UP): Remap to V16QImode.
-+ (v8hi_UP): Remap to V8HImode.
-+ (v4si_UP): Remap to V4SImode.
-+ (v4sf_UP): Remap to V4SFmode.
-+ (v2di_UP): Remap to V2DImode.
-+ (v2df_UP): Remap to V2DFmode.
-+ (ti_UP): Remap to TImode.
-+ (ei_UP): Remap to EImode.
-+ (oi_UP): Remap to OImode.
-+ (ci_UP): Map to CImode.
-+ (xi_UP): Remap to XImode.
-+ (si_UP): Remap to SImode.
-+ (sf_UP): Remap to SFmode.
-+ (hi_UP): Remap to HImode.
-+ (qi_UP): Remap to QImode.
-+ (aarch64_simd_builtin_datum): Make mode a machine_mode.
-+ (VAR1): Build builtin name.
-+ (aarch64_init_simd_builtins): Remove dead code.
-+
-+2014-08-26 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r213713.
-+ 2014-08-07 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * config/arm/arm.md (*cmov<mode>): Set type attribute to fcsel.
-+ * config/arm/types.md (f_sels, f_seld): Delete.
-+
-+2014-08-26 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r213711.
-+ 2014-08-07 Ian Bolton <ian.bolton@arm.com>
-+ Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_expand_mov_immediate):
-+ Use MOVN when one of the half-words is 0xffff.
-+
-+2014-08-26 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r213632.
-+ 2014-08-05 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * config/arm/cortex-a15.md (cortex_a15_alu_shift): Add crc type
-+ to reservation.
-+ * config/arm/cortex-a53.md (cortex_a53_alu_shift): Likewise.
-+
-+2014-08-26 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r213630.
-+ 2014-08-05 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * config/arm/arm.md (clzsi2): Set predicable_short_it attr to no.
-+ (rbitsi2): Likewise.
-+ (*arm_rev): Set predicable and predicable_short_it attributes.
-+
-+2014-08-26 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r213557.
-+ 2014-08-04 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+ James Greenhalgh <james.greenhalgh@arm.com>
-+
-+ * doc/md.texi (clrsb): Document.
-+ (clz): Change reference to x into operand 1.
-+ (ctz): Likewise.
-+ (popcount): Likewise.
-+
-+2014-08-26 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r213551, r213556.
-+ 2014-08-04 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
-+ Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * sched-deps.c (try_group_insn): Generalise macro fusion hook usage
-+ to any two insns. Update comment. Rename to sched_macro_fuse_insns.
-+ (sched_analyze_insn): Update use of try_group_insn to
-+ sched_macro_fuse_insns.
-+ * config/i386/i386.c (ix86_macro_fusion_pair_p): Reject 2nd
-+ arguments that are not conditional jumps.
-+
-+2014-08-26 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r213490.
-+ 2014-08-01 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ * config/aarch64/aarch64-simd-builtins.def (dup_lane, get_lane): Delete.
-+
-+2014-08-26 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r213488.
-+ 2014-08-01 Jiong Wang <jiong.wang@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_classify_address): Accept all offset
-+ for frame access when strict_p is false.
-+
-+2014-08-26 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r213485, r213486, r213487.
-+ 2014-08-01 Renlin Li <renlin.li@arm.com>
-+ Jiong Wang <jiong.wang@arm.com>
-+
-+ * config/aarch64/aarch64.c (offset_7bit_signed_scaled_p): Rename to
-+ aarch64_offset_7bit_signed_scaled_p, remove static and use it.
-+ * config/aarch64/aarch64-protos.h (aarch64_offset_7bit_signed_scaled_p):
-+ Declaration.
-+ * config/aarch64/predicates.md (aarch64_mem_pair_offset): Define new
-+ predicate.
-+ * config/aarch64/aarch64.md (loadwb_pair, storewb_pair): Use
-+ aarch64_mem_pair_offset.
-+
-+ 2014-08-01 Jiong Wang <jiong.wang@arm.com>
-+
-+ * config/aarch64/aarch64.md (loadwb_pair<GPI:mode>_<P:mode>): Fix
-+ offset.
-+ (loadwb_pair<GPI:mode>_<P:mode>): Likewise.
-+ * config/aarch64/aarch64.c (aarch64_gen_loadwb_pair): Likewise.
-+
-+2014-08-26 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r213379.
-+ 2014-07-31 James Greenhalgh <james.greenhalgh@arm.com>
-+
-+ * config/aarch64/aarch64-builtins.c
-+ (aarch64_gimple_fold_builtin): Don't fold reduction operations for
-+ BYTES_BIG_ENDIAN.
-+
-+2014-08-26 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r213378.
-+ 2014-07-31 James Greenhalgh <james.greenhalgh@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_simd_vect_par_cnst_half): Vary
-+ the generated mask based on BYTES_BIG_ENDIAN.
-+ (aarch64_simd_check_vect_par_cnst_half): New.
-+ * config/aarch64/aarch64-protos.h
-+ (aarch64_simd_check_vect_par_cnst_half): New.
-+ * config/aarch64/predicates.md (vect_par_cnst_hi_half): Refactor
-+ the check out to aarch64_simd_check_vect_par_cnst_half.
-+ (vect_par_cnst_lo_half): Likewise.
-+ * config/aarch64/aarch64-simd.md
-+ (aarch64_simd_move_hi_quad_<mode>): Always use vec_par_cnst_lo_half.
-+ (move_hi_quad_<mode>): Always generate a low mask.
-+
-+2014-08-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r212927, r213304.
-+ 2014-07-30 Jiong Wang <jiong.wang@arm.com>
-+
-+ * config/arm/arm.c (arm_get_frame_offsets): Adjust condition for
-+ Thumb2.
-+
-+ 2014-07-23 Jiong Wang <jiong.wang@arm.com>
-+
-+ * config/arm/arm.c (arm_get_frame_offsets): If both r3 and other
-+ callee-saved registers are available for padding purposes
-+ and r3 is not mandatory, then prefer to use those callee-saved
-+ registers instead of r3.
-+
-+2014-08-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r211717, r213692.
-+ 2014-08-07 Kugan Vivekanandarajah <kuganv@linaro.org>
-+
-+ * config/arm/arm.c (bdesc_2arg): Fix typo.
-+ (arm_atomic_assign_expand_fenv): Remove the default implementation.
-+
-+ 2014-06-17 Kugan Vivekanandarajah <kuganv@linaro.org>
-+
-+ * config/arm/arm.c (arm_atomic_assign_expand_fenv): call
-+ default_atomic_assign_expand_fenv for !TARGET_HARD_FLOAT.
-+ (arm_init_builtins) : Initialize builtins __builtins_arm_set_fpscr and
-+ __builtins_arm_get_fpscr only when TARGET_HARD_FLOAT.
-+ * config/arm/vfp.md (set_fpscr): Make pattern conditional on
-+ TARGET_HARD_FLOAT.
-+ (get_fpscr) : Likewise.
-+
-+2014-08-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r212989, r213628.
-+ 2014-08-05 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * convert.c (convert_to_integer): Guard transformation to lrint by
-+ -fno-math-errno.
-+
-+ 2014-07-24 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ PR middle-end/61876
-+ * convert.c (convert_to_integer): Do not convert BUILT_IN_ROUND and cast
-+ when flag_errno_math is on.
-+
-+2014-08-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ * LINARO-VERSION: Bump version.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+ * LINARO-VERSION: Update.
-+
-+2014-08-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r212912, r212913.
-+ 2014-07-22 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Handle CLRSB, CLZ.
-+ (case UNSPEC): Handle UNSPEC_RBIT.
-+
-+ 2014-07-22 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * config/aarch64/aarch64.md: Delete UNSPEC_CLS.
-+ (clrsb<mode>2): Use clrsb RTL code instead of UNSPEC_CLS.
-+
-+2014-08-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r213555.
-+ 2014-08-04 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ PR target/61713
-+ * gcc/optabs.c (expand_atomic_test_and_set): Do not try to emit
-+ move to subtarget in serial version if result is ignored.
-+
-+2014-08-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r213376.
-+ 2014-07-31 Charles Baylis <charles.baylis@linaro.org>
-+
-+ PR target/61948
-+ * config/arm/neon.md (ashldi3_neon): Don't emit arm_ashldi3_1bit unless
-+ constraints are satisfied.
-+ (<shift>di3_neon): Likewise.
-+
-+2014-08-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r211270, r211271, r211273, r211275, r212943,
-+ r212945, r212946, r212947, r212949, r212950, r212951, r212952, r212954,
-+ r212955, r212956, r212957, r212958, r212976, r212996, r212997, r212999,
-+ r213000.
-+ 2014-07-24 Jiong Wang <jiong.wang@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_popwb_single_reg): New function.
-+ (aarch64_expand_epilogue): Optimize epilogue when !frame_pointer_needed.
-+
-+ 2014-07-24 Jiong Wang <jiong.wang@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_pushwb_single_reg): New function.
-+ (aarch64_expand_prologue): Optimize prologue when !frame_pointer_needed.
-+
-+ 2014-07-24 Jiong Wang <jiong.wang@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_restore_callee_saves)
-+ (aarch64_save_callee_saves): New parameter "skip_wb".
-+ (aarch64_expand_prologue, aarch64_expand_epilogue): Update call site.
-+
-+ 2014-07-24 Jiong Wang <jiong.wang@arm.com>
-+
-+ * config/aarch64/aarch64.h (frame): New fields "wb_candidate1" and
-+ "wb_candidate2".
-+ * config/aarch64/aarch64.c (aarch64_layout_frame): Initialize above.
-+
-+ 2014-07-24 Jiong Wang <jiong.wang@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_expand_epilogue): Don't
-+ subtract outgoing area size when restoring stack_pointer_rtx.
-+
-+ 2014-07-23 Jiong Wang <jiong.wang@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_popwb_pair_reg)
-+ (aarch64_gen_loadwb_pair): New helper function.
-+ (aarch64_expand_epilogue): Simplify code using new helper functions.
-+ * config/aarch64/aarch64.md (loadwb_pair<GPF:mode>_<P:mode>): Define.
-+
-+ 2014-07-23 Jiong Wang <jiong.wang@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_pushwb_pair_reg)
-+ (aarch64_gen_storewb_pair): New helper function.
-+ (aarch64_expand_prologue): Simplify code using new helper functions.
-+ * config/aarch64/aarch64.md (storewb_pair<GPF:mode>_<P:mode>): Define.
-+
-+ 2014-07-23 Jiong Wang <jiong.wang@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_save_or_restore_callee_saves):
-+ Rename to aarch64_save_callee_saves, remove restore code.
-+ (aarch64_restore_callee_saves): New function.
-+
-+ 2014-07-23 Jiong Wang <jiong.wang@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_save_or_restore_fprs): Deleted.
-+ (aarch64_save_callee_saves): New function to handle reg save
-+ for both core and vector regs.
-+
-+ 2014-07-23 Jiong Wang <jiong.wang@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_gen_load_pair)
-+ (aarch64_gen_store_pair): New helper function.
-+ (aarch64_save_or_restore_callee_save_registers)
-+ (aarch64_save_or_restore_fprs): Use new helper functions.
-+
-+ 2014-07-23 Jiong Wang <jiong.wang@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_next_callee_save): New function.
-+ (aarch64_save_or_restore_callee_save_registers)
-+ (aarch64_save_or_restore_fprs): Use aarch64_next_callee_save.
-+
-+ 2014-07-23 Jiong Wang <jiong.wang@arm.com>
-+
-+ * config/aarch64/aarch64.c
-+ (aarch64_save_or_restore_callee_save_registers)
-+ (aarch64_save_or_restore_fprs): Hoist calculation of register rtx.
-+
-+ 2014-07-23 Jiong Wang <jiong.wang@arm.com>
-+
-+ * config/aarch64/aarch64.c
-+ (aarch64_save_or_restore_callee_save_registers)
-+ (aarch64_save_or_restore_fprs): Remove 'increment'.
-+
-+ 2014-07-23 Jiong Wang <jiong.wang@arm.com>
-+
-+ * config/aarch64/aarch64.c
-+ (aarch64_save_or_restore_callee_save_registers)
-+ (aarch64_save_or_restore_fprs): Use register offset in
-+ cfun->machine->frame.reg_offset.
-+
-+ 2014-07-23 Jiong Wang <jiong.wang@arm.com>
-+
-+ * config/aarch64/aarch64.c
-+ (aarch64_save_or_restore_callee_save_registers)
-+ (aarch64_save_or_restore_fprs): Remove base_rtx.
-+
-+ 2014-07-23 Jiong Wang <jiong.wang@arm.com>
-+
-+ * config/aarch64/aarch64.c
-+ (aarch64_save_or_restore_callee_save_registers): Rename 'offset'
-+ to 'start_offset'. Remove local variable 'start_offset'.
-+
-+ 2014-07-23 Jiong Wang <jiong.wang@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_save_or_restore_fprs): Change
-+ type to HOST_WIDE_INT.
-+
-+ 2014-07-23 Jiong Wang <jiong.wang@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_expand_prologue)
-+ (aarch64_save_or_restore_fprs)
-+ (aarch64_save_or_restore_callee_save_registers): GNU-Stylize code.
-+
-+ 2014-06-05 Marcus Shawcroft <marcus.shawcroft@arm.com>
-+
-+ * config/aarch64/aarch64.h (aarch64_frame): Add hard_fp_offset and
-+ frame_size.
-+ * config/aarch64/aarch64.c (aarch64_layout_frame): Initialize
-+ aarch64_frame hard_fp_offset and frame_size.
-+ (aarch64_expand_prologue): Use aarch64_frame hard_fp_offset and
-+ frame_size; remove original_frame_size.
-+ (aarch64_expand_epilogue, aarch64_final_eh_return_addr): Likewise.
-+ (aarch64_initial_elimination_offset): Remove frame_size and
-+ offset. Use aarch64_frame frame_size.
-+
-+ 2014-06-05 Marcus Shawcroft <marcus.shawcroft@arm.com>
-+ Jiong Wang <jiong.wang@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_layout_frame): Correct
-+ initialization of R30 offset. Update offset. Iterate core
-+ registers up to X30. Remove X29, X30 specific code.
-+
-+ 2014-06-05 Marcus Shawcroft <marcus.shawcroft@arm.com>
-+ Jiong Wang <jiong.wang@arm.com>
-+
-+ * config/aarch64/aarch64.c (SLOT_NOT_REQUIRED, SLOT_REQUIRED): Define.
-+ (aarch64_layout_frame): Use SLOT_NOT_REQUIRED and SLOT_REQUIRED.
-+ (aarch64_register_saved_on_entry): Adjust test.
-+
-+ 2014-06-05 Marcus Shawcroft <marcus.shawcroft@arm.com>
-+
-+ * config/aarch64/aarch64.h (machine_function): Move
-+ saved_varargs_size from here...
-+ (aarch64_frame): ... to here.
-+
-+ * config/aarch64/aarch64.c (aarch64_expand_prologue)
-+ (aarch64_expand_epilogue, aarch64_final_eh_return_addr)
-+ (aarch64_initial_elimination_offset)
-+ (aarch64_setup_incoming_varargs): Adjust location of
-+ saved_varargs_size.
-+
-+2014-08-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r212753.
-+ 2014-07-17 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_frint_unspec_p): New function.
-+ (aarch64_rtx_costs): Handle FIX, UNSIGNED_FIX, UNSPEC.
-+
-+2014-08-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r212752.
-+ 2014-07-17 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * config/aarch64/arm_neon.h (vmlal_high_lane_s16): Fix type.
-+ (vmlal_high_lane_s32): Likewise.
-+ (vmlal_high_lane_u16): Likewise.
-+ (vmlal_high_lane_u32): Likewise.
-+ (vmlsl_high_lane_s16): Likewise.
-+ (vmlsl_high_lane_s32): Likewise.
-+ (vmlsl_high_lane_u16): Likewise.
-+ (vmlsl_high_lane_u32): Likewise.
-+
-+2014-08-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r212512.
-+ 2014-07-14 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * config/arm/cortex-a15.md (cortex_a15_alu): Handle clz, rbit.
-+ * config/arm/cortex-a5.md (cortex_a5_alu): Likewise.
-+ * config/arm/cortex-a53.md (cortex_a53_alu): Likewise.
-+ * config/arm/cortex-a7.md (cortex_a7_alu_reg): Likewise.
-+ * config/arm/cortex-a9.md (cortex_a9_dp): Likewise.
-+ * config/arm/cortex-m4.md (cortex_m4_alu): Likewise.
-+ * config/arm/cortex-r4.md (cortex_r4_alu): Likewise.
-+
-+2014-08-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r212358.
-+ 2014-07-08 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * config/arm/arm.c (cortexa5_extra_costs): New table.
-+ (arm_cortex_a5_tune): Use cortexa5_extra_costs.
-+
-+2014-08-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r212296.
-+ 2014-07-04 Tom de Vries <tom@codesourcery.com>
-+
-+ * config/aarch64/aarch64-simd.md
-+ (define_insn "vec_unpack_trunc_<mode>"): Fix constraint.
-+
-+2014-08-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r212142, r212225.
-+ 2014-07-02 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_expand_vec_perm): Delete unused
-+ variable i.
-+
-+ 2014-06-30 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ * config/aarch64/aarch64-simd.md (vec_perm): Enable for bigendian.
-+ * config/aarch64/aarch64.c (aarch64_expand_vec_perm): Remove assert
-+ against bigendian and adjust indices.
-+
-+2014-08-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r211779.
-+ 2014-06-18 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * config/arm/arm_neon.h (vadd_f32): Change #ifdef to __FAST_MATH.
-+
-+2014-07-30 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r211503.
-+ 2014-06-12 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ * config/aarch64/arm_neon.h (vmlaq_n_f64, vmlsq_n_f64, vrsrtsq_f64,
-+ vcge_p8, vcgeq_p8, vcgez_p8, vcgez_u8, vcgez_u16, vcgez_u32, vcgez_u64,
-+ vcgezq_p8, vcgezq_u8, vcgezq_u16, vcgezq_u32, vcgezq_u64, vcgezd_u64,
-+ vcgt_p8, vcgtq_p8, vcgtz_p8, vcgtz_u8, vcgtz_u16, vcgtz_u32, vcgtz_u64,
-+ vcgtzq_p8, vcgtzq_u8, vcgtzq_u16, vcgtzq_u32, vcgtzq_u64, vcgtzd_u64,
-+ vcle_p8, vcleq_p8, vclez_p8, vclez_u64, vclezq_p8, vclezd_u64, vclt_p8,
-+ vcltq_p8, vcltz_p8, vcltzq_p8, vcltzd_u64): Remove functions as they are
-+ not in the spec.
-+
-+2014-07-30 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r211140.
-+ 2014-06-02 Marcus Shawcroft <marcus.shawcroft@arm.com>
-+
-+ * config/aarch64/aarch64.md (set_fpcr): Drop ISB after FPCR write.
-+
-+2014-07-29 Yvan Roux <yvan.roux@linaro.org>
-+
-+ * LINARO-VERSION: Bump version.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+ * LINARO-VERSION: Update.
-+
-+2014-07-20 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Revert:
-+ 2014-07-16 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r211129.
-+ 2014-06-02 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
-+
-+ PR target/61154
-+ * config/arm/arm.h (TARGET_SUPPORTS_WIDE_INT): Define.
-+ * config/arm/arm.md (mov64 splitter): Replace const_double_operand
-+ with immediate_operand.
-+
-+2014-07-19 Yvan Roux <yvan.roux@linaro.org>
-+
-+ * LINARO-VERSION: Bump version.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+ * LINARO-VERSION: Update.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r211887, r211899.
-+ 2014-06-23 James Greenhalgh <james.greenhalgh@arm.com>
-+
-+ * config/aarch64/aarch64.md (addsi3_aarch64): Set "simd" attr to
-+ "yes" where needed.
-+
-+ 2014-06-23 James Greenhalgh <james.greenhalgh@arm.com>
-+
-+ * config/aarch64/aarch64.md (*addsi3_aarch64): Add alternative in
-+ vector registers.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r211440.
-+ 2014-06-11 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * config.gcc (aarch64*-*-*): Add arm_acle.h to extra headers.
-+ * Makefile.in (TEXI_GCC_FILES): Add aarch64-acle-intrinsics.texi to
-+ dependencies.
-+ * config/aarch64/aarch64-builtins.c (AARCH64_CRC32_BUILTINS): Define.
-+ (aarch64_crc_builtin_datum): New struct.
-+ (aarch64_crc_builtin_data): New.
-+ (aarch64_init_crc32_builtins): New function.
-+ (aarch64_init_builtins): Initialise CRC32 builtins when appropriate.
-+ (aarch64_crc32_expand_builtin): New.
-+ (aarch64_expand_builtin): Add CRC32 builtin expansion case.
-+ * config/aarch64/aarch64.h (TARGET_CPU_CPP_BUILTINS): Define
-+ __ARM_FEATURE_CRC32 when appropriate.
-+ (TARGET_CRC32): Define.
-+ * config/aarch64/aarch64.md (UNSPEC_CRC32B, UNSPEC_CRC32H,
-+ UNSPEC_CRC32W, UNSPEC_CRC32X, UNSPEC_CRC32CB, UNSPEC_CRC32CH,
-+ UNSPEC_CRC32CW, UNSPEC_CRC32CX): New unspec values.
-+ (aarch64_<crc_variant>): New pattern.
-+ * config/aarch64/arm_acle.h: New file.
-+ * config/aarch64/iterators.md (CRC): New int iterator.
-+ (crc_variant, crc_mode): New int attributes.
-+ * doc/aarch64-acle-intrinsics.texi: New file.
-+ * doc/extend.texi (aarch64): Document aarch64 ACLE intrinsics.
-+ Include aarch64-acle-intrinsics.texi.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r211174.
-+ 2014-06-03 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ * config/aarch64/aarch64-simd.md (aarch64_rev<REVERSE:rev-op><mode>):
-+ New pattern.
-+ * config/aarch64/aarch64.c (aarch64_evpc_rev): New function.
-+ (aarch64_expand_vec_perm_const_1): Add call to aarch64_evpc_rev.
-+ * config/aarch64/iterators.md (REVERSE): New iterator.
-+ (UNSPEC_REV64, UNSPEC_REV32, UNSPEC_REV16): New enum elements.
-+ (rev_op): New int_attribute.
-+ * config/aarch64/arm_neon.h (vrev16_p8, vrev16_s8, vrev16_u8,
-+ vrev16q_p8, vrev16q_s8, vrev16q_u8, vrev32_p8, vrev32_p16, vrev32_s8,
-+ vrev32_s16, vrev32_u8, vrev32_u16, vrev32q_p8, vrev32q_p16, vrev32q_s8,
-+ vrev32q_s16, vrev32q_u8, vrev32q_u16, vrev64_f32, vrev64_p8,
-+ vrev64_p16, vrev64_s8, vrev64_s16, vrev64_s32, vrev64_u8, vrev64_u16,
-+ vrev64_u32, vrev64q_f32, vrev64q_p8, vrev64q_p16, vrev64q_s8,
-+ vrev64q_s16, vrev64q_s32, vrev64q_u8, vrev64q_u16, vrev64q_u32):
-+ Replace temporary __asm__ with __builtin_shuffle.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r210216, r210218, r210219.
-+ 2014-05-08 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
-+
-+ * config/arm/arm_neon.h: Update comment.
-+ * config/arm/neon-docgen.ml: Delete.
-+ * config/arm/neon-gen.ml: Delete.
-+ * doc/arm-neon-intrinsics.texi: Update comment.
-+
-+ 2014-05-08 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
-+
-+ * config/arm/arm_neon_builtins.def (vadd, vsub): Only define the v2sf
-+ and v4sf versions.
-+ (vand, vorr, veor, vorn, vbic): Remove.
-+ * config/arm/neon.md (neon_vadd, neon_vsub, neon_vadd_unspec): Adjust
-+ iterator.
-+ (neon_vsub_unspec): Likewise.
-+ (neon_vorr, neon_vand, neon_vbic, neon_veor, neon_vorn): Remove.
-+
-+ 2014-05-08 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
-+
-+ * config/arm/arm_neon.h (vadd_s8): GNU C implementation
-+ (vadd_s16): Likewise.
-+ (vadd_s32): Likewise.
-+ (vadd_f32): Likewise.
-+ (vadd_u8): Likewise.
-+ (vadd_u16): Likewise.
-+ (vadd_u32): Likewise.
-+ (vadd_s64): Likewise.
-+ (vadd_u64): Likewise.
-+ (vaddq_s8): Likewise.
-+ (vaddq_s16): Likewise.
-+ (vaddq_s32): Likewise.
-+ (vaddq_s64): Likewise.
-+ (vaddq_f32): Likewise.
-+ (vaddq_u8): Likewise.
-+ (vaddq_u16): Likewise.
-+ (vaddq_u32): Likewise.
-+ (vaddq_u64): Likewise.
-+ (vmul_s8): Likewise.
-+ (vmul_s16): Likewise.
-+ (vmul_s32): Likewise.
-+ (vmul_f32): Likewise.
-+ (vmul_u8): Likewise.
-+ (vmul_u16): Likewise.
-+ (vmul_u32): Likewise.
-+ (vmul_p8): Likewise.
-+ (vmulq_s8): Likewise.
-+ (vmulq_s16): Likewise.
-+ (vmulq_s32): Likewise.
-+ (vmulq_f32): Likewise.
-+ (vmulq_u8): Likewise.
-+ (vmulq_u16): Likewise.
-+ (vmulq_u32): Likewise.
-+ (vsub_s8): Likewise.
-+ (vsub_s16): Likewise.
-+ (vsub_s32): Likewise.
-+ (vsub_f32): Likewise.
-+ (vsub_u8): Likewise.
-+ (vsub_u16): Likewise.
-+ (vsub_u32): Likewise.
-+ (vsub_s64): Likewise.
-+ (vsub_u64): Likewise.
-+ (vsubq_s8): Likewise.
-+ (vsubq_s16): Likewise.
-+ (vsubq_s32): Likewise.
-+ (vsubq_s64): Likewise.
-+ (vsubq_f32): Likewise.
-+ (vsubq_u8): Likewise.
-+ (vsubq_u16): Likewise.
-+ (vsubq_u32): Likewise.
-+ (vsubq_u64): Likewise.
-+ (vand_s8): Likewise.
-+ (vand_s16): Likewise.
-+ (vand_s32): Likewise.
-+ (vand_u8): Likewise.
-+ (vand_u16): Likewise.
-+ (vand_u32): Likewise.
-+ (vand_s64): Likewise.
-+ (vand_u64): Likewise.
-+ (vandq_s8): Likewise.
-+ (vandq_s16): Likewise.
-+ (vandq_s32): Likewise.
-+ (vandq_s64): Likewise.
-+ (vandq_u8): Likewise.
-+ (vandq_u16): Likewise.
-+ (vandq_u32): Likewise.
-+ (vandq_u64): Likewise.
-+ (vorr_s8): Likewise.
-+ (vorr_s16): Likewise.
-+ (vorr_s32): Likewise.
-+ (vorr_u8): Likewise.
-+ (vorr_u16): Likewise.
-+ (vorr_u32): Likewise.
-+ (vorr_s64): Likewise.
-+ (vorr_u64): Likewise.
-+ (vorrq_s8): Likewise.
-+ (vorrq_s16): Likewise.
-+ (vorrq_s32): Likewise.
-+ (vorrq_s64): Likewise.
-+ (vorrq_u8): Likewise.
-+ (vorrq_u16): Likewise.
-+ (vorrq_u32): Likewise.
-+ (vorrq_u64): Likewise.
-+ (veor_s8): Likewise.
-+ (veor_s16): Likewise.
-+ (veor_s32): Likewise.
-+ (veor_u8): Likewise.
-+ (veor_u16): Likewise.
-+ (veor_u32): Likewise.
-+ (veor_s64): Likewise.
-+ (veor_u64): Likewise.
-+ (veorq_s8): Likewise.
-+ (veorq_s16): Likewise.
-+ (veorq_s32): Likewise.
-+ (veorq_s64): Likewise.
-+ (veorq_u8): Likewise.
-+ (veorq_u16): Likewise.
-+ (veorq_u32): Likewise.
-+ (veorq_u64): Likewise.
-+ (vbic_s8): Likewise.
-+ (vbic_s16): Likewise.
-+ (vbic_s32): Likewise.
-+ (vbic_u8): Likewise.
-+ (vbic_u16): Likewise.
-+ (vbic_u32): Likewise.
-+ (vbic_s64): Likewise.
-+ (vbic_u64): Likewise.
-+ (vbicq_s8): Likewise.
-+ (vbicq_s16): Likewise.
-+ (vbicq_s32): Likewise.
-+ (vbicq_s64): Likewise.
-+ (vbicq_u8): Likewise.
-+ (vbicq_u16): Likewise.
-+ (vbicq_u32): Likewise.
-+ (vbicq_u64): Likewise.
-+ (vorn_s8): Likewise.
-+ (vorn_s16): Likewise.
-+ (vorn_s32): Likewise.
-+ (vorn_u8): Likewise.
-+ (vorn_u16): Likewise.
-+ (vorn_u32): Likewise.
-+ (vorn_s64): Likewise.
-+ (vorn_u64): Likewise.
-+ (vornq_s8): Likewise.
-+ (vornq_s16): Likewise.
-+ (vornq_s32): Likewise.
-+ (vornq_s64): Likewise.
-+ (vornq_u8): Likewise.
-+ (vornq_u16): Likewise.
-+ (vornq_u32): Likewise.
-+ (vornq_u64): Likewise.
-+
-+2014-07-16 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r210151.
-+ 2014-05-07 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ * config/aarch64/arm_neon.h (vtrn1_f32, vtrn1_p8, vtrn1_p16, vtrn1_s8,
-+ vtrn1_s16, vtrn1_s32, vtrn1_u8, vtrn1_u16, vtrn1_u32, vtrn1q_f32,
-+ vtrn1q_f64, vtrn1q_p8, vtrn1q_p16, vtrn1q_s8, vtrn1q_s16, vtrn1q_s32,
-+ vtrn1q_s64, vtrn1q_u8, vtrn1q_u16, vtrn1q_u32, vtrn1q_u64, vtrn2_f32,
-+ vtrn2_p8, vtrn2_p16, vtrn2_s8, vtrn2_s16, vtrn2_s32, vtrn2_u8,
-+ vtrn2_u16, vtrn2_u32, vtrn2q_f32, vtrn2q_f64, vtrn2q_p8, vtrn2q_p16,
-+ vtrn2q_s8, vtrn2q_s16, vtrn2q_s32, vtrn2q_s64, vtrn2q_u8, vtrn2q_u16,
-+ vtrn2q_u32, vtrn2q_u64): Replace temporary asm with __builtin_shuffle.
-+
-+2014-07-16 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209794.
-+ 2014-04-25 Marek Polacek <polacek@redhat.com>
-+
-+ PR c/60114
-+ * c-parser.c (c_parser_initelt): Pass input_location to
-+ process_init_element.
-+ (c_parser_initval): Pass loc to process_init_element.
-+ * c-tree.h (process_init_element): Adjust declaration.
-+ * c-typeck.c (push_init_level): Pass input_location to
-+ process_init_element.
-+ (pop_init_level): Likewise.
-+ (set_designator): Likewise.
-+ (output_init_element): Add location_t parameter. Pass loc to
-+ digest_init.
-+ (output_pending_init_elements): Pass input_location to
-+ output_init_element.
-+ (process_init_element): Add location_t parameter. Pass loc to
-+ output_init_element.
-+
-+2014-07-16 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r211771.
-+ 2014-06-18 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * genattrtab.c (n_bypassed): New variable.
-+ (process_bypasses): Initialise n_bypassed.
-+ Count number of bypassed reservations.
-+ (make_automaton_attrs): Allocate space for bypassed reservations
-+ rather than number of bypasses.
-+
-+2014-07-16 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r210861.
-+ 2014-05-23 Jiong Wang <jiong.wang@arm.com>
-+
-+ * config/aarch64/predicates.md (aarch64_call_insn_operand): New
-+ predicate.
-+ * config/aarch64/constraints.md ("Ucs", "Usf"): New constraints.
-+ * config/aarch64/aarch64.md (*sibcall_insn, *sibcall_value_insn):
-+ Adjust for tailcalling through registers.
-+ * config/aarch64/aarch64.h (enum reg_class): New caller save
-+ register class.
-+ (REG_CLASS_NAMES): Likewise.
-+ (REG_CLASS_CONTENTS): Likewise.
-+ * config/aarch64/aarch64.c (aarch64_function_ok_for_sibcall):
-+ Allow tailcalling without decls.
-+
-+2014-07-16 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r211314.
-+ 2014-06-06 James Greenhalgh <james.greenhalgh@arm.com>
-+
-+ * config/aarch64/aarch64-protos.h (aarch64_expand_movmem): New.
-+ * config/aarch64/aarch64.c (aarch64_move_pointer): New.
-+ (aarch64_progress_pointer): Likewise.
-+ (aarch64_copy_one_part_and_move_pointers): Likewise.
-+ (aarch64_expand_movmem): Likewise.
-+ * config/aarch64/aarch64.h (MOVE_RATIO): Set low.
-+ * config/aarch64/aarch64.md (movmem<mode>): New.
-+
-+2014-07-16 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r211185, 211186.
-+ 2014-06-03 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ * gcc/config/aarch64/aarch64-builtins.c
-+ (aarch64_types_binop_uus_qualifiers,
-+ aarch64_types_shift_to_unsigned_qualifiers,
-+ aarch64_types_unsigned_shiftacc_qualifiers): Define.
-+ * gcc/config/aarch64/aarch64-simd-builtins.def (uqshl, uqrshl, uqadd,
-+ uqsub, usqadd, usra_n, ursra_n, uqshrn_n, uqrshrn_n, usri_n, usli_n,
-+ sqshlu_n, uqshl_n): Update qualifiers.
-+ * gcc/config/aarch64/arm_neon.h (vqadd_u8, vqadd_u16, vqadd_u32,
-+ vqadd_u64, vqaddq_u8, vqaddq_u16, vqaddq_u32, vqaddq_u64, vqsub_u8,
-+ vqsub_u16, vqsub_u32, vqsub_u64, vqsubq_u8, vqsubq_u16, vqsubq_u32,
-+ vqsubq_u64, vqaddb_u8, vqaddh_u16, vqadds_u32, vqaddd_u64, vqrshl_u8,
-+ vqrshl_u16, vqrshl_u32, vqrshl_u64, vqrshlq_u8, vqrshlq_u16,
-+ vqrshlq_u32, vqrshlq_u64, vqrshlb_u8, vqrshlh_u16, vqrshls_u32,
-+ vqrshld_u64, vqrshrn_n_u16, vqrshrn_n_u32, vqrshrn_n_u64,
-+ vqrshrnh_n_u16, vqrshrns_n_u32, vqrshrnd_n_u64, vqshl_u8, vqshl_u16,
-+ vqshl_u32, vqshl_u64, vqshlq_u8, vqshlq_u16, vqshlq_u32, vqshlq_u64,
-+ vqshlb_u8, vqshlh_u16, vqshls_u32, vqshld_u64, vqshl_n_u8, vqshl_n_u16,
-+ vqshl_n_u32, vqshl_n_u64, vqshlq_n_u8, vqshlq_n_u16, vqshlq_n_u32,
-+ vqshlq_n_u64, vqshlb_n_u8, vqshlh_n_u16, vqshls_n_u32, vqshld_n_u64,
-+ vqshlu_n_s8, vqshlu_n_s16, vqshlu_n_s32, vqshlu_n_s64, vqshluq_n_s8,
-+ vqshluq_n_s16, vqshluq_n_s32, vqshluq_n_s64, vqshlub_n_s8,
-+ vqshluh_n_s16, vqshlus_n_s32, vqshlud_n_s64, vqshrn_n_u16,
-+ vqshrn_n_u32, vqshrn_n_u64, vqshrnh_n_u16, vqshrns_n_u32,
-+ vqshrnd_n_u64, vqsubb_u8, vqsubh_u16, vqsubs_u32, vqsubd_u64,
-+ vrsra_n_u8, vrsra_n_u16, vrsra_n_u32, vrsra_n_u64, vrsraq_n_u8,
-+ vrsraq_n_u16, vrsraq_n_u32, vrsraq_n_u64, vrsrad_n_u64, vsli_n_u8,
-+ vsli_n_u16, vsli_n_u32,vsli_n_u64, vsliq_n_u8, vsliq_n_u16,
-+ vsliq_n_u32, vsliq_n_u64, vslid_n_u64, vsqadd_u8, vsqadd_u16,
-+ vsqadd_u32, vsqadd_u64, vsqaddq_u8, vsqaddq_u16, vsqaddq_u32,
-+ vsqaddq_u64, vsqaddb_u8, vsqaddh_u16, vsqadds_u32, vsqaddd_u64,
-+ vsra_n_u8, vsra_n_u16, vsra_n_u32, vsra_n_u64, vsraq_n_u8,
-+ vsraq_n_u16, vsraq_n_u32, vsraq_n_u64, vsrad_n_u64, vsri_n_u8,
-+ vsri_n_u16, vsri_n_u32, vsri_n_u64, vsriq_n_u8, vsriq_n_u16,
-+ vsriq_n_u32, vsriq_n_u64, vsrid_n_u64): Remove casts.
-+
-+ 2014-06-03 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ * gcc/config/aarch64/aarch64-builtins.c
-+ (aarch64_types_binop_ssu_qualifiers): New static data.
-+ (TYPES_BINOP_SSU): Define.
-+ * gcc/config/aarch64/aarch64-simd-builtins.def (suqadd, ushl, urshl,
-+ urshr_n, ushll_n): Use appropriate unsigned qualifiers.
-+ * gcc/config/aarch64/arm_neon.h (vrshl_u8, vrshl_u16, vrshl_u32,
-+ vrshl_u64, vrshlq_u8, vrshlq_u16, vrshlq_u32, vrshlq_u64, vrshld_u64,
-+ vrshr_n_u8, vrshr_n_u16, vrshr_n_u32, vrshr_n_u64, vrshrq_n_u8,
-+ vrshrq_n_u16, vrshrq_n_u32, vrshrq_n_u64, vrshrd_n_u64, vshll_n_u8,
-+ vshll_n_u16, vshll_n_u32, vuqadd_s8, vuqadd_s16, vuqadd_s32,
-+ vuqadd_s64, vuqaddq_s8, vuqaddq_s16, vuqaddq_s32, vuqaddq_s64,
-+ vuqaddb_s8, vuqaddh_s16, vuqadds_s32, vuqaddd_s64): Add signedness
-+ suffix to builtin function name, remove cast.
-+ (vshl_s8, vshl_s16, vshl_s32, vshl_s64, vshl_u8, vshl_u16, vshl_u32,
-+ vshl_u64, vshlq_s8, vshlq_s16, vshlq_s32, vshlq_s64, vshlq_u8,
-+ vshlq_u16, vshlq_u32, vshlq_u64, vshld_s64, vshld_u64): Remove cast.
-+
-+2014-07-16 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r211408, 211416.
-+ 2014-06-10 Marcus Shawcroft <marcus.shawcroft@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_save_or_restore_fprs): Fix
-+ REG_CFA_RESTORE mode.
-+
-+ 2014-06-10 Jiong Wang <jiong.wang@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_save_or_restore_fprs)
-+ (aarch64_save_or_restore_callee_save_registers): Fix layout.
-+
-+2014-07-16 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r211418.
-+ 2014-06-10 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * config/aarch64/aarch64-simd.md (move_lo_quad_<mode>):
-+ Change second alternative type to f_mcr.
-+ * config/aarch64/aarch64.md (*movsi_aarch64): Change 11th
-+ and 12th alternatives' types to f_mcr and f_mrc.
-+ (*movdi_aarch64): Same for 12th and 13th alternatives.
-+ (*movsf_aarch64): Change 9th alternatives' type to mov_reg.
-+ (aarch64_movtilow_tilow): Change type to fmov.
-+
-+2014-07-16 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r211371.
-+ 2014-06-09 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
-+
-+ * config/arm/arm-modes.def: Remove XFmode.
-+
-+2014-07-16 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r211268.
-+ 2014-06-05 Marcus Shawcroft <marcus.shawcroft@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_expand_prologue): Update stack
-+ layout comment.
-+
-+2014-07-16 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r211129.
-+ 2014-06-02 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
-+
-+ PR target/61154
-+ * config/arm/arm.h (TARGET_SUPPORTS_WIDE_INT): Define.
-+ * config/arm/arm.md (mov64 splitter): Replace const_double_operand
-+ with immediate_operand.
-+
-+2014-07-16 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r211073.
-+ 2014-05-30 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * config/arm/thumb2.md (*thumb2_movhi_insn): Set type of movw
-+ to mov_imm.
-+ * config/arm/vfp.md (*thumb2_movsi_vfp): Likewise.
-+
-+2014-07-16 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r211050.
-+ 2014-05-29 Richard Earnshaw <rearnsha@arm.com>
-+ Richard Sandiford <rdsandiford@googlemail.com>
-+
-+ * arm/iterators.md (shiftable_ops): New code iterator.
-+ (t2_binop0, arith_shift_insn): New code attributes.
-+ * arm/predicates.md (shift_nomul_operator): New predicate.
-+ * arm/arm.md (insn_enabled): Delete.
-+ (enabled): Remove insn_enabled test.
-+ (*arith_shiftsi): Delete. Replace with ...
-+ (*<arith_shift_insn>_multsi): ... new pattern.
-+ (*<arith_shift_insn>_shiftsi): ... new pattern.
-+ * config/arm/arm.c (arm_print_operand): Handle operand format 'b'.
-+
-+2014-07-16 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r210996.
-+ 2014-05-27 Andrew Pinski <apinski@cavium.com>
-+
-+ * config/aarch64/aarch64.md (stack_protect_set_<mode>):
-+ Use <w> for the register in assembly template.
-+ (stack_protect_test): Use the mode of operands[0] for the
-+ result.
-+ (stack_protect_test_<mode>): Use <w> for the register
-+ in assembly template.
-+
-+2014-07-16 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r210967.
-+ 2014-05-27 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * config/arm/neon.md (neon_bswap<mode>): New pattern.
-+ * config/arm/arm.c (neon_itype): Add NEON_BSWAP.
-+ (arm_init_neon_builtins): Handle NEON_BSWAP.
-+ Define required type nodes.
-+ (arm_expand_neon_builtin): Handle NEON_BSWAP.
-+ (arm_builtin_vectorized_function): Handle BUILTIN_BSWAP builtins.
-+ * config/arm/arm_neon_builtins.def (bswap): Define builtins.
-+ * config/arm/iterators.md (VDQHSD): New mode iterator.
-+
-+2014-07-16 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r210471.
-+ 2014-05-15 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * config/arm/arm.c (arm_option_override): Use the SCHED_PRESSURE_MODEL
-+ enum name for PARAM_SCHED_PRESSURE_ALGORITHM.
-+
-+2014-07-16 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r210369.
-+ 2014-05-13 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * config/arm/arm.c (neon_itype): Remove NEON_RESULTPAIR.
-+ (arm_init_neon_builtins): Remove handling of NEON_RESULTPAIR.
-+ Remove associated type declarations and initialisations.
-+ (arm_expand_neon_builtin): Likewise.
-+ (neon_emit_pair_result_insn): Delete.
-+ * config/arm/arm_neon_builtins (vtrn, vzip, vuzp): Delete.
-+ * config/arm/neon.md (neon_vtrn<mode>): Delete.
-+ (neon_vzip<mode>): Likewise.
-+ (neon_vuzp<mode>): Likewise.
-+
-+2014-07-16 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r211058, 211177.
-+ 2014-05-29 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ * config/aarch64/aarch64-builtins.c (aarch64_types_binopv_qualifiers,
-+ TYPES_BINOPV): New static data.
-+ * config/aarch64/aarch64-simd-builtins.def (im_lane_bound): New builtin.
-+ * config/aarch64/aarch64-simd.md (aarch64_ext, aarch64_im_lane_boundsi):
-+ New patterns.
-+ * config/aarch64/aarch64.c (aarch64_expand_vec_perm_const_1): Match
-+ patterns for EXT.
-+ (aarch64_evpc_ext): New function.
-+
-+ * config/aarch64/iterators.md (UNSPEC_EXT): New enum element.
-+
-+ * config/aarch64/arm_neon.h (vext_f32, vext_f64, vext_p8, vext_p16,
-+ vext_s8, vext_s16, vext_s32, vext_s64, vext_u8, vext_u16, vext_u32,
-+ vext_u64, vextq_f32, vextq_f64, vextq_p8, vextq_p16, vextq_s8,
-+ vextq_s16, vextq_s32, vextq_s64, vextq_u8, vextq_u16, vextq_u32,
-+ vextq_u64): Replace __asm with __builtin_shuffle and im_lane_boundsi.
-+
-+ 2014-06-03 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_evpc_ext): allow and handle
-+ location == 0.
-+
-+2014-07-16 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209797.
-+ 2014-04-25 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * config/arm/aarch-common.c (aarch_rev16_shright_mask_imm_p):
-+ Use HOST_WIDE_INT_C for mask literal.
-+ (aarch_rev16_shleft_mask_imm_p): Likewise.
-+
-+2014-07-16 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r211148.
-+ 2014-06-02 Andrew Pinski <apinski@cavium.com>
-+
-+ * config/aarch64/aarch64-linux.h (GLIBC_DYNAMIC_LINKER):
-+ /lib/ld-linux32-aarch64.so.1 is used for ILP32.
-+ (LINUX_TARGET_LINK_SPEC): Update linker script for ILP32, a
-+ file whose name depends on -mabi= and -mbig-endian.
-+ * config/aarch64/t-aarch64-linux (MULTILIB_OSDIRNAMES): Handle LP64
-+ better and handle ilp32 too.
-+ (MULTILIB_OPTIONS): Delete.
-+ (MULTILIB_DIRNAMES): Delete.
-+
-+2014-07-16 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r210828, r211103.
-+ 2014-05-31 Kugan Vivekanandarajah <kuganv@linaro.org>
-+
-+ * config/arm/arm.c (TARGET_ATOMIC_ASSIGN_EXPAND_FENV): New define.
-+ (arm_builtins) : Add ARM_BUILTIN_GET_FPSCR and ARM_BUILTIN_SET_FPSCR.
-+ (bdesc_2arg) : Add description for builtins __builtins_arm_set_fpscr
-+ and __builtins_arm_get_fpscr.
-+ (arm_init_builtins) : Initialize builtins __builtins_arm_set_fpscr and
-+ __builtins_arm_get_fpscr.
-+ (arm_expand_builtin) : Expand builtins __builtins_arm_set_fpscr and
-+ __builtins_arm_ldfpscr.
-+ (arm_atomic_assign_expand_fenv): New function.
-+ * config/arm/vfp.md (set_fpscr): New pattern.
-+ (get_fpscr) : Likewise.
-+ * config/arm/unspecs.md (unspecv): Add VUNSPEC_GET_FPSCR and
-+ VUNSPEC_SET_FPSCR.
-+ * doc/extend.texi (AARCH64 Built-in Functions) : Document
-+ __builtins_arm_set_fpscr, __builtins_arm_get_fpscr.
-+
-+ 2014-05-23 Kugan Vivekanandarajah <kuganv@linaro.org>
-+
-+ * config/aarch64/aarch64.c (TARGET_ATOMIC_ASSIGN_EXPAND_FENV): New
-+ define.
-+ * config/aarch64/aarch64-protos.h (aarch64_atomic_assign_expand_fenv):
-+ New function declaration.
-+ * config/aarch64/aarch64-builtins.c (aarch64_builtins) : Add
-+ AARCH64_BUILTIN_GET_FPCR, AARCH64_BUILTIN_SET_FPCR.
-+ AARCH64_BUILTIN_GET_FPSR and AARCH64_BUILTIN_SET_FPSR.
-+ (aarch64_init_builtins) : Initialize builtins
-+ __builtins_aarch64_set_fpcr, __builtins_aarch64_get_fpcr.
-+ __builtins_aarch64_set_fpsr and __builtins_aarch64_get_fpsr.
-+ (aarch64_expand_builtin) : Expand builtins __builtins_aarch64_set_fpcr
-+ __builtins_aarch64_get_fpcr, __builtins_aarch64_get_fpsr,
-+ and __builtins_aarch64_set_fpsr.
-+ (aarch64_atomic_assign_expand_fenv): New function.
-+ * config/aarch64/aarch64.md (set_fpcr): New pattern.
-+ (get_fpcr) : Likewise.
-+ (set_fpsr) : Likewise.
-+ (get_fpsr) : Likewise.
-+ (unspecv): Add UNSPECV_GET_FPCR and UNSPECV_SET_FPCR, UNSPECV_GET_FPSR
-+ and UNSPECV_SET_FPSR.
-+ * doc/extend.texi (AARCH64 Built-in Functions) : Document
-+ __builtins_aarch64_set_fpcr, __builtins_aarch64_get_fpcr.
-+ __builtins_aarch64_set_fpsr and __builtins_aarch64_get_fpsr.
-+
-+2014-07-16 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r210355.
-+ 2014-05-13 Ian Bolton <ian.bolton@arm.com>
-+
-+ * config/aarch64/aarch64-protos.h
-+ (aarch64_hard_regno_caller_save_mode): New prototype.
-+ * config/aarch64/aarch64.c (aarch64_hard_regno_caller_save_mode):
-+ New function.
-+ * config/aarch64/aarch64.h (HARD_REGNO_CALLER_SAVE_MODE): New macro.
-+
-+2014-07-16 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209943.
-+ 2014-04-30 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ * config/aarch64/arm_neon.h (vuzp1_f32, vuzp1_p8, vuzp1_p16, vuzp1_s8,
-+ vuzp1_s16, vuzp1_s32, vuzp1_u8, vuzp1_u16, vuzp1_u32, vuzp1q_f32,
-+ vuzp1q_f64, vuzp1q_p8, vuzp1q_p16, vuzp1q_s8, vuzp1q_s16, vuzp1q_s32,
-+ vuzp1q_s64, vuzp1q_u8, vuzp1q_u16, vuzp1q_u32, vuzp1q_u64, vuzp2_f32,
-+ vuzp2_p8, vuzp2_p16, vuzp2_s8, vuzp2_s16, vuzp2_s32, vuzp2_u8,
-+ vuzp2_u16, vuzp2_u32, vuzp2q_f32, vuzp2q_f64, vuzp2q_p8, vuzp2q_p16,
-+ vuzp2q_s8, vuzp2q_s16, vuzp2q_s32, vuzp2q_s64, vuzp2q_u8, vuzp2q_u16,
-+ vuzp2q_u32, vuzp2q_u64): Replace temporary asm with __builtin_shuffle.
-+
-+2014-06-26 Yvan Roux <yvan.roux@linaro.org>
-+
-+ * LINARO-VERSION: Bump version.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+ * LINARO-VERSION: Update.
-+
-+2014-06-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Revert:
-+ 2014-05-23 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209643.
-+ 2014-04-22 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
-+
-+ * config/aarch64/aarch64.c (TARGET_FLAGS_REGNUM): Define.
-+
-+2014-06-13 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r210493, 210494, 210495, 210496, 210497, 210498,
-+ 210499, 210500, 210501, 210502, 210503, 210504, 210505, 210506, 210507,
-+ 210508, 210509, 210510, 210512, 211205, 211206.
-+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com>
-+
-+ * config/aarch64/aarch64-protos.h (scale_addr_mode_cost): New.
-+ (cpu_addrcost_table): Use it.
-+ * config/aarch64/aarch64.c (generic_addrcost_table): Initialize it.
-+ (aarch64_address_cost): Rewrite using aarch64_classify_address,
-+ move it.
-+
-+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com>
-+
-+ * config/aarch64/aarch64.c (cortexa57_addrcost_table): New.
-+ (cortexa57_vector_cost): Likewise.
-+ (cortexa57_tunings): Use them.
-+
-+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_rtx_costs_wrapper): New.
-+ (TARGET_RTX_COSTS): Call it.
-+
-+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com>
-+ Philipp Tomsich <philipp.tomsich@theobroma-systems.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_build_constant): Conditionally
-+ emit instructions, return number of instructions which would
-+ be emitted.
-+ (aarch64_add_constant): Update call to aarch64_build_constant.
-+ (aarch64_output_mi_thunk): Likewise.
-+ (aarch64_rtx_costs): Estimate cost of a CONST_INT, cost
-+ a CONST_DOUBLE.
-+
-+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com>
-+ Philipp Tomsich <philipp.tomsich@theobroma-systems.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_strip_shift_or_extend): Rename
-+ to...
-+ (aarch64_strip_extend): ...this, don't strip shifts, check RTX is
-+ well formed.
-+ (aarch64_rtx_mult_cost): New.
-+ (aarch64_rtx_costs): Use it, refactor as appropriate.
-+
-+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Set default costs.
-+
-+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com>
-+ Philip Tomsich <philipp.tomsich@theobroma-systems.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Improve costing
-+ for SET RTX.
-+
-+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com>
-+ Philipp Tomsich <philipp.tomsich@theobroma-systems.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Use address
-+ costs when costing loads and stores to memory.
-+
-+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com>
-+ Philipp Tomsich <philipp.tomsich@theobroma-systems.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Improve cost for
-+ logical operations.
-+
-+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com>
-+ Philipp Tomsich <philipp.tomsich@theobroma-systems.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Cost
-+ ZERO_EXTEND and SIGN_EXTEND better.
-+
-+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com>
-+ Philipp Tomsich <philipp.tomsich@theobroma-systems.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Improve costs for
-+ rotates and shifts.
-+
-+ 2014-05-16  James Greenhalgh  <james.greenhalgh@arm.com>
-+ Philipp Tomsich <philipp.tomsich@theobroma-systems.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_rtx_arith_op_extract_p): New.
-+ (aarch64_rtx_costs): Improve costs for SIGN/ZERO_EXTRACT.
-+
-+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com>
-+ Philipp Tomsich <philipp.tomsich@theobroma-systems.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Improve costs for
-+ DIV/MOD.
-+
-+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com>
-+ Philipp Tomsich <philipp.tomsich@theobroma-systems.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Cost comparison
-+ operators.
-+
-+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com>
-+ Philipp Tomsich <philipp.tomsich@theobroma-systems.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Cost FMA,
-+ FLOAT_EXTEND, FLOAT_TRUNCATE, ABS, SMAX, and SMIN.
-+
-+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com>
-+ Philipp Tomsich <philipp.tomsich@theobroma-systems.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Cost TRUNCATE.
-+
-+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Cost SYMBOL_REF,
-+ HIGH, LO_SUM.
-+
-+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Handle the case
-+ where we were unable to cost an RTX.
-+
-+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_rtx_mult_cost): Fix FNMUL case.
-+
-+ 2014-06-03 Andrew Pinski <apinski@cavium.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_if_then_else_costs): New function.
-+ (aarch64_rtx_costs): Use aarch64_if_then_else_costs.
-+
-+ 2014-06-03 Andrew Pinski <apinski@cavium.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_if_then_else_costs): Allow non
-+ comparisons for OP0.
-+
-+2014-06-13 Yvan Roux <yvan.roux@linaro.org>
-+
-+ * LINARO-VERSION: Bump version.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+ * LINARO-VERSION: Update.
-+
-+2014-06-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r211211.
-+ 2014-06-04 Bin Cheng <bin.cheng@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_classify_address)
-+ (aarch64_legitimize_reload_address): Support full addressing modes
-+ for vector modes.
-+ * config/aarch64/aarch64.md (mov<mode>, movmisalign<mode>)
-+ (*aarch64_simd_mov<mode>, *aarch64_simd_mov<mode>): Relax predicates.
-+
-+2014-05-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209906.
-+ 2014-04-29 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ * config/aarch64/arm_neon.h (vzip1_f32, vzip1_p8, vzip1_p16, vzip1_s8,
-+ vzip1_s16, vzip1_s32, vzip1_u8, vzip1_u16, vzip1_u32, vzip1q_f32,
-+ vzip1q_f64, vzip1q_p8, vzip1q_p16, vzip1q_s8, vzip1q_s16, vzip1q_s32,
-+ vzip1q_s64, vzip1q_u8, vzip1q_u16, vzip1q_u32, vzip1q_u64, vzip2_f32,
-+ vzip2_p8, vzip2_p16, vzip2_s8, vzip2_s16, vzip2_s32, vzip2_u8,
-+ vzip2_u16, vzip2_u32, vzip2q_f32, vzip2q_f64, vzip2q_p8, vzip2q_p16,
-+ vzip2q_s8, vzip2q_s16, vzip2q_s32, vzip2q_s64, vzip2q_u8, vzip2q_u16,
-+ vzip2q_u32, vzip2q_u64): Replace inline __asm__ with __builtin_shuffle.
-+
-+2014-05-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209897.
-+ 2014-04-29 James Greenhalgh <james.greenhalgh@arm.com>
-+
-+ * calls.c (initialize_argument_information): Always treat
-+ PUSH_ARGS_REVERSED as 1, simplify code accordingly.
-+ (expand_call): Likewise.
-+ (emit_library_call_calue_1): Likewise.
-+ * expr.c (PUSH_ARGS_REVERSED): Do not define.
-+ (emit_push_insn): Always treat PUSH_ARGS_REVERSED as 1, simplify
-+ code accordingly.
-+
-+2014-05-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209880.
-+ 2014-04-28 James Greenhalgh <james.greenhalgh@arm.com>
-+
-+ * config/aarch64/aarch64-builtins.c
-+ (aarch64_types_storestruct_lane_qualifiers): New.
-+ (TYPES_STORESTRUCT_LANE): Likewise.
-+ * config/aarch64/aarch64-simd-builtins.def (st2_lane): New.
-+ (st3_lane): Likewise.
-+ (st4_lane): Likewise.
-+ * config/aarch64/aarch64-simd.md (vec_store_lanesoi_lane<mode>): New.
-+ (vec_store_lanesci_lane<mode>): Likewise.
-+ (vec_store_lanesxi_lane<mode>): Likewise.
-+ (aarch64_st2_lane<VQ:mode>): Likewise.
-+ (aarch64_st3_lane<VQ:mode>): Likewise.
-+ (aarch64_st4_lane<VQ:mode>): Likewise.
-+ * config/aarch64/aarch64.md (unspec): Add UNSPEC_ST{2,3,4}_LANE.
-+ * config/aarch64/arm_neon.h
-+ (__ST2_LANE_FUNC): Rewrite using builtins, update use points to
-+ use new macro arguments.
-+ (__ST3_LANE_FUNC): Likewise.
-+ (__ST4_LANE_FUNC): Likewise.
-+ * config/aarch64/iterators.md (V_TWO_ELEM): New.
-+ (V_THREE_ELEM): Likewise.
-+ (V_FOUR_ELEM): Likewise.
-+
-+2014-05-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209878.
-+ 2014-04-28 James Greenhalgh <james.greenhalgh@arm.com>
-+
-+ * config/aarch64/aarch64-protos.h (aarch64_modes_tieable_p): New.
-+ * config/aarch64/aarch64.c
-+ (aarch64_cannot_change_mode_class): Weaken conditions.
-+ (aarch64_modes_tieable_p): New.
-+ * config/aarch64/aarch64.h (MODES_TIEABLE_P): Use it.
-+
-+2014-05-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209808.
-+ 2014-04-25 Jiong Wang <jiong.wang@arm.com>
-+
-+ * config/arm/predicates.md (call_insn_operand): Add long_call check.
-+ * config/arm/arm.md (sibcall, sibcall_value): Force the address to
-+ reg for long_call.
-+ * config/arm/arm.c (arm_function_ok_for_sibcall): Remove long_call
-+ restriction.
-+
-+2014-05-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209806.
-+ 2014-04-25 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * config/arm/arm.c (arm_cortex_a8_tune): Initialise
-+ T16-related fields.
-+
-+2014-05-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209742, 209749.
-+ 2014-04-24 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_evpc_tbl): Enable for bigendian.
-+
-+ 2014-04-24 Tejas Belagod <tejas.belagod@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_evpc_tbl): Reverse order of elements
-+ for big-endian.
-+
-+2014-05-23 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209736.
-+ 2014-04-24 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * config/aarch64/aarch64-builtins.c
-+ (aarch64_builtin_vectorized_function): Handle BUILT_IN_BSWAP16,
-+ BUILT_IN_BSWAP32, BUILT_IN_BSWAP64.
-+ * config/aarch64/aarch64-simd.md (bswap<mode>): New pattern.
-+ * config/aarch64/aarch64-simd-builtins.def: Define vector bswap
-+ builtins.
-+ * config/aarch64/iterator.md (VDQHSD): New mode iterator.
-+ (Vrevsuff): New mode attribute.
-+
-+2014-05-23 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209712.
-+ 2014-04-23 Venkataramanan Kumar <venkataramanan.kumar@linaro.org>
-+
-+ * config/aarch64/aarch64.md (stack_protect_set, stack_protect_test)
-+ (stack_protect_set_<mode>, stack_protect_test_<mode>): Add
-+ machine descriptions for Stack Smashing Protector.
-+
-+2014-05-23 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209711.
-+ 2014-04-23 Richard Earnshaw <rearnsha@arm.com>
-+
-+ * aarch64.md (<optab>_rol<mode>3): New pattern.
-+ (<optab>_rolsi3_uxtw): Likewise.
-+ * aarch64.c (aarch64_strip_shift): Handle ROTATE and ROTATERT.
-+
-+2014-05-23 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209710.
-+ 2014-04-23 James Greenhalgh <james.greenhalgh@arm.com>
-+
-+ * config/arm/arm.c (arm_cortex_a57_tune): Initialize all fields.
-+ (arm_cortex_a12_tune): Likewise.
-+
-+2014-05-23 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209706.
-+ 2014-04-23 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Handle BSWAP.
-+
-+2014-05-23 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209701, 209702, 209703, 209704, 209705.
-+ 2014-04-23 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * config/arm/arm.md (arm_rev16si2): New pattern.
-+ (arm_rev16si2_alt): Likewise.
-+ * config/arm/arm.c (arm_new_rtx_costs): Handle rev16 case.
-+
-+ 2014-04-23 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+ * config/aarch64/aarch64.md (rev16<mode>2): New pattern.
-+ (rev16<mode>2_alt): Likewise.
-+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Handle rev16 case.
-+ * config/arm/aarch-common.c (aarch_rev16_shright_mask_imm_p): New.
-+ (aarch_rev16_shleft_mask_imm_p): Likewise.
-+ (aarch_rev16_p_1): Likewise.
-+ (aarch_rev16_p): Likewise.
-+ * config/arm/aarch-common-protos.h (aarch_rev16_p): Declare extern.
-+ (aarch_rev16_shright_mask_imm_p): Likewise.
-+ (aarch_rev16_shleft_mask_imm_p): Likewise.
-+
-+ 2014-04-23 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * config/arm/aarch-common-protos.h (alu_cost_table): Add rev field.
-+ * config/arm/aarch-cost-tables.h (generic_extra_costs): Specify
-+ rev cost.
-+ (cortex_a53_extra_costs): Likewise.
-+ (cortex_a57_extra_costs): Likewise.
-+ * config/arm/arm.c (cortexa9_extra_costs): Likewise.
-+ (cortexa7_extra_costs): Likewise.
-+ (cortexa8_extra_costs): Likewise.
-+ (cortexa12_extra_costs): Likewise.
-+ (cortexa15_extra_costs): Likewise.
-+ (v7m_extra_costs): Likewise.
-+ (arm_new_rtx_costs): Handle BSWAP.
-+
-+ 2013-04-23 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * config/arm/arm.c (cortexa8_extra_costs): New table.
-+ (arm_cortex_a8_tune): New tuning struct.
-+ * config/arm/arm-cores.def (cortex-a8): Use cortex_a8 tuning struct.
-+
-+ 2014-04-23 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * config/arm/arm.c (arm_new_rtx_costs): Handle FMA.
-+
-+2014-05-23 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209659.
-+ 2014-04-22 Richard Henderson <rth@redhat.com>
-+
-+ * config/aarch64/aarch64 (addti3, subti3): New expanders.
-+ (add<GPI>3_compare0): Remove leading * from name.
-+ (add<GPI>3_carryin): Likewise.
-+ (sub<GPI>3_compare0): Likewise.
-+ (sub<GPI>3_carryin): Likewise.
-+ (<su_optab>mulditi3): New expander.
-+ (multi3): New expander.
-+ (madd<GPI>): Remove leading * from name.
-+
-+2014-05-23 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209645.
-+ 2014-04-22 Andrew Pinski <apinski@cavium.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_load_symref_appropriately):
-+ Handle TLS for ILP32.
-+ * config/aarch64/aarch64.md (tlsie_small): Rename to ...
-+ (tlsie_small_<mode>): this and handle PTR.
-+ (tlsie_small_sidi): New pattern.
-+ (tlsle_small): Change to an expand to handle ILP32.
-+ (tlsle_small_<mode>): New pattern.
-+ (tlsdesc_small): Rename to ...
-+ (tlsdesc_small_<mode>): this and handle PTR.
-+
-+2014-05-23 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209643.
-+ 2014-04-22 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
-+
-+ * config/aarch64/aarch64.c (TARGET_FLAGS_REGNUM): Define.
-+
-+2014-05-23 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209641, 209642.
-+ 2014-04-22 Alex Velenko <Alex.Velenko@arm.com>
-+
-+ * config/aarch64/aarch64-builtins.c (TYPES_REINTERP): Removed.
-+ (aarch64_types_signed_unsigned_qualifiers): Qualifier added.
-+ (aarch64_types_signed_poly_qualifiers): Likewise.
-+ (aarch64_types_unsigned_signed_qualifiers): Likewise.
-+ (aarch64_types_poly_signed_qualifiers): Likewise.
-+ (TYPES_REINTERP_SS): Type macro added.
-+ (TYPES_REINTERP_SU): Likewise.
-+ (TYPES_REINTERP_SP): Likewise.
-+ (TYPES_REINTERP_US): Likewise.
-+ (TYPES_REINTERP_PS): Likewise.
-+ (aarch64_fold_builtin): New expression folding added.
-+ * config/aarch64/aarch64-simd-builtins.def (REINTERP):
-+ Declarations removed.
-+ (REINTERP_SS): Declarations added.
-+ (REINTERP_US): Likewise.
-+ (REINTERP_PS): Likewise.
-+ (REINTERP_SU): Likewise.
-+ (REINTERP_SP): Likewise.
-+ * config/aarch64/arm_neon.h (vreinterpret_p8_f64): Implemented.
-+ (vreinterpretq_p8_f64): Likewise.
-+ (vreinterpret_p16_f64): Likewise.
-+ (vreinterpretq_p16_f64): Likewise.
-+ (vreinterpret_f32_f64): Likewise.
-+ (vreinterpretq_f32_f64): Likewise.
-+ (vreinterpret_f64_f32): Likewise.
-+ (vreinterpret_f64_p8): Likewise.
-+ (vreinterpret_f64_p16): Likewise.
-+ (vreinterpret_f64_s8): Likewise.
-+ (vreinterpret_f64_s16): Likewise.
-+ (vreinterpret_f64_s32): Likewise.
-+ (vreinterpret_f64_s64): Likewise.
-+ (vreinterpret_f64_u8): Likewise.
-+ (vreinterpret_f64_u16): Likewise.
-+ (vreinterpret_f64_u32): Likewise.
-+ (vreinterpret_f64_u64): Likewise.
-+ (vreinterpretq_f64_f32): Likewise.
-+ (vreinterpretq_f64_p8): Likewise.
-+ (vreinterpretq_f64_p16): Likewise.
-+ (vreinterpretq_f64_s8): Likewise.
-+ (vreinterpretq_f64_s16): Likewise.
-+ (vreinterpretq_f64_s32): Likewise.
-+ (vreinterpretq_f64_s64): Likewise.
-+ (vreinterpretq_f64_u8): Likewise.
-+ (vreinterpretq_f64_u16): Likewise.
-+ (vreinterpretq_f64_u32): Likewise.
-+ (vreinterpretq_f64_u64): Likewise.
-+ (vreinterpret_s64_f64): Likewise.
-+ (vreinterpretq_s64_f64): Likewise.
-+ (vreinterpret_u64_f64): Likewise.
-+ (vreinterpretq_u64_f64): Likewise.
-+ (vreinterpret_s8_f64): Likewise.
-+ (vreinterpretq_s8_f64): Likewise.
-+ (vreinterpret_s16_f64): Likewise.
-+ (vreinterpretq_s16_f64): Likewise.
-+ (vreinterpret_s32_f64): Likewise.
-+ (vreinterpretq_s32_f64): Likewise.
-+ (vreinterpret_u8_f64): Likewise.
-+ (vreinterpretq_u8_f64): Likewise.
-+ (vreinterpret_u16_f64): Likewise.
-+ (vreinterpretq_u16_f64): Likewise.
-+ (vreinterpret_u32_f64): Likewise.
-+ (vreinterpretq_u32_f64): Likewise.
-+
-+ 2014-04-22 Alex Velenko <Alex.Velenko@arm.com>
-+
-+ * config/aarch64/aarch64/aarch64-builtins.c (TYPES_REINTERP): Removed.
-+ * config/aarch64/aarch64/aarch64-simd-builtins.def (REINTERP): Removed.
-+ (vreinterpret_p8_s8): Likewise.
-+ * config/aarch64/aarch64/arm_neon.h (vreinterpret_p8_s8): Uses cast.
-+ (vreinterpret_p8_s16): Likewise.
-+ (vreinterpret_p8_s32): Likewise.
-+ (vreinterpret_p8_s64): Likewise.
-+ (vreinterpret_p8_f32): Likewise.
-+ (vreinterpret_p8_u8): Likewise.
-+ (vreinterpret_p8_u16): Likewise.
-+ (vreinterpret_p8_u32): Likewise.
-+ (vreinterpret_p8_u64): Likewise.
-+ (vreinterpret_p8_p16): Likewise.
-+ (vreinterpretq_p8_s8): Likewise.
-+ (vreinterpretq_p8_s16): Likewise.
-+ (vreinterpretq_p8_s32): Likewise.
-+ (vreinterpretq_p8_s64): Likewise.
-+ (vreinterpretq_p8_f32): Likewise.
-+ (vreinterpretq_p8_u8): Likewise.
-+ (vreinterpretq_p8_u16): Likewise.
-+ (vreinterpretq_p8_u32): Likewise.
-+ (vreinterpretq_p8_u64): Likewise.
-+ (vreinterpretq_p8_p16): Likewise.
-+ (vreinterpret_p16_s8): Likewise.
-+ (vreinterpret_p16_s16): Likewise.
-+ (vreinterpret_p16_s32): Likewise.
-+ (vreinterpret_p16_s64): Likewise.
-+ (vreinterpret_p16_f32): Likewise.
-+ (vreinterpret_p16_u8): Likewise.
-+ (vreinterpret_p16_u16): Likewise.
-+ (vreinterpret_p16_u32): Likewise.
-+ (vreinterpret_p16_u64): Likewise.
-+ (vreinterpret_p16_p8): Likewise.
-+ (vreinterpretq_p16_s8): Likewise.
-+ (vreinterpretq_p16_s16): Likewise.
-+ (vreinterpretq_p16_s32): Likewise.
-+ (vreinterpretq_p16_s64): Likewise.
-+ (vreinterpretq_p16_f32): Likewise.
-+ (vreinterpretq_p16_u8): Likewise.
-+ (vreinterpretq_p16_u16): Likewise.
-+ (vreinterpretq_p16_u32): Likewise.
-+ (vreinterpretq_p16_u64): Likewise.
-+ (vreinterpretq_p16_p8): Likewise.
-+ (vreinterpret_f32_s8): Likewise.
-+ (vreinterpret_f32_s16): Likewise.
-+ (vreinterpret_f32_s32): Likewise.
-+ (vreinterpret_f32_s64): Likewise.
-+ (vreinterpret_f32_u8): Likewise.
-+ (vreinterpret_f32_u16): Likewise.
-+ (vreinterpret_f32_u32): Likewise.
-+ (vreinterpret_f32_u64): Likewise.
-+ (vreinterpret_f32_p8): Likewise.
-+ (vreinterpret_f32_p16): Likewise.
-+ (vreinterpretq_f32_s8): Likewise.
-+ (vreinterpretq_f32_s16): Likewise.
-+ (vreinterpretq_f32_s32): Likewise.
-+ (vreinterpretq_f32_s64): Likewise.
-+ (vreinterpretq_f32_u8): Likewise.
-+ (vreinterpretq_f32_u16): Likewise.
-+ (vreinterpretq_f32_u32): Likewise.
-+ (vreinterpretq_f32_u64): Likewise.
-+ (vreinterpretq_f32_p8): Likewise.
-+ (vreinterpretq_f32_p16): Likewise.
-+ (vreinterpret_s64_s8): Likewise.
-+ (vreinterpret_s64_s16): Likewise.
-+ (vreinterpret_s64_s32): Likewise.
-+ (vreinterpret_s64_f32): Likewise.
-+ (vreinterpret_s64_u8): Likewise.
-+ (vreinterpret_s64_u16): Likewise.
-+ (vreinterpret_s64_u32): Likewise.
-+ (vreinterpret_s64_u64): Likewise.
-+ (vreinterpret_s64_p8): Likewise.
-+ (vreinterpret_s64_p16): Likewise.
-+ (vreinterpretq_s64_s8): Likewise.
-+ (vreinterpretq_s64_s16): Likewise.
-+ (vreinterpretq_s64_s32): Likewise.
-+ (vreinterpretq_s64_f32): Likewise.
-+ (vreinterpretq_s64_u8): Likewise.
-+ (vreinterpretq_s64_u16): Likewise.
-+ (vreinterpretq_s64_u32): Likewise.
-+ (vreinterpretq_s64_u64): Likewise.
-+ (vreinterpretq_s64_p8): Likewise.
-+ (vreinterpretq_s64_p16): Likewise.
-+ (vreinterpret_u64_s8): Likewise.
-+ (vreinterpret_u64_s16): Likewise.
-+ (vreinterpret_u64_s32): Likewise.
-+ (vreinterpret_u64_s64): Likewise.
-+ (vreinterpret_u64_f32): Likewise.
-+ (vreinterpret_u64_u8): Likewise.
-+ (vreinterpret_u64_u16): Likewise.
-+ (vreinterpret_u64_u32): Likewise.
-+ (vreinterpret_u64_p8): Likewise.
-+ (vreinterpret_u64_p16): Likewise.
-+ (vreinterpretq_u64_s8): Likewise.
-+ (vreinterpretq_u64_s16): Likewise.
-+ (vreinterpretq_u64_s32): Likewise.
-+ (vreinterpretq_u64_s64): Likewise.
-+ (vreinterpretq_u64_f32): Likewise.
-+ (vreinterpretq_u64_u8): Likewise.
-+ (vreinterpretq_u64_u16): Likewise.
-+ (vreinterpretq_u64_u32): Likewise.
-+ (vreinterpretq_u64_p8): Likewise.
-+ (vreinterpretq_u64_p16): Likewise.
-+ (vreinterpret_s8_s16): Likewise.
-+ (vreinterpret_s8_s32): Likewise.
-+ (vreinterpret_s8_s64): Likewise.
-+ (vreinterpret_s8_f32): Likewise.
-+ (vreinterpret_s8_u8): Likewise.
-+ (vreinterpret_s8_u16): Likewise.
-+ (vreinterpret_s8_u32): Likewise.
-+ (vreinterpret_s8_u64): Likewise.
-+ (vreinterpret_s8_p8): Likewise.
-+ (vreinterpret_s8_p16): Likewise.
-+ (vreinterpretq_s8_s16): Likewise.
-+ (vreinterpretq_s8_s32): Likewise.
-+ (vreinterpretq_s8_s64): Likewise.
-+ (vreinterpretq_s8_f32): Likewise.
-+ (vreinterpretq_s8_u8): Likewise.
-+ (vreinterpretq_s8_u16): Likewise.
-+ (vreinterpretq_s8_u32): Likewise.
-+ (vreinterpretq_s8_u64): Likewise.
-+ (vreinterpretq_s8_p8): Likewise.
-+ (vreinterpretq_s8_p16): Likewise.
-+ (vreinterpret_s16_s8): Likewise.
-+ (vreinterpret_s16_s32): Likewise.
-+ (vreinterpret_s16_s64): Likewise.
-+ (vreinterpret_s16_f32): Likewise.
-+ (vreinterpret_s16_u8): Likewise.
-+ (vreinterpret_s16_u16): Likewise.
-+ (vreinterpret_s16_u32): Likewise.
-+ (vreinterpret_s16_u64): Likewise.
-+ (vreinterpret_s16_p8): Likewise.
-+ (vreinterpret_s16_p16): Likewise.
-+ (vreinterpretq_s16_s8): Likewise.
-+ (vreinterpretq_s16_s32): Likewise.
-+ (vreinterpretq_s16_s64): Likewise.
-+ (vreinterpretq_s16_f32): Likewise.
-+ (vreinterpretq_s16_u8): Likewise.
-+ (vreinterpretq_s16_u16): Likewise.
-+ (vreinterpretq_s16_u32): Likewise.
-+ (vreinterpretq_s16_u64): Likewise.
-+ (vreinterpretq_s16_p8): Likewise.
-+ (vreinterpretq_s16_p16): Likewise.
-+ (vreinterpret_s32_s8): Likewise.
-+ (vreinterpret_s32_s16): Likewise.
-+ (vreinterpret_s32_s64): Likewise.
-+ (vreinterpret_s32_f32): Likewise.
-+ (vreinterpret_s32_u8): Likewise.
-+ (vreinterpret_s32_u16): Likewise.
-+ (vreinterpret_s32_u32): Likewise.
-+ (vreinterpret_s32_u64): Likewise.
-+ (vreinterpret_s32_p8): Likewise.
-+ (vreinterpret_s32_p16): Likewise.
-+ (vreinterpretq_s32_s8): Likewise.
-+ (vreinterpretq_s32_s16): Likewise.
-+ (vreinterpretq_s32_s64): Likewise.
-+ (vreinterpretq_s32_f32): Likewise.
-+ (vreinterpretq_s32_u8): Likewise.
-+ (vreinterpretq_s32_u16): Likewise.
-+ (vreinterpretq_s32_u32): Likewise.
-+ (vreinterpretq_s32_u64): Likewise.
-+ (vreinterpretq_s32_p8): Likewise.
-+ (vreinterpretq_s32_p16): Likewise.
-+ (vreinterpret_u8_s8): Likewise.
-+ (vreinterpret_u8_s16): Likewise.
-+ (vreinterpret_u8_s32): Likewise.
-+ (vreinterpret_u8_s64): Likewise.
-+ (vreinterpret_u8_f32): Likewise.
-+ (vreinterpret_u8_u16): Likewise.
-+ (vreinterpret_u8_u32): Likewise.
-+ (vreinterpret_u8_u64): Likewise.
-+ (vreinterpret_u8_p8): Likewise.
-+ (vreinterpret_u8_p16): Likewise.
-+ (vreinterpretq_u8_s8): Likewise.
-+ (vreinterpretq_u8_s16): Likewise.
-+ (vreinterpretq_u8_s32): Likewise.
-+ (vreinterpretq_u8_s64): Likewise.
-+ (vreinterpretq_u8_f32): Likewise.
-+ (vreinterpretq_u8_u16): Likewise.
-+ (vreinterpretq_u8_u32): Likewise.
-+ (vreinterpretq_u8_u64): Likewise.
-+ (vreinterpretq_u8_p8): Likewise.
-+ (vreinterpretq_u8_p16): Likewise.
-+ (vreinterpret_u16_s8): Likewise.
-+ (vreinterpret_u16_s16): Likewise.
-+ (vreinterpret_u16_s32): Likewise.
-+ (vreinterpret_u16_s64): Likewise.
-+ (vreinterpret_u16_f32): Likewise.
-+ (vreinterpret_u16_u8): Likewise.
-+ (vreinterpret_u16_u32): Likewise.
-+ (vreinterpret_u16_u64): Likewise.
-+ (vreinterpret_u16_p8): Likewise.
-+ (vreinterpret_u16_p16): Likewise.
-+ (vreinterpretq_u16_s8): Likewise.
-+ (vreinterpretq_u16_s16): Likewise.
-+ (vreinterpretq_u16_s32): Likewise.
-+ (vreinterpretq_u16_s64): Likewise.
-+ (vreinterpretq_u16_f32): Likewise.
-+ (vreinterpretq_u16_u8): Likewise.
-+ (vreinterpretq_u16_u32): Likewise.
-+ (vreinterpretq_u16_u64): Likewise.
-+ (vreinterpretq_u16_p8): Likewise.
-+ (vreinterpretq_u16_p16): Likewise.
-+ (vreinterpret_u32_s8): Likewise.
-+ (vreinterpret_u32_s16): Likewise.
-+ (vreinterpret_u32_s32): Likewise.
-+ (vreinterpret_u32_s64): Likewise.
-+ (vreinterpret_u32_f32): Likewise.
-+ (vreinterpret_u32_u8): Likewise.
-+ (vreinterpret_u32_u16): Likewise.
-+ (vreinterpret_u32_u64): Likewise.
-+ (vreinterpret_u32_p8): Likewise.
-+ (vreinterpret_u32_p16): Likewise.
-+ (vreinterpretq_u32_s8): Likewise.
-+ (vreinterpretq_u32_s16): Likewise.
-+ (vreinterpretq_u32_s32): Likewise.
-+ (vreinterpretq_u32_s64): Likewise.
-+ (vreinterpretq_u32_f32): Likewise.
-+ (vreinterpretq_u32_u8): Likewise.
-+ (vreinterpretq_u32_u16): Likewise.
-+ (vreinterpretq_u32_u64): Likewise.
-+ (vreinterpretq_u32_p8): Likewise.
-+ (vreinterpretq_u32_p16): Likewise.
-+
-+2014-05-23 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209640.
-+ 2014-04-22 Alex Velenko <Alex.Velenko@arm.com>
-+
-+ * gcc/config/aarch64/aarch64-simd.md (aarch64_s<optab><mode>):
-+ Pattern extended.
-+ * config/aarch64/aarch64-simd-builtins.def (sqneg): Iterator
-+ extended.
-+ (sqabs): Likewise.
-+ * config/aarch64/arm_neon.h (vqneg_s64): New intrinsic.
-+ (vqnegd_s64): Likewise.
-+ (vqabs_s64): Likewise.
-+ (vqabsd_s64): Likewise.
-+
-+2014-05-23 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209627, 209636.
-+ 2014-04-22 Renlin <renlin.li@arm.com>
-+ Jiong Wang <jiong.wang@arm.com>
-+
-+ * config/aarch64/aarch64.h (aarch64_frame): Delete "fp_lr_offset".
-+ * config/aarch64/aarch64.c (aarch64_layout_frame)
-+ (aarch64_initial_elimination_offset): Likewise.
-+
-+ 2014-04-22 Marcus Shawcroft <marcus.shawcroft@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_initial_elimination_offset):
-+ Fix indentation.
-+
-+2014-05-23 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209618.
-+ 2014-04-22 Renlin Li <Renlin.Li@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_print_operand_address): Adjust
-+ the output asm format.
-+
-+2014-05-23 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209617.
-+ 2014-04-22 James Greenhalgh <james.greenhalgh@arm.com>
-+
-+ * config/aarch64/aarch64-simd.md
-+ (aarch64_cm<optab>di): Always split.
-+ (*aarch64_cm<optab>di): New.
-+ (aarch64_cmtstdi): Always split.
-+ (*aarch64_cmtstdi): New.
-+
-+2014-05-23 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209615.
-+ 2014-04-22 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
-+
-+ * config/arm/arm.c (arm_hard_regno_mode_ok): Loosen
-+ restrictions on core registers for DImode values in Thumb2.
-+
-+2014-05-23 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209613, r209614.
-+ 2014-04-22 Ian Bolton <ian.bolton@arm.com>
-+
-+ * config/arm/arm.md (*anddi_notdi_zesidi): New pattern.
-+ * config/arm/thumb2.md (*iordi_notdi_zesidi): New pattern.
-+
-+ 2014-04-22 Ian Bolton <ian.bolton@arm.com>
-+
-+ * config/arm/thumb2.md (*iordi_notdi_di): New pattern.
-+ (*iordi_notzesidi_di): Likewise.
-+ (*iordi_notsesidi_di): Likewise.
-+
-+2014-05-23 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209561.
-+ 2014-04-22 Ian Bolton <ian.bolton@arm.com>
-+
-+ * config/arm/arm-protos.h (tune_params): New struct members.
-+ * config/arm/arm.c: Initialise tune_params per processor.
-+ (thumb2_reorg): Suppress conversion from t32 to t16 when optimizing
-+ for speed, based on new tune_params.
-+
-+2014-05-23 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209559.
-+ 2014-04-22 Alex Velenko <Alex.Velenko@arm.com>
-+
-+ * config/aarch64/aarch64-builtins.c (BUILTIN_VDQF_DF): Macro
-+ added.
-+ * config/aarch64/aarch64-simd-builtins.def (frintn): Use added
-+ macro.
-+ * config/aarch64/aarch64-simd.md (<frint_pattern>): Comment
-+ corrected.
-+ * config/aarch64/aarch64.md (<frint_pattern>): Likewise.
-+ * config/aarch64/arm_neon.h (vrnd_f64): Added.
-+ (vrnda_f64): Likewise.
-+ (vrndi_f64): Likewise.
-+ (vrndm_f64): Likewise.
-+ (vrndn_f64): Likewise.
-+ (vrndp_f64): Likewise.
-+ (vrndx_f64): Likewise.
-+
-+2014-05-23 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209419.
-+ 2014-04-15 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ PR rtl-optimization/60663
-+ * config/arm/arm.c (arm_new_rtx_costs): Improve ASM_OPERANDS case,
-+ avoid 0 cost.
-+
-+2014-05-23 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209457.
-+ 2014-04-16 Andrew Pinski <apinski@cavium.com>
-+
-+ * config/host-linux.c (TRY_EMPTY_VM_SPACE): Change aarch64 ilp32
-+ definition.
-+
-+2014-05-19 Yvan Roux <yvan.roux@linaro.org>
-+
-+ * LINARO-VERSION: Bump version.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+ * LINARO-VERSION: Update.
-+
-+2014-05-13 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209889.
-+ 2014-04-29 Zhenqiang Chen <zhenqiang.chen@linaro.org>
-+
-+ * config/aarch64/aarch64.md (mov<mode>cc): New for GPF.
-+
-+2014-05-13 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209556.
-+ 2014-04-22 Zhenqiang Chen <zhenqiang.chen@linaro.org>
-+
-+ * config/arm/arm.c (arm_print_operand, thumb_exit): Make sure
-+ GET_MODE_SIZE argument is enum machine_mode.
-+
-+2014-04-28 Yvan Roux <yvan.roux@linaro.org>
-+
-+ * LINARO-VERSION: Bump version.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
-+ * LINARO-VERSION: New file.
-+ * configure.ac: Add Linaro version string.
---- a/src/gcc/testsuite/gcc.target/arm/pr44788.c
-+++ b/src/gcc/testsuite/gcc.target/arm/pr44788.c
-@@ -2,6 +2,8 @@
- /* { dg-require-effective-target arm_thumb2_ok } */
- /* { dg-options "-Os -fno-strict-aliasing -fPIC -mthumb -march=armv7-a -mfpu=vfp3 -mfloat-abi=softfp" } */
-
-+extern void foo (float *);
-+
- void joint_decode(float* mlt_buffer1, int t) {
- int i;
- float decode_buffer[1060];
---- a/src/gcc/testsuite/gcc.target/arm/vect-rounding-floorf.c
-+++ b/src/gcc/testsuite/gcc.target/arm/vect-rounding-floorf.c
-@@ -5,8 +5,11 @@
-
- #define N 32
-
-+float __attribute__((aligned(16))) input[N];
-+float __attribute__((aligned(16))) output[N];
-+
- void
--foo (float *output, float *input)
-+foo ()
- {
- int i = 0;
- /* Vectorizable. */
---- a/src/gcc/testsuite/gcc.target/arm/vect-lceilf_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/vect-lceilf_1.c
-@@ -0,0 +1,21 @@
-+/* { dg-do compile } */
-+/* { dg-require-effective-target arm_v8_neon_ok } */
-+/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-all" } */
-+/* { dg-add-options arm_v8_neon } */
-+
-+#define N 32
-+
-+float __attribute__((aligned(16))) input[N];
-+int __attribute__((aligned(16))) output[N];
-+
-+void
-+foo ()
-+{
-+ int i = 0;
-+ /* Vectorizable. */
-+ for (i = 0; i < N; i++)
-+ output[i] = __builtin_lceilf (input[i]);
-+}
-+
-+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
-+/* { dg-final { cleanup-tree-dump "vect" } } */
---- a/src/gcc/testsuite/gcc.target/arm/vfp-ldmdbs.c
-+++ b/src/gcc/testsuite/gcc.target/arm/vfp-ldmdbs.c
-@@ -3,7 +3,7 @@
- /* { dg-skip-if "need fp instructions" { *-*-* } { "-mfloat-abi=soft" } { "" } } */
- /* { dg-options "-O2 -mfpu=vfp -mfloat-abi=softfp" } */
-
--extern void baz (float);
-+extern void bar (float);
-
- void
- foo (float *p, float a, int n)
-@@ -13,4 +13,4 @@
- while (n--);
- }
-
--/* { dg-final { scan-assembler "fldmdbs" } } */
-+/* { dg-final { scan-assembler "vldmdb.32" } } */
---- a/src/gcc/testsuite/gcc.target/arm/pr60606-4.c
-+++ b/src/gcc/testsuite/gcc.target/arm/pr60606-4.c
-@@ -0,0 +1,9 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O" } */
-+
-+int
-+f (void)
-+{
-+ register unsigned int r[50] asm ("r1"); /* { dg-error "suitable for a register" } */
-+ return r[1];
-+}
---- a/src/gcc/testsuite/gcc.target/arm/iordi3-opt.c
-+++ b/src/gcc/testsuite/gcc.target/arm/iordi3-opt.c
-@@ -1,4 +1,4 @@
--/* { dg-do compile } */
-+/* { dg-do compile { target { arm_arm_ok || arm_thumb2_ok} } } */
- /* { dg-options "-O1" } */
-
- unsigned long long or64 (unsigned long long input)
---- a/src/gcc/testsuite/gcc.target/arm/pr58784.c
-+++ b/src/gcc/testsuite/gcc.target/arm/pr58784.c
-@@ -11,6 +11,9 @@
- char stepsRemoved;
- ptp_tlv_t tlv[1];
- } ptp_message_announce_t;
-+
-+extern void f (ptp_message_announce_t *);
-+
- int ptplib_send_announce(int sequenceId, int i)
- {
- ptp_message_announce_t tx_packet;
---- a/src/gcc/testsuite/gcc.target/arm/iordi_notdi-1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/iordi_notdi-1.c
-@@ -0,0 +1,65 @@
-+/* { dg-do run } */
-+/* { dg-options "-O2 -fno-inline --save-temps" } */
-+
-+extern void abort (void);
-+
-+typedef long long s64int;
-+typedef int s32int;
-+typedef unsigned long long u64int;
-+typedef unsigned int u32int;
-+
-+s64int
-+iordi_di_notdi (s64int a, s64int b)
-+{
-+ return (a | ~b);
-+}
-+
-+s64int
-+iordi_di_notzesidi (s64int a, u32int b)
-+{
-+ return (a | ~(u64int) b);
-+}
-+
-+s64int
-+iordi_notdi_zesidi (s64int a, u32int b)
-+{
-+ return (~a | (u64int) b);
-+}
-+
-+s64int
-+iordi_di_notsesidi (s64int a, s32int b)
-+{
-+ return (a | ~(s64int) b);
-+}
-+
-+int main ()
-+{
-+ s64int a64 = 0xdeadbeef00000000ll;
-+ s64int b64 = 0x000000004f4f0112ll;
-+ s64int c64 = 0xdeadbeef000f0000ll;
-+
-+ u32int c32 = 0x01124f4f;
-+ s32int d32 = 0xabbaface;
-+
-+ s64int z = iordi_di_notdi (a64, b64);
-+ if (z != 0xffffffffb0b0feedll)
-+ abort ();
-+
-+ z = iordi_di_notzesidi (a64, c32);
-+ if (z != 0xfffffffffeedb0b0ll)
-+ abort ();
-+
-+ z = iordi_notdi_zesidi (c64, c32);
-+ if (z != 0x21524110fff2ffffll)
-+ abort ();
-+
-+ z = iordi_di_notsesidi (a64, d32);
-+ if (z != 0xdeadbeef54450531ll)
-+ abort ();
-+
-+ return 0;
-+}
-+
-+/* { dg-final { scan-assembler-times "orn\t" 6 { target arm_thumb2 } } } */
-+
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/vfp-ldmias.c
-+++ b/src/gcc/testsuite/gcc.target/arm/vfp-ldmias.c
-@@ -3,7 +3,7 @@
- /* { dg-skip-if "need fp instructions" { *-*-* } { "-mfloat-abi=soft" } { "" } } */
- /* { dg-options "-O2 -mfpu=vfp -mfloat-abi=softfp" } */
-
--extern void baz (float);
-+extern void bar (float);
-
- void
- foo (float *p, float a, int n)
-@@ -13,4 +13,4 @@
- while (n--);
- }
-
--/* { dg-final { scan-assembler "fldmias" } } */
-+/* { dg-final { scan-assembler "vldmia.32" } } */
---- a/src/gcc/testsuite/gcc.target/arm/cold-lc.c
-+++ b/src/gcc/testsuite/gcc.target/arm/cold-lc.c
-@@ -7,6 +7,7 @@
- struct task_struct *task;
- };
- extern struct thread_info *current_thread_info (void);
-+extern int show_stack (struct task_struct *, unsigned long *);
-
- void dump_stack (void)
- {
---- a/src/gcc/testsuite/gcc.target/arm/vfp-ldmdbd.c
-+++ b/src/gcc/testsuite/gcc.target/arm/vfp-ldmdbd.c
-@@ -13,4 +13,4 @@
- while (n--);
- }
-
--/* { dg-final { scan-assembler "fldmdbd" } } */
-+/* { dg-final { scan-assembler "vldmdb.64" } } */
---- a/src/gcc/testsuite/gcc.target/arm/vfp-stmdbs.c
-+++ b/src/gcc/testsuite/gcc.target/arm/vfp-stmdbs.c
-@@ -12,4 +12,4 @@
- while (n--);
- }
-
--/* { dg-final { scan-assembler "fstmdbs" } } */
-+/* { dg-final { scan-assembler "vstmdb.32" } } */
---- a/src/gcc/testsuite/gcc.target/arm/vfp-ldmiad.c
-+++ b/src/gcc/testsuite/gcc.target/arm/vfp-ldmiad.c
-@@ -13,4 +13,4 @@
- while (n--);
- }
-
--/* { dg-final { scan-assembler "fldmiad" } } */
-+/* { dg-final { scan-assembler "vldmia.64" } } */
---- a/src/gcc/testsuite/gcc.target/arm/vfp-stmias.c
-+++ b/src/gcc/testsuite/gcc.target/arm/vfp-stmias.c
-@@ -12,4 +12,4 @@
- while (n--);
- }
-
--/* { dg-final { scan-assembler "fstmias" } } */
-+/* { dg-final { scan-assembler "vstmia.32" } } */
---- a/src/gcc/testsuite/gcc.target/arm/vfp-stmdbd.c
-+++ b/src/gcc/testsuite/gcc.target/arm/vfp-stmdbd.c
-@@ -12,4 +12,4 @@
- while (n--);
- }
-
--/* { dg-final { scan-assembler "fstmdbd" } } */
-+/* { dg-final { scan-assembler "vstmdb.64" } } */
---- a/src/gcc/testsuite/gcc.target/arm/lceil-vcvt_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/lceil-vcvt_1.c
-@@ -0,0 +1,21 @@
-+/* { dg-do compile } */
-+/* { dg-require-effective-target arm_v8_vfp_ok } */
-+/* { dg-options "-O2 -march=armv8-a" } */
-+/* { dg-add-options arm_v8_vfp } */
-+
-+int
-+foofloat (float x)
-+{
-+ return __builtin_lceilf (x);
-+}
-+
-+/* { dg-final { scan-assembler-times "vcvtp.s32.f32\ts\[0-9\]+, s\[0-9\]+" 1 } } */
-+
-+
-+int
-+foodouble (double x)
-+{
-+ return __builtin_lceil (x);
-+}
-+
-+/* { dg-final { scan-assembler-times "vcvtp.s32.f64\ts\[0-9\]+, d\[0-9\]+" 1 } } */
---- a/src/gcc/testsuite/gcc.target/arm/vfp-stmiad.c
-+++ b/src/gcc/testsuite/gcc.target/arm/vfp-stmiad.c
-@@ -12,4 +12,4 @@
- while (n--);
- }
-
--/* { dg-final { scan-assembler "fstmiad" } } */
-+/* { dg-final { scan-assembler "vstmia.64" } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vzips16_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzips16_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vzips16' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vzips16.x"
-+
-+/* { dg-final { scan-assembler-times "vzip\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrns16_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrns16_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vtrns16' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vtrns16.x"
-+
-+/* { dg-final { scan-assembler-times "vtrn\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vexts64_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vexts64_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vexts64' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/ext_s64.x"
-+
-+/* Don't scan assembler for vext - it can be optimized into a move from r0. */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipu16_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipu16_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vzipu16' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vzipu16.x"
-+
-+/* { dg-final { scan-assembler-times "vzip\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqs8_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqs8_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vzipQs8' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vzipqs8.x"
-+
-+/* { dg-final { scan-assembler-times "vzip\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vextQu8_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQu8_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vextQu8' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/extq_u8.x"
-+
-+/* { dg-final { scan-assembler-times "vext\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 15 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnu16_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnu16_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vtrnu16' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vtrnu16.x"
-+
-+/* { dg-final { scan-assembler-times "vtrn\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs8_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs8_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vtrnQs8' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vtrnqs8.x"
-+
-+/* { dg-final { scan-assembler-times "vtrn\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqf32_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqf32_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vtrnQf32' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vtrnqf32.x"
-+
-+/* { dg-final { scan-assembler-times "vtrn\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vextu64_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextu64_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vextu64' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/ext_u64.x"
-+
-+/* Don't scan assembler for vext - it can be optimized into a move from r0. */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qp8_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qp8_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vrev64q_p8' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vrev64qp8.x"
-+
-+/* { dg-final { scan-assembler "vrev64\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqp8_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqp8_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vuzpQp8' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vuzpqp8.x"
-+
-+/* { dg-final { scan-assembler-times "vuzp\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32p8_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32p8_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vrev32p8' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vrev32p8.x"
-+
-+/* { dg-final { scan-assembler "vrev32\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vextu8_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextu8_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vextu8' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/ext_u8.x"
-+
-+/* { dg-final { scan-assembler-times "vext\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vextQs64_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQs64_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vextQs64' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/extq_s64.x"
-+
-+/* { dg-final { scan-assembler-times "vext\.64\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qp16_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qp16_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vrev64q_p16' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vrev64qp16.x"
-+
-+/* { dg-final { scan-assembler "vrev64\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs16_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs16_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vtrnQs16' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vtrnqs16.x"
-+
-+/* { dg-final { scan-assembler-times "vtrn\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrns8_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrns8_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vtrns8' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vtrns8.x"
-+
-+/* { dg-final { scan-assembler-times "vtrn\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qs32_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qs32_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vrev64q_s32' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vrev64qs32.x"
-+
-+/* { dg-final { scan-assembler "vrev64\.32\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vextQu64_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQu64_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vextQu64' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/extq_u64.x"
-+
-+/* { dg-final { scan-assembler-times "vext\.64\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu16_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu16_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vtrnQu16' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vtrnqu16.x"
-+
-+/* { dg-final { scan-assembler-times "vtrn\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64s8_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64s8_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vrev64s8' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vrev64s8.x"
-+
-+/* { dg-final { scan-assembler "vrev64\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qu32_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qu32_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vrev64q_u32' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vrev64qu32.x"
-+
-+/* { dg-final { scan-assembler "vrev64\.32\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqp16_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqp16_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vuzpQp16' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vuzpqp16.x"
-+
-+/* { dg-final { scan-assembler-times "vuzp\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vextp16_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextp16_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vextp16' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/ext_p16.x"
-+
-+/* { dg-final { scan-assembler-times "vext\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs32_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs32_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vuzpQs32' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vuzpqs32.x"
-+
-+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vexts32_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vexts32_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vexts32' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/ext_s32.x"
-+
-+/* { dg-final { scan-assembler-times "vext\.32\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu32_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu32_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vuzpQu32' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vuzpqu32.x"
-+
-+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzps8_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzps8_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vuzps8' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vuzps8.x"
-+
-+/* { dg-final { scan-assembler-times "vuzp\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vextu32_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextu32_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vextu32' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/ext_u32.x"
-+
-+/* { dg-final { scan-assembler-times "vext\.32\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32s16_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32s16_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vrev32s16' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vrev32s16.x"
-+
-+/* { dg-final { scan-assembler "vrev32\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqp8_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqp8_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vzipQp8' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vzipqp8.x"
-+
-+/* { dg-final { scan-assembler-times "vzip\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqp8_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqp8_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vtrnQp8' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vtrnqp8.x"
-+
-+/* { dg-final { scan-assembler-times "vtrn\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32qs8_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32qs8_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vrev32q_s8' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vrev32qs8.x"
-+
-+/* { dg-final { scan-assembler "vrev32\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32u16_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32u16_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vrev32u16' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vrev32u16.x"
-+
-+/* { dg-final { scan-assembler "vrev32\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64p16_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64p16_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vrev64p16' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vrev64p16.x"
-+
-+/* { dg-final { scan-assembler "vrev64\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64s32_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64s32_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vrev64s32' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vrev64s32.x"
-+
-+/* { dg-final { scan-assembler "vrev64\.32\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev16qs8_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev16qs8_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vrev16q_s8' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vrev16qs8.x"
-+
-+/* { dg-final { scan-assembler "vrev16\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/simd.exp
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/simd.exp
-@@ -0,0 +1,35 @@
-+# Copyright (C) 1997-2014 Free Software Foundation, Inc.
-+
-+# This program is free software; you can redistribute it and/or modify
-+# it under the terms of the GNU General Public License as published by
-+# the Free Software Foundation; either version 3 of the License, or
-+# (at your option) any later version.
-+#
-+# This program is distributed in the hope that it will be useful,
-+# but WITHOUT ANY WARRANTY; without even the implied warranty of
-+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-+# GNU General Public License for more details.
-+#
-+# You should have received a copy of the GNU General Public License
-+# along with GCC; see the file COPYING3. If not see
-+# <http://www.gnu.org/licenses/>.
-+
-+# GCC testsuite that uses the `dg.exp' driver.
-+
-+# Exit immediately if this isn't an ARM target.
-+if ![istarget arm*-*-*] then {
-+ return
-+}
-+
-+# Load support procs.
-+load_lib gcc-dg.exp
-+
-+# Initialize `dg'.
-+dg-init
-+
-+# Main loop.
-+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \
-+ "" ""
-+
-+# All done.
-+dg-finish
---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64u32_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64u32_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vrev64u32' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vrev64u32.x"
-+
-+/* { dg-final { scan-assembler "vrev64\.32\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qu8_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qu8_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vrev64q_u8' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vrev64qu8.x"
-+
-+/* { dg-final { scan-assembler "vrev64\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpp16_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpp16_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vuzpp16' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vuzpp16.x"
-+
-+/* { dg-final { scan-assembler-times "vuzp\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzps32_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzps32_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vuzps32' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vuzps32.x"
-+
-+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpu32_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpu32_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vuzpu32' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vuzpu32.x"
-+
-+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vextQp16_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQp16_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vextQp16' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/extq_p16.x"
-+
-+/* { dg-final { scan-assembler-times "vext\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vextQs32_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQs32_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vextQs32' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/extq_s32.x"
-+
-+/* { dg-final { scan-assembler-times "vext\.32\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32qp16_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32qp16_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vrev32q_p16' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vrev32qp16.x"
-+
-+/* { dg-final { scan-assembler "vrev32\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqp16_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqp16_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vzipQp16' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vzipqp16.x"
-+
-+/* { dg-final { scan-assembler-times "vzip\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqs32_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqs32_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vzipQs32' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vzipqs32.x"
-+
-+/* { dg-final { scan-assembler-times "vzip\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vextQu32_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQu32_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vextQu32' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/extq_u32.x"
-+
-+/* { dg-final { scan-assembler-times "vext\.32\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnp8_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnp8_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vtrnp8' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vtrnp8.x"
-+
-+/* { dg-final { scan-assembler-times "vtrn\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu8_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu8_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vuzpQu8' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vuzpqu8.x"
-+
-+/* { dg-final { scan-assembler-times "vuzp\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vzips8_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzips8_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vzips8' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vzips8.x"
-+
-+/* { dg-final { scan-assembler-times "vzip\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqu32_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqu32_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vzipQu32' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vzipqu32.x"
-+
-+/* { dg-final { scan-assembler-times "vzip\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev16s8_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev16s8_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vrev16s8' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vrev16s8.x"
-+
-+/* { dg-final { scan-assembler "vrev16\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32u8_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32u8_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vrev32u8' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vrev32u8.x"
-+
-+/* { dg-final { scan-assembler "vrev32\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64p8_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64p8_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vrev64p8' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vrev64p8.x"
-+
-+/* { dg-final { scan-assembler "vrev64\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpp8_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpp8_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vuzpp8' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vuzpp8.x"
-+
-+/* { dg-final { scan-assembler-times "vuzp\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipp16_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipp16_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vzipp16' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vzipp16.x"
-+
-+/* { dg-final { scan-assembler-times "vzip\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vzips32_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzips32_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vzips32' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vzips32.x"
-+
-+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vextp64_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextp64_1.c
-@@ -0,0 +1,26 @@
-+/* Test the `vextp64' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_crypto_ok } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+/* { dg-add-options arm_crypto } */
-+
-+#include "arm_neon.h"
-+
-+extern void abort (void);
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ poly64x1_t in1 = {0};
-+ poly64x1_t in2 = {1};
-+ poly64x1_t actual = vext_p64 (in1, in2, 0);
-+ if (actual != in1)
-+ abort ();
-+
-+ return 0;
-+}
-+
-+/* Don't scan assembler for vext - it can be optimized into a move from r0.
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32qp8_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32qp8_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vrev32q_p8' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vrev32qp8.x"
-+
-+/* { dg-final { scan-assembler "vrev32\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnp16_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnp16_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vtrnp16' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vtrnp16.x"
-+
-+/* { dg-final { scan-assembler-times "vtrn\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrns32_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrns32_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vtrns32' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vtrns32.x"
-+
-+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vextQs8_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQs8_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vextQs8' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/extq_s8.x"
-+
-+/* { dg-final { scan-assembler-times "vext\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 15 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev16qp8_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev16qp8_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vrev16q_p8' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vrev16qp8.x"
-+
-+/* { dg-final { scan-assembler "vrev16\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipu32_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipu32_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vzipu32' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vzipu32.x"
-+
-+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnu32_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnu32_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vtrnu32' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vtrnu32.x"
-+
-+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqu8_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqu8_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vzipQu8' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vzipqu8.x"
-+
-+/* { dg-final { scan-assembler-times "vzip\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu8_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu8_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vtrnQu8' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vtrnqu8.x"
-+
-+/* { dg-final { scan-assembler-times "vtrn\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qf32_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qf32_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vrev64q_f32' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vrev64qf32.x"
-+
-+/* { dg-final { scan-assembler "vrev64\.32\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqf32_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqf32_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vuzpQf32' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vuzpqf32.x"
-+
-+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipp8_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipp8_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vzipp8' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vzipp8.x"
-+
-+/* { dg-final { scan-assembler-times "vzip\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vextf32_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextf32_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vextf32' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/ext_f32.x"
-+
-+/* { dg-final { scan-assembler-times "vext\.32\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vextQp64_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQp64_1.c
-@@ -0,0 +1,33 @@
-+/* Test the `vextQp64' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_crypto_ok } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+/* { dg-add-options arm_crypto } */
-+
-+#include "arm_neon.h"
-+
-+extern void abort (void);
-+
-+poly64x2_t
-+test_vextq_p64_1 (poly64x2_t a, poly64x2_t b)
-+{
-+ return vextq_p64(a, b, 1);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i, off;
-+ poly64x2_t in1 = {0, 1};
-+ poly64x2_t in2 = {2, 3};
-+ poly64x2_t actual = test_vextq_p64_1 (in1, in2);
-+ for (i = 0; i < 2; i++)
-+ if (actual[i] != i + 1)
-+ abort ();
-+
-+ return 0;
-+}
-+
-+/* { dg-final { scan-assembler-times "vext\.64\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vexts8_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vexts8_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vexts8' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/ext_s8.x"
-+
-+/* { dg-final { scan-assembler-times "vext\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev16p8_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev16p8_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vrev16p8' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vrev16p8.x"
-+
-+/* { dg-final { scan-assembler "vrev16\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqp16_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqp16_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vtrnQp16' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vtrnqp16.x"
-+
-+/* { dg-final { scan-assembler-times "vtrn\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs32_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs32_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vtrnQs32' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vtrnqs32.x"
-+
-+/* { dg-final { scan-assembler-times "vtrn\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu32_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu32_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vtrnQu32' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vtrnqu32.x"
-+
-+/* { dg-final { scan-assembler-times "vtrn\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnu8_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnu8_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vtrnu8' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vtrnu8.x"
-+
-+/* { dg-final { scan-assembler-times "vtrn\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qs16_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qs16_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vrev64q_s16' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vrev64qs16.x"
-+
-+/* { dg-final { scan-assembler "vrev64\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64f32_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64f32_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vrev64f32' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vrev64f32.x"
-+
-+/* { dg-final { scan-assembler "vrev64\.32\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64u8_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64u8_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vrev64u8' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vrev64u8.x"
-+
-+/* { dg-final { scan-assembler "vrev64\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qu16_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qu16_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vrev64q_u16' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vrev64qu16.x"
-+
-+/* { dg-final { scan-assembler "vrev64\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32p16_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32p16_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vrev32p16' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vrev32p16.x"
-+
-+/* { dg-final { scan-assembler "vrev32\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vextQp8_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQp8_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vextQp8' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/extq_p8.x"
-+
-+/* { dg-final { scan-assembler-times "vext\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 15 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpf32_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpf32_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vuzpf32' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vuzpf32.x"
-+
-+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs16_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs16_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vuzpQs16' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vuzpqs16.x"
-+
-+/* { dg-final { scan-assembler-times "vuzp\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vexts16_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vexts16_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vexts16' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/ext_s16.x"
-+
-+/* { dg-final { scan-assembler-times "vext\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu16_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu16_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vuzpQu16' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vuzpqu16.x"
-+
-+/* { dg-final { scan-assembler-times "vuzp\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpu8_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpu8_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vuzpu8' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vuzpu8.x"
-+
-+/* { dg-final { scan-assembler-times "vuzp\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vextQf32_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQf32_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vextQf32' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/extq_f32.x"
-+
-+/* { dg-final { scan-assembler-times "vext\.32\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vextu16_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextu16_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vextu16' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/ext_u16.x"
-+
-+/* { dg-final { scan-assembler-times "vext\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqf32_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqf32_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vzipQf32' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vzipqf32.x"
-+
-+/* { dg-final { scan-assembler-times "vzip\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32qu8_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32qu8_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vrev32q_u8' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vrev32qu8.x"
-+
-+/* { dg-final { scan-assembler "vrev32\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64s16_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64s16_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vrev64s16' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vrev64s16.x"
-+
-+/* { dg-final { scan-assembler "vrev64\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev16qu8_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev16qu8_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vrev16q_u8' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vrev16qu8.x"
-+
-+/* { dg-final { scan-assembler "vrev16\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64u16_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64u16_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vrev64u16' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vrev64u16.x"
-+
-+/* { dg-final { scan-assembler "vrev64\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qs8_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qs8_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vrev64q_s8' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vrev64qs8.x"
-+
-+/* { dg-final { scan-assembler "vrev64\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vextp8_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextp8_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vextp8' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/ext_p8.x"
-+
-+/* { dg-final { scan-assembler-times "vext\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzps16_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzps16_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vuzps16' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vuzps16.x"
-+
-+/* { dg-final { scan-assembler-times "vuzp\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs8_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs8_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vuzpQs8' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vuzpqs8.x"
-+
-+/* { dg-final { scan-assembler-times "vuzp\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpu16_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpu16_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vuzpu16' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vuzpu16.x"
-+
-+/* { dg-final { scan-assembler-times "vuzp\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vextQs16_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQs16_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vextQs16' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/extq_s16.x"
-+
-+/* { dg-final { scan-assembler-times "vext\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32s8_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32s8_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vrev32s8' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vrev32s8.x"
-+
-+/* { dg-final { scan-assembler "vrev32\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32qs16_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32qs16_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vrev32q_s16' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vrev32qs16.x"
-+
-+/* { dg-final { scan-assembler "vrev32\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vextQu16_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQu16_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vextQu16' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/extq_u16.x"
-+
-+/* { dg-final { scan-assembler-times "vext\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipf32_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipf32_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vzipf32' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vzipf32.x"
-+
-+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqs16_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqs16_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vzipQs16' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vzipqs16.x"
-+
-+/* { dg-final { scan-assembler-times "vzip\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnf32_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnf32_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vtrnf32' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vtrnf32.x"
-+
-+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32qu16_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32qu16_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vrev32q_u16' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vrev32qu16.x"
-+
-+/* { dg-final { scan-assembler "vrev32\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqu16_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqu16_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vzipQu16' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vzipqu16.x"
-+
-+/* { dg-final { scan-assembler-times "vzip\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipu8_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipu8_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vzipu8' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -O1 -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vzipu8.x"
-+
-+/* { dg-final { scan-assembler-times "vzip\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev16u8_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev16u8_1.c
-@@ -0,0 +1,12 @@
-+/* Test the `vrev16u8' ARM Neon intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+/* { dg-add-options arm_neon } */
-+
-+#include "arm_neon.h"
-+#include "../../aarch64/simd/vrev16u8.x"
-+
-+/* { dg-final { scan-assembler "vrev16\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/vect-lfloorf_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/vect-lfloorf_1.c
-@@ -0,0 +1,21 @@
-+/* { dg-do compile } */
-+/* { dg-require-effective-target arm_v8_neon_ok } */
-+/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-all" } */
-+/* { dg-add-options arm_v8_neon } */
-+
-+#define N 32
-+
-+float __attribute__((aligned(16))) input[N];
-+int __attribute__((aligned(16))) output[N];
-+
-+void
-+foo ()
-+{
-+ int i = 0;
-+ /* Vectorizable. */
-+ for (i = 0; i < N; i++)
-+ output[i] = __builtin_lfloorf (input[i]);
-+}
-+
-+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
-+/* { dg-final { cleanup-tree-dump "vect" } } */
---- a/src/gcc/testsuite/gcc.target/arm/pr51835.c
-+++ b/src/gcc/testsuite/gcc.target/arm/pr51835.c
-@@ -13,5 +13,5 @@
- return (unsigned int)d;
- }
-
--/* { dg-final { scan-assembler-times "fmrrd\[\\t \]+r0,\[\\t \]*r1,\[\\t \]*d0" 2 { target { arm_little_endian } } } } */
--/* { dg-final { scan-assembler-times "fmrrd\[\\t \]+r1,\[\\t \]*r0,\[\\t \]*d0" 2 { target { ! arm_little_endian } } } } */
-+/* { dg-final { scan-assembler-times "vmov\[\\t \]+r0,\[\\t \]*r1,\[\\t \]*d0" 2 { target { arm_little_endian } } } } */
-+/* { dg-final { scan-assembler-times "vmov\[\\t \]+r1,\[\\t \]*r0,\[\\t \]*d0" 2 { target { ! arm_little_endian } } } } */
---- a/src/gcc/testsuite/gcc.target/arm/20031108-1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/20031108-1.c
-@@ -20,6 +20,9 @@
-
- Rec_Pointer Ptr_Glob;
-
-+extern int Proc_7 (int, int, int *);
-+
-+void
- Proc_1 (Ptr_Val_Par)
- Rec_Pointer Ptr_Val_Par;
- {
---- a/src/gcc/testsuite/gcc.target/arm/neon-modes-2.c
-+++ b/src/gcc/testsuite/gcc.target/arm/neon-modes-2.c
-@@ -11,6 +11,8 @@
-
- #define MANY(A) A (0), A (1), A (2), A (3), A (4), A (5)
-
-+extern void foo (int *, int *);
-+
- void
- bar (uint32_t *ptr, int y)
- {
---- a/src/gcc/testsuite/gcc.target/arm/vect-rounding-roundf.c
-+++ b/src/gcc/testsuite/gcc.target/arm/vect-rounding-roundf.c
-@@ -5,8 +5,11 @@
-
- #define N 32
-
-+float __attribute__((aligned(16))) input[N];
-+float __attribute__((aligned(16))) output[N];
-+
- void
--foo (float *output, float *input)
-+foo ()
- {
- int i = 0;
- /* Vectorizable. */
---- a/src/gcc/testsuite/gcc.target/arm/pr43920-2.c
-+++ b/src/gcc/testsuite/gcc.target/arm/pr43920-2.c
-@@ -4,6 +4,8 @@
-
- #include <stdio.h>
-
-+extern int lseek(int, long, int);
-+
- int getFileStartAndLength (int fd, int *start_, size_t *length_)
- {
- int start, end;
---- a/src/gcc/testsuite/gcc.target/arm/xordi3-opt.c
-+++ b/src/gcc/testsuite/gcc.target/arm/xordi3-opt.c
-@@ -1,4 +1,4 @@
--/* { dg-do compile } */
-+/* { dg-do compile { target { arm_arm_ok || arm_thumb2_ok} } } */
- /* { dg-options "-O1" } */
-
- unsigned long long xor64 (unsigned long long input)
---- a/src/gcc/testsuite/gcc.target/arm/vect-lroundf_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/vect-lroundf_1.c
-@@ -0,0 +1,21 @@
-+/* { dg-do compile } */
-+/* { dg-require-effective-target arm_v8_neon_ok } */
-+/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-all" } */
-+/* { dg-add-options arm_v8_neon } */
-+
-+#define N 32
-+
-+float __attribute__((aligned(16))) input[N];
-+int __attribute__((aligned(16))) output[N];
-+
-+void
-+foo ()
-+{
-+ int i = 0;
-+ /* Vectorizable. */
-+ for (i = 0; i < N; i++)
-+ output[i] = __builtin_lroundf (input[i]);
-+}
-+
-+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
-+/* { dg-final { cleanup-tree-dump "vect" } } */
---- a/src/gcc/testsuite/gcc.target/arm/tail-long-call.c
-+++ b/src/gcc/testsuite/gcc.target/arm/tail-long-call.c
-@@ -0,0 +1,12 @@
-+/* { dg-skip-if "need at least armv5te" { *-*-* } { "-march=armv[234]*" "-mthumb" } { "" } } */
-+/* { dg-options "-O2 -march=armv5te -marm" } */
-+/* { dg-final { scan-assembler "bx" } } */
-+/* { dg-final { scan-assembler-not "blx" } } */
-+
-+int lcal (int) __attribute__ ((long_call));
-+
-+int
-+dec (int a)
-+{
-+ return lcal (a);
-+}
---- a/src/gcc/testsuite/gcc.target/arm/vect-rounding-btruncf.c
-+++ b/src/gcc/testsuite/gcc.target/arm/vect-rounding-btruncf.c
-@@ -5,8 +5,11 @@
-
- #define N 32
-
-+float __attribute__((aligned(16))) input[N];
-+float __attribute__((aligned(16))) output[N];
-+
- void
--foo (float *output, float *input)
-+foo ()
- {
- int i = 0;
- /* Vectorizable. */
---- a/src/gcc/testsuite/gcc.target/arm/pr61948.c
-+++ b/src/gcc/testsuite/gcc.target/arm/pr61948.c
-@@ -0,0 +1,16 @@
-+/* PR target/61948 */
-+/* { dg-do compile } */
-+/* { dg-require-effective-target arm_neon_ok } */
-+/* { dg-require-effective-target arm_thumb2_ok } */
-+/* { dg-options "-O2 -mthumb" } */
-+/* { dg-add-options arm_neon } */
-+
-+long long f (long long *c)
-+{
-+ long long t = c[0];
-+ asm ("nop" : : : "r0", "r3", "r4", "r5",
-+ "r6", "r7", "r8", "r9",
-+ "r10", "r11", "r12", "memory");
-+ return t >> 1;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/arm/pr51968.c
-+++ b/src/gcc/testsuite/gcc.target/arm/pr51968.c
-@@ -1,6 +1,6 @@
- /* PR target/51968 */
- /* { dg-do compile } */
--/* { dg-options "-O2 -march=armv7-a -mfloat-abi=softfp -mfpu=neon" } */
-+/* { dg-options "-O2 -Wno-implicit-function-declaration -march=armv7-a -mfloat-abi=softfp -mfpu=neon" } */
- /* { dg-require-effective-target arm_neon_ok } */
-
- typedef __builtin_neon_qi int8x8_t __attribute__ ((__vector_size__ (8)));
---- a/src/gcc/testsuite/gcc.target/arm/lround-vcvt_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/lround-vcvt_1.c
-@@ -0,0 +1,21 @@
-+/* { dg-do compile } */
-+/* { dg-require-effective-target arm_v8_vfp_ok } */
-+/* { dg-options "-O2 -march=armv8-a -ffast-math" } */
-+/* { dg-add-options arm_v8_vfp } */
-+
-+int
-+foofloat (float x)
-+{
-+ return __builtin_lroundf (x);
-+}
-+
-+/* { dg-final { scan-assembler-times "vcvta.s32.f32\ts\[0-9\]+, s\[0-9\]+" 1 } } */
-+
-+
-+int
-+foodouble (double x)
-+{
-+ return __builtin_lround (x);
-+}
-+
-+/* { dg-final { scan-assembler-times "vcvta.s32.f64\ts\[0-9\]+, d\[0-9\]+" 1 } } */
---- a/src/gcc/testsuite/gcc.target/arm/pr60650.c
-+++ b/src/gcc/testsuite/gcc.target/arm/pr60650.c
-@@ -20,6 +20,10 @@
- int a, c, d;
- long long e;
-
-+extern int foo1 (struct btrfs_root *, int, int, int);
-+extern int foo2 (struct btrfs_root *, int, int);
-+
-+int
- truncate_one_csum (struct btrfs_root *p1, long long p2, long long p3)
- {
- int f, g, i = p1->fs_info->sb->s_blocksize_bits;
---- a/src/gcc/testsuite/gcc.target/arm/vfp-1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/vfp-1.c
-@@ -11,40 +11,40 @@
-
- void test_sf() {
- /* abssf2_vfp */
-- /* { dg-final { scan-assembler "fabss" } } */
-+ /* { dg-final { scan-assembler "vabs.f32" } } */
- f1 = fabsf (f1);
- /* negsf2_vfp */
-- /* { dg-final { scan-assembler "fnegs" } } */
-+ /* { dg-final { scan-assembler "vneg.f32" } } */
- f1 = -f1;
- /* addsf3_vfp */
-- /* { dg-final { scan-assembler "fadds" } } */
-+ /* { dg-final { scan-assembler "vadd.f32" } } */
- f1 = f2 + f3;
- /* subsf3_vfp */
-- /* { dg-final { scan-assembler "fsubs" } } */
-+ /* { dg-final { scan-assembler "vsub.f32" } } */
- f1 = f2 - f3;
- /* divsf3_vfp */
-- /* { dg-final { scan-assembler "fdivs" } } */
-+ /* { dg-final { scan-assembler "vdiv.f32" } } */
- f1 = f2 / f3;
- /* mulsf3_vfp */
-- /* { dg-final { scan-assembler "fmuls" } } */
-+ /* { dg-final { scan-assembler "vmul.f32" } } */
- f1 = f2 * f3;
- /* mulsf3negsf_vfp */
-- /* { dg-final { scan-assembler "fnmuls" } } */
-+ /* { dg-final { scan-assembler "vnmul.f32" } } */
- f1 = -f2 * f3;
- /* mulsf3addsf_vfp */
-- /* { dg-final { scan-assembler "fmacs" } } */
-+ /* { dg-final { scan-assembler "vmla.f32" } } */
- f1 = f2 * f3 + f1;
- /* mulsf3subsf_vfp */
-- /* { dg-final { scan-assembler "fmscs" } } */
-+ /* { dg-final { scan-assembler "vnmls.f32" } } */
- f1 = f2 * f3 - f1;
- /* mulsf3negsfaddsf_vfp */
-- /* { dg-final { scan-assembler "fnmacs" } } */
-+ /* { dg-final { scan-assembler "vmls.f32" } } */
- f1 = f2 - f3 * f1;
- /* mulsf3negsfsubsf_vfp */
-- /* { dg-final { scan-assembler "fnmscs" } } */
-+ /* { dg-final { scan-assembler "vnmla.f32" } } */
- f1 = -f2 * f3 - f1;
- /* sqrtsf2_vfp */
-- /* { dg-final { scan-assembler "fsqrts" } } */
-+ /* { dg-final { scan-assembler "vsqrt.f32" } } */
- f1 = sqrtf (f1);
- }
-
-@@ -52,40 +52,40 @@
-
- void test_df() {
- /* absdf2_vfp */
-- /* { dg-final { scan-assembler "fabsd" } } */
-+ /* { dg-final { scan-assembler "vabs.f64" } } */
- d1 = fabs (d1);
- /* negdf2_vfp */
-- /* { dg-final { scan-assembler "fnegd" } } */
-+ /* { dg-final { scan-assembler "vneg.f64" } } */
- d1 = -d1;
- /* adddf3_vfp */
-- /* { dg-final { scan-assembler "faddd" } } */
-+ /* { dg-final { scan-assembler "vadd.f64" } } */
- d1 = d2 + d3;
- /* subdf3_vfp */
-- /* { dg-final { scan-assembler "fsubd" } } */
-+ /* { dg-final { scan-assembler "vsub.f64" } } */
- d1 = d2 - d3;
- /* divdf3_vfp */
-- /* { dg-final { scan-assembler "fdivd" } } */
-+ /* { dg-final { scan-assembler "vdiv.f64" } } */
- d1 = d2 / d3;
- /* muldf3_vfp */
-- /* { dg-final { scan-assembler "fmuld" } } */
-+ /* { dg-final { scan-assembler "vmul.f64" } } */
- d1 = d2 * d3;
- /* muldf3negdf_vfp */
-- /* { dg-final { scan-assembler "fnmuld" } } */
-+ /* { dg-final { scan-assembler "vnmul.f64" } } */
- d1 = -d2 * d3;
- /* muldf3adddf_vfp */
-- /* { dg-final { scan-assembler "fmacd" } } */
-+ /* { dg-final { scan-assembler "vmla.f64" } } */
- d1 = d2 * d3 + d1;
- /* muldf3subdf_vfp */
-- /* { dg-final { scan-assembler "fmscd" } } */
-+ /* { dg-final { scan-assembler "vnmls.f64" } } */
- d1 = d2 * d3 - d1;
- /* muldf3negdfadddf_vfp */
-- /* { dg-final { scan-assembler "fnmacd" } } */
-+ /* { dg-final { scan-assembler "vmls.f64" } } */
- d1 = d2 - d3 * d1;
- /* muldf3negdfsubdf_vfp */
-- /* { dg-final { scan-assembler "fnmscd" } } */
-+ /* { dg-final { scan-assembler "vnmla.f64" } } */
- d1 = -d2 * d3 - d1;
- /* sqrtdf2_vfp */
-- /* { dg-final { scan-assembler "fsqrtd" } } */
-+ /* { dg-final { scan-assembler "vsqrt.f64" } } */
- d1 = sqrt (d1);
- }
-
-@@ -94,46 +94,46 @@
-
- void test_convert () {
- /* extendsfdf2_vfp */
-- /* { dg-final { scan-assembler "fcvtds" } } */
-+ /* { dg-final { scan-assembler "vcvt.f64.f32" } } */
- d1 = f1;
- /* truncdfsf2_vfp */
-- /* { dg-final { scan-assembler "fcvtsd" } } */
-+ /* { dg-final { scan-assembler "vcvt.f32.f64" } } */
- f1 = d1;
- /* truncsisf2_vfp */
-- /* { dg-final { scan-assembler "ftosizs" } } */
-+ /* { dg-final { scan-assembler "vcvt.s32.f32" } } */
- i1 = f1;
- /* truncsidf2_vfp */
-- /* { dg-final { scan-assembler "ftosizd" } } */
-+ /* { dg-final { scan-assembler "vcvt.s32.f64" } } */
- i1 = d1;
- /* fixuns_truncsfsi2 */
-- /* { dg-final { scan-assembler "ftouizs" } } */
-+ /* { dg-final { scan-assembler "vcvt.u32.f32" } } */
- u1 = f1;
- /* fixuns_truncdfsi2 */
-- /* { dg-final { scan-assembler "ftouizd" } } */
-+ /* { dg-final { scan-assembler "vcvt.u32.f64" } } */
- u1 = d1;
- /* floatsisf2_vfp */
-- /* { dg-final { scan-assembler "fsitos" } } */
-+ /* { dg-final { scan-assembler "vcvt.f32.s32" } } */
- f1 = i1;
- /* floatsidf2_vfp */
-- /* { dg-final { scan-assembler "fsitod" } } */
-+ /* { dg-final { scan-assembler "vcvt.f64.s32" } } */
- d1 = i1;
- /* floatunssisf2 */
-- /* { dg-final { scan-assembler "fuitos" } } */
-+ /* { dg-final { scan-assembler "vcvt.f32.u32" } } */
- f1 = u1;
- /* floatunssidf2 */
-- /* { dg-final { scan-assembler "fuitod" } } */
-+ /* { dg-final { scan-assembler "vcvt.f64.u32" } } */
- d1 = u1;
- }
-
- void test_ldst (float f[], double d[]) {
-- /* { dg-final { scan-assembler "flds.+ \\\[r0, #1020\\\]" } } */
-- /* { dg-final { scan-assembler "flds.+ \\\[r\[0-9\], #-1020\\\]" { target { arm32 && { ! arm_thumb2_ok } } } } } */
-+ /* { dg-final { scan-assembler "vldr.32.+ \\\[r0, #1020\\\]" } } */
-+ /* { dg-final { scan-assembler "vldr.32.+ \\\[r\[0-9\], #-1020\\\]" { target { arm32 && { ! arm_thumb2_ok } } } } } */
- /* { dg-final { scan-assembler "add.+ r0, #1024" } } */
-- /* { dg-final { scan-assembler "fsts.+ \\\[r\[0-9\]\\\]\n" } } */
-+ /* { dg-final { scan-assembler "vstr.32.+ \\\[r\[0-9\]\\\]\n" } } */
- f[256] = f[255] + f[-255];
-
-- /* { dg-final { scan-assembler "fldd.+ \\\[r1, #1016\\\]" } } */
-- /* { dg-final { scan-assembler "fldd.+ \\\[r\[1-9\], #-1016\\\]" { target { arm32 && { ! arm_thumb2_ok } } } } } */
-- /* { dg-final { scan-assembler "fstd.+ \\\[r1, #256\\\]" } } */
-+ /* { dg-final { scan-assembler "vldr.64.+ \\\[r1, #1016\\\]" } } */
-+ /* { dg-final { scan-assembler "vldr.64.+ \\\[r\[1-9\], #-1016\\\]" { target { arm32 && { ! arm_thumb2_ok } } } } } */
-+ /* { dg-final { scan-assembler "vstr.64.+ \\\[r1, #256\\\]" } } */
- d[32] = d[127] + d[-127];
- }
---- a/src/gcc/testsuite/gcc.target/arm/vect-copysignf.c
-+++ b/src/gcc/testsuite/gcc.target/arm/vect-copysignf.c
-@@ -0,0 +1,36 @@
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_neon_hw } */
-+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details" } */
-+/* { dg-add-options "arm_neon" } */
-+
-+extern void abort ();
-+
-+#define N 16
-+float a[N] = {-0.1f, -3.2f, -6.3f, -9.4f,
-+ -12.5f, -15.6f, -18.7f, -21.8f,
-+ 24.9f, 27.1f, 30.2f, 33.3f,
-+ 36.4f, 39.5f, 42.6f, 45.7f};
-+float b[N] = {-1.2f, 3.4f, -5.6f, 7.8f,
-+ -9.0f, 1.0f, -2.0f, 3.0f,
-+ -4.0f, -5.0f, 6.0f, 7.0f,
-+ -8.0f, -9.0f, 10.0f, 11.0f};
-+float r[N];
-+
-+int
-+main (void)
-+{
-+ int i;
-+
-+ for (i = 0; i < N; i++)
-+ r[i] = __builtin_copysignf (a[i], b[i]);
-+
-+ /* check results: */
-+ for (i = 0; i < N; i++)
-+ if (r[i] != __builtin_copysignf (a[i], b[i]))
-+ abort ();
-+
-+ return 0;
-+}
-+
-+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
-+/* { dg-final { cleanup-tree-dump "vect" } } */
---- a/src/gcc/testsuite/gcc.target/arm/rev16.c
-+++ b/src/gcc/testsuite/gcc.target/arm/rev16.c
-@@ -0,0 +1,35 @@
-+/* { dg-options "-O2" } */
-+/* { dg-do run } */
-+
-+extern void abort (void);
-+
-+typedef unsigned int __u32;
-+
-+__u32
-+__rev16_32_alt (__u32 x)
-+{
-+ return (((__u32)(x) & (__u32)0xff00ff00UL) >> 8)
-+ | (((__u32)(x) & (__u32)0x00ff00ffUL) << 8);
-+}
-+
-+__u32
-+__rev16_32 (__u32 x)
-+{
-+ return (((__u32)(x) & (__u32)0x00ff00ffUL) << 8)
-+ | (((__u32)(x) & (__u32)0xff00ff00UL) >> 8);
-+}
-+
-+int
-+main (void)
-+{
-+ volatile __u32 in32 = 0x12345678;
-+ volatile __u32 expected32 = 0x34127856;
-+
-+ if (__rev16_32 (in32) != expected32)
-+ abort ();
-+
-+ if (__rev16_32_alt (in32) != expected32)
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/arm/anddi_notdi-1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/anddi_notdi-1.c
-@@ -0,0 +1,65 @@
-+/* { dg-do run } */
-+/* { dg-options "-O2 -fno-inline --save-temps" } */
-+
-+extern void abort (void);
-+
-+typedef long long s64int;
-+typedef int s32int;
-+typedef unsigned long long u64int;
-+typedef unsigned int u32int;
-+
-+s64int
-+anddi_di_notdi (s64int a, s64int b)
-+{
-+ return (a & ~b);
-+}
-+
-+s64int
-+anddi_di_notzesidi (s64int a, u32int b)
-+{
-+ return (a & ~(u64int) b);
-+}
-+
-+s64int
-+anddi_notdi_zesidi (s64int a, u32int b)
-+{
-+ return (~a & (u64int) b);
-+}
-+
-+s64int
-+anddi_di_notsesidi (s64int a, s32int b)
-+{
-+ return (a & ~(s64int) b);
-+}
-+
-+int main ()
-+{
-+ s64int a64 = 0xdeadbeef0000ffffll;
-+ s64int b64 = 0x000000005f470112ll;
-+ s64int c64 = 0xdeadbeef300f0000ll;
-+
-+ u32int c32 = 0x01124f4f;
-+ s32int d32 = 0xabbaface;
-+
-+ s64int z = anddi_di_notdi (c64, b64);
-+ if (z != 0xdeadbeef20080000ll)
-+ abort ();
-+
-+ z = anddi_di_notzesidi (a64, c32);
-+ if (z != 0xdeadbeef0000b0b0ll)
-+ abort ();
-+
-+ z = anddi_notdi_zesidi (c64, c32);
-+ if (z != 0x0000000001104f4fll)
-+ abort ();
-+
-+ z = anddi_di_notsesidi (a64, d32);
-+ if (z != 0x0000000000000531ll)
-+ abort ();
-+
-+ return 0;
-+}
-+
-+/* { dg-final { scan-assembler-times "bic\t" 6 } } */
-+
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/arm/pr63210.c
-+++ b/src/gcc/testsuite/gcc.target/arm/pr63210.c
-@@ -0,0 +1,12 @@
-+/* { dg-do assemble } */
-+/* { dg-options "-mthumb -Os " } */
-+/* { dg-require-effective-target arm_thumb1_ok } */
-+
-+int foo1 (int c);
-+int foo2 (int c);
-+
-+int test (int c)
-+{
-+ return (foo1 (c) || foo2 (c));
-+}
-+/* { dg-final { object-size text <= 28 } } */
---- a/src/gcc/testsuite/gcc.target/arm/pr60606-2.c
-+++ b/src/gcc/testsuite/gcc.target/arm/pr60606-2.c
-@@ -0,0 +1,10 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O" } */
-+
-+int
-+f (void)
-+{
-+ register unsigned pc asm ("pc"); /* { dg-error "not general enough" } */
-+
-+ return pc > 0x12345678;
-+}
---- a/src/gcc/testsuite/gcc.target/arm/vect-rounding-ceilf.c
-+++ b/src/gcc/testsuite/gcc.target/arm/vect-rounding-ceilf.c
-@@ -5,8 +5,11 @@
-
- #define N 32
-
-+float __attribute__((aligned(16))) input[N];
-+float __attribute__((aligned(16))) output[N];
-+
- void
--foo (float *output, float *input)
-+foo ()
- {
- int i = 0;
- /* Vectorizable. */
---- a/src/gcc/testsuite/gcc.target/arm/pr60650-2.c
-+++ b/src/gcc/testsuite/gcc.target/arm/pr60650-2.c
-@@ -4,17 +4,19 @@
- int a, h, j;
- long long d, e, i;
- int f;
-+int
- fn1 (void *p1, int p2)
- {
- switch (p2)
- case 8:
- {
-- register b = *(long long *) p1, c asm ("r2");
-+ register int b = *(long long *) p1, c asm ("r2");
- asm ("%0": "=r" (a), "=r" (c):"r" (b), "r" (0));
- *(long long *) p1 = c;
- }
- }
-
-+int
- fn2 ()
- {
- int k;
-@@ -27,8 +29,8 @@
- case 0:
- (
- {
-- register l asm ("r4");
-- register m asm ("r0");
-+ register int l asm ("r4");
-+ register int m asm ("r0");
- asm (" .err .endif\n\t": "=r" (h), "=r" (j):"r" (m),
- "r"
- (l));;
---- a/src/gcc/testsuite/gcc.target/arm/pr55642.c
-+++ b/src/gcc/testsuite/gcc.target/arm/pr55642.c
-@@ -2,6 +2,8 @@
- /* { dg-do compile } */
- /* { dg-require-effective-target arm_thumb2_ok } */
-
-+extern int abs (int);
-+
- int
- foo (int v)
- {
---- a/src/gcc/testsuite/gcc.target/arm/lfloor-vcvt_1.c
-+++ b/src/gcc/testsuite/gcc.target/arm/lfloor-vcvt_1.c
-@@ -0,0 +1,21 @@
-+/* { dg-do compile } */
-+/* { dg-require-effective-target arm_v8_vfp_ok } */
-+/* { dg-options "-O2 -march=armv8-a" } */
-+/* { dg-add-options arm_v8_vfp } */
-+
-+int
-+foofloat (float x)
-+{
-+ return __builtin_lfloorf (x);
-+}
-+
-+/* { dg-final { scan-assembler-times "vcvtm.s32.f32\ts\[0-9\]+, s\[0-9\]+" 1 } } */
-+
-+
-+int
-+foodouble (double x)
-+{
-+ return __builtin_lfloor (x);
-+}
-+
-+/* { dg-final { scan-assembler-times "vcvtm.s32.f64\ts\[0-9\]+, d\[0-9\]+" 1 } } */
---- a/src/gcc/testsuite/gcc.target/arm/pr60606-3.c
-+++ b/src/gcc/testsuite/gcc.target/arm/pr60606-3.c
-@@ -0,0 +1,9 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O" } */
-+
-+int
-+f (void)
-+{
-+ register unsigned int r asm ("cc"); /* { dg-error "not general enough|suitable for data type" } */
-+ return r;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_1.c
-@@ -0,0 +1,19 @@
-+/* Verify:
-+ * -fomit-frame-pointer.
-+   * without outgoing.
-+ * total frame size <= 256.
-+ * number of callee-save reg == 1.
-+ * optimized code should use "str !" for stack adjustment. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */
-+
-+#include "test_frame_common.h"
-+
-+t_frame_pattern (test1, 200, )
-+t_frame_run (test1)
-+
-+/* { dg-final { scan-assembler-times "str\tx30, \\\[sp, -\[0-9\]+\\\]!" 2 } } */
-+/* { dg-final { scan-assembler-times "ldr\tx30, \\\[sp\\\], \[0-9\]+" 3 } } */
-+
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_9.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_9.c
-@@ -0,0 +1,17 @@
-+/* Verify:
-+ * -fomit-frame-pointer.
-+ * with outgoing.
-+ * total frame size > 512.
-+ area except outgoing <= 512
-+ * number of callee-saved reg = 1.
-+ * Split stack adjustment into two subtractions.
-+     the first subtraction couldn't be optimized
-+ into "str !" as it's > 256. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-O2 -fomit-frame-pointer" } */
-+
-+#include "test_frame_common.h"
-+
-+t_frame_pattern_outgoing (test9, 480, , 24, a[8], a[9], a[10])
-+t_frame_run (test9)
---- a/src/gcc/testsuite/gcc.target/aarch64/vldN_lane_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/vldN_lane_1.c
-@@ -0,0 +1,97 @@
-+/* { dg-do run } */
-+/* { dg-options "-O3 -fno-inline" } */
-+
-+#include <arm_neon.h>
-+
-+extern void abort (void);
-+
-+#define VARIANTS(VARIANT, STRUCT) \
-+VARIANT (uint8, , 8, _u8, 6, STRUCT) \
-+VARIANT (uint16, , 4, _u16, 3, STRUCT) \
-+VARIANT (uint32, , 2, _u32, 1, STRUCT) \
-+VARIANT (uint64, , 1, _u64, 0, STRUCT) \
-+VARIANT (int8, , 8, _s8, 5, STRUCT) \
-+VARIANT (int16, , 4, _s16, 2, STRUCT) \
-+VARIANT (int32, , 2, _s32, 0, STRUCT) \
-+VARIANT (int64, , 1, _s64, 0, STRUCT) \
-+VARIANT (poly8, , 8, _p8, 7, STRUCT) \
-+VARIANT (poly16, , 4, _p16, 1, STRUCT) \
-+VARIANT (float32, , 2, _f32, 1, STRUCT) \
-+VARIANT (float64, , 1, _f64, 0, STRUCT) \
-+VARIANT (uint8, q, 16, _u8, 14, STRUCT) \
-+VARIANT (uint16, q, 8, _u16, 4, STRUCT) \
-+VARIANT (uint32, q, 4, _u32, 3, STRUCT) \
-+VARIANT (uint64, q, 2, _u64, 0, STRUCT) \
-+VARIANT (int8, q, 16, _s8, 13, STRUCT) \
-+VARIANT (int16, q, 8, _s16, 6, STRUCT) \
-+VARIANT (int32, q, 4, _s32, 2, STRUCT) \
-+VARIANT (int64, q, 2, _s64, 1, STRUCT) \
-+VARIANT (poly8, q, 16, _p8, 12, STRUCT) \
-+VARIANT (poly16, q, 8, _p16, 5, STRUCT) \
-+VARIANT (float32, q, 4, _f32, 1, STRUCT)\
-+VARIANT (float64, q, 2, _f64, 0, STRUCT)
-+
-+#define TESTMETH(BASE, Q, ELTS, SUFFIX, LANE, STRUCT) \
-+int \
-+test_vld##STRUCT##Q##_lane##SUFFIX (const BASE##_t *data, \
-+ const BASE##_t *overwrite) \
-+{ \
-+ BASE##x##ELTS##x##STRUCT##_t vectors; \
-+ BASE##_t temp[ELTS]; \
-+ int i,j; \
-+ for (i = 0; i < STRUCT; i++, data += ELTS) \
-+ vectors.val[i] = vld1##Q##SUFFIX (data); \
-+ vectors = vld##STRUCT##Q##_lane##SUFFIX (overwrite, vectors, LANE); \
-+ while (--i >= 0) \
-+ { \
-+ vst1##Q##SUFFIX (temp, vectors.val[i]); \
-+ data -= ELTS; /* Point at value loaded before vldN_lane. */ \
-+ for (j = 0; j < ELTS; j++) \
-+ if (temp[j] != (j == LANE ? overwrite[i] : data[j])) \
-+ return 1; \
-+ } \
-+ return 0; \
-+}
-+
-+
-+/* Tests of vld2_lane and vld2q_lane.  */
-+VARIANTS (TESTMETH, 2)
-+/* Tests of vld3_lane and vld3q_lane.  */
-+VARIANTS (TESTMETH, 3)
-+/* Tests of vld4_lane and vld4q_lane.  */
-+VARIANTS (TESTMETH, 4)
-+
-+#define CHECK(BASE, Q, ELTS, SUFFIX, LANE, STRUCT) \
-+ if (test_vld##STRUCT##Q##_lane##SUFFIX ((const BASE##_t *)orig_data, \
-+ BASE##_data) != 0) \
-+ abort ();
-+
-+int
-+main (int argc, char **argv)
-+{
-+ /* Original data for all vector formats. */
-+ uint64_t orig_data[8] = {0x1234567890abcdefULL, 0x13579bdf02468aceULL,
-+ 0x012389ab4567cdefULL, 0xfeeddadacafe0431ULL,
-+ 0x1032547698badcfeULL, 0xbadbadbadbad0badULL,
-+ 0x0102030405060708ULL, 0x0f0e0d0c0b0a0908ULL};
-+
-+  /* Data with which vldN_lane will overwrite some of the previous values.  */
-+ uint8_t uint8_data[4] = { 7, 11, 13, 17 };
-+ uint16_t uint16_data[4] = { 257, 263, 269, 271 };
-+ uint32_t uint32_data[4] = { 65537, 65539, 65543, 65551 };
-+ uint64_t uint64_data[4] = { 0xdeadbeefcafebabeULL, 0x0123456789abcdefULL,
-+ 0xfedcba9876543210LL, 0xdeadbabecafebeefLL };
-+ int8_t int8_data[4] = { -1, 3, -5, 7 };
-+ int16_t int16_data[4] = { 257, -259, 261, -263 };
-+ int32_t int32_data[4] = { 123456789, -987654321, -135792468, 975318642 };
-+ int64_t *int64_data = (int64_t *)uint64_data;
-+ poly8_t poly8_data[4] = { 0, 7, 13, 18, };
-+ poly16_t poly16_data[4] = { 11111, 2222, 333, 44 };
-+ float32_t float32_data[4] = { 3.14159, 2.718, 1.414, 100.0 };
-+ float64_t float64_data[4] = { 1.010010001, 12345.6789, -9876.54321, 1.618 };
-+
-+ VARIANTS (CHECK, 2);
-+ VARIANTS (CHECK, 3);
-+ VARIANTS (CHECK, 4);
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/vldN_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/vldN_1.c
-@@ -0,0 +1,79 @@
-+/* { dg-do run } */
-+/* { dg-options "-O3" } */
-+
-+#include <arm_neon.h>
-+
-+extern void abort (void);
-+
-+#define TESTMETH(BASE, ELTS, STRUCT, SUFFIX) \
-+int __attribute__ ((noinline)) \
-+test_vld##STRUCT##SUFFIX () \
-+{ \
-+ BASE##_t data[ELTS * STRUCT]; \
-+ BASE##_t temp[ELTS]; \
-+ BASE##x##ELTS##x##STRUCT##_t vectors; \
-+ int i,j; \
-+ for (i = 0; i < STRUCT * ELTS; i++) \
-+ data [i] = (BASE##_t) 2*i + 1; \
-+ asm volatile ("" : : : "memory"); \
-+ vectors = vld##STRUCT##SUFFIX (data); \
-+ for (i = 0; i < STRUCT; i++) \
-+ { \
-+ vst1##SUFFIX (temp, vectors.val[i]); \
-+ asm volatile ("" : : : "memory"); \
-+ for (j = 0; j < ELTS; j++) \
-+ if (temp[j] != data[i + STRUCT*j]) \
-+ return 1; \
-+ } \
-+ return 0; \
-+}
-+
-+#define VARIANTS(VARIANT, STRUCT) \
-+VARIANT (uint8, 8, STRUCT, _u8) \
-+VARIANT (uint16, 4, STRUCT, _u16) \
-+VARIANT (uint32, 2, STRUCT, _u32) \
-+VARIANT (uint64, 1, STRUCT, _u64) \
-+VARIANT (int8, 8, STRUCT, _s8) \
-+VARIANT (int16, 4, STRUCT, _s16) \
-+VARIANT (int32, 2, STRUCT, _s32) \
-+VARIANT (int64, 1, STRUCT, _s64) \
-+VARIANT (poly8, 8, STRUCT, _p8) \
-+VARIANT (poly16, 4, STRUCT, _p16) \
-+VARIANT (float32, 2, STRUCT, _f32) \
-+VARIANT (float64, 1, STRUCT, _f64) \
-+VARIANT (uint8, 16, STRUCT, q_u8) \
-+VARIANT (uint16, 8, STRUCT, q_u16) \
-+VARIANT (uint32, 4, STRUCT, q_u32) \
-+VARIANT (uint64, 2, STRUCT, q_u64) \
-+VARIANT (int8, 16, STRUCT, q_s8) \
-+VARIANT (int16, 8, STRUCT, q_s16) \
-+VARIANT (int32, 4, STRUCT, q_s32) \
-+VARIANT (int64, 2, STRUCT, q_s64) \
-+VARIANT (poly8, 16, STRUCT, q_p8) \
-+VARIANT (poly16, 8, STRUCT, q_p16) \
-+VARIANT (float32, 4, STRUCT, q_f32) \
-+VARIANT (float64, 2, STRUCT, q_f64)
-+
-+/* Tests of vld2 and vld2q. */
-+VARIANTS (TESTMETH, 2)
-+
-+/* Tests of vld3 and vld3q. */
-+VARIANTS (TESTMETH, 3)
-+
-+/* Tests of vld4 and vld4q. */
-+VARIANTS (TESTMETH, 4)
-+
-+#define CHECK(BASE, ELTS, STRUCT, SUFFIX) \
-+ if (test_vld##STRUCT##SUFFIX () != 0) \
-+ abort ();
-+
-+int
-+main (int argc, char **argv)
-+{
-+ VARIANTS (CHECK, 2)
-+ VARIANTS (CHECK, 3)
-+ VARIANTS (CHECK, 4)
-+
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/vqabs_s64_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/vqabs_s64_1.c
-@@ -0,0 +1,54 @@
-+/* Test vqabs_s64 intrinsics work correctly. */
-+/* { dg-do run } */
-+/* { dg-options "--save-temps" } */
-+
-+#include <arm_neon.h>
-+
-+extern void abort (void);
-+
-+int __attribute__ ((noinline))
-+test_vqabs_s64 (int64x1_t passed, int64_t expected)
-+{
-+ return vget_lane_s64 (vqabs_s64 (passed), 0) != expected;
-+}
-+
-+int __attribute__ ((noinline))
-+test_vqabsd_s64 (int64_t passed, int64_t expected)
-+{
-+ return vqabsd_s64 (passed) != expected;
-+}
-+
-+/* { dg-final { scan-assembler-times "sqabs\\td\[0-9\]+, d\[0-9\]+" 2 } } */
-+
-+int
-+main (int argc, char **argv)
-+{
-+ /* Basic test. */
-+ if (test_vqabs_s64 (vcreate_s64 (-1), 1))
-+ abort ();
-+ if (test_vqabsd_s64 (-1, 1))
-+ abort ();
-+
-+ /* Getting absolute value of min int64_t.
-+ Note, exact result cannot be represented in int64_t,
-+ so max int64_t is expected. */
-+ if (test_vqabs_s64 (vcreate_s64 (0x8000000000000000), 0x7fffffffffffffff))
-+ abort ();
-+ if (test_vqabsd_s64 (0x8000000000000000, 0x7fffffffffffffff))
-+ abort ();
-+
-+ /* Another input that gets max int64_t. */
-+ if (test_vqabs_s64 (vcreate_s64 (0x8000000000000001), 0x7fffffffffffffff))
-+ abort ();
-+ if (test_vqabsd_s64 (0x8000000000000001, 0x7fffffffffffffff))
-+ abort ();
-+
-+ /* Checking that large positive numbers stay the same. */
-+ if (test_vqabs_s64 (vcreate_s64 (0x7fffffffffffffff), 0x7fffffffffffffff))
-+ abort ();
-+ if (test_vqabsd_s64 (0x7fffffffffffffff, 0x7fffffffffffffff))
-+ abort ();
-+
-+ return 0;
-+}
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/acle/acle.exp
-+++ b/src/gcc/testsuite/gcc.target/aarch64/acle/acle.exp
-@@ -0,0 +1,35 @@
-+# Copyright (C) 2014 Free Software Foundation, Inc.
-+
-+# This program is free software; you can redistribute it and/or modify
-+# it under the terms of the GNU General Public License as published by
-+# the Free Software Foundation; either version 3 of the License, or
-+# (at your option) any later version.
-+#
-+# This program is distributed in the hope that it will be useful,
-+# but WITHOUT ANY WARRANTY; without even the implied warranty of
-+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-+# GNU General Public License for more details.
-+#
-+# You should have received a copy of the GNU General Public License
-+# along with GCC; see the file COPYING3. If not see
-+# <http://www.gnu.org/licenses/>.
-+
-+# GCC testsuite that uses the `dg.exp' driver.
-+
-+# Exit immediately if this isn't an AArch64 target.
-+if ![istarget aarch64*-*-*] then {
-+ return
-+}
-+
-+# Load support procs.
-+load_lib gcc-dg.exp
-+
-+# Initialize `dg'.
-+dg-init
-+
-+# Main loop.
-+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \
-+ "" ""
-+
-+# All done.
-+dg-finish
---- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32b.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32b.c
-@@ -0,0 +1,15 @@
-+/* Test the crc32b ACLE intrinsic. */
-+
-+/* { dg-do assemble } */
-+/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */
-+
-+#include "arm_acle.h"
-+
-+uint32_t
-+test_crc32b (uint32_t arg0, uint8_t arg1)
-+{
-+ return __crc32b (arg0, arg1);
-+}
-+
-+/* { dg-final { scan-assembler "crc32b\tw..?, w..?, w..?\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32d.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32d.c
-@@ -0,0 +1,15 @@
-+/* Test the crc32d ACLE intrinsic. */
-+
-+/* { dg-do assemble } */
-+/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */
-+
-+#include "arm_acle.h"
-+
-+uint32_t
-+test_crc32d (uint32_t arg0, uint64_t arg1)
-+{
-+ return __crc32d (arg0, arg1);
-+}
-+
-+/* { dg-final { scan-assembler "crc32x\tw..?, w..?, x..?\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32cb.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32cb.c
-@@ -0,0 +1,15 @@
-+/* Test the crc32cb ACLE intrinsic. */
-+
-+/* { dg-do assemble } */
-+/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */
-+
-+#include "arm_acle.h"
-+
-+uint32_t
-+test_crc32cb (uint32_t arg0, uint8_t arg1)
-+{
-+ return __crc32cb (arg0, arg1);
-+}
-+
-+/* { dg-final { scan-assembler "crc32cb\tw..?, w..?, w..?\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32cd.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32cd.c
-@@ -0,0 +1,15 @@
-+/* Test the crc32cd ACLE intrinsic. */
-+
-+/* { dg-do assemble } */
-+/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */
-+
-+#include "arm_acle.h"
-+
-+uint32_t
-+test_crc32cd (uint32_t arg0, uint64_t arg1)
-+{
-+ return __crc32cd (arg0, arg1);
-+}
-+
-+/* { dg-final { scan-assembler "crc32cx\tw..?, w..?, x..?\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32w.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32w.c
-@@ -0,0 +1,15 @@
-+/* Test the crc32w ACLE intrinsic. */
-+
-+/* { dg-do assemble } */
-+/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */
-+
-+#include "arm_acle.h"
-+
-+uint32_t
-+test_crc32w (uint32_t arg0, uint32_t arg1)
-+{
-+ return __crc32w (arg0, arg1);
-+}
-+
-+/* { dg-final { scan-assembler "crc32w\tw..?, w..?, w..?\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32h.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32h.c
-@@ -0,0 +1,15 @@
-+/* Test the crc32h ACLE intrinsic. */
-+
-+/* { dg-do assemble } */
-+/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */
-+
-+#include "arm_acle.h"
-+
-+uint32_t
-+test_crc32h (uint32_t arg0, uint16_t arg1)
-+{
-+ return __crc32h (arg0, arg1);
-+}
-+
-+/* { dg-final { scan-assembler "crc32h\tw..?, w..?, w..?\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32cw.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32cw.c
-@@ -0,0 +1,15 @@
-+/* Test the crc32cw ACLE intrinsic. */
-+
-+/* { dg-do assemble } */
-+/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */
-+
-+#include "arm_acle.h"
-+
-+uint32_t
-+test_crc32cw (uint32_t arg0, uint32_t arg1)
-+{
-+ return __crc32cw (arg0, arg1);
-+}
-+
-+/* { dg-final { scan-assembler "crc32cw\tw..?, w..?, w..?\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32ch.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32ch.c
-@@ -0,0 +1,15 @@
-+/* Test the crc32ch ACLE intrinsic. */
-+
-+/* { dg-do assemble } */
-+/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */
-+
-+#include "arm_acle.h"
-+
-+uint32_t
-+test_crc32ch (uint32_t arg0, uint16_t arg1)
-+{
-+ return __crc32ch (arg0, arg1);
-+}
-+
-+/* { dg-final { scan-assembler "crc32ch\tw..?, w..?, w..?\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_13.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_13.c
-@@ -0,0 +1,18 @@
-+/* Verify:
-+ * without outgoing.
-+ * total frame size > 512.
-+ * number of callee-save reg >= 2.
-+   * split the stack adjustment into two subtractions,
-+ the second could be optimized into "stp !". */
-+
-+/* { dg-do run } */
-+/* { dg-options "-O2 --save-temps" } */
-+
-+#include "test_frame_common.h"
-+
-+t_frame_pattern (test13, 700, )
-+t_frame_run (test13)
-+
-+/* { dg-final { scan-assembler-times "sub\tsp, sp, #\[0-9\]+" 1 } } */
-+/* { dg-final { scan-assembler-times "stp\tx29, x30, \\\[sp, -\[0-9\]+\\\]!" 2 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_2.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_2.c
-@@ -0,0 +1,20 @@
-+/* Verify:
-+ * -fomit-frame-pointer.
-+ * without outgoing.
-+ * total frame size <= 256.
-+ * number of callee-save regs >= 2.
-+ * optimized code should use "stp !" for stack adjustment. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */
-+
-+#include "test_frame_common.h"
-+
-+t_frame_pattern (test2, 200, "x19")
-+t_frame_run (test2)
-+
-+
-+/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, -\[0-9\]+\\\]!" 1 } } */
-+/* { dg-final { scan-assembler-times "ldp\tx19, x30, \\\[sp\\\], \[0-9\]+" 2 } } */
-+
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/legitimize_stack_var_before_reload_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/legitimize_stack_var_before_reload_1.c
-@@ -0,0 +1,21 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O2 -fdump-rtl-expand" } */
-+
-+extern void initialize_array (unsigned char *, int);
-+
-+int
-+test15 (void)
-+{
-+ unsigned char a[480];
-+
-+ initialize_array (a, 480);
-+
-+ if (a[0] == 0x10)
-+ return 1;
-+
-+ return 0;
-+}
-+
-+/* { dg-final { scan-rtl-dump "\\(mem\[^\\n\]*\\(plus\[^\\n\]*virtual-stack-vars" "expand" } } */
-+
-+/* { dg-final { cleanup-rtl-dump "expand" } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/vreinterpret_f64_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/vreinterpret_f64_1.c
-@@ -0,0 +1,596 @@
-+/* Test vreinterpret_f64_* and vreinterpret_*_f64 intrinsics work correctly. */
-+/* { dg-do run } */
-+/* { dg-options "-O3" } */
-+
-+#include <arm_neon.h>
-+
-+extern void abort (void);
-+
-+#define ABS(a) __builtin_fabs (a)
-+#define ISNAN(a) __builtin_isnan (a)
-+
-+#define DOUBLE_EQUALS(a, b, epsilon) \
-+( \
-+ ((a) == (b)) \
-+ || (ISNAN (a) && ISNAN (b)) \
-+ || (ABS (a - b) < epsilon) \
-+)
-+
-+/* Pi accurate up to 16 digits.
-+ Further digits are a closest binary approximation. */
-+#define PI_F64 3.14159265358979311599796346854
-+/* Hex representation in Double (IEEE754 Double precision 64-bit) is:
-+ 0x400921FB54442D18. */
-+
-+/* E accurate up to 16 digits.
-+ Further digits are a closest binary approximation. */
-+#define E_F64 2.71828182845904509079559829843
-+/* Hex representation in Double (IEEE754 Double precision 64-bit) is:
-+ 0x4005BF0A8B145769. */
-+
-+float32x2_t __attribute__ ((noinline))
-+wrap_vreinterpret_f32_f64 (float64x1_t __a)
-+{
-+ return vreinterpret_f32_f64 (__a);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vreinterpret_f32_f64 ()
-+{
-+ float64x1_t a;
-+ float32x2_t b;
-+ float64_t c[1] = { PI_F64 };
-+ /* Values { 0x54442D18, 0x400921FB } reinterpreted as f32. */
-+ float32_t d[2] = { 3.3702805504E12, 2.1426990032196044921875E0 };
-+ float32_t e[2];
-+ int i;
-+
-+ a = vld1_f64 (c);
-+ b = wrap_vreinterpret_f32_f64 (a);
-+ vst1_f32 (e, b);
-+ for (i = 0; i < 2; i++)
-+ if (!DOUBLE_EQUALS (d[i], e[i], __FLT_EPSILON__))
-+ return 1;
-+ return 0;
-+};
-+
-+int8x8_t __attribute__ ((noinline))
-+wrap_vreinterpret_s8_f64 (float64x1_t __a)
-+{
-+ return vreinterpret_s8_f64 (__a);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vreinterpret_s8_f64 ()
-+{
-+ float64x1_t a;
-+ int8x8_t b;
-+ float64_t c[1] = { PI_F64 };
-+ int8_t d[8] = { 0x18, 0x2D, 0x44, 0x54, 0xFB, 0x21, 0x09, 0x40 };
-+ int8_t e[8];
-+ int i;
-+
-+ a = vld1_f64 (c);
-+ b = wrap_vreinterpret_s8_f64 (a);
-+ vst1_s8 (e, b);
-+ for (i = 0; i < 8; i++)
-+ if (d[i] != e[i])
-+ return 1;
-+ return 0;
-+};
-+
-+int16x4_t __attribute__ ((noinline))
-+wrap_vreinterpret_s16_f64 (float64x1_t __a)
-+{
-+ return vreinterpret_s16_f64 (__a);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vreinterpret_s16_f64 ()
-+{
-+ float64x1_t a;
-+ int16x4_t b;
-+ float64_t c[1] = { PI_F64 };
-+ int16_t d[4] = { 0x2D18, 0x5444, 0x21FB, 0x4009 };
-+ int16_t e[4];
-+ int i;
-+
-+ a = vld1_f64 (c);
-+ b = wrap_vreinterpret_s16_f64 (a);
-+ vst1_s16 (e, b);
-+ for (i = 0; i < 4; i++)
-+ if (d[i] != e[i])
-+ return 1;
-+ return 0;
-+};
-+
-+int32x2_t __attribute__ ((noinline))
-+wrap_vreinterpret_s32_f64 (float64x1_t __a)
-+{
-+ return vreinterpret_s32_f64 (__a);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vreinterpret_s32_f64 ()
-+{
-+ float64x1_t a;
-+ int32x2_t b;
-+ float64_t c[1] = { PI_F64 };
-+ int32_t d[2] = { 0x54442D18, 0x400921FB };
-+ int32_t e[2];
-+ int i;
-+
-+ a = vld1_f64 (c);
-+ b = wrap_vreinterpret_s32_f64 (a);
-+ vst1_s32 (e, b);
-+ for (i = 0; i < 2; i++)
-+ if (d[i] != e[i])
-+ return 1;
-+ return 0;
-+};
-+
-+int64x1_t __attribute__ ((noinline))
-+wrap_vreinterpret_s64_f64 (float64x1_t __a)
-+{
-+ return vreinterpret_s64_f64 (__a);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vreinterpret_s64_f64 ()
-+{
-+ float64x1_t a;
-+ int64x1_t b;
-+ float64_t c[1] = { PI_F64 };
-+ int64_t d[1] = { 0x400921FB54442D18 };
-+ int64_t e[1];
-+ int i;
-+
-+ a = vld1_f64 (c);
-+ b = wrap_vreinterpret_s64_f64 (a);
-+ vst1_s64 (e, b);
-+ if (d[0] != e[0])
-+ return 1;
-+ return 0;
-+};
-+
-+float32x4_t __attribute__ ((noinline))
-+wrap_vreinterpretq_f32_f64 (float64x2_t __a)
-+{
-+ return vreinterpretq_f32_f64 (__a);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vreinterpretq_f32_f64 ()
-+{
-+ float64x2_t a;
-+ float32x4_t b;
-+ float64_t c[2] = { PI_F64, E_F64 };
-+
-+ /* Values corresponding to f32 reinterpret of
-+ { 0x54442D18, 0x400921FB, 0x8B145769, 0x4005BF0A }. */
-+ float32_t d[4] = { 3.3702805504E12,
-+ 2.1426990032196044921875E0,
-+ -2.8569523269651966444143014594E-32,
-+ 2.089785099029541015625E0 };
-+ float32_t e[4];
-+ int i;
-+
-+ a = vld1q_f64 (c);
-+ b = wrap_vreinterpretq_f32_f64 (a);
-+ vst1q_f32 (e, b);
-+ for (i = 0; i < 4; i++)
-+ {
-+ if (!DOUBLE_EQUALS (d[i], e[i], __FLT_EPSILON__))
-+ return 1;
-+ }
-+ return 0;
-+};
-+
-+int8x16_t __attribute__ ((noinline))
-+wrap_vreinterpretq_s8_f64 (float64x2_t __a)
-+{
-+ return vreinterpretq_s8_f64 (__a);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vreinterpretq_s8_f64 ()
-+{
-+ float64x2_t a;
-+ int8x16_t b;
-+ float64_t c[2] = { PI_F64, E_F64 };
-+ int8_t d[16] = { 0x18, 0x2D, 0x44, 0x54, 0xFB, 0x21, 0x09, 0x40,
-+ 0x69, 0x57, 0x14, 0x8B, 0x0A, 0xBF, 0x05, 0x40 };
-+ int8_t e[16];
-+ int i;
-+
-+ a = vld1q_f64 (c);
-+ b = wrap_vreinterpretq_s8_f64 (a);
-+ vst1q_s8 (e, b);
-+ for (i = 0; i < 16; i++)
-+ if (d[i] != e[i])
-+ return 1;
-+ return 0;
-+};
-+
-+int16x8_t __attribute__ ((noinline))
-+wrap_vreinterpretq_s16_f64 (float64x2_t __a)
-+{
-+ return vreinterpretq_s16_f64 (__a);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vreinterpretq_s16_f64 ()
-+{
-+ float64x2_t a;
-+ int16x8_t b;
-+ float64_t c[2] = { PI_F64, E_F64 };
-+ int16_t d[8] = { 0x2D18, 0x5444, 0x21FB, 0x4009,
-+ 0x5769, 0x8B14, 0xBF0A, 0x4005 };
-+ int16_t e[8];
-+ int i;
-+
-+ a = vld1q_f64 (c);
-+ b = wrap_vreinterpretq_s16_f64 (a);
-+ vst1q_s16 (e, b);
-+ for (i = 0; i < 8; i++)
-+ if (d[i] != e[i])
-+ return 1;
-+ return 0;
-+};
-+
-+int32x4_t __attribute__ ((noinline))
-+wrap_vreinterpretq_s32_f64 (float64x2_t __a)
-+{
-+ return vreinterpretq_s32_f64 (__a);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vreinterpretq_s32_f64 ()
-+{
-+ float64x2_t a;
-+ int32x4_t b;
-+ float64_t c[2] = { PI_F64, E_F64 };
-+ int32_t d[4] = { 0x54442D18, 0x400921FB, 0x8B145769, 0x4005BF0A };
-+ int32_t e[4];
-+ int i;
-+
-+ a = vld1q_f64 (c);
-+ b = wrap_vreinterpretq_s32_f64 (a);
-+ vst1q_s32 (e, b);
-+ for (i = 0; i < 4; i++)
-+ if (d[i] != e[i])
-+ return 1;
-+ return 0;
-+};
-+
-+int64x2_t __attribute__ ((noinline))
-+wrap_vreinterpretq_s64_f64 (float64x2_t __a)
-+{
-+ return vreinterpretq_s64_f64 (__a);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vreinterpretq_s64_f64 ()
-+{
-+ float64x2_t a;
-+ int64x2_t b;
-+ float64_t c[2] = { PI_F64, E_F64 };
-+ int64_t d[2] = { 0x400921FB54442D18, 0x4005BF0A8B145769 };
-+ int64_t e[2];
-+ int i;
-+
-+ a = vld1q_f64 (c);
-+ b = wrap_vreinterpretq_s64_f64 (a);
-+ vst1q_s64 (e, b);
-+ for (i = 0; i < 2; i++)
-+ if (d[i] != e[i])
-+ return 1;
-+ return 0;
-+};
-+
-+float64x1_t __attribute__ ((noinline))
-+wrap_vreinterpret_f64_f32 (float32x2_t __a)
-+{
-+ return vreinterpret_f64_f32 (__a);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vreinterpret_f64_f32 ()
-+{
-+ float32x2_t a;
-+ float64x1_t b;
-+ /* Values { 0x54442D18, 0x400921FB } reinterpreted as f32. */
-+ float32_t c[2] = { 3.3702805504E12, 2.1426990032196044921875E0 };
-+ float64_t d[1] = { PI_F64 };
-+ float64_t e[1];
-+ int i;
-+
-+ a = vld1_f32 (c);
-+ b = wrap_vreinterpret_f64_f32 (a);
-+ vst1_f64 (e, b);
-+ if (!DOUBLE_EQUALS (d[0], e[0], __DBL_EPSILON__))
-+ return 1;
-+ return 0;
-+};
-+
-+float64x1_t __attribute__ ((noinline))
-+wrap_vreinterpret_f64_s8 (int8x8_t __a)
-+{
-+ return vreinterpret_f64_s8 (__a);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vreinterpret_f64_s8 ()
-+{
-+ int8x8_t a;
-+ float64x1_t b;
-+ int8_t c[8] = { 0x18, 0x2D, 0x44, 0x54, 0xFB, 0x21, 0x09, 0x40 };
-+ float64_t d[1] = { PI_F64 };
-+ float64_t e[1];
-+ int i;
-+
-+ a = vld1_s8 (c);
-+ b = wrap_vreinterpret_f64_s8 (a);
-+ vst1_f64 (e, b);
-+ if (!DOUBLE_EQUALS (d[0], e[0], __DBL_EPSILON__))
-+ return 1;
-+ return 0;
-+};
-+
-+float64x1_t __attribute__ ((noinline))
-+wrap_vreinterpret_f64_s16 (int16x4_t __a)
-+{
-+ return vreinterpret_f64_s16 (__a);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vreinterpret_f64_s16 ()
-+{
-+ int16x4_t a;
-+ float64x1_t b;
-+ int16_t c[4] = { 0x2D18, 0x5444, 0x21FB, 0x4009 };
-+ float64_t d[1] = { PI_F64 };
-+ float64_t e[1];
-+ int i;
-+
-+ a = vld1_s16 (c);
-+ b = wrap_vreinterpret_f64_s16 (a);
-+ vst1_f64 (e, b);
-+ if (!DOUBLE_EQUALS (d[0], e[0], __DBL_EPSILON__))
-+ return 1;
-+ return 0;
-+};
-+
-+float64x1_t __attribute__ ((noinline))
-+wrap_vreinterpret_f64_s32 (int32x2_t __a)
-+{
-+ return vreinterpret_f64_s32 (__a);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vreinterpret_f64_s32 ()
-+{
-+ int32x2_t a;
-+ float64x1_t b;
-+ int32_t c[2] = { 0x54442D18, 0x400921FB };
-+ float64_t d[1] = { PI_F64 };
-+ float64_t e[1];
-+ int i;
-+
-+ a = vld1_s32 (c);
-+ b = wrap_vreinterpret_f64_s32 (a);
-+ vst1_f64 (e, b);
-+ if (!DOUBLE_EQUALS (d[0], e[0], __DBL_EPSILON__))
-+ return 1;
-+ return 0;
-+};
-+
-+float64x1_t __attribute__ ((noinline))
-+wrap_vreinterpret_f64_s64 (int64x1_t __a)
-+{
-+ return vreinterpret_f64_s64 (__a);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vreinterpret_f64_s64 ()
-+{
-+ int64x1_t a;
-+ float64x1_t b;
-+ int64_t c[1] = { 0x400921FB54442D18 };
-+ float64_t d[1] = { PI_F64 };
-+ float64_t e[1];
-+
-+ a = vld1_s64 (c);
-+ b = wrap_vreinterpret_f64_s64 (a);
-+ vst1_f64 (e, b);
-+ if (!DOUBLE_EQUALS (d[0], e[0], __DBL_EPSILON__))
-+ return 1;
-+ return 0;
-+};
-+
-+float64x2_t __attribute__ ((noinline))
-+wrap_vreinterpretq_f64_f32 (float32x4_t __a)
-+{
-+ return vreinterpretq_f64_f32 (__a);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vreinterpretq_f64_f32 ()
-+{
-+ float32x4_t a;
-+ float64x2_t b;
-+ /* Values corresponding to f32 reinterpret of
-+ { 0x54442D18, 0x400921FB, 0x8B145769, 0x4005BF0A }. */
-+ float32_t c[4] = { 3.3702805504E12,
-+ 2.1426990032196044921875E0,
-+ -2.8569523269651966444143014594E-32,
-+ 2.089785099029541015625E0 };
-+
-+ float64_t d[2] = { PI_F64, E_F64 };
-+ float64_t e[2];
-+ int i;
-+
-+ a = vld1q_f32 (c);
-+ b = wrap_vreinterpretq_f64_f32 (a);
-+ vst1q_f64 (e, b);
-+ for (i = 0; i < 2; i++)
-+ if (!DOUBLE_EQUALS (d[i], e[i], __DBL_EPSILON__))
-+ return 1;
-+ return 0;
-+};
-+
-+float64x2_t __attribute__ ((noinline))
-+wrap_vreinterpretq_f64_s8 (int8x16_t __a)
-+{
-+ return vreinterpretq_f64_s8 (__a);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vreinterpretq_f64_s8 ()
-+{
-+ int8x16_t a;
-+ float64x2_t b;
-+ int8_t c[16] = { 0x18, 0x2D, 0x44, 0x54, 0xFB, 0x21, 0x09, 0x40,
-+ 0x69, 0x57, 0x14, 0x8B, 0x0A, 0xBF, 0x05, 0x40 };
-+ float64_t d[2] = { PI_F64, E_F64 };
-+ float64_t e[2];
-+ int i;
-+
-+ a = vld1q_s8 (c);
-+ b = wrap_vreinterpretq_f64_s8 (a);
-+ vst1q_f64 (e, b);
-+ for (i = 0; i < 2; i++)
-+ if (!DOUBLE_EQUALS (d[i], e[i], __DBL_EPSILON__))
-+ return 1;
-+ return 0;
-+};
-+
-+float64x2_t __attribute__ ((noinline))
-+wrap_vreinterpretq_f64_s16 (int16x8_t __a)
-+{
-+ return vreinterpretq_f64_s16 (__a);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vreinterpretq_f64_s16 ()
-+{
-+ int16x8_t a;
-+ float64x2_t b;
-+ int16_t c[8] = { 0x2D18, 0x5444, 0x21FB, 0x4009,
-+ 0x5769, 0x8B14, 0xBF0A, 0x4005 };
-+ float64_t d[2] = { PI_F64, E_F64 };
-+ float64_t e[2];
-+ int i;
-+
-+ a = vld1q_s16 (c);
-+ b = wrap_vreinterpretq_f64_s16 (a);
-+ vst1q_f64 (e, b);
-+ for (i = 0; i < 2; i++)
-+ if (!DOUBLE_EQUALS (d[i], e[i], __DBL_EPSILON__))
-+ return 1;
-+ return 0;
-+};
-+
-+float64x2_t __attribute__ ((noinline))
-+wrap_vreinterpretq_f64_s32 (int32x4_t __a)
-+{
-+ return vreinterpretq_f64_s32 (__a);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vreinterpretq_f64_s32 ()
-+{
-+ int32x4_t a;
-+ float64x2_t b;
-+ int32_t c[4] = { 0x54442D18, 0x400921FB, 0x8B145769, 0x4005BF0A };
-+ float64_t d[2] = { PI_F64, E_F64 };
-+ float64_t e[2];
-+ int i;
-+
-+ a = vld1q_s32 (c);
-+ b = wrap_vreinterpretq_f64_s32 (a);
-+ vst1q_f64 (e, b);
-+ for (i = 0; i < 2; i++)
-+ if (!DOUBLE_EQUALS (d[i], e[i], __DBL_EPSILON__))
-+ return 1;
-+ return 0;
-+};
-+
-+float64x2_t __attribute__ ((noinline))
-+wrap_vreinterpretq_f64_s64 (int64x2_t __a)
-+{
-+ return vreinterpretq_f64_s64 (__a);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vreinterpretq_f64_s64 ()
-+{
-+ int64x2_t a;
-+ float64x2_t b;
-+ int64_t c[2] = { 0x400921FB54442D18, 0x4005BF0A8B145769 };
-+ float64_t d[2] = { PI_F64, E_F64 };
-+ float64_t e[2];
-+ int i;
-+
-+ a = vld1q_s64 (c);
-+ b = wrap_vreinterpretq_f64_s64 (a);
-+ vst1q_f64 (e, b);
-+ for (i = 0; i < 2; i++)
-+ if (!DOUBLE_EQUALS (d[i], e[i], __DBL_EPSILON__))
-+ return 1;
-+ return 0;
-+};
-+
-+int
-+main (int argc, char **argv)
-+{
-+ if (test_vreinterpret_f32_f64 ())
-+ abort ();
-+
-+ if (test_vreinterpret_s8_f64 ())
-+ abort ();
-+ if (test_vreinterpret_s16_f64 ())
-+ abort ();
-+ if (test_vreinterpret_s32_f64 ())
-+ abort ();
-+ if (test_vreinterpret_s64_f64 ())
-+ abort ();
-+
-+ if (test_vreinterpretq_f32_f64 ())
-+ abort ();
-+
-+ if (test_vreinterpretq_s8_f64 ())
-+ abort ();
-+ if (test_vreinterpretq_s16_f64 ())
-+ abort ();
-+ if (test_vreinterpretq_s32_f64 ())
-+ abort ();
-+ if (test_vreinterpretq_s64_f64 ())
-+ abort ();
-+
-+ if (test_vreinterpret_f64_f32 ())
-+ abort ();
-+
-+ if (test_vreinterpret_f64_s8 ())
-+ abort ();
-+ if (test_vreinterpret_f64_s16 ())
-+ abort ();
-+ if (test_vreinterpret_f64_s32 ())
-+ abort ();
-+ if (test_vreinterpret_f64_s64 ())
-+ abort ();
-+
-+ if (test_vreinterpretq_f64_f32 ())
-+ abort ();
-+
-+ if (test_vreinterpretq_f64_s8 ())
-+ abort ();
-+ if (test_vreinterpretq_f64_s16 ())
-+ abort ();
-+ if (test_vreinterpretq_f64_s32 ())
-+ abort ();
-+ if (test_vreinterpretq_f64_s64 ())
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/test_fp_attribute_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/test_fp_attribute_1.c
-@@ -21,6 +21,6 @@
- leaf ();
- }
-
--/* { dg-final { scan-assembler-times "str\tx30, \\\[sp\\\]" 2 } } */
-+/* { dg-final { scan-assembler-times "str\tx30, \\\[sp, -\[0-9\]+\\\]!" 2 } } */
-
- /* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/vect.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/vect.x
-@@ -2,6 +2,7 @@
- typedef unsigned int *__restrict__ pRUINT;
- typedef long long *__restrict__ pRINT64;
- typedef unsigned long long *__restrict__ pRUINT64;
-+extern int abs (int j);
-
- void test_orn (pRUINT a, pRUINT b, pRUINT c)
- {
---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_14.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_14.c
-@@ -0,0 +1,12 @@
-+/* Verify:
-+ * with outgoing.
-+ * total frame size > 512.
-+ * number of callee-save reg >= 2. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-O2" } */
-+
-+#include "test_frame_common.h"
-+
-+t_frame_pattern_outgoing (test14, 700, , 8, a[8])
-+t_frame_run (test14)
---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_3.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_3.c
-@@ -0,0 +1,14 @@
-+/* Verify:
-+ * -fomit-frame-pointer.
-+ * without outgoing.
-+ * total frame size <= 512 but > 256.
-+ * number of callee-save reg == 1.
-+ * we can't use "str !" to optimize stack adjustment. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-O2 -fomit-frame-pointer" } */
-+
-+#include "test_frame_common.h"
-+
-+t_frame_pattern (test3, 400, )
-+t_frame_run (test3)
---- a/src/gcc/testsuite/gcc.target/aarch64/pic-constantpool1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/pic-constantpool1.c
-@@ -2,10 +2,13 @@
- /* { dg-do compile } */
-
- extern int __finite (double __value) __attribute__ ((__nothrow__)) __attribute__ ((__const__));
-+extern int __finitef (float __value) __attribute__ ((__nothrow__)) __attribute__ ((__const__));
-+extern int __signbit (double __value) __attribute__ ((__nothrow__)) __attribute__ ((__const__));
-+extern int __signbitf (float __value) __attribute__ ((__nothrow__)) __attribute__ ((__const__));
- int
- __ecvt_r (value, ndigit, decpt, sign, buf, len)
- double value;
-- int ndigit, *decpt, *sign;
-+ int ndigit, *decpt, *sign, len;
- char *buf;
- {
- if ((sizeof (value) == sizeof (float) ? __finitef (value) : __finite (value)) && value != 0.0)
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vpaddd_s64.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vpaddd_s64.c
-@@ -0,0 +1,27 @@
-+/* Test the vpaddd_s64 AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -O3" } */
-+
-+#include "arm_neon.h"
-+
-+#define SIZE 6
-+
-+extern void abort (void);
-+
-+int64_t in[SIZE] = { -4l, 4l, -2l, 2l, -1l, 1l };
-+
-+int
-+main (void)
-+{
-+ int i;
-+
-+ for (i = 0; i < SIZE / 2; ++i)
-+ if (vpaddd_s64 (vld1q_s64 (in + 2 * i)) != 0)
-+ abort ();
-+
-+ return 0;
-+}
-+
-+/* { dg-final { scan-assembler "addp\[ \t\]+\[dD\]\[0-9\]+, v\[0-9\].2d+\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s16.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s16.x
-@@ -0,0 +1,114 @@
-+extern void abort (void);
-+
-+int16x8_t
-+test_vextq_s16_1 (int16x8_t a, int16x8_t b)
-+{
-+ return vextq_s16 (a, b, 1);
-+}
-+
-+int16x8_t
-+test_vextq_s16_2 (int16x8_t a, int16x8_t b)
-+{
-+ return vextq_s16 (a, b, 2);
-+}
-+
-+int16x8_t
-+test_vextq_s16_3 (int16x8_t a, int16x8_t b)
-+{
-+ return vextq_s16 (a, b, 3);
-+}
-+
-+int16x8_t
-+test_vextq_s16_4 (int16x8_t a, int16x8_t b)
-+{
-+ return vextq_s16 (a, b, 4);
-+}
-+
-+int16x8_t
-+test_vextq_s16_5 (int16x8_t a, int16x8_t b)
-+{
-+ return vextq_s16 (a, b, 5);
-+}
-+
-+int16x8_t
-+test_vextq_s16_6 (int16x8_t a, int16x8_t b)
-+{
-+ return vextq_s16 (a, b, 6);
-+}
-+
-+int16x8_t
-+test_vextq_s16_7 (int16x8_t a, int16x8_t b)
-+{
-+ return vextq_s16 (a, b, 7);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i, off;
-+ int16_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7};
-+ int16x8_t in1 = vld1q_s16 (arr1);
-+ int16_t arr2[] = {8, 9, 10, 11, 12, 13, 14, 15};
-+ int16x8_t in2 = vld1q_s16 (arr2);
-+ int16_t exp[8];
-+ int16x8_t expected;
-+ int16x8_t actual = test_vextq_s16_1 (in1, in2);
-+
-+ for (i = 0; i < 8; i++)
-+ exp[i] = i + 1;
-+ expected = vld1q_s16 (exp);
-+ for (i = 0; i < 8; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_s16_2 (in1, in2);
-+ for (i = 0; i < 8; i++)
-+ exp[i] = i + 2;
-+ expected = vld1q_s16 (exp);
-+ for (i = 0; i < 8; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_s16_3 (in1, in2);
-+ for (i = 0; i < 8; i++)
-+ exp[i] = i + 3;
-+ expected = vld1q_s16 (exp);
-+ for (i = 0; i < 8; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_s16_4 (in1, in2);
-+ for (i = 0; i < 8; i++)
-+ exp[i] = i + 4;
-+ expected = vld1q_s16 (exp);
-+ for (i = 0; i < 8; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_s16_5 (in1, in2);
-+ for (i = 0; i < 8; i++)
-+ exp[i] = i + 5;
-+ expected = vld1q_s16 (exp);
-+ for (i = 0; i < 8; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_s16_6 (in1, in2);
-+ for (i = 0; i < 8; i++)
-+ exp[i] = i + 6;
-+ expected = vld1q_s16 (exp);
-+ for (i = 0; i < 8; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_s16_7 (in1, in2);
-+ for (i = 0; i < 8; i++)
-+ exp[i] = i + 7;
-+ expected = vld1q_s16 (exp);
-+ for (i = 0; i < 8; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vpaddd_u64.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vpaddd_u64.c
-@@ -0,0 +1,27 @@
-+/* Test the vpaddd_u64 AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -O3" } */
-+
-+#include "arm_neon.h"
-+
-+#define SIZE 6
-+
-+extern void abort (void);
-+
-+uint64_t in[SIZE] = { 4ul, 4ul, 2ul, 2ul, 1ul, 1ul };
-+
-+int
-+main (void)
-+{
-+ int i;
-+
-+ for (i = 0; i < SIZE / 2; ++i)
-+ if (vpaddd_u64 (vld1q_u64 (in + 2 * i)) != 2 * in[2 * i])
-+ abort ();
-+
-+ return 0;
-+}
-+
-+/* { dg-final { scan-assembler "addp\[ \t\]+\[dD\]\[0-9\]+, v\[0-9\].2d+\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u8.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u8.x
-@@ -0,0 +1,114 @@
-+extern void abort (void);
-+
-+uint8x8_t
-+test_vext_u8_1 (uint8x8_t a, uint8x8_t b)
-+{
-+ return vext_u8 (a, b, 1);
-+}
-+
-+uint8x8_t
-+test_vext_u8_2 (uint8x8_t a, uint8x8_t b)
-+{
-+ return vext_u8 (a, b, 2);
-+}
-+
-+uint8x8_t
-+test_vext_u8_3 (uint8x8_t a, uint8x8_t b)
-+{
-+ return vext_u8 (a, b, 3);
-+}
-+
-+uint8x8_t
-+test_vext_u8_4 (uint8x8_t a, uint8x8_t b)
-+{
-+ return vext_u8 (a, b, 4);
-+}
-+
-+uint8x8_t
-+test_vext_u8_5 (uint8x8_t a, uint8x8_t b)
-+{
-+ return vext_u8 (a, b, 5);
-+}
-+
-+uint8x8_t
-+test_vext_u8_6 (uint8x8_t a, uint8x8_t b)
-+{
-+ return vext_u8 (a, b, 6);
-+}
-+
-+uint8x8_t
-+test_vext_u8_7 (uint8x8_t a, uint8x8_t b)
-+{
-+ return vext_u8 (a, b, 7);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i, off;
-+ uint8_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7};
-+ uint8x8_t in1 = vld1_u8 (arr1);
-+ uint8_t arr2[] = {8, 9, 10, 11, 12, 13, 14, 15};
-+ uint8x8_t in2 = vld1_u8 (arr2);
-+ uint8_t exp[8];
-+ uint8x8_t expected;
-+ uint8x8_t actual = test_vext_u8_1 (in1, in2);
-+
-+ for (i = 0; i < 8; i++)
-+ exp[i] = i + 1;
-+ expected = vld1_u8 (exp);
-+ for (i = 0; i < 8; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vext_u8_2 (in1, in2);
-+ for (i = 0; i < 8; i++)
-+ exp[i] = i + 2;
-+ expected = vld1_u8 (exp);
-+ for (i = 0; i < 8; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vext_u8_3 (in1, in2);
-+ for (i = 0; i < 8; i++)
-+ exp[i] = i + 3;
-+ expected = vld1_u8 (exp);
-+ for (i = 0; i < 8; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vext_u8_4 (in1, in2);
-+ for (i = 0; i < 8; i++)
-+ exp[i] = i + 4;
-+ expected = vld1_u8 (exp);
-+ for (i = 0; i < 8; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vext_u8_5 (in1, in2);
-+ for (i = 0; i < 8; i++)
-+ exp[i] = i + 5;
-+ expected = vld1_u8 (exp);
-+ for (i = 0; i < 8; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vext_u8_6 (in1, in2);
-+ for (i = 0; i < 8; i++)
-+ exp[i] = i + 6;
-+ expected = vld1_u8 (exp);
-+ for (i = 0; i < 8; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vext_u8_7 (in1, in2);
-+ for (i = 0; i < 8; i++)
-+ exp[i] = i + 7;
-+ expected = vld1_u8 (exp);
-+ for (i = 0; i < 8; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u16.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u16.x
-@@ -0,0 +1,114 @@
-+extern void abort (void);
-+
-+uint16x8_t
-+test_vextq_u16_1 (uint16x8_t a, uint16x8_t b)
-+{
-+ return vextq_u16 (a, b, 1);
-+}
-+
-+uint16x8_t
-+test_vextq_u16_2 (uint16x8_t a, uint16x8_t b)
-+{
-+ return vextq_u16 (a, b, 2);
-+}
-+
-+uint16x8_t
-+test_vextq_u16_3 (uint16x8_t a, uint16x8_t b)
-+{
-+ return vextq_u16 (a, b, 3);
-+}
-+
-+uint16x8_t
-+test_vextq_u16_4 (uint16x8_t a, uint16x8_t b)
-+{
-+ return vextq_u16 (a, b, 4);
-+}
-+
-+uint16x8_t
-+test_vextq_u16_5 (uint16x8_t a, uint16x8_t b)
-+{
-+ return vextq_u16 (a, b, 5);
-+}
-+
-+uint16x8_t
-+test_vextq_u16_6 (uint16x8_t a, uint16x8_t b)
-+{
-+ return vextq_u16 (a, b, 6);
-+}
-+
-+uint16x8_t
-+test_vextq_u16_7 (uint16x8_t a, uint16x8_t b)
-+{
-+ return vextq_u16 (a, b, 7);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i, off;
-+ uint16_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7};
-+ uint16x8_t in1 = vld1q_u16 (arr1);
-+ uint16_t arr2[] = {8, 9, 10, 11, 12, 13, 14, 15};
-+ uint16x8_t in2 = vld1q_u16 (arr2);
-+ uint16_t exp[8];
-+ uint16x8_t expected;
-+ uint16x8_t actual = test_vextq_u16_1 (in1, in2);
-+
-+ for (i = 0; i < 8; i++)
-+ exp[i] = i + 1;
-+ expected = vld1q_u16 (exp);
-+ for (i = 0; i < 8; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_u16_2 (in1, in2);
-+ for (i = 0; i < 8; i++)
-+ exp[i] = i + 2;
-+ expected = vld1q_u16 (exp);
-+ for (i = 0; i < 8; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_u16_3 (in1, in2);
-+ for (i = 0; i < 8; i++)
-+ exp[i] = i + 3;
-+ expected = vld1q_u16 (exp);
-+ for (i = 0; i < 8; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_u16_4 (in1, in2);
-+ for (i = 0; i < 8; i++)
-+ exp[i] = i + 4;
-+ expected = vld1q_u16 (exp);
-+ for (i = 0; i < 8; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_u16_5 (in1, in2);
-+ for (i = 0; i < 8; i++)
-+ exp[i] = i + 5;
-+ expected = vld1q_u16 (exp);
-+ for (i = 0; i < 8; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_u16_6 (in1, in2);
-+ for (i = 0; i < 8; i++)
-+ exp[i] = i + 6;
-+ expected = vld1q_u16 (exp);
-+ for (i = 0; i < 8; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_u16_7 (in1, in2);
-+ for (i = 0; i < 8; i++)
-+ exp[i] = i + 7;
-+ expected = vld1q_u16 (exp);
-+ for (i = 0; i < 8; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzips16_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzips16_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vzip_s16' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vzips16.x"
-+
-+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs16.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs16.x
-@@ -0,0 +1,26 @@
-+extern void abort (void);
-+
-+int16x8x2_t
-+test_vuzpqs16 (int16x8_t _a, int16x8_t _b)
-+{
-+ return vuzpq_s16 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ int16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8};
-+ int16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16};
-+ int16x8x2_t result = test_vuzpqs16 (vld1q_s16 (first), vld1q_s16 (second));
-+ int16_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15};
-+ int16_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16};
-+ int16x8_t expect1 = vld1q_s16 (exp1);
-+ int16x8_t expect2 = vld1q_s16 (exp2);
-+
-+ for (i = 0; i < 8; i++)
-+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs8_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs8_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vzipq_s8' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vzipqs8.x"
-+
-+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp8_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp8_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vrev64q_p8' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vrev64qp8.x"
-+
-+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu16_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu16_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vtrn_u16' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vtrnu16.x"
-+
-+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu16.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu16.x
-@@ -0,0 +1,26 @@
-+extern void abort (void);
-+
-+uint16x8x2_t
-+test_vuzpqu16 (uint16x8_t _a, uint16x8_t _b)
-+{
-+ return vuzpq_u16 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ uint16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8};
-+ uint16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16};
-+ uint16x8x2_t result = test_vuzpqu16 (vld1q_u16 (first), vld1q_u16 (second));
-+ uint16_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15};
-+ uint16_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16};
-+ uint16x8_t expect1 = vld1q_u16 (exp1);
-+ uint16x8_t expect2 = vld1q_u16 (exp2);
-+
-+ for (i = 0; i < 8; i++)
-+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu8.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu8.x
-@@ -0,0 +1,26 @@
-+extern void abort (void);
-+
-+uint8x8x2_t
-+test_vuzpu8 (uint8x8_t _a, uint8x8_t _b)
-+{
-+ return vuzp_u8 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ uint8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8};
-+ uint8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16};
-+ uint8x8x2_t result = test_vuzpu8 (vld1_u8 (first), vld1_u8 (second));
-+ uint8_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15};
-+ uint8_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16};
-+ uint8x8_t expect1 = vld1_u8 (exp1);
-+ uint8x8_t expect2 = vld1_u8 (exp2);
-+
-+ for (i = 0; i < 8; i++)
-+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u16_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u16_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vextu16' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+
-+#include "arm_neon.h"
-+#include "ext_u16.x"
-+
-+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u8_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u8_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vextQu8' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+
-+#include "arm_neon.h"
-+#include "extq_u8.x"
-+
-+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#?\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 15 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu8.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu8.x
-@@ -0,0 +1,22 @@
-+extern void abort (void);
-+
-+uint8x16_t
-+test_vrev64qu8 (uint8x16_t _arg)
-+{
-+ return vrev64q_u8 (_arg);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ uint8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
-+ uint8x16_t reversed = test_vrev64qu8 (inorder);
-+ uint8x16_t expected = {8, 7, 6, 5, 4, 3, 2, 1, 16, 15, 14, 13, 12, 11, 10, 9};
-+
-+ for (i = 0; i < 16; i++)
-+ if (reversed[i] != expected[i])
-+ abort ();
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p8_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p8_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vrev32_p8' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vrev32p8.x"
-+
-+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps32.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps32.x
-@@ -0,0 +1,26 @@
-+extern void abort (void);
-+
-+int32x2x2_t
-+test_vuzps32 (int32x2_t _a, int32x2_t _b)
-+{
-+ return vuzp_s32 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ int32_t first[] = {1, 2};
-+ int32_t second[] = {3, 4};
-+ int32x2x2_t result = test_vuzps32 (vld1_s32 (first), vld1_s32 (second));
-+ int32_t exp1[] = {1, 3};
-+ int32_t exp2[] = {2, 4};
-+ int32x2_t expect1 = vld1_s32 (exp1);
-+ int32x2_t expect2 = vld1_s32 (exp2);
-+
-+ for (i = 0; i < 2; i++)
-+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s64.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s64.x
-@@ -0,0 +1,17 @@
-+extern void abort (void);
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i, off;
-+ int64_t arr1[] = {0};
-+ int64x1_t in1 = vld1_s64 (arr1);
-+ int64_t arr2[] = {1};
-+ int64x1_t in2 = vld1_s64 (arr2);
-+ int64x1_t actual = vext_s64 (in1, in2, 0);
-+ if (actual != in1)
-+ abort ();
-+
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu32.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu32.x
-@@ -0,0 +1,26 @@
-+extern void abort (void);
-+
-+uint32x2x2_t
-+test_vuzpu32 (uint32x2_t _a, uint32x2_t _b)
-+{
-+ return vuzp_u32 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ uint32_t first[] = {1, 2};
-+ uint32_t second[] = {3, 4};
-+ uint32x2x2_t result = test_vuzpu32 (vld1_u32 (first), vld1_u32 (second));
-+ uint32_t exp1[] = {1, 3};
-+ uint32_t exp2[] = {2, 4};
-+ uint32x2_t expect1 = vld1_u32 (exp1);
-+ uint32x2_t expect2 = vld1_u32 (exp2);
-+
-+ for (i = 0; i < 2; i++)
-+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u64.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u64.x
-@@ -0,0 +1,17 @@
-+extern void abort (void);
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i, off;
-+ uint64_t arr1[] = {0};
-+ uint64x1_t in1 = vld1_u64 (arr1);
-+ uint64_t arr2[] = {1};
-+ uint64x1_t in2 = vld1_u64 (arr2);
-+ uint64x1_t actual = vext_u64 (in1, in2, 0);
-+ if (actual != in1)
-+ abort ();
-+
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns8_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns8_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vtrn_s8' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vtrns8.x"
-+
-+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs16_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs16_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vtrnq_s16' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vtrnqs16.x"
-+
-+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs32_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs32_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vrev64q_s32' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vrev64qs32.x"
-+
-+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.4s, ?v\[0-9\]+.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s8_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s8_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vrev64_s8' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vrev64s8.x"
-+
-+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs16.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs16.x
-@@ -0,0 +1,27 @@
-+extern void abort (void);
-+
-+int16x8x2_t
-+test_vzipqs16 (int16x8_t _a, int16x8_t _b)
-+{
-+ return vzipq_s16 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ int16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8};
-+ int16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16};
-+ int16x8x2_t result = test_vzipqs16 (vld1q_s16 (first), vld1q_s16 (second));
-+ int16x8_t res1 = result.val[0], res2 = result.val[1];
-+ int16_t exp1[] = {1, 9, 2, 10, 3, 11, 4, 12};
-+ int16_t exp2[] = {5, 13, 6, 14, 7, 15, 8, 16};
-+ int16x8_t expected1 = vld1q_s16 (exp1);
-+ int16x8_t expected2 = vld1q_s16 (exp2);
-+
-+ for (i = 0; i < 8; i++)
-+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipf32.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipf32.x
-@@ -0,0 +1,27 @@
-+extern void abort (void);
-+
-+float32x2x2_t
-+test_vzipf32 (float32x2_t _a, float32x2_t _b)
-+{
-+ return vzip_f32 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ float32_t first[] = {1, 2};
-+ float32_t second[] = {3, 4};
-+ float32x2x2_t result = test_vzipf32 (vld1_f32 (first), vld1_f32 (second));
-+ float32x2_t res1 = result.val[0], res2 = result.val[1];
-+ float32_t exp1[] = {1, 3};
-+ float32_t exp2[] = {2, 4};
-+ float32x2_t expected1 = vld1_f32 (exp1);
-+ float32x2_t expected2 = vld1_f32 (exp2);
-+
-+ for (i = 0; i < 2; i++)
-+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu8.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu8.x
-@@ -0,0 +1,27 @@
-+extern void abort (void);
-+
-+uint8x8x2_t
-+test_vzipu8 (uint8x8_t _a, uint8x8_t _b)
-+{
-+ return vzip_u8 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ uint8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8};
-+ uint8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16};
-+ uint8x8x2_t result = test_vzipu8 (vld1_u8 (first), vld1_u8 (second));
-+ uint8x8_t res1 = result.val[0], res2 = result.val[1];
-+ uint8_t exp1[] = {1, 9, 2, 10, 3, 11, 4, 12};
-+ uint8_t exp2[] = {5, 13, 6, 14, 7, 15, 8, 16};
-+ uint8x8_t expected1 = vld1_u8 (exp1);
-+ uint8x8_t expected2 = vld1_u8 (exp2);
-+
-+ for (i = 0; i < 8; i++)
-+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu16.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu16.x
-@@ -0,0 +1,27 @@
-+extern void abort (void);
-+
-+uint16x8x2_t
-+test_vzipqu16 (uint16x8_t _a, uint16x8_t _b)
-+{
-+ return vzipq_u16 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ uint16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8};
-+ uint16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16};
-+ uint16x8x2_t result = test_vzipqu16 (vld1q_u16 (first), vld1q_u16 (second));
-+ uint16x8_t res1 = result.val[0], res2 = result.val[1];
-+ uint16_t exp1[] = {1, 9, 2, 10, 3, 11, 4, 12};
-+ uint16_t exp2[] = {5, 13, 6, 14, 7, 15, 8, 16};
-+ uint16x8_t expected1 = vld1q_u16 (exp1);
-+ uint16x8_t expected2 = vld1q_u16 (exp2);
-+
-+ for (i = 0; i < 8; i++)
-+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s16_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s16_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vextQs16' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+
-+#include "arm_neon.h"
-+#include "extq_s16.x"
-+
-+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp16_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp16_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vuzpq_p16' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vuzpqp16.x"
-+
-+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p8.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p8.x
-@@ -0,0 +1,114 @@
-+extern void abort (void);
-+
-+poly8x8_t
-+test_vext_p8_1 (poly8x8_t a, poly8x8_t b)
-+{
-+ return vext_p8 (a, b, 1);
-+}
-+
-+poly8x8_t
-+test_vext_p8_2 (poly8x8_t a, poly8x8_t b)
-+{
-+ return vext_p8 (a, b, 2);
-+}
-+
-+poly8x8_t
-+test_vext_p8_3 (poly8x8_t a, poly8x8_t b)
-+{
-+ return vext_p8 (a, b, 3);
-+}
-+
-+poly8x8_t
-+test_vext_p8_4 (poly8x8_t a, poly8x8_t b)
-+{
-+ return vext_p8 (a, b, 4);
-+}
-+
-+poly8x8_t
-+test_vext_p8_5 (poly8x8_t a, poly8x8_t b)
-+{
-+ return vext_p8 (a, b, 5);
-+}
-+
-+poly8x8_t
-+test_vext_p8_6 (poly8x8_t a, poly8x8_t b)
-+{
-+ return vext_p8 (a, b, 6);
-+}
-+
-+poly8x8_t
-+test_vext_p8_7 (poly8x8_t a, poly8x8_t b)
-+{
-+ return vext_p8 (a, b, 7);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i, off;
-+ poly8_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7};
-+ poly8x8_t in1 = vld1_p8 (arr1);
-+ poly8_t arr2[] = {8, 9, 10, 11, 12, 13, 14, 15};
-+ poly8x8_t in2 = vld1_p8 (arr2);
-+ poly8_t exp[8];
-+ poly8x8_t expected;
-+ poly8x8_t actual = test_vext_p8_1 (in1, in2);
-+
-+ for (i = 0; i < 8; i++)
-+ exp[i] = i + 1;
-+ expected = vld1_p8 (exp);
-+ for (i = 0; i < 8; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vext_p8_2 (in1, in2);
-+ for (i = 0; i < 8; i++)
-+ exp[i] = i + 2;
-+ expected = vld1_p8 (exp);
-+ for (i = 0; i < 8; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vext_p8_3 (in1, in2);
-+ for (i = 0; i < 8; i++)
-+ exp[i] = i + 3;
-+ expected = vld1_p8 (exp);
-+ for (i = 0; i < 8; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vext_p8_4 (in1, in2);
-+ for (i = 0; i < 8; i++)
-+ exp[i] = i + 4;
-+ expected = vld1_p8 (exp);
-+ for (i = 0; i < 8; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vext_p8_5 (in1, in2);
-+ for (i = 0; i < 8; i++)
-+ exp[i] = i + 5;
-+ expected = vld1_p8 (exp);
-+ for (i = 0; i < 8; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vext_p8_6 (in1, in2);
-+ for (i = 0; i < 8; i++)
-+ exp[i] = i + 6;
-+ expected = vld1_p8 (exp);
-+ for (i = 0; i < 8; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vext_p8_7 (in1, in2);
-+ for (i = 0; i < 8; i++)
-+ exp[i] = i + 7;
-+ expected = vld1_p8 (exp);
-+ for (i = 0; i < 8; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu32_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu32_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vuzpq_u32' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vuzpqu32.x"
-+
-+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s16_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s16_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vrev32_s16' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vrev32s16.x"
-+
-+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.4h, ?v\[0-9\]+.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp8.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp8.x
-@@ -0,0 +1,26 @@
-+extern void abort (void);
-+
-+poly8x8x2_t
-+test_vuzpp8 (poly8x8_t _a, poly8x8_t _b)
-+{
-+ return vuzp_p8 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ poly8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8};
-+ poly8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16};
-+ poly8x8x2_t result = test_vuzpp8 (vld1_p8 (first), vld1_p8 (second));
-+ poly8_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15};
-+ poly8_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16};
-+ poly8x8_t expect1 = vld1_p8 (exp1);
-+ poly8x8_t expect2 = vld1_p8 (exp2);
-+
-+ for (i = 0; i < 8; i++)
-+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp8_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp8_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vzipq_p8' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vzipqp8.x"
-+
-+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs8_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs8_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vrev32q_s8' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vrev32qs8.x"
-+
-+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s32_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s32_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vrev64_s32' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vrev64s32.x"
-+
-+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.2s, ?v\[0-9\]+.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/simd.exp
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/simd.exp
-@@ -0,0 +1,45 @@
-+# Specific regression driver for AArch64 SIMD instructions.
-+# Copyright (C) 2014 Free Software Foundation, Inc.
-+# Contributed by ARM Ltd.
-+#
-+# This file is part of GCC.
-+#
-+# GCC is free software; you can redistribute it and/or modify it
-+# under the terms of the GNU General Public License as published by
-+# the Free Software Foundation; either version 3, or (at your option)
-+# any later version.
-+#
-+# GCC is distributed in the hope that it will be useful, but
-+# WITHOUT ANY WARRANTY; without even the implied warranty of
-+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+# General Public License for more details.
-+#
-+# You should have received a copy of the GNU General Public License
-+# along with GCC; see the file COPYING3. If not see
-+# <http://www.gnu.org/licenses/>. */
-+
-+# GCC testsuite that uses the `dg.exp' driver.
-+
-+# Exit immediately if this isn't an AArch64 target.
-+if {![istarget aarch64*-*-*] } then {
-+ return
-+}
-+
-+# Load support procs.
-+load_lib gcc-dg.exp
-+
-+# If a testcase doesn't have special options, use these.
-+global DEFAULT_CFLAGS
-+if ![info exists DEFAULT_CFLAGS] then {
-+ set DEFAULT_CFLAGS " -ansi -pedantic-errors"
-+}
-+
-+# Initialize `dg'.
-+dg-init
-+
-+# Main loop.
-+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \
-+ "" $DEFAULT_CFLAGS
-+
-+# All done.
-+dg-finish
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns16.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns16.x
-@@ -0,0 +1,27 @@
-+extern void abort (void);
-+
-+int16x4x2_t
-+test_vtrns16 (int16x4_t _a, int16x4_t _b)
-+{
-+ return vtrn_s16 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ int16_t first[] = {1, 2, 3, 4};
-+ int16_t second[] = {5, 6, 7, 8};
-+ int16x4x2_t result = test_vtrns16 (vld1_s16 (first), vld1_s16 (second));
-+ int16x4_t res1 = result.val[0], res2 = result.val[1];
-+ int16_t exp1[] = {1, 5, 3, 7};
-+ int16_t exp2[] = {2, 6, 4, 8};
-+ int16x4_t expected1 = vld1_s16 (exp1);
-+ int16x4_t expected2 = vld1_s16 (exp2);
-+
-+ for (i = 0; i < 4; i++)
-+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu8_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu8_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vrev64q_u8' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vrev64qu8.x"
-+
-+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp8.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp8.x
-@@ -0,0 +1,22 @@
-+extern void abort (void);
-+
-+poly8x16_t
-+test_vrev64qp8 (poly8x16_t _arg)
-+{
-+ return vrev64q_p8 (_arg);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ poly8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
-+ poly8x16_t reversed = test_vrev64qp8 (inorder);
-+ poly8x16_t expected = {8, 7, 6, 5, 4, 3, 2, 1, 16, 15, 14, 13, 12, 11, 10, 9};
-+
-+ for (i = 0; i < 16; i++)
-+ if (reversed[i] != expected[i])
-+ abort ();
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu16.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu16.x
-@@ -0,0 +1,27 @@
-+extern void abort (void);
-+
-+uint16x4x2_t
-+test_vtrnu16 (uint16x4_t _a, uint16x4_t _b)
-+{
-+ return vtrn_u16 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ uint16_t first[] = {1, 2, 3, 4};
-+ uint16_t second[] = {5, 6, 7, 8};
-+ uint16x4x2_t result = test_vtrnu16 (vld1_u16 (first), vld1_u16 (second));
-+ uint16x4_t res1 = result.val[0], res2 = result.val[1];
-+ uint16_t exp1[] = {1, 5, 3, 7};
-+ uint16_t exp2[] = {2, 6, 4, 8};
-+ uint16x4_t expected1 = vld1_u16 (exp1);
-+ uint16x4_t expected2 = vld1_u16 (exp2);
-+
-+ for (i = 0; i < 4; i++)
-+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p16.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p16.x
-@@ -0,0 +1,58 @@
-+extern void abort (void);
-+
-+poly16x4_t
-+test_vext_p16_1 (poly16x4_t a, poly16x4_t b)
-+{
-+ return vext_p16 (a, b, 1);
-+}
-+
-+poly16x4_t
-+test_vext_p16_2 (poly16x4_t a, poly16x4_t b)
-+{
-+ return vext_p16 (a, b, 2);
-+}
-+
-+poly16x4_t
-+test_vext_p16_3 (poly16x4_t a, poly16x4_t b)
-+{
-+ return vext_p16 (a, b, 3);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i, off;
-+ poly16_t arr1[] = {0, 1, 2, 3};
-+ poly16x4_t in1 = vld1_p16 (arr1);
-+ poly16_t arr2[] = {4, 5, 6, 7};
-+ poly16x4_t in2 = vld1_p16 (arr2);
-+ poly16_t exp[4];
-+ poly16x4_t expected;
-+ poly16x4_t actual = test_vext_p16_1 (in1, in2);
-+
-+ for (i = 0; i < 4; i++)
-+ exp[i] = i + 1;
-+ expected = vld1_p16 (exp);
-+ for (i = 0; i < 4; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vext_p16_2 (in1, in2);
-+ for (i = 0; i < 4; i++)
-+ exp[i] = i + 2;
-+ expected = vld1_p16 (exp);
-+ for (i = 0; i < 4; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vext_p16_3 (in1, in2);
-+ for (i = 0; i < 4; i++)
-+ exp[i] = i + 3;
-+ expected = vld1_p16 (exp);
-+ for (i = 0; i < 4; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp16_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp16_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vuzp_p16' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vuzpp16.x"
-+
-+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu8.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu8.x
-@@ -0,0 +1,29 @@
-+extern void abort (void);
-+
-+uint8x16x2_t
-+test_vzipqu8 (uint8x16_t _a, uint8x16_t _b)
-+{
-+ return vzipq_u8 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ uint8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
-+ uint8_t second[] =
-+ {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32};
-+ uint8x16x2_t result = test_vzipqu8 (vld1q_u8 (first), vld1q_u8 (second));
-+ uint8x16_t res1 = result.val[0], res2 = result.val[1];
-+ uint8_t exp1[] = {1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, 8, 24};
-+ uint8_t exp2[] =
-+ {9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31, 16, 32};
-+ uint8x16_t expected1 = vld1q_u8 (exp1);
-+ uint8x16_t expected2 = vld1q_u8 (exp2);
-+
-+ for (i = 0; i < 16; i++)
-+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u64_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u64_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vextu64' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+
-+#include "arm_neon.h"
-+#include "ext_u64.x"
-+
-+/* Do not scan-assembler. An EXT instruction could be emitted, but would merely
-+ return its first argument, so it is legitimate to optimize it out. */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu32_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu32_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vuzp_u32' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vuzpu32.x"
-+
-+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp16_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp16_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vrev32q_p16' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vrev32qp16.x"
-+
-+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.8h, ?v\[0-9\]+.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_f32.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_f32.x
-@@ -0,0 +1,58 @@
-+extern void abort (void);
-+
-+float32x4_t
-+test_vextq_f32_1 (float32x4_t a, float32x4_t b)
-+{
-+ return vextq_f32 (a, b, 1);
-+}
-+
-+float32x4_t
-+test_vextq_f32_2 (float32x4_t a, float32x4_t b)
-+{
-+ return vextq_f32 (a, b, 2);
-+}
-+
-+float32x4_t
-+test_vextq_f32_3 (float32x4_t a, float32x4_t b)
-+{
-+ return vextq_f32 (a, b, 3);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i, off;
-+ float32_t arr1[] = {0, 1, 2, 3};
-+ float32x4_t in1 = vld1q_f32 (arr1);
-+ float32_t arr2[] = {4, 5, 6, 7};
-+ float32x4_t in2 = vld1q_f32 (arr2);
-+ float32_t exp[4];
-+ float32x4_t expected;
-+ float32x4_t actual = test_vextq_f32_1 (in1, in2);
-+
-+ for (i = 0; i < 4; i++)
-+ exp[i] = i + 1;
-+ expected = vld1q_f32 (exp);
-+ for (i = 0; i < 4; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_f32_2 (in1, in2);
-+ for (i = 0; i < 4; i++)
-+ exp[i] = i + 2;
-+ expected = vld1q_f32 (exp);
-+ for (i = 0; i < 4; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_f32_3 (in1, in2);
-+ for (i = 0; i < 4; i++)
-+ exp[i] = i + 3;
-+ expected = vld1q_f32 (exp);
-+ for (i = 0; i < 4; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp16_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp16_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vzipq_p16' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vzipqp16.x"
-+
-+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp8_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp8_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vtrn_p8' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vtrnp8.x"
-+
-+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u8.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u8.x
-@@ -0,0 +1,227 @@
-+extern void abort (void);
-+
-+uint8x16_t
-+test_vextq_u8_1 (uint8x16_t a, uint8x16_t b)
-+{
-+ return vextq_u8 (a, b, 1);
-+}
-+
-+uint8x16_t
-+test_vextq_u8_2 (uint8x16_t a, uint8x16_t b)
-+{
-+ return vextq_u8 (a, b, 2);
-+}
-+
-+uint8x16_t
-+test_vextq_u8_3 (uint8x16_t a, uint8x16_t b)
-+{
-+ return vextq_u8 (a, b, 3);
-+}
-+
-+uint8x16_t
-+test_vextq_u8_4 (uint8x16_t a, uint8x16_t b)
-+{
-+ return vextq_u8 (a, b, 4);
-+}
-+
-+uint8x16_t
-+test_vextq_u8_5 (uint8x16_t a, uint8x16_t b)
-+{
-+ return vextq_u8 (a, b, 5);
-+}
-+
-+uint8x16_t
-+test_vextq_u8_6 (uint8x16_t a, uint8x16_t b)
-+{
-+ return vextq_u8 (a, b, 6);
-+}
-+
-+uint8x16_t
-+test_vextq_u8_7 (uint8x16_t a, uint8x16_t b)
-+{
-+ return vextq_u8 (a, b, 7);
-+}
-+
-+uint8x16_t
-+test_vextq_u8_8 (uint8x16_t a, uint8x16_t b)
-+{
-+ return vextq_u8 (a, b, 8);
-+}
-+
-+uint8x16_t
-+test_vextq_u8_9 (uint8x16_t a, uint8x16_t b)
-+{
-+ return vextq_u8 (a, b, 9);
-+}
-+
-+uint8x16_t
-+test_vextq_u8_10 (uint8x16_t a, uint8x16_t b)
-+{
-+ return vextq_u8 (a, b, 10);
-+}
-+
-+uint8x16_t
-+test_vextq_u8_11 (uint8x16_t a, uint8x16_t b)
-+{
-+ return vextq_u8 (a, b, 11);
-+}
-+
-+uint8x16_t
-+test_vextq_u8_12 (uint8x16_t a, uint8x16_t b)
-+{
-+ return vextq_u8 (a, b, 12);
-+}
-+
-+uint8x16_t
-+test_vextq_u8_13 (uint8x16_t a, uint8x16_t b)
-+{
-+ return vextq_u8 (a, b, 13);
-+}
-+
-+uint8x16_t
-+test_vextq_u8_14 (uint8x16_t a, uint8x16_t b)
-+{
-+ return vextq_u8 (a, b, 14);
-+}
-+
-+uint8x16_t
-+test_vextq_u8_15 (uint8x16_t a, uint8x16_t b)
-+{
-+ return vextq_u8 (a, b, 15);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ uint8_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
-+ uint8x16_t in1 = vld1q_u8 (arr1);
-+ uint8_t arr2[] =
-+ {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31};
-+ uint8x16_t in2 = vld1q_u8 (arr2);
-+ uint8_t exp[16];
-+ uint8x16_t expected;
-+ uint8x16_t actual = test_vextq_u8_1 (in1, in2);
-+
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 1;
-+ expected = vld1q_u8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_u8_2 (in1, in2);
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 2;
-+ expected = vld1q_u8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_u8_3 (in1, in2);
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 3;
-+ expected = vld1q_u8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_u8_4 (in1, in2);
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 4;
-+ expected = vld1q_u8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_u8_5 (in1, in2);
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 5;
-+ expected = vld1q_u8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_u8_6 (in1, in2);
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 6;
-+ expected = vld1q_u8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_u8_7 (in1, in2);
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 7;
-+ expected = vld1q_u8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_u8_8 (in1, in2);
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 8;
-+ expected = vld1q_u8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_u8_9 (in1, in2);
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 9;
-+ expected = vld1q_u8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_u8_10 (in1, in2);
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 10;
-+ expected = vld1q_u8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_u8_11 (in1, in2);
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 11;
-+ expected = vld1q_u8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_u8_12 (in1, in2);
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 12;
-+ expected = vld1q_u8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_u8_13 (in1, in2);
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 13;
-+ expected = vld1q_u8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_u8_14 (in1, in2);
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 14;
-+ expected = vld1q_u8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_u8_15 (in1, in2);
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 15;
-+ expected = vld1q_u8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu32_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu32_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vzipq_u32' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vzipqu32.x"
-+
-+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p8_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p8_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vrev64_p8' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vrev64p8.x"
-+
-+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u8_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u8_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vrev32_u8' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vrev32u8.x"
-+
-+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16s8_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16s8_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vrev16_s8' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vrev16s8.x"
-+
-+/* { dg-final { scan-assembler-times "rev16\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqf32.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqf32.x
-@@ -0,0 +1,26 @@
-+extern void abort (void);
-+
-+float32x4x2_t
-+test_vuzpqf32 (float32x4_t _a, float32x4_t _b)
-+{
-+ return vuzpq_f32 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ float32_t first[] = {1, 2, 3, 4};
-+ float32_t second[] = {5, 6, 7, 8};
-+ float32x4x2_t result = test_vuzpqf32 (vld1q_f32 (first), vld1q_f32 (second));
-+ float32_t exp1[] = {1, 3, 5, 7};
-+ float32_t exp2[] = {2, 4, 6, 8};
-+ float32x4_t expect1 = vld1q_f32 (exp1);
-+ float32x4_t expect2 = vld1q_f32 (exp2);
-+
-+ for (i = 0; i < 4; i++)
-+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp8.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp8.x
-@@ -0,0 +1,27 @@
-+extern void abort (void);
-+
-+poly8x8x2_t
-+test_vzipp8 (poly8x8_t _a, poly8x8_t _b)
-+{
-+ return vzip_p8 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ poly8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8};
-+ poly8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16};
-+ poly8x8x2_t result = test_vzipp8 (vld1_p8 (first), vld1_p8 (second));
-+ poly8x8_t res1 = result.val[0], res2 = result.val[1];
-+ poly8_t exp1[] = {1, 9, 2, 10, 3, 11, 4, 12};
-+ poly8_t exp2[] = {5, 13, 6, 14, 7, 15, 8, 16};
-+ poly8x8_t expected1 = vld1_p8 (exp1);
-+ poly8x8_t expected2 = vld1_p8 (exp2);
-+
-+ for (i = 0; i < 8; i++)
-+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs32.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs32.x
-@@ -0,0 +1,27 @@
-+extern void abort (void);
-+
-+int32x4x2_t
-+test_vtrnqs32 (int32x4_t _a, int32x4_t _b)
-+{
-+ return vtrnq_s32 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ int32_t first[] = {1, 2, 3, 4};
-+ int32_t second[] = {5, 6, 7, 8};
-+ int32x4x2_t result = test_vtrnqs32 (vld1q_s32 (first), vld1q_s32 (second));
-+ int32x4_t res1 = result.val[0], res2 = result.val[1];
-+ int32_t exp1[] = {1, 5, 3, 7};
-+ int32_t exp2[] = {2, 6, 4, 8};
-+ int32x4_t expected1 = vld1q_s32 (exp1);
-+ int32x4_t expected2 = vld1q_s32 (exp2);
-+
-+ for (i = 0; i < 4; i++)
-+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/int_comparisons_2.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/int_comparisons_2.c
-@@ -0,0 +1,131 @@
-+/* { dg-do run } */
-+/* { dg-options "-O2 -fno-inline" } */
-+/* Stops the test_xxx methods being inlined into main, thus preventing constant
-+ propagation. */
-+
-+#include "int_comparisons.x"
-+
-+extern void abort (void);
-+
-+#define CHECK2(R0, R1) if (res[0] != R0 || res[1] != R1) abort ()
-+
-+#define TEST2(BASETYPE, SUFFIX, RESTYPE, ST1_SUFFIX) { \
-+ BASETYPE##_t _a[2] = {2, 3}; \
-+ BASETYPE##x2_t a = vld1##SUFFIX (_a); \
-+ BASETYPE##_t _b[2] = {1, 3}; \
-+ BASETYPE##x2_t b = vld1##SUFFIX (_b); \
-+ RESTYPE res[2]; \
-+ vst1##ST1_SUFFIX (res, test_vclt##SUFFIX (a, b)); CHECK2 (0, 0); \
-+ vst1##ST1_SUFFIX (res, test_vclt##SUFFIX (b, a)); CHECK2 (-1, 0); \
-+ vst1##ST1_SUFFIX (res, test_vcle##SUFFIX (a, b)); CHECK2 (0, -1); \
-+ vst1##ST1_SUFFIX (res, test_vcle##SUFFIX (b, a)); CHECK2 (-1, -1); \
-+ vst1##ST1_SUFFIX (res, test_vceq##SUFFIX (a, b)); CHECK2 (0, -1); \
-+ vst1##ST1_SUFFIX (res, test_vcge##SUFFIX (a, b)); CHECK2 (-1, -1); \
-+ vst1##ST1_SUFFIX (res, test_vcge##SUFFIX (b, a)); CHECK2 (0, -1); \
-+ vst1##ST1_SUFFIX (res, test_vcgt##SUFFIX (a, b)); CHECK2 (-1, 0); \
-+ vst1##ST1_SUFFIX (res, test_vcgt##SUFFIX (b, a)); CHECK2 (0, 0); \
-+ vst1##ST1_SUFFIX (res, test_vtst##SUFFIX (a, b)); CHECK2 (0, -1); \
-+ vst1##ST1_SUFFIX (res, test_vtst##SUFFIX (a + 1, b)); CHECK2 (-1, 0); \
-+}
-+
-+#define CHECK4(T, R0, R1, R2, R3) \
-+ if (res[0] != (T)R0 || res[1] != (T)R1 \
-+ || res[2] != (T)R2 || res[3] != (T)R3) abort ()
-+
-+#define TEST4(BASETYPE, SUFFIX, RESTYPE, ST1_SUFFIX) { \
-+ BASETYPE##_t _a[4] = {1, 2, 3, 4}; \
-+ BASETYPE##x4_t a = vld1##SUFFIX (_a); \
-+ BASETYPE##_t _b[4] = {4, 2, 1, 3}; \
-+ BASETYPE##x4_t b = vld1##SUFFIX (_b); \
-+ RESTYPE res[4]; \
-+ vst1##ST1_SUFFIX (res, test_vclt##SUFFIX (a, b)); \
-+ CHECK4 (RESTYPE, -1, 0, 0, 0); \
-+ vst1##ST1_SUFFIX (res, test_vcle##SUFFIX (a, b)); \
-+ CHECK4 (RESTYPE, -1, -1, 0, 0); \
-+ vst1##ST1_SUFFIX (res, test_vceq##SUFFIX (a, b)); \
-+ CHECK4 (RESTYPE, 0, -1, 0, 0); \
-+ vst1##ST1_SUFFIX (res, test_vcge##SUFFIX (a, b)); \
-+ CHECK4 (RESTYPE, 0, -1, -1, -1); \
-+ vst1##ST1_SUFFIX (res, test_vcgt##SUFFIX (a, b)); \
-+ CHECK4 (RESTYPE, 0, 0, -1, -1); \
-+ vst1##ST1_SUFFIX (res, test_vtst##SUFFIX (a, b)); \
-+ CHECK4 (RESTYPE, 0, -1, -1, 0); \
-+}
-+
-+#define CHECK8(T, R0, R1, R2, R3, R4, R5, R6, R7) \
-+ if (res[0] != (T)R0 || res[1] != (T)R1 || res[2] != (T)R2 || res[3] != (T)R3 \
-+ || res[4] != (T)R4 || res[5] != (T)R5 || res[6] != (T)R6 \
-+ || res[7] != (T)R7) abort ()
-+
-+#define TEST8(BASETYPE, SUFFIX, RESTYPE, ST1_SUFFIX) { \
-+ BASETYPE##_t _a[8] = {1, 2, 3, 4, 5, 6, 7, 8}; \
-+ BASETYPE##x8_t a = vld1##SUFFIX (_a); \
-+ BASETYPE##_t _b[8] = {4, 2, 1, 3, 2, 6, 8, 9}; \
-+ BASETYPE##x8_t b = vld1##SUFFIX (_b); \
-+ RESTYPE res[8]; \
-+ vst1##ST1_SUFFIX (res, test_vclt##SUFFIX (a, b)); \
-+ CHECK8 (RESTYPE, -1, 0, 0, 0, 0, 0, -1, -1); \
-+ vst1##ST1_SUFFIX (res, test_vcle##SUFFIX (a, b)); \
-+ CHECK8 (RESTYPE, -1, -1, 0, 0, 0, -1, -1, -1); \
-+ vst1##ST1_SUFFIX (res, test_vceq##SUFFIX (a, b)); \
-+ CHECK8 (RESTYPE, 0, -1, 0, 0, 0, -1, 0, 0); \
-+ vst1##ST1_SUFFIX (res, test_vcge##SUFFIX (a, b)); \
-+ CHECK8 (RESTYPE, 0, -1, -1, -1, -1, -1, 0, 0); \
-+ vst1##ST1_SUFFIX (res, test_vcgt##SUFFIX (a, b)); \
-+ CHECK8 (RESTYPE, 0, 0, -1, -1, -1, 0, 0, 0); \
-+ vst1##ST1_SUFFIX (res, test_vtst##SUFFIX (a, b)); \
-+ CHECK8 (RESTYPE, 0, -1, -1, 0, 0, -1, 0, -1); \
-+}
-+
-+/* 16-way tests use same 8 values twice. */
-+#define CHECK16(T, R0, R1, R2, R3, R4, R5, R6, R7) \
-+ if (res[0] != (T)R0 || res[1] != (T)R1 || res[2] != (T)R2 || res[3] != (T)R3 \
-+ || res[4] != (T)R4 || res[5] != (T)R5 || res[6] != (T)R6 \
-+ || res[7] != (T)R7 || res[8] != (T)R0 || res[9] != (T)R1 \
-+ || res[10] != (T)R2 || res[11] != (T)R3 || res[12] != (T)R4 \
-+ || res[13] != (T)R5 || res[14] != (T)R6 || res[15] != (T)R7) abort ()
-+
-+#define TEST16(BASETYPE, SUFFIX, RESTYPE, ST1_SUFFIX) { \
-+ BASETYPE##_t _a[16] = {1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8}; \
-+ BASETYPE##x16_t a = vld1##SUFFIX (_a); \
-+ BASETYPE##_t _b[16] = {4, 2, 1, 3, 2, 6, 8, 9, 4, 2, 1, 3, 2, 6, 8, 9}; \
-+ BASETYPE##x16_t b = vld1##SUFFIX (_b); \
-+ RESTYPE res[16]; \
-+ vst1##ST1_SUFFIX (res, test_vclt##SUFFIX (a, b)); \
-+ CHECK16 (RESTYPE, -1, 0, 0, 0, 0, 0, -1, -1); \
-+ vst1##ST1_SUFFIX (res, test_vcle##SUFFIX (a, b)); \
-+ CHECK16 (RESTYPE, -1, -1, 0, 0, 0, -1, -1, -1); \
-+ vst1##ST1_SUFFIX (res, test_vceq##SUFFIX (a, b)); \
-+ CHECK16 (RESTYPE, 0, -1, 0, 0, 0, -1, 0, 0); \
-+ vst1##ST1_SUFFIX (res, test_vcge##SUFFIX (a, b)); \
-+ CHECK16 (RESTYPE, 0, -1, -1, -1, -1, -1, 0, 0); \
-+ vst1##ST1_SUFFIX (res, test_vcgt##SUFFIX (a, b)); \
-+ CHECK16 (RESTYPE, 0, 0, -1, -1, -1, 0, 0, 0); \
-+ vst1##ST1_SUFFIX (res, test_vtst##SUFFIX (a, b)); \
-+ CHECK16 (RESTYPE, 0, -1, -1, 0, 0, -1, 0, -1); \
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ TEST2 (int32, _s32, uint32_t, _u32);
-+ TEST2 (uint32, _u32, uint32_t, _u32);
-+ TEST2 (int64, q_s64, uint64_t, q_u64);
-+ TEST2 (uint64, q_u64, uint64_t, q_u64);
-+
-+ TEST4 (int16, _s16, uint16_t, _u16);
-+ TEST4 (uint16, _u16, uint16_t, _u16);
-+ TEST4 (int32, q_s32, uint32_t, q_u32);
-+ TEST4 (uint32, q_u32, uint32_t, q_u32);
-+
-+ TEST8 (int8, _s8, uint8_t, _u8);
-+ TEST8 (uint8, _u8, uint8_t, _u8);
-+ TEST8 (int16, q_s16, uint16_t, q_u16);
-+ TEST8 (uint16, q_u16, uint16_t, q_u16);
-+
-+ TEST16 (int8, q_s8, uint8_t, q_u8);
-+ TEST16 (uint8, q_u8, uint8_t, q_u8);
-+
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu32.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu32.x
-@@ -0,0 +1,27 @@
-+extern void abort (void);
-+
-+uint32x4x2_t
-+test_vtrnqu32 (uint32x4_t _a, uint32x4_t _b)
-+{
-+ return vtrnq_u32 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ uint32_t first[] = {1, 2, 3, 4};
-+ uint32_t second[] = {5, 6, 7, 8};
-+ uint32x4x2_t result = test_vtrnqu32 (vld1q_u32 (first), vld1q_u32 (second));
-+ uint32x4_t res1 = result.val[0], res2 = result.val[1];
-+ uint32_t exp1[] = {1, 5, 3, 7};
-+ uint32_t exp2[] = {2, 6, 4, 8};
-+ uint32x4_t expected1 = vld1q_u32 (exp1);
-+ uint32x4_t expected2 = vld1q_u32 (exp2);
-+
-+ for (i = 0; i < 4; i++)
-+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs32.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs32.x
-@@ -0,0 +1,22 @@
-+extern void abort (void);
-+
-+int32x4_t
-+test_vrev64qs32 (int32x4_t _arg)
-+{
-+ return vrev64q_s32 (_arg);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ int32x4_t inorder = {1, 2, 3, 4};
-+ int32x4_t reversed = test_vrev64qs32 (inorder);
-+ int32x4_t expected = {2, 1, 4, 3};
-+
-+ for (i = 0; i < 4; i++)
-+ if (reversed[i] != expected[i])
-+ abort ();
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu8.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu8.x
-@@ -0,0 +1,27 @@
-+extern void abort (void);
-+
-+uint8x8x2_t
-+test_vtrnu8 (uint8x8_t _a, uint8x8_t _b)
-+{
-+ return vtrn_u8 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ uint8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8};
-+ uint8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16};
-+ uint8x8x2_t result = test_vtrnu8 (vld1_u8 (first), vld1_u8 (second));
-+ uint8x8_t res1 = result.val[0], res2 = result.val[1];
-+ uint8_t exp1[] = {1, 9, 3, 11, 5, 13, 7, 15};
-+ uint8_t exp2[] = {2, 10, 4, 12, 6, 14, 8, 16};
-+ uint8x8_t expected1 = vld1_u8 (exp1);
-+ uint8x8_t expected2 = vld1_u8 (exp2);
-+
-+ for (i = 0; i < 8; i++)
-+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu32.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu32.x
-@@ -0,0 +1,22 @@
-+extern void abort (void);
-+
-+uint32x4_t
-+test_vrev64qu32 (uint32x4_t _arg)
-+{
-+ return vrev64q_u32 (_arg);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ uint32x4_t inorder = {1, 2, 3, 4};
-+ uint32x4_t reversed = test_vrev64qu32 (inorder);
-+ uint32x4_t expected = {2, 1, 4, 3};
-+
-+ for (i = 0; i < 4; i++)
-+ if (reversed[i] != expected[i])
-+ abort ();
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s64_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s64_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vextQs64' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+
-+#include "arm_neon.h"
-+#include "extq_s64.x"
-+
-+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.8\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s8.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s8.x
-@@ -0,0 +1,114 @@
-+extern void abort (void);
-+
-+int8x8_t
-+test_vext_s8_1 (int8x8_t a, int8x8_t b)
-+{
-+ return vext_s8 (a, b, 1);
-+}
-+
-+int8x8_t
-+test_vext_s8_2 (int8x8_t a, int8x8_t b)
-+{
-+ return vext_s8 (a, b, 2);
-+}
-+
-+int8x8_t
-+test_vext_s8_3 (int8x8_t a, int8x8_t b)
-+{
-+ return vext_s8 (a, b, 3);
-+}
-+
-+int8x8_t
-+test_vext_s8_4 (int8x8_t a, int8x8_t b)
-+{
-+ return vext_s8 (a, b, 4);
-+}
-+
-+int8x8_t
-+test_vext_s8_5 (int8x8_t a, int8x8_t b)
-+{
-+ return vext_s8 (a, b, 5);
-+}
-+
-+int8x8_t
-+test_vext_s8_6 (int8x8_t a, int8x8_t b)
-+{
-+ return vext_s8 (a, b, 6);
-+}
-+
-+int8x8_t
-+test_vext_s8_7 (int8x8_t a, int8x8_t b)
-+{
-+ return vext_s8 (a, b, 7);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i, off;
-+ int8_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7};
-+ int8x8_t in1 = vld1_s8 (arr1);
-+ int8_t arr2[] = {8, 9, 10, 11, 12, 13, 14, 15};
-+ int8x8_t in2 = vld1_s8 (arr2);
-+ int8_t exp[8];
-+ int8x8_t expected;
-+ int8x8_t actual = test_vext_s8_1 (in1, in2);
-+
-+ for (i = 0; i < 8; i++)
-+ exp[i] = i + 1;
-+ expected = vld1_s8 (exp);
-+ for (i = 0; i < 8; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vext_s8_2 (in1, in2);
-+ for (i = 0; i < 8; i++)
-+ exp[i] = i + 2;
-+ expected = vld1_s8 (exp);
-+ for (i = 0; i < 8; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vext_s8_3 (in1, in2);
-+ for (i = 0; i < 8; i++)
-+ exp[i] = i + 3;
-+ expected = vld1_s8 (exp);
-+ for (i = 0; i < 8; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vext_s8_4 (in1, in2);
-+ for (i = 0; i < 8; i++)
-+ exp[i] = i + 4;
-+ expected = vld1_s8 (exp);
-+ for (i = 0; i < 8; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vext_s8_5 (in1, in2);
-+ for (i = 0; i < 8; i++)
-+ exp[i] = i + 5;
-+ expected = vld1_s8 (exp);
-+ for (i = 0; i < 8; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vext_s8_6 (in1, in2);
-+ for (i = 0; i < 8; i++)
-+ exp[i] = i + 6;
-+ expected = vld1_s8 (exp);
-+ for (i = 0; i < 8; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vext_s8_7 (in1, in2);
-+ for (i = 0; i < 8; i++)
-+ exp[i] = i + 7;
-+ expected = vld1_s8 (exp);
-+ for (i = 0; i < 8; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzips32_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzips32_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vzip_s32' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vzips32.x"
-+
-+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp16_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp16_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vtrn_p16' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vtrnp16.x"
-+
-+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp8_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp8_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vrev32q_p8' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vrev32qp8.x"
-+
-+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu32_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu32_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vtrn_u32' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vtrnu32.x"
-+
-+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps8.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps8.x
-@@ -0,0 +1,26 @@
-+extern void abort (void);
-+
-+int8x8x2_t
-+test_vuzps8 (int8x8_t _a, int8x8_t _b)
-+{
-+ return vuzp_s8 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ int8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8};
-+ int8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16};
-+ int8x8x2_t result = test_vuzps8 (vld1_s8 (first), vld1_s8 (second));
-+ int8_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15};
-+ int8_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16};
-+ int8x8_t expect1 = vld1_s8 (exp1);
-+ int8x8_t expect2 = vld1_s8 (exp2);
-+
-+ for (i = 0; i < 8; i++)
-+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu8_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu8_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vzipq_u8' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vzipqu8.x"
-+
-+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp8.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp8.x
-@@ -0,0 +1,29 @@
-+extern void abort (void);
-+
-+poly8x16x2_t
-+test_vzipqp8 (poly8x16_t _a, poly8x16_t _b)
-+{
-+ return vzipq_p8 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ poly8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
-+ poly8_t second[] =
-+ {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32};
-+ poly8x16x2_t result = test_vzipqp8 (vld1q_p8 (first), vld1q_p8 (second));
-+ poly8x16_t res1 = result.val[0], res2 = result.val[1];
-+ poly8_t exp1[] = {1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, 8, 24};
-+ poly8_t exp2[] =
-+ {9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31, 16, 32};
-+ poly8x16_t expected1 = vld1q_p8 (exp1);
-+ poly8x16_t expected2 = vld1q_p8 (exp2);
-+
-+ for (i = 0; i < 16; i++)
-+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p16_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p16_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vextp16' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+
-+#include "arm_neon.h"
-+#include "ext_p16.x"
-+
-+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s16.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s16.x
-@@ -0,0 +1,22 @@
-+extern void abort (void);
-+
-+int16x4_t
-+test_vrev32s16 (int16x4_t _arg)
-+{
-+ return vrev32_s16 (_arg);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ int16x4_t inorder = {1, 2, 3, 4};
-+ int16x4_t reversed = test_vrev32s16 (inorder);
-+ int16x4_t expected = {2, 1, 4, 3};
-+
-+ for (i = 0; i < 4; i++)
-+ if (reversed[i] != expected[i])
-+ abort ();
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u16.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u16.x
-@@ -0,0 +1,22 @@
-+extern void abort (void);
-+
-+uint16x4_t
-+test_vrev32u16 (uint16x4_t _arg)
-+{
-+ return vrev32_u16 (_arg);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ uint16x4_t inorder = {1, 2, 3, 4};
-+ uint16x4_t reversed = test_vrev32u16 (inorder);
-+ uint16x4_t expected = {2, 1, 4, 3};
-+
-+ for (i = 0; i < 4; i++)
-+ if (reversed[i] != expected[i])
-+ abort ();
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p16.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p16.x
-@@ -0,0 +1,22 @@
-+extern void abort (void);
-+
-+poly16x4_t
-+test_vrev64p16 (poly16x4_t _arg)
-+{
-+ return vrev64_p16 (_arg);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ poly16x4_t inorder = {1, 2, 3, 4};
-+ poly16x4_t reversed = test_vrev64p16 (inorder);
-+ poly16x4_t expected = {4, 3, 2, 1};
-+
-+ for (i = 0; i < 4; i++)
-+ if (reversed[i] != expected[i])
-+ abort ();
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qf32_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qf32_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vrev64q_f32' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vrev64qf32.x"
-+
-+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.4s, ?v\[0-9\]+.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqf32.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqf32.x
-@@ -0,0 +1,27 @@
-+extern void abort (void);
-+
-+float32x4x2_t
-+test_vzipqf32 (float32x4_t _a, float32x4_t _b)
-+{
-+ return vzipq_f32 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ float32_t first[] = {1, 2, 3, 4};
-+ float32_t second[] = {5, 6, 7, 8};
-+ float32x4x2_t result = test_vzipqf32 (vld1q_f32 (first), vld1q_f32 (second));
-+ float32x4_t res1 = result.val[0], res2 = result.val[1];
-+ float32_t exp1[] = {1, 5, 2, 6};
-+ float32_t exp2[] = {3, 7, 4, 8};
-+ float32x4_t expected1 = vld1q_f32 (exp1);
-+ float32x4_t expected2 = vld1q_f32 (exp2);
-+
-+ for (i = 0; i < 4; i++)
-+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u32_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u32_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vextu32' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+
-+#include "arm_neon.h"
-+#include "ext_u32.x"
-+
-+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#\[0-9\]+\(?:.4)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p8.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p8.x
-@@ -0,0 +1,227 @@
-+extern void abort (void);
-+
-+poly8x16_t
-+test_vextq_p8_1 (poly8x16_t a, poly8x16_t b)
-+{
-+ return vextq_p8 (a, b, 1);
-+}
-+
-+poly8x16_t
-+test_vextq_p8_2 (poly8x16_t a, poly8x16_t b)
-+{
-+ return vextq_p8 (a, b, 2);
-+}
-+
-+poly8x16_t
-+test_vextq_p8_3 (poly8x16_t a, poly8x16_t b)
-+{
-+ return vextq_p8 (a, b, 3);
-+}
-+
-+poly8x16_t
-+test_vextq_p8_4 (poly8x16_t a, poly8x16_t b)
-+{
-+ return vextq_p8 (a, b, 4);
-+}
-+
-+poly8x16_t
-+test_vextq_p8_5 (poly8x16_t a, poly8x16_t b)
-+{
-+ return vextq_p8 (a, b, 5);
-+}
-+
-+poly8x16_t
-+test_vextq_p8_6 (poly8x16_t a, poly8x16_t b)
-+{
-+ return vextq_p8 (a, b, 6);
-+}
-+
-+poly8x16_t
-+test_vextq_p8_7 (poly8x16_t a, poly8x16_t b)
-+{
-+ return vextq_p8 (a, b, 7);
-+}
-+
-+poly8x16_t
-+test_vextq_p8_8 (poly8x16_t a, poly8x16_t b)
-+{
-+ return vextq_p8 (a, b, 8);
-+}
-+
-+poly8x16_t
-+test_vextq_p8_9 (poly8x16_t a, poly8x16_t b)
-+{
-+ return vextq_p8 (a, b, 9);
-+}
-+
-+poly8x16_t
-+test_vextq_p8_10 (poly8x16_t a, poly8x16_t b)
-+{
-+ return vextq_p8 (a, b, 10);
-+}
-+
-+poly8x16_t
-+test_vextq_p8_11 (poly8x16_t a, poly8x16_t b)
-+{
-+ return vextq_p8 (a, b, 11);
-+}
-+
-+poly8x16_t
-+test_vextq_p8_12 (poly8x16_t a, poly8x16_t b)
-+{
-+ return vextq_p8 (a, b, 12);
-+}
-+
-+poly8x16_t
-+test_vextq_p8_13 (poly8x16_t a, poly8x16_t b)
-+{
-+ return vextq_p8 (a, b, 13);
-+}
-+
-+poly8x16_t
-+test_vextq_p8_14 (poly8x16_t a, poly8x16_t b)
-+{
-+ return vextq_p8 (a, b, 14);
-+}
-+
-+poly8x16_t
-+test_vextq_p8_15 (poly8x16_t a, poly8x16_t b)
-+{
-+ return vextq_p8 (a, b, 15);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ poly8_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
-+ poly8x16_t in1 = vld1q_p8 (arr1);
-+ poly8_t arr2[] =
-+ {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31};
-+ poly8x16_t in2 = vld1q_p8 (arr2);
-+ poly8_t exp[16];
-+ poly8x16_t expected;
-+ poly8x16_t actual = test_vextq_p8_1 (in1, in2);
-+
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 1;
-+ expected = vld1q_p8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_p8_2 (in1, in2);
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 2;
-+ expected = vld1q_p8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_p8_3 (in1, in2);
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 3;
-+ expected = vld1q_p8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_p8_4 (in1, in2);
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 4;
-+ expected = vld1q_p8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_p8_5 (in1, in2);
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 5;
-+ expected = vld1q_p8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_p8_6 (in1, in2);
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 6;
-+ expected = vld1q_p8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_p8_7 (in1, in2);
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 7;
-+ expected = vld1q_p8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_p8_8 (in1, in2);
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 8;
-+ expected = vld1q_p8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_p8_9 (in1, in2);
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 9;
-+ expected = vld1q_p8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_p8_10 (in1, in2);
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 10;
-+ expected = vld1q_p8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_p8_11 (in1, in2);
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 11;
-+ expected = vld1q_p8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_p8_12 (in1, in2);
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 12;
-+ expected = vld1q_p8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_p8_13 (in1, in2);
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 13;
-+ expected = vld1q_p8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_p8_14 (in1, in2);
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 14;
-+ expected = vld1q_p8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_p8_15 (in1, in2);
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 15;
-+ expected = vld1q_p8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs8.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs8.x
-@@ -0,0 +1,22 @@
-+extern void abort (void);
-+
-+int8x16_t
-+test_vrev64qs8 (int8x16_t _arg)
-+{
-+ return vrev64q_s8 (_arg);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ int8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
-+ int8x16_t reversed = test_vrev64qs8 (inorder);
-+ int8x16_t expected = {8, 7, 6, 5, 4, 3, 2, 1, 16, 15, 14, 13, 12, 11, 10, 9};
-+
-+ for (i = 0; i < 16; i++)
-+ if (reversed[i] != expected[i])
-+ abort ();
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16p8_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16p8_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vrev16_p8' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vrev16p8.x"
-+
-+/* { dg-final { scan-assembler-times "rev16\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs32_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs32_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vtrnq_s32' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vtrnqs32.x"
-+
-+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps16.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps16.x
-@@ -0,0 +1,26 @@
-+extern void abort (void);
-+
-+int16x4x2_t
-+test_vuzps16 (int16x4_t _a, int16x4_t _b)
-+{
-+ return vuzp_s16 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ int16_t first[] = {1, 2, 3, 4};
-+ int16_t second[] = {5, 6, 7, 8};
-+ int16x4x2_t result = test_vuzps16 (vld1_s16 (first), vld1_s16 (second));
-+ int16_t exp1[] = {1, 3, 5, 7};
-+ int16_t exp2[] = {2, 4, 6, 8};
-+ int16x4_t expect1 = vld1_s16 (exp1);
-+ int16x4_t expect2 = vld1_s16 (exp2);
-+
-+ for (i = 0; i < 4; i++)
-+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu16.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu16.x
-@@ -0,0 +1,26 @@
-+extern void abort (void);
-+
-+uint16x4x2_t
-+test_vuzpu16 (uint16x4_t _a, uint16x4_t _b)
-+{
-+ return vuzp_u16 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ uint16_t first[] = {1, 2, 3, 4};
-+ uint16_t second[] = {5, 6, 7, 8};
-+ uint16x4x2_t result = test_vuzpu16 (vld1_u16 (first), vld1_u16 (second));
-+ uint16_t exp1[] = {1, 3, 5, 7};
-+ uint16_t exp2[] = {2, 4, 6, 8};
-+ uint16x4_t expect1 = vld1_u16 (exp1);
-+ uint16x4_t expect2 = vld1_u16 (exp2);
-+
-+ for (i = 0; i < 4; i++)
-+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu8_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu8_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vtrn_u8' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vtrnu8.x"
-+
-+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp8.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp8.x
-@@ -0,0 +1,27 @@
-+extern void abort (void);
-+
-+poly8x8x2_t
-+test_vtrnp8 (poly8x8_t _a, poly8x8_t _b)
-+{
-+ return vtrn_p8 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ poly8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8};
-+ poly8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16};
-+ poly8x8x2_t result = test_vtrnp8 (vld1_p8 (first), vld1_p8 (second));
-+ poly8x8_t res1 = result.val[0], res2 = result.val[1];
-+ poly8_t exp1[] = {1, 9, 3, 11, 5, 13, 7, 15};
-+ poly8_t exp2[] = {2, 10, 4, 12, 6, 14, 8, 16};
-+ poly8x8_t expected1 = vld1_p8 (exp1);
-+ poly8x8_t expected2 = vld1_p8 (exp2);
-+
-+ for (i = 0; i < 8; i++)
-+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs16.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs16.x
-@@ -0,0 +1,22 @@
-+extern void abort (void);
-+
-+int16x8_t
-+test_vrev32qs16 (int16x8_t _arg)
-+{
-+ return vrev32q_s16 (_arg);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ int16x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
-+ int16x8_t reversed = test_vrev32qs16 (inorder);
-+ int16x8_t expected = {2, 1, 4, 3, 6, 5, 8, 7};
-+
-+ for (i = 0; i < 8; i++)
-+ if (reversed[i] != expected[i])
-+ abort ();
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64f32_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64f32_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vrev64_f32' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vrev64f32.x"
-+
-+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.2s, ?v\[0-9\]+.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzips8.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzips8.x
-@@ -0,0 +1,27 @@
-+extern void abort (void);
-+
-+int8x8x2_t
-+test_vzips8 (int8x8_t _a, int8x8_t _b)
-+{
-+ return vzip_s8 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ int8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8};
-+ int8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16};
-+ int8x8x2_t result = test_vzips8 (vld1_s8 (first), vld1_s8 (second));
-+ int8x8_t res1 = result.val[0], res2 = result.val[1];
-+ int8_t exp1[] = {1, 9, 2, 10, 3, 11, 4, 12};
-+ int8_t exp2[] = {5, 13, 6, 14, 7, 15, 8, 16};
-+ int8x8_t expected1 = vld1_s8 (exp1);
-+ int8x8_t expected2 = vld1_s8 (exp2);
-+
-+ for (i = 0; i < 8; i++)
-+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s32_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s32_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vextQs32' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+
-+#include "arm_neon.h"
-+#include "extq_s32.x"
-+
-+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.4)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu16.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu16.x
-@@ -0,0 +1,22 @@
-+extern void abort (void);
-+
-+uint16x8_t
-+test_vrev32qu16 (uint16x8_t _arg)
-+{
-+ return vrev32q_u16 (_arg);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ uint16x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
-+ uint16x8_t reversed = test_vrev32qu16 (inorder);
-+ uint16x8_t expected = {2, 1, 4, 3, 6, 5, 8, 7};
-+
-+ for (i = 0; i < 8; i++)
-+ if (reversed[i] != expected[i])
-+ abort ();
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu16_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu16_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vrev64q_u16' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vrev64qu16.x"
-+
-+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.8h, ?v\[0-9\]+.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u8_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u8_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vrev64_u8' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vrev64u8.x"
-+
-+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnf32.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnf32.x
-@@ -0,0 +1,27 @@
-+extern void abort (void);
-+
-+float32x2x2_t
-+test_vtrnf32 (float32x2_t _a, float32x2_t _b)
-+{
-+ return vtrn_f32 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ float32_t first[] = {1, 2};
-+ float32_t second[] = {3, 4};
-+ float32x2x2_t result = test_vtrnf32 (vld1_f32 (first), vld1_f32 (second));
-+ float32x2_t res1 = result.val[0], res2 = result.val[1];
-+ float32_t exp1[] = {1, 3};
-+ float32_t exp2[] = {2, 4};
-+ float32x2_t expected1 = vld1_f32 (exp1);
-+ float32x2_t expected2 = vld1_f32 (exp2);
-+
-+ for (i = 0; i < 2; i++)
-+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s8_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s8_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vexts8' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+
-+#include "arm_neon.h"
-+#include "ext_s8.x"
-+
-+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#?\[0-9\]+\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16u8.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16u8.x
-@@ -0,0 +1,22 @@
-+extern void abort (void);
-+
-+uint8x8_t
-+test_vrev16u8 (uint8x8_t _arg)
-+{
-+ return vrev16_u8 (_arg);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ uint8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
-+ uint8x8_t reversed = test_vrev16u8 (inorder);
-+ uint8x8_t expected = {2, 1, 4, 3, 6, 5, 8, 7};
-+
-+ for (i = 0; i < 8; i++)
-+ if (reversed[i] != expected[i])
-+ abort ();
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs16_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs16_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vuzpq_s16' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vuzpqs16.x"
-+
-+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s64.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s64.x
-@@ -0,0 +1,30 @@
-+extern void abort (void);
-+
-+int64x2_t
-+test_vextq_s64_1 (int64x2_t a, int64x2_t b)
-+{
-+ return vextq_s64 (a, b, 1);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i, off;
-+ int64_t arr1[] = {0, 1};
-+ int64x2_t in1 = vld1q_s64 (arr1);
-+ int64_t arr2[] = {2, 3};
-+ int64x2_t in2 = vld1q_s64 (arr2);
-+ int64_t exp[2];
-+ int64x2_t expected;
-+ int64x2_t actual = test_vextq_s64_1 (in1, in2);
-+
-+ for (i = 0; i < 2; i++)
-+ exp[i] = i + 1;
-+ expected = vld1q_s64 (exp);
-+ for (i = 0; i < 2; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp16.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp16.x
-@@ -0,0 +1,27 @@
-+extern void abort (void);
-+
-+poly16x4x2_t
-+test_vzipp16 (poly16x4_t _a, poly16x4_t _b)
-+{
-+ return vzip_p16 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ poly16_t first[] = {1, 2, 3, 4};
-+ poly16_t second[] = {5, 6, 7, 8};
-+ poly16x4x2_t result = test_vzipp16 (vld1_p16 (first), vld1_p16 (second));
-+ poly16x4_t res1 = result.val[0], res2 = result.val[1];
-+ poly16_t exp1[] = {1, 5, 2, 6};
-+ poly16_t exp2[] = {3, 7, 4, 8};
-+ poly16x4_t expected1 = vld1_p16 (exp1);
-+ poly16x4_t expected2 = vld1_p16 (exp2);
-+
-+ for (i = 0; i < 4; i++)
-+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u64.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u64.x
-@@ -0,0 +1,30 @@
-+extern void abort (void);
-+
-+uint64x2_t
-+test_vextq_u64_1 (uint64x2_t a, uint64x2_t b)
-+{
-+ return vextq_u64 (a, b, 1);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i, off;
-+ uint64_t arr1[] = {0, 1};
-+ uint64x2_t in1 = vld1q_u64 (arr1);
-+ uint64_t arr2[] = {2, 3};
-+ uint64x2_t in2 = vld1q_u64 (arr2);
-+ uint64_t exp[2];
-+ uint64x2_t expected;
-+ uint64x2_t actual = test_vextq_u64_1 (in1, in2);
-+
-+ for (i = 0; i < 2; i++)
-+ exp[i] = i + 1;
-+ expected = vld1q_u64 (exp);
-+ for (i = 0; i < 2; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu8_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu8_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vrev32q_u8' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vrev32qu8.x"
-+
-+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u16_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u16_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vrev64_u16' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vrev64u16.x"
-+
-+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.4h, ?v\[0-9\]+.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs8.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs8.x
-@@ -0,0 +1,29 @@
-+extern void abort (void);
-+
-+int8x16x2_t
-+test_vzipqs8 (int8x16_t _a, int8x16_t _b)
-+{
-+ return vzipq_s8 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ int8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
-+ int8_t second[] =
-+ {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32};
-+ int8x16x2_t result = test_vzipqs8 (vld1q_s8 (first), vld1q_s8 (second));
-+ int8x16_t res1 = result.val[0], res2 = result.val[1];
-+ int8_t exp1[] = {1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, 8, 24};
-+ int8_t exp2[] =
-+ {9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31, 16, 32};
-+ int8x16_t expected1 = vld1q_s8 (exp1);
-+ int8x16_t expected2 = vld1q_s8 (exp2);
-+
-+ for (i = 0; i < 16; i++)
-+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu8.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu8.x
-@@ -0,0 +1,28 @@
-+extern void abort (void);
-+
-+uint8x16x2_t
-+test_vtrnqu8 (uint8x16_t _a, uint8x16_t _b)
-+{
-+ return vtrnq_u8 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ uint8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
-+ uint8_t second[] =
-+ {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32};
-+ uint8x16x2_t result = test_vtrnqu8 (vld1q_u8 (first), vld1q_u8 (second));
-+ uint8x16_t res1 = result.val[0], res2 = result.val[1];
-+ uint8_t exp1[] = {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31};
-+ uint8_t exp2[] = {2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30, 16, 32};
-+ uint8x16_t expected1 = vld1q_u8 (exp1);
-+ uint8x16_t expected2 = vld1q_u8 (exp2);
-+
-+ for (i = 0; i < 16; i++)
-+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s32.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s32.x
-@@ -0,0 +1,30 @@
-+extern void abort (void);
-+
-+int32x2_t
-+test_vext_s32_1 (int32x2_t a, int32x2_t b)
-+{
-+ return vext_s32 (a, b, 1);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i, off;
-+ int32_t arr1[] = {0, 1};
-+ int32x2_t in1 = vld1_s32 (arr1);
-+ int32_t arr2[] = {2, 3};
-+ int32x2_t in2 = vld1_s32 (arr2);
-+ int32_t exp[2];
-+ int32x2_t expected;
-+ int32x2_t actual = test_vext_s32_1 (in1, in2);
-+
-+ for (i = 0; i < 2; i++)
-+ exp[i] = i + 1;
-+ expected = vld1_s32 (exp);
-+ for (i = 0; i < 2; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps16_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps16_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vuzp_s16' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vuzps16.x"
-+
-+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u32.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u32.x
-@@ -0,0 +1,30 @@
-+extern void abort (void);
-+
-+uint32x2_t
-+test_vext_u32_1 (uint32x2_t a, uint32x2_t b)
-+{
-+ return vext_u32 (a, b, 1);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i, off;
-+ uint32_t arr1[] = {0, 1};
-+ uint32x2_t in1 = vld1_u32 (arr1);
-+ uint32_t arr2[] = {2, 3};
-+ uint32x2_t in2 = vld1_u32 (arr2);
-+ uint32_t exp[2];
-+ uint32x2_t expected;
-+ uint32x2_t actual = test_vext_u32_1 (in1, in2);
-+
-+ for (i = 0; i < 2; i++)
-+ exp[i] = i + 1;
-+ expected = vld1_u32 (exp);
-+ for (i = 0; i < 2; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs8_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs8_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vuzpq_s8' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vuzpqs8.x"
-+
-+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s8.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s8.x
-@@ -0,0 +1,227 @@
-+extern void abort (void);
-+
-+int8x16_t
-+test_vextq_s8_1 (int8x16_t a, int8x16_t b)
-+{
-+ return vextq_s8 (a, b, 1);
-+}
-+
-+int8x16_t
-+test_vextq_s8_2 (int8x16_t a, int8x16_t b)
-+{
-+ return vextq_s8 (a, b, 2);
-+}
-+
-+int8x16_t
-+test_vextq_s8_3 (int8x16_t a, int8x16_t b)
-+{
-+ return vextq_s8 (a, b, 3);
-+}
-+
-+int8x16_t
-+test_vextq_s8_4 (int8x16_t a, int8x16_t b)
-+{
-+ return vextq_s8 (a, b, 4);
-+}
-+
-+int8x16_t
-+test_vextq_s8_5 (int8x16_t a, int8x16_t b)
-+{
-+ return vextq_s8 (a, b, 5);
-+}
-+
-+int8x16_t
-+test_vextq_s8_6 (int8x16_t a, int8x16_t b)
-+{
-+ return vextq_s8 (a, b, 6);
-+}
-+
-+int8x16_t
-+test_vextq_s8_7 (int8x16_t a, int8x16_t b)
-+{
-+ return vextq_s8 (a, b, 7);
-+}
-+
-+int8x16_t
-+test_vextq_s8_8 (int8x16_t a, int8x16_t b)
-+{
-+ return vextq_s8 (a, b, 8);
-+}
-+
-+int8x16_t
-+test_vextq_s8_9 (int8x16_t a, int8x16_t b)
-+{
-+ return vextq_s8 (a, b, 9);
-+}
-+
-+int8x16_t
-+test_vextq_s8_10 (int8x16_t a, int8x16_t b)
-+{
-+ return vextq_s8 (a, b, 10);
-+}
-+
-+int8x16_t
-+test_vextq_s8_11 (int8x16_t a, int8x16_t b)
-+{
-+ return vextq_s8 (a, b, 11);
-+}
-+
-+int8x16_t
-+test_vextq_s8_12 (int8x16_t a, int8x16_t b)
-+{
-+ return vextq_s8 (a, b, 12);
-+}
-+
-+int8x16_t
-+test_vextq_s8_13 (int8x16_t a, int8x16_t b)
-+{
-+ return vextq_s8 (a, b, 13);
-+}
-+
-+int8x16_t
-+test_vextq_s8_14 (int8x16_t a, int8x16_t b)
-+{
-+ return vextq_s8 (a, b, 14);
-+}
-+
-+int8x16_t
-+test_vextq_s8_15 (int8x16_t a, int8x16_t b)
-+{
-+ return vextq_s8 (a, b, 15);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ int8_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
-+ int8x16_t in1 = vld1q_s8 (arr1);
-+ int8_t arr2[] =
-+ {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31};
-+ int8x16_t in2 = vld1q_s8 (arr2);
-+ int8_t exp[16];
-+ int8x16_t expected;
-+ int8x16_t actual = test_vextq_s8_1 (in1, in2);
-+
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 1;
-+ expected = vld1q_s8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_s8_2 (in1, in2);
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 2;
-+ expected = vld1q_s8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_s8_3 (in1, in2);
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 3;
-+ expected = vld1q_s8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_s8_4 (in1, in2);
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 4;
-+ expected = vld1q_s8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_s8_5 (in1, in2);
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 5;
-+ expected = vld1q_s8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_s8_6 (in1, in2);
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 6;
-+ expected = vld1q_s8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_s8_7 (in1, in2);
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 7;
-+ expected = vld1q_s8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_s8_8 (in1, in2);
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 8;
-+ expected = vld1q_s8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_s8_9 (in1, in2);
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 9;
-+ expected = vld1q_s8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_s8_10 (in1, in2);
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 10;
-+ expected = vld1q_s8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_s8_11 (in1, in2);
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 11;
-+ expected = vld1q_s8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_s8_12 (in1, in2);
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 12;
-+ expected = vld1q_s8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_s8_13 (in1, in2);
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 13;
-+ expected = vld1q_s8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_s8_14 (in1, in2);
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 14;
-+ expected = vld1q_s8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_s8_15 (in1, in2);
-+ for (i = 0; i < 16; i++)
-+ exp[i] = i + 15;
-+ expected = vld1q_s8 (exp);
-+ for (i = 0; i < 16; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_f64_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_f64_1.c
-@@ -0,0 +1,36 @@
-+/* Test the `vextq_f64' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+
-+#include "arm_neon.h"
-+extern void abort (void);
-+#include <stdio.h>
-+
-+float64x2_t
-+test_vextq_f64_1 (float64x2_t a, float64x2_t b)
-+{
-+ return vextq_f64 (a, b, 1);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i, off;
-+ float64_t arr1[] = {0, 1};
-+ float64x2_t in1 = vld1q_f64 (arr1);
-+ float64_t arr2[] = {2, 3};
-+ float64x2_t in2 = vld1q_f64 (arr2);
-+ float64_t exp[] = {1, 2};
-+ float64x2_t expected = vld1q_f64 (exp);
-+ float64x2_t actual = test_vextq_f64_1 (in1, in2);
-+
-+ for (i = 0; i < 2; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ return 0;
-+}
-+
-+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.8\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vpaddd_f64.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vpaddd_f64.c
-@@ -0,0 +1,27 @@
-+/* Test the vpaddd_f64 AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -O3" } */
-+
-+#include "arm_neon.h"
-+
-+#define SIZE 6
-+
-+extern void abort (void);
-+
-+float64_t in[SIZE] = { -4.0, 4.0, -2.0, 2.0, -1.0, 1.0 };
-+
-+int
-+main (void)
-+{
-+ int i;
-+
-+ for (i = 0; i < SIZE / 2; ++i)
-+ if (vpaddd_f64 (vld1q_f64 (in + 2 * i)) != 0.0)
-+ abort ();
-+
-+ return 0;
-+}
-+
-+/* { dg-final { scan-assembler "faddp\[ \t\]+\[dD\]\[0-9\]+, v\[0-9\].2d+\n" } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs16_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs16_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vrev32q_s16' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vrev32qs16.x"
-+
-+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.8h, ?v\[0-9\]+.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs16_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs16_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vzipq_s16' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vzipqs16.x"
-+
-+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipf32_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipf32_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vzip_f32' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vzipf32.x"
-+
-+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16p8.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16p8.x
-@@ -0,0 +1,22 @@
-+extern void abort (void);
-+
-+poly8x8_t
-+test_vrev16p8 (poly8x8_t _arg)
-+{
-+ return vrev16_p8 (_arg);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ poly8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
-+ poly8x8_t reversed = test_vrev16p8 (inorder);
-+ poly8x8_t expected = {2, 1, 4, 3, 6, 5, 8, 7};
-+
-+ for (i = 0; i < 8; i++)
-+ if (reversed[i] != expected[i])
-+ abort ();
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16u8_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16u8_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vrev16_u8' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vrev16u8.x"
-+
-+/* { dg-final { scan-assembler-times "rev16\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p8_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p8_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vextp8' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+
-+#include "arm_neon.h"
-+#include "ext_p8.x"
-+
-+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#?\[0-9\]+\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns8.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns8.x
-@@ -0,0 +1,27 @@
-+extern void abort (void);
-+
-+int8x8x2_t
-+test_vtrns8 (int8x8_t _a, int8x8_t _b)
-+{
-+ return vtrn_s8 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ int8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8};
-+ int8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16};
-+ int8x8x2_t result = test_vtrns8 (vld1_s8 (first), vld1_s8 (second));
-+ int8x8_t res1 = result.val[0], res2 = result.val[1];
-+ int8_t exp1[] = {1, 9, 3, 11, 5, 13, 7, 15};
-+ int8_t exp2[] = {2, 10, 4, 12, 6, 14, 8, 16};
-+ int8x8_t expected1 = vld1_s8 (exp1);
-+ int8x8_t expected2 = vld1_s8 (exp2);
-+
-+ for (i = 0; i < 8; i++)
-+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs16.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs16.x
-@@ -0,0 +1,27 @@
-+extern void abort (void);
-+
-+int16x8x2_t
-+test_vtrnqs16 (int16x8_t _a, int16x8_t _b)
-+{
-+ return vtrnq_s16 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ int16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8};
-+ int16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16};
-+ int16x8x2_t result = test_vtrnqs16 (vld1q_s16 (first), vld1q_s16 (second));
-+ int16x8_t res1 = result.val[0], res2 = result.val[1];
-+ int16_t exp1[] = {1, 9, 3, 11, 5, 13, 7, 15};
-+ int16_t exp2[] = {2, 10, 4, 12, 6, 14, 8, 16};
-+ int16x8_t expected1 = vld1q_s16 (exp1);
-+ int16x8_t expected2 = vld1q_s16 (exp2);
-+
-+ for (i = 0; i < 8; i++)
-+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu16.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu16.x
-@@ -0,0 +1,27 @@
-+extern void abort (void);
-+
-+uint16x8x2_t
-+test_vtrnqu16 (uint16x8_t _a, uint16x8_t _b)
-+{
-+ return vtrnq_u16 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ uint16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8};
-+ uint16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16};
-+ uint16x8x2_t result = test_vtrnqu16 (vld1q_u16 (first), vld1q_u16 (second));
-+ uint16x8_t res1 = result.val[0], res2 = result.val[1];
-+ uint16_t exp1[] = {1, 9, 3, 11, 5, 13, 7, 15};
-+ uint16_t exp2[] = {2, 10, 4, 12, 6, 14, 8, 16};
-+ uint16x8_t expected1 = vld1q_u16 (exp1);
-+ uint16x8_t expected2 = vld1q_u16 (exp2);
-+
-+ for (i = 0; i < 8; i++)
-+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p16.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p16.x
-@@ -0,0 +1,114 @@
-+extern void abort (void);
-+
-+poly16x8_t
-+test_vextq_p16_1 (poly16x8_t a, poly16x8_t b)
-+{
-+ return vextq_p16 (a, b, 1);
-+}
-+
-+poly16x8_t
-+test_vextq_p16_2 (poly16x8_t a, poly16x8_t b)
-+{
-+ return vextq_p16 (a, b, 2);
-+}
-+
-+poly16x8_t
-+test_vextq_p16_3 (poly16x8_t a, poly16x8_t b)
-+{
-+ return vextq_p16 (a, b, 3);
-+}
-+
-+poly16x8_t
-+test_vextq_p16_4 (poly16x8_t a, poly16x8_t b)
-+{
-+ return vextq_p16 (a, b, 4);
-+}
-+
-+poly16x8_t
-+test_vextq_p16_5 (poly16x8_t a, poly16x8_t b)
-+{
-+ return vextq_p16 (a, b, 5);
-+}
-+
-+poly16x8_t
-+test_vextq_p16_6 (poly16x8_t a, poly16x8_t b)
-+{
-+ return vextq_p16 (a, b, 6);
-+}
-+
-+poly16x8_t
-+test_vextq_p16_7 (poly16x8_t a, poly16x8_t b)
-+{
-+ return vextq_p16 (a, b, 7);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i, off;
-+ poly16_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7};
-+ poly16x8_t in1 = vld1q_p16 (arr1);
-+ poly16_t arr2[] = {8, 9, 10, 11, 12, 13, 14, 15};
-+ poly16x8_t in2 = vld1q_p16 (arr2);
-+ poly16_t exp[8];
-+ poly16x8_t expected;
-+ poly16x8_t actual = test_vextq_p16_1 (in1, in2);
-+
-+ for (i = 0; i < 8; i++)
-+ exp[i] = i + 1;
-+ expected = vld1q_p16 (exp);
-+ for (i = 0; i < 8; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_p16_2 (in1, in2);
-+ for (i = 0; i < 8; i++)
-+ exp[i] = i + 2;
-+ expected = vld1q_p16 (exp);
-+ for (i = 0; i < 8; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_p16_3 (in1, in2);
-+ for (i = 0; i < 8; i++)
-+ exp[i] = i + 3;
-+ expected = vld1q_p16 (exp);
-+ for (i = 0; i < 8; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_p16_4 (in1, in2);
-+ for (i = 0; i < 8; i++)
-+ exp[i] = i + 4;
-+ expected = vld1q_p16 (exp);
-+ for (i = 0; i < 8; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_p16_5 (in1, in2);
-+ for (i = 0; i < 8; i++)
-+ exp[i] = i + 5;
-+ expected = vld1q_p16 (exp);
-+ for (i = 0; i < 8; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_p16_6 (in1, in2);
-+ for (i = 0; i < 8; i++)
-+ exp[i] = i + 6;
-+ expected = vld1q_p16 (exp);
-+ for (i = 0; i < 8; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_p16_7 (in1, in2);
-+ for (i = 0; i < 8; i++)
-+ exp[i] = i + 7;
-+ expected = vld1q_p16 (exp);
-+ for (i = 0; i < 8; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs16.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs16.x
-@@ -0,0 +1,22 @@
-+extern void abort (void);
-+
-+int16x8_t
-+test_vrev64qs16 (int16x8_t _arg)
-+{
-+ return vrev64q_s16 (_arg);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ int16x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
-+ int16x8_t reversed = test_vrev64qs16 (inorder);
-+ int16x8_t expected = {4, 3, 2, 1, 8, 7, 6, 5};
-+
-+ for (i = 0; i < 8; i++)
-+ if (reversed[i] != expected[i])
-+ abort ();
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu16.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu16.x
-@@ -0,0 +1,22 @@
-+extern void abort (void);
-+
-+uint16x8_t
-+test_vrev64qu16 (uint16x8_t _arg)
-+{
-+ return vrev64q_u16 (_arg);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ uint16x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
-+ uint16x8_t reversed = test_vrev64qu16 (inorder);
-+ uint16x8_t expected = {4, 3, 2, 1, 8, 7, 6, 5};
-+
-+ for (i = 0; i < 8; i++)
-+ if (reversed[i] != expected[i])
-+ abort ();
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u8.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u8.x
-@@ -0,0 +1,22 @@
-+extern void abort (void);
-+
-+uint8x8_t
-+test_vrev64u8 (uint8x8_t _arg)
-+{
-+ return vrev64_u8 (_arg);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ uint8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
-+ uint8x8_t reversed = test_vrev64u8 (inorder);
-+ uint8x8_t expected = {8, 7, 6, 5, 4, 3, 2, 1};
-+
-+ for (i = 0; i < 8; i++)
-+ if (reversed[i] != expected[i])
-+ abort ();
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp16.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp16.x
-@@ -0,0 +1,26 @@
-+extern void abort (void);
-+
-+poly16x8x2_t
-+test_vuzpqp16 (poly16x8_t _a, poly16x8_t _b)
-+{
-+ return vuzpq_p16 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ poly16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8};
-+ poly16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16};
-+ poly16x8x2_t result = test_vuzpqp16 (vld1q_p16 (first), vld1q_p16 (second));
-+ poly16_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15};
-+ poly16_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16};
-+ poly16x8_t expect1 = vld1q_p16 (exp1);
-+ poly16x8_t expect2 = vld1q_p16 (exp2);
-+
-+ for (i = 0; i < 8; i++)
-+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns16_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns16_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vtrn_s16' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vtrns16.x"
-+
-+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpf32.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpf32.x
-@@ -0,0 +1,26 @@
-+extern void abort (void);
-+
-+float32x2x2_t
-+test_vuzpf32 (float32x2_t _a, float32x2_t _b)
-+{
-+ return vuzp_f32 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ float32_t first[] = {1, 2};
-+ float32_t second[] = {3, 4};
-+ float32x2x2_t result = test_vuzpf32 (vld1_f32 (first), vld1_f32 (second));
-+ float32_t exp1[] = {1, 3};
-+ float32_t exp2[] = {2, 4};
-+ float32x2_t expect1 = vld1_f32 (exp1);
-+ float32x2_t expect2 = vld1_f32 (exp2);
-+
-+ for (i = 0; i < 2; i++)
-+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu16_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu16_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vzip_u16' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vzipu16.x"
-+
-+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqf32_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqf32_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vtrnq_f32' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vtrnqf32.x"
-+
-+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs8_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs8_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vtrnq_s8' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vtrnqs8.x"
-+
-+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp8.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp8.x
-@@ -0,0 +1,28 @@
-+extern void abort (void);
-+
-+poly8x16x2_t
-+test_vtrnqp8 (poly8x16_t _a, poly8x16_t _b)
-+{
-+ return vtrnq_p8 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ poly8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
-+ poly8_t second[] =
-+ {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32};
-+ poly8x16x2_t result = test_vtrnqp8 (vld1q_p8 (first), vld1q_p8 (second));
-+ poly8x16_t res1 = result.val[0], res2 = result.val[1];
-+ poly8_t exp1[] = {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31};
-+ poly8_t exp2[] = {2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30, 16, 32};
-+ poly8x16_t expected1 = vld1q_p8 (exp1);
-+ poly8x16_t expected2 = vld1q_p8 (exp2);
-+
-+ for (i = 0; i < 16; i++)
-+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s32.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s32.x
-@@ -0,0 +1,22 @@
-+extern void abort (void);
-+
-+int32x2_t
-+test_vrev64s32 (int32x2_t _arg)
-+{
-+ return vrev64_s32 (_arg);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ int32x2_t inorder = {1, 2};
-+ int32x2_t reversed = test_vrev64s32 (inorder);
-+ int32x2_t expected = {2, 1};
-+
-+ for (i = 0; i < 2; i++)
-+ if (reversed[i] != expected[i])
-+ abort ();
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s16_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s16_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vexts16' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+
-+#include "arm_neon.h"
-+#include "ext_s16.x"
-+
-+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu8.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu8.x
-@@ -0,0 +1,22 @@
-+extern void abort (void);
-+
-+uint8x16_t
-+test_vrev32qu8 (uint8x16_t _arg)
-+{
-+ return vrev32q_u8 (_arg);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ uint8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
-+ uint8x16_t reversed = test_vrev32qu8 (inorder);
-+ uint8x16_t expected = {4, 3, 2, 1, 8, 7, 6, 5, 12, 11, 10, 9, 16, 15, 14, 13};
-+
-+ for (i = 0; i < 16; i++)
-+ if (reversed[i] != expected[i])
-+ abort ();
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u32.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u32.x
-@@ -0,0 +1,22 @@
-+extern void abort (void);
-+
-+uint32x2_t
-+test_vrev64u32 (uint32x2_t _arg)
-+{
-+ return vrev64_u32 (_arg);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ uint32x2_t inorder = {1, 2};
-+ uint32x2_t reversed = test_vrev64u32 (inorder);
-+ uint32x2_t expected = {2, 1};
-+
-+ for (i = 0; i < 2; i++)
-+ if (reversed[i] != expected[i])
-+ abort ();
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_f32_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_f32_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vextQf32' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+
-+#include "arm_neon.h"
-+#include "extq_f32.x"
-+
-+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.4)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qu8.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qu8.x
-@@ -0,0 +1,22 @@
-+extern void abort (void);
-+
-+uint8x16_t
-+test_vrev16qu8 (uint8x16_t _arg)
-+{
-+ return vrev16q_u8 (_arg);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ uint8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
-+ uint8x16_t reversed = test_vrev16qu8 (inorder);
-+ uint8x16_t expected = {2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15};
-+
-+ for (i = 0; i < 16; i++)
-+ if (reversed[i] != expected[i])
-+ abort ();
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp8_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp8_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vuzpq_p8' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vuzpqp8.x"
-+
-+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp16_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp16_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vrev64q_p16' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vrev64qp16.x"
-+
-+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.8h, ?v\[0-9\]+.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp16.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp16.x
-@@ -0,0 +1,27 @@
-+extern void abort (void);
-+
-+poly16x8x2_t
-+test_vzipqp16 (poly16x8_t _a, poly16x8_t _b)
-+{
-+ return vzipq_p16 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ poly16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8};
-+ poly16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16};
-+ poly16x8x2_t result = test_vzipqp16 (vld1q_p16 (first), vld1q_p16 (second));
-+ poly16x8_t res1 = result.val[0], res2 = result.val[1];
-+ poly16_t exp1[] = {1, 9, 2, 10, 3, 11, 4, 12};
-+ poly16_t exp2[] = {5, 13, 6, 14, 7, 15, 8, 16};
-+ poly16x8_t expected1 = vld1q_p16 (exp1);
-+ poly16x8_t expected2 = vld1q_p16 (exp2);
-+
-+ for (i = 0; i < 8; i++)
-+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu16_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu16_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vtrnq_u16' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vtrnqu16.x"
-+
-+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu32_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu32_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vrev64q_u32' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vrev64qu32.x"
-+
-+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.4s, ?v\[0-9\]+.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu8.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu8.x
-@@ -0,0 +1,27 @@
-+extern void abort (void);
-+
-+uint8x16x2_t
-+test_vuzpqu8 (uint8x16_t _a, uint8x16_t _b)
-+{
-+ return vuzpq_u8 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ uint8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
-+ uint8_t second[] =
-+ {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32};
-+ uint8x16x2_t result = test_vuzpqu8 (vld1q_u8 (first), vld1q_u8 (second));
-+ uint8_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31};
-+ uint8_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32};
-+ uint8x16_t expect1 = vld1q_u8 (exp1);
-+ uint8x16_t expect2 = vld1q_u8 (exp2);
-+
-+ for (i = 0; i < 16; i++)
-+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p8.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p8.x
-@@ -0,0 +1,22 @@
-+extern void abort (void);
-+
-+poly8x8_t
-+test_vrev64p8 (poly8x8_t _arg)
-+{
-+ return vrev64_p8 (_arg);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ poly8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
-+ poly8x8_t reversed = test_vrev64p8 (inorder);
-+ poly8x8_t expected = {8, 7, 6, 5, 4, 3, 2, 1};
-+
-+ for (i = 0; i < 8; i++)
-+ if (reversed[i] != expected[i])
-+ abort ();
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u8.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u8.x
-@@ -0,0 +1,22 @@
-+extern void abort (void);
-+
-+uint8x8_t
-+test_vrev32u8 (uint8x8_t _arg)
-+{
-+ return vrev32_u8 (_arg);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ uint8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
-+ uint8x8_t reversed = test_vrev32u8 (inorder);
-+ uint8x8_t expected = {4, 3, 2, 1, 8, 7, 6, 5};
-+
-+ for (i = 0; i < 8; i++)
-+ if (reversed[i] != expected[i])
-+ abort ();
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16s8.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16s8.x
-@@ -0,0 +1,22 @@
-+extern void abort (void);
-+
-+int8x8_t
-+test_vrev16s8 (int8x8_t _arg)
-+{
-+ return vrev16_s8 (_arg);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ int8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
-+ int8x8_t reversed = test_vrev16s8 (inorder);
-+ int8x8_t expected = {2, 1, 4, 3, 6, 5, 8, 7};
-+
-+ for (i = 0; i < 8; i++)
-+ if (reversed[i] != expected[i])
-+ abort ();
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u8_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u8_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vextu8' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+
-+#include "arm_neon.h"
-+#include "ext_u8.x"
-+
-+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#?\[0-9\]+\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u16_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u16_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vextQu16' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+
-+#include "arm_neon.h"
-+#include "extq_u16.x"
-+
-+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/int_comparisons.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/int_comparisons.x
-@@ -0,0 +1,68 @@
-+/* test_vcXXX wrappers for all the vcXXX (vector compare) and vtst intrinsics
-+ in arm_neon.h (excluding the 64x1 variants as these generally produce scalar
-+ not vector ops). */
-+#include "arm_neon.h"
-+
-+#define DONT_FORCE(X)
-+
-+#define FORCE_SIMD(V1) asm volatile ("mov %d0, %1.d[0]" \
-+ : "=w"(V1) \
-+ : "w"(V1) \
-+ : /* No clobbers */);
-+
-+#define OP1(SIZE, OP, BASETYPE, SUFFIX, FORCE) uint##SIZE##_t \
-+test_v##OP##SUFFIX (BASETYPE##SIZE##_t a) \
-+{ \
-+ uint##SIZE##_t res; \
-+ FORCE (a); \
-+ res = v##OP##SUFFIX (a); \
-+ FORCE (res); \
-+ return res; \
-+}
-+
-+#define OP2(SIZE, OP, BASETYPE, SUFFIX, FORCE) uint##SIZE##_t \
-+test_v##OP##SUFFIX (BASETYPE##SIZE##_t a, BASETYPE##SIZE##_t b) \
-+{ \
-+ uint##SIZE##_t res; \
-+ FORCE (a); \
-+ FORCE (b); \
-+ res = v##OP##SUFFIX (a, b); \
-+ FORCE (res); \
-+ return res; \
-+}
-+
-+#define UNSIGNED_OPS(SIZE, BASETYPE, SUFFIX, FORCE) \
-+OP2 (SIZE, tst, BASETYPE, SUFFIX, FORCE) \
-+OP1 (SIZE, ceqz, BASETYPE, SUFFIX, FORCE) \
-+OP2 (SIZE, ceq, BASETYPE, SUFFIX, FORCE) \
-+OP2 (SIZE, cge, BASETYPE, SUFFIX, FORCE) \
-+OP2 (SIZE, cgt, BASETYPE, SUFFIX, FORCE) \
-+OP2 (SIZE, cle, BASETYPE, SUFFIX, FORCE) \
-+OP2 (SIZE, clt, BASETYPE, SUFFIX, FORCE)
-+
-+#define ALL_OPS(SIZE, BASETYPE, SUFFIX, FORCE) \
-+OP1 (SIZE, cgez, BASETYPE, SUFFIX, FORCE) \
-+OP1 (SIZE, cgtz, BASETYPE, SUFFIX, FORCE) \
-+OP1 (SIZE, clez, BASETYPE, SUFFIX, FORCE) \
-+OP1 (SIZE, cltz, BASETYPE, SUFFIX, FORCE) \
-+UNSIGNED_OPS (SIZE, BASETYPE, SUFFIX, FORCE)
-+
-+ALL_OPS (8x8, int, _s8, DONT_FORCE)
-+ALL_OPS (16x4, int, _s16, DONT_FORCE)
-+ALL_OPS (32x2, int, _s32, DONT_FORCE)
-+ALL_OPS (64x1, int, _s64, DONT_FORCE)
-+ALL_OPS (64, int, d_s64, FORCE_SIMD)
-+ALL_OPS (8x16, int, q_s8, DONT_FORCE)
-+ALL_OPS (16x8, int, q_s16, DONT_FORCE)
-+ALL_OPS (32x4, int, q_s32, DONT_FORCE)
-+ALL_OPS (64x2, int, q_s64, DONT_FORCE)
-+UNSIGNED_OPS (8x8, uint, _u8, DONT_FORCE)
-+UNSIGNED_OPS (16x4, uint, _u16, DONT_FORCE)
-+UNSIGNED_OPS (32x2, uint, _u32, DONT_FORCE)
-+UNSIGNED_OPS (64x1, uint, _u64, DONT_FORCE)
-+UNSIGNED_OPS (64, uint, d_u64, FORCE_SIMD)
-+UNSIGNED_OPS (8x16, uint, q_u8, DONT_FORCE)
-+UNSIGNED_OPS (16x8, uint, q_u16, DONT_FORCE)
-+UNSIGNED_OPS (32x4, uint, q_u32, DONT_FORCE)
-+UNSIGNED_OPS (64x2, uint, q_u64, DONT_FORCE)
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs32_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs32_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vuzpq_s32' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vuzpqs32.x"
-+
-+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps8_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps8_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vuzp_s8' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vuzps8.x"
-+
-+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp8_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp8_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vtrnq_p8' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vtrnqp8.x"
-+
-+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p16_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p16_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vrev64_p16' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vrev64p16.x"
-+
-+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.4h, ?v\[0-9\]+.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u16_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u16_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vrev32_u16' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vrev32u16.x"
-+
-+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.4h, ?v\[0-9\]+.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp8.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp8.x
-@@ -0,0 +1,22 @@
-+extern void abort (void);
-+
-+poly8x16_t
-+test_vrev32qp8 (poly8x16_t _arg)
-+{
-+ return vrev32q_p8 (_arg);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ poly8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
-+ poly8x16_t reversed = test_vrev32qp8 (inorder);
-+ poly8x16_t expected = {4, 3, 2, 1, 8, 7, 6, 5, 12, 11, 10, 9, 16, 15, 14, 13};
-+
-+ for (i = 0; i < 16; i++)
-+ if (reversed[i] != expected[i])
-+ abort ();
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qs8_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qs8_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vrev16q_s8' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vrev16qs8.x"
-+
-+/* { dg-final { scan-assembler-times "rev16\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp16.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp16.x
-@@ -0,0 +1,27 @@
-+extern void abort (void);
-+
-+poly16x4x2_t
-+test_vtrnp16 (poly16x4_t _a, poly16x4_t _b)
-+{
-+ return vtrn_p16 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ poly16_t first[] = {1, 2, 3, 4};
-+ poly16_t second[] = {5, 6, 7, 8};
-+ poly16x4x2_t result = test_vtrnp16 (vld1_p16 (first), vld1_p16 (second));
-+ poly16x4_t res1 = result.val[0], res2 = result.val[1];
-+ poly16_t exp1[] = {1, 5, 3, 7};
-+ poly16_t exp2[] = {2, 6, 4, 8};
-+ poly16x4_t expected1 = vld1_p16 (exp1);
-+ poly16x4_t expected2 = vld1_p16 (exp2);
-+
-+ for (i = 0; i < 4; i++)
-+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzips32.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzips32.x
-@@ -0,0 +1,27 @@
-+extern void abort (void);
-+
-+int32x2x2_t
-+test_vzips32 (int32x2_t _a, int32x2_t _b)
-+{
-+ return vzip_s32 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ int32_t first[] = {1, 2};
-+ int32_t second[] = {3, 4};
-+ int32x2x2_t result = test_vzips32 (vld1_s32 (first), vld1_s32 (second));
-+ int32x2_t res1 = result.val[0], res2 = result.val[1];
-+ int32_t exp1[] = {1, 3};
-+ int32_t exp2[] = {2, 4};
-+ int32x2_t expected1 = vld1_s32 (exp1);
-+ int32x2_t expected2 = vld1_s32 (exp2);
-+
-+ for (i = 0; i < 2; i++)
-+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u32_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u32_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vrev64_u32' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vrev64u32.x"
-+
-+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.2s, ?v\[0-9\]+.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qp8.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qp8.x
-@@ -0,0 +1,22 @@
-+extern void abort (void);
-+
-+poly8x16_t
-+test_vrev16qp8 (poly8x16_t _arg)
-+{
-+ return vrev16q_p8 (_arg);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ poly8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
-+ poly8x16_t reversed = test_vrev16qp8 (inorder);
-+ poly8x16_t expected = {2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15};
-+
-+ for (i = 0; i < 16; i++)
-+ if (reversed[i] != expected[i])
-+ abort ();
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu32.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu32.x
-@@ -0,0 +1,27 @@
-+extern void abort (void);
-+
-+uint32x2x2_t
-+test_vzipu32 (uint32x2_t _a, uint32x2_t _b)
-+{
-+ return vzip_u32 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ uint32_t first[] = {1, 2};
-+ uint32_t second[] = {3, 4};
-+ uint32x2x2_t result = test_vzipu32 (vld1_u32 (first), vld1_u32 (second));
-+ uint32x2_t res1 = result.val[0], res2 = result.val[1];
-+ uint32_t exp1[] = {1, 3};
-+ uint32_t exp2[] = {2, 4};
-+ uint32x2_t expected1 = vld1_u32 (exp1);
-+ uint32x2_t expected2 = vld1_u32 (exp2);
-+
-+ for (i = 0; i < 2; i++)
-+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqf32.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqf32.x
-@@ -0,0 +1,27 @@
-+extern void abort (void);
-+
-+float32x4x2_t
-+test_vtrnqf32 (float32x4_t _a, float32x4_t _b)
-+{
-+ return vtrnq_f32 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ float32_t first[] = {1, 2, 3, 4};
-+ float32_t second[] = {5, 6, 7, 8};
-+ float32x4x2_t result = test_vtrnqf32 (vld1q_f32 (first), vld1q_f32 (second));
-+ float32x4_t res1 = result.val[0], res2 = result.val[1];
-+ float32_t exp1[] = {1, 5, 3, 7};
-+ float32_t exp2[] = {2, 6, 4, 8};
-+ float32x4_t expected1 = vld1q_f32 (exp1);
-+ float32x4_t expected2 = vld1q_f32 (exp2);
-+
-+ for (i = 0; i < 4; i++)
-+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs8.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs8.x
-@@ -0,0 +1,28 @@
-+extern void abort (void);
-+
-+int8x16x2_t
-+test_vtrnqs8 (int8x16_t _a, int8x16_t _b)
-+{
-+ return vtrnq_s8 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ int8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
-+ int8_t second[] =
-+ {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32};
-+ int8x16x2_t result = test_vtrnqs8 (vld1q_s8 (first), vld1q_s8 (second));
-+ int8x16_t res1 = result.val[0], res2 = result.val[1];
-+ int8_t exp1[] = {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31};
-+ int8_t exp2[] = {2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30, 16, 32};
-+ int8x16_t expected1 = vld1q_s8 (exp1);
-+ int8x16_t expected2 = vld1q_s8 (exp2);
-+
-+ for (i = 0; i < 16; i++)
-+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s64_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s64_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vexts64' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+
-+#include "arm_neon.h"
-+#include "ext_s64.x"
-+
-+/* Do not scan-assembler. An EXT instruction could be emitted, but would merely
-+ return its first argument, so it is legitimate to optimize it out. */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps32_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps32_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vuzp_s32' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vuzps32.x"
-+
-+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qf32.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qf32.x
-@@ -0,0 +1,22 @@
-+extern void abort (void);
-+
-+float32x4_t
-+test_vrev64qf32 (float32x4_t _arg)
-+{
-+ return vrev64q_f32 (_arg);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ float32x4_t inorder = {1, 2, 3, 4};
-+ float32x4_t reversed = test_vrev64qf32 (inorder);
-+ float32x4_t expected = {2, 1, 4, 3};
-+
-+ for (i = 0; i < 4; i++)
-+ if (reversed[i] != expected[i])
-+ abort ();
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s16.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s16.x
-@@ -0,0 +1,58 @@
-+extern void abort (void);
-+
-+int16x4_t
-+test_vext_s16_1 (int16x4_t a, int16x4_t b)
-+{
-+ return vext_s16 (a, b, 1);
-+}
-+
-+int16x4_t
-+test_vext_s16_2 (int16x4_t a, int16x4_t b)
-+{
-+ return vext_s16 (a, b, 2);
-+}
-+
-+int16x4_t
-+test_vext_s16_3 (int16x4_t a, int16x4_t b)
-+{
-+ return vext_s16 (a, b, 3);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i, off;
-+ int16_t arr1[] = {0, 1, 2, 3};
-+ int16x4_t in1 = vld1_s16 (arr1);
-+ int16_t arr2[] = {4, 5, 6, 7};
-+ int16x4_t in2 = vld1_s16 (arr2);
-+ int16_t exp[4];
-+ int16x4_t expected;
-+ int16x4_t actual = test_vext_s16_1 (in1, in2);
-+
-+ for (i = 0; i < 4; i++)
-+ exp[i] = i + 1;
-+ expected = vld1_s16 (exp);
-+ for (i = 0; i < 4; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vext_s16_2 (in1, in2);
-+ for (i = 0; i < 4; i++)
-+ exp[i] = i + 2;
-+ expected = vld1_s16 (exp);
-+ for (i = 0; i < 4; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vext_s16_3 (in1, in2);
-+ for (i = 0; i < 4; i++)
-+ exp[i] = i + 3;
-+ expected = vld1_s16 (exp);
-+ for (i = 0; i < 4; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u16.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u16.x
-@@ -0,0 +1,58 @@
-+extern void abort (void);
-+
-+uint16x4_t
-+test_vext_u16_1 (uint16x4_t a, uint16x4_t b)
-+{
-+ return vext_u16 (a, b, 1);
-+}
-+
-+uint16x4_t
-+test_vext_u16_2 (uint16x4_t a, uint16x4_t b)
-+{
-+ return vext_u16 (a, b, 2);
-+}
-+
-+uint16x4_t
-+test_vext_u16_3 (uint16x4_t a, uint16x4_t b)
-+{
-+ return vext_u16 (a, b, 3);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i, off;
-+ uint16_t arr1[] = {0, 1, 2, 3};
-+ uint16x4_t in1 = vld1_u16 (arr1);
-+ uint16_t arr2[] = {4, 5, 6, 7};
-+ uint16x4_t in2 = vld1_u16 (arr2);
-+ uint16_t exp[4];
-+ uint16x4_t expected;
-+ uint16x4_t actual = test_vext_u16_1 (in1, in2);
-+
-+ for (i = 0; i < 4; i++)
-+ exp[i] = i + 1;
-+ expected = vld1_u16 (exp);
-+ for (i = 0; i < 4; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vext_u16_2 (in1, in2);
-+ for (i = 0; i < 4; i++)
-+ exp[i] = i + 2;
-+ expected = vld1_u16 (exp);
-+ for (i = 0; i < 4; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vext_u16_3 (in1, in2);
-+ for (i = 0; i < 4; i++)
-+ exp[i] = i + 3;
-+ expected = vld1_u16 (exp);
-+ for (i = 0; i < 4; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs32_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs32_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vzipq_s32' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vzipqs32.x"
-+
-+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp8.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp8.x
-@@ -0,0 +1,27 @@
-+extern void abort (void);
-+
-+poly8x16x2_t
-+test_vuzpqp8 (poly8x16_t _a, poly8x16_t _b)
-+{
-+ return vuzpq_p8 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ poly8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
-+ poly8_t second[] =
-+ {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32};
-+ poly8x16x2_t result = test_vuzpqp8 (vld1q_p8 (first), vld1q_p8 (second));
-+ poly8_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31};
-+ poly8_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32};
-+ poly8x16_t expect1 = vld1q_p8 (exp1);
-+ poly8x16_t expect2 = vld1q_p8 (exp2);
-+
-+ for (i = 0; i < 16; i++)
-+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu8_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu8_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vuzpq_u8' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vuzpqu8.x"
-+
-+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzips8_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzips8_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vzip_s8' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vzips8.x"
-+
-+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p8.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p8.x
-@@ -0,0 +1,22 @@
-+extern void abort (void);
-+
-+poly8x8_t
-+test_vrev32p8 (poly8x8_t _arg)
-+{
-+ return vrev32_p8 (_arg);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ poly8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
-+ poly8x8_t reversed = test_vrev32p8 (inorder);
-+ poly8x8_t expected = {4, 3, 2, 1, 8, 7, 6, 5};
-+
-+ for (i = 0; i < 8; i++)
-+ if (reversed[i] != expected[i])
-+ abort ();
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s8.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s8.x
-@@ -0,0 +1,22 @@
-+extern void abort (void);
-+
-+int8x8_t
-+test_vrev64s8 (int8x8_t _arg)
-+{
-+ return vrev64_s8 (_arg);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ int8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
-+ int8x8_t reversed = test_vrev64s8 (inorder);
-+ int8x8_t expected = {8, 7, 6, 5, 4, 3, 2, 1};
-+
-+ for (i = 0; i < 8; i++)
-+ if (reversed[i] != expected[i])
-+ abort ();
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp8_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp8_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vuzp_p8' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vuzpp8.x"
-+
-+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s32.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s32.x
-@@ -0,0 +1,58 @@
-+extern void abort (void);
-+
-+int32x4_t
-+test_vextq_s32_1 (int32x4_t a, int32x4_t b)
-+{
-+ return vextq_s32 (a, b, 1);
-+}
-+
-+int32x4_t
-+test_vextq_s32_2 (int32x4_t a, int32x4_t b)
-+{
-+ return vextq_s32 (a, b, 2);
-+}
-+
-+int32x4_t
-+test_vextq_s32_3 (int32x4_t a, int32x4_t b)
-+{
-+ return vextq_s32 (a, b, 3);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i, off;
-+ int32_t arr1[] = {0, 1, 2, 3};
-+ int32x4_t in1 = vld1q_s32 (arr1);
-+ int32_t arr2[] = {4, 5, 6, 7};
-+ int32x4_t in2 = vld1q_s32 (arr2);
-+ int32_t exp[4];
-+ int32x4_t expected;
-+ int32x4_t actual = test_vextq_s32_1 (in1, in2);
-+
-+ for (i = 0; i < 4; i++)
-+ exp[i] = i + 1;
-+ expected = vld1q_s32 (exp);
-+ for (i = 0; i < 4; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_s32_2 (in1, in2);
-+ for (i = 0; i < 4; i++)
-+ exp[i] = i + 2;
-+ expected = vld1q_s32 (exp);
-+ for (i = 0; i < 4; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_s32_3 (in1, in2);
-+ for (i = 0; i < 4; i++)
-+ exp[i] = i + 3;
-+ expected = vld1q_s32 (exp);
-+ for (i = 0; i < 4; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u32.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u32.x
-@@ -0,0 +1,58 @@
-+extern void abort (void);
-+
-+uint32x4_t
-+test_vextq_u32_1 (uint32x4_t a, uint32x4_t b)
-+{
-+ return vextq_u32 (a, b, 1);
-+}
-+
-+uint32x4_t
-+test_vextq_u32_2 (uint32x4_t a, uint32x4_t b)
-+{
-+ return vextq_u32 (a, b, 2);
-+}
-+
-+uint32x4_t
-+test_vextq_u32_3 (uint32x4_t a, uint32x4_t b)
-+{
-+ return vextq_u32 (a, b, 3);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i, off;
-+ uint32_t arr1[] = {0, 1, 2, 3};
-+ uint32x4_t in1 = vld1q_u32 (arr1);
-+ uint32_t arr2[] = {4, 5, 6, 7};
-+ uint32x4_t in2 = vld1q_u32 (arr2);
-+ uint32_t exp[4];
-+ uint32x4_t expected;
-+ uint32x4_t actual = test_vextq_u32_1 (in1, in2);
-+
-+ for (i = 0; i < 4; i++)
-+ exp[i] = i + 1;
-+ expected = vld1q_u32 (exp);
-+ for (i = 0; i < 4; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_u32_2 (in1, in2);
-+ for (i = 0; i < 4; i++)
-+ exp[i] = i + 2;
-+ expected = vld1q_u32 (exp);
-+ for (i = 0; i < 4; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ actual = test_vextq_u32_3 (in1, in2);
-+ for (i = 0; i < 4; i++)
-+ exp[i] = i + 3;
-+ expected = vld1q_u32 (exp);
-+ for (i = 0; i < 4; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u64_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u64_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vextQu64' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+
-+#include "arm_neon.h"
-+#include "extq_u64.x"
-+
-+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.8\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp16_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp16_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vzip_p16' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vzipp16.x"
-+
-+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns32_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns32_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vtrn_s32' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vtrns32.x"
-+
-+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qp8_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qp8_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vrev16q_p8' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vrev16qp8.x"
-+
-+/* { dg-final { scan-assembler-times "rev16\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs32.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs32.x
-@@ -0,0 +1,26 @@
-+extern void abort (void);
-+
-+int32x4x2_t
-+test_vuzpqs32 (int32x4_t _a, int32x4_t _b)
-+{
-+ return vuzpq_s32 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ int32_t first[] = {1, 2, 3, 4};
-+ int32_t second[] = {5, 6, 7, 8};
-+ int32x4x2_t result = test_vuzpqs32 (vld1q_s32 (first), vld1q_s32 (second));
-+ int32_t exp1[] = {1, 3, 5, 7};
-+ int32_t exp2[] = {2, 4, 6, 8};
-+ int32x4_t expect1 = vld1q_s32 (exp1);
-+ int32x4_t expect2 = vld1q_s32 (exp2);
-+
-+ for (i = 0; i < 4; i++)
-+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu32_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu32_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vzip_u32' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vzipu32.x"
-+
-+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p16.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p16.x
-@@ -0,0 +1,22 @@
-+extern void abort (void);
-+
-+poly16x4_t
-+test_vrev32p16 (poly16x4_t _arg)
-+{
-+ return vrev32_p16 (_arg);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ poly16x4_t inorder = {1, 2, 3, 4};
-+ poly16x4_t reversed = test_vrev32p16 (inorder);
-+ poly16x4_t expected = {2, 1, 4, 3};
-+
-+ for (i = 0; i < 4; i++)
-+ if (reversed[i] != expected[i])
-+ abort ();
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu32.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu32.x
-@@ -0,0 +1,26 @@
-+extern void abort (void);
-+
-+uint32x4x2_t
-+test_vuzpqu32 (uint32x4_t _a, uint32x4_t _b)
-+{
-+ return vuzpq_u32 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ uint32_t first[] = {1, 2, 3, 4};
-+ uint32_t second[] = {5, 6, 7, 8};
-+ uint32x4x2_t result = test_vuzpqu32 (vld1q_u32 (first), vld1q_u32 (second));
-+ uint32_t exp1[] = {1, 3, 5, 7};
-+ uint32_t exp2[] = {2, 4, 6, 8};
-+ uint32x4_t expect1 = vld1q_u32 (exp1);
-+ uint32x4_t expect2 = vld1q_u32 (exp2);
-+
-+ for (i = 0; i < 4; i++)
-+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrbit_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrbit_1.c
-@@ -0,0 +1,56 @@
-+/* { dg-do run } */
-+/* { dg-options "-O2 --save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+
-+extern void abort (void);
-+
-+uint64_t in1 = 0x0123456789abcdefULL;
-+uint64_t expected1 = 0x80c4a2e691d5b3f7ULL;
-+
-+#define TEST8(BASETYPE, SUFFIX) \
-+void test8_##SUFFIX () \
-+{ \
-+ BASETYPE##8x8_t out = vrbit_##SUFFIX (vcreate_##SUFFIX (in1)); \
-+ uint64_t res = vget_lane_u64 (vreinterpret_u64_##SUFFIX (out), 0); \
-+ if (res != expected1) abort (); \
-+}
-+
-+uint64_t in2 = 0xdeadbeefcafebabeULL;
-+uint64_t expected2 = 0x7bb57df7537f5d7dULL;
-+
-+#define TEST16(BASETYPE, SUFFIX) \
-+void test16_##SUFFIX () \
-+{ \
-+ BASETYPE##8x16_t in = vcombine_##SUFFIX (vcreate_##SUFFIX (in1), \
-+ vcreate_##SUFFIX (in2)); \
-+ uint64x2_t res = vreinterpretq_u64_##SUFFIX (vrbitq_##SUFFIX (in)); \
-+ uint64_t res1 = vgetq_lane_u64 (res, 0); \
-+ uint64_t res2 = vgetq_lane_u64 (res, 1); \
-+ if (res1 != expected1 || res2 != expected2) abort (); \
-+}
-+
-+TEST8 (poly, p8);
-+TEST8 (int, s8);
-+TEST8 (uint, u8);
-+
-+TEST16 (poly, p8);
-+TEST16 (int, s8);
-+TEST16 (uint, u8);
-+
-+int
-+main (int argc, char **argv)
-+{
-+ test8_p8 ();
-+ test8_s8 ();
-+ test8_u8 ();
-+ test16_p8 ();
-+ test16_s8 ();
-+ test16_u8 ();
-+ return 0;
-+}
-+
-+/* { dg-final { scan-assembler-times "rbit\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\]" 3 } } */
-+/* { dg-final { scan-assembler-times "rbit\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\]" 3 } } */
-+
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s32_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s32_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vexts32' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+
-+#include "arm_neon.h"
-+#include "ext_s32.x"
-+
-+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#\[0-9\]+\(?:.4)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu8_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu8_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vtrnq_u8' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vtrnqu8.x"
-+
-+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs8.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs8.x
-@@ -0,0 +1,22 @@
-+extern void abort (void);
-+
-+int8x16_t
-+test_vrev32qs8 (int8x16_t _arg)
-+{
-+ return vrev32q_s8 (_arg);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ int8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
-+ int8x16_t reversed = test_vrev32qs8 (inorder);
-+ int8x16_t expected = {4, 3, 2, 1, 8, 7, 6, 5, 12, 11, 10, 9, 16, 15, 14, 13};
-+
-+ for (i = 0; i < 16; i++)
-+ if (reversed[i] != expected[i])
-+ abort ();
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qs8.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qs8.x
-@@ -0,0 +1,22 @@
-+extern void abort (void);
-+
-+int8x16_t
-+test_vrev16qs8 (int8x16_t _arg)
-+{
-+ return vrev16q_s8 (_arg);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ int8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
-+ int8x16_t reversed = test_vrev16qs8 (inorder);
-+ int8x16_t expected = {2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15};
-+
-+ for (i = 0; i < 16; i++)
-+ if (reversed[i] != expected[i])
-+ abort ();
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s16.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s16.x
-@@ -0,0 +1,22 @@
-+extern void abort (void);
-+
-+int16x4_t
-+test_vrev64s16 (int16x4_t _arg)
-+{
-+ return vrev64_s16 (_arg);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ int16x4_t inorder = {1, 2, 3, 4};
-+ int16x4_t reversed = test_vrev64s16 (inorder);
-+ int16x4_t expected = {4, 3, 2, 1};
-+
-+ for (i = 0; i < 4; i++)
-+ if (reversed[i] != expected[i])
-+ abort ();
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s8_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s8_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vextQs8' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+
-+#include "arm_neon.h"
-+#include "extq_s8.x"
-+
-+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#?\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 15 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u16.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u16.x
-@@ -0,0 +1,22 @@
-+extern void abort (void);
-+
-+uint16x4_t
-+test_vrev64u16 (uint16x4_t _arg)
-+{
-+ return vrev64_u16 (_arg);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ uint16x4_t inorder = {1, 2, 3, 4};
-+ uint16x4_t reversed = test_vrev64u16 (inorder);
-+ uint16x4_t expected = {4, 3, 2, 1};
-+
-+ for (i = 0; i < 4; i++)
-+ if (reversed[i] != expected[i])
-+ abort ();
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp16.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp16.x
-@@ -0,0 +1,26 @@
-+extern void abort (void);
-+
-+poly16x4x2_t
-+test_vuzpp16 (poly16x4_t _a, poly16x4_t _b)
-+{
-+ return vuzp_p16 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ poly16_t first[] = {1, 2, 3, 4};
-+ poly16_t second[] = {5, 6, 7, 8};
-+ poly16x4x2_t result = test_vuzpp16 (vld1_p16 (first), vld1_p16 (second));
-+ poly16_t exp1[] = {1, 3, 5, 7};
-+ poly16_t exp2[] = {2, 4, 6, 8};
-+ poly16x4_t expect1 = vld1_p16 (exp1);
-+ poly16x4_t expect2 = vld1_p16 (exp2);
-+
-+ for (i = 0; i < 4; i++)
-+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqf32_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqf32_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vuzpq_f32' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vuzpqf32.x"
-+
-+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp8_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp8_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vzip_p8' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vzipp8.x"
-+
-+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp16_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp16_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vtrnq_p16' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vtrnqp16.x"
-+
-+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp16.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp16.x
-@@ -0,0 +1,22 @@
-+extern void abort (void);
-+
-+poly16x8_t
-+test_vrev32qp16 (poly16x8_t _arg)
-+{
-+ return vrev32q_p16 (_arg);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ poly16x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
-+ poly16x8_t reversed = test_vrev32qp16 (inorder);
-+ poly16x8_t expected = {2, 1, 4, 3, 6, 5, 8, 7};
-+
-+ for (i = 0; i < 8; i++)
-+ if (reversed[i] != expected[i])
-+ abort ();
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu32_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu32_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vtrnq_u32' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vtrnqu32.x"
-+
-+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs8.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs8.x
-@@ -0,0 +1,27 @@
-+extern void abort (void);
-+
-+int8x16x2_t
-+test_vuzpqs8 (int8x16_t _a, int8x16_t _b)
-+{
-+ return vuzpq_s8 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ int8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
-+ int8_t second[] =
-+ {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32};
-+ int8x16x2_t result = test_vuzpqs8 (vld1q_s8 (first), vld1q_s8 (second));
-+ int8_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31};
-+ int8_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32};
-+ int8x16_t expect1 = vld1q_s8 (exp1);
-+ int8x16_t expect2 = vld1q_s8 (exp2);
-+
-+ for (i = 0; i < 16; i++)
-+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs32.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs32.x
-@@ -0,0 +1,27 @@
-+extern void abort (void);
-+
-+int32x4x2_t
-+test_vzipqs32 (int32x4_t _a, int32x4_t _b)
-+{
-+ return vzipq_s32 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ int32_t first[] = {1, 2, 3, 4};
-+ int32_t second[] = {5, 6, 7, 8};
-+ int32x4x2_t result = test_vzipqs32 (vld1q_s32 (first), vld1q_s32 (second));
-+ int32x4_t res1 = result.val[0], res2 = result.val[1];
-+ int32_t exp1[] = {1, 5, 2, 6};
-+ int32_t exp2[] = {3, 7, 4, 8};
-+ int32x4_t expected1 = vld1q_s32 (exp1);
-+ int32x4_t expected2 = vld1q_s32 (exp2);
-+
-+ for (i = 0; i < 4; i++)
-+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs16_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs16_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vrev64q_s16' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vrev64qs16.x"
-+
-+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.8h, ?v\[0-9\]+.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s8.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s8.x
-@@ -0,0 +1,22 @@
-+extern void abort (void);
-+
-+int8x8_t
-+test_vrev32s8 (int8x8_t _arg)
-+{
-+ return vrev32_s8 (_arg);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ int8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
-+ int8x8_t reversed = test_vrev32s8 (inorder);
-+ int8x8_t expected = {4, 3, 2, 1, 8, 7, 6, 5};
-+
-+ for (i = 0; i < 8; i++)
-+ if (reversed[i] != expected[i])
-+ abort ();
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p16_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p16_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vextQp16' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+
-+#include "arm_neon.h"
-+#include "extq_p16.x"
-+
-+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu32.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu32.x
-@@ -0,0 +1,27 @@
-+extern void abort (void);
-+
-+uint32x4x2_t
-+test_vzipqu32 (uint32x4_t _a, uint32x4_t _b)
-+{
-+ return vzipq_u32 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ uint32_t first[] = {1, 2, 3, 4};
-+ uint32_t second[] = {5, 6, 7, 8};
-+ uint32x4x2_t result = test_vzipqu32 (vld1q_u32 (first), vld1q_u32 (second));
-+ uint32x4_t res1 = result.val[0], res2 = result.val[1];
-+ uint32_t exp1[] = {1, 5, 2, 6};
-+ uint32_t exp2[] = {3, 7, 4, 8};
-+ uint32x4_t expected1 = vld1q_u32 (exp1);
-+ uint32x4_t expected2 = vld1q_u32 (exp2);
-+
-+ for (i = 0; i < 4; i++)
-+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u32_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u32_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vextQu32' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+
-+#include "arm_neon.h"
-+#include "extq_u32.x"
-+
-+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.4)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p16_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p16_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vrev32_p16' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vrev32p16.x"
-+
-+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.4h, ?v\[0-9\]+.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_f32.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_f32.x
-@@ -0,0 +1,30 @@
-+extern void abort (void);
-+
-+float32x2_t
-+test_vext_f32_1 (float32x2_t a, float32x2_t b)
-+{
-+ return vext_f32 (a, b, 1);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i, off;
-+ float32_t arr1[] = {0, 1};
-+ float32x2_t in1 = vld1_f32 (arr1);
-+ float32_t arr2[] = {2, 3};
-+ float32x2_t in2 = vld1_f32 (arr2);
-+ float32_t exp[2];
-+ float32x2_t expected;
-+ float32x2_t actual = test_vext_f32_1 (in1, in2);
-+
-+ for (i = 0; i < 2; i++)
-+ exp[i] = i + 1;
-+ expected = vld1_f32 (exp);
-+ for (i = 0; i < 2; i++)
-+ if (actual[i] != expected[i])
-+ abort ();
-+
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_f64_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_f64_1.c
-@@ -0,0 +1,25 @@
-+/* Test the `vextf64' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+
-+#include "arm_neon.h"
-+
-+extern void abort (void);
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i, off;
-+ float64x1_t in1 = {0};
-+ float64x1_t in2 = {1};
-+ float64x1_t actual = vext_f64 (in1, in2, 0);
-+ if (actual != in1)
-+ abort ();
-+
-+ return 0;
-+}
-+
-+/* Do not scan-assembler. An EXT instruction could be emitted, but would merely
-+ return its first argument, so it is legitimate to optimize it out. */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpf32_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpf32_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vuzp_f32' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vuzpf32.x"
-+
-+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu16_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu16_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vuzpq_u16' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vuzpqu16.x"
-+
-+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu8_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu8_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vuzp_u8' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vuzpu8.x"
-+
-+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqf32_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqf32_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vzipq_f32' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vzipqf32.x"
-+
-+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s16_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s16_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vrev64_s16' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vrev64s16.x"
-+
-+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.4h, ?v\[0-9\]+.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns32.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns32.x
-@@ -0,0 +1,27 @@
-+extern void abort (void);
-+
-+int32x2x2_t
-+test_vtrns32 (int32x2_t _a, int32x2_t _b)
-+{
-+ return vtrn_s32 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ int32_t first[] = {1, 2};
-+ int32_t second[] = {3, 4};
-+ int32x2x2_t result = test_vtrns32 (vld1_s32 (first), vld1_s32 (second));
-+ int32x2_t res1 = result.val[0], res2 = result.val[1];
-+ int32_t exp1[] = {1, 3};
-+ int32_t exp2[] = {2, 4};
-+ int32x2_t expected1 = vld1_s32 (exp1);
-+ int32x2_t expected2 = vld1_s32 (exp2);
-+
-+ for (i = 0; i < 2; i++)
-+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qu8_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qu8_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vrev16q_u8' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vrev16qu8.x"
-+
-+/* { dg-final { scan-assembler-times "rev16\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzips16.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzips16.x
-@@ -0,0 +1,27 @@
-+extern void abort (void);
-+
-+int16x4x2_t
-+test_vzips16 (int16x4_t _a, int16x4_t _b)
-+{
-+ return vzip_s16 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ int16_t first[] = {1, 2, 3, 4};
-+ int16_t second[] = {5, 6, 7, 8};
-+ int16x4x2_t result = test_vzips16 (vld1_s16 (first), vld1_s16 (second));
-+ int16x4_t res1 = result.val[0], res2 = result.val[1];
-+ int16_t exp1[] = {1, 5, 2, 6};
-+ int16_t exp2[] = {3, 7, 4, 8};
-+ int16x4_t expected1 = vld1_s16 (exp1);
-+ int16x4_t expected2 = vld1_s16 (exp2);
-+
-+ for (i = 0; i < 4; i++)
-+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs8_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs8_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vrev64q_s8' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vrev64qs8.x"
-+
-+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p8_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p8_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vextQp8' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+
-+#include "arm_neon.h"
-+#include "extq_p8.x"
-+
-+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#?\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 15 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu32.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu32.x
-@@ -0,0 +1,27 @@
-+extern void abort (void);
-+
-+uint32x2x2_t
-+test_vtrnu32 (uint32x2_t _a, uint32x2_t _b)
-+{
-+ return vtrn_u32 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ uint32_t first[] = {1, 2};
-+ uint32_t second[] = {3, 4};
-+ uint32x2x2_t result = test_vtrnu32 (vld1_u32 (first), vld1_u32 (second));
-+ uint32x2_t res1 = result.val[0], res2 = result.val[1];
-+ uint32_t exp1[] = {1, 3};
-+ uint32_t exp2[] = {2, 4};
-+ uint32x2_t expected1 = vld1_u32 (exp1);
-+ uint32x2_t expected2 = vld1_u32 (exp2);
-+
-+ for (i = 0; i < 2; i++)
-+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu16.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu16.x
-@@ -0,0 +1,27 @@
-+extern void abort (void);
-+
-+uint16x4x2_t
-+test_vzipu16 (uint16x4_t _a, uint16x4_t _b)
-+{
-+ return vzip_u16 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ uint16_t first[] = {1, 2, 3, 4};
-+ uint16_t second[] = {5, 6, 7, 8};
-+ uint16x4x2_t result = test_vzipu16 (vld1_u16 (first), vld1_u16 (second));
-+ uint16x4_t res1 = result.val[0], res2 = result.val[1];
-+ uint16_t exp1[] = {1, 5, 2, 6};
-+ uint16_t exp2[] = {3, 7, 4, 8};
-+ uint16x4_t expected1 = vld1_u16 (exp1);
-+ uint16x4_t expected2 = vld1_u16 (exp2);
-+
-+ for (i = 0; i < 4; i++)
-+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu16_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu16_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vuzp_u16' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vuzpu16.x"
-+
-+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s8_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s8_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vrev32_s8' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vrev32s8.x"
-+
-+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnf32_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnf32_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vtrn_f32' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vtrnf32.x"
-+
-+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu16_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu16_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vrev32q_u16' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vrev32qu16.x"
-+
-+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.8h, ?v\[0-9\]+.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu16_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu16_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vzipq_u16' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vzipqu16.x"
-+
-+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu8_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu8_1.c
-@@ -0,0 +1,11 @@
-+/* Test the `vzip_u8' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline" } */
-+
-+#include <arm_neon.h>
-+#include "vzipu8.x"
-+
-+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp16.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp16.x
-@@ -0,0 +1,27 @@
-+extern void abort (void);
-+
-+poly16x8x2_t
-+test_vtrnqp16 (poly16x8_t _a, poly16x8_t _b)
-+{
-+ return vtrnq_p16 (_a, _b);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ poly16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8};
-+ poly16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16};
-+ poly16x8x2_t result = test_vtrnqp16 (vld1q_p16 (first), vld1q_p16 (second));
-+ poly16x8_t res1 = result.val[0], res2 = result.val[1];
-+ poly16_t exp1[] = {1, 9, 3, 11, 5, 13, 7, 15};
-+ poly16_t exp2[] = {2, 10, 4, 12, 6, 14, 8, 16};
-+ poly16x8_t expected1 = vld1q_p16 (exp1);
-+ poly16x8_t expected2 = vld1q_p16 (exp2);
-+
-+ for (i = 0; i < 8; i++)
-+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/int_comparisons_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/int_comparisons_1.c
-@@ -0,0 +1,47 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O3 -fno-inline" } */
-+
-+/* Scan-assembler test, so, incorporate as little other code as possible. */
-+
-+#include "arm_neon.h"
-+#include "int_comparisons.x"
-+
-+/* Operations on all 18 integer types: (q?)_[su](8|16|32|64), d_[su]64.
-+ (d?)_[us]64 generate regs of form 'd0' rather than e.g. 'v0.2d'. */
-+/* { dg-final { scan-assembler-times "\[ \t\]cmeq\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*#?0" 14 } } */
-+/* { dg-final { scan-assembler-times "\[ \t\]cmeq\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]*#?0" 4 } } */
-+/* { dg-final { scan-assembler-times "\[ \t\]cmeq\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\]" 14 } } */
-+/* { dg-final { scan-assembler-times "\[ \t\]cmeq\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]+d\[0-9\]+" 4 } } */
-+/* { dg-final { scan-assembler-times "\[ \t\]cmtst\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\]" 14 } } */
-+/* { dg-final { scan-assembler-times "\[ \t\]cmtst\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]+d\[0-9\]+" 4 } } */
-+
-+/* vcge + vcle both implemented with cmge (signed) or cmhs (unsigned). */
-+/* { dg-final { scan-assembler-times "\[ \t\]cmge\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\]" 14 } } */
-+/* { dg-final { scan-assembler-times "\[ \t\]cmge\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]+d\[0-9\]+" 4 } } */
-+/* { dg-final { scan-assembler-times "\[ \t\]cmhs\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\]" 14 } } */
-+/* { dg-final { scan-assembler-times "\[ \t\]cmhs\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]+d\[0-9\]+" 4 } } */
-+
-+/* vcgt + vclt both implemented with cmgt (signed) or cmhi (unsigned). */
-+/* { dg-final { scan-assembler-times "\[ \t\]cmgt\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\]" 14 } } */
-+/* { dg-final { scan-assembler-times "\[ \t\]cmgt\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]+d\[0-9\]+" 4 } } */
-+/* { dg-final { scan-assembler-times "\[ \t\]cmhi\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\]" 14 } } */
-+/* { dg-final { scan-assembler-times "\[ \t\]cmhi\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]+d\[0-9\]+" 4 } } */
-+
-+/* Comparisons against immediate zero, on the 8 signed integer types only. */
-+
-+/* { dg-final { scan-assembler-times "\[ \t\]cmge\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*#?0" 7 } } */
-+/* For int64_t and int64x1_t, combine_simplify_rtx failure of
-+ https://gcc.gnu.org/ml/gcc/2014-06/msg00253.html
-+ prevents generation of cmge....#0, instead producing mvn + sshr. */
-+/* { #dg-final { scan-assembler-times "\[ \t\]cmge\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]*#?0" 2 } } */
-+/* { dg-final { scan-assembler-times "\[ \t\]cmgt\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*#?0" 7 } } */
-+/* { dg-final { scan-assembler-times "\[ \t\]cmgt\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]*#?0" 2 } } */
-+/* { dg-final { scan-assembler-times "\[ \t\]cmle\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*#?0" 7 } } */
-+/* { dg-final { scan-assembler-times "\[ \t\]cmle\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]*#?0" 2 } } */
-+/* { dg-final { scan-assembler-times "\[ \t\]cmlt\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*#?0" 7 } } */
-+/* For int64_t and int64x1_t, cmlt ... #0 and sshr ... #63 are equivalent,
-+ so allow either. cmgez issue above results in extra 2 * sshr....63. */
-+/* { dg-final { scan-assembler-times "\[ \t\](?:cmlt|sshr)\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]*#?(?:0|63)" 4 } } */
-+
-+// All should have been compiled into single insns without inverting result:
-+/* { dg-final { scan-assembler-not "\[ \t\]not\[ \t\]" } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp16.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp16.x
-@@ -0,0 +1,22 @@
-+extern void abort (void);
-+
-+poly16x8_t
-+test_vrev64qp16 (poly16x8_t _arg)
-+{
-+ return vrev64q_p16 (_arg);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ poly16x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
-+ poly16x8_t reversed = test_vrev64qp16 (inorder);
-+ poly16x8_t expected = {4, 3, 2, 1, 8, 7, 6, 5};
-+
-+ for (i = 0; i < 8; i++)
-+ if (reversed[i] != expected[i])
-+ abort ();
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_f32_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_f32_1.c
-@@ -0,0 +1,10 @@
-+/* Test the `vextf32' AArch64 SIMD intrinsic. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -O3 -fno-inline" } */
-+
-+#include "arm_neon.h"
-+#include "ext_f32.x"
-+
-+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#\[0-9\]+\(?:.4)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64f32.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64f32.x
-@@ -0,0 +1,22 @@
-+extern void abort (void);
-+
-+float32x2_t
-+test_vrev64f32 (float32x2_t _arg)
-+{
-+ return vrev64_f32 (_arg);
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ int i;
-+ float32x2_t inorder = {1, 2};
-+ float32x2_t reversed = test_vrev64f32 (inorder);
-+ float32x2_t expected = {2, 1};
-+
-+ for (i = 0; i < 2; i++)
-+ if (reversed[i] != expected[i])
-+ abort ();
-+ return 0;
-+}
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c
-@@ -0,0 +1,430 @@
-+/* Test vdup_lane intrinsics work correctly. */
-+/* { dg-do run } */
-+/* { dg-options "--save-temps -O1" } */
-+
-+#include <arm_neon.h>
-+
-+extern void abort (void);
-+
-+float32x2_t __attribute__ ((noinline))
-+wrap_vdup_lane_f32_0 (float32x2_t a)
-+{
-+ return vdup_lane_f32 (a, 0);
-+}
-+
-+float32x2_t __attribute__ ((noinline))
-+wrap_vdup_lane_f32_1 (float32x2_t a)
-+{
-+ return vdup_lane_f32 (a, 1);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vdup_lane_f32 ()
-+{
-+ float32x2_t a;
-+ float32x2_t b;
-+ int i;
-+ float32_t c[2] = { 0.0 , 3.14 };
-+ float32_t d[2];
-+
-+ a = vld1_f32 (c);
-+ b = wrap_vdup_lane_f32_0 (a);
-+ vst1_f32 (d, b);
-+ for (i = 0; i < 2; i++)
-+ if (c[0] != d[i])
-+ return 1;
-+
-+ b = wrap_vdup_lane_f32_1 (a);
-+ vst1_f32 (d, b);
-+ for (i = 0; i < 2; i++)
-+ if (c[1] != d[i])
-+ return 1;
-+ return 0;
-+}
-+
-+float32x4_t __attribute__ ((noinline))
-+wrap_vdupq_lane_f32_0 (float32x2_t a)
-+{
-+ return vdupq_lane_f32 (a, 0);
-+}
-+
-+float32x4_t __attribute__ ((noinline))
-+wrap_vdupq_lane_f32_1 (float32x2_t a)
-+{
-+ return vdupq_lane_f32 (a, 1);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vdupq_lane_f32 ()
-+{
-+ float32x2_t a;
-+ float32x4_t b;
-+ int i;
-+ float32_t c[2] = { 0.0 , 3.14 };
-+ float32_t d[4];
-+
-+ a = vld1_f32 (c);
-+ b = wrap_vdupq_lane_f32_0 (a);
-+ vst1q_f32 (d, b);
-+ for (i = 0; i < 4; i++)
-+ if (c[0] != d[i])
-+ return 1;
-+
-+ b = wrap_vdupq_lane_f32_1 (a);
-+ vst1q_f32 (d, b);
-+ for (i = 0; i < 4; i++)
-+ if (c[1] != d[i])
-+ return 1;
-+ return 0;
-+}
-+
-+int8x8_t __attribute__ ((noinline))
-+wrap_vdup_lane_s8_0 (int8x8_t a)
-+{
-+ return vdup_lane_s8 (a, 0);
-+}
-+
-+int8x8_t __attribute__ ((noinline))
-+wrap_vdup_lane_s8_1 (int8x8_t a)
-+{
-+ return vdup_lane_s8 (a, 1);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vdup_lane_s8 ()
-+{
-+ int8x8_t a;
-+ int8x8_t b;
-+ int i;
-+ /* Only two first cases are interesting. */
-+ int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
-+ int8_t d[8];
-+
-+ a = vld1_s8 (c);
-+ b = wrap_vdup_lane_s8_0 (a);
-+ vst1_s8 (d, b);
-+ for (i = 0; i < 8; i++)
-+ if (c[0] != d[i])
-+ return 1;
-+
-+ b = wrap_vdup_lane_s8_1 (a);
-+ vst1_s8 (d, b);
-+ for (i = 0; i < 8; i++)
-+ if (c[1] != d[i])
-+ return 1;
-+ return 0;
-+}
-+
-+int8x16_t __attribute__ ((noinline))
-+wrap_vdupq_lane_s8_0 (int8x8_t a)
-+{
-+ return vdupq_lane_s8 (a, 0);
-+}
-+
-+int8x16_t __attribute__ ((noinline))
-+wrap_vdupq_lane_s8_1 (int8x8_t a)
-+{
-+ return vdupq_lane_s8 (a, 1);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vdupq_lane_s8 ()
-+{
-+ int8x8_t a;
-+ int8x16_t b;
-+ int i;
-+ /* Only two first cases are interesting. */
-+ int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
-+ int8_t d[16];
-+
-+ a = vld1_s8 (c);
-+ b = wrap_vdupq_lane_s8_0 (a);
-+ vst1q_s8 (d, b);
-+ for (i = 0; i < 16; i++)
-+ if (c[0] != d[i])
-+ return 1;
-+
-+ b = wrap_vdupq_lane_s8_1 (a);
-+ vst1q_s8 (d, b);
-+ for (i = 0; i < 16; i++)
-+ if (c[1] != d[i])
-+ return 1;
-+ return 0;
-+}
-+
-+int16x4_t __attribute__ ((noinline))
-+wrap_vdup_lane_s16_0 (int16x4_t a)
-+{
-+ return vdup_lane_s16 (a, 0);
-+}
-+
-+int16x4_t __attribute__ ((noinline))
-+wrap_vdup_lane_s16_1 (int16x4_t a)
-+{
-+ return vdup_lane_s16 (a, 1);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vdup_lane_s16 ()
-+{
-+ int16x4_t a;
-+ int16x4_t b;
-+ int i;
-+ /* Only two first cases are interesting. */
-+ int16_t c[4] = { 0, 1, 2, 3 };
-+ int16_t d[4];
-+
-+ a = vld1_s16 (c);
-+ b = wrap_vdup_lane_s16_0 (a);
-+ vst1_s16 (d, b);
-+ for (i = 0; i < 4; i++)
-+ if (c[0] != d[i])
-+ return 1;
-+
-+ b = wrap_vdup_lane_s16_1 (a);
-+ vst1_s16 (d, b);
-+ for (i = 0; i < 4; i++)
-+ if (c[1] != d[i])
-+ return 1;
-+ return 0;
-+}
-+
-+int16x8_t __attribute__ ((noinline))
-+wrap_vdupq_lane_s16_0 (int16x4_t a)
-+{
-+ return vdupq_lane_s16 (a, 0);
-+}
-+
-+int16x8_t __attribute__ ((noinline))
-+wrap_vdupq_lane_s16_1 (int16x4_t a)
-+{
-+ return vdupq_lane_s16 (a, 1);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vdupq_lane_s16 ()
-+{
-+ int16x4_t a;
-+ int16x8_t b;
-+ int i;
-+ /* Only two first cases are interesting. */
-+ int16_t c[4] = { 0, 1, 2, 3 };
-+ int16_t d[8];
-+
-+ a = vld1_s16 (c);
-+ b = wrap_vdupq_lane_s16_0 (a);
-+ vst1q_s16 (d, b);
-+ for (i = 0; i < 8; i++)
-+ if (c[0] != d[i])
-+ return 1;
-+
-+ b = wrap_vdupq_lane_s16_1 (a);
-+ vst1q_s16 (d, b);
-+ for (i = 0; i < 8; i++)
-+ if (c[1] != d[i])
-+ return 1;
-+ return 0;
-+}
-+
-+int32x2_t __attribute__ ((noinline))
-+wrap_vdup_lane_s32_0 (int32x2_t a)
-+{
-+ return vdup_lane_s32 (a, 0);
-+}
-+
-+int32x2_t __attribute__ ((noinline))
-+wrap_vdup_lane_s32_1 (int32x2_t a)
-+{
-+ return vdup_lane_s32 (a, 1);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vdup_lane_s32 ()
-+{
-+ int32x2_t a;
-+ int32x2_t b;
-+ int i;
-+ int32_t c[2] = { 0, 1 };
-+ int32_t d[2];
-+
-+ a = vld1_s32 (c);
-+ b = wrap_vdup_lane_s32_0 (a);
-+ vst1_s32 (d, b);
-+ for (i = 0; i < 2; i++)
-+ if (c[0] != d[i])
-+ return 1;
-+
-+ b = wrap_vdup_lane_s32_1 (a);
-+ vst1_s32 (d, b);
-+ for (i = 0; i < 2; i++)
-+ if (c[1] != d[i])
-+ return 1;
-+ return 0;
-+}
-+
-+int32x4_t __attribute__ ((noinline))
-+wrap_vdupq_lane_s32_0 (int32x2_t a)
-+{
-+ return vdupq_lane_s32 (a, 0);
-+}
-+
-+int32x4_t __attribute__ ((noinline))
-+wrap_vdupq_lane_s32_1 (int32x2_t a)
-+{
-+ return vdupq_lane_s32 (a, 1);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vdupq_lane_s32 ()
-+{
-+ int32x2_t a;
-+ int32x4_t b;
-+ int i;
-+ int32_t c[2] = { 0, 1 };
-+ int32_t d[4];
-+
-+ a = vld1_s32 (c);
-+ b = wrap_vdupq_lane_s32_0 (a);
-+ vst1q_s32 (d, b);
-+ for (i = 0; i < 4; i++)
-+ if (c[0] != d[i])
-+ return 1;
-+
-+ b = wrap_vdupq_lane_s32_1 (a);
-+ vst1q_s32 (d, b);
-+ for (i = 0; i < 4; i++)
-+ if (c[1] != d[i])
-+ return 1;
-+ return 0;
-+}
-+
-+int64x1_t __attribute__ ((noinline))
-+wrap_vdup_lane_s64_0 (int64x1_t a)
-+{
-+ return vdup_lane_s64 (a, 0);
-+}
-+
-+int64x1_t __attribute__ ((noinline))
-+wrap_vdup_lane_s64_1 (int64x1_t a)
-+{
-+ return vdup_lane_s64 (a, 1);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vdup_lane_s64 ()
-+{
-+ int64x1_t a;
-+ int64x1_t b;
-+ int64_t c[1];
-+ int64_t d[1];
-+
-+ c[0] = 0;
-+ a = vld1_s64 (c);
-+ b = wrap_vdup_lane_s64_0 (a);
-+ vst1_s64 (d, b);
-+ if (c[0] != d[0])
-+ return 1;
-+
-+ c[0] = 1;
-+ a = vld1_s64 (c);
-+ b = wrap_vdup_lane_s64_1 (a);
-+ vst1_s64 (d, b);
-+ if (c[0] != d[0])
-+ return 1;
-+ return 0;
-+}
-+
-+int64x2_t __attribute__ ((noinline))
-+wrap_vdupq_lane_s64_0 (int64x1_t a)
-+{
-+ return vdupq_lane_s64 (a, 0);
-+}
-+
-+int64x2_t __attribute__ ((noinline))
-+wrap_vdupq_lane_s64_1 (int64x1_t a)
-+{
-+ return vdupq_lane_s64 (a, 1);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vdupq_lane_s64 ()
-+{
-+ int64x1_t a;
-+ int64x2_t b;
-+ int i;
-+ int64_t c[1];
-+ int64_t d[2];
-+
-+ c[0] = 0;
-+ a = vld1_s64 (c);
-+ b = wrap_vdupq_lane_s64_0 (a);
-+ vst1q_s64 (d, b);
-+ for (i = 0; i < 2; i++)
-+ if (c[0] != d[i])
-+ return 1;
-+
-+ c[0] = 1;
-+ a = vld1_s64 (c);
-+ b = wrap_vdupq_lane_s64_1 (a);
-+ vst1q_s64 (d, b);
-+ for (i = 0; i < 2; i++)
-+ if (c[0] != d[i])
-+ return 1;
-+ return 0;
-+}
-+
-+int
-+main ()
-+{
-+
-+ if (test_vdup_lane_f32 ())
-+ abort ();
-+ if (test_vdup_lane_s8 ())
-+ abort ();
-+ if (test_vdup_lane_s16 ())
-+ abort ();
-+ if (test_vdup_lane_s32 ())
-+ abort ();
-+ if (test_vdup_lane_s64 ())
-+ abort ();
-+ if (test_vdupq_lane_f32 ())
-+ abort ();
-+ if (test_vdupq_lane_s8 ())
-+ abort ();
-+ if (test_vdupq_lane_s16 ())
-+ abort ();
-+ if (test_vdupq_lane_s32 ())
-+ abort ();
-+ if (test_vdupq_lane_s64 ())
-+ abort ();
-+
-+ return 0;
-+}
-+
-+/* Asm check for test_vdup_lane_s8. */
-+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, v\[0-9\]+\.b\\\[0\\\]" 1 } } */
-+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, v\[0-9\]+\.b\\\[1\\\]" 1 } } */
-+
-+/* Asm check for test_vdupq_lane_s8. */
-+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, v\[0-9\]+\.b\\\[0\\\]" 1 } } */
-+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, v\[0-9\]+\.b\\\[1\\\]" 1 } } */
-+
-+/* Asm check for test_vdup_lane_s16. */
-+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */
-+/* Asm check for test_vdup_lane_s16. */
-+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, v\[0-9\]+\.h\\\[1\\\]" 1 } } */
-+
-+/* Asm check for test_vdupq_lane_s16. */
-+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */
-+/* Asm check for test_vdupq_lane_s16. */
-+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, v\[0-9\]+\.h\\\[1\\\]" 1 } } */
-+
-+/* Asm check for test_vdup_lane_f32 and test_vdup_lane_s32. */
-+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, v\[0-9\]+\.s\\\[0\\\]" 2 } } */
-+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, v\[0-9\]+\.s\\\[1\\\]" 2 } } */
-+
-+/* Asm check for test_vdupq_lane_f32 and test_vdupq_lane_s32. */
-+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, v\[0-9\]+\.s\\\[0\\\]" 2 } } */
-+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, v\[0-9\]+\.s\\\[1\\\]" 2 } } */
-+
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_15.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_15.c
-@@ -0,0 +1,19 @@
-+/* Verify:
-+ * with outgoing.
-+ * total frame size > 512.
-+ area except outgoing <= 512
-+ * number of callee-save reg >= 2.
-+ * split the stack adjustment into two substractions,
-+ the first could be optimized into "stp !". */
-+
-+/* { dg-do run } */
-+/* { dg-options "-O2 --save-temps" } */
-+
-+#include "test_frame_common.h"
-+
-+t_frame_pattern_outgoing (test15, 480, , 8, a[8])
-+t_frame_run (test15)
-+
-+/* { dg-final { scan-assembler-times "sub\tsp, sp, #\[0-9\]+" 1 } } */
-+/* { dg-final { scan-assembler-times "stp\tx29, x30, \\\[sp, -\[0-9\]+\\\]!" 3 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/vbslq_u64_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/vbslq_u64_1.c
-@@ -0,0 +1,17 @@
-+/* Test if a BSL-like instruction can be generated from a C idiom. */
-+/* { dg-do assemble } */
-+/* { dg-options "--save-temps -O3" } */
-+
-+#include <arm_neon.h>
-+
-+/* Folds to BIF. */
-+
-+uint32x4_t
-+vbslq_dummy_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t mask)
-+{
-+ return (mask & a) | (~mask & b);
-+}
-+
-+/* { dg-final { scan-assembler-times "bif\\tv" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/vdup_n_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/vdup_n_1.c
-@@ -0,0 +1,619 @@
-+/* Test vdup_lane intrinsics work correctly. */
-+/* { dg-do run } */
-+/* { dg-options "-O1 --save-temps" } */
-+
-+#include <arm_neon.h>
-+
-+extern void abort (void);
-+
-+float32x2_t __attribute__ ((noinline))
-+wrap_vdup_n_f32 (float32_t a)
-+{
-+ return vdup_n_f32 (a);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vdup_n_f32 ()
-+{
-+ float32_t a = 1.0;
-+ float32x2_t b;
-+ float32_t c[2];
-+ int i;
-+
-+ b = wrap_vdup_n_f32 (a);
-+ vst1_f32 (c, b);
-+ for (i = 0; i < 2; i++)
-+ if (a != c[i])
-+ return 1;
-+ return 0;
-+}
-+
-+float32x4_t __attribute__ ((noinline))
-+wrap_vdupq_n_f32 (float32_t a)
-+{
-+ return vdupq_n_f32 (a);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vdupq_n_f32 ()
-+{
-+ float32_t a = 1.0;
-+ float32x4_t b;
-+ float32_t c[4];
-+ int i;
-+
-+ b = wrap_vdupq_n_f32 (a);
-+ vst1q_f32 (c, b);
-+ for (i = 0; i < 4; i++)
-+ if (a != c[i])
-+ return 1;
-+ return 0;
-+}
-+
-+float64x1_t __attribute__ ((noinline))
-+wrap_vdup_n_f64 (float64_t a)
-+{
-+ return vdup_n_f64 (a);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vdup_n_f64 ()
-+{
-+ float64_t a = 1.0;
-+ float64x1_t b;
-+ float64_t c[1];
-+ int i;
-+
-+ b = wrap_vdup_n_f64 (a);
-+ vst1_f64 (c, b);
-+ for (i = 0; i < 1; i++)
-+ if (a != c[i])
-+ return 1;
-+ return 0;
-+}
-+
-+float64x2_t __attribute__ ((noinline))
-+wrap_vdupq_n_f64 (float64_t a)
-+{
-+ return vdupq_n_f64 (a);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vdupq_n_f64 ()
-+{
-+ float64_t a = 1.0;
-+ float64x2_t b;
-+ float64_t c[2];
-+ int i;
-+
-+ b = wrap_vdupq_n_f64 (a);
-+ vst1q_f64 (c, b);
-+ for (i = 0; i < 2; i++)
-+ if (a != c[i])
-+ return 1;
-+ return 0;
-+}
-+
-+poly8x8_t __attribute__ ((noinline))
-+wrap_vdup_n_p8 (poly8_t a)
-+{
-+ return vdup_n_p8 (a);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vdup_n_p8 ()
-+{
-+ poly8_t a = 1;
-+ poly8x8_t b;
-+ poly8_t c[8];
-+ int i;
-+
-+ b = wrap_vdup_n_p8 (a);
-+ vst1_p8 (c, b);
-+ for (i = 0; i < 8; i++)
-+ if (a != c[i])
-+ return 1;
-+ return 0;
-+}
-+
-+poly8x16_t __attribute__ ((noinline))
-+wrap_vdupq_n_p8 (poly8_t a)
-+{
-+ return vdupq_n_p8 (a);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vdupq_n_p8 ()
-+{
-+ poly8_t a = 1;
-+ poly8x16_t b;
-+ poly8_t c[16];
-+ int i;
-+
-+ b = wrap_vdupq_n_p8 (a);
-+ vst1q_p8 (c, b);
-+ for (i = 0; i < 16; i++)
-+ if (a != c[i])
-+ return 1;
-+ return 0;
-+}
-+
-+int8x8_t __attribute__ ((noinline))
-+wrap_vdup_n_s8 (int8_t a)
-+{
-+ return vdup_n_s8 (a);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vdup_n_s8 ()
-+{
-+ int8_t a = 1;
-+ int8x8_t b;
-+ int8_t c[8];
-+ int i;
-+
-+ b = wrap_vdup_n_s8 (a);
-+ vst1_s8 (c, b);
-+ for (i = 0; i < 8; i++)
-+ if (a != c[i])
-+ return 1;
-+ return 0;
-+}
-+
-+int8x16_t __attribute__ ((noinline))
-+wrap_vdupq_n_s8 (int8_t a)
-+{
-+ return vdupq_n_s8 (a);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vdupq_n_s8 ()
-+{
-+ int8_t a = 1;
-+ int8x16_t b;
-+ int8_t c[16];
-+ int i;
-+
-+ b = wrap_vdupq_n_s8 (a);
-+ vst1q_s8 (c, b);
-+ for (i = 0; i < 16; i++)
-+ if (a != c[i])
-+ return 1;
-+ return 0;
-+}
-+
-+uint8x8_t __attribute__ ((noinline))
-+wrap_vdup_n_u8 (uint8_t a)
-+{
-+ return vdup_n_u8 (a);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vdup_n_u8 ()
-+{
-+ uint8_t a = 1;
-+ uint8x8_t b;
-+ uint8_t c[8];
-+ int i;
-+
-+ b = wrap_vdup_n_u8 (a);
-+ vst1_u8 (c, b);
-+ for (i = 0; i < 8; i++)
-+ if (a != c[i])
-+ return 1;
-+ return 0;
-+}
-+
-+uint8x16_t __attribute__ ((noinline))
-+wrap_vdupq_n_u8 (uint8_t a)
-+{
-+ return vdupq_n_u8 (a);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vdupq_n_u8 ()
-+{
-+ uint8_t a = 1;
-+ uint8x16_t b;
-+ uint8_t c[16];
-+ int i;
-+
-+ b = wrap_vdupq_n_u8 (a);
-+ vst1q_u8 (c, b);
-+ for (i = 0; i < 16; i++)
-+ if (a != c[i])
-+ return 1;
-+ return 0;
-+}
-+
-+poly16x4_t __attribute__ ((noinline))
-+wrap_vdup_n_p16 (poly16_t a)
-+{
-+ return vdup_n_p16 (a);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vdup_n_p16 ()
-+{
-+ poly16_t a = 1;
-+ poly16x4_t b;
-+ poly16_t c[4];
-+ int i;
-+
-+ b = wrap_vdup_n_p16 (a);
-+ vst1_p16 (c, b);
-+ for (i = 0; i < 4; i++)
-+ if (a != c[i])
-+ return 1;
-+ return 0;
-+}
-+
-+poly16x8_t __attribute__ ((noinline))
-+wrap_vdupq_n_p16 (poly16_t a)
-+{
-+ return vdupq_n_p16 (a);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vdupq_n_p16 ()
-+{
-+ poly16_t a = 1;
-+ poly16x8_t b;
-+ poly16_t c[8];
-+ int i;
-+
-+ b = wrap_vdupq_n_p16 (a);
-+ vst1q_p16 (c, b);
-+ for (i = 0; i < 8; i++)
-+ if (a != c[i])
-+ return 1;
-+ return 0;
-+}
-+
-+int16x4_t __attribute__ ((noinline))
-+wrap_vdup_n_s16 (int16_t a)
-+{
-+ return vdup_n_s16 (a);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vdup_n_s16 ()
-+{
-+ int16_t a = 1;
-+ int16x4_t b;
-+ int16_t c[4];
-+ int i;
-+
-+ b = wrap_vdup_n_s16 (a);
-+ vst1_s16 (c, b);
-+ for (i = 0; i < 4; i++)
-+ if (a != c[i])
-+ return 1;
-+ return 0;
-+}
-+
-+int16x8_t __attribute__ ((noinline))
-+wrap_vdupq_n_s16 (int16_t a)
-+{
-+ return vdupq_n_s16 (a);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vdupq_n_s16 ()
-+{
-+ int16_t a = 1;
-+ int16x8_t b;
-+ int16_t c[8];
-+ int i;
-+
-+ b = wrap_vdupq_n_s16 (a);
-+ vst1q_s16 (c, b);
-+ for (i = 0; i < 8; i++)
-+ if (a != c[i])
-+ return 1;
-+ return 0;
-+}
-+
-+uint16x4_t __attribute__ ((noinline))
-+wrap_vdup_n_u16 (uint16_t a)
-+{
-+ return vdup_n_u16 (a);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vdup_n_u16 ()
-+{
-+ uint16_t a = 1;
-+ uint16x4_t b;
-+ uint16_t c[4];
-+ int i;
-+
-+ b = wrap_vdup_n_u16 (a);
-+ vst1_u16 (c, b);
-+ for (i = 0; i < 4; i++)
-+ if (a != c[i])
-+ return 1;
-+ return 0;
-+}
-+
-+uint16x8_t __attribute__ ((noinline))
-+wrap_vdupq_n_u16 (uint16_t a)
-+{
-+ return vdupq_n_u16 (a);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vdupq_n_u16 ()
-+{
-+ uint16_t a = 1;
-+ uint16x8_t b;
-+ uint16_t c[8];
-+ int i;
-+
-+ b = wrap_vdupq_n_u16 (a);
-+ vst1q_u16 (c, b);
-+ for (i = 0; i < 8; i++)
-+ if (a != c[i])
-+ return 1;
-+ return 0;
-+}
-+
-+int32x2_t __attribute__ ((noinline))
-+wrap_vdup_n_s32 (int32_t a)
-+{
-+ return vdup_n_s32 (a);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vdup_n_s32 ()
-+{
-+ int32_t a = 1;
-+ int32x2_t b;
-+ int32_t c[2];
-+ int i;
-+
-+ b = wrap_vdup_n_s32 (a);
-+ vst1_s32 (c, b);
-+ for (i = 0; i < 2; i++)
-+ if (a != c[i])
-+ return 1;
-+ return 0;
-+}
-+
-+int32x4_t __attribute__ ((noinline))
-+wrap_vdupq_n_s32 (int32_t a)
-+{
-+ return vdupq_n_s32 (a);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vdupq_n_s32 ()
-+{
-+ int32_t a = 1;
-+ int32x4_t b;
-+ int32_t c[4];
-+ int i;
-+
-+ b = wrap_vdupq_n_s32 (a);
-+ vst1q_s32 (c, b);
-+ for (i = 0; i < 4; i++)
-+ if (a != c[i])
-+ return 1;
-+ return 0;
-+}
-+
-+uint32x2_t __attribute__ ((noinline))
-+wrap_vdup_n_u32 (uint32_t a)
-+{
-+ return vdup_n_u32 (a);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vdup_n_u32 ()
-+{
-+ uint32_t a = 1;
-+ uint32x2_t b;
-+ uint32_t c[2];
-+ int i;
-+
-+ b = wrap_vdup_n_u32 (a);
-+ vst1_u32 (c, b);
-+ for (i = 0; i < 2; i++)
-+ if (a != c[i])
-+ return 1;
-+ return 0;
-+}
-+
-+uint32x4_t __attribute__ ((noinline))
-+wrap_vdupq_n_u32 (uint32_t a)
-+{
-+ return vdupq_n_u32 (a);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vdupq_n_u32 ()
-+{
-+ uint32_t a = 1;
-+ uint32x4_t b;
-+ uint32_t c[4];
-+ int i;
-+
-+ b = wrap_vdupq_n_u32 (a);
-+ vst1q_u32 (c, b);
-+ for (i = 0; i < 4; i++)
-+ if (a != c[i])
-+ return 1;
-+ return 0;
-+}
-+
-+int64x1_t __attribute__ ((noinline))
-+wrap_vdup_n_s64 (int64_t a)
-+{
-+ return vdup_n_s64 (a);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vdup_n_s64 ()
-+{
-+ int64_t a = 1;
-+ int64x1_t b;
-+ int64_t c[1];
-+ int i;
-+
-+ b = wrap_vdup_n_s64 (a);
-+ vst1_s64 (c, b);
-+ for (i = 0; i < 1; i++)
-+ if (a != c[i])
-+ return 1;
-+ return 0;
-+}
-+
-+int64x2_t __attribute__ ((noinline))
-+wrap_vdupq_n_s64 (int64_t a)
-+{
-+ return vdupq_n_s64 (a);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vdupq_n_s64 ()
-+{
-+ int64_t a = 1;
-+ int64x2_t b;
-+ int64_t c[2];
-+ int i;
-+
-+ b = wrap_vdupq_n_s64 (a);
-+ vst1q_s64 (c, b);
-+ for (i = 0; i < 2; i++)
-+ if (a != c[i])
-+ return 1;
-+ return 0;
-+}
-+
-+uint64x1_t __attribute__ ((noinline))
-+wrap_vdup_n_u64 (uint64_t a)
-+{
-+ return vdup_n_u64 (a);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vdup_n_u64 ()
-+{
-+ uint64_t a = 1;
-+ uint64x1_t b;
-+ uint64_t c[1];
-+ int i;
-+
-+ b = wrap_vdup_n_u64 (a);
-+ vst1_u64 (c, b);
-+ for (i = 0; i < 1; i++)
-+ if (a != c[i])
-+ return 1;
-+ return 0;
-+}
-+
-+uint64x2_t __attribute__ ((noinline))
-+wrap_vdupq_n_u64 (uint64_t a)
-+{
-+ return vdupq_n_u64 (a);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vdupq_n_u64 ()
-+{
-+ uint64_t a = 1;
-+ uint64x2_t b;
-+ uint64_t c[2];
-+ int i;
-+
-+ b = wrap_vdupq_n_u64 (a);
-+ vst1q_u64 (c, b);
-+ for (i = 0; i < 2; i++)
-+ if (a != c[i])
-+ return 1;
-+ return 0;
-+}
-+
-+int
-+main ()
-+{
-+ if (test_vdup_n_f32 ())
-+ abort ();
-+ if (test_vdup_n_f64 ())
-+ abort ();
-+ if (test_vdup_n_p8 ())
-+ abort ();
-+ if (test_vdup_n_u8 ())
-+ abort ();
-+ if (test_vdup_n_s8 ())
-+ abort ();
-+ if (test_vdup_n_p16 ())
-+ abort ();
-+ if (test_vdup_n_s16 ())
-+ abort ();
-+ if (test_vdup_n_u16 ())
-+ abort ();
-+ if (test_vdup_n_s32 ())
-+ abort ();
-+ if (test_vdup_n_u32 ())
-+ abort ();
-+ if (test_vdup_n_s64 ())
-+ abort ();
-+ if (test_vdup_n_u64 ())
-+ abort ();
-+ if (test_vdupq_n_f32 ())
-+ abort ();
-+ if (test_vdupq_n_f64 ())
-+ abort ();
-+ if (test_vdupq_n_p8 ())
-+ abort ();
-+ if (test_vdupq_n_u8 ())
-+ abort ();
-+ if (test_vdupq_n_s8 ())
-+ abort ();
-+ if (test_vdupq_n_p16 ())
-+ abort ();
-+ if (test_vdupq_n_s16 ())
-+ abort ();
-+ if (test_vdupq_n_u16 ())
-+ abort ();
-+ if (test_vdupq_n_s32 ())
-+ abort ();
-+ if (test_vdupq_n_u32 ())
-+ abort ();
-+ if (test_vdupq_n_s64 ())
-+ abort ();
-+ if (test_vdupq_n_u64 ())
-+ abort ();
-+ return 0;
-+}
-+
-+/* No asm checks for vdup_n_f32, vdupq_n_f32, vdup_n_f64 and vdupq_n_f64.
-+ Cannot force floating point value in general purpose regester. */
-+
-+/* Asm check for test_vdup_n_p8, test_vdup_n_s8, test_vdup_n_u8. */
-+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, w\[0-9\]+" 3 } } */
-+
-+/* Asm check for test_vdupq_n_p8, test_vdupq_n_s8, test_vdupq_n_u8. */
-+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, w\[0-9\]+" 3 } } */
-+
-+/* Asm check for test_vdup_n_p16, test_vdup_n_s16, test_vdup_n_u16. */
-+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, w\[0-9\]+" 3 } } */
-+
-+/* Asm check for test_vdupq_n_p16, test_vdupq_n_s16, test_vdupq_n_u16. */
-+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, w\[0-9\]+" 3 } } */
-+
-+/* Asm check for test_vdup_n_s32, test_vdup_n_u32. */
-+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, w\[0-9\]+" 2 } } */
-+
-+/* Asm check for test_vdupq_n_s32, test_vdupq_n_u32. */
-+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, w\[0-9\]+" 2 } } */
-+
-+/* Asm check for test_vdup_n_s64, test_vdup_n_u64 are left out.
-+ Attempts to make the compiler generate "dup\\td\[0-9\]+, x\[0-9\]+"
-+ are not practical. */
-+
-+/* Asm check for test_vdupq_n_s64, test_vdupq_n_u64. */
-+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2d, x\[0-9\]+" 2 } } */
-+
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_4.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_4.c
-@@ -0,0 +1,19 @@
-+/* Verify:
-+ * -fomit-frame-pointer.
-+ * without outgoing.
-+ * total frame size <= 512 but > 256.
-+ * number of callee-save reg >= 2.
-+ * we can use "stp !" to optimize stack adjustment. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */
-+
-+#include "test_frame_common.h"
-+
-+t_frame_pattern (test4, 400, "x19")
-+t_frame_run (test4)
-+
-+/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, -\[0-9\]+\\\]!" 1 } } */
-+/* { dg-final { scan-assembler-times "ldp\tx19, x30, \\\[sp\\\], \[0-9\]+" 2 } } */
-+
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/fcsel_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/fcsel_1.c
-@@ -0,0 +1,22 @@
-+/* { dg-do compile } */
-+/* { dg-options " -O2 " } */
-+
-+float
-+f_1 (float a, float b, float c, float d)
-+{
-+ if (a > 0.0)
-+ return c;
-+ else
-+ return 2.0;
-+}
-+
-+double
-+f_2 (double a, double b, double c, double d)
-+{
-+ if (a > b)
-+ return c;
-+ else
-+ return d;
-+}
-+
-+/* { dg-final { scan-assembler-times "\tfcsel" 2 } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/vect-fp.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fp.c
-@@ -8,11 +8,11 @@
-
-
- #define DEFN_SETV(type) \
-- set_vector_##type (pR##type a, type n) \
-- { \
-- int i; \
-- for (i=0; i<16; i++) \
-- a[i] = n; \
-+ void set_vector_##type (pR##type a, type n) \
-+ { \
-+ int i; \
-+ for (i=0; i<16; i++) \
-+ a[i] = n; \
- }
-
- #define DEFN_CHECKV(type) \
---- a/src/gcc/testsuite/gcc.target/aarch64/rev16_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/rev16_1.c
-@@ -0,0 +1,59 @@
-+/* { dg-options "-O2" } */
-+/* { dg-do run } */
-+
-+extern void abort (void);
-+
-+typedef unsigned int __u32;
-+
-+__u32
-+__rev16_32_alt (__u32 x)
-+{
-+ return (((__u32)(x) & (__u32)0xff00ff00UL) >> 8)
-+ | (((__u32)(x) & (__u32)0x00ff00ffUL) << 8);
-+}
-+
-+__u32
-+__rev16_32 (__u32 x)
-+{
-+ return (((__u32)(x) & (__u32)0x00ff00ffUL) << 8)
-+ | (((__u32)(x) & (__u32)0xff00ff00UL) >> 8);
-+}
-+
-+typedef unsigned long long __u64;
-+
-+__u64
-+__rev16_64_alt (__u64 x)
-+{
-+ return (((__u64)(x) & (__u64)0xff00ff00ff00ff00UL) >> 8)
-+ | (((__u64)(x) & (__u64)0x00ff00ff00ff00ffUL) << 8);
-+}
-+
-+__u64
-+__rev16_64 (__u64 x)
-+{
-+ return (((__u64)(x) & (__u64)0x00ff00ff00ff00ffUL) << 8)
-+ | (((__u64)(x) & (__u64)0xff00ff00ff00ff00UL) >> 8);
-+}
-+
-+int
-+main (void)
-+{
-+ volatile __u32 in32 = 0x12345678;
-+ volatile __u32 expected32 = 0x34127856;
-+ volatile __u64 in64 = 0x1234567890abcdefUL;
-+ volatile __u64 expected64 = 0x34127856ab90efcdUL;
-+
-+ if (__rev16_32 (in32) != expected32)
-+ abort ();
-+
-+ if (__rev16_32_alt (in32) != expected32)
-+ abort ();
-+
-+ if (__rev16_64 (in64) != expected64)
-+ abort ();
-+
-+ if (__rev16_64_alt (in64) != expected64)
-+ abort ();
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/vget_high_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/vget_high_1.c
-@@ -0,0 +1,60 @@
-+/* { dg-do run } */
-+/* { dg-options "-O3 -std=c99" } */
-+
-+#include <arm_neon.h>
-+
-+extern void abort (void);
-+
-+#define VARIANTS(VARIANT) \
-+VARIANT (uint8_t, 8, uint8x8_t, uint8x16_t, u8) \
-+VARIANT (uint16_t, 4, uint16x4_t, uint16x8_t, u16) \
-+VARIANT (uint32_t, 2, uint32x2_t, uint32x4_t, u32) \
-+VARIANT (uint64_t, 1, uint64x1_t, uint64x2_t, u64) \
-+VARIANT (int8_t, 8, int8x8_t, int8x16_t, s8) \
-+VARIANT (int16_t, 4, int16x4_t, int16x8_t, s16) \
-+VARIANT (int32_t, 2, int32x2_t, int32x4_t, s32) \
-+VARIANT (int64_t, 1, int64x1_t, int64x2_t, s64) \
-+VARIANT (float32_t, 2, float32x2_t, float32x4_t, f32) \
-+VARIANT (float64_t, 1, float64x1_t, float64x2_t, f64)
-+
-+
-+#define TESTMETH(BASETYPE, NUM64, TYPE64, TYPE128, SUFFIX) \
-+int \
-+test_vget_low_ ##SUFFIX (BASETYPE *data) \
-+{ \
-+ BASETYPE temp [NUM64]; \
-+ TYPE128 vec = vld1q_##SUFFIX (data); \
-+ TYPE64 high = vget_high_##SUFFIX (vec); \
-+ vst1_##SUFFIX (temp, high); \
-+ for (int i = 0; i < NUM64; i++) \
-+ if (temp[i] != data[i + NUM64]) \
-+ return 1; \
-+ return 0; \
-+}
-+
-+VARIANTS (TESTMETH)
-+
-+#define CHECK(BASETYPE, NUM64, TYPE64, TYPE128, SUFFIX) \
-+ if (test_vget_low_##SUFFIX (BASETYPE ## _ ## data) != 0) \
-+ abort ();
-+
-+int
-+main (int argc, char **argv)
-+{
-+ uint8_t uint8_t_data[16] =
-+ { 1, 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47 };
-+ uint16_t uint16_t_data[8] = { 1, 22, 333, 4444, 55555, 6666, 777, 88 };
-+ uint32_t uint32_t_data[4] = { 65537, 11, 70000, 23 };
-+ uint64_t uint64_t_data[2] = { 0xdeadbeefcafebabeULL, 0x0123456789abcdefULL };
-+ int8_t int8_t_data[16] =
-+ { -1, -3, -5, -7, 9, -11, -13, 15, -17, -19, 21, -23, 25, 27, -29, -31 };
-+ int16_t int16_t_data[8] = { -17, 19, 3, -999, 44048, 505, 9999, 1000};
-+ int32_t int32_t_data[4] = { 123456789, -987654321, -135792468, 975318642 };
-+ int64_t int64_t_data[2] = {0xfedcba9876543210LL, 0xdeadbabecafebeefLL };
-+ float32_t float32_t_data[4] = { 3.14159, 2.718, 1.414, 100.0 };
-+ float64_t float64_t_data[2] = { 1.01001000100001, 12345.6789 };
-+
-+ VARIANTS (CHECK);
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/vldN_dup_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/vldN_dup_1.c
-@@ -0,0 +1,84 @@
-+/* { dg-do run } */
-+/* { dg-options "-O3 -fno-inline" } */
-+
-+#include <arm_neon.h>
-+
-+extern void abort (void);
-+
-+#define VARIANTS(VARIANT, STRUCT) \
-+VARIANT (uint8, , 8, _u8, STRUCT) \
-+VARIANT (uint16, , 4, _u16, STRUCT) \
-+VARIANT (uint32, , 2, _u32, STRUCT) \
-+VARIANT (uint64, , 1, _u64, STRUCT) \
-+VARIANT (int8, , 8, _s8, STRUCT) \
-+VARIANT (int16, , 4, _s16, STRUCT) \
-+VARIANT (int32, , 2, _s32, STRUCT) \
-+VARIANT (int64, , 1, _s64, STRUCT) \
-+VARIANT (poly8, , 8, _p8, STRUCT) \
-+VARIANT (poly16, , 4, _p16, STRUCT) \
-+VARIANT (float32, , 2, _f32, STRUCT) \
-+VARIANT (float64, , 1, _f64, STRUCT) \
-+VARIANT (uint8, q, 16, _u8, STRUCT) \
-+VARIANT (uint16, q, 8, _u16, STRUCT) \
-+VARIANT (uint32, q, 4, _u32, STRUCT) \
-+VARIANT (uint64, q, 2, _u64, STRUCT) \
-+VARIANT (int8, q, 16, _s8, STRUCT) \
-+VARIANT (int16, q, 8, _s16, STRUCT) \
-+VARIANT (int32, q, 4, _s32, STRUCT) \
-+VARIANT (int64, q, 2, _s64, STRUCT) \
-+VARIANT (poly8, q, 16, _p8, STRUCT) \
-+VARIANT (poly16, q, 8, _p16, STRUCT) \
-+VARIANT (float32, q, 4, _f32, STRUCT) \
-+VARIANT (float64, q, 2, _f64, STRUCT)
-+
-+#define TESTMETH(BASE, Q, ELTS, SUFFIX, STRUCT) \
-+int \
-+test_vld##STRUCT##Q##_dup##SUFFIX (const BASE##_t *data) \
-+{ \
-+ BASE##_t temp[ELTS]; \
-+ BASE##x##ELTS##x##STRUCT##_t vectors = \
-+ vld##STRUCT##Q##_dup##SUFFIX (data); \
-+ int i,j; \
-+ for (i = 0; i < STRUCT; i++) \
-+ { \
-+ vst1##Q##SUFFIX (temp, vectors.val[i]); \
-+ for (j = 0; j < ELTS; j++) \
-+ if (temp[j] != data[i]) \
-+ return 1; \
-+ } \
-+ return 0; \
-+}
-+
-+/* Tests of vld2_dup and vld2q_dup. */
-+VARIANTS (TESTMETH, 2)
-+/* Tests of vld3_dup and vld3q_dup. */
-+VARIANTS (TESTMETH, 3)
-+/* Tests of vld4_dup and vld4q_dup. */
-+VARIANTS (TESTMETH, 4)
-+
-+#define CHECK(BASE, Q, ELTS, SUFFIX, STRUCT) \
-+ if (test_vld##STRUCT##Q##_dup##SUFFIX (BASE ##_data) != 0) \
-+ abort ();
-+
-+int
-+main (int argc, char **argv)
-+{
-+ uint8_t uint8_data[4] = { 7, 11, 13, 17 };
-+ uint16_t uint16_data[4] = { 257, 263, 269, 271 };
-+ uint32_t uint32_data[4] = { 65537, 65539, 65543, 65551 };
-+ uint64_t uint64_data[4] = { 0xdeadbeefcafebabeULL, 0x0123456789abcdefULL,
-+ 0xfedcba9876543210LL, 0xdeadbabecafebeefLL };
-+ int8_t int8_data[4] = { -1, 3, -5, 7 };
-+ int16_t int16_data[4] = { 257, -259, 261, -263 };
-+ int32_t int32_data[4] = { 123456789, -987654321, -135792468, 975318642 };
-+ int64_t *int64_data = (int64_t *)uint64_data;
-+ poly8_t poly8_data[4] = { 0, 7, 13, 18, };
-+ poly16_t poly16_data[4] = { 11111, 2222, 333, 44 };
-+ float32_t float32_data[4] = { 3.14159, 2.718, 1.414, 100.0 };
-+ float64_t float64_data[4] = { 1.010010001, 12345.6789, -9876.54321, 1.618 };
-+
-+ VARIANTS (CHECK, 2);
-+ VARIANTS (CHECK, 3);
-+ VARIANTS (CHECK, 4);
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c
-@@ -0,0 +1,343 @@
-+/* Test vdup_lane intrinsics work correctly. */
-+/* { dg-do run } */
-+/* { dg-options "-O1 --save-temps" } */
-+
-+#include <arm_neon.h>
-+
-+#define force_simd(V1) asm volatile ("mov %d0, %1.d[0]" \
-+ : "=w"(V1) \
-+ : "w"(V1) \
-+ : /* No clobbers */)
-+
-+extern void abort (void);
-+
-+float32_t __attribute__ ((noinline))
-+wrap_vdups_lane_f32_0 (float32x2_t dummy, float32x2_t a)
-+{
-+ return vdups_lane_f32 (a, 0);
-+}
-+
-+float32_t __attribute__ ((noinline))
-+wrap_vdups_lane_f32_1 (float32x2_t a)
-+{
-+ return vdups_lane_f32 (a, 1);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vdups_lane_f32 ()
-+{
-+ float32x2_t a;
-+ float32_t b;
-+ float32_t c[2] = { 0.0, 1.0 };
-+
-+ a = vld1_f32 (c);
-+ b = wrap_vdups_lane_f32_0 (a, a);
-+ if (c[0] != b)
-+ return 1;
-+ b = wrap_vdups_lane_f32_1 (a);
-+ if (c[1] != b)
-+ return 1;
-+ return 0;
-+}
-+
-+float64_t __attribute__ ((noinline))
-+wrap_vdupd_lane_f64_0 (float64x1_t dummy, float64x1_t a)
-+{
-+ return vdupd_lane_f64 (a, 0);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vdupd_lane_f64 ()
-+{
-+ float64x1_t a;
-+ float64_t b;
-+ float64_t c[1] = { 0.0 };
-+ a = vld1_f64 (c);
-+ b = wrap_vdupd_lane_f64_0 (a, a);
-+ if (c[0] != b)
-+ return 1;
-+ return 0;
-+}
-+
-+int8_t __attribute__ ((noinline))
-+wrap_vdupb_lane_s8_0 (int8x8_t dummy, int8x8_t a)
-+{
-+ int8_t result = vdupb_lane_s8 (a, 0);
-+ force_simd (result);
-+ return result;
-+}
-+
-+int8_t __attribute__ ((noinline))
-+wrap_vdupb_lane_s8_1 (int8x8_t a)
-+{
-+ int8_t result = vdupb_lane_s8 (a, 1);
-+ force_simd (result);
-+ return result;
-+}
-+
-+int __attribute__ ((noinline))
-+test_vdupb_lane_s8 ()
-+{
-+ int8x8_t a;
-+ int8_t b;
-+ int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
-+
-+ a = vld1_s8 (c);
-+ b = wrap_vdupb_lane_s8_0 (a, a);
-+ if (c[0] != b)
-+ return 1;
-+ b = wrap_vdupb_lane_s8_1 (a);
-+ if (c[1] != b)
-+ return 1;
-+
-+ return 0;
-+}
-+
-+uint8_t __attribute__ ((noinline))
-+wrap_vdupb_lane_u8_0 (uint8x8_t dummy, uint8x8_t a)
-+{
-+ uint8_t result = vdupb_lane_u8 (a, 0);
-+ force_simd (result);
-+ return result;
-+}
-+
-+uint8_t __attribute__ ((noinline))
-+wrap_vdupb_lane_u8_1 (uint8x8_t a)
-+{
-+ uint8_t result = vdupb_lane_u8 (a, 1);
-+ force_simd (result);
-+ return result;
-+}
-+
-+int __attribute__ ((noinline))
-+test_vdupb_lane_u8 ()
-+{
-+ uint8x8_t a;
-+ uint8_t b;
-+ uint8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
-+
-+ a = vld1_u8 (c);
-+ b = wrap_vdupb_lane_u8_0 (a, a);
-+ if (c[0] != b)
-+ return 1;
-+ b = wrap_vdupb_lane_u8_1 (a);
-+ if (c[1] != b)
-+ return 1;
-+ return 0;
-+}
-+
-+int16_t __attribute__ ((noinline))
-+wrap_vduph_lane_s16_0 (int16x4_t dummy, int16x4_t a)
-+{
-+ int16_t result = vduph_lane_s16 (a, 0);
-+ force_simd (result);
-+ return result;
-+}
-+
-+int16_t __attribute__ ((noinline))
-+wrap_vduph_lane_s16_1 (int16x4_t a)
-+{
-+ int16_t result = vduph_lane_s16 (a, 1);
-+ force_simd (result);
-+ return result;
-+}
-+
-+int __attribute__ ((noinline))
-+test_vduph_lane_s16 ()
-+{
-+ int16x4_t a;
-+ int16_t b;
-+ int16_t c[4] = { 0, 1, 2, 3 };
-+
-+ a = vld1_s16 (c);
-+ b = wrap_vduph_lane_s16_0 (a, a);
-+ if (c[0] != b)
-+ return 1;
-+ b = wrap_vduph_lane_s16_1 (a);
-+ if (c[1] != b)
-+ return 1;
-+ return 0;
-+}
-+
-+uint16_t __attribute__ ((noinline))
-+wrap_vduph_lane_u16_0 (uint16x4_t dummy, uint16x4_t a)
-+{
-+ uint16_t result = vduph_lane_u16 (a, 0);
-+ force_simd (result);
-+ return result;
-+}
-+
-+uint16_t __attribute__ ((noinline))
-+wrap_vduph_lane_u16_1 (uint16x4_t a)
-+{
-+ uint16_t result = vduph_lane_u16 (a, 1);
-+ force_simd (result);
-+ return result;
-+}
-+
-+int __attribute__ ((noinline))
-+test_vduph_lane_u16 ()
-+{
-+ uint16x4_t a;
-+ uint16_t b;
-+ uint16_t c[4] = { 0, 1, 2, 3 };
-+
-+ a = vld1_u16 (c);
-+ b = wrap_vduph_lane_u16_0 (a, a);
-+ if (c[0] != b)
-+ return 1;
-+ b = wrap_vduph_lane_u16_1 (a);
-+ if (c[1] != b)
-+ return 1;
-+ return 0;
-+}
-+
-+int32_t __attribute__ ((noinline))
-+wrap_vdups_lane_s32_0 (int32x2_t dummy, int32x2_t a)
-+{
-+ int32_t result = vdups_lane_s32 (a, 0);
-+ force_simd (result);
-+ return result;
-+}
-+
-+int32_t __attribute__ ((noinline))
-+wrap_vdups_lane_s32_1 (int32x2_t a)
-+{
-+ int32_t result = vdups_lane_s32 (a, 1);
-+ force_simd (result);
-+ return result;
-+}
-+
-+int __attribute__ ((noinline))
-+test_vdups_lane_s32 ()
-+{
-+ int32x2_t a;
-+ int32_t b;
-+ int32_t c[2] = { 0, 1 };
-+
-+ a = vld1_s32 (c);
-+ b = wrap_vdups_lane_s32_0 (vcreate_s32 (0), a);
-+ if (c[0] != b)
-+ return 1;
-+ b = wrap_vdups_lane_s32_1 (a);
-+ if (c[1] != b)
-+ return 1;
-+ return 0;
-+}
-+
-+uint32_t __attribute__ ((noinline))
-+wrap_vdups_lane_u32_0 (uint32x2_t dummy, uint32x2_t a)
-+{
-+ uint32_t result = vdups_lane_u32 (a, 0);
-+ force_simd (result);
-+ return result;
-+}
-+
-+uint32_t __attribute__ ((noinline))
-+wrap_vdups_lane_u32_1 (uint32x2_t a)
-+{
-+ uint32_t result = vdups_lane_u32 (a, 1);
-+ force_simd (result);
-+ return result;
-+}
-+
-+int __attribute__ ((noinline))
-+test_vdups_lane_u32 ()
-+{
-+ uint32x2_t a;
-+ uint32_t b;
-+ uint32_t c[2] = { 0, 1 };
-+ a = vld1_u32 (c);
-+ b = wrap_vdups_lane_u32_0 (a, a);
-+ if (c[0] != b)
-+ return 1;
-+ b = wrap_vdups_lane_u32_1 (a);
-+ if (c[1] != b)
-+ return 1;
-+ return 0;
-+}
-+
-+uint64_t __attribute__ ((noinline))
-+wrap_vdupd_lane_u64_0 (uint64x1_t dummy, uint64x1_t a)
-+{
-+ return vdupd_lane_u64 (a, 0);;
-+}
-+
-+int __attribute__ ((noinline))
-+test_vdupd_lane_u64 ()
-+{
-+ uint64x1_t a;
-+ uint64_t b;
-+ uint64_t c[1] = { 0 };
-+
-+ a = vld1_u64 (c);
-+ b = wrap_vdupd_lane_u64_0 (a, a);
-+ if (c[0] != b)
-+ return 1;
-+ return 0;
-+}
-+
-+int64_t __attribute__ ((noinline))
-+wrap_vdupd_lane_s64_0 (uint64x1_t dummy, int64x1_t a)
-+{
-+ return vdupd_lane_u64 (a, 0);
-+}
-+
-+int __attribute__ ((noinline))
-+test_vdupd_lane_s64 ()
-+{
-+ int64x1_t a;
-+ int64_t b;
-+ int64_t c[1] = { 0 };
-+
-+ a = vld1_s64 (c);
-+ b = wrap_vdupd_lane_s64_0 (a, a);
-+ if (c[0] != b)
-+ return 1;
-+ return 0;
-+}
-+
-+int
-+main ()
-+{
-+ if (test_vdups_lane_f32 ())
-+ abort ();
-+ if (test_vdupd_lane_f64 ())
-+ abort ();
-+ if (test_vdupb_lane_s8 ())
-+ abort ();
-+ if (test_vdupb_lane_u8 ())
-+ abort ();
-+ if (test_vduph_lane_s16 ())
-+ abort ();
-+ if (test_vduph_lane_u16 ())
-+ abort ();
-+ if (test_vdups_lane_s32 ())
-+ abort ();
-+ if (test_vdups_lane_u32 ())
-+ abort ();
-+ if (test_vdupd_lane_s64 ())
-+ abort ();
-+ if (test_vdupd_lane_u64 ())
-+ abort ();
-+ return 0;
-+}
-+
-+/* Asm check for vdupb_lane_s8, vdupb_lane_u8. */
-+/* { dg-final { scan-assembler-not "dup\\tb\[0-9\]+, v\[0-9\]+\.b\\\[0\\\]" } } */
-+/* { dg-final { scan-assembler-times "dup\\tb\[0-9\]+, v\[0-9\]+\.b\\\[1\\\]" 2 } } */
-+
-+/* Asm check for vduph_lane_h16, vduph_lane_h16. */
-+/* { dg-final { scan-assembler-not "dup\\th\[0-9\]+, v\[0-9\]+\.h\\\[0\\\]" } } */
-+/* { dg-final { scan-assembler-times "dup\\th\[0-9\]+, v\[0-9\]+\.h\\\[1\\\]" 2 } } */
-+
-+/* Asm check for vdups_lane_f32, vdups_lane_s32, vdups_lane_u32. */
-+/* Can't generate "dup s<n>, v<m>[0]" for vdups_lane_s32 and vdups_lane_u32. */
-+/* { dg-final { scan-assembler-times "dup\\ts\[0-9\]+, v\[0-9\]+\.s\\\[0\\\]" 1} } */
-+/* { dg-final { scan-assembler-times "dup\\ts\[0-9\]+, v\[0-9\]+\.s\\\[1\\\]" 3 } } */
-+
-+/* Asm check for vdupd_lane_f64, vdupd_lane_s64, vdupd_lane_u64. */
-+/* Attempts to make the compiler generate vdupd are not practical. */
-+/* { dg-final { scan-assembler-not "dup\\td\[0-9\]+, v\[0-9\]+\.d\\\[0\\\]" } }
-+
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/vbslq_u64_2.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/vbslq_u64_2.c
-@@ -0,0 +1,22 @@
-+/* Test vbslq_u64 can be folded. */
-+/* { dg-do assemble } */
-+/* { dg-options "--save-temps -O3" } */
-+#include <arm_neon.h>
-+
-+/* Folds to BIC. */
-+
-+int32x4_t
-+half_fold_int (uint32x4_t mask)
-+{
-+ int32x4_t a = {0, 0, 0, 0};
-+ int32x4_t b = {2, 4, 8, 16};
-+ return vbslq_s32 (mask, a, b);
-+}
-+
-+/* { dg-final { scan-assembler-not "bsl\\tv" } } */
-+/* { dg-final { scan-assembler-not "bit\\tv" } } */
-+/* { dg-final { scan-assembler-not "bif\\tv" } } */
-+/* { dg-final { scan-assembler "bic\\tv" } } */
-+
-+/* { dg-final { cleanup-saved-temps } } */
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/vdup_n_2.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/vdup_n_2.c
-@@ -0,0 +1,28 @@
-+/* { dg-do run } */
-+/* { dg-options "-O2 -fno-inline --save-temps" } */
-+
-+extern void abort (void);
-+
-+typedef float float32x2_t __attribute__ ((__vector_size__ ((8))));
-+typedef unsigned int uint32x2_t __attribute__ ((__vector_size__ ((8))));
-+
-+float32x2_t
-+test_dup_1 (float32x2_t in)
-+{
-+ return __builtin_shuffle (in, (uint32x2_t) {1, 1});
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ float32x2_t test = {2.718, 3.141};
-+ float32x2_t res = test_dup_1 (test);
-+ if (res[0] != test[1] || res[1] != test[1])
-+ abort ();
-+ return 0;
-+}
-+
-+/* { dg-final { scan-assembler-times "\[ \t\]*dup\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.s\\\[\[01\]\\\]" 1 } } */
-+/* { dg-final { scan-assembler-not "zip" } } */
-+/* { dg-final { cleanup-saved-temps } } */
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_5.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_5.c
-@@ -0,0 +1,13 @@
-+/* Verify:
-+ * -fomit-frame-pointer.
-+ * with outgoing.
-+ * total frame size <= 512.
-+ * one subtraction of the whole frame size. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-O2 -fomit-frame-pointer" } */
-+
-+#include "test_frame_common.h"
-+
-+t_frame_pattern_outgoing (test5, 300, "x19", 8, a[8])
-+t_frame_run (test5)
---- a/src/gcc/testsuite/gcc.target/aarch64/vld1-vst1_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/vld1-vst1_1.c
-@@ -5,48 +5,54 @@
-
- extern void abort (void);
-
--int __attribute__ ((noinline))
--test_vld1_vst1 ()
--{
-- int8x8_t a;
-- int8x8_t b;
-- int i = 0;
-- int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
-- int8_t d[8];
-- a = vld1_s8 (c);
-- asm volatile ("":::"memory");
-- vst1_s8 (d, a);
-- asm volatile ("":::"memory");
-- for (; i < 8; i++)
-- if (c[i] != d[i])
-- return 1;
-- return 0;
-+#define TESTMETH(TYPE, NUM, BASETYPE, SUFFIX) \
-+int __attribute__ ((noinline)) \
-+test_vld1_vst1##SUFFIX () \
-+{ \
-+ TYPE vec; \
-+ int i = 0; \
-+ BASETYPE src[NUM]; \
-+ BASETYPE dest[NUM]; \
-+ for (i = 0; i < NUM; i++) \
-+ src[i] = 2*i + 1; \
-+ asm volatile ("":::"memory"); \
-+ vec = vld1 ## SUFFIX (src); \
-+ asm volatile ("":::"memory"); \
-+ vst1 ## SUFFIX (dest, vec); \
-+ asm volatile ("":::"memory"); \
-+ for (i = 0; i < NUM; i++) \
-+ if (src[i] != dest[i]) \
-+ return 1; \
-+ return 0; \
- }
-
--int __attribute__ ((noinline))
--test_vld1q_vst1q ()
--{
-- int16x8_t a;
-- int16x8_t b;
-- int i = 0;
-- int16_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
-- int16_t d[8];
-- a = vld1q_s16 (c);
-- asm volatile ("":::"memory");
-- vst1q_s16 (d, a);
-- asm volatile ("":::"memory");
-- for (; i < 8; i++)
-- if (c[i] != d[i])
-- return 1;
-- return 0;
--}
-+#define VARIANTS(THING) \
-+THING (int8x8_t, 8, int8_t, _s8) \
-+THING (uint8x8_t, 8, uint8_t, _u8) \
-+THING (int16x4_t, 4, int16_t, _s16) \
-+THING (uint16x4_t, 4, uint16_t, _u16) \
-+THING (int32x2_t, 2, int32_t, _s32) \
-+THING (uint32x2_t, 2, uint32_t, _u32) \
-+THING (float32x2_t, 2, float32_t, _f32) \
-+THING (int8x16_t, 16, int8_t, q_s8) \
-+THING (uint8x16_t, 16, uint8_t, q_u8) \
-+THING (int16x8_t, 8, int16_t, q_s16) \
-+THING (uint16x8_t, 8, uint16_t, q_u16) \
-+THING (int32x4_t, 4, int32_t, q_s32) \
-+THING (uint32x4_t, 4, uint32_t, q_u32) \
-+THING (int64x2_t, 2, int64_t, q_s64) \
-+THING (uint64x2_t, 2, uint64_t, q_u64) \
-+THING (float64x2_t, 2, float64_t, q_f64)
-
-+VARIANTS (TESTMETH)
-+
-+#define DOTEST(TYPE, NUM, BASETYPE, SUFFIX) \
-+ if (test_vld1_vst1##SUFFIX ()) \
-+ abort ();
-+
- int
- main ()
- {
-- if (test_vld1_vst1 ())
-- abort ();
-- if (test_vld1q_vst1q ())
-- abort ();
-+ VARIANTS (DOTEST);
- return 0;
- }
---- a/src/gcc/testsuite/gcc.target/aarch64/cvtf_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/cvtf_1.c
-@@ -0,0 +1,95 @@
-+/* { dg-do run } */
-+/* { dg-options "-save-temps -fno-inline -O1" } */
-+
-+#define FCVTDEF(ftype,itype) \
-+void \
-+cvt_##itype##_to_##ftype (itype a, ftype b)\
-+{\
-+ ftype c;\
-+ c = (ftype) a;\
-+ if ( (c - b) > 0.00001) abort();\
-+}
-+
-+#define force_simd_for_float(v) asm volatile ("mov %s0, %1.s[0]" :"=w" (v) :"w" (v) :)
-+#define force_simd_for_double(v) asm volatile ("mov %d0, %1.d[0]" :"=w" (v) :"w" (v) :)
-+
-+#define FCVTDEF_SISD(ftype,itype) \
-+void \
-+cvt_##itype##_to_##ftype##_sisd (itype a, ftype b)\
-+{\
-+ ftype c;\
-+ force_simd_for_##ftype(a);\
-+ c = (ftype) a;\
-+ if ( (c - b) > 0.00001) abort();\
-+}
-+
-+#define FCVT(ftype,itype,ival,fval) cvt_##itype##_to_##ftype (ival, fval);
-+#define FCVT_SISD(ftype,itype,ival,fval) cvt_##itype##_to_##ftype##_sisd (ival, fval);
-+
-+typedef int int32_t;
-+typedef unsigned int uint32_t;
-+typedef long long int int64_t;
-+typedef unsigned long long int uint64_t;
-+
-+extern void abort();
-+
-+FCVTDEF (float, int32_t)
-+/* { dg-final { scan-assembler "scvtf\ts\[0-9\]+,\ w\[0-9\]+" } } */
-+FCVTDEF (float, uint32_t)
-+/* { dg-final { scan-assembler "ucvtf\ts\[0-9\]+,\ w\[0-9\]+" } } */
-+FCVTDEF (double, int32_t)
-+/* "scvtf\td\[0-9\]+,\ w\[0-9\]+" */
-+FCVTDEF (double, uint32_t)
-+/* "ucvtf\td\[0-9\]+,\ w\[0-9\]+" */
-+FCVTDEF (float, int64_t)
-+/* "scvtf\ts\[0-9\]+,\ x\[0-9\]+" */
-+FCVTDEF (float, uint64_t)
-+/* "ucvtf\ts\[0-9\]+,\ x\[0-9\]+" */
-+FCVTDEF (double, int64_t)
-+/* { dg-final { scan-assembler "scvtf\td\[0-9\]+,\ x\[0-9\]+" } } */
-+FCVTDEF (double, uint64_t)
-+/* { dg-final { scan-assembler "ucvtf\td\[0-9\]+,\ x\[0-9\]+" } } */
-+FCVTDEF_SISD (float, int32_t)
-+/* { dg-final { scan-assembler "scvtf\ts\[0-9\]+,\ s\[0-9\]+" } } */
-+FCVTDEF_SISD (double, int64_t)
-+/* { dg-final { scan-assembler "scvtf\td\[0-9\]+,\ d\[0-9\]+" } } */
-+FCVTDEF_SISD (float, uint32_t)
-+/* { dg-final { scan-assembler "ucvtf\ts\[0-9\]+,\ s\[0-9\]+" } } */
-+FCVTDEF_SISD (double, uint64_t)
-+/* { dg-final { scan-assembler "ucvtf\td\[0-9\]+,\ d\[0-9\]+" } } */
-+FCVTDEF_SISD (float, int64_t)
-+/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\]+,\ x\[0-9\]+" 2 } } */
-+FCVTDEF_SISD (float, uint64_t)
-+/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\]+,\ x\[0-9\]+" 2 } } */
-+FCVTDEF_SISD (double, int32_t)
-+/* { dg-final { scan-assembler-times "scvtf\td\[0-9\]+,\ w\[0-9\]+" 2 } } */
-+FCVTDEF_SISD (double, uint32_t)
-+/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\]+,\ w\[0-9\]+" 2 } } */
-+
-+int32_t ival = -1234;
-+int64_t llival = -13031303L;
-+uint32_t uival = 1234;
-+uint64_t ullival = 13031303L;
-+
-+int main ()
-+{
-+ float x;
-+ double y;
-+
-+ FCVT (float, int32_t, ival, -1234.0);
-+ FCVT (float, uint32_t, uival, 1234.0);
-+ FCVT (float, int64_t, llival, -13031303.0);
-+ FCVT (float, uint64_t, ullival, 13031303.0);
-+ FCVT (double, int32_t, ival, -1234.0);
-+ FCVT (double, uint32_t, uival, 1234.0);
-+ FCVT (double, int64_t, llival, -13031303.0);
-+ FCVT (double, uint64_t, ullival, 13031303.0);
-+ FCVT_SISD (float, int32_t, ival, -1234.0);
-+ FCVT_SISD (double, int64_t, llival, -13031303.0);
-+ FCVT_SISD (float, uint32_t, uival, 1234.0);
-+ FCVT_SISD (double, uint64_t, ullival, 13031303.0);
-+
-+ return 0;
-+}
-+
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/reload-valid-spoff.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/reload-valid-spoff.c
-@@ -17,6 +17,11 @@
- };
- typedef struct _IO_FILE FILE;
- extern char *fgets (char *__restrict __s, int __n, FILE *__restrict __stream);
-+extern void *memset (void *s, int c, size_t n);
-+extern void *memcpy (void *dest, const void *src, size_t n);
-+extern int fprintf (FILE *stream, const char *format, ...);
-+extern char * safe_strncpy (char *dst, const char *src, size_t size);
-+extern size_t strlen (const char *s);
- extern struct _IO_FILE *stderr;
- extern int optind;
- struct aftype {
---- a/src/gcc/testsuite/gcc.target/aarch64/tail_indirect_call_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/tail_indirect_call_1.c
-@@ -0,0 +1,18 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O2" } */
-+
-+typedef void FP (int);
-+
-+/* { dg-final { scan-assembler "br" } } */
-+/* { dg-final { scan-assembler-not "blr" } } */
-+void
-+f1 (FP fp, int n)
-+{
-+ (fp) (n);
-+}
-+
-+void
-+f2 (int n, FP fp)
-+{
-+ (fp) (n);
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/vqdml_lane_intrinsics-bad_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/vqdml_lane_intrinsics-bad_1.c
-@@ -0,0 +1,54 @@
-+/* { dg-do compile } */
-+
-+#include "arm_neon.h"
-+
-+int32x4_t
-+foo (int32x4_t a, int16x4_t b, int16x4_t c, int d)
-+{
-+ return vqdmlal_lane_s16 (a, b, c, d);
-+}
-+
-+int32x4_t
-+foo1 (int32x4_t a, int16x4_t b, int16x8_t c, int d)
-+{
-+ return vqdmlal_laneq_s16 (a, b, c, d);
-+}
-+
-+int32x4_t
-+foo2 (int32x4_t a, int16x4_t b, int16x4_t c, int d)
-+{
-+ return vqdmlsl_lane_s16 (a, b, c, d);
-+}
-+
-+int32x4_t
-+foo3 (int32x4_t a, int16x4_t b, int16x8_t c, int d)
-+{
-+ return vqdmlsl_laneq_s16 (a, b, c, d);
-+}
-+
-+int32x4_t
-+foo4 (int32x4_t a, int16x8_t b, int16x4_t c, int d)
-+{
-+ return vqdmlal_high_lane_s16 (a, b, c, d);
-+}
-+
-+int32x4_t
-+foo5 (int32x4_t a, int16x8_t b, int16x4_t c, int d)
-+{
-+ return vqdmlsl_high_lane_s16 (a, b, c, d);
-+}
-+
-+int32x4_t
-+foo6 (int32x4_t a, int16x8_t b, int16x8_t c, int d)
-+{
-+ return vqdmlal_high_laneq_s16 (a, b, c, d);
-+}
-+
-+int32x4_t
-+foo7 (int32x4_t a, int16x8_t b, int16x8_t c, int d)
-+{
-+ return vqdmlsl_high_laneq_s16 (a, b, c, d);
-+}
-+
-+
-+/* { dg-excess-errors "incompatible type for argument" } */
---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_6.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_6.c
-@@ -0,0 +1,20 @@
-+/* Verify:
-+ * -fomit-frame-pointer.
-+ * without outgoing.
-+ * total frame size > 512.
-+ * number of callee-saved reg == 1.
-+ * split stack adjustment into two subtractions.
-+ the second subtraction should use "str !". */
-+
-+/* { dg-do run } */
-+/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */
-+
-+#include "test_frame_common.h"
-+
-+t_frame_pattern (test6, 700, )
-+t_frame_run (test6)
-+
-+/* { dg-final { scan-assembler-times "str\tx30, \\\[sp, -\[0-9\]+\\\]!" 2 } } */
-+/* { dg-final { scan-assembler-times "ldr\tx30, \\\[sp\\\], \[0-9\]+" 3 } } */
-+
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_common.h
-+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_common.h
-@@ -0,0 +1,94 @@
-+extern void abort ();
-+
-+#define CVT(v) ((unsigned char)(v))
-+
-+static void __attribute__((noinline))
-+check_args_8 (int a0, int a1, int a2, int a3, int a4, int a5, int a6, int a7,
-+ int a8)
-+{
-+ if (a0 != 0
-+ || a1 != 1
-+ || a2 != 2
-+ || a3 != 3
-+ || a4 != 4
-+ || a5 != 5
-+ || a6 != 6
-+ || a7 != 7
-+ || a8 != 8)
-+ abort ();
-+}
-+
-+static void __attribute__((noinline))
-+check_args_24 (int a0, int a1, int a2, int a3, int a4, int a5, int a6, int a7,
-+ int a8, int a9, int a10)
-+{
-+ if (a0 != 0
-+ || a1 != 1
-+ || a2 != 2
-+ || a3 != 3
-+ || a4 != 4
-+ || a5 != 5
-+ || a6 != 6
-+ || a7 != 7
-+ || a8 != 8
-+ || a9 != 9
-+ || a10 != 10)
-+ abort ();
-+}
-+
-+void __attribute__ ((noinline))
-+initialize_array (unsigned char *a, int len)
-+{
-+ int i;
-+
-+ for (i = 0; i < (len / 2); i++)
-+ {
-+ a[i] = i;
-+ a[len - i - 1] = i;
-+ }
-+
-+ return;
-+}
-+
-+#define t_frame_pattern(name, local_size, callee_saved)\
-+int \
-+name (void)\
-+{\
-+ unsigned char a[local_size];\
-+ initialize_array (a, local_size); \
-+ __asm__ ("":::callee_saved); \
-+ if (a[0] != a[local_size - 1] \
-+ || a[0] != 0) \
-+ return 0; \
-+ if (a[local_size / 2 - 1] != a[local_size / 2] \
-+ || a[local_size / 2 - 1] != CVT (local_size / 2 - 1)) \
-+ return 0; \
-+ return 1; \
-+}
-+
-+#define t_frame_pattern_outgoing(name, local_size, callee_saved, out_going_num, ...)\
-+int \
-+name (void)\
-+{\
-+ unsigned char a[local_size];\
-+ initialize_array (a, local_size); \
-+ __asm__ ("":::callee_saved); \
-+ if (a[0] != a[local_size - 1] \
-+ || a[0] != 0) \
-+ return 0; \
-+ if (a[local_size / 2 - 1] != a[local_size / 2] \
-+ || a[local_size / 2 - 1] != CVT (local_size / 2 - 1)) \
-+ return 0; \
-+ check_args_ ## out_going_num (a[0], a[1], a[2], a[3], a[4], a[5], a[6],\
-+ a[7], __VA_ARGS__); \
-+ return 1; \
-+}
-+
-+#define t_frame_run(name) \
-+int \
-+main (int argc, char **argv) \
-+{\
-+ if (!name ())\
-+ abort ();\
-+ return 0;\
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/vstN_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/vstN_1.c
-@@ -0,0 +1,76 @@
-+/* { dg-do run } */
-+/* { dg-options "-O3" } */
-+
-+#include <arm_neon.h>
-+
-+extern void abort (void);
-+
-+#define TESTMETH(BASE, ELTS, STRUCT, SUFFIX) \
-+int __attribute__ ((noinline)) \
-+test_vst##STRUCT##SUFFIX () \
-+{ \
-+ BASE##_t src[ELTS * STRUCT]; \
-+ BASE##_t dest[ELTS * STRUCT]; \
-+ BASE##x##ELTS##x##STRUCT##_t vectors; \
-+ int i,j; \
-+ for (i = 0; i < STRUCT * ELTS; i++) \
-+ src [i] = (BASE##_t) 2*i + 1; \
-+ for (i = 0; i < STRUCT; i++) \
-+ vectors.val[i] = vld1##SUFFIX (&src[i*ELTS]); \
-+ asm volatile ("" : : : "memory"); \
-+ vst##STRUCT##SUFFIX (dest, vectors); \
-+ asm volatile ("" : : : "memory"); \
-+ for (i = 0; i < STRUCT; i++) \
-+ { \
-+ for (j = 0; j < ELTS; j++) \
-+ if (src[i*ELTS + j] != dest[i + STRUCT*j]) \
-+ return 1; \
-+ } \
-+ return 0; \
-+}
-+
-+#define VARIANTS(VARIANT, STRUCT) \
-+VARIANT (uint8, 8, STRUCT, _u8) \
-+VARIANT (uint16, 4, STRUCT, _u16) \
-+VARIANT (uint32, 2, STRUCT, _u32) \
-+VARIANT (uint64, 1, STRUCT, _u64) \
-+VARIANT (int8, 8, STRUCT, _s8) \
-+VARIANT (int16, 4, STRUCT, _s16) \
-+VARIANT (int32, 2, STRUCT, _s32) \
-+VARIANT (int64, 1, STRUCT, _s64) \
-+VARIANT (poly8, 8, STRUCT, _p8) \
-+VARIANT (poly16, 4, STRUCT, _p16) \
-+VARIANT (float32, 2, STRUCT, _f32) \
-+VARIANT (float64, 1, STRUCT, _f64) \
-+VARIANT (uint8, 16, STRUCT, q_u8) \
-+VARIANT (uint16, 8, STRUCT, q_u16) \
-+VARIANT (uint32, 4, STRUCT, q_u32) \
-+VARIANT (uint64, 2, STRUCT, q_u64) \
-+VARIANT (int8, 16, STRUCT, q_s8) \
-+VARIANT (int16, 8, STRUCT, q_s16) \
-+VARIANT (int32, 4, STRUCT, q_s32) \
-+VARIANT (int64, 2, STRUCT, q_s64) \
-+VARIANT (poly8, 16, STRUCT, q_p8) \
-+VARIANT (poly16, 8, STRUCT, q_p16) \
-+VARIANT (float32, 4, STRUCT, q_f32) \
-+VARIANT (float64, 2, STRUCT, q_f64)
-+
-+/* Tests of vst2 and vst2q. */
-+VARIANTS (TESTMETH, 2)
-+/* Tests of vst3 and vst3q. */
-+VARIANTS (TESTMETH, 3)
-+/* Tests of vst4 and vst4q. */
-+VARIANTS (TESTMETH, 4)
-+
-+#define CHECK(BASE, ELTS, STRUCT, SUFFIX) \
-+ if (test_vst##STRUCT##SUFFIX () != 0) \
-+ abort ();
-+
-+int
-+main (int argc, char **argv)
-+{
-+ VARIANTS (CHECK, 2)
-+ VARIANTS (CHECK, 3)
-+ VARIANTS (CHECK, 4)
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/vect-fmax-fmin.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fmax-fmin.c
-@@ -8,11 +8,11 @@
- #include "vect-fmaxv-fminv.x"
-
- #define DEFN_SETV(type) \
-- set_vector_##type (pR##type a, type n) \
-- { \
-- int i; \
-- for (i=0; i<16; i++) \
-- a[i] = n; \
-+ void set_vector_##type (pR##type a, type n) \
-+ { \
-+ int i; \
-+ for (i=0; i<16; i++) \
-+ a[i] = n; \
- }
-
- #define DEFN_CHECKV(type) \
---- a/src/gcc/testsuite/gcc.target/aarch64/scalar_shift_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/scalar_shift_1.c
-@@ -193,7 +193,6 @@
- return b;
- }
- /* { dg-final { scan-assembler "sshr\td\[0-9\]+,\ d\[0-9\]+,\ 63" } } */
--/* { dg-final { scan-assembler "shl\td\[0-9\]+,\ d\[0-9\]+,\ 1" } } */
-
- Int32x1
- test_corners_sisd_si (Int32x1 b)
-@@ -207,7 +206,6 @@
- return b;
- }
- /* { dg-final { scan-assembler "sshr\tv\[0-9\]+\.2s,\ v\[0-9\]+\.2s,\ 31" } } */
--/* { dg-final { scan-assembler "shl\tv\[0-9\]+\.2s,\ v\[0-9\]+\.2s,\ 1" } } */
-
-
-
---- a/src/gcc/testsuite/gcc.target/aarch64/vbslq_f64_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/vbslq_f64_1.c
-@@ -0,0 +1,21 @@
-+/* Test vbslq_f64 can be folded. */
-+/* { dg-do assemble } */
-+/* { dg-options "--save-temps -O3" } */
-+
-+#include <arm_neon.h>
-+
-+/* Folds to ret. */
-+
-+float32x4_t
-+fold_me (float32x4_t a, float32x4_t b)
-+{
-+ uint32x4_t mask = {-1, -1, -1, -1};
-+ return vbslq_f32 (mask, a, b);
-+}
-+
-+/* { dg-final { scan-assembler-not "bsl\\tv" } } */
-+/* { dg-final { scan-assembler-not "bit\\tv" } } */
-+/* { dg-final { scan-assembler-not "bif\\tv" } } */
-+
-+/* { dg-final { cleanup-saved-temps } } */
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/vect-ld1r.x
-+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-ld1r.x
-@@ -7,7 +7,7 @@
- for (i = 0; i < 8 / sizeof (TYPE); i++) \
- output[i] = *a; \
- } \
-- foo_ ## TYPE ## _q (TYPE *a, TYPE *output) \
-+ void foo_ ## TYPE ## _q (TYPE *a, TYPE *output) \
- { \
- int i; \
- for (i = 0; i < 32 / sizeof (TYPE); i++) \
---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_10.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_10.c
-@@ -0,0 +1,21 @@
-+/* Verify:
-+ * -fomit-frame-pointer.
-+ * with outgoing.
-+ * total frame size > 512.
-+ area except outgoing <= 512
-+ * number of callee-saved reg >= 2.
-+ * Split stack adjustment into two subtractions.
-+ the first subtractions could be optimized into "stp !". */
-+
-+/* { dg-do run } */
-+/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */
-+
-+#include "test_frame_common.h"
-+
-+t_frame_pattern_outgoing (test10, 480, "x19", 24, a[8], a[9], a[10])
-+t_frame_run (test10)
-+
-+/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, -\[0-9\]+\\\]!" 1 } } */
-+/* { dg-final { scan-assembler-times "ldp\tx19, x30, \\\[sp\\\], \[0-9\]+" 1 } } */
-+
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/vrnd_f64_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/vrnd_f64_1.c
-@@ -0,0 +1,105 @@
-+/* Test vrnd_f64 works correctly. */
-+/* { dg-do run } */
-+/* { dg-options "--save-temps" } */
-+
-+#include "arm_neon.h"
-+
-+extern void abort (void);
-+
-+/* Bit offset to round mode field in FPCR. */
-+#define RMODE_START 22
-+
-+#define FPROUNDING_ZERO 3
-+
-+/* Set RMODE field of FPCR control register
-+ to rounding mode passed. */
-+void __inline __attribute__ ((__always_inline__))
-+set_rounding_mode (uint32_t mode)
-+{
-+ uint32_t r;
-+
-+ /* Read current FPCR. */
-+ asm volatile ("mrs %[r], fpcr" : [r] "=r" (r) : :);
-+
-+ /* Clear rmode. */
-+ r &= ~(3 << RMODE_START);
-+ /* Calculate desired FPCR. */
-+ r |= mode << RMODE_START;
-+
-+ /* Write desired FPCR back. */
-+ asm volatile ("msr fpcr, %[r]" : : [r] "r" (r) :);
-+}
-+
-+float64x1_t __attribute__ ((noinline))
-+compare_f64 (float64x1_t passed, float64_t expected)
-+{
-+ return (__builtin_fabs (vget_lane_f64 (passed, 0) - expected)
-+ > __DBL_EPSILON__);
-+}
-+
-+void __attribute__ ((noinline))
-+run_round_tests (float64x1_t *tests,
-+ float64_t expectations[][6])
-+{
-+ int i;
-+
-+ for (i = 0; i < 6; i++)
-+ {
-+ if (compare_f64 (vrnd_f64 (tests[i]), expectations[0][i]))
-+ abort ();
-+ if (compare_f64 (vrndx_f64 (tests[i]), expectations[1][i]))
-+ abort ();
-+ if (compare_f64 (vrndp_f64 (tests[i]), expectations[2][i]))
-+ abort ();
-+ if (compare_f64 (vrndn_f64 (tests[i]), expectations[3][i]))
-+ abort ();
-+ if (compare_f64 (vrndm_f64 (tests[i]), expectations[4][i]))
-+ abort ();
-+ if (compare_f64 (vrndi_f64 (tests[i]), expectations[5][i]))
-+ abort ();
-+ if (compare_f64 (vrnda_f64 (tests[i]), expectations[6][i]))
-+ abort ();
-+ }
-+}
-+
-+int
-+main (int argc, char **argv)
-+{
-+ float64x1_t tests[6] =
-+ {
-+ vcreate_f64 (0x3FE0000000000000), /* Hex for: 0.5. */
-+ vcreate_f64 (0x3FD999999999999A), /* Hex for: 0.4. */
-+ vcreate_f64 (0x3FE3333333333333), /* Hex for: 0.6. */
-+ vcreate_f64 (0xBFE0000000000000), /* Hex for: -0.5. */
-+ vcreate_f64 (0xBFD999999999999A), /* Hex for: -0.4. */
-+ vcreate_f64 (0xBFE3333333333333), /* Hex for: -0.6. */
-+ };
-+
-+ float64_t expectations[7][6] =
-+ {
-+ { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }, /* vrnd - round towards zero. */
-+ { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }, /* vrndx - round using FPCR mode. */
-+ { 1.0, 1.0, 1.0, 0.0, 0.0, 0.0 }, /* vrndp - round to plus infinity. */
-+ { 0.0, 0.0, 1.0, 0.0, 0.0, -1.0 }, /* vrndn - round ties to even. */
-+ { 0.0, 0.0, 0.0, -1.0, -1.0, -1.0 }, /* vrndm - round to minus infinity. */
-+ { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }, /* vrndi - round using FPCR mode. */
-+ { 1.0, 0.0, 1.0, -1.0, 0.0, -1.0 }, /* vrnda - round ties away from 0. */
-+ };
-+
-+ /* Set floating point control register
-+ to have predictable vrndx and vrndi behaviour. */
-+ set_rounding_mode (FPROUNDING_ZERO);
-+
-+ run_round_tests (tests, expectations);
-+
-+ return 0;
-+}
-+
-+/* { dg-final { scan-assembler-times "frintz\\td\[0-9\]+, d\[0-9\]+" 1 } } */
-+/* { dg-final { scan-assembler-times "frintx\\td\[0-9\]+, d\[0-9\]+" 1 } } */
-+/* { dg-final { scan-assembler-times "frintp\\td\[0-9\]+, d\[0-9\]+" 1 } } */
-+/* { dg-final { scan-assembler-times "frintn\\td\[0-9\]+, d\[0-9\]+" 1 } } */
-+/* { dg-final { scan-assembler-times "frintm\\td\[0-9\]+, d\[0-9\]+" 1 } } */
-+/* { dg-final { scan-assembler-times "frinti\\td\[0-9\]+, d\[0-9\]+" 1 } } */
-+/* { dg-final { scan-assembler-times "frinta\\td\[0-9\]+, d\[0-9\]+" 1 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c
-@@ -305,13 +305,28 @@
- return res;
- }
-
--/* { dg-final { scan-assembler-times "\\taddp\\td\[0-9\]+, v\[0-9\]+\.2d" 1 } } */
-+/* { dg-final { scan-assembler-times "\\tfaddp\\td\[0-9\]+, v\[0-9\]+\.2d" 1 } } */
-
-+float64_t
-+test_vpaddd_f64 (float64x2_t a)
-+{
-+ return vpaddd_f64 (a);
-+}
-+
-+/* { dg-final { scan-assembler-times "\\taddp\\td\[0-9\]+, v\[0-9\]+\.2d" 2 } } */
-+
-+int64_t
- test_vpaddd_s64 (int64x2_t a)
- {
- return vpaddd_s64 (a);
- }
-
-+uint64_t
-+test_vpaddd_u64 (uint64x2_t a)
-+{
-+ return vpaddd_u64 (a);
-+}
-+
- /* { dg-final { scan-assembler-times "\\tuqadd\\td\[0-9\]+" 1 } } */
-
- uint64x1_t
---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_7.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_7.c
-@@ -0,0 +1,20 @@
-+/* Verify:
-+ * -fomit-frame-pointer.
-+ * without outgoing.
-+ * total frame size > 512.
-+ * number of callee-saved reg == 2.
-+ * split stack adjustment into two subtractions.
-+ the second subtraction should use "stp !". */
-+
-+/* { dg-do run } */
-+/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */
-+
-+#include "test_frame_common.h"
-+
-+t_frame_pattern (test7, 700, "x19")
-+t_frame_run (test7)
-+
-+/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, -\[0-9\]+\\\]!" 1 } } */
-+/* { dg-final { scan-assembler-times "ldp\tx19, x30, \\\[sp\\\], \[0-9\]+" 2 } } */
-+
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/pic-symrefplus.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/pic-symrefplus.c
-@@ -34,6 +34,9 @@
- values [];
- };
- extern const struct locale_data _nl_C_LC_TIME __attribute__ ((visibility ("hidden")));
-+extern void *memset (void *s, int c, size_t n);
-+extern size_t strlen (const char *s);
-+extern int __strncasecmp_l (const char *s1, const char *s2, size_t n, __locale_t locale);
- char *
- __strptime_internal (rp, fmt, tmp, statep , locale)
- const char *rp;
-@@ -40,6 +43,7 @@
- const char *fmt;
- __locale_t locale;
- void *statep;
-+ int tmp;
- {
- struct locale_data *const current = locale->__locales[__LC_TIME];
- const char *rp_backup;
-@@ -124,5 +128,9 @@
- }
- char *
- __strptime_l (buf, format, tm , locale)
-+ int buf;
-+ int format;
-+ int tm;
-+ int locale;
- {
- }
---- a/src/gcc/testsuite/gcc.target/aarch64/vbslq_f64_2.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/vbslq_f64_2.c
-@@ -0,0 +1,24 @@
-+/* Test vbslq_f64 can be folded. */
-+/* { dg-do assemble } */
-+/* { dg-options "--save-temps -O3" } */
-+
-+#include <arm_neon.h>
-+
-+/* Should fold out one half of the BSL, leaving just a BIC. */
-+
-+float32x4_t
-+half_fold_me (uint32x4_t mask)
-+{
-+ float32x4_t a = {0.0, 0.0, 0.0, 0.0};
-+ float32x4_t b = {2.0, 4.0, 8.0, 16.0};
-+ return vbslq_f32 (mask, a, b);
-+
-+}
-+
-+/* { dg-final { scan-assembler-not "bsl\\tv" } } */
-+/* { dg-final { scan-assembler-not "bit\\tv" } } */
-+/* { dg-final { scan-assembler-not "bif\\tv" } } */
-+/* { dg-final { scan-assembler "bic\\tv" } } */
-+
-+/* { dg-final { cleanup-saved-temps } } */
-+
---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_11.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_11.c
-@@ -0,0 +1,16 @@
-+/* Verify:
-+ * without outgoing.
-+ * total frame size <= 512.
-+ * number of callee-save reg >= 2.
-+ * optimized code should use "stp !" for stack adjustment. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-O2 --save-temps" } */
-+
-+#include "test_frame_common.h"
-+
-+t_frame_pattern (test11, 400, )
-+t_frame_run (test11)
-+
-+/* { dg-final { scan-assembler-times "stp\tx29, x30, \\\[sp, -\[0-9\]+\\\]!" 2 } } */
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/vqneg_s64_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/vqneg_s64_1.c
-@@ -0,0 +1,47 @@
-+/* Test vqneg_s64 intrinsics work correctly. */
-+/* { dg-do run } */
-+/* { dg-options "--save-temps" } */
-+
-+#include <arm_neon.h>
-+
-+extern void abort (void);
-+
-+int __attribute__ ((noinline))
-+test_vqneg_s64 (int64x1_t passed, int64_t expected)
-+{
-+ return vget_lane_s64 (vqneg_s64 (passed), 0) != expected;
-+}
-+
-+int __attribute__ ((noinline))
-+test_vqnegd_s64 (int64_t passed, int64_t expected)
-+{
-+ return vqnegd_s64 (passed) != expected;
-+}
-+
-+/* { dg-final { scan-assembler-times "sqneg\\td\[0-9\]+, d\[0-9\]+" 2 } } */
-+
-+int
-+main (int argc, char **argv)
-+{
-+ /* Basic test. */
-+ if (test_vqneg_s64 (vcreate_s64 (-1), 1))
-+ abort ();
-+ if (test_vqnegd_s64 (-1, 1))
-+ abort ();
-+
-+ /* Negating max int64_t. */
-+ if (test_vqneg_s64 (vcreate_s64 (0x7fffffffffffffff), 0x8000000000000001))
-+ abort ();
-+ if (test_vqnegd_s64 (0x7fffffffffffffff, 0x8000000000000001))
-+ abort ();
-+
-+ /* Negating min int64_t.
-+ Note, exact negation cannot be represented as int64_t. */
-+ if (test_vqneg_s64 (vcreate_s64 (0x8000000000000000), 0x7fffffffffffffff))
-+ abort ();
-+ if (test_vqnegd_s64 (0x8000000000000000, 0x7fffffffffffffff))
-+ abort ();
-+
-+ return 0;
-+}
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/vget_low_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/vget_low_1.c
-@@ -0,0 +1,60 @@
-+/* { dg-do run } */
-+/* { dg-options "-O3 -std=c99" } */
-+
-+#include <arm_neon.h>
-+
-+extern void abort (void);
-+
-+#define VARIANTS(VARIANT) \
-+VARIANT (uint8_t, 8, uint8x8_t, uint8x16_t, u8) \
-+VARIANT (uint16_t, 4, uint16x4_t, uint16x8_t, u16) \
-+VARIANT (uint32_t, 2, uint32x2_t, uint32x4_t, u32) \
-+VARIANT (uint64_t, 1, uint64x1_t, uint64x2_t, u64) \
-+VARIANT (int8_t, 8, int8x8_t, int8x16_t, s8) \
-+VARIANT (int16_t, 4, int16x4_t, int16x8_t, s16) \
-+VARIANT (int32_t, 2, int32x2_t, int32x4_t, s32) \
-+VARIANT (int64_t, 1, int64x1_t, int64x2_t, s64) \
-+VARIANT (float32_t, 2, float32x2_t, float32x4_t, f32) \
-+VARIANT (float64_t, 1, float64x1_t, float64x2_t, f64)
-+
-+
-+#define TESTMETH(BASETYPE, NUM64, TYPE64, TYPE128, SUFFIX) \
-+int \
-+test_vget_low_ ##SUFFIX (BASETYPE *data) \
-+{ \
-+ BASETYPE temp [NUM64]; \
-+ TYPE128 vec = vld1q_##SUFFIX (data); \
-+ TYPE64 low = vget_low_##SUFFIX (vec); \
-+ vst1_##SUFFIX (temp, low); \
-+ for (int i = 0; i < NUM64; i++) \
-+ if (temp[i] != data[i]) \
-+ return 1; \
-+ return 0; \
-+}
-+
-+VARIANTS (TESTMETH)
-+
-+#define CHECK(BASETYPE, NUM64, TYPE64, TYPE128, SUFFIX) \
-+ if (test_vget_low_##SUFFIX (BASETYPE ## _ ## data) != 0) \
-+ abort ();
-+
-+int
-+main (int argc, char **argv)
-+{
-+ uint8_t uint8_t_data[16] =
-+ { 1, 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47 };
-+ uint16_t uint16_t_data[8] = { 1, 22, 333, 4444, 55555, 6666, 777, 88 };
-+ uint32_t uint32_t_data[4] = { 65537, 11, 70000, 23 };
-+ uint64_t uint64_t_data[2] = { 0xdeadbeefcafebabeULL, 0x0123456789abcdefULL };
-+ int8_t int8_t_data[16] =
-+ { -1, -3, -5, -7, 9, -11, -13, 15, -17, -19, 21, -23, 25, 27, -29, -31 };
-+ int16_t int16_t_data[8] = { -17, 19, 3, -999, 44048, 505, 9999, 1000};
-+ int32_t int32_t_data[4] = { 123456789, -987654321, -135792468, 975318642 };
-+ int64_t int64_t_data[2] = {0xfedcba9876543210LL, 0xdeadbabecafebeefLL };
-+ float32_t float32_t_data[4] = { 3.14159, 2.718, 1.414, 100.0 };
-+ float64_t float64_t_data[2] = { 1.01001000100001, 12345.6789 };
-+
-+ VARIANTS (CHECK);
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_8.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_8.c
-@@ -0,0 +1,18 @@
-+/* Verify:
-+ * -fomit-frame-pointer.
-+ * with outgoing.
-+ * total frame size bigger than 512.
-+ * number of callee-saved reg == 1. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */
-+
-+#include "test_frame_common.h"
-+
-+t_frame_pattern_outgoing (test8, 700, , 8, a[8])
-+t_frame_run (test8)
-+
-+/* { dg-final { scan-assembler-times "str\tx30, \\\[sp, -\[0-9\]+\\\]!" 3 } } */
-+/* { dg-final { scan-assembler-times "ldr\tx30, \\\[sp\\\], \[0-9\]+" 3 } } */
-+
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/vset_lane_1.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/vset_lane_1.c
-@@ -0,0 +1,85 @@
-+/* { dg-do run } */
-+/* { dg-options "-O3 -fno-inline" } */
-+
-+#include <arm_neon.h>
-+
-+extern void abort (void);
-+
-+#define VARIANTS(VARIANT) \
-+VARIANT (uint8_t, , 8, uint8x8_t, _u8, 5) \
-+VARIANT (uint16_t, , 4, uint16x4_t, _u16, 3) \
-+VARIANT (uint32_t, , 2, uint32x2_t, _u32, 1) \
-+VARIANT (uint64_t, , 1, uint64x1_t, _u64, 0) \
-+VARIANT (int8_t, , 8, int8x8_t, _s8, 6) \
-+VARIANT (int16_t, , 4, int16x4_t, _s16, 2) \
-+VARIANT (int32_t, , 2, int32x2_t, _s32, 0) \
-+VARIANT (int64_t, , 1, int64x1_t, _s64, 0) \
-+VARIANT (poly8_t, , 8, poly8x8_t, _p8, 6) \
-+VARIANT (poly16_t, , 4, poly16x4_t, _p16, 2) \
-+VARIANT (float32_t, , 2, float32x2_t, _f32, 1) \
-+VARIANT (float64_t, , 1, float64x1_t, _f64, 0) \
-+VARIANT (uint8_t, q, 16, uint8x16_t, _u8, 11) \
-+VARIANT (uint16_t, q, 8, uint16x8_t, _u16, 7) \
-+VARIANT (uint32_t, q, 4, uint32x4_t, _u32, 2) \
-+VARIANT (uint64_t, q, 2, uint64x2_t, _u64, 1) \
-+VARIANT (int8_t, q, 16, int8x16_t, _s8, 13) \
-+VARIANT (int16_t, q, 8, int16x8_t, _s16, 5) \
-+VARIANT (int32_t, q, 4, int32x4_t, _s32, 3) \
-+VARIANT (int64_t, q, 2, int64x2_t, _s64, 0) \
-+VARIANT (poly8_t, q, 16, poly8x16_t, _p8, 14) \
-+VARIANT (poly16_t, q, 8, poly16x8_t, _p16, 6) \
-+VARIANT (float32_t, q, 4, float32x4_t, _f32, 2) \
-+VARIANT (float64_t, q, 2, float64x2_t, _f64, 1)
-+
-+#define TESTMETH(BASETYPE, Q, NUM, TYPE, SUFFIX, INDEX) \
-+int \
-+test_vset_lane ##Q##SUFFIX (BASETYPE *data) \
-+{ \
-+ BASETYPE temp [NUM]; \
-+ TYPE vec = vld1##Q##SUFFIX (data); \
-+ TYPE vec2; \
-+ BASETYPE changed = data[INDEX] - INDEX; \
-+ int check; \
-+ vec = vset##Q##_lane##SUFFIX (changed, vec, INDEX); \
-+ asm volatile ("orr %0.16b, %1.16b, %1.16b" \
-+ : "=w"(vec2) : "w" (vec) : ); \
-+ vst1##Q##SUFFIX (temp, vec2); \
-+ for (check = 0; check < NUM; check++) \
-+ { \
-+ BASETYPE desired = data[check]; \
-+ if (check==INDEX) desired = changed; \
-+ if (temp[check] != desired) \
-+ return 1; \
-+ } \
-+ return 0; \
-+}
-+
-+VARIANTS (TESTMETH)
-+
-+#define CHECK(BASETYPE, Q, NUM, TYPE, SUFFIX, INDEX) \
-+ if (test_vset_lane##Q##SUFFIX (BASETYPE ## _ ## data) != 0) \
-+ abort ();
-+
-+int
-+main (int argc, char **argv)
-+{
-+ uint8_t uint8_t_data[16] =
-+ { 1, 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47 };
-+ uint16_t uint16_t_data[8] = { 1, 22, 333, 4444, 55555, 6666, 777, 88 };
-+ uint32_t uint32_t_data[4] = { 65537, 11, 70000, 23 };
-+ uint64_t uint64_t_data[2] = { 0xdeadbeefcafebabeULL, 0x0123456789abcdefULL };
-+ int8_t int8_t_data[16] =
-+ { -1, -3, -5, -7, 9, -11, -13, 15, -17, -19, 21, -23, 25, 27, -29, -31 };
-+ int16_t int16_t_data[8] = { -17, 19, 3, -999, 44048, 505, 9999, 1000};
-+ int32_t int32_t_data[4] = { 123456789, -987654321, -135792468, 975318642 };
-+ int64_t int64_t_data[2] = {0xfedcba9876543210LL, 0xdeadbabecafebeefLL };
-+ poly8_t poly8_t_data[16] =
-+ { 0, 7, 13, 18, 22, 25, 27, 28, 29, 31, 34, 38, 43, 49, 56, 64 };
-+ poly16_t poly16_t_data[8] = { 11111, 2222, 333, 44, 5, 65432, 54321, 43210 };
-+ float32_t float32_t_data[4] = { 3.14159, 2.718, 1.414, 100.0 };
-+ float64_t float64_t_data[2] = { 1.01001000100001, 12345.6789 };
-+
-+ VARIANTS (CHECK);
-+
-+ return 0;
-+}
---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_12.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_12.c
-@@ -0,0 +1,19 @@
-+/* Verify:
-+ * with outgoing.
-+ * total frame size <= 512.
-+ * number of callee-save reg >= 2. */
-+
-+/* { dg-do run } */
-+/* { dg-options "-O2 --save-temps" } */
-+
-+#include "test_frame_common.h"
-+
-+t_frame_pattern_outgoing (test12, 400, , 8, a[8])
-+t_frame_run (test12)
-+
-+/* { dg-final { scan-assembler-times "sub\tsp, sp, #\[0-9\]+" 1 } } */
-+
-+/* Check epilogue using write-back. */
-+/* { dg-final { scan-assembler-times "ldp\tx29, x30, \\\[sp\\\], \[0-9\]+" 3 } } */
-+
-+/* { dg-final { cleanup-saved-temps } } */
---- a/src/gcc/testsuite/lib/gcc.exp
-+++ b/src/gcc/testsuite/lib/gcc.exp
-@@ -126,7 +126,9 @@
- global GCC_UNDER_TEST
- global TOOL_OPTIONS
- global TEST_ALWAYS_FLAGS
--
-+ global flags_to_postpone
-+ global board_info
-+
- if {[target_info needs_status_wrapper] != "" && \
- [target_info needs_status_wrapper] != "0" && \
- [info exists gluefile] } {
-@@ -162,8 +164,26 @@
- set options [concat "{additional_flags=$TOOL_OPTIONS}" $options]
- }
-
-+ # bind_pic_locally adds -fpie/-fPIE flags to flags_to_postpone and it is
-+ # appended here to multilib_flags as it can be overridden by the latter
-+ # if it was added earlier. After the target_compile, multilib_flags is
-+ # restored to its original content.
-+ set tboard [target_info name]
-+ if {[board_info $tboard exists multilib_flags]} {
-+ set orig_multilib_flags "[board_info [target_info name] multilib_flags]"
-+ append board_info($tboard,multilib_flags) " $flags_to_postpone"
-+ }
-+
- lappend options "timeout=[timeout_value]"
- lappend options "compiler=$GCC_UNDER_TEST"
- set options [dg-additional-files-options $options $source]
-- return [target_compile $source $dest $type $options]
-+ set return_val [target_compile $source $dest $type $options]
-+
-+ if {[board_info $tboard exists multilib_flags]} {
-+ set board_info($tboard,multilib_flags) $orig_multilib_flags
-+ set flags_to_postpone ""
-+ }
-+
-+ return $return_val
- }
-+
---- a/src/gcc/testsuite/lib/g++.exp
-+++ b/src/gcc/testsuite/lib/g++.exp
-@@ -288,6 +288,8 @@
- global gluefile wrap_flags
- global ALWAYS_CXXFLAGS
- global GXX_UNDER_TEST
-+ global flags_to_postpone
-+ global board_info
-
- if { [target_info needs_status_wrapper] != "" && [info exists gluefile] } {
- lappend options "libs=${gluefile}"
-@@ -313,10 +315,25 @@
- exec rm -f $rponame
- }
-
-+ # bind_pic_locally adds -fpie/-fPIE flags to flags_to_postpone and it is
-+ # appended here to multilib_flags as it can be overridden by the latter
-+ # if it was added earlier. After the target_compile, multilib_flags is
-+ # restored to its original content.
-+ set tboard [target_info name]
-+ if {[board_info $tboard exists multilib_flags]} {
-+ set orig_multilib_flags "[board_info [target_info name] multilib_flags]"
-+ append board_info($tboard,multilib_flags) " $flags_to_postpone"
-+ }
-+
- set options [dg-additional-files-options $options $source]
-
- set result [target_compile $source $dest $type $options]
-
-+ if {[board_info $tboard exists multilib_flags]} {
-+ set board_info($tboard,multilib_flags) $orig_multilib_flags
-+ set flags_to_postpone ""
-+ }
-+
- return $result
- }
-
---- a/src/gcc/testsuite/lib/wrapper.exp
-+++ b/src/gcc/testsuite/lib/wrapper.exp
-@@ -34,9 +34,11 @@
- # became true for dejagnu-1.4.4. The set of warnings and code
- # that gcc objects on may change, so just make sure -w is always
- # passed to turn off all warnings.
-+ unset_currtarget_info wrap_compile_flags
- set_currtarget_info wrap_compile_flags \
- "$saved_wrap_compile_flags -w $flags"
- set result [build_wrapper $filename]
-+ unset_currtarget_info wrap_compile_flags
- set_currtarget_info wrap_compile_flags "$saved_wrap_compile_flags"
- if { $result != "" } {
- set gluefile [lindex $result 0]
---- a/src/gcc/testsuite/lib/compat.exp
-+++ b/src/gcc/testsuite/lib/compat.exp
-@@ -134,7 +134,6 @@
- "$options"]
- if ![${tool}_check_compile "$testcase $testname link" "" \
- $dest $comp_output] then {
-- unresolved "$testcase $testname execute $optstr"
- return
- }
-
---- a/src/gcc/testsuite/lib/gcc-defs.exp
-+++ b/src/gcc/testsuite/lib/gcc-defs.exp
-@@ -54,14 +54,19 @@
- if { [info proc ${tool}-dg-prune] != "" } {
- global target_triplet
- set gcc_output [${tool}-dg-prune $target_triplet $gcc_output]
-+ if [string match "*::unsupported::*" $gcc_output] then {
-+ regsub -- "::unsupported::" $gcc_output "" gcc_output
-+ unsupported "$testcase: $gcc_output"
-+ return 0
-+ }
-+ } else {
-+ set unsupported_message [${tool}_check_unsupported_p $gcc_output]
-+ if { $unsupported_message != "" } {
-+ unsupported "$testcase: $unsupported_message"
-+ return 0
-+ }
- }
-
-- set unsupported_message [${tool}_check_unsupported_p $gcc_output]
-- if { $unsupported_message != "" } {
-- unsupported "$testcase: $unsupported_message"
-- return 0
-- }
--
- # remove any leftover LF/CR to make sure any output is legit
- regsub -all -- "\[\r\n\]*" $gcc_output "" gcc_output
-
---- a/src/gcc/testsuite/lib/gfortran.exp
-+++ b/src/gcc/testsuite/lib/gfortran.exp
-@@ -234,6 +234,8 @@
- global gluefile wrap_flags
- global ALWAYS_GFORTRANFLAGS
- global GFORTRAN_UNDER_TEST
-+ global flags_to_postpone
-+ global board_info
-
- if { [target_info needs_status_wrapper] != "" && [info exists gluefile] } {
- lappend options "libs=${gluefile}"
-@@ -240,10 +242,27 @@
- lappend options "ldflags=${wrap_flags}"
- }
-
-+ # bind_pic_locally adds -fpie/-fPIE flags to flags_to_postpone and it is
-+ # appended here to multilib_flags as it can be overridden by the latter
-+ # if it was added earlier. After the target_compile, multilib_flags is
-+ # restored to its original content.
-+ set tboard [target_info name]
-+ if {[board_info $tboard exists multilib_flags]} {
-+ set orig_multilib_flags "[board_info [target_info name] multilib_flags]"
-+ append board_info($tboard,multilib_flags) " $flags_to_postpone"
-+ }
-+
- lappend options "compiler=$GFORTRAN_UNDER_TEST"
- lappend options "timeout=[timeout_value]"
-
- set options [concat "$ALWAYS_GFORTRANFLAGS" $options]
- set options [dg-additional-files-options $options $source]
-- return [target_compile $source $dest $type $options]
-+ set return_val [target_compile $source $dest $type $options]
-+
-+ if {[board_info $tboard exists multilib_flags]} {
-+ set board_info($tboard,multilib_flags) $orig_multilib_flags
-+ set flags_to_postpone ""
-+ }
-+
-+ return $return_val
- }
---- a/src/gcc/testsuite/lib/target-supports.exp
-+++ b/src/gcc/testsuite/lib/target-supports.exp
-@@ -2261,7 +2261,7 @@
- }]
- }
-
--# Return 1 is this is an arm target using 32-bit instructions
-+# Return 1 if this is an arm target using 32-bit instructions
- proc check_effective_target_arm32 { } {
- return [check_no_compiler_messages arm32 assembly {
- #if !defined(__arm__) || (defined(__thumb__) && !defined(__thumb2__))
-@@ -2270,10 +2270,10 @@
- }]
- }
-
--# Return 1 is this is an arm target not using Thumb
-+# Return 1 if this is an arm target not using Thumb
- proc check_effective_target_arm_nothumb { } {
- return [check_no_compiler_messages arm_nothumb assembly {
-- #if (defined(__thumb__) || defined(__thumb2__))
-+ #if !defined(__arm__) || (defined(__thumb__) || defined(__thumb2__))
- #error FOO
- #endif
- }]
-@@ -2394,6 +2394,7 @@
- foreach flags {"" "-mfloat-abi=softfp" "-mfpu=crypto-neon-fp-armv8" "-mfpu=crypto-neon-fp-armv8 -mfloat-abi=softfp"} {
- if { [check_no_compiler_messages_nocache arm_crypto_ok object {
- #include "arm_neon.h"
-+ extern uint8x16_t vaeseq_u8 (uint8x16_t, uint8x16_t);
- uint8x16_t
- foo (uint8x16_t a, uint8x16_t b)
- {
-@@ -2538,6 +2539,7 @@
- "-mfpu=neon-fp16 -mfloat-abi=softfp"} {
- if { [check_no_compiler_messages_nocache arm_neon_fp_16_ok object {
- #include "arm_neon.h"
-+ extern float16x4_t vcvt_f16_f32 (float32x4_t);
- float16x4_t
- foo (float32x4_t arg)
- {
-@@ -2613,6 +2615,7 @@
- foreach flags {"" "-mfloat-abi=softfp" "-mfpu=neon-vfpv4" "-mfpu=neon-vfpv4 -mfloat-abi=softfp"} {
- if { [check_no_compiler_messages_nocache arm_neonv2_ok object {
- #include "arm_neon.h"
-+ extern float32x2_t vfma_f32 (float32x2_t, float32x2_t, float32x2_t);
- float32x2_t
- foo (float32x2_t a, float32x2_t b, float32x2_t c)
- {
-@@ -3324,6 +3327,43 @@
- return $et_vect_shift_saved
- }
-
-+proc check_effective_target_whole_vector_shift { } {
-+ if { [istarget x86_64-*-*]
-+ || [istarget ia64-*-*]
-+ || ([check_effective_target_arm32]
-+ && [check_effective_target_arm_little_endian])
-+ || ([istarget mips*-*-*]
-+ && [check_effective_target_mips_loongson]) } {
-+ set answer 1
-+ } else {
-+ set answer 0
-+ }
-+
-+ verbose "check_effective_target_vect_long: returning $answer" 2
-+ return $answer
-+}
-+
-+# Return 1 if the target supports vector bswap operations.
-+
-+proc check_effective_target_vect_bswap { } {
-+ global et_vect_bswap_saved
-+
-+ if [info exists et_vect_bswap_saved] {
-+ verbose "check_effective_target_vect_bswap: using cached result" 2
-+ } else {
-+ set et_vect_bswap_saved 0
-+ if { [istarget aarch64*-*-*]
-+ || ([istarget arm*-*-*]
-+ && [check_effective_target_arm_neon])
-+ } {
-+ set et_vect_bswap_saved 1
-+ }
-+ }
-+
-+ verbose "check_effective_target_vect_bswap: returning $et_vect_bswap_saved" 2
-+ return $et_vect_bswap_saved
-+}
-+
- # Return 1 if the target supports hardware vector shift operation for char.
-
- proc check_effective_target_vect_shift_char { } {
-@@ -3522,8 +3562,7 @@
- } else {
- set et_vect_perm_saved 0
- if { [is-effective-target arm_neon_ok]
-- || ([istarget aarch64*-*-*]
-- && [is-effective-target aarch64_little_endian])
-+ || [istarget aarch64*-*-*]
- || [istarget powerpc*-*-*]
- || [istarget spu-*-*]
- || [istarget i?86-*-*]
-@@ -5206,16 +5245,26 @@
- return $flags
- }
-
-+if {![info exists flags_to_postpone]} {
-+ set flags_to_postpone ""
-+}
-+
- # Add to FLAGS the flags needed to enable functions to bind locally
- # when using pic/PIC passes in the testsuite.
-+proc add_options_for_bind_pic_locally { flags } {
-+ global flags_to_postpone
-
--proc add_options_for_bind_pic_locally { flags } {
-+ # Instead of returning 'flags' with the -fPIE or -fpie appended, we save it
-+ # in 'flags_to_postpone' and append it later in gcc_target_compile procedure in
-+ # order to make sure that the multilib_flags doesn't override this.
-+
- if {[check_no_compiler_messages using_pic2 assembly {
- #if __PIC__ != 2
- #error FOO
- #endif
- }]} {
-- return "$flags -fPIE"
-+ set flags_to_postpone "-fPIE"
-+ return $flags
- }
- if {[check_no_compiler_messages using_pic1 assembly {
- #if __PIC__ != 1
-@@ -5222,9 +5271,9 @@
- #error FOO
- #endif
- }]} {
-- return "$flags -fpie"
-+ set flags_to_postpone "-fpie"
-+ return $flags
- }
--
- return $flags
- }
-
---- a/src/gcc/testsuite/ChangeLog.linaro
-+++ b/src/gcc/testsuite/ChangeLog.linaro
-@@ -0,0 +1,1031 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2015-01-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r218451.
-+ 2014-12-06 James Greenhalgh <james.greenhalgh@arm.com>
-+ Sebastian Pop <s.pop@samsung.com>
-+ Brian Rzycki <b.rzycki@samsung.com>
-+
-+ PR tree-optimization/54742
-+ * gcc.dg/tree-ssa/ssa-dom-thread-6.c: New test.
-+ * gcc.dg/tree-ssa/ssa-dom-thread-7.c: New test.
-+
-+2015-01-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r211075.
-+ 2014-04-30 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ gcc.target/arm/simd/vrev16p8_1.c: New file.
-+ gcc.target/arm/simd/vrev16qp8_1.c: New file.
-+ gcc.target/arm/simd/vrev16qs8_1.c: New file.
-+ gcc.target/arm/simd/vrev16qu8_1.c: New file.
-+ gcc.target/arm/simd/vrev16s8_1.c: New file.
-+ gcc.target/arm/simd/vrev16u8_1.c: New file.
-+ gcc.target/arm/simd/vrev32p16_1.c: New file.
-+ gcc.target/arm/simd/vrev32p8_1.c: New file.
-+ gcc.target/arm/simd/vrev32qp16_1.c: New file.
-+ gcc.target/arm/simd/vrev32qp8_1.c: New file.
-+ gcc.target/arm/simd/vrev32qs16_1.c: New file.
-+ gcc.target/arm/simd/vrev32qs8_1.c: New file.
-+ gcc.target/arm/simd/vrev32qu16_1.c: New file.
-+ gcc.target/arm/simd/vrev32qu8_1.c: New file.
-+ gcc.target/arm/simd/vrev32s16_1.c: New file.
-+ gcc.target/arm/simd/vrev32s8_1.c: New file.
-+ gcc.target/arm/simd/vrev32u16_1.c: New file.
-+ gcc.target/arm/simd/vrev32u8_1.c: New file.
-+ gcc.target/arm/simd/vrev64f32_1.c: New file.
-+ gcc.target/arm/simd/vrev64p16_1.c: New file.
-+ gcc.target/arm/simd/vrev64p8_1.c: New file.
-+ gcc.target/arm/simd/vrev64qf32_1.c: New file.
-+ gcc.target/arm/simd/vrev64qp16_1.c: New file.
-+ gcc.target/arm/simd/vrev64qp8_1.c: New file.
-+ gcc.target/arm/simd/vrev64qs16_1.c: New file.
-+ gcc.target/arm/simd/vrev64qs32_1.c: New file.
-+ gcc.target/arm/simd/vrev64qs8_1.c: New file.
-+ gcc.target/arm/simd/vrev64qu16_1.c: New file.
-+ gcc.target/arm/simd/vrev64qu32_1.c: New file.
-+ gcc.target/arm/simd/vrev64qu8_1.c: New file.
-+ gcc.target/arm/simd/vrev64s16_1.c: New file.
-+ gcc.target/arm/simd/vrev64s32_1.c: New file.
-+ gcc.target/arm/simd/vrev64s8_1.c: New file.
-+ gcc.target/arm/simd/vrev64u16_1.c: New file.
-+ gcc.target/arm/simd/vrev64u32_1.c: New file.
-+ gcc.target/arm/simd/vrev64u8_1.c: New file.
-+
-+2015-01-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209620.
-+ 2014-04-22 Vidya Praveen <vidyapraveen@arm.com>
-+
-+ * gcc.target/aarch64/cvtf_1.c: New.
-+
-+2015-01-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r217362.
-+ 2014-11-11 James Greenhalgh <james.greenhalgh@arm.com>
-+
-+ * gcc.target/aarch64/vbslq_f64_1.c: New.
-+ * gcc.target/aarch64/vbslq_f64_2.c: Likewise.
-+ * gcc.target/aarch64/vbslq_u64_1.c: Likewise.
-+ * gcc.target/aarch64/vbslq_u64_2.c: Likewise.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r217742.
-+ 2014-11-18 James Greenhalgh <james.greenhalgh@arm.com>
-+
-+ PR target/63937
-+ * gcc.dg/memset-2.c: New.
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r216638.
-+ 2014-10-24 Christophe Lyon <christophe.lyon@linaro.org>
-+
-+ * lib/wrapper.exp ({tool}_maybe_build_wrapper): Clear
-+ wrap_compile_flags before setting it.
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r216544.
-+ 2014-10-22 Jiong Wang <jiong.wang@arm.com>
-+
-+ * gcc.target/aarch64/pic-constantpool1.c: Add explicit declaration.
-+ * gcc.target/aarch64/pic-symrefplus.c: Likewise.
-+ * gcc.target/aarch64/reload-valid-spoff.c: Likewise.
-+ * gcc.target/aarch64/vect.x: Likewise.
-+ * gcc.target/aarch64/vect-ld1r.x: Add return type.
-+ * gcc.target/aarch64/vect-fmax-fmin.c: Likewise.
-+ * gcc.target/aarch64/vect-fp.c: Likewise.
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r216543.
-+ 2014-10-22 Jiong Wang <jiong.wang@arm.com>
-+
-+ * lib/compat.exp (compat-run): Remove "unresolved".
-+ * lib/gcc-defs.exp (${tools}_check_compile): Update code logic for
-+ unsupported testcase.
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r216517.
-+ 2014-10-21 Jiong Wang <jiong.wang@arm.com>
-+
-+ * gcc.target/arm/20031108-1.c (Proc_7): Add explicit declaration.
-+ (Proc_1): Add return type.
-+ * gcc.target/arm/cold-lc.c (show_stack): Add explicit declaration.
-+ * gcc.target/arm/neon-modes-2.c (foo): Likewise.
-+ * gcc.target/arm/pr43920-2.c (lseek): Likewise.
-+ * gcc.target/arm/pr44788.c (foo): Likewise.
-+ * gcc.target/arm/pr55642.c (abs): Likewise.
-+ * gcc.target/arm/pr58784.c (f): Likewise.
-+ * gcc.target/arm/pr60650.c (foo1, foo2): Likewise.
-+ * gcc.target/arm/vfp-ldmdbs.c (bar): Likewise.
-+ * gcc.target/arm/vfp-ldmias.c (bar): Likewise.
-+ * gcc.target/arm/pr60650-2.c (fn1, fn2): Add return type and add type
-+ for local variables.
-+ * lib/target-supports.exp
-+ (check_effective_target_arm_crypto_ok_nocache): Add declaration for
-+ vaeseq_u8.
-+ (check_effective_target_arm_neon_fp16_ok_nocache): Add declaration for
-+ vcvt_f16_f32.
-+ (check_effective_target_arm_neonv2_ok_nocache): Add declaration for
-+ vfma_f32.
-+ * gcc.target/arm/pr51968.c: Add -Wno-implicit-function-declaration.
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r215071.
-+ 2014-09-09 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ * gcc.target/aarch64/simd/int_comparisons_1.c: Tighten regexp.
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r215540.
-+ 2014-09-24 Zhenqiang Chen <zhenqiang.chen@arm.com>
-+
-+ * gcc.target/arm/pr63210.c: New test.
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r215475.
-+ 2014-09-22 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ * gcc.dg/vect/vect-reduc-or_1.c: New test.
-+ * gcc.dg/vect/vect-reduc-or_2.c: Likewise.
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r215473.
-+ 2014-09-22 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ * lib/target-supports.exp (check_effective_target_whole_vector_shift):
-+ New.
-+
-+ * gcc.dg/vect/vect-reduc-mul_1.c: New test.
-+ * gcc.dg/vect/vect-reduc-mul_2.c: New test.
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r215177.
-+ 2014-09-11 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ * gcc.target/aarch64/vset_lane_1.c: New test.
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r215129.
-+ 2014-09-10 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ * gcc.target/aarch64/vstN_1.c: New test.
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r215126.
-+ 2014-09-10 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ * gcc.target/aarch64/vldN_lane_1.c: New test.
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r215078.
-+ 2014-09-09 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ * gcc.target/aarch64/vldN_dup_1.c: New test.
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r215077.
-+ 2014-09-09 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ * gcc.target/aarch64/vld1-vst1_1.c: Rewrite to test all variants.
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r215072.
-+ 2014-09-09 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ * gcc.target/aarch64/vldN_1.c: New test.
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r215047.
-+ 2014-09-09 Tony Wang <tony.wang@arm.com>
-+
-+ * gcc.target/arm/xordi3-opt.c: Disable this
-+ test case for thumb1 target.
-+ * gcc.target/arm/iordi3-opt.c: Ditto.
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r215046.
-+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ PR target/61749
-+ * gcc.target/aarch64/vqdml_lane_intrinsics-bad_1.c: New test.
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r214950.
-+ 2014-09-05 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ * gcc.target/aarch64/vget_high_1.c: New test.
-+ * gcc.target/aarch64/vget_low_1.c: Likewise.
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r214948.
-+ 2014-09-05 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ * gcc.target/aarch64/simd/int_comparisons.x: New file.
-+ * gcc.target/aarch64/simd/int_comparisons_1.c: New test.
-+ * gcc.target/aarch64/simd/int_comparisons_2.c: Ditto.
-+
-+2014-12-04 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r213382.
-+ 2014-07-31 James Greenhalgh <james.greenhalgh@arm.com>
-+
-+ * gcc.target/aarch64/scalar_intrinsics.c (test_vpaddd_f64): New.
-+ (test_vpaddd_s64): Likewise.
-+ (test_vpaddd_s64): Likewise.
-+ * gcc.target/aarch64/simd/vpaddd_f64: New.
-+ * gcc.target/aarch64/simd/vpaddd_s64: New.
-+ * gcc.target/aarch64/simd/vpaddd_u64: New.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-10-08 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r214825, r214826, r215085.
-+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * gcc.target/arm/vect-lceilf_1.c: Make input and output arrays global
-+ and 16-byte aligned.
-+ * gcc.target/arm/vect-lfloorf_1.c: Likewise.
-+ * gcc.target/arm/vect-lroundf_1.c: Likewise.
-+ * gcc.target/arm/vect-rounding-btruncf.c: Likewise.
-+ * gcc.target/arm/vect-rounding-ceilf.c: Likewise.
-+ * gcc.target/arm/vect-rounding-floorf.c: Likewise.
-+ * gcc.target/arm/vect-rounding-roundf.c: Likewise.
-+
-+ 2014-09-02 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ PR target/62275
-+ * gcc.target/arm/vect-lceilf_1.c: New test.
-+ * gcc.target/arm/vect-lfloorf_1.c: Likewise.
-+ * gcc.target/arm/vect-lroundf_1.c: Likewise.
-+
-+ 2014-09-02 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ PR target/62275
-+ * gcc.target/arm/lceil-vcvt_1.c: New test.
-+ * gcc.target/arm/lfloor-vcvt_1.c: Likewise.
-+ * gcc.target/arm/lround-vcvt_1.c: Likewise.
-+
-+2014-10-06 Venkataramanan Kumar <venkataramanan.kumar@linaro.org>
-+
-+ Backport from trunk r214943.
-+ 2014-09-05 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ * gcc.target/aarch64/simd/vrbit_1.c: New test.
-+
-+2014-10-06 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r215385.
-+ 2014-09-19 James Greenhalgh <james.greenhalgh@arm.com>
-+
-+ * gcc.dg/ssp-3.c: New.
-+ * gcc.dg/ssp-4.c: Likewise.
-+
-+2014-10-06 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r215136.
-+ 2014-09-10 Xinliang David Li <davidxl@google.com>
-+
-+ PR target/63209
-+ * gcc.c-torture/execute/pr63209.c: New test.
-+
-+2014-10-06 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r215067.
-+ 2014-09-09 Jiong Wang <jiong.wang@arm.com>
-+
-+ * gcc.target/arm/vect-copysignf.c: New testcase.
-+
-+2014-10-03 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r215050, r215051, r215052, r215053, r215054.
-+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * gcc.target/arm/vfp-1.c: Updated expected assembly.
-+
-+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * gcc.target/arm/vfp-1.c: Updated expected assembly.
-+
-+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * gcc.target/arm/vfp-1.c: Updated expected assembly.
-+
-+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * gcc.target/arm/vfp-1.c: Updated expected assembly.
-+
-+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * gcc.target/arm/pr51835.c: Update expected assembly.
-+ * gcc.target/arm/vfp-1.c: Likewise.
-+ * gcc.target/arm/vfp-ldmdbd.c: Likewise.
-+ * gcc.target/arm/vfp-ldmdbs.c: Likewise.
-+ * gcc.target/arm/vfp-ldmiad.c: Likewise.
-+ * gcc.target/arm/vfp-ldmias.c: Likewise.
-+ * gcc.target/arm/vfp-stmdbd.c: Likewise.
-+ * gcc.target/arm/vfp-stmdbs.c: Likewise.
-+ * gcc.target/arm/vfp-stmiad.c: Likewise.
-+ * gcc.target/arm/vfp-stmias.c: Likewise.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-09-03 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r214526.
-+ 2014-08-26 Joseph Myers <joseph@codesourcery.com>
-+
-+ PR target/60606
-+ PR target/61330
-+ * gcc.dg/torture/pr60606-1.c, gcc.target/arm/pr60606-2.c,
-+ gcc.target/arm/pr60606-3.c, gcc.target/arm/pr60606-4.c: New tests.
-+
-+2014-09-03 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r213659.
-+ 2014-08-06 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ * gcc.target/aarch64/vdup_n_2.c: New test.
-+
-+2014-08-26 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r213701.
-+ 2014-08-07 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * gcc.dg/pr61756.c: Remove arm-specific dg-options.
-+
-+2014-08-26 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r213488, r213489.
-+ 2014-08-01 Jiong Wang <jiong.wang@arm.com>
-+
-+ * gcc.target/aarch64/legitimize_stack_var_before_reload_1.c: New
-+ testcase.
-+
-+2014-08-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r212927.
-+ 2014-07-23 Jiong Wang <jiong.wang@arm.com>
-+
-+ * gcc.dg/ira-shrinkwrap-prep-1.c (target): Add arm_nothumb.
-+ * gcc.dg/ira-shrinkwrap-prep-2.c (target): Likewise.
-+ * gcc.dg/pr10474.c (target): Likewise.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-08-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r213555.
-+ 2014-08-04 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ PR target/61713
-+ * gcc.dg/pr61756.c: New test.
-+
-+2014-08-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r213376.
-+ 2014-07-31 Charles Baylis <charles.baylis@linaro.org>
-+
-+ PR target/61948
-+ * gcc.target/arm/pr61948.c: New test case.
-+
-+2014-08-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r212959, r212976, r212999, r213000.
-+ 2014-07-24 Jiong Wang <jiong.wang@arm.com>
-+
-+ * gcc.target/aarch64/test_frame_1.c: Match optimized instruction
-+ sequences.
-+ * gcc.target/aarch64/test_frame_2.c: Likewise.
-+ * gcc.target/aarch64/test_frame_4.c: Likewise.
-+ * gcc.target/aarch64/test_frame_6.c: Likewise.
-+ * gcc.target/aarch64/test_frame_7.c: Likewise.
-+ * gcc.target/aarch64/test_frame_8.c: Likewise.
-+ * gcc.target/aarch64/test_frame_10.c: Likewise.
-+
-+ 2014-07-24 Jiong Wang <jiong.wang@arm.com>
-+
-+ * gcc.target/aarch64/test_frame_1.c: Match optimized instruction
-+ sequences.
-+ * gcc.target/aarch64/test_frame_10.c: Likewise.
-+ * gcc.target/aarch64/test_frame_2.c: Likewise.
-+ * gcc.target/aarch64/test_frame_4.c: Likewise.
-+ * gcc.target/aarch64/test_frame_6.c: Likewise.
-+ * gcc.target/aarch64/test_frame_7.c: Likewise.
-+ * gcc.target/aarch64/test_frame_8.c: Likewise.
-+ * gcc.target/aarch64/test_fp_attribute_1.c: Likewise.
-+
-+ 2014-07-24 Jiong Wang <jiong.wang@arm.com>
-+
-+ * gcc.target/aarch64/test_frame_12.c: Match optimized instruction
-+ sequences.
-+
-+ 2014-07-23 Jiong Wang <jiong.wang@arm.com>
-+
-+ * gcc.target/aarch64/test_frame_common.h: New file.
-+ * gcc.target/aarch64/test_frame_1.c: Likewise.
-+ * gcc.target/aarch64/test_frame_2.c: Likewise.
-+ * gcc.target/aarch64/test_frame_3.c: Likewise.
-+ * gcc.target/aarch64/test_frame_4.c: Likewise.
-+ * gcc.target/aarch64/test_frame_5.c: Likewise.
-+ * gcc.target/aarch64/test_frame_6.c: Likewise.
-+ * gcc.target/aarch64/test_frame_7.c: Likewise.
-+ * gcc.target/aarch64/test_frame_8.c: Likewise.
-+ * gcc.target/aarch64/test_frame_9.c: Likewise.
-+ * gcc.target/aarch64/test_frame_10.c: Likewise.
-+ * gcc.target/aarch64/test_frame_11.c: Likewise.
-+ * gcc.target/aarch64/test_frame_12.c: Likewise.
-+ * gcc.target/aarch64/test_frame_13.c: Likewise.
-+ * gcc.target/aarch64/test_frame_14.c: Likewise.
-+ * gcc.target/aarch64/test_frame_15.c: Likewise.
-+
-+2014-08-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r212023, r212024.
-+ 2014-06-26 Vidya Praveen <vidyapraveen@arm.com>
-+
-+ * gcc.dg/inline-22.c: Add bind_pic_locally.
-+ * gcc.dg/inline_4.c: Ditto.
-+ * gcc.dg/fail_always_inline.c: Ditto.
-+ * g++.dg/ipa/devirt-25.C: Ditto.
-+
-+ 2014-06-26 Vidya Praveen <vidyapraveen@arm.com>
-+
-+ * lib/target-supports.exp (bind_pic_locally): Save the flags to
-+ 'flags_to_postpone' instead of appending to 'flags'.
-+ * lib/gcc.exp (gcc_target_compile): Append board_info's multilib_flags
-+ with flags_to_postpone and revert after target_compile.
-+ * lib/g++.exp (g++_target_compile): Ditto.
-+ * lib/gfortran.exp (gfortran_target_compile): Ditto.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r211887.
-+ 2014-06-23 James Greenhalgh <james.greenhalgh@arm.com>
-+
-+ * gcc.target/aarch64/scalar_shift_1.c: Fix expected assembler.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r211441.
-+ 2014-06-11 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * gcc.target/aarch64/acle/acle.exp: New.
-+ * gcc.target/aarch64/acle/crc32b.c: New test.
-+ * gcc.target/aarch64/acle/crc32cb.c: Likewise.
-+ * gcc.target/aarch64/acle/crc32cd.c: Likewise.
-+ * gcc.target/aarch64/acle/crc32ch.c: Likewise.
-+ * gcc.target/aarch64/acle/crc32cw.c: Likewise.
-+ * gcc.target/aarch64/acle/crc32d.c: Likewise.
-+ * gcc.target/aarch64/acle/crc32h.c: Likewise.
-+ * gcc.target/aarch64/acle/crc32w.c: Likewise.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r210153.
-+ 2014-05-07 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ * gcc.target/aarch64/simd/vrev16p8_1.c: New file.
-+ * gcc.target/aarch64/simd/vrev16p8.x: New file.
-+ * gcc.target/aarch64/simd/vrev16qp8_1.c: New file.
-+ * gcc.target/aarch64/simd/vrev16qp8.x: New file.
-+ * gcc.target/aarch64/simd/vrev16qs8_1.c: New file.
-+ * gcc.target/aarch64/simd/vrev16qs8.x: New file.
-+ * gcc.target/aarch64/simd/vrev16qu8_1.c: New file.
-+ * gcc.target/aarch64/simd/vrev16qu8.x: New file.
-+ * gcc.target/aarch64/simd/vrev16s8_1.c: New file.
-+ * gcc.target/aarch64/simd/vrev16s8.x: New file.
-+ * gcc.target/aarch64/simd/vrev16u8_1.c: New file.
-+ * gcc.target/aarch64/simd/vrev16u8.x: New file.
-+ * gcc.target/aarch64/simd/vrev32p16_1.c: New file.
-+ * gcc.target/aarch64/simd/vrev32p16.x: New file.
-+ * gcc.target/aarch64/simd/vrev32p8_1.c: New file.
-+ * gcc.target/aarch64/simd/vrev32p8.x: New file.
-+ * gcc.target/aarch64/simd/vrev32qp16_1.c: New file.
-+ * gcc.target/aarch64/simd/vrev32qp16.x: New file.
-+ * gcc.target/aarch64/simd/vrev32qp8_1.c: New file.
-+ * gcc.target/aarch64/simd/vrev32qp8.x: New file.
-+ * gcc.target/aarch64/simd/vrev32qs16_1.c: New file.
-+ * gcc.target/aarch64/simd/vrev32qs16.x: New file.
-+ * gcc.target/aarch64/simd/vrev32qs8_1.c: New file.
-+ * gcc.target/aarch64/simd/vrev32qs8.x: New file.
-+ * gcc.target/aarch64/simd/vrev32qu16_1.c: New file.
-+ * gcc.target/aarch64/simd/vrev32qu16.x: New file.
-+ * gcc.target/aarch64/simd/vrev32qu8_1.c: New file.
-+ * gcc.target/aarch64/simd/vrev32qu8.x: New file.
-+ * gcc.target/aarch64/simd/vrev32s16_1.c: New file.
-+ * gcc.target/aarch64/simd/vrev32s16.x: New file.
-+ * gcc.target/aarch64/simd/vrev32s8_1.c: New file.
-+ * gcc.target/aarch64/simd/vrev32s8.x: New file.
-+ * gcc.target/aarch64/simd/vrev32u16_1.c: New file.
-+ * gcc.target/aarch64/simd/vrev32u16.x: New file.
-+ * gcc.target/aarch64/simd/vrev32u8_1.c: New file.
-+ * gcc.target/aarch64/simd/vrev32u8.x: New file.
-+ * gcc.target/aarch64/simd/vrev64f32_1.c: New file.
-+ * gcc.target/aarch64/simd/vrev64f32.x: New file.
-+ * gcc.target/aarch64/simd/vrev64p16_1.c: New file.
-+ * gcc.target/aarch64/simd/vrev64p16.x: New file.
-+ * gcc.target/aarch64/simd/vrev64p8_1.c: New file.
-+ * gcc.target/aarch64/simd/vrev64p8.x: New file.
-+ * gcc.target/aarch64/simd/vrev64qf32_1.c: New file.
-+ * gcc.target/aarch64/simd/vrev64qf32.x: New file.
-+ * gcc.target/aarch64/simd/vrev64qp16_1.c: New file.
-+ * gcc.target/aarch64/simd/vrev64qp16.x: New file.
-+ * gcc.target/aarch64/simd/vrev64qp8_1.c: New file.
-+ * gcc.target/aarch64/simd/vrev64qp8.x: New file.
-+ * gcc.target/aarch64/simd/vrev64qs16_1.c: New file.
-+ * gcc.target/aarch64/simd/vrev64qs16.x: New file.
-+ * gcc.target/aarch64/simd/vrev64qs32_1.c: New file.
-+ * gcc.target/aarch64/simd/vrev64qs32.x: New file.
-+ * gcc.target/aarch64/simd/vrev64qs8_1.c: New file.
-+ * gcc.target/aarch64/simd/vrev64qs8.x: New file.
-+ * gcc.target/aarch64/simd/vrev64qu16_1.c: New file.
-+ * gcc.target/aarch64/simd/vrev64qu16.x: New file.
-+ * gcc.target/aarch64/simd/vrev64qu32_1.c: New file.
-+ * gcc.target/aarch64/simd/vrev64qu32.x: New file.
-+ * gcc.target/aarch64/simd/vrev64qu8_1.c: New file.
-+ * gcc.target/aarch64/simd/vrev64qu8.x: New file.
-+ * gcc.target/aarch64/simd/vrev64s16_1.c: New file.
-+ * gcc.target/aarch64/simd/vrev64s16.x: New file.
-+ * gcc.target/aarch64/simd/vrev64s32_1.c: New file.
-+ * gcc.target/aarch64/simd/vrev64s32.x: New file.
-+ * gcc.target/aarch64/simd/vrev64s8_1.c: New file.
-+ * gcc.target/aarch64/simd/vrev64s8.x: New file.
-+ * gcc.target/aarch64/simd/vrev64u16_1.c: New file.
-+ * gcc.target/aarch64/simd/vrev64u16.x: New file.
-+ * gcc.target/aarch64/simd/vrev64u32_1.c: New file.
-+ * gcc.target/aarch64/simd/vrev64u32.x: New file.
-+ * gcc.target/aarch64/simd/vrev64u8_1.c: New file.
-+ * gcc.target/aarch64/simd/vrev64u8.x: New file.
-+
-+2014-07-16 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r210148, r210151, r210422.
-+ 2014-05-14 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ * gcc.target/arm/simd/vtrnqf32_1.c: New file.
-+ * gcc.target/arm/simd/vtrnqp16_1.c: New file.
-+ * gcc.target/arm/simd/vtrnqp8_1.c: New file.
-+ * gcc.target/arm/simd/vtrnqs16_1.c: New file.
-+ * gcc.target/arm/simd/vtrnqs32_1.c: New file.
-+ * gcc.target/arm/simd/vtrnqs8_1.c: New file.
-+ * gcc.target/arm/simd/vtrnqu16_1.c: New file.
-+ * gcc.target/arm/simd/vtrnqu32_1.c: New file.
-+ * gcc.target/arm/simd/vtrnqu8_1.c: New file.
-+ * gcc.target/arm/simd/vtrnf32_1.c: New file.
-+ * gcc.target/arm/simd/vtrnp16_1.c: New file.
-+ * gcc.target/arm/simd/vtrnp8_1.c: New file.
-+ * gcc.target/arm/simd/vtrns16_1.c: New file.
-+ * gcc.target/arm/simd/vtrns32_1.c: New file.
-+ * gcc.target/arm/simd/vtrns8_1.c: New file.
-+ * gcc.target/arm/simd/vtrnu16_1.c: New file.
-+ * gcc.target/arm/simd/vtrnu32_1.c: New file.
-+ * gcc.target/arm/simd/vtrnu8_1.c: New file.
-+
-+ 2014-05-07 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ * gcc.target/aarch64/vtrns32.c: Expect zip[12] insn rather than trn[12].
-+ * gcc.target/aarch64/vtrnu32.c: Likewise.
-+ * gcc.target/aarch64/vtrnf32.c: Likewise.
-+
-+ 2014-05-07 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ * gcc.target/aarch64/simd/vtrnf32_1.c: New file.
-+ * gcc.target/aarch64/simd/vtrnf32.x: New file.
-+ * gcc.target/aarch64/simd/vtrnp16_1.c: New file.
-+ * gcc.target/aarch64/simd/vtrnp16.x: New file.
-+ * gcc.target/aarch64/simd/vtrnp8_1.c: New file.
-+ * gcc.target/aarch64/simd/vtrnp8.x: New file.
-+ * gcc.target/aarch64/simd/vtrnqf32_1.c: New file.
-+ * gcc.target/aarch64/simd/vtrnqf32.x: New file.
-+ * gcc.target/aarch64/simd/vtrnqp16_1.c: New file.
-+ * gcc.target/aarch64/simd/vtrnqp16.x: New file.
-+ * gcc.target/aarch64/simd/vtrnqp8_1.c: New file.
-+ * gcc.target/aarch64/simd/vtrnqp8.x: New file.
-+ * gcc.target/aarch64/simd/vtrnqs16_1.c: New file.
-+ * gcc.target/aarch64/simd/vtrnqs16.x: New file.
-+ * gcc.target/aarch64/simd/vtrnqs32_1.c: New file.
-+ * gcc.target/aarch64/simd/vtrnqs32.x: New file.
-+ * gcc.target/aarch64/simd/vtrnqs8_1.c: New file.
-+ * gcc.target/aarch64/simd/vtrnqs8.x: New file.
-+ * gcc.target/aarch64/simd/vtrnqu16_1.c: New file.
-+ * gcc.target/aarch64/simd/vtrnqu16.x: New file.
-+ * gcc.target/aarch64/simd/vtrnqu32_1.c: New file.
-+ * gcc.target/aarch64/simd/vtrnqu32.x: New file.
-+ * gcc.target/aarch64/simd/vtrnqu8_1.c: New file.
-+ * gcc.target/aarch64/simd/vtrnqu8.x: New file.
-+ * gcc.target/aarch64/simd/vtrns16_1.c: New file.
-+ * gcc.target/aarch64/simd/vtrns16.x: New file.
-+ * gcc.target/aarch64/simd/vtrns32_1.c: New file.
-+ * gcc.target/aarch64/simd/vtrns32.x: New file.
-+ * gcc.target/aarch64/simd/vtrns8_1.c: New file.
-+ * gcc.target/aarch64/simd/vtrns8.x: New file.
-+ * gcc.target/aarch64/simd/vtrnu16_1.c: New file.
-+ * gcc.target/aarch64/simd/vtrnu16.x: New file.
-+ * gcc.target/aarch64/simd/vtrnu32_1.c: New file.
-+ * gcc.target/aarch64/simd/vtrnu32.x: New file.
-+ * gcc.target/aarch64/simd/vtrnu8_1.c: New file.
-+ * gcc.target/aarch64/simd/vtrnu8.x: New file.
-+
-+2014-07-16 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209794, 209858.
-+ 2014-04-25 Marek Polacek <polacek@redhat.com>
-+
-+ PR c/60114
-+ * gcc.dg/pr60114.c: New test.
-+
-+ 2014-04-28 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ PR c/60983
-+ * gcc.dg/pr60114.c: Use signed chars.
-+
-+2014-07-16 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r210861.
-+ 2014-05-23 Jiong Wang <jiong.wang@arm.com>
-+
-+ * gcc.target/aarch64/tail_indirect_call_1.c: New.
-+
-+2014-07-16 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r211314.
-+ 2014-06-06 James Greenhalgh <james.greenhalgh@arm.com>
-+
-+ * gcc.dg/tree-ssa/pr42585.c: Skip for AArch64.
-+ * gcc.dg/tree-ssa/sra-12.c: Likewise.
-+
-+2014-07-16 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r210967.
-+ 2014-05-27 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * lib/target-supports.exp (check_effective_target_vect_bswap):
-+ Specify arm*-*-* support.
-+
-+2014-07-16 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r210152, 211059.
-+ 2014-05-29 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ * gcc.target/arm/simd/vextQf32_1.c: New file.
-+ * gcc.target/arm/simd/vextQp16_1.c: New file.
-+ * gcc.target/arm/simd/vextQp8_1.c: New file.
-+ * gcc.target/arm/simd/vextQs16_1.c: New file.
-+ * gcc.target/arm/simd/vextQs32_1.c: New file.
-+ * gcc.target/arm/simd/vextQs64_1.c: New file.
-+ * gcc.target/arm/simd/vextQs8_1.c: New file.
-+ * gcc.target/arm/simd/vextQu16_1.c: New file.
-+ * gcc.target/arm/simd/vextQu32_1.c: New file.
-+ * gcc.target/arm/simd/vextQu64_1.c: New file.
-+ * gcc.target/arm/simd/vextQu8_1.c: New file.
-+ * gcc.target/arm/simd/vextQp64_1.c: New file.
-+ * gcc.target/arm/simd/vextf32_1.c: New file.
-+ * gcc.target/arm/simd/vextp16_1.c: New file.
-+ * gcc.target/arm/simd/vextp8_1.c: New file.
-+ * gcc.target/arm/simd/vexts16_1.c: New file.
-+ * gcc.target/arm/simd/vexts32_1.c: New file.
-+ * gcc.target/arm/simd/vexts64_1.c: New file.
-+ * gcc.target/arm/simd/vexts8_1.c: New file.
-+ * gcc.target/arm/simd/vextu16_1.c: New file.
-+ * gcc.target/arm/simd/vextu32_1.c: New file.
-+ * gcc.target/arm/simd/vextu64_1.c: New file.
-+ * gcc.target/arm/simd/vextu8_1.c: New file.
-+ * gcc.target/arm/simd/vextp64_1.c: New file.
-+
-+ 2014-05-07 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ * gcc.target/aarch64/simd/ext_f32.x: New file.
-+ * gcc.target/aarch64/simd/ext_f32_1.c: New file.
-+ * gcc.target/aarch64/simd/ext_p16.x: New file.
-+ * gcc.target/aarch64/simd/ext_p16_1.c: New file.
-+ * gcc.target/aarch64/simd/ext_p8.x: New file.
-+ * gcc.target/aarch64/simd/ext_p8_1.c: New file.
-+ * gcc.target/aarch64/simd/ext_s16.x: New file.
-+ * gcc.target/aarch64/simd/ext_s16_1.c: New file.
-+ * gcc.target/aarch64/simd/ext_s32.x: New file.
-+ * gcc.target/aarch64/simd/ext_s32_1.c: New file.
-+ * gcc.target/aarch64/simd/ext_s64.x: New file.
-+ * gcc.target/aarch64/simd/ext_s64_1.c: New file.
-+ * gcc.target/aarch64/simd/ext_s8.x: New file.
-+ * gcc.target/aarch64/simd/ext_s8_1.c: New file.
-+ * gcc.target/aarch64/simd/ext_u16.x: New file.
-+ * gcc.target/aarch64/simd/ext_u16_1.c: New file.
-+ * gcc.target/aarch64/simd/ext_u32.x: New file.
-+ * gcc.target/aarch64/simd/ext_u32_1.c: New file.
-+ * gcc.target/aarch64/simd/ext_u64.x: New file.
-+ * gcc.target/aarch64/simd/ext_u64_1.c: New file.
-+ * gcc.target/aarch64/simd/ext_u8.x: New file.
-+ * gcc.target/aarch64/simd/ext_u8_1.c: New file.
-+ * gcc.target/aarch64/simd/ext_f64.c: New file.
-+ * gcc.target/aarch64/simd/extq_f32.x: New file.
-+ * gcc.target/aarch64/simd/extq_f32_1.c: New file.
-+ * gcc.target/aarch64/simd/extq_p16.x: New file.
-+ * gcc.target/aarch64/simd/extq_p16_1.c: New file.
-+ * gcc.target/aarch64/simd/extq_p8.x: New file.
-+ * gcc.target/aarch64/simd/extq_p8_1.c: New file.
-+ * gcc.target/aarch64/simd/extq_s16.x: New file.
-+ * gcc.target/aarch64/simd/extq_s16_1.c: New file.
-+ * gcc.target/aarch64/simd/extq_s32.x: New file.
-+ * gcc.target/aarch64/simd/extq_s32_1.c: New file.
-+ * gcc.target/aarch64/simd/extq_s64.x: New file.
-+ * gcc.target/aarch64/simd/extq_s64_1.c: New file.
-+ * gcc.target/aarch64/simd/extq_s8.x: New file.
-+ * gcc.target/aarch64/simd/extq_s8_1.c: New file.
-+ * gcc.target/aarch64/simd/extq_u16.x: New file.
-+ * gcc.target/aarch64/simd/extq_u16_1.c: New file.
-+ * gcc.target/aarch64/simd/extq_u32.x: New file.
-+
-+2014-07-16 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209940, r209943, r209947.
-+ 2014-04-30 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ * gcc.target/arm/simd/vuzpqf32_1.c: New file.
-+ * gcc.target/arm/simd/vuzpqp16_1.c: New file.
-+ * gcc.target/arm/simd/vuzpqp8_1.c: New file.
-+ * gcc.target/arm/simd/vuzpqs16_1.c: New file.
-+ * gcc.target/arm/simd/vuzpqs32_1.c: New file.
-+ * gcc.target/arm/simd/vuzpqs8_1.c: New file.
-+ * gcc.target/arm/simd/vuzpqu16_1.c: New file.
-+ * gcc.target/arm/simd/vuzpqu32_1.c: New file.
-+ * gcc.target/arm/simd/vuzpqu8_1.c: New file.
-+ * gcc.target/arm/simd/vuzpf32_1.c: New file.
-+ * gcc.target/arm/simd/vuzpp16_1.c: New file.
-+ * gcc.target/arm/simd/vuzpp8_1.c: New file.
-+ * gcc.target/arm/simd/vuzps16_1.c: New file.
-+ * gcc.target/arm/simd/vuzps32_1.c: New file.
-+ * gcc.target/arm/simd/vuzps8_1.c: New file.
-+ * gcc.target/arm/simd/vuzpu16_1.c: New file.
-+ * gcc.target/arm/simd/vuzpu32_1.c: New file.
-+ * gcc.target/arm/simd/vuzpu8_1.c: New file.
-+
-+ 2014-04-30 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ * gcc.target/aarch64/vuzps32_1.c: Expect zip1/2 insn rather than uzp1/2.
-+ * gcc.target/aarch64/vuzpu32_1.c: Likewise.
-+ * gcc.target/aarch64/vuzpf32_1.c: Likewise.
-+
-+ 2014-04-30 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ * gcc.target/aarch64/simd/vuzpf32_1.c: New file.
-+ * gcc.target/aarch64/simd/vuzpf32.x: New file.
-+ * gcc.target/aarch64/simd/vuzpp16_1.c: New file.
-+ * gcc.target/aarch64/simd/vuzpp16.x: New file.
-+ * gcc.target/aarch64/simd/vuzpp8_1.c: New file.
-+ * gcc.target/aarch64/simd/vuzpp8.x: New file.
-+ * gcc.target/aarch64/simd/vuzpqf32_1.c: New file.
-+ * gcc.target/aarch64/simd/vuzpqf32.x: New file.
-+ * gcc.target/aarch64/simd/vuzpqp16_1.c: New file.
-+ * gcc.target/aarch64/simd/vuzpqp16.x: New file.
-+ * gcc.target/aarch64/simd/vuzpqp8_1.c: New file.
-+ * gcc.target/aarch64/simd/vuzpqp8.x: New file.
-+ * gcc.target/aarch64/simd/vuzpqs16_1.c: New file.
-+ * gcc.target/aarch64/simd/vuzpqs16.x: New file.
-+ * gcc.target/aarch64/simd/vuzpqs32_1.c: New file.
-+ * gcc.target/aarch64/simd/vuzpqs32.x: New file.
-+ * gcc.target/aarch64/simd/vuzpqs8_1.c: New file.
-+ * gcc.target/aarch64/simd/vuzpqs8.x: New file.
-+ * gcc.target/aarch64/simd/vuzpqu16_1.c: New file.
-+ * gcc.target/aarch64/simd/vuzpqu16.x: New file.
-+ * gcc.target/aarch64/simd/vuzpqu32_1.c: New file.
-+ * gcc.target/aarch64/simd/vuzpqu32.x: New file.
-+ * gcc.target/aarch64/simd/vuzpqu8_1.c: New file.
-+ * gcc.target/aarch64/simd/vuzpqu8.x: New file.
-+ * gcc.target/aarch64/simd/vuzps16_1.c: New file.
-+ * gcc.target/aarch64/simd/vuzps16.x: New file.
-+ * gcc.target/aarch64/simd/vuzps32_1.c: New file.
-+ * gcc.target/aarch64/simd/vuzps32.x: New file.
-+ * gcc.target/aarch64/simd/vuzps8_1.c: New file.
-+ * gcc.target/aarch64/simd/vuzps8.x: New file.
-+ * gcc.target/aarch64/simd/vuzpu16_1.c: New file.
-+ * gcc.target/aarch64/simd/vuzpu16.x: New file.
-+ * gcc.target/aarch64/simd/vuzpu32_1.c: New file.
-+ * gcc.target/aarch64/simd/vuzpu32.x: New file.
-+ * gcc.target/aarch64/simd/vuzpu8_1.c: New file.
-+ * gcc.target/aarch64/simd/vuzpu8.x: New file.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-13 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r211206.
-+ 2014-06-03 Andrew Pinski <apinski@cavium.com>
-+
-+ * gcc.c-torture/compile/20140528-1.c: New testcase.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209908.
-+ 2013-04-29 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ * gcc.target/arm/simd/simd.exp: New file.
-+ * gcc.target/arm/simd/vzipqf32_1.c: New file.
-+ * gcc.target/arm/simd/vzipqp16_1.c: New file.
-+ * gcc.target/arm/simd/vzipqp8_1.c: New file.
-+ * gcc.target/arm/simd/vzipqs16_1.c: New file.
-+ * gcc.target/arm/simd/vzipqs32_1.c: New file.
-+ * gcc.target/arm/simd/vzipqs8_1.c: New file.
-+ * gcc.target/arm/simd/vzipqu16_1.c: New file.
-+ * gcc.target/arm/simd/vzipqu32_1.c: New file.
-+ * gcc.target/arm/simd/vzipqu8_1.c: New file.
-+ * gcc.target/arm/simd/vzipf32_1.c: New file.
-+ * gcc.target/arm/simd/vzipp16_1.c: New file.
-+ * gcc.target/arm/simd/vzipp8_1.c: New file.
-+ * gcc.target/arm/simd/vzips16_1.c: New file.
-+ * gcc.target/arm/simd/vzips32_1.c: New file.
-+ * gcc.target/arm/simd/vzips8_1.c: New file.
-+ * gcc.target/arm/simd/vzipu16_1.c: New file.
-+ * gcc.target/arm/simd/vzipu32_1.c: New file.
-+ * gcc.target/arm/simd/vzipu8_1.c: New file.
-+
-+2014-05-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209893.
-+ 2014-04-29 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ * gcc.target/aarch64/simd/simd.exp: New file.
-+ * gcc.target/aarch64/simd/vzipf32_1.c: New file.
-+ * gcc.target/aarch64/simd/vzipf32.x: New file.
-+ * gcc.target/aarch64/simd/vzipp16_1.c: New file.
-+ * gcc.target/aarch64/simd/vzipp16.x: New file.
-+ * gcc.target/aarch64/simd/vzipp8_1.c: New file.
-+ * gcc.target/aarch64/simd/vzipp8.x: New file.
-+ * gcc.target/aarch64/simd/vzipqf32_1.c: New file.
-+ * gcc.target/aarch64/simd/vzipqf32.x: New file.
-+ * gcc.target/aarch64/simd/vzipqp16_1.c: New file.
-+ * gcc.target/aarch64/simd/vzipqp16.x: New file.
-+ * gcc.target/aarch64/simd/vzipqp8_1.c: New file.
-+ * gcc.target/aarch64/simd/vzipqp8.x: New file.
-+ * gcc.target/aarch64/simd/vzipqs16_1.c: New file.
-+ * gcc.target/aarch64/simd/vzipqs16.x: New file.
-+ * gcc.target/aarch64/simd/vzipqs32_1.c: New file.
-+ * gcc.target/aarch64/simd/vzipqs32.x: New file.
-+ * gcc.target/aarch64/simd/vzipqs8_1.c: New file.
-+ * gcc.target/aarch64/simd/vzipqs8.x: New file.
-+ * gcc.target/aarch64/simd/vzipqu16_1.c: New file.
-+ * gcc.target/aarch64/simd/vzipqu16.x: New file.
-+ * gcc.target/aarch64/simd/vzipqu32_1.c: New file.
-+ * gcc.target/aarch64/simd/vzipqu32.x: New file.
-+ * gcc.target/aarch64/simd/vzipqu8_1.c: New file.
-+ * gcc.target/aarch64/simd/vzipqu8.x: New file.
-+ * gcc.target/aarch64/simd/vzips16_1.c: New file.
-+ * gcc.target/aarch64/simd/vzips16.x: New file.
-+ * gcc.target/aarch64/simd/vzips32_1.c: New file.
-+ * gcc.target/aarch64/simd/vzips32.x: New file.
-+ * gcc.target/aarch64/simd/vzips8_1.c: New file.
-+ * gcc.target/aarch64/simd/vzips8.x: New file.
-+ * gcc.target/aarch64/simd/vzipu16_1.c: New file.
-+ * gcc.target/aarch64/simd/vzipu16.x: New file.
-+ * gcc.target/aarch64/simd/vzipu32_1.c: New file.
-+ * gcc.target/aarch64/simd/vzipu32.x: New file.
-+ * gcc.target/aarch64/simd/vzipu8_1.c: New file.
-+ * gcc.target/aarch64/simd/vzipu8.x: New file.
-+
-+2014-05-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209808.
-+ 2014-04-25 Jiong Wang <jiong.wang@arm.com>
-+
-+ * gcc.target/arm/tail-long-call.c: New test.
-+
-+2014-05-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209749.
-+ 2014-04-24 Alan Lawrence <alan.lawrence@arm.com>
-+
-+ * lib/target-supports.exp (check_effective_target_vect_perm): Return
-+ true for aarch64_be.
-+
-+2014-05-23 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209736.
-+ 2014-04-24 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * lib/target-supports.exp (check_effective_target_vect_bswap): New.
-+ * gcc.dg/vect/vect-bswap16: New test.
-+ * gcc.dg/vect/vect-bswap32: Likewise.
-+ * gcc.dg/vect/vect-bswap64: Likewise.
-+
-+2014-05-23 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209713.
-+ 2014-04-23 Alex Velenko <Alex.Velenko@arm.com>
-+
-+ * gcc.target/aarch64/vdup_lane_1.c: New testcase.
-+ * gcc.target/aarch64/vdup_lane_2.c: New testcase.
-+ * gcc.target/aarch64/vdup_n_1.c: New testcase.
-+
-+2014-05-23 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209704, 209705.
-+ 2014-04-23 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * gcc.target/arm/rev16.c: New test.
-+
-+ 2014-04-23 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * gcc.target/aarch64/rev16_1.c: New test.
-+
-+2014-05-23 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209642.
-+ 2014-04-22 Alex Velenko <Alex.Velenko@arm.com>
-+
-+ * gcc.target/aarch64/vreinterpret_f64_1.c: New.
-+
-+2014-05-23 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209640.
-+ 2014-04-22 Alex Velenko <Alex.Velenko@arm.com>
-+
-+ * gcc.target/aarch64/vqneg_s64_1.c: New testcase.
-+ * gcc.target/aarch64/vqabs_s64_1.c: New testcase.
-+
-+2014-05-23 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209613, 209614.
-+ 2014-04-22 Ian Bolton <ian.bolton@arm.com>
-+
-+ * gcc.target/arm/anddi_notdi-1.c: New test.
-+ * gcc.target/arm/iordi_notdi-1.c: New test case.
-+
-+ 2014-04-22 Ian Bolton <ian.bolton@arm.com>
-+
-+ * gcc.target/arm/iordi_notdi-1.c: New test.
-+
-+2014-05-23 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209559.
-+ 2014-04-22 Alex Velenko <Alex.Velenko@arm.com>
-+
-+ * gcc.target/aarch64/vrnd_f64_1.c : New file.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-05-13 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209889.
-+ 2014-04-29 Zhenqiang Chen <zhenqiang.chen@linaro.org>
-+
-+ * gcc.target/aarch64/fcsel_1.c: New test case.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/gcc/testsuite/gcc.c-torture/compile/20140528-1.c
-+++ b/src/gcc/testsuite/gcc.c-torture/compile/20140528-1.c
-@@ -0,0 +1,9 @@
-+unsigned f(unsigned flags, unsigned capabilities)
-+{
-+ unsigned gfp_mask;
-+ unsigned gfp_notmask = 0;
-+ gfp_mask = flags & ((1 << 25) - 1);
-+ if (!(capabilities & 0x00000001))
-+ gfp_mask |= 0x1000000u;
-+ return (gfp_mask & ~gfp_notmask);
-+}
---- a/src/gcc/testsuite/gcc.dg/fail_always_inline.c
-+++ b/src/gcc/testsuite/gcc.dg/fail_always_inline.c
-@@ -1,4 +1,5 @@
- /* { dg-do compile } */
-+/* { dg-add-options bind_pic_locally } */
-
- extern __attribute__ ((always_inline)) void
- bar() { } /* { dg-warning "function might not be inlinable" } */
---- a/src/gcc/testsuite/gcc.dg/ira-shrinkwrap-prep-1.c
-+++ b/src/gcc/testsuite/gcc.dg/ira-shrinkwrap-prep-1.c
-@@ -1,4 +1,4 @@
--/* { dg-do compile { target { { x86_64-*-* && lp64 } || { powerpc*-*-* && lp64 } } } } */
-+/* { dg-do compile { target { { x86_64-*-* && lp64 } || { { powerpc*-*-* && lp64 } || arm_nothumb } } } } */
- /* { dg-options "-O3 -fdump-rtl-ira -fdump-rtl-pro_and_epilogue" } */
-
- long __attribute__((noinline, noclone))
---- a/src/gcc/testsuite/gcc.dg/pr10474.c
-+++ b/src/gcc/testsuite/gcc.dg/pr10474.c
-@@ -1,4 +1,4 @@
--/* { dg-do compile { target { { x86_64-*-* && lp64 } || { powerpc*-*-* && lp64 } } } } */
-+/* { dg-do compile { target { { x86_64-*-* && lp64 } || { { powerpc*-*-* && lp64 } || arm_nothumb } } } } */
- /* { dg-options "-O3 -fdump-rtl-pro_and_epilogue" } */
-
- void f(int *i)
---- a/src/gcc/testsuite/gcc.dg/ssp-4.c
-+++ b/src/gcc/testsuite/gcc.dg/ssp-4.c
-@@ -0,0 +1,18 @@
-+/* { dg-do assemble } */
-+/* { dg-options "-fstack-protector-strong -O1 -frename-registers" } */
-+/* { dg-require-effective-target fstack_protector } */
-+
-+typedef unsigned int uint32_t;
-+struct ctx
-+{
-+ uint32_t A;
-+};
-+
-+void *
-+buffer_copy (const struct ctx *ctx, void *resbuf)
-+{
-+ uint32_t buffer[4];
-+ buffer[0] = (ctx->A);
-+ __builtin_memcpy (resbuf, buffer, sizeof (buffer));
-+ return resbuf;
-+}
---- a/src/gcc/testsuite/gcc.dg/ira-shrinkwrap-prep-2.c
-+++ b/src/gcc/testsuite/gcc.dg/ira-shrinkwrap-prep-2.c
-@@ -1,4 +1,4 @@
--/* { dg-do compile { target { { x86_64-*-* && lp64 } || { powerpc*-*-* && lp64 } } } } */
-+/* { dg-do compile { target { { x86_64-*-* && lp64 } || { { powerpc*-*-* && lp64 } || arm_nothumb } } } } */
- /* { dg-options "-O3 -fdump-rtl-ira -fdump-rtl-pro_and_epilogue" } */
-
- long __attribute__((noinline, noclone))
---- a/src/gcc/testsuite/gcc.dg/inline-22.c
-+++ b/src/gcc/testsuite/gcc.dg/inline-22.c
-@@ -1,5 +1,6 @@
- /* { dg-do compile } */
- /* { dg-options "-funit-at-a-time -Wno-attributes" } */
-+/* { dg-add-options bind_pic_locally } */
- /* Verify we can inline without a complete prototype and with promoted
- arguments. See also PR32492. */
- __attribute__((always_inline)) void f1() {}
---- a/src/gcc/testsuite/gcc.dg/memset-2.c
-+++ b/src/gcc/testsuite/gcc.dg/memset-2.c
-@@ -0,0 +1,11 @@
-+/* PR target/63937 */
-+/* { dg-do compile { target lp64 } } */
-+/* { dg-options "-O2" } */
-+
-+void
-+foo (char *p)
-+{
-+ p = __builtin_assume_aligned (p, 64);
-+ __builtin_memset (p, 0, 0x100000001ULL);
-+}
-+
---- a/src/gcc/testsuite/gcc.dg/inline_4.c
-+++ b/src/gcc/testsuite/gcc.dg/inline_4.c
-@@ -1,5 +1,6 @@
- /* { dg-do compile } */
- /* { dg-options "-O2 -fdump-tree-optimized -fdisable-tree-einline=foo2 -fdisable-ipa-inline -Wno-attributes" } */
-+/* { dg-add-options bind_pic_locally } */
- int g;
- __attribute__((always_inline)) void bar (void)
- {
---- a/src/gcc/testsuite/gcc.dg/torture/pr60606-1.c
-+++ b/src/gcc/testsuite/gcc.dg/torture/pr60606-1.c
-@@ -0,0 +1,9 @@
-+/* { dg-do compile } */
-+/* { dg-options "-ffat-lto-objects" } */
-+
-+int
-+f (void)
-+{
-+ register unsigned int r asm ("no-such-register"); /* { dg-error "invalid register name" } */
-+ return r;
-+}
---- a/src/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-6.c
-+++ b/src/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-6.c
-@@ -0,0 +1,43 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O2 -fdump-tree-dom1-details" } */
-+/* { dg-final { scan-tree-dump-times "FSM" 6 "dom1" } } */
-+/* { dg-final { cleanup-tree-dump "dom1" } } */
-+
-+int sum0, sum1, sum2, sum3;
-+int foo (char *s, char **ret)
-+{
-+ int state=0;
-+ char c;
-+
-+ for (; *s && state != 4; s++)
-+ {
-+ c = *s;
-+ if (c == '*')
-+ {
-+ s++;
-+ break;
-+ }
-+ switch (state)
-+ {
-+ case 0:
-+ if (c == '+')
-+ state = 1;
-+ else if (c != '-')
-+ sum0+=c;
-+ break;
-+ case 1:
-+ if (c == '+')
-+ state = 2;
-+ else if (c == '-')
-+ state = 0;
-+ else
-+ sum1+=c;
-+ break;
-+ default:
-+ break;
-+ }
-+
-+ }
-+ *ret = s;
-+ return state;
-+}
---- a/src/gcc/testsuite/gcc.dg/tree-ssa/pr42585.c
-+++ b/src/gcc/testsuite/gcc.dg/tree-ssa/pr42585.c
-@@ -35,6 +35,6 @@
- /* Whether the structs are totally scalarized or not depends on the
- MOVE_RATIO macro definition in the back end. The scalarization will
- not take place when using small values for MOVE_RATIO. */
--/* { dg-final { scan-tree-dump-times "struct _fat_ptr _ans" 0 "optimized" { target { ! "arm*-*-* avr-*-* nds32*-*-* powerpc*-*-* s390*-*-* sh*-*-*" } } } } */
--/* { dg-final { scan-tree-dump-times "struct _fat_ptr _T2" 0 "optimized" { target { ! "arm*-*-* avr-*-* nds32*-*-* powerpc*-*-* s390*-*-* sh*-*-*" } } } } */
-+/* { dg-final { scan-tree-dump-times "struct _fat_ptr _ans" 0 "optimized" { target { ! "aarch64*-*-* arm*-*-* avr-*-* nds32*-*-* powerpc*-*-* s390*-*-* sh*-*-*" } } } } */
-+/* { dg-final { scan-tree-dump-times "struct _fat_ptr _T2" 0 "optimized" { target { ! "aarch64*-*-* arm*-*-* avr-*-* nds32*-*-* powerpc*-*-* s390*-*-* sh*-*-*" } } } } */
- /* { dg-final { cleanup-tree-dump "optimized" } } */
---- a/src/gcc/testsuite/gcc.dg/tree-ssa/sra-12.c
-+++ b/src/gcc/testsuite/gcc.dg/tree-ssa/sra-12.c
-@@ -21,5 +21,5 @@
- *p = l;
- }
-
--/* { dg-final { scan-tree-dump-times "l;" 0 "release_ssa" { target { ! "avr*-*-* nds32*-*-*" } } } } */
-+/* { dg-final { scan-tree-dump-times "l;" 0 "release_ssa" { target { ! "aarch64*-*-* avr*-*-* nds32*-*-*" } } } } */
- /* { dg-final { cleanup-tree-dump "release_ssa" } } */
---- a/src/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c
-+++ b/src/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c
-@@ -0,0 +1,127 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O2 -fdump-tree-dom1-details" } */
-+/* { dg-final { scan-tree-dump-times "FSM" 19 "dom1" } } */
-+/* { dg-final { cleanup-tree-dump "dom1" } } */
-+
-+enum STATE {
-+ S0=0,
-+ SI,
-+ S1,
-+ S2,
-+ S3,
-+ S4,
-+ S5,
-+ S6
-+};
-+
-+int bar (enum STATE s);
-+
-+enum STATE foo (unsigned char **y, unsigned *c)
-+{
-+ unsigned char *x = *y;
-+ unsigned char n;
-+ enum STATE s = S0;
-+
-+ for( ; *x && s != SI; x++ )
-+ {
-+ n = *x;
-+ if (n == 'x')
-+ {
-+ x++;
-+ break;
-+ }
-+ switch(s)
-+ {
-+ case S0:
-+ if(bar(n))
-+ s = S3;
-+ else if( n == 'a' || n == 'b' )
-+ s = S1;
-+ else if( n == 'c' )
-+ s = S4;
-+ else
-+ {
-+ s = SI;
-+ c[SI]++;
-+ }
-+ c[S0]++;
-+ break;
-+ case S1:
-+ if(bar(n))
-+ {
-+ s = S3;
-+ c[S1]++;
-+ }
-+ else if( n == 'c' )
-+ {
-+ s = S4;
-+ c[S1]++;
-+ }
-+ else
-+ {
-+ s = SI;
-+ c[S1]++;
-+ }
-+ break;
-+ case S3:
-+ if( n == 'c' )
-+ {
-+ s = S4;
-+ c[S3]++;
-+ }
-+ else if(!bar(n))
-+ {
-+ s = SI;
-+ c[S3]++;
-+ }
-+ break;
-+ case S4:
-+ if( n == 'E' || n == 'e' )
-+ {
-+ s = S2;
-+ c[S4]++;
-+ }
-+ else if(!bar(n))
-+ {
-+ s = SI;
-+ c[S4]++;
-+ }
-+ break;
-+ case S2:
-+ if( n == 'a' || n == 'b' )
-+ {
-+ s = S5;
-+ c[S2]++;
-+ }
-+ else
-+ {
-+ s = SI;
-+ c[S2]++;
-+ }
-+ break;
-+ case S5:
-+ if(bar(n))
-+ {
-+ s = S6;
-+ c[S5]++;
-+ }
-+ else
-+ {
-+ s = SI;
-+ c[S5]++;
-+ }
-+ break;
-+ case S6:
-+ if(!bar(n))
-+ {
-+ s = SI;
-+ c[SI]++;
-+ }
-+ break;
-+ default:
-+ break;
-+ }
-+ }
-+ *y=x;
-+ return s;
-+}
---- a/src/gcc/testsuite/gcc.dg/pr60114.c
-+++ b/src/gcc/testsuite/gcc.dg/pr60114.c
-@@ -0,0 +1,31 @@
-+/* PR c/60114 */
-+/* { dg-do compile } */
-+/* { dg-options "-Wconversion" } */
-+
-+struct S { int n, u[2]; };
-+const signed char z[] = {
-+ [0] = 0x100, /* { dg-warning "9:overflow in implicit constant conversion" } */
-+ [2] = 0x101, /* { dg-warning "9:overflow in implicit constant conversion" } */
-+};
-+int A[] = {
-+ 0, 0x80000000, /* { dg-warning "16:conversion of unsigned constant value to negative integer" } */
-+ 0xA, 0x80000000, /* { dg-warning "18:conversion of unsigned constant value to negative integer" } */
-+ 0xA, 0xA, 0x80000000 /* { dg-warning "23:conversion of unsigned constant value to negative integer" } */
-+ };
-+int *p = (int []) { 0x80000000 }; /* { dg-warning "21:conversion of unsigned constant value to negative integer" } */
-+union { int k; } u = { .k = 0x80000000 }; /* { dg-warning "29:conversion of unsigned constant value to negative integer" } */
-+typedef int H[];
-+void
-+foo (void)
-+{
-+ signed char a[][3] = { { 0x100, /* { dg-warning "28:overflow in implicit constant conversion" } */
-+ 1, 0x100 }, /* { dg-warning "24:overflow in implicit constant conversion" } */
-+ { '\0', 0x100, '\0' } /* { dg-warning "27:overflow in implicit constant conversion" } */
-+ };
-+ (const signed char []) { 0x100 }; /* { dg-warning "28:overflow in implicit constant conversion" } */
-+ (const float []) { 1e0, 1e1, 1e100 }; /* { dg-warning "32:conversion" } */
-+ struct S s1 = { 0x80000000 }; /* { dg-warning "19:conversion of unsigned constant value to negative integer" } */
-+ struct S s2 = { .n = 0x80000000 }; /* { dg-warning "24:conversion of unsigned constant value to negative integer" } */
-+ struct S s3 = { .u[1] = 0x80000000 }; /* { dg-warning "27:conversion of unsigned constant value to negative integer" } */
-+ H h = { 1, 2, 0x80000000 }; /* { dg-warning "17:conversion of unsigned constant value to negative integer" } */
-+}
---- a/src/gcc/testsuite/gcc.dg/vect/vect-reduc-mul_1.c
-+++ b/src/gcc/testsuite/gcc.dg/vect/vect-reduc-mul_1.c
-@@ -0,0 +1,36 @@
-+/* { dg-require-effective-target vect_int_mult } */
-+/* { dg-require-effective-target whole_vector_shift } */
-+
-+/* Write a reduction loop to be reduced using vector shifts. */
-+
-+extern void abort(void);
-+
-+unsigned char in[16];
-+
-+int
-+main (unsigned char argc, char **argv)
-+{
-+ unsigned char i = 0;
-+ unsigned char sum = 1;
-+
-+ for (i = 0; i < 16; i++)
-+ in[i] = i + i + 1;
-+
-+ /* Prevent constant propagation of the entire loop below. */
-+ asm volatile ("" : : : "memory");
-+
-+ for (i = 0; i < 16; i++)
-+ sum *= in[i];
-+
-+ if (sum != 33)
-+ {
-+ __builtin_printf("Failed %d\n", sum);
-+ abort();
-+ }
-+
-+ return 0;
-+}
-+
-+/* { dg-final { scan-tree-dump "Reduce using vector shifts" "vect" } } */
-+/* { dg-final { cleanup-tree-dump "vect" } } */
-+
---- a/src/gcc/testsuite/gcc.dg/vect/vect-reduc-mul_2.c
-+++ b/src/gcc/testsuite/gcc.dg/vect/vect-reduc-mul_2.c
-@@ -0,0 +1,32 @@
-+/* { dg-require-effective-target vect_int_mult } */
-+/* { dg-require-effective-target whole_vector_shift } */
-+
-+/* Write a reduction loop to be reduced using vector shifts and folded. */
-+
-+extern void abort(void);
-+
-+int
-+main (unsigned char argc, char **argv)
-+{
-+ unsigned char in[16];
-+ unsigned char i = 0;
-+ unsigned char sum = 1;
-+
-+ for (i = 0; i < 16; i++)
-+ in[i] = i + i + 1;
-+
-+ for (i = 0; i < 16; i++)
-+ sum *= in[i];
-+
-+ if (sum != 33)
-+ {
-+ __builtin_printf("Failed %d\n", sum);
-+ abort();
-+ }
-+
-+ return 0;
-+}
-+
-+/* { dg-final { scan-tree-dump "Reduce using vector shifts" "vect" } } */
-+/* { dg-final { cleanup-tree-dump "vect" } } */
-+
---- a/src/gcc/testsuite/gcc.dg/vect/vect-reduc-or_1.c
-+++ b/src/gcc/testsuite/gcc.dg/vect/vect-reduc-or_1.c
-@@ -0,0 +1,35 @@
-+/* { dg-require-effective-target whole_vector_shift } */
-+
-+/* Write a reduction loop to be reduced using vector shifts. */
-+
-+extern void abort(void);
-+
-+unsigned char in[16] __attribute__((__aligned__(16)));
-+
-+int
-+main (unsigned char argc, char **argv)
-+{
-+ unsigned char i = 0;
-+ unsigned char sum = 1;
-+
-+ for (i = 0; i < 16; i++)
-+ in[i] = (i + i + 1) & 0xfd;
-+
-+ /* Prevent constant propagation of the entire loop below. */
-+ asm volatile ("" : : : "memory");
-+
-+ for (i = 0; i < 16; i++)
-+ sum |= in[i];
-+
-+ if (sum != 29)
-+ {
-+ __builtin_printf("Failed %d\n", sum);
-+ abort();
-+ }
-+
-+ return 0;
-+}
-+
-+/* { dg-final { scan-tree-dump "Reduce using vector shifts" "vect" } } */
-+/* { dg-final { cleanup-tree-dump "vect" } } */
-+
---- a/src/gcc/testsuite/gcc.dg/vect/vect-bswap32.c
-+++ b/src/gcc/testsuite/gcc.dg/vect/vect-bswap32.c
-@@ -0,0 +1,44 @@
-+/* { dg-require-effective-target vect_bswap } */
-+
-+#include "tree-vect.h"
-+
-+#define N 128
-+
-+volatile int y = 0;
-+
-+static inline void
-+vfoo32 (unsigned int* a)
-+{
-+ int i = 0;
-+ for (i = 0; i < N; ++i)
-+ a[i] = __builtin_bswap32 (a[i]);
-+}
-+
-+int
-+main (void)
-+{
-+ unsigned int arr[N];
-+ unsigned int expect[N];
-+ int i;
-+
-+ for (i = 0; i < N; ++i)
-+ {
-+ arr[i] = i;
-+ expect[i] = __builtin_bswap32 (i);
-+ if (y) /* Avoid vectorisation. */
-+ abort ();
-+ }
-+
-+ vfoo32 (arr);
-+
-+ for (i = 0; i < N; ++i)
-+ {
-+ if (arr[i] != expect[i])
-+ abort ();
-+ }
-+
-+ return 0;
-+}
-+
-+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
-+/* { dg-final { cleanup-tree-dump "vect" } } */
---- a/src/gcc/testsuite/gcc.dg/vect/vect-reduc-or_2.c
-+++ b/src/gcc/testsuite/gcc.dg/vect/vect-reduc-or_2.c
-@@ -0,0 +1,31 @@
-+/* { dg-require-effective-target whole_vector_shift } */
-+
-+/* Write a reduction loop to be reduced using vector shifts and folded. */
-+
-+extern void abort(void);
-+
-+int
-+main (unsigned char argc, char **argv)
-+{
-+ unsigned char in[16] __attribute__((aligned(16)));
-+ unsigned char i = 0;
-+ unsigned char sum = 1;
-+
-+ for (i = 0; i < 16; i++)
-+ in[i] = (i + i + 1) & 0xfd;
-+
-+ for (i = 0; i < 16; i++)
-+ sum |= in[i];
-+
-+ if (sum != 29)
-+ {
-+ __builtin_printf("Failed %d\n", sum);
-+ abort();
-+ }
-+
-+ return 0;
-+}
-+
-+/* { dg-final { scan-tree-dump "Reduce using vector shifts" "vect" } } */
-+/* { dg-final { cleanup-tree-dump "vect" } } */
-+
---- a/src/gcc/testsuite/gcc.dg/vect/vect-bswap16.c
-+++ b/src/gcc/testsuite/gcc.dg/vect/vect-bswap16.c
-@@ -0,0 +1,44 @@
-+/* { dg-require-effective-target vect_bswap } */
-+
-+#include "tree-vect.h"
-+
-+#define N 128
-+
-+volatile int y = 0;
-+
-+static inline void
-+vfoo16 (unsigned short int* a)
-+{
-+ int i = 0;
-+ for (i = 0; i < N; ++i)
-+ a[i] = __builtin_bswap16 (a[i]);
-+}
-+
-+int
-+main (void)
-+{
-+ unsigned short arr[N];
-+ unsigned short expect[N];
-+ int i;
-+
-+ for (i = 0; i < N; ++i)
-+ {
-+ arr[i] = i;
-+ expect[i] = __builtin_bswap16 (i);
-+ if (y) /* Avoid vectorisation. */
-+ abort ();
-+ }
-+
-+ vfoo16 (arr);
-+
-+ for (i = 0; i < N; ++i)
-+ {
-+ if (arr[i] != expect[i])
-+ abort ();
-+ }
-+
-+ return 0;
-+}
-+
-+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
-+/* { dg-final { cleanup-tree-dump "vect" } } */
---- a/src/gcc/testsuite/gcc.dg/vect/vect-bswap64.c
-+++ b/src/gcc/testsuite/gcc.dg/vect/vect-bswap64.c
-@@ -0,0 +1,44 @@
-+/* { dg-require-effective-target vect_bswap } */
-+
-+#include "tree-vect.h"
-+
-+#define N 128
-+
-+volatile int y = 0;
-+
-+static inline void
-+vfoo64 (unsigned long long* a)
-+{
-+ int i = 0;
-+ for (i = 0; i < N; ++i)
-+ a[i] = __builtin_bswap64 (a[i]);
-+}
-+
-+int
-+main (void)
-+{
-+ unsigned long long arr[N];
-+ unsigned long long expect[N];
-+ int i;
-+
-+ for (i = 0; i < N; ++i)
-+ {
-+ arr[i] = i;
-+ expect[i] = __builtin_bswap64 (i);
-+ if (y) /* Avoid vectorisation. */
-+ abort ();
-+ }
-+
-+ vfoo64 (arr);
-+
-+ for (i = 0; i < N; ++i)
-+ {
-+ if (arr[i] != expect[i])
-+ abort ();
-+ }
-+
-+ return 0;
-+}
-+
-+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
-+/* { dg-final { cleanup-tree-dump "vect" } } */
---- a/src/gcc/testsuite/gcc.dg/ssp-3.c
-+++ b/src/gcc/testsuite/gcc.dg/ssp-3.c
-@@ -0,0 +1,16 @@
-+/* { dg-do assemble } */
-+/* { dg-options "-fstack-protector-strong -O1 -frename-registers" } */
-+/* { dg-require-effective-target fstack_protector } */
-+
-+extern int bar (const char *s, int *argc);
-+extern int baz (const char *s);
-+
-+char
-+foo (const char *s)
-+{
-+ int argc;
-+ int ret;
-+ if ( !bar (s, &argc))
-+ ret = baz (s);
-+ return *s;
-+}
---- a/src/gcc/testsuite/g++.dg/ipa/devirt-25.C
-+++ b/src/gcc/testsuite/g++.dg/ipa/devirt-25.C
-@@ -1,5 +1,6 @@
- /* { dg-do compile } */
- /* { dg-options "-O3 -fdump-ipa-cp" } */
-+/* { dg-add-options bind_pic_locally } */
-
- class ert_RefCounter {
- protected:
---- a/src/gcc/objcp/ChangeLog.linaro
-+++ b/src/gcc/objcp/ChangeLog.linaro
-@@ -0,0 +1,51 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/gcc/cp/ChangeLog.linaro
-+++ b/src/gcc/cp/ChangeLog.linaro
-@@ -0,0 +1,51 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/gcc/compare-elim.c
-+++ b/src/gcc/compare-elim.c
-@@ -100,6 +100,9 @@
- constants. */
- rtx in_a, in_b;
-
-+ /* The REG_EH_REGION of the comparison. */
-+ rtx eh_note;
-+
- /* Information about how this comparison is used. */
- struct comparison_use uses[MAX_CMP_USE];
-
-@@ -262,6 +265,7 @@
- struct comparison *last_cmp;
- rtx insn, next, last_clobber;
- bool last_cmp_valid;
-+ bool need_purge = false;
- bitmap killed;
-
- killed = BITMAP_ALLOC (NULL);
-@@ -303,44 +307,60 @@
- if (src)
- {
- enum machine_mode src_mode = GET_MODE (src);
-+ rtx eh_note = NULL;
-
-- /* Eliminate a compare that's redundant with the previous. */
-- if (last_cmp_valid
-- && rtx_equal_p (last_cmp->in_a, XEXP (src, 0))
-- && rtx_equal_p (last_cmp->in_b, XEXP (src, 1)))
-- {
-- rtx flags, x;
-- enum machine_mode new_mode
-- = targetm.cc_modes_compatible (last_cmp->orig_mode, src_mode);
-+ if (flag_non_call_exceptions)
-+ eh_note = find_reg_note (insn, REG_EH_REGION, NULL);
-
-- /* New mode is incompatible with the previous compare mode. */
-- if (new_mode == VOIDmode)
-- continue;
-+ if (!last_cmp_valid)
-+ goto dont_delete;
-
-- if (new_mode != last_cmp->orig_mode)
-- {
-- flags = gen_rtx_REG (src_mode, targetm.flags_regnum);
-+ /* Take care that it's in the same EH region. */
-+ if (flag_non_call_exceptions
-+ && !rtx_equal_p (eh_note, last_cmp->eh_note))
-+ goto dont_delete;
-
-- /* Generate new comparison for substitution. */
-- x = gen_rtx_COMPARE (new_mode, XEXP (src, 0), XEXP (src, 1));
-- x = gen_rtx_SET (VOIDmode, flags, x);
-+ /* Make sure the compare is redundant with the previous. */
-+ if (!rtx_equal_p (last_cmp->in_a, XEXP (src, 0))
-+ || !rtx_equal_p (last_cmp->in_b, XEXP (src, 1)))
-+ goto dont_delete;
-
-- if (!validate_change (last_cmp->insn,
-- &PATTERN (last_cmp->insn), x, false))
-- continue;
-+ /* New mode must be compatible with the previous compare mode. */
-+ {
-+ enum machine_mode new_mode
-+ = targetm.cc_modes_compatible (last_cmp->orig_mode, src_mode);
-+ if (new_mode == VOIDmode)
-+ goto dont_delete;
-
-- last_cmp->orig_mode = new_mode;
-- }
-+ if (new_mode != last_cmp->orig_mode)
-+ {
-+ rtx x, flags = gen_rtx_REG (src_mode, targetm.flags_regnum);
-
-- delete_insn (insn);
-- continue;
-- }
-+ /* Generate new comparison for substitution. */
-+ x = gen_rtx_COMPARE (new_mode, XEXP (src, 0), XEXP (src, 1));
-+ x = gen_rtx_SET (VOIDmode, flags, x);
-
-+ if (!validate_change (last_cmp->insn,
-+ &PATTERN (last_cmp->insn), x, false))
-+ goto dont_delete;
-+
-+ last_cmp->orig_mode = new_mode;
-+ }
-+ }
-+
-+ /* All tests and substitutions succeeded! */
-+ if (eh_note)
-+ need_purge = true;
-+ delete_insn (insn);
-+ continue;
-+
-+ dont_delete:
- last_cmp = XCNEW (struct comparison);
- last_cmp->insn = insn;
- last_cmp->prev_clobber = last_clobber;
- last_cmp->in_a = XEXP (src, 0);
- last_cmp->in_b = XEXP (src, 1);
-+ last_cmp->eh_note = eh_note;
- last_cmp->orig_mode = src_mode;
- all_compares.safe_push (last_cmp);
-
-@@ -404,6 +424,11 @@
- }
- }
- }
-+
-+ /* If we deleted a compare with a REG_EH_REGION note, we may need to
-+ remove EH edges. */
-+ if (need_purge)
-+ purge_dead_edges (bb);
- }
-
- /* Find all comparisons in the function. */
---- a/src/gcc/ira-int.h
-+++ b/src/gcc/ira-int.h
-@@ -281,6 +281,9 @@
- /* Mode of the allocno which is the mode of the corresponding
- pseudo-register. */
- ENUM_BITFIELD (machine_mode) mode : 8;
-+ /* Widest mode of the allocno which in at least one case could be
-+ for paradoxical subregs where wmode > mode. */
-+ ENUM_BITFIELD (machine_mode) wmode : 8;
- /* Register class which should be used for allocation for given
- allocno. NO_REGS means that we should use memory. */
- ENUM_BITFIELD (reg_class) aclass : 16;
-@@ -313,7 +316,7 @@
- number (0, ...) - 2. Value -1 is used for allocnos spilled by the
- reload (at this point pseudo-register has only one allocno) which
- did not get stack slot yet. */
-- short int hard_regno;
-+ signed int hard_regno : 16;
- /* Allocnos with the same regno are linked by the following member.
- Allocnos corresponding to inner loops are first in the list (it
- corresponds to depth-first traverse of the loops). */
-@@ -430,6 +433,7 @@
- #define ALLOCNO_BAD_SPILL_P(A) ((A)->bad_spill_p)
- #define ALLOCNO_ASSIGNED_P(A) ((A)->assigned_p)
- #define ALLOCNO_MODE(A) ((A)->mode)
-+#define ALLOCNO_WMODE(A) ((A)->wmode)
- #define ALLOCNO_PREFS(A) ((A)->allocno_prefs)
- #define ALLOCNO_COPIES(A) ((A)->allocno_copies)
- #define ALLOCNO_HARD_REG_COSTS(A) ((A)->hard_reg_costs)
---- a/src/gcc/ira-color.c
-+++ b/src/gcc/ira-color.c
-@@ -1711,6 +1711,7 @@
- {
- ira_allocno_t conflict_a = OBJECT_ALLOCNO (conflict_obj);
- enum reg_class conflict_aclass;
-+ allocno_color_data_t data = ALLOCNO_COLOR_DATA (conflict_a);
-
- /* Reload can give another class so we need to check all
- allocnos. */
-@@ -1782,7 +1783,12 @@
- hard_regno = ira_class_hard_regs[aclass][j];
- ira_assert (hard_regno >= 0);
- k = ira_class_hard_reg_index[conflict_aclass][hard_regno];
-- if (k < 0)
-+ if (k < 0
-+ /* If HARD_REGNO is not available for CONFLICT_A,
-+ the conflict would be ignored, since HARD_REGNO
-+ will never be assigned to CONFLICT_A. */
-+ || !TEST_HARD_REG_BIT (data->profitable_hard_regs,
-+ hard_regno))
- continue;
- full_costs[j] -= conflict_costs[k];
- }
---- a/src/gcc/ifcvt.c
-+++ b/src/gcc/ifcvt.c
-@@ -1432,10 +1432,17 @@
- end_sequence ();
- }
-
-- /* Don't even try if the comparison operands are weird. */
-+ /* Don't even try if the comparison operands are weird
-+ except that the target supports cbranchcc4. */
- if (! general_operand (cmp_a, GET_MODE (cmp_a))
- || ! general_operand (cmp_b, GET_MODE (cmp_b)))
-- return NULL_RTX;
-+ {
-+#if HAVE_cbranchcc4
-+ if (GET_MODE_CLASS (GET_MODE (cmp_a)) != MODE_CC
-+ || cmp_b != const0_rtx)
-+#endif
-+ return NULL_RTX;
-+ }
-
- #if HAVE_conditional_move
- unsignedp = (code == LTU || code == GEU
-@@ -1753,7 +1760,12 @@
- {
- rtx cond, set, insn;
- int reverse;
-+ int allow_cc_mode = false;
-+#if HAVE_cbranchcc4
-+ allow_cc_mode = true;
-+#endif
-
-+
- /* If target is already mentioned in the known condition, return it. */
- if (reg_mentioned_p (target, if_info->cond))
- {
-@@ -1874,7 +1886,7 @@
- }
-
- cond = canonicalize_condition (if_info->jump, cond, reverse,
-- earliest, target, false, true);
-+ earliest, target, allow_cc_mode, true);
- if (! cond || ! reg_mentioned_p (target, cond))
- return NULL;
-
-@@ -2325,6 +2337,10 @@
- {
- rtx cond, set, tmp;
- bool reverse;
-+ int allow_cc_mode = false;
-+#if HAVE_cbranchcc4
-+ allow_cc_mode = true;
-+#endif
-
- if (! any_condjump_p (jump))
- return NULL_RTX;
-@@ -2361,7 +2377,7 @@
- /* Otherwise, fall back on canonicalize_condition to do the dirty
- work of manipulating MODE_CC values and COMPARE rtx codes. */
- tmp = canonicalize_condition (jump, cond, reverse, earliest,
-- NULL_RTX, false, true);
-+ NULL_RTX, allow_cc_mode, true);
-
- /* We don't handle side-effects in the condition, like handling
- REG_INC notes and making sure no duplicate conditions are emitted. */
---- a/src/gcc/expr.c
-+++ b/src/gcc/expr.c
-@@ -68,22 +68,6 @@
- #include "tree-ssa-address.h"
- #include "cfgexpand.h"
-
--/* Decide whether a function's arguments should be processed
-- from first to last or from last to first.
--
-- They should if the stack and args grow in opposite directions, but
-- only if we have push insns. */
--
--#ifdef PUSH_ROUNDING
--
--#ifndef PUSH_ARGS_REVERSED
--#if defined (STACK_GROWS_DOWNWARD) != defined (ARGS_GROW_DOWNWARD)
--#define PUSH_ARGS_REVERSED /* If it's last to first. */
--#endif
--#endif
--
--#endif
--
- #ifndef STACK_PUSH_CODE
- #ifdef STACK_GROWS_DOWNWARD
- #define STACK_PUSH_CODE PRE_DEC
-@@ -172,37 +156,6 @@
- static rtx const_vector_from_tree (tree);
- static void write_complex_part (rtx, rtx, bool);
-
--/* This macro is used to determine whether move_by_pieces should be called
-- to perform a structure copy. */
--#ifndef MOVE_BY_PIECES_P
--#define MOVE_BY_PIECES_P(SIZE, ALIGN) \
-- (move_by_pieces_ninsns (SIZE, ALIGN, MOVE_MAX_PIECES + 1) \
-- < (unsigned int) MOVE_RATIO (optimize_insn_for_speed_p ()))
--#endif
--
--/* This macro is used to determine whether clear_by_pieces should be
-- called to clear storage. */
--#ifndef CLEAR_BY_PIECES_P
--#define CLEAR_BY_PIECES_P(SIZE, ALIGN) \
-- (move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES + 1) \
-- < (unsigned int) CLEAR_RATIO (optimize_insn_for_speed_p ()))
--#endif
--
--/* This macro is used to determine whether store_by_pieces should be
-- called to "memset" storage with byte values other than zero. */
--#ifndef SET_BY_PIECES_P
--#define SET_BY_PIECES_P(SIZE, ALIGN) \
-- (move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES + 1) \
-- < (unsigned int) SET_RATIO (optimize_insn_for_speed_p ()))
--#endif
--
--/* This macro is used to determine whether store_by_pieces should be
-- called to "memcpy" storage when the source is a constant string. */
--#ifndef STORE_BY_PIECES_P
--#define STORE_BY_PIECES_P(SIZE, ALIGN) \
-- (move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES + 1) \
-- < (unsigned int) MOVE_RATIO (optimize_insn_for_speed_p ()))
--#endif
-
- /* This is run to set up which modes can be used
- directly in memory and to initialize the block move optab. It is run
-@@ -843,22 +796,16 @@
- return mode;
- }
-
--/* STORE_MAX_PIECES is the number of bytes at a time that we can
-- store efficiently. Due to internal GCC limitations, this is
-- MOVE_MAX_PIECES limited by the number of bytes GCC can represent
-- for an immediate constant. */
--
--#define STORE_MAX_PIECES MIN (MOVE_MAX_PIECES, 2 * sizeof (HOST_WIDE_INT))
--
- /* Determine whether the LEN bytes can be moved by using several move
- instructions. Return nonzero if a call to move_by_pieces should
- succeed. */
-
- int
--can_move_by_pieces (unsigned HOST_WIDE_INT len ATTRIBUTE_UNUSED,
-- unsigned int align ATTRIBUTE_UNUSED)
-+can_move_by_pieces (unsigned HOST_WIDE_INT len,
-+ unsigned int align)
- {
-- return MOVE_BY_PIECES_P (len, align);
-+ return targetm.use_by_pieces_infrastructure_p (len, align, MOVE_BY_PIECES,
-+ optimize_insn_for_speed_p ());
- }
-
- /* Generate several move instructions to copy LEN bytes from block FROM to
-@@ -1195,7 +1142,7 @@
- set_mem_size (y, INTVAL (size));
- }
-
-- if (CONST_INT_P (size) && MOVE_BY_PIECES_P (INTVAL (size), align))
-+ if (CONST_INT_P (size) && can_move_by_pieces (INTVAL (size), align))
- move_by_pieces (x, y, INTVAL (size), align, 0);
- else if (emit_block_move_via_movmem (x, y, size, align,
- expected_align, expected_size,
-@@ -2396,6 +2343,18 @@
- = gen_rtx_EXPR_LIST (mode, gen_rtx_USE (VOIDmode, reg), *call_fusage);
- }
-
-+/* Add a CLOBBER expression for REG to the (possibly empty) list pointed
-+ to by CALL_FUSAGE. REG must denote a hard register. */
-+
-+void
-+clobber_reg_mode (rtx *call_fusage, rtx reg, enum machine_mode mode)
-+{
-+ gcc_assert (REG_P (reg) && REGNO (reg) < FIRST_PSEUDO_REGISTER);
-+
-+ *call_fusage
-+ = gen_rtx_EXPR_LIST (mode, gen_rtx_CLOBBER (VOIDmode, reg), *call_fusage);
-+}
-+
- /* Add USE expressions to *CALL_FUSAGE for each of NREGS consecutive regs,
- starting at REGNO. All of these registers must be hard registers. */
-
-@@ -2498,9 +2457,11 @@
- if (len == 0)
- return 1;
-
-- if (! (memsetp
-- ? SET_BY_PIECES_P (len, align)
-- : STORE_BY_PIECES_P (len, align)))
-+ if (!targetm.use_by_pieces_infrastructure_p (len, align,
-+ memsetp
-+ ? SET_BY_PIECES
-+ : STORE_BY_PIECES,
-+ optimize_insn_for_speed_p ()))
- return 0;
-
- align = alignment_for_piecewise_move (STORE_MAX_PIECES, align);
-@@ -2576,9 +2537,13 @@
- return to;
- }
-
-- gcc_assert (memsetp
-- ? SET_BY_PIECES_P (len, align)
-- : STORE_BY_PIECES_P (len, align));
-+ gcc_assert (targetm.use_by_pieces_infrastructure_p
-+ (len, align,
-+ memsetp
-+ ? SET_BY_PIECES
-+ : STORE_BY_PIECES,
-+ optimize_insn_for_speed_p ()));
-+
- data.constfun = constfun;
- data.constfundata = constfundata;
- data.len = len;
-@@ -2815,7 +2780,9 @@
- align = MEM_ALIGN (object);
-
- if (CONST_INT_P (size)
-- && CLEAR_BY_PIECES_P (INTVAL (size), align))
-+ && targetm.use_by_pieces_infrastructure_p (INTVAL (size), align,
-+ CLEAR_BY_PIECES,
-+ optimize_insn_for_speed_p ()))
- clear_by_pieces (object, INTVAL (size), align);
- else if (set_storage_via_setmem (object, size, const0_rtx, align,
- expected_align, expected_size,
-@@ -4221,7 +4188,7 @@
- && CONST_INT_P (size)
- && skip == 0
- && MEM_ALIGN (xinner) >= align
-- && (MOVE_BY_PIECES_P ((unsigned) INTVAL (size) - used, align))
-+ && can_move_by_pieces ((unsigned) INTVAL (size) - used, align)
- /* Here we avoid the case of a structure whose weak alignment
- forces many pushes of a small amount of data,
- and such small pushes do rounding that causes trouble. */
-@@ -4353,11 +4320,7 @@
- /* Loop over all the words allocated on the stack for this arg. */
- /* We can do it by words, because any scalar bigger than a word
- has a size a multiple of a word. */
--#ifndef PUSH_ARGS_REVERSED
-- for (i = not_stack; i < size; i++)
--#else
- for (i = size - 1; i >= not_stack; i--)
--#endif
- if (i >= not_stack + offset)
- emit_push_insn (operand_subword_force (x, i, mode),
- word_mode, NULL_TREE, NULL_RTX, align, 0, NULL_RTX,
-@@ -7838,7 +7801,7 @@
- && ! (target != 0 && safe_from_p (target, exp, 1)))
- || TREE_ADDRESSABLE (exp)
- || (tree_fits_uhwi_p (TYPE_SIZE_UNIT (type))
-- && (! MOVE_BY_PIECES_P
-+ && (! can_move_by_pieces
- (tree_to_uhwi (TYPE_SIZE_UNIT (type)),
- TYPE_ALIGN (type)))
- && ! mostly_zeros_p (exp))))
---- a/src/gcc/expr.h
-+++ b/src/gcc/expr.h
-@@ -346,6 +346,7 @@
- /* Mark REG as holding a parameter for the next CALL_INSN.
- Mode is TYPE_MODE of the non-promoted parameter, or VOIDmode. */
- extern void use_reg_mode (rtx *, rtx, enum machine_mode);
-+extern void clobber_reg_mode (rtx *, rtx, enum machine_mode);
-
- extern rtx copy_blkmode_to_reg (enum machine_mode, tree);
-
-@@ -356,6 +357,13 @@
- use_reg_mode (fusage, reg, VOIDmode);
- }
-
-+/* Mark REG as clobbered by the call with FUSAGE as CALL_INSN_FUNCTION_USAGE. */
-+static inline void
-+clobber_reg (rtx *fusage, rtx reg)
-+{
-+ clobber_reg_mode (fusage, reg, VOIDmode);
-+}
-+
- /* Mark NREGS consecutive regs, starting at REGNO, as holding parameters
- for the next CALL_INSN. */
- extern void use_regs (rtx *, int, int);
---- a/src/gcc/go/ChangeLog.linaro
-+++ b/src/gcc/go/ChangeLog.linaro
-@@ -0,0 +1,51 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/gcc/genattrtab.c
-+++ b/src/gcc/genattrtab.c
-@@ -4765,6 +4765,7 @@
-
- static struct bypass_list *all_bypasses;
- static size_t n_bypasses;
-+static size_t n_bypassed;
-
- static void
- gen_bypass_1 (const char *s, size_t len)
-@@ -4810,12 +4811,18 @@
- struct bypass_list *b;
- struct insn_reserv *r;
-
-+ n_bypassed = 0;
-+
- /* The reservation list is likely to be much longer than the bypass
- list. */
- for (r = all_insn_reservs; r; r = r->next)
- for (b = all_bypasses; b; b = b->next)
- if (fnmatch (b->pattern, r->name, 0) == 0)
-- r->bypassed = true;
-+ {
-+ n_bypassed++;
-+ r->bypassed = true;
-+ break;
-+ }
- }
-
- /* Check that attribute NAME is used in define_insn_reservation condition
-@@ -5074,7 +5081,7 @@
- process_bypasses ();
-
- byps_exp = rtx_alloc (COND);
-- XVEC (byps_exp, 0) = rtvec_alloc (n_bypasses * 2);
-+ XVEC (byps_exp, 0) = rtvec_alloc (n_bypassed * 2);
- XEXP (byps_exp, 1) = make_numeric_value (0);
- for (decl = all_insn_reservs, i = 0;
- decl;
---- a/src/gcc/ada/ChangeLog.linaro
-+++ b/src/gcc/ada/ChangeLog.linaro
-@@ -0,0 +1,95 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-05-13 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209653,209866,209871.
-+
-+ 2014-04-28 Richard Henderson <rth@redhat.com>
-+
-+ * gcc-interface/Makefile.in: Support aarch64-linux.
-+
-+ 2014-04-28 Eric Botcazou <ebotcazou@adacore.com>
-+
-+ * exp_dbug.ads (Get_External_Name): Add 'False' default to Has_Suffix,
-+ add 'Suffix' parameter and adjust comment.
-+ (Get_External_Name_With_Suffix): Delete.
-+ * exp_dbug.adb (Get_External_Name_With_Suffix): Merge into...
-+ (Get_External_Name): ...here. Add 'False' default to Has_Suffix, add
-+ 'Suffix' parameter.
-+ (Get_Encoded_Name): Remove 2nd argument in call to Get_External_Name.
-+ Call Get_External_Name instead of Get_External_Name_With_Suffix.
-+ (Get_Secondary_DT_External_Name): Likewise.
-+ * exp_cg.adb (Write_Call_Info): Likewise.
-+ * exp_disp.adb (Export_DT): Likewise.
-+ (Import_DT): Likewise.
-+ * comperr.ads (Compiler_Abort): Remove Code parameter and add From_GCC
-+ parameter with False default.
-+ * comperr.adb (Compiler_Abort): Likewise. Adjust accordingly.
-+ * types.h (Fat_Pointer): Rename into...
-+ (String_Pointer): ...this. Add comment on interfacing rules.
-+ * fe.h (Compiler_Abort): Adjust for above renaming.
-+ (Error_Msg_N): Likewise.
-+ (Error_Msg_NE): Likewise.
-+ (Get_External_Name): Likewise. Add third parameter.
-+ (Get_External_Name_With_Suffix): Delete.
-+ * gcc-interface/decl.c (STDCALL_PREFIX): Define.
-+ (create_concat_name): Adjust call to Get_External_Name, remove call to
-+ Get_External_Name_With_Suffix, use STDCALL_PREFIX, adjust for renaming.
-+ * gcc-interface/trans.c (post_error): Likewise.
-+ (post_error_ne): Likewise.
-+ * gcc-interface/misc.c (internal_error_function): Likewise.
-+
-+ 2014-04-22 Richard Henderson <rth@redhat.com>
-+
-+ * init.c [__linux__] (HAVE_GNAT_ALTERNATE_STACK): New define.
-+ (__gnat_alternate_stack): Enable for all linux except ia64.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/gcc/common/config/aarch64/aarch64-common.c
-+++ b/src/gcc/common/config/aarch64/aarch64-common.c
-@@ -44,6 +44,8 @@
- {
- /* Enable section anchors by default at -O1 or higher. */
- { OPT_LEVELS_1_PLUS, OPT_fsection_anchors, NULL, 1 },
-+ /* Enable -fsched-pressure by default when optimizing. */
-+ { OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 },
- /* Enable redundant extension instructions removal at -O2 and higher. */
- { OPT_LEVELS_2_PLUS, OPT_free, NULL, 1 },
- { OPT_LEVELS_NONE, 0, NULL, 0 }
---- a/src/gcc/fortran/ChangeLog.linaro
-+++ b/src/gcc/fortran/ChangeLog.linaro
-@@ -0,0 +1,51 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/gcc/configure.ac
-+++ b/src/gcc/configure.ac
-@@ -809,7 +809,7 @@
- )
- AC_SUBST(CONFIGURE_SPECS)
-
--ACX_PKGVERSION([GCC])
-+ACX_PKGVERSION([Linaro GCC `cat $srcdir/LINARO-VERSION`])
- ACX_BUGURL([http://gcc.gnu.org/bugs.html])
-
- # Sanity check enable_languages in case someone does not run the toplevel
---- a/src/gcc/ira-build.c
-+++ b/src/gcc/ira-build.c
-@@ -523,6 +523,7 @@
- ALLOCNO_BAD_SPILL_P (a) = false;
- ALLOCNO_ASSIGNED_P (a) = false;
- ALLOCNO_MODE (a) = (regno < 0 ? VOIDmode : PSEUDO_REGNO_MODE (regno));
-+ ALLOCNO_WMODE (a) = ALLOCNO_MODE (a);
- ALLOCNO_PREFS (a) = NULL;
- ALLOCNO_COPIES (a) = NULL;
- ALLOCNO_HARD_REG_COSTS (a) = NULL;
-@@ -892,6 +893,7 @@
- parent = ALLOCNO_LOOP_TREE_NODE (a)->parent;
- cap = ira_create_allocno (ALLOCNO_REGNO (a), true, parent);
- ALLOCNO_MODE (cap) = ALLOCNO_MODE (a);
-+ ALLOCNO_WMODE (cap) = ALLOCNO_WMODE (a);
- aclass = ALLOCNO_CLASS (a);
- ira_set_allocno_class (cap, aclass);
- ira_create_allocno_objects (cap);
-@@ -1856,9 +1858,9 @@
-
- /* This recursive function creates allocnos corresponding to
- pseudo-registers containing in X. True OUTPUT_P means that X is
-- a lvalue. */
-+ an lvalue. PARENT corresponds to the parent expression of X. */
- static void
--create_insn_allocnos (rtx x, bool output_p)
-+create_insn_allocnos (rtx x, rtx outer, bool output_p)
- {
- int i, j;
- const char *fmt;
-@@ -1873,7 +1875,15 @@
- ira_allocno_t a;
-
- if ((a = ira_curr_regno_allocno_map[regno]) == NULL)
-- a = ira_create_allocno (regno, false, ira_curr_loop_tree_node);
-+ {
-+ a = ira_create_allocno (regno, false, ira_curr_loop_tree_node);
-+ if (outer != NULL && GET_CODE (outer) == SUBREG)
-+ {
-+ enum machine_mode wmode = GET_MODE (outer);
-+ if (GET_MODE_SIZE (wmode) > GET_MODE_SIZE (ALLOCNO_WMODE (a)))
-+ ALLOCNO_WMODE (a) = wmode;
-+ }
-+ }
-
- ALLOCNO_NREFS (a)++;
- ALLOCNO_FREQ (a) += REG_FREQ_FROM_BB (curr_bb);
-@@ -1884,25 +1894,25 @@
- }
- else if (code == SET)
- {
-- create_insn_allocnos (SET_DEST (x), true);
-- create_insn_allocnos (SET_SRC (x), false);
-+ create_insn_allocnos (SET_DEST (x), NULL, true);
-+ create_insn_allocnos (SET_SRC (x), NULL, false);
- return;
- }
- else if (code == CLOBBER)
- {
-- create_insn_allocnos (XEXP (x, 0), true);
-+ create_insn_allocnos (XEXP (x, 0), NULL, true);
- return;
- }
- else if (code == MEM)
- {
-- create_insn_allocnos (XEXP (x, 0), false);
-+ create_insn_allocnos (XEXP (x, 0), NULL, false);
- return;
- }
- else if (code == PRE_DEC || code == POST_DEC || code == PRE_INC ||
- code == POST_INC || code == POST_MODIFY || code == PRE_MODIFY)
- {
-- create_insn_allocnos (XEXP (x, 0), true);
-- create_insn_allocnos (XEXP (x, 0), false);
-+ create_insn_allocnos (XEXP (x, 0), NULL, true);
-+ create_insn_allocnos (XEXP (x, 0), NULL, false);
- return;
- }
-
-@@ -1910,10 +1920,10 @@
- for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
- {
- if (fmt[i] == 'e')
-- create_insn_allocnos (XEXP (x, i), output_p);
-+ create_insn_allocnos (XEXP (x, i), x, output_p);
- else if (fmt[i] == 'E')
- for (j = 0; j < XVECLEN (x, i); j++)
-- create_insn_allocnos (XVECEXP (x, i, j), output_p);
-+ create_insn_allocnos (XVECEXP (x, i, j), x, output_p);
- }
- }
-
-@@ -1932,7 +1942,7 @@
- ira_assert (bb != NULL);
- FOR_BB_INSNS_REVERSE (bb, insn)
- if (NONDEBUG_INSN_P (insn))
-- create_insn_allocnos (PATTERN (insn), false);
-+ create_insn_allocnos (PATTERN (insn), NULL, false);
- /* It might be a allocno living through from one subloop to
- another. */
- EXECUTE_IF_SET_IN_REG_SET (df_get_live_in (bb), FIRST_PSEUDO_REGISTER, i, bi)
---- a/src/gcc/calls.c
-+++ b/src/gcc/calls.c
-@@ -1104,8 +1104,6 @@
- {
- CUMULATIVE_ARGS *args_so_far_pnt = get_cumulative_args (args_so_far);
- location_t loc = EXPR_LOCATION (exp);
-- /* 1 if scanning parms front to back, -1 if scanning back to front. */
-- int inc;
-
- /* Count arg position in order args appear. */
- int argpos;
-@@ -1116,22 +1114,9 @@
- args_size->var = 0;
-
- /* In this loop, we consider args in the order they are written.
-- We fill up ARGS from the front or from the back if necessary
-- so that in any case the first arg to be pushed ends up at the front. */
-+ We fill up ARGS from the back. */
-
-- if (PUSH_ARGS_REVERSED)
-- {
-- i = num_actuals - 1, inc = -1;
-- /* In this case, must reverse order of args
-- so that we compute and push the last arg first. */
-- }
-- else
-- {
-- i = 0, inc = 1;
-- }
--
-- /* First fill in the actual arguments in the ARGS array, splitting
-- complex arguments if necessary. */
-+ i = num_actuals - 1;
- {
- int j = i;
- call_expr_arg_iterator iter;
-@@ -1140,7 +1125,7 @@
- if (struct_value_addr_value)
- {
- args[j].tree_value = struct_value_addr_value;
-- j += inc;
-+ j--;
- }
- FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
- {
-@@ -1152,17 +1137,17 @@
- {
- tree subtype = TREE_TYPE (argtype);
- args[j].tree_value = build1 (REALPART_EXPR, subtype, arg);
-- j += inc;
-+ j--;
- args[j].tree_value = build1 (IMAGPART_EXPR, subtype, arg);
- }
- else
- args[j].tree_value = arg;
-- j += inc;
-+ j--;
- }
- }
-
- /* I counts args in order (to be) pushed; ARGPOS counts in order written. */
-- for (argpos = 0; argpos < num_actuals; i += inc, argpos++)
-+ for (argpos = 0; argpos < num_actuals; i--, argpos++)
- {
- tree type = TREE_TYPE (args[i].tree_value);
- int unsignedp;
-@@ -2952,9 +2937,8 @@
-
- compute_argument_addresses (args, argblock, num_actuals);
-
-- /* If we push args individually in reverse order, perform stack alignment
-- before the first push (the last arg). */
-- if (PUSH_ARGS_REVERSED && argblock == 0
-+ /* Perform stack alignment before the first push (the last arg). */
-+ if (argblock == 0
- && adjusted_args_size.constant > reg_parm_stack_space
- && adjusted_args_size.constant != unadjusted_args_size)
- {
-@@ -3097,12 +3081,6 @@
- sibcall_failure = 1;
- }
-
-- /* If we pushed args in forward order, perform stack alignment
-- after pushing the last arg. */
-- if (!PUSH_ARGS_REVERSED && argblock == 0)
-- anti_adjust_stack (GEN_INT (adjusted_args_size.constant
-- - unadjusted_args_size));
--
- /* If register arguments require space on the stack and stack space
- was not preallocated, allocate stack space here for arguments
- passed in registers. */
-@@ -3152,8 +3130,7 @@
- if (pass == 1 && (return_flags & ERF_RETURNS_ARG))
- {
- int arg_nr = return_flags & ERF_RETURN_ARG_MASK;
-- if (PUSH_ARGS_REVERSED)
-- arg_nr = num_actuals - arg_nr - 1;
-+ arg_nr = num_actuals - arg_nr - 1;
- if (arg_nr >= 0
- && arg_nr < num_actuals
- && args[arg_nr].reg
-@@ -3597,7 +3574,6 @@
- isn't present here, so we default to native calling abi here. */
- tree fndecl ATTRIBUTE_UNUSED = NULL_TREE; /* library calls default to host calling abi ? */
- tree fntype ATTRIBUTE_UNUSED = NULL_TREE; /* library calls default to host calling abi ? */
-- int inc;
- int count;
- rtx argblock = 0;
- CUMULATIVE_ARGS args_so_far_v;
-@@ -3946,22 +3922,13 @@
- argblock = push_block (GEN_INT (args_size.constant), 0, 0);
- }
-
-- /* If we push args individually in reverse order, perform stack alignment
-+ /* We push args individually in reverse order, perform stack alignment
- before the first push (the last arg). */
-- if (argblock == 0 && PUSH_ARGS_REVERSED)
-+ if (argblock == 0)
- anti_adjust_stack (GEN_INT (args_size.constant
- - original_args_size.constant));
-
-- if (PUSH_ARGS_REVERSED)
-- {
-- inc = -1;
-- argnum = nargs - 1;
-- }
-- else
-- {
-- inc = 1;
-- argnum = 0;
-- }
-+ argnum = nargs - 1;
-
- #ifdef REG_PARM_STACK_SPACE
- if (ACCUMULATE_OUTGOING_ARGS)
-@@ -3978,7 +3945,7 @@
-
- /* ARGNUM indexes the ARGVEC array in the order in which the arguments
- are to be pushed. */
-- for (count = 0; count < nargs; count++, argnum += inc)
-+ for (count = 0; count < nargs; count++, argnum--)
- {
- enum machine_mode mode = argvec[argnum].mode;
- rtx val = argvec[argnum].value;
-@@ -4080,17 +4047,8 @@
- }
- }
-
-- /* If we pushed args in forward order, perform stack alignment
-- after pushing the last arg. */
-- if (argblock == 0 && !PUSH_ARGS_REVERSED)
-- anti_adjust_stack (GEN_INT (args_size.constant
-- - original_args_size.constant));
-+ argnum = nargs - 1;
-
-- if (PUSH_ARGS_REVERSED)
-- argnum = nargs - 1;
-- else
-- argnum = 0;
--
- fun = prepare_call_address (NULL, fun, NULL, &call_fusage, 0, 0);
-
- /* Now load any reg parms into their regs. */
-@@ -4097,7 +4055,7 @@
-
- /* ARGNUM indexes the ARGVEC array in the order in which the arguments
- are to be pushed. */
-- for (count = 0; count < nargs; count++, argnum += inc)
-+ for (count = 0; count < nargs; count++, argnum--)
- {
- enum machine_mode mode = argvec[argnum].mode;
- rtx val = argvec[argnum].value;
---- a/src/gcc/cfgexpand.c
-+++ b/src/gcc/cfgexpand.c
-@@ -1292,7 +1292,12 @@
- else if (TREE_CODE (var) == VAR_DECL && DECL_HARD_REGISTER (var))
- {
- if (really_expand)
-- expand_one_hard_reg_var (var);
-+ {
-+ expand_one_hard_reg_var (var);
-+ if (!DECL_HARD_REGISTER (var))
-+ /* Invalid register specification. */
-+ expand_one_error_var (var);
-+ }
- }
- else if (use_register_for_decl (var))
- {
---- a/src/gcc/explow.c
-+++ b/src/gcc/explow.c
-@@ -329,11 +329,13 @@
- an address in the address space's address mode, or vice versa (TO_MODE says
- which way). We take advantage of the fact that pointers are not allowed to
- overflow by commuting arithmetic operations over conversions so that address
-- arithmetic insns can be used. */
-+ arithmetic insns can be used. IN_CONST is true if this conversion is inside
-+ a CONST. */
-
--rtx
--convert_memory_address_addr_space (enum machine_mode to_mode ATTRIBUTE_UNUSED,
-- rtx x, addr_space_t as ATTRIBUTE_UNUSED)
-+static rtx
-+convert_memory_address_addr_space_1 (enum machine_mode to_mode ATTRIBUTE_UNUSED,
-+ rtx x, addr_space_t as ATTRIBUTE_UNUSED,
-+ bool in_const)
- {
- #ifndef POINTERS_EXTEND_UNSIGNED
- gcc_assert (GET_MODE (x) == to_mode || GET_MODE (x) == VOIDmode);
-@@ -389,32 +391,29 @@
-
- case CONST:
- return gen_rtx_CONST (to_mode,
-- convert_memory_address_addr_space
-- (to_mode, XEXP (x, 0), as));
-+ convert_memory_address_addr_space_1
-+ (to_mode, XEXP (x, 0), as, true));
- break;
-
- case PLUS:
- case MULT:
-- /* FIXME: For addition, we used to permute the conversion and
-- addition operation only if one operand is a constant and
-- converting the constant does not change it or if one operand
-- is a constant and we are using a ptr_extend instruction
-- (POINTERS_EXTEND_UNSIGNED < 0) even if the resulting address
-- may overflow/underflow. We relax the condition to include
-- zero-extend (POINTERS_EXTEND_UNSIGNED > 0) since the other
-- parts of the compiler depend on it. See PR 49721.
--
-+ /* For addition we can safely permute the conversion and addition
-+ operation if one operand is a constant and converting the constant
-+ does not change it or if one operand is a constant and we are
-+ using a ptr_extend instruction (POINTERS_EXTEND_UNSIGNED < 0).
- We can always safely permute them if we are making the address
-- narrower. */
-+ narrower. Inside a CONST RTL, this is safe for both pointers
-+ zero or sign extended as pointers cannot wrap. */
- if (GET_MODE_SIZE (to_mode) < GET_MODE_SIZE (from_mode)
- || (GET_CODE (x) == PLUS
- && CONST_INT_P (XEXP (x, 1))
-- && (POINTERS_EXTEND_UNSIGNED != 0
-- || XEXP (x, 1) == convert_memory_address_addr_space
-- (to_mode, XEXP (x, 1), as))))
-+ && ((in_const && POINTERS_EXTEND_UNSIGNED != 0)
-+ || XEXP (x, 1) == convert_memory_address_addr_space_1
-+ (to_mode, XEXP (x, 1), as, in_const)
-+ || POINTERS_EXTEND_UNSIGNED < 0)))
- return gen_rtx_fmt_ee (GET_CODE (x), to_mode,
-- convert_memory_address_addr_space
-- (to_mode, XEXP (x, 0), as),
-+ convert_memory_address_addr_space_1
-+ (to_mode, XEXP (x, 0), as, in_const),
- XEXP (x, 1));
- break;
-
-@@ -426,6 +425,18 @@
- x, POINTERS_EXTEND_UNSIGNED);
- #endif /* defined(POINTERS_EXTEND_UNSIGNED) */
- }
-+
-+/* Given X, a memory address in address space AS' pointer mode, convert it to
-+ an address in the address space's address mode, or vice versa (TO_MODE says
-+ which way). We take advantage of the fact that pointers are not allowed to
-+ overflow by commuting arithmetic operations over conversions so that address
-+ arithmetic insns can be used. */
-+
-+rtx
-+convert_memory_address_addr_space (enum machine_mode to_mode, rtx x, addr_space_t as)
-+{
-+ return convert_memory_address_addr_space_1 (to_mode, x, as, false);
-+}
-
- /* Return something equivalent to X but valid as a memory address for something
- of mode MODE in the named address space AS. When X is not itself valid,
---- a/src/gcc/lto/ChangeLog.linaro
-+++ b/src/gcc/lto/ChangeLog.linaro
-@@ -0,0 +1,51 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/gcc/po/ChangeLog.linaro
-+++ b/src/gcc/po/ChangeLog.linaro
-@@ -0,0 +1,51 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/gcc/varasm.c
-+++ b/src/gcc/varasm.c
-@@ -1335,6 +1335,11 @@
- /* As a register variable, it has no section. */
- return;
- }
-+ /* Avoid internal errors from invalid register
-+ specifications. */
-+ SET_DECL_ASSEMBLER_NAME (decl, NULL_TREE);
-+ DECL_HARD_REGISTER (decl) = 0;
-+ return;
- }
- /* Now handle ordinary static variables and functions (in memory).
- Also handle vars declared register invalidly. */
---- a/src/gcc/sched-deps.c
-+++ b/src/gcc/sched-deps.c
-@@ -2828,35 +2828,42 @@
- sched_deps_info->finish_rhs ();
- }
-
--/* Try to group comparison and the following conditional jump INSN if
-- they're already adjacent. This is to prevent scheduler from scheduling
-- them apart. */
-+/* Try to group two fuseable insns together to prevent scheduler
-+ from scheduling them apart. */
-
- static void
--try_group_insn (rtx insn)
-+sched_macro_fuse_insns (rtx insn)
- {
-- unsigned int condreg1, condreg2;
-- rtx cc_reg_1;
- rtx prev;
-
-- if (!any_condjump_p (insn))
-- return;
-+ if (any_condjump_p (insn))
-+ {
-+ unsigned int condreg1, condreg2;
-+ rtx cc_reg_1;
-+ targetm.fixed_condition_code_regs (&condreg1, &condreg2);
-+ cc_reg_1 = gen_rtx_REG (CCmode, condreg1);
-+ prev = prev_nonnote_nondebug_insn (insn);
-+ if (!reg_referenced_p (cc_reg_1, PATTERN (insn))
-+ || !prev
-+ || !modified_in_p (cc_reg_1, prev))
-+ return;
-+ }
-+ else
-+ {
-+ rtx insn_set = single_set (insn);
-
-- targetm.fixed_condition_code_regs (&condreg1, &condreg2);
-- cc_reg_1 = gen_rtx_REG (CCmode, condreg1);
-- prev = prev_nonnote_nondebug_insn (insn);
-- if (!reg_referenced_p (cc_reg_1, PATTERN (insn))
-- || !prev
-- || !modified_in_p (cc_reg_1, prev))
-- return;
-+ prev = prev_nonnote_nondebug_insn (insn);
-+ if (!prev
-+ || !insn_set
-+ || !single_set (prev)
-+ || !modified_in_p (SET_DEST (insn_set), prev))
-+ return;
-
-- /* Different microarchitectures support macro fusions for different
-- combinations of insn pairs. */
-- if (!targetm.sched.macro_fusion_pair_p
-- || !targetm.sched.macro_fusion_pair_p (prev, insn))
-- return;
-+ }
-
-- SCHED_GROUP_P (insn) = 1;
-+ if (targetm.sched.macro_fusion_pair_p (prev, insn))
-+ SCHED_GROUP_P (insn) = 1;
-+
- }
-
- /* Analyze an INSN with pattern X to find all dependencies. */
-@@ -2885,7 +2892,7 @@
- /* Group compare and branch insns for macro-fusion. */
- if (targetm.sched.macro_fusion_p
- && targetm.sched.macro_fusion_p ())
-- try_group_insn (insn);
-+ sched_macro_fuse_insns (insn);
-
- if (may_trap_p (x))
- /* Avoid moving trapping instructions across function calls that might
---- a/src/gcc/var-tracking.c
-+++ b/src/gcc/var-tracking.c
-@@ -5997,7 +5997,8 @@
- {
- cselib_val *oval = cselib_lookup (oloc, GET_MODE (oloc), 0, VOIDmode);
-
-- gcc_assert (oval != v);
-+ if (oval == v)
-+ return;
- gcc_assert (REG_P (oloc) || MEM_P (oloc));
-
- if (oval && !cselib_preserved_value_p (oval))
---- a/src/gcc/system.h
-+++ b/src/gcc/system.h
-@@ -830,7 +830,8 @@
- CAN_DEBUG_WITHOUT_FP UNLIKELY_EXECUTED_TEXT_SECTION_NAME \
- HOT_TEXT_SECTION_NAME LEGITIMATE_CONSTANT_P ALWAYS_STRIP_DOTDOT \
- OUTPUT_ADDR_CONST_EXTRA SMALL_REGISTER_CLASSES ASM_OUTPUT_IDENT \
-- ASM_BYTE_OP MEMBER_TYPE_FORCES_BLK
-+ ASM_BYTE_OP MEMBER_TYPE_FORCES_BLK CLEAR_BY_PIECES_P \
-+ MOVE_BY_PIECES_P SET_BY_PIECES_P STORE_BY_PIECES_P
-
- /* Target macros only used for code built for the target, that have
- moved to libgcc-tm.h or have never been present elsewhere. */
-@@ -912,7 +913,8 @@
- USE_COMMON_FOR_ONE_ONLY IFCVT_EXTRA_FIELDS IFCVT_INIT_EXTRA_FIELDS \
- CASE_USE_BIT_TESTS FIXUNS_TRUNC_LIKE_FIX_TRUNC \
- GO_IF_MODE_DEPENDENT_ADDRESS DELAY_SLOTS_FOR_EPILOGUE \
-- ELIGIBLE_FOR_EPILOGUE_DELAY TARGET_C99_FUNCTIONS TARGET_HAS_SINCOS
-+ ELIGIBLE_FOR_EPILOGUE_DELAY TARGET_C99_FUNCTIONS TARGET_HAS_SINCOS \
-+ LARGEST_EXPONENT_IS_NORNAL ROUND_TOWARDS_ZERO
-
- /* Hooks that are no longer used. */
- #pragma GCC poison LANG_HOOKS_FUNCTION_MARK LANG_HOOKS_FUNCTION_FREE \
---- a/src/gcc/config.gcc
-+++ b/src/gcc/config.gcc
-@@ -312,8 +312,9 @@
- aarch64*-*-*)
- cpu_type=aarch64
- need_64bit_hwint=yes
-- extra_headers="arm_neon.h"
-+ extra_headers="arm_neon.h arm_acle.h"
- extra_objs="aarch64-builtins.o aarch-common.o"
-+ target_gtfiles="\$(srcdir)/config/aarch64/aarch64-builtins.c"
- target_has_targetm_common=yes
- ;;
- alpha*-*-*)
---- a/src/gcc/Makefile.in
-+++ b/src/gcc/Makefile.in
-@@ -814,10 +814,12 @@
- DEVPHASE := $(srcdir)/DEV-PHASE # experimental, prerelease, ""
- DATESTAMP := $(srcdir)/DATESTAMP # YYYYMMDD or empty
- REVISION := $(srcdir)/REVISION # [BRANCH revision XXXXXX]
-+LINAROVER := $(srcdir)/LINARO-VERSION # M.x-YYYY.MM[-S][~dev]
-
- BASEVER_c := $(shell cat $(BASEVER))
- DEVPHASE_c := $(shell cat $(DEVPHASE))
- DATESTAMP_c := $(shell cat $(DATESTAMP))
-+LINAROVER_c := $(shell cat $(LINAROVER))
-
- ifeq (,$(wildcard $(REVISION)))
- REVISION_c :=
-@@ -838,6 +840,7 @@
- DATESTAMP_s := "\"$(if $(DEVPHASE_c), $(DATESTAMP_c))\""
- PKGVERSION_s:= "\"@PKGVERSION@\""
- BUGURL_s := "\"@REPORT_BUGS_TO@\""
-+LINAROVER_s := "\"$(LINAROVER_c)\""
-
- PKGVERSION := @PKGVERSION@
- BUGURL_TEXI := @REPORT_BUGS_TEXI@
-@@ -2542,8 +2545,9 @@
- -DSTANDARD_EXEC_PREFIX=\"$(libdir)/gcc/\" \
- @TARGET_SYSTEM_ROOT_DEFINE@
-
--CFLAGS-cppbuiltin.o += $(PREPROCESSOR_DEFINES) -DBASEVER=$(BASEVER_s)
--cppbuiltin.o: $(BASEVER)
-+CFLAGS-cppbuiltin.o += $(PREPROCESSOR_DEFINES) -DBASEVER=$(BASEVER_s) \
-+ -DLINAROVER=$(LINAROVER_s)
-+cppbuiltin.o: $(BASEVER) $(LINAROVER)
-
- CFLAGS-cppdefault.o += $(PREPROCESSOR_DEFINES)
-
-@@ -2799,8 +2803,7 @@
- gcov.texi trouble.texi bugreport.texi service.texi \
- contribute.texi compat.texi funding.texi gnu.texi gpl_v3.texi \
- fdl.texi contrib.texi cppenv.texi cppopts.texi avr-mmcu.texi \
-- implement-c.texi implement-cxx.texi arm-neon-intrinsics.texi \
-- arm-acle-intrinsics.texi
-+ implement-c.texi implement-cxx.texi
-
- # we explicitly use $(srcdir)/doc/tm.texi here to avoid confusion with
- # the generated tm.texi; the latter might have a more recent timestamp,
---- a/src/gcc/tree-cfg.c
-+++ b/src/gcc/tree-cfg.c
-@@ -2594,7 +2594,7 @@
- near its "logical" location. This is of most help to humans looking
- at debugging dumps. */
-
--static basic_block
-+basic_block
- split_edge_bb_loc (edge edge_in)
- {
- basic_block dest = edge_in->dest;
---- a/src/gcc/tree-cfg.h
-+++ b/src/gcc/tree-cfg.h
-@@ -62,6 +62,7 @@
- extern tree gimple_block_label (basic_block);
- extern void add_phi_args_after_copy_bb (basic_block);
- extern void add_phi_args_after_copy (basic_block *, unsigned, edge);
-+extern basic_block split_edge_bb_loc (edge);
- extern bool gimple_duplicate_sese_region (edge, edge, basic_block *, unsigned,
- basic_block *, bool);
- extern bool gimple_duplicate_sese_tail (edge, edge, basic_block *, unsigned,
---- a/src/gcc/ree.c
-+++ b/src/gcc/ree.c
-@@ -794,6 +794,14 @@
- if (!SCALAR_INT_MODE_P (GET_MODE (SET_DEST (PATTERN (cand->insn)))))
- return false;
-
-+ enum machine_mode dst_mode = GET_MODE (SET_DEST (PATTERN (cand->insn)));
-+ rtx src_reg = get_extended_src_reg (SET_SRC (PATTERN (cand->insn)));
-+
-+ /* Ensure the number of hard registers of the copy match. */
-+ if (HARD_REGNO_NREGS (REGNO (src_reg), dst_mode)
-+ != HARD_REGNO_NREGS (REGNO (src_reg), GET_MODE (src_reg)))
-+ return false;
-+
- /* There's only one reaching def. */
- rtx def_insn = state->defs_list[0];
-
-@@ -843,7 +851,7 @@
- start_sequence ();
- rtx pat = PATTERN (cand->insn);
- rtx new_dst = gen_rtx_REG (GET_MODE (SET_DEST (pat)),
-- REGNO (XEXP (SET_SRC (pat), 0)));
-+ REGNO (get_extended_src_reg (SET_SRC (pat))));
- rtx new_src = gen_rtx_REG (GET_MODE (SET_DEST (pat)),
- REGNO (SET_DEST (pat)));
- emit_move_insn (new_dst, new_src);
---- a/src/gcc/config/s390/s390.c
-+++ b/src/gcc/config/s390/s390.c
-@@ -12066,6 +12066,18 @@
- register_pass (&insert_pass_s390_early_mach);
- }
-
-+/* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
-+
-+static bool
-+s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
-+ unsigned int align ATTRIBUTE_UNUSED,
-+ enum by_pieces_operation op ATTRIBUTE_UNUSED,
-+ bool speed_p ATTRIBUTE_UNUSED)
-+{
-+ return (size == 1 || size == 2
-+ || size == 4 || (TARGET_ZARCH && size == 8));
-+}
-+
- /* Initialize GCC target structure. */
-
- #undef TARGET_ASM_ALIGNED_HI_OP
-@@ -12248,6 +12260,10 @@
- #undef TARGET_SET_UP_BY_PROLOGUE
- #define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue
-
-+#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
-+#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
-+ s390_use_by_pieces_infrastructure_p
-+
- struct gcc_target targetm = TARGET_INITIALIZER;
-
- #include "gt-s390.h"
---- a/src/gcc/config/s390/s390.h
-+++ b/src/gcc/config/s390/s390.h
-@@ -752,24 +752,6 @@
- #define MOVE_MAX_PIECES (TARGET_ZARCH ? 8 : 4)
- #define MAX_MOVE_MAX 16
-
--/* Determine whether to use move_by_pieces or block move insn. */
--#define MOVE_BY_PIECES_P(SIZE, ALIGN) \
-- ( (SIZE) == 1 || (SIZE) == 2 || (SIZE) == 4 \
-- || (TARGET_ZARCH && (SIZE) == 8) )
--
--/* Determine whether to use clear_by_pieces or block clear insn. */
--#define CLEAR_BY_PIECES_P(SIZE, ALIGN) \
-- ( (SIZE) == 1 || (SIZE) == 2 || (SIZE) == 4 \
-- || (TARGET_ZARCH && (SIZE) == 8) )
--
--/* This macro is used to determine whether store_by_pieces should be
-- called to "memcpy" storage when the source is a constant string. */
--#define STORE_BY_PIECES_P(SIZE, ALIGN) MOVE_BY_PIECES_P (SIZE, ALIGN)
--
--/* Likewise to decide whether to "memset" storage with byte values
-- other than zero. */
--#define SET_BY_PIECES_P(SIZE, ALIGN) STORE_BY_PIECES_P (SIZE, ALIGN)
--
- /* Don't perform CSE on function addresses. */
- #define NO_FUNCTION_CSE
-
---- a/src/gcc/config/i386/i386.c
-+++ b/src/gcc/config/i386/i386.c
-@@ -25796,6 +25796,9 @@
- rtx compare_set = NULL_RTX, test_if, cond;
- rtx alu_set = NULL_RTX, addr = NULL_RTX;
-
-+ if (!any_condjump_p (condjmp))
-+ return false;
-+
- if (get_attr_type (condgen) != TYPE_TEST
- && get_attr_type (condgen) != TYPE_ICMP
- && get_attr_type (condgen) != TYPE_INCDEC
---- a/src/gcc/config/sh/sh.c
-+++ b/src/gcc/config/sh/sh.c
-@@ -317,6 +317,10 @@
- static bool sh_legitimate_constant_p (enum machine_mode, rtx);
- static int mov_insn_size (enum machine_mode, bool);
- static int mov_insn_alignment_mask (enum machine_mode, bool);
-+static bool sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
-+ unsigned int,
-+ enum by_pieces_operation,
-+ bool);
- static bool sequence_insn_p (rtx);
- static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool);
- static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&,
-@@ -601,6 +605,10 @@
- #undef TARGET_FIXED_CONDITION_CODE_REGS
- #define TARGET_FIXED_CONDITION_CODE_REGS sh_fixed_condition_code_regs
-
-+#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
-+#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
-+ sh_use_by_pieces_infrastructure_p
-+
- /* Machine-specific symbol_ref flags. */
- #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
-
-@@ -13533,4 +13541,27 @@
- return NULL_RTX;
- }
-
-+/* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
-+
-+static bool
-+sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
-+ unsigned int align,
-+ enum by_pieces_operation op,
-+ bool speed_p)
-+{
-+ switch (op)
-+ {
-+ case MOVE_BY_PIECES:
-+ return move_by_pieces_ninsns (size, align, MOVE_MAX_PIECES + 1)
-+ < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
-+ case STORE_BY_PIECES:
-+ case SET_BY_PIECES:
-+ return move_by_pieces_ninsns (size, align, STORE_MAX_PIECES + 1)
-+ < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
-+ default:
-+ return default_use_by_pieces_infrastructure_p (size, align,
-+ op, speed_p);
-+ }
-+}
-+
- #include "gt-sh.h"
---- a/src/gcc/config/sh/sh.h
-+++ b/src/gcc/config/sh/sh.h
-@@ -1584,16 +1584,6 @@
- #define USE_STORE_PRE_DECREMENT(mode) ((mode == SImode || mode == DImode) \
- ? 0 : TARGET_SH1)
-
--#define MOVE_BY_PIECES_P(SIZE, ALIGN) \
-- (move_by_pieces_ninsns (SIZE, ALIGN, MOVE_MAX_PIECES + 1) \
-- < (optimize_size ? 2 : ((ALIGN >= 32) ? 16 : 2)))
--
--#define STORE_BY_PIECES_P(SIZE, ALIGN) \
-- (move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES + 1) \
-- < (optimize_size ? 2 : ((ALIGN >= 32) ? 16 : 2)))
--
--#define SET_BY_PIECES_P(SIZE, ALIGN) STORE_BY_PIECES_P(SIZE, ALIGN)
--
- /* Macros to check register numbers against specific register classes. */
-
- /* These assume that REGNO is a hard or pseudo reg number.
---- a/src/gcc/config/host-linux.c
-+++ b/src/gcc/config/host-linux.c
-@@ -86,6 +86,8 @@
- # define TRY_EMPTY_VM_SPACE 0x60000000
- #elif defined(__mc68000__)
- # define TRY_EMPTY_VM_SPACE 0x40000000
-+#elif defined(__aarch64__) && defined(__ILP32__)
-+# define TRY_EMPTY_VM_SPACE 0x60000000
- #elif defined(__aarch64__)
- # define TRY_EMPTY_VM_SPACE 0x1000000000
- #elif defined(__ARM_EABI__)
---- a/src/gcc/config/cris/cris.h
-+++ b/src/gcc/config/cris/cris.h
-@@ -80,15 +80,7 @@
- /* Which CPU version this is. The parsed and adjusted cris_cpu_str. */
- extern int cris_cpu_version;
-
--/* Changing the order used to be necessary to put the fourth __make_dp
-- argument (a DImode parameter) in registers, to fit with the libfunc
-- parameter passing scheme used for intrinsic functions. FIXME: Check
-- performance. */
--#ifdef IN_LIBGCC2
--#define __make_dp(a,b,c,d) __cris_make_dp(d,a,b,c)
--#endif
-
--
- /* Node: Driver */
-
- /* Also provide canonical vN definitions when user specifies an alias. */
---- a/src/gcc/config/aarch64/geniterators.sh
-+++ b/src/gcc/config/aarch64/geniterators.sh
-@@ -0,0 +1,45 @@
-+#!/bin/sh
-+#
-+# Copyright (C) 2014 Free Software Foundation, Inc.
-+# Contributed by ARM Ltd.
-+#
-+# This file is part of GCC.
-+#
-+# GCC is free software; you can redistribute it and/or modify
-+# it under the terms of the GNU General Public License as published by
-+# the Free Software Foundation; either version 3, or (at your option)
-+# any later version.
-+#
-+# GCC is distributed in the hope that it will be useful,
-+# but WITHOUT ANY WARRANTY; without even the implied warranty of
-+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-+# GNU General Public License for more details.
-+#
-+# You should have received a copy of the GNU General Public License
-+# along with GCC; see the file COPYING3. If not see
-+# <http://www.gnu.org/licenses/>.
-+
-+# Generate aarch64-builtin-iterators.h, a file containing a series of
-+# BUILTIN_<ITERATOR> macros, which expand to VAR<N> Macros covering the
-+# same set of modes as the iterator in iterators.md
-+
-+echo "/* -*- buffer-read-only: t -*- */"
-+echo "/* Generated automatically by geniterators.sh from iterators.md. */"
-+echo "#ifndef GCC_AARCH64_ITERATORS_H"
-+echo "#define GCC_AARCH64_ITERATORS_H"
-+
-+# Strip newlines, create records marked ITERATOR, and strip junk (anything
-+# which does not have a matching brace because it contains characters we
-+# don't want to or can't handle (e.g P, PTR iterators change depending on
-+# Pmode and ptr_mode).
-+cat $1 | tr "\n" " " \
-+ | sed 's/(define_mode_iterator \([A-Za-z0-9_]*\) \([]\[A-Z0-9 \t]*\)/\n#define BUILTIN_\1(T, N, MAP) \\ \2\n/g' \
-+ | grep '#define [A-Z0-9_(), \\]* \[[A-Z0-9[:space:]]*]' \
-+ | sed 's/\t//g' \
-+ | sed 's/ \+/ /g' \
-+ | sed 's/ \[\([A-Z0-9 ]*\)]/\n\L\1/' \
-+ | awk ' BEGIN { FS = " " ; OFS = ", "} \
-+ /#/ { print } \
-+ ! /#/ { $1 = $1 ; printf " VAR%d (T, N, MAP, %s)\n", NF, $0 }'
-+
-+echo "#endif /* GCC_AARCH64_ITERATORS_H */"
---- a/src/gcc/config/aarch64/aarch64-simd.md
-+++ b/src/gcc/config/aarch64/aarch64-simd.md
-@@ -19,8 +19,8 @@
- ;; <http://www.gnu.org/licenses/>.
-
- (define_expand "mov<mode>"
-- [(set (match_operand:VALL 0 "aarch64_simd_nonimmediate_operand" "")
-- (match_operand:VALL 1 "aarch64_simd_general_operand" ""))]
-+ [(set (match_operand:VALL 0 "nonimmediate_operand" "")
-+ (match_operand:VALL 1 "general_operand" ""))]
- "TARGET_SIMD"
- "
- if (GET_CODE (operands[0]) == MEM)
-@@ -29,8 +29,8 @@
- )
-
- (define_expand "movmisalign<mode>"
-- [(set (match_operand:VALL 0 "aarch64_simd_nonimmediate_operand" "")
-- (match_operand:VALL 1 "aarch64_simd_general_operand" ""))]
-+ [(set (match_operand:VALL 0 "nonimmediate_operand" "")
-+ (match_operand:VALL 1 "general_operand" ""))]
- "TARGET_SIMD"
- {
- /* This pattern is not permitted to fail during expansion: if both arguments
-@@ -91,9 +91,9 @@
- )
-
- (define_insn "*aarch64_simd_mov<mode>"
-- [(set (match_operand:VD 0 "aarch64_simd_nonimmediate_operand"
-+ [(set (match_operand:VD 0 "nonimmediate_operand"
- "=w, m, w, ?r, ?w, ?r, w")
-- (match_operand:VD 1 "aarch64_simd_general_operand"
-+ (match_operand:VD 1 "general_operand"
- "m, w, w, w, r, r, Dn"))]
- "TARGET_SIMD
- && (register_operand (operands[0], <MODE>mode)
-@@ -119,9 +119,9 @@
- )
-
- (define_insn "*aarch64_simd_mov<mode>"
-- [(set (match_operand:VQ 0 "aarch64_simd_nonimmediate_operand"
-+ [(set (match_operand:VQ 0 "nonimmediate_operand"
- "=w, m, w, ?r, ?w, ?r, w")
-- (match_operand:VQ 1 "aarch64_simd_general_operand"
-+ (match_operand:VQ 1 "general_operand"
- "m, w, w, w, r, r, Dn"))]
- "TARGET_SIMD
- && (register_operand (operands[0], <MODE>mode)
-@@ -286,6 +286,23 @@
- [(set_attr "type" "neon_mul_<Vetype><q>")]
- )
-
-+(define_insn "bswap<mode>"
-+ [(set (match_operand:VDQHSD 0 "register_operand" "=w")
-+ (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
-+ "TARGET_SIMD"
-+ "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
-+ [(set_attr "type" "neon_rev<q>")]
-+)
-+
-+(define_insn "aarch64_rbit<mode>"
-+ [(set (match_operand:VB 0 "register_operand" "=w")
-+ (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
-+ UNSPEC_RBIT))]
-+ "TARGET_SIMD"
-+ "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
-+ [(set_attr "type" "neon_rbit")]
-+)
-+
- (define_insn "*aarch64_mul3_elt<mode>"
- [(set (match_operand:VMUL 0 "register_operand" "=w")
- (mult:VMUL
-@@ -954,7 +971,7 @@
- dup\\t%d0, %1.d[0]
- fmov\\t%d0, %1
- dup\\t%d0, %1"
-- [(set_attr "type" "neon_dup<q>,fmov,neon_dup<q>")
-+ [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
- (set_attr "simd" "yes,*,yes")
- (set_attr "fp" "*,yes,*")
- (set_attr "length" "4")]
-@@ -1046,7 +1063,7 @@
- (match_operand:<VHALF> 1 "register_operand" "w,r")
- (vec_select:<VHALF>
- (match_dup 0)
-- (match_operand:VQ 2 "vect_par_cnst_hi_half" ""))))]
-+ (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
- "TARGET_SIMD && BYTES_BIG_ENDIAN"
- "@
- ins\\t%0.d[1], %1.d[0]
-@@ -1059,7 +1076,7 @@
- (match_operand:<VHALF> 1 "register_operand" "")]
- "TARGET_SIMD"
- {
-- rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, BYTES_BIG_ENDIAN);
-+ rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
- if (BYTES_BIG_ENDIAN)
- emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
- operands[1], p));
-@@ -1099,7 +1116,7 @@
- ;; For quads.
-
- (define_insn "vec_pack_trunc_<mode>"
-- [(set (match_operand:<VNARROWQ2> 0 "register_operand" "+&w")
-+ [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
- (vec_concat:<VNARROWQ2>
- (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
- (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
-@@ -1541,7 +1558,7 @@
- )
-
- ;; Vector versions of the floating-point frint patterns.
--;; Expands to btrunc, ceil, floor, nearbyint, rint, round.
-+;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
- (define_insn "<frint_pattern><mode>2"
- [(set (match_operand:VDQF 0 "register_operand" "=w")
- (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
-@@ -1853,15 +1870,15 @@
- ;; bif op0, op1, mask
-
- (define_insn "aarch64_simd_bsl<mode>_internal"
-- [(set (match_operand:VALLDIF 0 "register_operand" "=w,w,w")
-- (ior:VALLDIF
-- (and:VALLDIF
-- (match_operand:<V_cmp_result> 1 "register_operand" " 0,w,w")
-- (match_operand:VALLDIF 2 "register_operand" " w,w,0"))
-- (and:VALLDIF
-+ [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
-+ (ior:VSDQ_I_DI
-+ (and:VSDQ_I_DI
- (not:<V_cmp_result>
-- (match_dup:<V_cmp_result> 1))
-- (match_operand:VALLDIF 3 "register_operand" " w,0,w"))
-+ (match_operand:<V_cmp_result> 1 "register_operand" " 0,w,w"))
-+ (match_operand:VSDQ_I_DI 3 "register_operand" " w,0,w"))
-+ (and:VSDQ_I_DI
-+ (match_dup:<V_cmp_result> 1)
-+ (match_operand:VSDQ_I_DI 2 "register_operand" " w,w,0"))
- ))]
- "TARGET_SIMD"
- "@
-@@ -1879,9 +1896,21 @@
- "TARGET_SIMD"
- {
- /* We can't alias operands together if they have different modes. */
-+ rtx tmp = operands[0];
-+ if (FLOAT_MODE_P (<MODE>mode))
-+ {
-+ operands[2] = gen_lowpart (<V_cmp_result>mode, operands[2]);
-+ operands[3] = gen_lowpart (<V_cmp_result>mode, operands[3]);
-+ tmp = gen_reg_rtx (<V_cmp_result>mode);
-+ }
- operands[1] = gen_lowpart (<V_cmp_result>mode, operands[1]);
-- emit_insn (gen_aarch64_simd_bsl<mode>_internal (operands[0], operands[1],
-- operands[2], operands[3]));
-+ emit_insn (gen_aarch64_simd_bsl<v_cmp_result>_internal (tmp,
-+ operands[1],
-+ operands[2],
-+ operands[3]));
-+ if (tmp != operands[0])
-+ emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
-+
- DONE;
- })
-
-@@ -1895,58 +1924,94 @@
- (match_operand:VDQ 2 "nonmemory_operand")))]
- "TARGET_SIMD"
- {
-- int inverse = 0, has_zero_imm_form = 0;
- rtx op1 = operands[1];
- rtx op2 = operands[2];
- rtx mask = gen_reg_rtx (<MODE>mode);
-+ enum rtx_code code = GET_CODE (operands[3]);
-
-- switch (GET_CODE (operands[3]))
-+ /* Switching OP1 and OP2 is necessary for NE (to output a cmeq insn),
-+ and desirable for other comparisons if it results in FOO ? -1 : 0
-+ (this allows direct use of the comparison result without a bsl). */
-+ if (code == NE
-+ || (code != EQ
-+ && op1 == CONST0_RTX (<V_cmp_result>mode)
-+ && op2 == CONSTM1_RTX (<V_cmp_result>mode)))
- {
-+ op1 = operands[2];
-+ op2 = operands[1];
-+ switch (code)
-+ {
-+ case LE: code = GT; break;
-+ case LT: code = GE; break;
-+ case GE: code = LT; break;
-+ case GT: code = LE; break;
-+ /* No case EQ. */
-+ case NE: code = EQ; break;
-+ case LTU: code = GEU; break;
-+ case LEU: code = GTU; break;
-+ case GTU: code = LEU; break;
-+ case GEU: code = LTU; break;
-+ default: gcc_unreachable ();
-+ }
-+ }
-+
-+ /* Make sure we can handle the last operand. */
-+ switch (code)
-+ {
-+ case NE:
-+ /* Normalized to EQ above. */
-+ gcc_unreachable ();
-+
- case LE:
- case LT:
-- case NE:
-- inverse = 1;
-- /* Fall through. */
- case GE:
- case GT:
- case EQ:
-- has_zero_imm_form = 1;
-- break;
-- case LEU:
-- case LTU:
-- inverse = 1;
-- break;
-+ /* These instructions have a form taking an immediate zero. */
-+ if (operands[5] == CONST0_RTX (<MODE>mode))
-+ break;
-+ /* Fall through, as may need to load into register. */
- default:
-+ if (!REG_P (operands[5]))
-+ operands[5] = force_reg (<MODE>mode, operands[5]);
- break;
- }
-
-- if (!REG_P (operands[5])
-- && (operands[5] != CONST0_RTX (<MODE>mode) || !has_zero_imm_form))
-- operands[5] = force_reg (<MODE>mode, operands[5]);
--
-- switch (GET_CODE (operands[3]))
-+ switch (code)
- {
- case LT:
-+ emit_insn (gen_aarch64_cmlt<mode> (mask, operands[4], operands[5]));
-+ break;
-+
- case GE:
- emit_insn (gen_aarch64_cmge<mode> (mask, operands[4], operands[5]));
- break;
-
- case LE:
-+ emit_insn (gen_aarch64_cmle<mode> (mask, operands[4], operands[5]));
-+ break;
-+
- case GT:
- emit_insn (gen_aarch64_cmgt<mode> (mask, operands[4], operands[5]));
- break;
-
- case LTU:
-+ emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[5], operands[4]));
-+ break;
-+
- case GEU:
- emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[4], operands[5]));
- break;
-
- case LEU:
-+ emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[5], operands[4]));
-+ break;
-+
- case GTU:
- emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[4], operands[5]));
- break;
-
-- case NE:
-+ /* NE has been normalized to EQ above. */
- case EQ:
- emit_insn (gen_aarch64_cmeq<mode> (mask, operands[4], operands[5]));
- break;
-@@ -1955,12 +2020,6 @@
- gcc_unreachable ();
- }
-
-- if (inverse)
-- {
-- op1 = operands[2];
-- op2 = operands[1];
-- }
--
- /* If we have (a = (b CMP c) ? -1 : 0);
- Then we can simply move the generated mask. */
-
-@@ -2348,6 +2407,15 @@
- DONE;
- })
-
-+(define_expand "aarch64_reinterpretdf<mode>"
-+ [(match_operand:DF 0 "register_operand" "")
-+ (match_operand:VD_RE 1 "register_operand" "")]
-+ "TARGET_SIMD"
-+{
-+ aarch64_simd_reinterpret (operands[0], operands[1]);
-+ DONE;
-+})
-+
- (define_expand "aarch64_reinterpretv16qi<mode>"
- [(match_operand:V16QI 0 "register_operand" "")
- (match_operand:VQ 1 "register_operand" "")]
-@@ -2734,9 +2802,9 @@
- ;; <su>q<absneg>
-
- (define_insn "aarch64_s<optab><mode>"
-- [(set (match_operand:VSDQ_I_BHSI 0 "register_operand" "=w")
-- (UNQOPS:VSDQ_I_BHSI
-- (match_operand:VSDQ_I_BHSI 1 "register_operand" "w")))]
-+ [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
-+ (UNQOPS:VSDQ_I
-+ (match_operand:VSDQ_I 1 "register_operand" "w")))]
- "TARGET_SIMD"
- "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
- [(set_attr "type" "neon_<optab><q>")]
-@@ -3788,26 +3856,46 @@
- )))
- (clobber (reg:CC CC_REGNUM))]
- "TARGET_SIMD"
-- "@
-- cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
-- cm<optab>\t%d0, %d1, #0
-- #"
-- "reload_completed
-- /* We need to prevent the split from
-- happening in the 'w' constraint cases. */
-- && GP_REGNUM_P (REGNO (operands[0]))
-- && GP_REGNUM_P (REGNO (operands[1]))"
-- [(const_int 0)]
-+ "#"
-+ "reload_completed"
-+ [(set (match_operand:DI 0 "register_operand")
-+ (neg:DI
-+ (COMPARISONS:DI
-+ (match_operand:DI 1 "register_operand")
-+ (match_operand:DI 2 "aarch64_simd_reg_or_zero")
-+ )))]
- {
-- enum machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
-- rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
-- rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
-- emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
-- DONE;
-+ /* If we are in the general purpose register file,
-+ we split to a sequence of comparison and store. */
-+ if (GP_REGNUM_P (REGNO (operands[0]))
-+ && GP_REGNUM_P (REGNO (operands[1])))
-+ {
-+ enum machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
-+ rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
-+ rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
-+ emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
-+ DONE;
-+ }
-+ /* Otherwise, we expand to a similar pattern which does not
-+ clobber CC_REGNUM. */
- }
- [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
- )
-
-+(define_insn "*aarch64_cm<optab>di"
-+ [(set (match_operand:DI 0 "register_operand" "=w,w")
-+ (neg:DI
-+ (COMPARISONS:DI
-+ (match_operand:DI 1 "register_operand" "w,w")
-+ (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
-+ )))]
-+ "TARGET_SIMD && reload_completed"
-+ "@
-+ cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
-+ cm<optab>\t%d0, %d1, #0"
-+ [(set_attr "type" "neon_compare, neon_compare_zero")]
-+)
-+
- ;; cm(hs|hi)
-
- (define_insn "aarch64_cm<optab><mode>"
-@@ -3831,35 +3919,62 @@
- )))
- (clobber (reg:CC CC_REGNUM))]
- "TARGET_SIMD"
-- "@
-- cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
-- #"
-- "reload_completed
-- /* We need to prevent the split from
-- happening in the 'w' constraint cases. */
-- && GP_REGNUM_P (REGNO (operands[0]))
-- && GP_REGNUM_P (REGNO (operands[1]))"
-- [(const_int 0)]
-+ "#"
-+ "reload_completed"
-+ [(set (match_operand:DI 0 "register_operand")
-+ (neg:DI
-+ (UCOMPARISONS:DI
-+ (match_operand:DI 1 "register_operand")
-+ (match_operand:DI 2 "aarch64_simd_reg_or_zero")
-+ )))]
- {
-- enum machine_mode mode = CCmode;
-- rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
-- rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
-- emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
-- DONE;
-+ /* If we are in the general purpose register file,
-+ we split to a sequence of comparison and store. */
-+ if (GP_REGNUM_P (REGNO (operands[0]))
-+ && GP_REGNUM_P (REGNO (operands[1])))
-+ {
-+ enum machine_mode mode = CCmode;
-+ rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
-+ rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
-+ emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
-+ DONE;
-+ }
-+ /* Otherwise, we expand to a similar pattern which does not
-+ clobber CC_REGNUM. */
- }
-- [(set_attr "type" "neon_compare, neon_compare_zero")]
-+ [(set_attr "type" "neon_compare,multiple")]
- )
-
-+(define_insn "*aarch64_cm<optab>di"
-+ [(set (match_operand:DI 0 "register_operand" "=w")
-+ (neg:DI
-+ (UCOMPARISONS:DI
-+ (match_operand:DI 1 "register_operand" "w")
-+ (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
-+ )))]
-+ "TARGET_SIMD && reload_completed"
-+ "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
-+ [(set_attr "type" "neon_compare")]
-+)
-+
- ;; cmtst
-
-+;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
-+;; we don't have any insns using ne, and aarch64_vcond_internal outputs
-+;; not (neg (eq (and x y) 0))
-+;; which is rewritten by simplify_rtx as
-+;; plus (eq (and x y) 0) -1.
-+
- (define_insn "aarch64_cmtst<mode>"
- [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
-- (neg:<V_cmp_result>
-- (ne:<V_cmp_result>
-+ (plus:<V_cmp_result>
-+ (eq:<V_cmp_result>
- (and:VDQ
- (match_operand:VDQ 1 "register_operand" "w")
- (match_operand:VDQ 2 "register_operand" "w"))
-- (vec_duplicate:<V_cmp_result> (const_int 0)))))]
-+ (match_operand:VDQ 3 "aarch64_simd_imm_zero"))
-+ (match_operand:<V_cmp_result> 4 "aarch64_simd_imm_minus_one")))
-+ ]
- "TARGET_SIMD"
- "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
- [(set_attr "type" "neon_tst<q>")]
-@@ -3875,23 +3990,44 @@
- (const_int 0))))
- (clobber (reg:CC CC_REGNUM))]
- "TARGET_SIMD"
-- "@
-- cmtst\t%d0, %d1, %d2
-- #"
-- "reload_completed
-- /* We need to prevent the split from
-- happening in the 'w' constraint cases. */
-- && GP_REGNUM_P (REGNO (operands[0]))
-- && GP_REGNUM_P (REGNO (operands[1]))"
-- [(const_int 0)]
-+ "#"
-+ "reload_completed"
-+ [(set (match_operand:DI 0 "register_operand")
-+ (neg:DI
-+ (ne:DI
-+ (and:DI
-+ (match_operand:DI 1 "register_operand")
-+ (match_operand:DI 2 "register_operand"))
-+ (const_int 0))))]
- {
-- rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
-- enum machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
-- rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
-- rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
-- emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
-- DONE;
-+ /* If we are in the general purpose register file,
-+ we split to a sequence of comparison and store. */
-+ if (GP_REGNUM_P (REGNO (operands[0]))
-+ && GP_REGNUM_P (REGNO (operands[1])))
-+ {
-+ rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
-+ enum machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
-+ rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
-+ rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
-+ emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
-+ DONE;
-+ }
-+ /* Otherwise, we expand to a similar pattern which does not
-+ clobber CC_REGNUM. */
- }
-+ [(set_attr "type" "neon_tst,multiple")]
-+)
-+
-+(define_insn "*aarch64_cmtstdi"
-+ [(set (match_operand:DI 0 "register_operand" "=w")
-+ (neg:DI
-+ (ne:DI
-+ (and:DI
-+ (match_operand:DI 1 "register_operand" "w")
-+ (match_operand:DI 2 "register_operand" "w"))
-+ (const_int 0))))]
-+ "TARGET_SIMD"
-+ "cmtst\t%d0, %d1, %d2"
- [(set_attr "type" "neon_tst")]
- )
-
-@@ -3972,6 +4108,16 @@
- [(set_attr "type" "neon_load2_2reg<q>")]
- )
-
-+(define_insn "aarch64_simd_ld2r<mode>"
-+ [(set (match_operand:OI 0 "register_operand" "=w")
-+ (unspec:OI [(match_operand:<V_TWO_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
-+ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
-+ UNSPEC_LD2_DUP))]
-+ "TARGET_SIMD"
-+ "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
-+ [(set_attr "type" "neon_load2_all_lanes<q>")]
-+)
-+
- (define_insn "vec_store_lanesoi<mode>"
- [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
- (unspec:OI [(match_operand:OI 1 "register_operand" "w")
-@@ -3982,6 +4128,17 @@
- [(set_attr "type" "neon_store2_2reg<q>")]
- )
-
-+(define_insn "vec_store_lanesoi_lane<mode>"
-+ [(set (match_operand:<V_TWO_ELEM> 0 "aarch64_simd_struct_operand" "=Utv")
-+ (unspec:<V_TWO_ELEM> [(match_operand:OI 1 "register_operand" "w")
-+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
-+ (match_operand:SI 2 "immediate_operand" "i")]
-+ UNSPEC_ST2_LANE))]
-+ "TARGET_SIMD"
-+ "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0"
-+ [(set_attr "type" "neon_store3_one_lane<q>")]
-+)
-+
- (define_insn "vec_load_lanesci<mode>"
- [(set (match_operand:CI 0 "register_operand" "=w")
- (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
-@@ -3992,6 +4149,16 @@
- [(set_attr "type" "neon_load3_3reg<q>")]
- )
-
-+(define_insn "aarch64_simd_ld3r<mode>"
-+ [(set (match_operand:CI 0 "register_operand" "=w")
-+ (unspec:CI [(match_operand:<V_THREE_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
-+ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
-+ UNSPEC_LD3_DUP))]
-+ "TARGET_SIMD"
-+ "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
-+ [(set_attr "type" "neon_load3_all_lanes<q>")]
-+)
-+
- (define_insn "vec_store_lanesci<mode>"
- [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
- (unspec:CI [(match_operand:CI 1 "register_operand" "w")
-@@ -4002,6 +4169,17 @@
- [(set_attr "type" "neon_store3_3reg<q>")]
- )
-
-+(define_insn "vec_store_lanesci_lane<mode>"
-+ [(set (match_operand:<V_THREE_ELEM> 0 "aarch64_simd_struct_operand" "=Utv")
-+ (unspec:<V_THREE_ELEM> [(match_operand:CI 1 "register_operand" "w")
-+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
-+ (match_operand:SI 2 "immediate_operand" "i")]
-+ UNSPEC_ST3_LANE))]
-+ "TARGET_SIMD"
-+ "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0"
-+ [(set_attr "type" "neon_store3_one_lane<q>")]
-+)
-+
- (define_insn "vec_load_lanesxi<mode>"
- [(set (match_operand:XI 0 "register_operand" "=w")
- (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
-@@ -4012,6 +4190,16 @@
- [(set_attr "type" "neon_load4_4reg<q>")]
- )
-
-+(define_insn "aarch64_simd_ld4r<mode>"
-+ [(set (match_operand:XI 0 "register_operand" "=w")
-+ (unspec:XI [(match_operand:<V_FOUR_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
-+ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
-+ UNSPEC_LD4_DUP))]
-+ "TARGET_SIMD"
-+ "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
-+ [(set_attr "type" "neon_load4_all_lanes<q>")]
-+)
-+
- (define_insn "vec_store_lanesxi<mode>"
- [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
- (unspec:XI [(match_operand:XI 1 "register_operand" "w")
-@@ -4022,6 +4210,17 @@
- [(set_attr "type" "neon_store4_4reg<q>")]
- )
-
-+(define_insn "vec_store_lanesxi_lane<mode>"
-+ [(set (match_operand:<V_FOUR_ELEM> 0 "aarch64_simd_struct_operand" "=Utv")
-+ (unspec:<V_FOUR_ELEM> [(match_operand:XI 1 "register_operand" "w")
-+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
-+ (match_operand:SI 2 "immediate_operand" "i")]
-+ UNSPEC_ST4_LANE))]
-+ "TARGET_SIMD"
-+ "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0"
-+ [(set_attr "type" "neon_store4_one_lane<q>")]
-+)
-+
- ;; Reload patterns for AdvSIMD register list operands.
-
- (define_expand "mov<mode>"
-@@ -4141,6 +4340,45 @@
- aarch64_simd_disambiguate_copy (operands, dest, src, 4);
- })
-
-+(define_expand "aarch64_ld2r<mode>"
-+ [(match_operand:OI 0 "register_operand" "=w")
-+ (match_operand:DI 1 "register_operand" "w")
-+ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
-+ "TARGET_SIMD"
-+{
-+ enum machine_mode mode = <V_TWO_ELEM>mode;
-+ rtx mem = gen_rtx_MEM (mode, operands[1]);
-+
-+ emit_insn (gen_aarch64_simd_ld2r<mode> (operands[0], mem));
-+ DONE;
-+})
-+
-+(define_expand "aarch64_ld3r<mode>"
-+ [(match_operand:CI 0 "register_operand" "=w")
-+ (match_operand:DI 1 "register_operand" "w")
-+ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
-+ "TARGET_SIMD"
-+{
-+ enum machine_mode mode = <V_THREE_ELEM>mode;
-+ rtx mem = gen_rtx_MEM (mode, operands[1]);
-+
-+ emit_insn (gen_aarch64_simd_ld3r<mode> (operands[0], mem));
-+ DONE;
-+})
-+
-+(define_expand "aarch64_ld4r<mode>"
-+ [(match_operand:XI 0 "register_operand" "=w")
-+ (match_operand:DI 1 "register_operand" "w")
-+ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
-+ "TARGET_SIMD"
-+{
-+ enum machine_mode mode = <V_FOUR_ELEM>mode;
-+ rtx mem = gen_rtx_MEM (mode, operands[1]);
-+
-+ emit_insn (gen_aarch64_simd_ld4r<mode> (operands[0],mem));
-+ DONE;
-+})
-+
- (define_insn "aarch64_ld2<mode>_dreg"
- [(set (match_operand:OI 0 "register_operand" "=w")
- (subreg:OI
-@@ -4375,7 +4613,7 @@
- (match_operand:VB 1 "register_operand")
- (match_operand:VB 2 "register_operand")
- (match_operand:VB 3 "register_operand")]
-- "TARGET_SIMD && !BYTES_BIG_ENDIAN"
-+ "TARGET_SIMD"
- {
- aarch64_expand_vec_perm (operands[0], operands[1],
- operands[2], operands[3]);
-@@ -4430,6 +4668,44 @@
- [(set_attr "type" "neon_permute<q>")]
- )
-
-+;; Note immediate (third) operand is lane index not byte index.
-+(define_insn "aarch64_ext<mode>"
-+ [(set (match_operand:VALL 0 "register_operand" "=w")
-+ (unspec:VALL [(match_operand:VALL 1 "register_operand" "w")
-+ (match_operand:VALL 2 "register_operand" "w")
-+ (match_operand:SI 3 "immediate_operand" "i")]
-+ UNSPEC_EXT))]
-+ "TARGET_SIMD"
-+{
-+ operands[3] = GEN_INT (INTVAL (operands[3])
-+ * GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)));
-+ return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
-+}
-+ [(set_attr "type" "neon_ext<q>")]
-+)
-+
-+;; This exists solely to check the arguments to the corresponding __builtin.
-+;; Used where we want an error for out-of-range indices which would otherwise
-+;; be silently wrapped (e.g. the mask to a __builtin_shuffle).
-+(define_expand "aarch64_im_lane_boundsi"
-+ [(match_operand:SI 0 "immediate_operand" "i")
-+ (match_operand:SI 1 "immediate_operand" "i")]
-+ "TARGET_SIMD"
-+{
-+ aarch64_simd_lane_bounds (operands[0], 0, INTVAL (operands[1]));
-+ DONE;
-+}
-+)
-+
-+(define_insn "aarch64_rev<REVERSE:rev_op><mode>"
-+ [(set (match_operand:VALL 0 "register_operand" "=w")
-+ (unspec:VALL [(match_operand:VALL 1 "register_operand" "w")]
-+ REVERSE))]
-+ "TARGET_SIMD"
-+ "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
-+ [(set_attr "type" "neon_rev<q>")]
-+)
-+
- (define_insn "aarch64_st2<mode>_dreg"
- [(set (match_operand:TI 0 "aarch64_simd_struct_operand" "=Utv")
- (unspec:TI [(match_operand:OI 1 "register_operand" "w")
-@@ -4516,6 +4792,57 @@
- DONE;
- })
-
-+(define_expand "aarch64_st2_lane<VQ:mode>"
-+ [(match_operand:DI 0 "register_operand" "r")
-+ (match_operand:OI 1 "register_operand" "w")
-+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
-+ (match_operand:SI 2 "immediate_operand")]
-+ "TARGET_SIMD"
-+{
-+ enum machine_mode mode = <V_TWO_ELEM>mode;
-+ rtx mem = gen_rtx_MEM (mode, operands[0]);
-+ operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
-+
-+ emit_insn (gen_vec_store_lanesoi_lane<VQ:mode> (mem,
-+ operands[1],
-+ operands[2]));
-+ DONE;
-+})
-+
-+(define_expand "aarch64_st3_lane<VQ:mode>"
-+ [(match_operand:DI 0 "register_operand" "r")
-+ (match_operand:CI 1 "register_operand" "w")
-+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
-+ (match_operand:SI 2 "immediate_operand")]
-+ "TARGET_SIMD"
-+{
-+ enum machine_mode mode = <V_THREE_ELEM>mode;
-+ rtx mem = gen_rtx_MEM (mode, operands[0]);
-+ operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
-+
-+ emit_insn (gen_vec_store_lanesci_lane<VQ:mode> (mem,
-+ operands[1],
-+ operands[2]));
-+ DONE;
-+})
-+
-+(define_expand "aarch64_st4_lane<VQ:mode>"
-+ [(match_operand:DI 0 "register_operand" "r")
-+ (match_operand:XI 1 "register_operand" "w")
-+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
-+ (match_operand:SI 2 "immediate_operand")]
-+ "TARGET_SIMD"
-+{
-+ enum machine_mode mode = <V_FOUR_ELEM>mode;
-+ rtx mem = gen_rtx_MEM (mode, operands[0]);
-+ operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
-+
-+ emit_insn (gen_vec_store_lanesxi_lane<VQ:mode> (mem,
-+ operands[1],
-+ operands[2]));
-+ DONE;
-+})
-+
- (define_expand "aarch64_st1<VALL:mode>"
- [(match_operand:DI 0 "register_operand")
- (match_operand:VALL 1 "register_operand")]
---- a/src/gcc/config/aarch64/predicates.md
-+++ b/src/gcc/config/aarch64/predicates.md
-@@ -26,6 +26,10 @@
- && GET_MODE_CLASS (GET_MODE (op)) == MODE_CC"))))
- )
-
-+(define_predicate "aarch64_call_insn_operand"
-+ (ior (match_code "symbol_ref")
-+ (match_operand 0 "register_operand")))
-+
- (define_predicate "aarch64_simd_register"
- (and (match_code "reg")
- (ior (match_test "REGNO_REG_CLASS (REGNO (op)) == FP_LO_REGS")
-@@ -119,6 +123,10 @@
- (match_test "INTVAL (op) != 0
- && (unsigned) exact_log2 (INTVAL (op)) < 64")))
-
-+(define_predicate "aarch64_mem_pair_offset"
-+ (and (match_code "const_int")
-+ (match_test "aarch64_offset_7bit_signed_scaled_p (mode, INTVAL (op))")))
-+
- (define_predicate "aarch64_mem_pair_operand"
- (and (match_code "mem")
- (match_test "aarch64_legitimate_address_p (mode, XEXP (op, 0), PARALLEL,
-@@ -194,6 +202,18 @@
- (define_special_predicate "aarch64_comparison_operator"
- (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu,unordered,ordered,unlt,unle,unge,ungt"))
-
-+(define_special_predicate "aarch64_comparison_operation"
-+ (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu,unordered,ordered,unlt,unle,unge,ungt")
-+{
-+ if (XEXP (op, 1) != const0_rtx)
-+ return false;
-+ rtx op0 = XEXP (op, 0);
-+ if (!REG_P (op0) || REGNO (op0) != CC_REGNUM)
-+ return false;
-+ return aarch64_get_condition_code (op) >= 0;
-+})
-+
-+
- ;; True if the operand is memory reference suitable for a load/store exclusive.
- (define_predicate "aarch64_sync_memory_operand"
- (and (match_operand 0 "memory_operand")
-@@ -203,62 +223,15 @@
- (define_special_predicate "vect_par_cnst_hi_half"
- (match_code "parallel")
- {
-- HOST_WIDE_INT count = XVECLEN (op, 0);
-- int nunits = GET_MODE_NUNITS (mode);
-- int i;
--
-- if (count < 1
-- || count != nunits / 2)
-- return false;
--
-- if (!VECTOR_MODE_P (mode))
-- return false;
--
-- for (i = 0; i < count; i++)
-- {
-- rtx elt = XVECEXP (op, 0, i);
-- int val;
--
-- if (GET_CODE (elt) != CONST_INT)
-- return false;
--
-- val = INTVAL (elt);
-- if (val != (nunits / 2) + i)
-- return false;
-- }
-- return true;
-+ return aarch64_simd_check_vect_par_cnst_half (op, mode, true);
- })
-
- (define_special_predicate "vect_par_cnst_lo_half"
- (match_code "parallel")
- {
-- HOST_WIDE_INT count = XVECLEN (op, 0);
-- int nunits = GET_MODE_NUNITS (mode);
-- int i;
--
-- if (count < 1
-- || count != nunits / 2)
-- return false;
--
-- if (!VECTOR_MODE_P (mode))
-- return false;
--
-- for (i = 0; i < count; i++)
-- {
-- rtx elt = XVECEXP (op, 0, i);
-- int val;
--
-- if (GET_CODE (elt) != CONST_INT)
-- return false;
--
-- val = INTVAL (elt);
-- if (val != i)
-- return false;
-- }
-- return true;
-+ return aarch64_simd_check_vect_par_cnst_half (op, mode, false);
- })
-
--
- (define_special_predicate "aarch64_simd_lshift_imm"
- (match_code "const_vector")
- {
-@@ -300,3 +273,9 @@
- {
- return aarch64_simd_imm_zero_p (op, mode);
- })
-+
-+(define_special_predicate "aarch64_simd_imm_minus_one"
-+ (match_code "const_vector")
-+{
-+ return aarch64_const_vec_all_same_int_p (op, -1);
-+})
---- a/src/gcc/config/aarch64/arm_neon.h
-+++ b/src/gcc/config/aarch64/arm_neon.h
-@@ -2113,29 +2113,26 @@
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vqadd_u8 (uint8x8_t __a, uint8x8_t __b)
- {
-- return (uint8x8_t) __builtin_aarch64_uqaddv8qi ((int8x8_t) __a,
-- (int8x8_t) __b);
-+ return __builtin_aarch64_uqaddv8qi_uuu (__a, __b);
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vqadd_u16 (uint16x4_t __a, uint16x4_t __b)
- {
-- return (uint16x4_t) __builtin_aarch64_uqaddv4hi ((int16x4_t) __a,
-- (int16x4_t) __b);
-+ return __builtin_aarch64_uqaddv4hi_uuu (__a, __b);
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vqadd_u32 (uint32x2_t __a, uint32x2_t __b)
- {
-- return (uint32x2_t) __builtin_aarch64_uqaddv2si ((int32x2_t) __a,
-- (int32x2_t) __b);
-+ return __builtin_aarch64_uqaddv2si_uuu (__a, __b);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
- vqadd_u64 (uint64x1_t __a, uint64x1_t __b)
- {
-- return (uint64x1_t) __builtin_aarch64_uqadddi ((int64x1_t) __a,
-- (int64x1_t) __b);
-+ return (uint64x1_t) __builtin_aarch64_uqadddi_uuu ((uint64_t) __a,
-+ (uint64_t) __b);
- }
-
- __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-@@ -2165,29 +2162,25 @@
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vqaddq_u8 (uint8x16_t __a, uint8x16_t __b)
- {
-- return (uint8x16_t) __builtin_aarch64_uqaddv16qi ((int8x16_t) __a,
-- (int8x16_t) __b);
-+ return __builtin_aarch64_uqaddv16qi_uuu (__a, __b);
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vqaddq_u16 (uint16x8_t __a, uint16x8_t __b)
- {
-- return (uint16x8_t) __builtin_aarch64_uqaddv8hi ((int16x8_t) __a,
-- (int16x8_t) __b);
-+ return __builtin_aarch64_uqaddv8hi_uuu (__a, __b);
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vqaddq_u32 (uint32x4_t __a, uint32x4_t __b)
- {
-- return (uint32x4_t) __builtin_aarch64_uqaddv4si ((int32x4_t) __a,
-- (int32x4_t) __b);
-+ return __builtin_aarch64_uqaddv4si_uuu (__a, __b);
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vqaddq_u64 (uint64x2_t __a, uint64x2_t __b)
- {
-- return (uint64x2_t) __builtin_aarch64_uqaddv2di ((int64x2_t) __a,
-- (int64x2_t) __b);
-+ return __builtin_aarch64_uqaddv2di_uuu (__a, __b);
- }
-
- __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-@@ -2217,29 +2210,26 @@
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vqsub_u8 (uint8x8_t __a, uint8x8_t __b)
- {
-- return (uint8x8_t) __builtin_aarch64_uqsubv8qi ((int8x8_t) __a,
-- (int8x8_t) __b);
-+ return __builtin_aarch64_uqsubv8qi_uuu (__a, __b);
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vqsub_u16 (uint16x4_t __a, uint16x4_t __b)
- {
-- return (uint16x4_t) __builtin_aarch64_uqsubv4hi ((int16x4_t) __a,
-- (int16x4_t) __b);
-+ return __builtin_aarch64_uqsubv4hi_uuu (__a, __b);
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vqsub_u32 (uint32x2_t __a, uint32x2_t __b)
- {
-- return (uint32x2_t) __builtin_aarch64_uqsubv2si ((int32x2_t) __a,
-- (int32x2_t) __b);
-+ return __builtin_aarch64_uqsubv2si_uuu (__a, __b);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
- vqsub_u64 (uint64x1_t __a, uint64x1_t __b)
- {
-- return (uint64x1_t) __builtin_aarch64_uqsubdi ((int64x1_t) __a,
-- (int64x1_t) __b);
-+ return (uint64x1_t) __builtin_aarch64_uqsubdi_uuu ((uint64_t) __a,
-+ (uint64_t) __b);
- }
-
- __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-@@ -2269,29 +2259,25 @@
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vqsubq_u8 (uint8x16_t __a, uint8x16_t __b)
- {
-- return (uint8x16_t) __builtin_aarch64_uqsubv16qi ((int8x16_t) __a,
-- (int8x16_t) __b);
-+ return __builtin_aarch64_uqsubv16qi_uuu (__a, __b);
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vqsubq_u16 (uint16x8_t __a, uint16x8_t __b)
- {
-- return (uint16x8_t) __builtin_aarch64_uqsubv8hi ((int16x8_t) __a,
-- (int16x8_t) __b);
-+ return __builtin_aarch64_uqsubv8hi_uuu (__a, __b);
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vqsubq_u32 (uint32x4_t __a, uint32x4_t __b)
- {
-- return (uint32x4_t) __builtin_aarch64_uqsubv4si ((int32x4_t) __a,
-- (int32x4_t) __b);
-+ return __builtin_aarch64_uqsubv4si_uuu (__a, __b);
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vqsubq_u64 (uint64x2_t __a, uint64x2_t __b)
- {
-- return (uint64x2_t) __builtin_aarch64_uqsubv2di ((int64x2_t) __a,
-- (int64x2_t) __b);
-+ return __builtin_aarch64_uqsubv2di_uuu (__a, __b);
- }
-
- __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-@@ -2312,6 +2298,12 @@
- return (int32x2_t) __builtin_aarch64_sqnegv2si (__a);
- }
-
-+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-+vqneg_s64 (int64x1_t __a)
-+{
-+ return __builtin_aarch64_sqnegdi (__a);
-+}
-+
- __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
- vqnegq_s8 (int8x16_t __a)
- {
-@@ -2348,6 +2340,12 @@
- return (int32x2_t) __builtin_aarch64_sqabsv2si (__a);
- }
-
-+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-+vqabs_s64 (int64x1_t __a)
-+{
-+ return __builtin_aarch64_sqabsdi (__a);
-+}
-+
- __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
- vqabsq_s8 (int8x16_t __a)
- {
-@@ -2637,1352 +2635,1587 @@
- /* vreinterpret */
-
- __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-+vreinterpret_p8_f64 (float64x1_t __a)
-+{
-+ return __builtin_aarch64_reinterpretv8qidf_ps (__a);
-+}
-+
-+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
- vreinterpret_p8_s8 (int8x8_t __a)
- {
-- return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv8qi (__a);
-+ return (poly8x8_t) __a;
- }
-
- __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
- vreinterpret_p8_s16 (int16x4_t __a)
- {
-- return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a);
-+ return (poly8x8_t) __a;
- }
-
- __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
- vreinterpret_p8_s32 (int32x2_t __a)
- {
-- return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a);
-+ return (poly8x8_t) __a;
- }
-
- __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
- vreinterpret_p8_s64 (int64x1_t __a)
- {
-- return (poly8x8_t) __builtin_aarch64_reinterpretv8qidi (__a);
-+ return (poly8x8_t) __a;
- }
-
- __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
- vreinterpret_p8_f32 (float32x2_t __a)
- {
-- return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a);
-+ return (poly8x8_t) __a;
- }
-
- __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
- vreinterpret_p8_u8 (uint8x8_t __a)
- {
-- return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
-+ return (poly8x8_t) __a;
- }
-
- __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
- vreinterpret_p8_u16 (uint16x4_t __a)
- {
-- return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
-+ return (poly8x8_t) __a;
- }
-
- __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
- vreinterpret_p8_u32 (uint32x2_t __a)
- {
-- return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a);
-+ return (poly8x8_t) __a;
- }
-
- __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
- vreinterpret_p8_u64 (uint64x1_t __a)
- {
-- return (poly8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a);
-+ return (poly8x8_t) __a;
- }
-
- __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
- vreinterpret_p8_p16 (poly16x4_t __a)
- {
-- return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
-+ return (poly8x8_t) __a;
- }
-
- __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-+vreinterpretq_p8_f64 (float64x2_t __a)
-+{
-+ return (poly8x16_t) __a;
-+}
-+
-+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
- vreinterpretq_p8_s8 (int8x16_t __a)
- {
-- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv16qi (__a);
-+ return (poly8x16_t) __a;
- }
-
- __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
- vreinterpretq_p8_s16 (int16x8_t __a)
- {
-- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a);
-+ return (poly8x16_t) __a;
- }
-
- __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
- vreinterpretq_p8_s32 (int32x4_t __a)
- {
-- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a);
-+ return (poly8x16_t) __a;
- }
-
- __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
- vreinterpretq_p8_s64 (int64x2_t __a)
- {
-- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a);
-+ return (poly8x16_t) __a;
- }
-
- __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
- vreinterpretq_p8_f32 (float32x4_t __a)
- {
-- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a);
-+ return (poly8x16_t) __a;
- }
-
- __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
- vreinterpretq_p8_u8 (uint8x16_t __a)
- {
-- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
-- __a);
-+ return (poly8x16_t) __a;
- }
-
- __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
- vreinterpretq_p8_u16 (uint16x8_t __a)
- {
-- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
-- __a);
-+ return (poly8x16_t) __a;
- }
-
- __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
- vreinterpretq_p8_u32 (uint32x4_t __a)
- {
-- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t)
-- __a);
-+ return (poly8x16_t) __a;
- }
-
- __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
- vreinterpretq_p8_u64 (uint64x2_t __a)
- {
-- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t)
-- __a);
-+ return (poly8x16_t) __a;
- }
-
- __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
- vreinterpretq_p8_p16 (poly16x8_t __a)
- {
-- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
-- __a);
-+ return (poly8x16_t) __a;
- }
-
- __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-+vreinterpret_p16_f64 (float64x1_t __a)
-+{
-+ return __builtin_aarch64_reinterpretv4hidf_ps (__a);
-+}
-+
-+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
- vreinterpret_p16_s8 (int8x8_t __a)
- {
-- return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a);
-+ return (poly16x4_t) __a;
- }
-
- __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
- vreinterpret_p16_s16 (int16x4_t __a)
- {
-- return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv4hi (__a);
-+ return (poly16x4_t) __a;
- }
-
- __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
- vreinterpret_p16_s32 (int32x2_t __a)
- {
-- return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a);
-+ return (poly16x4_t) __a;
- }
-
- __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
- vreinterpret_p16_s64 (int64x1_t __a)
- {
-- return (poly16x4_t) __builtin_aarch64_reinterpretv4hidi (__a);
-+ return (poly16x4_t) __a;
- }
-
- __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
- vreinterpret_p16_f32 (float32x2_t __a)
- {
-- return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a);
-+ return (poly16x4_t) __a;
- }
-
- __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
- vreinterpret_p16_u8 (uint8x8_t __a)
- {
-- return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
-+ return (poly16x4_t) __a;
- }
-
- __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
- vreinterpret_p16_u16 (uint16x4_t __a)
- {
-- return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
-+ return (poly16x4_t) __a;
- }
-
- __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
- vreinterpret_p16_u32 (uint32x2_t __a)
- {
-- return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a);
-+ return (poly16x4_t) __a;
- }
-
- __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
- vreinterpret_p16_u64 (uint64x1_t __a)
- {
-- return (poly16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a);
-+ return (poly16x4_t) __a;
- }
-
- __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
- vreinterpret_p16_p8 (poly8x8_t __a)
- {
-- return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
-+ return (poly16x4_t) __a;
- }
-
- __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-+vreinterpretq_p16_f64 (float64x2_t __a)
-+{
-+ return (poly16x8_t) __a;
-+}
-+
-+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
- vreinterpretq_p16_s8 (int8x16_t __a)
- {
-- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a);
-+ return (poly16x8_t) __a;
- }
-
- __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
- vreinterpretq_p16_s16 (int16x8_t __a)
- {
-- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv8hi (__a);
-+ return (poly16x8_t) __a;
- }
-
- __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
- vreinterpretq_p16_s32 (int32x4_t __a)
- {
-- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a);
-+ return (poly16x8_t) __a;
- }
-
- __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
- vreinterpretq_p16_s64 (int64x2_t __a)
- {
-- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a);
-+ return (poly16x8_t) __a;
- }
-
- __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
- vreinterpretq_p16_f32 (float32x4_t __a)
- {
-- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a);
-+ return (poly16x8_t) __a;
- }
-
- __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
- vreinterpretq_p16_u8 (uint8x16_t __a)
- {
-- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
-- __a);
-+ return (poly16x8_t) __a;
- }
-
- __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
- vreinterpretq_p16_u16 (uint16x8_t __a)
- {
-- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
-+ return (poly16x8_t) __a;
- }
-
- __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
- vreinterpretq_p16_u32 (uint32x4_t __a)
- {
-- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a);
-+ return (poly16x8_t) __a;
- }
-
- __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
- vreinterpretq_p16_u64 (uint64x2_t __a)
- {
-- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a);
-+ return (poly16x8_t) __a;
- }
-
- __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
- vreinterpretq_p16_p8 (poly8x16_t __a)
- {
-- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
-- __a);
-+ return (poly16x8_t) __a;
- }
-
- __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-+vreinterpret_f32_f64 (float64x1_t __a)
-+{
-+ return __builtin_aarch64_reinterpretv2sfdf (__a);
-+}
-+
-+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
- vreinterpret_f32_s8 (int8x8_t __a)
- {
-- return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi (__a);
-+ return (float32x2_t) __a;
- }
-
- __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
- vreinterpret_f32_s16 (int16x4_t __a)
- {
-- return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi (__a);
-+ return (float32x2_t) __a;
- }
-
- __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
- vreinterpret_f32_s32 (int32x2_t __a)
- {
-- return (float32x2_t) __builtin_aarch64_reinterpretv2sfv2si (__a);
-+ return (float32x2_t) __a;
- }
-
- __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
- vreinterpret_f32_s64 (int64x1_t __a)
- {
-- return (float32x2_t) __builtin_aarch64_reinterpretv2sfdi (__a);
-+ return (float32x2_t) __a;
- }
-
- __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
- vreinterpret_f32_u8 (uint8x8_t __a)
- {
-- return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi ((int8x8_t) __a);
-+ return (float32x2_t) __a;
- }
-
- __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
- vreinterpret_f32_u16 (uint16x4_t __a)
- {
-- return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi ((int16x4_t)
-- __a);
-+ return (float32x2_t) __a;
- }
-
- __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
- vreinterpret_f32_u32 (uint32x2_t __a)
- {
-- return (float32x2_t) __builtin_aarch64_reinterpretv2sfv2si ((int32x2_t)
-- __a);
-+ return (float32x2_t) __a;
- }
-
- __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
- vreinterpret_f32_u64 (uint64x1_t __a)
- {
-- return (float32x2_t) __builtin_aarch64_reinterpretv2sfdi ((int64x1_t) __a);
-+ return (float32x2_t) __a;
- }
-
- __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
- vreinterpret_f32_p8 (poly8x8_t __a)
- {
-- return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi ((int8x8_t) __a);
-+ return (float32x2_t) __a;
- }
-
- __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
- vreinterpret_f32_p16 (poly16x4_t __a)
- {
-- return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi ((int16x4_t)
-- __a);
-+ return (float32x2_t) __a;
- }
-
- __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-+vreinterpretq_f32_f64 (float64x2_t __a)
-+{
-+ return (float32x4_t) __a;
-+}
-+
-+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
- vreinterpretq_f32_s8 (int8x16_t __a)
- {
-- return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi (__a);
-+ return (float32x4_t) __a;
- }
-
- __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
- vreinterpretq_f32_s16 (int16x8_t __a)
- {
-- return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi (__a);
-+ return (float32x4_t) __a;
- }
-
- __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
- vreinterpretq_f32_s32 (int32x4_t __a)
- {
-- return (float32x4_t) __builtin_aarch64_reinterpretv4sfv4si (__a);
-+ return (float32x4_t) __a;
- }
-
- __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
- vreinterpretq_f32_s64 (int64x2_t __a)
- {
-- return (float32x4_t) __builtin_aarch64_reinterpretv4sfv2di (__a);
-+ return (float32x4_t) __a;
- }
-
- __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
- vreinterpretq_f32_u8 (uint8x16_t __a)
- {
-- return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi ((int8x16_t)
-- __a);
-+ return (float32x4_t) __a;
- }
-
- __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
- vreinterpretq_f32_u16 (uint16x8_t __a)
- {
-- return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi ((int16x8_t)
-- __a);
-+ return (float32x4_t) __a;
- }
-
- __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
- vreinterpretq_f32_u32 (uint32x4_t __a)
- {
-- return (float32x4_t) __builtin_aarch64_reinterpretv4sfv4si ((int32x4_t)
-- __a);
-+ return (float32x4_t) __a;
- }
-
- __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
- vreinterpretq_f32_u64 (uint64x2_t __a)
- {
-- return (float32x4_t) __builtin_aarch64_reinterpretv4sfv2di ((int64x2_t)
-- __a);
-+ return (float32x4_t) __a;
- }
-
- __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
- vreinterpretq_f32_p8 (poly8x16_t __a)
- {
-- return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi ((int8x16_t)
-- __a);
-+ return (float32x4_t) __a;
- }
-
- __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
- vreinterpretq_f32_p16 (poly16x8_t __a)
- {
-- return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi ((int16x8_t)
-- __a);
-+ return (float32x4_t) __a;
- }
-
-+__extension__ static __inline float64x1_t __attribute__((__always_inline__))
-+vreinterpret_f64_f32 (float32x2_t __a)
-+{
-+ return __builtin_aarch64_reinterpretdfv2sf (__a);
-+}
-+
-+__extension__ static __inline float64x1_t __attribute__((__always_inline__))
-+vreinterpret_f64_p8 (poly8x8_t __a)
-+{
-+ return __builtin_aarch64_reinterpretdfv8qi_sp (__a);
-+}
-+
-+__extension__ static __inline float64x1_t __attribute__((__always_inline__))
-+vreinterpret_f64_p16 (poly16x4_t __a)
-+{
-+ return __builtin_aarch64_reinterpretdfv4hi_sp (__a);
-+}
-+
-+__extension__ static __inline float64x1_t __attribute__((__always_inline__))
-+vreinterpret_f64_s8 (int8x8_t __a)
-+{
-+ return __builtin_aarch64_reinterpretdfv8qi (__a);
-+}
-+
-+__extension__ static __inline float64x1_t __attribute__((__always_inline__))
-+vreinterpret_f64_s16 (int16x4_t __a)
-+{
-+ return __builtin_aarch64_reinterpretdfv4hi (__a);
-+}
-+
-+__extension__ static __inline float64x1_t __attribute__((__always_inline__))
-+vreinterpret_f64_s32 (int32x2_t __a)
-+{
-+ return __builtin_aarch64_reinterpretdfv2si (__a);
-+}
-+
-+__extension__ static __inline float64x1_t __attribute__((__always_inline__))
-+vreinterpret_f64_s64 (int64x1_t __a)
-+{
-+ return __builtin_aarch64_createdf ((uint64_t) vget_lane_s64 (__a, 0));
-+}
-+
-+__extension__ static __inline float64x1_t __attribute__((__always_inline__))
-+vreinterpret_f64_u8 (uint8x8_t __a)
-+{
-+ return __builtin_aarch64_reinterpretdfv8qi_su (__a);
-+}
-+
-+__extension__ static __inline float64x1_t __attribute__((__always_inline__))
-+vreinterpret_f64_u16 (uint16x4_t __a)
-+{
-+ return __builtin_aarch64_reinterpretdfv4hi_su (__a);
-+}
-+
-+__extension__ static __inline float64x1_t __attribute__((__always_inline__))
-+vreinterpret_f64_u32 (uint32x2_t __a)
-+{
-+ return __builtin_aarch64_reinterpretdfv2si_su (__a);
-+}
-+
-+__extension__ static __inline float64x1_t __attribute__((__always_inline__))
-+vreinterpret_f64_u64 (uint64x1_t __a)
-+{
-+ return __builtin_aarch64_createdf (vget_lane_u64 (__a, 0));
-+}
-+
-+__extension__ static __inline float64x2_t __attribute__((__always_inline__))
-+vreinterpretq_f64_f32 (float32x4_t __a)
-+{
-+ return (float64x2_t) __a;
-+}
-+
-+__extension__ static __inline float64x2_t __attribute__((__always_inline__))
-+vreinterpretq_f64_p8 (poly8x16_t __a)
-+{
-+ return (float64x2_t) __a;
-+}
-+
-+__extension__ static __inline float64x2_t __attribute__((__always_inline__))
-+vreinterpretq_f64_p16 (poly16x8_t __a)
-+{
-+ return (float64x2_t) __a;
-+}
-+
-+__extension__ static __inline float64x2_t __attribute__((__always_inline__))
-+vreinterpretq_f64_s8 (int8x16_t __a)
-+{
-+ return (float64x2_t) __a;
-+}
-+
-+__extension__ static __inline float64x2_t __attribute__((__always_inline__))
-+vreinterpretq_f64_s16 (int16x8_t __a)
-+{
-+ return (float64x2_t) __a;
-+}
-+
-+__extension__ static __inline float64x2_t __attribute__((__always_inline__))
-+vreinterpretq_f64_s32 (int32x4_t __a)
-+{
-+ return (float64x2_t) __a;
-+}
-+
-+__extension__ static __inline float64x2_t __attribute__((__always_inline__))
-+vreinterpretq_f64_s64 (int64x2_t __a)
-+{
-+ return (float64x2_t) __a;
-+}
-+
-+__extension__ static __inline float64x2_t __attribute__((__always_inline__))
-+vreinterpretq_f64_u8 (uint8x16_t __a)
-+{
-+ return (float64x2_t) __a;
-+}
-+
-+__extension__ static __inline float64x2_t __attribute__((__always_inline__))
-+vreinterpretq_f64_u16 (uint16x8_t __a)
-+{
-+ return (float64x2_t) __a;
-+}
-+
-+__extension__ static __inline float64x2_t __attribute__((__always_inline__))
-+vreinterpretq_f64_u32 (uint32x4_t __a)
-+{
-+ return (float64x2_t) __a;
-+}
-+
-+__extension__ static __inline float64x2_t __attribute__((__always_inline__))
-+vreinterpretq_f64_u64 (uint64x2_t __a)
-+{
-+ return (float64x2_t) __a;
-+}
-+
- __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-+vreinterpret_s64_f64 (float64x1_t __a)
-+{
-+ return __builtin_aarch64_reinterpretdidf (__a);
-+}
-+
-+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
- vreinterpret_s64_s8 (int8x8_t __a)
- {
-- return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi (__a);
-+ return (int64x1_t) __a;
- }
-
- __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
- vreinterpret_s64_s16 (int16x4_t __a)
- {
-- return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi (__a);
-+ return (int64x1_t) __a;
- }
-
- __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
- vreinterpret_s64_s32 (int32x2_t __a)
- {
-- return (int64x1_t) __builtin_aarch64_reinterpretdiv2si (__a);
-+ return (int64x1_t) __a;
- }
-
- __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
- vreinterpret_s64_f32 (float32x2_t __a)
- {
-- return (int64x1_t) __builtin_aarch64_reinterpretdiv2sf (__a);
-+ return (int64x1_t) __a;
- }
-
- __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
- vreinterpret_s64_u8 (uint8x8_t __a)
- {
-- return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
-+ return (int64x1_t) __a;
- }
-
- __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
- vreinterpret_s64_u16 (uint16x4_t __a)
- {
-- return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
-+ return (int64x1_t) __a;
- }
-
- __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
- vreinterpret_s64_u32 (uint32x2_t __a)
- {
-- return (int64x1_t) __builtin_aarch64_reinterpretdiv2si ((int32x2_t) __a);
-+ return (int64x1_t) __a;
- }
-
- __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
- vreinterpret_s64_u64 (uint64x1_t __a)
- {
-- return (int64x1_t) __builtin_aarch64_reinterpretdidi ((int64x1_t) __a);
-+ return (int64x1_t) __a;
- }
-
- __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
- vreinterpret_s64_p8 (poly8x8_t __a)
- {
-- return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
-+ return (int64x1_t) __a;
- }
-
- __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
- vreinterpret_s64_p16 (poly16x4_t __a)
- {
-- return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
-+ return (int64x1_t) __a;
- }
-
- __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-+vreinterpretq_s64_f64 (float64x2_t __a)
-+{
-+ return (int64x2_t) __a;
-+}
-+
-+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
- vreinterpretq_s64_s8 (int8x16_t __a)
- {
-- return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi (__a);
-+ return (int64x2_t) __a;
- }
-
- __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
- vreinterpretq_s64_s16 (int16x8_t __a)
- {
-- return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi (__a);
-+ return (int64x2_t) __a;
- }
-
- __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
- vreinterpretq_s64_s32 (int32x4_t __a)
- {
-- return (int64x2_t) __builtin_aarch64_reinterpretv2div4si (__a);
-+ return (int64x2_t) __a;
- }
-
- __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
- vreinterpretq_s64_f32 (float32x4_t __a)
- {
-- return (int64x2_t) __builtin_aarch64_reinterpretv2div4sf (__a);
-+ return (int64x2_t) __a;
- }
-
- __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
- vreinterpretq_s64_u8 (uint8x16_t __a)
- {
-- return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) __a);
-+ return (int64x2_t) __a;
- }
-
- __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
- vreinterpretq_s64_u16 (uint16x8_t __a)
- {
-- return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
-+ return (int64x2_t) __a;
- }
-
- __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
- vreinterpretq_s64_u32 (uint32x4_t __a)
- {
-- return (int64x2_t) __builtin_aarch64_reinterpretv2div4si ((int32x4_t) __a);
-+ return (int64x2_t) __a;
- }
-
- __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
- vreinterpretq_s64_u64 (uint64x2_t __a)
- {
-- return (int64x2_t) __builtin_aarch64_reinterpretv2div2di ((int64x2_t) __a);
-+ return (int64x2_t) __a;
- }
-
- __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
- vreinterpretq_s64_p8 (poly8x16_t __a)
- {
-- return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) __a);
-+ return (int64x2_t) __a;
- }
-
- __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
- vreinterpretq_s64_p16 (poly16x8_t __a)
- {
-- return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
-+ return (int64x2_t) __a;
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-+vreinterpret_u64_f64 (float64x1_t __a)
-+{
-+ return __builtin_aarch64_reinterpretdidf_us (__a);
-+}
-+
-+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
- vreinterpret_u64_s8 (int8x8_t __a)
- {
-- return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi (__a);
-+ return (uint64x1_t) __a;
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
- vreinterpret_u64_s16 (int16x4_t __a)
- {
-- return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi (__a);
-+ return (uint64x1_t) __a;
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
- vreinterpret_u64_s32 (int32x2_t __a)
- {
-- return (uint64x1_t) __builtin_aarch64_reinterpretdiv2si (__a);
-+ return (uint64x1_t) __a;
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
- vreinterpret_u64_s64 (int64x1_t __a)
- {
-- return (uint64x1_t) __builtin_aarch64_reinterpretdidi (__a);
-+ return (uint64x1_t) __a;
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
- vreinterpret_u64_f32 (float32x2_t __a)
- {
-- return (uint64x1_t) __builtin_aarch64_reinterpretdiv2sf (__a);
-+ return (uint64x1_t) __a;
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
- vreinterpret_u64_u8 (uint8x8_t __a)
- {
-- return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
-+ return (uint64x1_t) __a;
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
- vreinterpret_u64_u16 (uint16x4_t __a)
- {
-- return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
-+ return (uint64x1_t) __a;
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
- vreinterpret_u64_u32 (uint32x2_t __a)
- {
-- return (uint64x1_t) __builtin_aarch64_reinterpretdiv2si ((int32x2_t) __a);
-+ return (uint64x1_t) __a;
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
- vreinterpret_u64_p8 (poly8x8_t __a)
- {
-- return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
-+ return (uint64x1_t) __a;
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
- vreinterpret_u64_p16 (poly16x4_t __a)
- {
-- return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
-+ return (uint64x1_t) __a;
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-+vreinterpretq_u64_f64 (float64x2_t __a)
-+{
-+ return (uint64x2_t) __a;
-+}
-+
-+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vreinterpretq_u64_s8 (int8x16_t __a)
- {
-- return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi (__a);
-+ return (uint64x2_t) __a;
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vreinterpretq_u64_s16 (int16x8_t __a)
- {
-- return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi (__a);
-+ return (uint64x2_t) __a;
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vreinterpretq_u64_s32 (int32x4_t __a)
- {
-- return (uint64x2_t) __builtin_aarch64_reinterpretv2div4si (__a);
-+ return (uint64x2_t) __a;
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vreinterpretq_u64_s64 (int64x2_t __a)
- {
-- return (uint64x2_t) __builtin_aarch64_reinterpretv2div2di (__a);
-+ return (uint64x2_t) __a;
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vreinterpretq_u64_f32 (float32x4_t __a)
- {
-- return (uint64x2_t) __builtin_aarch64_reinterpretv2div4sf (__a);
-+ return (uint64x2_t) __a;
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vreinterpretq_u64_u8 (uint8x16_t __a)
- {
-- return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t)
-- __a);
-+ return (uint64x2_t) __a;
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vreinterpretq_u64_u16 (uint16x8_t __a)
- {
-- return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
-+ return (uint64x2_t) __a;
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vreinterpretq_u64_u32 (uint32x4_t __a)
- {
-- return (uint64x2_t) __builtin_aarch64_reinterpretv2div4si ((int32x4_t) __a);
-+ return (uint64x2_t) __a;
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vreinterpretq_u64_p8 (poly8x16_t __a)
- {
-- return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t)
-- __a);
-+ return (uint64x2_t) __a;
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vreinterpretq_u64_p16 (poly16x8_t __a)
- {
-- return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
-+ return (uint64x2_t) __a;
- }
-
- __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+vreinterpret_s8_f64 (float64x1_t __a)
-+{
-+ return __builtin_aarch64_reinterpretv8qidf (__a);
-+}
-+
-+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
- vreinterpret_s8_s16 (int16x4_t __a)
- {
-- return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a);
-+ return (int8x8_t) __a;
- }
-
- __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
- vreinterpret_s8_s32 (int32x2_t __a)
- {
-- return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a);
-+ return (int8x8_t) __a;
- }
-
- __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
- vreinterpret_s8_s64 (int64x1_t __a)
- {
-- return (int8x8_t) __builtin_aarch64_reinterpretv8qidi (__a);
-+ return (int8x8_t) __a;
- }
-
- __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
- vreinterpret_s8_f32 (float32x2_t __a)
- {
-- return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a);
-+ return (int8x8_t) __a;
- }
-
- __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
- vreinterpret_s8_u8 (uint8x8_t __a)
- {
-- return (int8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
-+ return (int8x8_t) __a;
- }
-
- __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
- vreinterpret_s8_u16 (uint16x4_t __a)
- {
-- return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
-+ return (int8x8_t) __a;
- }
-
- __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
- vreinterpret_s8_u32 (uint32x2_t __a)
- {
-- return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a);
-+ return (int8x8_t) __a;
- }
-
- __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
- vreinterpret_s8_u64 (uint64x1_t __a)
- {
-- return (int8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a);
-+ return (int8x8_t) __a;
- }
-
- __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
- vreinterpret_s8_p8 (poly8x8_t __a)
- {
-- return (int8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
-+ return (int8x8_t) __a;
- }
-
- __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
- vreinterpret_s8_p16 (poly16x4_t __a)
- {
-- return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
-+ return (int8x8_t) __a;
- }
-
- __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-+vreinterpretq_s8_f64 (float64x2_t __a)
-+{
-+ return (int8x16_t) __a;
-+}
-+
-+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
- vreinterpretq_s8_s16 (int16x8_t __a)
- {
-- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a);
-+ return (int8x16_t) __a;
- }
-
- __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
- vreinterpretq_s8_s32 (int32x4_t __a)
- {
-- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a);
-+ return (int8x16_t) __a;
- }
-
- __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
- vreinterpretq_s8_s64 (int64x2_t __a)
- {
-- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a);
-+ return (int8x16_t) __a;
- }
-
- __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
- vreinterpretq_s8_f32 (float32x4_t __a)
- {
-- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a);
-+ return (int8x16_t) __a;
- }
-
- __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
- vreinterpretq_s8_u8 (uint8x16_t __a)
- {
-- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
-- __a);
-+ return (int8x16_t) __a;
- }
-
- __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
- vreinterpretq_s8_u16 (uint16x8_t __a)
- {
-- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) __a);
-+ return (int8x16_t) __a;
- }
-
- __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
- vreinterpretq_s8_u32 (uint32x4_t __a)
- {
-- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t) __a);
-+ return (int8x16_t) __a;
- }
-
- __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
- vreinterpretq_s8_u64 (uint64x2_t __a)
- {
-- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t) __a);
-+ return (int8x16_t) __a;
- }
-
- __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
- vreinterpretq_s8_p8 (poly8x16_t __a)
- {
-- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
-- __a);
-+ return (int8x16_t) __a;
- }
-
- __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
- vreinterpretq_s8_p16 (poly16x8_t __a)
- {
-- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) __a);
-+ return (int8x16_t) __a;
- }
-
- __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+vreinterpret_s16_f64 (float64x1_t __a)
-+{
-+ return __builtin_aarch64_reinterpretv4hidf (__a);
-+}
-+
-+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
- vreinterpret_s16_s8 (int8x8_t __a)
- {
-- return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a);
-+ return (int16x4_t) __a;
- }
-
- __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
- vreinterpret_s16_s32 (int32x2_t __a)
- {
-- return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a);
-+ return (int16x4_t) __a;
- }
-
- __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
- vreinterpret_s16_s64 (int64x1_t __a)
- {
-- return (int16x4_t) __builtin_aarch64_reinterpretv4hidi (__a);
-+ return (int16x4_t) __a;
- }
-
- __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
- vreinterpret_s16_f32 (float32x2_t __a)
- {
-- return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a);
-+ return (int16x4_t) __a;
- }
-
- __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
- vreinterpret_s16_u8 (uint8x8_t __a)
- {
-- return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
-+ return (int16x4_t) __a;
- }
-
- __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
- vreinterpret_s16_u16 (uint16x4_t __a)
- {
-- return (int16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
-+ return (int16x4_t) __a;
- }
-
- __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
- vreinterpret_s16_u32 (uint32x2_t __a)
- {
-- return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a);
-+ return (int16x4_t) __a;
- }
-
- __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
- vreinterpret_s16_u64 (uint64x1_t __a)
- {
-- return (int16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a);
-+ return (int16x4_t) __a;
- }
-
- __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
- vreinterpret_s16_p8 (poly8x8_t __a)
- {
-- return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
-+ return (int16x4_t) __a;
- }
-
- __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
- vreinterpret_s16_p16 (poly16x4_t __a)
- {
-- return (int16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
-+ return (int16x4_t) __a;
- }
-
- __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-+vreinterpretq_s16_f64 (float64x2_t __a)
-+{
-+ return (int16x8_t) __a;
-+}
-+
-+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
- vreinterpretq_s16_s8 (int8x16_t __a)
- {
-- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a);
-+ return (int16x8_t) __a;
- }
-
- __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
- vreinterpretq_s16_s32 (int32x4_t __a)
- {
-- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a);
-+ return (int16x8_t) __a;
- }
-
- __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
- vreinterpretq_s16_s64 (int64x2_t __a)
- {
-- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a);
-+ return (int16x8_t) __a;
- }
-
- __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
- vreinterpretq_s16_f32 (float32x4_t __a)
- {
-- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a);
-+ return (int16x8_t) __a;
- }
-
- __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
- vreinterpretq_s16_u8 (uint8x16_t __a)
- {
-- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) __a);
-+ return (int16x8_t) __a;
- }
-
- __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
- vreinterpretq_s16_u16 (uint16x8_t __a)
- {
-- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
-+ return (int16x8_t) __a;
- }
-
- __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
- vreinterpretq_s16_u32 (uint32x4_t __a)
- {
-- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a);
-+ return (int16x8_t) __a;
- }
-
- __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
- vreinterpretq_s16_u64 (uint64x2_t __a)
- {
-- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a);
-+ return (int16x8_t) __a;
- }
-
- __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
- vreinterpretq_s16_p8 (poly8x16_t __a)
- {
-- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) __a);
-+ return (int16x8_t) __a;
- }
-
- __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
- vreinterpretq_s16_p16 (poly16x8_t __a)
- {
-- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
-+ return (int16x8_t) __a;
- }
-
- __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+vreinterpret_s32_f64 (float64x1_t __a)
-+{
-+ return __builtin_aarch64_reinterpretv2sidf (__a);
-+}
-+
-+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
- vreinterpret_s32_s8 (int8x8_t __a)
- {
-- return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi (__a);
-+ return (int32x2_t) __a;
- }
-
- __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
- vreinterpret_s32_s16 (int16x4_t __a)
- {
-- return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi (__a);
-+ return (int32x2_t) __a;
- }
-
- __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
- vreinterpret_s32_s64 (int64x1_t __a)
- {
-- return (int32x2_t) __builtin_aarch64_reinterpretv2sidi (__a);
-+ return (int32x2_t) __a;
- }
-
- __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
- vreinterpret_s32_f32 (float32x2_t __a)
- {
-- return (int32x2_t) __builtin_aarch64_reinterpretv2siv2sf (__a);
-+ return (int32x2_t) __a;
- }
-
- __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
- vreinterpret_s32_u8 (uint8x8_t __a)
- {
-- return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
-+ return (int32x2_t) __a;
- }
-
- __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
- vreinterpret_s32_u16 (uint16x4_t __a)
- {
-- return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
-+ return (int32x2_t) __a;
- }
-
- __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
- vreinterpret_s32_u32 (uint32x2_t __a)
- {
-- return (int32x2_t) __builtin_aarch64_reinterpretv2siv2si ((int32x2_t) __a);
-+ return (int32x2_t) __a;
- }
-
- __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
- vreinterpret_s32_u64 (uint64x1_t __a)
- {
-- return (int32x2_t) __builtin_aarch64_reinterpretv2sidi ((int64x1_t) __a);
-+ return (int32x2_t) __a;
- }
-
- __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
- vreinterpret_s32_p8 (poly8x8_t __a)
- {
-- return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
-+ return (int32x2_t) __a;
- }
-
- __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
- vreinterpret_s32_p16 (poly16x4_t __a)
- {
-- return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
-+ return (int32x2_t) __a;
- }
-
- __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+vreinterpretq_s32_f64 (float64x2_t __a)
-+{
-+ return (int32x4_t) __a;
-+}
-+
-+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
- vreinterpretq_s32_s8 (int8x16_t __a)
- {
-- return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi (__a);
-+ return (int32x4_t) __a;
- }
-
- __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
- vreinterpretq_s32_s16 (int16x8_t __a)
- {
-- return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi (__a);
-+ return (int32x4_t) __a;
- }
-
- __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
- vreinterpretq_s32_s64 (int64x2_t __a)
- {
-- return (int32x4_t) __builtin_aarch64_reinterpretv4siv2di (__a);
-+ return (int32x4_t) __a;
- }
-
- __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
- vreinterpretq_s32_f32 (float32x4_t __a)
- {
-- return (int32x4_t) __builtin_aarch64_reinterpretv4siv4sf (__a);
-+ return (int32x4_t) __a;
- }
-
- __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
- vreinterpretq_s32_u8 (uint8x16_t __a)
- {
-- return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) __a);
-+ return (int32x4_t) __a;
- }
-
- __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
- vreinterpretq_s32_u16 (uint16x8_t __a)
- {
-- return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
-+ return (int32x4_t) __a;
- }
-
- __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
- vreinterpretq_s32_u32 (uint32x4_t __a)
- {
-- return (int32x4_t) __builtin_aarch64_reinterpretv4siv4si ((int32x4_t) __a);
-+ return (int32x4_t) __a;
- }
-
- __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
- vreinterpretq_s32_u64 (uint64x2_t __a)
- {
-- return (int32x4_t) __builtin_aarch64_reinterpretv4siv2di ((int64x2_t) __a);
-+ return (int32x4_t) __a;
- }
-
- __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
- vreinterpretq_s32_p8 (poly8x16_t __a)
- {
-- return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) __a);
-+ return (int32x4_t) __a;
- }
-
- __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
- vreinterpretq_s32_p16 (poly16x8_t __a)
- {
-- return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
-+ return (int32x4_t) __a;
- }
-
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+vreinterpret_u8_f64 (float64x1_t __a)
-+{
-+ return __builtin_aarch64_reinterpretv8qidf_us (__a);
-+}
-+
-+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vreinterpret_u8_s8 (int8x8_t __a)
- {
-- return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv8qi (__a);
-+ return (uint8x8_t) __a;
- }
-
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vreinterpret_u8_s16 (int16x4_t __a)
- {
-- return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a);
-+ return (uint8x8_t) __a;
- }
-
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vreinterpret_u8_s32 (int32x2_t __a)
- {
-- return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a);
-+ return (uint8x8_t) __a;
- }
-
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vreinterpret_u8_s64 (int64x1_t __a)
- {
-- return (uint8x8_t) __builtin_aarch64_reinterpretv8qidi (__a);
-+ return (uint8x8_t) __a;
- }
-
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vreinterpret_u8_f32 (float32x2_t __a)
- {
-- return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a);
-+ return (uint8x8_t) __a;
- }
-
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vreinterpret_u8_u16 (uint16x4_t __a)
- {
-- return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
-+ return (uint8x8_t) __a;
- }
-
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vreinterpret_u8_u32 (uint32x2_t __a)
- {
-- return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a);
-+ return (uint8x8_t) __a;
- }
-
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vreinterpret_u8_u64 (uint64x1_t __a)
- {
-- return (uint8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a);
-+ return (uint8x8_t) __a;
- }
-
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vreinterpret_u8_p8 (poly8x8_t __a)
- {
-- return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
-+ return (uint8x8_t) __a;
- }
-
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vreinterpret_u8_p16 (poly16x4_t __a)
- {
-- return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
-+ return (uint8x8_t) __a;
- }
-
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-+vreinterpretq_u8_f64 (float64x2_t __a)
-+{
-+ return (uint8x16_t) __a;
-+}
-+
-+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vreinterpretq_u8_s8 (int8x16_t __a)
- {
-- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv16qi (__a);
-+ return (uint8x16_t) __a;
- }
-
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vreinterpretq_u8_s16 (int16x8_t __a)
- {
-- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a);
-+ return (uint8x16_t) __a;
- }
-
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vreinterpretq_u8_s32 (int32x4_t __a)
- {
-- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a);
-+ return (uint8x16_t) __a;
- }
-
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vreinterpretq_u8_s64 (int64x2_t __a)
- {
-- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a);
-+ return (uint8x16_t) __a;
- }
-
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vreinterpretq_u8_f32 (float32x4_t __a)
- {
-- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a);
-+ return (uint8x16_t) __a;
- }
-
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vreinterpretq_u8_u16 (uint16x8_t __a)
- {
-- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
-- __a);
-+ return (uint8x16_t) __a;
- }
-
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vreinterpretq_u8_u32 (uint32x4_t __a)
- {
-- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t)
-- __a);
-+ return (uint8x16_t) __a;
- }
-
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vreinterpretq_u8_u64 (uint64x2_t __a)
- {
-- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t)
-- __a);
-+ return (uint8x16_t) __a;
- }
-
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vreinterpretq_u8_p8 (poly8x16_t __a)
- {
-- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
-- __a);
-+ return (uint8x16_t) __a;
- }
-
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vreinterpretq_u8_p16 (poly16x8_t __a)
- {
-- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
-- __a);
-+ return (uint8x16_t) __a;
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+vreinterpret_u16_f64 (float64x1_t __a)
-+{
-+ return __builtin_aarch64_reinterpretv4hidf_us (__a);
-+}
-+
-+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vreinterpret_u16_s8 (int8x8_t __a)
- {
-- return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a);
-+ return (uint16x4_t) __a;
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vreinterpret_u16_s16 (int16x4_t __a)
- {
-- return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv4hi (__a);
-+ return (uint16x4_t) __a;
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vreinterpret_u16_s32 (int32x2_t __a)
- {
-- return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a);
-+ return (uint16x4_t) __a;
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vreinterpret_u16_s64 (int64x1_t __a)
- {
-- return (uint16x4_t) __builtin_aarch64_reinterpretv4hidi (__a);
-+ return (uint16x4_t) __a;
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vreinterpret_u16_f32 (float32x2_t __a)
- {
-- return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a);
-+ return (uint16x4_t) __a;
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vreinterpret_u16_u8 (uint8x8_t __a)
- {
-- return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
-+ return (uint16x4_t) __a;
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vreinterpret_u16_u32 (uint32x2_t __a)
- {
-- return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a);
-+ return (uint16x4_t) __a;
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vreinterpret_u16_u64 (uint64x1_t __a)
- {
-- return (uint16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a);
-+ return (uint16x4_t) __a;
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vreinterpret_u16_p8 (poly8x8_t __a)
- {
-- return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
-+ return (uint16x4_t) __a;
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vreinterpret_u16_p16 (poly16x4_t __a)
- {
-- return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
-+ return (uint16x4_t) __a;
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-+vreinterpretq_u16_f64 (float64x2_t __a)
-+{
-+ return (uint16x8_t) __a;
-+}
-+
-+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vreinterpretq_u16_s8 (int8x16_t __a)
- {
-- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a);
-+ return (uint16x8_t) __a;
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vreinterpretq_u16_s16 (int16x8_t __a)
- {
-- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv8hi (__a);
-+ return (uint16x8_t) __a;
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vreinterpretq_u16_s32 (int32x4_t __a)
- {
-- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a);
-+ return (uint16x8_t) __a;
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vreinterpretq_u16_s64 (int64x2_t __a)
- {
-- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a);
-+ return (uint16x8_t) __a;
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vreinterpretq_u16_f32 (float32x4_t __a)
- {
-- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a);
-+ return (uint16x8_t) __a;
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vreinterpretq_u16_u8 (uint8x16_t __a)
- {
-- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
-- __a);
-+ return (uint16x8_t) __a;
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vreinterpretq_u16_u32 (uint32x4_t __a)
- {
-- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a);
-+ return (uint16x8_t) __a;
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vreinterpretq_u16_u64 (uint64x2_t __a)
- {
-- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a);
-+ return (uint16x8_t) __a;
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vreinterpretq_u16_p8 (poly8x16_t __a)
- {
-- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
-- __a);
-+ return (uint16x8_t) __a;
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vreinterpretq_u16_p16 (poly16x8_t __a)
- {
-- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
-+ return (uint16x8_t) __a;
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+vreinterpret_u32_f64 (float64x1_t __a)
-+{
-+ return __builtin_aarch64_reinterpretv2sidf_us (__a);
-+}
-+
-+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vreinterpret_u32_s8 (int8x8_t __a)
- {
-- return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi (__a);
-+ return (uint32x2_t) __a;
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vreinterpret_u32_s16 (int16x4_t __a)
- {
-- return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi (__a);
-+ return (uint32x2_t) __a;
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vreinterpret_u32_s32 (int32x2_t __a)
- {
-- return (uint32x2_t) __builtin_aarch64_reinterpretv2siv2si (__a);
-+ return (uint32x2_t) __a;
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vreinterpret_u32_s64 (int64x1_t __a)
- {
-- return (uint32x2_t) __builtin_aarch64_reinterpretv2sidi (__a);
-+ return (uint32x2_t) __a;
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vreinterpret_u32_f32 (float32x2_t __a)
- {
-- return (uint32x2_t) __builtin_aarch64_reinterpretv2siv2sf (__a);
-+ return (uint32x2_t) __a;
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vreinterpret_u32_u8 (uint8x8_t __a)
- {
-- return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
-+ return (uint32x2_t) __a;
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vreinterpret_u32_u16 (uint16x4_t __a)
- {
-- return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
-+ return (uint32x2_t) __a;
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vreinterpret_u32_u64 (uint64x1_t __a)
- {
-- return (uint32x2_t) __builtin_aarch64_reinterpretv2sidi ((int64x1_t) __a);
-+ return (uint32x2_t) __a;
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vreinterpret_u32_p8 (poly8x8_t __a)
- {
-- return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
-+ return (uint32x2_t) __a;
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vreinterpret_u32_p16 (poly16x4_t __a)
- {
-- return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
-+ return (uint32x2_t) __a;
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+vreinterpretq_u32_f64 (float64x2_t __a)
-+{
-+ return (uint32x4_t) __a;
-+}
-+
-+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vreinterpretq_u32_s8 (int8x16_t __a)
- {
-- return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi (__a);
-+ return (uint32x4_t) __a;
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vreinterpretq_u32_s16 (int16x8_t __a)
- {
-- return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi (__a);
-+ return (uint32x4_t) __a;
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vreinterpretq_u32_s32 (int32x4_t __a)
- {
-- return (uint32x4_t) __builtin_aarch64_reinterpretv4siv4si (__a);
-+ return (uint32x4_t) __a;
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vreinterpretq_u32_s64 (int64x2_t __a)
- {
-- return (uint32x4_t) __builtin_aarch64_reinterpretv4siv2di (__a);
-+ return (uint32x4_t) __a;
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vreinterpretq_u32_f32 (float32x4_t __a)
- {
-- return (uint32x4_t) __builtin_aarch64_reinterpretv4siv4sf (__a);
-+ return (uint32x4_t) __a;
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vreinterpretq_u32_u8 (uint8x16_t __a)
- {
-- return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t)
-- __a);
-+ return (uint32x4_t) __a;
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vreinterpretq_u32_u16 (uint16x8_t __a)
- {
-- return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
-+ return (uint32x4_t) __a;
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vreinterpretq_u32_u64 (uint64x2_t __a)
- {
-- return (uint32x4_t) __builtin_aarch64_reinterpretv4siv2di ((int64x2_t) __a);
-+ return (uint32x4_t) __a;
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vreinterpretq_u32_p8 (poly8x16_t __a)
- {
-- return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t)
-- __a);
-+ return (uint32x4_t) __a;
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vreinterpretq_u32_p16 (poly16x8_t __a)
- {
-- return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
-+ return (uint32x4_t) __a;
- }
-
- #define __GET_LOW(__TYPE) \
-@@ -4064,6 +4297,85 @@
-
- #undef __GET_LOW
-
-+#define __GET_HIGH(__TYPE) \
-+ uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a); \
-+ uint64x1_t hi = vcreate_u64 (vgetq_lane_u64 (tmp, 1)); \
-+ return vreinterpret_##__TYPE##_u64 (hi);
-+
-+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-+vget_high_f32 (float32x4_t __a)
-+{
-+ __GET_HIGH (f32);
-+}
-+
-+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
-+vget_high_f64 (float64x2_t __a)
-+{
-+ __GET_HIGH (f64);
-+}
-+
-+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-+vget_high_p8 (poly8x16_t __a)
-+{
-+ __GET_HIGH (p8);
-+}
-+
-+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-+vget_high_p16 (poly16x8_t __a)
-+{
-+ __GET_HIGH (p16);
-+}
-+
-+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+vget_high_s8 (int8x16_t __a)
-+{
-+ __GET_HIGH (s8);
-+}
-+
-+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+vget_high_s16 (int16x8_t __a)
-+{
-+ __GET_HIGH (s16);
-+}
-+
-+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+vget_high_s32 (int32x4_t __a)
-+{
-+ __GET_HIGH (s32);
-+}
-+
-+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-+vget_high_s64 (int64x2_t __a)
-+{
-+ __GET_HIGH (s64);
-+}
-+
-+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+vget_high_u8 (uint8x16_t __a)
-+{
-+ __GET_HIGH (u8);
-+}
-+
-+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+vget_high_u16 (uint16x8_t __a)
-+{
-+ __GET_HIGH (u16);
-+}
-+
-+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+vget_high_u32 (uint32x4_t __a)
-+{
-+ __GET_HIGH (u32);
-+}
-+
-+#undef __GET_HIGH
-+
-+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-+vget_high_u64 (uint64x2_t __a)
-+{
-+ return vcreate_u64 (vgetq_lane_u64 (__a, 1));
-+}
-+
- __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
- vcombine_s8 (int8x8_t __a, int8x8_t __b)
- {
-@@ -5408,318 +5720,6 @@
- return result;
- }
-
--#define vext_f32(a, b, c) \
-- __extension__ \
-- ({ \
-- float32x2_t b_ = (b); \
-- float32x2_t a_ = (a); \
-- float32x2_t result; \
-- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \
-- : "=w"(result) \
-- : "w"(a_), "w"(b_), "i"(c) \
-- : /* No clobbers */); \
-- result; \
-- })
--
--#define vext_f64(a, b, c) \
-- __extension__ \
-- ({ \
-- float64x1_t b_ = (b); \
-- float64x1_t a_ = (a); \
-- float64x1_t result; \
-- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \
-- : "=w"(result) \
-- : "w"(a_), "w"(b_), "i"(c) \
-- : /* No clobbers */); \
-- result; \
-- })
--
--#define vext_p8(a, b, c) \
-- __extension__ \
-- ({ \
-- poly8x8_t b_ = (b); \
-- poly8x8_t a_ = (a); \
-- poly8x8_t result; \
-- __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \
-- : "=w"(result) \
-- : "w"(a_), "w"(b_), "i"(c) \
-- : /* No clobbers */); \
-- result; \
-- })
--
--#define vext_p16(a, b, c) \
-- __extension__ \
-- ({ \
-- poly16x4_t b_ = (b); \
-- poly16x4_t a_ = (a); \
-- poly16x4_t result; \
-- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \
-- : "=w"(result) \
-- : "w"(a_), "w"(b_), "i"(c) \
-- : /* No clobbers */); \
-- result; \
-- })
--
--#define vext_s8(a, b, c) \
-- __extension__ \
-- ({ \
-- int8x8_t b_ = (b); \
-- int8x8_t a_ = (a); \
-- int8x8_t result; \
-- __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \
-- : "=w"(result) \
-- : "w"(a_), "w"(b_), "i"(c) \
-- : /* No clobbers */); \
-- result; \
-- })
--
--#define vext_s16(a, b, c) \
-- __extension__ \
-- ({ \
-- int16x4_t b_ = (b); \
-- int16x4_t a_ = (a); \
-- int16x4_t result; \
-- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \
-- : "=w"(result) \
-- : "w"(a_), "w"(b_), "i"(c) \
-- : /* No clobbers */); \
-- result; \
-- })
--
--#define vext_s32(a, b, c) \
-- __extension__ \
-- ({ \
-- int32x2_t b_ = (b); \
-- int32x2_t a_ = (a); \
-- int32x2_t result; \
-- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \
-- : "=w"(result) \
-- : "w"(a_), "w"(b_), "i"(c) \
-- : /* No clobbers */); \
-- result; \
-- })
--
--#define vext_s64(a, b, c) \
-- __extension__ \
-- ({ \
-- int64x1_t b_ = (b); \
-- int64x1_t a_ = (a); \
-- int64x1_t result; \
-- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \
-- : "=w"(result) \
-- : "w"(a_), "w"(b_), "i"(c) \
-- : /* No clobbers */); \
-- result; \
-- })
--
--#define vext_u8(a, b, c) \
-- __extension__ \
-- ({ \
-- uint8x8_t b_ = (b); \
-- uint8x8_t a_ = (a); \
-- uint8x8_t result; \
-- __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \
-- : "=w"(result) \
-- : "w"(a_), "w"(b_), "i"(c) \
-- : /* No clobbers */); \
-- result; \
-- })
--
--#define vext_u16(a, b, c) \
-- __extension__ \
-- ({ \
-- uint16x4_t b_ = (b); \
-- uint16x4_t a_ = (a); \
-- uint16x4_t result; \
-- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \
-- : "=w"(result) \
-- : "w"(a_), "w"(b_), "i"(c) \
-- : /* No clobbers */); \
-- result; \
-- })
--
--#define vext_u32(a, b, c) \
-- __extension__ \
-- ({ \
-- uint32x2_t b_ = (b); \
-- uint32x2_t a_ = (a); \
-- uint32x2_t result; \
-- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \
-- : "=w"(result) \
-- : "w"(a_), "w"(b_), "i"(c) \
-- : /* No clobbers */); \
-- result; \
-- })
--
--#define vext_u64(a, b, c) \
-- __extension__ \
-- ({ \
-- uint64x1_t b_ = (b); \
-- uint64x1_t a_ = (a); \
-- uint64x1_t result; \
-- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \
-- : "=w"(result) \
-- : "w"(a_), "w"(b_), "i"(c) \
-- : /* No clobbers */); \
-- result; \
-- })
--
--#define vextq_f32(a, b, c) \
-- __extension__ \
-- ({ \
-- float32x4_t b_ = (b); \
-- float32x4_t a_ = (a); \
-- float32x4_t result; \
-- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \
-- : "=w"(result) \
-- : "w"(a_), "w"(b_), "i"(c) \
-- : /* No clobbers */); \
-- result; \
-- })
--
--#define vextq_f64(a, b, c) \
-- __extension__ \
-- ({ \
-- float64x2_t b_ = (b); \
-- float64x2_t a_ = (a); \
-- float64x2_t result; \
-- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \
-- : "=w"(result) \
-- : "w"(a_), "w"(b_), "i"(c) \
-- : /* No clobbers */); \
-- result; \
-- })
--
--#define vextq_p8(a, b, c) \
-- __extension__ \
-- ({ \
-- poly8x16_t b_ = (b); \
-- poly8x16_t a_ = (a); \
-- poly8x16_t result; \
-- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \
-- : "=w"(result) \
-- : "w"(a_), "w"(b_), "i"(c) \
-- : /* No clobbers */); \
-- result; \
-- })
--
--#define vextq_p16(a, b, c) \
-- __extension__ \
-- ({ \
-- poly16x8_t b_ = (b); \
-- poly16x8_t a_ = (a); \
-- poly16x8_t result; \
-- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \
-- : "=w"(result) \
-- : "w"(a_), "w"(b_), "i"(c) \
-- : /* No clobbers */); \
-- result; \
-- })
--
--#define vextq_s8(a, b, c) \
-- __extension__ \
-- ({ \
-- int8x16_t b_ = (b); \
-- int8x16_t a_ = (a); \
-- int8x16_t result; \
-- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \
-- : "=w"(result) \
-- : "w"(a_), "w"(b_), "i"(c) \
-- : /* No clobbers */); \
-- result; \
-- })
--
--#define vextq_s16(a, b, c) \
-- __extension__ \
-- ({ \
-- int16x8_t b_ = (b); \
-- int16x8_t a_ = (a); \
-- int16x8_t result; \
-- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \
-- : "=w"(result) \
-- : "w"(a_), "w"(b_), "i"(c) \
-- : /* No clobbers */); \
-- result; \
-- })
--
--#define vextq_s32(a, b, c) \
-- __extension__ \
-- ({ \
-- int32x4_t b_ = (b); \
-- int32x4_t a_ = (a); \
-- int32x4_t result; \
-- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \
-- : "=w"(result) \
-- : "w"(a_), "w"(b_), "i"(c) \
-- : /* No clobbers */); \
-- result; \
-- })
--
--#define vextq_s64(a, b, c) \
-- __extension__ \
-- ({ \
-- int64x2_t b_ = (b); \
-- int64x2_t a_ = (a); \
-- int64x2_t result; \
-- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \
-- : "=w"(result) \
-- : "w"(a_), "w"(b_), "i"(c) \
-- : /* No clobbers */); \
-- result; \
-- })
--
--#define vextq_u8(a, b, c) \
-- __extension__ \
-- ({ \
-- uint8x16_t b_ = (b); \
-- uint8x16_t a_ = (a); \
-- uint8x16_t result; \
-- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \
-- : "=w"(result) \
-- : "w"(a_), "w"(b_), "i"(c) \
-- : /* No clobbers */); \
-- result; \
-- })
--
--#define vextq_u16(a, b, c) \
-- __extension__ \
-- ({ \
-- uint16x8_t b_ = (b); \
-- uint16x8_t a_ = (a); \
-- uint16x8_t result; \
-- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \
-- : "=w"(result) \
-- : "w"(a_), "w"(b_), "i"(c) \
-- : /* No clobbers */); \
-- result; \
-- })
--
--#define vextq_u32(a, b, c) \
-- __extension__ \
-- ({ \
-- uint32x4_t b_ = (b); \
-- uint32x4_t a_ = (a); \
-- uint32x4_t result; \
-- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \
-- : "=w"(result) \
-- : "w"(a_), "w"(b_), "i"(c) \
-- : /* No clobbers */); \
-- result; \
-- })
--
--#define vextq_u64(a, b, c) \
-- __extension__ \
-- ({ \
-- uint64x2_t b_ = (b); \
-- uint64x2_t a_ = (a); \
-- uint64x2_t result; \
-- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \
-- : "=w"(result) \
-- : "w"(a_), "w"(b_), "i"(c) \
-- : /* No clobbers */); \
-- result; \
-- })
--
- __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
- vfma_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
- {
-@@ -5819,139 +5819,7 @@
- return result;
- }
-
--__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
--vget_high_f32 (float32x4_t a)
--{
-- float32x2_t result;
-- __asm__ ("ins %0.d[0], %1.d[1]"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
--vget_high_f64 (float64x2_t a)
--{
-- float64x1_t result;
-- __asm__ ("ins %0.d[0], %1.d[1]"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
--vget_high_p8 (poly8x16_t a)
--{
-- poly8x8_t result;
-- __asm__ ("ins %0.d[0], %1.d[1]"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
--vget_high_p16 (poly16x8_t a)
--{
-- poly16x4_t result;
-- __asm__ ("ins %0.d[0], %1.d[1]"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
- __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
--vget_high_s8 (int8x16_t a)
--{
-- int8x8_t result;
-- __asm__ ("ins %0.d[0], %1.d[1]"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
--vget_high_s16 (int16x8_t a)
--{
-- int16x4_t result;
-- __asm__ ("ins %0.d[0], %1.d[1]"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
--vget_high_s32 (int32x4_t a)
--{
-- int32x2_t result;
-- __asm__ ("ins %0.d[0], %1.d[1]"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
--vget_high_s64 (int64x2_t a)
--{
-- int64x1_t result;
-- __asm__ ("ins %0.d[0], %1.d[1]"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
--vget_high_u8 (uint8x16_t a)
--{
-- uint8x8_t result;
-- __asm__ ("ins %0.d[0], %1.d[1]"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
--vget_high_u16 (uint16x8_t a)
--{
-- uint16x4_t result;
-- __asm__ ("ins %0.d[0], %1.d[1]"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
--vget_high_u32 (uint32x4_t a)
--{
-- uint32x2_t result;
-- __asm__ ("ins %0.d[0], %1.d[1]"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
--vget_high_u64 (uint64x2_t a)
--{
-- uint64x1_t result;
-- __asm__ ("ins %0.d[0], %1.d[1]"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
- vhsub_s8 (int8x8_t a, int8x8_t b)
- {
- int8x8_t result;
-@@ -6784,7 +6652,7 @@
- #define vmlal_high_lane_s16(a, b, c, d) \
- __extension__ \
- ({ \
-- int16x8_t c_ = (c); \
-+ int16x4_t c_ = (c); \
- int16x8_t b_ = (b); \
- int32x4_t a_ = (a); \
- int32x4_t result; \
-@@ -6798,7 +6666,7 @@
- #define vmlal_high_lane_s32(a, b, c, d) \
- __extension__ \
- ({ \
-- int32x4_t c_ = (c); \
-+ int32x2_t c_ = (c); \
- int32x4_t b_ = (b); \
- int64x2_t a_ = (a); \
- int64x2_t result; \
-@@ -6812,7 +6680,7 @@
- #define vmlal_high_lane_u16(a, b, c, d) \
- __extension__ \
- ({ \
-- uint16x8_t c_ = (c); \
-+ uint16x4_t c_ = (c); \
- uint16x8_t b_ = (b); \
- uint32x4_t a_ = (a); \
- uint32x4_t result; \
-@@ -6826,7 +6694,7 @@
- #define vmlal_high_lane_u32(a, b, c, d) \
- __extension__ \
- ({ \
-- uint32x4_t c_ = (c); \
-+ uint32x2_t c_ = (c); \
- uint32x4_t b_ = (b); \
- uint64x2_t a_ = (a); \
- uint64x2_t result; \
-@@ -7237,18 +7105,6 @@
- return result;
- }
-
--__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
--vmlaq_n_f64 (float64x2_t a, float64x2_t b, float64_t c)
--{
-- float64x2_t result;
-- float64x2_t t1;
-- __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fadd %0.2d, %0.2d, %1.2d"
-- : "=w"(result), "=w"(t1)
-- : "0"(a), "w"(b), "w"(c)
-- : /* No clobbers */);
-- return result;
--}
--
- __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
- vmlaq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
- {
-@@ -7484,7 +7340,7 @@
- #define vmlsl_high_lane_s16(a, b, c, d) \
- __extension__ \
- ({ \
-- int16x8_t c_ = (c); \
-+ int16x4_t c_ = (c); \
- int16x8_t b_ = (b); \
- int32x4_t a_ = (a); \
- int32x4_t result; \
-@@ -7498,7 +7354,7 @@
- #define vmlsl_high_lane_s32(a, b, c, d) \
- __extension__ \
- ({ \
-- int32x4_t c_ = (c); \
-+ int32x2_t c_ = (c); \
- int32x4_t b_ = (b); \
- int64x2_t a_ = (a); \
- int64x2_t result; \
-@@ -7512,7 +7368,7 @@
- #define vmlsl_high_lane_u16(a, b, c, d) \
- __extension__ \
- ({ \
-- uint16x8_t c_ = (c); \
-+ uint16x4_t c_ = (c); \
- uint16x8_t b_ = (b); \
- uint32x4_t a_ = (a); \
- uint32x4_t result; \
-@@ -7526,7 +7382,7 @@
- #define vmlsl_high_lane_u32(a, b, c, d) \
- __extension__ \
- ({ \
-- uint32x4_t c_ = (c); \
-+ uint32x2_t c_ = (c); \
- uint32x4_t b_ = (b); \
- uint64x2_t a_ = (a); \
- uint64x2_t result; \
-@@ -7937,18 +7793,6 @@
- return result;
- }
-
--__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
--vmlsq_n_f64 (float64x2_t a, float64x2_t b, float64_t c)
--{
-- float64x2_t result;
-- float64x2_t t1;
-- __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fsub %0.2d, %0.2d, %1.2d"
-- : "=w"(result), "=w"(t1)
-- : "0"(a), "w"(b), "x"(c)
-- : /* No clobbers */);
-- return result;
--}
--
- __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
- vmlsq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
- {
-@@ -9312,57 +9156,7 @@
- return result;
- }
-
--__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
--vpadd_s8 (int8x8_t __a, int8x8_t __b)
--{
-- return __builtin_aarch64_addpv8qi (__a, __b);
--}
--
- __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
--vpadd_s16 (int16x4_t __a, int16x4_t __b)
--{
-- return __builtin_aarch64_addpv4hi (__a, __b);
--}
--
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
--vpadd_s32 (int32x2_t __a, int32x2_t __b)
--{
-- return __builtin_aarch64_addpv2si (__a, __b);
--}
--
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
--vpadd_u8 (uint8x8_t __a, uint8x8_t __b)
--{
-- return (uint8x8_t) __builtin_aarch64_addpv8qi ((int8x8_t) __a,
-- (int8x8_t) __b);
--}
--
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
--vpadd_u16 (uint16x4_t __a, uint16x4_t __b)
--{
-- return (uint16x4_t) __builtin_aarch64_addpv4hi ((int16x4_t) __a,
-- (int16x4_t) __b);
--}
--
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
--vpadd_u32 (uint32x2_t __a, uint32x2_t __b)
--{
-- return (uint32x2_t) __builtin_aarch64_addpv2si ((int32x2_t) __a,
-- (int32x2_t) __b);
--}
--
--__extension__ static __inline float64_t __attribute__ ((__always_inline__))
--vpaddd_f64 (float64x2_t a)
--{
-- float64_t result;
-- __asm__ ("faddp %d0,%1.2d"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
- vpaddl_s8 (int8x8_t a)
- {
- int16x4_t result;
-@@ -10556,50 +10350,6 @@
- result; \
- })
-
--__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
--vrbit_s8 (int8x8_t a)
--{
-- int8x8_t result;
-- __asm__ ("rbit %0.8b,%1.8b"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
--vrbit_u8 (uint8x8_t a)
--{
-- uint8x8_t result;
-- __asm__ ("rbit %0.8b,%1.8b"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
--vrbitq_s8 (int8x16_t a)
--{
-- int8x16_t result;
-- __asm__ ("rbit %0.16b,%1.16b"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vrbitq_u8 (uint8x16_t a)
--{
-- uint8x16_t result;
-- __asm__ ("rbit %0.16b,%1.16b"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vrecpe_u32 (uint32x2_t a)
- {
-@@ -10622,402 +10372,6 @@
- return result;
- }
-
--__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
--vrev16_p8 (poly8x8_t a)
--{
-- poly8x8_t result;
-- __asm__ ("rev16 %0.8b,%1.8b"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
--vrev16_s8 (int8x8_t a)
--{
-- int8x8_t result;
-- __asm__ ("rev16 %0.8b,%1.8b"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
--vrev16_u8 (uint8x8_t a)
--{
-- uint8x8_t result;
-- __asm__ ("rev16 %0.8b,%1.8b"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
--vrev16q_p8 (poly8x16_t a)
--{
-- poly8x16_t result;
-- __asm__ ("rev16 %0.16b,%1.16b"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
--vrev16q_s8 (int8x16_t a)
--{
-- int8x16_t result;
-- __asm__ ("rev16 %0.16b,%1.16b"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vrev16q_u8 (uint8x16_t a)
--{
-- uint8x16_t result;
-- __asm__ ("rev16 %0.16b,%1.16b"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
--vrev32_p8 (poly8x8_t a)
--{
-- poly8x8_t result;
-- __asm__ ("rev32 %0.8b,%1.8b"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
--vrev32_p16 (poly16x4_t a)
--{
-- poly16x4_t result;
-- __asm__ ("rev32 %0.4h,%1.4h"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
--vrev32_s8 (int8x8_t a)
--{
-- int8x8_t result;
-- __asm__ ("rev32 %0.8b,%1.8b"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
--vrev32_s16 (int16x4_t a)
--{
-- int16x4_t result;
-- __asm__ ("rev32 %0.4h,%1.4h"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
--vrev32_u8 (uint8x8_t a)
--{
-- uint8x8_t result;
-- __asm__ ("rev32 %0.8b,%1.8b"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
--vrev32_u16 (uint16x4_t a)
--{
-- uint16x4_t result;
-- __asm__ ("rev32 %0.4h,%1.4h"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
--vrev32q_p8 (poly8x16_t a)
--{
-- poly8x16_t result;
-- __asm__ ("rev32 %0.16b,%1.16b"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
--vrev32q_p16 (poly16x8_t a)
--{
-- poly16x8_t result;
-- __asm__ ("rev32 %0.8h,%1.8h"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
--vrev32q_s8 (int8x16_t a)
--{
-- int8x16_t result;
-- __asm__ ("rev32 %0.16b,%1.16b"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
--vrev32q_s16 (int16x8_t a)
--{
-- int16x8_t result;
-- __asm__ ("rev32 %0.8h,%1.8h"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vrev32q_u8 (uint8x16_t a)
--{
-- uint8x16_t result;
-- __asm__ ("rev32 %0.16b,%1.16b"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
--vrev32q_u16 (uint16x8_t a)
--{
-- uint16x8_t result;
-- __asm__ ("rev32 %0.8h,%1.8h"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
--vrev64_f32 (float32x2_t a)
--{
-- float32x2_t result;
-- __asm__ ("rev64 %0.2s,%1.2s"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
--vrev64_p8 (poly8x8_t a)
--{
-- poly8x8_t result;
-- __asm__ ("rev64 %0.8b,%1.8b"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
--vrev64_p16 (poly16x4_t a)
--{
-- poly16x4_t result;
-- __asm__ ("rev64 %0.4h,%1.4h"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
--vrev64_s8 (int8x8_t a)
--{
-- int8x8_t result;
-- __asm__ ("rev64 %0.8b,%1.8b"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
--vrev64_s16 (int16x4_t a)
--{
-- int16x4_t result;
-- __asm__ ("rev64 %0.4h,%1.4h"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
--vrev64_s32 (int32x2_t a)
--{
-- int32x2_t result;
-- __asm__ ("rev64 %0.2s,%1.2s"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
--vrev64_u8 (uint8x8_t a)
--{
-- uint8x8_t result;
-- __asm__ ("rev64 %0.8b,%1.8b"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
--vrev64_u16 (uint16x4_t a)
--{
-- uint16x4_t result;
-- __asm__ ("rev64 %0.4h,%1.4h"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
--vrev64_u32 (uint32x2_t a)
--{
-- uint32x2_t result;
-- __asm__ ("rev64 %0.2s,%1.2s"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
--vrev64q_f32 (float32x4_t a)
--{
-- float32x4_t result;
-- __asm__ ("rev64 %0.4s,%1.4s"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
--vrev64q_p8 (poly8x16_t a)
--{
-- poly8x16_t result;
-- __asm__ ("rev64 %0.16b,%1.16b"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
--vrev64q_p16 (poly16x8_t a)
--{
-- poly16x8_t result;
-- __asm__ ("rev64 %0.8h,%1.8h"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
--vrev64q_s8 (int8x16_t a)
--{
-- int8x16_t result;
-- __asm__ ("rev64 %0.16b,%1.16b"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
--vrev64q_s16 (int16x8_t a)
--{
-- int16x8_t result;
-- __asm__ ("rev64 %0.8h,%1.8h"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
--vrev64q_s32 (int32x4_t a)
--{
-- int32x4_t result;
-- __asm__ ("rev64 %0.4s,%1.4s"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vrev64q_u8 (uint8x16_t a)
--{
-- uint8x16_t result;
-- __asm__ ("rev64 %0.16b,%1.16b"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
--vrev64q_u16 (uint16x8_t a)
--{
-- uint16x8_t result;
-- __asm__ ("rev64 %0.8h,%1.8h"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vrev64q_u32 (uint32x4_t a)
--{
-- uint32x4_t result;
-- __asm__ ("rev64 %0.4s,%1.4s"
-- : "=w"(result)
-- : "w"(a)
-- : /* No clobbers */);
-- return result;
--}
--
- #define vrshrn_high_n_s16(a, b, c) \
- __extension__ \
- ({ \
-@@ -11323,17 +10677,6 @@
- return result;
- }
-
--__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
--vrsrtsq_f64 (float64x2_t a, float64x2_t b)
--{
-- float64x2_t result;
-- __asm__ ("frsqrts %0.2d,%1.2d,%2.2d"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
- __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
- vrsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c)
- {
-@@ -12441,469 +11784,7 @@
- return result;
- }
-
--__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
--vtrn1_f32 (float32x2_t a, float32x2_t b)
--{
-- float32x2_t result;
-- __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
--vtrn1_p8 (poly8x8_t a, poly8x8_t b)
--{
-- poly8x8_t result;
-- __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
--vtrn1_p16 (poly16x4_t a, poly16x4_t b)
--{
-- poly16x4_t result;
-- __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
--vtrn1_s8 (int8x8_t a, int8x8_t b)
--{
-- int8x8_t result;
-- __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
--vtrn1_s16 (int16x4_t a, int16x4_t b)
--{
-- int16x4_t result;
-- __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
--vtrn1_s32 (int32x2_t a, int32x2_t b)
--{
-- int32x2_t result;
-- __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
--vtrn1_u8 (uint8x8_t a, uint8x8_t b)
--{
-- uint8x8_t result;
-- __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
--vtrn1_u16 (uint16x4_t a, uint16x4_t b)
--{
-- uint16x4_t result;
-- __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
--vtrn1_u32 (uint32x2_t a, uint32x2_t b)
--{
-- uint32x2_t result;
-- __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
--vtrn1q_f32 (float32x4_t a, float32x4_t b)
--{
-- float32x4_t result;
-- __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
--vtrn1q_f64 (float64x2_t a, float64x2_t b)
--{
-- float64x2_t result;
-- __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
--vtrn1q_p8 (poly8x16_t a, poly8x16_t b)
--{
-- poly8x16_t result;
-- __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
--vtrn1q_p16 (poly16x8_t a, poly16x8_t b)
--{
-- poly16x8_t result;
-- __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
--vtrn1q_s8 (int8x16_t a, int8x16_t b)
--{
-- int8x16_t result;
-- __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
--vtrn1q_s16 (int16x8_t a, int16x8_t b)
--{
-- int16x8_t result;
-- __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
--vtrn1q_s32 (int32x4_t a, int32x4_t b)
--{
-- int32x4_t result;
-- __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
--vtrn1q_s64 (int64x2_t a, int64x2_t b)
--{
-- int64x2_t result;
-- __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vtrn1q_u8 (uint8x16_t a, uint8x16_t b)
--{
-- uint8x16_t result;
-- __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
--vtrn1q_u16 (uint16x8_t a, uint16x8_t b)
--{
-- uint16x8_t result;
-- __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vtrn1q_u32 (uint32x4_t a, uint32x4_t b)
--{
-- uint32x4_t result;
-- __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
--vtrn1q_u64 (uint64x2_t a, uint64x2_t b)
--{
-- uint64x2_t result;
-- __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
--vtrn2_f32 (float32x2_t a, float32x2_t b)
--{
-- float32x2_t result;
-- __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
--vtrn2_p8 (poly8x8_t a, poly8x8_t b)
--{
-- poly8x8_t result;
-- __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
--vtrn2_p16 (poly16x4_t a, poly16x4_t b)
--{
-- poly16x4_t result;
-- __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
--vtrn2_s8 (int8x8_t a, int8x8_t b)
--{
-- int8x8_t result;
-- __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
--vtrn2_s16 (int16x4_t a, int16x4_t b)
--{
-- int16x4_t result;
-- __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
--vtrn2_s32 (int32x2_t a, int32x2_t b)
--{
-- int32x2_t result;
-- __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
--vtrn2_u8 (uint8x8_t a, uint8x8_t b)
--{
-- uint8x8_t result;
-- __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
--vtrn2_u16 (uint16x4_t a, uint16x4_t b)
--{
-- uint16x4_t result;
-- __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
--vtrn2_u32 (uint32x2_t a, uint32x2_t b)
--{
-- uint32x2_t result;
-- __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
--vtrn2q_f32 (float32x4_t a, float32x4_t b)
--{
-- float32x4_t result;
-- __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
--vtrn2q_f64 (float64x2_t a, float64x2_t b)
--{
-- float64x2_t result;
-- __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
--vtrn2q_p8 (poly8x16_t a, poly8x16_t b)
--{
-- poly8x16_t result;
-- __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
--vtrn2q_p16 (poly16x8_t a, poly16x8_t b)
--{
-- poly16x8_t result;
-- __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
--vtrn2q_s8 (int8x16_t a, int8x16_t b)
--{
-- int8x16_t result;
-- __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
--vtrn2q_s16 (int16x8_t a, int16x8_t b)
--{
-- int16x8_t result;
-- __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
--vtrn2q_s32 (int32x4_t a, int32x4_t b)
--{
-- int32x4_t result;
-- __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
--vtrn2q_s64 (int64x2_t a, int64x2_t b)
--{
-- int64x2_t result;
-- __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vtrn2q_u8 (uint8x16_t a, uint8x16_t b)
--{
-- uint8x16_t result;
-- __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
--vtrn2q_u16 (uint16x8_t a, uint16x8_t b)
--{
-- uint16x8_t result;
-- __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vtrn2q_u32 (uint32x4_t a, uint32x4_t b)
--{
-- uint32x4_t result;
-- __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
--vtrn2q_u64 (uint64x2_t a, uint64x2_t b)
--{
-- uint64x2_t result;
-- __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vtst_p8 (poly8x8_t a, poly8x8_t b)
- {
- uint8x8_t result;
-@@ -12946,930 +11827,7 @@
- : /* No clobbers */);
- return result;
- }
--__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
--vuzp1_f32 (float32x2_t a, float32x2_t b)
--{
-- float32x2_t result;
-- __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
-
--__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
--vuzp1_p8 (poly8x8_t a, poly8x8_t b)
--{
-- poly8x8_t result;
-- __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
--vuzp1_p16 (poly16x4_t a, poly16x4_t b)
--{
-- poly16x4_t result;
-- __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
--vuzp1_s8 (int8x8_t a, int8x8_t b)
--{
-- int8x8_t result;
-- __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
--vuzp1_s16 (int16x4_t a, int16x4_t b)
--{
-- int16x4_t result;
-- __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
--vuzp1_s32 (int32x2_t a, int32x2_t b)
--{
-- int32x2_t result;
-- __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
--vuzp1_u8 (uint8x8_t a, uint8x8_t b)
--{
-- uint8x8_t result;
-- __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
--vuzp1_u16 (uint16x4_t a, uint16x4_t b)
--{
-- uint16x4_t result;
-- __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
--vuzp1_u32 (uint32x2_t a, uint32x2_t b)
--{
-- uint32x2_t result;
-- __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
--vuzp1q_f32 (float32x4_t a, float32x4_t b)
--{
-- float32x4_t result;
-- __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
--vuzp1q_f64 (float64x2_t a, float64x2_t b)
--{
-- float64x2_t result;
-- __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
--vuzp1q_p8 (poly8x16_t a, poly8x16_t b)
--{
-- poly8x16_t result;
-- __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
--vuzp1q_p16 (poly16x8_t a, poly16x8_t b)
--{
-- poly16x8_t result;
-- __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
--vuzp1q_s8 (int8x16_t a, int8x16_t b)
--{
-- int8x16_t result;
-- __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
--vuzp1q_s16 (int16x8_t a, int16x8_t b)
--{
-- int16x8_t result;
-- __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
--vuzp1q_s32 (int32x4_t a, int32x4_t b)
--{
-- int32x4_t result;
-- __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
--vuzp1q_s64 (int64x2_t a, int64x2_t b)
--{
-- int64x2_t result;
-- __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vuzp1q_u8 (uint8x16_t a, uint8x16_t b)
--{
-- uint8x16_t result;
-- __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
--vuzp1q_u16 (uint16x8_t a, uint16x8_t b)
--{
-- uint16x8_t result;
-- __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vuzp1q_u32 (uint32x4_t a, uint32x4_t b)
--{
-- uint32x4_t result;
-- __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
--vuzp1q_u64 (uint64x2_t a, uint64x2_t b)
--{
-- uint64x2_t result;
-- __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
--vuzp2_f32 (float32x2_t a, float32x2_t b)
--{
-- float32x2_t result;
-- __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
--vuzp2_p8 (poly8x8_t a, poly8x8_t b)
--{
-- poly8x8_t result;
-- __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
--vuzp2_p16 (poly16x4_t a, poly16x4_t b)
--{
-- poly16x4_t result;
-- __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
--vuzp2_s8 (int8x8_t a, int8x8_t b)
--{
-- int8x8_t result;
-- __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
--vuzp2_s16 (int16x4_t a, int16x4_t b)
--{
-- int16x4_t result;
-- __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
--vuzp2_s32 (int32x2_t a, int32x2_t b)
--{
-- int32x2_t result;
-- __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
--vuzp2_u8 (uint8x8_t a, uint8x8_t b)
--{
-- uint8x8_t result;
-- __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
--vuzp2_u16 (uint16x4_t a, uint16x4_t b)
--{
-- uint16x4_t result;
-- __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
--vuzp2_u32 (uint32x2_t a, uint32x2_t b)
--{
-- uint32x2_t result;
-- __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
--vuzp2q_f32 (float32x4_t a, float32x4_t b)
--{
-- float32x4_t result;
-- __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
--vuzp2q_f64 (float64x2_t a, float64x2_t b)
--{
-- float64x2_t result;
-- __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
--vuzp2q_p8 (poly8x16_t a, poly8x16_t b)
--{
-- poly8x16_t result;
-- __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
--vuzp2q_p16 (poly16x8_t a, poly16x8_t b)
--{
-- poly16x8_t result;
-- __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
--vuzp2q_s8 (int8x16_t a, int8x16_t b)
--{
-- int8x16_t result;
-- __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
--vuzp2q_s16 (int16x8_t a, int16x8_t b)
--{
-- int16x8_t result;
-- __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
--vuzp2q_s32 (int32x4_t a, int32x4_t b)
--{
-- int32x4_t result;
-- __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
--vuzp2q_s64 (int64x2_t a, int64x2_t b)
--{
-- int64x2_t result;
-- __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vuzp2q_u8 (uint8x16_t a, uint8x16_t b)
--{
-- uint8x16_t result;
-- __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
--vuzp2q_u16 (uint16x8_t a, uint16x8_t b)
--{
-- uint16x8_t result;
-- __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vuzp2q_u32 (uint32x4_t a, uint32x4_t b)
--{
-- uint32x4_t result;
-- __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
--vuzp2q_u64 (uint64x2_t a, uint64x2_t b)
--{
-- uint64x2_t result;
-- __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
--vzip1_f32 (float32x2_t a, float32x2_t b)
--{
-- float32x2_t result;
-- __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
--vzip1_p8 (poly8x8_t a, poly8x8_t b)
--{
-- poly8x8_t result;
-- __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
--vzip1_p16 (poly16x4_t a, poly16x4_t b)
--{
-- poly16x4_t result;
-- __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
--vzip1_s8 (int8x8_t a, int8x8_t b)
--{
-- int8x8_t result;
-- __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
--vzip1_s16 (int16x4_t a, int16x4_t b)
--{
-- int16x4_t result;
-- __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
--vzip1_s32 (int32x2_t a, int32x2_t b)
--{
-- int32x2_t result;
-- __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
--vzip1_u8 (uint8x8_t a, uint8x8_t b)
--{
-- uint8x8_t result;
-- __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
--vzip1_u16 (uint16x4_t a, uint16x4_t b)
--{
-- uint16x4_t result;
-- __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
--vzip1_u32 (uint32x2_t a, uint32x2_t b)
--{
-- uint32x2_t result;
-- __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
--vzip1q_f32 (float32x4_t a, float32x4_t b)
--{
-- float32x4_t result;
-- __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
--vzip1q_f64 (float64x2_t a, float64x2_t b)
--{
-- float64x2_t result;
-- __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
--vzip1q_p8 (poly8x16_t a, poly8x16_t b)
--{
-- poly8x16_t result;
-- __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
--vzip1q_p16 (poly16x8_t a, poly16x8_t b)
--{
-- poly16x8_t result;
-- __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
--vzip1q_s8 (int8x16_t a, int8x16_t b)
--{
-- int8x16_t result;
-- __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
--vzip1q_s16 (int16x8_t a, int16x8_t b)
--{
-- int16x8_t result;
-- __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
--vzip1q_s32 (int32x4_t a, int32x4_t b)
--{
-- int32x4_t result;
-- __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
--vzip1q_s64 (int64x2_t a, int64x2_t b)
--{
-- int64x2_t result;
-- __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vzip1q_u8 (uint8x16_t a, uint8x16_t b)
--{
-- uint8x16_t result;
-- __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
--vzip1q_u16 (uint16x8_t a, uint16x8_t b)
--{
-- uint16x8_t result;
-- __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vzip1q_u32 (uint32x4_t a, uint32x4_t b)
--{
-- uint32x4_t result;
-- __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
--vzip1q_u64 (uint64x2_t a, uint64x2_t b)
--{
-- uint64x2_t result;
-- __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
--vzip2_f32 (float32x2_t a, float32x2_t b)
--{
-- float32x2_t result;
-- __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
--vzip2_p8 (poly8x8_t a, poly8x8_t b)
--{
-- poly8x8_t result;
-- __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
--vzip2_p16 (poly16x4_t a, poly16x4_t b)
--{
-- poly16x4_t result;
-- __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
--vzip2_s8 (int8x8_t a, int8x8_t b)
--{
-- int8x8_t result;
-- __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
--vzip2_s16 (int16x4_t a, int16x4_t b)
--{
-- int16x4_t result;
-- __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
--vzip2_s32 (int32x2_t a, int32x2_t b)
--{
-- int32x2_t result;
-- __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
--vzip2_u8 (uint8x8_t a, uint8x8_t b)
--{
-- uint8x8_t result;
-- __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
--vzip2_u16 (uint16x4_t a, uint16x4_t b)
--{
-- uint16x4_t result;
-- __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
--vzip2_u32 (uint32x2_t a, uint32x2_t b)
--{
-- uint32x2_t result;
-- __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
--vzip2q_f32 (float32x4_t a, float32x4_t b)
--{
-- float32x4_t result;
-- __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
--vzip2q_f64 (float64x2_t a, float64x2_t b)
--{
-- float64x2_t result;
-- __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
--vzip2q_p8 (poly8x16_t a, poly8x16_t b)
--{
-- poly8x16_t result;
-- __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
--vzip2q_p16 (poly16x8_t a, poly16x8_t b)
--{
-- poly16x8_t result;
-- __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
--vzip2q_s8 (int8x16_t a, int8x16_t b)
--{
-- int8x16_t result;
-- __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
--vzip2q_s16 (int16x8_t a, int16x8_t b)
--{
-- int16x8_t result;
-- __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
--vzip2q_s32 (int32x4_t a, int32x4_t b)
--{
-- int32x4_t result;
-- __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
--vzip2q_s64 (int64x2_t a, int64x2_t b)
--{
-- int64x2_t result;
-- __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vzip2q_u8 (uint8x16_t a, uint8x16_t b)
--{
-- uint8x16_t result;
-- __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
--vzip2q_u16 (uint16x8_t a, uint16x8_t b)
--{
-- uint16x8_t result;
-- __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vzip2q_u32 (uint32x4_t a, uint32x4_t b)
--{
-- uint32x4_t result;
-- __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
--vzip2q_u64 (uint64x2_t a, uint64x2_t b)
--{
-- uint64x2_t result;
-- __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
-- : "=w"(result)
-- : "w"(a), "w"(b)
-- : /* No clobbers */);
-- return result;
--}
--
- /* End of temporary inline asm implementations. */
-
- /* Start of temporary inline asm for vldn, vstn and friends. */
-@@ -13953,46 +11911,6 @@
- __STRUCTN (float, 64, 4)
- #undef __STRUCTN
-
--#define __LD2R_FUNC(rettype, structtype, ptrtype, \
-- regsuffix, funcsuffix, Q) \
-- __extension__ static __inline rettype \
-- __attribute__ ((__always_inline__)) \
-- vld2 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \
-- { \
-- rettype result; \
-- __asm__ ("ld2r {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \
-- "st1 {v16." #regsuffix ", v17." #regsuffix "}, %0\n\t" \
-- : "=Q"(result) \
-- : "Q"(*(const structtype *)ptr) \
-- : "memory", "v16", "v17"); \
-- return result; \
-- }
--
--__LD2R_FUNC (float32x2x2_t, float32x2_t, float32_t, 2s, f32,)
--__LD2R_FUNC (float64x1x2_t, float64x2_t, float64_t, 1d, f64,)
--__LD2R_FUNC (poly8x8x2_t, poly8x2_t, poly8_t, 8b, p8,)
--__LD2R_FUNC (poly16x4x2_t, poly16x2_t, poly16_t, 4h, p16,)
--__LD2R_FUNC (int8x8x2_t, int8x2_t, int8_t, 8b, s8,)
--__LD2R_FUNC (int16x4x2_t, int16x2_t, int16_t, 4h, s16,)
--__LD2R_FUNC (int32x2x2_t, int32x2_t, int32_t, 2s, s32,)
--__LD2R_FUNC (int64x1x2_t, int64x2_t, int64_t, 1d, s64,)
--__LD2R_FUNC (uint8x8x2_t, uint8x2_t, uint8_t, 8b, u8,)
--__LD2R_FUNC (uint16x4x2_t, uint16x2_t, uint16_t, 4h, u16,)
--__LD2R_FUNC (uint32x2x2_t, uint32x2_t, uint32_t, 2s, u32,)
--__LD2R_FUNC (uint64x1x2_t, uint64x2_t, uint64_t, 1d, u64,)
--__LD2R_FUNC (float32x4x2_t, float32x2_t, float32_t, 4s, f32, q)
--__LD2R_FUNC (float64x2x2_t, float64x2_t, float64_t, 2d, f64, q)
--__LD2R_FUNC (poly8x16x2_t, poly8x2_t, poly8_t, 16b, p8, q)
--__LD2R_FUNC (poly16x8x2_t, poly16x2_t, poly16_t, 8h, p16, q)
--__LD2R_FUNC (int8x16x2_t, int8x2_t, int8_t, 16b, s8, q)
--__LD2R_FUNC (int16x8x2_t, int16x2_t, int16_t, 8h, s16, q)
--__LD2R_FUNC (int32x4x2_t, int32x2_t, int32_t, 4s, s32, q)
--__LD2R_FUNC (int64x2x2_t, int64x2_t, int64_t, 2d, s64, q)
--__LD2R_FUNC (uint8x16x2_t, uint8x2_t, uint8_t, 16b, u8, q)
--__LD2R_FUNC (uint16x8x2_t, uint16x2_t, uint16_t, 8h, u16, q)
--__LD2R_FUNC (uint32x4x2_t, uint32x2_t, uint32_t, 4s, u32, q)
--__LD2R_FUNC (uint64x2x2_t, uint64x2_t, uint64_t, 2d, u64, q)
--
- #define __LD2_LANE_FUNC(rettype, ptrtype, regsuffix, \
- lnsuffix, funcsuffix, Q) \
- __extension__ static __inline rettype \
-@@ -14035,46 +11953,6 @@
- __LD2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q)
- __LD2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q)
-
--#define __LD3R_FUNC(rettype, structtype, ptrtype, \
-- regsuffix, funcsuffix, Q) \
-- __extension__ static __inline rettype \
-- __attribute__ ((__always_inline__)) \
-- vld3 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \
-- { \
-- rettype result; \
-- __asm__ ("ld3r {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \
-- "st1 {v16." #regsuffix " - v18." #regsuffix "}, %0\n\t" \
-- : "=Q"(result) \
-- : "Q"(*(const structtype *)ptr) \
-- : "memory", "v16", "v17", "v18"); \
-- return result; \
-- }
--
--__LD3R_FUNC (float32x2x3_t, float32x3_t, float32_t, 2s, f32,)
--__LD3R_FUNC (float64x1x3_t, float64x3_t, float64_t, 1d, f64,)
--__LD3R_FUNC (poly8x8x3_t, poly8x3_t, poly8_t, 8b, p8,)
--__LD3R_FUNC (poly16x4x3_t, poly16x3_t, poly16_t, 4h, p16,)
--__LD3R_FUNC (int8x8x3_t, int8x3_t, int8_t, 8b, s8,)
--__LD3R_FUNC (int16x4x3_t, int16x3_t, int16_t, 4h, s16,)
--__LD3R_FUNC (int32x2x3_t, int32x3_t, int32_t, 2s, s32,)
--__LD3R_FUNC (int64x1x3_t, int64x3_t, int64_t, 1d, s64,)
--__LD3R_FUNC (uint8x8x3_t, uint8x3_t, uint8_t, 8b, u8,)
--__LD3R_FUNC (uint16x4x3_t, uint16x3_t, uint16_t, 4h, u16,)
--__LD3R_FUNC (uint32x2x3_t, uint32x3_t, uint32_t, 2s, u32,)
--__LD3R_FUNC (uint64x1x3_t, uint64x3_t, uint64_t, 1d, u64,)
--__LD3R_FUNC (float32x4x3_t, float32x3_t, float32_t, 4s, f32, q)
--__LD3R_FUNC (float64x2x3_t, float64x3_t, float64_t, 2d, f64, q)
--__LD3R_FUNC (poly8x16x3_t, poly8x3_t, poly8_t, 16b, p8, q)
--__LD3R_FUNC (poly16x8x3_t, poly16x3_t, poly16_t, 8h, p16, q)
--__LD3R_FUNC (int8x16x3_t, int8x3_t, int8_t, 16b, s8, q)
--__LD3R_FUNC (int16x8x3_t, int16x3_t, int16_t, 8h, s16, q)
--__LD3R_FUNC (int32x4x3_t, int32x3_t, int32_t, 4s, s32, q)
--__LD3R_FUNC (int64x2x3_t, int64x3_t, int64_t, 2d, s64, q)
--__LD3R_FUNC (uint8x16x3_t, uint8x3_t, uint8_t, 16b, u8, q)
--__LD3R_FUNC (uint16x8x3_t, uint16x3_t, uint16_t, 8h, u16, q)
--__LD3R_FUNC (uint32x4x3_t, uint32x3_t, uint32_t, 4s, u32, q)
--__LD3R_FUNC (uint64x2x3_t, uint64x3_t, uint64_t, 2d, u64, q)
--
- #define __LD3_LANE_FUNC(rettype, ptrtype, regsuffix, \
- lnsuffix, funcsuffix, Q) \
- __extension__ static __inline rettype \
-@@ -14117,46 +11995,6 @@
- __LD3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q)
- __LD3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q)
-
--#define __LD4R_FUNC(rettype, structtype, ptrtype, \
-- regsuffix, funcsuffix, Q) \
-- __extension__ static __inline rettype \
-- __attribute__ ((__always_inline__)) \
-- vld4 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \
-- { \
-- rettype result; \
-- __asm__ ("ld4r {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \
-- "st1 {v16." #regsuffix " - v19." #regsuffix "}, %0\n\t" \
-- : "=Q"(result) \
-- : "Q"(*(const structtype *)ptr) \
-- : "memory", "v16", "v17", "v18", "v19"); \
-- return result; \
-- }
--
--__LD4R_FUNC (float32x2x4_t, float32x4_t, float32_t, 2s, f32,)
--__LD4R_FUNC (float64x1x4_t, float64x4_t, float64_t, 1d, f64,)
--__LD4R_FUNC (poly8x8x4_t, poly8x4_t, poly8_t, 8b, p8,)
--__LD4R_FUNC (poly16x4x4_t, poly16x4_t, poly16_t, 4h, p16,)
--__LD4R_FUNC (int8x8x4_t, int8x4_t, int8_t, 8b, s8,)
--__LD4R_FUNC (int16x4x4_t, int16x4_t, int16_t, 4h, s16,)
--__LD4R_FUNC (int32x2x4_t, int32x4_t, int32_t, 2s, s32,)
--__LD4R_FUNC (int64x1x4_t, int64x4_t, int64_t, 1d, s64,)
--__LD4R_FUNC (uint8x8x4_t, uint8x4_t, uint8_t, 8b, u8,)
--__LD4R_FUNC (uint16x4x4_t, uint16x4_t, uint16_t, 4h, u16,)
--__LD4R_FUNC (uint32x2x4_t, uint32x4_t, uint32_t, 2s, u32,)
--__LD4R_FUNC (uint64x1x4_t, uint64x4_t, uint64_t, 1d, u64,)
--__LD4R_FUNC (float32x4x4_t, float32x4_t, float32_t, 4s, f32, q)
--__LD4R_FUNC (float64x2x4_t, float64x4_t, float64_t, 2d, f64, q)
--__LD4R_FUNC (poly8x16x4_t, poly8x4_t, poly8_t, 16b, p8, q)
--__LD4R_FUNC (poly16x8x4_t, poly16x4_t, poly16_t, 8h, p16, q)
--__LD4R_FUNC (int8x16x4_t, int8x4_t, int8_t, 16b, s8, q)
--__LD4R_FUNC (int16x8x4_t, int16x4_t, int16_t, 8h, s16, q)
--__LD4R_FUNC (int32x4x4_t, int32x4_t, int32_t, 4s, s32, q)
--__LD4R_FUNC (int64x2x4_t, int64x4_t, int64_t, 2d, s64, q)
--__LD4R_FUNC (uint8x16x4_t, uint8x4_t, uint8_t, 16b, u8, q)
--__LD4R_FUNC (uint16x8x4_t, uint16x4_t, uint16_t, 8h, u16, q)
--__LD4R_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, 4s, u32, q)
--__LD4R_FUNC (uint64x2x4_t, uint64x4_t, uint64_t, 2d, u64, q)
--
- #define __LD4_LANE_FUNC(rettype, ptrtype, regsuffix, \
- lnsuffix, funcsuffix, Q) \
- __extension__ static __inline rettype \
-@@ -14199,132 +12037,225 @@
- __LD4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q)
- __LD4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q)
-
--#define __ST2_LANE_FUNC(intype, ptrtype, regsuffix, \
-- lnsuffix, funcsuffix, Q) \
-- typedef struct { ptrtype __x[2]; } __ST2_LANE_STRUCTURE_##intype; \
-- __extension__ static __inline void \
-- __attribute__ ((__always_inline__)) \
-- vst2 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \
-- intype b, const int c) \
-- { \
-- __ST2_LANE_STRUCTURE_##intype *__p = \
-- (__ST2_LANE_STRUCTURE_##intype *)ptr; \
-- __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \
-- "st2 {v16." #lnsuffix ", v17." #lnsuffix "}[%2], %0\n\t" \
-- : "=Q"(*__p) \
-- : "Q"(b), "i"(c) \
-- : "v16", "v17"); \
-- }
-+#define __ST2_LANE_FUNC(intype, largetype, ptrtype, \
-+ mode, ptr_mode, funcsuffix, signedtype) \
-+__extension__ static __inline void \
-+__attribute__ ((__always_inline__)) \
-+vst2_lane_ ## funcsuffix (ptrtype *__ptr, \
-+ intype __b, const int __c) \
-+{ \
-+ __builtin_aarch64_simd_oi __o; \
-+ largetype __temp; \
-+ __temp.val[0] \
-+ = vcombine_##funcsuffix (__b.val[0], \
-+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
-+ __temp.val[1] \
-+ = vcombine_##funcsuffix (__b.val[1], \
-+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
-+ __o = __builtin_aarch64_set_qregoi##mode (__o, \
-+ (signedtype) __temp.val[0], 0); \
-+ __o = __builtin_aarch64_set_qregoi##mode (__o, \
-+ (signedtype) __temp.val[1], 1); \
-+ __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
-+ __ptr, __o, __c); \
-+}
-
--__ST2_LANE_FUNC (int8x8x2_t, int8_t, 8b, b, s8,)
--__ST2_LANE_FUNC (float32x2x2_t, float32_t, 2s, s, f32,)
--__ST2_LANE_FUNC (float64x1x2_t, float64_t, 1d, d, f64,)
--__ST2_LANE_FUNC (poly8x8x2_t, poly8_t, 8b, b, p8,)
--__ST2_LANE_FUNC (poly16x4x2_t, poly16_t, 4h, h, p16,)
--__ST2_LANE_FUNC (int16x4x2_t, int16_t, 4h, h, s16,)
--__ST2_LANE_FUNC (int32x2x2_t, int32_t, 2s, s, s32,)
--__ST2_LANE_FUNC (int64x1x2_t, int64_t, 1d, d, s64,)
--__ST2_LANE_FUNC (uint8x8x2_t, uint8_t, 8b, b, u8,)
--__ST2_LANE_FUNC (uint16x4x2_t, uint16_t, 4h, h, u16,)
--__ST2_LANE_FUNC (uint32x2x2_t, uint32_t, 2s, s, u32,)
--__ST2_LANE_FUNC (uint64x1x2_t, uint64_t, 1d, d, u64,)
--__ST2_LANE_FUNC (float32x4x2_t, float32_t, 4s, s, f32, q)
--__ST2_LANE_FUNC (float64x2x2_t, float64_t, 2d, d, f64, q)
--__ST2_LANE_FUNC (poly8x16x2_t, poly8_t, 16b, b, p8, q)
--__ST2_LANE_FUNC (poly16x8x2_t, poly16_t, 8h, h, p16, q)
--__ST2_LANE_FUNC (int8x16x2_t, int8_t, 16b, b, s8, q)
--__ST2_LANE_FUNC (int16x8x2_t, int16_t, 8h, h, s16, q)
--__ST2_LANE_FUNC (int32x4x2_t, int32_t, 4s, s, s32, q)
--__ST2_LANE_FUNC (int64x2x2_t, int64_t, 2d, d, s64, q)
--__ST2_LANE_FUNC (uint8x16x2_t, uint8_t, 16b, b, u8, q)
--__ST2_LANE_FUNC (uint16x8x2_t, uint16_t, 8h, h, u16, q)
--__ST2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q)
--__ST2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q)
-+__ST2_LANE_FUNC (float32x2x2_t, float32x4x2_t, float32_t, v4sf, sf, f32,
-+ float32x4_t)
-+__ST2_LANE_FUNC (float64x1x2_t, float64x2x2_t, float64_t, v2df, df, f64,
-+ float64x2_t)
-+__ST2_LANE_FUNC (poly8x8x2_t, poly8x16x2_t, poly8_t, v16qi, qi, p8, int8x16_t)
-+__ST2_LANE_FUNC (poly16x4x2_t, poly16x8x2_t, poly16_t, v8hi, hi, p16,
-+ int16x8_t)
-+__ST2_LANE_FUNC (int8x8x2_t, int8x16x2_t, int8_t, v16qi, qi, s8, int8x16_t)
-+__ST2_LANE_FUNC (int16x4x2_t, int16x8x2_t, int16_t, v8hi, hi, s16, int16x8_t)
-+__ST2_LANE_FUNC (int32x2x2_t, int32x4x2_t, int32_t, v4si, si, s32, int32x4_t)
-+__ST2_LANE_FUNC (int64x1x2_t, int64x2x2_t, int64_t, v2di, di, s64, int64x2_t)
-+__ST2_LANE_FUNC (uint8x8x2_t, uint8x16x2_t, uint8_t, v16qi, qi, u8, int8x16_t)
-+__ST2_LANE_FUNC (uint16x4x2_t, uint16x8x2_t, uint16_t, v8hi, hi, u16,
-+ int16x8_t)
-+__ST2_LANE_FUNC (uint32x2x2_t, uint32x4x2_t, uint32_t, v4si, si, u32,
-+ int32x4_t)
-+__ST2_LANE_FUNC (uint64x1x2_t, uint64x2x2_t, uint64_t, v2di, di, u64,
-+ int64x2_t)
-
--#define __ST3_LANE_FUNC(intype, ptrtype, regsuffix, \
-- lnsuffix, funcsuffix, Q) \
-- typedef struct { ptrtype __x[3]; } __ST3_LANE_STRUCTURE_##intype; \
-- __extension__ static __inline void \
-- __attribute__ ((__always_inline__)) \
-- vst3 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \
-- intype b, const int c) \
-- { \
-- __ST3_LANE_STRUCTURE_##intype *__p = \
-- (__ST3_LANE_STRUCTURE_##intype *)ptr; \
-- __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \
-- "st3 {v16." #lnsuffix " - v18." #lnsuffix "}[%2], %0\n\t" \
-- : "=Q"(*__p) \
-- : "Q"(b), "i"(c) \
-- : "v16", "v17", "v18"); \
-- }
-+#undef __ST2_LANE_FUNC
-+#define __ST2_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \
-+__extension__ static __inline void \
-+__attribute__ ((__always_inline__)) \
-+vst2q_lane_ ## funcsuffix (ptrtype *__ptr, \
-+ intype __b, const int __c) \
-+{ \
-+ union { intype __i; \
-+ __builtin_aarch64_simd_oi __o; } __temp = { __b }; \
-+ __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
-+ __ptr, __temp.__o, __c); \
-+}
-
--__ST3_LANE_FUNC (int8x8x3_t, int8_t, 8b, b, s8,)
--__ST3_LANE_FUNC (float32x2x3_t, float32_t, 2s, s, f32,)
--__ST3_LANE_FUNC (float64x1x3_t, float64_t, 1d, d, f64,)
--__ST3_LANE_FUNC (poly8x8x3_t, poly8_t, 8b, b, p8,)
--__ST3_LANE_FUNC (poly16x4x3_t, poly16_t, 4h, h, p16,)
--__ST3_LANE_FUNC (int16x4x3_t, int16_t, 4h, h, s16,)
--__ST3_LANE_FUNC (int32x2x3_t, int32_t, 2s, s, s32,)
--__ST3_LANE_FUNC (int64x1x3_t, int64_t, 1d, d, s64,)
--__ST3_LANE_FUNC (uint8x8x3_t, uint8_t, 8b, b, u8,)
--__ST3_LANE_FUNC (uint16x4x3_t, uint16_t, 4h, h, u16,)
--__ST3_LANE_FUNC (uint32x2x3_t, uint32_t, 2s, s, u32,)
--__ST3_LANE_FUNC (uint64x1x3_t, uint64_t, 1d, d, u64,)
--__ST3_LANE_FUNC (float32x4x3_t, float32_t, 4s, s, f32, q)
--__ST3_LANE_FUNC (float64x2x3_t, float64_t, 2d, d, f64, q)
--__ST3_LANE_FUNC (poly8x16x3_t, poly8_t, 16b, b, p8, q)
--__ST3_LANE_FUNC (poly16x8x3_t, poly16_t, 8h, h, p16, q)
--__ST3_LANE_FUNC (int8x16x3_t, int8_t, 16b, b, s8, q)
--__ST3_LANE_FUNC (int16x8x3_t, int16_t, 8h, h, s16, q)
--__ST3_LANE_FUNC (int32x4x3_t, int32_t, 4s, s, s32, q)
--__ST3_LANE_FUNC (int64x2x3_t, int64_t, 2d, d, s64, q)
--__ST3_LANE_FUNC (uint8x16x3_t, uint8_t, 16b, b, u8, q)
--__ST3_LANE_FUNC (uint16x8x3_t, uint16_t, 8h, h, u16, q)
--__ST3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q)
--__ST3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q)
-+__ST2_LANE_FUNC (float32x4x2_t, float32_t, v4sf, sf, f32)
-+__ST2_LANE_FUNC (float64x2x2_t, float64_t, v2df, df, f64)
-+__ST2_LANE_FUNC (poly8x16x2_t, poly8_t, v16qi, qi, p8)
-+__ST2_LANE_FUNC (poly16x8x2_t, poly16_t, v8hi, hi, p16)
-+__ST2_LANE_FUNC (int8x16x2_t, int8_t, v16qi, qi, s8)
-+__ST2_LANE_FUNC (int16x8x2_t, int16_t, v8hi, hi, s16)
-+__ST2_LANE_FUNC (int32x4x2_t, int32_t, v4si, si, s32)
-+__ST2_LANE_FUNC (int64x2x2_t, int64_t, v2di, di, s64)
-+__ST2_LANE_FUNC (uint8x16x2_t, uint8_t, v16qi, qi, u8)
-+__ST2_LANE_FUNC (uint16x8x2_t, uint16_t, v8hi, hi, u16)
-+__ST2_LANE_FUNC (uint32x4x2_t, uint32_t, v4si, si, u32)
-+__ST2_LANE_FUNC (uint64x2x2_t, uint64_t, v2di, di, u64)
-
--#define __ST4_LANE_FUNC(intype, ptrtype, regsuffix, \
-- lnsuffix, funcsuffix, Q) \
-- typedef struct { ptrtype __x[4]; } __ST4_LANE_STRUCTURE_##intype; \
-- __extension__ static __inline void \
-- __attribute__ ((__always_inline__)) \
-- vst4 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \
-- intype b, const int c) \
-- { \
-- __ST4_LANE_STRUCTURE_##intype *__p = \
-- (__ST4_LANE_STRUCTURE_##intype *)ptr; \
-- __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \
-- "st4 {v16." #lnsuffix " - v19." #lnsuffix "}[%2], %0\n\t" \
-- : "=Q"(*__p) \
-- : "Q"(b), "i"(c) \
-- : "v16", "v17", "v18", "v19"); \
-- }
-+#define __ST3_LANE_FUNC(intype, largetype, ptrtype, \
-+ mode, ptr_mode, funcsuffix, signedtype) \
-+__extension__ static __inline void \
-+__attribute__ ((__always_inline__)) \
-+vst3_lane_ ## funcsuffix (ptrtype *__ptr, \
-+ intype __b, const int __c) \
-+{ \
-+ __builtin_aarch64_simd_ci __o; \
-+ largetype __temp; \
-+ __temp.val[0] \
-+ = vcombine_##funcsuffix (__b.val[0], \
-+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
-+ __temp.val[1] \
-+ = vcombine_##funcsuffix (__b.val[1], \
-+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
-+ __temp.val[2] \
-+ = vcombine_##funcsuffix (__b.val[2], \
-+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
-+ __o = __builtin_aarch64_set_qregci##mode (__o, \
-+ (signedtype) __temp.val[0], 0); \
-+ __o = __builtin_aarch64_set_qregci##mode (__o, \
-+ (signedtype) __temp.val[1], 1); \
-+ __o = __builtin_aarch64_set_qregci##mode (__o, \
-+ (signedtype) __temp.val[2], 2); \
-+ __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
-+ __ptr, __o, __c); \
-+}
-
--__ST4_LANE_FUNC (int8x8x4_t, int8_t, 8b, b, s8,)
--__ST4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,)
--__ST4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,)
--__ST4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,)
--__ST4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,)
--__ST4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,)
--__ST4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,)
--__ST4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,)
--__ST4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,)
--__ST4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,)
--__ST4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,)
--__ST4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,)
--__ST4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q)
--__ST4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q)
--__ST4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q)
--__ST4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q)
--__ST4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q)
--__ST4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q)
--__ST4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q)
--__ST4_LANE_FUNC (int64x2x4_t, int64_t, 2d, d, s64, q)
--__ST4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q)
--__ST4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q)
--__ST4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q)
--__ST4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q)
-+__ST3_LANE_FUNC (float32x2x3_t, float32x4x3_t, float32_t, v4sf, sf, f32,
-+ float32x4_t)
-+__ST3_LANE_FUNC (float64x1x3_t, float64x2x3_t, float64_t, v2df, df, f64,
-+ float64x2_t)
-+__ST3_LANE_FUNC (poly8x8x3_t, poly8x16x3_t, poly8_t, v16qi, qi, p8, int8x16_t)
-+__ST3_LANE_FUNC (poly16x4x3_t, poly16x8x3_t, poly16_t, v8hi, hi, p16,
-+ int16x8_t)
-+__ST3_LANE_FUNC (int8x8x3_t, int8x16x3_t, int8_t, v16qi, qi, s8, int8x16_t)
-+__ST3_LANE_FUNC (int16x4x3_t, int16x8x3_t, int16_t, v8hi, hi, s16, int16x8_t)
-+__ST3_LANE_FUNC (int32x2x3_t, int32x4x3_t, int32_t, v4si, si, s32, int32x4_t)
-+__ST3_LANE_FUNC (int64x1x3_t, int64x2x3_t, int64_t, v2di, di, s64, int64x2_t)
-+__ST3_LANE_FUNC (uint8x8x3_t, uint8x16x3_t, uint8_t, v16qi, qi, u8, int8x16_t)
-+__ST3_LANE_FUNC (uint16x4x3_t, uint16x8x3_t, uint16_t, v8hi, hi, u16,
-+ int16x8_t)
-+__ST3_LANE_FUNC (uint32x2x3_t, uint32x4x3_t, uint32_t, v4si, si, u32,
-+ int32x4_t)
-+__ST3_LANE_FUNC (uint64x1x3_t, uint64x2x3_t, uint64_t, v2di, di, u64,
-+ int64x2_t)
-
-+#undef __ST3_LANE_FUNC
-+#define __ST3_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \
-+__extension__ static __inline void \
-+__attribute__ ((__always_inline__)) \
-+vst3q_lane_ ## funcsuffix (ptrtype *__ptr, \
-+ intype __b, const int __c) \
-+{ \
-+ union { intype __i; \
-+ __builtin_aarch64_simd_ci __o; } __temp = { __b }; \
-+ __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
-+ __ptr, __temp.__o, __c); \
-+}
-+
-+__ST3_LANE_FUNC (float32x4x3_t, float32_t, v4sf, sf, f32)
-+__ST3_LANE_FUNC (float64x2x3_t, float64_t, v2df, df, f64)
-+__ST3_LANE_FUNC (poly8x16x3_t, poly8_t, v16qi, qi, p8)
-+__ST3_LANE_FUNC (poly16x8x3_t, poly16_t, v8hi, hi, p16)
-+__ST3_LANE_FUNC (int8x16x3_t, int8_t, v16qi, qi, s8)
-+__ST3_LANE_FUNC (int16x8x3_t, int16_t, v8hi, hi, s16)
-+__ST3_LANE_FUNC (int32x4x3_t, int32_t, v4si, si, s32)
-+__ST3_LANE_FUNC (int64x2x3_t, int64_t, v2di, di, s64)
-+__ST3_LANE_FUNC (uint8x16x3_t, uint8_t, v16qi, qi, u8)
-+__ST3_LANE_FUNC (uint16x8x3_t, uint16_t, v8hi, hi, u16)
-+__ST3_LANE_FUNC (uint32x4x3_t, uint32_t, v4si, si, u32)
-+__ST3_LANE_FUNC (uint64x2x3_t, uint64_t, v2di, di, u64)
-+
-+#define __ST4_LANE_FUNC(intype, largetype, ptrtype, \
-+ mode, ptr_mode, funcsuffix, signedtype) \
-+__extension__ static __inline void \
-+__attribute__ ((__always_inline__)) \
-+vst4_lane_ ## funcsuffix (ptrtype *__ptr, \
-+ intype __b, const int __c) \
-+{ \
-+ __builtin_aarch64_simd_xi __o; \
-+ largetype __temp; \
-+ __temp.val[0] \
-+ = vcombine_##funcsuffix (__b.val[0], \
-+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
-+ __temp.val[1] \
-+ = vcombine_##funcsuffix (__b.val[1], \
-+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
-+ __temp.val[2] \
-+ = vcombine_##funcsuffix (__b.val[2], \
-+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
-+ __temp.val[3] \
-+ = vcombine_##funcsuffix (__b.val[3], \
-+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
-+ __o = __builtin_aarch64_set_qregxi##mode (__o, \
-+ (signedtype) __temp.val[0], 0); \
-+ __o = __builtin_aarch64_set_qregxi##mode (__o, \
-+ (signedtype) __temp.val[1], 1); \
-+ __o = __builtin_aarch64_set_qregxi##mode (__o, \
-+ (signedtype) __temp.val[2], 2); \
-+ __o = __builtin_aarch64_set_qregxi##mode (__o, \
-+ (signedtype) __temp.val[3], 3); \
-+ __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
-+ __ptr, __o, __c); \
-+}
-+
-+__ST4_LANE_FUNC (float32x2x4_t, float32x4x4_t, float32_t, v4sf, sf, f32,
-+ float32x4_t)
-+__ST4_LANE_FUNC (float64x1x4_t, float64x2x4_t, float64_t, v2df, df, f64,
-+ float64x2_t)
-+__ST4_LANE_FUNC (poly8x8x4_t, poly8x16x4_t, poly8_t, v16qi, qi, p8, int8x16_t)
-+__ST4_LANE_FUNC (poly16x4x4_t, poly16x8x4_t, poly16_t, v8hi, hi, p16,
-+ int16x8_t)
-+__ST4_LANE_FUNC (int8x8x4_t, int8x16x4_t, int8_t, v16qi, qi, s8, int8x16_t)
-+__ST4_LANE_FUNC (int16x4x4_t, int16x8x4_t, int16_t, v8hi, hi, s16, int16x8_t)
-+__ST4_LANE_FUNC (int32x2x4_t, int32x4x4_t, int32_t, v4si, si, s32, int32x4_t)
-+__ST4_LANE_FUNC (int64x1x4_t, int64x2x4_t, int64_t, v2di, di, s64, int64x2_t)
-+__ST4_LANE_FUNC (uint8x8x4_t, uint8x16x4_t, uint8_t, v16qi, qi, u8, int8x16_t)
-+__ST4_LANE_FUNC (uint16x4x4_t, uint16x8x4_t, uint16_t, v8hi, hi, u16,
-+ int16x8_t)
-+__ST4_LANE_FUNC (uint32x2x4_t, uint32x4x4_t, uint32_t, v4si, si, u32,
-+ int32x4_t)
-+__ST4_LANE_FUNC (uint64x1x4_t, uint64x2x4_t, uint64_t, v2di, di, u64,
-+ int64x2_t)
-+
-+#undef __ST4_LANE_FUNC
-+#define __ST4_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \
-+__extension__ static __inline void \
-+__attribute__ ((__always_inline__)) \
-+vst4q_lane_ ## funcsuffix (ptrtype *__ptr, \
-+ intype __b, const int __c) \
-+{ \
-+ union { intype __i; \
-+ __builtin_aarch64_simd_xi __o; } __temp = { __b }; \
-+ __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
-+ __ptr, __temp.__o, __c); \
-+}
-+
-+__ST4_LANE_FUNC (float32x4x4_t, float32_t, v4sf, sf, f32)
-+__ST4_LANE_FUNC (float64x2x4_t, float64_t, v2df, df, f64)
-+__ST4_LANE_FUNC (poly8x16x4_t, poly8_t, v16qi, qi, p8)
-+__ST4_LANE_FUNC (poly16x8x4_t, poly16_t, v8hi, hi, p16)
-+__ST4_LANE_FUNC (int8x16x4_t, int8_t, v16qi, qi, s8)
-+__ST4_LANE_FUNC (int16x8x4_t, int16_t, v8hi, hi, s16)
-+__ST4_LANE_FUNC (int32x4x4_t, int32_t, v4si, si, s32)
-+__ST4_LANE_FUNC (int64x2x4_t, int64_t, v2di, di, s64)
-+__ST4_LANE_FUNC (uint8x16x4_t, uint8_t, v16qi, qi, u8)
-+__ST4_LANE_FUNC (uint16x8x4_t, uint16_t, v8hi, hi, u16)
-+__ST4_LANE_FUNC (uint32x4x4_t, uint32_t, v4si, si, u32)
-+__ST4_LANE_FUNC (uint64x2x4_t, uint64_t, v2di, di, u64)
-+
- __extension__ static __inline int64_t __attribute__ ((__always_inline__))
- vaddlv_s32 (int32x2_t a)
- {
-@@ -14341,12 +12272,6 @@
- return result;
- }
-
--__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
--vpaddd_s64 (int64x2_t __a)
--{
-- return __builtin_aarch64_addpdi (__a);
--}
--
- __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
- vqdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
- {
-@@ -15706,7 +13631,7 @@
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vceq_f32 (float32x2_t __a, float32x2_t __b)
- {
-- return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b);
-+ return (uint32x2_t) (__a == __b);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-@@ -15718,26 +13643,25 @@
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vceq_p8 (poly8x8_t __a, poly8x8_t __b)
- {
-- return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
-- (int8x8_t) __b);
-+ return (uint8x8_t) (__a == __b);
- }
-
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vceq_s8 (int8x8_t __a, int8x8_t __b)
- {
-- return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b);
-+ return (uint8x8_t) (__a == __b);
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vceq_s16 (int16x4_t __a, int16x4_t __b)
- {
-- return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b);
-+ return (uint16x4_t) (__a == __b);
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vceq_s32 (int32x2_t __a, int32x2_t __b)
- {
-- return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b);
-+ return (uint32x2_t) (__a == __b);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-@@ -15749,22 +13673,19 @@
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vceq_u8 (uint8x8_t __a, uint8x8_t __b)
- {
-- return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
-- (int8x8_t) __b);
-+ return (__a == __b);
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vceq_u16 (uint16x4_t __a, uint16x4_t __b)
- {
-- return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a,
-- (int16x4_t) __b);
-+ return (__a == __b);
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vceq_u32 (uint32x2_t __a, uint32x2_t __b)
- {
-- return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a,
-- (int32x2_t) __b);
-+ return (__a == __b);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-@@ -15776,72 +13697,67 @@
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vceqq_f32 (float32x4_t __a, float32x4_t __b)
- {
-- return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b);
-+ return (uint32x4_t) (__a == __b);
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vceqq_f64 (float64x2_t __a, float64x2_t __b)
- {
-- return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b);
-+ return (uint64x2_t) (__a == __b);
- }
-
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vceqq_p8 (poly8x16_t __a, poly8x16_t __b)
- {
-- return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
-- (int8x16_t) __b);
-+ return (uint8x16_t) (__a == __b);
- }
-
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vceqq_s8 (int8x16_t __a, int8x16_t __b)
- {
-- return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b);
-+ return (uint8x16_t) (__a == __b);
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vceqq_s16 (int16x8_t __a, int16x8_t __b)
- {
-- return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b);
-+ return (uint16x8_t) (__a == __b);
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vceqq_s32 (int32x4_t __a, int32x4_t __b)
- {
-- return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b);
-+ return (uint32x4_t) (__a == __b);
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vceqq_s64 (int64x2_t __a, int64x2_t __b)
- {
-- return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b);
-+ return (uint64x2_t) (__a == __b);
- }
-
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vceqq_u8 (uint8x16_t __a, uint8x16_t __b)
- {
-- return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
-- (int8x16_t) __b);
-+ return (__a == __b);
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vceqq_u16 (uint16x8_t __a, uint16x8_t __b)
- {
-- return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a,
-- (int16x8_t) __b);
-+ return (__a == __b);
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vceqq_u32 (uint32x4_t __a, uint32x4_t __b)
- {
-- return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a,
-- (int32x4_t) __b);
-+ return (__a == __b);
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vceqq_u64 (uint64x2_t __a, uint64x2_t __b)
- {
-- return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a,
-- (int64x2_t) __b);
-+ return (__a == __b);
- }
-
- /* vceq - scalar. */
-@@ -15875,8 +13791,7 @@
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vceqz_f32 (float32x2_t __a)
- {
-- float32x2_t __b = {0.0f, 0.0f};
-- return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b);
-+ return (uint32x2_t) (__a == 0.0f);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-@@ -15888,30 +13803,25 @@
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vceqz_p8 (poly8x8_t __a)
- {
-- poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
-- return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
-- (int8x8_t) __b);
-+ return (uint8x8_t) (__a == 0);
- }
-
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vceqz_s8 (int8x8_t __a)
- {
-- int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
-- return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b);
-+ return (uint8x8_t) (__a == 0);
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vceqz_s16 (int16x4_t __a)
- {
-- int16x4_t __b = {0, 0, 0, 0};
-- return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b);
-+ return (uint16x4_t) (__a == 0);
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vceqz_s32 (int32x2_t __a)
- {
-- int32x2_t __b = {0, 0};
-- return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b);
-+ return (uint32x2_t) (__a == 0);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-@@ -15923,25 +13833,19 @@
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vceqz_u8 (uint8x8_t __a)
- {
-- uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
-- return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
-- (int8x8_t) __b);
-+ return (__a == 0);
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vceqz_u16 (uint16x4_t __a)
- {
-- uint16x4_t __b = {0, 0, 0, 0};
-- return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a,
-- (int16x4_t) __b);
-+ return (__a == 0);
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vceqz_u32 (uint32x2_t __a)
- {
-- uint32x2_t __b = {0, 0};
-- return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a,
-- (int32x2_t) __b);
-+ return (__a == 0);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-@@ -15953,86 +13857,67 @@
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vceqzq_f32 (float32x4_t __a)
- {
-- float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
-- return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b);
-+ return (uint32x4_t) (__a == 0.0f);
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vceqzq_f64 (float64x2_t __a)
- {
-- float64x2_t __b = {0.0, 0.0};
-- return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b);
-+ return (uint64x2_t) (__a == 0.0f);
- }
-
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vceqzq_p8 (poly8x16_t __a)
- {
-- poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
-- 0, 0, 0, 0, 0, 0, 0, 0};
-- return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
-- (int8x16_t) __b);
-+ return (uint8x16_t) (__a == 0);
- }
-
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vceqzq_s8 (int8x16_t __a)
- {
-- int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
-- 0, 0, 0, 0, 0, 0, 0, 0};
-- return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b);
-+ return (uint8x16_t) (__a == 0);
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vceqzq_s16 (int16x8_t __a)
- {
-- int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
-- return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b);
-+ return (uint16x8_t) (__a == 0);
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vceqzq_s32 (int32x4_t __a)
- {
-- int32x4_t __b = {0, 0, 0, 0};
-- return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b);
-+ return (uint32x4_t) (__a == 0);
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vceqzq_s64 (int64x2_t __a)
- {
-- int64x2_t __b = {0, 0};
-- return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b);
-+ return (uint64x2_t) (__a == __AARCH64_INT64_C (0));
- }
-
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vceqzq_u8 (uint8x16_t __a)
- {
-- uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
-- 0, 0, 0, 0, 0, 0, 0, 0};
-- return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
-- (int8x16_t) __b);
-+ return (__a == 0);
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vceqzq_u16 (uint16x8_t __a)
- {
-- uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
-- return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a,
-- (int16x8_t) __b);
-+ return (__a == 0);
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vceqzq_u32 (uint32x4_t __a)
- {
-- uint32x4_t __b = {0, 0, 0, 0};
-- return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a,
-- (int32x4_t) __b);
-+ return (__a == 0);
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vceqzq_u64 (uint64x2_t __a)
- {
-- uint64x2_t __b = {0, 0};
-- return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a,
-- (int64x2_t) __b);
-+ return (__a == __AARCH64_UINT64_C (0));
- }
-
- /* vceqz - scalar. */
-@@ -16066,7 +13951,7 @@
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vcge_f32 (float32x2_t __a, float32x2_t __b)
- {
-- return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b);
-+ return (uint32x2_t) (__a >= __b);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-@@ -16076,28 +13961,21 @@
- }
-
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
--vcge_p8 (poly8x8_t __a, poly8x8_t __b)
--{
-- return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a,
-- (int8x8_t) __b);
--}
--
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vcge_s8 (int8x8_t __a, int8x8_t __b)
- {
-- return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b);
-+ return (uint8x8_t) (__a >= __b);
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vcge_s16 (int16x4_t __a, int16x4_t __b)
- {
-- return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b);
-+ return (uint16x4_t) (__a >= __b);
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vcge_s32 (int32x2_t __a, int32x2_t __b)
- {
-- return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b);
-+ return (uint32x2_t) (__a >= __b);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-@@ -16109,22 +13987,19 @@
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vcge_u8 (uint8x8_t __a, uint8x8_t __b)
- {
-- return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a,
-- (int8x8_t) __b);
-+ return (__a >= __b);
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vcge_u16 (uint16x4_t __a, uint16x4_t __b)
- {
-- return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a,
-- (int16x4_t) __b);
-+ return (__a >= __b);
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vcge_u32 (uint32x2_t __a, uint32x2_t __b)
- {
-- return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a,
-- (int32x2_t) __b);
-+ return (__a >= __b);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-@@ -16136,72 +14011,61 @@
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vcgeq_f32 (float32x4_t __a, float32x4_t __b)
- {
-- return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b);
-+ return (uint32x4_t) (__a >= __b);
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vcgeq_f64 (float64x2_t __a, float64x2_t __b)
- {
-- return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b);
-+ return (uint64x2_t) (__a >= __b);
- }
-
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vcgeq_p8 (poly8x16_t __a, poly8x16_t __b)
--{
-- return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a,
-- (int8x16_t) __b);
--}
--
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vcgeq_s8 (int8x16_t __a, int8x16_t __b)
- {
-- return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b);
-+ return (uint8x16_t) (__a >= __b);
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vcgeq_s16 (int16x8_t __a, int16x8_t __b)
- {
-- return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b);
-+ return (uint16x8_t) (__a >= __b);
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vcgeq_s32 (int32x4_t __a, int32x4_t __b)
- {
-- return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b);
-+ return (uint32x4_t) (__a >= __b);
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vcgeq_s64 (int64x2_t __a, int64x2_t __b)
- {
-- return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b);
-+ return (uint64x2_t) (__a >= __b);
- }
-
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vcgeq_u8 (uint8x16_t __a, uint8x16_t __b)
- {
-- return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a,
-- (int8x16_t) __b);
-+ return (__a >= __b);
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vcgeq_u16 (uint16x8_t __a, uint16x8_t __b)
- {
-- return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a,
-- (int16x8_t) __b);
-+ return (__a >= __b);
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vcgeq_u32 (uint32x4_t __a, uint32x4_t __b)
- {
-- return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a,
-- (int32x4_t) __b);
-+ return (__a >= __b);
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vcgeq_u64 (uint64x2_t __a, uint64x2_t __b)
- {
-- return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a,
-- (int64x2_t) __b);
-+ return (__a >= __b);
- }
-
- /* vcge - scalar. */
-@@ -16235,8 +14099,7 @@
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vcgez_f32 (float32x2_t __a)
- {
-- float32x2_t __b = {0.0f, 0.0f};
-- return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b);
-+ return (uint32x2_t) (__a >= 0.0f);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-@@ -16246,32 +14109,21 @@
- }
-
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
--vcgez_p8 (poly8x8_t __a)
--{
-- poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
-- return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a,
-- (int8x8_t) __b);
--}
--
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vcgez_s8 (int8x8_t __a)
- {
-- int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
-- return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b);
-+ return (uint8x8_t) (__a >= 0);
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vcgez_s16 (int16x4_t __a)
- {
-- int16x4_t __b = {0, 0, 0, 0};
-- return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b);
-+ return (uint16x4_t) (__a >= 0);
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vcgez_s32 (int32x2_t __a)
- {
-- int32x2_t __b = {0, 0};
-- return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b);
-+ return (uint32x2_t) (__a >= 0);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-@@ -16280,121 +14132,42 @@
- return __a >= 0ll ? -1ll : 0ll;
- }
-
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
--vcgez_u8 (uint8x8_t __a)
--{
-- uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
-- return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a,
-- (int8x8_t) __b);
--}
--
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
--vcgez_u16 (uint16x4_t __a)
--{
-- uint16x4_t __b = {0, 0, 0, 0};
-- return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a,
-- (int16x4_t) __b);
--}
--
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
--vcgez_u32 (uint32x2_t __a)
--{
-- uint32x2_t __b = {0, 0};
-- return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a,
-- (int32x2_t) __b);
--}
--
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
--vcgez_u64 (uint64x1_t __a)
--{
-- return __a >= 0ll ? -1ll : 0ll;
--}
--
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vcgezq_f32 (float32x4_t __a)
- {
-- float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
-- return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b);
-+ return (uint32x4_t) (__a >= 0.0f);
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vcgezq_f64 (float64x2_t __a)
- {
-- float64x2_t __b = {0.0, 0.0};
-- return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b);
-+ return (uint64x2_t) (__a >= 0.0);
- }
-
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vcgezq_p8 (poly8x16_t __a)
--{
-- poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
-- 0, 0, 0, 0, 0, 0, 0, 0};
-- return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a,
-- (int8x16_t) __b);
--}
--
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vcgezq_s8 (int8x16_t __a)
- {
-- int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
-- 0, 0, 0, 0, 0, 0, 0, 0};
-- return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b);
-+ return (uint8x16_t) (__a >= 0);
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vcgezq_s16 (int16x8_t __a)
- {
-- int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
-- return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b);
-+ return (uint16x8_t) (__a >= 0);
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vcgezq_s32 (int32x4_t __a)
- {
-- int32x4_t __b = {0, 0, 0, 0};
-- return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b);
-+ return (uint32x4_t) (__a >= 0);
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vcgezq_s64 (int64x2_t __a)
- {
-- int64x2_t __b = {0, 0};
-- return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b);
-+ return (uint64x2_t) (__a >= __AARCH64_INT64_C (0));
- }
-
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vcgezq_u8 (uint8x16_t __a)
--{
-- uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
-- 0, 0, 0, 0, 0, 0, 0, 0};
-- return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a,
-- (int8x16_t) __b);
--}
--
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
--vcgezq_u16 (uint16x8_t __a)
--{
-- uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
-- return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a,
-- (int16x8_t) __b);
--}
--
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vcgezq_u32 (uint32x4_t __a)
--{
-- uint32x4_t __b = {0, 0, 0, 0};
-- return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a,
-- (int32x4_t) __b);
--}
--
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
--vcgezq_u64 (uint64x2_t __a)
--{
-- uint64x2_t __b = {0, 0};
-- return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a,
-- (int64x2_t) __b);
--}
--
- /* vcgez - scalar. */
-
- __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
-@@ -16409,12 +14182,6 @@
- return __a >= 0 ? -1ll : 0ll;
- }
-
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
--vcgezd_u64 (int64x1_t __a)
--{
-- return __a >= 0 ? -1ll : 0ll;
--}
--
- __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
- vcgezd_f64 (float64_t __a)
- {
-@@ -16426,7 +14193,7 @@
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vcgt_f32 (float32x2_t __a, float32x2_t __b)
- {
-- return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b);
-+ return (uint32x2_t) (__a > __b);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-@@ -16436,28 +14203,21 @@
- }
-
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
--vcgt_p8 (poly8x8_t __a, poly8x8_t __b)
--{
-- return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a,
-- (int8x8_t) __b);
--}
--
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vcgt_s8 (int8x8_t __a, int8x8_t __b)
- {
-- return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b);
-+ return (uint8x8_t) (__a > __b);
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vcgt_s16 (int16x4_t __a, int16x4_t __b)
- {
-- return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b);
-+ return (uint16x4_t) (__a > __b);
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vcgt_s32 (int32x2_t __a, int32x2_t __b)
- {
-- return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b);
-+ return (uint32x2_t) (__a > __b);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-@@ -16469,22 +14229,19 @@
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vcgt_u8 (uint8x8_t __a, uint8x8_t __b)
- {
-- return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a,
-- (int8x8_t) __b);
-+ return (__a > __b);
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vcgt_u16 (uint16x4_t __a, uint16x4_t __b)
- {
-- return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a,
-- (int16x4_t) __b);
-+ return (__a > __b);
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vcgt_u32 (uint32x2_t __a, uint32x2_t __b)
- {
-- return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a,
-- (int32x2_t) __b);
-+ return (__a > __b);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-@@ -16496,72 +14253,61 @@
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vcgtq_f32 (float32x4_t __a, float32x4_t __b)
- {
-- return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b);
-+ return (uint32x4_t) (__a > __b);
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vcgtq_f64 (float64x2_t __a, float64x2_t __b)
- {
-- return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b);
-+ return (uint64x2_t) (__a > __b);
- }
-
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vcgtq_p8 (poly8x16_t __a, poly8x16_t __b)
--{
-- return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a,
-- (int8x16_t) __b);
--}
--
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vcgtq_s8 (int8x16_t __a, int8x16_t __b)
- {
-- return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b);
-+ return (uint8x16_t) (__a > __b);
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vcgtq_s16 (int16x8_t __a, int16x8_t __b)
- {
-- return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b);
-+ return (uint16x8_t) (__a > __b);
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vcgtq_s32 (int32x4_t __a, int32x4_t __b)
- {
-- return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b);
-+ return (uint32x4_t) (__a > __b);
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vcgtq_s64 (int64x2_t __a, int64x2_t __b)
- {
-- return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b);
-+ return (uint64x2_t) (__a > __b);
- }
-
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vcgtq_u8 (uint8x16_t __a, uint8x16_t __b)
- {
-- return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a,
-- (int8x16_t) __b);
-+ return (__a > __b);
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vcgtq_u16 (uint16x8_t __a, uint16x8_t __b)
- {
-- return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a,
-- (int16x8_t) __b);
-+ return (__a > __b);
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vcgtq_u32 (uint32x4_t __a, uint32x4_t __b)
- {
-- return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a,
-- (int32x4_t) __b);
-+ return (__a > __b);
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vcgtq_u64 (uint64x2_t __a, uint64x2_t __b)
- {
-- return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a,
-- (int64x2_t) __b);
-+ return (__a > __b);
- }
-
- /* vcgt - scalar. */
-@@ -16595,8 +14341,7 @@
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vcgtz_f32 (float32x2_t __a)
- {
-- float32x2_t __b = {0.0f, 0.0f};
-- return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b);
-+ return (uint32x2_t) (__a > 0.0f);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-@@ -16606,32 +14351,21 @@
- }
-
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
--vcgtz_p8 (poly8x8_t __a)
--{
-- poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
-- return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a,
-- (int8x8_t) __b);
--}
--
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vcgtz_s8 (int8x8_t __a)
- {
-- int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
-- return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b);
-+ return (uint8x8_t) (__a > 0);
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vcgtz_s16 (int16x4_t __a)
- {
-- int16x4_t __b = {0, 0, 0, 0};
-- return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b);
-+ return (uint16x4_t) (__a > 0);
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vcgtz_s32 (int32x2_t __a)
- {
-- int32x2_t __b = {0, 0};
-- return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b);
-+ return (uint32x2_t) (__a > 0);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-@@ -16640,121 +14374,42 @@
- return __a > 0ll ? -1ll : 0ll;
- }
-
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
--vcgtz_u8 (uint8x8_t __a)
--{
-- uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
-- return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a,
-- (int8x8_t) __b);
--}
--
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
--vcgtz_u16 (uint16x4_t __a)
--{
-- uint16x4_t __b = {0, 0, 0, 0};
-- return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a,
-- (int16x4_t) __b);
--}
--
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
--vcgtz_u32 (uint32x2_t __a)
--{
-- uint32x2_t __b = {0, 0};
-- return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a,
-- (int32x2_t) __b);
--}
--
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
--vcgtz_u64 (uint64x1_t __a)
--{
-- return __a > 0ll ? -1ll : 0ll;
--}
--
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vcgtzq_f32 (float32x4_t __a)
- {
-- float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
-- return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b);
-+ return (uint32x4_t) (__a > 0.0f);
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vcgtzq_f64 (float64x2_t __a)
- {
-- float64x2_t __b = {0.0, 0.0};
-- return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b);
-+ return (uint64x2_t) (__a > 0.0);
- }
-
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vcgtzq_p8 (poly8x16_t __a)
--{
-- poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
-- 0, 0, 0, 0, 0, 0, 0, 0};
-- return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a,
-- (int8x16_t) __b);
--}
--
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vcgtzq_s8 (int8x16_t __a)
- {
-- int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
-- 0, 0, 0, 0, 0, 0, 0, 0};
-- return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b);
-+ return (uint8x16_t) (__a > 0);
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vcgtzq_s16 (int16x8_t __a)
- {
-- int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
-- return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b);
-+ return (uint16x8_t) (__a > 0);
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vcgtzq_s32 (int32x4_t __a)
- {
-- int32x4_t __b = {0, 0, 0, 0};
-- return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b);
-+ return (uint32x4_t) (__a > 0);
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vcgtzq_s64 (int64x2_t __a)
- {
-- int64x2_t __b = {0, 0};
-- return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b);
-+ return (uint64x2_t) (__a > __AARCH64_INT64_C (0));
- }
-
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vcgtzq_u8 (uint8x16_t __a)
--{
-- uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
-- 0, 0, 0, 0, 0, 0, 0, 0};
-- return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a,
-- (int8x16_t) __b);
--}
--
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
--vcgtzq_u16 (uint16x8_t __a)
--{
-- uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
-- return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a,
-- (int16x8_t) __b);
--}
--
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vcgtzq_u32 (uint32x4_t __a)
--{
-- uint32x4_t __b = {0, 0, 0, 0};
-- return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a,
-- (int32x4_t) __b);
--}
--
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
--vcgtzq_u64 (uint64x2_t __a)
--{
-- uint64x2_t __b = {0, 0};
-- return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a,
-- (int64x2_t) __b);
--}
--
- /* vcgtz - scalar. */
-
- __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
-@@ -16769,12 +14424,6 @@
- return __a > 0 ? -1ll : 0ll;
- }
-
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
--vcgtzd_u64 (int64x1_t __a)
--{
-- return __a > 0 ? -1ll : 0ll;
--}
--
- __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
- vcgtzd_f64 (float64_t __a)
- {
-@@ -16786,7 +14435,7 @@
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vcle_f32 (float32x2_t __a, float32x2_t __b)
- {
-- return (uint32x2_t) __builtin_aarch64_cmgev2sf (__b, __a);
-+ return (uint32x2_t) (__a <= __b);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-@@ -16796,28 +14445,21 @@
- }
-
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
--vcle_p8 (poly8x8_t __a, poly8x8_t __b)
--{
-- return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __b,
-- (int8x8_t) __a);
--}
--
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vcle_s8 (int8x8_t __a, int8x8_t __b)
- {
-- return (uint8x8_t) __builtin_aarch64_cmgev8qi (__b, __a);
-+ return (uint8x8_t) (__a <= __b);
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vcle_s16 (int16x4_t __a, int16x4_t __b)
- {
-- return (uint16x4_t) __builtin_aarch64_cmgev4hi (__b, __a);
-+ return (uint16x4_t) (__a <= __b);
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vcle_s32 (int32x2_t __a, int32x2_t __b)
- {
-- return (uint32x2_t) __builtin_aarch64_cmgev2si (__b, __a);
-+ return (uint32x2_t) (__a <= __b);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-@@ -16829,22 +14471,19 @@
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vcle_u8 (uint8x8_t __a, uint8x8_t __b)
- {
-- return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __b,
-- (int8x8_t) __a);
-+ return (__a <= __b);
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vcle_u16 (uint16x4_t __a, uint16x4_t __b)
- {
-- return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __b,
-- (int16x4_t) __a);
-+ return (__a <= __b);
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vcle_u32 (uint32x2_t __a, uint32x2_t __b)
- {
-- return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __b,
-- (int32x2_t) __a);
-+ return (__a <= __b);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-@@ -16856,72 +14495,61 @@
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vcleq_f32 (float32x4_t __a, float32x4_t __b)
- {
-- return (uint32x4_t) __builtin_aarch64_cmgev4sf (__b, __a);
-+ return (uint32x4_t) (__a <= __b);
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vcleq_f64 (float64x2_t __a, float64x2_t __b)
- {
-- return (uint64x2_t) __builtin_aarch64_cmgev2df (__b, __a);
-+ return (uint64x2_t) (__a <= __b);
- }
-
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vcleq_p8 (poly8x16_t __a, poly8x16_t __b)
--{
-- return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __b,
-- (int8x16_t) __a);
--}
--
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vcleq_s8 (int8x16_t __a, int8x16_t __b)
- {
-- return (uint8x16_t) __builtin_aarch64_cmgev16qi (__b, __a);
-+ return (uint8x16_t) (__a <= __b);
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vcleq_s16 (int16x8_t __a, int16x8_t __b)
- {
-- return (uint16x8_t) __builtin_aarch64_cmgev8hi (__b, __a);
-+ return (uint16x8_t) (__a <= __b);
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vcleq_s32 (int32x4_t __a, int32x4_t __b)
- {
-- return (uint32x4_t) __builtin_aarch64_cmgev4si (__b, __a);
-+ return (uint32x4_t) (__a <= __b);
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vcleq_s64 (int64x2_t __a, int64x2_t __b)
- {
-- return (uint64x2_t) __builtin_aarch64_cmgev2di (__b, __a);
-+ return (uint64x2_t) (__a <= __b);
- }
-
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vcleq_u8 (uint8x16_t __a, uint8x16_t __b)
- {
-- return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __b,
-- (int8x16_t) __a);
-+ return (__a <= __b);
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vcleq_u16 (uint16x8_t __a, uint16x8_t __b)
- {
-- return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __b,
-- (int16x8_t) __a);
-+ return (__a <= __b);
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vcleq_u32 (uint32x4_t __a, uint32x4_t __b)
- {
-- return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __b,
-- (int32x4_t) __a);
-+ return (__a <= __b);
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vcleq_u64 (uint64x2_t __a, uint64x2_t __b)
- {
-- return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __b,
-- (int64x2_t) __a);
-+ return (__a <= __b);
- }
-
- /* vcle - scalar. */
-@@ -16955,8 +14583,7 @@
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vclez_f32 (float32x2_t __a)
- {
-- float32x2_t __b = {0.0f, 0.0f};
-- return (uint32x2_t) __builtin_aarch64_cmlev2sf (__a, __b);
-+ return (uint32x2_t) (__a <= 0.0f);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-@@ -16966,32 +14593,21 @@
- }
-
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
--vclez_p8 (poly8x8_t __a)
--{
-- poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
-- return (uint8x8_t) __builtin_aarch64_cmlev8qi ((int8x8_t) __a,
-- (int8x8_t) __b);
--}
--
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vclez_s8 (int8x8_t __a)
- {
-- int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
-- return (uint8x8_t) __builtin_aarch64_cmlev8qi (__a, __b);
-+ return (uint8x8_t) (__a <= 0);
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vclez_s16 (int16x4_t __a)
- {
-- int16x4_t __b = {0, 0, 0, 0};
-- return (uint16x4_t) __builtin_aarch64_cmlev4hi (__a, __b);
-+ return (uint16x4_t) (__a <= 0);
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vclez_s32 (int32x2_t __a)
- {
-- int32x2_t __b = {0, 0};
-- return (uint32x2_t) __builtin_aarch64_cmlev2si (__a, __b);
-+ return (uint32x2_t) (__a <= 0);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-@@ -17000,62 +14616,40 @@
- return __a <= 0ll ? -1ll : 0ll;
- }
-
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
--vclez_u64 (uint64x1_t __a)
--{
-- return __a <= 0ll ? -1ll : 0ll;
--}
--
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vclezq_f32 (float32x4_t __a)
- {
-- float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
-- return (uint32x4_t) __builtin_aarch64_cmlev4sf (__a, __b);
-+ return (uint32x4_t) (__a <= 0.0f);
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vclezq_f64 (float64x2_t __a)
- {
-- float64x2_t __b = {0.0, 0.0};
-- return (uint64x2_t) __builtin_aarch64_cmlev2df (__a, __b);
-+ return (uint64x2_t) (__a <= 0.0);
- }
-
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vclezq_p8 (poly8x16_t __a)
--{
-- poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
-- 0, 0, 0, 0, 0, 0, 0, 0};
-- return (uint8x16_t) __builtin_aarch64_cmlev16qi ((int8x16_t) __a,
-- (int8x16_t) __b);
--}
--
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vclezq_s8 (int8x16_t __a)
- {
-- int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
-- 0, 0, 0, 0, 0, 0, 0, 0};
-- return (uint8x16_t) __builtin_aarch64_cmlev16qi (__a, __b);
-+ return (uint8x16_t) (__a <= 0);
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vclezq_s16 (int16x8_t __a)
- {
-- int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
-- return (uint16x8_t) __builtin_aarch64_cmlev8hi (__a, __b);
-+ return (uint16x8_t) (__a <= 0);
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vclezq_s32 (int32x4_t __a)
- {
-- int32x4_t __b = {0, 0, 0, 0};
-- return (uint32x4_t) __builtin_aarch64_cmlev4si (__a, __b);
-+ return (uint32x4_t) (__a <= 0);
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vclezq_s64 (int64x2_t __a)
- {
-- int64x2_t __b = {0, 0};
-- return (uint64x2_t) __builtin_aarch64_cmlev2di (__a, __b);
-+ return (uint64x2_t) (__a <= __AARCH64_INT64_C (0));
- }
-
- /* vclez - scalar. */
-@@ -17072,12 +14666,6 @@
- return __a <= 0 ? -1ll : 0ll;
- }
-
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
--vclezd_u64 (int64x1_t __a)
--{
-- return __a <= 0 ? -1ll : 0ll;
--}
--
- __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
- vclezd_f64 (float64_t __a)
- {
-@@ -17089,7 +14677,7 @@
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vclt_f32 (float32x2_t __a, float32x2_t __b)
- {
-- return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__b, __a);
-+ return (uint32x2_t) (__a < __b);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-@@ -17099,28 +14687,21 @@
- }
-
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
--vclt_p8 (poly8x8_t __a, poly8x8_t __b)
--{
-- return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __b,
-- (int8x8_t) __a);
--}
--
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vclt_s8 (int8x8_t __a, int8x8_t __b)
- {
-- return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__b, __a);
-+ return (uint8x8_t) (__a < __b);
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vclt_s16 (int16x4_t __a, int16x4_t __b)
- {
-- return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__b, __a);
-+ return (uint16x4_t) (__a < __b);
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vclt_s32 (int32x2_t __a, int32x2_t __b)
- {
-- return (uint32x2_t) __builtin_aarch64_cmgtv2si (__b, __a);
-+ return (uint32x2_t) (__a < __b);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-@@ -17132,22 +14713,19 @@
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vclt_u8 (uint8x8_t __a, uint8x8_t __b)
- {
-- return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __b,
-- (int8x8_t) __a);
-+ return (__a < __b);
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vclt_u16 (uint16x4_t __a, uint16x4_t __b)
- {
-- return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __b,
-- (int16x4_t) __a);
-+ return (__a < __b);
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vclt_u32 (uint32x2_t __a, uint32x2_t __b)
- {
-- return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __b,
-- (int32x2_t) __a);
-+ return (__a < __b);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-@@ -17159,72 +14737,61 @@
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vcltq_f32 (float32x4_t __a, float32x4_t __b)
- {
-- return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__b, __a);
-+ return (uint32x4_t) (__a < __b);
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vcltq_f64 (float64x2_t __a, float64x2_t __b)
- {
-- return (uint64x2_t) __builtin_aarch64_cmgtv2df (__b, __a);
-+ return (uint64x2_t) (__a < __b);
- }
-
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vcltq_p8 (poly8x16_t __a, poly8x16_t __b)
--{
-- return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __b,
-- (int8x16_t) __a);
--}
--
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vcltq_s8 (int8x16_t __a, int8x16_t __b)
- {
-- return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__b, __a);
-+ return (uint8x16_t) (__a < __b);
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vcltq_s16 (int16x8_t __a, int16x8_t __b)
- {
-- return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__b, __a);
-+ return (uint16x8_t) (__a < __b);
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vcltq_s32 (int32x4_t __a, int32x4_t __b)
- {
-- return (uint32x4_t) __builtin_aarch64_cmgtv4si (__b, __a);
-+ return (uint32x4_t) (__a < __b);
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vcltq_s64 (int64x2_t __a, int64x2_t __b)
- {
-- return (uint64x2_t) __builtin_aarch64_cmgtv2di (__b, __a);
-+ return (uint64x2_t) (__a < __b);
- }
-
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vcltq_u8 (uint8x16_t __a, uint8x16_t __b)
- {
-- return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __b,
-- (int8x16_t) __a);
-+ return (__a < __b);
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vcltq_u16 (uint16x8_t __a, uint16x8_t __b)
- {
-- return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __b,
-- (int16x8_t) __a);
-+ return (__a < __b);
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vcltq_u32 (uint32x4_t __a, uint32x4_t __b)
- {
-- return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __b,
-- (int32x4_t) __a);
-+ return (__a < __b);
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vcltq_u64 (uint64x2_t __a, uint64x2_t __b)
- {
-- return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __b,
-- (int64x2_t) __a);
-+ return (__a < __b);
- }
-
- /* vclt - scalar. */
-@@ -17258,8 +14825,7 @@
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vcltz_f32 (float32x2_t __a)
- {
-- float32x2_t __b = {0.0f, 0.0f};
-- return (uint32x2_t) __builtin_aarch64_cmltv2sf (__a, __b);
-+ return (uint32x2_t) (__a < 0.0f);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-@@ -17269,32 +14835,21 @@
- }
-
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
--vcltz_p8 (poly8x8_t __a)
--{
-- poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
-- return (uint8x8_t) __builtin_aarch64_cmltv8qi ((int8x8_t) __a,
-- (int8x8_t) __b);
--}
--
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vcltz_s8 (int8x8_t __a)
- {
-- int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
-- return (uint8x8_t) __builtin_aarch64_cmltv8qi (__a, __b);
-+ return (uint8x8_t) (__a < 0);
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vcltz_s16 (int16x4_t __a)
- {
-- int16x4_t __b = {0, 0, 0, 0};
-- return (uint16x4_t) __builtin_aarch64_cmltv4hi (__a, __b);
-+ return (uint16x4_t) (__a < 0);
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vcltz_s32 (int32x2_t __a)
- {
-- int32x2_t __b = {0, 0};
-- return (uint32x2_t) __builtin_aarch64_cmltv2si (__a, __b);
-+ return (uint32x2_t) (__a < 0);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-@@ -17306,53 +14861,37 @@
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vcltzq_f32 (float32x4_t __a)
- {
-- float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
-- return (uint32x4_t) __builtin_aarch64_cmltv4sf (__a, __b);
-+ return (uint32x4_t) (__a < 0.0f);
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vcltzq_f64 (float64x2_t __a)
- {
-- float64x2_t __b = {0.0, 0.0};
-- return (uint64x2_t) __builtin_aarch64_cmltv2df (__a, __b);
-+ return (uint64x2_t) (__a < 0.0);
- }
-
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vcltzq_p8 (poly8x16_t __a)
--{
-- poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
-- 0, 0, 0, 0, 0, 0, 0, 0};
-- return (uint8x16_t) __builtin_aarch64_cmltv16qi ((int8x16_t) __a,
-- (int8x16_t) __b);
--}
--
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vcltzq_s8 (int8x16_t __a)
- {
-- int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
-- 0, 0, 0, 0, 0, 0, 0, 0};
-- return (uint8x16_t) __builtin_aarch64_cmltv16qi (__a, __b);
-+ return (uint8x16_t) (__a < 0);
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vcltzq_s16 (int16x8_t __a)
- {
-- int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
-- return (uint16x8_t) __builtin_aarch64_cmltv8hi (__a, __b);
-+ return (uint16x8_t) (__a < 0);
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vcltzq_s32 (int32x4_t __a)
- {
-- int32x4_t __b = {0, 0, 0, 0};
-- return (uint32x4_t) __builtin_aarch64_cmltv4si (__a, __b);
-+ return (uint32x4_t) (__a < 0);
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vcltzq_s64 (int64x2_t __a)
- {
-- int64x2_t __b = {0, 0};
-- return (uint64x2_t) __builtin_aarch64_cmltv2di (__a, __b);
-+ return (uint64x2_t) (__a < __AARCH64_INT64_C (0));
- }
-
- /* vcltz - scalar. */
-@@ -17369,12 +14908,6 @@
- return __a < 0 ? -1ll : 0ll;
- }
-
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
--vcltzd_u64 (int64x1_t __a)
--{
-- return __a < 0 ? -1ll : 0ll;
--}
--
- __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
- vcltzd_f64 (float64_t __a)
- {
-@@ -18483,6 +16016,292 @@
- return __aarch64_vgetq_lane_u64 (__a, __b);
- }
-
-+/* vext */
-+
-+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-+vext_f32 (float32x2_t __a, float32x2_t __b, __const int __c)
-+{
-+ __builtin_aarch64_im_lane_boundsi (__c, 2);
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1});
-+#endif
-+}
-+
-+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
-+vext_f64 (float64x1_t __a, float64x1_t __b, __const int __c)
-+{
-+ /* The only possible index to the assembler instruction returns element 0. */
-+ __builtin_aarch64_im_lane_boundsi (__c, 1);
-+ return __a;
-+}
-+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-+vext_p8 (poly8x8_t __a, poly8x8_t __b, __const int __c)
-+{
-+ __builtin_aarch64_im_lane_boundsi (__c, 8);
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__b, __a, (uint8x8_t)
-+ {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
-+#else
-+ return __builtin_shuffle (__a, __b,
-+ (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
-+#endif
-+}
-+
-+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-+vext_p16 (poly16x4_t __a, poly16x4_t __b, __const int __c)
-+{
-+ __builtin_aarch64_im_lane_boundsi (__c, 4);
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__b, __a,
-+ (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3});
-+#endif
-+}
-+
-+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+vext_s8 (int8x8_t __a, int8x8_t __b, __const int __c)
-+{
-+ __builtin_aarch64_im_lane_boundsi (__c, 8);
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__b, __a, (uint8x8_t)
-+ {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
-+#else
-+ return __builtin_shuffle (__a, __b,
-+ (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
-+#endif
-+}
-+
-+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+vext_s16 (int16x4_t __a, int16x4_t __b, __const int __c)
-+{
-+ __builtin_aarch64_im_lane_boundsi (__c, 4);
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__b, __a,
-+ (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3});
-+#endif
-+}
-+
-+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+vext_s32 (int32x2_t __a, int32x2_t __b, __const int __c)
-+{
-+ __builtin_aarch64_im_lane_boundsi (__c, 2);
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1});
-+#endif
-+}
-+
-+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-+vext_s64 (int64x1_t __a, int64x1_t __b, __const int __c)
-+{
-+ /* The only possible index to the assembler instruction returns element 0. */
-+ __builtin_aarch64_im_lane_boundsi (__c, 1);
-+ return __a;
-+}
-+
-+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+vext_u8 (uint8x8_t __a, uint8x8_t __b, __const int __c)
-+{
-+ __builtin_aarch64_im_lane_boundsi (__c, 8);
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__b, __a, (uint8x8_t)
-+ {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
-+#else
-+ return __builtin_shuffle (__a, __b,
-+ (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
-+#endif
-+}
-+
-+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+vext_u16 (uint16x4_t __a, uint16x4_t __b, __const int __c)
-+{
-+ __builtin_aarch64_im_lane_boundsi (__c, 4);
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__b, __a,
-+ (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3});
-+#endif
-+}
-+
-+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+vext_u32 (uint32x2_t __a, uint32x2_t __b, __const int __c)
-+{
-+ __builtin_aarch64_im_lane_boundsi (__c, 2);
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1});
-+#endif
-+}
-+
-+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-+vext_u64 (uint64x1_t __a, uint64x1_t __b, __const int __c)
-+{
-+ /* The only possible index to the assembler instruction returns element 0. */
-+ __builtin_aarch64_im_lane_boundsi (__c, 1);
-+ return __a;
-+}
-+
-+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-+vextq_f32 (float32x4_t __a, float32x4_t __b, __const int __c)
-+{
-+ __builtin_aarch64_im_lane_boundsi (__c, 4);
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__b, __a,
-+ (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3});
-+#endif
-+}
-+
-+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
-+vextq_f64 (float64x2_t __a, float64x2_t __b, __const int __c)
-+{
-+ __builtin_aarch64_im_lane_boundsi (__c, 2);
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1});
-+#endif
-+}
-+
-+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-+vextq_p8 (poly8x16_t __a, poly8x16_t __b, __const int __c)
-+{
-+ __builtin_aarch64_im_lane_boundsi (__c, 16);
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__b, __a, (uint8x16_t)
-+ {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
-+ 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint8x16_t)
-+ {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
-+ __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15});
-+#endif
-+}
-+
-+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-+vextq_p16 (poly16x8_t __a, poly16x8_t __b, __const int __c)
-+{
-+ __builtin_aarch64_im_lane_boundsi (__c, 8);
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__b, __a, (uint16x8_t)
-+ {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
-+#else
-+ return __builtin_shuffle (__a, __b,
-+ (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
-+#endif
-+}
-+
-+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-+vextq_s8 (int8x16_t __a, int8x16_t __b, __const int __c)
-+{
-+ __builtin_aarch64_im_lane_boundsi (__c, 16);
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__b, __a, (uint8x16_t)
-+ {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
-+ 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint8x16_t)
-+ {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
-+ __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15});
-+#endif
-+}
-+
-+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-+vextq_s16 (int16x8_t __a, int16x8_t __b, __const int __c)
-+{
-+ __builtin_aarch64_im_lane_boundsi (__c, 8);
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__b, __a, (uint16x8_t)
-+ {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
-+#else
-+ return __builtin_shuffle (__a, __b,
-+ (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
-+#endif
-+}
-+
-+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+vextq_s32 (int32x4_t __a, int32x4_t __b, __const int __c)
-+{
-+ __builtin_aarch64_im_lane_boundsi (__c, 4);
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__b, __a,
-+ (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3});
-+#endif
-+}
-+
-+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-+vextq_s64 (int64x2_t __a, int64x2_t __b, __const int __c)
-+{
-+ __builtin_aarch64_im_lane_boundsi (__c, 2);
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1});
-+#endif
-+}
-+
-+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-+vextq_u8 (uint8x16_t __a, uint8x16_t __b, __const int __c)
-+{
-+ __builtin_aarch64_im_lane_boundsi (__c, 16);
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__b, __a, (uint8x16_t)
-+ {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
-+ 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint8x16_t)
-+ {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
-+ __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15});
-+#endif
-+}
-+
-+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-+vextq_u16 (uint16x8_t __a, uint16x8_t __b, __const int __c)
-+{
-+ __builtin_aarch64_im_lane_boundsi (__c, 8);
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__b, __a, (uint16x8_t)
-+ {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
-+#else
-+ return __builtin_shuffle (__a, __b,
-+ (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
-+#endif
-+}
-+
-+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+vextq_u32 (uint32x4_t __a, uint32x4_t __b, __const int __c)
-+{
-+ __builtin_aarch64_im_lane_boundsi (__c, 4);
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__b, __a,
-+ (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3});
-+#endif
-+}
-+
-+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-+vextq_u64 (uint64x2_t __a, uint64x2_t __b, __const int __c)
-+{
-+ __builtin_aarch64_im_lane_boundsi (__c, 2);
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1});
-+#endif
-+}
-+
- /* vfma_lane */
-
- __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-@@ -19712,6 +17531,872 @@
- return ret;
- }
-
-+/* vldn_dup */
-+
-+__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
-+vld2_dup_s8 (const int8_t * __a)
-+{
-+ int8x8x2_t ret;
-+ __builtin_aarch64_simd_oi __o;
-+ __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a);
-+ ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
-+ ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
-+ return ret;
-+}
-+
-+__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
-+vld2_dup_s16 (const int16_t * __a)
-+{
-+ int16x4x2_t ret;
-+ __builtin_aarch64_simd_oi __o;
-+ __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a);
-+ ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
-+ ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
-+ return ret;
-+}
-+
-+__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
-+vld2_dup_s32 (const int32_t * __a)
-+{
-+ int32x2x2_t ret;
-+ __builtin_aarch64_simd_oi __o;
-+ __o = __builtin_aarch64_ld2rv2si ((const __builtin_aarch64_simd_si *) __a);
-+ ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
-+ ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
-+ return ret;
-+}
-+
-+__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
-+vld2_dup_f32 (const float32_t * __a)
-+{
-+ float32x2x2_t ret;
-+ __builtin_aarch64_simd_oi __o;
-+ __o = __builtin_aarch64_ld2rv2sf ((const __builtin_aarch64_simd_sf *) __a);
-+ ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0);
-+ ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1);
-+ return ret;
-+}
-+
-+__extension__ static __inline float64x1x2_t __attribute__ ((__always_inline__))
-+vld2_dup_f64 (const float64_t * __a)
-+{
-+ float64x1x2_t ret;
-+ __builtin_aarch64_simd_oi __o;
-+ __o = __builtin_aarch64_ld2rdf ((const __builtin_aarch64_simd_df *) __a);
-+ ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 0)};
-+ ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 1)};
-+ return ret;
-+}
-+
-+__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
-+vld2_dup_u8 (const uint8_t * __a)
-+{
-+ uint8x8x2_t ret;
-+ __builtin_aarch64_simd_oi __o;
-+ __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a);
-+ ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
-+ ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
-+ return ret;
-+}
-+
-+__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
-+vld2_dup_u16 (const uint16_t * __a)
-+{
-+ uint16x4x2_t ret;
-+ __builtin_aarch64_simd_oi __o;
-+ __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a);
-+ ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
-+ ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
-+ return ret;
-+}
-+
-+__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
-+vld2_dup_u32 (const uint32_t * __a)
-+{
-+ uint32x2x2_t ret;
-+ __builtin_aarch64_simd_oi __o;
-+ __o = __builtin_aarch64_ld2rv2si ((const __builtin_aarch64_simd_si *) __a);
-+ ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
-+ ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
-+ return ret;
-+}
-+
-+__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
-+vld2_dup_p8 (const poly8_t * __a)
-+{
-+ poly8x8x2_t ret;
-+ __builtin_aarch64_simd_oi __o;
-+ __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a);
-+ ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
-+ ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
-+ return ret;
-+}
-+
-+__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
-+vld2_dup_p16 (const poly16_t * __a)
-+{
-+ poly16x4x2_t ret;
-+ __builtin_aarch64_simd_oi __o;
-+ __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a);
-+ ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
-+ ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
-+ return ret;
-+}
-+
-+__extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__))
-+vld2_dup_s64 (const int64_t * __a)
-+{
-+ int64x1x2_t ret;
-+ __builtin_aarch64_simd_oi __o;
-+ __o = __builtin_aarch64_ld2rdi ((const __builtin_aarch64_simd_di *) __a);
-+ ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
-+ ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
-+ return ret;
-+}
-+
-+__extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__))
-+vld2_dup_u64 (const uint64_t * __a)
-+{
-+ uint64x1x2_t ret;
-+ __builtin_aarch64_simd_oi __o;
-+ __o = __builtin_aarch64_ld2rdi ((const __builtin_aarch64_simd_di *) __a);
-+ ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
-+ ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
-+ return ret;
-+}
-+
-+__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
-+vld2q_dup_s8 (const int8_t * __a)
-+{
-+ int8x16x2_t ret;
-+ __builtin_aarch64_simd_oi __o;
-+ __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a);
-+ ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
-+ ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
-+ return ret;
-+}
-+
-+__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
-+vld2q_dup_p8 (const poly8_t * __a)
-+{
-+ poly8x16x2_t ret;
-+ __builtin_aarch64_simd_oi __o;
-+ __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a);
-+ ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
-+ ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
-+ return ret;
-+}
-+
-+__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
-+vld2q_dup_s16 (const int16_t * __a)
-+{
-+ int16x8x2_t ret;
-+ __builtin_aarch64_simd_oi __o;
-+ __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a);
-+ ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
-+ ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
-+ return ret;
-+}
-+
-+__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
-+vld2q_dup_p16 (const poly16_t * __a)
-+{
-+ poly16x8x2_t ret;
-+ __builtin_aarch64_simd_oi __o;
-+ __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a);
-+ ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
-+ ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
-+ return ret;
-+}
-+
-+__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
-+vld2q_dup_s32 (const int32_t * __a)
-+{
-+ int32x4x2_t ret;
-+ __builtin_aarch64_simd_oi __o;
-+ __o = __builtin_aarch64_ld2rv4si ((const __builtin_aarch64_simd_si *) __a);
-+ ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
-+ ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
-+ return ret;
-+}
-+
-+__extension__ static __inline int64x2x2_t __attribute__ ((__always_inline__))
-+vld2q_dup_s64 (const int64_t * __a)
-+{
-+ int64x2x2_t ret;
-+ __builtin_aarch64_simd_oi __o;
-+ __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a);
-+ ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
-+ ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
-+ return ret;
-+}
-+
-+__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
-+vld2q_dup_u8 (const uint8_t * __a)
-+{
-+ uint8x16x2_t ret;
-+ __builtin_aarch64_simd_oi __o;
-+ __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a);
-+ ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
-+ ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
-+ return ret;
-+}
-+
-+__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
-+vld2q_dup_u16 (const uint16_t * __a)
-+{
-+ uint16x8x2_t ret;
-+ __builtin_aarch64_simd_oi __o;
-+ __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a);
-+ ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
-+ ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
-+ return ret;
-+}
-+
-+__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
-+vld2q_dup_u32 (const uint32_t * __a)
-+{
-+ uint32x4x2_t ret;
-+ __builtin_aarch64_simd_oi __o;
-+ __o = __builtin_aarch64_ld2rv4si ((const __builtin_aarch64_simd_si *) __a);
-+ ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
-+ ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
-+ return ret;
-+}
-+
-+__extension__ static __inline uint64x2x2_t __attribute__ ((__always_inline__))
-+vld2q_dup_u64 (const uint64_t * __a)
-+{
-+ uint64x2x2_t ret;
-+ __builtin_aarch64_simd_oi __o;
-+ __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a);
-+ ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
-+ ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
-+ return ret;
-+}
-+
-+__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
-+vld2q_dup_f32 (const float32_t * __a)
-+{
-+ float32x4x2_t ret;
-+ __builtin_aarch64_simd_oi __o;
-+ __o = __builtin_aarch64_ld2rv4sf ((const __builtin_aarch64_simd_sf *) __a);
-+ ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0);
-+ ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1);
-+ return ret;
-+}
-+
-+__extension__ static __inline float64x2x2_t __attribute__ ((__always_inline__))
-+vld2q_dup_f64 (const float64_t * __a)
-+{
-+ float64x2x2_t ret;
-+ __builtin_aarch64_simd_oi __o;
-+ __o = __builtin_aarch64_ld2rv2df ((const __builtin_aarch64_simd_df *) __a);
-+ ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0);
-+ ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1);
-+ return ret;
-+}
-+
-+__extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__))
-+vld3_dup_s64 (const int64_t * __a)
-+{
-+ int64x1x3_t ret;
-+ __builtin_aarch64_simd_ci __o;
-+ __o = __builtin_aarch64_ld3rdi ((const __builtin_aarch64_simd_di *) __a);
-+ ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
-+ ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
-+ ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
-+ return ret;
-+}
-+
-+__extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__))
-+vld3_dup_u64 (const uint64_t * __a)
-+{
-+ uint64x1x3_t ret;
-+ __builtin_aarch64_simd_ci __o;
-+ __o = __builtin_aarch64_ld3rdi ((const __builtin_aarch64_simd_di *) __a);
-+ ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
-+ ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
-+ ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
-+ return ret;
-+}
-+
-+__extension__ static __inline float64x1x3_t __attribute__ ((__always_inline__))
-+vld3_dup_f64 (const float64_t * __a)
-+{
-+ float64x1x3_t ret;
-+ __builtin_aarch64_simd_ci __o;
-+ __o = __builtin_aarch64_ld3rdf ((const __builtin_aarch64_simd_df *) __a);
-+ ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 0)};
-+ ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 1)};
-+ ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 2)};
-+ return ret;
-+}
-+
-+__extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__))
-+vld3_dup_s8 (const int8_t * __a)
-+{
-+ int8x8x3_t ret;
-+ __builtin_aarch64_simd_ci __o;
-+ __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a);
-+ ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
-+ ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
-+ ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
-+ return ret;
-+}
-+
-+__extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__))
-+vld3_dup_p8 (const poly8_t * __a)
-+{
-+ poly8x8x3_t ret;
-+ __builtin_aarch64_simd_ci __o;
-+ __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a);
-+ ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
-+ ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
-+ ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
-+ return ret;
-+}
-+
-+__extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__))
-+vld3_dup_s16 (const int16_t * __a)
-+{
-+ int16x4x3_t ret;
-+ __builtin_aarch64_simd_ci __o;
-+ __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a);
-+ ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
-+ ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
-+ ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
-+ return ret;
-+}
-+
-+__extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__))
-+vld3_dup_p16 (const poly16_t * __a)
-+{
-+ poly16x4x3_t ret;
-+ __builtin_aarch64_simd_ci __o;
-+ __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a);
-+ ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
-+ ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
-+ ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
-+ return ret;
-+}
-+
-+__extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__))
-+vld3_dup_s32 (const int32_t * __a)
-+{
-+ int32x2x3_t ret;
-+ __builtin_aarch64_simd_ci __o;
-+ __o = __builtin_aarch64_ld3rv2si ((const __builtin_aarch64_simd_si *) __a);
-+ ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
-+ ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
-+ ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
-+ return ret;
-+}
-+
-+__extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__))
-+vld3_dup_u8 (const uint8_t * __a)
-+{
-+ uint8x8x3_t ret;
-+ __builtin_aarch64_simd_ci __o;
-+ __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a);
-+ ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
-+ ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
-+ ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
-+ return ret;
-+}
-+
-+__extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__))
-+vld3_dup_u16 (const uint16_t * __a)
-+{
-+ uint16x4x3_t ret;
-+ __builtin_aarch64_simd_ci __o;
-+ __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a);
-+ ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
-+ ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
-+ ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
-+ return ret;
-+}
-+
-+__extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__))
-+vld3_dup_u32 (const uint32_t * __a)
-+{
-+ uint32x2x3_t ret;
-+ __builtin_aarch64_simd_ci __o;
-+ __o = __builtin_aarch64_ld3rv2si ((const __builtin_aarch64_simd_si *) __a);
-+ ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
-+ ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
-+ ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
-+ return ret;
-+}
-+
-+__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
-+vld3_dup_f32 (const float32_t * __a)
-+{
-+ float32x2x3_t ret;
-+ __builtin_aarch64_simd_ci __o;
-+ __o = __builtin_aarch64_ld3rv2sf ((const __builtin_aarch64_simd_sf *) __a);
-+ ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0);
-+ ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1);
-+ ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2);
-+ return ret;
-+}
-+
-+__extension__ static __inline int8x16x3_t __attribute__ ((__always_inline__))
-+vld3q_dup_s8 (const int8_t * __a)
-+{
-+ int8x16x3_t ret;
-+ __builtin_aarch64_simd_ci __o;
-+ __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a);
-+ ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
-+ ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
-+ ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
-+ return ret;
-+}
-+
-+__extension__ static __inline poly8x16x3_t __attribute__ ((__always_inline__))
-+vld3q_dup_p8 (const poly8_t * __a)
-+{
-+ poly8x16x3_t ret;
-+ __builtin_aarch64_simd_ci __o;
-+ __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a);
-+ ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
-+ ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
-+ ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
-+ return ret;
-+}
-+
-+__extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__))
-+vld3q_dup_s16 (const int16_t * __a)
-+{
-+ int16x8x3_t ret;
-+ __builtin_aarch64_simd_ci __o;
-+ __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a);
-+ ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
-+ ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
-+ ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
-+ return ret;
-+}
-+
-+__extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__))
-+vld3q_dup_p16 (const poly16_t * __a)
-+{
-+ poly16x8x3_t ret;
-+ __builtin_aarch64_simd_ci __o;
-+ __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a);
-+ ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
-+ ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
-+ ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
-+ return ret;
-+}
-+
-+__extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__))
-+vld3q_dup_s32 (const int32_t * __a)
-+{
-+ int32x4x3_t ret;
-+ __builtin_aarch64_simd_ci __o;
-+ __o = __builtin_aarch64_ld3rv4si ((const __builtin_aarch64_simd_si *) __a);
-+ ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
-+ ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
-+ ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
-+ return ret;
-+}
-+
-+__extension__ static __inline int64x2x3_t __attribute__ ((__always_inline__))
-+vld3q_dup_s64 (const int64_t * __a)
-+{
-+ int64x2x3_t ret;
-+ __builtin_aarch64_simd_ci __o;
-+ __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a);
-+ ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
-+ ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
-+ ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
-+ return ret;
-+}
-+
-+__extension__ static __inline uint8x16x3_t __attribute__ ((__always_inline__))
-+vld3q_dup_u8 (const uint8_t * __a)
-+{
-+ uint8x16x3_t ret;
-+ __builtin_aarch64_simd_ci __o;
-+ __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a);
-+ ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
-+ ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
-+ ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
-+ return ret;
-+}
-+
-+__extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__))
-+vld3q_dup_u16 (const uint16_t * __a)
-+{
-+ uint16x8x3_t ret;
-+ __builtin_aarch64_simd_ci __o;
-+ __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a);
-+ ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
-+ ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
-+ ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
-+ return ret;
-+}
-+
-+__extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__))
-+vld3q_dup_u32 (const uint32_t * __a)
-+{
-+ uint32x4x3_t ret;
-+ __builtin_aarch64_simd_ci __o;
-+ __o = __builtin_aarch64_ld3rv4si ((const __builtin_aarch64_simd_si *) __a);
-+ ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
-+ ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
-+ ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
-+ return ret;
-+}
-+
-+__extension__ static __inline uint64x2x3_t __attribute__ ((__always_inline__))
-+vld3q_dup_u64 (const uint64_t * __a)
-+{
-+ uint64x2x3_t ret;
-+ __builtin_aarch64_simd_ci __o;
-+ __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a);
-+ ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
-+ ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
-+ ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
-+ return ret;
-+}
-+
-+__extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
-+vld3q_dup_f32 (const float32_t * __a)
-+{
-+ float32x4x3_t ret;
-+ __builtin_aarch64_simd_ci __o;
-+ __o = __builtin_aarch64_ld3rv4sf ((const __builtin_aarch64_simd_sf *) __a);
-+ ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0);
-+ ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1);
-+ ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2);
-+ return ret;
-+}
-+
-+__extension__ static __inline float64x2x3_t __attribute__ ((__always_inline__))
-+vld3q_dup_f64 (const float64_t * __a)
-+{
-+ float64x2x3_t ret;
-+ __builtin_aarch64_simd_ci __o;
-+ __o = __builtin_aarch64_ld3rv2df ((const __builtin_aarch64_simd_df *) __a);
-+ ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0);
-+ ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1);
-+ ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2);
-+ return ret;
-+}
-+
-+__extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__))
-+vld4_dup_s64 (const int64_t * __a)
-+{
-+ int64x1x4_t ret;
-+ __builtin_aarch64_simd_xi __o;
-+ __o = __builtin_aarch64_ld4rdi ((const __builtin_aarch64_simd_di *) __a);
-+ ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
-+ ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
-+ ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
-+ ret.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
-+ return ret;
-+}
-+
-+__extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__))
-+vld4_dup_u64 (const uint64_t * __a)
-+{
-+ uint64x1x4_t ret;
-+ __builtin_aarch64_simd_xi __o;
-+ __o = __builtin_aarch64_ld4rdi ((const __builtin_aarch64_simd_di *) __a);
-+ ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
-+ ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
-+ ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
-+ ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
-+ return ret;
-+}
-+
-+__extension__ static __inline float64x1x4_t __attribute__ ((__always_inline__))
-+vld4_dup_f64 (const float64_t * __a)
-+{
-+ float64x1x4_t ret;
-+ __builtin_aarch64_simd_xi __o;
-+ __o = __builtin_aarch64_ld4rdf ((const __builtin_aarch64_simd_df *) __a);
-+ ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 0)};
-+ ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 1)};
-+ ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 2)};
-+ ret.val[3] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 3)};
-+ return ret;
-+}
-+
-+__extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__))
-+vld4_dup_s8 (const int8_t * __a)
-+{
-+ int8x8x4_t ret;
-+ __builtin_aarch64_simd_xi __o;
-+ __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a);
-+ ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
-+ ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
-+ ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
-+ ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
-+ return ret;
-+}
-+
-+__extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__))
-+vld4_dup_p8 (const poly8_t * __a)
-+{
-+ poly8x8x4_t ret;
-+ __builtin_aarch64_simd_xi __o;
-+ __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a);
-+ ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
-+ ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
-+ ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
-+ ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
-+ return ret;
-+}
-+
-+__extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__))
-+vld4_dup_s16 (const int16_t * __a)
-+{
-+ int16x4x4_t ret;
-+ __builtin_aarch64_simd_xi __o;
-+ __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a);
-+ ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
-+ ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
-+ ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
-+ ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
-+ return ret;
-+}
-+
-+__extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__))
-+vld4_dup_p16 (const poly16_t * __a)
-+{
-+ poly16x4x4_t ret;
-+ __builtin_aarch64_simd_xi __o;
-+ __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a);
-+ ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
-+ ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
-+ ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
-+ ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
-+ return ret;
-+}
-+
-+__extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__))
-+vld4_dup_s32 (const int32_t * __a)
-+{
-+ int32x2x4_t ret;
-+ __builtin_aarch64_simd_xi __o;
-+ __o = __builtin_aarch64_ld4rv2si ((const __builtin_aarch64_simd_si *) __a);
-+ ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
-+ ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
-+ ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
-+ ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
-+ return ret;
-+}
-+
-+__extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__))
-+vld4_dup_u8 (const uint8_t * __a)
-+{
-+ uint8x8x4_t ret;
-+ __builtin_aarch64_simd_xi __o;
-+ __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a);
-+ ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
-+ ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
-+ ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
-+ ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
-+ return ret;
-+}
-+
-+__extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__))
-+vld4_dup_u16 (const uint16_t * __a)
-+{
-+ uint16x4x4_t ret;
-+ __builtin_aarch64_simd_xi __o;
-+ __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a);
-+ ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
-+ ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
-+ ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
-+ ret.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
-+ return ret;
-+}
-+
-+__extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__))
-+vld4_dup_u32 (const uint32_t * __a)
-+{
-+ uint32x2x4_t ret;
-+ __builtin_aarch64_simd_xi __o;
-+ __o = __builtin_aarch64_ld4rv2si ((const __builtin_aarch64_simd_si *) __a);
-+ ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
-+ ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
-+ ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
-+ ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
-+ return ret;
-+}
-+
-+__extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
-+vld4_dup_f32 (const float32_t * __a)
-+{
-+ float32x2x4_t ret;
-+ __builtin_aarch64_simd_xi __o;
-+ __o = __builtin_aarch64_ld4rv2sf ((const __builtin_aarch64_simd_sf *) __a);
-+ ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0);
-+ ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 1);
-+ ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2);
-+ ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3);
-+ return ret;
-+}
-+
-+__extension__ static __inline int8x16x4_t __attribute__ ((__always_inline__))
-+vld4q_dup_s8 (const int8_t * __a)
-+{
-+ int8x16x4_t ret;
-+ __builtin_aarch64_simd_xi __o;
-+ __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a);
-+ ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
-+ ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
-+ ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
-+ ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
-+ return ret;
-+}
-+
-+__extension__ static __inline poly8x16x4_t __attribute__ ((__always_inline__))
-+vld4q_dup_p8 (const poly8_t * __a)
-+{
-+ poly8x16x4_t ret;
-+ __builtin_aarch64_simd_xi __o;
-+ __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a);
-+ ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
-+ ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
-+ ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
-+ ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
-+ return ret;
-+}
-+
-+__extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__))
-+vld4q_dup_s16 (const int16_t * __a)
-+{
-+ int16x8x4_t ret;
-+ __builtin_aarch64_simd_xi __o;
-+ __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a);
-+ ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
-+ ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
-+ ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
-+ ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
-+ return ret;
-+}
-+
-+__extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__))
-+vld4q_dup_p16 (const poly16_t * __a)
-+{
-+ poly16x8x4_t ret;
-+ __builtin_aarch64_simd_xi __o;
-+ __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a);
-+ ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
-+ ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
-+ ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
-+ ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
-+ return ret;
-+}
-+
-+__extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__))
-+vld4q_dup_s32 (const int32_t * __a)
-+{
-+ int32x4x4_t ret;
-+ __builtin_aarch64_simd_xi __o;
-+ __o = __builtin_aarch64_ld4rv4si ((const __builtin_aarch64_simd_si *) __a);
-+ ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
-+ ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
-+ ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
-+ ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
-+ return ret;
-+}
-+
-+__extension__ static __inline int64x2x4_t __attribute__ ((__always_inline__))
-+vld4q_dup_s64 (const int64_t * __a)
-+{
-+ int64x2x4_t ret;
-+ __builtin_aarch64_simd_xi __o;
-+ __o = __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a);
-+ ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
-+ ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
-+ ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
-+ ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
-+ return ret;
-+}
-+
-+__extension__ static __inline uint8x16x4_t __attribute__ ((__always_inline__))
-+vld4q_dup_u8 (const uint8_t * __a)
-+{
-+ uint8x16x4_t ret;
-+ __builtin_aarch64_simd_xi __o;
-+ __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a);
-+ ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
-+ ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
-+ ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
-+ ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
-+ return ret;
-+}
-+
-+__extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__))
-+vld4q_dup_u16 (const uint16_t * __a)
-+{
-+ uint16x8x4_t ret;
-+ __builtin_aarch64_simd_xi __o;
-+ __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a);
-+ ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
-+ ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
-+ ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
-+ ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
-+ return ret;
-+}
-+
-+__extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__))
-+vld4q_dup_u32 (const uint32_t * __a)
-+{
-+ uint32x4x4_t ret;
-+ __builtin_aarch64_simd_xi __o;
-+ __o = __builtin_aarch64_ld4rv4si ((const __builtin_aarch64_simd_si *) __a);
-+ ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
-+ ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
-+ ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
-+ ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
-+ return ret;
-+}
-+
-+__extension__ static __inline uint64x2x4_t __attribute__ ((__always_inline__))
-+vld4q_dup_u64 (const uint64_t * __a)
-+{
-+ uint64x2x4_t ret;
-+ __builtin_aarch64_simd_xi __o;
-+ __o = __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a);
-+ ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
-+ ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
-+ ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
-+ ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
-+ return ret;
-+}
-+
-+__extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
-+vld4q_dup_f32 (const float32_t * __a)
-+{
-+ float32x4x4_t ret;
-+ __builtin_aarch64_simd_xi __o;
-+ __o = __builtin_aarch64_ld4rv4sf ((const __builtin_aarch64_simd_sf *) __a);
-+ ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0);
-+ ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1);
-+ ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2);
-+ ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3);
-+ return ret;
-+}
-+
-+__extension__ static __inline float64x2x4_t __attribute__ ((__always_inline__))
-+vld4q_dup_f64 (const float64_t * __a)
-+{
-+ float64x2x4_t ret;
-+ __builtin_aarch64_simd_xi __o;
-+ __o = __builtin_aarch64_ld4rv2df ((const __builtin_aarch64_simd_df *) __a);
-+ ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 0);
-+ ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1);
-+ ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2);
-+ ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3);
-+ return ret;
-+}
-+
- /* vmax */
-
- __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-@@ -20911,6 +19596,65 @@
- return -__a;
- }
-
-+/* vpadd */
-+
-+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+vpadd_s8 (int8x8_t __a, int8x8_t __b)
-+{
-+ return __builtin_aarch64_addpv8qi (__a, __b);
-+}
-+
-+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+vpadd_s16 (int16x4_t __a, int16x4_t __b)
-+{
-+ return __builtin_aarch64_addpv4hi (__a, __b);
-+}
-+
-+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+vpadd_s32 (int32x2_t __a, int32x2_t __b)
-+{
-+ return __builtin_aarch64_addpv2si (__a, __b);
-+}
-+
-+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+vpadd_u8 (uint8x8_t __a, uint8x8_t __b)
-+{
-+ return (uint8x8_t) __builtin_aarch64_addpv8qi ((int8x8_t) __a,
-+ (int8x8_t) __b);
-+}
-+
-+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+vpadd_u16 (uint16x4_t __a, uint16x4_t __b)
-+{
-+ return (uint16x4_t) __builtin_aarch64_addpv4hi ((int16x4_t) __a,
-+ (int16x4_t) __b);
-+}
-+
-+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+vpadd_u32 (uint32x2_t __a, uint32x2_t __b)
-+{
-+ return (uint32x2_t) __builtin_aarch64_addpv2si ((int32x2_t) __a,
-+ (int32x2_t) __b);
-+}
-+
-+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
-+vpaddd_f64 (float64x2_t __a)
-+{
-+ return vgetq_lane_f64 (__builtin_aarch64_reduc_splus_v2df (__a), 0);
-+}
-+
-+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
-+vpaddd_s64 (int64x2_t __a)
-+{
-+ return __builtin_aarch64_addpdi (__a);
-+}
-+
-+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
-+vpaddd_u64 (uint64x2_t __a)
-+{
-+ return __builtin_aarch64_addpdi ((int64x2_t) __a);
-+}
-+
- /* vqabs */
-
- __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-@@ -20937,6 +19681,12 @@
- return (int32_t) __builtin_aarch64_sqabssi (__a);
- }
-
-+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
-+vqabsd_s64 (int64_t __a)
-+{
-+ return __builtin_aarch64_sqabsdi (__a);
-+}
-+
- /* vqadd */
-
- __extension__ static __inline int8_t __attribute__ ((__always_inline__))
-@@ -20966,25 +19716,26 @@
- __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
- vqaddb_u8 (uint8_t __a, uint8_t __b)
- {
-- return (uint8_t) __builtin_aarch64_uqaddqi (__a, __b);
-+ return (uint8_t) __builtin_aarch64_uqaddqi_uuu (__a, __b);
- }
-
- __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
- vqaddh_u16 (uint16_t __a, uint16_t __b)
- {
-- return (uint16_t) __builtin_aarch64_uqaddhi (__a, __b);
-+ return (uint16_t) __builtin_aarch64_uqaddhi_uuu (__a, __b);
- }
-
- __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
- vqadds_u32 (uint32_t __a, uint32_t __b)
- {
-- return (uint32_t) __builtin_aarch64_uqaddsi (__a, __b);
-+ return (uint32_t) __builtin_aarch64_uqaddsi_uuu (__a, __b);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
- vqaddd_u64 (uint64x1_t __a, uint64x1_t __b)
- {
-- return (uint64x1_t) __builtin_aarch64_uqadddi (__a, __b);
-+ return (uint64x1_t) __builtin_aarch64_uqadddi_uuu ((uint64_t) __a,
-+ (uint64_t) __b);
- }
-
- /* vqdmlal */
-@@ -21549,6 +20300,12 @@
- return (int32_t) __builtin_aarch64_sqnegsi (__a);
- }
-
-+__extension__ static __inline int64_t __attribute__ ((__always_inline__))
-+vqnegd_s64 (int64_t __a)
-+{
-+ return __builtin_aarch64_sqnegdi (__a);
-+}
-+
- /* vqrdmulh */
-
- __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-@@ -21628,25 +20385,25 @@
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vqrshl_u8 (uint8x8_t __a, int8x8_t __b)
- {
-- return (uint8x8_t) __builtin_aarch64_uqrshlv8qi ((int8x8_t) __a, __b);
-+ return __builtin_aarch64_uqrshlv8qi_uus ( __a, __b);
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vqrshl_u16 (uint16x4_t __a, int16x4_t __b)
- {
-- return (uint16x4_t) __builtin_aarch64_uqrshlv4hi ((int16x4_t) __a, __b);
-+ return __builtin_aarch64_uqrshlv4hi_uus ( __a, __b);
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vqrshl_u32 (uint32x2_t __a, int32x2_t __b)
- {
-- return (uint32x2_t) __builtin_aarch64_uqrshlv2si ((int32x2_t) __a, __b);
-+ return __builtin_aarch64_uqrshlv2si_uus ( __a, __b);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
- vqrshl_u64 (uint64x1_t __a, int64x1_t __b)
- {
-- return (uint64x1_t) __builtin_aarch64_uqrshldi ((int64x1_t) __a, __b);
-+ return __builtin_aarch64_uqrshldi_uus ( __a, __b);
- }
-
- __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-@@ -21676,25 +20433,25 @@
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vqrshlq_u8 (uint8x16_t __a, int8x16_t __b)
- {
-- return (uint8x16_t) __builtin_aarch64_uqrshlv16qi ((int8x16_t) __a, __b);
-+ return __builtin_aarch64_uqrshlv16qi_uus ( __a, __b);
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vqrshlq_u16 (uint16x8_t __a, int16x8_t __b)
- {
-- return (uint16x8_t) __builtin_aarch64_uqrshlv8hi ((int16x8_t) __a, __b);
-+ return __builtin_aarch64_uqrshlv8hi_uus ( __a, __b);
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vqrshlq_u32 (uint32x4_t __a, int32x4_t __b)
- {
-- return (uint32x4_t) __builtin_aarch64_uqrshlv4si ((int32x4_t) __a, __b);
-+ return __builtin_aarch64_uqrshlv4si_uus ( __a, __b);
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vqrshlq_u64 (uint64x2_t __a, int64x2_t __b)
- {
-- return (uint64x2_t) __builtin_aarch64_uqrshlv2di ((int64x2_t) __a, __b);
-+ return __builtin_aarch64_uqrshlv2di_uus ( __a, __b);
- }
-
- __extension__ static __inline int8_t __attribute__ ((__always_inline__))
-@@ -21724,25 +20481,25 @@
- __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
- vqrshlb_u8 (uint8_t __a, uint8_t __b)
- {
-- return (uint8_t) __builtin_aarch64_uqrshlqi (__a, __b);
-+ return __builtin_aarch64_uqrshlqi_uus (__a, __b);
- }
-
- __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
- vqrshlh_u16 (uint16_t __a, uint16_t __b)
- {
-- return (uint16_t) __builtin_aarch64_uqrshlhi (__a, __b);
-+ return __builtin_aarch64_uqrshlhi_uus (__a, __b);
- }
-
- __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
- vqrshls_u32 (uint32_t __a, uint32_t __b)
- {
-- return (uint32_t) __builtin_aarch64_uqrshlsi (__a, __b);
-+ return __builtin_aarch64_uqrshlsi_uus (__a, __b);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
- vqrshld_u64 (uint64x1_t __a, uint64x1_t __b)
- {
-- return (uint64x1_t) __builtin_aarch64_uqrshldi (__a, __b);
-+ return __builtin_aarch64_uqrshldi_uus (__a, __b);
- }
-
- /* vqrshrn */
-@@ -21768,19 +20525,19 @@
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vqrshrn_n_u16 (uint16x8_t __a, const int __b)
- {
-- return (uint8x8_t) __builtin_aarch64_uqrshrn_nv8hi ((int16x8_t) __a, __b);
-+ return __builtin_aarch64_uqrshrn_nv8hi_uus ( __a, __b);
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vqrshrn_n_u32 (uint32x4_t __a, const int __b)
- {
-- return (uint16x4_t) __builtin_aarch64_uqrshrn_nv4si ((int32x4_t) __a, __b);
-+ return __builtin_aarch64_uqrshrn_nv4si_uus ( __a, __b);
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vqrshrn_n_u64 (uint64x2_t __a, const int __b)
- {
-- return (uint32x2_t) __builtin_aarch64_uqrshrn_nv2di ((int64x2_t) __a, __b);
-+ return __builtin_aarch64_uqrshrn_nv2di_uus ( __a, __b);
- }
-
- __extension__ static __inline int8_t __attribute__ ((__always_inline__))
-@@ -21804,19 +20561,19 @@
- __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
- vqrshrnh_n_u16 (uint16_t __a, const int __b)
- {
-- return (uint8_t) __builtin_aarch64_uqrshrn_nhi (__a, __b);
-+ return __builtin_aarch64_uqrshrn_nhi_uus (__a, __b);
- }
-
- __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
- vqrshrns_n_u32 (uint32_t __a, const int __b)
- {
-- return (uint16_t) __builtin_aarch64_uqrshrn_nsi (__a, __b);
-+ return __builtin_aarch64_uqrshrn_nsi_uus (__a, __b);
- }
-
- __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
- vqrshrnd_n_u64 (uint64x1_t __a, const int __b)
- {
-- return (uint32_t) __builtin_aarch64_uqrshrn_ndi (__a, __b);
-+ return __builtin_aarch64_uqrshrn_ndi_uus (__a, __b);
- }
-
- /* vqrshrun */
-@@ -21886,25 +20643,25 @@
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vqshl_u8 (uint8x8_t __a, int8x8_t __b)
- {
-- return (uint8x8_t) __builtin_aarch64_uqshlv8qi ((int8x8_t) __a, __b);
-+ return __builtin_aarch64_uqshlv8qi_uus ( __a, __b);
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vqshl_u16 (uint16x4_t __a, int16x4_t __b)
- {
-- return (uint16x4_t) __builtin_aarch64_uqshlv4hi ((int16x4_t) __a, __b);
-+ return __builtin_aarch64_uqshlv4hi_uus ( __a, __b);
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vqshl_u32 (uint32x2_t __a, int32x2_t __b)
- {
-- return (uint32x2_t) __builtin_aarch64_uqshlv2si ((int32x2_t) __a, __b);
-+ return __builtin_aarch64_uqshlv2si_uus ( __a, __b);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
- vqshl_u64 (uint64x1_t __a, int64x1_t __b)
- {
-- return (uint64x1_t) __builtin_aarch64_uqshldi ((int64x1_t) __a, __b);
-+ return __builtin_aarch64_uqshldi_uus ( __a, __b);
- }
-
- __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-@@ -21934,25 +20691,25 @@
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vqshlq_u8 (uint8x16_t __a, int8x16_t __b)
- {
-- return (uint8x16_t) __builtin_aarch64_uqshlv16qi ((int8x16_t) __a, __b);
-+ return __builtin_aarch64_uqshlv16qi_uus ( __a, __b);
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vqshlq_u16 (uint16x8_t __a, int16x8_t __b)
- {
-- return (uint16x8_t) __builtin_aarch64_uqshlv8hi ((int16x8_t) __a, __b);
-+ return __builtin_aarch64_uqshlv8hi_uus ( __a, __b);
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vqshlq_u32 (uint32x4_t __a, int32x4_t __b)
- {
-- return (uint32x4_t) __builtin_aarch64_uqshlv4si ((int32x4_t) __a, __b);
-+ return __builtin_aarch64_uqshlv4si_uus ( __a, __b);
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vqshlq_u64 (uint64x2_t __a, int64x2_t __b)
- {
-- return (uint64x2_t) __builtin_aarch64_uqshlv2di ((int64x2_t) __a, __b);
-+ return __builtin_aarch64_uqshlv2di_uus ( __a, __b);
- }
-
- __extension__ static __inline int8_t __attribute__ ((__always_inline__))
-@@ -21982,25 +20739,25 @@
- __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
- vqshlb_u8 (uint8_t __a, uint8_t __b)
- {
-- return (uint8_t) __builtin_aarch64_uqshlqi (__a, __b);
-+ return __builtin_aarch64_uqshlqi_uus (__a, __b);
- }
-
- __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
- vqshlh_u16 (uint16_t __a, uint16_t __b)
- {
-- return (uint16_t) __builtin_aarch64_uqshlhi (__a, __b);
-+ return __builtin_aarch64_uqshlhi_uus (__a, __b);
- }
-
- __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
- vqshls_u32 (uint32_t __a, uint32_t __b)
- {
-- return (uint32_t) __builtin_aarch64_uqshlsi (__a, __b);
-+ return __builtin_aarch64_uqshlsi_uus (__a, __b);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
- vqshld_u64 (uint64x1_t __a, uint64x1_t __b)
- {
-- return (uint64x1_t) __builtin_aarch64_uqshldi (__a, __b);
-+ return __builtin_aarch64_uqshldi_uus (__a, __b);
- }
-
- __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-@@ -22030,25 +20787,25 @@
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vqshl_n_u8 (uint8x8_t __a, const int __b)
- {
-- return (uint8x8_t) __builtin_aarch64_uqshl_nv8qi ((int8x8_t) __a, __b);
-+ return __builtin_aarch64_uqshl_nv8qi_uus (__a, __b);
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vqshl_n_u16 (uint16x4_t __a, const int __b)
- {
-- return (uint16x4_t) __builtin_aarch64_uqshl_nv4hi ((int16x4_t) __a, __b);
-+ return __builtin_aarch64_uqshl_nv4hi_uus (__a, __b);
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vqshl_n_u32 (uint32x2_t __a, const int __b)
- {
-- return (uint32x2_t) __builtin_aarch64_uqshl_nv2si ((int32x2_t) __a, __b);
-+ return __builtin_aarch64_uqshl_nv2si_uus (__a, __b);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
- vqshl_n_u64 (uint64x1_t __a, const int __b)
- {
-- return (uint64x1_t) __builtin_aarch64_uqshl_ndi ((int64x1_t) __a, __b);
-+ return __builtin_aarch64_uqshl_ndi_uus (__a, __b);
- }
-
- __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-@@ -22078,25 +20835,25 @@
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vqshlq_n_u8 (uint8x16_t __a, const int __b)
- {
-- return (uint8x16_t) __builtin_aarch64_uqshl_nv16qi ((int8x16_t) __a, __b);
-+ return __builtin_aarch64_uqshl_nv16qi_uus (__a, __b);
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vqshlq_n_u16 (uint16x8_t __a, const int __b)
- {
-- return (uint16x8_t) __builtin_aarch64_uqshl_nv8hi ((int16x8_t) __a, __b);
-+ return __builtin_aarch64_uqshl_nv8hi_uus (__a, __b);
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vqshlq_n_u32 (uint32x4_t __a, const int __b)
- {
-- return (uint32x4_t) __builtin_aarch64_uqshl_nv4si ((int32x4_t) __a, __b);
-+ return __builtin_aarch64_uqshl_nv4si_uus (__a, __b);
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vqshlq_n_u64 (uint64x2_t __a, const int __b)
- {
-- return (uint64x2_t) __builtin_aarch64_uqshl_nv2di ((int64x2_t) __a, __b);
-+ return __builtin_aarch64_uqshl_nv2di_uus (__a, __b);
- }
-
- __extension__ static __inline int8_t __attribute__ ((__always_inline__))
-@@ -22126,25 +20883,25 @@
- __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
- vqshlb_n_u8 (uint8_t __a, const int __b)
- {
-- return (uint8_t) __builtin_aarch64_uqshl_nqi (__a, __b);
-+ return __builtin_aarch64_uqshl_nqi_uus (__a, __b);
- }
-
- __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
- vqshlh_n_u16 (uint16_t __a, const int __b)
- {
-- return (uint16_t) __builtin_aarch64_uqshl_nhi (__a, __b);
-+ return __builtin_aarch64_uqshl_nhi_uus (__a, __b);
- }
-
- __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
- vqshls_n_u32 (uint32_t __a, const int __b)
- {
-- return (uint32_t) __builtin_aarch64_uqshl_nsi (__a, __b);
-+ return __builtin_aarch64_uqshl_nsi_uus (__a, __b);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
- vqshld_n_u64 (uint64x1_t __a, const int __b)
- {
-- return (uint64x1_t) __builtin_aarch64_uqshl_ndi (__a, __b);
-+ return __builtin_aarch64_uqshl_ndi_uus (__a, __b);
- }
-
- /* vqshlu */
-@@ -22152,73 +20909,73 @@
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vqshlu_n_s8 (int8x8_t __a, const int __b)
- {
-- return (uint8x8_t) __builtin_aarch64_sqshlu_nv8qi (__a, __b);
-+ return __builtin_aarch64_sqshlu_nv8qi_uss (__a, __b);
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vqshlu_n_s16 (int16x4_t __a, const int __b)
- {
-- return (uint16x4_t) __builtin_aarch64_sqshlu_nv4hi (__a, __b);
-+ return __builtin_aarch64_sqshlu_nv4hi_uss (__a, __b);
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vqshlu_n_s32 (int32x2_t __a, const int __b)
- {
-- return (uint32x2_t) __builtin_aarch64_sqshlu_nv2si (__a, __b);
-+ return __builtin_aarch64_sqshlu_nv2si_uss (__a, __b);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
- vqshlu_n_s64 (int64x1_t __a, const int __b)
- {
-- return (uint64x1_t) __builtin_aarch64_sqshlu_ndi (__a, __b);
-+ return __builtin_aarch64_sqshlu_ndi_uss (__a, __b);
- }
-
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vqshluq_n_s8 (int8x16_t __a, const int __b)
- {
-- return (uint8x16_t) __builtin_aarch64_sqshlu_nv16qi (__a, __b);
-+ return __builtin_aarch64_sqshlu_nv16qi_uss (__a, __b);
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vqshluq_n_s16 (int16x8_t __a, const int __b)
- {
-- return (uint16x8_t) __builtin_aarch64_sqshlu_nv8hi (__a, __b);
-+ return __builtin_aarch64_sqshlu_nv8hi_uss (__a, __b);
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vqshluq_n_s32 (int32x4_t __a, const int __b)
- {
-- return (uint32x4_t) __builtin_aarch64_sqshlu_nv4si (__a, __b);
-+ return __builtin_aarch64_sqshlu_nv4si_uss (__a, __b);
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vqshluq_n_s64 (int64x2_t __a, const int __b)
- {
-- return (uint64x2_t) __builtin_aarch64_sqshlu_nv2di (__a, __b);
-+ return __builtin_aarch64_sqshlu_nv2di_uss (__a, __b);
- }
-
- __extension__ static __inline int8_t __attribute__ ((__always_inline__))
- vqshlub_n_s8 (int8_t __a, const int __b)
- {
-- return (int8_t) __builtin_aarch64_sqshlu_nqi (__a, __b);
-+ return (int8_t) __builtin_aarch64_sqshlu_nqi_uss (__a, __b);
- }
-
- __extension__ static __inline int16_t __attribute__ ((__always_inline__))
- vqshluh_n_s16 (int16_t __a, const int __b)
- {
-- return (int16_t) __builtin_aarch64_sqshlu_nhi (__a, __b);
-+ return (int16_t) __builtin_aarch64_sqshlu_nhi_uss (__a, __b);
- }
-
- __extension__ static __inline int32_t __attribute__ ((__always_inline__))
- vqshlus_n_s32 (int32_t __a, const int __b)
- {
-- return (int32_t) __builtin_aarch64_sqshlu_nsi (__a, __b);
-+ return (int32_t) __builtin_aarch64_sqshlu_nsi_uss (__a, __b);
- }
-
- __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
- vqshlud_n_s64 (int64x1_t __a, const int __b)
- {
-- return (int64x1_t) __builtin_aarch64_sqshlu_ndi (__a, __b);
-+ return (int64x1_t) __builtin_aarch64_sqshlu_ndi_uss (__a, __b);
- }
-
- /* vqshrn */
-@@ -22244,19 +21001,19 @@
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vqshrn_n_u16 (uint16x8_t __a, const int __b)
- {
-- return (uint8x8_t) __builtin_aarch64_uqshrn_nv8hi ((int16x8_t) __a, __b);
-+ return __builtin_aarch64_uqshrn_nv8hi_uus ( __a, __b);
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vqshrn_n_u32 (uint32x4_t __a, const int __b)
- {
-- return (uint16x4_t) __builtin_aarch64_uqshrn_nv4si ((int32x4_t) __a, __b);
-+ return __builtin_aarch64_uqshrn_nv4si_uus ( __a, __b);
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vqshrn_n_u64 (uint64x2_t __a, const int __b)
- {
-- return (uint32x2_t) __builtin_aarch64_uqshrn_nv2di ((int64x2_t) __a, __b);
-+ return __builtin_aarch64_uqshrn_nv2di_uus ( __a, __b);
- }
-
- __extension__ static __inline int8_t __attribute__ ((__always_inline__))
-@@ -22280,19 +21037,19 @@
- __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
- vqshrnh_n_u16 (uint16_t __a, const int __b)
- {
-- return (uint8_t) __builtin_aarch64_uqshrn_nhi (__a, __b);
-+ return __builtin_aarch64_uqshrn_nhi_uus (__a, __b);
- }
-
- __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
- vqshrns_n_u32 (uint32_t __a, const int __b)
- {
-- return (uint16_t) __builtin_aarch64_uqshrn_nsi (__a, __b);
-+ return __builtin_aarch64_uqshrn_nsi_uus (__a, __b);
- }
-
- __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
- vqshrnd_n_u64 (uint64x1_t __a, const int __b)
- {
-- return (uint32_t) __builtin_aarch64_uqshrn_ndi (__a, __b);
-+ return __builtin_aarch64_uqshrn_ndi_uus (__a, __b);
- }
-
- /* vqshrun */
-@@ -22362,27 +21119,66 @@
- __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
- vqsubb_u8 (uint8_t __a, uint8_t __b)
- {
-- return (uint8_t) __builtin_aarch64_uqsubqi (__a, __b);
-+ return (uint8_t) __builtin_aarch64_uqsubqi_uuu (__a, __b);
- }
-
- __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
- vqsubh_u16 (uint16_t __a, uint16_t __b)
- {
-- return (uint16_t) __builtin_aarch64_uqsubhi (__a, __b);
-+ return (uint16_t) __builtin_aarch64_uqsubhi_uuu (__a, __b);
- }
-
- __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
- vqsubs_u32 (uint32_t __a, uint32_t __b)
- {
-- return (uint32_t) __builtin_aarch64_uqsubsi (__a, __b);
-+ return (uint32_t) __builtin_aarch64_uqsubsi_uuu (__a, __b);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
- vqsubd_u64 (uint64x1_t __a, uint64x1_t __b)
- {
-- return (uint64x1_t) __builtin_aarch64_uqsubdi (__a, __b);
-+ return (uint64x1_t) __builtin_aarch64_uqsubdi_uuu ((uint64_t) __a,
-+ (uint64_t) __b);
- }
-
-+/* vrbit */
-+
-+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-+vrbit_p8 (poly8x8_t __a)
-+{
-+ return (poly8x8_t) __builtin_aarch64_rbitv8qi ((int8x8_t) __a);
-+}
-+
-+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+vrbit_s8 (int8x8_t __a)
-+{
-+ return __builtin_aarch64_rbitv8qi (__a);
-+}
-+
-+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+vrbit_u8 (uint8x8_t __a)
-+{
-+ return (uint8x8_t) __builtin_aarch64_rbitv8qi ((int8x8_t) __a);
-+}
-+
-+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-+vrbitq_p8 (poly8x16_t __a)
-+{
-+ return (poly8x16_t) __builtin_aarch64_rbitv16qi ((int8x16_t)__a);
-+}
-+
-+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-+vrbitq_s8 (int8x16_t __a)
-+{
-+ return __builtin_aarch64_rbitv16qi (__a);
-+}
-+
-+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-+vrbitq_u8 (uint8x16_t __a)
-+{
-+ return (uint8x16_t) __builtin_aarch64_rbitv16qi ((int8x16_t) __a);
-+}
-+
- /* vrecpe */
-
- __extension__ static __inline float32_t __attribute__ ((__always_inline__))
-@@ -22461,6 +21257,234 @@
- return __builtin_aarch64_frecpxdf (__a);
- }
-
-+
-+/* vrev */
-+
-+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-+vrev16_p8 (poly8x8_t a)
-+{
-+ return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
-+}
-+
-+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+vrev16_s8 (int8x8_t a)
-+{
-+ return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
-+}
-+
-+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+vrev16_u8 (uint8x8_t a)
-+{
-+ return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
-+}
-+
-+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-+vrev16q_p8 (poly8x16_t a)
-+{
-+ return __builtin_shuffle (a,
-+ (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 });
-+}
-+
-+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-+vrev16q_s8 (int8x16_t a)
-+{
-+ return __builtin_shuffle (a,
-+ (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 });
-+}
-+
-+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-+vrev16q_u8 (uint8x16_t a)
-+{
-+ return __builtin_shuffle (a,
-+ (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 });
-+}
-+
-+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-+vrev32_p8 (poly8x8_t a)
-+{
-+ return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
-+}
-+
-+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-+vrev32_p16 (poly16x4_t a)
-+{
-+ return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 });
-+}
-+
-+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+vrev32_s8 (int8x8_t a)
-+{
-+ return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
-+}
-+
-+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+vrev32_s16 (int16x4_t a)
-+{
-+ return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 });
-+}
-+
-+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+vrev32_u8 (uint8x8_t a)
-+{
-+ return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
-+}
-+
-+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+vrev32_u16 (uint16x4_t a)
-+{
-+ return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 });
-+}
-+
-+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-+vrev32q_p8 (poly8x16_t a)
-+{
-+ return __builtin_shuffle (a,
-+ (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 });
-+}
-+
-+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-+vrev32q_p16 (poly16x8_t a)
-+{
-+ return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
-+}
-+
-+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-+vrev32q_s8 (int8x16_t a)
-+{
-+ return __builtin_shuffle (a,
-+ (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 });
-+}
-+
-+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-+vrev32q_s16 (int16x8_t a)
-+{
-+ return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
-+}
-+
-+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-+vrev32q_u8 (uint8x16_t a)
-+{
-+ return __builtin_shuffle (a,
-+ (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 });
-+}
-+
-+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-+vrev32q_u16 (uint16x8_t a)
-+{
-+ return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
-+}
-+
-+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-+vrev64_f32 (float32x2_t a)
-+{
-+ return __builtin_shuffle (a, (uint32x2_t) { 1, 0 });
-+}
-+
-+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-+vrev64_p8 (poly8x8_t a)
-+{
-+ return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 });
-+}
-+
-+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-+vrev64_p16 (poly16x4_t a)
-+{
-+ return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 });
-+}
-+
-+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+vrev64_s8 (int8x8_t a)
-+{
-+ return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 });
-+}
-+
-+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+vrev64_s16 (int16x4_t a)
-+{
-+ return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 });
-+}
-+
-+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+vrev64_s32 (int32x2_t a)
-+{
-+ return __builtin_shuffle (a, (uint32x2_t) { 1, 0 });
-+}
-+
-+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+vrev64_u8 (uint8x8_t a)
-+{
-+ return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 });
-+}
-+
-+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+vrev64_u16 (uint16x4_t a)
-+{
-+ return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 });
-+}
-+
-+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+vrev64_u32 (uint32x2_t a)
-+{
-+ return __builtin_shuffle (a, (uint32x2_t) { 1, 0 });
-+}
-+
-+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-+vrev64q_f32 (float32x4_t a)
-+{
-+ return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 });
-+}
-+
-+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-+vrev64q_p8 (poly8x16_t a)
-+{
-+ return __builtin_shuffle (a,
-+ (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 });
-+}
-+
-+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-+vrev64q_p16 (poly16x8_t a)
-+{
-+ return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
-+}
-+
-+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-+vrev64q_s8 (int8x16_t a)
-+{
-+ return __builtin_shuffle (a,
-+ (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 });
-+}
-+
-+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-+vrev64q_s16 (int16x8_t a)
-+{
-+ return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
-+}
-+
-+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+vrev64q_s32 (int32x4_t a)
-+{
-+ return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 });
-+}
-+
-+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-+vrev64q_u8 (uint8x16_t a)
-+{
-+ return __builtin_shuffle (a,
-+ (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 });
-+}
-+
-+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-+vrev64q_u16 (uint16x8_t a)
-+{
-+ return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
-+}
-+
-+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+vrev64q_u32 (uint32x4_t a)
-+{
-+ return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 });
-+}
-+
- /* vrnd */
-
- __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-@@ -22469,6 +21493,12 @@
- return __builtin_aarch64_btruncv2sf (__a);
- }
-
-+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
-+vrnd_f64 (float64x1_t __a)
-+{
-+ return vset_lane_f64 (__builtin_trunc (vget_lane_f64 (__a, 0)), __a, 0);
-+}
-+
- __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
- vrndq_f32 (float32x4_t __a)
- {
-@@ -22489,6 +21519,12 @@
- return __builtin_aarch64_roundv2sf (__a);
- }
-
-+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
-+vrnda_f64 (float64x1_t __a)
-+{
-+ return vset_lane_f64 (__builtin_round (vget_lane_f64 (__a, 0)), __a, 0);
-+}
-+
- __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
- vrndaq_f32 (float32x4_t __a)
- {
-@@ -22509,6 +21545,12 @@
- return __builtin_aarch64_nearbyintv2sf (__a);
- }
-
-+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
-+vrndi_f64 (float64x1_t __a)
-+{
-+ return vset_lane_f64 (__builtin_nearbyint (vget_lane_f64 (__a, 0)), __a, 0);
-+}
-+
- __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
- vrndiq_f32 (float32x4_t __a)
- {
-@@ -22529,6 +21571,12 @@
- return __builtin_aarch64_floorv2sf (__a);
- }
-
-+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
-+vrndm_f64 (float64x1_t __a)
-+{
-+ return vset_lane_f64 (__builtin_floor (vget_lane_f64 (__a, 0)), __a, 0);
-+}
-+
- __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
- vrndmq_f32 (float32x4_t __a)
- {
-@@ -22548,6 +21596,13 @@
- {
- return __builtin_aarch64_frintnv2sf (__a);
- }
-+
-+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
-+vrndn_f64 (float64x1_t __a)
-+{
-+ return __builtin_aarch64_frintndf (__a);
-+}
-+
- __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
- vrndnq_f32 (float32x4_t __a)
- {
-@@ -22568,6 +21623,12 @@
- return __builtin_aarch64_ceilv2sf (__a);
- }
-
-+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
-+vrndp_f64 (float64x1_t __a)
-+{
-+ return vset_lane_f64 (__builtin_ceil (vget_lane_f64 (__a, 0)), __a, 0);
-+}
-+
- __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
- vrndpq_f32 (float32x4_t __a)
- {
-@@ -22588,6 +21649,12 @@
- return __builtin_aarch64_rintv2sf (__a);
- }
-
-+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
-+vrndx_f64 (float64x1_t __a)
-+{
-+ return vset_lane_f64 (__builtin_rint (vget_lane_f64 (__a, 0)), __a, 0);
-+}
-+
- __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
- vrndxq_f32 (float32x4_t __a)
- {
-@@ -22629,25 +21696,25 @@
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vrshl_u8 (uint8x8_t __a, int8x8_t __b)
- {
-- return (uint8x8_t) __builtin_aarch64_urshlv8qi ((int8x8_t) __a, __b);
-+ return __builtin_aarch64_urshlv8qi_uus (__a, __b);
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vrshl_u16 (uint16x4_t __a, int16x4_t __b)
- {
-- return (uint16x4_t) __builtin_aarch64_urshlv4hi ((int16x4_t) __a, __b);
-+ return __builtin_aarch64_urshlv4hi_uus (__a, __b);
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vrshl_u32 (uint32x2_t __a, int32x2_t __b)
- {
-- return (uint32x2_t) __builtin_aarch64_urshlv2si ((int32x2_t) __a, __b);
-+ return __builtin_aarch64_urshlv2si_uus (__a, __b);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
- vrshl_u64 (uint64x1_t __a, int64x1_t __b)
- {
-- return (uint64x1_t) __builtin_aarch64_urshldi ((int64x1_t) __a, __b);
-+ return __builtin_aarch64_urshldi_uus (__a, __b);
- }
-
- __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-@@ -22677,25 +21744,25 @@
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vrshlq_u8 (uint8x16_t __a, int8x16_t __b)
- {
-- return (uint8x16_t) __builtin_aarch64_urshlv16qi ((int8x16_t) __a, __b);
-+ return __builtin_aarch64_urshlv16qi_uus (__a, __b);
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vrshlq_u16 (uint16x8_t __a, int16x8_t __b)
- {
-- return (uint16x8_t) __builtin_aarch64_urshlv8hi ((int16x8_t) __a, __b);
-+ return __builtin_aarch64_urshlv8hi_uus (__a, __b);
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vrshlq_u32 (uint32x4_t __a, int32x4_t __b)
- {
-- return (uint32x4_t) __builtin_aarch64_urshlv4si ((int32x4_t) __a, __b);
-+ return __builtin_aarch64_urshlv4si_uus (__a, __b);
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vrshlq_u64 (uint64x2_t __a, int64x2_t __b)
- {
-- return (uint64x2_t) __builtin_aarch64_urshlv2di ((int64x2_t) __a, __b);
-+ return __builtin_aarch64_urshlv2di_uus (__a, __b);
- }
-
- __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-@@ -22707,7 +21774,7 @@
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
- vrshld_u64 (uint64x1_t __a, uint64x1_t __b)
- {
-- return (uint64x1_t) __builtin_aarch64_urshldi (__a, __b);
-+ return __builtin_aarch64_urshldi_uus (__a, __b);
- }
-
- /* vrshr */
-@@ -22739,25 +21806,25 @@
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vrshr_n_u8 (uint8x8_t __a, const int __b)
- {
-- return (uint8x8_t) __builtin_aarch64_urshr_nv8qi ((int8x8_t) __a, __b);
-+ return __builtin_aarch64_urshr_nv8qi_uus (__a, __b);
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vrshr_n_u16 (uint16x4_t __a, const int __b)
- {
-- return (uint16x4_t) __builtin_aarch64_urshr_nv4hi ((int16x4_t) __a, __b);
-+ return __builtin_aarch64_urshr_nv4hi_uus (__a, __b);
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vrshr_n_u32 (uint32x2_t __a, const int __b)
- {
-- return (uint32x2_t) __builtin_aarch64_urshr_nv2si ((int32x2_t) __a, __b);
-+ return __builtin_aarch64_urshr_nv2si_uus (__a, __b);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
- vrshr_n_u64 (uint64x1_t __a, const int __b)
- {
-- return (uint64x1_t) __builtin_aarch64_urshr_ndi ((int64x1_t) __a, __b);
-+ return __builtin_aarch64_urshr_ndi_uus (__a, __b);
- }
-
- __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-@@ -22787,25 +21854,25 @@
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vrshrq_n_u8 (uint8x16_t __a, const int __b)
- {
-- return (uint8x16_t) __builtin_aarch64_urshr_nv16qi ((int8x16_t) __a, __b);
-+ return __builtin_aarch64_urshr_nv16qi_uus (__a, __b);
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vrshrq_n_u16 (uint16x8_t __a, const int __b)
- {
-- return (uint16x8_t) __builtin_aarch64_urshr_nv8hi ((int16x8_t) __a, __b);
-+ return __builtin_aarch64_urshr_nv8hi_uus (__a, __b);
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vrshrq_n_u32 (uint32x4_t __a, const int __b)
- {
-- return (uint32x4_t) __builtin_aarch64_urshr_nv4si ((int32x4_t) __a, __b);
-+ return __builtin_aarch64_urshr_nv4si_uus (__a, __b);
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vrshrq_n_u64 (uint64x2_t __a, const int __b)
- {
-- return (uint64x2_t) __builtin_aarch64_urshr_nv2di ((int64x2_t) __a, __b);
-+ return __builtin_aarch64_urshr_nv2di_uus (__a, __b);
- }
-
- __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-@@ -22817,7 +21884,7 @@
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
- vrshrd_n_u64 (uint64x1_t __a, const int __b)
- {
-- return (uint64x1_t) __builtin_aarch64_urshr_ndi (__a, __b);
-+ return __builtin_aarch64_urshr_ndi_uus (__a, __b);
- }
-
- /* vrsra */
-@@ -22849,29 +21916,25 @@
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vrsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
- {
-- return (uint8x8_t) __builtin_aarch64_ursra_nv8qi ((int8x8_t) __a,
-- (int8x8_t) __b, __c);
-+ return __builtin_aarch64_ursra_nv8qi_uuus (__a, __b, __c);
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vrsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
- {
-- return (uint16x4_t) __builtin_aarch64_ursra_nv4hi ((int16x4_t) __a,
-- (int16x4_t) __b, __c);
-+ return __builtin_aarch64_ursra_nv4hi_uuus (__a, __b, __c);
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vrsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
- {
-- return (uint32x2_t) __builtin_aarch64_ursra_nv2si ((int32x2_t) __a,
-- (int32x2_t) __b, __c);
-+ return __builtin_aarch64_ursra_nv2si_uuus (__a, __b, __c);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
- vrsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
- {
-- return (uint64x1_t) __builtin_aarch64_ursra_ndi ((int64x1_t) __a,
-- (int64x1_t) __b, __c);
-+ return __builtin_aarch64_ursra_ndi_uuus (__a, __b, __c);
- }
-
- __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-@@ -22901,29 +21964,25 @@
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vrsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
- {
-- return (uint8x16_t) __builtin_aarch64_ursra_nv16qi ((int8x16_t) __a,
-- (int8x16_t) __b, __c);
-+ return __builtin_aarch64_ursra_nv16qi_uuus (__a, __b, __c);
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vrsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
- {
-- return (uint16x8_t) __builtin_aarch64_ursra_nv8hi ((int16x8_t) __a,
-- (int16x8_t) __b, __c);
-+ return __builtin_aarch64_ursra_nv8hi_uuus (__a, __b, __c);
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vrsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
- {
-- return (uint32x4_t) __builtin_aarch64_ursra_nv4si ((int32x4_t) __a,
-- (int32x4_t) __b, __c);
-+ return __builtin_aarch64_ursra_nv4si_uuus (__a, __b, __c);
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
- {
-- return (uint64x2_t) __builtin_aarch64_ursra_nv2di ((int64x2_t) __a,
-- (int64x2_t) __b, __c);
-+ return __builtin_aarch64_ursra_nv2di_uuus (__a, __b, __c);
- }
-
- __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-@@ -22935,7 +21994,7 @@
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
- vrsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
- {
-- return (uint64x1_t) __builtin_aarch64_ursra_ndi (__a, __b, __c);
-+ return __builtin_aarch64_ursra_ndi_uuus (__a, __b, __c);
- }
-
- #ifdef __ARM_FEATURE_CRYPTO
-@@ -23128,109 +22187,109 @@
- __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
- vshl_s8 (int8x8_t __a, int8x8_t __b)
- {
-- return (int8x8_t) __builtin_aarch64_sshlv8qi (__a, __b);
-+ return __builtin_aarch64_sshlv8qi (__a, __b);
- }
-
- __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
- vshl_s16 (int16x4_t __a, int16x4_t __b)
- {
-- return (int16x4_t) __builtin_aarch64_sshlv4hi (__a, __b);
-+ return __builtin_aarch64_sshlv4hi (__a, __b);
- }
-
- __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
- vshl_s32 (int32x2_t __a, int32x2_t __b)
- {
-- return (int32x2_t) __builtin_aarch64_sshlv2si (__a, __b);
-+ return __builtin_aarch64_sshlv2si (__a, __b);
- }
-
- __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
- vshl_s64 (int64x1_t __a, int64x1_t __b)
- {
-- return (int64x1_t) __builtin_aarch64_sshldi (__a, __b);
-+ return __builtin_aarch64_sshldi (__a, __b);
- }
-
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vshl_u8 (uint8x8_t __a, int8x8_t __b)
- {
-- return (uint8x8_t) __builtin_aarch64_ushlv8qi ((int8x8_t) __a, __b);
-+ return __builtin_aarch64_ushlv8qi_uus (__a, __b);
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vshl_u16 (uint16x4_t __a, int16x4_t __b)
- {
-- return (uint16x4_t) __builtin_aarch64_ushlv4hi ((int16x4_t) __a, __b);
-+ return __builtin_aarch64_ushlv4hi_uus (__a, __b);
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vshl_u32 (uint32x2_t __a, int32x2_t __b)
- {
-- return (uint32x2_t) __builtin_aarch64_ushlv2si ((int32x2_t) __a, __b);
-+ return __builtin_aarch64_ushlv2si_uus (__a, __b);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
- vshl_u64 (uint64x1_t __a, int64x1_t __b)
- {
-- return (uint64x1_t) __builtin_aarch64_ushldi ((int64x1_t) __a, __b);
-+ return __builtin_aarch64_ushldi_uus (__a, __b);
- }
-
- __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
- vshlq_s8 (int8x16_t __a, int8x16_t __b)
- {
-- return (int8x16_t) __builtin_aarch64_sshlv16qi (__a, __b);
-+ return __builtin_aarch64_sshlv16qi (__a, __b);
- }
-
- __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
- vshlq_s16 (int16x8_t __a, int16x8_t __b)
- {
-- return (int16x8_t) __builtin_aarch64_sshlv8hi (__a, __b);
-+ return __builtin_aarch64_sshlv8hi (__a, __b);
- }
-
- __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
- vshlq_s32 (int32x4_t __a, int32x4_t __b)
- {
-- return (int32x4_t) __builtin_aarch64_sshlv4si (__a, __b);
-+ return __builtin_aarch64_sshlv4si (__a, __b);
- }
-
- __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
- vshlq_s64 (int64x2_t __a, int64x2_t __b)
- {
-- return (int64x2_t) __builtin_aarch64_sshlv2di (__a, __b);
-+ return __builtin_aarch64_sshlv2di (__a, __b);
- }
-
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vshlq_u8 (uint8x16_t __a, int8x16_t __b)
- {
-- return (uint8x16_t) __builtin_aarch64_ushlv16qi ((int8x16_t) __a, __b);
-+ return __builtin_aarch64_ushlv16qi_uus (__a, __b);
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vshlq_u16 (uint16x8_t __a, int16x8_t __b)
- {
-- return (uint16x8_t) __builtin_aarch64_ushlv8hi ((int16x8_t) __a, __b);
-+ return __builtin_aarch64_ushlv8hi_uus (__a, __b);
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vshlq_u32 (uint32x4_t __a, int32x4_t __b)
- {
-- return (uint32x4_t) __builtin_aarch64_ushlv4si ((int32x4_t) __a, __b);
-+ return __builtin_aarch64_ushlv4si_uus (__a, __b);
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vshlq_u64 (uint64x2_t __a, int64x2_t __b)
- {
-- return (uint64x2_t) __builtin_aarch64_ushlv2di ((int64x2_t) __a, __b);
-+ return __builtin_aarch64_ushlv2di_uus (__a, __b);
- }
-
- __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
- vshld_s64 (int64x1_t __a, int64x1_t __b)
- {
-- return (int64x1_t) __builtin_aarch64_sshldi (__a, __b);
-+ return __builtin_aarch64_sshldi (__a, __b);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
- vshld_u64 (uint64x1_t __a, uint64x1_t __b)
- {
-- return (uint64x1_t) __builtin_aarch64_ushldi (__a, __b);
-+ return __builtin_aarch64_ushldi_uus (__a, __b);
- }
-
- __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-@@ -23290,19 +22349,19 @@
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vshll_n_u8 (uint8x8_t __a, const int __b)
- {
-- return (uint16x8_t) __builtin_aarch64_ushll_nv8qi ((int8x8_t) __a, __b);
-+ return __builtin_aarch64_ushll_nv8qi_uus (__a, __b);
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vshll_n_u16 (uint16x4_t __a, const int __b)
- {
-- return (uint32x4_t) __builtin_aarch64_ushll_nv4hi ((int16x4_t) __a, __b);
-+ return __builtin_aarch64_ushll_nv4hi_uus (__a, __b);
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vshll_n_u32 (uint32x2_t __a, const int __b)
- {
-- return (uint64x2_t) __builtin_aarch64_ushll_nv2si ((int32x2_t) __a, __b);
-+ return __builtin_aarch64_ushll_nv2si_uus (__a, __b);
- }
-
- /* vshr */
-@@ -23444,29 +22503,25 @@
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vsli_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
- {
-- return (uint8x8_t) __builtin_aarch64_usli_nv8qi ((int8x8_t) __a,
-- (int8x8_t) __b, __c);
-+ return __builtin_aarch64_usli_nv8qi_uuus (__a, __b, __c);
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vsli_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
- {
-- return (uint16x4_t) __builtin_aarch64_usli_nv4hi ((int16x4_t) __a,
-- (int16x4_t) __b, __c);
-+ return __builtin_aarch64_usli_nv4hi_uuus (__a, __b, __c);
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vsli_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
- {
-- return (uint32x2_t) __builtin_aarch64_usli_nv2si ((int32x2_t) __a,
-- (int32x2_t) __b, __c);
-+ return __builtin_aarch64_usli_nv2si_uuus (__a, __b, __c);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
- vsli_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
- {
-- return (uint64x1_t) __builtin_aarch64_usli_ndi ((int64x1_t) __a,
-- (int64x1_t) __b, __c);
-+ return __builtin_aarch64_usli_ndi_uuus (__a, __b, __c);
- }
-
- __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-@@ -23496,29 +22551,25 @@
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
- {
-- return (uint8x16_t) __builtin_aarch64_usli_nv16qi ((int8x16_t) __a,
-- (int8x16_t) __b, __c);
-+ return __builtin_aarch64_usli_nv16qi_uuus (__a, __b, __c);
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
- {
-- return (uint16x8_t) __builtin_aarch64_usli_nv8hi ((int16x8_t) __a,
-- (int16x8_t) __b, __c);
-+ return __builtin_aarch64_usli_nv8hi_uuus (__a, __b, __c);
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
- {
-- return (uint32x4_t) __builtin_aarch64_usli_nv4si ((int32x4_t) __a,
-- (int32x4_t) __b, __c);
-+ return __builtin_aarch64_usli_nv4si_uuus (__a, __b, __c);
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
- {
-- return (uint64x2_t) __builtin_aarch64_usli_nv2di ((int64x2_t) __a,
-- (int64x2_t) __b, __c);
-+ return __builtin_aarch64_usli_nv2di_uuus (__a, __b, __c);
- }
-
- __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-@@ -23530,7 +22581,7 @@
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
- vslid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
- {
-- return (uint64x1_t) __builtin_aarch64_usli_ndi (__a, __b, __c);
-+ return __builtin_aarch64_usli_ndi_uuus (__a, __b, __c);
- }
-
- /* vsqadd */
-@@ -23538,80 +22589,73 @@
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vsqadd_u8 (uint8x8_t __a, int8x8_t __b)
- {
-- return (uint8x8_t) __builtin_aarch64_usqaddv8qi ((int8x8_t) __a,
-- (int8x8_t) __b);
-+ return __builtin_aarch64_usqaddv8qi_uus (__a, __b);
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vsqadd_u16 (uint16x4_t __a, int16x4_t __b)
- {
-- return (uint16x4_t) __builtin_aarch64_usqaddv4hi ((int16x4_t) __a,
-- (int16x4_t) __b);
-+ return __builtin_aarch64_usqaddv4hi_uus (__a, __b);
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vsqadd_u32 (uint32x2_t __a, int32x2_t __b)
- {
-- return (uint32x2_t) __builtin_aarch64_usqaddv2si ((int32x2_t) __a,
-- (int32x2_t) __b);
-+ return __builtin_aarch64_usqaddv2si_uus (__a, __b);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
- vsqadd_u64 (uint64x1_t __a, int64x1_t __b)
- {
-- return (uint64x1_t) __builtin_aarch64_usqadddi ((int64x1_t) __a, __b);
-+ return __builtin_aarch64_usqadddi_uus (__a, __b);
- }
-
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vsqaddq_u8 (uint8x16_t __a, int8x16_t __b)
- {
-- return (uint8x16_t) __builtin_aarch64_usqaddv16qi ((int8x16_t) __a,
-- (int8x16_t) __b);
-+ return __builtin_aarch64_usqaddv16qi_uus (__a, __b);
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vsqaddq_u16 (uint16x8_t __a, int16x8_t __b)
- {
-- return (uint16x8_t) __builtin_aarch64_usqaddv8hi ((int16x8_t) __a,
-- (int16x8_t) __b);
-+ return __builtin_aarch64_usqaddv8hi_uus (__a, __b);
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vsqaddq_u32 (uint32x4_t __a, int32x4_t __b)
- {
-- return (uint32x4_t) __builtin_aarch64_usqaddv4si ((int32x4_t) __a,
-- (int32x4_t) __b);
-+ return __builtin_aarch64_usqaddv4si_uus (__a, __b);
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vsqaddq_u64 (uint64x2_t __a, int64x2_t __b)
- {
-- return (uint64x2_t) __builtin_aarch64_usqaddv2di ((int64x2_t) __a,
-- (int64x2_t) __b);
-+ return __builtin_aarch64_usqaddv2di_uus (__a, __b);
- }
-
- __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
- vsqaddb_u8 (uint8_t __a, int8_t __b)
- {
-- return (uint8_t) __builtin_aarch64_usqaddqi ((int8_t) __a, __b);
-+ return __builtin_aarch64_usqaddqi_uus (__a, __b);
- }
-
- __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
- vsqaddh_u16 (uint16_t __a, int16_t __b)
- {
-- return (uint16_t) __builtin_aarch64_usqaddhi ((int16_t) __a, __b);
-+ return __builtin_aarch64_usqaddhi_uus (__a, __b);
- }
-
- __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
- vsqadds_u32 (uint32_t __a, int32_t __b)
- {
-- return (uint32_t) __builtin_aarch64_usqaddsi ((int32_t) __a, __b);
-+ return __builtin_aarch64_usqaddsi_uus (__a, __b);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
- vsqaddd_u64 (uint64x1_t __a, int64x1_t __b)
- {
-- return (uint64x1_t) __builtin_aarch64_usqadddi ((int64x1_t) __a, __b);
-+ return __builtin_aarch64_usqadddi_uus (__a, __b);
- }
-
- /* vsqrt */
-@@ -23662,29 +22706,25 @@
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
- {
-- return (uint8x8_t) __builtin_aarch64_usra_nv8qi ((int8x8_t) __a,
-- (int8x8_t) __b, __c);
-+ return __builtin_aarch64_usra_nv8qi_uuus (__a, __b, __c);
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
- {
-- return (uint16x4_t) __builtin_aarch64_usra_nv4hi ((int16x4_t) __a,
-- (int16x4_t) __b, __c);
-+ return __builtin_aarch64_usra_nv4hi_uuus (__a, __b, __c);
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
- {
-- return (uint32x2_t) __builtin_aarch64_usra_nv2si ((int32x2_t) __a,
-- (int32x2_t) __b, __c);
-+ return __builtin_aarch64_usra_nv2si_uuus (__a, __b, __c);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
- vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
- {
-- return (uint64x1_t) __builtin_aarch64_usra_ndi ((int64x1_t) __a,
-- (int64x1_t) __b, __c);
-+ return __builtin_aarch64_usra_ndi_uuus (__a, __b, __c);
- }
-
- __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-@@ -23714,29 +22754,25 @@
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
- {
-- return (uint8x16_t) __builtin_aarch64_usra_nv16qi ((int8x16_t) __a,
-- (int8x16_t) __b, __c);
-+ return __builtin_aarch64_usra_nv16qi_uuus (__a, __b, __c);
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
- {
-- return (uint16x8_t) __builtin_aarch64_usra_nv8hi ((int16x8_t) __a,
-- (int16x8_t) __b, __c);
-+ return __builtin_aarch64_usra_nv8hi_uuus (__a, __b, __c);
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
- {
-- return (uint32x4_t) __builtin_aarch64_usra_nv4si ((int32x4_t) __a,
-- (int32x4_t) __b, __c);
-+ return __builtin_aarch64_usra_nv4si_uuus (__a, __b, __c);
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
- {
-- return (uint64x2_t) __builtin_aarch64_usra_nv2di ((int64x2_t) __a,
-- (int64x2_t) __b, __c);
-+ return __builtin_aarch64_usra_nv2di_uuus (__a, __b, __c);
- }
-
- __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-@@ -23748,7 +22784,7 @@
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
- vsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
- {
-- return (uint64x1_t) __builtin_aarch64_usra_ndi (__a, __b, __c);
-+ return __builtin_aarch64_usra_ndi_uuus (__a, __b, __c);
- }
-
- /* vsri */
-@@ -23780,29 +22816,25 @@
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
- {
-- return (uint8x8_t) __builtin_aarch64_usri_nv8qi ((int8x8_t) __a,
-- (int8x8_t) __b, __c);
-+ return __builtin_aarch64_usri_nv8qi_uuus (__a, __b, __c);
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
- {
-- return (uint16x4_t) __builtin_aarch64_usri_nv4hi ((int16x4_t) __a,
-- (int16x4_t) __b, __c);
-+ return __builtin_aarch64_usri_nv4hi_uuus (__a, __b, __c);
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
- {
-- return (uint32x2_t) __builtin_aarch64_usri_nv2si ((int32x2_t) __a,
-- (int32x2_t) __b, __c);
-+ return __builtin_aarch64_usri_nv2si_uuus (__a, __b, __c);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
- vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
- {
-- return (uint64x1_t) __builtin_aarch64_usri_ndi ((int64x1_t) __a,
-- (int64x1_t) __b, __c);
-+ return __builtin_aarch64_usri_ndi_uuus (__a, __b, __c);
- }
-
- __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-@@ -23832,29 +22864,25 @@
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
- {
-- return (uint8x16_t) __builtin_aarch64_usri_nv16qi ((int8x16_t) __a,
-- (int8x16_t) __b, __c);
-+ return __builtin_aarch64_usri_nv16qi_uuus (__a, __b, __c);
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
- {
-- return (uint16x8_t) __builtin_aarch64_usri_nv8hi ((int16x8_t) __a,
-- (int16x8_t) __b, __c);
-+ return __builtin_aarch64_usri_nv8hi_uuus (__a, __b, __c);
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
- {
-- return (uint32x4_t) __builtin_aarch64_usri_nv4si ((int32x4_t) __a,
-- (int32x4_t) __b, __c);
-+ return __builtin_aarch64_usri_nv4si_uuus (__a, __b, __c);
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
- {
-- return (uint64x2_t) __builtin_aarch64_usri_nv2di ((int64x2_t) __a,
-- (int64x2_t) __b, __c);
-+ return __builtin_aarch64_usri_nv2di_uuus (__a, __b, __c);
- }
-
- __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-@@ -23866,7 +22894,7 @@
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
- vsrid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
- {
-- return (uint64x1_t) __builtin_aarch64_usri_ndi (__a, __b, __c);
-+ return __builtin_aarch64_usri_ndi_uuus (__a, __b, __c);
- }
-
- /* vst1 */
-@@ -24970,6 +23998,438 @@
-
- /* vtrn */
-
-+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-+vtrn1_f32 (float32x2_t __a, float32x2_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
-+#endif
-+}
-+
-+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-+vtrn1_p8 (poly8x8_t __a, poly8x8_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
-+#endif
-+}
-+
-+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-+vtrn1_p16 (poly16x4_t __a, poly16x4_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6});
-+#endif
-+}
-+
-+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+vtrn1_s8 (int8x8_t __a, int8x8_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
-+#endif
-+}
-+
-+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+vtrn1_s16 (int16x4_t __a, int16x4_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6});
-+#endif
-+}
-+
-+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+vtrn1_s32 (int32x2_t __a, int32x2_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
-+#endif
-+}
-+
-+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+vtrn1_u8 (uint8x8_t __a, uint8x8_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
-+#endif
-+}
-+
-+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+vtrn1_u16 (uint16x4_t __a, uint16x4_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6});
-+#endif
-+}
-+
-+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+vtrn1_u32 (uint32x2_t __a, uint32x2_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
-+#endif
-+}
-+
-+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-+vtrn1q_f32 (float32x4_t __a, float32x4_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6});
-+#endif
-+}
-+
-+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
-+vtrn1q_f64 (float64x2_t __a, float64x2_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
-+#endif
-+}
-+
-+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-+vtrn1q_p8 (poly8x16_t __a, poly8x16_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b,
-+ (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15});
-+#else
-+ return __builtin_shuffle (__a, __b,
-+ (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30});
-+#endif
-+}
-+
-+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-+vtrn1q_p16 (poly16x8_t __a, poly16x8_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
-+#endif
-+}
-+
-+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-+vtrn1q_s8 (int8x16_t __a, int8x16_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b,
-+ (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15});
-+#else
-+ return __builtin_shuffle (__a, __b,
-+ (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30});
-+#endif
-+}
-+
-+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-+vtrn1q_s16 (int16x8_t __a, int16x8_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
-+#endif
-+}
-+
-+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+vtrn1q_s32 (int32x4_t __a, int32x4_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6});
-+#endif
-+}
-+
-+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-+vtrn1q_s64 (int64x2_t __a, int64x2_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
-+#endif
-+}
-+
-+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-+vtrn1q_u8 (uint8x16_t __a, uint8x16_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b,
-+ (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15});
-+#else
-+ return __builtin_shuffle (__a, __b,
-+ (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30});
-+#endif
-+}
-+
-+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-+vtrn1q_u16 (uint16x8_t __a, uint16x8_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
-+#endif
-+}
-+
-+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+vtrn1q_u32 (uint32x4_t __a, uint32x4_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6});
-+#endif
-+}
-+
-+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-+vtrn1q_u64 (uint64x2_t __a, uint64x2_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
-+#endif
-+}
-+
-+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-+vtrn2_f32 (float32x2_t __a, float32x2_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
-+#endif
-+}
-+
-+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-+vtrn2_p8 (poly8x8_t __a, poly8x8_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
-+#endif
-+}
-+
-+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-+vtrn2_p16 (poly16x4_t __a, poly16x4_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7});
-+#endif
-+}
-+
-+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+vtrn2_s8 (int8x8_t __a, int8x8_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
-+#endif
-+}
-+
-+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+vtrn2_s16 (int16x4_t __a, int16x4_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7});
-+#endif
-+}
-+
-+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+vtrn2_s32 (int32x2_t __a, int32x2_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
-+#endif
-+}
-+
-+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+vtrn2_u8 (uint8x8_t __a, uint8x8_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
-+#endif
-+}
-+
-+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+vtrn2_u16 (uint16x4_t __a, uint16x4_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7});
-+#endif
-+}
-+
-+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+vtrn2_u32 (uint32x2_t __a, uint32x2_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
-+#endif
-+}
-+
-+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-+vtrn2q_f32 (float32x4_t __a, float32x4_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7});
-+#endif
-+}
-+
-+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
-+vtrn2q_f64 (float64x2_t __a, float64x2_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
-+#endif
-+}
-+
-+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-+vtrn2q_p8 (poly8x16_t __a, poly8x16_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b,
-+ (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14});
-+#else
-+ return __builtin_shuffle (__a, __b,
-+ (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31});
-+#endif
-+}
-+
-+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-+vtrn2q_p16 (poly16x8_t __a, poly16x8_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
-+#endif
-+}
-+
-+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-+vtrn2q_s8 (int8x16_t __a, int8x16_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b,
-+ (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14});
-+#else
-+ return __builtin_shuffle (__a, __b,
-+ (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31});
-+#endif
-+}
-+
-+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-+vtrn2q_s16 (int16x8_t __a, int16x8_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
-+#endif
-+}
-+
-+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+vtrn2q_s32 (int32x4_t __a, int32x4_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7});
-+#endif
-+}
-+
-+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-+vtrn2q_s64 (int64x2_t __a, int64x2_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
-+#endif
-+}
-+
-+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-+vtrn2q_u8 (uint8x16_t __a, uint8x16_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b,
-+ (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14});
-+#else
-+ return __builtin_shuffle (__a, __b,
-+ (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31});
-+#endif
-+}
-+
-+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-+vtrn2q_u16 (uint16x8_t __a, uint16x8_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
-+#endif
-+}
-+
-+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+vtrn2q_u32 (uint32x4_t __a, uint32x4_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7});
-+#endif
-+}
-+
-+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-+vtrn2q_u64 (uint64x2_t __a, uint64x2_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
-+#endif
-+}
-+
- __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
- vtrn_f32 (float32x2_t a, float32x2_t b)
- {
-@@ -25083,19 +24543,19 @@
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vtst_s8 (int8x8_t __a, int8x8_t __b)
- {
-- return (uint8x8_t) __builtin_aarch64_cmtstv8qi (__a, __b);
-+ return (uint8x8_t) ((__a & __b) != 0);
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vtst_s16 (int16x4_t __a, int16x4_t __b)
- {
-- return (uint16x4_t) __builtin_aarch64_cmtstv4hi (__a, __b);
-+ return (uint16x4_t) ((__a & __b) != 0);
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vtst_s32 (int32x2_t __a, int32x2_t __b)
- {
-- return (uint32x2_t) __builtin_aarch64_cmtstv2si (__a, __b);
-+ return (uint32x2_t) ((__a & __b) != 0);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-@@ -25107,22 +24567,19 @@
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vtst_u8 (uint8x8_t __a, uint8x8_t __b)
- {
-- return (uint8x8_t) __builtin_aarch64_cmtstv8qi ((int8x8_t) __a,
-- (int8x8_t) __b);
-+ return ((__a & __b) != 0);
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vtst_u16 (uint16x4_t __a, uint16x4_t __b)
- {
-- return (uint16x4_t) __builtin_aarch64_cmtstv4hi ((int16x4_t) __a,
-- (int16x4_t) __b);
-+ return ((__a & __b) != 0);
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vtst_u32 (uint32x2_t __a, uint32x2_t __b)
- {
-- return (uint32x2_t) __builtin_aarch64_cmtstv2si ((int32x2_t) __a,
-- (int32x2_t) __b);
-+ return ((__a & __b) != 0);
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-@@ -25134,53 +24591,49 @@
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vtstq_s8 (int8x16_t __a, int8x16_t __b)
- {
-- return (uint8x16_t) __builtin_aarch64_cmtstv16qi (__a, __b);
-+ return (uint8x16_t) ((__a & __b) != 0);
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vtstq_s16 (int16x8_t __a, int16x8_t __b)
- {
-- return (uint16x8_t) __builtin_aarch64_cmtstv8hi (__a, __b);
-+ return (uint16x8_t) ((__a & __b) != 0);
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vtstq_s32 (int32x4_t __a, int32x4_t __b)
- {
-- return (uint32x4_t) __builtin_aarch64_cmtstv4si (__a, __b);
-+ return (uint32x4_t) ((__a & __b) != 0);
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vtstq_s64 (int64x2_t __a, int64x2_t __b)
- {
-- return (uint64x2_t) __builtin_aarch64_cmtstv2di (__a, __b);
-+ return (uint64x2_t) ((__a & __b) != __AARCH64_INT64_C (0));
- }
-
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vtstq_u8 (uint8x16_t __a, uint8x16_t __b)
- {
-- return (uint8x16_t) __builtin_aarch64_cmtstv16qi ((int8x16_t) __a,
-- (int8x16_t) __b);
-+ return ((__a & __b) != 0);
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vtstq_u16 (uint16x8_t __a, uint16x8_t __b)
- {
-- return (uint16x8_t) __builtin_aarch64_cmtstv8hi ((int16x8_t) __a,
-- (int16x8_t) __b);
-+ return ((__a & __b) != 0);
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vtstq_u32 (uint32x4_t __a, uint32x4_t __b)
- {
-- return (uint32x4_t) __builtin_aarch64_cmtstv4si ((int32x4_t) __a,
-- (int32x4_t) __b);
-+ return ((__a & __b) != 0);
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vtstq_u64 (uint64x2_t __a, uint64x2_t __b)
- {
-- return (uint64x2_t) __builtin_aarch64_cmtstv2di ((int64x2_t) __a,
-- (int64x2_t) __b);
-+ return ((__a & __b) != __AARCH64_UINT64_C (0));
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-@@ -25200,73 +24653,73 @@
- __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
- vuqadd_s8 (int8x8_t __a, uint8x8_t __b)
- {
-- return (int8x8_t) __builtin_aarch64_suqaddv8qi (__a, (int8x8_t) __b);
-+ return __builtin_aarch64_suqaddv8qi_ssu (__a, __b);
- }
-
- __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
- vuqadd_s16 (int16x4_t __a, uint16x4_t __b)
- {
-- return (int16x4_t) __builtin_aarch64_suqaddv4hi (__a, (int16x4_t) __b);
-+ return __builtin_aarch64_suqaddv4hi_ssu (__a, __b);
- }
-
- __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
- vuqadd_s32 (int32x2_t __a, uint32x2_t __b)
- {
-- return (int32x2_t) __builtin_aarch64_suqaddv2si (__a, (int32x2_t) __b);
-+ return __builtin_aarch64_suqaddv2si_ssu (__a, __b);
- }
-
- __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
- vuqadd_s64 (int64x1_t __a, uint64x1_t __b)
- {
-- return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b);
-+ return __builtin_aarch64_suqadddi_ssu (__a, __b);
- }
-
- __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
- vuqaddq_s8 (int8x16_t __a, uint8x16_t __b)
- {
-- return (int8x16_t) __builtin_aarch64_suqaddv16qi (__a, (int8x16_t) __b);
-+ return __builtin_aarch64_suqaddv16qi_ssu (__a, __b);
- }
-
- __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
- vuqaddq_s16 (int16x8_t __a, uint16x8_t __b)
- {
-- return (int16x8_t) __builtin_aarch64_suqaddv8hi (__a, (int16x8_t) __b);
-+ return __builtin_aarch64_suqaddv8hi_ssu (__a, __b);
- }
-
- __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
- vuqaddq_s32 (int32x4_t __a, uint32x4_t __b)
- {
-- return (int32x4_t) __builtin_aarch64_suqaddv4si (__a, (int32x4_t) __b);
-+ return __builtin_aarch64_suqaddv4si_ssu (__a, __b);
- }
-
- __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
- vuqaddq_s64 (int64x2_t __a, uint64x2_t __b)
- {
-- return (int64x2_t) __builtin_aarch64_suqaddv2di (__a, (int64x2_t) __b);
-+ return __builtin_aarch64_suqaddv2di_ssu (__a, __b);
- }
-
- __extension__ static __inline int8_t __attribute__ ((__always_inline__))
- vuqaddb_s8 (int8_t __a, uint8_t __b)
- {
-- return (int8_t) __builtin_aarch64_suqaddqi (__a, (int8_t) __b);
-+ return __builtin_aarch64_suqaddqi_ssu (__a, __b);
- }
-
- __extension__ static __inline int16_t __attribute__ ((__always_inline__))
- vuqaddh_s16 (int16_t __a, uint16_t __b)
- {
-- return (int16_t) __builtin_aarch64_suqaddhi (__a, (int16_t) __b);
-+ return __builtin_aarch64_suqaddhi_ssu (__a, __b);
- }
-
- __extension__ static __inline int32_t __attribute__ ((__always_inline__))
- vuqadds_s32 (int32_t __a, uint32_t __b)
- {
-- return (int32_t) __builtin_aarch64_suqaddsi (__a, (int32_t) __b);
-+ return __builtin_aarch64_suqaddsi_ssu (__a, __b);
- }
-
- __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
- vuqaddd_s64 (int64x1_t __a, uint64x1_t __b)
- {
-- return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b);
-+ return __builtin_aarch64_suqadddi_ssu (__a, __b);
- }
-
- #define __DEFINTERLEAVE(op, rettype, intype, funcsuffix, Q) \
-@@ -25300,10 +24753,880 @@
-
- /* vuzp */
-
-+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-+vuzp1_f32 (float32x2_t __a, float32x2_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
-+#endif
-+}
-+
-+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-+vuzp1_p8 (poly8x8_t __a, poly8x8_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
-+#endif
-+}
-+
-+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-+vuzp1_p16 (poly16x4_t __a, poly16x4_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6});
-+#endif
-+}
-+
-+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+vuzp1_s8 (int8x8_t __a, int8x8_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
-+#endif
-+}
-+
-+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+vuzp1_s16 (int16x4_t __a, int16x4_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6});
-+#endif
-+}
-+
-+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+vuzp1_s32 (int32x2_t __a, int32x2_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
-+#endif
-+}
-+
-+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+vuzp1_u8 (uint8x8_t __a, uint8x8_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
-+#endif
-+}
-+
-+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+vuzp1_u16 (uint16x4_t __a, uint16x4_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6});
-+#endif
-+}
-+
-+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+vuzp1_u32 (uint32x2_t __a, uint32x2_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
-+#endif
-+}
-+
-+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-+vuzp1q_f32 (float32x4_t __a, float32x4_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6});
-+#endif
-+}
-+
-+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
-+vuzp1q_f64 (float64x2_t __a, float64x2_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
-+#endif
-+}
-+
-+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-+vuzp1q_p8 (poly8x16_t __a, poly8x16_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint8x16_t)
-+ {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint8x16_t)
-+ {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
-+#endif
-+}
-+
-+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-+vuzp1q_p16 (poly16x8_t __a, poly16x8_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
-+#endif
-+}
-+
-+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-+vuzp1q_s8 (int8x16_t __a, int8x16_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b,
-+ (uint8x16_t) {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15});
-+#else
-+ return __builtin_shuffle (__a, __b,
-+ (uint8x16_t) {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
-+#endif
-+}
-+
-+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-+vuzp1q_s16 (int16x8_t __a, int16x8_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
-+#endif
-+}
-+
-+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+vuzp1q_s32 (int32x4_t __a, int32x4_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6});
-+#endif
-+}
-+
-+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-+vuzp1q_s64 (int64x2_t __a, int64x2_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
-+#endif
-+}
-+
-+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-+vuzp1q_u8 (uint8x16_t __a, uint8x16_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b,
-+ (uint8x16_t) {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15});
-+#else
-+ return __builtin_shuffle (__a, __b,
-+ (uint8x16_t) {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
-+#endif
-+}
-+
-+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-+vuzp1q_u16 (uint16x8_t __a, uint16x8_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
-+#endif
-+}
-+
-+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+vuzp1q_u32 (uint32x4_t __a, uint32x4_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6});
-+#endif
-+}
-+
-+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-+vuzp1q_u64 (uint64x2_t __a, uint64x2_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
-+#endif
-+}
-+
-+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-+vuzp2_f32 (float32x2_t __a, float32x2_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
-+#endif
-+}
-+
-+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-+vuzp2_p8 (poly8x8_t __a, poly8x8_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
-+#endif
-+}
-+
-+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-+vuzp2_p16 (poly16x4_t __a, poly16x4_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7});
-+#endif
-+}
-+
-+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+vuzp2_s8 (int8x8_t __a, int8x8_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
-+#endif
-+}
-+
-+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+vuzp2_s16 (int16x4_t __a, int16x4_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7});
-+#endif
-+}
-+
-+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+vuzp2_s32 (int32x2_t __a, int32x2_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
-+#endif
-+}
-+
-+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+vuzp2_u8 (uint8x8_t __a, uint8x8_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
-+#endif
-+}
-+
-+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+vuzp2_u16 (uint16x4_t __a, uint16x4_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7});
-+#endif
-+}
-+
-+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+vuzp2_u32 (uint32x2_t __a, uint32x2_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
-+#endif
-+}
-+
-+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-+vuzp2q_f32 (float32x4_t __a, float32x4_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7});
-+#endif
-+}
-+
-+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
-+vuzp2q_f64 (float64x2_t __a, float64x2_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
-+#endif
-+}
-+
-+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-+vuzp2q_p8 (poly8x16_t __a, poly8x16_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b,
-+ (uint8x16_t) {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14});
-+#else
-+ return __builtin_shuffle (__a, __b,
-+ (uint8x16_t) {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31});
-+#endif
-+}
-+
-+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-+vuzp2q_p16 (poly16x8_t __a, poly16x8_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
-+#endif
-+}
-+
-+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-+vuzp2q_s8 (int8x16_t __a, int8x16_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b,
-+ (uint8x16_t) {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14});
-+#else
-+ return __builtin_shuffle (__a, __b,
-+ (uint8x16_t) {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31});
-+#endif
-+}
-+
-+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-+vuzp2q_s16 (int16x8_t __a, int16x8_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
-+#endif
-+}
-+
-+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+vuzp2q_s32 (int32x4_t __a, int32x4_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7});
-+#endif
-+}
-+
-+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-+vuzp2q_s64 (int64x2_t __a, int64x2_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
-+#endif
-+}
-+
-+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-+vuzp2q_u8 (uint8x16_t __a, uint8x16_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint8x16_t)
-+ {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint8x16_t)
-+ {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31});
-+#endif
-+}
-+
-+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-+vuzp2q_u16 (uint16x8_t __a, uint16x8_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
-+#endif
-+}
-+
-+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+vuzp2q_u32 (uint32x4_t __a, uint32x4_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7});
-+#endif
-+}
-+
-+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-+vuzp2q_u64 (uint64x2_t __a, uint64x2_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
-+#endif
-+}
-+
- __INTERLEAVE_LIST (uzp)
-
- /* vzip */
-
-+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-+vzip1_f32 (float32x2_t __a, float32x2_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
-+#endif
-+}
-+
-+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-+vzip1_p8 (poly8x8_t __a, poly8x8_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
-+#endif
-+}
-+
-+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-+vzip1_p16 (poly16x4_t __a, poly16x4_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5});
-+#endif
-+}
-+
-+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+vzip1_s8 (int8x8_t __a, int8x8_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
-+#endif
-+}
-+
-+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+vzip1_s16 (int16x4_t __a, int16x4_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5});
-+#endif
-+}
-+
-+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+vzip1_s32 (int32x2_t __a, int32x2_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
-+#endif
-+}
-+
-+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+vzip1_u8 (uint8x8_t __a, uint8x8_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
-+#endif
-+}
-+
-+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+vzip1_u16 (uint16x4_t __a, uint16x4_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5});
-+#endif
-+}
-+
-+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+vzip1_u32 (uint32x2_t __a, uint32x2_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
-+#endif
-+}
-+
-+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-+vzip1q_f32 (float32x4_t __a, float32x4_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5});
-+#endif
-+}
-+
-+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
-+vzip1q_f64 (float64x2_t __a, float64x2_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
-+#endif
-+}
-+
-+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-+vzip1q_p8 (poly8x16_t __a, poly8x16_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint8x16_t)
-+ {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint8x16_t)
-+ {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23});
-+#endif
-+}
-+
-+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-+vzip1q_p16 (poly16x8_t __a, poly16x8_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint16x8_t)
-+ {12, 4, 13, 5, 14, 6, 15, 7});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
-+#endif
-+}
-+
-+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-+vzip1q_s8 (int8x16_t __a, int8x16_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint8x16_t)
-+ {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint8x16_t)
-+ {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23});
-+#endif
-+}
-+
-+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-+vzip1q_s16 (int16x8_t __a, int16x8_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint16x8_t)
-+ {12, 4, 13, 5, 14, 6, 15, 7});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
-+#endif
-+}
-+
-+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+vzip1q_s32 (int32x4_t __a, int32x4_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5});
-+#endif
-+}
-+
-+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-+vzip1q_s64 (int64x2_t __a, int64x2_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
-+#endif
-+}
-+
-+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-+vzip1q_u8 (uint8x16_t __a, uint8x16_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint8x16_t)
-+ {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint8x16_t)
-+ {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23});
-+#endif
-+}
-+
-+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-+vzip1q_u16 (uint16x8_t __a, uint16x8_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint16x8_t)
-+ {12, 4, 13, 5, 14, 6, 15, 7});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
-+#endif
-+}
-+
-+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+vzip1q_u32 (uint32x4_t __a, uint32x4_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5});
-+#endif
-+}
-+
-+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-+vzip1q_u64 (uint64x2_t __a, uint64x2_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
-+#endif
-+}
-+
-+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-+vzip2_f32 (float32x2_t __a, float32x2_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
-+#endif
-+}
-+
-+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-+vzip2_p8 (poly8x8_t __a, poly8x8_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15});
-+#endif
-+}
-+
-+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-+vzip2_p16 (poly16x4_t __a, poly16x4_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7});
-+#endif
-+}
-+
-+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+vzip2_s8 (int8x8_t __a, int8x8_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15});
-+#endif
-+}
-+
-+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+vzip2_s16 (int16x4_t __a, int16x4_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7});
-+#endif
-+}
-+
-+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+vzip2_s32 (int32x2_t __a, int32x2_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
-+#endif
-+}
-+
-+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+vzip2_u8 (uint8x8_t __a, uint8x8_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15});
-+#endif
-+}
-+
-+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+vzip2_u16 (uint16x4_t __a, uint16x4_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7});
-+#endif
-+}
-+
-+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+vzip2_u32 (uint32x2_t __a, uint32x2_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
-+#endif
-+}
-+
-+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-+vzip2q_f32 (float32x4_t __a, float32x4_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7});
-+#endif
-+}
-+
-+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
-+vzip2q_f64 (float64x2_t __a, float64x2_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
-+#endif
-+}
-+
-+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-+vzip2q_p8 (poly8x16_t __a, poly8x16_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint8x16_t)
-+ {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint8x16_t)
-+ {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31});
-+#endif
-+}
-+
-+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-+vzip2q_p16 (poly16x8_t __a, poly16x8_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint16x8_t)
-+ {4, 12, 5, 13, 6, 14, 7, 15});
-+#endif
-+}
-+
-+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-+vzip2q_s8 (int8x16_t __a, int8x16_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint8x16_t)
-+ {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint8x16_t)
-+ {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31});
-+#endif
-+}
-+
-+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-+vzip2q_s16 (int16x8_t __a, int16x8_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint16x8_t)
-+ {4, 12, 5, 13, 6, 14, 7, 15});
-+#endif
-+}
-+
-+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+vzip2q_s32 (int32x4_t __a, int32x4_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7});
-+#endif
-+}
-+
-+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-+vzip2q_s64 (int64x2_t __a, int64x2_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
-+#endif
-+}
-+
-+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-+vzip2q_u8 (uint8x16_t __a, uint8x16_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint8x16_t)
-+ {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint8x16_t)
-+ {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31});
-+#endif
-+}
-+
-+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-+vzip2q_u16 (uint16x8_t __a, uint16x8_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint16x8_t)
-+ {4, 12, 5, 13, 6, 14, 7, 15});
-+#endif
-+}
-+
-+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+vzip2q_u32 (uint32x4_t __a, uint32x4_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7});
-+#endif
-+}
-+
-+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-+vzip2q_u64 (uint64x2_t __a, uint64x2_t __b)
-+{
-+#ifdef __AARCH64EB__
-+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
-+#else
-+ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
-+#endif
-+}
-+
- __INTERLEAVE_LIST (zip)
-
- #undef __INTERLEAVE_LIST
---- a/src/gcc/config/aarch64/t-aarch64-linux
-+++ b/src/gcc/config/aarch64/t-aarch64-linux
-@@ -22,10 +22,7 @@
- LIB1ASMFUNCS = _aarch64_sync_cache_range
-
- AARCH_BE = $(if $(findstring TARGET_BIG_ENDIAN_DEFAULT=1, $(tm_defines)),_be)
--MULTILIB_OSDIRNAMES = .=../lib64$(call if_multiarch,:aarch64$(AARCH_BE)-linux-gnu)
-+MULTILIB_OSDIRNAMES = mabi.lp64=../lib64$(call if_multiarch,:aarch64$(AARCH_BE)-linux-gnu)
- MULTIARCH_DIRNAME = $(call if_multiarch,aarch64$(AARCH_BE)-linux-gnu)
-
--# Disable the multilib for linux-gnu targets for the time being; focus
--# on the baremetal targets.
--MULTILIB_OPTIONS =
--MULTILIB_DIRNAMES =
-+MULTILIB_OSDIRNAMES += mabi.ilp32=../libilp32
---- a/src/gcc/config/aarch64/aarch64.md
-+++ b/src/gcc/config/aarch64/aarch64.md
-@@ -67,7 +67,14 @@
-
- (define_c_enum "unspec" [
- UNSPEC_CASESI
-- UNSPEC_CLS
-+ UNSPEC_CRC32B
-+ UNSPEC_CRC32CB
-+ UNSPEC_CRC32CH
-+ UNSPEC_CRC32CW
-+ UNSPEC_CRC32CX
-+ UNSPEC_CRC32H
-+ UNSPEC_CRC32W
-+ UNSPEC_CRC32X
- UNSPEC_FRECPE
- UNSPEC_FRECPS
- UNSPEC_FRECPX
-@@ -83,8 +90,11 @@
- UNSPEC_GOTTINYPIC
- UNSPEC_LD1
- UNSPEC_LD2
-+ UNSPEC_LD2_DUP
- UNSPEC_LD3
-+ UNSPEC_LD3_DUP
- UNSPEC_LD4
-+ UNSPEC_LD4_DUP
- UNSPEC_MB
- UNSPEC_NOP
- UNSPEC_PRLG_STK
-@@ -98,15 +108,24 @@
- UNSPEC_ST2
- UNSPEC_ST3
- UNSPEC_ST4
-+ UNSPEC_ST2_LANE
-+ UNSPEC_ST3_LANE
-+ UNSPEC_ST4_LANE
- UNSPEC_TLS
- UNSPEC_TLSDESC
- UNSPEC_USHL_2S
- UNSPEC_USHR64
- UNSPEC_VSTRUCTDUMMY
-+ UNSPEC_SP_SET
-+ UNSPEC_SP_TEST
- ])
-
- (define_c_enum "unspecv" [
- UNSPECV_EH_RETURN ; Represent EH_RETURN
-+ UNSPECV_GET_FPCR ; Represent fetch of FPCR content.
-+ UNSPECV_SET_FPCR ; Represent assign of FPCR content.
-+ UNSPECV_GET_FPSR ; Represent fetch of FPSR content.
-+ UNSPECV_SET_FPSR ; Represent assign of FPSR content.
- ]
- )
-
-@@ -159,7 +178,7 @@
-
- (define_attr "generic_sched" "yes,no"
- (const (if_then_else
-- (eq_attr "tune" "cortexa53,cortexa15")
-+ (eq_attr "tune" "cortexa53,cortexa15,thunderx")
- (const_string "no")
- (const_string "yes"))))
-
-@@ -166,6 +185,7 @@
- ;; Scheduling
- (include "../arm/cortex-a53.md")
- (include "../arm/cortex-a15.md")
-+(include "thunderx.md")
-
- ;; -------------------------------------------------------------------
- ;; Jumps and other miscellaneous insns
-@@ -514,6 +534,10 @@
- (use (match_operand 2 "" ""))])]
- ""
- {
-+ if (!REG_P (XEXP (operands[0], 0))
-+ && (GET_CODE (XEXP (operands[0], 0)) != SYMBOL_REF))
-+ XEXP (operands[0], 0) = force_reg (Pmode, XEXP (operands[0], 0));
-+
- if (operands[2] == NULL_RTX)
- operands[2] = const0_rtx;
- }
-@@ -527,6 +551,10 @@
- (use (match_operand 3 "" ""))])]
- ""
- {
-+ if (!REG_P (XEXP (operands[1], 0))
-+ && (GET_CODE (XEXP (operands[1], 0)) != SYMBOL_REF))
-+ XEXP (operands[1], 0) = force_reg (Pmode, XEXP (operands[1], 0));
-+
- if (operands[3] == NULL_RTX)
- operands[3] = const0_rtx;
- }
-@@ -533,25 +561,29 @@
- )
-
- (define_insn "*sibcall_insn"
-- [(call (mem:DI (match_operand:DI 0 "" "X"))
-+ [(call (mem:DI (match_operand:DI 0 "aarch64_call_insn_operand" "Ucs, Usf"))
- (match_operand 1 "" ""))
- (return)
- (use (match_operand 2 "" ""))]
-- "GET_CODE (operands[0]) == SYMBOL_REF"
-- "b\\t%a0"
-- [(set_attr "type" "branch")]
--
-+ "SIBLING_CALL_P (insn)"
-+ "@
-+ br\\t%0
-+ b\\t%a0"
-+ [(set_attr "type" "branch, branch")]
- )
-
- (define_insn "*sibcall_value_insn"
- [(set (match_operand 0 "" "")
-- (call (mem:DI (match_operand 1 "" "X"))
-+ (call (mem:DI
-+ (match_operand:DI 1 "aarch64_call_insn_operand" "Ucs, Usf"))
- (match_operand 2 "" "")))
- (return)
- (use (match_operand 3 "" ""))]
-- "GET_CODE (operands[1]) == SYMBOL_REF"
-- "b\\t%a1"
-- [(set_attr "type" "branch")]
-+ "SIBLING_CALL_P (insn)"
-+ "@
-+ br\\t%1
-+ b\\t%a1"
-+ [(set_attr "type" "branch, branch")]
- )
-
- ;; Call subroutine returning any type.
-@@ -641,17 +673,20 @@
- if (GET_CODE (operands[0]) == MEM && operands[1] != const0_rtx)
- operands[1] = force_reg (<MODE>mode, operands[1]);
-
-- if (CONSTANT_P (operands[1]))
-- {
-- aarch64_expand_mov_immediate (operands[0], operands[1]);
-- DONE;
-- }
-+ /* FIXME: RR we still need to fix up what we are doing with
-+ symbol_refs and other types of constants. */
-+ if (CONSTANT_P (operands[1])
-+ && !CONST_INT_P (operands[1]))
-+ {
-+ aarch64_expand_mov_immediate (operands[0], operands[1]);
-+ DONE;
-+ }
- "
- )
-
--(define_insn "*movsi_aarch64"
-- [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,*w,m, m,r,r ,*w, r,*w")
-- (match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,m, m,rZ,*w,S,Ush,rZ,*w,*w"))]
-+(define_insn_and_split "*movsi_aarch64"
-+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m, m,r,r ,*w, r,*w")
-+ (match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,n,m, m,rZ,*w,S,Ush,rZ,*w,*w"))]
- "(register_operand (operands[0], SImode)
- || aarch64_reg_or_zero (operands[1], SImode))"
- "@
-@@ -659,6 +694,7 @@
- mov\\t%w0, %w1
- mov\\t%w0, %w1
- mov\\t%w0, %1
-+ #
- ldr\\t%w0, %1
- ldr\\t%s0, %1
- str\\t%w1, %0
-@@ -668,14 +704,20 @@
- fmov\\t%s0, %w1
- fmov\\t%w0, %s1
- fmov\\t%s0, %s1"
-- [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,load1,load1,store1,store1,\
-- adr,adr,fmov,fmov,fmov")
-- (set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes")]
-+ "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode)"
-+ [(const_int 0)]
-+ "{
-+ aarch64_expand_mov_immediate (operands[0], operands[1]);
-+ DONE;
-+ }"
-+ [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,\
-+ adr,adr,f_mcr,f_mrc,fmov")
-+ (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes")]
- )
-
--(define_insn "*movdi_aarch64"
-- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,*w,m, m,r,r, *w, r,*w,w")
-- (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,m, m,rZ,*w,S,Ush,rZ,*w,*w,Dd"))]
-+(define_insn_and_split "*movdi_aarch64"
-+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m, m,r,r, *w, r,*w,w")
-+ (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,n,m, m,rZ,*w,S,Ush,rZ,*w,*w,Dd"))]
- "(register_operand (operands[0], DImode)
- || aarch64_reg_or_zero (operands[1], DImode))"
- "@
-@@ -683,6 +725,7 @@
- mov\\t%0, %x1
- mov\\t%x0, %1
- mov\\t%x0, %1
-+ #
- ldr\\t%x0, %1
- ldr\\t%d0, %1
- str\\t%x1, %0
-@@ -693,10 +736,16 @@
- fmov\\t%x0, %d1
- fmov\\t%d0, %d1
- movi\\t%d0, %1"
-- [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,load1,load1,store1,store1,\
-- adr,adr,fmov,fmov,fmov,fmov")
-- (set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
-- (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
-+ "(CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), DImode))"
-+ [(const_int 0)]
-+ "{
-+ aarch64_expand_mov_immediate (operands[0], operands[1]);
-+ DONE;
-+ }"
-+ [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,\
-+ adr,adr,f_mcr,f_mrc,fmov,fmov")
-+ (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
-+ (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
- )
-
- (define_insn "insv_imm<mode>"
-@@ -789,7 +838,7 @@
- str\\t%w1, %0
- mov\\t%w0, %w1"
- [(set_attr "type" "f_mcr,f_mrc,fmov,fconsts,\
-- f_loads,f_stores,f_loads,f_stores,fmov")]
-+ f_loads,f_stores,f_loads,f_stores,mov_reg")]
- )
-
- (define_insn "*movdf_aarch64"
-@@ -863,6 +912,24 @@
- }
- )
-
-+;; 0 is dst
-+;; 1 is src
-+;; 2 is size of move in bytes
-+;; 3 is alignment
-+
-+(define_expand "movmemdi"
-+ [(match_operand:BLK 0 "memory_operand")
-+ (match_operand:BLK 1 "memory_operand")
-+ (match_operand:DI 2 "immediate_operand")
-+ (match_operand:DI 3 "immediate_operand")]
-+ "!STRICT_ALIGNMENT"
-+{
-+ if (aarch64_expand_movmem (operands))
-+ DONE;
-+ FAIL;
-+}
-+)
-+
- ;; Operands 1 and 3 are tied together by the final condition; so we allow
- ;; fairly lax checking on the second memory operation.
- (define_insn "load_pair<mode>"
-@@ -923,31 +990,45 @@
- [(set_attr "type" "neon_store1_2reg<q>")]
- )
-
--;; Load pair with writeback. This is primarily used in function epilogues
--;; when restoring [fp,lr]
-+;; Load pair with post-index writeback. This is primarily used in function
-+;; epilogues.
- (define_insn "loadwb_pair<GPI:mode>_<P:mode>"
- [(parallel
- [(set (match_operand:P 0 "register_operand" "=k")
- (plus:P (match_operand:P 1 "register_operand" "0")
-- (match_operand:P 4 "const_int_operand" "n")))
-+ (match_operand:P 4 "aarch64_mem_pair_offset" "n")))
- (set (match_operand:GPI 2 "register_operand" "=r")
-- (mem:GPI (plus:P (match_dup 1)
-- (match_dup 4))))
-+ (mem:GPI (match_dup 1)))
- (set (match_operand:GPI 3 "register_operand" "=r")
- (mem:GPI (plus:P (match_dup 1)
- (match_operand:P 5 "const_int_operand" "n"))))])]
-- "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (<GPI:MODE>mode)"
-+ "INTVAL (operands[5]) == GET_MODE_SIZE (<GPI:MODE>mode)"
- "ldp\\t%<w>2, %<w>3, [%1], %4"
- [(set_attr "type" "load2")]
- )
-
--;; Store pair with writeback. This is primarily used in function prologues
--;; when saving [fp,lr]
-+(define_insn "loadwb_pair<GPF:mode>_<P:mode>"
-+ [(parallel
-+ [(set (match_operand:P 0 "register_operand" "=k")
-+ (plus:P (match_operand:P 1 "register_operand" "0")
-+ (match_operand:P 4 "aarch64_mem_pair_offset" "n")))
-+ (set (match_operand:GPF 2 "register_operand" "=w")
-+ (mem:GPF (match_dup 1)))
-+ (set (match_operand:GPF 3 "register_operand" "=w")
-+ (mem:GPF (plus:P (match_dup 1)
-+ (match_operand:P 5 "const_int_operand" "n"))))])]
-+ "INTVAL (operands[5]) == GET_MODE_SIZE (<GPF:MODE>mode)"
-+ "ldp\\t%<w>2, %<w>3, [%1], %4"
-+ [(set_attr "type" "neon_load1_2reg")]
-+)
-+
-+;; Store pair with pre-index writeback. This is primarily used in function
-+;; prologues.
- (define_insn "storewb_pair<GPI:mode>_<P:mode>"
- [(parallel
- [(set (match_operand:P 0 "register_operand" "=&k")
- (plus:P (match_operand:P 1 "register_operand" "0")
-- (match_operand:P 4 "const_int_operand" "n")))
-+ (match_operand:P 4 "aarch64_mem_pair_offset" "n")))
- (set (mem:GPI (plus:P (match_dup 0)
- (match_dup 4)))
- (match_operand:GPI 2 "register_operand" "r"))
-@@ -959,6 +1040,22 @@
- [(set_attr "type" "store2")]
- )
-
-+(define_insn "storewb_pair<GPF:mode>_<P:mode>"
-+ [(parallel
-+ [(set (match_operand:P 0 "register_operand" "=&k")
-+ (plus:P (match_operand:P 1 "register_operand" "0")
-+ (match_operand:P 4 "aarch64_mem_pair_offset" "n")))
-+ (set (mem:GPF (plus:P (match_dup 0)
-+ (match_dup 4)))
-+ (match_operand:GPF 2 "register_operand" "w"))
-+ (set (mem:GPF (plus:P (match_dup 0)
-+ (match_operand:P 5 "const_int_operand" "n")))
-+ (match_operand:GPF 3 "register_operand" "w"))])]
-+ "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (<GPF:MODE>mode)"
-+ "stp\\t%<w>2, %<w>3, [%0, %4]!"
-+ [(set_attr "type" "neon_store1_2reg<q>")]
-+)
-+
- ;; -------------------------------------------------------------------
- ;; Sign/Zero extension
- ;; -------------------------------------------------------------------
-@@ -1063,16 +1160,18 @@
-
- (define_insn "*addsi3_aarch64"
- [(set
-- (match_operand:SI 0 "register_operand" "=rk,rk,rk")
-+ (match_operand:SI 0 "register_operand" "=rk,rk,w,rk")
- (plus:SI
-- (match_operand:SI 1 "register_operand" "%rk,rk,rk")
-- (match_operand:SI 2 "aarch64_plus_operand" "I,r,J")))]
-+ (match_operand:SI 1 "register_operand" "%rk,rk,w,rk")
-+ (match_operand:SI 2 "aarch64_plus_operand" "I,r,w,J")))]
- ""
- "@
- add\\t%w0, %w1, %2
- add\\t%w0, %w1, %w2
-+ add\\t%0.2s, %1.2s, %2.2s
- sub\\t%w0, %w1, #%n2"
-- [(set_attr "type" "alu_imm,alu_reg,alu_imm")]
-+ [(set_attr "type" "alu_imm,alu_reg,neon_add,alu_imm")
-+ (set_attr "simd" "*,*,yes,*")]
- )
-
- ;; zero_extend version of above
-@@ -1106,7 +1205,26 @@
- (set_attr "simd" "*,*,*,yes")]
- )
-
--(define_insn "*add<mode>3_compare0"
-+(define_expand "addti3"
-+ [(set (match_operand:TI 0 "register_operand" "")
-+ (plus:TI (match_operand:TI 1 "register_operand" "")
-+ (match_operand:TI 2 "register_operand" "")))]
-+ ""
-+{
-+ rtx low = gen_reg_rtx (DImode);
-+ emit_insn (gen_adddi3_compare0 (low, gen_lowpart (DImode, operands[1]),
-+ gen_lowpart (DImode, operands[2])));
-+
-+ rtx high = gen_reg_rtx (DImode);
-+ emit_insn (gen_adddi3_carryin (high, gen_highpart (DImode, operands[1]),
-+ gen_highpart (DImode, operands[2])));
-+
-+ emit_move_insn (gen_lowpart (DImode, operands[0]), low);
-+ emit_move_insn (gen_highpart (DImode, operands[0]), high);
-+ DONE;
-+})
-+
-+(define_insn "add<mode>3_compare0"
- [(set (reg:CC_NZ CC_REGNUM)
- (compare:CC_NZ
- (plus:GPI (match_operand:GPI 1 "register_operand" "%r,r,r")
-@@ -1390,7 +1508,7 @@
- [(set_attr "type" "alu_ext")]
- )
-
--(define_insn "*add<mode>3_carryin"
-+(define_insn "add<mode>3_carryin"
- [(set
- (match_operand:GPI 0 "register_operand" "=r")
- (plus:GPI (geu:GPI (reg:CC CC_REGNUM) (const_int 0))
-@@ -1558,8 +1676,26 @@
- (set_attr "simd" "*,yes")]
- )
-
-+(define_expand "subti3"
-+ [(set (match_operand:TI 0 "register_operand" "")
-+ (minus:TI (match_operand:TI 1 "register_operand" "")
-+ (match_operand:TI 2 "register_operand" "")))]
-+ ""
-+{
-+ rtx low = gen_reg_rtx (DImode);
-+ emit_insn (gen_subdi3_compare0 (low, gen_lowpart (DImode, operands[1]),
-+ gen_lowpart (DImode, operands[2])));
-
--(define_insn "*sub<mode>3_compare0"
-+ rtx high = gen_reg_rtx (DImode);
-+ emit_insn (gen_subdi3_carryin (high, gen_highpart (DImode, operands[1]),
-+ gen_highpart (DImode, operands[2])));
-+
-+ emit_move_insn (gen_lowpart (DImode, operands[0]), low);
-+ emit_move_insn (gen_highpart (DImode, operands[0]), high);
-+ DONE;
-+})
-+
-+(define_insn "sub<mode>3_compare0"
- [(set (reg:CC_NZ CC_REGNUM)
- (compare:CC_NZ (minus:GPI (match_operand:GPI 1 "register_operand" "r")
- (match_operand:GPI 2 "register_operand" "r"))
-@@ -1706,7 +1842,7 @@
- [(set_attr "type" "alu_ext")]
- )
-
--(define_insn "*sub<mode>3_carryin"
-+(define_insn "sub<mode>3_carryin"
- [(set
- (match_operand:GPI 0 "register_operand" "=r")
- (minus:GPI (minus:GPI
-@@ -1935,7 +2071,7 @@
- [(set_attr "type" "mul")]
- )
-
--(define_insn "*madd<mode>"
-+(define_insn "madd<mode>"
- [(set (match_operand:GPI 0 "register_operand" "=r")
- (plus:GPI (mult:GPI (match_operand:GPI 1 "register_operand" "r")
- (match_operand:GPI 2 "register_operand" "r"))
-@@ -2045,6 +2181,48 @@
- [(set_attr "type" "<su>mull")]
- )
-
-+(define_expand "<su_optab>mulditi3"
-+ [(set (match_operand:TI 0 "register_operand")
-+ (mult:TI (ANY_EXTEND:TI (match_operand:DI 1 "register_operand"))
-+ (ANY_EXTEND:TI (match_operand:DI 2 "register_operand"))))]
-+ ""
-+{
-+ rtx low = gen_reg_rtx (DImode);
-+ emit_insn (gen_muldi3 (low, operands[1], operands[2]));
-+
-+ rtx high = gen_reg_rtx (DImode);
-+ emit_insn (gen_<su>muldi3_highpart (high, operands[1], operands[2]));
-+
-+ emit_move_insn (gen_lowpart (DImode, operands[0]), low);
-+ emit_move_insn (gen_highpart (DImode, operands[0]), high);
-+ DONE;
-+})
-+
-+;; The default expansion of multi3 using umuldi3_highpart will perform
-+;; the additions in an order that fails to combine into two madd insns.
-+(define_expand "multi3"
-+ [(set (match_operand:TI 0 "register_operand")
-+ (mult:TI (match_operand:TI 1 "register_operand")
-+ (match_operand:TI 2 "register_operand")))]
-+ ""
-+{
-+ rtx l0 = gen_reg_rtx (DImode);
-+ rtx l1 = gen_lowpart (DImode, operands[1]);
-+ rtx l2 = gen_lowpart (DImode, operands[2]);
-+ rtx h0 = gen_reg_rtx (DImode);
-+ rtx h1 = gen_highpart (DImode, operands[1]);
-+ rtx h2 = gen_highpart (DImode, operands[2]);
-+
-+ emit_insn (gen_muldi3 (l0, l1, l2));
-+ emit_insn (gen_umuldi3_highpart (h0, l1, l2));
-+ emit_insn (gen_madddi (h0, h1, l2, h0));
-+ emit_insn (gen_madddi (h0, l1, h2, h0));
-+
-+ emit_move_insn (gen_lowpart (DImode, operands[0]), l0);
-+ emit_move_insn (gen_highpart (DImode, operands[0]), h0);
-+ DONE;
-+})
-+
- (define_insn "<su>muldi3_highpart"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (truncate:DI
-@@ -2345,11 +2523,46 @@
- }
- )
-
-+(define_expand "mov<mode>cc"
-+ [(set (match_operand:GPF 0 "register_operand" "")
-+ (if_then_else:GPF (match_operand 1 "aarch64_comparison_operator" "")
-+ (match_operand:GPF 2 "register_operand" "")
-+ (match_operand:GPF 3 "register_operand" "")))]
-+ ""
-+ {
-+ rtx ccreg;
-+ enum rtx_code code = GET_CODE (operands[1]);
-+
-+ if (code == UNEQ || code == LTGT)
-+ FAIL;
-+
-+ ccreg = aarch64_gen_compare_reg (code, XEXP (operands[1], 0),
-+ XEXP (operands[1], 1));
-+ operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx);
-+ }
-+)
-+
-+
-+;; CRC32 instructions.
-+(define_insn "aarch64_<crc_variant>"
-+ [(set (match_operand:SI 0 "register_operand" "=r")
-+ (unspec:SI [(match_operand:SI 1 "register_operand" "r")
-+ (match_operand:<crc_mode> 2 "register_operand" "r")]
-+ CRC))]
-+ "TARGET_CRC32"
-+ {
-+ if (GET_MODE_BITSIZE (GET_MODE (operands[2])) >= 64)
-+ return "<crc_variant>\\t%w0, %w1, %x2";
-+ else
-+ return "<crc_variant>\\t%w0, %w1, %w2";
-+ }
-+ [(set_attr "type" "crc")]
-+)
-+
- (define_insn "*csinc2<mode>_insn"
- [(set (match_operand:GPI 0 "register_operand" "=r")
-- (plus:GPI (match_operator:GPI 2 "aarch64_comparison_operator"
-- [(match_operand:CC 3 "cc_register" "") (const_int 0)])
-- (match_operand:GPI 1 "register_operand" "r")))]
-+ (plus:GPI (match_operand 2 "aarch64_comparison_operation" "")
-+ (match_operand:GPI 1 "register_operand" "r")))]
- ""
- "csinc\\t%<w>0, %<w>1, %<w>1, %M2"
- [(set_attr "type" "csel")]
-@@ -2358,13 +2571,12 @@
- (define_insn "csinc3<mode>_insn"
- [(set (match_operand:GPI 0 "register_operand" "=r")
- (if_then_else:GPI
-- (match_operator:GPI 1 "aarch64_comparison_operator"
-- [(match_operand:CC 2 "cc_register" "") (const_int 0)])
-- (plus:GPI (match_operand:GPI 3 "register_operand" "r")
-+ (match_operand 1 "aarch64_comparison_operation" "")
-+ (plus:GPI (match_operand:GPI 2 "register_operand" "r")
- (const_int 1))
-- (match_operand:GPI 4 "aarch64_reg_or_zero" "rZ")))]
-+ (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")))]
- ""
-- "csinc\\t%<w>0, %<w>4, %<w>3, %M1"
-+ "csinc\\t%<w>0, %<w>3, %<w>2, %M1"
- [(set_attr "type" "csel")]
- )
-
-@@ -2371,12 +2583,11 @@
- (define_insn "*csinv3<mode>_insn"
- [(set (match_operand:GPI 0 "register_operand" "=r")
- (if_then_else:GPI
-- (match_operator:GPI 1 "aarch64_comparison_operator"
-- [(match_operand:CC 2 "cc_register" "") (const_int 0)])
-- (not:GPI (match_operand:GPI 3 "register_operand" "r"))
-- (match_operand:GPI 4 "aarch64_reg_or_zero" "rZ")))]
-+ (match_operand 1 "aarch64_comparison_operation" "")
-+ (not:GPI (match_operand:GPI 2 "register_operand" "r"))
-+ (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")))]
- ""
-- "csinv\\t%<w>0, %<w>4, %<w>3, %M1"
-+ "csinv\\t%<w>0, %<w>3, %<w>2, %M1"
- [(set_attr "type" "csel")]
- )
-
-@@ -2383,12 +2594,11 @@
- (define_insn "*csneg3<mode>_insn"
- [(set (match_operand:GPI 0 "register_operand" "=r")
- (if_then_else:GPI
-- (match_operator:GPI 1 "aarch64_comparison_operator"
-- [(match_operand:CC 2 "cc_register" "") (const_int 0)])
-- (neg:GPI (match_operand:GPI 3 "register_operand" "r"))
-- (match_operand:GPI 4 "aarch64_reg_or_zero" "rZ")))]
-+ (match_operand 1 "aarch64_comparison_operation" "")
-+ (neg:GPI (match_operand:GPI 2 "register_operand" "r"))
-+ (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")))]
- ""
-- "csneg\\t%<w>0, %<w>4, %<w>3, %M1"
-+ "csneg\\t%<w>0, %<w>3, %<w>2, %M1"
- [(set_attr "type" "csel")]
- )
-
-@@ -2486,7 +2696,18 @@
- [(set_attr "type" "logic_shift_imm")]
- )
-
--;; zero_extend version of above
-+(define_insn "*<optab>_rol<mode>3"
-+ [(set (match_operand:GPI 0 "register_operand" "=r")
-+ (LOGICAL:GPI (rotate:GPI
-+ (match_operand:GPI 1 "register_operand" "r")
-+ (match_operand:QI 2 "aarch64_shift_imm_<mode>" "n"))
-+ (match_operand:GPI 3 "register_operand" "r")))]
-+ ""
-+ "<logical>\\t%<w>0, %<w>3, %<w>1, ror (<sizen> - %2)"
-+ [(set_attr "type" "logic_shift_imm")]
-+)
-+
-+;; zero_extend versions of above
- (define_insn "*<LOGICAL:optab>_<SHIFT:optab>si3_uxtw"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (zero_extend:DI
-@@ -2499,6 +2720,18 @@
- [(set_attr "type" "logic_shift_imm")]
- )
-
-+(define_insn "*<optab>_rolsi3_uxtw"
-+ [(set (match_operand:DI 0 "register_operand" "=r")
-+ (zero_extend:DI
-+ (LOGICAL:SI (rotate:SI
-+ (match_operand:SI 1 "register_operand" "r")
-+ (match_operand:QI 2 "aarch64_shift_imm_si" "n"))
-+ (match_operand:SI 3 "register_operand" "r"))))]
-+ ""
-+ "<logical>\\t%w0, %w3, %w1, ror (32 - %2)"
-+ [(set_attr "type" "logic_shift_imm")]
-+)
-+
- (define_insn "one_cmpl<mode>2"
- [(set (match_operand:GPI 0 "register_operand" "=r")
- (not:GPI (match_operand:GPI 1 "register_operand" "r")))]
-@@ -2622,7 +2855,7 @@
-
- emit_insn (gen_rbit<mode>2 (operands[0], operands[1]));
- emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
-- emit_insn (gen_csinc3<mode>_insn (operands[0], x, ccreg, operands[0], const0_rtx));
-+ emit_insn (gen_csinc3<mode>_insn (operands[0], x, operands[0], const0_rtx));
- DONE;
- }
- )
-@@ -2629,7 +2862,7 @@
-
- (define_insn "clrsb<mode>2"
- [(set (match_operand:GPI 0 "register_operand" "=r")
-- (unspec:GPI [(match_operand:GPI 1 "register_operand" "r")] UNSPEC_CLS))]
-+ (clrsb:GPI (match_operand:GPI 1 "register_operand" "r")))]
- ""
- "cls\\t%<w>0, %<w>1"
- [(set_attr "type" "clz")]
-@@ -3125,7 +3358,7 @@
- [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r")
- (match_operand 1 "const_int_operand" "n")
- (const_int 0))
-- (zero_extract:GPI (match_operand:GPI 2 "register_operand" "+r")
-+ (zero_extract:GPI (match_operand:GPI 2 "register_operand" "r")
- (match_dup 1)
- (match_operand 3 "const_int_operand" "n")))]
- "!(UINTVAL (operands[1]) == 0
-@@ -3180,6 +3413,38 @@
- [(set_attr "type" "rev")]
- )
-
-+;; There are no canonicalisation rules for the position of the lshiftrt, ashift
-+;; operations within an IOR/AND RTX, therefore we have two patterns matching
-+;; each valid permutation.
-+
-+(define_insn "rev16<mode>2"
-+ [(set (match_operand:GPI 0 "register_operand" "=r")
-+ (ior:GPI (and:GPI (ashift:GPI (match_operand:GPI 1 "register_operand" "r")
-+ (const_int 8))
-+ (match_operand:GPI 3 "const_int_operand" "n"))
-+ (and:GPI (lshiftrt:GPI (match_dup 1)
-+ (const_int 8))
-+ (match_operand:GPI 2 "const_int_operand" "n"))))]
-+ "aarch_rev16_shleft_mask_imm_p (operands[3], <MODE>mode)
-+ && aarch_rev16_shright_mask_imm_p (operands[2], <MODE>mode)"
-+ "rev16\\t%<w>0, %<w>1"
-+ [(set_attr "type" "rev")]
-+)
-+
-+(define_insn "rev16<mode>2_alt"
-+ [(set (match_operand:GPI 0 "register_operand" "=r")
-+ (ior:GPI (and:GPI (lshiftrt:GPI (match_operand:GPI 1 "register_operand" "r")
-+ (const_int 8))
-+ (match_operand:GPI 2 "const_int_operand" "n"))
-+ (and:GPI (ashift:GPI (match_dup 1)
-+ (const_int 8))
-+ (match_operand:GPI 3 "const_int_operand" "n"))))]
-+ "aarch_rev16_shleft_mask_imm_p (operands[3], <MODE>mode)
-+ && aarch_rev16_shright_mask_imm_p (operands[2], <MODE>mode)"
-+ "rev16\\t%<w>0, %<w>1"
-+ [(set_attr "type" "rev")]
-+)
-+
- ;; zero_extend version of above
- (define_insn "*bswapsi2_uxtw"
- [(set (match_operand:DI 0 "register_operand" "=r")
-@@ -3194,7 +3459,7 @@
- ;; -------------------------------------------------------------------
-
- ;; frint floating-point round to integral standard patterns.
--;; Expands to btrunc, ceil, floor, nearbyint, rint, round.
-+;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
-
- (define_insn "<frint_pattern><mode>2"
- [(set (match_operand:GPF 0 "register_operand" "=w")
-@@ -3305,20 +3570,24 @@
- [(set_attr "type" "f_cvtf2i")]
- )
-
--(define_insn "float<GPI:mode><GPF:mode>2"
-- [(set (match_operand:GPF 0 "register_operand" "=w")
-- (float:GPF (match_operand:GPI 1 "register_operand" "r")))]
-- "TARGET_FLOAT"
-- "scvtf\\t%<GPF:s>0, %<GPI:w>1"
-- [(set_attr "type" "f_cvti2f")]
-+(define_insn "<optab><fcvt_target><GPF:mode>2"
-+ [(set (match_operand:GPF 0 "register_operand" "=w,w")
-+ (FLOATUORS:GPF (match_operand:<FCVT_TARGET> 1 "register_operand" "w,r")))]
-+ ""
-+ "@
-+ <su_optab>cvtf\t%<GPF:s>0, %<s>1
-+ <su_optab>cvtf\t%<GPF:s>0, %<w1>1"
-+ [(set_attr "simd" "yes,no")
-+ (set_attr "fp" "no,yes")
-+ (set_attr "type" "neon_int_to_fp_<Vetype>,f_cvti2f")]
- )
-
--(define_insn "floatuns<GPI:mode><GPF:mode>2"
-+(define_insn "<optab><fcvt_iesize><GPF:mode>2"
- [(set (match_operand:GPF 0 "register_operand" "=w")
-- (unsigned_float:GPF (match_operand:GPI 1 "register_operand" "r")))]
-+ (FLOATUORS:GPF (match_operand:<FCVT_IESIZE> 1 "register_operand" "r")))]
- "TARGET_FLOAT"
-- "ucvtf\\t%<GPF:s>0, %<GPI:w>1"
-- [(set_attr "type" "f_cvt")]
-+ "<su_optab>cvtf\t%<GPF:s>0, %<w2>1"
-+ [(set_attr "type" "f_cvti2f")]
- )
-
- ;; -------------------------------------------------------------------
-@@ -3490,7 +3759,7 @@
- (truncate:DI (match_operand:TI 1 "register_operand" "w"))))]
- "reload_completed || reload_in_progress"
- "fmov\\t%d0, %d1"
-- [(set_attr "type" "f_mcr")
-+ [(set_attr "type" "fmov")
- (set_attr "length" "4")
- ])
-
-@@ -3588,36 +3857,63 @@
- [(set_attr "type" "call")
- (set_attr "length" "16")])
-
--(define_insn "tlsie_small"
-- [(set (match_operand:DI 0 "register_operand" "=r")
-- (unspec:DI [(match_operand:DI 1 "aarch64_tls_ie_symref" "S")]
-+(define_insn "tlsie_small_<mode>"
-+ [(set (match_operand:PTR 0 "register_operand" "=r")
-+ (unspec:PTR [(match_operand 1 "aarch64_tls_ie_symref" "S")]
- UNSPEC_GOTSMALLTLS))]
- ""
-- "adrp\\t%0, %A1\;ldr\\t%0, [%0, #%L1]"
-+ "adrp\\t%0, %A1\;ldr\\t%<w>0, [%0, #%L1]"
- [(set_attr "type" "load1")
- (set_attr "length" "8")]
- )
-
--(define_insn "tlsle_small"
-+(define_insn "tlsie_small_sidi"
- [(set (match_operand:DI 0 "register_operand" "=r")
-- (unspec:DI [(match_operand:DI 1 "register_operand" "r")
-- (match_operand:DI 2 "aarch64_tls_le_symref" "S")]
-+ (zero_extend:DI
-+ (unspec:SI [(match_operand 1 "aarch64_tls_ie_symref" "S")]
-+ UNSPEC_GOTSMALLTLS)))]
-+ ""
-+ "adrp\\t%0, %A1\;ldr\\t%w0, [%0, #%L1]"
-+ [(set_attr "type" "load1")
-+ (set_attr "length" "8")]
-+)
-+
-+(define_expand "tlsle_small"
-+ [(set (match_operand 0 "register_operand" "=r")
-+ (unspec [(match_operand 1 "register_operand" "r")
-+ (match_operand 2 "aarch64_tls_le_symref" "S")]
-+ UNSPEC_GOTSMALLTLS))]
-+ ""
-+{
-+ enum machine_mode mode = GET_MODE (operands[0]);
-+ emit_insn ((mode == DImode
-+ ? gen_tlsle_small_di
-+ : gen_tlsle_small_si) (operands[0],
-+ operands[1],
-+ operands[2]));
-+ DONE;
-+})
-+
-+(define_insn "tlsle_small_<mode>"
-+ [(set (match_operand:P 0 "register_operand" "=r")
-+ (unspec:P [(match_operand:P 1 "register_operand" "r")
-+ (match_operand 2 "aarch64_tls_le_symref" "S")]
- UNSPEC_GOTSMALLTLS))]
- ""
-- "add\\t%0, %1, #%G2\;add\\t%0, %0, #%L2"
-+ "add\\t%<w>0, %<w>1, #%G2\;add\\t%<w>0, %<w>0, #%L2"
- [(set_attr "type" "alu_reg")
- (set_attr "length" "8")]
- )
-
--(define_insn "tlsdesc_small"
-- [(set (reg:DI R0_REGNUM)
-- (unspec:DI [(match_operand:DI 0 "aarch64_valid_symref" "S")]
-+(define_insn "tlsdesc_small_<mode>"
-+ [(set (reg:PTR R0_REGNUM)
-+ (unspec:PTR [(match_operand 0 "aarch64_valid_symref" "S")]
- UNSPEC_TLSDESC))
- (clobber (reg:DI LR_REGNUM))
- (clobber (reg:CC CC_REGNUM))
- (clobber (match_scratch:DI 1 "=r"))]
- "TARGET_TLS_DESC"
-- "adrp\\tx0, %A0\;ldr\\t%1, [x0, #%L0]\;add\\tx0, x0, %L0\;.tlsdesccall\\t%0\;blr\\t%1"
-+ "adrp\\tx0, %A0\;ldr\\t%<w>1, [x0, #%L0]\;add\\t<w>0, <w>0, %L0\;.tlsdesccall\\t%0\;blr\\t%1"
- [(set_attr "type" "call")
- (set_attr "length" "16")])
-
-@@ -3642,6 +3938,135 @@
- DONE;
- })
-
-+;; Named patterns for stack smashing protection.
-+(define_expand "stack_protect_set"
-+ [(match_operand 0 "memory_operand")
-+ (match_operand 1 "memory_operand")]
-+ ""
-+{
-+ enum machine_mode mode = GET_MODE (operands[0]);
-+
-+ emit_insn ((mode == DImode
-+ ? gen_stack_protect_set_di
-+ : gen_stack_protect_set_si) (operands[0], operands[1]));
-+ DONE;
-+})
-+
-+(define_insn "stack_protect_set_<mode>"
-+ [(set (match_operand:PTR 0 "memory_operand" "=m")
-+ (unspec:PTR [(match_operand:PTR 1 "memory_operand" "m")]
-+ UNSPEC_SP_SET))
-+ (set (match_scratch:PTR 2 "=&r") (const_int 0))]
-+ ""
-+ "ldr\\t%<w>2, %1\;str\\t%<w>2, %0\;mov\t%<w>2,0"
-+ [(set_attr "length" "12")
-+ (set_attr "type" "multiple")])
-+
-+(define_expand "stack_protect_test"
-+ [(match_operand 0 "memory_operand")
-+ (match_operand 1 "memory_operand")
-+ (match_operand 2)]
-+ ""
-+{
-+ rtx result;
-+ enum machine_mode mode = GET_MODE (operands[0]);
-+
-+ result = gen_reg_rtx(mode);
-+
-+ emit_insn ((mode == DImode
-+ ? gen_stack_protect_test_di
-+ : gen_stack_protect_test_si) (result,
-+ operands[0],
-+ operands[1]));
-+
-+ if (mode == DImode)
-+ emit_jump_insn (gen_cbranchdi4 (gen_rtx_EQ (VOIDmode, result, const0_rtx),
-+ result, const0_rtx, operands[2]));
-+ else
-+ emit_jump_insn (gen_cbranchsi4 (gen_rtx_EQ (VOIDmode, result, const0_rtx),
-+ result, const0_rtx, operands[2]));
-+ DONE;
-+})
-+
-+(define_insn "stack_protect_test_<mode>"
-+ [(set (match_operand:PTR 0 "register_operand" "=r")
-+ (unspec:PTR [(match_operand:PTR 1 "memory_operand" "m")
-+ (match_operand:PTR 2 "memory_operand" "m")]
-+ UNSPEC_SP_TEST))
-+ (clobber (match_scratch:PTR 3 "=&r"))]
-+ ""
-+ "ldr\t%<w>3, %x1\;ldr\t%<w>0, %x2\;eor\t%<w>0, %<w>3, %<w>0"
-+ [(set_attr "length" "12")
-+ (set_attr "type" "multiple")])
-+
-+;; Write Floating-point Control Register.
-+(define_insn "set_fpcr"
-+ [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] UNSPECV_SET_FPCR)]
-+ ""
-+ "msr\\tfpcr, %0"
-+ [(set_attr "type" "mrs")])
-+
-+;; Read Floating-point Control Register.
-+(define_insn "get_fpcr"
-+ [(set (match_operand:SI 0 "register_operand" "=r")
-+ (unspec_volatile:SI [(const_int 0)] UNSPECV_GET_FPCR))]
-+ ""
-+ "mrs\\t%0, fpcr"
-+ [(set_attr "type" "mrs")])
-+
-+;; Write Floating-point Status Register.
-+(define_insn "set_fpsr"
-+ [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] UNSPECV_SET_FPSR)]
-+ ""
-+ "msr\\tfpsr, %0"
-+ [(set_attr "type" "mrs")])
-+
-+;; Read Floating-point Status Register.
-+(define_insn "get_fpsr"
-+ [(set (match_operand:SI 0 "register_operand" "=r")
-+ (unspec_volatile:SI [(const_int 0)] UNSPECV_GET_FPSR))]
-+ ""
-+ "mrs\\t%0, fpsr"
-+ [(set_attr "type" "mrs")])
-+
-+
-+;; Define the subtract-one-and-jump insns so loop.c
-+;; knows what to generate.
-+(define_expand "doloop_end"
-+ [(use (match_operand 0 "" "")) ; loop pseudo
-+ (use (match_operand 1 "" ""))] ; label
-+ "optimize > 0 && flag_modulo_sched"
-+{
-+ rtx s0;
-+ rtx bcomp;
-+ rtx loc_ref;
-+ rtx cc_reg;
-+ rtx insn;
-+ rtx cmp;
-+
-+ /* Currently SMS relies on the do-loop pattern to recognize loops
-+ where (1) the control part consists of all insns defining and/or
-+ using a certain 'count' register and (2) the loop count can be
-+ adjusted by modifying this register prior to the loop.
-+ ??? The possible introduction of a new block to initialize the
-+ new IV can potentially affect branch optimizations. */
-+
-+ if (GET_MODE (operands[0]) != DImode)
-+ FAIL;
-+
-+ s0 = operands [0];
-+ insn = emit_insn (gen_adddi3_compare0 (s0, s0, GEN_INT (-1)));
-+
-+ cmp = XVECEXP (PATTERN (insn), 0, 0);
-+ cc_reg = SET_DEST (cmp);
-+ bcomp = gen_rtx_NE (VOIDmode, cc_reg, const0_rtx);
-+ loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands [1]);
-+ emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
-+ gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
-+ loc_ref, pc_rtx)));
-+ DONE;
-+})
-+
- ;; AdvSIMD Stuff
- (include "aarch64-simd.md")
-
---- a/src/gcc/config/aarch64/t-aarch64
-+++ b/src/gcc/config/aarch64/t-aarch64
-@@ -31,10 +31,17 @@
- $(SYSTEM_H) coretypes.h $(TM_H) \
- $(RTL_H) $(TREE_H) expr.h $(TM_P_H) $(RECOG_H) langhooks.h \
- $(DIAGNOSTIC_CORE_H) $(OPTABS_H) \
-- $(srcdir)/config/aarch64/aarch64-simd-builtins.def
-+ $(srcdir)/config/aarch64/aarch64-simd-builtins.def \
-+ aarch64-builtin-iterators.h
- $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
- $(srcdir)/config/aarch64/aarch64-builtins.c
-
-+aarch64-builtin-iterators.h: $(srcdir)/config/aarch64/geniterators.sh \
-+ $(srcdir)/config/aarch64/iterators.md
-+ $(SHELL) $(srcdir)/config/aarch64/geniterators.sh \
-+ $(srcdir)/config/aarch64/iterators.md > \
-+ aarch64-builtin-iterators.h
-+
- aarch-common.o: $(srcdir)/config/arm/aarch-common.c $(CONFIG_H) $(SYSTEM_H) \
- coretypes.h $(TM_H) $(TM_P_H) $(RTL_H) $(TREE_H) output.h $(C_COMMON_H)
- $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
---- a/src/gcc/config/aarch64/arm_acle.h
-+++ b/src/gcc/config/aarch64/arm_acle.h
-@@ -0,0 +1,90 @@
-+/* AArch64 Non-NEON ACLE intrinsics include file.
-+
-+ Copyright (C) 2014 Free Software Foundation, Inc.
-+ Contributed by ARM Ltd.
-+
-+ This file is part of GCC.
-+
-+ GCC is free software; you can redistribute it and/or modify it
-+ under the terms of the GNU General Public License as published
-+ by the Free Software Foundation; either version 3, or (at your
-+ option) any later version.
-+
-+ GCC is distributed in the hope that it will be useful, but WITHOUT
-+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
-+ License for more details.
-+
-+ Under Section 7 of GPL version 3, you are granted additional
-+ permissions described in the GCC Runtime Library Exception, version
-+ 3.1, as published by the Free Software Foundation.
-+
-+ You should have received a copy of the GNU General Public License and
-+ a copy of the GCC Runtime Library Exception along with this program;
-+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
-+ <http://www.gnu.org/licenses/>. */
-+
-+#ifndef _GCC_ARM_ACLE_H
-+#define _GCC_ARM_ACLE_H
-+
-+#include <stdint.h>
-+#ifdef __cplusplus
-+extern "C" {
-+#endif
-+
-+#ifdef __ARM_FEATURE_CRC32
-+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
-+__crc32b (uint32_t __a, uint8_t __b)
-+{
-+ return __builtin_aarch64_crc32b (__a, __b);
-+}
-+
-+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
-+__crc32cb (uint32_t __a, uint8_t __b)
-+{
-+ return __builtin_aarch64_crc32cb (__a, __b);
-+}
-+
-+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
-+__crc32ch (uint32_t __a, uint16_t __b)
-+{
-+ return __builtin_aarch64_crc32ch (__a, __b);
-+}
-+
-+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
-+__crc32cw (uint32_t __a, uint32_t __b)
-+{
-+ return __builtin_aarch64_crc32cw (__a, __b);
-+}
-+
-+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
-+__crc32cd (uint32_t __a, uint64_t __b)
-+{
-+ return __builtin_aarch64_crc32cx (__a, __b);
-+}
-+
-+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
-+__crc32h (uint32_t __a, uint16_t __b)
-+{
-+ return __builtin_aarch64_crc32h (__a, __b);
-+}
-+
-+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
-+__crc32w (uint32_t __a, uint32_t __b)
-+{
-+ return __builtin_aarch64_crc32w (__a, __b);
-+}
-+
-+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
-+__crc32d (uint32_t __a, uint64_t __b)
-+{
-+ return __builtin_aarch64_crc32x (__a, __b);
-+}
-+
-+#endif
-+
-+#ifdef __cplusplus
-+}
-+#endif
-+
-+#endif
---- a/src/gcc/config/aarch64/aarch64-cost-tables.h
-+++ b/src/gcc/config/aarch64/aarch64-cost-tables.h
-@@ -0,0 +1,131 @@
-+/* RTX cost tables for AArch64.
-+
-+ Copyright (C) 2014 Free Software Foundation, Inc.
-+
-+ This file is part of GCC.
-+
-+ GCC is free software; you can redistribute it and/or modify it
-+ under the terms of the GNU General Public License as published
-+ by the Free Software Foundation; either version 3, or (at your
-+ option) any later version.
-+
-+ GCC is distributed in the hope that it will be useful, but WITHOUT
-+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
-+ License for more details.
-+
-+ You should have received a copy of the GNU General Public License
-+ along with GCC; see the file COPYING3. If not see
-+ <http://www.gnu.org/licenses/>. */
-+
-+#ifndef GCC_AARCH64_COST_TABLES_H
-+#define GCC_AARCH64_COST_TABLES_H
-+
-+#include "config/arm/aarch-cost-tables.h"
-+
-+/* ThunderX does not have implement AArch32. */
-+const struct cpu_cost_table thunderx_extra_costs =
-+{
-+ /* ALU */
-+ {
-+ 0, /* Arith. */
-+ 0, /* Logical. */
-+ 0, /* Shift. */
-+ 0, /* Shift_reg. */
-+ COSTS_N_INSNS (1), /* Arith_shift. */
-+ COSTS_N_INSNS (1), /* Arith_shift_reg. */
-+ COSTS_N_INSNS (1), /* UNUSED: Log_shift. */
-+ COSTS_N_INSNS (1), /* UNUSED: Log_shift_reg. */
-+ 0, /* Extend. */
-+ COSTS_N_INSNS (1), /* Extend_arith. */
-+ 0, /* Bfi. */
-+ 0, /* Bfx. */
-+ COSTS_N_INSNS (5), /* Clz. */
-+ 0, /* rev. */
-+ 0, /* UNUSED: non_exec. */
-+ false /* UNUSED: non_exec_costs_exec. */
-+ },
-+ {
-+ /* MULT SImode */
-+ {
-+ COSTS_N_INSNS (3), /* Simple. */
-+ 0, /* Flag_setting. */
-+ 0, /* Extend. */
-+ 0, /* Add. */
-+ COSTS_N_INSNS (1), /* Extend_add. */
-+ COSTS_N_INSNS (21) /* Idiv. */
-+ },
-+ /* MULT DImode */
-+ {
-+ COSTS_N_INSNS (3), /* Simple. */
-+ 0, /* Flag_setting. */
-+ 0, /* Extend. */
-+ 0, /* Add. */
-+ COSTS_N_INSNS (1), /* Extend_add. */
-+ COSTS_N_INSNS (37) /* Idiv. */
-+ },
-+ },
-+ /* LD/ST */
-+ {
-+ COSTS_N_INSNS (2), /* Load. */
-+ COSTS_N_INSNS (2), /* Load_sign_extend. */
-+ COSTS_N_INSNS (2), /* Ldrd. */
-+ 0, /* N/A: Ldm_1st. */
-+ 0, /* N/A: Ldm_regs_per_insn_1st. */
-+ 0, /* N/A: Ldm_regs_per_insn_subsequent. */
-+ COSTS_N_INSNS (3), /* Loadf. */
-+ COSTS_N_INSNS (3), /* Loadd. */
-+ 0, /* N/A: Load_unaligned. */
-+ 0, /* Store. */
-+ 0, /* Strd. */
-+ 0, /* N/A: Stm_1st. */
-+ 0, /* N/A: Stm_regs_per_insn_1st. */
-+ 0, /* N/A: Stm_regs_per_insn_subsequent. */
-+ 0, /* Storef. */
-+ 0, /* Stored. */
-+ COSTS_N_INSNS (1) /* Store_unaligned. */
-+ },
-+ {
-+ /* FP SFmode */
-+ {
-+ COSTS_N_INSNS (11), /* Div. */
-+ COSTS_N_INSNS (5), /* Mult. */
-+ COSTS_N_INSNS (5), /* Mult_addsub. */
-+ COSTS_N_INSNS (5), /* Fma. */
-+ COSTS_N_INSNS (3), /* Addsub. */
-+ 0, /* Fpconst. */
-+ COSTS_N_INSNS (1), /* Neg. */
-+ 0, /* Compare. */
-+ COSTS_N_INSNS (5), /* Widen. */
-+ COSTS_N_INSNS (5), /* Narrow. */
-+ COSTS_N_INSNS (5), /* Toint. */
-+ COSTS_N_INSNS (5), /* Fromint. */
-+ COSTS_N_INSNS (1) /* Roundint. */
-+ },
-+ /* FP DFmode */
-+ {
-+ COSTS_N_INSNS (21), /* Div. */
-+ COSTS_N_INSNS (5), /* Mult. */
-+ COSTS_N_INSNS (5), /* Mult_addsub. */
-+ COSTS_N_INSNS (5), /* Fma. */
-+ COSTS_N_INSNS (3), /* Addsub. */
-+ 0, /* Fpconst. */
-+ COSTS_N_INSNS (1), /* Neg. */
-+ 0, /* Compare. */
-+ COSTS_N_INSNS (5), /* Widen. */
-+ COSTS_N_INSNS (5), /* Narrow. */
-+ COSTS_N_INSNS (5), /* Toint. */
-+ COSTS_N_INSNS (5), /* Fromint. */
-+ COSTS_N_INSNS (1) /* Roundint. */
-+ }
-+ },
-+ /* Vector */
-+ {
-+ COSTS_N_INSNS (1) /* Alu. */
-+ }
-+};
-+
-+
-+
-+#endif
-+
---- a/src/gcc/config/aarch64/aarch64-cores.def
-+++ b/src/gcc/config/aarch64/aarch64-cores.def
-@@ -34,9 +34,10 @@
-
- /* V8 Architecture Processors. */
-
--AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8, AARCH64_FL_FPSIMD | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa53)
--AARCH64_CORE("cortex-a57", cortexa15, cortexa15, 8, AARCH64_FL_FPSIMD | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57)
-+AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8, AARCH64_FL_FPSIMD | AARCH64_FL_CRC, cortexa53)
-+AARCH64_CORE("cortex-a57", cortexa15, cortexa15, 8, AARCH64_FL_FPSIMD | AARCH64_FL_CRC, cortexa57)
-+AARCH64_CORE("thunderx", thunderx, thunderx, 8, AARCH64_FL_FPSIMD | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx)
-
- /* V8 big.LITTLE implementations. */
-
--AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8, AARCH64_FL_FPSIMD | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57)
-+AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8, AARCH64_FL_FPSIMD | AARCH64_FL_CRC, cortexa57)
---- a/src/gcc/config/aarch64/atomics.md
-+++ b/src/gcc/config/aarch64/atomics.md
-@@ -119,7 +119,7 @@
- [(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "+Q")
- (unspec_volatile:ALLI
- [(atomic_op:ALLI (match_dup 0)
-- (match_operand:ALLI 1 "<atomic_op_operand>" "rn"))
-+ (match_operand:ALLI 1 "<atomic_op_operand>" "r<lconst_atomic>"))
- (match_operand:SI 2 "const_int_operand")] ;; model
- UNSPECV_ATOMIC_OP))
- (clobber (reg:CC CC_REGNUM))
-@@ -141,7 +141,7 @@
- (unspec_volatile:ALLI
- [(not:ALLI
- (and:ALLI (match_dup 0)
-- (match_operand:ALLI 1 "aarch64_logical_operand" "rn")))
-+ (match_operand:ALLI 1 "aarch64_logical_operand" "r<lconst_atomic>")))
- (match_operand:SI 2 "const_int_operand")] ;; model
- UNSPECV_ATOMIC_OP))
- (clobber (reg:CC CC_REGNUM))
-@@ -164,7 +164,7 @@
- (set (match_dup 1)
- (unspec_volatile:ALLI
- [(atomic_op:ALLI (match_dup 1)
-- (match_operand:ALLI 2 "<atomic_op_operand>" "rn"))
-+ (match_operand:ALLI 2 "<atomic_op_operand>" "r<lconst_atomic>"))
- (match_operand:SI 3 "const_int_operand")] ;; model
- UNSPECV_ATOMIC_OP))
- (clobber (reg:CC CC_REGNUM))
-@@ -188,7 +188,7 @@
- (unspec_volatile:ALLI
- [(not:ALLI
- (and:ALLI (match_dup 1)
-- (match_operand:ALLI 2 "aarch64_logical_operand" "rn")))
-+ (match_operand:ALLI 2 "aarch64_logical_operand" "r<lconst_atomic>")))
- (match_operand:SI 3 "const_int_operand")] ;; model
- UNSPECV_ATOMIC_OP))
- (clobber (reg:CC CC_REGNUM))
-@@ -209,7 +209,7 @@
- [(set (match_operand:ALLI 0 "register_operand" "=&r")
- (atomic_op:ALLI
- (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")
-- (match_operand:ALLI 2 "<atomic_op_operand>" "rn")))
-+ (match_operand:ALLI 2 "<atomic_op_operand>" "r<lconst_atomic>")))
- (set (match_dup 1)
- (unspec_volatile:ALLI
- [(match_dup 1) (match_dup 2)
-@@ -233,7 +233,7 @@
- (not:ALLI
- (and:ALLI
- (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")
-- (match_operand:ALLI 2 "aarch64_logical_operand" "rn"))))
-+ (match_operand:ALLI 2 "aarch64_logical_operand" "r<lconst_atomic>"))))
- (set (match_dup 1)
- (unspec_volatile:ALLI
- [(match_dup 1) (match_dup 2)
---- a/src/gcc/config/aarch64/aarch64-tune.md
-+++ b/src/gcc/config/aarch64/aarch64-tune.md
-@@ -1,5 +1,5 @@
- ;; -*- buffer-read-only: t -*-
- ;; Generated automatically by gentune.sh from aarch64-cores.def
- (define_attr "tune"
-- "cortexa53,cortexa15,cortexa57cortexa53"
-+ "cortexa53,cortexa15,thunderx,cortexa57cortexa53"
- (const (symbol_ref "((enum attr_tune) aarch64_tune)")))
---- a/src/gcc/config/aarch64/aarch64-builtins.c
-+++ b/src/gcc/config/aarch64/aarch64-builtins.c
-@@ -47,52 +47,27 @@
- #include "gimple.h"
- #include "gimple-iterator.h"
-
--enum aarch64_simd_builtin_type_mode
--{
-- T_V8QI,
-- T_V4HI,
-- T_V2SI,
-- T_V2SF,
-- T_DI,
-- T_DF,
-- T_V16QI,
-- T_V8HI,
-- T_V4SI,
-- T_V4SF,
-- T_V2DI,
-- T_V2DF,
-- T_TI,
-- T_EI,
-- T_OI,
-- T_XI,
-- T_SI,
-- T_SF,
-- T_HI,
-- T_QI,
-- T_MAX
--};
--
--#define v8qi_UP T_V8QI
--#define v4hi_UP T_V4HI
--#define v2si_UP T_V2SI
--#define v2sf_UP T_V2SF
--#define di_UP T_DI
--#define df_UP T_DF
--#define v16qi_UP T_V16QI
--#define v8hi_UP T_V8HI
--#define v4si_UP T_V4SI
--#define v4sf_UP T_V4SF
--#define v2di_UP T_V2DI
--#define v2df_UP T_V2DF
--#define ti_UP T_TI
--#define ei_UP T_EI
--#define oi_UP T_OI
--#define xi_UP T_XI
--#define si_UP T_SI
--#define sf_UP T_SF
--#define hi_UP T_HI
--#define qi_UP T_QI
--
-+#define v8qi_UP V8QImode
-+#define v4hi_UP V4HImode
-+#define v2si_UP V2SImode
-+#define v2sf_UP V2SFmode
-+#define di_UP DImode
-+#define df_UP DFmode
-+#define v16qi_UP V16QImode
-+#define v8hi_UP V8HImode
-+#define v4si_UP V4SImode
-+#define v4sf_UP V4SFmode
-+#define v2di_UP V2DImode
-+#define v2df_UP V2DFmode
-+#define ti_UP TImode
-+#define ei_UP EImode
-+#define oi_UP OImode
-+#define ci_UP CImode
-+#define xi_UP XImode
-+#define si_UP SImode
-+#define sf_UP SFmode
-+#define hi_UP HImode
-+#define qi_UP QImode
- #define UP(X) X##_UP
-
- #define SIMD_MAX_BUILTIN_ARGS 5
-@@ -107,8 +82,6 @@
- qualifier_const = 0x2, /* 1 << 1 */
- /* T *foo. */
- qualifier_pointer = 0x4, /* 1 << 2 */
-- /* const T *foo. */
-- qualifier_const_pointer = 0x6, /* qualifier_const | qualifier_pointer */
- /* Used when expanding arguments if an operand could
- be an immediate. */
- qualifier_immediate = 0x8, /* 1 << 3 */
-@@ -123,7 +96,7 @@
- qualifier_map_mode = 0x80, /* 1 << 7 */
- /* qualifier_pointer | qualifier_map_mode */
- qualifier_pointer_map_mode = 0x84,
-- /* qualifier_const_pointer | qualifier_map_mode */
-+ /* qualifier_const | qualifier_pointer | qualifier_map_mode */
- qualifier_const_pointer_map_mode = 0x86,
- /* Polynomial types. */
- qualifier_poly = 0x100
-@@ -132,7 +105,7 @@
- typedef struct
- {
- const char *name;
-- enum aarch64_simd_builtin_type_mode mode;
-+ enum machine_mode mode;
- const enum insn_code code;
- unsigned int fcode;
- enum aarch64_type_qualifiers *qualifiers;
-@@ -147,16 +120,49 @@
- = { qualifier_unsigned, qualifier_unsigned };
- #define TYPES_UNOPU (aarch64_types_unopu_qualifiers)
- #define TYPES_CREATE (aarch64_types_unop_qualifiers)
--#define TYPES_REINTERP (aarch64_types_unop_qualifiers)
-+#define TYPES_REINTERP_SS (aarch64_types_unop_qualifiers)
- static enum aarch64_type_qualifiers
-+aarch64_types_unop_su_qualifiers[SIMD_MAX_BUILTIN_ARGS]
-+ = { qualifier_none, qualifier_unsigned };
-+#define TYPES_REINTERP_SU (aarch64_types_unop_su_qualifiers)
-+static enum aarch64_type_qualifiers
-+aarch64_types_unop_sp_qualifiers[SIMD_MAX_BUILTIN_ARGS]
-+ = { qualifier_none, qualifier_poly };
-+#define TYPES_REINTERP_SP (aarch64_types_unop_sp_qualifiers)
-+static enum aarch64_type_qualifiers
-+aarch64_types_unop_us_qualifiers[SIMD_MAX_BUILTIN_ARGS]
-+ = { qualifier_unsigned, qualifier_none };
-+#define TYPES_REINTERP_US (aarch64_types_unop_us_qualifiers)
-+static enum aarch64_type_qualifiers
-+aarch64_types_unop_ps_qualifiers[SIMD_MAX_BUILTIN_ARGS]
-+ = { qualifier_poly, qualifier_none };
-+#define TYPES_REINTERP_PS (aarch64_types_unop_ps_qualifiers)
-+static enum aarch64_type_qualifiers
- aarch64_types_binop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
- = { qualifier_none, qualifier_none, qualifier_maybe_immediate };
- #define TYPES_BINOP (aarch64_types_binop_qualifiers)
- static enum aarch64_type_qualifiers
-+aarch64_types_cmtst_qualifiers[SIMD_MAX_BUILTIN_ARGS]
-+ = { qualifier_none, qualifier_none, qualifier_none,
-+ qualifier_internal, qualifier_internal };
-+#define TYPES_TST (aarch64_types_cmtst_qualifiers)
-+static enum aarch64_type_qualifiers
-+aarch64_types_binopv_qualifiers[SIMD_MAX_BUILTIN_ARGS]
-+ = { qualifier_void, qualifier_none, qualifier_none };
-+#define TYPES_BINOPV (aarch64_types_binopv_qualifiers)
-+static enum aarch64_type_qualifiers
- aarch64_types_binopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
- = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned };
- #define TYPES_BINOPU (aarch64_types_binopu_qualifiers)
- static enum aarch64_type_qualifiers
-+aarch64_types_binop_uus_qualifiers[SIMD_MAX_BUILTIN_ARGS]
-+ = { qualifier_unsigned, qualifier_unsigned, qualifier_none };
-+#define TYPES_BINOP_UUS (aarch64_types_binop_uus_qualifiers)
-+static enum aarch64_type_qualifiers
-+aarch64_types_binop_ssu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
-+ = { qualifier_none, qualifier_none, qualifier_unsigned };
-+#define TYPES_BINOP_SSU (aarch64_types_binop_ssu_qualifiers)
-+static enum aarch64_type_qualifiers
- aarch64_types_binopp_qualifiers[SIMD_MAX_BUILTIN_ARGS]
- = { qualifier_poly, qualifier_poly, qualifier_poly };
- #define TYPES_BINOPP (aarch64_types_binopp_qualifiers)
-@@ -172,10 +178,10 @@
- #define TYPES_TERNOPU (aarch64_types_ternopu_qualifiers)
-
- static enum aarch64_type_qualifiers
--aarch64_types_quadop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
-+aarch64_types_ternop_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
- = { qualifier_none, qualifier_none, qualifier_none,
-- qualifier_none, qualifier_none };
--#define TYPES_QUADOP (aarch64_types_quadop_qualifiers)
-+ qualifier_none, qualifier_immediate };
-+#define TYPES_TERNOP_LANE (aarch64_types_ternop_lane_qualifiers)
-
- static enum aarch64_type_qualifiers
- aarch64_types_getlane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
-@@ -183,9 +189,14 @@
- #define TYPES_GETLANE (aarch64_types_getlane_qualifiers)
- #define TYPES_SHIFTIMM (aarch64_types_getlane_qualifiers)
- static enum aarch64_type_qualifiers
-+aarch64_types_shift_to_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
-+ = { qualifier_unsigned, qualifier_none, qualifier_immediate };
-+#define TYPES_SHIFTIMM_USS (aarch64_types_shift_to_unsigned_qualifiers)
-+static enum aarch64_type_qualifiers
- aarch64_types_unsigned_shift_qualifiers[SIMD_MAX_BUILTIN_ARGS]
- = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate };
- #define TYPES_USHIFTIMM (aarch64_types_unsigned_shift_qualifiers)
-+
- static enum aarch64_type_qualifiers
- aarch64_types_setlane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
- = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate };
-@@ -194,6 +205,13 @@
- #define TYPES_SHIFTACC (aarch64_types_setlane_qualifiers)
-
- static enum aarch64_type_qualifiers
-+aarch64_types_unsigned_shiftacc_qualifiers[SIMD_MAX_BUILTIN_ARGS]
-+ = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
-+ qualifier_immediate };
-+#define TYPES_USHIFTACC (aarch64_types_unsigned_shiftacc_qualifiers)
-+
-+
-+static enum aarch64_type_qualifiers
- aarch64_types_combine_qualifiers[SIMD_MAX_BUILTIN_ARGS]
- = { qualifier_none, qualifier_none, qualifier_none };
- #define TYPES_COMBINE (aarch64_types_combine_qualifiers)
-@@ -230,6 +248,11 @@
- = { qualifier_void, qualifier_pointer_map_mode, qualifier_none };
- #define TYPES_STORE1 (aarch64_types_store1_qualifiers)
- #define TYPES_STORESTRUCT (aarch64_types_store1_qualifiers)
-+static enum aarch64_type_qualifiers
-+aarch64_types_storestruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
-+ = { qualifier_void, qualifier_pointer_map_mode,
-+ qualifier_none, qualifier_none };
-+#define TYPES_STORESTRUCT_LANE (aarch64_types_storestruct_lane_qualifiers)
-
- #define CF0(N, X) CODE_FOR_aarch64_##N##X
- #define CF1(N, X) CODE_FOR_##N##X##1
-@@ -239,7 +262,7 @@
- #define CF10(N, X) CODE_FOR_##N##X
-
- #define VAR1(T, N, MAP, A) \
-- {#N, UP (A), CF##MAP (N, A), 0, TYPES_##T},
-+ {#N #A, UP (A), CF##MAP (N, A), 0, TYPES_##T},
- #define VAR2(T, N, MAP, A, B) \
- VAR1 (T, N, MAP, A) \
- VAR1 (T, N, MAP, B)
-@@ -274,96 +297,34 @@
- VAR11 (T, N, MAP, A, B, C, D, E, F, G, H, I, J, K) \
- VAR1 (T, N, MAP, L)
-
--/* BUILTIN_<ITERATOR> macros should expand to cover the same range of
-- modes as is given for each define_mode_iterator in
-- config/aarch64/iterators.md. */
-+#include "aarch64-builtin-iterators.h"
-
--#define BUILTIN_DX(T, N, MAP) \
-- VAR2 (T, N, MAP, di, df)
--#define BUILTIN_GPF(T, N, MAP) \
-- VAR2 (T, N, MAP, sf, df)
--#define BUILTIN_SDQ_I(T, N, MAP) \
-- VAR4 (T, N, MAP, qi, hi, si, di)
--#define BUILTIN_SD_HSI(T, N, MAP) \
-- VAR2 (T, N, MAP, hi, si)
--#define BUILTIN_V2F(T, N, MAP) \
-- VAR2 (T, N, MAP, v2sf, v2df)
--#define BUILTIN_VALL(T, N, MAP) \
-- VAR10 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, \
-- v4si, v2di, v2sf, v4sf, v2df)
--#define BUILTIN_VALLDI(T, N, MAP) \
-- VAR11 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, \
-- v4si, v2di, v2sf, v4sf, v2df, di)
--#define BUILTIN_VALLDIF(T, N, MAP) \
-- VAR12 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, \
-- v4si, v2di, v2sf, v4sf, v2df, di, df)
--#define BUILTIN_VB(T, N, MAP) \
-- VAR2 (T, N, MAP, v8qi, v16qi)
--#define BUILTIN_VD(T, N, MAP) \
-- VAR4 (T, N, MAP, v8qi, v4hi, v2si, v2sf)
--#define BUILTIN_VDC(T, N, MAP) \
-- VAR6 (T, N, MAP, v8qi, v4hi, v2si, v2sf, di, df)
--#define BUILTIN_VDIC(T, N, MAP) \
-- VAR3 (T, N, MAP, v8qi, v4hi, v2si)
--#define BUILTIN_VDN(T, N, MAP) \
-- VAR3 (T, N, MAP, v4hi, v2si, di)
--#define BUILTIN_VDQ(T, N, MAP) \
-- VAR7 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di)
--#define BUILTIN_VDQF(T, N, MAP) \
-- VAR3 (T, N, MAP, v2sf, v4sf, v2df)
--#define BUILTIN_VDQH(T, N, MAP) \
-- VAR2 (T, N, MAP, v4hi, v8hi)
--#define BUILTIN_VDQHS(T, N, MAP) \
-- VAR4 (T, N, MAP, v4hi, v8hi, v2si, v4si)
--#define BUILTIN_VDQIF(T, N, MAP) \
-- VAR9 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2sf, v4sf, v2df)
--#define BUILTIN_VDQM(T, N, MAP) \
-- VAR6 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si)
--#define BUILTIN_VDQV(T, N, MAP) \
-- VAR5 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v4si)
--#define BUILTIN_VDQQH(T, N, MAP) \
-- VAR4 (T, N, MAP, v8qi, v16qi, v4hi, v8hi)
--#define BUILTIN_VDQ_BHSI(T, N, MAP) \
-- VAR6 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si)
--#define BUILTIN_VDQ_I(T, N, MAP) \
-- VAR7 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di)
--#define BUILTIN_VDW(T, N, MAP) \
-- VAR3 (T, N, MAP, v8qi, v4hi, v2si)
--#define BUILTIN_VD_BHSI(T, N, MAP) \
-- VAR3 (T, N, MAP, v8qi, v4hi, v2si)
--#define BUILTIN_VD_HSI(T, N, MAP) \
-- VAR2 (T, N, MAP, v4hi, v2si)
--#define BUILTIN_VD_RE(T, N, MAP) \
-- VAR6 (T, N, MAP, v8qi, v4hi, v2si, v2sf, di, df)
--#define BUILTIN_VQ(T, N, MAP) \
-- VAR6 (T, N, MAP, v16qi, v8hi, v4si, v2di, v4sf, v2df)
--#define BUILTIN_VQN(T, N, MAP) \
-- VAR3 (T, N, MAP, v8hi, v4si, v2di)
--#define BUILTIN_VQW(T, N, MAP) \
-- VAR3 (T, N, MAP, v16qi, v8hi, v4si)
--#define BUILTIN_VQ_HSI(T, N, MAP) \
-- VAR2 (T, N, MAP, v8hi, v4si)
--#define BUILTIN_VQ_S(T, N, MAP) \
-- VAR6 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si)
--#define BUILTIN_VSDQ_HSI(T, N, MAP) \
-- VAR6 (T, N, MAP, v4hi, v8hi, v2si, v4si, hi, si)
--#define BUILTIN_VSDQ_I(T, N, MAP) \
-- VAR11 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, qi, hi, si, di)
--#define BUILTIN_VSDQ_I_BHSI(T, N, MAP) \
-- VAR10 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, qi, hi, si)
--#define BUILTIN_VSDQ_I_DI(T, N, MAP) \
-- VAR8 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, di)
--#define BUILTIN_VSD_HSI(T, N, MAP) \
-- VAR4 (T, N, MAP, v4hi, v2si, hi, si)
--#define BUILTIN_VSQN_HSDI(T, N, MAP) \
-- VAR6 (T, N, MAP, v8hi, v4si, v2di, hi, si, di)
--#define BUILTIN_VSTRUCT(T, N, MAP) \
-- VAR3 (T, N, MAP, oi, ci, xi)
--
- static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = {
- #include "aarch64-simd-builtins.def"
- };
-
-+/* There are only 8 CRC32 builtins.  Probably not worth their own .def file.  */
-+#define AARCH64_CRC32_BUILTINS \
-+ CRC32_BUILTIN (crc32b, QI) \
-+ CRC32_BUILTIN (crc32h, HI) \
-+ CRC32_BUILTIN (crc32w, SI) \
-+ CRC32_BUILTIN (crc32x, DI) \
-+ CRC32_BUILTIN (crc32cb, QI) \
-+ CRC32_BUILTIN (crc32ch, HI) \
-+ CRC32_BUILTIN (crc32cw, SI) \
-+ CRC32_BUILTIN (crc32cx, DI)
-+
-+typedef struct
-+{
-+ const char *name;
-+ enum machine_mode mode;
-+ const enum insn_code icode;
-+ unsigned int fcode;
-+} aarch64_crc_builtin_datum;
-+
-+#define CRC32_BUILTIN(N, M) \
-+ AARCH64_BUILTIN_##N,
-+
- #undef VAR1
- #define VAR1(T, N, MAP, A) \
- AARCH64_SIMD_BUILTIN_##T##_##N##A,
-@@ -371,13 +332,32 @@
- enum aarch64_builtins
- {
- AARCH64_BUILTIN_MIN,
-+
-+ AARCH64_BUILTIN_GET_FPCR,
-+ AARCH64_BUILTIN_SET_FPCR,
-+ AARCH64_BUILTIN_GET_FPSR,
-+ AARCH64_BUILTIN_SET_FPSR,
-+
- AARCH64_SIMD_BUILTIN_BASE,
- #include "aarch64-simd-builtins.def"
- AARCH64_SIMD_BUILTIN_MAX = AARCH64_SIMD_BUILTIN_BASE
- + ARRAY_SIZE (aarch64_simd_builtin_data),
-+ AARCH64_CRC32_BUILTIN_BASE,
-+ AARCH64_CRC32_BUILTINS
-+ AARCH64_CRC32_BUILTIN_MAX,
- AARCH64_BUILTIN_MAX
- };
-
-+#undef CRC32_BUILTIN
-+#define CRC32_BUILTIN(N, M) \
-+ {"__builtin_aarch64_"#N, M##mode, CODE_FOR_aarch64_##N, AARCH64_BUILTIN_##N},
-+
-+static aarch64_crc_builtin_datum aarch64_crc_builtin_data[] = {
-+ AARCH64_CRC32_BUILTINS
-+};
-+
-+#undef CRC32_BUILTIN
-+
- static GTY(()) tree aarch64_builtin_decls[AARCH64_BUILTIN_MAX];
-
- #define NUM_DREG_TYPES 6
-@@ -639,25 +619,10 @@
- bool print_type_signature_p = false;
- char type_signature[SIMD_MAX_BUILTIN_ARGS] = { 0 };
- aarch64_simd_builtin_datum *d = &aarch64_simd_builtin_data[i];
-- const char *const modenames[] =
-- {
-- "v8qi", "v4hi", "v2si", "v2sf", "di", "df",
-- "v16qi", "v8hi", "v4si", "v4sf", "v2di", "v2df",
-- "ti", "ei", "oi", "xi", "si", "sf", "hi", "qi"
-- };
-- const enum machine_mode modes[] =
-- {
-- V8QImode, V4HImode, V2SImode, V2SFmode, DImode, DFmode,
-- V16QImode, V8HImode, V4SImode, V4SFmode, V2DImode,
-- V2DFmode, TImode, EImode, OImode, XImode, SImode,
-- SFmode, HImode, QImode
-- };
- char namebuf[60];
- tree ftype = NULL;
- tree fndecl = NULL;
-
-- gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
--
- d->fcode = fcode;
-
- /* We must track two variables here. op_num is
-@@ -705,7 +670,7 @@
- /* Some builtins have different user-facing types
- for certain arguments, encoded in d->mode. */
- if (qualifiers & qualifier_map_mode)
-- op_mode = modes[d->mode];
-+ op_mode = d->mode;
-
- /* For pointers, we want a pointer to the basic type
- of the vector. */
-@@ -737,11 +702,11 @@
- gcc_assert (ftype != NULL);
-
- if (print_type_signature_p)
-- snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s%s_%s",
-- d->name, modenames[d->mode], type_signature);
-+ snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s_%s",
-+ d->name, type_signature);
- else
-- snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s%s",
-- d->name, modenames[d->mode]);
-+ snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s",
-+ d->name);
-
- fndecl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD,
- NULL, NULL_TREE);
-@@ -749,11 +714,49 @@
- }
- }
-
-+static void
-+aarch64_init_crc32_builtins ()
-+{
-+ tree usi_type = aarch64_build_unsigned_type (SImode);
-+ unsigned int i = 0;
-+
-+ for (i = 0; i < ARRAY_SIZE (aarch64_crc_builtin_data); ++i)
-+ {
-+ aarch64_crc_builtin_datum* d = &aarch64_crc_builtin_data[i];
-+ tree argtype = aarch64_build_unsigned_type (d->mode);
-+ tree ftype = build_function_type_list (usi_type, usi_type, argtype, NULL_TREE);
-+ tree fndecl = add_builtin_function (d->name, ftype, d->fcode,
-+ BUILT_IN_MD, NULL, NULL_TREE);
-+
-+ aarch64_builtin_decls[d->fcode] = fndecl;
-+ }
-+}
-+
- void
- aarch64_init_builtins (void)
- {
-+ tree ftype_set_fpr
-+ = build_function_type_list (void_type_node, unsigned_type_node, NULL);
-+ tree ftype_get_fpr
-+ = build_function_type_list (unsigned_type_node, NULL);
-+
-+ aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR]
-+ = add_builtin_function ("__builtin_aarch64_get_fpcr", ftype_get_fpr,
-+ AARCH64_BUILTIN_GET_FPCR, BUILT_IN_MD, NULL, NULL_TREE);
-+ aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR]
-+ = add_builtin_function ("__builtin_aarch64_set_fpcr", ftype_set_fpr,
-+ AARCH64_BUILTIN_SET_FPCR, BUILT_IN_MD, NULL, NULL_TREE);
-+ aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR]
-+ = add_builtin_function ("__builtin_aarch64_get_fpsr", ftype_get_fpr,
-+ AARCH64_BUILTIN_GET_FPSR, BUILT_IN_MD, NULL, NULL_TREE);
-+ aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR]
-+ = add_builtin_function ("__builtin_aarch64_set_fpsr", ftype_set_fpr,
-+ AARCH64_BUILTIN_SET_FPSR, BUILT_IN_MD, NULL, NULL_TREE);
-+
- if (TARGET_SIMD)
- aarch64_init_simd_builtins ();
-+ if (TARGET_CRC32)
-+ aarch64_init_crc32_builtins ();
- }
-
- tree
-@@ -774,9 +777,8 @@
-
- static rtx
- aarch64_simd_expand_args (rtx target, int icode, int have_retval,
-- tree exp, ...)
-+ tree exp, builtin_simd_arg *args)
- {
-- va_list ap;
- rtx pat;
- tree arg[SIMD_MAX_BUILTIN_ARGS];
- rtx op[SIMD_MAX_BUILTIN_ARGS];
-@@ -790,11 +792,9 @@
- || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
- target = gen_reg_rtx (tmode);
-
-- va_start (ap, exp);
--
- for (;;)
- {
-- builtin_simd_arg thisarg = (builtin_simd_arg) va_arg (ap, int);
-+ builtin_simd_arg thisarg = args[argc];
-
- if (thisarg == SIMD_ARG_STOP)
- break;
-@@ -818,8 +818,11 @@
- case SIMD_ARG_CONSTANT:
- if (!(*insn_data[icode].operand[argc + have_retval].predicate)
- (op[argc], mode[argc]))
-+ {
- error_at (EXPR_LOCATION (exp), "incompatible type for argument %d, "
- "expected %<const int%>", argc + 1);
-+ return const0_rtx;
-+ }
- break;
-
- case SIMD_ARG_STOP:
-@@ -830,8 +833,6 @@
- }
- }
-
-- va_end (ap);
--
- if (have_retval)
- switch (argc)
- {
-@@ -886,7 +887,7 @@
- }
-
- if (!pat)
-- return 0;
-+ return NULL_RTX;
-
- emit_insn (pat);
-
-@@ -945,14 +946,45 @@
- /* The interface to aarch64_simd_expand_args expects a 0 if
- the function is void, and a 1 if it is not. */
- return aarch64_simd_expand_args
-- (target, icode, !is_void, exp,
-- args[1],
-- args[2],
-- args[3],
-- args[4],
-- SIMD_ARG_STOP);
-+ (target, icode, !is_void, exp, &args[1]);
- }
-
-+rtx
-+aarch64_crc32_expand_builtin (int fcode, tree exp, rtx target)
-+{
-+ rtx pat;
-+ aarch64_crc_builtin_datum *d
-+ = &aarch64_crc_builtin_data[fcode - (AARCH64_CRC32_BUILTIN_BASE + 1)];
-+ enum insn_code icode = d->icode;
-+ tree arg0 = CALL_EXPR_ARG (exp, 0);
-+ tree arg1 = CALL_EXPR_ARG (exp, 1);
-+ rtx op0 = expand_normal (arg0);
-+ rtx op1 = expand_normal (arg1);
-+ enum machine_mode tmode = insn_data[icode].operand[0].mode;
-+ enum machine_mode mode0 = insn_data[icode].operand[1].mode;
-+ enum machine_mode mode1 = insn_data[icode].operand[2].mode;
-+
-+ if (! target
-+ || GET_MODE (target) != tmode
-+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
-+ target = gen_reg_rtx (tmode);
-+
-+ gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
-+ && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
-+
-+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
-+ op0 = copy_to_mode_reg (mode0, op0);
-+ if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
-+ op1 = copy_to_mode_reg (mode1, op1);
-+
-+ pat = GEN_FCN (icode) (target, op0, op1);
-+ if (!pat)
-+ return NULL_RTX;
-+
-+ emit_insn (pat);
-+ return target;
-+}
-+
- /* Expand an expression EXP that calls a built-in function,
- with result going to TARGET if that's convenient. */
- rtx
-@@ -964,11 +996,43 @@
- {
- tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
- int fcode = DECL_FUNCTION_CODE (fndecl);
-+ int icode;
-+ rtx pat, op0;
-+ tree arg0;
-
-- if (fcode >= AARCH64_SIMD_BUILTIN_BASE)
-+ switch (fcode)
-+ {
-+ case AARCH64_BUILTIN_GET_FPCR:
-+ case AARCH64_BUILTIN_SET_FPCR:
-+ case AARCH64_BUILTIN_GET_FPSR:
-+ case AARCH64_BUILTIN_SET_FPSR:
-+ if ((fcode == AARCH64_BUILTIN_GET_FPCR)
-+ || (fcode == AARCH64_BUILTIN_GET_FPSR))
-+ {
-+ icode = (fcode == AARCH64_BUILTIN_GET_FPSR) ?
-+ CODE_FOR_get_fpsr : CODE_FOR_get_fpcr;
-+ target = gen_reg_rtx (SImode);
-+ pat = GEN_FCN (icode) (target);
-+ }
-+ else
-+ {
-+ target = NULL_RTX;
-+ icode = (fcode == AARCH64_BUILTIN_SET_FPSR) ?
-+ CODE_FOR_set_fpsr : CODE_FOR_set_fpcr;
-+ arg0 = CALL_EXPR_ARG (exp, 0);
-+ op0 = expand_normal (arg0);
-+ pat = GEN_FCN (icode) (op0);
-+ }
-+ emit_insn (pat);
-+ return target;
-+ }
-+
-+ if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <= AARCH64_SIMD_BUILTIN_MAX)
- return aarch64_simd_expand_builtin (fcode, exp, target);
-+ else if (fcode >= AARCH64_CRC32_BUILTIN_BASE && fcode <= AARCH64_CRC32_BUILTIN_MAX)
-+ return aarch64_crc32_expand_builtin (fcode, exp, target);
-
-- return NULL_RTX;
-+ gcc_unreachable ();
- }
-
- tree
-@@ -1086,7 +1150,29 @@
-
- return aarch64_builtin_decls[builtin];
- }
--
-+ case BUILT_IN_BSWAP16:
-+#undef AARCH64_CHECK_BUILTIN_MODE
-+#define AARCH64_CHECK_BUILTIN_MODE(C, N) \
-+ (out_mode == N##Imode && out_n == C \
-+ && in_mode == N##Imode && in_n == C)
-+ if (AARCH64_CHECK_BUILTIN_MODE (4, H))
-+ return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv4hi];
-+ else if (AARCH64_CHECK_BUILTIN_MODE (8, H))
-+ return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv8hi];
-+ else
-+ return NULL_TREE;
-+ case BUILT_IN_BSWAP32:
-+ if (AARCH64_CHECK_BUILTIN_MODE (2, S))
-+ return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv2si];
-+ else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
-+ return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv4si];
-+ else
-+ return NULL_TREE;
-+ case BUILT_IN_BSWAP64:
-+ if (AARCH64_CHECK_BUILTIN_MODE (2, D))
-+ return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv2di];
-+ else
-+ return NULL_TREE;
- default:
- return NULL_TREE;
- }
-@@ -1111,22 +1197,25 @@
- BUILTIN_VALLDI (UNOP, abs, 2)
- return fold_build1 (ABS_EXPR, type, args[0]);
- break;
-- BUILTIN_VALLDI (BINOP, cmge, 0)
-- return fold_build2 (GE_EXPR, type, args[0], args[1]);
-- break;
-- BUILTIN_VALLDI (BINOP, cmgt, 0)
-- return fold_build2 (GT_EXPR, type, args[0], args[1]);
-- break;
-- BUILTIN_VALLDI (BINOP, cmeq, 0)
-- return fold_build2 (EQ_EXPR, type, args[0], args[1]);
-- break;
-- BUILTIN_VSDQ_I_DI (BINOP, cmtst, 0)
-- {
-- tree and_node = fold_build2 (BIT_AND_EXPR, type, args[0], args[1]);
-- tree vec_zero_node = build_zero_cst (type);
-- return fold_build2 (NE_EXPR, type, and_node, vec_zero_node);
-- break;
-- }
-+ VAR1 (REINTERP_SS, reinterpretdi, 0, df)
-+ VAR1 (REINTERP_SS, reinterpretv8qi, 0, df)
-+ VAR1 (REINTERP_SS, reinterpretv4hi, 0, df)
-+ VAR1 (REINTERP_SS, reinterpretv2si, 0, df)
-+ VAR1 (REINTERP_SS, reinterpretv2sf, 0, df)
-+ BUILTIN_VD (REINTERP_SS, reinterpretdf, 0)
-+ BUILTIN_VD (REINTERP_SU, reinterpretdf, 0)
-+ VAR1 (REINTERP_US, reinterpretdi, 0, df)
-+ VAR1 (REINTERP_US, reinterpretv8qi, 0, df)
-+ VAR1 (REINTERP_US, reinterpretv4hi, 0, df)
-+ VAR1 (REINTERP_US, reinterpretv2si, 0, df)
-+ VAR1 (REINTERP_US, reinterpretv2sf, 0, df)
-+ BUILTIN_VD (REINTERP_SP, reinterpretdf, 0)
-+ VAR1 (REINTERP_PS, reinterpretdi, 0, df)
-+ VAR1 (REINTERP_PS, reinterpretv8qi, 0, df)
-+ VAR1 (REINTERP_PS, reinterpretv4hi, 0, df)
-+ VAR1 (REINTERP_PS, reinterpretv2si, 0, df)
-+ VAR1 (REINTERP_PS, reinterpretv2sf, 0, df)
-+ return fold_build1 (VIEW_CONVERT_EXPR, type, args[0]);
- VAR1 (UNOP, floatv2si, 2, v2sf)
- VAR1 (UNOP, floatv4si, 2, v4sf)
- VAR1 (UNOP, floatv2di, 2, v2df)
-@@ -1146,6 +1235,20 @@
- tree call = gimple_call_fn (stmt);
- tree fndecl;
- gimple new_stmt = NULL;
-+
-+ /* The operations folded below are reduction operations. These are
-+ defined to leave their result in the 0'th element (from the perspective
-+ of GCC). The architectural instruction we are folding will leave the
-+ result in the 0'th element (from the perspective of the architecture).
-+ For big-endian systems, these perspectives are not aligned.
-+
-+ It is therefore wrong to perform this fold on big-endian. There
-+ are some tricks we could play with shuffling, but the mid-end is
-+ inconsistent in the way it treats reduction operations, so we will
-+ end up in difficulty. Until we fix the ambiguity - just bail out. */
-+ if (BYTES_BIG_ENDIAN)
-+ return false;
-+
- if (call)
- {
- fndecl = gimple_call_fndecl (stmt);
-@@ -1196,43 +1299,108 @@
- return changed;
- }
-
-+void
-+aarch64_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
-+{
-+ const unsigned AARCH64_FE_INVALID = 1;
-+ const unsigned AARCH64_FE_DIVBYZERO = 2;
-+ const unsigned AARCH64_FE_OVERFLOW = 4;
-+ const unsigned AARCH64_FE_UNDERFLOW = 8;
-+ const unsigned AARCH64_FE_INEXACT = 16;
-+ const unsigned HOST_WIDE_INT AARCH64_FE_ALL_EXCEPT = (AARCH64_FE_INVALID
-+ | AARCH64_FE_DIVBYZERO
-+ | AARCH64_FE_OVERFLOW
-+ | AARCH64_FE_UNDERFLOW
-+ | AARCH64_FE_INEXACT);
-+ const unsigned HOST_WIDE_INT AARCH64_FE_EXCEPT_SHIFT = 8;
-+ tree fenv_cr, fenv_sr, get_fpcr, set_fpcr, mask_cr, mask_sr;
-+ tree ld_fenv_cr, ld_fenv_sr, masked_fenv_cr, masked_fenv_sr, hold_fnclex_cr;
-+ tree hold_fnclex_sr, new_fenv_var, reload_fenv, restore_fnenv, get_fpsr, set_fpsr;
-+ tree update_call, atomic_feraiseexcept, hold_fnclex, masked_fenv, ld_fenv;
-+
-+  /* Generate the equivalent of:
-+ unsigned int fenv_cr;
-+ fenv_cr = __builtin_aarch64_get_fpcr ();
-+
-+ unsigned int fenv_sr;
-+ fenv_sr = __builtin_aarch64_get_fpsr ();
-+
-+ Now set all exceptions to non-stop
-+ unsigned int mask_cr
-+ = ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT);
-+ unsigned int masked_cr;
-+ masked_cr = fenv_cr & mask_cr;
-+
-+ And clear all exception flags
-+     unsigned int mask_sr = ~AARCH64_FE_ALL_EXCEPT;
-+     unsigned int masked_sr;
-+     masked_sr = fenv_sr & mask_sr;
-+
-+     __builtin_aarch64_set_fpcr (masked_cr);
-+     __builtin_aarch64_set_fpsr (masked_sr);  */
-+
-+ fenv_cr = create_tmp_var (unsigned_type_node, NULL);
-+ fenv_sr = create_tmp_var (unsigned_type_node, NULL);
-+
-+ get_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR];
-+ set_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR];
-+ get_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR];
-+ set_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR];
-+
-+ mask_cr = build_int_cst (unsigned_type_node,
-+ ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT));
-+ mask_sr = build_int_cst (unsigned_type_node,
-+ ~(AARCH64_FE_ALL_EXCEPT));
-+
-+ ld_fenv_cr = build2 (MODIFY_EXPR, unsigned_type_node,
-+ fenv_cr, build_call_expr (get_fpcr, 0));
-+ ld_fenv_sr = build2 (MODIFY_EXPR, unsigned_type_node,
-+ fenv_sr, build_call_expr (get_fpsr, 0));
-+
-+ masked_fenv_cr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_cr, mask_cr);
-+ masked_fenv_sr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_sr, mask_sr);
-+
-+ hold_fnclex_cr = build_call_expr (set_fpcr, 1, masked_fenv_cr);
-+ hold_fnclex_sr = build_call_expr (set_fpsr, 1, masked_fenv_sr);
-+
-+ hold_fnclex = build2 (COMPOUND_EXPR, void_type_node, hold_fnclex_cr,
-+ hold_fnclex_sr);
-+ masked_fenv = build2 (COMPOUND_EXPR, void_type_node, masked_fenv_cr,
-+ masked_fenv_sr);
-+ ld_fenv = build2 (COMPOUND_EXPR, void_type_node, ld_fenv_cr, ld_fenv_sr);
-+
-+ *hold = build2 (COMPOUND_EXPR, void_type_node,
-+ build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
-+ hold_fnclex);
-+
-+ /* Store the value of masked_fenv to clear the exceptions:
-+ __builtin_aarch64_set_fpsr (masked_fenv_sr); */
-+
-+ *clear = build_call_expr (set_fpsr, 1, masked_fenv_sr);
-+
-+ /* Generate the equivalent of :
-+ unsigned int new_fenv_var;
-+ new_fenv_var = __builtin_aarch64_get_fpsr ();
-+
-+ __builtin_aarch64_set_fpsr (fenv_sr);
-+
-+ __atomic_feraiseexcept (new_fenv_var); */
-+
-+ new_fenv_var = create_tmp_var (unsigned_type_node, NULL);
-+ reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node,
-+ new_fenv_var, build_call_expr (get_fpsr, 0));
-+ restore_fnenv = build_call_expr (set_fpsr, 1, fenv_sr);
-+ atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
-+ update_call = build_call_expr (atomic_feraiseexcept, 1,
-+ fold_convert (integer_type_node, new_fenv_var));
-+ *update = build2 (COMPOUND_EXPR, void_type_node,
-+ build2 (COMPOUND_EXPR, void_type_node,
-+ reload_fenv, restore_fnenv), update_call);
-+}
-+
-+
- #undef AARCH64_CHECK_BUILTIN_MODE
- #undef AARCH64_FIND_FRINT_VARIANT
--#undef BUILTIN_DX
--#undef BUILTIN_SDQ_I
--#undef BUILTIN_SD_HSI
--#undef BUILTIN_V2F
--#undef BUILTIN_VALL
--#undef BUILTIN_VB
--#undef BUILTIN_VD
--#undef BUILTIN_VDC
--#undef BUILTIN_VDIC
--#undef BUILTIN_VDN
--#undef BUILTIN_VDQ
--#undef BUILTIN_VDQF
--#undef BUILTIN_VDQH
--#undef BUILTIN_VDQHS
--#undef BUILTIN_VDQIF
--#undef BUILTIN_VDQM
--#undef BUILTIN_VDQV
--#undef BUILTIN_VDQ_BHSI
--#undef BUILTIN_VDQ_I
--#undef BUILTIN_VDW
--#undef BUILTIN_VD_BHSI
--#undef BUILTIN_VD_HSI
--#undef BUILTIN_VD_RE
--#undef BUILTIN_VQ
--#undef BUILTIN_VQN
--#undef BUILTIN_VQW
--#undef BUILTIN_VQ_HSI
--#undef BUILTIN_VQ_S
--#undef BUILTIN_VSDQ_HSI
--#undef BUILTIN_VSDQ_I
--#undef BUILTIN_VSDQ_I_BHSI
--#undef BUILTIN_VSDQ_I_DI
--#undef BUILTIN_VSD_HSI
--#undef BUILTIN_VSQN_HSDI
--#undef BUILTIN_VSTRUCT
- #undef CF0
- #undef CF1
- #undef CF2
-@@ -1251,3 +1419,4 @@
- #undef VAR10
- #undef VAR11
-
-+#include "gt-aarch64-builtins.h"
---- a/src/gcc/config/aarch64/thunderx.md
-+++ b/src/gcc/config/aarch64/thunderx.md
-@@ -0,0 +1,260 @@
-+;; Cavium ThunderX pipeline description
-+;; Copyright (C) 2014 Free Software Foundation, Inc.
-+;;
-+;; Written by Andrew Pinski <apinski@cavium.com>
-+
-+;; This file is part of GCC.
-+
-+;; GCC is free software; you can redistribute it and/or modify
-+;; it under the terms of the GNU General Public License as published by
-+;; the Free Software Foundation; either version 3, or (at your option)
-+;; any later version.
-+
-+;; GCC is distributed in the hope that it will be useful,
-+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
-+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-+;; GNU General Public License for more details.
-+
-+;; You should have received a copy of the GNU General Public License
-+;; along with GCC; see the file COPYING3. If not see
-+;; <http://www.gnu.org/licenses/>.
-+;; Copyright (C) 2004, 2005, 2006 Cavium Networks.
-+
-+
-+;; Thunder is a dual-issue processor that can issue all instructions on
-+;; pipe0 and a subset on pipe1.
-+
-+
-+(define_automaton "thunderx_main, thunderx_mult, thunderx_divide, thunderx_simd")
-+
-+(define_cpu_unit "thunderx_pipe0" "thunderx_main")
-+(define_cpu_unit "thunderx_pipe1" "thunderx_main")
-+(define_cpu_unit "thunderx_mult" "thunderx_mult")
-+(define_cpu_unit "thunderx_divide" "thunderx_divide")
-+(define_cpu_unit "thunderx_simd" "thunderx_simd")
-+
-+(define_insn_reservation "thunderx_add" 1
-+ (and (eq_attr "tune" "thunderx")
-+ (eq_attr "type" "adc_imm,adc_reg,adr,alu_imm,alu_reg,alus_imm,alus_reg,extend,logic_imm,logic_reg,logics_imm,logics_reg,mov_imm,mov_reg"))
-+ "thunderx_pipe0 | thunderx_pipe1")
-+
-+(define_insn_reservation "thunderx_shift" 1
-+ (and (eq_attr "tune" "thunderx")
-+ (eq_attr "type" "bfm,extend,shift_imm,shift_reg"))
-+ "thunderx_pipe0 | thunderx_pipe1")
-+
-+
-+;; Arithmetic instructions with an extra shift or extend take two cycles.
-+;; FIXME: This needs more attributes on aarch64 than what is currently there;
-+;; this is conservative for now.
-+;; Except this is not correct as this is only for !(LSL && shift by 0/1/2/3)
-+;; Except this is not correct as this is only for !(zero extend)
-+
-+(define_insn_reservation "thunderx_arith_shift" 2
-+ (and (eq_attr "tune" "thunderx")
-+ (eq_attr "type" "alu_ext,alu_shift_imm,alu_shift_reg,alus_ext,logic_shift_imm,logic_shift_reg,logics_shift_imm,logics_shift_reg,alus_shift_imm"))
-+ "thunderx_pipe0 | thunderx_pipe1")
-+
-+(define_insn_reservation "thunderx_csel" 2
-+ (and (eq_attr "tune" "thunderx")
-+ (eq_attr "type" "csel"))
-+ "thunderx_pipe0 | thunderx_pipe1")
-+
-+;; Multiply, multiply-accumulate and count-leading-zeros can only happen on pipe 1
-+
-+(define_insn_reservation "thunderx_mul" 4
-+ (and (eq_attr "tune" "thunderx")
-+ (eq_attr "type" "mul,muls,mla,mlas,clz,smull,umull,smlal,umlal"))
-+ "thunderx_pipe1 + thunderx_mult")
-+
-+;; Multiply high instructions take an extra cycle and cause the multiply unit to
-+;; be busy for an extra cycle.
-+
-+;(define_insn_reservation "thunderx_mul_high" 5
-+; (and (eq_attr "tune" "thunderx")
-+; (eq_attr "type" "smull,umull"))
-+; "thunderx_pipe1 + thunderx_mult")
-+
-+(define_insn_reservation "thunderx_div32" 22
-+ (and (eq_attr "tune" "thunderx")
-+ (eq_attr "type" "udiv,sdiv"))
-+ "thunderx_pipe1 + thunderx_divide, thunderx_divide * 21")
-+
-+;(define_insn_reservation "thunderx_div64" 38
-+; (and (eq_attr "tune" "thunderx")
-+; (eq_attr "type" "udiv,sdiv")
-+; (eq_attr "mode" "DI"))
-+; "thunderx_pipe1 + thunderx_divide, thunderx_divide * 34")
-+
-+;; Stores take one cycle in pipe 0
-+(define_insn_reservation "thunderx_store" 1
-+ (and (eq_attr "tune" "thunderx")
-+ (eq_attr "type" "store1"))
-+ "thunderx_pipe0")
-+
-+;; Store pairs are single issued
-+(define_insn_reservation "thunderx_storepair" 1
-+ (and (eq_attr "tune" "thunderx")
-+ (eq_attr "type" "store2"))
-+ "thunderx_pipe0 + thunderx_pipe1")
-+
-+
-+;; loads (and load pairs) from L1 take 3 cycles in pipe 0
-+(define_insn_reservation "thunderx_load" 3
-+ (and (eq_attr "tune" "thunderx")
-+ (eq_attr "type" "load1, load2"))
-+ "thunderx_pipe0")
-+
-+(define_insn_reservation "thunderx_brj" 1
-+ (and (eq_attr "tune" "thunderx")
-+ (eq_attr "type" "branch,trap,call"))
-+ "thunderx_pipe1")
-+
-+;; FPU
-+
-+(define_insn_reservation "thunderx_fadd" 4
-+ (and (eq_attr "tune" "thunderx")
-+ (eq_attr "type" "faddd,fadds"))
-+ "thunderx_pipe1")
-+
-+(define_insn_reservation "thunderx_fconst" 1
-+ (and (eq_attr "tune" "thunderx")
-+ (eq_attr "type" "fconsts,fconstd"))
-+ "thunderx_pipe1")
-+
-+;; Moves between fp are 2 cycles including min/max/select/abs/neg
-+(define_insn_reservation "thunderx_fmov" 2
-+ (and (eq_attr "tune" "thunderx")
-+ (eq_attr "type" "fmov,f_minmaxs,f_minmaxd,fcsel,ffarithd,ffariths"))
-+ "thunderx_pipe1")
-+
-+(define_insn_reservation "thunderx_fmovgpr" 2
-+ (and (eq_attr "tune" "thunderx")
-+ (eq_attr "type" "f_mrc, f_mcr"))
-+ "thunderx_pipe1")
-+
-+(define_insn_reservation "thunderx_fmul" 6
-+ (and (eq_attr "tune" "thunderx")
-+ (eq_attr "type" "fmacs,fmacd,fmuls,fmuld"))
-+ "thunderx_pipe1")
-+
-+(define_insn_reservation "thunderx_fdivs" 12
-+ (and (eq_attr "tune" "thunderx")
-+ (eq_attr "type" "fdivs"))
-+ "thunderx_pipe1 + thunderx_divide, thunderx_divide*8")
-+
-+(define_insn_reservation "thunderx_fdivd" 22
-+ (and (eq_attr "tune" "thunderx")
-+ (eq_attr "type" "fdivd"))
-+ "thunderx_pipe1 + thunderx_divide, thunderx_divide*18")
-+
-+(define_insn_reservation "thunderx_fsqrts" 17
-+ (and (eq_attr "tune" "thunderx")
-+ (eq_attr "type" "fsqrts"))
-+ "thunderx_pipe1 + thunderx_divide, thunderx_divide*13")
-+
-+(define_insn_reservation "thunderx_fsqrtd" 28
-+ (and (eq_attr "tune" "thunderx")
-+ (eq_attr "type" "fsqrtd"))
-+ "thunderx_pipe1 + thunderx_divide, thunderx_divide*31")
-+
-+;; The rounding conversion inside fp is 4 cycles
-+(define_insn_reservation "thunderx_frint" 4
-+ (and (eq_attr "tune" "thunderx")
-+ (eq_attr "type" "f_rints,f_rintd"))
-+ "thunderx_pipe1")
-+
-+;; Float to integer with a move from int to/from float is 6 cycles
-+(define_insn_reservation "thunderx_f_cvt" 6
-+ (and (eq_attr "tune" "thunderx")
-+ (eq_attr "type" "f_cvt,f_cvtf2i,f_cvti2f"))
-+ "thunderx_pipe1")
-+
-+;; FP/SIMD load/stores happen in pipe 0
-+;; 64bit loads of registers/pairs are 4 cycles from L1
-+(define_insn_reservation "thunderx_64simd_fp_load" 4
-+ (and (eq_attr "tune" "thunderx")
-+ (eq_attr "type" "f_loadd,f_loads,neon_load1_1reg,\
-+ neon_load1_1reg_q,neon_load1_2reg"))
-+ "thunderx_pipe0")
-+
-+;; 128bit load pairs are single issued and take 4 cycles from L1
-+(define_insn_reservation "thunderx_128simd_pair_load" 4
-+ (and (eq_attr "tune" "thunderx")
-+ (eq_attr "type" "neon_load1_2reg_q"))
-+ "thunderx_pipe0+thunderx_pipe1")
-+
-+;; FP/SIMD stores take one cycle in pipe 0
-+(define_insn_reservation "thunderx_simd_fp_store" 1
-+ (and (eq_attr "tune" "thunderx")
-+ (eq_attr "type" "f_stored,f_stores,neon_store1_1reg,neon_store1_1reg_q"))
-+ "thunderx_pipe0")
-+
-+;; 64bit neon store pairs are single issue for one cycle
-+(define_insn_reservation "thunderx_64neon_storepair" 1
-+ (and (eq_attr "tune" "thunderx")
-+ (eq_attr "type" "neon_store1_2reg"))
-+ "thunderx_pipe0 + thunderx_pipe1")
-+
-+;; 128bit neon store pairs are single issued for two cycles
-+(define_insn_reservation "thunderx_128neon_storepair" 2
-+ (and (eq_attr "tune" "thunderx")
-+ (eq_attr "type" "neon_store1_2reg_q"))
-+ "(thunderx_pipe0 + thunderx_pipe1)*2")
-+
-+
-+;; SIMD/NEON (q forms take an extra cycle)
-+
-+;; Thunder simd move instruction types - 2/3 cycles
-+(define_insn_reservation "thunderx_neon_move" 2
-+ (and (eq_attr "tune" "thunderx")
-+ (eq_attr "type" "neon_logic, neon_bsl, neon_fp_compare_s, \
-+ neon_fp_compare_d, neon_move"))
-+ "thunderx_pipe1 + thunderx_simd")
-+
-+(define_insn_reservation "thunderx_neon_move_q" 3
-+ (and (eq_attr "tune" "thunderx")
-+ (eq_attr "type" "neon_logic_q, neon_bsl_q, neon_fp_compare_s_q, \
-+ neon_fp_compare_d_q, neon_move_q"))
-+ "thunderx_pipe1 + thunderx_simd, thunderx_simd")
-+
-+
-+;; Thunder simd simple/add instruction types - 4/5 cycles
-+
-+(define_insn_reservation "thunderx_neon_add" 4
-+ (and (eq_attr "tune" "thunderx")
-+ (eq_attr "type" "neon_reduc_add, neon_reduc_minmax, neon_fp_reduc_add_s, \
-+ neon_fp_reduc_add_d, neon_fp_to_int_s, neon_fp_to_int_d, \
-+ neon_add_halve, neon_sub_halve, neon_qadd, neon_compare, \
-+ neon_compare_zero, neon_minmax, neon_abd, neon_add, neon_sub, \
-+ neon_fp_minmax_s, neon_fp_minmax_d, neon_reduc_add, neon_cls, \
-+ neon_qabs, neon_qneg, neon_fp_addsub_s, neon_fp_addsub_d"))
-+ "thunderx_pipe1 + thunderx_simd")
-+
-+;; BIG NOTE: neon_add_long/neon_sub_long don't have a q form, which is incorrect
-+
-+(define_insn_reservation "thunderx_neon_add_q" 5
-+ (and (eq_attr "tune" "thunderx")
-+ (eq_attr "type" "neon_reduc_add_q, neon_reduc_minmax_q, neon_fp_reduc_add_s_q, \
-+ neon_fp_reduc_add_d_q, neon_fp_to_int_s_q, neon_fp_to_int_d_q, \
-+ neon_add_halve_q, neon_sub_halve_q, neon_qadd_q, neon_compare_q, \
-+ neon_compare_zero_q, neon_minmax_q, neon_abd_q, neon_add_q, neon_sub_q, \
-+ neon_fp_minmax_s_q, neon_fp_minmax_d_q, neon_reduc_add_q, neon_cls_q, \
-+ neon_qabs_q, neon_qneg_q, neon_fp_addsub_s_q, neon_fp_addsub_d_q, \
-+ neon_add_long, neon_sub_long"))
-+ "thunderx_pipe1 + thunderx_simd, thunderx_simd")
-+
-+
-+;; Thunder 128bit SIMD reads the upper half in cycle 2 and writes in the last cycle
-+(define_bypass 2 "thunderx_neon_move_q" "thunderx_neon_move_q, thunderx_neon_add_q")
-+(define_bypass 4 "thunderx_neon_add_q" "thunderx_neon_move_q, thunderx_neon_add_q")
-+
-+;; Assume both pipes are needed for unknown and multiple-instruction
-+;; patterns.
-+
-+(define_insn_reservation "thunderx_unknown" 1
-+ (and (eq_attr "tune" "thunderx")
-+ (eq_attr "type" "untyped,multiple"))
-+ "thunderx_pipe0 + thunderx_pipe1")
-+
-+
---- a/src/gcc/config/aarch64/aarch64-protos.h
-+++ b/src/gcc/config/aarch64/aarch64-protos.h
-@@ -108,9 +108,22 @@
- cost models and vectors for address cost calculations, register
- move costs and memory move costs. */
-
-+/* Scaled addressing modes can vary cost depending on the mode of the
-+ value to be loaded/stored. QImode values cannot use scaled
-+ addressing modes. */
-+
-+struct scale_addr_mode_cost
-+{
-+ const int hi;
-+ const int si;
-+ const int di;
-+ const int ti;
-+};
-+
- /* Additional cost for addresses. */
- struct cpu_addrcost_table
- {
-+ const struct scale_addr_mode_cost addr_scale_costs;
- const int pre_modify;
- const int post_modify;
- const int register_offset;
-@@ -160,6 +173,7 @@
- };
-
- HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
-+int aarch64_get_condition_code (rtx);
- bool aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode);
- bool aarch64_cannot_change_mode_class (enum machine_mode,
- enum machine_mode,
-@@ -166,7 +180,9 @@
- enum reg_class);
- enum aarch64_symbol_type
- aarch64_classify_symbolic_expression (rtx, enum aarch64_symbol_context);
-+bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT);
- bool aarch64_constant_address_p (rtx);
-+bool aarch64_expand_movmem (rtx *);
- bool aarch64_float_const_zero_rtx_p (rtx);
- bool aarch64_function_arg_regno_p (unsigned);
- bool aarch64_gen_movmemqi (rtx *);
-@@ -175,9 +191,12 @@
- bool aarch64_is_long_call_p (rtx);
- bool aarch64_label_mentioned_p (rtx);
- bool aarch64_legitimate_pic_operand_p (rtx);
-+bool aarch64_modes_tieable_p (enum machine_mode mode1,
-+ enum machine_mode mode2);
- bool aarch64_move_imm (HOST_WIDE_INT, enum machine_mode);
- bool aarch64_mov_operand_p (rtx, enum aarch64_symbol_context,
- enum machine_mode);
-+bool aarch64_offset_7bit_signed_scaled_p (enum machine_mode, HOST_WIDE_INT);
- char *aarch64_output_scalar_simd_mov_immediate (rtx, enum machine_mode);
- char *aarch64_output_simd_mov_immediate (rtx, enum machine_mode, unsigned);
- bool aarch64_pad_arg_upward (enum machine_mode, const_tree);
-@@ -184,6 +203,8 @@
- bool aarch64_pad_reg_upward (enum machine_mode, const_tree, bool);
- bool aarch64_regno_ok_for_base_p (int, bool);
- bool aarch64_regno_ok_for_index_p (int, bool);
-+bool aarch64_simd_check_vect_par_cnst_half (rtx op, enum machine_mode mode,
-+ bool high);
- bool aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode);
- bool aarch64_simd_imm_zero_p (rtx, enum machine_mode);
- bool aarch64_simd_scalar_immediate_valid_for_move (rtx, enum machine_mode);
-@@ -200,6 +221,8 @@
- enum aarch64_symbol_type aarch64_classify_tls_symbol (rtx);
- enum reg_class aarch64_regno_regclass (unsigned);
- int aarch64_asm_preferred_eh_data_format (int, int);
-+enum machine_mode aarch64_hard_regno_caller_save_mode (unsigned, unsigned,
-+ enum machine_mode);
- int aarch64_hard_regno_mode_ok (unsigned, enum machine_mode);
- int aarch64_hard_regno_nregs (unsigned, enum machine_mode);
- int aarch64_simd_attr_length_move (rtx);
-@@ -291,4 +314,5 @@
- extern void aarch64_final_prescan_insn (rtx);
- extern bool
- aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel);
-+void aarch64_atomic_assign_expand_fenv (tree *, tree *, tree *);
- #endif /* GCC_AARCH64_PROTOS_H */
---- a/src/gcc/config/aarch64/aarch64-simd-builtins.def
-+++ b/src/gcc/config/aarch64/aarch64-simd-builtins.def
-@@ -47,36 +47,44 @@
- VAR1 (UNOP, addp, 0, di)
- BUILTIN_VDQ_BHSI (UNOP, clz, 2)
-
-- BUILTIN_VALL (GETLANE, get_lane, 0)
-- VAR1 (GETLANE, get_lane, 0, di)
- BUILTIN_VALL (GETLANE, be_checked_get_lane, 0)
-
-- BUILTIN_VD_RE (REINTERP, reinterpretdi, 0)
-- BUILTIN_VDC (REINTERP, reinterpretv8qi, 0)
-- BUILTIN_VDC (REINTERP, reinterpretv4hi, 0)
-- BUILTIN_VDC (REINTERP, reinterpretv2si, 0)
-- BUILTIN_VDC (REINTERP, reinterpretv2sf, 0)
-- BUILTIN_VQ (REINTERP, reinterpretv16qi, 0)
-- BUILTIN_VQ (REINTERP, reinterpretv8hi, 0)
-- BUILTIN_VQ (REINTERP, reinterpretv4si, 0)
-- BUILTIN_VQ (REINTERP, reinterpretv4sf, 0)
-- BUILTIN_VQ (REINTERP, reinterpretv2di, 0)
-- BUILTIN_VQ (REINTERP, reinterpretv2df, 0)
-+ VAR1 (REINTERP_SS, reinterpretdi, 0, df)
-+ VAR1 (REINTERP_SS, reinterpretv8qi, 0, df)
-+ VAR1 (REINTERP_SS, reinterpretv4hi, 0, df)
-+ VAR1 (REINTERP_SS, reinterpretv2si, 0, df)
-+ VAR1 (REINTERP_SS, reinterpretv2sf, 0, df)
-+ BUILTIN_VD (REINTERP_SS, reinterpretdf, 0)
-
-- BUILTIN_VDQ_I (BINOP, dup_lane, 0)
-+ BUILTIN_VD (REINTERP_SU, reinterpretdf, 0)
-+
-+ VAR1 (REINTERP_US, reinterpretdi, 0, df)
-+ VAR1 (REINTERP_US, reinterpretv8qi, 0, df)
-+ VAR1 (REINTERP_US, reinterpretv4hi, 0, df)
-+ VAR1 (REINTERP_US, reinterpretv2si, 0, df)
-+ VAR1 (REINTERP_US, reinterpretv2sf, 0, df)
-+
-+ BUILTIN_VD (REINTERP_SP, reinterpretdf, 0)
-+
-+ VAR1 (REINTERP_PS, reinterpretdi, 0, df)
-+ VAR1 (REINTERP_PS, reinterpretv8qi, 0, df)
-+ VAR1 (REINTERP_PS, reinterpretv4hi, 0, df)
-+ VAR1 (REINTERP_PS, reinterpretv2si, 0, df)
-+ VAR1 (REINTERP_PS, reinterpretv2sf, 0, df)
-+
- /* Implemented by aarch64_<sur>q<r>shl<mode>. */
- BUILTIN_VSDQ_I (BINOP, sqshl, 0)
-- BUILTIN_VSDQ_I (BINOP, uqshl, 0)
-+ BUILTIN_VSDQ_I (BINOP_UUS, uqshl, 0)
- BUILTIN_VSDQ_I (BINOP, sqrshl, 0)
-- BUILTIN_VSDQ_I (BINOP, uqrshl, 0)
-+ BUILTIN_VSDQ_I (BINOP_UUS, uqrshl, 0)
- /* Implemented by aarch64_<su_optab><optab><mode>. */
- BUILTIN_VSDQ_I (BINOP, sqadd, 0)
-- BUILTIN_VSDQ_I (BINOP, uqadd, 0)
-+ BUILTIN_VSDQ_I (BINOPU, uqadd, 0)
- BUILTIN_VSDQ_I (BINOP, sqsub, 0)
-- BUILTIN_VSDQ_I (BINOP, uqsub, 0)
-+ BUILTIN_VSDQ_I (BINOPU, uqsub, 0)
- /* Implemented by aarch64_<sur>qadd<mode>. */
-- BUILTIN_VSDQ_I (BINOP, suqadd, 0)
-- BUILTIN_VSDQ_I (BINOP, usqadd, 0)
-+ BUILTIN_VSDQ_I (BINOP_SSU, suqadd, 0)
-+ BUILTIN_VSDQ_I (BINOP_UUS, usqadd, 0)
-
- /* Implemented by aarch64_get_dreg<VSTRUCT:mode><VDC:mode>. */
- BUILTIN_VDC (GETLANE, get_dregoi, 0)
-@@ -98,6 +106,10 @@
- BUILTIN_VQ (LOADSTRUCT, ld2, 0)
- BUILTIN_VQ (LOADSTRUCT, ld3, 0)
- BUILTIN_VQ (LOADSTRUCT, ld4, 0)
-+ /* Implemented by aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>. */
-+ BUILTIN_VALLDIF (LOADSTRUCT, ld2r, 0)
-+ BUILTIN_VALLDIF (LOADSTRUCT, ld3r, 0)
-+ BUILTIN_VALLDIF (LOADSTRUCT, ld4r, 0)
- /* Implemented by aarch64_st<VSTRUCT:nregs><VDC:mode>. */
- BUILTIN_VDC (STORESTRUCT, st2, 0)
- BUILTIN_VDC (STORESTRUCT, st3, 0)
-@@ -107,6 +119,10 @@
- BUILTIN_VQ (STORESTRUCT, st3, 0)
- BUILTIN_VQ (STORESTRUCT, st4, 0)
-
-+ BUILTIN_VQ (STORESTRUCT_LANE, st2_lane, 0)
-+ BUILTIN_VQ (STORESTRUCT_LANE, st3_lane, 0)
-+ BUILTIN_VQ (STORESTRUCT_LANE, st4_lane, 0)
-+
- BUILTIN_VQW (BINOP, saddl2, 0)
- BUILTIN_VQW (BINOP, uaddl2, 0)
- BUILTIN_VQW (BINOP, ssubl2, 0)
-@@ -142,19 +158,19 @@
- BUILTIN_VSQN_HSDI (UNOP, sqmovn, 0)
- BUILTIN_VSQN_HSDI (UNOP, uqmovn, 0)
- /* Implemented by aarch64_s<optab><mode>. */
-- BUILTIN_VSDQ_I_BHSI (UNOP, sqabs, 0)
-- BUILTIN_VSDQ_I_BHSI (UNOP, sqneg, 0)
-+ BUILTIN_VSDQ_I (UNOP, sqabs, 0)
-+ BUILTIN_VSDQ_I (UNOP, sqneg, 0)
-
-- BUILTIN_VSD_HSI (QUADOP, sqdmlal_lane, 0)
-- BUILTIN_VSD_HSI (QUADOP, sqdmlsl_lane, 0)
-- BUILTIN_VSD_HSI (QUADOP, sqdmlal_laneq, 0)
-- BUILTIN_VSD_HSI (QUADOP, sqdmlsl_laneq, 0)
-+ BUILTIN_VSD_HSI (TERNOP_LANE, sqdmlal_lane, 0)
-+ BUILTIN_VSD_HSI (TERNOP_LANE, sqdmlsl_lane, 0)
-+ BUILTIN_VSD_HSI (TERNOP_LANE, sqdmlal_laneq, 0)
-+ BUILTIN_VSD_HSI (TERNOP_LANE, sqdmlsl_laneq, 0)
- BUILTIN_VQ_HSI (TERNOP, sqdmlal2, 0)
- BUILTIN_VQ_HSI (TERNOP, sqdmlsl2, 0)
-- BUILTIN_VQ_HSI (QUADOP, sqdmlal2_lane, 0)
-- BUILTIN_VQ_HSI (QUADOP, sqdmlsl2_lane, 0)
-- BUILTIN_VQ_HSI (QUADOP, sqdmlal2_laneq, 0)
-- BUILTIN_VQ_HSI (QUADOP, sqdmlsl2_laneq, 0)
-+ BUILTIN_VQ_HSI (TERNOP_LANE, sqdmlal2_lane, 0)
-+ BUILTIN_VQ_HSI (TERNOP_LANE, sqdmlsl2_lane, 0)
-+ BUILTIN_VQ_HSI (TERNOP_LANE, sqdmlal2_laneq, 0)
-+ BUILTIN_VQ_HSI (TERNOP_LANE, sqdmlsl2_laneq, 0)
- BUILTIN_VQ_HSI (TERNOP, sqdmlal2_n, 0)
- BUILTIN_VQ_HSI (TERNOP, sqdmlsl2_n, 0)
- /* Implemented by aarch64_sqdml<SBINQOPS:as>l<mode>. */
-@@ -186,9 +202,9 @@
- BUILTIN_VSDQ_I_DI (BINOP, ashl, 3)
- /* Implemented by aarch64_<sur>shl<mode>. */
- BUILTIN_VSDQ_I_DI (BINOP, sshl, 0)
-- BUILTIN_VSDQ_I_DI (BINOP, ushl, 0)
-+ BUILTIN_VSDQ_I_DI (BINOP_UUS, ushl, 0)
- BUILTIN_VSDQ_I_DI (BINOP, srshl, 0)
-- BUILTIN_VSDQ_I_DI (BINOP, urshl, 0)
-+ BUILTIN_VSDQ_I_DI (BINOP_UUS, urshl, 0)
-
- BUILTIN_VDQ_I (SHIFTIMM, ashr, 3)
- VAR1 (SHIFTIMM, ashr_simd, 0, di)
-@@ -196,15 +212,15 @@
- VAR1 (USHIFTIMM, lshr_simd, 0, di)
- /* Implemented by aarch64_<sur>shr_n<mode>. */
- BUILTIN_VSDQ_I_DI (SHIFTIMM, srshr_n, 0)
-- BUILTIN_VSDQ_I_DI (SHIFTIMM, urshr_n, 0)
-+ BUILTIN_VSDQ_I_DI (USHIFTIMM, urshr_n, 0)
- /* Implemented by aarch64_<sur>sra_n<mode>. */
- BUILTIN_VSDQ_I_DI (SHIFTACC, ssra_n, 0)
-- BUILTIN_VSDQ_I_DI (SHIFTACC, usra_n, 0)
-+ BUILTIN_VSDQ_I_DI (USHIFTACC, usra_n, 0)
- BUILTIN_VSDQ_I_DI (SHIFTACC, srsra_n, 0)
-- BUILTIN_VSDQ_I_DI (SHIFTACC, ursra_n, 0)
-+ BUILTIN_VSDQ_I_DI (USHIFTACC, ursra_n, 0)
- /* Implemented by aarch64_<sur>shll_n<mode>. */
- BUILTIN_VDW (SHIFTIMM, sshll_n, 0)
-- BUILTIN_VDW (SHIFTIMM, ushll_n, 0)
-+ BUILTIN_VDW (USHIFTIMM, ushll_n, 0)
- /* Implemented by aarch64_<sur>shll2_n<mode>. */
- BUILTIN_VQW (SHIFTIMM, sshll2_n, 0)
- BUILTIN_VQW (SHIFTIMM, ushll2_n, 0)
-@@ -212,30 +228,19 @@
- BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrun_n, 0)
- BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrun_n, 0)
- BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrn_n, 0)
-- BUILTIN_VSQN_HSDI (SHIFTIMM, uqshrn_n, 0)
-+ BUILTIN_VSQN_HSDI (USHIFTIMM, uqshrn_n, 0)
- BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrn_n, 0)
-- BUILTIN_VSQN_HSDI (SHIFTIMM, uqrshrn_n, 0)
-+ BUILTIN_VSQN_HSDI (USHIFTIMM, uqrshrn_n, 0)
- /* Implemented by aarch64_<sur>s<lr>i_n<mode>. */
- BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssri_n, 0)
-- BUILTIN_VSDQ_I_DI (SHIFTINSERT, usri_n, 0)
-+ BUILTIN_VSDQ_I_DI (USHIFTACC, usri_n, 0)
- BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssli_n, 0)
-- BUILTIN_VSDQ_I_DI (SHIFTINSERT, usli_n, 0)
-+ BUILTIN_VSDQ_I_DI (USHIFTACC, usli_n, 0)
- /* Implemented by aarch64_<sur>qshl<u>_n<mode>. */
-- BUILTIN_VSDQ_I (SHIFTIMM, sqshlu_n, 0)
-+ BUILTIN_VSDQ_I (SHIFTIMM_USS, sqshlu_n, 0)
- BUILTIN_VSDQ_I (SHIFTIMM, sqshl_n, 0)
-- BUILTIN_VSDQ_I (SHIFTIMM, uqshl_n, 0)
-+ BUILTIN_VSDQ_I (USHIFTIMM, uqshl_n, 0)
-
-- /* Implemented by aarch64_cm<cmp><mode>. */
-- BUILTIN_VALLDI (BINOP, cmeq, 0)
-- BUILTIN_VALLDI (BINOP, cmge, 0)
-- BUILTIN_VALLDI (BINOP, cmgt, 0)
-- BUILTIN_VALLDI (BINOP, cmle, 0)
-- BUILTIN_VALLDI (BINOP, cmlt, 0)
-- /* Implemented by aarch64_cm<cmp><mode>. */
-- BUILTIN_VSDQ_I_DI (BINOP, cmgeu, 0)
-- BUILTIN_VSDQ_I_DI (BINOP, cmgtu, 0)
-- BUILTIN_VSDQ_I_DI (BINOP, cmtst, 0)
--
- /* Implemented by reduc_<sur>plus_<mode>. */
- BUILTIN_VALL (UNOP, reduc_splus_, 10)
- BUILTIN_VDQ (UNOP, reduc_uplus_, 10)
-@@ -265,7 +270,7 @@
- BUILTIN_VDQF (UNOP, nearbyint, 2)
- BUILTIN_VDQF (UNOP, rint, 2)
- BUILTIN_VDQF (UNOP, round, 2)
-- BUILTIN_VDQF (UNOP, frintn, 2)
-+ BUILTIN_VDQF_DF (UNOP, frintn, 2)
-
- /* Implemented by l<fcvt_pattern><su_optab><VQDF:mode><vcvt_target>2. */
- VAR1 (UNOP, lbtruncv2sf, 2, v2si)
-@@ -330,6 +335,10 @@
- VAR1 (UNOP, floatunsv4si, 2, v4sf)
- VAR1 (UNOP, floatunsv2di, 2, v2df)
-
-+ VAR5 (UNOPU, bswap, 10, v4hi, v8hi, v2si, v4si, v2di)
-+
-+ BUILTIN_VB (UNOP, rbit, 0)
-+
- /* Implemented by
- aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>. */
- BUILTIN_VALL (BINOP, zip1, 0)
-@@ -393,3 +402,6 @@
- /* Implemented by aarch64_crypto_pmull<mode>. */
- VAR1 (BINOPP, crypto_pmull, 0, di)
- VAR1 (BINOPP, crypto_pmull, 0, v2di)
-+
-+ /* Meta-op to check lane bounds of immediate in aarch64_expand_builtin. */
-+ VAR1 (BINOPV, im_lane_bound, 0, si)
---- a/src/gcc/config/aarch64/constraints.md
-+++ b/src/gcc/config/aarch64/constraints.md
-@@ -21,6 +21,9 @@
- (define_register_constraint "k" "STACK_REG"
- "@internal The stack register.")
-
-+(define_register_constraint "Ucs" "CALLER_SAVE_REGS"
-+ "@internal The caller save registers.")
-+
- (define_register_constraint "w" "FP_REGS"
- "Floating point and SIMD vector registers.")
-
-@@ -92,6 +95,10 @@
- (and (match_code "const_int")
- (match_test "(unsigned HOST_WIDE_INT) ival < 64")))
-
-+(define_constraint "Usf"
-+ "@internal Usf is a symbol reference."
-+ (match_code "symbol_ref"))
-+
- (define_constraint "UsM"
- "@internal
- A constraint that matches the immediate constant -1."
---- a/src/gcc/config/aarch64/aarch64.c
-+++ b/src/gcc/config/aarch64/aarch64.c
-@@ -62,7 +62,8 @@
- #include "dwarf2.h"
- #include "cfgloop.h"
- #include "tree-vectorizer.h"
--#include "config/arm/aarch-cost-tables.h"
-+#include "aarch64-cost-tables.h"
-+#include "dumpfile.h"
-
- /* Defined for convenience. */
- #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
-@@ -136,12 +137,13 @@
- static void aarch64_override_options_after_change (void);
- static bool aarch64_vector_mode_supported_p (enum machine_mode);
- static unsigned bit_count (unsigned HOST_WIDE_INT);
--static bool aarch64_const_vec_all_same_int_p (rtx,
-- HOST_WIDE_INT, HOST_WIDE_INT);
--
- static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
- const unsigned char *sel);
-+static int aarch64_address_cost (rtx, enum machine_mode, addr_space_t, bool);
-
-+/* Major revision number of the ARM Architecture implemented by the target. */
-+unsigned aarch64_architecture_version;
-+
- /* The processor for which instructions should be scheduled. */
- enum aarch64_processor aarch64_tune = cortexa53;
-
-@@ -171,6 +173,15 @@
- #endif
- static const struct cpu_addrcost_table generic_addrcost_table =
- {
-+#if HAVE_DESIGNATED_INITIALIZERS
-+ .addr_scale_costs =
-+#endif
-+ {
-+ NAMED_PARAM (hi, 0),
-+ NAMED_PARAM (si, 0),
-+ NAMED_PARAM (di, 0),
-+ NAMED_PARAM (ti, 0),
-+ },
- NAMED_PARAM (pre_modify, 0),
- NAMED_PARAM (post_modify, 0),
- NAMED_PARAM (register_offset, 0),
-@@ -181,14 +192,60 @@
- #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
- __extension__
- #endif
-+static const struct cpu_addrcost_table cortexa57_addrcost_table =
-+{
-+#if HAVE_DESIGNATED_INITIALIZERS
-+ .addr_scale_costs =
-+#endif
-+ {
-+ NAMED_PARAM (hi, 1),
-+ NAMED_PARAM (si, 0),
-+ NAMED_PARAM (di, 0),
-+ NAMED_PARAM (ti, 1),
-+ },
-+ NAMED_PARAM (pre_modify, 0),
-+ NAMED_PARAM (post_modify, 0),
-+ NAMED_PARAM (register_offset, 0),
-+ NAMED_PARAM (register_extend, 0),
-+ NAMED_PARAM (imm_offset, 0),
-+};
-+
-+#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
-+__extension__
-+#endif
- static const struct cpu_regmove_cost generic_regmove_cost =
- {
- NAMED_PARAM (GP2GP, 1),
- NAMED_PARAM (GP2FP, 2),
- NAMED_PARAM (FP2GP, 2),
-- /* We currently do not provide direct support for TFmode Q->Q move.
-- Therefore we need to raise the cost above 2 in order to have
-- reload handle the situation. */
-+ NAMED_PARAM (FP2FP, 2)
-+};
-+
-+static const struct cpu_regmove_cost cortexa57_regmove_cost =
-+{
-+ NAMED_PARAM (GP2GP, 1),
-+ /* Avoid the use of slow int<->fp moves for spilling by setting
-+ their cost higher than memmov_cost. */
-+ NAMED_PARAM (GP2FP, 5),
-+ NAMED_PARAM (FP2GP, 5),
-+ NAMED_PARAM (FP2FP, 2)
-+};
-+
-+static const struct cpu_regmove_cost cortexa53_regmove_cost =
-+{
-+ NAMED_PARAM (GP2GP, 1),
-+ /* Avoid the use of slow int<->fp moves for spilling by setting
-+ their cost higher than memmov_cost. */
-+ NAMED_PARAM (GP2FP, 5),
-+ NAMED_PARAM (FP2GP, 5),
-+ NAMED_PARAM (FP2FP, 2)
-+};
-+
-+static const struct cpu_regmove_cost thunderx_regmove_cost =
-+{
-+ NAMED_PARAM (GP2GP, 2),
-+ NAMED_PARAM (GP2FP, 2),
-+ NAMED_PARAM (FP2GP, 6),
- NAMED_PARAM (FP2FP, 4)
- };
-
-@@ -212,9 +269,29 @@
- NAMED_PARAM (cond_not_taken_branch_cost, 1)
- };
-
-+/* Generic costs for vector insn classes. */
- #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
- __extension__
- #endif
-+static const struct cpu_vector_cost cortexa57_vector_cost =
-+{
-+ NAMED_PARAM (scalar_stmt_cost, 1),
-+ NAMED_PARAM (scalar_load_cost, 4),
-+ NAMED_PARAM (scalar_store_cost, 1),
-+ NAMED_PARAM (vec_stmt_cost, 3),
-+ NAMED_PARAM (vec_to_scalar_cost, 8),
-+ NAMED_PARAM (scalar_to_vec_cost, 8),
-+ NAMED_PARAM (vec_align_load_cost, 5),
-+ NAMED_PARAM (vec_unalign_load_cost, 5),
-+ NAMED_PARAM (vec_unalign_store_cost, 1),
-+ NAMED_PARAM (vec_store_cost, 1),
-+ NAMED_PARAM (cond_taken_branch_cost, 1),
-+ NAMED_PARAM (cond_not_taken_branch_cost, 1)
-+};
-+
-+#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
-+__extension__
-+#endif
- static const struct tune_params generic_tunings =
- {
- &cortexa57_extra_costs,
-@@ -229,7 +306,7 @@
- {
- &cortexa53_extra_costs,
- &generic_addrcost_table,
-- &generic_regmove_cost,
-+ &cortexa53_regmove_cost,
- &generic_vector_cost,
- NAMED_PARAM (memmov_cost, 4),
- NAMED_PARAM (issue_rate, 2)
-@@ -238,13 +315,23 @@
- static const struct tune_params cortexa57_tunings =
- {
- &cortexa57_extra_costs,
-- &generic_addrcost_table,
-- &generic_regmove_cost,
-- &generic_vector_cost,
-+ &cortexa57_addrcost_table,
-+ &cortexa57_regmove_cost,
-+ &cortexa57_vector_cost,
- NAMED_PARAM (memmov_cost, 4),
- NAMED_PARAM (issue_rate, 3)
- };
-
-+static const struct tune_params thunderx_tunings =
-+{
-+ &thunderx_extra_costs,
-+ &generic_addrcost_table,
-+ &thunderx_regmove_cost,
-+ &generic_vector_cost,
-+ NAMED_PARAM (memmov_cost, 6),
-+ NAMED_PARAM (issue_rate, 2)
-+};
-+
- /* A processor implementing AArch64. */
- struct processor
- {
-@@ -251,6 +338,7 @@
- const char *const name;
- enum aarch64_processor core;
- const char *arch;
-+ unsigned architecture_version;
- const unsigned long flags;
- const struct tune_params *const tune;
- };
-@@ -259,11 +347,13 @@
- static const struct processor all_cores[] =
- {
- #define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
-- {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
-+ {NAME, IDENT, #ARCH, ARCH,\
-+ FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
- #include "aarch64-cores.def"
- #undef AARCH64_CORE
-- {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
-- {NULL, aarch64_none, NULL, 0, NULL}
-+ {"generic", cortexa53, "8", 8,\
-+ AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
-+ {NULL, aarch64_none, NULL, 0, 0, NULL}
- };
-
- /* Architectures implementing AArch64. */
-@@ -270,10 +360,10 @@
- static const struct processor all_architectures[] =
- {
- #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
-- {NAME, CORE, #ARCH, FLAGS, NULL},
-+ {NAME, CORE, #ARCH, ARCH, FLAGS, NULL},
- #include "aarch64-arches.def"
- #undef AARCH64_ARCH
-- {NULL, aarch64_none, NULL, 0, NULL}
-+ {NULL, aarch64_none, NULL, 0, 0, NULL}
- };
-
- /* Target specification. These are populated as commandline arguments
-@@ -424,6 +514,24 @@
- return 0;
- }
-
-+/* Implement HARD_REGNO_CALLER_SAVE_MODE. */
-+enum machine_mode
-+aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
-+ enum machine_mode mode)
-+{
-+ /* Handle modes that fit within single registers. */
-+ if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
-+ {
-+ if (GET_MODE_SIZE (mode) >= 4)
-+ return mode;
-+ else
-+ return SImode;
-+ }
-+ /* Fall back to generic for multi-reg and very large modes. */
-+ else
-+ return choose_hard_reg_mode (regno, nregs, false);
-+}
-+
- /* Return true if calls to DECL should be treated as
- long-calls (ie called via a register). */
- static bool
-@@ -444,7 +552,7 @@
- represent an expression that matches an extend operation. The
-   operands represent the parameters from
-
-- (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)). */
-+ (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
- bool
- aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
- rtx extract_imm)
-@@ -636,12 +744,24 @@
-
- case SYMBOL_SMALL_TLSDESC:
- {
-- rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
-+ enum machine_mode mode = GET_MODE (dest);
-+ rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
- rtx tp;
-
-- emit_insn (gen_tlsdesc_small (imm));
-+ gcc_assert (mode == Pmode || mode == ptr_mode);
-+
-+ /* In ILP32, the got entry is always of SImode size. Unlike
-+ small GOT, the dest is fixed at reg 0. */
-+ if (TARGET_ILP32)
-+ emit_insn (gen_tlsdesc_small_si (imm));
-+ else
-+ emit_insn (gen_tlsdesc_small_di (imm));
- tp = aarch64_load_tp (NULL);
-- emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
-+
-+ if (mode != Pmode)
-+ tp = gen_lowpart (mode, tp);
-+
-+ emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, x0)));
- set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
- return;
- }
-@@ -648,10 +768,34 @@
-
- case SYMBOL_SMALL_GOTTPREL:
- {
-- rtx tmp_reg = gen_reg_rtx (Pmode);
-+ /* In ILP32, the mode of dest can be either SImode or DImode,
-+ while the got entry is always of SImode size. The mode of
-+ dest depends on how dest is used: if dest is assigned to a
-+ pointer (e.g. in the memory), it has SImode; it may have
-+	 DImode if dest is dereferenced to access the memory.
-+ This is why we have to handle three different tlsie_small
-+ patterns here (two patterns for ILP32). */
-+ enum machine_mode mode = GET_MODE (dest);
-+ rtx tmp_reg = gen_reg_rtx (mode);
- rtx tp = aarch64_load_tp (NULL);
-- emit_insn (gen_tlsie_small (tmp_reg, imm));
-- emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
-+
-+ if (mode == ptr_mode)
-+ {
-+ if (mode == DImode)
-+ emit_insn (gen_tlsie_small_di (tmp_reg, imm));
-+ else
-+ {
-+ emit_insn (gen_tlsie_small_si (tmp_reg, imm));
-+ tp = gen_lowpart (mode, tp);
-+ }
-+ }
-+ else
-+ {
-+ gcc_assert (mode == Pmode);
-+ emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
-+ }
-+
-+ emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
- set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
- return;
- }
-@@ -889,10 +1033,10 @@
- return plus_constant (mode, reg, offset);
- }
-
--void
--aarch64_expand_mov_immediate (rtx dest, rtx imm)
-+static int
-+aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
-+ machine_mode mode)
- {
-- enum machine_mode mode = GET_MODE (dest);
- unsigned HOST_WIDE_INT mask;
- int i;
- bool first;
-@@ -899,86 +1043,15 @@
- unsigned HOST_WIDE_INT val;
- bool subtargets;
- rtx subtarget;
-- int one_match, zero_match;
-+ int one_match, zero_match, first_not_ffff_match;
-+ int num_insns = 0;
-
-- gcc_assert (mode == SImode || mode == DImode);
--
-- /* Check on what type of symbol it is. */
-- if (GET_CODE (imm) == SYMBOL_REF
-- || GET_CODE (imm) == LABEL_REF
-- || GET_CODE (imm) == CONST)
-- {
-- rtx mem, base, offset;
-- enum aarch64_symbol_type sty;
--
-- /* If we have (const (plus symbol offset)), separate out the offset
-- before we start classifying the symbol. */
-- split_const (imm, &base, &offset);
--
-- sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
-- switch (sty)
-- {
-- case SYMBOL_FORCE_TO_MEM:
-- if (offset != const0_rtx
-- && targetm.cannot_force_const_mem (mode, imm))
-- {
-- gcc_assert (can_create_pseudo_p ());
-- base = aarch64_force_temporary (mode, dest, base);
-- base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
-- aarch64_emit_move (dest, base);
-- return;
-- }
-- mem = force_const_mem (ptr_mode, imm);
-- gcc_assert (mem);
-- if (mode != ptr_mode)
-- mem = gen_rtx_ZERO_EXTEND (mode, mem);
-- emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
-- return;
--
-- case SYMBOL_SMALL_TLSGD:
-- case SYMBOL_SMALL_TLSDESC:
-- case SYMBOL_SMALL_GOTTPREL:
-- case SYMBOL_SMALL_GOT:
-- case SYMBOL_TINY_GOT:
-- if (offset != const0_rtx)
-- {
-- gcc_assert(can_create_pseudo_p ());
-- base = aarch64_force_temporary (mode, dest, base);
-- base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
-- aarch64_emit_move (dest, base);
-- return;
-- }
-- /* FALLTHRU */
--
-- case SYMBOL_SMALL_TPREL:
-- case SYMBOL_SMALL_ABSOLUTE:
-- case SYMBOL_TINY_ABSOLUTE:
-- aarch64_load_symref_appropriately (dest, imm, sty);
-- return;
--
-- default:
-- gcc_unreachable ();
-- }
-- }
--
- if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
- {
-- emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
-- return;
-- }
--
-- if (!CONST_INT_P (imm))
-- {
-- if (GET_CODE (imm) == HIGH)
-+ if (generate)
- emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
-- else
-- {
-- rtx mem = force_const_mem (mode, imm);
-- gcc_assert (mem);
-- emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
-- }
--
-- return;
-+ num_insns++;
-+ return num_insns;
- }
-
- if (mode == SImode)
-@@ -986,10 +1059,15 @@
- /* We know we can't do this in 1 insn, and we must be able to do it
- in two; so don't mess around looking for sequences that don't buy
- us anything. */
-- emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
-- emit_insn (gen_insv_immsi (dest, GEN_INT (16),
-- GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
-- return;
-+ if (generate)
-+ {
-+ emit_insn (gen_rtx_SET (VOIDmode, dest,
-+ GEN_INT (INTVAL (imm) & 0xffff)));
-+ emit_insn (gen_insv_immsi (dest, GEN_INT (16),
-+ GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
-+ }
-+ num_insns += 2;
-+ return num_insns;
- }
-
- /* Remaining cases are all for DImode. */
-@@ -1000,29 +1078,34 @@
- one_match = 0;
- zero_match = 0;
- mask = 0xffff;
-+ first_not_ffff_match = -1;
-
- for (i = 0; i < 64; i += 16, mask <<= 16)
- {
-- if ((val & mask) == 0)
-- zero_match++;
-- else if ((val & mask) == mask)
-+ if ((val & mask) == mask)
- one_match++;
-+ else
-+ {
-+ if (first_not_ffff_match < 0)
-+ first_not_ffff_match = i;
-+ if ((val & mask) == 0)
-+ zero_match++;
-+ }
- }
-
- if (one_match == 2)
- {
-- mask = 0xffff;
-- for (i = 0; i < 64; i += 16, mask <<= 16)
-+ /* Set one of the quarters and then insert back into result. */
-+ mask = 0xffffll << first_not_ffff_match;
-+ if (generate)
- {
-- if ((val & mask) != mask)
-- {
-- emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
-- emit_insn (gen_insv_immdi (dest, GEN_INT (i),
-- GEN_INT ((val >> i) & 0xffff)));
-- return;
-- }
-+ emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
-+ emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
-+ GEN_INT ((val >> first_not_ffff_match)
-+ & 0xffff)));
- }
-- gcc_unreachable ();
-+ num_insns += 2;
-+ return num_insns;
- }
-
- if (zero_match == 2)
-@@ -1035,42 +1118,55 @@
-
- if (aarch64_uimm12_shift (val - (val & mask)))
- {
-- subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
--
-- emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
-- emit_insn (gen_adddi3 (dest, subtarget,
-- GEN_INT (val - (val & mask))));
-- return;
-+ if (generate)
-+ {
-+ subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
-+ emit_insn (gen_rtx_SET (VOIDmode, subtarget,
-+ GEN_INT (val & mask)));
-+ emit_insn (gen_adddi3 (dest, subtarget,
-+ GEN_INT (val - (val & mask))));
-+ }
-+ num_insns += 2;
-+ return num_insns;
- }
- else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
- {
-- subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
--
-- emit_insn (gen_rtx_SET (VOIDmode, subtarget,
-- GEN_INT ((val + comp) & mask)));
-- emit_insn (gen_adddi3 (dest, subtarget,
-- GEN_INT (val - ((val + comp) & mask))));
-- return;
-+ if (generate)
-+ {
-+ subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
-+ emit_insn (gen_rtx_SET (VOIDmode, subtarget,
-+ GEN_INT ((val + comp) & mask)));
-+ emit_insn (gen_adddi3 (dest, subtarget,
-+ GEN_INT (val - ((val + comp) & mask))));
-+ }
-+ num_insns += 2;
-+ return num_insns;
- }
- else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
- {
-- subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
--
-- emit_insn (gen_rtx_SET (VOIDmode, subtarget,
-- GEN_INT ((val - comp) | ~mask)));
-- emit_insn (gen_adddi3 (dest, subtarget,
-- GEN_INT (val - ((val - comp) | ~mask))));
-- return;
-+ if (generate)
-+ {
-+ subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
-+ emit_insn (gen_rtx_SET (VOIDmode, subtarget,
-+ GEN_INT ((val - comp) | ~mask)));
-+ emit_insn (gen_adddi3 (dest, subtarget,
-+ GEN_INT (val - ((val - comp) | ~mask))));
-+ }
-+ num_insns += 2;
-+ return num_insns;
- }
- else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
- {
-- subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
--
-- emit_insn (gen_rtx_SET (VOIDmode, subtarget,
-- GEN_INT (val | ~mask)));
-- emit_insn (gen_adddi3 (dest, subtarget,
-- GEN_INT (val - (val | ~mask))));
-- return;
-+ if (generate)
-+ {
-+ subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
-+ emit_insn (gen_rtx_SET (VOIDmode, subtarget,
-+ GEN_INT (val | ~mask)));
-+ emit_insn (gen_adddi3 (dest, subtarget,
-+ GEN_INT (val - (val | ~mask))));
-+ }
-+ num_insns += 2;
-+ return num_insns;
- }
- }
-
-@@ -1084,12 +1180,16 @@
- if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
- || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
- {
-- subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
-- emit_insn (gen_rtx_SET (VOIDmode, subtarget,
-- GEN_INT (aarch64_bitmasks[i])));
-- emit_insn (gen_adddi3 (dest, subtarget,
-- GEN_INT (val - aarch64_bitmasks[i])));
-- return;
-+ if (generate)
-+ {
-+ subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
-+ emit_insn (gen_rtx_SET (VOIDmode, subtarget,
-+ GEN_INT (aarch64_bitmasks[i])));
-+ emit_insn (gen_adddi3 (dest, subtarget,
-+ GEN_INT (val - aarch64_bitmasks[i])));
-+ }
-+ num_insns += 2;
-+ return num_insns;
- }
-
- for (j = 0; j < 64; j += 16, mask <<= 16)
-@@ -1096,11 +1196,15 @@
- {
- if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
- {
-- emit_insn (gen_rtx_SET (VOIDmode, dest,
-- GEN_INT (aarch64_bitmasks[i])));
-- emit_insn (gen_insv_immdi (dest, GEN_INT (j),
-- GEN_INT ((val >> j) & 0xffff)));
-- return;
-+ if (generate)
-+ {
-+ emit_insn (gen_rtx_SET (VOIDmode, dest,
-+ GEN_INT (aarch64_bitmasks[i])));
-+ emit_insn (gen_insv_immdi (dest, GEN_INT (j),
-+ GEN_INT ((val >> j) & 0xffff)));
-+ }
-+ num_insns += 2;
-+ return num_insns;
- }
- }
- }
-@@ -1115,12 +1219,16 @@
- for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
- if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
- {
-- subtarget = subtargets ? gen_reg_rtx (mode) : dest;
-- emit_insn (gen_rtx_SET (VOIDmode, subtarget,
-- GEN_INT (aarch64_bitmasks[i])));
-- emit_insn (gen_iordi3 (dest, subtarget,
-- GEN_INT (aarch64_bitmasks[j])));
-- return;
-+ if (generate)
-+ {
-+ subtarget = subtargets ? gen_reg_rtx (mode) : dest;
-+ emit_insn (gen_rtx_SET (VOIDmode, subtarget,
-+ GEN_INT (aarch64_bitmasks[i])));
-+ emit_insn (gen_iordi3 (dest, subtarget,
-+ GEN_INT (aarch64_bitmasks[j])));
-+ }
-+ num_insns += 2;
-+ return num_insns;
- }
- }
- else if ((val & aarch64_bitmasks[i]) == val)
-@@ -1130,17 +1238,44 @@
- for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
- if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
- {
--
-- subtarget = subtargets ? gen_reg_rtx (mode) : dest;
-- emit_insn (gen_rtx_SET (VOIDmode, subtarget,
-- GEN_INT (aarch64_bitmasks[j])));
-- emit_insn (gen_anddi3 (dest, subtarget,
-- GEN_INT (aarch64_bitmasks[i])));
-- return;
-+ if (generate)
-+ {
-+ subtarget = subtargets ? gen_reg_rtx (mode) : dest;
-+ emit_insn (gen_rtx_SET (VOIDmode, subtarget,
-+ GEN_INT (aarch64_bitmasks[j])));
-+ emit_insn (gen_anddi3 (dest, subtarget,
-+ GEN_INT (aarch64_bitmasks[i])));
-+ }
-+ num_insns += 2;
-+ return num_insns;
- }
- }
- }
-
-+ if (one_match > zero_match)
-+ {
-+ /* Set either first three quarters or all but the third. */
-+ mask = 0xffffll << (16 - first_not_ffff_match);
-+ if (generate)
-+ emit_insn (gen_rtx_SET (VOIDmode, dest,
-+ GEN_INT (val | mask | 0xffffffff00000000ull)));
-+ num_insns ++;
-+
-+ /* Now insert other two quarters. */
-+ for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1);
-+ i < 64; i += 16, mask <<= 16)
-+ {
-+ if ((val & mask) != mask)
-+ {
-+ if (generate)
-+ emit_insn (gen_insv_immdi (dest, GEN_INT (i),
-+ GEN_INT ((val >> i) & 0xffff)));
-+ num_insns ++;
-+ }
-+ }
-+ return num_insns;
-+ }
-+
- simple_sequence:
- first = true;
- mask = 0xffff;
-@@ -1150,30 +1285,113 @@
- {
- if (first)
- {
-- emit_insn (gen_rtx_SET (VOIDmode, dest,
-- GEN_INT (val & mask)));
-+ if (generate)
-+ emit_insn (gen_rtx_SET (VOIDmode, dest,
-+ GEN_INT (val & mask)));
-+ num_insns ++;
- first = false;
- }
- else
-- emit_insn (gen_insv_immdi (dest, GEN_INT (i),
-- GEN_INT ((val >> i) & 0xffff)));
-+ {
-+ if (generate)
-+ emit_insn (gen_insv_immdi (dest, GEN_INT (i),
-+ GEN_INT ((val >> i) & 0xffff)));
-+ num_insns ++;
-+ }
- }
- }
-+
-+ return num_insns;
- }
-
--static bool
--aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
-+
-+void
-+aarch64_expand_mov_immediate (rtx dest, rtx imm)
- {
-- /* Indirect calls are not currently supported. */
-- if (decl == NULL)
-- return false;
-+ machine_mode mode = GET_MODE (dest);
-
-- /* Cannot tail-call to long-calls, since these are outside of the
-- range of a branch instruction (we could handle this if we added
-- support for indirect tail-calls. */
-- if (aarch64_decl_is_long_call_p (decl))
-- return false;
-+ gcc_assert (mode == SImode || mode == DImode);
-
-+ /* Check on what type of symbol it is. */
-+ if (GET_CODE (imm) == SYMBOL_REF
-+ || GET_CODE (imm) == LABEL_REF
-+ || GET_CODE (imm) == CONST)
-+ {
-+ rtx mem, base, offset;
-+ enum aarch64_symbol_type sty;
-+
-+ /* If we have (const (plus symbol offset)), separate out the offset
-+ before we start classifying the symbol. */
-+ split_const (imm, &base, &offset);
-+
-+ sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
-+ switch (sty)
-+ {
-+ case SYMBOL_FORCE_TO_MEM:
-+ if (offset != const0_rtx
-+ && targetm.cannot_force_const_mem (mode, imm))
-+ {
-+ gcc_assert (can_create_pseudo_p ());
-+ base = aarch64_force_temporary (mode, dest, base);
-+ base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
-+ aarch64_emit_move (dest, base);
-+ return;
-+ }
-+ mem = force_const_mem (ptr_mode, imm);
-+ gcc_assert (mem);
-+ if (mode != ptr_mode)
-+ mem = gen_rtx_ZERO_EXTEND (mode, mem);
-+ emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
-+ return;
-+
-+ case SYMBOL_SMALL_TLSGD:
-+ case SYMBOL_SMALL_TLSDESC:
-+ case SYMBOL_SMALL_GOTTPREL:
-+ case SYMBOL_SMALL_GOT:
-+ case SYMBOL_TINY_GOT:
-+ if (offset != const0_rtx)
-+ {
-+ gcc_assert(can_create_pseudo_p ());
-+ base = aarch64_force_temporary (mode, dest, base);
-+ base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
-+ aarch64_emit_move (dest, base);
-+ return;
-+ }
-+ /* FALLTHRU */
-+
-+ case SYMBOL_SMALL_TPREL:
-+ case SYMBOL_SMALL_ABSOLUTE:
-+ case SYMBOL_TINY_ABSOLUTE:
-+ aarch64_load_symref_appropriately (dest, imm, sty);
-+ return;
-+
-+ default:
-+ gcc_unreachable ();
-+ }
-+ }
-+
-+ if (!CONST_INT_P (imm))
-+ {
-+ if (GET_CODE (imm) == HIGH)
-+ emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
-+ else
-+ {
-+ rtx mem = force_const_mem (mode, imm);
-+ gcc_assert (mem);
-+ emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
-+ }
-+
-+ return;
-+ }
-+
-+ aarch64_internal_mov_immediate (dest, imm, true, GET_MODE (dest));
-+}
-+
-+static bool
-+aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
-+ tree exp ATTRIBUTE_UNUSED)
-+{
-+ /* Currently, always true. */
- return true;
- }
-
-@@ -1688,11 +1906,6 @@
- static bool
- aarch64_frame_pointer_required (void)
- {
-- /* If the function contains dynamic stack allocations, we need to
-- use the frame pointer to access the static parts of the frame. */
-- if (cfun->calls_alloca)
-- return true;
--
- /* In aarch64_override_options_after_change
- flag_omit_leaf_frame_pointer turns off the frame pointer by
- default. Turn it back on now if we've not got a leaf
-@@ -1716,268 +1929,312 @@
- if (reload_completed && cfun->machine->frame.laid_out)
- return;
-
-- cfun->machine->frame.fp_lr_offset = 0;
-+#define SLOT_NOT_REQUIRED (-2)
-+#define SLOT_REQUIRED (-1)
-
-+ cfun->machine->frame.wb_candidate1 = FIRST_PSEUDO_REGISTER;
-+ cfun->machine->frame.wb_candidate2 = FIRST_PSEUDO_REGISTER;
-+
- /* First mark all the registers that really need to be saved... */
- for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
-- cfun->machine->frame.reg_offset[regno] = -1;
-+ cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
-
- for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
-- cfun->machine->frame.reg_offset[regno] = -1;
-+ cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
-
- /* ... that includes the eh data registers (if needed)... */
- if (crtl->calls_eh_return)
- for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
-- cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
-+ cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
-+ = SLOT_REQUIRED;
-
- /* ... and any callee saved register that dataflow says is live. */
- for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
- if (df_regs_ever_live_p (regno)
- && !call_used_regs[regno])
-- cfun->machine->frame.reg_offset[regno] = 0;
-+ cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
-
- for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
- if (df_regs_ever_live_p (regno)
- && !call_used_regs[regno])
-- cfun->machine->frame.reg_offset[regno] = 0;
-+ cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
-
- if (frame_pointer_needed)
- {
-- cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
-+ /* FP and LR are placed in the linkage record. */
- cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
-+ cfun->machine->frame.wb_candidate1 = R29_REGNUM;
-+ cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
-+ cfun->machine->frame.wb_candidate2 = R30_REGNUM;
- cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
-+ offset += 2 * UNITS_PER_WORD;
- }
-
- /* Now assign stack slots for them. */
-- for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
-- if (cfun->machine->frame.reg_offset[regno] != -1)
-+ for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
-+ if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
- {
- cfun->machine->frame.reg_offset[regno] = offset;
-+ if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
-+ cfun->machine->frame.wb_candidate1 = regno;
-+ else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER)
-+ cfun->machine->frame.wb_candidate2 = regno;
- offset += UNITS_PER_WORD;
- }
-
- for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
-- if (cfun->machine->frame.reg_offset[regno] != -1)
-+ if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
- {
- cfun->machine->frame.reg_offset[regno] = offset;
-+ if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
-+ cfun->machine->frame.wb_candidate1 = regno;
-+ else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER
-+ && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
-+ cfun->machine->frame.wb_candidate2 = regno;
- offset += UNITS_PER_WORD;
- }
-
-- if (frame_pointer_needed)
-- {
-- cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
-- offset += UNITS_PER_WORD;
-- cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
-- }
--
-- if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
-- {
-- cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
-- offset += UNITS_PER_WORD;
-- cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
-- }
--
- cfun->machine->frame.padding0 =
- (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
- offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
-
- cfun->machine->frame.saved_regs_size = offset;
-+
-+ cfun->machine->frame.hard_fp_offset
-+ = AARCH64_ROUND_UP (cfun->machine->frame.saved_varargs_size
-+ + get_frame_size ()
-+ + cfun->machine->frame.saved_regs_size,
-+ STACK_BOUNDARY / BITS_PER_UNIT);
-+
-+ cfun->machine->frame.frame_size
-+ = AARCH64_ROUND_UP (cfun->machine->frame.hard_fp_offset
-+ + crtl->outgoing_args_size,
-+ STACK_BOUNDARY / BITS_PER_UNIT);
-+
- cfun->machine->frame.laid_out = true;
- }
-
--/* Make the last instruction frame-related and note that it performs
-- the operation described by FRAME_PATTERN. */
-+static bool
-+aarch64_register_saved_on_entry (int regno)
-+{
-+ return cfun->machine->frame.reg_offset[regno] >= 0;
-+}
-
-+static unsigned
-+aarch64_next_callee_save (unsigned regno, unsigned limit)
-+{
-+ while (regno <= limit && !aarch64_register_saved_on_entry (regno))
-+ regno ++;
-+ return regno;
-+}
-+
- static void
--aarch64_set_frame_expr (rtx frame_pattern)
-+aarch64_pushwb_single_reg (enum machine_mode mode, unsigned regno,
-+ HOST_WIDE_INT adjustment)
-+ {
-+ rtx base_rtx = stack_pointer_rtx;
-+ rtx insn, reg, mem;
-+
-+ reg = gen_rtx_REG (mode, regno);
-+ mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
-+ plus_constant (Pmode, base_rtx, -adjustment));
-+ mem = gen_rtx_MEM (mode, mem);
-+
-+ insn = emit_move_insn (mem, reg);
-+ RTX_FRAME_RELATED_P (insn) = 1;
-+}
-+
-+static rtx
-+aarch64_gen_storewb_pair (enum machine_mode mode, rtx base, rtx reg, rtx reg2,
-+ HOST_WIDE_INT adjustment)
- {
-+ switch (mode)
-+ {
-+ case DImode:
-+ return gen_storewb_pairdi_di (base, base, reg, reg2,
-+ GEN_INT (-adjustment),
-+ GEN_INT (UNITS_PER_WORD - adjustment));
-+ case DFmode:
-+ return gen_storewb_pairdf_di (base, base, reg, reg2,
-+ GEN_INT (-adjustment),
-+ GEN_INT (UNITS_PER_WORD - adjustment));
-+ default:
-+ gcc_unreachable ();
-+ }
-+}
-+
-+static void
-+aarch64_pushwb_pair_reg (enum machine_mode mode, unsigned regno1,
-+ unsigned regno2, HOST_WIDE_INT adjustment)
-+{
- rtx insn;
-+ rtx reg1 = gen_rtx_REG (mode, regno1);
-+ rtx reg2 = gen_rtx_REG (mode, regno2);
-
-- insn = get_last_insn ();
-+ insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
-+ reg2, adjustment));
-+ RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
-+
-+ RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
- RTX_FRAME_RELATED_P (insn) = 1;
-- RTX_FRAME_RELATED_P (frame_pattern) = 1;
-- REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
-- frame_pattern,
-- REG_NOTES (insn));
- }
-
--static bool
--aarch64_register_saved_on_entry (int regno)
-+static rtx
-+aarch64_gen_loadwb_pair (enum machine_mode mode, rtx base, rtx reg, rtx reg2,
-+ HOST_WIDE_INT adjustment)
- {
-- return cfun->machine->frame.reg_offset[regno] != -1;
-+ switch (mode)
-+ {
-+ case DImode:
-+ return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
-+ GEN_INT (UNITS_PER_WORD));
-+ case DFmode:
-+ return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
-+ GEN_INT (UNITS_PER_WORD));
-+ default:
-+ gcc_unreachable ();
-+ }
- }
-
-+static rtx
-+aarch64_gen_store_pair (enum machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
-+ rtx reg2)
-+{
-+ switch (mode)
-+ {
-+ case DImode:
-+ return gen_store_pairdi (mem1, reg1, mem2, reg2);
-
--static void
--aarch64_save_or_restore_fprs (int start_offset, int increment,
-- bool restore, rtx base_rtx)
-+ case DFmode:
-+ return gen_store_pairdf (mem1, reg1, mem2, reg2);
-
-+ default:
-+ gcc_unreachable ();
-+ }
-+}
-+
-+static rtx
-+aarch64_gen_load_pair (enum machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
-+ rtx mem2)
- {
-+ switch (mode)
-+ {
-+ case DImode:
-+ return gen_load_pairdi (reg1, mem1, reg2, mem2);
-+
-+ case DFmode:
-+ return gen_load_pairdf (reg1, mem1, reg2, mem2);
-+
-+ default:
-+ gcc_unreachable ();
-+ }
-+}
-+
-+
-+static void
-+aarch64_save_callee_saves (enum machine_mode mode, HOST_WIDE_INT start_offset,
-+ unsigned start, unsigned limit, bool skip_wb)
-+{
-+ rtx insn;
-+ rtx (*gen_mem_ref) (enum machine_mode, rtx) = (frame_pointer_needed
-+ ? gen_frame_mem : gen_rtx_MEM);
- unsigned regno;
- unsigned regno2;
-- rtx insn;
-- rtx (*gen_mem_ref)(enum machine_mode, rtx)
-- = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
-
--
-- for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
-+ for (regno = aarch64_next_callee_save (start, limit);
-+ regno <= limit;
-+ regno = aarch64_next_callee_save (regno + 1, limit))
- {
-- if (aarch64_register_saved_on_entry (regno))
-- {
-- rtx mem;
-- mem = gen_mem_ref (DFmode,
-- plus_constant (Pmode,
-- base_rtx,
-- start_offset));
-+ rtx reg, mem;
-+ HOST_WIDE_INT offset;
-
-- for (regno2 = regno + 1;
-- regno2 <= V31_REGNUM
-- && !aarch64_register_saved_on_entry (regno2);
-- regno2++)
-- {
-- /* Empty loop. */
-- }
-- if (regno2 <= V31_REGNUM &&
-- aarch64_register_saved_on_entry (regno2))
-- {
-- rtx mem2;
-- /* Next highest register to be saved. */
-- mem2 = gen_mem_ref (DFmode,
-- plus_constant
-- (Pmode,
-- base_rtx,
-- start_offset + increment));
-- if (restore == false)
-- {
-- insn = emit_insn
-- ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
-- mem2, gen_rtx_REG (DFmode, regno2)));
-+ if (skip_wb
-+ && (regno == cfun->machine->frame.wb_candidate1
-+ || regno == cfun->machine->frame.wb_candidate2))
-+ continue;
-
-- }
-- else
-- {
-- insn = emit_insn
-- ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
-- gen_rtx_REG (DFmode, regno2), mem2));
-+ reg = gen_rtx_REG (mode, regno);
-+ offset = start_offset + cfun->machine->frame.reg_offset[regno];
-+ mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
-+ offset));
-
-- add_reg_note (insn, REG_CFA_RESTORE,
-- gen_rtx_REG (DFmode, regno));
-- add_reg_note (insn, REG_CFA_RESTORE,
-- gen_rtx_REG (DFmode, regno2));
-- }
-+ regno2 = aarch64_next_callee_save (regno + 1, limit);
-
-- /* The first part of a frame-related parallel insn
-- is always assumed to be relevant to the frame
-- calculations; subsequent parts, are only
-- frame-related if explicitly marked. */
-- RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
-- regno = regno2;
-- start_offset += increment * 2;
-- }
-- else
-- {
-- if (restore == false)
-- insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
-- else
-- {
-- insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
-- add_reg_note (insn, REG_CFA_RESTORE,
-- gen_rtx_REG (DImode, regno));
-- }
-- start_offset += increment;
-- }
-- RTX_FRAME_RELATED_P (insn) = 1;
-+ if (regno2 <= limit
-+ && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
-+ == cfun->machine->frame.reg_offset[regno2]))
-+
-+ {
-+ rtx reg2 = gen_rtx_REG (mode, regno2);
-+ rtx mem2;
-+
-+ offset = start_offset + cfun->machine->frame.reg_offset[regno2];
-+ mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
-+ offset));
-+ insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
-+ reg2));
-+
-+ /* The first part of a frame-related parallel insn is
-+ always assumed to be relevant to the frame
-+ calculations; subsequent parts, are only
-+ frame-related if explicitly marked. */
-+ RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
-+ regno = regno2;
- }
-+ else
-+ insn = emit_move_insn (mem, reg);
-+
-+ RTX_FRAME_RELATED_P (insn) = 1;
- }
--
- }
-
--
--/* offset from the stack pointer of where the saves and
-- restore's have to happen. */
- static void
--aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
-- bool restore)
-+aarch64_restore_callee_saves (enum machine_mode mode,
-+ HOST_WIDE_INT start_offset, unsigned start,
-+ unsigned limit, bool skip_wb, rtx *cfi_ops)
- {
-- rtx insn;
- rtx base_rtx = stack_pointer_rtx;
-- HOST_WIDE_INT start_offset = offset;
-- HOST_WIDE_INT increment = UNITS_PER_WORD;
-- rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
-- unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
-+ rtx (*gen_mem_ref) (enum machine_mode, rtx) = (frame_pointer_needed
-+ ? gen_frame_mem : gen_rtx_MEM);
- unsigned regno;
- unsigned regno2;
-+ HOST_WIDE_INT offset;
-
-- for (regno = R0_REGNUM; regno <= limit; regno++)
-+ for (regno = aarch64_next_callee_save (start, limit);
-+ regno <= limit;
-+ regno = aarch64_next_callee_save (regno + 1, limit))
- {
-- if (aarch64_register_saved_on_entry (regno))
-- {
-- rtx mem;
-- mem = gen_mem_ref (Pmode,
-- plus_constant (Pmode,
-- base_rtx,
-- start_offset));
-+ rtx reg, mem;
-
-- for (regno2 = regno + 1;
-- regno2 <= limit
-- && !aarch64_register_saved_on_entry (regno2);
-- regno2++)
-- {
-- /* Empty loop. */
-- }
-- if (regno2 <= limit &&
-- aarch64_register_saved_on_entry (regno2))
-- {
-- rtx mem2;
-- /* Next highest register to be saved. */
-- mem2 = gen_mem_ref (Pmode,
-- plus_constant
-- (Pmode,
-- base_rtx,
-- start_offset + increment));
-- if (restore == false)
-- {
-- insn = emit_insn
-- ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
-- mem2, gen_rtx_REG (DImode, regno2)));
-+ if (skip_wb
-+ && (regno == cfun->machine->frame.wb_candidate1
-+ || regno == cfun->machine->frame.wb_candidate2))
-+ continue;
-
-- }
-- else
-- {
-- insn = emit_insn
-- ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
-- gen_rtx_REG (DImode, regno2), mem2));
-+ reg = gen_rtx_REG (mode, regno);
-+ offset = start_offset + cfun->machine->frame.reg_offset[regno];
-+ mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
-
-- add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
-- add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
-- }
-+ regno2 = aarch64_next_callee_save (regno + 1, limit);
-
-- /* The first part of a frame-related parallel insn
-- is always assumed to be relevant to the frame
-- calculations; subsequent parts, are only
-- frame-related if explicitly marked. */
-- RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
-- 1)) = 1;
-- regno = regno2;
-- start_offset += increment * 2;
-- }
-- else
-- {
-- if (restore == false)
-- insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
-- else
-- {
-- insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
-- add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
-- }
-- start_offset += increment;
-- }
-- RTX_FRAME_RELATED_P (insn) = 1;
-+ if (regno2 <= limit
-+ && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
-+ == cfun->machine->frame.reg_offset[regno2]))
-+ {
-+ rtx reg2 = gen_rtx_REG (mode, regno2);
-+ rtx mem2;
-+
-+ offset = start_offset + cfun->machine->frame.reg_offset[regno2];
-+ mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
-+ emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
-+
-+ *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
-+ regno = regno2;
- }
-+ else
-+ emit_move_insn (reg, mem);
-+ *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
- }
--
-- aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
--
- }
-
- /* AArch64 stack frames generated by this compiler look like:
-@@ -1986,37 +2243,35 @@
- | |
- | incoming stack arguments |
- | |
-- +-------------------------------+ <-- arg_pointer_rtx
-- | |
-+ +-------------------------------+
-+ | | <-- incoming stack pointer (aligned)
- | callee-allocated save area |
- | for register varargs |
- | |
-- +-------------------------------+ <-- frame_pointer_rtx
-+ +-------------------------------+
-+ | local variables | <-- frame_pointer_rtx
- | |
-- | local variables |
-- | |
- +-------------------------------+
- | padding0 | \
- +-------------------------------+ |
-- | | |
-- | | |
- | callee-saved registers | | frame.saved_regs_size
-- | | |
- +-------------------------------+ |
- | LR' | |
- +-------------------------------+ |
-- | FP' | /
-- P +-------------------------------+ <-- hard_frame_pointer_rtx
-+ | FP' | / <- hard_frame_pointer_rtx (aligned)
-+ +-------------------------------+
- | dynamic allocation |
- +-------------------------------+
-- | |
-- | outgoing stack arguments |
-- | |
-- +-------------------------------+ <-- stack_pointer_rtx
-+ | padding |
-+ +-------------------------------+
-+ | outgoing stack arguments | <-- arg_pointer
-+ | |
-+ +-------------------------------+
-+ | | <-- stack_pointer_rtx (aligned)
-
-- Dynamic stack allocations such as alloca insert data at point P.
-- They decrease stack_pointer_rtx but leave frame_pointer_rtx and
-- hard_frame_pointer_rtx unchanged. */
-+ Dynamic stack allocations via alloca() decrease stack_pointer_rtx
-+ but leave frame_pointer_rtx and hard_frame_pointer_rtx
-+ unchanged. */
-
- /* Generate the prologue instructions for entry into a function.
- Establish the stack frame by decreasing the stack pointer with a
-@@ -2034,27 +2289,20 @@
-
- sub sp, sp, <final_adjustment_if_any>
- */
-- HOST_WIDE_INT original_frame_size; /* local variables + vararg save */
- HOST_WIDE_INT frame_size, offset;
-- HOST_WIDE_INT fp_offset; /* FP offset from SP */
-+ HOST_WIDE_INT fp_offset; /* Offset from hard FP to SP. */
-+ HOST_WIDE_INT hard_fp_offset;
- rtx insn;
-
- aarch64_layout_frame ();
-- original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
-- gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
-- && (cfun->stdarg || !cfun->machine->saved_varargs_size));
-- frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
-- + crtl->outgoing_args_size);
-- offset = frame_size = AARCH64_ROUND_UP (frame_size,
-- STACK_BOUNDARY / BITS_PER_UNIT);
-
-+ offset = frame_size = cfun->machine->frame.frame_size;
-+ hard_fp_offset = cfun->machine->frame.hard_fp_offset;
-+ fp_offset = frame_size - hard_fp_offset;
-+
- if (flag_stack_usage_info)
- current_function_static_stack_size = frame_size;
-
-- fp_offset = (offset
-- - original_frame_size
-- - cfun->machine->frame.saved_regs_size);
--
- /* Store pairs and load pairs have a range only -512 to 504. */
- if (offset >= 512)
- {
-@@ -2064,7 +2312,7 @@
- register area. This will allow the pre-index write-back
- store pair instructions to be used for setting up the stack frame
- efficiently. */
-- offset = original_frame_size + cfun->machine->frame.saved_regs_size;
-+ offset = hard_fp_offset;
- if (offset >= 512)
- offset = cfun->machine->frame.saved_regs_size;
-
-@@ -2075,29 +2323,29 @@
- {
- rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
- emit_move_insn (op0, GEN_INT (-frame_size));
-- emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
-- aarch64_set_frame_expr (gen_rtx_SET
-- (Pmode, stack_pointer_rtx,
-- plus_constant (Pmode,
-- stack_pointer_rtx,
-- -frame_size)));
-+ insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
-+
-+ add_reg_note (insn, REG_CFA_ADJUST_CFA,
-+ gen_rtx_SET (VOIDmode, stack_pointer_rtx,
-+ plus_constant (Pmode, stack_pointer_rtx,
-+ -frame_size)));
-+ RTX_FRAME_RELATED_P (insn) = 1;
- }
- else if (frame_size > 0)
- {
-- if ((frame_size & 0xfff) != frame_size)
-+ int hi_ofs = frame_size & 0xfff000;
-+ int lo_ofs = frame_size & 0x000fff;
-+
-+ if (hi_ofs)
- {
- insn = emit_insn (gen_add2_insn
-- (stack_pointer_rtx,
-- GEN_INT (-(frame_size
-- & ~(HOST_WIDE_INT)0xfff))));
-+ (stack_pointer_rtx, GEN_INT (-hi_ofs)));
- RTX_FRAME_RELATED_P (insn) = 1;
- }
-- if ((frame_size & 0xfff) != 0)
-+ if (lo_ofs)
- {
- insn = emit_insn (gen_add2_insn
-- (stack_pointer_rtx,
-- GEN_INT (-(frame_size
-- & (HOST_WIDE_INT)0xfff))));
-+ (stack_pointer_rtx, GEN_INT (-lo_ofs)));
- RTX_FRAME_RELATED_P (insn) = 1;
- }
- }
-@@ -2107,12 +2355,11 @@
-
- if (offset > 0)
- {
-- /* Save the frame pointer and lr if the frame pointer is needed
-- first. Make the frame pointer point to the location of the
-- old frame pointer on the stack. */
-+ bool skip_wb = false;
-+
- if (frame_pointer_needed)
- {
-- rtx mem_fp, mem_lr;
-+ skip_wb = true;
-
- if (fp_offset)
- {
-@@ -2119,67 +2366,52 @@
- insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
- GEN_INT (-offset)));
- RTX_FRAME_RELATED_P (insn) = 1;
-- aarch64_set_frame_expr (gen_rtx_SET
-- (Pmode, stack_pointer_rtx,
-- gen_rtx_MINUS (Pmode,
-- stack_pointer_rtx,
-- GEN_INT (offset))));
-- mem_fp = gen_frame_mem (DImode,
-- plus_constant (Pmode,
-- stack_pointer_rtx,
-- fp_offset));
-- mem_lr = gen_frame_mem (DImode,
-- plus_constant (Pmode,
-- stack_pointer_rtx,
-- fp_offset
-- + UNITS_PER_WORD));
-- insn = emit_insn (gen_store_pairdi (mem_fp,
-- hard_frame_pointer_rtx,
-- mem_lr,
-- gen_rtx_REG (DImode,
-- LR_REGNUM)));
-+
-+ aarch64_save_callee_saves (DImode, fp_offset, R29_REGNUM,
-+ R30_REGNUM, false);
- }
- else
-- {
-- insn = emit_insn (gen_storewb_pairdi_di
-- (stack_pointer_rtx, stack_pointer_rtx,
-- hard_frame_pointer_rtx,
-- gen_rtx_REG (DImode, LR_REGNUM),
-- GEN_INT (-offset),
-- GEN_INT (GET_MODE_SIZE (DImode) - offset)));
-- RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
-- }
-+ aarch64_pushwb_pair_reg (DImode, R29_REGNUM, R30_REGNUM, offset);
-
-- /* The first part of a frame-related parallel insn is always
-- assumed to be relevant to the frame calculations;
-- subsequent parts, are only frame-related if explicitly
-- marked. */
-- RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
-- RTX_FRAME_RELATED_P (insn) = 1;
--
- /* Set up frame pointer to point to the location of the
- previous frame pointer on the stack. */
- insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
- stack_pointer_rtx,
- GEN_INT (fp_offset)));
-- aarch64_set_frame_expr (gen_rtx_SET
-- (Pmode, hard_frame_pointer_rtx,
-- plus_constant (Pmode,
-- stack_pointer_rtx,
-- fp_offset)));
- RTX_FRAME_RELATED_P (insn) = 1;
-- insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
-- hard_frame_pointer_rtx));
-+ emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
- }
- else
- {
-- insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
-- GEN_INT (-offset)));
-- RTX_FRAME_RELATED_P (insn) = 1;
-+ unsigned reg1 = cfun->machine->frame.wb_candidate1;
-+ unsigned reg2 = cfun->machine->frame.wb_candidate2;
-+
-+ if (fp_offset
-+ || reg1 == FIRST_PSEUDO_REGISTER
-+ || (reg2 == FIRST_PSEUDO_REGISTER
-+ && offset >= 256))
-+ {
-+ insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
-+ GEN_INT (-offset)));
-+ RTX_FRAME_RELATED_P (insn) = 1;
-+ }
-+ else
-+ {
-+ enum machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
-+
-+ skip_wb = true;
-+
-+ if (reg2 == FIRST_PSEUDO_REGISTER)
-+ aarch64_pushwb_single_reg (mode1, reg1, offset);
-+ else
-+ aarch64_pushwb_pair_reg (mode1, reg1, reg2, offset);
-+ }
- }
-
-- aarch64_save_or_restore_callee_save_registers
-- (fp_offset + cfun->machine->frame.hardfp_offset, 0);
-+ aarch64_save_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
-+ skip_wb);
-+ aarch64_save_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
-+ skip_wb);
- }
-
- /* when offset >= 512,
-@@ -2200,28 +2432,21 @@
- void
- aarch64_expand_epilogue (bool for_sibcall)
- {
-- HOST_WIDE_INT original_frame_size, frame_size, offset;
-+ HOST_WIDE_INT frame_size, offset;
- HOST_WIDE_INT fp_offset;
-+ HOST_WIDE_INT hard_fp_offset;
- rtx insn;
-- rtx cfa_reg;
-
- aarch64_layout_frame ();
-- original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
-- frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
-- + crtl->outgoing_args_size);
-- offset = frame_size = AARCH64_ROUND_UP (frame_size,
-- STACK_BOUNDARY / BITS_PER_UNIT);
-
-- fp_offset = (offset
-- - original_frame_size
-- - cfun->machine->frame.saved_regs_size);
-+ offset = frame_size = cfun->machine->frame.frame_size;
-+ hard_fp_offset = cfun->machine->frame.hard_fp_offset;
-+ fp_offset = frame_size - hard_fp_offset;
-
-- cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
--
- /* Store pairs and load pairs have a range only -512 to 504. */
- if (offset >= 512)
- {
-- offset = original_frame_size + cfun->machine->frame.saved_regs_size;
-+ offset = hard_fp_offset;
- if (offset >= 512)
- offset = cfun->machine->frame.saved_regs_size;
-
-@@ -2247,72 +2472,51 @@
- {
- insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
- hard_frame_pointer_rtx,
-- GEN_INT (- fp_offset)));
-- RTX_FRAME_RELATED_P (insn) = 1;
-- /* As SP is set to (FP - fp_offset), according to the rules in
-- dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
-- from the value of SP from now on. */
-- cfa_reg = stack_pointer_rtx;
-+ GEN_INT (0)));
-+ offset = offset - fp_offset;
- }
-
-- aarch64_save_or_restore_callee_save_registers
-- (fp_offset + cfun->machine->frame.hardfp_offset, 1);
--
-- /* Restore the frame pointer and lr if the frame pointer is needed. */
- if (offset > 0)
- {
-+ unsigned reg1 = cfun->machine->frame.wb_candidate1;
-+ unsigned reg2 = cfun->machine->frame.wb_candidate2;
-+ bool skip_wb = true;
-+ rtx cfi_ops = NULL;
-+
- if (frame_pointer_needed)
-+ fp_offset = 0;
-+ else if (fp_offset
-+ || reg1 == FIRST_PSEUDO_REGISTER
-+ || (reg2 == FIRST_PSEUDO_REGISTER
-+ && offset >= 256))
-+ skip_wb = false;
-+
-+ aarch64_restore_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
-+ skip_wb, &cfi_ops);
-+ aarch64_restore_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
-+ skip_wb, &cfi_ops);
-+
-+ if (skip_wb)
- {
-- rtx mem_fp, mem_lr;
-+ enum machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
-+ rtx rreg1 = gen_rtx_REG (mode1, reg1);
-
-- if (fp_offset)
-+ cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg1, cfi_ops);
-+ if (reg2 == FIRST_PSEUDO_REGISTER)
- {
-- mem_fp = gen_frame_mem (DImode,
-- plus_constant (Pmode,
-- stack_pointer_rtx,
-- fp_offset));
-- mem_lr = gen_frame_mem (DImode,
-- plus_constant (Pmode,
-- stack_pointer_rtx,
-- fp_offset
-- + UNITS_PER_WORD));
-- insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
-- mem_fp,
-- gen_rtx_REG (DImode,
-- LR_REGNUM),
-- mem_lr));
-+ rtx mem = plus_constant (Pmode, stack_pointer_rtx, offset);
-+ mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
-+ mem = gen_rtx_MEM (mode1, mem);
-+ insn = emit_move_insn (rreg1, mem);
- }
- else
- {
-- insn = emit_insn (gen_loadwb_pairdi_di
-- (stack_pointer_rtx,
-- stack_pointer_rtx,
-- hard_frame_pointer_rtx,
-- gen_rtx_REG (DImode, LR_REGNUM),
-- GEN_INT (offset),
-- GEN_INT (GET_MODE_SIZE (DImode) + offset)));
-- RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
-- add_reg_note (insn, REG_CFA_ADJUST_CFA,
-- (gen_rtx_SET (Pmode, stack_pointer_rtx,
-- plus_constant (Pmode, cfa_reg,
-- offset))));
-- }
-+ rtx rreg2 = gen_rtx_REG (mode1, reg2);
-
-- /* The first part of a frame-related parallel insn
-- is always assumed to be relevant to the frame
-- calculations; subsequent parts, are only
-- frame-related if explicitly marked. */
-- RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
-- RTX_FRAME_RELATED_P (insn) = 1;
-- add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
-- add_reg_note (insn, REG_CFA_RESTORE,
-- gen_rtx_REG (DImode, LR_REGNUM));
--
-- if (fp_offset)
-- {
-- insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
-- GEN_INT (offset)));
-- RTX_FRAME_RELATED_P (insn) = 1;
-+ cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg2, cfi_ops);
-+ insn = emit_insn (aarch64_gen_loadwb_pair
-+ (mode1, stack_pointer_rtx, rreg1,
-+ rreg2, offset));
- }
- }
- else
-@@ -2319,79 +2523,57 @@
- {
- insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
- GEN_INT (offset)));
-- RTX_FRAME_RELATED_P (insn) = 1;
- }
-- }
-
-- /* Stack adjustment for exception handler. */
-- if (crtl->calls_eh_return)
-- {
-- /* We need to unwind the stack by the offset computed by
-- EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
-- based on SP. Ideally we would update the SP and define the
-- CFA along the lines of:
--
-- SP = SP + EH_RETURN_STACKADJ_RTX
-- (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
--
-- However the dwarf emitter only understands a constant
-- register offset.
--
-- The solution chosen here is to use the otherwise unused IP0
-- as a temporary register to hold the current SP value. The
-- CFA is described using IP0 then SP is modified. */
--
-- rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
--
-- insn = emit_move_insn (ip0, stack_pointer_rtx);
-- add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
-+ /* Reset the CFA to be SP + FRAME_SIZE. */
-+ rtx new_cfa = stack_pointer_rtx;
-+ if (frame_size > 0)
-+ new_cfa = plus_constant (Pmode, new_cfa, frame_size);
-+ cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
-+ REG_NOTES (insn) = cfi_ops;
- RTX_FRAME_RELATED_P (insn) = 1;
--
-- emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
--
-- /* Ensure the assignment to IP0 does not get optimized away. */
-- emit_use (ip0);
- }
-
-- if (frame_size > -1)
-+ if (frame_size > 0)
- {
- if (frame_size >= 0x1000000)
- {
- rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
- emit_move_insn (op0, GEN_INT (frame_size));
-- emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
-- aarch64_set_frame_expr (gen_rtx_SET
-- (Pmode, stack_pointer_rtx,
-- plus_constant (Pmode,
-- stack_pointer_rtx,
-- frame_size)));
-+ insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
- }
-- else if (frame_size > 0)
-+ else
- {
-- if ((frame_size & 0xfff) != 0)
-+ int hi_ofs = frame_size & 0xfff000;
-+ int lo_ofs = frame_size & 0x000fff;
-+
-+ if (hi_ofs && lo_ofs)
- {
- insn = emit_insn (gen_add2_insn
-- (stack_pointer_rtx,
-- GEN_INT ((frame_size
-- & (HOST_WIDE_INT) 0xfff))));
-+ (stack_pointer_rtx, GEN_INT (hi_ofs)));
- RTX_FRAME_RELATED_P (insn) = 1;
-+ frame_size = lo_ofs;
- }
-- if ((frame_size & 0xfff) != frame_size)
-- {
-- insn = emit_insn (gen_add2_insn
-- (stack_pointer_rtx,
-- GEN_INT ((frame_size
-- & ~ (HOST_WIDE_INT) 0xfff))));
-- RTX_FRAME_RELATED_P (insn) = 1;
-- }
-+ insn = emit_insn (gen_add2_insn
-+ (stack_pointer_rtx, GEN_INT (frame_size)));
- }
-
-- aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
-- plus_constant (Pmode,
-- stack_pointer_rtx,
-- offset)));
-+ /* Reset the CFA to be SP + 0. */
-+ add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
-+ RTX_FRAME_RELATED_P (insn) = 1;
- }
-
-+ /* Stack adjustment for exception handler. */
-+ if (crtl->calls_eh_return)
-+ {
-+ /* We need to unwind the stack by the offset computed by
-+ EH_RETURN_STACKADJ_RTX. We have already reset the CFA
-+ to be SP; letting the CFA move during this adjustment
-+ is just as correct as retaining the CFA from the body
-+ of the function. Therefore, do nothing special. */
-+ emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
-+ }
-+
- emit_use (gen_rtx_REG (DImode, LR_REGNUM));
- if (!for_sibcall)
- emit_jump_insn (ret_rtx);
-@@ -2403,17 +2585,13 @@
- rtx
- aarch64_final_eh_return_addr (void)
- {
-- HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
-+ HOST_WIDE_INT fp_offset;
-+
- aarch64_layout_frame ();
-- original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
-- frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
-- + crtl->outgoing_args_size);
-- offset = frame_size = AARCH64_ROUND_UP (frame_size,
-- STACK_BOUNDARY / BITS_PER_UNIT);
-- fp_offset = offset
-- - original_frame_size
-- - cfun->machine->frame.saved_regs_size;
-
-+ fp_offset = cfun->machine->frame.frame_size
-+ - cfun->machine->frame.hard_fp_offset;
-+
- if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
- return gen_rtx_REG (DImode, LR_REGNUM);
-
-@@ -2449,12 +2627,22 @@
- - 2 * UNITS_PER_WORD));
- }
-
--/* Output code to build up a constant in a register. */
--static void
--aarch64_build_constant (int regnum, HOST_WIDE_INT val)
-+/* Possibly output code to build up a constant in a register. For
-+ the benefit of the costs infrastructure, returns the number of
-+ instructions which would be emitted. GENERATE inhibits or
-+ enables code generation. */
-+
-+static int
-+aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
- {
-+ int insns = 0;
-+
- if (aarch64_bitmask_imm (val, DImode))
-- emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
-+ {
-+ if (generate)
-+ emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
-+ insns = 1;
-+ }
- else
- {
- int i;
-@@ -2485,15 +2673,19 @@
- the same. */
- if (ncount < zcount)
- {
-- emit_move_insn (gen_rtx_REG (Pmode, regnum),
-- GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
-+ if (generate)
-+ emit_move_insn (gen_rtx_REG (Pmode, regnum),
-+ GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
- tval = 0xffff;
-+ insns++;
- }
- else
- {
-- emit_move_insn (gen_rtx_REG (Pmode, regnum),
-- GEN_INT (val & 0xffff));
-+ if (generate)
-+ emit_move_insn (gen_rtx_REG (Pmode, regnum),
-+ GEN_INT (val & 0xffff));
- tval = 0;
-+ insns++;
- }
-
- val >>= 16;
-@@ -2501,11 +2693,17 @@
- for (i = 16; i < 64; i += 16)
- {
- if ((val & 0xffff) != tval)
-- emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
-- GEN_INT (i), GEN_INT (val & 0xffff)));
-+ {
-+ if (generate)
-+ emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
-+ GEN_INT (i),
-+ GEN_INT (val & 0xffff)));
-+ insns++;
-+ }
- val >>= 16;
- }
- }
-+ return insns;
- }
-
- static void
-@@ -2520,7 +2718,7 @@
-
- if (mdelta >= 4096 * 4096)
- {
-- aarch64_build_constant (scratchreg, delta);
-+ (void) aarch64_build_constant (scratchreg, delta, true);
- emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
- }
- else if (mdelta > 0)
-@@ -2594,7 +2792,7 @@
- addr = plus_constant (Pmode, temp0, vcall_offset);
- else
- {
-- aarch64_build_constant (IP1_REGNUM, vcall_offset);
-+ (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
- addr = gen_rtx_PLUS (Pmode, temp0, temp1);
- }
-
-@@ -3011,8 +3209,8 @@
- return false;
- }
-
--static inline bool
--offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
-+bool
-+aarch64_offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
- {
- return (offset >= -64 * GET_MODE_SIZE (mode)
- && offset < 64 * GET_MODE_SIZE (mode)
-@@ -3046,11 +3244,11 @@
- enum rtx_code code = GET_CODE (x);
- rtx op0, op1;
- bool allow_reg_index_p =
-- outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;
--
-+ outer_code != PARALLEL && (GET_MODE_SIZE (mode) != 16
-+ || aarch64_vector_mode_supported_p (mode));
- /* Don't support anything other than POST_INC or REG addressing for
- AdvSIMD. */
-- if (aarch64_vector_mode_p (mode)
-+ if (aarch64_vect_struct_mode_p (mode)
- && (code != POST_INC && code != REG))
- return false;
-
-@@ -3066,6 +3264,21 @@
- case PLUS:
- op0 = XEXP (x, 0);
- op1 = XEXP (x, 1);
-+
-+ if (! strict_p
-+ && REG_P (op0)
-+ && (op0 == virtual_stack_vars_rtx
-+ || op0 == frame_pointer_rtx
-+ || op0 == arg_pointer_rtx)
-+ && CONST_INT_P (op1))
-+ {
-+ info->type = ADDRESS_REG_IMM;
-+ info->base = op0;
-+ info->offset = op1;
-+
-+ return true;
-+ }
-+
- if (GET_MODE_SIZE (mode) != 0
- && CONST_INT_P (op1)
- && aarch64_base_register_rtx_p (op0, strict_p))
-@@ -3084,12 +3297,12 @@
- We conservatively require an offset representable in either mode.
- */
- if (mode == TImode || mode == TFmode)
-- return (offset_7bit_signed_scaled_p (mode, offset)
-+ return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
- && offset_9bit_signed_unscaled_p (mode, offset));
-
- if (outer_code == PARALLEL)
- return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
-- && offset_7bit_signed_scaled_p (mode, offset));
-+ && aarch64_offset_7bit_signed_scaled_p (mode, offset));
- else
- return (offset_9bit_signed_unscaled_p (mode, offset)
- || offset_12bit_unsigned_scaled_p (mode, offset));
-@@ -3144,12 +3357,12 @@
- We conservatively require an offset representable in either mode.
- */
- if (mode == TImode || mode == TFmode)
-- return (offset_7bit_signed_scaled_p (mode, offset)
-+ return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
- && offset_9bit_signed_unscaled_p (mode, offset));
-
- if (outer_code == PARALLEL)
- return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
-- && offset_7bit_signed_scaled_p (mode, offset));
-+ && aarch64_offset_7bit_signed_scaled_p (mode, offset));
- else
- return offset_9bit_signed_unscaled_p (mode, offset);
- }
-@@ -3333,7 +3546,7 @@
- the comparison will have to be swapped when we emit the assembly
- code. */
- if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
-- && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
-+ && (REG_P (y) || GET_CODE (y) == SUBREG)
- && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
- || GET_CODE (x) == LSHIFTRT
- || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
-@@ -3342,7 +3555,7 @@
- /* Similarly for a negated operand, but we can only do this for
- equalities. */
- if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
-- && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
-+ && (REG_P (y) || GET_CODE (y) == SUBREG)
- && (code == EQ || code == NE)
- && GET_CODE (x) == NEG)
- return CC_Zmode;
-@@ -3359,7 +3572,7 @@
- return CCmode;
- }
-
--static unsigned
-+int
- aarch64_get_condition_code (rtx x)
- {
- enum machine_mode mode = GET_MODE (XEXP (x, 0));
-@@ -3386,7 +3599,7 @@
- case UNLE: return AARCH64_LE;
- case UNGT: return AARCH64_HI;
- case UNGE: return AARCH64_PL;
-- default: gcc_unreachable ();
-+ default: return -1;
- }
- break;
-
-@@ -3403,7 +3616,7 @@
- case GTU: return AARCH64_HI;
- case LEU: return AARCH64_LS;
- case LTU: return AARCH64_CC;
-- default: gcc_unreachable ();
-+ default: return -1;
- }
- break;
-
-@@ -3422,7 +3635,7 @@
- case GTU: return AARCH64_CC;
- case LEU: return AARCH64_CS;
- case LTU: return AARCH64_HI;
-- default: gcc_unreachable ();
-+ default: return -1;
- }
- break;
-
-@@ -3433,7 +3646,7 @@
- case EQ: return AARCH64_EQ;
- case GE: return AARCH64_PL;
- case LT: return AARCH64_MI;
-- default: gcc_unreachable ();
-+ default: return -1;
- }
- break;
-
-@@ -3442,16 +3655,46 @@
- {
- case NE: return AARCH64_NE;
- case EQ: return AARCH64_EQ;
-- default: gcc_unreachable ();
-+ default: return -1;
- }
- break;
-
- default:
-- gcc_unreachable ();
-+ return -1;
- break;
- }
- }
-
-+bool
-+aarch64_const_vec_all_same_in_range_p (rtx x,
-+ HOST_WIDE_INT minval,
-+ HOST_WIDE_INT maxval)
-+{
-+ HOST_WIDE_INT firstval;
-+ int count, i;
-+
-+ if (GET_CODE (x) != CONST_VECTOR
-+ || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
-+ return false;
-+
-+ firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
-+ if (firstval < minval || firstval > maxval)
-+ return false;
-+
-+ count = CONST_VECTOR_NUNITS (x);
-+ for (i = 1; i < count; i++)
-+ if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
-+ return false;
-+
-+ return true;
-+}
-+
-+bool
-+aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
-+{
-+ return aarch64_const_vec_all_same_in_range_p (x, val, val);
-+}
-+
- static unsigned
- bit_count (unsigned HOST_WIDE_INT value)
- {
-@@ -3502,7 +3745,7 @@
- {
- int n;
-
-- if (GET_CODE (x) != CONST_INT
-+ if (!CONST_INT_P (x)
- || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
- {
- output_operand_lossage ("invalid operand for '%%%c'", code);
-@@ -3532,7 +3775,7 @@
- int n;
-
- /* Print N such that 2^N == X. */
-- if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
-+ if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
- {
- output_operand_lossage ("invalid operand for '%%%c'", code);
- return;
-@@ -3544,7 +3787,7 @@
-
- case 'P':
- /* Print the number of non-zero bits in X (a const_int). */
-- if (GET_CODE (x) != CONST_INT)
-+ if (!CONST_INT_P (x))
- {
- output_operand_lossage ("invalid operand for '%%%c'", code);
- return;
-@@ -3555,7 +3798,7 @@
-
- case 'H':
- /* Print the higher numbered register of a pair (TImode) of regs. */
-- if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
-+ if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
- {
- output_operand_lossage ("invalid operand for '%%%c'", code);
- return;
-@@ -3565,39 +3808,48 @@
- break;
-
- case 'm':
-- /* Print a condition (eq, ne, etc). */
-+ {
-+ int cond_code;
-+ /* Print a condition (eq, ne, etc). */
-
-- /* CONST_TRUE_RTX means always -- that's the default. */
-- if (x == const_true_rtx)
-- return;
--
-- if (!COMPARISON_P (x))
-- {
-- output_operand_lossage ("invalid operand for '%%%c'", code);
-+ /* CONST_TRUE_RTX means always -- that's the default. */
-+ if (x == const_true_rtx)
- return;
-- }
-
-- fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
-+ if (!COMPARISON_P (x))
-+ {
-+ output_operand_lossage ("invalid operand for '%%%c'", code);
-+ return;
-+ }
-+
-+ cond_code = aarch64_get_condition_code (x);
-+ gcc_assert (cond_code >= 0);
-+ fputs (aarch64_condition_codes[cond_code], f);
-+ }
- break;
-
- case 'M':
-- /* Print the inverse of a condition (eq <-> ne, etc). */
-+ {
-+ int cond_code;
-+ /* Print the inverse of a condition (eq <-> ne, etc). */
-
-- /* CONST_TRUE_RTX means never -- that's the default. */
-- if (x == const_true_rtx)
-- {
-- fputs ("nv", f);
-- return;
-- }
-+ /* CONST_TRUE_RTX means never -- that's the default. */
-+ if (x == const_true_rtx)
-+ {
-+ fputs ("nv", f);
-+ return;
-+ }
-
-- if (!COMPARISON_P (x))
-- {
-- output_operand_lossage ("invalid operand for '%%%c'", code);
-- return;
-- }
--
-- fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
-- (aarch64_get_condition_code (x))], f);
-+ if (!COMPARISON_P (x))
-+ {
-+ output_operand_lossage ("invalid operand for '%%%c'", code);
-+ return;
-+ }
-+ cond_code = aarch64_get_condition_code (x);
-+ gcc_assert (cond_code >= 0);
-+ fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
-+ (cond_code)], f);
-+ }
- break;
-
- case 'b':
-@@ -3629,7 +3881,7 @@
-
- case 'X':
- /* Print bottom 16 bits of integer constant in hex. */
-- if (GET_CODE (x) != CONST_INT)
-+ if (!CONST_INT_P (x))
- {
- output_operand_lossage ("invalid operand for '%%%c'", code);
- return;
-@@ -3694,9 +3946,10 @@
- case CONST_VECTOR:
- if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
- {
-- gcc_assert (aarch64_const_vec_all_same_int_p (x,
-- HOST_WIDE_INT_MIN,
-- HOST_WIDE_INT_MAX));
-+ gcc_assert (
-+ aarch64_const_vec_all_same_in_range_p (x,
-+ HOST_WIDE_INT_MIN,
-+ HOST_WIDE_INT_MAX));
- asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
- }
- else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
-@@ -3839,34 +4092,34 @@
- if (addr.offset == const0_rtx)
- asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
- else
-- asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
-+ asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
- INTVAL (addr.offset));
- return;
-
- case ADDRESS_REG_REG:
- if (addr.shift == 0)
-- asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
-+ asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
- reg_names [REGNO (addr.offset)]);
- else
-- asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
-+ asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
- reg_names [REGNO (addr.offset)], addr.shift);
- return;
-
- case ADDRESS_REG_UXTW:
- if (addr.shift == 0)
-- asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
-+ asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
- REGNO (addr.offset) - R0_REGNUM);
- else
-- asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
-+ asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
- REGNO (addr.offset) - R0_REGNUM, addr.shift);
- return;
-
- case ADDRESS_REG_SXTW:
- if (addr.shift == 0)
-- asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
-+ asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
- REGNO (addr.offset) - R0_REGNUM);
- else
-- asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
-+ asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
- REGNO (addr.offset) - R0_REGNUM, addr.shift);
- return;
-
-@@ -3874,27 +4127,27 @@
- switch (GET_CODE (x))
- {
- case PRE_INC:
-- asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
-+ asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
- GET_MODE_SIZE (aarch64_memory_reference_mode));
- return;
- case POST_INC:
-- asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
-+ asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
- GET_MODE_SIZE (aarch64_memory_reference_mode));
- return;
- case PRE_DEC:
-- asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
-+ asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
- GET_MODE_SIZE (aarch64_memory_reference_mode));
- return;
- case POST_DEC:
-- asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
-+ asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
- GET_MODE_SIZE (aarch64_memory_reference_mode));
- return;
- case PRE_MODIFY:
-- asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
-+ asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
- INTVAL (addr.offset));
- return;
- case POST_MODIFY:
-- asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
-+ asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
- INTVAL (addr.offset));
- return;
- default:
-@@ -3903,7 +4156,7 @@
- break;
-
- case ADDRESS_LO_SUM:
-- asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
-+ asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
- output_addr_const (f, addr.offset);
- asm_fprintf (f, "]");
- return;
-@@ -3954,7 +4207,7 @@
- aarch64_regno_regclass (unsigned regno)
- {
- if (GP_REGNUM_P (regno))
-- return CORE_REGS;
-+ return GENERAL_REGS;
-
- if (regno == SP_REGNUM)
- return STACK_REG;
-@@ -3969,6 +4222,47 @@
- return NO_REGS;
- }
-
-+static rtx
-+aarch64_legitimize_address (rtx x, rtx /* orig_x */, enum machine_mode mode)
-+{
-+ /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
-+ where mask is selected by alignment and size of the offset.
-+ We try to pick as large a range for the offset as possible to
-+ maximize the chance of a CSE. However, for aligned addresses
-+ we limit the range to 4k so that structures with different sized
-+ elements are likely to use the same base. */
-+
-+ if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
-+ {
-+ HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
-+ HOST_WIDE_INT base_offset;
-+
-+ /* Does it look like we'll need a load/store-pair operation? */
-+ if (GET_MODE_SIZE (mode) > 16
-+ || mode == TImode)
-+ base_offset = ((offset + 64 * GET_MODE_SIZE (mode))
-+ & ~((128 * GET_MODE_SIZE (mode)) - 1));
-+ /* For offsets aren't a multiple of the access size, the limit is
-+ -256...255. */
-+ else if (offset & (GET_MODE_SIZE (mode) - 1))
-+ base_offset = (offset + 0x100) & ~0x1ff;
-+ else
-+ base_offset = offset & ~0xfff;
-+
-+ if (base_offset == 0)
-+ return x;
-+
-+ offset -= base_offset;
-+ rtx base_reg = gen_reg_rtx (Pmode);
-+ rtx val = force_operand (plus_constant (Pmode, XEXP (x, 0), base_offset),
-+ NULL_RTX);
-+ emit_move_insn (base_reg, val);
-+ x = plus_constant (Pmode, base_reg, offset);
-+ }
-+
-+ return x;
-+}
-+
- /* Try a machine-dependent way of reloading an illegitimate address
- operand. If we find one, push the reload and return the new rtx. */
-
-@@ -3980,8 +4274,8 @@
- {
- rtx x = *x_p;
-
-- /* Do not allow mem (plus (reg, const)) if vector mode. */
-- if (aarch64_vector_mode_p (mode)
-+ /* Do not allow mem (plus (reg, const)) if vector struct mode. */
-+ if (aarch64_vect_struct_mode_p (mode)
- && GET_CODE (x) == PLUS
- && REG_P (XEXP (x, 0))
- && CONST_INT_P (XEXP (x, 1)))
-@@ -4105,12 +4399,12 @@
- /* A TFmode or TImode memory access should be handled via an FP_REGS
- because AArch64 has richer addressing modes for LDR/STR instructions
- than LDP/STP instructions. */
-- if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
-+ if (!TARGET_GENERAL_REGS_ONLY && rclass == GENERAL_REGS
- && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
- return FP_REGS;
-
- if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
-- return CORE_REGS;
-+ return GENERAL_REGS;
-
- return NO_REGS;
- }
-@@ -4142,43 +4436,28 @@
- HOST_WIDE_INT
- aarch64_initial_elimination_offset (unsigned from, unsigned to)
- {
-- HOST_WIDE_INT frame_size;
-- HOST_WIDE_INT offset;
--
- aarch64_layout_frame ();
-- frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
-- + crtl->outgoing_args_size
-- + cfun->machine->saved_varargs_size);
-
-- frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
-- offset = frame_size;
-+ if (to == HARD_FRAME_POINTER_REGNUM)
-+ {
-+ if (from == ARG_POINTER_REGNUM)
-+ return cfun->machine->frame.frame_size - crtl->outgoing_args_size;
-
-- if (to == HARD_FRAME_POINTER_REGNUM)
-- {
-- if (from == ARG_POINTER_REGNUM)
-- return offset - crtl->outgoing_args_size;
-+ if (from == FRAME_POINTER_REGNUM)
-+ return (cfun->machine->frame.hard_fp_offset
-+ - cfun->machine->frame.saved_varargs_size);
-+ }
-
-- if (from == FRAME_POINTER_REGNUM)
-- return cfun->machine->frame.saved_regs_size + get_frame_size ();
-- }
-+ if (to == STACK_POINTER_REGNUM)
-+ {
-+ if (from == FRAME_POINTER_REGNUM)
-+ return (cfun->machine->frame.frame_size
-+ - cfun->machine->frame.saved_varargs_size);
-+ }
-
-- if (to == STACK_POINTER_REGNUM)
-- {
-- if (from == FRAME_POINTER_REGNUM)
-- {
-- HOST_WIDE_INT elim = crtl->outgoing_args_size
-- + cfun->machine->frame.saved_regs_size
-- + get_frame_size ()
-- - cfun->machine->frame.fp_lr_offset;
-- elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
-- return elim;
-- }
-- }
--
-- return offset;
-+ return cfun->machine->frame.frame_size;
- }
-
--
- /* Implement RETURN_ADDR_RTX. We do not support moving back to a
- previous frame. */
-
-@@ -4242,7 +4521,7 @@
- {
- switch (regclass)
- {
-- case CORE_REGS:
-+ case CALLER_SAVE_REGS:
- case POINTER_REGS:
- case GENERAL_REGS:
- case ALL_REGS:
-@@ -4443,9 +4722,13 @@
- {
- rtx op = x;
-
-+ /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
-+ we can convert both to ROR during final output. */
- if ((GET_CODE (op) == ASHIFT
- || GET_CODE (op) == ASHIFTRT
-- || GET_CODE (op) == LSHIFTRT)
-+ || GET_CODE (op) == LSHIFTRT
-+ || GET_CODE (op) == ROTATERT
-+ || GET_CODE (op) == ROTATE)
- && CONST_INT_P (XEXP (op, 1)))
- return XEXP (op, 0);
-
-@@ -4457,12 +4740,12 @@
- return x;
- }
-
--/* Helper function for rtx cost calculation. Strip a shift or extend
-+/* Helper function for rtx cost calculation. Strip an extend
- expression from X. Returns the inner operand if successful, or the
- original expression on failure. We deal with a number of possible
- canonicalization variations here. */
- static rtx
--aarch64_strip_shift_or_extend (rtx x)
-+aarch64_strip_extend (rtx x)
- {
- rtx op = x;
-
-@@ -4469,6 +4752,7 @@
- /* Zero and sign extraction of a widened value. */
- if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
- && XEXP (op, 2) == const0_rtx
-+ && GET_CODE (XEXP (op, 0)) == MULT
- && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
- XEXP (op, 1)))
- return XEXP (XEXP (op, 0), 0);
-@@ -4497,9 +4781,335 @@
- if (op != x)
- return op;
-
-- return aarch64_strip_shift (x);
-+ return x;
- }
-
-+/* Helper function for rtx cost calculation. Calculate the cost of
-+ a MULT, which may be part of a multiply-accumulate rtx. Return
-+ the calculated cost of the expression, recursing manually in to
-+ operands where needed. */
-+
-+static int
-+aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
-+{
-+ rtx op0, op1;
-+ const struct cpu_cost_table *extra_cost
-+ = aarch64_tune_params->insn_extra_cost;
-+ int cost = 0;
-+ bool maybe_fma = (outer == PLUS || outer == MINUS);
-+ enum machine_mode mode = GET_MODE (x);
-+
-+ gcc_checking_assert (code == MULT);
-+
-+ op0 = XEXP (x, 0);
-+ op1 = XEXP (x, 1);
-+
-+ if (VECTOR_MODE_P (mode))
-+ mode = GET_MODE_INNER (mode);
-+
-+ /* Integer multiply/fma. */
-+ if (GET_MODE_CLASS (mode) == MODE_INT)
-+ {
-+ /* The multiply will be canonicalized as a shift, cost it as such. */
-+ if (CONST_INT_P (op1)
-+ && exact_log2 (INTVAL (op1)) > 0)
-+ {
-+ if (speed)
-+ {
-+ if (maybe_fma)
-+ /* ADD (shifted register). */
-+ cost += extra_cost->alu.arith_shift;
-+ else
-+ /* LSL (immediate). */
-+ cost += extra_cost->alu.shift;
-+ }
-+
-+ cost += rtx_cost (op0, GET_CODE (op0), 0, speed);
-+
-+ return cost;
-+ }
-+
-+ /* Integer multiplies or FMAs have zero/sign extending variants. */
-+ if ((GET_CODE (op0) == ZERO_EXTEND
-+ && GET_CODE (op1) == ZERO_EXTEND)
-+ || (GET_CODE (op0) == SIGN_EXTEND
-+ && GET_CODE (op1) == SIGN_EXTEND))
-+ {
-+ cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
-+ + rtx_cost (XEXP (op1, 0), MULT, 1, speed);
-+
-+ if (speed)
-+ {
-+ if (maybe_fma)
-+ /* MADD/SMADDL/UMADDL. */
-+ cost += extra_cost->mult[0].extend_add;
-+ else
-+ /* MUL/SMULL/UMULL. */
-+ cost += extra_cost->mult[0].extend;
-+ }
-+
-+ return cost;
-+ }
-+
-+ /* This is either an integer multiply or an FMA. In both cases
-+ we want to recurse and cost the operands. */
-+ cost += rtx_cost (op0, MULT, 0, speed)
-+ + rtx_cost (op1, MULT, 1, speed);
-+
-+ if (speed)
-+ {
-+ if (maybe_fma)
-+ /* MADD. */
-+ cost += extra_cost->mult[mode == DImode].add;
-+ else
-+ /* MUL. */
-+ cost += extra_cost->mult[mode == DImode].simple;
-+ }
-+
-+ return cost;
-+ }
-+ else
-+ {
-+ if (speed)
-+ {
-+ /* Floating-point FMA/FMUL can also support negations of the
-+ operands. */
-+ if (GET_CODE (op0) == NEG)
-+ op0 = XEXP (op0, 0);
-+ if (GET_CODE (op1) == NEG)
-+ op1 = XEXP (op1, 0);
-+
-+ if (maybe_fma)
-+ /* FMADD/FNMADD/FNMSUB/FMSUB. */
-+ cost += extra_cost->fp[mode == DFmode].fma;
-+ else
-+ /* FMUL/FNMUL. */
-+ cost += extra_cost->fp[mode == DFmode].mult;
-+ }
-+
-+ cost += rtx_cost (op0, MULT, 0, speed)
-+ + rtx_cost (op1, MULT, 1, speed);
-+ return cost;
-+ }
-+}
-+
-+static int
-+aarch64_address_cost (rtx x,
-+ enum machine_mode mode,
-+ addr_space_t as ATTRIBUTE_UNUSED,
-+ bool speed)
-+{
-+ enum rtx_code c = GET_CODE (x);
-+ const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
-+ struct aarch64_address_info info;
-+ int cost = 0;
-+ info.shift = 0;
-+
-+ if (!aarch64_classify_address (&info, x, mode, c, false))
-+ {
-+ if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
-+ {
-+ /* This is a CONST or SYMBOL ref which will be split
-+ in a different way depending on the code model in use.
-+ Cost it through the generic infrastructure. */
-+ int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
-+ /* Divide through by the cost of one instruction to
-+ bring it to the same units as the address costs. */
-+ cost_symbol_ref /= COSTS_N_INSNS (1);
-+ /* The cost is then the cost of preparing the address,
-+ followed by an immediate (possibly 0) offset. */
-+ return cost_symbol_ref + addr_cost->imm_offset;
-+ }
-+ else
-+ {
-+ /* This is most likely a jump table from a case
-+ statement. */
-+ return addr_cost->register_offset;
-+ }
-+ }
-+
-+ switch (info.type)
-+ {
-+ case ADDRESS_LO_SUM:
-+ case ADDRESS_SYMBOLIC:
-+ case ADDRESS_REG_IMM:
-+ cost += addr_cost->imm_offset;
-+ break;
-+
-+ case ADDRESS_REG_WB:
-+ if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
-+ cost += addr_cost->pre_modify;
-+ else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
-+ cost += addr_cost->post_modify;
-+ else
-+ gcc_unreachable ();
-+
-+ break;
-+
-+ case ADDRESS_REG_REG:
-+ cost += addr_cost->register_offset;
-+ break;
-+
-+ case ADDRESS_REG_UXTW:
-+ case ADDRESS_REG_SXTW:
-+ cost += addr_cost->register_extend;
-+ break;
-+
-+ default:
-+ gcc_unreachable ();
-+ }
-+
-+
-+ if (info.shift > 0)
-+ {
-+ /* For the sake of calculating the cost of the shifted register
-+ component, we can treat same sized modes in the same way. */
-+ switch (GET_MODE_BITSIZE (mode))
-+ {
-+ case 16:
-+ cost += addr_cost->addr_scale_costs.hi;
-+ break;
-+
-+ case 32:
-+ cost += addr_cost->addr_scale_costs.si;
-+ break;
-+
-+ case 64:
-+ cost += addr_cost->addr_scale_costs.di;
-+ break;
-+
-+ /* We can't tell, or this is a 128-bit vector. */
-+ default:
-+ cost += addr_cost->addr_scale_costs.ti;
-+ break;
-+ }
-+ }
-+
-+ return cost;
-+}
-+
-+/* Return true if the RTX X in mode MODE is a zero or sign extract
-+ usable in an ADD or SUB (extended register) instruction. */
-+static bool
-+aarch64_rtx_arith_op_extract_p (rtx x, enum machine_mode mode)
-+{
-+ /* Catch add with a sign extract.
-+ This is add_<optab><mode>_multp2. */
-+ if (GET_CODE (x) == SIGN_EXTRACT
-+ || GET_CODE (x) == ZERO_EXTRACT)
-+ {
-+ rtx op0 = XEXP (x, 0);
-+ rtx op1 = XEXP (x, 1);
-+ rtx op2 = XEXP (x, 2);
-+
-+ if (GET_CODE (op0) == MULT
-+ && CONST_INT_P (op1)
-+ && op2 == const0_rtx
-+ && CONST_INT_P (XEXP (op0, 1))
-+ && aarch64_is_extend_from_extract (mode,
-+ XEXP (op0, 1),
-+ op1))
-+ {
-+ return true;
-+ }
-+ }
-+
-+ return false;
-+}
-+
-+static bool
-+aarch64_frint_unspec_p (unsigned int u)
-+{
-+ switch (u)
-+ {
-+ case UNSPEC_FRINTZ:
-+ case UNSPEC_FRINTP:
-+ case UNSPEC_FRINTM:
-+ case UNSPEC_FRINTA:
-+ case UNSPEC_FRINTN:
-+ case UNSPEC_FRINTX:
-+ case UNSPEC_FRINTI:
-+ return true;
-+
-+ default:
-+ return false;
-+ }
-+}
-+
-+/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
-+ storing it in *COST. Result is true if the total cost of the operation
-+ has now been calculated. */
-+static bool
-+aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
-+{
-+ rtx inner;
-+ rtx comparator;
-+ enum rtx_code cmpcode;
-+
-+ if (COMPARISON_P (op0))
-+ {
-+ inner = XEXP (op0, 0);
-+ comparator = XEXP (op0, 1);
-+ cmpcode = GET_CODE (op0);
-+ }
-+ else
-+ {
-+ inner = op0;
-+ comparator = const0_rtx;
-+ cmpcode = NE;
-+ }
-+
-+ if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
-+ {
-+ /* Conditional branch. */
-+ if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
-+ return true;
-+ else
-+ {
-+ if (cmpcode == NE || cmpcode == EQ)
-+ {
-+ if (comparator == const0_rtx)
-+ {
-+ /* TBZ/TBNZ/CBZ/CBNZ. */
-+ if (GET_CODE (inner) == ZERO_EXTRACT)
-+ /* TBZ/TBNZ. */
-+ *cost += rtx_cost (XEXP (inner, 0), ZERO_EXTRACT,
-+ 0, speed);
-+ else
-+ /* CBZ/CBNZ. */
-+ *cost += rtx_cost (inner, cmpcode, 0, speed);
-+
-+ return true;
-+ }
-+ }
-+ else if (cmpcode == LT || cmpcode == GE)
-+ {
-+ /* TBZ/TBNZ. */
-+ if (comparator == const0_rtx)
-+ return true;
-+ }
-+ }
-+ }
-+ else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
-+ {
-+ /* It's a conditional operation based on the status flags,
-+ so it must be some flavor of CSEL. */
-+
-+ /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
-+ if (GET_CODE (op1) == NEG
-+ || GET_CODE (op1) == NOT
-+ || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
-+ op1 = XEXP (op1, 0);
-+
-+ *cost += rtx_cost (op1, IF_THEN_ELSE, 1, speed);
-+ *cost += rtx_cost (op2, IF_THEN_ELSE, 2, speed);
-+ return true;
-+ }
-+
-+ /* We don't know what this is, cost all operands. */
-+ return false;
-+}
-+
- /* Calculate the cost of calculating X, storing it in *COST. Result
- is true if the total cost of the operation has now been calculated. */
- static bool
-@@ -4506,13 +5116,31 @@
- aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
- int param ATTRIBUTE_UNUSED, int *cost, bool speed)
- {
-- rtx op0, op1;
-+ rtx op0, op1, op2;
- const struct cpu_cost_table *extra_cost
- = aarch64_tune_params->insn_extra_cost;
-+ enum machine_mode mode = GET_MODE (x);
-
-+ /* By default, assume that everything has equivalent cost to the
-+ cheapest instruction. Any additional costs are applied as a delta
-+ above this default. */
-+ *cost = COSTS_N_INSNS (1);
-+
-+ /* TODO: The cost infrastructure currently does not handle
-+ vector operations. Assume that all vector operations
-+ are equally expensive. */
-+ if (VECTOR_MODE_P (mode))
-+ {
-+ if (speed)
-+ *cost += extra_cost->vect.alu;
-+ return true;
-+ }
-+
- switch (code)
- {
- case SET:
-+ /* The cost depends entirely on the operands to SET. */
-+ *cost = 0;
- op0 = SET_DEST (x);
- op1 = SET_SRC (x);
-
-@@ -4520,52 +5148,194 @@
- {
- case MEM:
- if (speed)
-- *cost += extra_cost->ldst.store;
-+ {
-+ rtx address = XEXP (op0, 0);
-+ if (GET_MODE_CLASS (mode) == MODE_INT)
-+ *cost += extra_cost->ldst.store;
-+ else if (mode == SFmode)
-+ *cost += extra_cost->ldst.storef;
-+ else if (mode == DFmode)
-+ *cost += extra_cost->ldst.stored;
-
-- if (op1 != const0_rtx)
-- *cost += rtx_cost (op1, SET, 1, speed);
-+ *cost +=
-+ COSTS_N_INSNS (aarch64_address_cost (address, mode,
-+ 0, speed));
-+ }
-+
-+ *cost += rtx_cost (op1, SET, 1, speed);
- return true;
-
- case SUBREG:
- if (! REG_P (SUBREG_REG (op0)))
- *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
-+
- /* Fall through. */
- case REG:
-- /* Cost is just the cost of the RHS of the set. */
-- *cost += rtx_cost (op1, SET, 1, true);
-+ /* const0_rtx is in general free, but we will use an
-+ instruction to set a register to 0. */
-+ if (REG_P (op1) || op1 == const0_rtx)
-+ {
-+ /* The cost is 1 per register copied. */
-+ int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
-+ / UNITS_PER_WORD;
-+ *cost = COSTS_N_INSNS (n_minus_1 + 1);
-+ }
-+ else
-+ /* Cost is just the cost of the RHS of the set. */
-+ *cost += rtx_cost (op1, SET, 1, speed);
- return true;
-
-- case ZERO_EXTRACT: /* Bit-field insertion. */
-+ case ZERO_EXTRACT:
- case SIGN_EXTRACT:
-- /* Strip any redundant widening of the RHS to meet the width of
-- the target. */
-+ /* Bit-field insertion. Strip any redundant widening of
-+ the RHS to meet the width of the target. */
- if (GET_CODE (op1) == SUBREG)
- op1 = SUBREG_REG (op1);
- if ((GET_CODE (op1) == ZERO_EXTEND
- || GET_CODE (op1) == SIGN_EXTEND)
-- && GET_CODE (XEXP (op0, 1)) == CONST_INT
-+ && CONST_INT_P (XEXP (op0, 1))
- && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
- >= INTVAL (XEXP (op0, 1))))
- op1 = XEXP (op1, 0);
-- *cost += rtx_cost (op1, SET, 1, speed);
-+
-+ if (CONST_INT_P (op1))
-+ {
-+ /* MOV immediate is assumed to always be cheap. */
-+ *cost = COSTS_N_INSNS (1);
-+ }
-+ else
-+ {
-+ /* BFM. */
-+ if (speed)
-+ *cost += extra_cost->alu.bfi;
-+ *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
-+ }
-+
- return true;
-
- default:
-- break;
-+ /* We can't make sense of this, assume default cost. */
-+ *cost = COSTS_N_INSNS (1);
-+ return false;
- }
- return false;
-
-+ case CONST_INT:
-+ /* If an instruction can incorporate a constant within the
-+ instruction, the instruction's expression avoids calling
-+ rtx_cost() on the constant. If rtx_cost() is called on a
-+ constant, then it is usually because the constant must be
-+ moved into a register by one or more instructions.
-+
-+ The exception is constant 0, which can be expressed
-+ as XZR/WZR and is therefore free. The exception to this is
-+ if we have (set (reg) (const0_rtx)) in which case we must cost
-+ the move. However, we can catch that when we cost the SET, so
-+ we don't need to consider that here. */
-+ if (x == const0_rtx)
-+ *cost = 0;
-+ else
-+ {
-+ /* To an approximation, building any other constant is
-+ proportionally expensive to the number of instructions
-+ required to build that constant. This is true whether we
-+ are compiling for SPEED or otherwise. */
-+ *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
-+ (NULL_RTX, x, false, mode));
-+ }
-+ return true;
-+
-+ case CONST_DOUBLE:
-+ if (speed)
-+ {
-+ /* mov[df,sf]_aarch64. */
-+ if (aarch64_float_const_representable_p (x))
-+ /* FMOV (scalar immediate). */
-+ *cost += extra_cost->fp[mode == DFmode].fpconst;
-+ else if (!aarch64_float_const_zero_rtx_p (x))
-+ {
-+ /* This will be a load from memory. */
-+ if (mode == DFmode)
-+ *cost += extra_cost->ldst.loadd;
-+ else
-+ *cost += extra_cost->ldst.loadf;
-+ }
-+ else
-+ /* Otherwise this is +0.0. We get this using MOVI d0, #0
-+ or MOV v0.s[0], wzr - neither of which are modeled by the
-+ cost tables. Just use the default cost. */
-+ {
-+ }
-+ }
-+
-+ return true;
-+
- case MEM:
- if (speed)
-- *cost += extra_cost->ldst.load;
-+ {
-+ /* For loads we want the base cost of a load, plus an
-+ approximation for the additional cost of the addressing
-+ mode. */
-+ rtx address = XEXP (x, 0);
-+ if (GET_MODE_CLASS (mode) == MODE_INT)
-+ *cost += extra_cost->ldst.load;
-+ else if (mode == SFmode)
-+ *cost += extra_cost->ldst.loadf;
-+ else if (mode == DFmode)
-+ *cost += extra_cost->ldst.loadd;
-
-+ *cost +=
-+ COSTS_N_INSNS (aarch64_address_cost (address, mode,
-+ 0, speed));
-+ }
-+
- return true;
-
- case NEG:
-- op0 = CONST0_RTX (GET_MODE (x));
-- op1 = XEXP (x, 0);
-- goto cost_minus;
-+ op0 = XEXP (x, 0);
-
-+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
-+ {
-+ if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
-+ || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
-+ {
-+ /* CSETM. */
-+ *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
-+ return true;
-+ }
-+
-+ /* Cost this as SUB wzr, X. */
-+ op0 = CONST0_RTX (GET_MODE (x));
-+ op1 = XEXP (x, 0);
-+ goto cost_minus;
-+ }
-+
-+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
-+ {
-+ /* Support (neg(fma...)) as a single instruction only if
-+ sign of zeros is unimportant. This matches the decision
-+ making in aarch64.md. */
-+ if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
-+ {
-+ /* FNMADD. */
-+ *cost = rtx_cost (op0, NEG, 0, speed);
-+ return true;
-+ }
-+ if (speed)
-+ /* FNEG. */
-+ *cost += extra_cost->fp[mode == DFmode].neg;
-+ return false;
-+ }
-+
-+ return false;
-+
-+ case CLRSB:
-+ case CLZ:
-+ if (speed)
-+ *cost += extra_cost->alu.clz;
-+
-+ return false;
-+
- case COMPARE:
- op0 = XEXP (x, 0);
- op1 = XEXP (x, 1);
-@@ -4577,96 +5347,228 @@
- goto cost_logic;
- }
-
-- /* Comparisons can work if the order is swapped.
-- Canonicalization puts the more complex operation first, but
-- we want it in op1. */
-- if (! (REG_P (op0)
-- || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
-- {
-- op0 = XEXP (x, 1);
-- op1 = XEXP (x, 0);
-- }
-- goto cost_minus;
-+ if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
-+ {
-+ /* TODO: A write to the CC flags possibly costs extra, this
-+ needs encoding in the cost tables. */
-
-+ /* CC_ZESWPmode supports zero extend for free. */
-+ if (GET_MODE (x) == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND)
-+ op0 = XEXP (op0, 0);
-+
-+ /* ANDS. */
-+ if (GET_CODE (op0) == AND)
-+ {
-+ x = op0;
-+ goto cost_logic;
-+ }
-+
-+ if (GET_CODE (op0) == PLUS)
-+ {
-+ /* ADDS (and CMN alias). */
-+ x = op0;
-+ goto cost_plus;
-+ }
-+
-+ if (GET_CODE (op0) == MINUS)
-+ {
-+ /* SUBS. */
-+ x = op0;
-+ goto cost_minus;
-+ }
-+
-+ if (GET_CODE (op1) == NEG)
-+ {
-+ /* CMN. */
-+ if (speed)
-+ *cost += extra_cost->alu.arith;
-+
-+ *cost += rtx_cost (op0, COMPARE, 0, speed);
-+ *cost += rtx_cost (XEXP (op1, 0), NEG, 1, speed);
-+ return true;
-+ }
-+
-+ /* CMP.
-+
-+ Compare can freely swap the order of operands, and
-+ canonicalization puts the more complex operation first.
-+ But the integer MINUS logic expects the shift/extend
-+ operation in op1. */
-+ if (! (REG_P (op0)
-+ || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
-+ {
-+ op0 = XEXP (x, 1);
-+ op1 = XEXP (x, 0);
-+ }
-+ goto cost_minus;
-+ }
-+
-+ if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
-+ {
-+ /* FCMP. */
-+ if (speed)
-+ *cost += extra_cost->fp[mode == DFmode].compare;
-+
-+ if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
-+ {
-+ /* FCMP supports constant 0.0 for no extra cost. */
-+ return true;
-+ }
-+ return false;
-+ }
-+
-+ return false;
-+
- case MINUS:
-- op0 = XEXP (x, 0);
-- op1 = XEXP (x, 1);
-+ {
-+ op0 = XEXP (x, 0);
-+ op1 = XEXP (x, 1);
-
-- cost_minus:
-- if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
-- || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
-- && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
-- {
-- if (op0 != const0_rtx)
-+cost_minus:
-+ /* Detect valid immediates. */
-+ if ((GET_MODE_CLASS (mode) == MODE_INT
-+ || (GET_MODE_CLASS (mode) == MODE_CC
-+ && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
-+ && CONST_INT_P (op1)
-+ && aarch64_uimm12_shift (INTVAL (op1)))
-+ {
- *cost += rtx_cost (op0, MINUS, 0, speed);
-
-- if (CONST_INT_P (op1))
-- {
-- if (!aarch64_uimm12_shift (INTVAL (op1)))
-- *cost += rtx_cost (op1, MINUS, 1, speed);
-- }
-- else
-- {
-- op1 = aarch64_strip_shift_or_extend (op1);
-- *cost += rtx_cost (op1, MINUS, 1, speed);
-- }
-- return true;
-- }
-+ if (speed)
-+ /* SUB(S) (immediate). */
-+ *cost += extra_cost->alu.arith;
-+ return true;
-
-- return false;
-+ }
-
-+ /* Look for SUB (extended register). */
-+ if (aarch64_rtx_arith_op_extract_p (op1, mode))
-+ {
-+ if (speed)
-+ *cost += extra_cost->alu.arith_shift;
-+
-+ *cost += rtx_cost (XEXP (XEXP (op1, 0), 0),
-+ (enum rtx_code) GET_CODE (op1),
-+ 0, speed);
-+ return true;
-+ }
-+
-+ rtx new_op1 = aarch64_strip_extend (op1);
-+
-+ /* Cost this as an FMA-alike operation. */
-+ if ((GET_CODE (new_op1) == MULT
-+ || GET_CODE (new_op1) == ASHIFT)
-+ && code != COMPARE)
-+ {
-+ *cost += aarch64_rtx_mult_cost (new_op1, MULT,
-+ (enum rtx_code) code,
-+ speed);
-+ *cost += rtx_cost (op0, MINUS, 0, speed);
-+ return true;
-+ }
-+
-+ *cost += rtx_cost (new_op1, MINUS, 1, speed);
-+
-+ if (speed)
-+ {
-+ if (GET_MODE_CLASS (mode) == MODE_INT)
-+ /* SUB(S). */
-+ *cost += extra_cost->alu.arith;
-+ else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
-+ /* FSUB. */
-+ *cost += extra_cost->fp[mode == DFmode].addsub;
-+ }
-+ return true;
-+ }
-+
- case PLUS:
-- op0 = XEXP (x, 0);
-- op1 = XEXP (x, 1);
-+ {
-+ rtx new_op0;
-
-- if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
-- {
-- if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
-- {
-- *cost += rtx_cost (op0, PLUS, 0, speed);
-- }
-- else
-- {
-- rtx new_op0 = aarch64_strip_shift_or_extend (op0);
-+ op0 = XEXP (x, 0);
-+ op1 = XEXP (x, 1);
-
-- if (new_op0 == op0
-- && GET_CODE (op0) == MULT)
-- {
-- if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
-- && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
-- || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
-- && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
-- {
-- *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
-- speed)
-- + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
-- speed)
-- + rtx_cost (op1, PLUS, 1, speed));
-- if (speed)
-- *cost +=
-- extra_cost->mult[GET_MODE (x) == DImode].extend_add;
-- return true;
-- }
-+cost_plus:
-+ if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
-+ || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
-+ {
-+ /* CSINC. */
-+ *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
-+ *cost += rtx_cost (op1, PLUS, 1, speed);
-+ return true;
-+ }
-
-- *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
-- + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
-- + rtx_cost (op1, PLUS, 1, speed));
-+ if (GET_MODE_CLASS (mode) == MODE_INT
-+ && CONST_INT_P (op1)
-+ && aarch64_uimm12_shift (INTVAL (op1)))
-+ {
-+ *cost += rtx_cost (op0, PLUS, 0, speed);
-
-- if (speed)
-- *cost += extra_cost->mult[GET_MODE (x) == DImode].add;
-+ if (speed)
-+ /* ADD (immediate). */
-+ *cost += extra_cost->alu.arith;
-+ return true;
-+ }
-
-- return true;
-- }
-+ /* Look for ADD (extended register). */
-+ if (aarch64_rtx_arith_op_extract_p (op0, mode))
-+ {
-+ if (speed)
-+ *cost += extra_cost->alu.arith_shift;
-
-- *cost += (rtx_cost (new_op0, PLUS, 0, speed)
-- + rtx_cost (op1, PLUS, 1, speed));
-- }
-- return true;
-- }
-+ *cost += rtx_cost (XEXP (XEXP (op0, 0), 0),
-+ (enum rtx_code) GET_CODE (op0),
-+ 0, speed);
-+ return true;
-+ }
-
-+ /* Strip any extend, leave shifts behind as we will
-+ cost them through mult_cost. */
-+ new_op0 = aarch64_strip_extend (op0);
-+
-+ if (GET_CODE (new_op0) == MULT
-+ || GET_CODE (new_op0) == ASHIFT)
-+ {
-+ *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
-+ speed);
-+ *cost += rtx_cost (op1, PLUS, 1, speed);
-+ return true;
-+ }
-+
-+ *cost += (rtx_cost (new_op0, PLUS, 0, speed)
-+ + rtx_cost (op1, PLUS, 1, speed));
-+
-+ if (speed)
-+ {
-+ if (GET_MODE_CLASS (mode) == MODE_INT)
-+ /* ADD. */
-+ *cost += extra_cost->alu.arith;
-+ else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
-+ /* FADD. */
-+ *cost += extra_cost->fp[mode == DFmode].addsub;
-+ }
-+ return true;
-+ }
-+
-+ case BSWAP:
-+ *cost = COSTS_N_INSNS (1);
-+
-+ if (speed)
-+ *cost += extra_cost->alu.rev;
-+
- return false;
-
- case IOR:
-+ if (aarch_rev16_p (x))
-+ {
-+ *cost = COSTS_N_INSNS (1);
-+
-+ if (speed)
-+ *cost += extra_cost->alu.rev;
-+
-+ return true;
-+ }
-+ /* Fall through. */
- case XOR:
- case AND:
- cost_logic:
-@@ -4673,117 +5575,252 @@
- op0 = XEXP (x, 0);
- op1 = XEXP (x, 1);
-
-+ if (code == AND
-+ && GET_CODE (op0) == MULT
-+ && CONST_INT_P (XEXP (op0, 1))
-+ && CONST_INT_P (op1)
-+ && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
-+ INTVAL (op1)) != 0)
-+ {
-+ /* This is a UBFM/SBFM. */
-+ *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed);
-+ if (speed)
-+ *cost += extra_cost->alu.bfx;
-+ return true;
-+ }
-+
- if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
- {
-+ /* We possibly get the immediate for free, this is not
-+ modelled. */
- if (CONST_INT_P (op1)
- && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
- {
-- *cost += rtx_cost (op0, AND, 0, speed);
-+ *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
-+
-+ if (speed)
-+ *cost += extra_cost->alu.logical;
-+
-+ return true;
- }
- else
- {
-+ rtx new_op0 = op0;
-+
-+ /* Handle ORN, EON, or BIC. */
- if (GET_CODE (op0) == NOT)
- op0 = XEXP (op0, 0);
-- op0 = aarch64_strip_shift (op0);
-- *cost += (rtx_cost (op0, AND, 0, speed)
-- + rtx_cost (op1, AND, 1, speed));
-+
-+ new_op0 = aarch64_strip_shift (op0);
-+
-+ /* If we had a shift on op0 then this is a logical-shift-
-+ by-register/immediate operation. Otherwise, this is just
-+ a logical operation. */
-+ if (speed)
-+ {
-+ if (new_op0 != op0)
-+ {
-+ /* Shift by immediate. */
-+ if (CONST_INT_P (XEXP (op0, 1)))
-+ *cost += extra_cost->alu.log_shift;
-+ else
-+ *cost += extra_cost->alu.log_shift_reg;
-+ }
-+ else
-+ *cost += extra_cost->alu.logical;
-+ }
-+
-+ /* In both cases we want to cost both operands. */
-+ *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed)
-+ + rtx_cost (op1, (enum rtx_code) code, 1, speed);
-+
-+ return true;
- }
-- return true;
- }
- return false;
-
-+ case NOT:
-+ /* MVN. */
-+ if (speed)
-+ *cost += extra_cost->alu.logical;
-+
-+ /* The logical instruction could have the shifted register form,
-+ but the cost is the same if the shift is processed as a separate
-+ instruction, so we don't bother with it here. */
-+ return false;
-+
- case ZERO_EXTEND:
-- if ((GET_MODE (x) == DImode
-- && GET_MODE (XEXP (x, 0)) == SImode)
-- || GET_CODE (XEXP (x, 0)) == MEM)
-+
-+ op0 = XEXP (x, 0);
-+ /* If a value is written in SI mode, then zero extended to DI
-+ mode, the operation will in general be free as a write to
-+ a 'w' register implicitly zeroes the upper bits of an 'x'
-+ register. However, if this is
-+
-+ (set (reg) (zero_extend (reg)))
-+
-+ we must cost the explicit register move. */
-+ if (mode == DImode
-+ && GET_MODE (op0) == SImode
-+ && outer == SET)
- {
-- *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
-+ int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
-+
-+ if (!op_cost && speed)
-+ /* MOV. */
-+ *cost += extra_cost->alu.extend;
-+ else
-+ /* Free, the cost is that of the SI mode operation. */
-+ *cost = op_cost;
-+
- return true;
- }
-+ else if (MEM_P (XEXP (x, 0)))
-+ {
-+ /* All loads can zero extend to any size for free. */
-+ *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed);
-+ return true;
-+ }
-+
-+ /* UXTB/UXTH. */
-+ if (speed)
-+ *cost += extra_cost->alu.extend;
-+
- return false;
-
- case SIGN_EXTEND:
-- if (GET_CODE (XEXP (x, 0)) == MEM)
-+ if (MEM_P (XEXP (x, 0)))
- {
-- *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
-+ /* LDRSH. */
-+ if (speed)
-+ {
-+ rtx address = XEXP (XEXP (x, 0), 0);
-+ *cost += extra_cost->ldst.load_sign_extend;
-+
-+ *cost +=
-+ COSTS_N_INSNS (aarch64_address_cost (address, mode,
-+ 0, speed));
-+ }
- return true;
- }
-+
-+ if (speed)
-+ *cost += extra_cost->alu.extend;
- return false;
-
-+ case ASHIFT:
-+ op0 = XEXP (x, 0);
-+ op1 = XEXP (x, 1);
-+
-+ if (CONST_INT_P (op1))
-+ {
-+ /* LSL (immediate), UBMF, UBFIZ and friends. These are all
-+ aliases. */
-+ if (speed)
-+ *cost += extra_cost->alu.shift;
-+
-+ /* We can incorporate zero/sign extend for free. */
-+ if (GET_CODE (op0) == ZERO_EXTEND
-+ || GET_CODE (op0) == SIGN_EXTEND)
-+ op0 = XEXP (op0, 0);
-+
-+ *cost += rtx_cost (op0, ASHIFT, 0, speed);
-+ return true;
-+ }
-+ else
-+ {
-+ /* LSLV. */
-+ if (speed)
-+ *cost += extra_cost->alu.shift_reg;
-+
-+ return false; /* All arguments need to be in registers. */
-+ }
-+
- case ROTATE:
-- if (!CONST_INT_P (XEXP (x, 1)))
-- *cost += COSTS_N_INSNS (2);
-- /* Fall through. */
- case ROTATERT:
- case LSHIFTRT:
-- case ASHIFT:
- case ASHIFTRT:
-+ op0 = XEXP (x, 0);
-+ op1 = XEXP (x, 1);
-
-- /* Shifting by a register often takes an extra cycle. */
-- if (speed && !CONST_INT_P (XEXP (x, 1)))
-- *cost += extra_cost->alu.arith_shift_reg;
-+ if (CONST_INT_P (op1))
-+ {
-+ /* ASR (immediate) and friends. */
-+ if (speed)
-+ *cost += extra_cost->alu.shift;
-
-- *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
-+ *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
-+ return true;
-+ }
-+ else
-+ {
-+
-+ /* ASR (register) and friends. */
-+ if (speed)
-+ *cost += extra_cost->alu.shift_reg;
-+
-+ return false; /* All arguments need to be in registers. */
-+ }
-+
-+ case SYMBOL_REF:
-+
-+ if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
-+ {
-+ /* LDR. */
-+ if (speed)
-+ *cost += extra_cost->ldst.load;
-+ }
-+ else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
-+ || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
-+ {
-+ /* ADRP, followed by ADD. */
-+ *cost += COSTS_N_INSNS (1);
-+ if (speed)
-+ *cost += 2 * extra_cost->alu.arith;
-+ }
-+ else if (aarch64_cmodel == AARCH64_CMODEL_TINY
-+ || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
-+ {
-+ /* ADR. */
-+ if (speed)
-+ *cost += extra_cost->alu.arith;
-+ }
-+
-+ if (flag_pic)
-+ {
-+ /* One extra load instruction, after accessing the GOT. */
-+ *cost += COSTS_N_INSNS (1);
-+ if (speed)
-+ *cost += extra_cost->ldst.load;
-+ }
- return true;
-
- case HIGH:
-- if (!CONSTANT_P (XEXP (x, 0)))
-- *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
-- return true;
--
- case LO_SUM:
-- if (!CONSTANT_P (XEXP (x, 1)))
-- *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
-- *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
-+ /* ADRP/ADD (immediate). */
-+ if (speed)
-+ *cost += extra_cost->alu.arith;
- return true;
-
- case ZERO_EXTRACT:
- case SIGN_EXTRACT:
-- *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
-+ /* UBFX/SBFX. */
-+ if (speed)
-+ *cost += extra_cost->alu.bfx;
-+
-+ /* We can trust that the immediates used will be correct (there
-+ are no by-register forms), so we need only cost op0. */
-+ *cost += rtx_cost (XEXP (x, 0), (enum rtx_code) code, 0, speed);
- return true;
-
- case MULT:
-- op0 = XEXP (x, 0);
-- op1 = XEXP (x, 1);
-+ *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
-+ /* aarch64_rtx_mult_cost always handles recursion to its
-+ operands. */
-+ return true;
-
-- *cost = COSTS_N_INSNS (1);
-- if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
-- {
-- if (CONST_INT_P (op1)
-- && exact_log2 (INTVAL (op1)) > 0)
-- {
-- *cost += rtx_cost (op0, ASHIFT, 0, speed);
-- return true;
-- }
--
-- if ((GET_CODE (op0) == ZERO_EXTEND
-- && GET_CODE (op1) == ZERO_EXTEND)
-- || (GET_CODE (op0) == SIGN_EXTEND
-- && GET_CODE (op1) == SIGN_EXTEND))
-- {
-- *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
-- + rtx_cost (XEXP (op1, 0), MULT, 1, speed));
-- if (speed)
-- *cost += extra_cost->mult[GET_MODE (x) == DImode].extend;
-- return true;
-- }
--
-- if (speed)
-- *cost += extra_cost->mult[GET_MODE (x) == DImode].simple;
-- }
-- else if (speed)
-- {
-- if (GET_MODE (x) == DFmode)
-- *cost += extra_cost->fp[1].mult;
-- else if (GET_MODE (x) == SFmode)
-- *cost += extra_cost->fp[0].mult;
-- }
--
-- return false; /* All arguments need to be in registers. */
--
- case MOD:
- case UMOD:
-- *cost = COSTS_N_INSNS (2);
- if (speed)
- {
- if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
-@@ -4800,53 +5837,222 @@
-
- case DIV:
- case UDIV:
-- *cost = COSTS_N_INSNS (1);
-+ case SQRT:
- if (speed)
- {
-- if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
-- *cost += extra_cost->mult[GET_MODE (x) == DImode].idiv;
-- else if (GET_MODE (x) == DFmode)
-- *cost += extra_cost->fp[1].div;
-- else if (GET_MODE (x) == SFmode)
-- *cost += extra_cost->fp[0].div;
-+ if (GET_MODE_CLASS (mode) == MODE_INT)
-+ /* There is no integer SQRT, so only DIV and UDIV can get
-+ here. */
-+ *cost += extra_cost->mult[mode == DImode].idiv;
-+ else
-+ *cost += extra_cost->fp[mode == DFmode].div;
- }
- return false; /* All arguments need to be in registers. */
-
-+ case IF_THEN_ELSE:
-+ return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
-+ XEXP (x, 2), cost, speed);
-+
-+ case EQ:
-+ case NE:
-+ case GT:
-+ case GTU:
-+ case LT:
-+ case LTU:
-+ case GE:
-+ case GEU:
-+ case LE:
-+ case LEU:
-+
-+ return false; /* All arguments must be in registers. */
-+
-+ case FMA:
-+ op0 = XEXP (x, 0);
-+ op1 = XEXP (x, 1);
-+ op2 = XEXP (x, 2);
-+
-+ if (speed)
-+ *cost += extra_cost->fp[mode == DFmode].fma;
-+
-+ /* FMSUB, FNMADD, and FNMSUB are free. */
-+ if (GET_CODE (op0) == NEG)
-+ op0 = XEXP (op0, 0);
-+
-+ if (GET_CODE (op2) == NEG)
-+ op2 = XEXP (op2, 0);
-+
-+ /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
-+ and the by-element operand as operand 0. */
-+ if (GET_CODE (op1) == NEG)
-+ op1 = XEXP (op1, 0);
-+
-+ /* Catch vector-by-element operations. The by-element operand can
-+ either be (vec_duplicate (vec_select (x))) or just
-+ (vec_select (x)), depending on whether we are multiplying by
-+ a vector or a scalar.
-+
-+ Canonicalization is not very good in these cases, FMA4 will put the
-+ by-element operand as operand 0, FNMA4 will have it as operand 1. */
-+ if (GET_CODE (op0) == VEC_DUPLICATE)
-+ op0 = XEXP (op0, 0);
-+ else if (GET_CODE (op1) == VEC_DUPLICATE)
-+ op1 = XEXP (op1, 0);
-+
-+ if (GET_CODE (op0) == VEC_SELECT)
-+ op0 = XEXP (op0, 0);
-+ else if (GET_CODE (op1) == VEC_SELECT)
-+ op1 = XEXP (op1, 0);
-+
-+ /* If the remaining parameters are not registers,
-+ get the cost to put them into registers. */
-+ *cost += rtx_cost (op0, FMA, 0, speed);
-+ *cost += rtx_cost (op1, FMA, 1, speed);
-+ *cost += rtx_cost (op2, FMA, 2, speed);
-+ return true;
-+
-+ case FLOAT_EXTEND:
-+ if (speed)
-+ *cost += extra_cost->fp[mode == DFmode].widen;
-+ return false;
-+
-+ case FLOAT_TRUNCATE:
-+ if (speed)
-+ *cost += extra_cost->fp[mode == DFmode].narrow;
-+ return false;
-+
-+ case FIX:
-+ case UNSIGNED_FIX:
-+ x = XEXP (x, 0);
-+ /* Strip the rounding part. They will all be implemented
-+ by the fcvt* family of instructions anyway. */
-+ if (GET_CODE (x) == UNSPEC)
-+ {
-+ unsigned int uns_code = XINT (x, 1);
-+
-+ if (uns_code == UNSPEC_FRINTA
-+ || uns_code == UNSPEC_FRINTM
-+ || uns_code == UNSPEC_FRINTN
-+ || uns_code == UNSPEC_FRINTP
-+ || uns_code == UNSPEC_FRINTZ)
-+ x = XVECEXP (x, 0, 0);
-+ }
-+
-+ if (speed)
-+ *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
-+
-+ *cost += rtx_cost (x, (enum rtx_code) code, 0, speed);
-+ return true;
-+
-+ case ABS:
-+ if (GET_MODE_CLASS (mode) == MODE_FLOAT)
-+ {
-+ /* FABS and FNEG are analogous. */
-+ if (speed)
-+ *cost += extra_cost->fp[mode == DFmode].neg;
-+ }
-+ else
-+ {
-+ /* Integer ABS will either be split to
-+ two arithmetic instructions, or will be an ABS
-+ (scalar), which we don't model. */
-+ *cost = COSTS_N_INSNS (2);
-+ if (speed)
-+ *cost += 2 * extra_cost->alu.arith;
-+ }
-+ return false;
-+
-+ case SMAX:
-+ case SMIN:
-+ if (speed)
-+ {
-+ /* FMAXNM/FMINNM/FMAX/FMIN.
-+ TODO: This may not be accurate for all implementations, but
-+ we do not model this in the cost tables. */
-+ *cost += extra_cost->fp[mode == DFmode].addsub;
-+ }
-+ return false;
-+
-+ case UNSPEC:
-+ /* The floating point round to integer frint* instructions. */
-+ if (aarch64_frint_unspec_p (XINT (x, 1)))
-+ {
-+ if (speed)
-+ *cost += extra_cost->fp[mode == DFmode].roundint;
-+
-+ return false;
-+ }
-+
-+ if (XINT (x, 1) == UNSPEC_RBIT)
-+ {
-+ if (speed)
-+ *cost += extra_cost->alu.rev;
-+
-+ return false;
-+ }
-+ break;
-+
-+ case TRUNCATE:
-+
-+ /* Decompose <su>muldi3_highpart. */
-+ if (/* (truncate:DI */
-+ mode == DImode
-+ /* (lshiftrt:TI */
-+ && GET_MODE (XEXP (x, 0)) == TImode
-+ && GET_CODE (XEXP (x, 0)) == LSHIFTRT
-+ /* (mult:TI */
-+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
-+ /* (ANY_EXTEND:TI (reg:DI))
-+ (ANY_EXTEND:TI (reg:DI))) */
-+ && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
-+ && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
-+ || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
-+ && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
-+ && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
-+ && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
-+ /* (const_int 64) */
-+ && CONST_INT_P (XEXP (XEXP (x, 0), 1))
-+ && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
-+ {
-+ /* UMULH/SMULH. */
-+ if (speed)
-+ *cost += extra_cost->mult[mode == DImode].extend;
-+ *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
-+ MULT, 0, speed);
-+ *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
-+ MULT, 1, speed);
-+ return true;
-+ }
-+
-+ /* Fall through. */
- default:
- break;
- }
-- return false;
-+
-+ if (dump_file && (dump_flags & TDF_DETAILS))
-+ fprintf (dump_file,
-+ "\nFailed to cost RTX. Assuming default cost.\n");
-+
-+ return true;
- }
-
--static int
--aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
-- enum machine_mode mode ATTRIBUTE_UNUSED,
-- addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
-+/* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
-+ calculated for X. This cost is stored in *COST. Returns true
-+ if the total cost of X was calculated. */
-+static bool
-+aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
-+ int param, int *cost, bool speed)
- {
-- enum rtx_code c = GET_CODE (x);
-- const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
-+ bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);
-
-- if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
-- return addr_cost->pre_modify;
--
-- if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
-- return addr_cost->post_modify;
--
-- if (c == PLUS)
-+ if (dump_file && (dump_flags & TDF_DETAILS))
- {
-- if (GET_CODE (XEXP (x, 1)) == CONST_INT)
-- return addr_cost->imm_offset;
-- else if (GET_CODE (XEXP (x, 0)) == MULT
-- || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
-- || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
-- return addr_cost->register_extend;
--
-- return addr_cost->register_offset;
-+ print_rtl_single (dump_file, x);
-+ fprintf (dump_file, "\n%s cost: %d (%s)\n",
-+ speed ? "Hot" : "Cold",
-+ *cost, result ? "final" : "partial");
- }
-- else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
-- return addr_cost->imm_offset;
-
-- return 0;
-+ return result;
- }
-
- static int
-@@ -4858,6 +6064,13 @@
- const struct cpu_regmove_cost *regmove_cost
- = aarch64_tune_params->regmove_cost;
-
-+ /* Caller save and pointer regs are equivalent to GENERAL_REGS. */
-+ if (to == CALLER_SAVE_REGS || to == POINTER_REGS)
-+ to = GENERAL_REGS;
-+
-+ if (from == CALLER_SAVE_REGS || from == POINTER_REGS)
-+ from = GENERAL_REGS;
-+
- /* Moving between GPR and stack cost is the same as GP2GP. */
- if ((from == GENERAL_REGS && to == STACK_REG)
- || (to == GENERAL_REGS && from == STACK_REG))
-@@ -4880,7 +6093,7 @@
- secondary reload. A general register is used as a scratch to move
- the upper DI value and the lower DI value is moved directly,
- hence the cost is the sum of three moves. */
-- if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 128)
-+ if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 16)
- return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
-
- return regmove_cost->FP2FP;
-@@ -5253,6 +6466,7 @@
- aarch64_tune_flags = selected_tune->flags;
- aarch64_tune = selected_tune->core;
- aarch64_tune_params = selected_tune->tune;
-+ aarch64_architecture_version = selected_cpu->architecture_version;
-
- if (aarch64_fix_a53_err835769 == 2)
- {
-@@ -5998,7 +7212,7 @@
-
- /* We don't save the size into *PRETEND_SIZE because we want to avoid
- any complication of having crtl->args.pretend_args_size changed. */
-- cfun->machine->saved_varargs_size
-+ cfun->machine->frame.saved_varargs_size
- = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
- STACK_BOUNDARY / BITS_PER_UNIT)
- + vr_saved * UNITS_PER_VREG);
-@@ -6685,7 +7899,7 @@
- unsigned HOST_WIDE_INT elpart;
- unsigned int part, parts;
-
-- if (GET_CODE (el) == CONST_INT)
-+ if (CONST_INT_P (el))
- {
- elpart = INTVAL (el);
- parts = 1;
-@@ -6816,30 +8030,6 @@
- #undef CHECK
- }
-
--static bool
--aarch64_const_vec_all_same_int_p (rtx x,
-- HOST_WIDE_INT minval,
-- HOST_WIDE_INT maxval)
--{
-- HOST_WIDE_INT firstval;
-- int count, i;
--
-- if (GET_CODE (x) != CONST_VECTOR
-- || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
-- return false;
--
-- firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
-- if (firstval < minval || firstval > maxval)
-- return false;
--
-- count = CONST_VECTOR_NUNITS (x);
-- for (i = 1; i < count; i++)
-- if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
-- return false;
--
-- return true;
--}
--
- /* Check of immediate shift constants are within range. */
- bool
- aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
-@@ -6846,9 +8036,9 @@
- {
- int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
- if (left)
-- return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
-+ return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
- else
-- return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
-+ return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
- }
-
- /* Return true if X is a uniform vector where all elements
-@@ -6886,7 +8076,7 @@
- && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
- return true;
-
-- if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
-+ if (CONST_INT_P (x))
- return true;
-
- if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
-@@ -6923,17 +8113,43 @@
- return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
- }
-
--/* Construct and return a PARALLEL RTX vector. */
-+/* Construct and return a PARALLEL RTX vector with elements numbering the
-+ lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
-+ the vector - from the perspective of the architecture. This does not
-+ line up with GCC's perspective on lane numbers, so we end up with
-+ different masks depending on our target endian-ness. The diagram
-+ below may help. We must draw the distinction when building masks
-+ which select one half of the vector. An instruction selecting
-+ architectural low-lanes for a big-endian target, must be described using
-+ a mask selecting GCC high-lanes.
-+
-+ Big-Endian Little-Endian
-+
-+GCC 0 1 2 3 3 2 1 0
-+ | x | x | x | x | | x | x | x | x |
-+Architecture 3 2 1 0 3 2 1 0
-+
-+Low Mask: { 2, 3 } { 0, 1 }
-+High Mask: { 0, 1 } { 2, 3 }
-+*/
-+
- rtx
- aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
- {
- int nunits = GET_MODE_NUNITS (mode);
- rtvec v = rtvec_alloc (nunits / 2);
-- int base = high ? nunits / 2 : 0;
-+ int high_base = nunits / 2;
-+ int low_base = 0;
-+ int base;
- rtx t1;
- int i;
-
-- for (i=0; i < nunits / 2; i++)
-+ if (BYTES_BIG_ENDIAN)
-+ base = high ? low_base : high_base;
-+ else
-+ base = high ? high_base : low_base;
-+
-+ for (i = 0; i < nunits / 2; i++)
- RTVEC_ELT (v, i) = GEN_INT (base + i);
-
- t1 = gen_rtx_PARALLEL (mode, v);
-@@ -6940,6 +8156,38 @@
- return t1;
- }
-
-+/* Check OP for validity as a PARALLEL RTX vector with elements
-+ numbering the lanes of either the high (HIGH == TRUE) or low lanes,
-+ from the perspective of the architecture. See the diagram above
-+ aarch64_simd_vect_par_cnst_half for more details. */
-+
-+bool
-+aarch64_simd_check_vect_par_cnst_half (rtx op, enum machine_mode mode,
-+ bool high)
-+{
-+ rtx ideal = aarch64_simd_vect_par_cnst_half (mode, high);
-+ HOST_WIDE_INT count_op = XVECLEN (op, 0);
-+ HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
-+ int i = 0;
-+
-+ if (!VECTOR_MODE_P (mode))
-+ return false;
-+
-+ if (count_op != count_ideal)
-+ return false;
-+
-+ for (i = 0; i < count_ideal; i++)
-+ {
-+ rtx elt_op = XVECEXP (op, 0, i);
-+ rtx elt_ideal = XVECEXP (ideal, 0, i);
-+
-+ if (!CONST_INT_P (elt_op)
-+ || INTVAL (elt_ideal) != INTVAL (elt_op))
-+ return false;
-+ }
-+ return true;
-+}
-+
- /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
- HIGH (exclusive). */
- void
-@@ -6946,7 +8194,7 @@
- aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
- {
- HOST_WIDE_INT lane;
-- gcc_assert (GET_CODE (operand) == CONST_INT);
-+ gcc_assert (CONST_INT_P (operand));
- lane = INTVAL (operand);
-
- if (lane < low || lane >= high)
-@@ -6956,7 +8204,7 @@
- void
- aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
- {
-- gcc_assert (GET_CODE (operand) == CONST_INT);
-+ gcc_assert (CONST_INT_P (operand));
- HOST_WIDE_INT lane = INTVAL (operand);
-
- if (lane < low || lane >= high)
-@@ -6994,7 +8242,7 @@
- aarch64_simd_mem_operand_p (rtx op)
- {
- return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
-- || GET_CODE (XEXP (op, 0)) == REG);
-+ || REG_P (XEXP (op, 0)));
- }
-
- /* Set up OPERANDS for a register copy from SRC to DEST, taking care
-@@ -7647,6 +8895,9 @@
- if (!CONST_DOUBLE_P (x))
- return false;
-
-+ if (GET_MODE (x) == VOIDmode)
-+ return false;
-+
- REAL_VALUE_FROM_CONST_DOUBLE (r, x);
-
- /* We cannot represent infinities, NaNs or +/-zero. We won't
-@@ -7899,20 +9150,26 @@
- aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
- {
- enum machine_mode vmode = GET_MODE (target);
-- unsigned int i, nelt = GET_MODE_NUNITS (vmode);
-+ unsigned int nelt = GET_MODE_NUNITS (vmode);
- bool one_vector_p = rtx_equal_p (op0, op1);
-- rtx rmask[MAX_VECT_LEN], mask;
-+ rtx mask;
-
-- gcc_checking_assert (!BYTES_BIG_ENDIAN);
--
- /* The TBL instruction does not use a modulo index, so we must take care
- of that ourselves. */
-- mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
-- for (i = 0; i < nelt; ++i)
-- rmask[i] = mask;
-- mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
-+ mask = aarch64_simd_gen_const_vector_dup (vmode,
-+ one_vector_p ? nelt - 1 : 2 * nelt - 1);
- sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
-
-+ /* For big-endian, we also need to reverse the index within the vector
-+ (but not which vector). */
-+ if (BYTES_BIG_ENDIAN)
-+ {
-+ /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
-+ if (!one_vector_p)
-+ mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
-+ sel = expand_simple_binop (vmode, XOR, sel, mask,
-+ NULL, 0, OPTAB_LIB_WIDEN);
-+ }
- aarch64_expand_vec_perm_1 (target, op0, op1, sel);
- }
-
-@@ -8171,7 +9428,145 @@
- return true;
- }
-
-+/* Recognize patterns for the EXT insn. */
-+
- static bool
-+aarch64_evpc_ext (struct expand_vec_perm_d *d)
-+{
-+ unsigned int i, nelt = d->nelt;
-+ rtx (*gen) (rtx, rtx, rtx, rtx);
-+ rtx offset;
-+
-+ unsigned int location = d->perm[0]; /* Always < nelt. */
-+
-+ /* Check if the extracted indices are increasing by one. */
-+ for (i = 1; i < nelt; i++)
-+ {
-+ unsigned int required = location + i;
-+ if (d->one_vector_p)
-+ {
-+ /* We'll pass the same vector in twice, so allow indices to wrap. */
-+ required &= (nelt - 1);
-+ }
-+ if (d->perm[i] != required)
-+ return false;
-+ }
-+
-+ switch (d->vmode)
-+ {
-+ case V16QImode: gen = gen_aarch64_extv16qi; break;
-+ case V8QImode: gen = gen_aarch64_extv8qi; break;
-+ case V4HImode: gen = gen_aarch64_extv4hi; break;
-+ case V8HImode: gen = gen_aarch64_extv8hi; break;
-+ case V2SImode: gen = gen_aarch64_extv2si; break;
-+ case V4SImode: gen = gen_aarch64_extv4si; break;
-+ case V2SFmode: gen = gen_aarch64_extv2sf; break;
-+ case V4SFmode: gen = gen_aarch64_extv4sf; break;
-+ case V2DImode: gen = gen_aarch64_extv2di; break;
-+ case V2DFmode: gen = gen_aarch64_extv2df; break;
-+ default:
-+ return false;
-+ }
-+
-+ /* Success! */
-+ if (d->testing_p)
-+ return true;
-+
-+ /* The case where (location == 0) is a no-op for both big- and little-endian,
-+ and is removed by the mid-end at optimization levels -O1 and higher. */
-+
-+ if (BYTES_BIG_ENDIAN && (location != 0))
-+ {
-+ /* After setup, we want the high elements of the first vector (stored
-+ at the LSB end of the register), and the low elements of the second
-+ vector (stored at the MSB end of the register). So swap. */
-+ rtx temp = d->op0;
-+ d->op0 = d->op1;
-+ d->op1 = temp;
-+ /* location != 0 (above), so safe to assume (nelt - location) < nelt. */
-+ location = nelt - location;
-+ }
-+
-+ offset = GEN_INT (location);
-+ emit_insn (gen (d->target, d->op0, d->op1, offset));
-+ return true;
-+}
-+
-+/* Recognize patterns for the REV insns. */
-+
-+static bool
-+aarch64_evpc_rev (struct expand_vec_perm_d *d)
-+{
-+ unsigned int i, j, diff, nelt = d->nelt;
-+ rtx (*gen) (rtx, rtx);
-+
-+ if (!d->one_vector_p)
-+ return false;
-+
-+ diff = d->perm[0];
-+ switch (diff)
-+ {
-+ case 7:
-+ switch (d->vmode)
-+ {
-+ case V16QImode: gen = gen_aarch64_rev64v16qi; break;
-+ case V8QImode: gen = gen_aarch64_rev64v8qi; break;
-+ default:
-+ return false;
-+ }
-+ break;
-+ case 3:
-+ switch (d->vmode)
-+ {
-+ case V16QImode: gen = gen_aarch64_rev32v16qi; break;
-+ case V8QImode: gen = gen_aarch64_rev32v8qi; break;
-+ case V8HImode: gen = gen_aarch64_rev64v8hi; break;
-+ case V4HImode: gen = gen_aarch64_rev64v4hi; break;
-+ default:
-+ return false;
-+ }
-+ break;
-+ case 1:
-+ switch (d->vmode)
-+ {
-+ case V16QImode: gen = gen_aarch64_rev16v16qi; break;
-+ case V8QImode: gen = gen_aarch64_rev16v8qi; break;
-+ case V8HImode: gen = gen_aarch64_rev32v8hi; break;
-+ case V4HImode: gen = gen_aarch64_rev32v4hi; break;
-+ case V4SImode: gen = gen_aarch64_rev64v4si; break;
-+ case V2SImode: gen = gen_aarch64_rev64v2si; break;
-+ case V4SFmode: gen = gen_aarch64_rev64v4sf; break;
-+ case V2SFmode: gen = gen_aarch64_rev64v2sf; break;
-+ default:
-+ return false;
-+ }
-+ break;
-+ default:
-+ return false;
-+ }
-+
-+ for (i = 0; i < nelt ; i += diff + 1)
-+ for (j = 0; j <= diff; j += 1)
-+ {
-+ /* This is guaranteed to be true as the value of diff
-+ is 7, 3, 1 and we should have enough elements in the
-+ queue to generate this. Getting a vector mask with a
-+ value of diff other than these values implies that
-+ something is wrong by the time we get here. */
-+ gcc_assert (i + j < nelt);
-+ if (d->perm[i + j] != i + diff - j)
-+ return false;
-+ }
-+
-+ /* Success! */
-+ if (d->testing_p)
-+ return true;
-+
-+ emit_insn (gen (d->target, d->op0));
-+ return true;
-+}
-+
-+static bool
- aarch64_evpc_dup (struct expand_vec_perm_d *d)
- {
- rtx (*gen) (rtx, rtx, rtx);
-@@ -8181,10 +9576,6 @@
- unsigned int i, elt, nelt = d->nelt;
- rtx lane;
-
-- /* TODO: This may not be big-endian safe. */
-- if (BYTES_BIG_ENDIAN)
-- return false;
--
- elt = d->perm[0];
- for (i = 1; i < nelt; i++)
- {
-@@ -8198,7 +9589,7 @@
- use d->op0 and need not do any extra arithmetic to get the
- correct lane number. */
- in0 = d->op0;
-- lane = GEN_INT (elt);
-+ lane = GEN_INT (elt); /* The pattern corrects for big-endian. */
-
- switch (vmode)
- {
-@@ -8227,11 +9618,6 @@
- enum machine_mode vmode = d->vmode;
- unsigned int i, nelt = d->nelt;
-
-- /* TODO: ARM's TBL indexing is little-endian. In order to handle GCC's
-- numbering of elements for big-endian, we must reverse the order. */
-- if (BYTES_BIG_ENDIAN)
-- return false;
--
- if (d->testing_p)
- return true;
-
-@@ -8242,7 +9628,15 @@
- return false;
-
- for (i = 0; i < nelt; ++i)
-- rperm[i] = GEN_INT (d->perm[i]);
-+ {
-+ int nunits = GET_MODE_NUNITS (vmode);
-+
-+ /* If big-endian and two vectors we end up with a weird mixed-endian
-+ mode on NEON. Reverse the index within each word but not the word
-+ itself. */
-+ rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
-+ : d->perm[i]);
-+ }
- sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
- sel = force_reg (vmode, sel);
-
-@@ -8271,14 +9665,18 @@
-
- if (TARGET_SIMD)
- {
-- if (aarch64_evpc_zip (d))
-+ if (aarch64_evpc_rev (d))
- return true;
-+ else if (aarch64_evpc_ext (d))
-+ return true;
-+ else if (aarch64_evpc_dup (d))
-+ return true;
-+ else if (aarch64_evpc_zip (d))
-+ return true;
- else if (aarch64_evpc_uzp (d))
- return true;
- else if (aarch64_evpc_trn (d))
- return true;
-- else if (aarch64_evpc_dup (d))
-- return true;
- return aarch64_evpc_tbl (d);
- }
- return false;
-@@ -8397,7 +9795,8 @@
- /* Limited combinations of subregs are safe on FPREGs. Particularly,
- 1. Vector Mode to Scalar mode where 1 unit of the vector is accessed.
- 2. Scalar to Scalar for integer modes or same size float modes.
-- 3. Vector to Vector modes. */
-+ 3. Vector to Vector modes.
-+ 4. On little-endian only, Vector-Structure to Vector modes. */
- if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to))
- {
- if (aarch64_vector_mode_supported_p (from)
-@@ -8413,11 +9812,215 @@
- if (aarch64_vector_mode_supported_p (from)
- && aarch64_vector_mode_supported_p (to))
- return false;
-+
-+ /* Within an vector structure straddling multiple vector registers
-+ we are in a mixed-endian representation. As such, we can't
-+ easily change modes for BYTES_BIG_ENDIAN. Otherwise, we can
-+ switch between vectors and vector structures cheaply. */
-+ if (!BYTES_BIG_ENDIAN)
-+ if ((aarch64_vector_mode_supported_p (from)
-+ && aarch64_vect_struct_mode_p (to))
-+ || (aarch64_vector_mode_supported_p (to)
-+ && aarch64_vect_struct_mode_p (from)))
-+ return false;
- }
-
- return true;
- }
-
-+/* Implement MODES_TIEABLE_P. */
-+
-+bool
-+aarch64_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
-+{
-+ if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
-+ return true;
-+
-+ /* We specifically want to allow elements of "structure" modes to
-+ be tieable to the structure. This more general condition allows
-+ other rarer situations too. */
-+ if (TARGET_SIMD
-+ && aarch64_vector_mode_p (mode1)
-+ && aarch64_vector_mode_p (mode2))
-+ return true;
-+
-+ return false;
-+}
-+
-+/* Return a new RTX holding the result of moving POINTER forward by
-+ AMOUNT bytes. */
-+
-+static rtx
-+aarch64_move_pointer (rtx pointer, int amount)
-+{
-+ rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
-+
-+ return adjust_automodify_address (pointer, GET_MODE (pointer),
-+ next, amount);
-+}
-+
-+/* Return a new RTX holding the result of moving POINTER forward by the
-+ size of the mode it points to. */
-+
-+static rtx
-+aarch64_progress_pointer (rtx pointer)
-+{
-+ HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));
-+
-+ return aarch64_move_pointer (pointer, amount);
-+}
-+
-+/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
-+ MODE bytes. */
-+
-+static void
-+aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
-+ enum machine_mode mode)
-+{
-+ rtx reg = gen_reg_rtx (mode);
-+
-+ /* "Cast" the pointers to the correct mode. */
-+ *src = adjust_address (*src, mode, 0);
-+ *dst = adjust_address (*dst, mode, 0);
-+ /* Emit the memcpy. */
-+ emit_move_insn (reg, *src);
-+ emit_move_insn (*dst, reg);
-+ /* Move the pointers forward. */
-+ *src = aarch64_progress_pointer (*src);
-+ *dst = aarch64_progress_pointer (*dst);
-+}
-+
-+/* Expand movmem, as if from a __builtin_memcpy. Return true if
-+ we succeed, otherwise return false. */
-+
-+bool
-+aarch64_expand_movmem (rtx *operands)
-+{
-+ unsigned int n;
-+ rtx dst = operands[0];
-+ rtx src = operands[1];
-+ rtx base;
-+ bool speed_p = !optimize_function_for_size_p (cfun);
-+
-+ /* When optimizing for size, give a better estimate of the length of a
-+ memcpy call, but use the default otherwise. */
-+ unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;
-+
-+ /* We can't do anything smart if the amount to copy is not constant. */
-+ if (!CONST_INT_P (operands[2]))
-+ return false;
-+
-+ n = UINTVAL (operands[2]);
-+
-+ /* Try to keep the number of instructions low. For cases below 16 bytes we
-+ need to make at most two moves. For cases above 16 bytes it will be one
-+ move for each 16 byte chunk, then at most two additional moves. */
-+ if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
-+ return false;
-+
-+ base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
-+ dst = adjust_automodify_address (dst, VOIDmode, base, 0);
-+
-+ base = copy_to_mode_reg (Pmode, XEXP (src, 0));
-+ src = adjust_automodify_address (src, VOIDmode, base, 0);
-+
-+ /* Simple cases. Copy 0-3 bytes, as (if applicable) a 2-byte, then a
-+ 1-byte chunk. */
-+ if (n < 4)
-+ {
-+ if (n >= 2)
-+ {
-+ aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
-+ n -= 2;
-+ }
-+
-+ if (n == 1)
-+ aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
-+
-+ return true;
-+ }
-+
-+ /* Copy 4-8 bytes. First a 4-byte chunk, then (if applicable) a second
-+ 4-byte chunk, partially overlapping with the previously copied chunk. */
-+ if (n < 8)
-+ {
-+ aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
-+ n -= 4;
-+ if (n > 0)
-+ {
-+ int move = n - 4;
-+
-+ src = aarch64_move_pointer (src, move);
-+ dst = aarch64_move_pointer (dst, move);
-+ aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
-+ }
-+ return true;
-+ }
-+
-+ /* Copy more than 8 bytes. Copy chunks of 16 bytes until we run out of
-+ them, then (if applicable) an 8-byte chunk. */
-+ while (n >= 8)
-+ {
-+ if (n / 16)
-+ {
-+ aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode);
-+ n -= 16;
-+ }
-+ else
-+ {
-+ aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
-+ n -= 8;
-+ }
-+ }
-+
-+ /* Finish the final bytes of the copy. We can always do this in one
-+ instruction. We either copy the exact amount we need, or partially
-+ overlap with the previous chunk we copied and copy 8-bytes. */
-+ if (n == 0)
-+ return true;
-+ else if (n == 1)
-+ aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
-+ else if (n == 2)
-+ aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
-+ else if (n == 4)
-+ aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
-+ else
-+ {
-+ if (n == 3)
-+ {
-+ src = aarch64_move_pointer (src, -1);
-+ dst = aarch64_move_pointer (dst, -1);
-+ aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
-+ }
-+ else
-+ {
-+ int move = n - 8;
-+
-+ src = aarch64_move_pointer (src, move);
-+ dst = aarch64_move_pointer (dst, move);
-+ aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
-+ }
-+ }
-+
-+ return true;
-+}
-+
-+static bool
-+aarch64_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
-+ unsigned int align,
-+ enum by_pieces_operation op,
-+ bool speed_p)
-+{
-+ /* STORE_BY_PIECES can be used when copying a constant string, but
-+ in that case each 64-bit chunk takes 5 insns instead of 2 (LDR/STR).
-+ For now we always fail this and let the move_by_pieces code copy
-+ the string from read-only memory. */
-+ if (op == STORE_BY_PIECES)
-+ return false;
-+
-+ return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
-+}
-+
- #undef TARGET_ADDRESS_COST
- #define TARGET_ADDRESS_COST aarch64_address_cost
-
-@@ -8588,7 +10191,7 @@
- #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
-
- #undef TARGET_RTX_COSTS
--#define TARGET_RTX_COSTS aarch64_rtx_costs
-+#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
-
- #undef TARGET_SCHED_ISSUE_RATE
- #define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
-@@ -8626,6 +10229,10 @@
- #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
- aarch64_autovectorize_vector_sizes
-
-+#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
-+#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
-+ aarch64_atomic_assign_expand_fenv
-+
- /* Section anchor support. */
-
- #undef TARGET_MIN_ANCHOR_OFFSET
-@@ -8654,6 +10261,19 @@
- #undef TARGET_FIXED_CONDITION_CODE_REGS
- #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
-
-+#undef TARGET_FLAGS_REGNUM
-+#define TARGET_FLAGS_REGNUM CC_REGNUM
-+
-+#undef TARGET_LEGITIMIZE_ADDRESS
-+#define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address
-+
-+#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
-+#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
-+ aarch64_use_by_pieces_infrastructure_p
-+
-+#undef TARGET_CAN_USE_DOLOOP_P
-+#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
-+
- struct gcc_target targetm = TARGET_INITIALIZER;
-
- #include "gt-aarch64.h"
---- a/src/gcc/config/aarch64/aarch64-elf-raw.h
-+++ b/src/gcc/config/aarch64/aarch64-elf-raw.h
-@@ -23,7 +23,9 @@
- #define GCC_AARCH64_ELF_RAW_H
-
- #define STARTFILE_SPEC " crti%O%s crtbegin%O%s crt0%O%s"
--#define ENDFILE_SPEC " crtend%O%s crtn%O%s"
-+#define ENDFILE_SPEC \
-+ " crtend%O%s crtn%O%s " \
-+ "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}"
-
- #ifdef TARGET_FIX_ERR_A53_835769_DEFAULT
- #define CA53_ERR_835769_SPEC \
---- a/src/gcc/config/aarch64/aarch64-linux.h
-+++ b/src/gcc/config/aarch64/aarch64-linux.h
-@@ -21,7 +21,7 @@
- #ifndef GCC_AARCH64_LINUX_H
- #define GCC_AARCH64_LINUX_H
-
--#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux-aarch64%{mbig-endian:_be}.so.1"
-+#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux-aarch64%{mbig-endian:_be}%{mabi=ilp32:_ilp32}.so.1"
-
- #define CPP_SPEC "%{pthread:-D_REENTRANT}"
-
-@@ -33,7 +33,7 @@
- -dynamic-linker " GNU_USER_DYNAMIC_LINKER " \
- -X \
- %{mbig-endian:-EB} %{mlittle-endian:-EL} \
-- -maarch64linux%{mbig-endian:b}"
-+ -maarch64linux%{mabi=ilp32:32}%{mbig-endian:b}"
-
- #ifdef TARGET_FIX_ERR_A53_835769_DEFAULT
- #define CA53_ERR_835769_SPEC \
-@@ -46,6 +46,14 @@
- #define LINK_SPEC LINUX_TARGET_LINK_SPEC \
- CA53_ERR_835769_SPEC
-
-+#define GNU_USER_TARGET_MATHFILE_SPEC \
-+ "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}"
-+
-+#undef ENDFILE_SPEC
-+#define ENDFILE_SPEC \
-+ GNU_USER_TARGET_MATHFILE_SPEC " " \
-+ GNU_USER_TARGET_ENDFILE_SPEC
-+
- #define TARGET_OS_CPP_BUILTINS() \
- do \
- { \
---- a/src/gcc/config/aarch64/iterators.md
-+++ b/src/gcc/config/aarch64/iterators.md
-@@ -95,6 +95,9 @@
- ;; Vector Float modes.
- (define_mode_iterator VDQF [V2SF V4SF V2DF])
-
-+;; Vector Float modes, and DF.
-+(define_mode_iterator VDQF_DF [V2SF V4SF V2DF DF])
-+
- ;; Vector single Float modes.
- (define_mode_iterator VDQSF [V2SF V4SF])
-
-@@ -156,6 +159,9 @@
- ;; Vector modes for H and S types.
- (define_mode_iterator VDQHS [V4HI V8HI V2SI V4SI])
-
-+;; Vector modes for H, S and D types.
-+(define_mode_iterator VDQHSD [V4HI V8HI V2SI V4SI V2DI])
-+
- ;; Vector modes for Q, H and S types.
- (define_mode_iterator VDQQHS [V8QI V16QI V4HI V8HI V2SI V4SI])
-
-@@ -273,6 +279,10 @@
- UNSPEC_UZP2 ; Used in vector permute patterns.
- UNSPEC_TRN1 ; Used in vector permute patterns.
- UNSPEC_TRN2 ; Used in vector permute patterns.
-+ UNSPEC_EXT ; Used in aarch64-simd.md.
-+ UNSPEC_REV64 ; Used in vector reverse patterns (permute).
-+ UNSPEC_REV32 ; Used in vector reverse patterns (permute).
-+ UNSPEC_REV16 ; Used in vector reverse patterns (permute).
- UNSPEC_AESE ; Used in aarch64-simd.md.
- UNSPEC_AESD ; Used in aarch64-simd.md.
- UNSPEC_AESMC ; Used in aarch64-simd.md.
-@@ -299,6 +309,10 @@
- ;; 32-bit version and "%x0" in the 64-bit version.
- (define_mode_attr w [(QI "w") (HI "w") (SI "w") (DI "x") (SF "s") (DF "d")])
-
-+;; For inequal width int to float conversion
-+(define_mode_attr w1 [(SF "w") (DF "x")])
-+(define_mode_attr w2 [(SF "x") (DF "w")])
-+
- ;; For constraints used in scalar immediate vector moves
- (define_mode_attr hq [(HI "h") (QI "q")])
-
-@@ -348,6 +362,9 @@
- ;; Attribute to describe constants acceptable in logical operations
- (define_mode_attr lconst [(SI "K") (DI "L")])
-
-+;; Attribute to describe constants acceptable in atomic logical operations
-+(define_mode_attr lconst_atomic [(QI "K") (HI "K") (SI "K") (DI "L")])
-+
- ;; Map a mode to a specific constraint character.
- (define_mode_attr cmode [(QI "q") (HI "h") (SI "s") (DI "d")])
-
-@@ -358,6 +375,9 @@
- (V2DI "2d") (V2SF "2s")
- (V4SF "4s") (V2DF "2d")])
-
-+(define_mode_attr Vrevsuff [(V4HI "16") (V8HI "16") (V2SI "32")
-+ (V4SI "32") (V2DI "64")])
-+
- (define_mode_attr Vmtype [(V8QI ".8b") (V16QI ".16b")
- (V4HI ".4h") (V8HI ".8h")
- (V2SI ".2s") (V4SI ".4s")
-@@ -552,13 +572,43 @@
-
- (define_mode_attr VSTRUCT_DREG [(OI "TI") (CI "EI") (XI "OI")])
-
-+;; Mode of pair of elements for each vector mode, to define transfer
-+;; size for structure lane/dup loads and stores.
-+(define_mode_attr V_TWO_ELEM [(V8QI "HI") (V16QI "HI")
-+ (V4HI "SI") (V8HI "SI")
-+ (V2SI "V2SI") (V4SI "V2SI")
-+ (DI "V2DI") (V2DI "V2DI")
-+ (V2SF "V2SF") (V4SF "V2SF")
-+ (DF "V2DI") (V2DF "V2DI")])
-+
-+;; Similar, for three elements.
-+(define_mode_attr V_THREE_ELEM [(V8QI "BLK") (V16QI "BLK")
-+ (V4HI "BLK") (V8HI "BLK")
-+ (V2SI "BLK") (V4SI "BLK")
-+ (DI "EI") (V2DI "EI")
-+ (V2SF "BLK") (V4SF "BLK")
-+ (DF "EI") (V2DF "EI")])
-+
-+;; Similar, for four elements.
-+(define_mode_attr V_FOUR_ELEM [(V8QI "SI") (V16QI "SI")
-+ (V4HI "V4HI") (V8HI "V4HI")
-+ (V2SI "V4SI") (V4SI "V4SI")
-+ (DI "OI") (V2DI "OI")
-+ (V2SF "V4SF") (V4SF "V4SF")
-+ (DF "OI") (V2DF "OI")])
-+
-+
- ;; Mode for atomic operation suffixes
- (define_mode_attr atomic_sfx
- [(QI "b") (HI "h") (SI "") (DI "")])
-
--(define_mode_attr fcvt_target [(V2DF "v2di") (V4SF "v4si") (V2SF "v2si")])
--(define_mode_attr FCVT_TARGET [(V2DF "V2DI") (V4SF "V4SI") (V2SF "V2SI")])
-+(define_mode_attr fcvt_target [(V2DF "v2di") (V4SF "v4si") (V2SF "v2si") (SF "si") (DF "di")])
-+(define_mode_attr FCVT_TARGET [(V2DF "V2DI") (V4SF "V4SI") (V2SF "V2SI") (SF "SI") (DF "DI")])
-
-+;; for the inequal width integer to fp conversions
-+(define_mode_attr fcvt_iesize [(SF "di") (DF "si")])
-+(define_mode_attr FCVT_IESIZE [(SF "DI") (DF "SI")])
-+
- (define_mode_attr VSWAP_WIDTH [(V8QI "V16QI") (V16QI "V8QI")
- (V4HI "V8HI") (V8HI "V4HI")
- (V2SI "V4SI") (V4SI "V2SI")
-@@ -853,6 +903,8 @@
- UNSPEC_TRN1 UNSPEC_TRN2
- UNSPEC_UZP1 UNSPEC_UZP2])
-
-+(define_int_iterator REVERSE [UNSPEC_REV64 UNSPEC_REV32 UNSPEC_REV16])
-+
- (define_int_iterator FRINT [UNSPEC_FRINTZ UNSPEC_FRINTP UNSPEC_FRINTM
- UNSPEC_FRINTN UNSPEC_FRINTI UNSPEC_FRINTX
- UNSPEC_FRINTA])
-@@ -862,6 +914,10 @@
-
- (define_int_iterator FRECP [UNSPEC_FRECPE UNSPEC_FRECPX])
-
-+(define_int_iterator CRC [UNSPEC_CRC32B UNSPEC_CRC32H UNSPEC_CRC32W
-+ UNSPEC_CRC32X UNSPEC_CRC32CB UNSPEC_CRC32CH
-+ UNSPEC_CRC32CW UNSPEC_CRC32CX])
-+
- (define_int_iterator CRYPTO_AES [UNSPEC_AESE UNSPEC_AESD])
- (define_int_iterator CRYPTO_AESMC [UNSPEC_AESMC UNSPEC_AESIMC])
-
-@@ -980,6 +1036,10 @@
- (UNSPEC_TRN1 "trn") (UNSPEC_TRN2 "trn")
- (UNSPEC_UZP1 "uzp") (UNSPEC_UZP2 "uzp")])
-
-+; op code for REV instructions (size within which elements are reversed).
-+(define_int_attr rev_op [(UNSPEC_REV64 "64") (UNSPEC_REV32 "32")
-+ (UNSPEC_REV16 "16")])
-+
- (define_int_attr perm_hilo [(UNSPEC_ZIP1 "1") (UNSPEC_ZIP2 "2")
- (UNSPEC_TRN1 "1") (UNSPEC_TRN2 "2")
- (UNSPEC_UZP1 "1") (UNSPEC_UZP2 "2")])
-@@ -986,6 +1046,16 @@
-
- (define_int_attr frecp_suffix [(UNSPEC_FRECPE "e") (UNSPEC_FRECPX "x")])
-
-+(define_int_attr crc_variant [(UNSPEC_CRC32B "crc32b") (UNSPEC_CRC32H "crc32h")
-+ (UNSPEC_CRC32W "crc32w") (UNSPEC_CRC32X "crc32x")
-+ (UNSPEC_CRC32CB "crc32cb") (UNSPEC_CRC32CH "crc32ch")
-+ (UNSPEC_CRC32CW "crc32cw") (UNSPEC_CRC32CX "crc32cx")])
-+
-+(define_int_attr crc_mode [(UNSPEC_CRC32B "QI") (UNSPEC_CRC32H "HI")
-+ (UNSPEC_CRC32W "SI") (UNSPEC_CRC32X "DI")
-+ (UNSPEC_CRC32CB "QI") (UNSPEC_CRC32CH "HI")
-+ (UNSPEC_CRC32CW "SI") (UNSPEC_CRC32CX "DI")])
-+
- (define_int_attr aes_op [(UNSPEC_AESE "e") (UNSPEC_AESD "d")])
- (define_int_attr aesmc_op [(UNSPEC_AESMC "mc") (UNSPEC_AESIMC "imc")])
-
---- a/src/gcc/config/aarch64/aarch64.h
-+++ b/src/gcc/config/aarch64/aarch64.h
-@@ -26,14 +26,48 @@
- #define TARGET_CPU_CPP_BUILTINS() \
- do \
- { \
-- builtin_define ("__aarch64__"); \
-+ builtin_define ("__aarch64__"); \
-+ builtin_define ("__ARM_64BIT_STATE"); \
-+ builtin_define_with_int_value \
-+ ("__ARM_ARCH", aarch64_architecture_version); \
-+ cpp_define_formatted \
-+ (parse_in, "__ARM_ARCH_%dA", aarch64_architecture_version); \
-+ builtin_define ("__ARM_ARCH_ISA_A64"); \
-+ builtin_define_with_int_value \
-+ ("__ARM_ARCH_PROFILE", 'A'); \
-+ builtin_define ("__ARM_FEATURE_CLZ"); \
-+ builtin_define ("__ARM_FEATURE_IDIV"); \
-+ builtin_define ("__ARM_FEATURE_UNALIGNED"); \
-+ if (flag_unsafe_math_optimizations) \
-+ builtin_define ("__ARM_FP_FAST"); \
-+ builtin_define ("__ARM_PCS_AAPCS64"); \
-+ builtin_define_with_int_value \
-+ ("__ARM_SIZEOF_WCHAR_T", WCHAR_TYPE_SIZE / 8); \
-+ builtin_define_with_int_value \
-+ ("__ARM_SIZEOF_MINIMAL_ENUM", \
-+ flag_short_enums? 1 : 4); \
- if (TARGET_BIG_END) \
-- builtin_define ("__AARCH64EB__"); \
-+ { \
-+ builtin_define ("__AARCH64EB__"); \
-+ builtin_define ("__ARM_BIG_ENDIAN"); \
-+ } \
- else \
- builtin_define ("__AARCH64EL__"); \
- \
-- if (TARGET_SIMD) \
-- builtin_define ("__ARM_NEON"); \
-+ if (TARGET_FLOAT) \
-+ { \
-+ builtin_define ("__ARM_FEATURE_FMA"); \
-+ builtin_define_with_int_value ("__ARM_FP", 0x0C); \
-+ } \
-+ if (TARGET_SIMD) \
-+ { \
-+ builtin_define ("__ARM_FEATURE_NUMERIC_MAXMIN"); \
-+ builtin_define ("__ARM_NEON"); \
-+ builtin_define_with_int_value ("__ARM_NEON_FP", 0x0C);\
-+ } \
-+ \
-+ if (TARGET_CRC32) \
-+ builtin_define ("__ARM_FEATURE_CRC32"); \
- \
- switch (aarch64_cmodel) \
- { \
-@@ -155,6 +189,8 @@
-
- #define PCC_BITFIELD_TYPE_MATTERS 1
-
-+/* Major revision number of the ARM Architecture implemented by the target. */
-+extern unsigned aarch64_architecture_version;
-
- /* Instruction tuning/selection flags. */
-
-@@ -188,6 +224,9 @@
- /* Crypto is an optional extension to AdvSIMD. */
- #define TARGET_CRYPTO (TARGET_SIMD && AARCH64_ISA_CRYPTO)
-
-+/* CRC instructions that can be enabled through +crc arch extension. */
-+#define TARGET_CRC32 (AARCH64_ISA_CRC)
-+
- /* Standard register usage. */
-
- /* 31 64-bit general purpose registers R0-R30:
-@@ -365,8 +404,7 @@
-
- #define HARD_REGNO_MODE_OK(REGNO, MODE) aarch64_hard_regno_mode_ok (REGNO, MODE)
-
--#define MODES_TIEABLE_P(MODE1, MODE2) \
-- (GET_MODE_CLASS (MODE1) == GET_MODE_CLASS (MODE2))
-+#define MODES_TIEABLE_P(MODE1, MODE2) aarch64_modes_tieable_p (MODE1, MODE2)
-
- #define DWARF2_UNWIND_INFO 1
-
-@@ -409,7 +447,7 @@
- enum reg_class
- {
- NO_REGS,
-- CORE_REGS,
-+ CALLER_SAVE_REGS,
- GENERAL_REGS,
- STACK_REG,
- POINTER_REGS,
-@@ -424,7 +462,7 @@
- #define REG_CLASS_NAMES \
- { \
- "NO_REGS", \
-- "CORE_REGS", \
-+ "CALLER_SAVE_REGS", \
- "GENERAL_REGS", \
- "STACK_REG", \
- "POINTER_REGS", \
-@@ -436,7 +474,7 @@
- #define REG_CLASS_CONTENTS \
- { \
- { 0x00000000, 0x00000000, 0x00000000 }, /* NO_REGS */ \
-- { 0x7fffffff, 0x00000000, 0x00000003 }, /* CORE_REGS */ \
-+ { 0x0007ffff, 0x00000000, 0x00000000 }, /* CALLER_SAVE_REGS */ \
- { 0x7fffffff, 0x00000000, 0x00000003 }, /* GENERAL_REGS */ \
- { 0x80000000, 0x00000000, 0x00000000 }, /* STACK_REG */ \
- { 0xffffffff, 0x00000000, 0x00000003 }, /* POINTER_REGS */ \
-@@ -447,7 +485,7 @@
-
- #define REGNO_REG_CLASS(REGNO) aarch64_regno_regclass (REGNO)
-
--#define INDEX_REG_CLASS CORE_REGS
-+#define INDEX_REG_CLASS GENERAL_REGS
- #define BASE_REG_CLASS POINTER_REGS
-
- /* Register pairs used to eliminate unneeded registers that point into
-@@ -524,13 +562,33 @@
- struct GTY (()) aarch64_frame
- {
- HOST_WIDE_INT reg_offset[FIRST_PSEUDO_REGISTER];
-+
-+ /* The number of extra stack bytes taken up by register varargs.
-+ This area is allocated by the callee at the very top of the
-+ frame. This value is rounded up to a multiple of
-+ STACK_BOUNDARY. */
-+ HOST_WIDE_INT saved_varargs_size;
-+
- HOST_WIDE_INT saved_regs_size;
- /* Padding if needed after the all the callee save registers have
- been saved. */
- HOST_WIDE_INT padding0;
- HOST_WIDE_INT hardfp_offset; /* HARD_FRAME_POINTER_REGNUM */
-- HOST_WIDE_INT fp_lr_offset; /* Space needed for saving fp and/or lr */
-
-+ /* Offset from the base of the frame (incomming SP) to the
-+ hard_frame_pointer. This value is always a multiple of
-+ STACK_BOUNDARY. */
-+ HOST_WIDE_INT hard_fp_offset;
-+
-+ /* The size of the frame. This value is the offset from base of the
-+ * frame (incomming SP) to the stack_pointer. This value is always
-+ * a multiple of STACK_BOUNDARY. */
-+
-+ unsigned wb_candidate1;
-+ unsigned wb_candidate2;
-+
-+ HOST_WIDE_INT frame_size;
-+
- bool laid_out;
- };
-
-@@ -537,11 +595,6 @@
- typedef struct GTY (()) machine_function
- {
- struct aarch64_frame frame;
--
-- /* The number of extra stack bytes taken up by register varargs.
-- This area is allocated by the callee at the very top of the frame. */
-- HOST_WIDE_INT saved_varargs_size;
--
- } machine_function;
- #endif
-
-@@ -565,11 +618,7 @@
- };
-
-
--extern enum arm_pcs arm_pcs_variant;
-
--#ifndef ARM_DEFAULT_PCS
--#define ARM_DEFAULT_PCS ARM_PCS_AAPCS64
--#endif
-
- /* We can't use enum machine_mode inside a generator file because it
- hasn't been created yet; we shouldn't be using any code that
-@@ -670,12 +719,14 @@
- /* The base cost overhead of a memcpy call, for MOVE_RATIO and friends. */
- #define AARCH64_CALL_RATIO 8
-
--/* When optimizing for size, give a better estimate of the length of a memcpy
-- call, but use the default otherwise. But move_by_pieces_ninsns() counts
-- memory-to-memory moves, and we'll have to generate a load & store for each,
-- so halve the value to take that into account. */
-+/* MOVE_RATIO dictates when we will use the move_by_pieces infrastructure.
-+ move_by_pieces will continually copy the largest safe chunks. So a
-+ 7-byte copy is a 4-byte + 2-byte + byte copy. This proves inefficient
-+ for both size and speed of copy, so we will instead use the "movmem"
-+ standard name to implement the copy. This logic does not apply when
-+ targeting -mstrict-align, so keep a sensible default in that case. */
- #define MOVE_RATIO(speed) \
-- (((speed) ? 15 : AARCH64_CALL_RATIO) / 2)
-+ (!STRICT_ALIGNMENT ? 2 : (((speed) ? 15 : AARCH64_CALL_RATIO) / 2))
-
- /* For CLEAR_RATIO, when optimizing for size, give a better estimate
- of the length of a memset call, but use the default otherwise. */
-@@ -688,12 +739,6 @@
- #define SET_RATIO(speed) \
- ((speed) ? 15 : AARCH64_CALL_RATIO - 2)
-
--/* STORE_BY_PIECES_P can be used when copying a constant string, but
-- in that case each 64-bit chunk takes 5 insns instead of 2 (LDR/STR).
-- For now we always fail this and let the move_by_pieces code copy
-- the string from read-only memory. */
--#define STORE_BY_PIECES_P(SIZE, ALIGN) 0
--
- /* Disable auto-increment in move_by_pieces et al. Use of auto-increment is
- rarely a good idea in straight-line code since it adds an extra address
- dependency between each instruction. Better to use incrementing offsets. */
-@@ -835,6 +880,11 @@
-
- #define SHIFT_COUNT_TRUNCATED !TARGET_SIMD
-
-+/* Choose appropriate mode for caller saves, so we do the minimum
-+ required size of load/store. */
-+#define HARD_REGNO_CALLER_SAVE_MODE(REGNO, NREGS, MODE) \
-+ aarch64_hard_regno_caller_save_mode ((REGNO), (NREGS), (MODE))
-+
- /* Callee only saves lower 64-bits of a 128-bit register. Tell the
- compiler the callee clobbers the top 64-bits when restoring the
- bottom 64-bits. */
---- a/src/gcc/config/arc/arc.c
-+++ b/src/gcc/config/arc/arc.c
-@@ -398,6 +398,11 @@
-
- static bool arc_frame_pointer_required (void);
-
-+static bool arc_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
-+ unsigned int,
-+ enum by_pieces_operation op,
-+ bool);
-+
- /* Implements target hook vector_mode_supported_p. */
-
- static bool
-@@ -512,6 +517,10 @@
- #undef TARGET_DELEGITIMIZE_ADDRESS
- #define TARGET_DELEGITIMIZE_ADDRESS arc_delegitimize_address
-
-+#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
-+#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
-+ arc_use_by_pieces_infrastructure_p
-+
- /* Usually, we will be able to scale anchor offsets.
- When this fails, we want LEGITIMIZE_ADDRESS to kick in. */
- #undef TARGET_MIN_ANCHOR_OFFSET
-@@ -9355,6 +9364,21 @@
- return false;
- }
-
-+/* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
-+
-+static bool
-+arc_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
-+ unsigned int align,
-+ enum by_pieces_operation op,
-+ bool speed_p)
-+{
-+ /* Let the movmem expander handle small block moves. */
-+ if (op == MOVE_BY_PIECES)
-+ return false;
-+
-+ return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
-+}
-+
- struct gcc_target targetm = TARGET_INITIALIZER;
-
- #include "gt-arc.h"
---- a/src/gcc/config/arc/arc.h
-+++ b/src/gcc/config/arc/arc.h
-@@ -1553,12 +1553,6 @@
- in one reasonably fast instruction. */
- #define MOVE_MAX 4
-
--/* Let the movmem expander handle small block moves. */
--#define MOVE_BY_PIECES_P(LEN, ALIGN) 0
--#define CAN_MOVE_BY_PIECES(SIZE, ALIGN) \
-- (move_by_pieces_ninsns (SIZE, ALIGN, MOVE_MAX_PIECES + 1) \
-- < (unsigned int) MOVE_RATIO (!optimize_size))
--
- /* Undo the effects of the movmem pattern presence on STORE_BY_PIECES_P . */
- #define MOVE_RATIO(SPEED) ((SPEED) ? 15 : 3)
-
---- a/src/gcc/config/arm/aarch-cost-tables.h
-+++ b/src/gcc/config/arm/aarch-cost-tables.h
-@@ -39,6 +39,7 @@
- 0, /* bfi. */
- 0, /* bfx. */
- 0, /* clz. */
-+ 0, /* rev. */
- COSTS_N_INSNS (1), /* non_exec. */
- false /* non_exec_costs_exec. */
- },
-@@ -139,6 +140,7 @@
- COSTS_N_INSNS (1), /* bfi. */
- COSTS_N_INSNS (1), /* bfx. */
- 0, /* clz. */
-+ 0, /* rev. */
- 0, /* non_exec. */
- true /* non_exec_costs_exec. */
- },
-@@ -239,6 +241,7 @@
- COSTS_N_INSNS (1), /* bfi. */
- 0, /* bfx. */
- 0, /* clz. */
-+ 0, /* rev. */
- 0, /* non_exec. */
- true /* non_exec_costs_exec. */
- },
---- a/src/gcc/config/arm/cortex-a15.md
-+++ b/src/gcc/config/arm/cortex-a15.md
-@@ -64,7 +64,7 @@
- (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\
- alu_reg,alus_reg,logic_reg,logics_reg,\
- adc_imm,adcs_imm,adc_reg,adcs_reg,\
-- adr,bfm,rev,\
-+ adr,bfm,clz,rbit,rev,\
- shift_imm,shift_reg,\
- mov_imm,mov_reg,\
- mvn_imm,mvn_reg,\
-@@ -72,11 +72,14 @@
- "ca15_issue1,(ca15_sx1,ca15_sx1_alu)|(ca15_sx2,ca15_sx2_alu)")
-
- ;; ALU ops with immediate shift
-+;; crc is also included here so that appropriate scheduling of CRC32 ARMv8-A
-+;; instructions can be performed when tuning for the Cortex-A57 since that
-+;; core reuses the Cortex-A15 pipeline description for the moment.
- (define_insn_reservation "cortex_a15_alu_shift" 3
- (and (eq_attr "tune" "cortexa15")
- (eq_attr "type" "extend,\
- alu_shift_imm,alus_shift_imm,\
-- logic_shift_imm,logics_shift_imm,\
-+ crc,logic_shift_imm,logics_shift_imm,\
- mov_shift,mvn_shift"))
- "ca15_issue1,(ca15_sx1,ca15_sx1+ca15_sx1_shf,ca15_sx1_alu)\
- |(ca15_sx2,ca15_sx2+ca15_sx2_shf,ca15_sx2_alu)")
---- a/src/gcc/config/arm/arm-tables.opt
-+++ b/src/gcc/config/arm/arm-tables.opt
-@@ -274,6 +274,9 @@
- Enum(processor_type) String(cortex-r7) Value(cortexr7)
-
- EnumValue
-+Enum(processor_type) String(cortex-m7) Value(cortexm7)
-+
-+EnumValue
- Enum(processor_type) String(cortex-m4) Value(cortexm4)
-
- EnumValue
-@@ -423,17 +426,23 @@
- Enum(arm_fpu) String(fpv4-sp-d16) Value(11)
-
- EnumValue
--Enum(arm_fpu) String(neon-vfpv4) Value(12)
-+Enum(arm_fpu) String(fpv5-sp-d16) Value(12)
-
- EnumValue
--Enum(arm_fpu) String(fp-armv8) Value(13)
-+Enum(arm_fpu) String(fpv5-d16) Value(13)
-
- EnumValue
--Enum(arm_fpu) String(neon-fp-armv8) Value(14)
-+Enum(arm_fpu) String(neon-vfpv4) Value(14)
-
- EnumValue
--Enum(arm_fpu) String(crypto-neon-fp-armv8) Value(15)
-+Enum(arm_fpu) String(fp-armv8) Value(15)
-
- EnumValue
--Enum(arm_fpu) String(vfp3) Value(16)
-+Enum(arm_fpu) String(neon-fp-armv8) Value(16)
-
-+EnumValue
-+Enum(arm_fpu) String(crypto-neon-fp-armv8) Value(17)
-+
-+EnumValue
-+Enum(arm_fpu) String(vfp3) Value(18)
-+
---- a/src/gcc/config/arm/thumb2.md
-+++ b/src/gcc/config/arm/thumb2.md
-@@ -329,7 +329,7 @@
- movw%?\\t%0, %L1\\t%@ movhi
- str%(h%)\\t%1, %0\\t%@ movhi
- ldr%(h%)\\t%0, %1\\t%@ movhi"
-- [(set_attr "type" "mov_reg,mov_imm,mov_imm,mov_reg,store1,load1")
-+ [(set_attr "type" "mov_reg,mov_imm,mov_imm,mov_imm,store1,load1")
- (set_attr "predicable" "yes")
- (set_attr "predicable_short_it" "yes,no,yes,no,no,no")
- (set_attr "length" "2,4,2,4,4,4")
-@@ -1370,6 +1370,103 @@
- (set_attr "type" "alu_reg")]
- )
-
-+; Constants for op 2 will never be given to these patterns.
-+(define_insn_and_split "*iordi_notdi_di"
-+ [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
-+ (ior:DI (not:DI (match_operand:DI 1 "s_register_operand" "0,r"))
-+ (match_operand:DI 2 "s_register_operand" "r,0")))]
-+ "TARGET_THUMB2"
-+ "#"
-+ "TARGET_THUMB2 && reload_completed"
-+ [(set (match_dup 0) (ior:SI (not:SI (match_dup 1)) (match_dup 2)))
-+ (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
-+ "
-+ {
-+ operands[3] = gen_highpart (SImode, operands[0]);
-+ operands[0] = gen_lowpart (SImode, operands[0]);
-+ operands[4] = gen_highpart (SImode, operands[1]);
-+ operands[1] = gen_lowpart (SImode, operands[1]);
-+ operands[5] = gen_highpart (SImode, operands[2]);
-+ operands[2] = gen_lowpart (SImode, operands[2]);
-+ }"
-+ [(set_attr "length" "8")
-+ (set_attr "predicable" "yes")
-+ (set_attr "predicable_short_it" "no")
-+ (set_attr "type" "multiple")]
-+)
-+
-+(define_insn_and_split "*iordi_notzesidi_di"
-+ [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
-+ (ior:DI (not:DI (zero_extend:DI
-+ (match_operand:SI 2 "s_register_operand" "r,r")))
-+ (match_operand:DI 1 "s_register_operand" "0,?r")))]
-+ "TARGET_THUMB2"
-+ "#"
-+ ; (not (zero_extend...)) means operand0 will always be 0xffffffff
-+ "TARGET_THUMB2 && reload_completed"
-+ [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
-+ (set (match_dup 3) (const_int -1))]
-+ "
-+ {
-+ operands[3] = gen_highpart (SImode, operands[0]);
-+ operands[0] = gen_lowpart (SImode, operands[0]);
-+ operands[1] = gen_lowpart (SImode, operands[1]);
-+ }"
-+ [(set_attr "length" "4,8")
-+ (set_attr "predicable" "yes")
-+ (set_attr "predicable_short_it" "no")
-+ (set_attr "type" "multiple")]
-+)
-+
-+(define_insn_and_split "*iordi_notdi_zesidi"
-+ [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
-+ (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "0,?r"))
-+ (zero_extend:DI
-+ (match_operand:SI 1 "s_register_operand" "r,r"))))]
-+ "TARGET_THUMB2"
-+ "#"
-+ "TARGET_THUMB2 && reload_completed"
-+ [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
-+ (set (match_dup 3) (not:SI (match_dup 4)))]
-+ "
-+ {
-+ operands[3] = gen_highpart (SImode, operands[0]);
-+ operands[0] = gen_lowpart (SImode, operands[0]);
-+ operands[1] = gen_lowpart (SImode, operands[1]);
-+ operands[4] = gen_highpart (SImode, operands[2]);
-+ operands[2] = gen_lowpart (SImode, operands[2]);
-+ }"
-+ [(set_attr "length" "8")
-+ (set_attr "predicable" "yes")
-+ (set_attr "predicable_short_it" "no")
-+ (set_attr "type" "multiple")]
-+)
-+
-+(define_insn_and_split "*iordi_notsesidi_di"
-+ [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
-+ (ior:DI (not:DI (sign_extend:DI
-+ (match_operand:SI 2 "s_register_operand" "r,r")))
-+ (match_operand:DI 1 "s_register_operand" "0,r")))]
-+ "TARGET_THUMB2"
-+ "#"
-+ "TARGET_THUMB2 && reload_completed"
-+ [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
-+ (set (match_dup 3) (ior:SI (not:SI
-+ (ashiftrt:SI (match_dup 2) (const_int 31)))
-+ (match_dup 4)))]
-+ "
-+ {
-+ operands[3] = gen_highpart (SImode, operands[0]);
-+ operands[0] = gen_lowpart (SImode, operands[0]);
-+ operands[4] = gen_highpart (SImode, operands[1]);
-+ operands[1] = gen_lowpart (SImode, operands[1]);
-+ }"
-+ [(set_attr "length" "8")
-+ (set_attr "predicable" "yes")
-+ (set_attr "predicable_short_it" "no")
-+ (set_attr "type" "multiple")]
-+)
-+
- (define_insn "*orsi_notsi_si"
- [(set (match_operand:SI 0 "s_register_operand" "=r")
- (ior:SI (not:SI (match_operand:SI 2 "s_register_operand" "r"))
---- a/src/gcc/config/arm/arm.c
-+++ b/src/gcc/config/arm/arm.c
-@@ -50,6 +50,7 @@
- #include "except.h"
- #include "tm_p.h"
- #include "target.h"
-+#include "sched-int.h"
- #include "target-def.h"
- #include "debug.h"
- #include "langhooks.h"
-@@ -59,6 +60,7 @@
- #include "params.h"
- #include "opts.h"
- #include "dumpfile.h"
-+#include "gimple-expr.h"
-
- /* Forward definitions of types. */
- typedef struct minipool_node Mnode;
-@@ -93,6 +95,7 @@
- static bool thumb_force_lr_save (void);
- static unsigned arm_size_return_regs (void);
- static bool arm_assemble_integer (rtx, unsigned int, int);
-+static void arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update);
- static void arm_print_operand (FILE *, rtx, int);
- static void arm_print_operand_address (FILE *, rtx);
- static bool arm_print_operand_punct_valid_p (unsigned char code);
-@@ -584,6 +587,9 @@
- #undef TARGET_MANGLE_TYPE
- #define TARGET_MANGLE_TYPE arm_mangle_type
-
-+#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
-+#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
-+
- #undef TARGET_BUILD_BUILTIN_VA_LIST
- #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
- #undef TARGET_EXPAND_BUILTIN_VA_START
-@@ -985,6 +991,7 @@
- COSTS_N_INSNS (1), /* bfi. */
- COSTS_N_INSNS (1), /* bfx. */
- 0, /* clz. */
-+ 0, /* rev. */
- 0, /* non_exec. */
- true /* non_exec_costs_exec. */
- },
-@@ -1068,7 +1075,210 @@
- }
- };
-
-+const struct cpu_cost_table cortexa8_extra_costs =
-+{
-+ /* ALU */
-+ {
-+ 0, /* arith. */
-+ 0, /* logical. */
-+ COSTS_N_INSNS (1), /* shift. */
-+ 0, /* shift_reg. */
-+ COSTS_N_INSNS (1), /* arith_shift. */
-+ 0, /* arith_shift_reg. */
-+ COSTS_N_INSNS (1), /* log_shift. */
-+ 0, /* log_shift_reg. */
-+ 0, /* extend. */
-+ 0, /* extend_arith. */
-+ 0, /* bfi. */
-+ 0, /* bfx. */
-+ 0, /* clz. */
-+ 0, /* rev. */
-+ 0, /* non_exec. */
-+ true /* non_exec_costs_exec. */
-+ },
-+ {
-+ /* MULT SImode */
-+ {
-+ COSTS_N_INSNS (1), /* simple. */
-+ COSTS_N_INSNS (1), /* flag_setting. */
-+ COSTS_N_INSNS (1), /* extend. */
-+ COSTS_N_INSNS (1), /* add. */
-+ COSTS_N_INSNS (1), /* extend_add. */
-+ COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
-+ },
-+ /* MULT DImode */
-+ {
-+ 0, /* simple (N/A). */
-+ 0, /* flag_setting (N/A). */
-+ COSTS_N_INSNS (2), /* extend. */
-+ 0, /* add (N/A). */
-+ COSTS_N_INSNS (2), /* extend_add. */
-+ 0 /* idiv (N/A). */
-+ }
-+ },
-+ /* LD/ST */
-+ {
-+ COSTS_N_INSNS (1), /* load. */
-+ COSTS_N_INSNS (1), /* load_sign_extend. */
-+ COSTS_N_INSNS (1), /* ldrd. */
-+ COSTS_N_INSNS (1), /* ldm_1st. */
-+ 1, /* ldm_regs_per_insn_1st. */
-+ 2, /* ldm_regs_per_insn_subsequent. */
-+ COSTS_N_INSNS (1), /* loadf. */
-+ COSTS_N_INSNS (1), /* loadd. */
-+ COSTS_N_INSNS (1), /* load_unaligned. */
-+ COSTS_N_INSNS (1), /* store. */
-+ COSTS_N_INSNS (1), /* strd. */
-+ COSTS_N_INSNS (1), /* stm_1st. */
-+ 1, /* stm_regs_per_insn_1st. */
-+ 2, /* stm_regs_per_insn_subsequent. */
-+ COSTS_N_INSNS (1), /* storef. */
-+ COSTS_N_INSNS (1), /* stored. */
-+ COSTS_N_INSNS (1) /* store_unaligned. */
-+ },
-+ {
-+ /* FP SFmode */
-+ {
-+ COSTS_N_INSNS (36), /* div. */
-+ COSTS_N_INSNS (11), /* mult. */
-+ COSTS_N_INSNS (20), /* mult_addsub. */
-+ COSTS_N_INSNS (30), /* fma. */
-+ COSTS_N_INSNS (9), /* addsub. */
-+ COSTS_N_INSNS (3), /* fpconst. */
-+ COSTS_N_INSNS (3), /* neg. */
-+ COSTS_N_INSNS (6), /* compare. */
-+ COSTS_N_INSNS (4), /* widen. */
-+ COSTS_N_INSNS (4), /* narrow. */
-+ COSTS_N_INSNS (8), /* toint. */
-+ COSTS_N_INSNS (8), /* fromint. */
-+ COSTS_N_INSNS (8) /* roundint. */
-+ },
-+ /* FP DFmode */
-+ {
-+ COSTS_N_INSNS (64), /* div. */
-+ COSTS_N_INSNS (16), /* mult. */
-+ COSTS_N_INSNS (25), /* mult_addsub. */
-+ COSTS_N_INSNS (30), /* fma. */
-+ COSTS_N_INSNS (9), /* addsub. */
-+ COSTS_N_INSNS (3), /* fpconst. */
-+ COSTS_N_INSNS (3), /* neg. */
-+ COSTS_N_INSNS (6), /* compare. */
-+ COSTS_N_INSNS (6), /* widen. */
-+ COSTS_N_INSNS (6), /* narrow. */
-+ COSTS_N_INSNS (8), /* toint. */
-+ COSTS_N_INSNS (8), /* fromint. */
-+ COSTS_N_INSNS (8) /* roundint. */
-+ }
-+ },
-+ /* Vector */
-+ {
-+ COSTS_N_INSNS (1) /* alu. */
-+ }
-+};
-
-+const struct cpu_cost_table cortexa5_extra_costs =
-+{
-+ /* ALU */
-+ {
-+ 0, /* arith. */
-+ 0, /* logical. */
-+ COSTS_N_INSNS (1), /* shift. */
-+ COSTS_N_INSNS (1), /* shift_reg. */
-+ COSTS_N_INSNS (1), /* arith_shift. */
-+ COSTS_N_INSNS (1), /* arith_shift_reg. */
-+ COSTS_N_INSNS (1), /* log_shift. */
-+ COSTS_N_INSNS (1), /* log_shift_reg. */
-+ COSTS_N_INSNS (1), /* extend. */
-+ COSTS_N_INSNS (1), /* extend_arith. */
-+ COSTS_N_INSNS (1), /* bfi. */
-+ COSTS_N_INSNS (1), /* bfx. */
-+ COSTS_N_INSNS (1), /* clz. */
-+ COSTS_N_INSNS (1), /* rev. */
-+ 0, /* non_exec. */
-+ true /* non_exec_costs_exec. */
-+ },
-+
-+ {
-+ /* MULT SImode */
-+ {
-+ 0, /* simple. */
-+ COSTS_N_INSNS (1), /* flag_setting. */
-+ COSTS_N_INSNS (1), /* extend. */
-+ COSTS_N_INSNS (1), /* add. */
-+ COSTS_N_INSNS (1), /* extend_add. */
-+ COSTS_N_INSNS (7) /* idiv. */
-+ },
-+ /* MULT DImode */
-+ {
-+ 0, /* simple (N/A). */
-+ 0, /* flag_setting (N/A). */
-+ COSTS_N_INSNS (1), /* extend. */
-+ 0, /* add. */
-+ COSTS_N_INSNS (2), /* extend_add. */
-+ 0 /* idiv (N/A). */
-+ }
-+ },
-+ /* LD/ST */
-+ {
-+ COSTS_N_INSNS (1), /* load. */
-+ COSTS_N_INSNS (1), /* load_sign_extend. */
-+ COSTS_N_INSNS (6), /* ldrd. */
-+ COSTS_N_INSNS (1), /* ldm_1st. */
-+ 1, /* ldm_regs_per_insn_1st. */
-+ 2, /* ldm_regs_per_insn_subsequent. */
-+ COSTS_N_INSNS (2), /* loadf. */
-+ COSTS_N_INSNS (4), /* loadd. */
-+ COSTS_N_INSNS (1), /* load_unaligned. */
-+ COSTS_N_INSNS (1), /* store. */
-+ COSTS_N_INSNS (3), /* strd. */
-+ COSTS_N_INSNS (1), /* stm_1st. */
-+ 1, /* stm_regs_per_insn_1st. */
-+ 2, /* stm_regs_per_insn_subsequent. */
-+ COSTS_N_INSNS (2), /* storef. */
-+ COSTS_N_INSNS (2), /* stored. */
-+ COSTS_N_INSNS (1) /* store_unaligned. */
-+ },
-+ {
-+ /* FP SFmode */
-+ {
-+ COSTS_N_INSNS (15), /* div. */
-+ COSTS_N_INSNS (3), /* mult. */
-+ COSTS_N_INSNS (7), /* mult_addsub. */
-+ COSTS_N_INSNS (7), /* fma. */
-+ COSTS_N_INSNS (3), /* addsub. */
-+ COSTS_N_INSNS (3), /* fpconst. */
-+ COSTS_N_INSNS (3), /* neg. */
-+ COSTS_N_INSNS (3), /* compare. */
-+ COSTS_N_INSNS (3), /* widen. */
-+ COSTS_N_INSNS (3), /* narrow. */
-+ COSTS_N_INSNS (3), /* toint. */
-+ COSTS_N_INSNS (3), /* fromint. */
-+ COSTS_N_INSNS (3) /* roundint. */
-+ },
-+ /* FP DFmode */
-+ {
-+ COSTS_N_INSNS (30), /* div. */
-+ COSTS_N_INSNS (6), /* mult. */
-+ COSTS_N_INSNS (10), /* mult_addsub. */
-+ COSTS_N_INSNS (7), /* fma. */
-+ COSTS_N_INSNS (3), /* addsub. */
-+ COSTS_N_INSNS (3), /* fpconst. */
-+ COSTS_N_INSNS (3), /* neg. */
-+ COSTS_N_INSNS (3), /* compare. */
-+ COSTS_N_INSNS (3), /* widen. */
-+ COSTS_N_INSNS (3), /* narrow. */
-+ COSTS_N_INSNS (3), /* toint. */
-+ COSTS_N_INSNS (3), /* fromint. */
-+ COSTS_N_INSNS (3) /* roundint. */
-+ }
-+ },
-+ /* Vector */
-+ {
-+ COSTS_N_INSNS (1) /* alu. */
-+ }
-+};
-+
-+
- const struct cpu_cost_table cortexa7_extra_costs =
- {
- /* ALU */
-@@ -1086,6 +1296,7 @@
- COSTS_N_INSNS (1), /* bfi. */
- COSTS_N_INSNS (1), /* bfx. */
- COSTS_N_INSNS (1), /* clz. */
-+ COSTS_N_INSNS (1), /* rev. */
- 0, /* non_exec. */
- true /* non_exec_costs_exec. */
- },
-@@ -1187,6 +1398,7 @@
- 0, /* bfi. */
- COSTS_N_INSNS (1), /* bfx. */
- COSTS_N_INSNS (1), /* clz. */
-+ COSTS_N_INSNS (1), /* rev. */
- 0, /* non_exec. */
- true /* non_exec_costs_exec. */
- },
-@@ -1287,6 +1499,7 @@
- COSTS_N_INSNS (1), /* bfi. */
- 0, /* bfx. */
- 0, /* clz. */
-+ 0, /* rev. */
- 0, /* non_exec. */
- true /* non_exec_costs_exec. */
- },
-@@ -1387,6 +1600,7 @@
- 0, /* bfi. */
- 0, /* bfx. */
- 0, /* clz. */
-+ 0, /* rev. */
- COSTS_N_INSNS (1), /* non_exec. */
- false /* non_exec_costs_exec. */
- },
-@@ -1483,7 +1697,8 @@
- false, /* Prefer LDRD/STRD. */
- {true, true}, /* Prefer non short circuit. */
- &arm_default_vec_cost, /* Vectorizer costs. */
-- false /* Prefer Neon for 64-bits bitops. */
-+ false, /* Prefer Neon for 64-bits bitops. */
-+ false, false /* Prefer 32-bit encodings. */
- };
-
- const struct tune_params arm_fastmul_tune =
-@@ -1499,7 +1714,8 @@
- false, /* Prefer LDRD/STRD. */
- {true, true}, /* Prefer non short circuit. */
- &arm_default_vec_cost, /* Vectorizer costs. */
-- false /* Prefer Neon for 64-bits bitops. */
-+ false, /* Prefer Neon for 64-bits bitops. */
-+ false, false /* Prefer 32-bit encodings. */
- };
-
- /* StrongARM has early execution of branches, so a sequence that is worth
-@@ -1518,7 +1734,8 @@
- false, /* Prefer LDRD/STRD. */
- {true, true}, /* Prefer non short circuit. */
- &arm_default_vec_cost, /* Vectorizer costs. */
-- false /* Prefer Neon for 64-bits bitops. */
-+ false, /* Prefer Neon for 64-bits bitops. */
-+ false, false /* Prefer 32-bit encodings. */
- };
-
- const struct tune_params arm_xscale_tune =
-@@ -1534,7 +1751,8 @@
- false, /* Prefer LDRD/STRD. */
- {true, true}, /* Prefer non short circuit. */
- &arm_default_vec_cost, /* Vectorizer costs. */
-- false /* Prefer Neon for 64-bits bitops. */
-+ false, /* Prefer Neon for 64-bits bitops. */
-+ false, false /* Prefer 32-bit encodings. */
- };
-
- const struct tune_params arm_9e_tune =
-@@ -1550,7 +1768,8 @@
- false, /* Prefer LDRD/STRD. */
- {true, true}, /* Prefer non short circuit. */
- &arm_default_vec_cost, /* Vectorizer costs. */
-- false /* Prefer Neon for 64-bits bitops. */
-+ false, /* Prefer Neon for 64-bits bitops. */
-+ false, false /* Prefer 32-bit encodings. */
- };
-
- const struct tune_params arm_v6t2_tune =
-@@ -1566,7 +1785,8 @@
- false, /* Prefer LDRD/STRD. */
- {true, true}, /* Prefer non short circuit. */
- &arm_default_vec_cost, /* Vectorizer costs. */
-- false /* Prefer Neon for 64-bits bitops. */
-+ false, /* Prefer Neon for 64-bits bitops. */
-+ false, false /* Prefer 32-bit encodings. */
- };
-
- /* Generic Cortex tuning. Use more specific tunings if appropriate. */
-@@ -1583,9 +1803,27 @@
- false, /* Prefer LDRD/STRD. */
- {true, true}, /* Prefer non short circuit. */
- &arm_default_vec_cost, /* Vectorizer costs. */
-- false /* Prefer Neon for 64-bits bitops. */
-+ false, /* Prefer Neon for 64-bits bitops. */
-+ false, false /* Prefer 32-bit encodings. */
- };
-
-+const struct tune_params arm_cortex_a8_tune =
-+{
-+ arm_9e_rtx_costs,
-+ &cortexa8_extra_costs,
-+ NULL, /* Sched adj cost. */
-+ 1, /* Constant limit. */
-+ 5, /* Max cond insns. */
-+ ARM_PREFETCH_NOT_BENEFICIAL,
-+ false, /* Prefer constant pool. */
-+ arm_default_branch_cost,
-+ false, /* Prefer LDRD/STRD. */
-+ {true, true}, /* Prefer non short circuit. */
-+ &arm_default_vec_cost, /* Vectorizer costs. */
-+ false, /* Prefer Neon for 64-bits bitops. */
-+ false, false /* Prefer 32-bit encodings. */
-+};
-+
- const struct tune_params arm_cortex_a7_tune =
- {
- arm_9e_rtx_costs,
-@@ -1599,7 +1837,8 @@
- false, /* Prefer LDRD/STRD. */
- {true, true}, /* Prefer non short circuit. */
- &arm_default_vec_cost, /* Vectorizer costs. */
-- false /* Prefer Neon for 64-bits bitops. */
-+ false, /* Prefer Neon for 64-bits bitops. */
-+ false, false /* Prefer 32-bit encodings. */
- };
-
- const struct tune_params arm_cortex_a15_tune =
-@@ -1615,7 +1854,8 @@
- true, /* Prefer LDRD/STRD. */
- {true, true}, /* Prefer non short circuit. */
- &arm_default_vec_cost, /* Vectorizer costs. */
-- false /* Prefer Neon for 64-bits bitops. */
-+ false, /* Prefer Neon for 64-bits bitops. */
-+ true, true /* Prefer 32-bit encodings. */
- };
-
- const struct tune_params arm_cortex_a53_tune =
-@@ -1631,7 +1871,8 @@
- false, /* Prefer LDRD/STRD. */
- {true, true}, /* Prefer non short circuit. */
- &arm_default_vec_cost, /* Vectorizer costs. */
-- false /* Prefer Neon for 64-bits bitops. */
-+ false, /* Prefer Neon for 64-bits bitops. */
-+ false, false /* Prefer 32-bit encodings. */
- };
-
- const struct tune_params arm_cortex_a57_tune =
-@@ -1647,7 +1888,8 @@
- true, /* Prefer LDRD/STRD. */
- {true, true}, /* Prefer non short circuit. */
- &arm_default_vec_cost, /* Vectorizer costs. */
-- false /* Prefer Neon for 64-bits bitops. */
-+ false, /* Prefer Neon for 64-bits bitops. */
-+ true, true /* Prefer 32-bit encodings. */
- };
-
- /* Branches can be dual-issued on Cortex-A5, so conditional execution is
-@@ -1656,7 +1898,7 @@
- const struct tune_params arm_cortex_a5_tune =
- {
- arm_9e_rtx_costs,
-- NULL,
-+ &cortexa5_extra_costs,
- NULL, /* Sched adj cost. */
- 1, /* Constant limit. */
- 1, /* Max cond insns. */
-@@ -1666,7 +1908,8 @@
- false, /* Prefer LDRD/STRD. */
- {false, false}, /* Prefer non short circuit. */
- &arm_default_vec_cost, /* Vectorizer costs. */
-- false /* Prefer Neon for 64-bits bitops. */
-+ false, /* Prefer Neon for 64-bits bitops. */
-+ false, false /* Prefer 32-bit encodings. */
- };
-
- const struct tune_params arm_cortex_a9_tune =
-@@ -1682,7 +1925,8 @@
- false, /* Prefer LDRD/STRD. */
- {true, true}, /* Prefer non short circuit. */
- &arm_default_vec_cost, /* Vectorizer costs. */
-- false /* Prefer Neon for 64-bits bitops. */
-+ false, /* Prefer Neon for 64-bits bitops. */
-+ false, false /* Prefer 32-bit encodings. */
- };
-
- const struct tune_params arm_cortex_a12_tune =
-@@ -1698,7 +1942,8 @@
- true, /* Prefer LDRD/STRD. */
- {true, true}, /* Prefer non short circuit. */
- &arm_default_vec_cost, /* Vectorizer costs. */
-- false /* Prefer Neon for 64-bits bitops. */
-+ false, /* Prefer Neon for 64-bits bitops. */
-+ false, false /* Prefer 32-bit encodings. */
- };
-
- /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
-@@ -1721,7 +1966,8 @@
- false, /* Prefer LDRD/STRD. */
- {false, false}, /* Prefer non short circuit. */
- &arm_default_vec_cost, /* Vectorizer costs. */
-- false /* Prefer Neon for 64-bits bitops. */
-+ false, /* Prefer Neon for 64-bits bitops. */
-+ false, false /* Prefer 32-bit encodings. */
- };
-
- /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
-@@ -1739,7 +1985,8 @@
- false, /* Prefer LDRD/STRD. */
- {false, false}, /* Prefer non short circuit. */
- &arm_default_vec_cost, /* Vectorizer costs. */
-- false /* Prefer Neon for 64-bits bitops. */
-+ false, /* Prefer Neon for 64-bits bitops. */
-+ false, false /* Prefer 32-bit encodings. */
- };
-
- const struct tune_params arm_fa726te_tune =
-@@ -1755,7 +2002,8 @@
- false, /* Prefer LDRD/STRD. */
- {true, true}, /* Prefer non short circuit. */
- &arm_default_vec_cost, /* Vectorizer costs. */
-- false /* Prefer Neon for 64-bits bitops. */
-+ false, /* Prefer Neon for 64-bits bitops. */
-+ false, false /* Prefer 32-bit encodings. */
- };
-
-
-@@ -2806,7 +3054,7 @@
- prefer_neon_for_64bits = true;
-
- /* Use the alternative scheduling-pressure algorithm by default. */
-- maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
-+ maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
- global_options.x_param_values,
- global_options_set.x_param_values);
-
-@@ -6079,11 +6327,6 @@
- if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
- return false;
-
-- /* Cannot tail-call to long calls, since these are out of range of
-- a branch instruction. */
-- if (decl && arm_is_long_call_p (decl))
-- return false;
--
- /* If we are interworking and the function is not declared static
- then we can't tail-call it unless we know that it exists in this
- compilation unit (since it might be a Thumb routine). */
-@@ -9337,6 +9580,47 @@
- *cost = LIBCALL_COST (2);
- return false;
-
-+ case BSWAP:
-+ if (arm_arch6)
-+ {
-+ if (mode == SImode)
-+ {
-+ *cost = COSTS_N_INSNS (1);
-+ if (speed_p)
-+ *cost += extra_cost->alu.rev;
-+
-+ return false;
-+ }
-+ }
-+ else
-+ {
-+ /* No rev instruction available. Look at arm_legacy_rev
-+ and thumb_legacy_rev for the form of RTL used then. */
-+ if (TARGET_THUMB)
-+ {
-+ *cost = COSTS_N_INSNS (10);
-+
-+ if (speed_p)
-+ {
-+ *cost += 6 * extra_cost->alu.shift;
-+ *cost += 3 * extra_cost->alu.logical;
-+ }
-+ }
-+ else
-+ {
-+ *cost = COSTS_N_INSNS (5);
-+
-+ if (speed_p)
-+ {
-+ *cost += 2 * extra_cost->alu.shift;
-+ *cost += extra_cost->alu.arith_shift;
-+ *cost += 2 * extra_cost->alu.logical;
-+ }
-+ }
-+ return true;
-+ }
-+ return false;
-+
- case MINUS:
- if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
- && (mode == SFmode || !TARGET_VFP_SINGLE))
-@@ -9719,8 +10003,17 @@
- /* Vector mode? */
- *cost = LIBCALL_COST (2);
- return false;
-+ case IOR:
-+ if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
-+ {
-+ *cost = COSTS_N_INSNS (1);
-+ if (speed_p)
-+ *cost += extra_cost->alu.rev;
-
-- case AND: case XOR: case IOR:
-+ return true;
-+ }
-+ /* Fall through. */
-+ case AND: case XOR:
- if (mode == SImode)
- {
- enum rtx_code subcode = GET_CODE (XEXP (x, 0));
-@@ -10619,6 +10912,36 @@
- *cost = LIBCALL_COST (1);
- return false;
-
-+ case FMA:
-+ if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
-+ {
-+ rtx op0 = XEXP (x, 0);
-+ rtx op1 = XEXP (x, 1);
-+ rtx op2 = XEXP (x, 2);
-+
-+ *cost = COSTS_N_INSNS (1);
-+
-+ /* vfms or vfnma. */
-+ if (GET_CODE (op0) == NEG)
-+ op0 = XEXP (op0, 0);
-+
-+ /* vfnms or vfnma. */
-+ if (GET_CODE (op2) == NEG)
-+ op2 = XEXP (op2, 0);
-+
-+ *cost += rtx_cost (op0, FMA, 0, speed_p);
-+ *cost += rtx_cost (op1, FMA, 1, speed_p);
-+ *cost += rtx_cost (op2, FMA, 2, speed_p);
-+
-+ if (speed_p)
-+ *cost += extra_cost->fp[mode ==DFmode].fma;
-+
-+ return true;
-+ }
-+
-+ *cost = LIBCALL_COST (3);
-+ return false;
-+
- case FIX:
- case UNSIGNED_FIX:
- if (TARGET_HARD_FLOAT)
-@@ -10669,10 +10992,16 @@
- return true;
-
- case ASM_OPERANDS:
-- /* Just a guess. Cost one insn per input. */
-- *cost = COSTS_N_INSNS (ASM_OPERANDS_INPUT_LENGTH (x));
-- return true;
-+ {
-+ /* Just a guess. Guess number of instructions in the asm
-+ plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
-+ though (see PR60663). */
-+ int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
-+ int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
-
-+ *cost = COSTS_N_INSNS (asm_length + num_operands);
-+ return true;
-+ }
- default:
- if (mode != VOIDmode)
- *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
-@@ -12566,7 +12895,11 @@
- || (type == 0 && GET_CODE (ind) == PRE_DEC))
- return arm_address_register_rtx_p (XEXP (ind, 0), 0);
-
-- /* FIXME: vld1 allows register post-modify. */
-+ /* Allow post-increment by register for VLDn */
-+ if (type == 2 && GET_CODE (ind) == POST_MODIFY
-+ && GET_CODE (XEXP (ind, 1)) == PLUS
-+ && REG_P (XEXP (XEXP (ind, 1), 1)))
-+ return true;
-
- /* Match:
- (plus (reg)
-@@ -16787,9 +17120,20 @@
- compute_bb_for_insn ();
- df_analyze ();
-
-+ enum Convert_Action {SKIP, CONV, SWAP_CONV};
-+
- FOR_EACH_BB_FN (bb, cfun)
- {
-+ if (current_tune->disparage_flag_setting_t16_encodings
-+ && optimize_bb_for_speed_p (bb))
-+ continue;
-+
- rtx insn;
-+ Convert_Action action = SKIP;
-+ Convert_Action action_for_partial_flag_setting
-+ = (current_tune->disparage_partial_flag_setting_t16_encodings
-+ && optimize_bb_for_speed_p (bb))
-+ ? SKIP : CONV;
-
- COPY_REG_SET (&live, DF_LR_OUT (bb));
- df_simulate_initialize_backwards (bb, &live);
-@@ -16799,7 +17143,7 @@
- && !REGNO_REG_SET_P (&live, CC_REGNUM)
- && GET_CODE (PATTERN (insn)) == SET)
- {
-- enum {SKIP, CONV, SWAP_CONV} action = SKIP;
-+ action = SKIP;
- rtx pat = PATTERN (insn);
- rtx dst = XEXP (pat, 0);
- rtx src = XEXP (pat, 1);
-@@ -16880,10 +17224,11 @@
- /* ANDS <Rdn>,<Rm> */
- if (rtx_equal_p (dst, op0)
- && low_register_operand (op1, SImode))
-- action = CONV;
-+ action = action_for_partial_flag_setting;
- else if (rtx_equal_p (dst, op1)
- && low_register_operand (op0, SImode))
-- action = SWAP_CONV;
-+ action = action_for_partial_flag_setting == SKIP
-+ ? SKIP : SWAP_CONV;
- break;
-
- case ASHIFTRT:
-@@ -16894,7 +17239,7 @@
- /* LSLS <Rdn>,<Rm> */
- if (rtx_equal_p (dst, op0)
- && low_register_operand (op1, SImode))
-- action = CONV;
-+ action = action_for_partial_flag_setting;
- /* ASRS <Rd>,<Rm>,#<imm5> */
- /* LSRS <Rd>,<Rm>,#<imm5> */
- /* LSLS <Rd>,<Rm>,#<imm5> */
-@@ -16901,7 +17246,7 @@
- else if (low_register_operand (op0, SImode)
- && CONST_INT_P (op1)
- && IN_RANGE (INTVAL (op1), 0, 31))
-- action = CONV;
-+ action = action_for_partial_flag_setting;
- break;
-
- case ROTATERT:
-@@ -16908,12 +17253,16 @@
- /* RORS <Rdn>,<Rm> */
- if (rtx_equal_p (dst, op0)
- && low_register_operand (op1, SImode))
-- action = CONV;
-+ action = action_for_partial_flag_setting;
- break;
-
- case NOT:
-+ /* MVNS <Rd>,<Rm> */
-+ if (low_register_operand (op0, SImode))
-+ action = action_for_partial_flag_setting;
-+ break;
-+
- case NEG:
-- /* MVNS <Rd>,<Rm> */
- /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
- if (low_register_operand (op0, SImode))
- action = CONV;
-@@ -16923,7 +17272,7 @@
- /* MOVS <Rd>,#<imm8> */
- if (CONST_INT_P (src)
- && IN_RANGE (INTVAL (src), 0, 255))
-- action = CONV;
-+ action = action_for_partial_flag_setting;
- break;
-
- case REG:
-@@ -17144,24 +17493,7 @@
-
- /* Routines to output assembly language. */
-
--/* If the rtx is the correct value then return the string of the number.
-- In this way we can ensure that valid double constants are generated even
-- when cross compiling. */
--const char *
--fp_immediate_constant (rtx x)
--{
-- REAL_VALUE_TYPE r;
--
-- if (!fp_consts_inited)
-- init_fp_table ();
--
-- REAL_VALUE_FROM_CONST_DOUBLE (r, x);
--
-- gcc_assert (REAL_VALUES_EQUAL (r, value_fp0));
-- return "0";
--}
--
--/* As for fp_immediate_constant, but value is passed directly, not in rtx. */
-+/* Return string representation of passed in real value. */
- static const char *
- fp_const_from_val (REAL_VALUE_TYPE *r)
- {
-@@ -17252,14 +17584,22 @@
- /* Output the assembly for a store multiple. */
-
- const char *
--vfp_output_fstmd (rtx * operands)
-+vfp_output_vstmd (rtx * operands)
- {
- char pattern[100];
- int p;
- int base;
- int i;
-+ rtx addr_reg = REG_P (XEXP (operands[0], 0))
-+ ? XEXP (operands[0], 0)
-+ : XEXP (XEXP (operands[0], 0), 0);
-+ bool push_p = REGNO (addr_reg) == SP_REGNUM;
-
-- strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
-+ if (push_p)
-+ strcpy (pattern, "vpush%?.64\t{%P1");
-+ else
-+ strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
-+
- p = strlen (pattern);
-
- gcc_assert (REG_P (operands[1]));
-@@ -17387,6 +17727,15 @@
- require_pic_register ();
- use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
- }
-+
-+ if (TARGET_AAPCS_BASED)
-+ {
-+ /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
-+ linker. */
-+ rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
-+ clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
-+ clobber_reg (fusage, gen_rtx_REG (word_mode, CC_REGNUM));
-+ }
- }
-
- /* Output a 'call' insn. */
-@@ -18066,19 +18415,19 @@
- switch (GET_CODE (addr))
- {
- case PRE_DEC:
-- templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
-+ templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
- ops[0] = XEXP (addr, 0);
- ops[1] = reg;
- break;
-
- case POST_INC:
-- templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
-+ templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
- ops[0] = XEXP (addr, 0);
- ops[1] = reg;
- break;
-
- default:
-- templ = "f%s%c%%?\t%%%s0, %%1%s";
-+ templ = "v%sr%%?.%s\t%%%s0, %%1%s";
- ops[0] = reg;
- ops[1] = mem;
- break;
-@@ -18086,7 +18435,7 @@
-
- sprintf (buff, templ,
- load ? "ld" : "st",
-- dp ? 'd' : 's',
-+ dp ? "64" : "32",
- dp ? "P" : "",
- integer_p ? "\t%@ int" : "");
- output_asm_insn (buff, ops);
-@@ -20426,6 +20775,18 @@
- {
- int reg = -1;
-
-+ /* Register r3 is caller-saved. Normally it does not need to be
-+ saved on entry by the prologue. However if we choose to save
-+ it for padding then we may confuse the compiler into thinking
-+ a prologue sequence is required when in fact it is not. This
-+ will occur when shrink-wrapping if r3 is used as a scratch
-+ register and there are no other callee-saved writes.
-+
-+ This situation can be avoided when other callee-saved registers
-+ are available and r3 is not mandatory if we choose a callee-saved
-+ register for padding. */
-+ bool prefer_callee_reg_p = false;
-+
- /* If it is safe to use r3, then do so. This sometimes
- generates better code on Thumb-2 by avoiding the need to
- use 32-bit push/pop instructions. */
-@@ -20432,24 +20793,29 @@
- if (! any_sibcall_could_use_r3 ()
- && arm_size_return_regs () <= 12
- && (offsets->saved_regs_mask & (1 << 3)) == 0
-- && (TARGET_THUMB2
-+ && (TARGET_THUMB2
- || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
- {
- reg = 3;
-+ if (!TARGET_THUMB2)
-+ prefer_callee_reg_p = true;
- }
-- else
-- for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
-- {
-- /* Avoid fixed registers; they may be changed at
-- arbitrary times so it's unsafe to restore them
-- during the epilogue. */
-- if (!fixed_regs[i]
-- && (offsets->saved_regs_mask & (1 << i)) == 0)
-- {
-- reg = i;
-- break;
-- }
-- }
-+ if (reg == -1
-+ || prefer_callee_reg_p)
-+ {
-+ for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
-+ {
-+ /* Avoid fixed registers; they may be changed at
-+ arbitrary times so it's unsafe to restore them
-+ during the epilogue. */
-+ if (!fixed_regs[i]
-+ && (offsets->saved_regs_mask & (1 << i)) == 0)
-+ {
-+ reg = i;
-+ break;
-+ }
-+ }
-+ }
-
- if (reg != -1)
- {
-@@ -21039,7 +21405,15 @@
- }
-
-
--/* If CODE is 'd', then the X is a condition operand and the instruction
-+/* Globally reserved letters: acln
-+ Puncutation letters currently used: @_|?().!#
-+ Lower case letters currently used: bcdefhimpqtvwxyz
-+ Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
-+ Letters previously used, but now deprecated/obsolete: sVWXYZ.
-+
-+ Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
-+
-+ If CODE is 'd', then the X is a condition operand and the instruction
- should only be executed if the condition is true.
- if CODE is 'D', then the X is a condition operand and the instruction
- should only be executed if the condition is false: however, if the mode
-@@ -21179,6 +21553,19 @@
- }
- return;
-
-+ case 'b':
-+ /* Print the log2 of a CONST_INT. */
-+ {
-+ HOST_WIDE_INT val;
-+
-+ if (!CONST_INT_P (x)
-+ || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
-+ output_operand_lossage ("Unsupported operand for code '%c'", code);
-+ else
-+ fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
-+ }
-+ return;
-+
- case 'L':
- /* The low 16 bits of an immediate constant. */
- fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
-@@ -21421,7 +21808,7 @@
- register. */
- case 'p':
- {
-- int mode = GET_MODE (x);
-+ enum machine_mode mode = GET_MODE (x);
- int regno;
-
- if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
-@@ -21445,7 +21832,7 @@
- case 'P':
- case 'q':
- {
-- int mode = GET_MODE (x);
-+ enum machine_mode mode = GET_MODE (x);
- int is_quad = (code == 'q');
- int regno;
-
-@@ -21481,7 +21868,7 @@
- case 'e':
- case 'f':
- {
-- int mode = GET_MODE (x);
-+ enum machine_mode mode = GET_MODE (x);
- int regno;
-
- if ((GET_MODE_SIZE (mode) != 16
-@@ -21563,6 +21950,7 @@
- {
- rtx addr;
- bool postinc = FALSE;
-+ rtx postinc_reg = NULL;
- unsigned align, memsize, align_bits;
-
- gcc_assert (MEM_P (x));
-@@ -21572,6 +21960,11 @@
- postinc = 1;
- addr = XEXP (addr, 0);
- }
-+ if (GET_CODE (addr) == POST_MODIFY)
-+ {
-+ postinc_reg = XEXP( XEXP (addr, 1), 1);
-+ addr = XEXP (addr, 0);
-+ }
- asm_fprintf (stream, "[%r", REGNO (addr));
-
- /* We know the alignment of this access, so we can emit a hint in the
-@@ -21597,6 +21990,8 @@
-
- if (postinc)
- fputs("!", stream);
-+ if (postinc_reg)
-+ asm_fprintf (stream, ", %r", REGNO (postinc_reg));
- }
- return;
-
-@@ -21614,7 +22009,7 @@
- /* Translate an S register number into a D register number and element index. */
- case 'y':
- {
-- int mode = GET_MODE (x);
-+ enum machine_mode mode = GET_MODE (x);
- int regno;
-
- if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
-@@ -21648,7 +22043,7 @@
- number into a D register number and element index. */
- case 'z':
- {
-- int mode = GET_MODE (x);
-+ enum machine_mode mode = GET_MODE (x);
- int regno;
-
- if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
-@@ -21688,15 +22083,12 @@
- break;
-
- case CONST_DOUBLE:
-- if (TARGET_NEON)
-- {
-- char fpstr[20];
-- real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
-- sizeof (fpstr), 0, 1);
-- fprintf (stream, "#%s", fpstr);
-- }
-- else
-- fprintf (stream, "#%s", fp_immediate_constant (x));
-+ {
-+ char fpstr[20];
-+ real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
-+ sizeof (fpstr), 0, 1);
-+ fprintf (stream, "#%s", fpstr);
-+ }
- break;
-
- default:
-@@ -22564,6 +22956,9 @@
- || (TARGET_HARD_FLOAT && TARGET_VFP
- && regno == VFPCC_REGNUM));
-
-+ if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
-+ return false;
-+
- if (TARGET_THUMB1)
- /* For the Thumb we only allow values bigger than SImode in
- registers 0 - 6, so that there is always a second low
-@@ -22609,13 +23004,20 @@
- }
-
- /* We allow almost any value to be stored in the general registers.
-- Restrict doubleword quantities to even register pairs so that we can
-- use ldrd. Do not allow very large Neon structure opaque modes in
-- general registers; they would use too many. */
-+ Restrict doubleword quantities to even register pairs in ARM state
-+ so that we can use ldrd. Do not allow very large Neon structure
-+ opaque modes in general registers; they would use too many. */
- if (regno <= LAST_ARM_REGNUM)
-- return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
-- && ARM_NUM_REGS (mode) <= 4;
-+ {
-+ if (ARM_NUM_REGS (mode) > 4)
-+ return FALSE;
-
-+ if (TARGET_THUMB2)
-+ return TRUE;
-+
-+ return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
-+ }
-+
- if (regno == FRAME_POINTER_REGNUM
- || regno == ARG_POINTER_REGNUM)
- /* We only allow integers in the fake hard registers. */
-@@ -22653,6 +23055,9 @@
- enum reg_class
- arm_regno_class (int regno)
- {
-+ if (regno == PC_REGNUM)
-+ return NO_REGS;
-+
- if (TARGET_THUMB1)
- {
- if (regno == STACK_POINTER_REGNUM)
-@@ -22826,10 +23231,12 @@
- NEON_BINOP,
- NEON_TERNOP,
- NEON_UNOP,
-+ NEON_BSWAP,
- NEON_GETLANE,
- NEON_SETLANE,
- NEON_CREATE,
- NEON_RINT,
-+ NEON_COPYSIGNF,
- NEON_DUP,
- NEON_DUPLANE,
- NEON_COMBINE,
-@@ -22847,7 +23254,6 @@
- NEON_FLOAT_NARROW,
- NEON_FIXCONV,
- NEON_SELECT,
-- NEON_RESULTPAIR,
- NEON_REINTERP,
- NEON_VTBL,
- NEON_VTBX,
-@@ -23216,6 +23622,9 @@
- ARM_BUILTIN_CRC32CH,
- ARM_BUILTIN_CRC32CW,
-
-+ ARM_BUILTIN_GET_FPSCR,
-+ ARM_BUILTIN_SET_FPSCR,
-+
- #undef CRYPTO1
- #undef CRYPTO2
- #undef CRYPTO3
-@@ -23293,14 +23702,19 @@
-
- tree V8QI_type_node;
- tree V4HI_type_node;
-+ tree V4UHI_type_node;
- tree V4HF_type_node;
- tree V2SI_type_node;
-+ tree V2USI_type_node;
- tree V2SF_type_node;
- tree V16QI_type_node;
- tree V8HI_type_node;
-+ tree V8UHI_type_node;
- tree V4SI_type_node;
-+ tree V4USI_type_node;
- tree V4SF_type_node;
- tree V2DI_type_node;
-+ tree V2UDI_type_node;
-
- tree intUQI_type_node;
- tree intUHI_type_node;
-@@ -23312,27 +23726,6 @@
- tree intCI_type_node;
- tree intXI_type_node;
-
-- tree V8QI_pointer_node;
-- tree V4HI_pointer_node;
-- tree V2SI_pointer_node;
-- tree V2SF_pointer_node;
-- tree V16QI_pointer_node;
-- tree V8HI_pointer_node;
-- tree V4SI_pointer_node;
-- tree V4SF_pointer_node;
-- tree V2DI_pointer_node;
--
-- tree void_ftype_pv8qi_v8qi_v8qi;
-- tree void_ftype_pv4hi_v4hi_v4hi;
-- tree void_ftype_pv2si_v2si_v2si;
-- tree void_ftype_pv2sf_v2sf_v2sf;
-- tree void_ftype_pdi_di_di;
-- tree void_ftype_pv16qi_v16qi_v16qi;
-- tree void_ftype_pv8hi_v8hi_v8hi;
-- tree void_ftype_pv4si_v4si_v4si;
-- tree void_ftype_pv4sf_v4sf_v4sf;
-- tree void_ftype_pv2di_v2di_v2di;
--
- tree reinterp_ftype_dreg[NUM_DREG_TYPES][NUM_DREG_TYPES];
- tree reinterp_ftype_qreg[NUM_QREG_TYPES][NUM_QREG_TYPES];
- tree dreg_types[NUM_DREG_TYPES], qreg_types[NUM_QREG_TYPES];
-@@ -23396,6 +23789,12 @@
- const_intDI_pointer_node = build_pointer_type (const_intDI_node);
- const_float_pointer_node = build_pointer_type (const_float_node);
-
-+ /* Unsigned integer types for various mode sizes. */
-+ intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
-+ intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
-+ intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
-+ intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
-+ neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode));
- /* Now create vector types based on our NEON element types. */
- /* 64-bit vectors. */
- V8QI_type_node =
-@@ -23402,10 +23801,14 @@
- build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
- V4HI_type_node =
- build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
-+ V4UHI_type_node =
-+ build_vector_type_for_mode (intUHI_type_node, V4HImode);
- V4HF_type_node =
- build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
- V2SI_type_node =
- build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
-+ V2USI_type_node =
-+ build_vector_type_for_mode (intUSI_type_node, V2SImode);
- V2SF_type_node =
- build_vector_type_for_mode (neon_float_type_node, V2SFmode);
- /* 128-bit vectors. */
-@@ -23413,21 +23816,20 @@
- build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
- V8HI_type_node =
- build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
-+ V8UHI_type_node =
-+ build_vector_type_for_mode (intUHI_type_node, V8HImode);
- V4SI_type_node =
- build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
-+ V4USI_type_node =
-+ build_vector_type_for_mode (intUSI_type_node, V4SImode);
- V4SF_type_node =
- build_vector_type_for_mode (neon_float_type_node, V4SFmode);
- V2DI_type_node =
- build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
-+ V2UDI_type_node =
-+ build_vector_type_for_mode (intUDI_type_node, V2DImode);
-
-- /* Unsigned integer types for various mode sizes. */
-- intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
-- intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
-- intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
-- intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
-- neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode));
-
--
- (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
- "__builtin_neon_uqi");
- (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
-@@ -23458,53 +23860,8 @@
- (*lang_hooks.types.register_builtin_type) (intXI_type_node,
- "__builtin_neon_xi");
-
-- /* Pointers to vector types. */
-- V8QI_pointer_node = build_pointer_type (V8QI_type_node);
-- V4HI_pointer_node = build_pointer_type (V4HI_type_node);
-- V2SI_pointer_node = build_pointer_type (V2SI_type_node);
-- V2SF_pointer_node = build_pointer_type (V2SF_type_node);
-- V16QI_pointer_node = build_pointer_type (V16QI_type_node);
-- V8HI_pointer_node = build_pointer_type (V8HI_type_node);
-- V4SI_pointer_node = build_pointer_type (V4SI_type_node);
-- V4SF_pointer_node = build_pointer_type (V4SF_type_node);
-- V2DI_pointer_node = build_pointer_type (V2DI_type_node);
--
-- /* Operations which return results as pairs. */
-- void_ftype_pv8qi_v8qi_v8qi =
-- build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
-- V8QI_type_node, NULL);
-- void_ftype_pv4hi_v4hi_v4hi =
-- build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
-- V4HI_type_node, NULL);
-- void_ftype_pv2si_v2si_v2si =
-- build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
-- V2SI_type_node, NULL);
-- void_ftype_pv2sf_v2sf_v2sf =
-- build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
-- V2SF_type_node, NULL);
-- void_ftype_pdi_di_di =
-- build_function_type_list (void_type_node, intDI_pointer_node,
-- neon_intDI_type_node, neon_intDI_type_node, NULL);
-- void_ftype_pv16qi_v16qi_v16qi =
-- build_function_type_list (void_type_node, V16QI_pointer_node,
-- V16QI_type_node, V16QI_type_node, NULL);
-- void_ftype_pv8hi_v8hi_v8hi =
-- build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
-- V8HI_type_node, NULL);
-- void_ftype_pv4si_v4si_v4si =
-- build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
-- V4SI_type_node, NULL);
-- void_ftype_pv4sf_v4sf_v4sf =
-- build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
-- V4SF_type_node, NULL);
-- void_ftype_pv2di_v2di_v2di =
-- build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
-- V2DI_type_node, NULL);
--
- if (TARGET_CRYPTO && TARGET_HARD_FLOAT)
- {
-- tree V4USI_type_node =
-- build_vector_type_for_mode (intUSI_type_node, V4SImode);
-
- tree V16UQI_type_node =
- build_vector_type_for_mode (intUQI_type_node, V16QImode);
-@@ -23790,25 +24147,6 @@
- }
- break;
-
-- case NEON_RESULTPAIR:
-- {
-- switch (insn_data[d->code].operand[1].mode)
-- {
-- case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
-- case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
-- case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
-- case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
-- case DImode: ftype = void_ftype_pdi_di_di; break;
-- case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
-- case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
-- case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
-- case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
-- case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
-- default: gcc_unreachable ();
-- }
-- }
-- break;
--
- case NEON_REINTERP:
- {
- /* We iterate over NUM_DREG_TYPES doubleword types,
-@@ -23868,6 +24206,47 @@
- ftype = build_function_type_list (return_type, eltype, NULL);
- break;
- }
-+ case NEON_BSWAP:
-+ {
-+ tree eltype = NULL_TREE;
-+ switch (insn_data[d->code].operand[1].mode)
-+ {
-+ case V4HImode:
-+ eltype = V4UHI_type_node;
-+ break;
-+ case V8HImode:
-+ eltype = V8UHI_type_node;
-+ break;
-+ case V2SImode:
-+ eltype = V2USI_type_node;
-+ break;
-+ case V4SImode:
-+ eltype = V4USI_type_node;
-+ break;
-+ case V2DImode:
-+ eltype = V2UDI_type_node;
-+ break;
-+ default: gcc_unreachable ();
-+ }
-+ ftype = build_function_type_list (eltype, eltype, NULL);
-+ break;
-+ }
-+ case NEON_COPYSIGNF:
-+ {
-+ tree eltype = NULL_TREE;
-+ switch (insn_data[d->code].operand[1].mode)
-+ {
-+ case V2SFmode:
-+ eltype = V2SF_type_node;
-+ break;
-+ case V4SFmode:
-+ eltype = V4SF_type_node;
-+ break;
-+ default: gcc_unreachable ();
-+ }
-+ ftype = build_function_type_list (eltype, eltype, NULL);
-+ break;
-+ }
- default:
- gcc_unreachable ();
- }
-@@ -24014,6 +24393,15 @@
- IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
- IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
-
-+
-+#define FP_BUILTIN(L, U) \
-+ {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
-+ UNKNOWN, 0},
-+
-+ FP_BUILTIN (get_fpscr, GET_FPSCR)
-+ FP_BUILTIN (set_fpscr, SET_FPSCR)
-+#undef FP_BUILTIN
-+
- #define CRC32_BUILTIN(L, U) \
- {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
- UNKNOWN, 0},
-@@ -24528,6 +24916,21 @@
-
- if (TARGET_CRC32)
- arm_init_crc32_builtins ();
-+
-+ if (TARGET_VFP && TARGET_HARD_FLOAT)
-+ {
-+ tree ftype_set_fpscr
-+ = build_function_type_list (void_type_node, unsigned_type_node, NULL);
-+ tree ftype_get_fpscr
-+ = build_function_type_list (unsigned_type_node, NULL);
-+
-+ arm_builtin_decls[ARM_BUILTIN_GET_FPSCR]
-+ = add_builtin_function ("__builtin_arm_ldfscr", ftype_get_fpscr,
-+ ARM_BUILTIN_GET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE);
-+ arm_builtin_decls[ARM_BUILTIN_SET_FPSCR]
-+ = add_builtin_function ("__builtin_arm_stfscr", ftype_set_fpscr,
-+ ARM_BUILTIN_SET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE);
-+ }
- }
-
- /* Return the ARM builtin for CODE. */
-@@ -25042,20 +25445,17 @@
- case NEON_SPLIT:
- case NEON_FLOAT_WIDEN:
- case NEON_FLOAT_NARROW:
-+ case NEON_BSWAP:
- case NEON_REINTERP:
- return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
- NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
-
-+ case NEON_COPYSIGNF:
- case NEON_COMBINE:
- case NEON_VTBL:
- return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
- NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
-
-- case NEON_RESULTPAIR:
-- return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
-- NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
-- NEON_ARG_STOP);
--
- case NEON_LANEMUL:
- case NEON_LANEMULL:
- case NEON_LANEMULH:
-@@ -25117,24 +25517,6 @@
- emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
- }
-
--/* Emit code to place a Neon pair result in memory locations (with equal
-- registers). */
--void
--neon_emit_pair_result_insn (enum machine_mode mode,
-- rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
-- rtx op1, rtx op2)
--{
-- rtx mem = gen_rtx_MEM (mode, destaddr);
-- rtx tmp1 = gen_reg_rtx (mode);
-- rtx tmp2 = gen_reg_rtx (mode);
--
-- emit_insn (intfn (tmp1, op1, op2, tmp2));
--
-- emit_move_insn (mem, tmp1);
-- mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
-- emit_move_insn (mem, tmp2);
--}
--
- /* Set up OPERANDS for a register copy from SRC to DEST, taking care
- not to early-clobber SRC registers in the process.
-
-@@ -25255,6 +25637,25 @@
-
- switch (fcode)
- {
-+ case ARM_BUILTIN_GET_FPSCR:
-+ case ARM_BUILTIN_SET_FPSCR:
-+ if (fcode == ARM_BUILTIN_GET_FPSCR)
-+ {
-+ icode = CODE_FOR_get_fpscr;
-+ target = gen_reg_rtx (SImode);
-+ pat = GEN_FCN (icode) (target);
-+ }
-+ else
-+ {
-+ target = NULL_RTX;
-+ icode = CODE_FOR_set_fpscr;
-+ arg0 = CALL_EXPR_ARG (exp, 0);
-+ op0 = expand_normal (arg0);
-+ pat = GEN_FCN (icode) (op0);
-+ }
-+ emit_insn (pat);
-+ return target;
-+
- case ARM_BUILTIN_TEXTRMSB:
- case ARM_BUILTIN_TEXTRMUB:
- case ARM_BUILTIN_TEXTRMSH:
-@@ -25888,7 +26289,7 @@
- int pops_needed;
- unsigned available;
- unsigned required;
-- int mode;
-+ enum machine_mode mode;
- int size;
- int restore_a4 = FALSE;
-
-@@ -29555,10 +29956,10 @@
- {
- enum machine_mode in_mode, out_mode;
- int in_n, out_n;
-+ bool out_unsigned_p = TYPE_UNSIGNED (type_out);
-
- if (TREE_CODE (type_out) != VECTOR_TYPE
-- || TREE_CODE (type_in) != VECTOR_TYPE
-- || !(TARGET_NEON && TARGET_FPU_ARMV8 && flag_unsafe_math_optimizations))
-+ || TREE_CODE (type_in) != VECTOR_TYPE)
- return NULL_TREE;
-
- out_mode = TYPE_MODE (TREE_TYPE (type_out));
-@@ -29570,7 +29971,13 @@
- decl of the vectorized builtin for the appropriate vector mode.
- NULL_TREE is returned if no such builtin is available. */
- #undef ARM_CHECK_BUILTIN_MODE
--#define ARM_CHECK_BUILTIN_MODE(C) \
-+#define ARM_CHECK_BUILTIN_MODE(C) \
-+ (TARGET_NEON && TARGET_FPU_ARMV8 \
-+ && flag_unsafe_math_optimizations \
-+ && ARM_CHECK_BUILTIN_MODE_1 (C))
-+
-+#undef ARM_CHECK_BUILTIN_MODE_1
-+#define ARM_CHECK_BUILTIN_MODE_1(C) \
- (out_mode == SFmode && out_n == C \
- && in_mode == SFmode && in_n == C)
-
-@@ -29595,6 +30002,67 @@
- return ARM_FIND_VRINT_VARIANT (vrintz);
- case BUILT_IN_ROUNDF:
- return ARM_FIND_VRINT_VARIANT (vrinta);
-+#undef ARM_CHECK_BUILTIN_MODE_1
-+#define ARM_CHECK_BUILTIN_MODE_1(C) \
-+ (out_mode == SImode && out_n == C \
-+ && in_mode == SFmode && in_n == C)
-+
-+#define ARM_FIND_VCVT_VARIANT(N) \
-+ (ARM_CHECK_BUILTIN_MODE (2) \
-+ ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sfv2si, false) \
-+ : (ARM_CHECK_BUILTIN_MODE (4) \
-+ ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sfv4si, false) \
-+ : NULL_TREE))
-+
-+#define ARM_FIND_VCVTU_VARIANT(N) \
-+ (ARM_CHECK_BUILTIN_MODE (2) \
-+ ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv2sfv2si, false) \
-+ : (ARM_CHECK_BUILTIN_MODE (4) \
-+ ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv4sfv4si, false) \
-+ : NULL_TREE))
-+ case BUILT_IN_LROUNDF:
-+ return out_unsigned_p
-+ ? ARM_FIND_VCVTU_VARIANT (vcvta)
-+ : ARM_FIND_VCVT_VARIANT (vcvta);
-+ case BUILT_IN_LCEILF:
-+ return out_unsigned_p
-+ ? ARM_FIND_VCVTU_VARIANT (vcvtp)
-+ : ARM_FIND_VCVT_VARIANT (vcvtp);
-+ case BUILT_IN_LFLOORF:
-+ return out_unsigned_p
-+ ? ARM_FIND_VCVTU_VARIANT (vcvtm)
-+ : ARM_FIND_VCVT_VARIANT (vcvtm);
-+#undef ARM_CHECK_BUILTIN_MODE
-+#define ARM_CHECK_BUILTIN_MODE(C, N) \
-+ (out_mode == N##mode && out_n == C \
-+ && in_mode == N##mode && in_n == C)
-+ case BUILT_IN_BSWAP16:
-+ if (ARM_CHECK_BUILTIN_MODE (4, HI))
-+ return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4hi, false);
-+ else if (ARM_CHECK_BUILTIN_MODE (8, HI))
-+ return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv8hi, false);
-+ else
-+ return NULL_TREE;
-+ case BUILT_IN_BSWAP32:
-+ if (ARM_CHECK_BUILTIN_MODE (2, SI))
-+ return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2si, false);
-+ else if (ARM_CHECK_BUILTIN_MODE (4, SI))
-+ return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4si, false);
-+ else
-+ return NULL_TREE;
-+ case BUILT_IN_BSWAP64:
-+ if (ARM_CHECK_BUILTIN_MODE (2, DI))
-+ return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2di, false);
-+ else
-+ return NULL_TREE;
-+ case BUILT_IN_COPYSIGNF:
-+ if (ARM_CHECK_BUILTIN_MODE (2, SF))
-+ return arm_builtin_decl (ARM_BUILTIN_NEON_copysignfv2sf, false);
-+ else if (ARM_CHECK_BUILTIN_MODE (4, SF))
-+ return arm_builtin_decl (ARM_BUILTIN_NEON_copysignfv4sf, false);
-+ else
-+ return NULL_TREE;
-+
- default:
- return NULL_TREE;
- }
-@@ -29601,9 +30069,12 @@
- }
- return NULL_TREE;
- }
-+#undef ARM_FIND_VCVT_VARIANT
-+#undef ARM_FIND_VCVTU_VARIANT
- #undef ARM_CHECK_BUILTIN_MODE
- #undef ARM_FIND_VRINT_VARIANT
-
-+
- /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
- static HOST_WIDE_INT
- arm_vector_alignment (const_tree type)
-@@ -31174,6 +31645,75 @@
- return false;
- }
-
-+static void
-+arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
-+{
-+ const unsigned ARM_FE_INVALID = 1;
-+ const unsigned ARM_FE_DIVBYZERO = 2;
-+ const unsigned ARM_FE_OVERFLOW = 4;
-+ const unsigned ARM_FE_UNDERFLOW = 8;
-+ const unsigned ARM_FE_INEXACT = 16;
-+ const unsigned HOST_WIDE_INT ARM_FE_ALL_EXCEPT = (ARM_FE_INVALID
-+ | ARM_FE_DIVBYZERO
-+ | ARM_FE_OVERFLOW
-+ | ARM_FE_UNDERFLOW
-+ | ARM_FE_INEXACT);
-+ const unsigned HOST_WIDE_INT ARM_FE_EXCEPT_SHIFT = 8;
-+ tree fenv_var, get_fpscr, set_fpscr, mask, ld_fenv, masked_fenv;
-+ tree new_fenv_var, reload_fenv, restore_fnenv;
-+ tree update_call, atomic_feraiseexcept, hold_fnclex;
-+
-+ if (!TARGET_VFP || !TARGET_HARD_FLOAT)
-+ return;
-+
-+ /* Generate the equivalent of :
-+ unsigned int fenv_var;
-+ fenv_var = __builtin_arm_get_fpscr ();
-+
-+ unsigned int masked_fenv;
-+ masked_fenv = fenv_var & mask;
-+
-+ __builtin_arm_set_fpscr (masked_fenv); */
-+
-+ fenv_var = create_tmp_var (unsigned_type_node, NULL);
-+ get_fpscr = arm_builtin_decls[ARM_BUILTIN_GET_FPSCR];
-+ set_fpscr = arm_builtin_decls[ARM_BUILTIN_SET_FPSCR];
-+ mask = build_int_cst (unsigned_type_node,
-+ ~((ARM_FE_ALL_EXCEPT << ARM_FE_EXCEPT_SHIFT)
-+ | ARM_FE_ALL_EXCEPT));
-+ ld_fenv = build2 (MODIFY_EXPR, unsigned_type_node,
-+ fenv_var, build_call_expr (get_fpscr, 0));
-+ masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask);
-+ hold_fnclex = build_call_expr (set_fpscr, 1, masked_fenv);
-+ *hold = build2 (COMPOUND_EXPR, void_type_node,
-+ build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
-+ hold_fnclex);
-+
-+ /* Store the value of masked_fenv to clear the exceptions:
-+ __builtin_arm_set_fpscr (masked_fenv); */
-+
-+ *clear = build_call_expr (set_fpscr, 1, masked_fenv);
-+
-+ /* Generate the equivalent of :
-+ unsigned int new_fenv_var;
-+ new_fenv_var = __builtin_arm_get_fpscr ();
-+
-+ __builtin_arm_set_fpscr (fenv_var);
-+
-+ __atomic_feraiseexcept (new_fenv_var); */
-+
-+ new_fenv_var = create_tmp_var (unsigned_type_node, NULL);
-+ reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, new_fenv_var,
-+ build_call_expr (get_fpscr, 0));
-+ restore_fnenv = build_call_expr (set_fpscr, 1, fenv_var);
-+ atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
-+ update_call = build_call_expr (atomic_feraiseexcept, 1,
-+ fold_convert (integer_type_node, new_fenv_var));
-+ *update = build2 (COMPOUND_EXPR, void_type_node,
-+ build2 (COMPOUND_EXPR, void_type_node,
-+ reload_fenv, restore_fnenv), update_call);
-+}
-+
- /* return TRUE if x is a reference to a value in a constant pool */
- extern bool
- arm_is_constant_pool_ref (rtx x)
---- a/src/gcc/config/arm/arm.h
-+++ b/src/gcc/config/arm/arm.h
-@@ -166,7 +166,10 @@
- builtin_define ("__ARM_EABI__"); \
- } \
- if (TARGET_IDIV) \
-- builtin_define ("__ARM_ARCH_EXT_IDIV__"); \
-+ { \
-+ builtin_define ("__ARM_ARCH_EXT_IDIV__"); \
-+ builtin_define ("__ARM_FEATURE_IDIV"); \
-+ } \
- } while (0)
-
- #include "config/arm/arm-opts.h"
-@@ -298,6 +301,9 @@
- /* FPU supports VFPv3 instructions. */
- #define TARGET_VFP3 (TARGET_VFP && arm_fpu_desc->rev >= 3)
-
-+/* FPU supports FPv5 instructions. */
-+#define TARGET_VFP5 (TARGET_VFP && arm_fpu_desc->rev >= 5)
-+
- /* FPU only supports VFP single-precision instructions. */
- #define TARGET_VFP_SINGLE (TARGET_VFP && arm_fpu_desc->regs == VFP_REG_SINGLE)
-
-@@ -442,9 +448,6 @@
- #define TARGET_DEFAULT_FLOAT_ABI ARM_FLOAT_ABI_SOFT
- #endif
-
--#define LARGEST_EXPONENT_IS_NORMAL(bits) \
-- ((bits) == 16 && arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
--
- #ifndef ARM_DEFAULT_ABI
- #define ARM_DEFAULT_ABI ARM_ABI_APCS
- #endif
---- a/src/gcc/config/arm/unspecs.md
-+++ b/src/gcc/config/arm/unspecs.md
-@@ -143,6 +143,8 @@
- VUNSPEC_SLX ; Represent a store-register-release-exclusive.
- VUNSPEC_LDA ; Represent a store-register-acquire.
- VUNSPEC_STL ; Represent a store-register-release.
-+ VUNSPEC_GET_FPSCR ; Represent fetch of FPSCR content.
-+ VUNSPEC_SET_FPSCR ; Represent assign of FPSCR content.
- ])
-
- ;; Enumerators for NEON unspecs.
---- a/src/gcc/config/arm/cortex-m4.md
-+++ b/src/gcc/config/arm/cortex-m4.md
-@@ -34,7 +34,7 @@
- (ior (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\
- alu_reg,alus_reg,logic_reg,logics_reg,\
- adc_imm,adcs_imm,adc_reg,adcs_reg,\
-- adr,bfm,rev,\
-+ adr,bfm,clz,rbit,rev,\
- shift_imm,shift_reg,extend,\
- alu_shift_imm,alus_shift_imm,\
- logic_shift_imm,logics_shift_imm,\
---- a/src/gcc/config/arm/arm-modes.def
-+++ b/src/gcc/config/arm/arm-modes.def
-@@ -21,9 +21,6 @@
- along with GCC; see the file COPYING3. If not see
- <http://www.gnu.org/licenses/>. */
-
--/* Extended precision floating point.
-- FIXME What format is this? */
--FLOAT_MODE (XF, 12, 0);
-
- /* Half-precision floating point */
- FLOAT_MODE (HF, 2, 0);
---- a/src/gcc/config/arm/arm-cores.def
-+++ b/src/gcc/config/arm/arm-cores.def
-@@ -141,7 +141,7 @@
- ARM_CORE("generic-armv7-a", genericv7a, genericv7a, 7A, FL_LDSCHED, cortex)
- ARM_CORE("cortex-a5", cortexa5, cortexa5, 7A, FL_LDSCHED, cortex_a5)
- ARM_CORE("cortex-a7", cortexa7, cortexa7, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a7)
--ARM_CORE("cortex-a8", cortexa8, cortexa8, 7A, FL_LDSCHED, cortex)
-+ARM_CORE("cortex-a8", cortexa8, cortexa8, 7A, FL_LDSCHED, cortex_a8)
- ARM_CORE("cortex-a9", cortexa9, cortexa9, 7A, FL_LDSCHED, cortex_a9)
- ARM_CORE("cortex-a12", cortexa12, cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a12)
- ARM_CORE("cortex-a15", cortexa15, cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a15)
-@@ -149,6 +149,7 @@
- ARM_CORE("cortex-r4f", cortexr4f, cortexr4f, 7R, FL_LDSCHED, cortex)
- ARM_CORE("cortex-r5", cortexr5, cortexr5, 7R, FL_LDSCHED | FL_ARM_DIV, cortex)
- ARM_CORE("cortex-r7", cortexr7, cortexr7, 7R, FL_LDSCHED | FL_ARM_DIV, cortex)
-+ARM_CORE("cortex-m7", cortexm7, cortexm7, 7EM, FL_LDSCHED, v7m)
- ARM_CORE("cortex-m4", cortexm4, cortexm4, 7EM, FL_LDSCHED, v7m)
- ARM_CORE("cortex-m3", cortexm3, cortexm3, 7M, FL_LDSCHED, v7m)
- ARM_CORE("marvell-pj4", marvell_pj4, marvell_pj4, 7A, FL_LDSCHED, 9e)
---- a/src/gcc/config/arm/cortex-r4.md
-+++ b/src/gcc/config/arm/cortex-r4.md
-@@ -81,7 +81,7 @@
- (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\
- alu_reg,alus_reg,logic_reg,logics_reg,\
- adc_imm,adcs_imm,adc_reg,adcs_reg,\
-- adr,bfm,rev,\
-+ adr,bfm,clz,rbit,rev,\
- shift_imm,shift_reg,mvn_imm,mvn_reg"))
- "cortex_r4_alu")
-
---- a/src/gcc/config/arm/arm-tune.md
-+++ b/src/gcc/config/arm/arm-tune.md
-@@ -28,7 +28,8 @@
- genericv7a,cortexa5,cortexa7,
- cortexa8,cortexa9,cortexa12,
- cortexa15,cortexr4,cortexr4f,
-- cortexr5,cortexr7,cortexm4,
-- cortexm3,marvell_pj4,cortexa15cortexa7,
-- cortexa53,cortexa57,cortexa57cortexa53"
-+ cortexr5,cortexr7,cortexm7,
-+ cortexm4,cortexm3,marvell_pj4,
-+ cortexa15cortexa7,cortexa53,cortexa57,
-+ cortexa57cortexa53"
- (const (symbol_ref "((enum attr_tune) arm_tune)")))
---- a/src/gcc/config/arm/arm-protos.h
-+++ b/src/gcc/config/arm/arm-protos.h
-@@ -126,7 +126,6 @@
- extern int arm_const_double_inline_cost (rtx);
- extern bool arm_const_double_by_parts (rtx);
- extern bool arm_const_double_by_immediates (rtx);
--extern const char *fp_immediate_constant (rtx);
- extern void arm_emit_call_insn (rtx, rtx);
- extern const char *output_call (rtx *);
- extern const char *output_call_mem (rtx *);
-@@ -150,7 +149,7 @@
- extern int arm_emit_vector_const (FILE *, rtx);
- extern void arm_emit_fp16_const (rtx c);
- extern const char * arm_output_load_gr (rtx *);
--extern const char *vfp_output_fstmd (rtx *);
-+extern const char *vfp_output_vstmd (rtx *);
- extern void arm_output_multireg_pop (rtx *, bool, rtx, bool, bool);
- extern void arm_set_return_address (rtx, rtx);
- extern int arm_eliminable_register (rtx);
-@@ -273,6 +272,11 @@
- const struct cpu_vec_costs* vec_costs;
- /* Prefer Neon for 64-bit bitops. */
- bool prefer_neon_for_64bits;
-+ /* Prefer 32-bit encoding instead of flag-setting 16-bit encoding. */
-+ bool disparage_flag_setting_t16_encodings;
-+ /* Prefer 32-bit encoding instead of 16-bit encoding where subset of flags
-+ would be set. */
-+ bool disparage_partial_flag_setting_t16_encodings;
- };
-
- extern const struct tune_params *current_tune;
---- a/src/gcc/config/arm/vfp.md
-+++ b/src/gcc/config/arm/vfp.md
-@@ -41,11 +41,11 @@
- case 5:
- return \"str%?\\t%1, %0\";
- case 6:
-- return \"fmsr%?\\t%0, %1\\t%@ int\";
-+ return \"vmov%?\\t%0, %1\\t%@ int\";
- case 7:
-- return \"fmrs%?\\t%0, %1\\t%@ int\";
-+ return \"vmov%?\\t%0, %1\\t%@ int\";
- case 8:
-- return \"fcpys%?\\t%0, %1\\t%@ int\";
-+ return \"vmov%?.f32\\t%0, %1\\t%@ int\";
- case 9: case 10:
- return output_move_vfp (operands);
- default:
-@@ -87,11 +87,11 @@
- case 8:
- return \"str%?\\t%1, %0\";
- case 9:
-- return \"fmsr%?\\t%0, %1\\t%@ int\";
-+ return \"vmov%?\\t%0, %1\\t%@ int\";
- case 10:
-- return \"fmrs%?\\t%0, %1\\t%@ int\";
-+ return \"vmov%?\\t%0, %1\\t%@ int\";
- case 11:
-- return \"fcpys%?\\t%0, %1\\t%@ int\";
-+ return \"vmov%?.f32\\t%0, %1\\t%@ int\";
- case 12: case 13:
- return output_move_vfp (operands);
- default:
-@@ -100,7 +100,7 @@
- "
- [(set_attr "predicable" "yes")
- (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no,no,no,no,no,no")
-- (set_attr "type" "mov_reg,mov_reg,mov_reg,mvn_reg,mov_reg,load1,load1,store1,store1,f_mcr,f_mrc,fmov,f_loads,f_stores")
-+ (set_attr "type" "mov_reg,mov_reg,mov_reg,mvn_reg,mov_imm,load1,load1,store1,store1,f_mcr,f_mrc,fmov,f_loads,f_stores")
- (set_attr "length" "2,4,2,4,4,4,4,4,4,4,4,4,4,4")
- (set_attr "pool_range" "*,*,*,*,*,1018,4094,*,*,*,*,*,1018,*")
- (set_attr "neg_pool_range" "*,*,*,*,*, 0, 0,*,*,*,*,*,1008,*")]
-@@ -130,14 +130,14 @@
- case 6:
- return output_move_double (operands, true, NULL);
- case 7:
-- return \"fmdrr%?\\t%P0, %Q1, %R1\\t%@ int\";
-+ return \"vmov%?\\t%P0, %Q1, %R1\\t%@ int\";
- case 8:
-- return \"fmrrd%?\\t%Q0, %R0, %P1\\t%@ int\";
-+ return \"vmov%?\\t%Q0, %R0, %P1\\t%@ int\";
- case 9:
- if (TARGET_VFP_SINGLE)
-- return \"fcpys%?\\t%0, %1\\t%@ int\;fcpys%?\\t%p0, %p1\\t%@ int\";
-+ return \"vmov%?.f32\\t%0, %1\\t%@ int\;vmov%?.f32\\t%p0, %p1\\t%@ int\";
- else
-- return \"fcpyd%?\\t%P0, %P1\\t%@ int\";
-+ return \"vmov%?.f64\\t%P0, %P1\\t%@ int\";
- case 10: case 11:
- return output_move_vfp (operands);
- default:
-@@ -181,11 +181,11 @@
- case 6:
- return output_move_double (operands, true, NULL);
- case 7:
-- return \"fmdrr%?\\t%P0, %Q1, %R1\\t%@ int\";
-+ return \"vmov%?\\t%P0, %Q1, %R1\\t%@ int\";
- case 8:
-- return \"fmrrd%?\\t%Q0, %R0, %P1\\t%@ int\";
-+ return \"vmov%?\\t%Q0, %R0, %P1\\t%@ int\";
- case 9:
-- return \"fcpyd%?\\t%P0, %P1\\t%@ int\";
-+ return \"vmov%?.f64\\t%P0, %P1\\t%@ int\";
- case 10: case 11:
- return output_move_vfp (operands);
- default:
-@@ -229,13 +229,13 @@
- case 3: /* memory from ARM register */
- return \"strh\\t%1, %0\\t%@ __fp16\";
- case 4: /* S register from S register */
-- return \"fcpys\\t%0, %1\";
-+ return \"vmov.f32\\t%0, %1\";
- case 5: /* ARM register from ARM register */
- return \"mov\\t%0, %1\\t%@ __fp16\";
- case 6: /* S register from ARM register */
-- return \"fmsr\\t%0, %1\";
-+ return \"vmov\\t%0, %1\";
- case 7: /* ARM register from S register */
-- return \"fmrs\\t%0, %1\";
-+ return \"vmov\\t%0, %1\";
- case 8: /* ARM register from constant */
- {
- REAL_VALUE_TYPE r;
-@@ -280,13 +280,13 @@
- case 1: /* memory from ARM register */
- return \"strh\\t%1, %0\\t%@ __fp16\";
- case 2: /* S register from S register */
-- return \"fcpys\\t%0, %1\";
-+ return \"vmov.f32\\t%0, %1\";
- case 3: /* ARM register from ARM register */
- return \"mov\\t%0, %1\\t%@ __fp16\";
- case 4: /* S register from ARM register */
-- return \"fmsr\\t%0, %1\";
-+ return \"vmov\\t%0, %1\";
- case 5: /* ARM register from S register */
-- return \"fmrs\\t%0, %1\";
-+ return \"vmov\\t%0, %1\";
- case 6: /* ARM register from constant */
- {
- REAL_VALUE_TYPE r;
-@@ -322,7 +322,7 @@
-
- (define_insn "*movsf_vfp"
- [(set (match_operand:SF 0 "nonimmediate_operand" "=t,?r,t ,t ,Uv,r ,m,t,r")
-- (match_operand:SF 1 "general_operand" " ?r,t,Dv,UvE,t, mE,r,t,r"))]
-+ (match_operand:SF 1 "general_operand" " ?r,t,Dv,UvE,t, mE,r,t,r"))]
- "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP
- && ( s_register_operand (operands[0], SFmode)
- || s_register_operand (operands[1], SFmode))"
-@@ -330,11 +330,11 @@
- switch (which_alternative)
- {
- case 0:
-- return \"fmsr%?\\t%0, %1\";
-+ return \"vmov%?\\t%0, %1\";
- case 1:
-- return \"fmrs%?\\t%0, %1\";
-+ return \"vmov%?\\t%0, %1\";
- case 2:
-- return \"fconsts%?\\t%0, #%G1\";
-+ return \"vmov%?.f32\\t%0, %1\";
- case 3: case 4:
- return output_move_vfp (operands);
- case 5:
-@@ -342,7 +342,7 @@
- case 6:
- return \"str%?\\t%1, %0\\t%@ float\";
- case 7:
-- return \"fcpys%?\\t%0, %1\";
-+ return \"vmov%?.f32\\t%0, %1\";
- case 8:
- return \"mov%?\\t%0, %1\\t%@ float\";
- default:
-@@ -366,11 +366,11 @@
- switch (which_alternative)
- {
- case 0:
-- return \"fmsr%?\\t%0, %1\";
-+ return \"vmov%?\\t%0, %1\";
- case 1:
-- return \"fmrs%?\\t%0, %1\";
-+ return \"vmov%?\\t%0, %1\";
- case 2:
-- return \"fconsts%?\\t%0, #%G1\";
-+ return \"vmov%?.f32\\t%0, %1\";
- case 3: case 4:
- return output_move_vfp (operands);
- case 5:
-@@ -378,7 +378,7 @@
- case 6:
- return \"str%?\\t%1, %0\\t%@ float\";
- case 7:
-- return \"fcpys%?\\t%0, %1\";
-+ return \"vmov%?.f32\\t%0, %1\";
- case 8:
- return \"mov%?\\t%0, %1\\t%@ float\";
- default:
-@@ -406,12 +406,12 @@
- switch (which_alternative)
- {
- case 0:
-- return \"fmdrr%?\\t%P0, %Q1, %R1\";
-+ return \"vmov%?\\t%P0, %Q1, %R1\";
- case 1:
-- return \"fmrrd%?\\t%Q0, %R0, %P1\";
-+ return \"vmov%?\\t%Q0, %R0, %P1\";
- case 2:
- gcc_assert (TARGET_VFP_DOUBLE);
-- return \"fconstd%?\\t%P0, #%G1\";
-+ return \"vmov%?.f64\\t%P0, %1\";
- case 3: case 4:
- return output_move_vfp (operands);
- case 5: case 6:
-@@ -418,9 +418,9 @@
- return output_move_double (operands, true, NULL);
- case 7:
- if (TARGET_VFP_SINGLE)
-- return \"fcpys%?\\t%0, %1\;fcpys%?\\t%p0, %p1\";
-+ return \"vmov%?.f32\\t%0, %1\;vmov%?.f32\\t%p0, %p1\";
- else
-- return \"fcpyd%?\\t%P0, %P1\";
-+ return \"vmov%?.f64\\t%P0, %P1\";
- case 8:
- return \"#\";
- default:
-@@ -453,12 +453,12 @@
- switch (which_alternative)
- {
- case 0:
-- return \"fmdrr%?\\t%P0, %Q1, %R1\";
-+ return \"vmov%?\\t%P0, %Q1, %R1\";
- case 1:
-- return \"fmrrd%?\\t%Q0, %R0, %P1\";
-+ return \"vmov%?\\t%Q0, %R0, %P1\";
- case 2:
- gcc_assert (TARGET_VFP_DOUBLE);
-- return \"fconstd%?\\t%P0, #%G1\";
-+ return \"vmov%?.f64\\t%P0, %1\";
- case 3: case 4:
- return output_move_vfp (operands);
- case 5: case 6: case 8:
-@@ -465,9 +465,9 @@
- return output_move_double (operands, true, NULL);
- case 7:
- if (TARGET_VFP_SINGLE)
-- return \"fcpys%?\\t%0, %1\;fcpys%?\\t%p0, %p1\";
-+ return \"vmov%?.f32\\t%0, %1\;vmov%?.f32\\t%p0, %p1\";
- else
-- return \"fcpyd%?\\t%P0, %P1\";
-+ return \"vmov%?.f64\\t%P0, %P1\";
- default:
- abort ();
- }
-@@ -498,15 +498,15 @@
- (match_operand:SF 2 "s_register_operand" "t,0,t,?r,0,?r,t,0,t")))]
- "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP"
- "@
-- fcpys%D3\\t%0, %2
-- fcpys%d3\\t%0, %1
-- fcpys%D3\\t%0, %2\;fcpys%d3\\t%0, %1
-- fmsr%D3\\t%0, %2
-- fmsr%d3\\t%0, %1
-- fmsr%D3\\t%0, %2\;fmsr%d3\\t%0, %1
-- fmrs%D3\\t%0, %2
-- fmrs%d3\\t%0, %1
-- fmrs%D3\\t%0, %2\;fmrs%d3\\t%0, %1"
-+ vmov%D3.f32\\t%0, %2
-+ vmov%d3.f32\\t%0, %1
-+ vmov%D3.f32\\t%0, %2\;vmov%d3.f32\\t%0, %1
-+ vmov%D3\\t%0, %2
-+ vmov%d3\\t%0, %1
-+ vmov%D3\\t%0, %2\;vmov%d3\\t%0, %1
-+ vmov%D3\\t%0, %2
-+ vmov%d3\\t%0, %1
-+ vmov%D3\\t%0, %2\;vmov%d3\\t%0, %1"
- [(set_attr "conds" "use")
- (set_attr "length" "4,4,8,4,4,8,4,4,8")
- (set_attr "type" "fmov,fmov,fmov,f_mcr,f_mcr,f_mcr,f_mrc,f_mrc,f_mrc")]
-@@ -521,15 +521,15 @@
- (match_operand:SF 2 "s_register_operand" "t,0,t,?r,0,?r,t,0,t")))]
- "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP && !arm_restrict_it"
- "@
-- it\\t%D3\;fcpys%D3\\t%0, %2
-- it\\t%d3\;fcpys%d3\\t%0, %1
-- ite\\t%D3\;fcpys%D3\\t%0, %2\;fcpys%d3\\t%0, %1
-- it\\t%D3\;fmsr%D3\\t%0, %2
-- it\\t%d3\;fmsr%d3\\t%0, %1
-- ite\\t%D3\;fmsr%D3\\t%0, %2\;fmsr%d3\\t%0, %1
-- it\\t%D3\;fmrs%D3\\t%0, %2
-- it\\t%d3\;fmrs%d3\\t%0, %1
-- ite\\t%D3\;fmrs%D3\\t%0, %2\;fmrs%d3\\t%0, %1"
-+ it\\t%D3\;vmov%D3.f32\\t%0, %2
-+ it\\t%d3\;vmov%d3.f32\\t%0, %1
-+ ite\\t%D3\;vmov%D3.f32\\t%0, %2\;vmov%d3.f32\\t%0, %1
-+ it\\t%D3\;vmov%D3\\t%0, %2
-+ it\\t%d3\;vmov%d3\\t%0, %1
-+ ite\\t%D3\;vmov%D3\\t%0, %2\;vmov%d3\\t%0, %1
-+ it\\t%D3\;vmov%D3\\t%0, %2
-+ it\\t%d3\;vmov%d3\\t%0, %1
-+ ite\\t%D3\;vmov%D3\\t%0, %2\;vmov%d3\\t%0, %1"
- [(set_attr "conds" "use")
- (set_attr "length" "6,6,10,6,6,10,6,6,10")
- (set_attr "type" "fmov,fmov,fmov,f_mcr,f_mcr,f_mcr,f_mrc,f_mrc,f_mrc")]
-@@ -544,15 +544,15 @@
- (match_operand:DF 2 "s_register_operand" "w,0,w,?r,0,?r,w,0,w")))]
- "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
- "@
-- fcpyd%D3\\t%P0, %P2
-- fcpyd%d3\\t%P0, %P1
-- fcpyd%D3\\t%P0, %P2\;fcpyd%d3\\t%P0, %P1
-- fmdrr%D3\\t%P0, %Q2, %R2
-- fmdrr%d3\\t%P0, %Q1, %R1
-- fmdrr%D3\\t%P0, %Q2, %R2\;fmdrr%d3\\t%P0, %Q1, %R1
-- fmrrd%D3\\t%Q0, %R0, %P2
-- fmrrd%d3\\t%Q0, %R0, %P1
-- fmrrd%D3\\t%Q0, %R0, %P2\;fmrrd%d3\\t%Q0, %R0, %P1"
-+ vmov%D3.f64\\t%P0, %P2
-+ vmov%d3.f64\\t%P0, %P1
-+ vmov%D3.f64\\t%P0, %P2\;vmov%d3.f64\\t%P0, %P1
-+ vmov%D3\\t%P0, %Q2, %R2
-+ vmov%d3\\t%P0, %Q1, %R1
-+ vmov%D3\\t%P0, %Q2, %R2\;vmov%d3\\t%P0, %Q1, %R1
-+ vmov%D3\\t%Q0, %R0, %P2
-+ vmov%d3\\t%Q0, %R0, %P1
-+ vmov%D3\\t%Q0, %R0, %P2\;vmov%d3\\t%Q0, %R0, %P1"
- [(set_attr "conds" "use")
- (set_attr "length" "4,4,8,4,4,8,4,4,8")
- (set_attr "type" "ffarithd,ffarithd,ffarithd,f_mcr,f_mcr,f_mcr,f_mrrc,f_mrrc,f_mrrc")]
-@@ -567,15 +567,15 @@
- (match_operand:DF 2 "s_register_operand" "w,0,w,?r,0,?r,w,0,w")))]
- "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE && !arm_restrict_it"
- "@
-- it\\t%D3\;fcpyd%D3\\t%P0, %P2
-- it\\t%d3\;fcpyd%d3\\t%P0, %P1
-- ite\\t%D3\;fcpyd%D3\\t%P0, %P2\;fcpyd%d3\\t%P0, %P1
-- it\t%D3\;fmdrr%D3\\t%P0, %Q2, %R2
-- it\t%d3\;fmdrr%d3\\t%P0, %Q1, %R1
-- ite\\t%D3\;fmdrr%D3\\t%P0, %Q2, %R2\;fmdrr%d3\\t%P0, %Q1, %R1
-- it\t%D3\;fmrrd%D3\\t%Q0, %R0, %P2
-- it\t%d3\;fmrrd%d3\\t%Q0, %R0, %P1
-- ite\\t%D3\;fmrrd%D3\\t%Q0, %R0, %P2\;fmrrd%d3\\t%Q0, %R0, %P1"
-+ it\\t%D3\;vmov%D3.f64\\t%P0, %P2
-+ it\\t%d3\;vmov%d3.f64\\t%P0, %P1
-+ ite\\t%D3\;vmov%D3.f64\\t%P0, %P2\;vmov%d3.f64\\t%P0, %P1
-+ it\t%D3\;vmov%D3\\t%P0, %Q2, %R2
-+ it\t%d3\;vmov%d3\\t%P0, %Q1, %R1
-+ ite\\t%D3\;vmov%D3\\t%P0, %Q2, %R2\;vmov%d3\\t%P0, %Q1, %R1
-+ it\t%D3\;vmov%D3\\t%Q0, %R0, %P2
-+ it\t%d3\;vmov%d3\\t%Q0, %R0, %P1
-+ ite\\t%D3\;vmov%D3\\t%Q0, %R0, %P2\;vmov%d3\\t%Q0, %R0, %P1"
- [(set_attr "conds" "use")
- (set_attr "length" "6,6,10,6,6,10,6,6,10")
- (set_attr "type" "ffarithd,ffarithd,ffarithd,f_mcr,f_mcr,f_mcrr,f_mrrc,f_mrrc,f_mrrc")]
-@@ -588,7 +588,7 @@
- [(set (match_operand:SF 0 "s_register_operand" "=t")
- (abs:SF (match_operand:SF 1 "s_register_operand" "t")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-- "fabss%?\\t%0, %1"
-+ "vabs%?.f32\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "predicable_short_it" "no")
- (set_attr "type" "ffariths")]
-@@ -598,7 +598,7 @@
- [(set (match_operand:DF 0 "s_register_operand" "=w")
- (abs:DF (match_operand:DF 1 "s_register_operand" "w")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-- "fabsd%?\\t%P0, %P1"
-+ "vabs%?.f64\\t%P0, %P1"
- [(set_attr "predicable" "yes")
- (set_attr "predicable_short_it" "no")
- (set_attr "type" "ffarithd")]
-@@ -609,7 +609,7 @@
- (neg:SF (match_operand:SF 1 "s_register_operand" "t,r")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
- "@
-- fnegs%?\\t%0, %1
-+ vneg%?.f32\\t%0, %1
- eor%?\\t%0, %1, #-2147483648"
- [(set_attr "predicable" "yes")
- (set_attr "predicable_short_it" "no")
-@@ -621,7 +621,7 @@
- (neg:DF (match_operand:DF 1 "s_register_operand" "w,0,r")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
- "@
-- fnegd%?\\t%P0, %P1
-+ vneg%?.f64\\t%P0, %P1
- #
- #"
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE && reload_completed
-@@ -671,7 +671,7 @@
- (plus:SF (match_operand:SF 1 "s_register_operand" "t")
- (match_operand:SF 2 "s_register_operand" "t")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-- "fadds%?\\t%0, %1, %2"
-+ "vadd%?.f32\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "predicable_short_it" "no")
- (set_attr "type" "fadds")]
-@@ -682,7 +682,7 @@
- (plus:DF (match_operand:DF 1 "s_register_operand" "w")
- (match_operand:DF 2 "s_register_operand" "w")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-- "faddd%?\\t%P0, %P1, %P2"
-+ "vadd%?.f64\\t%P0, %P1, %P2"
- [(set_attr "predicable" "yes")
- (set_attr "predicable_short_it" "no")
- (set_attr "type" "faddd")]
-@@ -694,7 +694,7 @@
- (minus:SF (match_operand:SF 1 "s_register_operand" "t")
- (match_operand:SF 2 "s_register_operand" "t")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-- "fsubs%?\\t%0, %1, %2"
-+ "vsub%?.f32\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "predicable_short_it" "no")
- (set_attr "type" "fadds")]
-@@ -705,7 +705,7 @@
- (minus:DF (match_operand:DF 1 "s_register_operand" "w")
- (match_operand:DF 2 "s_register_operand" "w")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-- "fsubd%?\\t%P0, %P1, %P2"
-+ "vsub%?.f64\\t%P0, %P1, %P2"
- [(set_attr "predicable" "yes")
- (set_attr "predicable_short_it" "no")
- (set_attr "type" "faddd")]
-@@ -719,7 +719,7 @@
- (div:SF (match_operand:SF 1 "s_register_operand" "t")
- (match_operand:SF 2 "s_register_operand" "t")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-- "fdivs%?\\t%0, %1, %2"
-+ "vdiv%?.f32\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "predicable_short_it" "no")
- (set_attr "type" "fdivs")]
-@@ -730,7 +730,7 @@
- (div:DF (match_operand:DF 1 "s_register_operand" "w")
- (match_operand:DF 2 "s_register_operand" "w")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-- "fdivd%?\\t%P0, %P1, %P2"
-+ "vdiv%?.f64\\t%P0, %P1, %P2"
- [(set_attr "predicable" "yes")
- (set_attr "predicable_short_it" "no")
- (set_attr "type" "fdivd")]
-@@ -744,7 +744,7 @@
- (mult:SF (match_operand:SF 1 "s_register_operand" "t")
- (match_operand:SF 2 "s_register_operand" "t")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-- "fmuls%?\\t%0, %1, %2"
-+ "vmul%?.f32\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "predicable_short_it" "no")
- (set_attr "type" "fmuls")]
-@@ -755,7 +755,7 @@
- (mult:DF (match_operand:DF 1 "s_register_operand" "w")
- (match_operand:DF 2 "s_register_operand" "w")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-- "fmuld%?\\t%P0, %P1, %P2"
-+ "vmul%?.f64\\t%P0, %P1, %P2"
- [(set_attr "predicable" "yes")
- (set_attr "predicable_short_it" "no")
- (set_attr "type" "fmuld")]
-@@ -766,7 +766,7 @@
- (mult:SF (neg:SF (match_operand:SF 1 "s_register_operand" "t"))
- (match_operand:SF 2 "s_register_operand" "t")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-- "fnmuls%?\\t%0, %1, %2"
-+ "vnmul%?.f32\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "predicable_short_it" "no")
- (set_attr "type" "fmuls")]
-@@ -777,7 +777,7 @@
- (mult:DF (neg:DF (match_operand:DF 1 "s_register_operand" "w"))
- (match_operand:DF 2 "s_register_operand" "w")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-- "fnmuld%?\\t%P0, %P1, %P2"
-+ "vnmul%?.f64\\t%P0, %P1, %P2"
- [(set_attr "predicable" "yes")
- (set_attr "predicable_short_it" "no")
- (set_attr "type" "fmuld")]
-@@ -793,7 +793,7 @@
- (match_operand:SF 3 "s_register_operand" "t"))
- (match_operand:SF 1 "s_register_operand" "0")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-- "fmacs%?\\t%0, %2, %3"
-+ "vmla%?.f32\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "predicable_short_it" "no")
- (set_attr "type" "fmacs")]
-@@ -805,7 +805,7 @@
- (match_operand:DF 3 "s_register_operand" "w"))
- (match_operand:DF 1 "s_register_operand" "0")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-- "fmacd%?\\t%P0, %P2, %P3"
-+ "vmla%?.f64\\t%P0, %P2, %P3"
- [(set_attr "predicable" "yes")
- (set_attr "predicable_short_it" "no")
- (set_attr "type" "fmacd")]
-@@ -818,7 +818,7 @@
- (match_operand:SF 3 "s_register_operand" "t"))
- (match_operand:SF 1 "s_register_operand" "0")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-- "fmscs%?\\t%0, %2, %3"
-+ "vnmls%?.f32\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "predicable_short_it" "no")
- (set_attr "type" "fmacs")]
-@@ -830,7 +830,7 @@
- (match_operand:DF 3 "s_register_operand" "w"))
- (match_operand:DF 1 "s_register_operand" "0")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-- "fmscd%?\\t%P0, %P2, %P3"
-+ "vnmls%?.f64\\t%P0, %P2, %P3"
- [(set_attr "predicable" "yes")
- (set_attr "predicable_short_it" "no")
- (set_attr "type" "fmacd")]
-@@ -843,7 +843,7 @@
- (mult:SF (match_operand:SF 2 "s_register_operand" "t")
- (match_operand:SF 3 "s_register_operand" "t"))))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-- "fnmacs%?\\t%0, %2, %3"
-+ "vmls%?.f32\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "predicable_short_it" "no")
- (set_attr "type" "fmacs")]
-@@ -855,7 +855,7 @@
- (mult:DF (match_operand:DF 2 "s_register_operand" "w")
- (match_operand:DF 3 "s_register_operand" "w"))))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-- "fnmacd%?\\t%P0, %P2, %P3"
-+ "vmls%?.f64\\t%P0, %P2, %P3"
- [(set_attr "predicable" "yes")
- (set_attr "predicable_short_it" "no")
- (set_attr "type" "fmacd")]
-@@ -870,7 +870,7 @@
- (match_operand:SF 3 "s_register_operand" "t"))
- (match_operand:SF 1 "s_register_operand" "0")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-- "fnmscs%?\\t%0, %2, %3"
-+ "vnmla%?.f32\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "predicable_short_it" "no")
- (set_attr "type" "fmacs")]
-@@ -883,7 +883,7 @@
- (match_operand:DF 3 "s_register_operand" "w"))
- (match_operand:DF 1 "s_register_operand" "0")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-- "fnmscd%?\\t%P0, %P2, %P3"
-+ "vnmla%?.f64\\t%P0, %P2, %P3"
- [(set_attr "predicable" "yes")
- (set_attr "predicable_short_it" "no")
- (set_attr "type" "fmacd")]
-@@ -948,7 +948,7 @@
- [(set (match_operand:DF 0 "s_register_operand" "=w")
- (float_extend:DF (match_operand:SF 1 "s_register_operand" "t")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-- "fcvtds%?\\t%P0, %1"
-+ "vcvt%?.f64.f32\\t%P0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "predicable_short_it" "no")
- (set_attr "type" "f_cvt")]
-@@ -958,7 +958,7 @@
- [(set (match_operand:SF 0 "s_register_operand" "=t")
- (float_truncate:SF (match_operand:DF 1 "s_register_operand" "w")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-- "fcvtsd%?\\t%0, %P1"
-+ "vcvt%?.f32.f64\\t%0, %P1"
- [(set_attr "predicable" "yes")
- (set_attr "predicable_short_it" "no")
- (set_attr "type" "f_cvt")]
-@@ -988,7 +988,7 @@
- [(set (match_operand:SI 0 "s_register_operand" "=t")
- (fix:SI (fix:SF (match_operand:SF 1 "s_register_operand" "t"))))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-- "ftosizs%?\\t%0, %1"
-+ "vcvt%?.s32.f32\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "predicable_short_it" "no")
- (set_attr "type" "f_cvtf2i")]
-@@ -998,7 +998,7 @@
- [(set (match_operand:SI 0 "s_register_operand" "=t")
- (fix:SI (fix:DF (match_operand:DF 1 "s_register_operand" "w"))))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-- "ftosizd%?\\t%0, %P1"
-+ "vcvt%?.s32.f64\\t%0, %P1"
- [(set_attr "predicable" "yes")
- (set_attr "predicable_short_it" "no")
- (set_attr "type" "f_cvtf2i")]
-@@ -1009,7 +1009,7 @@
- [(set (match_operand:SI 0 "s_register_operand" "=t")
- (unsigned_fix:SI (fix:SF (match_operand:SF 1 "s_register_operand" "t"))))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-- "ftouizs%?\\t%0, %1"
-+ "vcvt%?.u32.f32\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "predicable_short_it" "no")
- (set_attr "type" "f_cvtf2i")]
-@@ -1019,7 +1019,7 @@
- [(set (match_operand:SI 0 "s_register_operand" "=t")
- (unsigned_fix:SI (fix:DF (match_operand:DF 1 "s_register_operand" "t"))))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-- "ftouizd%?\\t%0, %P1"
-+ "vcvt%?.u32.f64\\t%0, %P1"
- [(set_attr "predicable" "yes")
- (set_attr "predicable_short_it" "no")
- (set_attr "type" "f_cvtf2i")]
-@@ -1030,7 +1030,7 @@
- [(set (match_operand:SF 0 "s_register_operand" "=t")
- (float:SF (match_operand:SI 1 "s_register_operand" "t")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-- "fsitos%?\\t%0, %1"
-+ "vcvt%?.f32.s32\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "predicable_short_it" "no")
- (set_attr "type" "f_cvti2f")]
-@@ -1040,7 +1040,7 @@
- [(set (match_operand:DF 0 "s_register_operand" "=w")
- (float:DF (match_operand:SI 1 "s_register_operand" "t")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-- "fsitod%?\\t%P0, %1"
-+ "vcvt%?.f64.s32\\t%P0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "predicable_short_it" "no")
- (set_attr "type" "f_cvti2f")]
-@@ -1051,7 +1051,7 @@
- [(set (match_operand:SF 0 "s_register_operand" "=t")
- (unsigned_float:SF (match_operand:SI 1 "s_register_operand" "t")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-- "fuitos%?\\t%0, %1"
-+ "vcvt%?.f32.u32\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "predicable_short_it" "no")
- (set_attr "type" "f_cvti2f")]
-@@ -1061,7 +1061,7 @@
- [(set (match_operand:DF 0 "s_register_operand" "=w")
- (unsigned_float:DF (match_operand:SI 1 "s_register_operand" "t")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-- "fuitod%?\\t%P0, %1"
-+ "vcvt%?.f64.u32\\t%P0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "predicable_short_it" "no")
- (set_attr "type" "f_cvti2f")]
-@@ -1074,7 +1074,7 @@
- [(set (match_operand:SF 0 "s_register_operand" "=t")
- (sqrt:SF (match_operand:SF 1 "s_register_operand" "t")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-- "fsqrts%?\\t%0, %1"
-+ "vsqrt%?.f32\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "predicable_short_it" "no")
- (set_attr "type" "fsqrts")]
-@@ -1084,7 +1084,7 @@
- [(set (match_operand:DF 0 "s_register_operand" "=w")
- (sqrt:DF (match_operand:DF 1 "s_register_operand" "w")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-- "fsqrtd%?\\t%P0, %P1"
-+ "vsqrt%?.f64\\t%P0, %P1"
- [(set_attr "predicable" "yes")
- (set_attr "predicable_short_it" "no")
- (set_attr "type" "fsqrtd")]
-@@ -1097,7 +1097,7 @@
- [(set (reg CC_REGNUM)
- (reg VFPCC_REGNUM))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-- "fmstat%?"
-+ "vmrs%?\\tAPSR_nzcv, FPSCR"
- [(set_attr "conds" "set")
- (set_attr "type" "f_flag")]
- )
-@@ -1165,6 +1165,9 @@
-
- ;; Comparison patterns
-
-+;; In the compare with FP zero case the ARM Architecture Reference Manual
-+;; specifies the immediate to be #0.0. However, some buggy assemblers only
-+;; accept #0. We don't want to autodetect broken assemblers, so output #0.
- (define_insn "*cmpsf_vfp"
- [(set (reg:CCFP VFPCC_REGNUM)
- (compare:CCFP (match_operand:SF 0 "s_register_operand" "t,t")
-@@ -1171,8 +1174,8 @@
- (match_operand:SF 1 "vfp_compare_operand" "t,G")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
- "@
-- fcmps%?\\t%0, %1
-- fcmpzs%?\\t%0"
-+ vcmp%?.f32\\t%0, %1
-+ vcmp%?.f32\\t%0, #0"
- [(set_attr "predicable" "yes")
- (set_attr "predicable_short_it" "no")
- (set_attr "type" "fcmps")]
-@@ -1184,8 +1187,8 @@
- (match_operand:SF 1 "vfp_compare_operand" "t,G")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
- "@
-- fcmpes%?\\t%0, %1
-- fcmpezs%?\\t%0"
-+ vcmpe%?.f32\\t%0, %1
-+ vcmpe%?.f32\\t%0, #0"
- [(set_attr "predicable" "yes")
- (set_attr "predicable_short_it" "no")
- (set_attr "type" "fcmps")]
-@@ -1197,8 +1200,8 @@
- (match_operand:DF 1 "vfp_compare_operand" "w,G")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
- "@
-- fcmpd%?\\t%P0, %P1
-- fcmpzd%?\\t%P0"
-+ vcmp%?.f64\\t%P0, %P1
-+ vcmp%?.f64\\t%P0, #0"
- [(set_attr "predicable" "yes")
- (set_attr "predicable_short_it" "no")
- (set_attr "type" "fcmpd")]
-@@ -1210,8 +1213,8 @@
- (match_operand:DF 1 "vfp_compare_operand" "w,G")))]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
- "@
-- fcmped%?\\t%P0, %P1
-- fcmpezd%?\\t%P0"
-+ vcmpe%?.f64\\t%P0, %P1
-+ vcmpe%?.f64\\t%P0, #0"
- [(set_attr "predicable" "yes")
- (set_attr "predicable_short_it" "no")
- (set_attr "type" "fcmpd")]
-@@ -1272,7 +1275,7 @@
- (unspec:BLK [(match_operand:DF 1 "vfp_register_operand" "")]
- UNSPEC_PUSH_MULT))])]
- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-- "* return vfp_output_fstmd (operands);"
-+ "* return vfp_output_vstmd (operands);"
- [(set_attr "type" "f_stored")]
- )
-
-@@ -1285,7 +1288,7 @@
- (unspec:SDF [(match_operand:SDF 1
- "register_operand" "<F_constraint>")]
- VRINT))]
-- "TARGET_HARD_FLOAT && TARGET_FPU_ARMV8 <vfp_double_cond>"
-+ "TARGET_HARD_FLOAT && TARGET_VFP5 <vfp_double_cond>"
- "vrint<vrint_variant>%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1"
- [(set_attr "predicable" "<vrint_predicable>")
- (set_attr "predicable_short_it" "no")
-@@ -1293,6 +1296,18 @@
- (set_attr "conds" "<vrint_conds>")]
- )
-
-+;; Implements the lround, lfloor and lceil optabs.
-+(define_insn "l<vrint_pattern><su_optab><mode>si2"
-+ [(set (match_operand:SI 0 "register_operand" "=t")
-+ (FIXUORS:SI (unspec:SDF
-+ [(match_operand:SDF 1
-+ "register_operand" "<F_constraint>")] VCVT)))]
-+ "TARGET_HARD_FLOAT && TARGET_FPU_ARMV8 <vfp_double_cond>"
-+ "vcvt<vrint_variant>%?.<su>32.<V_if_elem>\\t%0, %<V_reg>1"
-+ [(set_attr "predicable" "no")
-+ (set_attr "type" "f_cvtf2i")]
-+)
-+
- ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
- ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
- ;; operand will be returned when both operands are zero (i.e. they may not
-@@ -1304,7 +1319,7 @@
- [(set (match_operand:SDF 0 "register_operand" "=<F_constraint>")
- (smax:SDF (match_operand:SDF 1 "register_operand" "<F_constraint>")
- (match_operand:SDF 2 "register_operand" "<F_constraint>")))]
-- "TARGET_HARD_FLOAT && TARGET_FPU_ARMV8 <vfp_double_cond>"
-+ "TARGET_HARD_FLOAT && TARGET_VFP5 <vfp_double_cond>"
- "vmaxnm.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
- [(set_attr "type" "f_minmax<vfp_type>")
- (set_attr "conds" "unconditional")]
-@@ -1314,12 +1329,28 @@
- [(set (match_operand:SDF 0 "register_operand" "=<F_constraint>")
- (smin:SDF (match_operand:SDF 1 "register_operand" "<F_constraint>")
- (match_operand:SDF 2 "register_operand" "<F_constraint>")))]
-- "TARGET_HARD_FLOAT && TARGET_FPU_ARMV8 <vfp_double_cond>"
-+ "TARGET_HARD_FLOAT && TARGET_VFP5 <vfp_double_cond>"
- "vminnm.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
- [(set_attr "type" "f_minmax<vfp_type>")
- (set_attr "conds" "unconditional")]
- )
-
-+;; Write Floating-point Status and Control Register.
-+(define_insn "set_fpscr"
-+ [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] VUNSPEC_SET_FPSCR)]
-+ "TARGET_VFP && TARGET_HARD_FLOAT"
-+ "mcr\\tp10, 7, %0, cr1, cr0, 0\\t @SET_FPSCR"
-+ [(set_attr "type" "mrs")])
-+
-+;; Read Floating-point Status and Control Register.
-+(define_insn "get_fpscr"
-+ [(set (match_operand:SI 0 "register_operand" "=r")
-+ (unspec_volatile:SI [(const_int 0)] VUNSPEC_GET_FPSCR))]
-+ "TARGET_VFP && TARGET_HARD_FLOAT"
-+ "mrc\\tp10, 7, %0, cr1, cr0, 0\\t @GET_FPSCR"
-+ [(set_attr "type" "mrs")])
-+
-+
- ;; Unimplemented insns:
- ;; fldm*
- ;; fstm*
---- a/src/gcc/config/arm/neon.md
-+++ b/src/gcc/config/arm/neon.md
-@@ -296,7 +296,7 @@
- UNSPEC_MISALIGNED_ACCESS))]
- "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
- "vld1.<V_sz_elem>\t{%q0}, %A1"
-- [(set_attr "type" "neon_store1_1reg<q>")])
-+ [(set_attr "type" "neon_load1_1reg<q>")])
-
- (define_insn "vec_set<mode>_internal"
- [(set (match_operand:VD 0 "s_register_operand" "=w,w")
-@@ -629,6 +629,17 @@
- [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
- )
-
-+(define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
-+ [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
-+ (FIXUORS:<V_cmp_result> (unspec:VCVTF
-+ [(match_operand:VCVTF 1 "register_operand" "w")]
-+ NEON_VCVT)))]
-+ "TARGET_NEON && TARGET_FPU_ARMV8"
-+ "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
-+ [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
-+ (set_attr "predicable" "no")]
-+)
-+
- (define_insn "ior<mode>3"
- [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
- (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
-@@ -1041,7 +1052,9 @@
- }
- else
- {
-- if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1)
-+ if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1
-+ && (!reg_overlap_mentioned_p (operands[0], operands[1])
-+ || REGNO (operands[0]) == REGNO (operands[1])))
- /* This clobbers CC. */
- emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
- else
-@@ -1141,7 +1154,9 @@
- }
- else
- {
-- if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1)
-+ if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1
-+ && (!reg_overlap_mentioned_p (operands[0], operands[1])
-+ || REGNO (operands[0]) == REGNO (operands[1])))
- /* This clobbers CC. */
- emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1]));
- else
-@@ -1334,33 +1349,47 @@
-
- ;; Reduction operations
-
--(define_expand "reduc_splus_<mode>"
-- [(match_operand:VD 0 "s_register_operand" "")
-+(define_expand "reduc_plus_scal_<mode>"
-+ [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
- (match_operand:VD 1 "s_register_operand" "")]
- "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
- {
-- neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
-+ rtx vec = gen_reg_rtx (<MODE>mode);
-+ neon_pairwise_reduce (vec, operands[1], <MODE>mode,
- &gen_neon_vpadd_internal<mode>);
-+ /* The same result is actually computed into every element. */
-+ emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
- DONE;
- })
-
--(define_expand "reduc_splus_<mode>"
-- [(match_operand:VQ 0 "s_register_operand" "")
-+(define_expand "reduc_plus_scal_<mode>"
-+ [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
- (match_operand:VQ 1 "s_register_operand" "")]
- "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
- && !BYTES_BIG_ENDIAN"
- {
- rtx step1 = gen_reg_rtx (<V_HALF>mode);
-- rtx res_d = gen_reg_rtx (<V_HALF>mode);
-
- emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
-- emit_insn (gen_reduc_splus_<V_half> (res_d, step1));
-- emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));
-+ emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));
-
- DONE;
- })
-
--(define_insn "reduc_splus_v2di"
-+(define_expand "reduc_plus_scal_v2di"
-+ [(match_operand:DI 0 "nonimmediate_operand" "=w")
-+ (match_operand:V2DI 1 "s_register_operand" "")]
-+ "TARGET_NEON && !BYTES_BIG_ENDIAN"
-+{
-+ rtx vec = gen_reg_rtx (V2DImode);
-+
-+ emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
-+ emit_insn (gen_vec_extractv2di (operands[0], vec, const0_rtx));
-+
-+ DONE;
-+})
-+
-+(define_insn "arm_reduc_plus_internal_v2di"
- [(set (match_operand:V2DI 0 "s_register_operand" "=w")
- (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
- UNSPEC_VPADD))]
-@@ -1369,115 +1398,109 @@
- [(set_attr "type" "neon_add_q")]
- )
-
--;; NEON does not distinguish between signed and unsigned addition except on
--;; widening operations.
--(define_expand "reduc_uplus_<mode>"
-- [(match_operand:VDQI 0 "s_register_operand" "")
-- (match_operand:VDQI 1 "s_register_operand" "")]
-- "TARGET_NEON && (<Is_d_reg> || !BYTES_BIG_ENDIAN)"
--{
-- emit_insn (gen_reduc_splus_<mode> (operands[0], operands[1]));
-- DONE;
--})
--
--(define_expand "reduc_smin_<mode>"
-- [(match_operand:VD 0 "s_register_operand" "")
-+(define_expand "reduc_smin_scal_<mode>"
-+ [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
- (match_operand:VD 1 "s_register_operand" "")]
- "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
- {
-- neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
-+ rtx vec = gen_reg_rtx (<MODE>mode);
-+
-+ neon_pairwise_reduce (vec, operands[1], <MODE>mode,
- &gen_neon_vpsmin<mode>);
-+ /* The result is computed into every element of the vector. */
-+ emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
- DONE;
- })
-
--(define_expand "reduc_smin_<mode>"
-- [(match_operand:VQ 0 "s_register_operand" "")
-+(define_expand "reduc_smin_scal_<mode>"
-+ [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
- (match_operand:VQ 1 "s_register_operand" "")]
- "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
- && !BYTES_BIG_ENDIAN"
- {
- rtx step1 = gen_reg_rtx (<V_HALF>mode);
-- rtx res_d = gen_reg_rtx (<V_HALF>mode);
-
- emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
-- emit_insn (gen_reduc_smin_<V_half> (res_d, step1));
-- emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));
-+ emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));
-
- DONE;
- })
-
--(define_expand "reduc_smax_<mode>"
-- [(match_operand:VD 0 "s_register_operand" "")
-+(define_expand "reduc_smax_scal_<mode>"
-+ [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
- (match_operand:VD 1 "s_register_operand" "")]
- "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
- {
-- neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
-+ rtx vec = gen_reg_rtx (<MODE>mode);
-+ neon_pairwise_reduce (vec, operands[1], <MODE>mode,
- &gen_neon_vpsmax<mode>);
-+ /* The result is computed into every element of the vector. */
-+ emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
- DONE;
- })
-
--(define_expand "reduc_smax_<mode>"
-- [(match_operand:VQ 0 "s_register_operand" "")
-+(define_expand "reduc_smax_scal_<mode>"
-+ [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
- (match_operand:VQ 1 "s_register_operand" "")]
- "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
- && !BYTES_BIG_ENDIAN"
- {
- rtx step1 = gen_reg_rtx (<V_HALF>mode);
-- rtx res_d = gen_reg_rtx (<V_HALF>mode);
-
- emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
-- emit_insn (gen_reduc_smax_<V_half> (res_d, step1));
-- emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));
-+ emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));
-
- DONE;
- })
-
--(define_expand "reduc_umin_<mode>"
-- [(match_operand:VDI 0 "s_register_operand" "")
-+(define_expand "reduc_umin_scal_<mode>"
-+ [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
- (match_operand:VDI 1 "s_register_operand" "")]
- "TARGET_NEON"
- {
-- neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
-+ rtx vec = gen_reg_rtx (<MODE>mode);
-+ neon_pairwise_reduce (vec, operands[1], <MODE>mode,
- &gen_neon_vpumin<mode>);
-+ /* The result is computed into every element of the vector. */
-+ emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
- DONE;
- })
-
--(define_expand "reduc_umin_<mode>"
-- [(match_operand:VQI 0 "s_register_operand" "")
-+(define_expand "reduc_umin_scal_<mode>"
-+ [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
- (match_operand:VQI 1 "s_register_operand" "")]
- "TARGET_NEON && !BYTES_BIG_ENDIAN"
- {
- rtx step1 = gen_reg_rtx (<V_HALF>mode);
-- rtx res_d = gen_reg_rtx (<V_HALF>mode);
-
- emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
-- emit_insn (gen_reduc_umin_<V_half> (res_d, step1));
-- emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));
-+ emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));
-
- DONE;
- })
-
--(define_expand "reduc_umax_<mode>"
-- [(match_operand:VDI 0 "s_register_operand" "")
-+(define_expand "reduc_umax_scal_<mode>"
-+ [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
- (match_operand:VDI 1 "s_register_operand" "")]
- "TARGET_NEON"
- {
-- neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
-+ rtx vec = gen_reg_rtx (<MODE>mode);
-+ neon_pairwise_reduce (vec, operands[1], <MODE>mode,
- &gen_neon_vpumax<mode>);
-+ /* The result is computed into every element of the vector. */
-+ emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
- DONE;
- })
-
--(define_expand "reduc_umax_<mode>"
-- [(match_operand:VQI 0 "s_register_operand" "")
-+(define_expand "reduc_umax_scal_<mode>"
-+ [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
- (match_operand:VQI 1 "s_register_operand" "")]
- "TARGET_NEON && !BYTES_BIG_ENDIAN"
- {
- rtx step1 = gen_reg_rtx (<V_HALF>mode);
-- rtx res_d = gen_reg_rtx (<V_HALF>mode);
-
- emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
-- emit_insn (gen_reduc_umax_<V_half> (res_d, step1));
-- emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d));
-+ emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));
-
- DONE;
- })
-@@ -1842,9 +1865,9 @@
- ; good for plain vadd, vaddq.
-
- (define_expand "neon_vadd<mode>"
-- [(match_operand:VDQX 0 "s_register_operand" "=w")
-- (match_operand:VDQX 1 "s_register_operand" "w")
-- (match_operand:VDQX 2 "s_register_operand" "w")
-+ [(match_operand:VCVTF 0 "s_register_operand" "=w")
-+ (match_operand:VCVTF 1 "s_register_operand" "w")
-+ (match_operand:VCVTF 2 "s_register_operand" "w")
- (match_operand:SI 3 "immediate_operand" "i")]
- "TARGET_NEON"
- {
-@@ -1869,9 +1892,9 @@
- ; Used for intrinsics when flag_unsafe_math_optimizations is false.
-
- (define_insn "neon_vadd<mode>_unspec"
-- [(set (match_operand:VDQX 0 "s_register_operand" "=w")
-- (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
-- (match_operand:VDQX 2 "s_register_operand" "w")]
-+ [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
-+ (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
-+ (match_operand:VCVTF 2 "s_register_operand" "w")]
- UNSPEC_VADD))]
- "TARGET_NEON"
- "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
-@@ -2132,9 +2155,9 @@
- )
-
- (define_expand "neon_vsub<mode>"
-- [(match_operand:VDQX 0 "s_register_operand" "=w")
-- (match_operand:VDQX 1 "s_register_operand" "w")
-- (match_operand:VDQX 2 "s_register_operand" "w")
-+ [(match_operand:VCVTF 0 "s_register_operand" "=w")
-+ (match_operand:VCVTF 1 "s_register_operand" "w")
-+ (match_operand:VCVTF 2 "s_register_operand" "w")
- (match_operand:SI 3 "immediate_operand" "i")]
- "TARGET_NEON"
- {
-@@ -2149,9 +2172,9 @@
- ; Used for intrinsics when flag_unsafe_math_optimizations is false.
-
- (define_insn "neon_vsub<mode>_unspec"
-- [(set (match_operand:VDQX 0 "s_register_operand" "=w")
-- (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
-- (match_operand:VDQX 2 "s_register_operand" "w")]
-+ [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
-+ (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
-+ (match_operand:VCVTF 2 "s_register_operand" "w")]
- UNSPEC_VSUB))]
- "TARGET_NEON"
- "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
-@@ -2547,6 +2570,14 @@
- [(set_attr "type" "neon_qabs<q>")]
- )
-
-+(define_insn "neon_bswap<mode>"
-+ [(set (match_operand:VDQHSD 0 "register_operand" "=w")
-+ (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
-+ "TARGET_NEON"
-+ "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
-+ [(set_attr "type" "neon_rev<q>")]
-+)
-+
- (define_expand "neon_vneg<mode>"
- [(match_operand:VDQW 0 "s_register_operand" "")
- (match_operand:VDQW 1 "s_register_operand" "")
-@@ -2557,6 +2588,33 @@
- DONE;
- })
-
-+(define_expand "neon_copysignf<mode>"
-+ [(match_operand:VCVTF 0 "register_operand")
-+ (match_operand:VCVTF 1 "register_operand")
-+ (match_operand:VCVTF 2 "register_operand")]
-+ "TARGET_NEON"
-+ "{
-+ rtx v_bitmask_cast;
-+ rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
-+ int i, n_elt = GET_MODE_NUNITS (<MODE>mode);
-+ rtvec v = rtvec_alloc (n_elt);
-+
-+ /* Create bitmask for vector select. */
-+ for (i = 0; i < n_elt; ++i)
-+ RTVEC_ELT (v, i) = GEN_INT (0x80000000);
-+
-+ emit_move_insn (v_bitmask,
-+ gen_rtx_CONST_VECTOR (<VCVTF:V_cmp_result>mode, v));
-+ emit_move_insn (operands[0], operands[2]);
-+ v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
-+ <VCVTF:V_cmp_result>mode, 0);
-+ emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
-+ operands[1]));
-+
-+ DONE;
-+ }"
-+)
-+
- (define_insn "neon_vqneg<mode>"
- [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
- (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
-@@ -4140,17 +4198,6 @@
- [(set_attr "type" "neon_permute<q>")]
- )
-
--(define_expand "neon_vtrn<mode>"
-- [(match_operand:SI 0 "s_register_operand" "r")
-- (match_operand:VDQW 1 "s_register_operand" "w")
-- (match_operand:VDQW 2 "s_register_operand" "w")]
-- "TARGET_NEON"
--{
-- neon_emit_pair_result_insn (<MODE>mode, gen_neon_vtrn<mode>_internal,
-- operands[0], operands[1], operands[2]);
-- DONE;
--})
--
- (define_expand "neon_vzip<mode>_internal"
- [(parallel
- [(set (match_operand:VDQW 0 "s_register_operand" "")
-@@ -4177,17 +4224,6 @@
- [(set_attr "type" "neon_zip<q>")]
- )
-
--(define_expand "neon_vzip<mode>"
-- [(match_operand:SI 0 "s_register_operand" "r")
-- (match_operand:VDQW 1 "s_register_operand" "w")
-- (match_operand:VDQW 2 "s_register_operand" "w")]
-- "TARGET_NEON"
--{
-- neon_emit_pair_result_insn (<MODE>mode, gen_neon_vzip<mode>_internal,
-- operands[0], operands[1], operands[2]);
-- DONE;
--})
--
- (define_expand "neon_vuzp<mode>_internal"
- [(parallel
- [(set (match_operand:VDQW 0 "s_register_operand" "")
-@@ -4214,17 +4250,6 @@
- [(set_attr "type" "neon_zip<q>")]
- )
-
--(define_expand "neon_vuzp<mode>"
-- [(match_operand:SI 0 "s_register_operand" "r")
-- (match_operand:VDQW 1 "s_register_operand" "w")
-- (match_operand:VDQW 2 "s_register_operand" "w")]
-- "TARGET_NEON"
--{
-- neon_emit_pair_result_insn (<MODE>mode, gen_neon_vuzp<mode>_internal,
-- operands[0], operands[1], operands[2]);
-- DONE;
--})
--
- (define_expand "neon_vreinterpretv8qi<mode>"
- [(match_operand:V8QI 0 "s_register_operand" "")
- (match_operand:VDX 1 "s_register_operand" "")]
-@@ -5357,61 +5382,6 @@
- [(set_attr "type" "neon_store4_4reg<q>")]
- )
-
--(define_expand "neon_vand<mode>"
-- [(match_operand:VDQX 0 "s_register_operand" "")
-- (match_operand:VDQX 1 "s_register_operand" "")
-- (match_operand:VDQX 2 "neon_inv_logic_op2" "")
-- (match_operand:SI 3 "immediate_operand" "")]
-- "TARGET_NEON"
--{
-- emit_insn (gen_and<mode>3 (operands[0], operands[1], operands[2]));
-- DONE;
--})
--
--(define_expand "neon_vorr<mode>"
-- [(match_operand:VDQX 0 "s_register_operand" "")
-- (match_operand:VDQX 1 "s_register_operand" "")
-- (match_operand:VDQX 2 "neon_logic_op2" "")
-- (match_operand:SI 3 "immediate_operand" "")]
-- "TARGET_NEON"
--{
-- emit_insn (gen_ior<mode>3 (operands[0], operands[1], operands[2]));
-- DONE;
--})
--
--(define_expand "neon_veor<mode>"
-- [(match_operand:VDQX 0 "s_register_operand" "")
-- (match_operand:VDQX 1 "s_register_operand" "")
-- (match_operand:VDQX 2 "s_register_operand" "")
-- (match_operand:SI 3 "immediate_operand" "")]
-- "TARGET_NEON"
--{
-- emit_insn (gen_xor<mode>3 (operands[0], operands[1], operands[2]));
-- DONE;
--})
--
--(define_expand "neon_vbic<mode>"
-- [(match_operand:VDQX 0 "s_register_operand" "")
-- (match_operand:VDQX 1 "s_register_operand" "")
-- (match_operand:VDQX 2 "neon_logic_op2" "")
-- (match_operand:SI 3 "immediate_operand" "")]
-- "TARGET_NEON"
--{
-- emit_insn (gen_bic<mode>3_neon (operands[0], operands[1], operands[2]));
-- DONE;
--})
--
--(define_expand "neon_vorn<mode>"
-- [(match_operand:VDQX 0 "s_register_operand" "")
-- (match_operand:VDQX 1 "s_register_operand" "")
-- (match_operand:VDQX 2 "neon_inv_logic_op2" "")
-- (match_operand:SI 3 "immediate_operand" "")]
-- "TARGET_NEON"
--{
-- emit_insn (gen_orn<mode>3_neon (operands[0], operands[1], operands[2]));
-- DONE;
--})
--
- (define_insn "neon_vec_unpack<US>_lo_<mode>"
- [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
- (SE:<V_unpack> (vec_select:<V_HALF>
---- a/src/gcc/config/arm/types.md
-+++ b/src/gcc/config/arm/types.md
-@@ -66,7 +66,6 @@
- ; f_mrc transfer vfp to arm reg.
- ; f_mrrc transfer vfp to two arm regs.
- ; f_rint[d,s] double/single floating point rount to integral.
--; f_sel[d,s] double/single floating byte select.
- ; f_store[d,s] double/single store to memory. Used for VFP unit.
- ; fadd[d,s] double/single floating-point scalar addition.
- ; fcmp[d,s] double/single floating-point compare.
-@@ -571,8 +570,6 @@
- f_mrrc,\
- f_rintd,\
- f_rints,\
-- f_seld,\
-- f_sels,\
- f_stored,\
- f_stores,\
- faddd,\
---- a/src/gcc/config/arm/arm_neon_builtins.def
-+++ b/src/gcc/config/arm/arm_neon_builtins.def
-@@ -18,8 +18,7 @@
- along with GCC; see the file COPYING3. If not see
- <http://www.gnu.org/licenses/>. */
-
--VAR10 (BINOP, vadd,
-- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
-+VAR2 (BINOP, vadd, v2sf, v4sf),
- VAR3 (BINOP, vaddl, v8qi, v4hi, v2si),
- VAR3 (BINOP, vaddw, v8qi, v4hi, v2si),
- VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
-@@ -54,7 +53,7 @@
- VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
- VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si),
- VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
--VAR10 (BINOP, vsub, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
-+VAR2 (BINOP, vsub, v2sf, v4sf),
- VAR3 (BINOP, vsubl, v8qi, v4hi, v2si),
- VAR3 (BINOP, vsubw, v8qi, v4hi, v2si),
- VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
-@@ -89,6 +88,7 @@
- VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
- VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
- VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
-+VAR5 (BSWAP, bswap, v4hi, v8hi, v2si, v4si, v2di),
- VAR2 (UNOP, vcnt, v8qi, v16qi),
- VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
- VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),
-@@ -135,6 +135,7 @@
- VAR1 (FLOAT_NARROW, vcvtv4hf, v4sf),
- VAR10 (SELECT, vbsl,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
-+VAR2 (COPYSIGNF, copysignf, v2sf, v4sf),
- VAR2 (RINT, vrintn, v2sf, v4sf),
- VAR2 (RINT, vrinta, v2sf, v4sf),
- VAR2 (RINT, vrintp, v2sf, v4sf),
-@@ -141,6 +142,18 @@
- VAR2 (RINT, vrintm, v2sf, v4sf),
- VAR2 (RINT, vrintz, v2sf, v4sf),
- VAR2 (RINT, vrintx, v2sf, v4sf),
-+VAR1 (RINT, vcvtav2sf, v2si),
-+VAR1 (RINT, vcvtav4sf, v4si),
-+VAR1 (RINT, vcvtauv2sf, v2si),
-+VAR1 (RINT, vcvtauv4sf, v4si),
-+VAR1 (RINT, vcvtpv2sf, v2si),
-+VAR1 (RINT, vcvtpv4sf, v4si),
-+VAR1 (RINT, vcvtpuv2sf, v2si),
-+VAR1 (RINT, vcvtpuv4sf, v4si),
-+VAR1 (RINT, vcvtmv2sf, v2si),
-+VAR1 (RINT, vcvtmv4sf, v4si),
-+VAR1 (RINT, vcvtmuv2sf, v2si),
-+VAR1 (RINT, vcvtmuv4sf, v4si),
- VAR1 (VTBL, vtbl1, v8qi),
- VAR1 (VTBL, vtbl2, v8qi),
- VAR1 (VTBL, vtbl3, v8qi),
-@@ -149,9 +162,6 @@
- VAR1 (VTBX, vtbx2, v8qi),
- VAR1 (VTBX, vtbx3, v8qi),
- VAR1 (VTBX, vtbx4, v8qi),
--VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
--VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
--VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
- VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di),
- VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di),
- VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di),
-@@ -199,14 +209,4 @@
- VAR9 (STORESTRUCT, vst4,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
- VAR7 (STORESTRUCTLANE, vst4_lane,
-- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
--VAR10 (LOGICBINOP, vand,
-- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
--VAR10 (LOGICBINOP, vorr,
-- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
--VAR10 (BINOP, veor,
-- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
--VAR10 (LOGICBINOP, vbic,
-- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
--VAR10 (LOGICBINOP, vorn,
-- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
-+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
---- a/src/gcc/config/arm/cortex-a7.md
-+++ b/src/gcc/config/arm/cortex-a7.md
-@@ -137,7 +137,7 @@
- (and (eq_attr "tune" "cortexa7")
- (eq_attr "type" "alu_reg,alus_reg,logic_reg,logics_reg,\
- adc_imm,adcs_imm,adc_reg,adcs_reg,\
-- bfm,rev,\
-+ bfm,clz,rbit,rev,\
- shift_imm,shift_reg,mov_reg,mvn_reg"))
- "cortex_a7_ex1")
-
---- a/src/gcc/config/arm/aarch-common-protos.h
-+++ b/src/gcc/config/arm/aarch-common-protos.h
-@@ -24,6 +24,9 @@
- #define GCC_AARCH_COMMON_PROTOS_H
-
- extern int aarch_crypto_can_dual_issue (rtx, rtx);
-+extern bool aarch_rev16_p (rtx);
-+extern bool aarch_rev16_shleft_mask_imm_p (rtx, enum machine_mode);
-+extern bool aarch_rev16_shright_mask_imm_p (rtx, enum machine_mode);
- extern int arm_early_load_addr_dep (rtx, rtx);
- extern int arm_early_store_addr_dep (rtx, rtx);
- extern int arm_mac_accumulator_is_mul_result (rtx, rtx);
-@@ -54,6 +57,7 @@
- const int bfi; /* Bit-field insert. */
- const int bfx; /* Bit-field extraction. */
- const int clz; /* Count Leading Zeros. */
-+ const int rev; /* Reverse bits/bytes. */
- const int non_exec; /* Extra cost when not executing insn. */
- const bool non_exec_costs_exec; /* True if non-execution must add the exec
- cost. */
---- a/src/gcc/config/arm/predicates.md
-+++ b/src/gcc/config/arm/predicates.md
-@@ -291,6 +291,15 @@
- || ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32")))
- (match_test "mode == GET_MODE (op)")))
-
-+(define_special_predicate "shift_nomul_operator"
-+ (and (ior (and (match_code "rotate")
-+ (match_test "CONST_INT_P (XEXP (op, 1))
-+ && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32"))
-+ (and (match_code "ashift,ashiftrt,lshiftrt,rotatert")
-+ (match_test "!CONST_INT_P (XEXP (op, 1))
-+ || ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32")))
-+ (match_test "mode == GET_MODE (op)")))
-+
- ;; True for shift operators which can be used with saturation instructions.
- (define_special_predicate "sat_shift_operator"
- (and (ior (and (match_code "mult")
-@@ -681,5 +690,6 @@
- (match_code "reg" "0")))
-
- (define_predicate "call_insn_operand"
-- (ior (match_code "symbol_ref")
-+ (ior (and (match_code "symbol_ref")
-+ (match_test "!arm_is_long_call_p (SYMBOL_REF_DECL (op))"))
- (match_operand 0 "s_register_operand")))
---- a/src/gcc/config/arm/arm_neon.h
-+++ b/src/gcc/config/arm/arm_neon.h
-@@ -452,114 +452,121 @@
- } poly64x2x4_t;
- #endif
-
--
--
-+/* vadd */
- __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
- vadd_s8 (int8x8_t __a, int8x8_t __b)
- {
-- return (int8x8_t)__builtin_neon_vaddv8qi (__a, __b, 1);
-+ return __a + __b;
- }
-
- __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
- vadd_s16 (int16x4_t __a, int16x4_t __b)
- {
-- return (int16x4_t)__builtin_neon_vaddv4hi (__a, __b, 1);
-+ return __a + __b;
- }
-
- __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
- vadd_s32 (int32x2_t __a, int32x2_t __b)
- {
-- return (int32x2_t)__builtin_neon_vaddv2si (__a, __b, 1);
-+ return __a + __b;
- }
-
- __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
- vadd_f32 (float32x2_t __a, float32x2_t __b)
- {
-- return (float32x2_t)__builtin_neon_vaddv2sf (__a, __b, 3);
-+#ifdef __FAST_MATH
-+ return __a + __b;
-+#else
-+ return (float32x2_t) __builtin_neon_vaddv2sf (__a, __b, 3);
-+#endif
- }
-
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vadd_u8 (uint8x8_t __a, uint8x8_t __b)
- {
-- return (uint8x8_t)__builtin_neon_vaddv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
-+ return __a + __b;
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vadd_u16 (uint16x4_t __a, uint16x4_t __b)
- {
-- return (uint16x4_t)__builtin_neon_vaddv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
-+ return __a + __b;
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vadd_u32 (uint32x2_t __a, uint32x2_t __b)
- {
-- return (uint32x2_t)__builtin_neon_vaddv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
-+ return __a + __b;
- }
-
- __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
- vadd_s64 (int64x1_t __a, int64x1_t __b)
- {
-- return (int64x1_t)__builtin_neon_vadddi (__a, __b, 1);
-+ return __a + __b;
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
- vadd_u64 (uint64x1_t __a, uint64x1_t __b)
- {
-- return (uint64x1_t)__builtin_neon_vadddi ((int64x1_t) __a, (int64x1_t) __b, 0);
-+ return __a + __b;
- }
-
- __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
- vaddq_s8 (int8x16_t __a, int8x16_t __b)
- {
-- return (int8x16_t)__builtin_neon_vaddv16qi (__a, __b, 1);
-+ return __a + __b;
- }
-
- __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
- vaddq_s16 (int16x8_t __a, int16x8_t __b)
- {
-- return (int16x8_t)__builtin_neon_vaddv8hi (__a, __b, 1);
-+ return __a + __b;
- }
-
- __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
- vaddq_s32 (int32x4_t __a, int32x4_t __b)
- {
-- return (int32x4_t)__builtin_neon_vaddv4si (__a, __b, 1);
-+ return __a + __b;
- }
-
- __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
- vaddq_s64 (int64x2_t __a, int64x2_t __b)
- {
-- return (int64x2_t)__builtin_neon_vaddv2di (__a, __b, 1);
-+ return __a + __b;
- }
-
- __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
- vaddq_f32 (float32x4_t __a, float32x4_t __b)
- {
-- return (float32x4_t)__builtin_neon_vaddv4sf (__a, __b, 3);
-+#ifdef __FAST_MATH
-+ return __a + __b;
-+#else
-+ return (float32x4_t) __builtin_neon_vaddv4sf (__a, __b, 3);
-+#endif
- }
-
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vaddq_u8 (uint8x16_t __a, uint8x16_t __b)
- {
-- return (uint8x16_t)__builtin_neon_vaddv16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
-+ return __a + __b;
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vaddq_u16 (uint16x8_t __a, uint16x8_t __b)
- {
-- return (uint16x8_t)__builtin_neon_vaddv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
-+ return __a + __b;
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vaddq_u32 (uint32x4_t __a, uint32x4_t __b)
- {
-- return (uint32x4_t)__builtin_neon_vaddv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
-+ return __a + __b;
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vaddq_u64 (uint64x2_t __a, uint64x2_t __b)
- {
-- return (uint64x2_t)__builtin_neon_vaddv2di ((int64x2_t) __a, (int64x2_t) __b, 0);
-+ return __a + __b;
- }
-
- __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-@@ -949,93 +956,102 @@
- __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
- vmul_s8 (int8x8_t __a, int8x8_t __b)
- {
-- return (int8x8_t)__builtin_neon_vmulv8qi (__a, __b, 1);
-+ return __a * __b;
- }
-
- __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
- vmul_s16 (int16x4_t __a, int16x4_t __b)
- {
-- return (int16x4_t)__builtin_neon_vmulv4hi (__a, __b, 1);
-+ return __a * __b;
- }
-
- __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
- vmul_s32 (int32x2_t __a, int32x2_t __b)
- {
-- return (int32x2_t)__builtin_neon_vmulv2si (__a, __b, 1);
-+ return __a * __b;
- }
-
- __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
- vmul_f32 (float32x2_t __a, float32x2_t __b)
- {
-- return (float32x2_t)__builtin_neon_vmulv2sf (__a, __b, 3);
-+#ifdef __FAST_MATH
-+ return __a * __b;
-+#else
-+ return (float32x2_t) __builtin_neon_vmulv2sf (__a, __b, 3);
-+#endif
-+
- }
-
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vmul_u8 (uint8x8_t __a, uint8x8_t __b)
- {
-- return (uint8x8_t)__builtin_neon_vmulv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
-+ return __a * __b;
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vmul_u16 (uint16x4_t __a, uint16x4_t __b)
- {
-- return (uint16x4_t)__builtin_neon_vmulv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
-+ return __a * __b;
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vmul_u32 (uint32x2_t __a, uint32x2_t __b)
- {
-- return (uint32x2_t)__builtin_neon_vmulv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
-+ return __a * __b;
- }
-
--__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
--vmul_p8 (poly8x8_t __a, poly8x8_t __b)
--{
-- return (poly8x8_t)__builtin_neon_vmulv8qi ((int8x8_t) __a, (int8x8_t) __b, 2);
--}
--
- __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
- vmulq_s8 (int8x16_t __a, int8x16_t __b)
- {
-- return (int8x16_t)__builtin_neon_vmulv16qi (__a, __b, 1);
-+ return __a * __b;
- }
-
- __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
- vmulq_s16 (int16x8_t __a, int16x8_t __b)
- {
-- return (int16x8_t)__builtin_neon_vmulv8hi (__a, __b, 1);
-+ return __a * __b;
- }
-
- __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
- vmulq_s32 (int32x4_t __a, int32x4_t __b)
- {
-- return (int32x4_t)__builtin_neon_vmulv4si (__a, __b, 1);
-+ return __a * __b;
- }
-
- __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
- vmulq_f32 (float32x4_t __a, float32x4_t __b)
- {
-- return (float32x4_t)__builtin_neon_vmulv4sf (__a, __b, 3);
-+#ifdef __FAST_MATH
-+ return __a * __b;
-+#else
-+ return (float32x4_t) __builtin_neon_vmulv4sf (__a, __b, 3);
-+#endif
- }
-
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vmulq_u8 (uint8x16_t __a, uint8x16_t __b)
- {
-- return (uint8x16_t)__builtin_neon_vmulv16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
-+ return __a * __b;
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vmulq_u16 (uint16x8_t __a, uint16x8_t __b)
- {
-- return (uint16x8_t)__builtin_neon_vmulv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
-+ return __a * __b;
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vmulq_u32 (uint32x4_t __a, uint32x4_t __b)
- {
-- return (uint32x4_t)__builtin_neon_vmulv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
-+ return __a * __b;
- }
-
-+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-+vmul_p8 (poly8x8_t __a, poly8x8_t __b)
-+{
-+ return (poly8x8_t)__builtin_neon_vmulv8qi ((int8x8_t) __a, (int8x8_t) __b, 2);
-+}
-+
- __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
- vmulq_p8 (poly8x16_t __a, poly8x16_t __b)
- {
-@@ -1520,112 +1536,121 @@
- }
-
- #endif
-+
- __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
- vsub_s8 (int8x8_t __a, int8x8_t __b)
- {
-- return (int8x8_t)__builtin_neon_vsubv8qi (__a, __b, 1);
-+ return __a - __b;
- }
-
- __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
- vsub_s16 (int16x4_t __a, int16x4_t __b)
- {
-- return (int16x4_t)__builtin_neon_vsubv4hi (__a, __b, 1);
-+ return __a - __b;
- }
-
- __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
- vsub_s32 (int32x2_t __a, int32x2_t __b)
- {
-- return (int32x2_t)__builtin_neon_vsubv2si (__a, __b, 1);
-+ return __a - __b;
- }
-
- __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
- vsub_f32 (float32x2_t __a, float32x2_t __b)
- {
-- return (float32x2_t)__builtin_neon_vsubv2sf (__a, __b, 3);
-+#ifdef __FAST_MATH
-+ return __a - __b;
-+#else
-+ return (float32x2_t) __builtin_neon_vsubv2sf (__a, __b, 3);
-+#endif
- }
-
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vsub_u8 (uint8x8_t __a, uint8x8_t __b)
- {
-- return (uint8x8_t)__builtin_neon_vsubv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
-+ return __a - __b;
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vsub_u16 (uint16x4_t __a, uint16x4_t __b)
- {
-- return (uint16x4_t)__builtin_neon_vsubv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
-+ return __a - __b;
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vsub_u32 (uint32x2_t __a, uint32x2_t __b)
- {
-- return (uint32x2_t)__builtin_neon_vsubv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
-+ return __a - __b;
- }
-
- __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
- vsub_s64 (int64x1_t __a, int64x1_t __b)
- {
-- return (int64x1_t)__builtin_neon_vsubdi (__a, __b, 1);
-+ return __a - __b;
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
- vsub_u64 (uint64x1_t __a, uint64x1_t __b)
- {
-- return (uint64x1_t)__builtin_neon_vsubdi ((int64x1_t) __a, (int64x1_t) __b, 0);
-+ return __a - __b;
- }
-
- __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
- vsubq_s8 (int8x16_t __a, int8x16_t __b)
- {
-- return (int8x16_t)__builtin_neon_vsubv16qi (__a, __b, 1);
-+ return __a - __b;
- }
-
- __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
- vsubq_s16 (int16x8_t __a, int16x8_t __b)
- {
-- return (int16x8_t)__builtin_neon_vsubv8hi (__a, __b, 1);
-+ return __a - __b;
- }
-
- __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
- vsubq_s32 (int32x4_t __a, int32x4_t __b)
- {
-- return (int32x4_t)__builtin_neon_vsubv4si (__a, __b, 1);
-+ return __a - __b;
- }
-
- __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
- vsubq_s64 (int64x2_t __a, int64x2_t __b)
- {
-- return (int64x2_t)__builtin_neon_vsubv2di (__a, __b, 1);
-+ return __a - __b;
- }
-
- __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
- vsubq_f32 (float32x4_t __a, float32x4_t __b)
- {
-- return (float32x4_t)__builtin_neon_vsubv4sf (__a, __b, 3);
-+#ifdef __FAST_MATH
-+ return __a - __b;
-+#else
-+ return (float32x4_t) __builtin_neon_vsubv4sf (__a, __b, 3);
-+#endif
- }
-
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vsubq_u8 (uint8x16_t __a, uint8x16_t __b)
- {
-- return (uint8x16_t)__builtin_neon_vsubv16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
-+ return __a - __b;
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vsubq_u16 (uint16x8_t __a, uint16x8_t __b)
- {
-- return (uint16x8_t)__builtin_neon_vsubv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
-+ return __a - __b;
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vsubq_u32 (uint32x4_t __a, uint32x4_t __b)
- {
-- return (uint32x4_t)__builtin_neon_vsubv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
-+ return __a - __b;
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vsubq_u64 (uint64x2_t __a, uint64x2_t __b)
- {
-- return (uint64x2_t)__builtin_neon_vsubv2di ((int64x2_t) __a, (int64x2_t) __b, 0);
-+ return __a - __b;
- }
-
- __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-@@ -11295,484 +11320,483 @@
- __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
- vand_s8 (int8x8_t __a, int8x8_t __b)
- {
-- return (int8x8_t)__builtin_neon_vandv8qi (__a, __b, 1);
-+ return __a & __b;
- }
-
- __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
- vand_s16 (int16x4_t __a, int16x4_t __b)
- {
-- return (int16x4_t)__builtin_neon_vandv4hi (__a, __b, 1);
-+ return __a & __b;
- }
-
- __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
- vand_s32 (int32x2_t __a, int32x2_t __b)
- {
-- return (int32x2_t)__builtin_neon_vandv2si (__a, __b, 1);
-+ return __a & __b;
- }
-
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vand_u8 (uint8x8_t __a, uint8x8_t __b)
- {
-- return (uint8x8_t)__builtin_neon_vandv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
-+ return __a & __b;
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vand_u16 (uint16x4_t __a, uint16x4_t __b)
- {
-- return (uint16x4_t)__builtin_neon_vandv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
-+ return __a & __b;
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vand_u32 (uint32x2_t __a, uint32x2_t __b)
- {
-- return (uint32x2_t)__builtin_neon_vandv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
-+ return __a & __b;
- }
-
- __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
- vand_s64 (int64x1_t __a, int64x1_t __b)
- {
-- return (int64x1_t)__builtin_neon_vanddi (__a, __b, 1);
-+ return __a & __b;
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
- vand_u64 (uint64x1_t __a, uint64x1_t __b)
- {
-- return (uint64x1_t)__builtin_neon_vanddi ((int64x1_t) __a, (int64x1_t) __b, 0);
-+ return __a & __b;
- }
-
- __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
- vandq_s8 (int8x16_t __a, int8x16_t __b)
- {
-- return (int8x16_t)__builtin_neon_vandv16qi (__a, __b, 1);
-+ return __a & __b;
- }
-
- __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
- vandq_s16 (int16x8_t __a, int16x8_t __b)
- {
-- return (int16x8_t)__builtin_neon_vandv8hi (__a, __b, 1);
-+ return __a & __b;
- }
-
- __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
- vandq_s32 (int32x4_t __a, int32x4_t __b)
- {
-- return (int32x4_t)__builtin_neon_vandv4si (__a, __b, 1);
-+ return __a & __b;
- }
-
- __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
- vandq_s64 (int64x2_t __a, int64x2_t __b)
- {
-- return (int64x2_t)__builtin_neon_vandv2di (__a, __b, 1);
-+ return __a & __b;
- }
-
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vandq_u8 (uint8x16_t __a, uint8x16_t __b)
- {
-- return (uint8x16_t)__builtin_neon_vandv16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
-+ return __a & __b;
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vandq_u16 (uint16x8_t __a, uint16x8_t __b)
- {
-- return (uint16x8_t)__builtin_neon_vandv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
-+ return __a & __b;
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vandq_u32 (uint32x4_t __a, uint32x4_t __b)
- {
-- return (uint32x4_t)__builtin_neon_vandv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
-+ return __a & __b;
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vandq_u64 (uint64x2_t __a, uint64x2_t __b)
- {
-- return (uint64x2_t)__builtin_neon_vandv2di ((int64x2_t) __a, (int64x2_t) __b, 0);
-+ return __a & __b;
- }
-
- __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
- vorr_s8 (int8x8_t __a, int8x8_t __b)
- {
-- return (int8x8_t)__builtin_neon_vorrv8qi (__a, __b, 1);
-+ return __a | __b;
- }
-
- __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
- vorr_s16 (int16x4_t __a, int16x4_t __b)
- {
-- return (int16x4_t)__builtin_neon_vorrv4hi (__a, __b, 1);
-+ return __a | __b;
- }
-
- __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
- vorr_s32 (int32x2_t __a, int32x2_t __b)
- {
-- return (int32x2_t)__builtin_neon_vorrv2si (__a, __b, 1);
-+ return __a | __b;
- }
-
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vorr_u8 (uint8x8_t __a, uint8x8_t __b)
- {
-- return (uint8x8_t)__builtin_neon_vorrv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
-+ return __a | __b;
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vorr_u16 (uint16x4_t __a, uint16x4_t __b)
- {
-- return (uint16x4_t)__builtin_neon_vorrv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
-+ return __a | __b;
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vorr_u32 (uint32x2_t __a, uint32x2_t __b)
- {
-- return (uint32x2_t)__builtin_neon_vorrv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
-+ return __a | __b;
- }
-
- __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
- vorr_s64 (int64x1_t __a, int64x1_t __b)
- {
-- return (int64x1_t)__builtin_neon_vorrdi (__a, __b, 1);
-+ return __a | __b;
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
- vorr_u64 (uint64x1_t __a, uint64x1_t __b)
- {
-- return (uint64x1_t)__builtin_neon_vorrdi ((int64x1_t) __a, (int64x1_t) __b, 0);
-+ return __a | __b;
- }
-
- __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
- vorrq_s8 (int8x16_t __a, int8x16_t __b)
- {
-- return (int8x16_t)__builtin_neon_vorrv16qi (__a, __b, 1);
-+ return __a | __b;
- }
-
- __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
- vorrq_s16 (int16x8_t __a, int16x8_t __b)
- {
-- return (int16x8_t)__builtin_neon_vorrv8hi (__a, __b, 1);
-+ return __a | __b;
- }
-
- __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
- vorrq_s32 (int32x4_t __a, int32x4_t __b)
- {
-- return (int32x4_t)__builtin_neon_vorrv4si (__a, __b, 1);
-+ return __a | __b;
- }
-
- __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
- vorrq_s64 (int64x2_t __a, int64x2_t __b)
- {
-- return (int64x2_t)__builtin_neon_vorrv2di (__a, __b, 1);
-+ return __a | __b;
- }
-
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vorrq_u8 (uint8x16_t __a, uint8x16_t __b)
- {
-- return (uint8x16_t)__builtin_neon_vorrv16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
-+ return __a | __b;
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vorrq_u16 (uint16x8_t __a, uint16x8_t __b)
- {
-- return (uint16x8_t)__builtin_neon_vorrv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
-+ return __a | __b;
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vorrq_u32 (uint32x4_t __a, uint32x4_t __b)
- {
-- return (uint32x4_t)__builtin_neon_vorrv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
-+ return __a | __b;
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vorrq_u64 (uint64x2_t __a, uint64x2_t __b)
- {
-- return (uint64x2_t)__builtin_neon_vorrv2di ((int64x2_t) __a, (int64x2_t) __b, 0);
-+ return __a | __b;
- }
-
- __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
- veor_s8 (int8x8_t __a, int8x8_t __b)
- {
-- return (int8x8_t)__builtin_neon_veorv8qi (__a, __b, 1);
-+ return __a ^ __b;
- }
-
- __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
- veor_s16 (int16x4_t __a, int16x4_t __b)
- {
-- return (int16x4_t)__builtin_neon_veorv4hi (__a, __b, 1);
-+ return __a ^ __b;
- }
-
- __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
- veor_s32 (int32x2_t __a, int32x2_t __b)
- {
-- return (int32x2_t)__builtin_neon_veorv2si (__a, __b, 1);
-+ return __a ^ __b;
- }
-
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- veor_u8 (uint8x8_t __a, uint8x8_t __b)
- {
-- return (uint8x8_t)__builtin_neon_veorv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
-+ return __a ^ __b;
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- veor_u16 (uint16x4_t __a, uint16x4_t __b)
- {
-- return (uint16x4_t)__builtin_neon_veorv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
-+ return __a ^ __b;
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- veor_u32 (uint32x2_t __a, uint32x2_t __b)
- {
-- return (uint32x2_t)__builtin_neon_veorv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
-+ return __a ^ __b;
- }
-
- __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
- veor_s64 (int64x1_t __a, int64x1_t __b)
- {
-- return (int64x1_t)__builtin_neon_veordi (__a, __b, 1);
-+ return __a ^ __b;
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
- veor_u64 (uint64x1_t __a, uint64x1_t __b)
- {
-- return (uint64x1_t)__builtin_neon_veordi ((int64x1_t) __a, (int64x1_t) __b, 0);
-+ return __a ^ __b;
- }
-
- __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
- veorq_s8 (int8x16_t __a, int8x16_t __b)
- {
-- return (int8x16_t)__builtin_neon_veorv16qi (__a, __b, 1);
-+ return __a ^ __b;
- }
-
- __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
- veorq_s16 (int16x8_t __a, int16x8_t __b)
- {
-- return (int16x8_t)__builtin_neon_veorv8hi (__a, __b, 1);
-+ return __a ^ __b;
- }
-
- __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
- veorq_s32 (int32x4_t __a, int32x4_t __b)
- {
-- return (int32x4_t)__builtin_neon_veorv4si (__a, __b, 1);
-+ return __a ^ __b;
- }
-
- __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
- veorq_s64 (int64x2_t __a, int64x2_t __b)
- {
-- return (int64x2_t)__builtin_neon_veorv2di (__a, __b, 1);
-+ return __a ^ __b;
- }
-
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- veorq_u8 (uint8x16_t __a, uint8x16_t __b)
- {
-- return (uint8x16_t)__builtin_neon_veorv16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
-+ return __a ^ __b;
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- veorq_u16 (uint16x8_t __a, uint16x8_t __b)
- {
-- return (uint16x8_t)__builtin_neon_veorv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
-+ return __a ^ __b;
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- veorq_u32 (uint32x4_t __a, uint32x4_t __b)
- {
-- return (uint32x4_t)__builtin_neon_veorv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
-+ return __a ^ __b;
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- veorq_u64 (uint64x2_t __a, uint64x2_t __b)
- {
-- return (uint64x2_t)__builtin_neon_veorv2di ((int64x2_t) __a, (int64x2_t) __b, 0);
-+ return __a ^ __b;
- }
-
- __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
- vbic_s8 (int8x8_t __a, int8x8_t __b)
- {
-- return (int8x8_t)__builtin_neon_vbicv8qi (__a, __b, 1);
-+ return __a & ~__b;
- }
-
- __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
- vbic_s16 (int16x4_t __a, int16x4_t __b)
- {
-- return (int16x4_t)__builtin_neon_vbicv4hi (__a, __b, 1);
-+ return __a & ~__b;
- }
-
- __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
- vbic_s32 (int32x2_t __a, int32x2_t __b)
- {
-- return (int32x2_t)__builtin_neon_vbicv2si (__a, __b, 1);
-+ return __a & ~__b;
- }
-
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vbic_u8 (uint8x8_t __a, uint8x8_t __b)
- {
-- return (uint8x8_t)__builtin_neon_vbicv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
-+ return __a & ~__b;
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vbic_u16 (uint16x4_t __a, uint16x4_t __b)
- {
-- return (uint16x4_t)__builtin_neon_vbicv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
-+ return __a & ~__b;
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vbic_u32 (uint32x2_t __a, uint32x2_t __b)
- {
-- return (uint32x2_t)__builtin_neon_vbicv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
-+ return __a & ~__b;
- }
-
- __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
- vbic_s64 (int64x1_t __a, int64x1_t __b)
- {
-- return (int64x1_t)__builtin_neon_vbicdi (__a, __b, 1);
-+ return __a & ~__b;
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
- vbic_u64 (uint64x1_t __a, uint64x1_t __b)
- {
-- return (uint64x1_t)__builtin_neon_vbicdi ((int64x1_t) __a, (int64x1_t) __b, 0);
-+ return __a & ~__b;
- }
-
- __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
- vbicq_s8 (int8x16_t __a, int8x16_t __b)
- {
-- return (int8x16_t)__builtin_neon_vbicv16qi (__a, __b, 1);
-+ return __a & ~__b;
- }
-
- __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
- vbicq_s16 (int16x8_t __a, int16x8_t __b)
- {
-- return (int16x8_t)__builtin_neon_vbicv8hi (__a, __b, 1);
-+ return __a & ~__b;
- }
-
- __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
- vbicq_s32 (int32x4_t __a, int32x4_t __b)
- {
-- return (int32x4_t)__builtin_neon_vbicv4si (__a, __b, 1);
-+ return __a & ~__b;
- }
-
- __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
- vbicq_s64 (int64x2_t __a, int64x2_t __b)
- {
-- return (int64x2_t)__builtin_neon_vbicv2di (__a, __b, 1);
-+ return __a & ~__b;
- }
-
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vbicq_u8 (uint8x16_t __a, uint8x16_t __b)
- {
-- return (uint8x16_t)__builtin_neon_vbicv16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
-+ return __a & ~__b;
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vbicq_u16 (uint16x8_t __a, uint16x8_t __b)
- {
-- return (uint16x8_t)__builtin_neon_vbicv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
-+ return __a & ~__b;
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vbicq_u32 (uint32x4_t __a, uint32x4_t __b)
- {
-- return (uint32x4_t)__builtin_neon_vbicv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
-+ return __a & ~__b;
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vbicq_u64 (uint64x2_t __a, uint64x2_t __b)
- {
-- return (uint64x2_t)__builtin_neon_vbicv2di ((int64x2_t) __a, (int64x2_t) __b, 0);
-+ return __a & ~__b;
- }
-
- __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
- vorn_s8 (int8x8_t __a, int8x8_t __b)
- {
-- return (int8x8_t)__builtin_neon_vornv8qi (__a, __b, 1);
-+ return __a | ~__b;
- }
-
- __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
- vorn_s16 (int16x4_t __a, int16x4_t __b)
- {
-- return (int16x4_t)__builtin_neon_vornv4hi (__a, __b, 1);
-+ return __a | ~__b;
- }
-
- __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
- vorn_s32 (int32x2_t __a, int32x2_t __b)
- {
-- return (int32x2_t)__builtin_neon_vornv2si (__a, __b, 1);
-+ return __a | ~__b;
- }
-
- __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
- vorn_u8 (uint8x8_t __a, uint8x8_t __b)
- {
-- return (uint8x8_t)__builtin_neon_vornv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
-+ return __a | ~__b;
- }
-
- __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
- vorn_u16 (uint16x4_t __a, uint16x4_t __b)
- {
-- return (uint16x4_t)__builtin_neon_vornv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
-+ return __a | ~__b;
- }
-
- __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
- vorn_u32 (uint32x2_t __a, uint32x2_t __b)
- {
-- return (uint32x2_t)__builtin_neon_vornv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
-+ return __a | ~__b;
- }
-
- __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
- vorn_s64 (int64x1_t __a, int64x1_t __b)
- {
-- return (int64x1_t)__builtin_neon_vorndi (__a, __b, 1);
-+ return __a | ~__b;
- }
-
- __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
- vorn_u64 (uint64x1_t __a, uint64x1_t __b)
- {
-- return (uint64x1_t)__builtin_neon_vorndi ((int64x1_t) __a, (int64x1_t) __b, 0);
-+ return __a | ~__b;
- }
-
- __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
- vornq_s8 (int8x16_t __a, int8x16_t __b)
- {
-- return (int8x16_t)__builtin_neon_vornv16qi (__a, __b, 1);
-+ return __a | ~__b;
- }
-
- __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
- vornq_s16 (int16x8_t __a, int16x8_t __b)
- {
-- return (int16x8_t)__builtin_neon_vornv8hi (__a, __b, 1);
-+ return __a | ~__b;
- }
-
- __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
- vornq_s32 (int32x4_t __a, int32x4_t __b)
- {
-- return (int32x4_t)__builtin_neon_vornv4si (__a, __b, 1);
-+ return __a | ~__b;
- }
-
- __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
- vornq_s64 (int64x2_t __a, int64x2_t __b)
- {
-- return (int64x2_t)__builtin_neon_vornv2di (__a, __b, 1);
-+ return __a | ~__b;
- }
-
- __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
- vornq_u8 (uint8x16_t __a, uint8x16_t __b)
- {
-- return (uint8x16_t)__builtin_neon_vornv16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
-+ return __a | ~__b;
- }
-
- __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
- vornq_u16 (uint16x8_t __a, uint16x8_t __b)
- {
-- return (uint16x8_t)__builtin_neon_vornv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
-+ return __a | ~__b;
- }
-
- __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
- vornq_u32 (uint32x4_t __a, uint32x4_t __b)
- {
-- return (uint32x4_t)__builtin_neon_vornv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
-+ return __a | ~__b;
- }
-
- __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
- vornq_u64 (uint64x2_t __a, uint64x2_t __b)
- {
-- return (uint64x2_t)__builtin_neon_vornv2di ((int64x2_t) __a, (int64x2_t) __b, 0);
-+ return __a | ~__b;
- }
-
--
- __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
- vreinterpret_p8_p16 (poly16x4_t __a)
- {
---- a/src/gcc/config/arm/aarch-common.c
-+++ b/src/gcc/config/arm/aarch-common.c
-@@ -191,6 +191,83 @@
- return 0;
- }
-
-+bool
-+aarch_rev16_shright_mask_imm_p (rtx val, enum machine_mode mode)
-+{
-+ return CONST_INT_P (val)
-+ && INTVAL (val)
-+ == trunc_int_for_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff),
-+ mode);
-+}
-+
-+bool
-+aarch_rev16_shleft_mask_imm_p (rtx val, enum machine_mode mode)
-+{
-+ return CONST_INT_P (val)
-+ && INTVAL (val)
-+ == trunc_int_for_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff00),
-+ mode);
-+}
-+
-+
-+static bool
-+aarch_rev16_p_1 (rtx lhs, rtx rhs, enum machine_mode mode)
-+{
-+ if (GET_CODE (lhs) == AND
-+ && GET_CODE (XEXP (lhs, 0)) == ASHIFT
-+ && CONST_INT_P (XEXP (XEXP (lhs, 0), 1))
-+ && INTVAL (XEXP (XEXP (lhs, 0), 1)) == 8
-+ && REG_P (XEXP (XEXP (lhs, 0), 0))
-+ && CONST_INT_P (XEXP (lhs, 1))
-+ && GET_CODE (rhs) == AND
-+ && GET_CODE (XEXP (rhs, 0)) == LSHIFTRT
-+ && REG_P (XEXP (XEXP (rhs, 0), 0))
-+ && CONST_INT_P (XEXP (XEXP (rhs, 0), 1))
-+ && INTVAL (XEXP (XEXP (rhs, 0), 1)) == 8
-+ && CONST_INT_P (XEXP (rhs, 1))
-+ && REGNO (XEXP (XEXP (rhs, 0), 0)) == REGNO (XEXP (XEXP (lhs, 0), 0)))
-+
-+ {
-+ rtx lhs_mask = XEXP (lhs, 1);
-+ rtx rhs_mask = XEXP (rhs, 1);
-+
-+ return aarch_rev16_shright_mask_imm_p (rhs_mask, mode)
-+ && aarch_rev16_shleft_mask_imm_p (lhs_mask, mode);
-+ }
-+
-+ return false;
-+}
-+
-+/* Recognise a sequence of bitwise operations corresponding to a rev16 operation.
-+ These will be of the form:
-+ ((x >> 8) & 0x00ff00ff)
-+ | ((x << 8) & 0xff00ff00)
-+ for SImode and with similar but wider bitmasks for DImode.
-+ The two sub-expressions of the IOR can appear on either side so check both
-+ permutations with the help of aarch_rev16_p_1 above. */
-+
-+bool
-+aarch_rev16_p (rtx x)
-+{
-+ rtx left_sub_rtx, right_sub_rtx;
-+ bool is_rev = false;
-+
-+ if (GET_CODE (x) != IOR)
-+ return false;
-+
-+ left_sub_rtx = XEXP (x, 0);
-+ right_sub_rtx = XEXP (x, 1);
-+
-+ /* There are no canonicalisation rules for the position of the two shifts
-+ involved in a rev, so try both permutations. */
-+ is_rev = aarch_rev16_p_1 (left_sub_rtx, right_sub_rtx, GET_MODE (x));
-+
-+ if (!is_rev)
-+ is_rev = aarch_rev16_p_1 (right_sub_rtx, left_sub_rtx, GET_MODE (x));
-+
-+ return is_rev;
-+}
-+
- /* Return nonzero if the CONSUMER instruction (a load) does need
- PRODUCER's value to calculate the address. */
- int
---- a/src/gcc/config/arm/arm-fpus.def
-+++ b/src/gcc/config/arm/arm-fpus.def
-@@ -37,6 +37,8 @@
- ARM_FPU("vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, false, true, false)
- ARM_FPU("vfpv4-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_D16, false, true, false)
- ARM_FPU("fpv4-sp-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_SINGLE, false, true, false)
-+ARM_FPU("fpv5-sp-d16", ARM_FP_MODEL_VFP, 5, VFP_REG_SINGLE, false, true, false)
-+ARM_FPU("fpv5-d16", ARM_FP_MODEL_VFP, 5, VFP_REG_D16, false, true, false)
- ARM_FPU("neon-vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, true, true, false)
- ARM_FPU("fp-armv8", ARM_FP_MODEL_VFP, 8, VFP_REG_D32, false, true, false)
- ARM_FPU("neon-fp-armv8",ARM_FP_MODEL_VFP, 8, VFP_REG_D32, true, true, false)
---- a/src/gcc/config/arm/cortex-a53.md
-+++ b/src/gcc/config/arm/cortex-a53.md
-@@ -75,7 +75,7 @@
- (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\
- alu_reg,alus_reg,logic_reg,logics_reg,\
- adc_imm,adcs_imm,adc_reg,adcs_reg,\
-- adr,bfm,csel,rev,\
-+ adr,bfm,csel,clz,rbit,rev,\
- shift_imm,shift_reg,\
- mov_imm,mov_reg,mvn_imm,mvn_reg,\
- mrs,multiple,no_insn"))
-@@ -84,8 +84,8 @@
- (define_insn_reservation "cortex_a53_alu_shift" 2
- (and (eq_attr "tune" "cortexa53")
- (eq_attr "type" "alu_shift_imm,alus_shift_imm,\
-- logic_shift_imm,logics_shift_imm,\
-- alu_shift_reg,alus_shift_reg,\
-+ crc,logic_shift_imm,logics_shift_imm,\
-+ alu_ext,alus_ext,alu_shift_reg,alus_shift_reg,\
- logic_shift_reg,logics_shift_reg,\
- extend,mov_shift,mov_shift_reg,\
- mvn_shift,mvn_shift_reg"))
-@@ -216,7 +216,8 @@
- (and (eq_attr "tune" "cortexa53")
- (eq_attr "type" "ffariths, fadds, ffarithd, faddd, fmov, fmuls,\
- f_cvt,f_cvtf2i,f_cvti2f,\
-- fcmps, fcmpd, fcsel"))
-+ fcmps, fcmpd, fcsel, f_rints, f_rintd, f_minmaxs,\
-+ f_minmaxd"))
- "cortex_a53_slot0+cortex_a53_fpadd_pipe")
-
- (define_insn_reservation "cortex_a53_fconst" 2
---- a/src/gcc/config/arm/bpabi.h
-+++ b/src/gcc/config/arm/bpabi.h
-@@ -73,7 +73,7 @@
- |mcpu=generic-armv7-a \
- |march=armv7ve \
- |march=armv7-m|mcpu=cortex-m3 \
-- |march=armv7e-m|mcpu=cortex-m4 \
-+ |march=armv7e-m|mcpu=cortex-m4|mcpu=cortex-m7 \
- |march=armv6-m|mcpu=cortex-m0 \
- |march=armv8-a \
- :%{!r:--be8}}}"
-@@ -91,7 +91,7 @@
- |mcpu=generic-armv7-a \
- |march=armv7ve \
- |march=armv7-m|mcpu=cortex-m3 \
-- |march=armv7e-m|mcpu=cortex-m4 \
-+ |march=armv7e-m|mcpu=cortex-m4|mcpu=cortex-m7 \
- |march=armv6-m|mcpu=cortex-m0 \
- |march=armv8-a \
- :%{!r:--be8}}}"
---- a/src/gcc/config/arm/iterators.md
-+++ b/src/gcc/config/arm/iterators.md
-@@ -116,6 +116,9 @@
- ;; Vector modes including 64-bit integer elements, but no floats.
- (define_mode_iterator VDQIX [V8QI V16QI V4HI V8HI V2SI V4SI DI V2DI])
-
-+;; Vector modes for H, S and D types.
-+(define_mode_iterator VDQHSD [V4HI V8HI V2SI V4SI V2DI])
-+
- ;; Vector modes for float->int conversions.
- (define_mode_iterator VCVTF [V2SF V4SF])
-
-@@ -191,6 +194,23 @@
- ;; Right shifts
- (define_code_iterator rshifts [ashiftrt lshiftrt])
-
-+;; Iterator for integer conversions
-+(define_code_iterator FIXUORS [fix unsigned_fix])
-+
-+;; Binary operators whose second operand can be shifted.
-+(define_code_iterator shiftable_ops [plus minus ior xor and])
-+
-+;; plus and minus are the only shiftable_ops for which Thumb2 allows
-+;; a stack pointer opoerand. The minus operation is a candidate for an rsub
-+;; and hence only plus is supported.
-+(define_code_attr t2_binop0
-+ [(plus "rk") (minus "r") (ior "r") (xor "r") (and "r")])
-+
-+;; The instruction to use when a shiftable_ops has a shift operation as
-+;; its first operand.
-+(define_code_attr arith_shift_insn
-+ [(plus "add") (minus "rsb") (ior "orr") (xor "eor") (and "and")])
-+
- ;;----------------------------------------------------------------------------
- ;; Int iterators
- ;;----------------------------------------------------------------------------
-@@ -198,9 +218,13 @@
- (define_int_iterator VRINT [UNSPEC_VRINTZ UNSPEC_VRINTP UNSPEC_VRINTM
- UNSPEC_VRINTR UNSPEC_VRINTX UNSPEC_VRINTA])
-
-+(define_int_iterator VCVT [UNSPEC_VRINTP UNSPEC_VRINTM UNSPEC_VRINTA])
-+
- (define_int_iterator NEON_VRINT [UNSPEC_NVRINTP UNSPEC_NVRINTZ UNSPEC_NVRINTM
- UNSPEC_NVRINTX UNSPEC_NVRINTA UNSPEC_NVRINTN])
-
-+(define_int_iterator NEON_VCVT [UNSPEC_NVRINTP UNSPEC_NVRINTM UNSPEC_NVRINTA])
-+
- (define_int_iterator CRC [UNSPEC_CRC32B UNSPEC_CRC32H UNSPEC_CRC32W
- UNSPEC_CRC32CB UNSPEC_CRC32CH UNSPEC_CRC32CW])
-
-@@ -502,6 +526,13 @@
- ;; Assembler mnemonics for signedness of widening operations.
- (define_code_attr US [(sign_extend "s") (zero_extend "u")])
-
-+;; Signedness suffix for float->fixed conversions. Empty for signed
-+;; conversion.
-+(define_code_attr su_optab [(fix "") (unsigned_fix "u")])
-+
-+;; Sign prefix to use in instruction type suffixes, i.e. s32, u32.
-+(define_code_attr su [(fix "s") (unsigned_fix "u")])
-+
- ;; Right shifts
- (define_code_attr shift [(ashiftrt "ashr") (lshiftrt "lshr")])
- (define_code_attr shifttype [(ashiftrt "signed") (lshiftrt "unsigned")])
---- a/src/gcc/config/arm/arm.md
-+++ b/src/gcc/config/arm/arm.md
-@@ -205,17 +205,9 @@
- (const_string "yes")]
- (const_string "no")))
-
--; Allows an insn to disable certain alternatives for reasons other than
--; arch support.
--(define_attr "insn_enabled" "no,yes"
-- (const_string "yes"))
--
- ; Enable all alternatives that are both arch_enabled and insn_enabled.
- (define_attr "enabled" "no,yes"
-- (cond [(eq_attr "insn_enabled" "no")
-- (const_string "no")
--
-- (and (eq_attr "predicable_short_it" "no")
-+ (cond [(and (eq_attr "predicable_short_it" "no")
- (and (eq_attr "predicated" "yes")
- (match_test "arm_restrict_it")))
- (const_string "no")
-@@ -2868,6 +2860,28 @@
- (set_attr "type" "multiple")]
- )
-
-+(define_insn_and_split "*anddi_notdi_zesidi"
-+ [(set (match_operand:DI 0 "s_register_operand" "=r")
-+ (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "r"))
-+ (zero_extend:DI
-+ (match_operand:SI 1 "s_register_operand" "r"))))]
-+ "TARGET_32BIT"
-+ "#"
-+ "TARGET_32BIT && reload_completed"
-+ [(set (match_dup 0) (and:SI (not:SI (match_dup 2)) (match_dup 1)))
-+ (set (match_dup 3) (const_int 0))]
-+ "
-+ {
-+ operands[3] = gen_highpart (SImode, operands[0]);
-+ operands[0] = gen_lowpart (SImode, operands[0]);
-+ operands[2] = gen_lowpart (SImode, operands[2]);
-+ }"
-+ [(set_attr "length" "8")
-+ (set_attr "predicable" "yes")
-+ (set_attr "predicable_short_it" "no")
-+ (set_attr "type" "multiple")]
-+)
-+
- (define_insn_and_split "*anddi_notsesidi_di"
- [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
- (and:DI (not:DI (sign_extend:DI
-@@ -8906,7 +8920,7 @@
- return \"\";
- }"
- [(set_attr "conds" "use")
-- (set_attr "type" "f_sel<vfp_type>")]
-+ (set_attr "type" "fcsel")]
- )
-
- (define_insn_and_split "*movsicc_insn"
-@@ -9343,8 +9357,10 @@
- "TARGET_32BIT"
- "
- {
-- if (!REG_P (XEXP (operands[0], 0))
-- && (GET_CODE (XEXP (operands[0], 0)) != SYMBOL_REF))
-+ if ((!REG_P (XEXP (operands[0], 0))
-+ && GET_CODE (XEXP (operands[0], 0)) != SYMBOL_REF)
-+ || (GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF
-+ && arm_is_long_call_p (SYMBOL_REF_DECL (XEXP (operands[0], 0)))))
- XEXP (operands[0], 0) = force_reg (SImode, XEXP (operands[0], 0));
-
- if (operands[2] == NULL_RTX)
-@@ -9361,8 +9377,10 @@
- "TARGET_32BIT"
- "
- {
-- if (!REG_P (XEXP (operands[1], 0)) &&
-- (GET_CODE (XEXP (operands[1],0)) != SYMBOL_REF))
-+ if ((!REG_P (XEXP (operands[1], 0))
-+ && GET_CODE (XEXP (operands[1], 0)) != SYMBOL_REF)
-+ || (GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
-+ && arm_is_long_call_p (SYMBOL_REF_DECL (XEXP (operands[1], 0)))))
- XEXP (operands[1], 0) = force_reg (SImode, XEXP (operands[1], 0));
-
- if (operands[3] == NULL_RTX)
-@@ -9848,39 +9866,35 @@
-
- ;; Patterns to allow combination of arithmetic, cond code and shifts
-
--(define_insn "*arith_shiftsi"
-- [(set (match_operand:SI 0 "s_register_operand" "=r,r,r,r")
-- (match_operator:SI 1 "shiftable_operator"
-- [(match_operator:SI 3 "shift_operator"
-- [(match_operand:SI 4 "s_register_operand" "r,r,r,r")
-- (match_operand:SI 5 "shift_amount_operand" "M,M,M,r")])
-- (match_operand:SI 2 "s_register_operand" "rk,rk,r,rk")]))]
-+(define_insn "*<arith_shift_insn>_multsi"
-+ [(set (match_operand:SI 0 "s_register_operand" "=r,r")
-+ (shiftable_ops:SI
-+ (mult:SI (match_operand:SI 2 "s_register_operand" "r,r")
-+ (match_operand:SI 3 "power_of_two_operand" ""))
-+ (match_operand:SI 1 "s_register_operand" "rk,<t2_binop0>")))]
- "TARGET_32BIT"
-- "%i1%?\\t%0, %2, %4%S3"
-+ "<arith_shift_insn>%?\\t%0, %1, %2, lsl %b3"
- [(set_attr "predicable" "yes")
- (set_attr "predicable_short_it" "no")
- (set_attr "shift" "4")
-- (set_attr "arch" "a,t2,t2,a")
-- ;; Thumb2 doesn't allow the stack pointer to be used for
-- ;; operand1 for all operations other than add and sub. In this case
-- ;; the minus operation is a candidate for an rsub and hence needs
-- ;; to be disabled.
-- ;; We have to make sure to disable the fourth alternative if
-- ;; the shift_operator is MULT, since otherwise the insn will
-- ;; also match a multiply_accumulate pattern and validate_change
-- ;; will allow a replacement of the constant with a register
-- ;; despite the checks done in shift_operator.
-- (set_attr_alternative "insn_enabled"
-- [(const_string "yes")
-- (if_then_else
-- (match_operand:SI 1 "add_operator" "")
-- (const_string "yes") (const_string "no"))
-- (const_string "yes")
-- (if_then_else
-- (match_operand:SI 3 "mult_operator" "")
-- (const_string "no") (const_string "yes"))])
-- (set_attr "type" "alu_shift_imm,alu_shift_imm,alu_shift_imm,alu_shift_reg")])
-+ (set_attr "arch" "a,t2")
-+ (set_attr "type" "alu_shift_imm")])
-
-+(define_insn "*<arith_shift_insn>_shiftsi"
-+ [(set (match_operand:SI 0 "s_register_operand" "=r,r,r")
-+ (shiftable_ops:SI
-+ (match_operator:SI 2 "shift_nomul_operator"
-+ [(match_operand:SI 3 "s_register_operand" "r,r,r")
-+ (match_operand:SI 4 "shift_amount_operand" "M,M,r")])
-+ (match_operand:SI 1 "s_register_operand" "rk,<t2_binop0>,rk")))]
-+ "TARGET_32BIT && GET_CODE (operands[3]) != MULT"
-+ "<arith_shift_insn>%?\\t%0, %1, %3%S2"
-+ [(set_attr "predicable" "yes")
-+ (set_attr "predicable_short_it" "no")
-+ (set_attr "shift" "4")
-+ (set_attr "arch" "a,t2,a")
-+ (set_attr "type" "alu_shift_imm,alu_shift_imm,alu_shift_reg")])
-+
- (define_split
- [(set (match_operand:SI 0 "s_register_operand" "")
- (match_operator:SI 1 "shiftable_operator"
-@@ -12169,7 +12183,7 @@
- int num_regs = XVECLEN (operands[0], 0);
- char pattern[100];
- rtx op_list[2];
-- strcpy (pattern, \"fldmfdd\\t\");
-+ strcpy (pattern, \"vldm\\t\");
- strcat (pattern, reg_names[REGNO (SET_DEST (XVECEXP (operands[0], 0, 0)))]);
- strcat (pattern, \"!, {\");
- op_list[0] = XEXP (XVECEXP (operands[0], 0, 1), 0);
-@@ -12373,6 +12387,7 @@
- "TARGET_32BIT && arm_arch5"
- "clz%?\\t%0, %1"
- [(set_attr "predicable" "yes")
-+ (set_attr "predicable_short_it" "no")
- (set_attr "type" "clz")])
-
- (define_insn "rbitsi2"
-@@ -12381,6 +12396,7 @@
- "TARGET_32BIT && arm_arch_thumb2"
- "rbit%?\\t%0, %1"
- [(set_attr "predicable" "yes")
-+ (set_attr "predicable_short_it" "no")
- (set_attr "type" "clz")])
-
- (define_expand "ctzsi2"
-@@ -12556,6 +12572,8 @@
- rev%?\t%0, %1"
- [(set_attr "arch" "t1,t2,32")
- (set_attr "length" "2,2,4")
-+ (set_attr "predicable" "no,yes,yes")
-+ (set_attr "predicable_short_it" "no")
- (set_attr "type" "rev")]
- )
-
-@@ -12673,6 +12691,44 @@
- (set_attr "type" "rev")]
- )
-
-+;; There are no canonicalisation rules for the position of the lshiftrt, ashift
-+;; operations within an IOR/AND RTX, therefore we have two patterns matching
-+;; each valid permutation.
-+
-+(define_insn "arm_rev16si2"
-+ [(set (match_operand:SI 0 "register_operand" "=l,l,r")
-+ (ior:SI (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "l,l,r")
-+ (const_int 8))
-+ (match_operand:SI 3 "const_int_operand" "n,n,n"))
-+ (and:SI (lshiftrt:SI (match_dup 1)
-+ (const_int 8))
-+ (match_operand:SI 2 "const_int_operand" "n,n,n"))))]
-+ "arm_arch6
-+ && aarch_rev16_shleft_mask_imm_p (operands[3], SImode)
-+ && aarch_rev16_shright_mask_imm_p (operands[2], SImode)"
-+ "rev16\\t%0, %1"
-+ [(set_attr "arch" "t1,t2,32")
-+ (set_attr "length" "2,2,4")
-+ (set_attr "type" "rev")]
-+)
-+
-+(define_insn "arm_rev16si2_alt"
-+ [(set (match_operand:SI 0 "register_operand" "=l,l,r")
-+ (ior:SI (and:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" "l,l,r")
-+ (const_int 8))
-+ (match_operand:SI 2 "const_int_operand" "n,n,n"))
-+ (and:SI (ashift:SI (match_dup 1)
-+ (const_int 8))
-+ (match_operand:SI 3 "const_int_operand" "n,n,n"))))]
-+ "arm_arch6
-+ && aarch_rev16_shleft_mask_imm_p (operands[3], SImode)
-+ && aarch_rev16_shright_mask_imm_p (operands[2], SImode)"
-+ "rev16\\t%0, %1"
-+ [(set_attr "arch" "t1,t2,32")
-+ (set_attr "length" "2,2,4")
-+ (set_attr "type" "rev")]
-+)
-+
- (define_expand "bswaphi2"
- [(set (match_operand:HI 0 "s_register_operand" "=r")
- (bswap:HI (match_operand:HI 1 "s_register_operand" "r")))]
---- a/src/gcc/config/arm/cortex-a5.md
-+++ b/src/gcc/config/arm/cortex-a5.md
-@@ -61,7 +61,7 @@
- (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\
- alu_reg,alus_reg,logic_reg,logics_reg,\
- adc_imm,adcs_imm,adc_reg,adcs_reg,\
-- adr,bfm,rev,\
-+ adr,bfm,clz,rbit,rev,\
- shift_imm,shift_reg,\
- mov_imm,mov_reg,mvn_imm,mvn_reg,\
- mrs,multiple,no_insn"))
---- a/src/gcc/config/arm/cortex-a9.md
-+++ b/src/gcc/config/arm/cortex-a9.md
-@@ -83,7 +83,7 @@
- (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\
- alu_reg,alus_reg,logic_reg,logics_reg,\
- adc_imm,adcs_imm,adc_reg,adcs_reg,\
-- adr,bfm,rev,\
-+ adr,bfm,clz,rbit,rev,\
- shift_imm,shift_reg,\
- mov_imm,mov_reg,mvn_imm,mvn_reg,\
- mov_shift_reg,mov_shift,\
---- a/src/gcc/config/mips/mips.c
-+++ b/src/gcc/config/mips/mips.c
-@@ -7197,12 +7197,17 @@
- emit_insn (gen_slt_sf (dest, fp2, fp1));
- }
-
--/* Implement MOVE_BY_PIECES_P. */
-+/* Implement TARGET_USE_MOVE_BY_PIECES_INFRASTRUCTURE_P. */
-
- bool
--mips_move_by_pieces_p (unsigned HOST_WIDE_INT size, unsigned int align)
-+mips_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
-+ unsigned int align,
-+ enum by_pieces_operation op,
-+ bool speed_p)
- {
-- if (HAVE_movmemsi)
-+ if (op == STORE_BY_PIECES)
-+ return mips_store_by_pieces_p (size, align);
-+ if (op == MOVE_BY_PIECES && HAVE_movmemsi)
- {
- /* movmemsi is meant to generate code that is at least as good as
- move_by_pieces. However, movmemsi effectively uses a by-pieces
-@@ -7219,13 +7224,12 @@
- return size < UNITS_PER_WORD;
- return size <= MIPS_MAX_MOVE_BYTES_STRAIGHT;
- }
-- /* The default value. If this becomes a target hook, we should
-- call the default definition instead. */
-- return (move_by_pieces_ninsns (size, align, MOVE_MAX_PIECES + 1)
-- < (unsigned int) MOVE_RATIO (optimize_insn_for_speed_p ()));
-+
-+ return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
- }
-
--/* Implement STORE_BY_PIECES_P. */
-+/* Implement a handler for STORE_BY_PIECES operations
-+ for TARGET_USE_MOVE_BY_PIECES_INFRASTRUCTURE_P. */
-
- bool
- mips_store_by_pieces_p (unsigned HOST_WIDE_INT size, unsigned int align)
-@@ -19134,6 +19138,10 @@
- #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
- #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV mips_atomic_assign_expand_fenv
-
-+#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
-+#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
-+ mips_use_by_pieces_infrastructure_p
-+
- struct gcc_target targetm = TARGET_INITIALIZER;
-
- #include "gt-mips.h"
---- a/src/gcc/config/mips/mips.h
-+++ b/src/gcc/config/mips/mips.h
-@@ -2867,9 +2867,6 @@
- ? MIPS_MAX_MOVE_BYTES_STRAIGHT / MOVE_MAX \
- : MIPS_CALL_RATIO / 2)
-
--#define MOVE_BY_PIECES_P(SIZE, ALIGN) \
-- mips_move_by_pieces_p (SIZE, ALIGN)
--
- /* For CLEAR_RATIO, when optimizing for size, give a better estimate
- of the length of a memset call, but use the default otherwise. */
-
-@@ -2882,9 +2879,6 @@
-
- #define SET_RATIO(speed) \
- ((speed) ? 15 : MIPS_CALL_RATIO - 2)
--
--#define STORE_BY_PIECES_P(SIZE, ALIGN) \
-- mips_store_by_pieces_p (SIZE, ALIGN)
-
- /* Since the bits of the _init and _fini function is spread across
- many object files, each potentially with its own GP, we must assume
---- a/src/gcc/params.def
-+++ b/src/gcc/params.def
-@@ -303,7 +303,7 @@
- DEFPARAM(PARAM_MAX_COMPLETELY_PEELED_INSNS,
- "max-completely-peeled-insns",
- "The maximum number of insns of a completely peeled loop",
-- 100, 0, 0)
-+ 200, 0, 0)
- /* The maximum number of peelings of a single loop that is peeled completely. */
- DEFPARAM(PARAM_MAX_COMPLETELY_PEEL_TIMES,
- "max-completely-peel-times",
-@@ -1095,6 +1095,21 @@
- "Maximum number of nested calls to search for control dependencies "
- "during uninitialized variable analysis",
- 1000, 1, 0)
-+
-+DEFPARAM (PARAM_MAX_FSM_THREAD_PATH_INSNS,
-+ "max-fsm-thread-path-insns",
-+ "Maximum number of instructions to copy when duplicating blocks on a finite state automaton jump thread path",
-+ 100, 1, 999999)
-+
-+DEFPARAM (PARAM_MAX_FSM_THREAD_LENGTH,
-+ "max-fsm-thread-length",
-+ "Maximum number of basic blocks on a finite state automaton jump thread path",
-+ 10, 1, 999999)
-+
-+DEFPARAM (PARAM_MAX_FSM_THREAD_PATHS,
-+ "max-fsm-thread-paths",
-+ "Maximum number of new jump thread paths to create for a finite state automaton",
-+ 50, 1, 999999)
- /*
-
- Local variables:
---- a/src/gcc/tree-ssa-threadedge.c
-+++ b/src/gcc/tree-ssa-threadedge.c
-@@ -617,6 +617,7 @@
- rather than use a relational operator. These are simpler to handle. */
- if (TREE_CODE (cond) == SSA_NAME)
- {
-+ tree original_lhs = cond;
- cached_lhs = cond;
-
- /* Get the variable's current value from the equivalence chains.
-@@ -638,6 +639,12 @@
- pass specific callback to try and simplify it further. */
- if (cached_lhs && ! is_gimple_min_invariant (cached_lhs))
- cached_lhs = (*simplify) (stmt, stmt);
-+
-+ /* We couldn't find an invariant. But, callers of this
-+ function may be able to do something useful with the
-+ unmodified destination. */
-+ if (!cached_lhs)
-+ cached_lhs = original_lhs;
- }
- else
- cached_lhs = NULL;
-@@ -897,6 +904,248 @@
- return false;
- }
-
-+/* Return true if the CFG contains at least one path from START_BB to END_BB.
-+ When a path is found, record in PATH the blocks from END_BB to START_BB.
-+ VISITED_BBS is used to make sure we don't fall into an infinite loop. Bound
-+ the recursion to basic blocks belonging to LOOP. */
-+
-+static bool
-+fsm_find_thread_path (basic_block start_bb, basic_block end_bb,
-+ vec<basic_block, va_gc> *&path,
-+ pointer_set_t *visited_bbs, loop_p loop)
-+{
-+ if (loop != start_bb->loop_father)
-+ return false;
-+
-+ if (start_bb == end_bb)
-+ {
-+ vec_safe_push (path, start_bb);
-+ return true;
-+ }
-+
-+ if (!pointer_set_insert (visited_bbs, start_bb))
-+ {
-+ edge e;
-+ edge_iterator ei;
-+ FOR_EACH_EDGE (e, ei, start_bb->succs)
-+ if (fsm_find_thread_path (e->dest, end_bb, path, visited_bbs, loop))
-+ {
-+ vec_safe_push (path, start_bb);
-+ return true;
-+ }
-+ }
-+
-+ return false;
-+}
-+
-+static int max_threaded_paths;
-+
-+/* We trace the value of the variable EXPR back through any phi nodes looking
-+ for places where it gets a constant value and save the path. Stop after
-+ having recorded MAX_PATHS jump threading paths. */
-+
-+static void
-+fsm_find_control_statement_thread_paths (tree expr,
-+ pointer_set_t *visited_phis,
-+ vec<basic_block, va_gc> *&path)
-+{
-+ tree var = SSA_NAME_VAR (expr);
-+ gimple def_stmt = SSA_NAME_DEF_STMT (expr);
-+ basic_block var_bb = gimple_bb (def_stmt);
-+
-+ if (var == NULL || var_bb == NULL)
-+ return;
-+
-+ /* For the moment we assume that an SSA chain only contains phi nodes, and
-+ eventually one of the phi arguments will be an integer constant. In the
-+ future, this could be extended to also handle simple assignments of
-+ arithmetic operations. */
-+ if (gimple_code (def_stmt) != GIMPLE_PHI)
-+ return;
-+
-+ /* Avoid infinite recursion. */
-+ if (pointer_set_insert (visited_phis, def_stmt))
-+ return;
-+
-+ int next_path_length = 0;
-+ basic_block last_bb_in_path = path->last ();
-+
-+ /* Following the chain of SSA_NAME definitions, we jumped from a definition in
-+ LAST_BB_IN_PATH to a definition in VAR_BB. When these basic blocks are
-+ different, append to PATH the blocks from LAST_BB_IN_PATH to VAR_BB. */
-+ if (var_bb != last_bb_in_path)
-+ {
-+ edge e;
-+ int e_count = 0;
-+ edge_iterator ei;
-+ vec<basic_block, va_gc> *next_path;
-+ vec_alloc (next_path, n_basic_blocks_for_fn (cfun));
-+
-+ FOR_EACH_EDGE (e, ei, last_bb_in_path->preds)
-+ {
-+ pointer_set_t *visited_bbs = pointer_set_create ();
-+
-+ if (fsm_find_thread_path (var_bb, e->src, next_path, visited_bbs,
-+ e->src->loop_father))
-+ ++e_count;
-+
-+ pointer_set_destroy (visited_bbs);
-+
-+ /* If there is more than one path, stop. */
-+ if (e_count > 1)
-+ {
-+ vec_free (next_path);
-+ return;
-+ }
-+ }
-+
-+ /* Stop if we have not found a path: this could occur when the recursion
-+ is stopped by one of the bounds. */
-+ if (e_count == 0)
-+ {
-+ vec_free (next_path);
-+ return;
-+ }
-+
-+ /* Append all the nodes from NEXT_PATH to PATH. */
-+ vec_safe_splice (path, next_path);
-+ next_path_length = next_path->length ();
-+ vec_free (next_path);
-+ }
-+
-+ gcc_assert (path->last () == var_bb);
-+
-+ /* Iterate over the arguments of PHI. */
-+ unsigned int i;
-+ for (i = 0; i < gimple_phi_num_args (def_stmt); i++)
-+ {
-+ tree arg = gimple_phi_arg_def (def_stmt, i);
-+ basic_block bbi = gimple_phi_arg_edge (def_stmt, i)->src;
-+
-+ /* Skip edges pointing outside the current loop. */
-+ if (!arg || var_bb->loop_father != bbi->loop_father)
-+ continue;
-+
-+ if (TREE_CODE (arg) == SSA_NAME)
-+ {
-+ vec_safe_push (path, bbi);
-+ /* Recursively follow SSA_NAMEs looking for a constant definition. */
-+ fsm_find_control_statement_thread_paths (arg, visited_phis, path);
-+ path->pop ();
-+ continue;
-+ }
-+
-+ if (TREE_CODE (arg) != INTEGER_CST)
-+ continue;
-+
-+ int path_length = path->length ();
-+ /* A path with less than 2 basic blocks should not be jump-threaded. */
-+ if (path_length < 2)
-+ continue;
-+
-+ if (path_length > PARAM_VALUE (PARAM_MAX_FSM_THREAD_LENGTH))
-+ {
-+ if (dump_file && (dump_flags & TDF_DETAILS))
-+ fprintf (dump_file, "FSM jump-thread path not considered: "
-+ "the number of basic blocks on the path "
-+ "exceeds PARAM_MAX_FSM_THREAD_LENGTH.\n");
-+ continue;
-+ }
-+
-+ if (max_threaded_paths <= 0)
-+ {
-+ if (dump_file && (dump_flags & TDF_DETAILS))
-+ fprintf (dump_file, "FSM jump-thread path not considered: "
-+ "the number of previously recorded FSM paths to thread "
-+ "exceeds PARAM_MAX_FSM_THREAD_PATHS.\n");
-+ continue;
-+ }
-+
-+ /* Add BBI to the path. */
-+ vec_safe_push (path, bbi);
-+ ++path_length;
-+
-+ int n_insns = 0;
-+ gimple_stmt_iterator gsi;
-+ int j;
-+ loop_p loop = (*path)[0]->loop_father;
-+ bool path_crosses_loops = false;
-+
-+ /* Count the number of instructions on the path: as these instructions
-+ will have to be duplicated, we will not record the path if there are
-+ too many instructions on the path. Also check that all the blocks in
-+ the path belong to a single loop. */
-+ for (j = 1; j < path_length - 1; j++)
-+ {
-+ basic_block bb = (*path)[j];
-+
-+ if (bb->loop_father != loop)
-+ {
-+ path_crosses_loops = true;
-+ break;
-+ }
-+
-+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
-+ {
-+ gimple stmt = gsi_stmt (gsi);
-+ /* Do not count empty statements and labels. */
-+ if (gimple_code (stmt) != GIMPLE_NOP
-+ && gimple_code (stmt) != GIMPLE_LABEL
-+ && !is_gimple_debug (stmt))
-+ ++n_insns;
-+ }
-+ }
-+
-+ if (path_crosses_loops)
-+ {
-+ if (dump_file && (dump_flags & TDF_DETAILS))
-+ fprintf (dump_file, "FSM jump-thread path not considered: "
-+ "the path crosses loops.\n");
-+ path->pop ();
-+ continue;
-+ }
-+
-+ if (n_insns >= PARAM_VALUE (PARAM_MAX_FSM_THREAD_PATH_INSNS))
-+ {
-+ if (dump_file && (dump_flags & TDF_DETAILS))
-+ fprintf (dump_file, "FSM jump-thread path not considered: "
-+ "the number of instructions on the path "
-+ "exceeds PARAM_MAX_FSM_THREAD_PATH_INSNS.\n");
-+ path->pop ();
-+ continue;
-+ }
-+
-+ vec<jump_thread_edge *> *jump_thread_path
-+ = new vec<jump_thread_edge *> ();
-+
-+ /* Record the edges between the blocks in PATH. */
-+ for (j = 0; j < path_length - 1; j++)
-+ {
-+ edge e = find_edge ((*path)[path_length - j - 1],
-+ (*path)[path_length - j - 2]);
-+ gcc_assert (e);
-+ jump_thread_edge *x = new jump_thread_edge (e, EDGE_FSM_THREAD);
-+ jump_thread_path->safe_push (x);
-+ }
-+
-+ /* Add the edge taken when the control variable has value ARG. */
-+ edge taken_edge = find_taken_edge ((*path)[0], arg);
-+ jump_thread_edge *x
-+ = new jump_thread_edge (taken_edge, EDGE_NO_COPY_SRC_BLOCK);
-+ jump_thread_path->safe_push (x);
-+
-+ register_jump_thread (jump_thread_path);
-+ --max_threaded_paths;
-+
-+ /* Remove BBI from the path. */
-+ path->pop ();
-+ }
-+
-+ /* Remove all the nodes that we added from NEXT_PATH. */
-+ if (next_path_length)
-+ vec_safe_truncate (path, (path->length () - next_path_length));
-+}
-+
- /* We are exiting E->src, see if E->dest ends with a conditional
- jump which has a known value when reached via E.
-
-@@ -982,7 +1231,10 @@
- cond = simplify_control_stmt_condition (e, stmt, dummy_cond, simplify,
- handle_dominating_asserts);
-
-- if (cond && is_gimple_min_invariant (cond))
-+ if (!cond)
-+ return 0;
-+
-+ if (is_gimple_min_invariant (cond))
- {
- edge taken_edge = find_taken_edge (e->dest, cond);
- basic_block dest = (taken_edge ? taken_edge->dest : NULL);
-@@ -1028,6 +1280,27 @@
- backedge_seen_p);
- return 1;
- }
-+
-+ if (!flag_expensive_optimizations
-+ || optimize_function_for_size_p (cfun)
-+ || TREE_CODE (cond) != SSA_NAME
-+ || e->dest->loop_father != e->src->loop_father
-+ || loop_depth (e->dest->loop_father) == 0)
-+ return 0;
-+
-+ /* When COND cannot be simplified, try to find paths from a control
-+ statement back through the PHI nodes which would affect that control
-+ statement. */
-+ vec<basic_block, va_gc> *bb_path;
-+ vec_alloc (bb_path, n_basic_blocks_for_fn (cfun));
-+ vec_safe_push (bb_path, e->dest);
-+ pointer_set_t *visited_phis = pointer_set_create ();
-+
-+ max_threaded_paths = PARAM_VALUE (PARAM_MAX_FSM_THREAD_PATHS);
-+ fsm_find_control_statement_thread_paths (cond, visited_phis, bb_path);
-+
-+ pointer_set_destroy (visited_phis);
-+ vec_free (bb_path);
- }
- return 0;
- }
---- a/src/gcc/convert.c
-+++ b/src/gcc/convert.c
-@@ -471,8 +471,8 @@
- break;
-
- CASE_FLT_FN (BUILT_IN_ROUND):
-- /* Only convert in ISO C99 mode. */
-- if (!targetm.libc_has_function (function_c99_misc))
-+ /* Only convert in ISO C99 mode and with -fno-math-errno. */
-+ if (!targetm.libc_has_function (function_c99_misc) || flag_errno_math)
- break;
- if (outprec < TYPE_PRECISION (integer_type_node)
- || (outprec == TYPE_PRECISION (integer_type_node)
-@@ -492,8 +492,8 @@
- break;
- /* ... Fall through ... */
- CASE_FLT_FN (BUILT_IN_RINT):
-- /* Only convert in ISO C99 mode. */
-- if (!targetm.libc_has_function (function_c99_misc))
-+ /* Only convert in ISO C99 mode and with -fno-math-errno. */
-+ if (!targetm.libc_has_function (function_c99_misc) || flag_errno_math)
- break;
- if (outprec < TYPE_PRECISION (integer_type_node)
- || (outprec == TYPE_PRECISION (integer_type_node)
---- a/src/libobjc/ChangeLog.linaro
-+++ b/src/libobjc/ChangeLog.linaro
-@@ -0,0 +1,51 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/libvtv/ChangeLog.linaro
-+++ b/src/libvtv/ChangeLog.linaro
-@@ -0,0 +1,51 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/libgfortran/configure
-+++ b/src/libgfortran/configure
-@@ -25941,7 +25941,7 @@
- # test is copied from libgomp, and modified to not link in -lrt as
- # libgfortran calls clock_gettime via a weak reference if it's found
- # in librt.
--if test $ac_cv_func_clock_gettime = no; then
-+if test "$ac_cv_func_clock_gettime" = no; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for clock_gettime in -lrt" >&5
- $as_echo_n "checking for clock_gettime in -lrt... " >&6; }
- if test "${ac_cv_lib_rt_clock_gettime+set}" = set; then :
---- a/src/libgfortran/configure.ac
-+++ b/src/libgfortran/configure.ac
-@@ -511,7 +511,7 @@
- # test is copied from libgomp, and modified to not link in -lrt as
- # libgfortran calls clock_gettime via a weak reference if it's found
- # in librt.
--if test $ac_cv_func_clock_gettime = no; then
-+if test "$ac_cv_func_clock_gettime" = no; then
- AC_CHECK_LIB(rt, clock_gettime,
- [AC_DEFINE(HAVE_CLOCK_GETTIME_LIBRT, 1,
- [Define to 1 if you have the `clock_gettime' function in librt.])])
---- a/src/libgfortran/ChangeLog.linaro
-+++ b/src/libgfortran/ChangeLog.linaro
-@@ -0,0 +1,59 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ Backport from trunk r209747.
-+ 2014-04-24 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
-+
-+ * configure.ac: Quote usage of ac_cv_func_clock_gettime in if test.
-+ * configure: Regenerate.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/libada/ChangeLog.linaro
-+++ b/src/libada/ChangeLog.linaro
-@@ -0,0 +1,51 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/libffi/ChangeLog.linaro
-+++ b/src/libffi/ChangeLog.linaro
-@@ -0,0 +1,51 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/libssp/ChangeLog.linaro
-+++ b/src/libssp/ChangeLog.linaro
-@@ -0,0 +1,51 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/libcilkrts/ChangeLog.linaro
-+++ b/src/libcilkrts/ChangeLog.linaro
-@@ -0,0 +1,51 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/libcpp/ChangeLog.linaro
-+++ b/src/libcpp/ChangeLog.linaro
-@@ -0,0 +1,51 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/libcpp/po/ChangeLog.linaro
-+++ b/src/libcpp/po/ChangeLog.linaro
-@@ -0,0 +1,51 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.
---- a/src/fixincludes/ChangeLog.linaro
-+++ b/src/fixincludes/ChangeLog.linaro
-@@ -0,0 +1,51 @@
-+2015-01-15 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2015.01 released.
-+
-+2014-12-11 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.12 released.
-+
-+2014-11-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.11 released.
-+
-+2014-10-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10-1 released.
-+
-+2014-10-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.10 released.
-+
-+2014-09-10 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.09 released.
-+
-+2014-08-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.08 released.
-+
-+2014-07-24 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07-1 released.
-+
-+2014-07-17 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.07 released.
-+
-+2014-06-25 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06-1 released.
-+
-+2014-06-12 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.06 released.
-+
-+2014-05-14 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.05 released.
-+
-+2014-04-22 Yvan Roux <yvan.roux@linaro.org>
-+
-+ GCC Linaro 4.9-2014.04 released.