Diffstat (limited to 'debian/patches/gcc-linaro.diff')
-rw-r--r-- | debian/patches/gcc-linaro.diff | 48934
1 file changed, 3 insertions, 48931 deletions
diff --git a/debian/patches/gcc-linaro.diff b/debian/patches/gcc-linaro.diff index e7d7c84..b363b1f 100644 --- a/debian/patches/gcc-linaro.diff +++ b/debian/patches/gcc-linaro.diff @@ -1,48934 +1,6 @@ -# DP: Changes for the Linaro 4.9-2015.01 release. +# DP: Changes for the Linaro 5-2015.xx release. -LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@219502 \ - svn://gcc.gnu.org/svn/gcc/branches/linaro/gcc-4_9-branch@219643 \ +LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-5-branch@219502 \ + svn://gcc.gnu.org/svn/gcc/branches/linaro/gcc-5-branch@219643 \ | filterdiff --remove-timestamps --addoldprefix=a/src/ --addnewprefix=b/src/ ---- a/src/libitm/ChangeLog.linaro -+++ b/src/libitm/ChangeLog.linaro -@@ -0,0 +1,68 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-10-03 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213035. -+ 2014-07-24 Richard Henderson <rth@redhat.com> -+ -+ * config/aarch64/sjlj.S (_ITM_beginTransaction): Use post-inc -+ addressing mode in epilogue. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-26 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r210615. -+ 2014-05-19 Richard Henderson <rth@redhat.com> -+ -+ * config/aarch64/sjlj.S: New file. -+ * config/aarch64/target.h: New file. -+ * configure.tgt: Enable aarch64. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/libgomp/ChangeLog.linaro -+++ b/src/libgomp/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. 
-+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/libquadmath/ChangeLog.linaro -+++ b/src/libquadmath/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/libsanitizer/ChangeLog.linaro -+++ b/src/libsanitizer/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/zlib/ChangeLog.linaro -+++ b/src/zlib/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. 
-+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/libstdc++-v3/ChangeLog.linaro -+++ b/src/libstdc++-v3/ChangeLog.linaro -@@ -0,0 +1,70 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r216444. -+ 2014-10-19 Maxim Kuvyrkov <maxim.kuvyrkov@linaro.org> -+ -+ * testsuite/lib/libstdc++.exp (v3-copy-file): New proc split from ... -+ (v3-copy-files): ... this. Update. -+ (check_v3_target_fileio): Fix race on cin_unget-1.txt file. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-10-06 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215101. -+ 2014-09-10 Tony Wang <tony.wang@arm.com> -+ -+ PR target/56846 -+ * libsupc++/eh_personality.cc (PERSONALITY_FUNCTION): -+ Return with CONTINUE_UNWINDING when the state pattern -+ contains: _US_VIRTUAL_UNWIND_FRAME | _US_FORCE_UNWIND -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/libstdc++-v3/testsuite/lib/libstdc++.exp -+++ b/src/libstdc++-v3/testsuite/lib/libstdc++.exp -@@ -63,19 +63,24 @@ - verbose "++ $var is $val" $n - } - -+# Copy file to the target. -+proc v3-copy-file {src dst} { -+ if { [catch { set symlink [file readlink $src] } x] } then { -+ remote_download target $src $dst -+ } else { -+ if { [regexp "^/" "$symlink"] } then { -+ remote_download target $symlink $dst -+ } else { -+ set dirname [file dirname $f] -+ remote_download target $dirname/$symlink $dst -+ } -+ } -+} -+ - # Called by v3-init below. "Static" to this file. 
- proc v3-copy-files {srcfiles} { - foreach f $srcfiles { -- if { [catch { set symlink [file readlink $f] } x] } then { -- remote_download target $f -- } else { -- if { [regexp "^/" "$symlink"] } then { -- remote_download target $symlink -- } else { -- set dirname [file dirname $f] -- remote_download target $dirname/$symlink -- } -- } -+ v3-copy-file $f [file tail $f] - } - } - -@@ -681,8 +686,8 @@ - # the file functions - set src fileio[pid].cc - set exe fileio[pid].x -- set testfile "cin_unget-1.txt" -- v3-copy-files "$srcdir/data/$testfile" -+ set testfile "cin_unget-1.[pid].txt" -+ v3-copy-file "$srcdir/data/cin_unget-1.txt" "$testfile" - - set f [open $src "w"] - puts $f "#include <sys/types.h>" ---- a/src/configure.ac -+++ b/src/configure.ac -@@ -331,7 +331,8 @@ - if test "$is_elf" = "yes"; then - # Check for target supported by gold. - case "${target}" in -- i?86-*-* | x86_64-*-* | sparc*-*-* | powerpc*-*-* | arm*-*-* | tilegx*-*-*) -+ i?86-*-* | x86_64-*-* | sparc*-*-* | powerpc*-*-* | arm*-*-* \ -+ | aarch64*-*-* | tilegx*-*-*) - configdirs="$configdirs gold" - if test x${ENABLE_GOLD} = xdefault; then - default_ld=gold ---- a/src/intl/ChangeLog.linaro -+++ b/src/intl/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/ChangeLog.linaro -+++ b/src/ChangeLog.linaro -@@ -0,0 +1,59 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215865. -+ 2014-10-03 Jing Yu <jingyu@google.com> -+ -+ * configure.ac: Add aarch64 to list of targets that support gold. -+ * configure: Regenerate. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. 
-+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/boehm-gc/ChangeLog.linaro -+++ b/src/boehm-gc/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/include/ChangeLog.linaro -+++ b/src/include/ChangeLog.linaro -@@ -0,0 +1,58 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209649. -+ 2014-04-22 Yufeng Zhang <yufeng.zhang@arm.com> -+ -+ * longlong.h: Merge from glibc. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/include/longlong.h -+++ b/src/include/longlong.h -@@ -1,5 +1,5 @@ - /* longlong.h -- definitions for mixed size 32/64 bit arithmetic. -- Copyright (C) 1991-2013 Free Software Foundation, Inc. -+ Copyright (C) 1991-2014 Free Software Foundation, Inc. - - This file is part of the GNU C Library. 
- -@@ -122,6 +122,22 @@ - #define __AND_CLOBBER_CC , "cc" - #endif /* __GNUC__ < 2 */ - -+#if defined (__aarch64__) -+ -+#if W_TYPE_SIZE == 32 -+#define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X)) -+#define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X)) -+#define COUNT_LEADING_ZEROS_0 32 -+#endif /* W_TYPE_SIZE == 32 */ -+ -+#if W_TYPE_SIZE == 64 -+#define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clzll (X)) -+#define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctzll (X)) -+#define COUNT_LEADING_ZEROS_0 64 -+#endif /* W_TYPE_SIZE == 64 */ -+ -+#endif /* __aarch64__ */ -+ - #if defined (__alpha) && W_TYPE_SIZE == 64 - #define umul_ppmm(ph, pl, m0, m1) \ - do { \ ---- a/src/libiberty/ChangeLog.linaro -+++ b/src/libiberty/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/lto-plugin/ChangeLog.linaro -+++ b/src/lto-plugin/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/contrib/regression/ChangeLog.linaro -+++ b/src/contrib/regression/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. 
-+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/contrib/ChangeLog.linaro -+++ b/src/contrib/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/contrib/reghunt/ChangeLog.linaro -+++ b/src/contrib/reghunt/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. 
---- a/src/libatomic/ChangeLog.linaro -+++ b/src/libatomic/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/config/ChangeLog.linaro -+++ b/src/config/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/libbacktrace/ChangeLog.linaro -+++ b/src/libbacktrace/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. 
-+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/libjava/libltdl/ChangeLog.linaro -+++ b/src/libjava/libltdl/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/libjava/ChangeLog.linaro -+++ b/src/libjava/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/libjava/classpath/ChangeLog.linaro -+++ b/src/libjava/classpath/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. 
-+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/gnattools/ChangeLog.linaro -+++ b/src/gnattools/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/maintainer-scripts/ChangeLog.linaro -+++ b/src/maintainer-scripts/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/configure -+++ b/src/configure -@@ -2971,7 +2971,8 @@ - if test "$is_elf" = "yes"; then - # Check for target supported by gold. 
- case "${target}" in -- i?86-*-* | x86_64-*-* | sparc*-*-* | powerpc*-*-* | arm*-*-* | tilegx*-*-*) -+ i?86-*-* | x86_64-*-* | sparc*-*-* | powerpc*-*-* | arm*-*-* \ -+ | aarch64*-*-* | tilegx*-*-*) - configdirs="$configdirs gold" - if test x${ENABLE_GOLD} = xdefault; then - default_ld=gold ---- a/src/libgcc/config.host -+++ b/src/libgcc/config.host -@@ -316,13 +316,15 @@ - case ${host} in - aarch64*-*-elf) - extra_parts="$extra_parts crtbegin.o crtend.o crti.o crtn.o" -+ extra_parts="$extra_parts crtfastmath.o" - tmake_file="${tmake_file} ${cpu_type}/t-aarch64" -- tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp" -+ tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm" - ;; - aarch64*-*-linux*) -+ extra_parts="$extra_parts crtfastmath.o" - md_unwind_header=aarch64/linux-unwind.h - tmake_file="${tmake_file} ${cpu_type}/t-aarch64" -- tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp" -+ tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm" - ;; - alpha*-*-linux*) - tmake_file="${tmake_file} alpha/t-alpha alpha/t-ieee t-crtfm alpha/t-linux" ---- a/src/libgcc/ChangeLog.linaro -+++ b/src/libgcc/ChangeLog.linaro -@@ -0,0 +1,69 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215013. -+ 2014-09-08 Joseph Myers <joseph@codesourcery.com> -+ -+ * fp-bit.c (pack_d, unpack_d): Remove LARGEST_EXPONENT_IS_NORMAL -+ and ROUND_TOWARDS_ZERO conditionals. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-10-06 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215086. -+ 2014-09-09 Marcus Shawcroft <marcus.shawcroft@arm.com> -+ Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> -+ -+ * config.host (aarch64*): Include crtfastmath.o and -+ t-crtfm. -+ * config/aarch64/crtfastmath.c: New file. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/libgcc/config/aarch64/crtfastmath.c -+++ b/src/libgcc/config/aarch64/crtfastmath.c -@@ -0,0 +1,36 @@ -+/* -+ * Copyright (C) 2014 Free Software Foundation, Inc. -+ * -+ * This file is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License as published by the -+ * Free Software Foundation; either version 3, or (at your option) any -+ * later version. -+ * -+ * This file is distributed in the hope that it will be useful, but -+ * WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU -+ * General Public License for more details. -+ * -+ * Under Section 7 of GPL version 3, you are granted additional -+ * permissions described in the GCC Runtime Library Exception, version -+ * 3.1, as published by the Free Software Foundation. -+ * -+ * You should have received a copy of the GNU General Public License and -+ * a copy of the GCC Runtime Library Exception along with this program; -+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -+ * <http://www.gnu.org/licenses/>. -+ */ -+ -+#define _FPU_FPCR_FZ 0x1000000 -+ -+#define _FPU_SETCW(fpcr) \ -+ { \ -+ __asm__ __volatile__ ("msr fpcr, %0" : : "r" (fpcr)); \ -+ } -+ -+static void __attribute__((constructor)) -+set_fast_math (void) -+{ -+ /* Flush to zero, round to nearest, IEEE exceptions disabled. */ -+ _FPU_SETCW (_FPU_FPCR_FZ); -+} ---- a/src/libgcc/config/arm/bpabi-v6m.S -+++ b/src/libgcc/config/arm/bpabi-v6m.S -@@ -148,7 +148,7 @@ - mov r0, sp - push {r0, lr} - ldr r0, [sp, #8] -- bl SYM(__gnu_uldivmod_helper) -+ bl SYM(__udivmoddi4) - ldr r3, [sp, #4] - mov lr, r3 - add sp, sp, #8 ---- a/src/libgcc/config/arm/bpabi.c -+++ b/src/libgcc/config/arm/bpabi.c -@@ -26,9 +26,6 @@ - extern unsigned long long __udivdi3 (unsigned long long, - unsigned long long); - extern long long __gnu_ldivmod_helper (long long, long long, long long *); --extern unsigned long long __gnu_uldivmod_helper (unsigned long long, -- unsigned long long, -- unsigned long long *); - - - long long -@@ -43,14 +40,3 @@ - return quotient; - } - --unsigned long long --__gnu_uldivmod_helper (unsigned long long a, -- unsigned long long b, -- unsigned long long *remainder) --{ -- unsigned long long quotient; -- -- quotient = __udivdi3 (a, b); -- *remainder = a - b * quotient; -- return quotient; --} ---- a/src/libgcc/config/arm/bpabi.S -+++ b/src/libgcc/config/arm/bpabi.S -@@ -22,6 +22,8 @@ - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - <http://www.gnu.org/licenses/>. */ - -+ .cfi_sections .debug_frame -+ - #ifdef __ARM_EABI__ - /* Some attributes that are common to all routines in this file. */ - /* Tag_ABI_align_needed: This code does not require 8-byte -@@ -120,49 +122,137 @@ - #endif - .endm - -+/* we can use STRD/LDRD on v5TE and later, and any Thumb-2 architecture. */ -+#if (defined(__ARM_EABI__) \ -+ && (defined(__thumb2__) \ -+ || (__ARM_ARCH >= 5 && defined(__TARGET_FEATURE_DSP)))) -+#define CAN_USE_LDRD 1 -+#else -+#define CAN_USE_LDRD 0 -+#endif -+ -+/* set up stack from for call to __udivmoddi4. At the end of the macro the -+ stack is arranged as follows: -+ sp+12 / space for remainder -+ sp+8 \ (written by __udivmoddi4) -+ sp+4 lr -+ sp+0 sp+8 [rp (remainder pointer) argument for __udivmoddi4] -+ -+ */ -+.macro push_for_divide fname -+#if defined(__thumb2__) && CAN_USE_LDRD -+ sub ip, sp, #8 -+ strd ip, lr, [sp, #-16]! -+#else -+ sub sp, sp, #8 -+ do_push {sp, lr} -+#endif -+ .cfi_adjust_cfa_offset 16 -+ .cfi_offset 14, -12 -+.endm -+ -+/* restore stack */ -+.macro pop_for_divide -+ ldr lr, [sp, #4] -+#if CAN_USE_LDRD -+ ldrd r2, r3, [sp, #8] -+ add sp, sp, #16 -+#else -+ add sp, sp, #8 -+ do_pop {r2, r3} -+#endif -+ .cfi_restore 14 -+ .cfi_adjust_cfa_offset 0 -+.endm -+ - #ifdef L_aeabi_ldivmod - -+/* Perform 64 bit signed division. 
-+ Inputs: -+ r0:r1 numerator -+ r2:r3 denominator -+ Outputs: -+ r0:r1 quotient -+ r2:r3 remainder -+ */ - ARM_FUNC_START aeabi_ldivmod -- cfi_start __aeabi_ldivmod, LSYM(Lend_aeabi_ldivmod) -- test_div_by_zero signed -+ .cfi_startproc -+ test_div_by_zero signed - -- sub sp, sp, #8 --#if defined(__thumb2__) -- mov ip, sp -- push {ip, lr} --#else -- do_push {sp, lr} --#endif --98: cfi_push 98b - __aeabi_ldivmod, 0xe, -0xc, 0x10 -- bl SYM(__gnu_ldivmod_helper) __PLT__ -- ldr lr, [sp, #4] -- add sp, sp, #8 -- do_pop {r2, r3} -+ push_for_divide __aeabi_ldivmod -+ cmp xxh, #0 -+ blt 1f -+ cmp yyh, #0 -+ blt 2f -+ /* arguments in (r0:r1), (r2:r3) and *sp */ -+ bl SYM(__udivmoddi4) __PLT__ -+ .cfi_remember_state -+ pop_for_divide - RET -- cfi_end LSYM(Lend_aeabi_ldivmod) -+ -+1: /* xxh:xxl is negative */ -+ .cfi_restore_state -+ negs xxl, xxl -+ sbc xxh, xxh, xxh, lsl #1 /* Thumb-2 has no RSC, so use X - 2X */ -+ cmp yyh, #0 -+ blt 3f -+ /* arguments in (r0:r1), (r2:r3) and *sp */ -+ bl SYM(__udivmoddi4) __PLT__ -+ .cfi_remember_state -+ pop_for_divide -+ negs xxl, xxl -+ sbc xxh, xxh, xxh, lsl #1 /* Thumb-2 has no RSC, so use X - 2X */ -+ negs yyl, yyl -+ sbc yyh, yyh, yyh, lsl #1 /* Thumb-2 has no RSC, so use X - 2X */ -+ RET -+ -+2: /* only yyh:yyl is negative */ -+ .cfi_restore_state -+ negs yyl, yyl -+ sbc yyh, yyh, yyh, lsl #1 /* Thumb-2 has no RSC, so use X - 2X */ -+ /* arguments in (r0:r1), (r2:r3) and *sp */ -+ bl SYM(__udivmoddi4) __PLT__ -+ .cfi_remember_state -+ pop_for_divide -+ negs xxl, xxl -+ sbc xxh, xxh, xxh, lsl #1 /* Thumb-2 has no RSC, so use X - 2X */ -+ RET -+ -+3: /* both xxh:xxl and yyh:yyl are negative */ -+ .cfi_restore_state -+ negs yyl, yyl -+ sbc yyh, yyh, yyh, lsl #1 /* Thumb-2 has no RSC, so use X - 2X */ -+ /* arguments in (r0:r1), (r2:r3) and *sp */ -+ bl SYM(__udivmoddi4) __PLT__ -+ pop_for_divide -+ negs yyl, yyl -+ sbc yyh, yyh, yyh, lsl #1 /* Thumb-2 has no RSC, so use X - 2X */ -+ RET -+ -+ .cfi_endproc - - #endif /* L_aeabi_ldivmod */ - - #ifdef L_aeabi_uldivmod - -+/* Perform 64 bit signed division. -+ Inputs: -+ r0:r1 numerator -+ r2:r3 denominator -+ Outputs: -+ r0:r1 quotient -+ r2:r3 remainder -+ */ - ARM_FUNC_START aeabi_uldivmod -- cfi_start __aeabi_uldivmod, LSYM(Lend_aeabi_uldivmod) -- test_div_by_zero unsigned -+ .cfi_startproc -+ test_div_by_zero unsigned - -- sub sp, sp, #8 --#if defined(__thumb2__) -- mov ip, sp -- push {ip, lr} --#else -- do_push {sp, lr} --#endif --98: cfi_push 98b - __aeabi_uldivmod, 0xe, -0xc, 0x10 -- bl SYM(__gnu_uldivmod_helper) __PLT__ -- ldr lr, [sp, #4] -- add sp, sp, #8 -- do_pop {r2, r3} -+ push_for_divide __aeabi_uldivmod -+ /* arguments in (r0:r1), (r2:r3) and *sp */ -+ bl SYM(__udivmoddi4) __PLT__ -+ pop_for_divide - RET -- cfi_end LSYM(Lend_aeabi_uldivmod) -+ .cfi_endproc - - #endif /* L_aeabi_divmod */ - ---- a/src/libgcc/config/libbid/ChangeLog.linaro -+++ b/src/libgcc/config/libbid/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. 
-+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/libgcc/fp-bit.c -+++ b/src/libgcc/fp-bit.c -@@ -202,17 +202,9 @@ - int sign = src->sign; - int exp = 0; - -- if (LARGEST_EXPONENT_IS_NORMAL (FRAC_NBITS) && (isnan (src) || isinf (src))) -+ if (isnan (src)) - { -- /* We can't represent these values accurately. By using the -- largest possible magnitude, we guarantee that the conversion -- of infinity is at least as big as any finite number. */ - exp = EXPMAX; -- fraction = ((fractype) 1 << FRACBITS) - 1; -- } -- else if (isnan (src)) -- { -- exp = EXPMAX; - /* Restore the NaN's payload. */ - fraction >>= NGARDS; - fraction &= QUIET_NAN - 1; -@@ -291,8 +283,7 @@ - fraction >>= NGARDS; - #endif /* NO_DENORMALS */ - } -- else if (!LARGEST_EXPONENT_IS_NORMAL (FRAC_NBITS) -- && __builtin_expect (src->normal_exp > EXPBIAS, 0)) -+ else if (__builtin_expect (src->normal_exp > EXPBIAS, 0)) - { - exp = EXPMAX; - fraction = 0; -@@ -300,35 +291,25 @@ - else - { - exp = src->normal_exp + EXPBIAS; -- if (!ROUND_TOWARDS_ZERO) -+ /* IF the gard bits are the all zero, but the first, then we're -+ half way between two numbers, choose the one which makes the -+ lsb of the answer 0. */ -+ if ((fraction & GARDMASK) == GARDMSB) - { -- /* IF the gard bits are the all zero, but the first, then we're -- half way between two numbers, choose the one which makes the -- lsb of the answer 0. */ -- if ((fraction & GARDMASK) == GARDMSB) -- { -- if (fraction & (1 << NGARDS)) -- fraction += GARDROUND + 1; -- } -- else -- { -- /* Add a one to the guards to round up */ -- fraction += GARDROUND; -- } -- if (fraction >= IMPLICIT_2) -- { -- fraction >>= 1; -- exp += 1; -- } -+ if (fraction & (1 << NGARDS)) -+ fraction += GARDROUND + 1; - } -- fraction >>= NGARDS; -- -- if (LARGEST_EXPONENT_IS_NORMAL (FRAC_NBITS) && exp > EXPMAX) -+ else - { -- /* Saturate on overflow. */ -- exp = EXPMAX; -- fraction = ((fractype) 1 << FRACBITS) - 1; -+ /* Add a one to the guards to round up */ -+ fraction += GARDROUND; - } -+ if (fraction >= IMPLICIT_2) -+ { -+ fraction >>= 1; -+ exp += 1; -+ } -+ fraction >>= NGARDS; - } - } - -@@ -556,8 +537,7 @@ - dst->fraction.ll = fraction; - } - } -- else if (!LARGEST_EXPONENT_IS_NORMAL (FRAC_NBITS) -- && __builtin_expect (exp == EXPMAX, 0)) -+ else if (__builtin_expect (exp == EXPMAX, 0)) - { - /* Huge exponent*/ - if (fraction == 0) -@@ -915,7 +895,7 @@ - low <<= 1; - } - -- if (!ROUND_TOWARDS_ZERO && (high & GARDMASK) == GARDMSB) -+ if ((high & GARDMASK) == GARDMSB) - { - if (high & (1 << NGARDS)) - { -@@ -1035,7 +1015,7 @@ - numerator *= 2; - } - -- if (!ROUND_TOWARDS_ZERO && (quotient & GARDMASK) == GARDMSB) -+ if ((quotient & GARDMASK) == GARDMSB) - { - if (quotient & (1 << NGARDS)) - { ---- a/src/libdecnumber/ChangeLog.linaro -+++ b/src/libdecnumber/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. 
-+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/gcc/LINARO-VERSION -+++ b/src/gcc/LINARO-VERSION -@@ -0,0 +1 @@ -+4.9-2015.01 ---- a/src/gcc/ira-conflicts.c -+++ b/src/gcc/ira-conflicts.c -@@ -774,6 +774,27 @@ - temp_hard_reg_set); - } - -+ /* Now we deal with paradoxical subreg cases where certain registers -+ cannot be accessed in the widest mode. */ -+ enum machine_mode outer_mode = ALLOCNO_WMODE (a); -+ enum machine_mode inner_mode = ALLOCNO_MODE (a); -+ if (GET_MODE_SIZE (outer_mode) > GET_MODE_SIZE (inner_mode)) -+ { -+ enum reg_class aclass = ALLOCNO_CLASS (a); -+ for (int j = ira_class_hard_regs_num[aclass] - 1; j >= 0; --j) -+ { -+ int inner_regno = ira_class_hard_regs[aclass][j]; -+ int outer_regno = simplify_subreg_regno (inner_regno, -+ inner_mode, 0, -+ outer_mode); -+ if (outer_regno < 0 -+ || !in_hard_reg_set_p (reg_class_contents[aclass], -+ outer_mode, outer_regno)) -+ SET_HARD_REG_BIT (OBJECT_CONFLICT_HARD_REGS (obj), -+ inner_regno); -+ } -+ } -+ - if (ALLOCNO_CALLS_CROSSED_NUM (a) != 0) - { - int regno; ---- a/src/gcc/targhooks.c -+++ b/src/gcc/targhooks.c -@@ -1357,7 +1357,62 @@ - #endif - } - -+/* For hooks which use the MOVE_RATIO macro, this gives the legacy default -+ behaviour. SPEED_P is true if we are compiling for speed. */ -+ -+static unsigned int -+get_move_ratio (bool speed_p ATTRIBUTE_UNUSED) -+{ -+ unsigned int move_ratio; -+#ifdef MOVE_RATIO -+ move_ratio = (unsigned int) MOVE_RATIO (speed_p); -+#else -+#if defined (HAVE_movmemqi) || defined (HAVE_movmemhi) || defined (HAVE_movmemsi) || defined (HAVE_movmemdi) || defined (HAVE_movmemti) -+ move_ratio = 2; -+#else /* No movmem patterns, pick a default. */ -+ move_ratio = ((speed_p) ? 15 : 3); -+#endif -+#endif -+ return move_ratio; -+} -+ -+/* Return TRUE if the move_by_pieces/set_by_pieces infrastructure should be -+ used; return FALSE if the movmem/setmem optab should be expanded, or -+ a call to memcpy emitted. 
*/ -+ - bool -+default_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size, -+ unsigned int alignment, -+ enum by_pieces_operation op, -+ bool speed_p) -+{ -+ unsigned int max_size = 0; -+ unsigned int ratio = 0; -+ -+ switch (op) -+ { -+ case CLEAR_BY_PIECES: -+ max_size = STORE_MAX_PIECES; -+ ratio = CLEAR_RATIO (speed_p); -+ break; -+ case MOVE_BY_PIECES: -+ max_size = MOVE_MAX_PIECES; -+ ratio = get_move_ratio (speed_p); -+ break; -+ case SET_BY_PIECES: -+ max_size = STORE_MAX_PIECES; -+ ratio = SET_RATIO (speed_p); -+ break; -+ case STORE_BY_PIECES: -+ max_size = STORE_MAX_PIECES; -+ ratio = get_move_ratio (speed_p); -+ break; -+ } -+ -+ return move_by_pieces_ninsns (size, alignment, max_size + 1) < ratio; -+} -+ -+bool - default_profile_before_prologue (void) - { - #ifdef PROFILE_BEFORE_PROLOGUE ---- a/src/gcc/targhooks.h -+++ b/src/gcc/targhooks.h -@@ -177,6 +177,11 @@ - extern int default_register_move_cost (enum machine_mode, reg_class_t, - reg_class_t); - -+extern bool default_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT, -+ unsigned int, -+ enum by_pieces_operation, -+ bool); -+ - extern bool default_profile_before_prologue (void); - extern reg_class_t default_preferred_reload_class (rtx, reg_class_t); - extern reg_class_t default_preferred_output_reload_class (rtx, reg_class_t); ---- a/src/gcc/cppbuiltin.c -+++ b/src/gcc/cppbuiltin.c -@@ -53,18 +53,41 @@ - *patchlevel = s_patchlevel; - } - -+/* Parse a LINAROVER version string of the format "M.m-year.month[-spin][~dev]" -+ to create Linaro release number YYYYMM and spin version. */ -+static void -+parse_linarover (int *release, int *spin) -+{ -+ static int s_year = -1, s_month, s_spin; - -+ if (s_year == -1) -+ if (sscanf (LINAROVER, "%*[^-]-%d.%d-%d", &s_year, &s_month, &s_spin) != 3) -+ { -+ sscanf (LINAROVER, "%*[^-]-%d.%d", &s_year, &s_month); -+ s_spin = 0; -+ } -+ -+ if (release) -+ *release = s_year * 100 + s_month; -+ -+ if (spin) -+ *spin = s_spin; -+} -+ - /* Define __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__ and __VERSION__. */ - static void - define__GNUC__ (cpp_reader *pfile) - { -- int major, minor, patchlevel; -+ int major, minor, patchlevel, linaro_release, linaro_spin; - - parse_basever (&major, &minor, &patchlevel); -+ parse_linarover (&linaro_release, &linaro_spin); - cpp_define_formatted (pfile, "__GNUC__=%d", major); - cpp_define_formatted (pfile, "__GNUC_MINOR__=%d", minor); - cpp_define_formatted (pfile, "__GNUC_PATCHLEVEL__=%d", patchlevel); - cpp_define_formatted (pfile, "__VERSION__=\"%s\"", version_string); -+ cpp_define_formatted (pfile, "__LINARO_RELEASE__=%d", linaro_release); -+ cpp_define_formatted (pfile, "__LINARO_SPIN__=%d", linaro_spin); - cpp_define_formatted (pfile, "__ATOMIC_RELAXED=%d", MEMMODEL_RELAXED); - cpp_define_formatted (pfile, "__ATOMIC_SEQ_CST=%d", MEMMODEL_SEQ_CST); - cpp_define_formatted (pfile, "__ATOMIC_ACQUIRE=%d", MEMMODEL_ACQUIRE); ---- a/src/gcc/tree-ssa-threadupdate.c -+++ b/src/gcc/tree-ssa-threadupdate.c -@@ -156,8 +156,9 @@ - bool registering) - { - fprintf (dump_file, -- " %s jump thread: (%d, %d) incoming edge; ", -+ " %s%s jump thread: (%d, %d) incoming edge; ", - (registering ? "Registering" : "Cancelling"), -+ (path[0]->type == EDGE_FSM_THREAD ? " FSM": ""), - path[0]->e->src->index, path[0]->e->dest->index); - - for (unsigned int i = 1; i < path.length (); i++) -@@ -1622,6 +1623,155 @@ - return false; - } - -+/* Verify that the REGION is a Single Entry Multiple Exits region: make sure no -+ edge other than ENTRY is entering the REGION. 
*/ -+ -+DEBUG_FUNCTION void -+verify_seme (edge entry, basic_block *region, unsigned n_region) -+{ -+ bitmap bbs = BITMAP_ALLOC (NULL); -+ -+ for (unsigned i = 0; i < n_region; i++) -+ bitmap_set_bit (bbs, region[i]->index); -+ -+ for (unsigned i = 0; i < n_region; i++) -+ { -+ edge e; -+ edge_iterator ei; -+ basic_block bb = region[i]; -+ -+ /* All predecessors other than ENTRY->src should be in the region. */ -+ for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei)); ei_next (&ei)) -+ if (e != entry) -+ gcc_assert (bitmap_bit_p (bbs, e->src->index)); -+ } -+ -+ BITMAP_FREE (bbs); -+} -+ -+/* Duplicates a Single Entry Multiple Exit REGION (set of N_REGION basic -+ blocks). The ENTRY edge is redirected to the duplicate of the region. If -+ REGION is not a Single Entry region, ignore any incoming edges other than -+ ENTRY: this makes the copied region a Single Entry region. -+ -+ Remove the last conditional statement in the last basic block in the REGION, -+ and create a single fallthru edge pointing to the same destination as the -+ EXIT edge. -+ -+ The new basic blocks are stored to REGION_COPY in the same order as they had -+ in REGION, provided that REGION_COPY is not NULL. -+ -+ Returns false if it is unable to copy the region, true otherwise. */ -+ -+static bool -+duplicate_seme_region (edge entry, edge exit, -+ basic_block *region, unsigned n_region, -+ basic_block *region_copy) -+{ -+ unsigned i; -+ bool free_region_copy = false, copying_header = false; -+ struct loop *loop = entry->dest->loop_father; -+ edge exit_copy; -+ edge redirected; -+ int total_freq = 0, entry_freq = 0; -+ gcov_type total_count = 0, entry_count = 0; -+ -+ if (!can_copy_bbs_p (region, n_region)) -+ return false; -+ -+ /* Some sanity checking. Note that we do not check for all possible -+ missuses of the functions. I.e. if you ask to copy something weird, -+ it will work, but the state of structures probably will not be -+ correct. */ -+ for (i = 0; i < n_region; i++) -+ { -+ /* We do not handle subloops, i.e. all the blocks must belong to the -+ same loop. */ -+ if (region[i]->loop_father != loop) -+ return false; -+ } -+ -+ initialize_original_copy_tables (); -+ -+ if (copying_header) -+ set_loop_copy (loop, loop_outer (loop)); -+ else -+ set_loop_copy (loop, loop); -+ -+ if (!region_copy) -+ { -+ region_copy = XNEWVEC (basic_block, n_region); -+ free_region_copy = true; -+ } -+ -+ if (entry->dest->count) -+ { -+ total_count = entry->dest->count; -+ entry_count = entry->count; -+ /* Fix up corner cases, to avoid division by zero or creation of negative -+ frequencies. */ -+ if (entry_count > total_count) -+ entry_count = total_count; -+ } -+ else -+ { -+ total_freq = entry->dest->frequency; -+ entry_freq = EDGE_FREQUENCY (entry); -+ /* Fix up corner cases, to avoid division by zero or creation of negative -+ frequencies. 
*/ -+ if (total_freq == 0) -+ total_freq = 1; -+ else if (entry_freq > total_freq) -+ entry_freq = total_freq; -+ } -+ -+ copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop, -+ split_edge_bb_loc (entry), 0); -+ if (total_count) -+ { -+ scale_bbs_frequencies_gcov_type (region, n_region, -+ total_count - entry_count, -+ total_count); -+ scale_bbs_frequencies_gcov_type (region_copy, n_region, entry_count, -+ total_count); -+ } -+ else -+ { -+ scale_bbs_frequencies_int (region, n_region, total_freq - entry_freq, -+ total_freq); -+ scale_bbs_frequencies_int (region_copy, n_region, entry_freq, total_freq); -+ } -+ -+#ifdef ENABLE_CHECKING -+ /* Make sure no edge other than ENTRY is entering the copied region. */ -+ verify_seme (entry, region_copy, n_region); -+#endif -+ -+ /* Remove the last branch in the jump thread path. */ -+ remove_ctrl_stmt_and_useless_edges (region_copy[n_region - 1], exit->dest); -+ edge e = make_edge (region_copy[n_region - 1], exit->dest, EDGE_FALLTHRU); -+ -+ if (e) { -+ rescan_loop_exit (e, true, false); -+ e->probability = REG_BR_PROB_BASE; -+ e->count = region_copy[n_region - 1]->count; -+ } -+ -+ /* Redirect the entry and add the phi node arguments. */ -+ redirected = redirect_edge_and_branch (entry, get_bb_copy (entry->dest)); -+ gcc_assert (redirected != NULL); -+ flush_pending_stmts (entry); -+ -+ /* Add the other PHI node arguments. */ -+ add_phi_args_after_copy (region_copy, n_region, NULL); -+ -+ if (free_region_copy) -+ free (region_copy); -+ -+ free_original_copy_tables (); -+ return true; -+} -+ - /* Walk through all blocks and thread incoming edges to the appropriate - outgoing edge for each edge pair recorded in THREADED_EDGES. - -@@ -1651,6 +1801,57 @@ - threaded_blocks = BITMAP_ALLOC (NULL); - memset (&thread_stats, 0, sizeof (thread_stats)); - -+ /* Jump-thread all FSM threads before other jump-threads. */ -+ for (i = 0; i < paths.length ();) -+ { -+ vec<jump_thread_edge *> *path = paths[i]; -+ edge entry = (*path)[0]->e; -+ -+ if ((*path)[0]->type != EDGE_FSM_THREAD -+ /* Do not jump-thread twice from the same block. */ -+ || bitmap_bit_p (threaded_blocks, entry->src->index)) { -+ i++; -+ continue; -+ } -+ -+ unsigned len = path->length (); -+ edge exit = (*path)[len - 1]->e; -+ basic_block *region = XNEWVEC (basic_block, len - 1); -+ -+ for (unsigned int j = 0; j < len - 1; j++) -+ region[j] = (*path)[j]->e->dest; -+ -+ if (duplicate_seme_region (entry, exit, region, len - 1, NULL)) -+ { -+ /* We do not update dominance info. */ -+ free_dominance_info (CDI_DOMINATORS); -+ bitmap_set_bit (threaded_blocks, entry->src->index); -+ retval = true; -+ } -+ -+ delete_jump_thread_path (path); -+ paths.unordered_remove (i); -+ } -+ -+ /* Remove from PATHS all the jump-threads starting with an edge already -+ jump-threaded. */ -+ for (i = 0; i < paths.length ();) -+ { -+ vec<jump_thread_edge *> *path = paths[i]; -+ edge entry = (*path)[0]->e; -+ -+ /* Do not jump-thread twice from the same block. 
*/ -+ if (bitmap_bit_p (threaded_blocks, entry->src->index)) -+ { -+ delete_jump_thread_path (path); -+ paths.unordered_remove (i); -+ } -+ else -+ i++; -+ } -+ -+ bitmap_clear (threaded_blocks); -+ - mark_threaded_blocks (threaded_blocks); - - initialize_original_copy_tables (); ---- a/src/gcc/tree-ssa-threadupdate.h -+++ b/src/gcc/tree-ssa-threadupdate.h -@@ -26,6 +26,7 @@ - enum jump_thread_edge_type - { - EDGE_START_JUMP_THREAD, -+ EDGE_FSM_THREAD, - EDGE_COPY_SRC_BLOCK, - EDGE_COPY_SRC_JOINER_BLOCK, - EDGE_NO_COPY_SRC_BLOCK ---- a/src/gcc/c-family/ChangeLog.linaro -+++ b/src/gcc/c-family/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/gcc/java/ChangeLog.linaro -+++ b/src/gcc/java/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. 
---- a/src/gcc/c/c-parser.c -+++ b/src/gcc/c/c-parser.c -@@ -4210,7 +4210,8 @@ - init.original_type = NULL; - c_parser_error (parser, "expected identifier"); - c_parser_skip_until_found (parser, CPP_COMMA, NULL); -- process_init_element (init, false, braced_init_obstack); -+ process_init_element (input_location, init, false, -+ braced_init_obstack); - return; - } - } -@@ -4342,7 +4343,8 @@ - init.original_type = NULL; - c_parser_error (parser, "expected %<=%>"); - c_parser_skip_until_found (parser, CPP_COMMA, NULL); -- process_init_element (init, false, braced_init_obstack); -+ process_init_element (input_location, init, false, -+ braced_init_obstack); - return; - } - } -@@ -4363,11 +4365,12 @@ - { - struct c_expr init; - gcc_assert (!after || c_dialect_objc ()); -+ location_t loc = c_parser_peek_token (parser)->location; -+ - if (c_parser_next_token_is (parser, CPP_OPEN_BRACE) && !after) - init = c_parser_braced_init (parser, NULL_TREE, true); - else - { -- location_t loc = c_parser_peek_token (parser)->location; - init = c_parser_expr_no_commas (parser, after); - if (init.value != NULL_TREE - && TREE_CODE (init.value) != STRING_CST -@@ -4374,7 +4377,7 @@ - && TREE_CODE (init.value) != COMPOUND_LITERAL_EXPR) - init = convert_lvalue_to_rvalue (loc, init, true, true); - } -- process_init_element (init, false, braced_init_obstack); -+ process_init_element (loc, init, false, braced_init_obstack); - } - - /* Parse a compound statement (possibly a function body) (C90 6.6.2, ---- a/src/gcc/c/c-typeck.c -+++ b/src/gcc/c/c-typeck.c -@@ -102,8 +102,8 @@ - static char *print_spelling (char *); - static void warning_init (int, const char *); - static tree digest_init (location_t, tree, tree, tree, bool, bool, int); --static void output_init_element (tree, tree, bool, tree, tree, int, bool, -- struct obstack *); -+static void output_init_element (location_t, tree, tree, bool, tree, tree, int, -+ bool, struct obstack *); - static void output_pending_init_elements (int, struct obstack *); - static int set_designator (int, struct obstack *); - static void push_range_stack (tree, struct obstack *); -@@ -7187,13 +7187,15 @@ - if ((TREE_CODE (constructor_type) == RECORD_TYPE - || TREE_CODE (constructor_type) == UNION_TYPE) - && constructor_fields == 0) -- process_init_element (pop_init_level (1, braced_init_obstack), -+ process_init_element (input_location, -+ pop_init_level (1, braced_init_obstack), - true, braced_init_obstack); - else if (TREE_CODE (constructor_type) == ARRAY_TYPE - && constructor_max_index - && tree_int_cst_lt (constructor_max_index, - constructor_index)) -- process_init_element (pop_init_level (1, braced_init_obstack), -+ process_init_element (input_location, -+ pop_init_level (1, braced_init_obstack), - true, braced_init_obstack); - else - break; -@@ -7393,10 +7395,9 @@ - /* When we come to an explicit close brace, - pop any inner levels that didn't have explicit braces. */ - while (constructor_stack->implicit) -- { -- process_init_element (pop_init_level (1, braced_init_obstack), -- true, braced_init_obstack); -- } -+ process_init_element (input_location, -+ pop_init_level (1, braced_init_obstack), -+ true, braced_init_obstack); - gcc_assert (!constructor_range_stack); - } - -@@ -7574,10 +7575,9 @@ - /* Designator list starts at the level of closest explicit - braces. 
*/ - while (constructor_stack->implicit) -- { -- process_init_element (pop_init_level (1, braced_init_obstack), -- true, braced_init_obstack); -- } -+ process_init_element (input_location, -+ pop_init_level (1, braced_init_obstack), -+ true, braced_init_obstack); - constructor_designated = 1; - return 0; - } -@@ -8197,9 +8197,9 @@ - existing initializer. */ - - static void --output_init_element (tree value, tree origtype, bool strict_string, tree type, -- tree field, int pending, bool implicit, -- struct obstack * braced_init_obstack) -+output_init_element (location_t loc, tree value, tree origtype, -+ bool strict_string, tree type, tree field, int pending, -+ bool implicit, struct obstack * braced_init_obstack) - { - tree semantic_type = NULL_TREE; - bool maybe_const = true; -@@ -8297,8 +8297,8 @@ - - if (semantic_type) - value = build1 (EXCESS_PRECISION_EXPR, semantic_type, value); -- value = digest_init (input_location, type, value, origtype, npc, -- strict_string, require_constant_value); -+ value = digest_init (loc, type, value, origtype, npc, strict_string, -+ require_constant_value); - if (value == error_mark_node) - { - constructor_erroneous = 1; -@@ -8425,8 +8425,8 @@ - { - if (tree_int_cst_equal (elt->purpose, - constructor_unfilled_index)) -- output_init_element (elt->value, elt->origtype, true, -- TREE_TYPE (constructor_type), -+ output_init_element (input_location, elt->value, elt->origtype, -+ true, TREE_TYPE (constructor_type), - constructor_unfilled_index, 0, false, - braced_init_obstack); - else if (tree_int_cst_lt (constructor_unfilled_index, -@@ -8480,8 +8480,8 @@ - if (tree_int_cst_equal (elt_bitpos, ctor_unfilled_bitpos)) - { - constructor_unfilled_fields = elt->purpose; -- output_init_element (elt->value, elt->origtype, true, -- TREE_TYPE (elt->purpose), -+ output_init_element (input_location, elt->value, elt->origtype, -+ true, TREE_TYPE (elt->purpose), - elt->purpose, 0, false, - braced_init_obstack); - } -@@ -8554,7 +8554,7 @@ - existing initializer. 
*/ - - void --process_init_element (struct c_expr value, bool implicit, -+process_init_element (location_t loc, struct c_expr value, bool implicit, - struct obstack * braced_init_obstack) - { - tree orig_value = value.value; -@@ -8598,7 +8598,7 @@ - if ((TREE_CODE (constructor_type) == RECORD_TYPE - || TREE_CODE (constructor_type) == UNION_TYPE) - && constructor_fields == 0) -- process_init_element (pop_init_level (1, braced_init_obstack), -+ process_init_element (loc, pop_init_level (1, braced_init_obstack), - true, braced_init_obstack); - else if ((TREE_CODE (constructor_type) == ARRAY_TYPE - || TREE_CODE (constructor_type) == VECTOR_TYPE) -@@ -8605,7 +8605,7 @@ - && constructor_max_index - && tree_int_cst_lt (constructor_max_index, - constructor_index)) -- process_init_element (pop_init_level (1, braced_init_obstack), -+ process_init_element (loc, pop_init_level (1, braced_init_obstack), - true, braced_init_obstack); - else - break; -@@ -8683,7 +8683,7 @@ - if (value.value) - { - push_member_name (constructor_fields); -- output_init_element (value.value, value.original_type, -+ output_init_element (loc, value.value, value.original_type, - strict_string, fieldtype, - constructor_fields, 1, implicit, - braced_init_obstack); -@@ -8775,7 +8775,7 @@ - if (value.value) - { - push_member_name (constructor_fields); -- output_init_element (value.value, value.original_type, -+ output_init_element (loc, value.value, value.original_type, - strict_string, fieldtype, - constructor_fields, 1, implicit, - braced_init_obstack); -@@ -8827,7 +8827,7 @@ - if (value.value) - { - push_array_bounds (tree_to_uhwi (constructor_index)); -- output_init_element (value.value, value.original_type, -+ output_init_element (loc, value.value, value.original_type, - strict_string, elttype, - constructor_index, 1, implicit, - braced_init_obstack); -@@ -8862,7 +8862,7 @@ - { - if (TREE_CODE (value.value) == VECTOR_CST) - elttype = TYPE_MAIN_VARIANT (constructor_type); -- output_init_element (value.value, value.original_type, -+ output_init_element (loc, value.value, value.original_type, - strict_string, elttype, - constructor_index, 1, implicit, - braced_init_obstack); -@@ -8891,7 +8891,7 @@ - else - { - if (value.value) -- output_init_element (value.value, value.original_type, -+ output_init_element (loc, value.value, value.original_type, - strict_string, constructor_type, - NULL_TREE, 1, implicit, - braced_init_obstack); -@@ -8910,8 +8910,8 @@ - while (constructor_stack != range_stack->stack) - { - gcc_assert (constructor_stack->implicit); -- process_init_element (pop_init_level (1, -- braced_init_obstack), -+ process_init_element (loc, -+ pop_init_level (1, braced_init_obstack), - true, braced_init_obstack); - } - for (p = range_stack; -@@ -8919,7 +8919,8 @@ - p = p->prev) - { - gcc_assert (constructor_stack->implicit); -- process_init_element (pop_init_level (1, braced_init_obstack), -+ process_init_element (loc, -+ pop_init_level (1, braced_init_obstack), - true, braced_init_obstack); - } - ---- a/src/gcc/c/c-tree.h -+++ b/src/gcc/c/c-tree.h -@@ -612,7 +612,8 @@ - extern struct c_expr pop_init_level (int, struct obstack *); - extern void set_init_index (tree, tree, struct obstack *); - extern void set_init_label (tree, struct obstack *); --extern void process_init_element (struct c_expr, bool, struct obstack *); -+extern void process_init_element (location_t, struct c_expr, bool, -+ struct obstack *); - extern tree build_compound_literal (location_t, tree, tree, bool); - extern void check_compound_literal_type 
(location_t, struct c_type_name *); - extern tree c_start_case (location_t, location_t, tree); ---- a/src/gcc/c/ChangeLog.linaro -+++ b/src/gcc/c/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/gcc/target.def -+++ b/src/gcc/target.def -@@ -3039,6 +3039,43 @@ - int, (enum machine_mode mode, reg_class_t rclass, bool in), - default_memory_move_cost) - -+DEFHOOK -+(use_by_pieces_infrastructure_p, -+ "GCC will attempt several strategies when asked to copy between\n\ -+two areas of memory, or to set, clear or store to memory, for example\n\ -+when copying a @code{struct}. The @code{by_pieces} infrastructure\n\ -+implements such memory operations as a sequence of load, store or move\n\ -+insns. 
Alternate strategies are to expand the\n\ -+@code{movmem} or @code{setmem} optabs, to emit a library call, or to emit\n\ -+unit-by-unit, loop-based operations.\n\ -+\n\ -+This target hook should return true if, for a memory operation with a\n\ -+given @var{size} and @var{alignment}, using the @code{by_pieces}\n\ -+infrastructure is expected to result in better code generation.\n\ -+Both @var{size} and @var{alignment} are measured in terms of storage\n\ -+units.\n\ -+\n\ -+The parameter @var{op} is one of: @code{CLEAR_BY_PIECES},\n\ -+@code{MOVE_BY_PIECES}, @code{SET_BY_PIECES}, @code{STORE_BY_PIECES}.\n\ -+These describe the type of memory operation under consideration.\n\ -+\n\ -+The parameter @var{speed_p} is true if the code is currently being\n\ -+optimized for speed rather than size.\n\ -+\n\ -+Returning true for higher values of @var{size} can improve code generation\n\ -+for speed if the target does not provide an implementation of the\n\ -+@code{movmem} or @code{setmem} standard names, if the @code{movmem} or\n\ -+@code{setmem} implementation would be more expensive than a sequence of\n\ -+insns, or if the overhead of a library call would dominate that of\n\ -+the body of the memory operation.\n\ -+\n\ -+Returning true for higher values of @code{size} may also cause an increase\n\ -+in code size, for example where the number of insns emitted to perform a\n\ -+move would be greater than that of a library call.", -+ bool, (unsigned HOST_WIDE_INT size, unsigned int alignment, -+ enum by_pieces_operation op, bool speed_p), -+ default_use_by_pieces_infrastructure_p) -+ - /* True for MODE if the target expects that registers in this mode will - be allocated to registers in a small register class. The compiler is - allowed to use registers explicitly used in the rtl as spill registers ---- a/src/gcc/optabs.c -+++ b/src/gcc/optabs.c -@@ -4234,7 +4234,7 @@ - y = const0_rtx; - } - -- *pmode = word_mode; -+ *pmode = ret_mode; - prepare_cmp_insn (x, y, comparison, NULL_RTX, unsignedp, methods, - ptest, pmode); - } ---- a/src/gcc/defaults.h -+++ b/src/gcc/defaults.h -@@ -914,14 +914,6 @@ - #define PREFERRED_DEBUGGING_TYPE NO_DEBUG - #endif - --#ifndef LARGEST_EXPONENT_IS_NORMAL --#define LARGEST_EXPONENT_IS_NORMAL(SIZE) 0 --#endif -- --#ifndef ROUND_TOWARDS_ZERO --#define ROUND_TOWARDS_ZERO 0 --#endif -- - #ifndef FLOAT_LIB_COMPARE_RETURNS_BOOL - #define FLOAT_LIB_COMPARE_RETURNS_BOOL(MODE, COMPARISON) false - #endif -@@ -1065,6 +1057,15 @@ - #define MOVE_MAX_PIECES MOVE_MAX - #endif - -+/* STORE_MAX_PIECES is the number of bytes at a time that we can -+ store efficiently. Due to internal GCC limitations, this is -+ MOVE_MAX_PIECES limited by the number of bytes GCC can represent -+ for an immediate constant. */ -+ -+#ifndef STORE_MAX_PIECES -+#define STORE_MAX_PIECES MIN (MOVE_MAX_PIECES, 2 * sizeof (HOST_WIDE_INT)) -+#endif -+ - #ifndef MAX_MOVE_MAX - #define MAX_MOVE_MAX MOVE_MAX - #endif ---- a/src/gcc/target.h -+++ b/src/gcc/target.h -@@ -78,6 +78,17 @@ - SWITCH_TYPE_LINE_END /* Please emit a line terminator. */ - }; - -+/* Types of memory operation understood by the "by_pieces" infrastructure. -+ Used by the TARGET_USE_BY_PIECES_INFRASTRUCTURE_P target hook. */ -+ -+enum by_pieces_operation -+{ -+ CLEAR_BY_PIECES, -+ MOVE_BY_PIECES, -+ SET_BY_PIECES, -+ STORE_BY_PIECES -+}; -+ - typedef int (* print_switch_fn_type) (print_switch_type, const char *); - - /* An example implementation for ELF targets. 
Defined in varasm.c */ ---- a/src/gcc/configure -+++ b/src/gcc/configure -@@ -1686,7 +1686,8 @@ - use sysroot as the system root during the build - --with-sysroot[=DIR] search for usr/lib, usr/include, et al, within DIR - --with-specs=SPECS add SPECS to driver command-line processing -- --with-pkgversion=PKG Use PKG in the version string in place of "GCC" -+ --with-pkgversion=PKG Use PKG in the version string in place of "Linaro -+ GCC `cat $srcdir/LINARO-VERSION`" - --with-bugurl=URL Direct users to URL to report a bug - --with-multilib-list select multilibs (AArch64, SH and x86-64 only) - --with-gnu-ld assume the C compiler uses GNU ld default=no -@@ -7231,7 +7232,7 @@ - *) PKGVERSION="($withval) " ;; - esac - else -- PKGVERSION="(GCC) " -+ PKGVERSION="(Linaro GCC `cat $srcdir/LINARO-VERSION`) " - - fi - -@@ -17936,7 +17937,7 @@ - lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 - lt_status=$lt_dlunknown - cat > conftest.$ac_ext <<_LT_EOF --#line 17939 "configure" -+#line 17940 "configure" - #include "confdefs.h" - - #if HAVE_DLFCN_H -@@ -18042,7 +18043,7 @@ - lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 - lt_status=$lt_dlunknown - cat > conftest.$ac_ext <<_LT_EOF --#line 18045 "configure" -+#line 18046 "configure" - #include "confdefs.h" - - #if HAVE_DLFCN_H ---- a/src/gcc/lra-eliminations.c -+++ b/src/gcc/lra-eliminations.c -@@ -1164,7 +1164,9 @@ - ep->from, ep->to); - /* If after processing RTL we decides that SP can be used as - a result of elimination, it can not be changed. */ -- gcc_assert (ep->to_rtx != stack_pointer_rtx); -+ gcc_assert ((ep->to_rtx != stack_pointer_rtx) -+ || (ep->from < FIRST_PSEUDO_REGISTER -+ && fixed_regs [ep->from])); - /* Mark that is not eliminable anymore. */ - elimination_map[ep->from] = NULL; - for (ep1 = ep + 1; ep1 < ®_eliminate[NUM_ELIMINABLE_REGS]; ep1++) ---- a/src/gcc/objc/ChangeLog.linaro -+++ b/src/gcc/objc/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/gcc/ChangeLog.linaro -+++ b/src/gcc/ChangeLog.linaro -@@ -0,0 +1,3211 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ * LINARO-VERSION: Update. -+ -+2015-01-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ Fix Linaro PR #902 -+ -+ Partial Backport from trunk r211798. 
-+ 2014-06-18 Radovan Obradovic <robradovic@mips.com> -+ Tom de Vries <tom@codesourcery.com> -+ -+ * config/arm/arm.c (arm_emit_call_insn): Add IP and CC clobbers to -+ CALL_INSN_FUNCTION_USAGE. -+ -+ Backport from trunk r209800. -+ 2014-04-25 Tom de Vries <tom@codesourcery.com> -+ -+ * expr.c (clobber_reg_mode): New function. -+ * expr.h (clobber_reg): New function. -+ -+2015-01-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211783. -+ 2014-06-18 Charles Baylis <charles.baylis@linaro.org> -+ -+ * config/arm/arm.c (neon_vector_mem_operand): Allow register -+ POST_MODIFY for neon loads and stores. -+ (arm_print_operand): Output post-index register for neon loads and -+ stores. -+ -+2015-01-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r218451. -+ 2014-12-06 James Greenhalgh <james.greenhalgh@arm.com> -+ Sebastian Pop <s.pop@samsung.com> -+ Brian Rzycki <b.rzycki@samsung.com> -+ -+ PR tree-optimization/54742 -+ * params.def (max-fsm-thread-path-insns, max-fsm-thread-length, -+ max-fsm-thread-paths): New. -+ -+ * doc/invoke.texi (max-fsm-thread-path-insns, max-fsm-thread-length, -+ max-fsm-thread-paths): Documented. -+ -+ * tree-cfg.c (split_edge_bb_loc): Export. -+ * tree-cfg.h (split_edge_bb_loc): Declared extern. -+ -+ * tree-ssa-threadedge.c (simplify_control_stmt_condition): Restore the -+ original value of cond when simplification fails. -+ (fsm_find_thread_path): New. -+ (fsm_find_control_statement_thread_paths): New. -+ (thread_through_normal_block): Call find_control_statement_thread_paths. -+ -+ * tree-ssa-threadupdate.c (dump_jump_thread_path): Pretty print -+ EDGE_FSM_THREAD. -+ (verify_seme): New. -+ (duplicate_seme_region): New. -+ (thread_through_all_blocks): Generate code for EDGE_FSM_THREAD edges -+ calling duplicate_seme_region. -+ -+ * tree-ssa-threadupdate.h (jump_thread_edge_type): Add EDGE_FSM_THREAD. -+ -+2015-01-13 Michael Collison <michael.collison@linaro.org> -+ -+ Backport from trunk r217394. -+ 2014-11-11 Andrew Pinski <apinski@cavium.com> -+ -+ Bug target/61997 -+ * config.gcc (aarch64*-*-*): Set target_gtfiles to include -+ aarch64-builtins.c. -+ * config/aarch64/aarch64-builtins.c: Include gt-aarch64-builtins.h -+ at the end of the file. -+ -+2015-01-13 Michael Collison <michael.collison@linaro.org> -+ -+ Backport from trunk r216267, r216547, r216548, r217072, r217192, r217405, -+ r217406, r217768. -+ 2014-11-19 Renlin Li <renlin.li@arm.com> -+ -+ * config/aarch64/aarch64.h (TARGET_CPU_CPP_BUILTINS): Define __ARM_FP_FAST, -+ __ARM_FEATURE_FMA, __ARM_FP, __ARM_FEATURE_NUMERIC_MAXMIN, __ARM_NEON_FP. -+ -+ 2014-11-12 Tejas Belagod <tejas.belagod@arm.com> -+ -+ * Makefile.in (TEXI_GCC_FILES): Remove arm-acle-intrinsics.texi, -+ arm-neon-intrinsics.texi, aarch64-acle-intrinsics.texi. -+ * doc/aarch64-acle-intrinsics.texi: Remove. -+ * doc/arm-acle-intrinsics.texi: Remove. -+ * doc/arm-neon-intrinsics.texi: Remove. -+ * doc/extend.texi: Consolidate sections AArch64 intrinsics, -+ ARM NEON Intrinsics, ARM ACLE Intrinsics into one ARM C Language -+ Extension section. Add references to public ACLE specification. -+ -+ 2014-11-06 Renlin Li <renlin.li@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_architecture_version): New. -+ (processor): New architecture_version field. -+ (aarch64_override_options): Initialize aarch64_architecture_version. -+ * config/aarch64/aarch64.h (TARGET_CPU_CPP_BUILTINS): Define __ARM_ARCH, -+ __ARM_ARCH_PROFILE, aarch64_arch_name macro. 
-+ -+ 2014-11-04 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> -+ -+ * config/arm/arm.h (TARGET_CPU_CPP_BUILTINS): Fix typo in definition -+ of __ARM_FEATURE_IDIV. -+ -+ 2014-10-22 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/arm/arm.h (TARGET_CPU_CPP_BUILTINS): Add missing '\'. -+ -+ 2014-10-22 Renlin Li <renlin.li@arm.com> -+ -+ * config/arm/arm.h (TARGET_CPU_CPP_BUILTINS): Define -+ __ARM_FEATURE_IDIV__. -+ -+ 2014-10-15 Renlin Li <renlin.li@arm.com> -+ -+ * config/aarch64/aarch64.h (TARGET_CPU_CPP_BUILTINS): Define -+ __ARM_BIG_ENDIAN, __ARM_SIZEOF_MINIMAL_ENUM. Add __ARM_64BIT_STATE, -+ __ARM_ARCH_ISA_A64, __ARM_FEATURE_CLZ, __ARM_FEATURE_IDIV, -+ __ARM_FEATURE_UNALIGNED, __ARM_PCS_AAPCS64, __ARM_SIZEOF_WCHAR_T. -+ -+2015-01-13 Michael Collison <michael.collison@linaro.org> -+ -+ Backport from trunk r211789, r211790, r211791, r211792, r211793, r211794, -+ r211795, r211796, r211797. -+ 2014-06-18 Charles Baylis <charles.baylis@linaro.org> -+ -+ * config/arm/bpabi.c (__gnu_uldivmod_helper): Remove. -+ -+ 2014-06-18 Charles Baylis <charles.baylis@linaro.org> -+ -+ * config/arm/bpabi-v6m.S (__aeabi_uldivmod): Perform division using -+ __udivmoddi4. -+ -+ 2014-06-18 Charles Baylis <charles.baylis@linaro.org> -+ -+ * config/arm/bpabi.S (__aeabi_ldivmod, __aeabi_uldivmod, -+ push_for_divide, pop_for_divide): Use .cfi_* directives for DWARF -+ annotations. Fix DWARF information. -+ -+ 2014-06-18 Charles Baylis <charles.baylis@linaro.org> -+ -+ * config/arm/bpabi.S (__aeabi_ldivmod): Perform division using -+ __udivmoddi4, and fixups for negative operands. -+ -+ 2014-06-18 Charles Baylis <charles.baylis@linaro.org> -+ -+ * config/arm/bpabi.S (__aeabi_ldivmod): Optimise stack manipulation. -+ -+ 2014-06-18 Charles Baylis <charles.baylis@linaro.org> -+ -+ * config/arm/bpabi.S (__aeabi_uldivmod): Perform division using call -+ to __udivmoddi4. -+ -+ 2014-06-18 Charles Baylis <charles.baylis@linaro.org> -+ -+ * config/arm/bpabi.S (__aeabi_uldivmod): Optimise stack pointer -+ manipulation. -+ -+ 2014-06-18 Charles Baylis <charles.baylis@linaro.org> -+ -+ * config/arm/bpabi.S (__aeabi_uldivmod, __aeabi_ldivmod): Add comment -+ describing register usage on function entry and exit. -+ -+ 2014-06-18 Charles Baylis <charles.baylis@linaro.org> -+ -+ * config/arm/bpabi.S (__aeabi_uldivmod): Fix whitespace. -+ (__aeabi_ldivmod): Fix whitespace. -+ -+2015-01-13 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r217593. -+ 2014-11-14 Andrew Pinski <apinski@cavium.com> -+ -+ * config/aarch64/aarch64-cores.def (thunderx): Change the scheduler -+ over to thunderx. -+ * config/aarch64/aarch64.md: Include thunderx.md. -+ (generic_sched): Set to no for thunderx. -+ * config/aarch64/thunderx.md: New file. -+ -+2015-01-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r217717. -+ 2014-11-18 Felix Yang <felix.yang@huawei.com> -+ -+ * config/aarch64/aarch64.c (doloop_end): New pattern. -+ * config/aarch64/aarch64.md (TARGET_CAN_USE_DOLOOP_P): Implement. -+ -+2015-01-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r217661. -+ 2014-11-17 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/aarch64/aarch64-cores.def (cortex-a53): Remove -+ AARCH64_FL_CRYPTO from feature flags. -+ (cortex-a57): Likewise. -+ (cortex-a57.cortex-a53): Likewise. -+ -+2015-01-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r218319. 
-+ 2014-12-03 Andrew Stubbs <ams@codesourcery.com> -+ -+ Revert: -+ -+ 2014-09-17 Andrew Stubbs <ams@codesourcery.com> -+ -+ * config/arm/arm.c (arm_option_override): Reject -mfpu=neon -+ when architecture is older than ARMv7. -+ -+2015-01-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r217691. -+ 2014-11-18 Jiong Wang <jiong.wang@arm.com> -+ -+ * lra-eliminations.c (update_reg_eliminate): Relax gcc_assert for fixed -+ registers. -+ -+2015-01-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215503. -+ 2014-09-23 Wilco Dijkstra <wdijkstr@arm.com> -+ -+ * common/config/aarch64/aarch64-common.c: -+ (default_options aarch_option_optimization_table): -+ Default to -fsched-pressure. -+ -+2015-01-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211132. -+ 2014-06-02 Tom de Vries <tom@codesourcery.com> -+ -+ * config/aarch64/aarch64.c (aarch64_float_const_representable_p): Handle -+ case that x has VOIDmode. -+ -+2015-01-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209620. -+ 2014-04-22 Vidya Praveen <vidyapraveen@arm.com> -+ -+ * aarch64.md (float<GPI:mode><GPF:mode>2): Remove. -+ (floatuns<GPI:mode><GPF:mode>2): Remove. -+ (<optab><fcvt_target><GPF:mode>2): New pattern for equal width float -+ and floatuns conversions. -+ (<optab><fcvt_iesize><GPF:mode>2): New pattern for inequal width float -+ and floatuns conversions. -+ * iterators.md (fcvt_target, FCVT_TARGET): Support SF and DF modes. -+ (w1,w2): New mode attributes for inequal width conversions. -+ -+2015-01-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r217362, r217546. -+ 2014-11-14 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> -+ -+ PR target/63724 -+ * config/aarch64/aarch64.c (aarch64_expand_mov_immediate): Split out -+ numerical immediate handling to... -+ (aarch64_internal_mov_immediate): ...this. New. -+ (aarch64_rtx_costs): Use aarch64_internal_mov_immediate. -+ (aarch64_mov_operand_p): Relax predicate. -+ * config/aarch64/aarch64.md (mov<mode>:GPI): Do not expand CONST_INTs. -+ (*movsi_aarch64): Turn into define_insn_and_split and new alternative -+ for 'n'. -+ (*movdi_aarch64): Likewise. -+ -+ 2014-11-11 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/aarch64/aarch64-simd.md -+ (aarch64_simd_bsl<mode>_internal): Remove float cases, canonicalize. -+ (aarch64_simd_bsl<mode>): Add gen_lowpart expressions where we -+ are punning between float vectors and integer vectors. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ * LINARO-VERSION: Bump version. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ * LINARO-VERSION: Update. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r217079, r217080. -+ 2014-11-04 Alan Lawrence <alan.lawrence@arm.com> -+ -+ config/arm/neon.md (reduc_smin_<mode> *2): Rename to... -+ (reduc_smin_scal_<mode> *2): ...this; extract scalar result. -+ (reduc_smax_<mode> *2): Rename to... -+ (reduc_smax_scal_<mode> *2): ...this; extract scalar result. -+ (reduc_umin_<mode> *2): Rename to... -+ (reduc_umin_scal_<mode> *2): ...this; extract scalar result. -+ (reduc_umax_<mode> *2): Rename to... -+ (reduc_umax_scal_<mode> *2): ...this; extract scalar result. -+ -+ 2014-11-04 Alan Lawrence <alan.lawrence@arm.com> -+ -+ config/arm/neon.md (reduc_plus_*): Rename to... -+ (reduc_plus_scal_*): ...this; reduce to temp and extract scalar result. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Fix Backport from trunk r216524 (committed at r218379). 
-+ Add missing file: config/aarch64/aarch64-cost-tables.h -+ -+ * config/aarch64/aarch64-cost-tables.h: New file. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r217076. -+ 2014-11-04 Michael Collison <michael.collison@linaro.org> -+ -+ * config/aarch64/iterators.md (lconst_atomic): New mode attribute -+ to support constraints for CONST_INT in atomic operations. -+ * config/aarch64/atomics.md -+ (atomic_<atomic_optab><mode>): Use lconst_atomic constraint. -+ (atomic_nand<mode>): Likewise. -+ (atomic_fetch_<atomic_optab><mode>): Likewise. -+ (atomic_fetch_nand<mode>): Likewise. -+ (atomic_<atomic_optab>_fetch<mode>): Likewise. -+ (atomic_nand_fetch<mode>): Likewise. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r217026. -+ 2014-11-03 Zhenqiang Chen <zhenqiang.chen@arm.com> -+ -+ * ifcvt.c (noce_emit_cmove, noce_get_alt_condition, noce_get_condition): -+ Allow CC mode if HAVE_cbranchcc4. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r217014. -+ 2014-11-02 Michael Collison <michael.collison@linaro.org> -+ -+ * config/arm/arm.h (CLZ_DEFINED_VALUE_AT_ZERO) : Update -+ to support vector modes. -+ (CTZ_DEFINED_VALUE_AT_ZERO): Ditto. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r216996, r216998, r216999, r217001, r217002, r217003, -+ r217004, r217742. -+ 2014-11-18 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ PR target/63937 -+ * target.def (use_by_pieces_infrastructure_p): Take unsigned -+ HOST_WIDE_INT as the size parameter. -+ * targhooks.c (default_use_by_pieces_infrastructure_p): Likewise. -+ * targhooks.h (default_use_by_pieces_infrastructure_p): Likewise. -+ * config/arc/arc.c (arc_use_by_pieces_infrastructure_p)): Likewise. -+ * config/mips/mips.c (mips_use_by_pieces_infrastructure_p)): Likewise. -+ * config/s390/s390.c (s390_use_by_pieces_infrastructure_p)): Likewise. -+ * config/sh/sh.c (sh_use_by_pieces_infrastructure_p)): Likewise. -+ * config/aarch64/aarch64.c -+ (aarch64_use_by_pieces_infrastructure_p)): Likewise. -+ * doc/tm.texi: Regenerate. -+ -+ 2014-11-01 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * doc/tm.texi.in (MOVE_BY_PIECES_P): Remove. -+ (CLEAR_BY_PIECES_P): Likewise. -+ (SET_BY_PIECES_P): Likewise. -+ (STORE_BY_PIECES_P): Likewise. -+ * doc/tm.texi: Regenerate. -+ * system.h: Poison MOVE_BY_PIECES_P, CLEAR_BY_PIECES_P, -+ SET_BY_PIECES_P, STORE_BY_PIECES_P. -+ * expr.c (MOVE_BY_PIECES_P): Remove. -+ (CLEAR_BY_PIECES_P): Likewise. -+ (SET_BY_PIECES_P): Likewise. -+ (STORE_BY_PIECES_P): Likewise. -+ (can_move_by_pieces): Rewrite in terms of -+ targetm.use_by_pieces_infrastructure_p. -+ (emit_block_move_hints): Likewise. -+ (can_store_by_pieces): Likewise. -+ (store_by_pieces): Likewise. -+ (clear_storage_hints): Likewise. -+ (emit_push_insn): Likewise. -+ (expand_constructor): Likewise. -+ -+ 2014-11-01 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/aarch64/aarch64.c -+ (aarch64_use_by_pieces_infrastructre_p): New. -+ (TARGET_USE_BY_PIECES_INFRASTRUCTURE): Likewise. -+ * config/aarch64/aarch64.h (STORE_BY_PIECES_P): Delete. -+ -+ 2014-11-01 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/mips/mips.h (MOVE_BY_PIECES_P): Remove. -+ (STORE_BY_PIECES_P): Likewise. -+ * config/mips/mips.c (TARGET_USE_BY_PIECES_INFRASTRUCTURE_P): New. -+ (mips_move_by_pieces_p): Rename to... -+ (mips_use_by_pieces_infrastructure_p): ...this, use new hook -+ parameters, use the default hook implementation as a -+ fall-back. 
-+ -+ 2014-11-01 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/sh/sh.c (TARGET_USE_BY_PIECES_INFRASTRUCTURE_P): New. -+ (sh_use_by_pieces_infrastructure_p): Likewise. -+ * config/sh/sh.h (MOVE_BY_PIECES_P): Remove. -+ (STORE_BY_PIECES_P): Likewise. -+ (SET_BY_PIECES_P): Likewise. -+ -+ 2014-11-01 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/arc/arc.c (TARGET_USE_BY_PIECES_INFRASTRUCTURE_P): New. -+ (arc_use_by_pieces_infrastructure_p): Likewise. -+ * confir/arc/arc.h (MOVE_BY_PIECES_P): Delete. -+ (CAN_MOVE_BY_PIECES): Likewise. -+ -+ 2014-11-01 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/s390/s390.c (s390_use_by_pieces_infrastructure_p): New. -+ (TARGET_USE_BY_PIECES_INFRASTRUCTURE_P): Likewise. -+ * config/s390/s390.h (MOVE_BY_PIECES_P): Remove. -+ (CLEAR_BY_PIECES): Likewise. -+ (SET_BY_PIECES): Likewise. -+ (STORE_BY_PIECES): Likewise. -+ -+ 2014-11-01 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * target.def (use_by_pieces_infrastructure_p): New. -+ * doc/tm.texi.in (MOVE_BY_PIECES_P): Describe that this macro -+ is deprecated. -+ (STORE_BY_PIECES_P): Likewise. -+ (CLEAR_BY_PIECES_P): Likewise. -+ (SET_BY_PIECES_P): Likewise. -+ (TARGET_MOVE_BY_PIECES_PROFITABLE_P): Add hook. -+ * doc/tm.texi: Regenerate. -+ * expr.c (MOVE_BY_PIECES_P): Rewrite in terms of -+ TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. -+ (STORE_BY_PIECES_P): Likewise. -+ (CLEAR_BY_PIECES_P): Likewise. -+ (SET_BY_PIECES_P): Likewise. -+ (STORE_MAX_PIECES): Move to... -+ * defaults.h (STORE_MAX_PIECES): ...here. -+ * targhooks.c (get_move_ratio): New. -+ (default_use_by_pieces_infrastructure_p): Likewise. -+ * targhooks.h (default_use_by_pieces_infrastructure_p): New. -+ * target.h (by_pieces_operation): New. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r216765. -+ 2014-10-27 Jiong Wang <jiong.wang@arm.com> -+ -+ PR target/63442 -+ * optabs.c (prepare_cmp_insn): Use "ret_mode" instead of "word_mode". -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r216630. -+ 2014-10-24 Felix Yang <felix.yang@huawei.com> -+ Jiji Jiang <jiangjiji@huawei.com> -+ -+ PR target/63173 -+ * config/aarch64/arm_neon.h (__LD2R_FUNC): Remove macro. -+ (__LD3R_FUNC): Ditto. -+ (__LD4R_FUNC): Ditto. -+ (vld2_dup_s8, vld2_dup_s16, vld2_dup_s32, vld2_dup_f32, vld2_dup_f64, -+ vld2_dup_u8, vld2_dup_u16, vld2_dup_u32, vld2_dup_p8, vld2_dup_p16 -+ vld2_dup_s64, vld2_dup_u64, vld2q_dup_s8, vld2q_dup_p8, -+ vld2q_dup_s16, vld2q_dup_p16, vld2q_dup_s32, vld2q_dup_s64, -+ vld2q_dup_u8, vld2q_dup_u16, vld2q_dup_u32, vld2q_dup_u64 -+ vld2q_dup_f32, vld2q_dup_f64): Rewrite using builtin functions. -+ (vld3_dup_s64, vld3_dup_u64, vld3_dup_f64, vld3_dup_s8 -+ vld3_dup_p8, vld3_dup_s16, vld3_dup_p16, vld3_dup_s32 -+ vld3_dup_u8, vld3_dup_u16, vld3_dup_u32, vld3_dup_f32 -+ vld3q_dup_s8, vld3q_dup_p8, vld3q_dup_s16, vld3q_dup_p16 -+ vld3q_dup_s32, vld3q_dup_s64, vld3q_dup_u8, vld3q_dup_u16 -+ vld3q_dup_u32, vld3q_dup_u64, vld3q_dup_f32, vld3q_dup_f64): Likewise. -+ (vld4_dup_s64, vld4_dup_u64, vld4_dup_f64, vld4_dup_s8 -+ vld4_dup_p8, vld4_dup_s16, vld4_dup_p16, vld4_dup_s32 -+ vld4_dup_u8, vld4_dup_u16, vld4_dup_u32, vld4_dup_f32 -+ vld4q_dup_s8, vld4q_dup_p8, vld4q_dup_s16, vld4q_dup_p16 -+ vld4q_dup_s32, vld4q_dup_s64, vld4q_dup_u8, vld4q_dup_u16 -+ vld4q_dup_u32, vld4q_dup_u64, vld4q_dup_f32, vld4q_dup_f64): Likewise. -+ * config/aarch64/aarch64.md (define_c_enum "unspec"): Add -+ UNSPEC_LD2_DUP, UNSPEC_LD3_DUP, UNSPEC_LD4_DUP. 
-+ * config/aarch64/aarch64-simd-builtins.def (ld2r, ld3r, ld4r): New -+ builtins. -+ * config/aarch64/aarch64-simd.md (aarch64_simd_ld2r<mode>): New pattern. -+ (aarch64_simd_ld3r<mode>): Likewise. -+ (aarch64_simd_ld4r<mode>): Likewise. -+ (aarch64_ld2r<mode>): New expand. -+ (aarch64_ld3r<mode>): Likewise. -+ (aarch64_ld4r<mode>): Likewise. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r217971. -+ 2014-11-22 Uros Bizjak <ubizjak@gmail.com> -+ -+ * params.def (PARAM_MAX_COMPLETELY_PEELED_INSNS): Increase to 200. -+ * config/i386/i386.c (ix86_option_override_internal): Do not increase -+ PARAM_MAX_COMPLETELY_PEELED_INSNS. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r216524. -+ 2014-10-21 Andrew Pinski <apinski@cavium.com> -+ -+ * doc/invoke.texi (AARCH64/mtune): Document thunderx as an -+ available option also. -+ * config/aarch64/aarch64-cost-tables.h: New file. -+ * config/aarch64/aarch64-cores.def (thunderx): New core. -+ * config/aarch64/aarch64-tune.md: Regenerate. -+ * config/aarch64/aarch64.c: Include aarch64-cost-tables.h instead -+ of config/arm/aarch-cost-tables.h. -+ (thunderx_regmove_cost): New variable. -+ (thunderx_tunings): New variable. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r216336. -+ 2014-10-16 Richard Earnshaw <rearnsha@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_legitimize_address): New function. -+ (TARGET_LEGITIMIZE_ADDRESS): Redefine. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r216253. -+ 2014-10-15 Renlin Li <renlin.li@arm.com> -+ -+ * config/aarch64/aarch64.h (ARM_DEFAULT_PCS, arm_pcs_variant): Delete. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215711. -+ 2014-09-30 Terry Guo <terry.guo@arm.com> -+ -+ * config/arm/arm-cores.def (cortex-m7): New core name. -+ * config/arm/arm-fpus.def (fpv5-sp-d16): New fpu name. -+ (fpv5-d16): Ditto. -+ * config/arm/arm-tables.opt: Regenerated. -+ * config/arm/arm-tune.md: Regenerated. -+ * config/arm/arm.h (TARGET_VFP5): New macro. -+ * config/arm/bpabi.h (BE8_LINK_SPEC): Include cortex-m7. -+ * config/arm/vfp.md (<vrint_pattern><SDF:mode>2, -+ smax<mode>3, smin<mode>3): Enabled for FPU FPv5. -+ * doc/invoke.texi: Document new cpu and fpu names. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215707, r215842. -+ 2014-10-03 David Sherwood <david.sherwood@arm.com> -+ -+ * ira-int.h (ira_allocno): Mark hard_regno as signed. -+ -+ 2014-09-30 David Sherwood <david.sherwood@arm.com> -+ -+ * ira-int.h (ira_allocno): Add "wmode" field. -+ * ira-build.c (create_insn_allocnos): Add new "parent" function -+ parameter. -+ * ira-conflicts.c (ira_build_conflicts): Add conflicts for registers -+ that cannot be accessed in wmode. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215540. -+ 2014-09-24 Zhenqiang Chen <zhenqiang.chen@arm.com> -+ -+ PR rtl-optimization/63210 -+ * ira-color.c (assign_hard_reg): Ignore conflict cost if the -+ HARD_REGNO is not available for CONFLICT_A. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215046. -+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ PR target/61749 -+ * config/aarch64/aarch64-builtins.c (aarch64_types_quadop_qualifiers): -+ Use qualifier_immediate for last operand. Rename to... -+ (aarch64_types_ternop_lane_qualifiers): ... This. -+ (TYPES_QUADOP): Rename to... -+ (TYPES_TERNOP_LANE): ... This. 
-+ (aarch64_simd_expand_args): Return const0_rtx when encountering user -+ error. Change return of 0 to return of NULL_RTX. -+ (aarch64_crc32_expand_builtin): Likewise. -+ (aarch64_expand_builtin): Return NULL_RTX instead of 0. -+ ICE when expanding unknown builtin. -+ * config/aarch64/aarch64-simd-builtins.def (sqdmlal_lane): Use -+ TERNOP_LANE qualifiers. -+ (sqdmlsl_lane): Likewise. -+ (sqdmlal_laneq): Likewise. -+ (sqdmlsl_laneq): Likewise. -+ (sqdmlal2_lane): Likewise. -+ (sqdmlsl2_lane): Likewise. -+ (sqdmlal2_laneq): Likewise. -+ (sqdmlsl2_laneq): Likewise. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215013. -+ 2014-09-08 Joseph Myers <joseph@codesourcery.com> -+ -+ * defaults.h (LARGEST_EXPONENT_IS_NORMAL, ROUND_TOWARDS_ZERO): -+ Remove. -+ * doc/tm.texi.in (ROUND_TOWARDS_ZERO, LARGEST_EXPONENT_IS_NORMAL): -+ Remove. -+ * doc/tm.texi: Regenerate. -+ * system.h (LARGEST_EXPONENT_IS_NORMAL, ROUND_TOWARDS_ZERO): -+ Poison. -+ * config/arm/arm.h (LARGEST_EXPONENT_IS_NORMAL): Remove. -+ * config/cris/cris.h (__make_dp): Remove. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r214952. -+ 2014-09-05 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * config/aarch64/arm_neon.h (__GET_HIGH): New macro. -+ (vget_high_f32, vget_high_f64, vget_high_p8, vget_high_p16, -+ vget_high_s8, vget_high_s16, vget_high_s32, vget_high_s64, -+ vget_high_u8, vget_high_u16, vget_high_u32, vget_high_u64): -+ Remove temporary __asm__ and reimplement. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r214948, r214949. -+ 2014-09-05 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * config/aarch64/aarch64-builtins.c (aarch64_fold_builtin): Remove code -+ handling cmge, cmgt, cmeq, cmtst. -+ -+ * config/aarch64/aarch64-simd-builtins.def (cmeq, cmge, cmgt, cmle, -+ cmlt, cmgeu, cmgtu, cmtst): Remove. -+ -+ * config/aarch64/arm_neon.h (vceq_*, vceqq_*, vceqz_*, vceqzq_*, -+ vcge_*, vcgeq_*, vcgez_*, vcgezq_*, vcgt_*, vcgtq_*, vcgtz_*, -+ vcgtzq_*, vcle_*, vcleq_*, vclez_*, vclezq_*, vclt_*, vcltq_*, -+ vcltz_*, vcltzq_*, vtst_*, vtstq_*): Use gcc vector extensions. -+ -+ 2014-09-05 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * config/aarch64/aarch64-builtins.c (aarch64_types_cmtst_qualifiers, -+ TYPES_TST): Define. -+ (aarch64_fold_builtin): Update pattern for cmtst. -+ -+ * config/aarch64/aarch64-protos.h (aarch64_const_vec_all_same_int_p): -+ Declare. -+ -+ * config/aarch64/aarch64-simd-builtins.def (cmtst): Update qualifiers. -+ -+ * config/aarch64/aarch64-simd.md (aarch64_vcond_internal<mode><mode>): -+ Switch operands, separate out more cases, refactor. -+ -+ (aarch64_cmtst<mode>): Rewrite pattern to match (plus ... -1). -+ -+ * config/aarch64.c (aarch64_const_vec_all_same_int_p): Take single -+ argument; rename old version to... -+ (aarch64_const_vec_all_same_in_range_p): ...this. -+ (aarch64_print_operand, aarch64_simd_shift_imm_p): Follow renaming. -+ -+ * config/aarch64/predicates.md (aarch64_simd_imm_minus_one): Define. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r214008. -+ 2014-08-15 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_expand_mov_immediate): Move -+ one_match > zero_match case to just before simple_sequence. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213382. -+ 2014-07-31 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/aarch64/arm_neon.h (vpadd_<suf><8,16,32,64>): Move to -+ correct alphabetical position. 
-+ (vpaddd_f64): Rewrite using builtins. -+ (vpaddd_s64): Move to correct alphabetical position. -+ (vpaddd_u64): New. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r210735, r215206, r215207, r215208. -+ 2014-09-12 Wilco Dijkstra <wilco.dijkstra@arm.com> -+ -+ * gcc/config/aarch64/aarch64.c (cortexa57_regmove_cost): New cost table -+ for A57. -+ (cortexa53_regmove_cost): New cost table for A53. Increase GP2FP/FP2GP -+ cost to spilling from integer to FP registers. -+ -+ 2014-09-12 Wilco Dijkstra <wilco.dijkstra@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_register_move_cost): Fix Q register -+ move handling. -+ (generic_regmove_cost): Undo raised FP2FP move cost as Q register moves -+ are now handled correctly. -+ -+ 2014-09-12 Wilco Dijkstra <wilco.dijkstra@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_register_move_cost): Add cost -+ handling of CALLER_SAVE_REGS and POINTER_REGS. -+ -+ 2014-05-22 Kugan Vivekanandarajah <kuganv@linaro.org> -+ -+ * config/aarch64/aarch64.c (aarch64_regno_regclass) : Change CORE_REGS -+ to GENERAL_REGS. -+ (aarch64_secondary_reload) : LikeWise. -+ (aarch64_class_max_nregs) : Remove CORE_REGS. -+ * config/aarch64/aarch64.h (enum reg_class) : Remove CORE_REGS. -+ (REG_CLASS_NAMES) : Likewise. -+ (REG_CLASS_CONTENTS) : LikeWise. -+ (INDEX_REG_CLASS) : Change CORE_REGS to GENERAL_REGS. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ * LINARO-VERSION: Bump version. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ * LINARO-VERSION: Update. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ Add Linaro release macros (Linaro only patch.) -+ -+ * Makefile.in (LINAROVER, LINAROVER_C, LINAROVER_S): Define. -+ (CFLAGS-cppbuiltin.o): Add LINAROVER macro definition. -+ (cppbuiltin.o): Depend on $(LINAROVER). -+ * cppbuiltin.c (parse_linarover): New. -+ (define_GNUC__): Define __LINARO_RELEASE__ and __LINARO_SPIN__ macros. -+ -+2014-11-13 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r216229, r216230. -+ 2014-10-14 Andrew Pinski <apinski@cavium.com> -+ -+ * explow.c (convert_memory_address_addr_space): Rename to ... -+ (convert_memory_address_addr_space_1): This. Add in_const argument. -+ Inside a CONST RTL, permute the conversion and addition of constant -+ for zero and sign extended pointers. -+ (convert_memory_address_addr_space): New function. -+ -+ 2014-10-14 Andrew Pinski <apinski@cavium.com> -+ -+ Revert: -+ 2011-08-19 H.J. Lu <hongjiu.lu@intel.com> -+ -+ PR middle-end/49721 -+ * explow.c (convert_memory_address_addr_space): Also permute the -+ conversion and addition of constant for zero-extend. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ * LINARO-VERSION: Bump version. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ * LINARO-VERSION: Update. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ * LINARO-VERSION: Bump version. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ * LINARO-VERSION: Update. -+ -+2014-10-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ Revert: -+ 2014-10-08 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215206, r215207, r215208. -+ 2014-09-12 Wilco Dijkstra <wilco.dijkstra@arm.com> -+ -+ * gcc/config/aarch64/aarch64.c (cortexa57_regmove_cost): New cost table -+ for A57. -+ (cortexa53_regmove_cost): New cost table for A53. Increase GP2FP/FP2GP -+ cost to spilling from integer to FP registers. 
-+ -+ 2014-09-12 Wilco Dijkstra <wilco.dijkstra@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_register_move_cost): Fix Q register -+ move handling. -+ (generic_regmove_cost): Undo raised FP2FP move cost as Q register moves -+ are now handled correctly. -+ -+ 2014-09-12 Wilco Dijkstra <wilco.dijkstra@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_register_move_cost): Add cost -+ handling of CALLER_SAVE_REGS and POINTER_REGS. -+ -+2014-10-08 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r214825, r214826. -+ 2014-09-02 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ PR target/62275 -+ * config/arm/neon.md -+ (neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode> -+ <v_cmp_result>): New pattern. -+ * config/arm/iterators.md (NEON_VCVT): New int iterator. -+ * config/arm/arm_neon_builtins.def (vcvtav2sf, vcvtav4sf, vcvtauv2sf, -+ vcvtauv4sf, vcvtpv2sf, vcvtpv4sf, vcvtpuv2sf, vcvtpuv4sf, vcvtmv2sf, -+ vcvtmv4sf, vcvtmuv2sf, vcvtmuv4sf): New builtin definitions. -+ * config/arm/arm.c (arm_builtin_vectorized_function): Handle -+ BUILT_IN_LROUNDF, BUILT_IN_LFLOORF, BUILT_IN_LCEILF. -+ -+ 2014-09-02 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ PR target/62275 -+ * config/arm/iterators.md (FIXUORS): New code iterator. -+ (VCVT): New int iterator. -+ (su_optab): New code attribute. -+ (su): Likewise. -+ * config/arm/vfp.md (l<vrint_pattern><su_optab><mode>si2): New pattern. -+ -+2014-10-08 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215471. -+ 2014-09-22 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/aarch64/geniterators.sh: New. -+ * config/aarch64/iterators.md (VDQF_DF): New. -+ * config/aarch64/t-aarch64: Generate aarch64-builtin-iterators.h. -+ * config/aarch64/aarch64-builtins.c (BUILTIN_*) Remove. -+ -+2014-10-08 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215206, r215207, r215208. -+ 2014-09-12 Wilco Dijkstra <wilco.dijkstra@arm.com> -+ -+ * gcc/config/aarch64/aarch64.c (cortexa57_regmove_cost): New cost table -+ for A57. -+ (cortexa53_regmove_cost): New cost table for A53. Increase GP2FP/FP2GP -+ cost to spilling from integer to FP registers. -+ -+ 2014-09-12 Wilco Dijkstra <wilco.dijkstra@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_register_move_cost): Fix Q register -+ move handling. -+ (generic_regmove_cost): Undo raised FP2FP move cost as Q register moves -+ are now handled correctly. -+ -+ 2014-09-12 Wilco Dijkstra <wilco.dijkstra@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_register_move_cost): Add cost -+ handling of CALLER_SAVE_REGS and POINTER_REGS. -+ -+2014-10-07 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r214824. -+ 2014-09-02 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/aarch64/predicates.md (aarch64_comparison_operation): -+ New special predicate. -+ * config/aarch64/aarch64.md (*csinc2<mode>_insn): Use -+ aarch64_comparison_operation instead of matching an operator. -+ Update operand numbers. -+ (csinc3<mode>_insn): Likewise. -+ (*csinv3<mode>_insn): Likewise. -+ (*csneg3<mode>_insn): Likewise. -+ (ffs<mode>2): Update gen_csinc3<mode>_insn callsite. -+ * config/aarch64/aarch64.c (aarch64_get_condition_code): -+ Return -1 instead of aborting on invalid condition codes. -+ (aarch64_print_operand): Update aarch64_get_condition_code callsites -+ to assert that the returned condition code is valid. -+ * config/aarch64/aarch64-protos.h (aarch64_get_condition_code): Export. 
-+ -+2014-10-07 Venkataramanan Kumar <venkataramanan.kumar@linaro.org> -+ -+ Backport from trunk r209643, r211881. -+ 2014-06-22 Richard Henderson <rth@redhat.com> -+ -+ PR target/61565 -+ * compare-elim.c (struct comparison): Add eh_note. -+ (find_comparison_dom_walker::before_dom_children): Don't eliminate -+ a redundant comparison in a different EH region. Purge EH edges if -+ necessary. -+ -+ 2014-04-22 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> -+ -+ * config/aarch64/aarch64.c (TARGET_FLAGS_REGNUM): Define. -+ -+2014-10-06 Charles Baylis <charles.baylis@linaro.org> -+ -+ Backport from trunk r214945. -+ 2014-09-05 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args): Replace -+ varargs with pointer parameter. -+ (aarch64_simd_expand_builtin): pass pointer into previous. -+ -+2014-10-06 Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org> -+ -+ Backport from trunk r214944. -+ 2014-09-05 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/cortex-a53.md (cortex_a53_alu_shift): Add alu_ext, -+ alus_ext. -+ -+2014-10-06 Venkataramanan Kumar <venkataramanan.kumar@linaro.org> -+ -+ Backport from trunk r214943. -+ 2014-09-05 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * config/aarch64/aarch64-simd.md (aarch64_rbit<mode>): New pattern. -+ * config/aarch64/aarch64-simd-builtins.def (rbit): New builtin. -+ * config/aarch64/arm_neon.h (vrbit_s8, vrbit_u8, vrbitq_s8, vrbitq_u8): -+ Replace temporary asm with call to builtin. -+ (vrbit_p8, vrbitq_p8): New functions. -+ -+2014-10-06 Michael Collison <michael.collison@linaro.org> -+ -+ Backport from trunk r214886. -+ 2014-09-03 Richard Henderson <rth@redhat.com> -+ -+ * config/aarch64/aarch64.c (aarch64_popwb_single_reg): Remove. -+ (aarch64_popwb_pair_reg): Remove. -+ (aarch64_set_frame_expr): Remove. -+ (aarch64_restore_callee_saves): Add CFI_OPS argument; fill it with -+ the restore ops performed by the insns generated. -+ (aarch64_expand_epilogue): Attach CFI_OPS to the stack deallocation -+ insn. Perform the calls_eh_return addition later; do not attempt to -+ preserve the CFA in that case. Don't use aarch64_set_frame_expr. -+ (aarch64_expand_prologue): Use REG_CFA_ADJUST_CFA directly, or no -+ special markup at all. Load cfun->machine->frame.hard_fp_offset -+ into a local variable. -+ (aarch64_frame_pointer_required): Don't check calls_alloca. -+ -+2014-10-06 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215385. -+ 2014-09-19 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/aarch64/aarch64.md (stack_protect_test_<mode>): Mark -+ scratch register as written. -+ -+2014-10-06 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215346. -+ 2014-09-18 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/neon.md (*movmisalign<mode>_neon_load): Change type -+ to neon_load1_1reg<q>. -+ -+2014-10-06 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215321. -+ 2014-09-17 Andrew Stubbs <ams@codesourcery.com> -+ -+ * config/arm/arm.c (arm_option_override): Reject -mfpu=neon -+ when architecture is older than ARMv7. -+ -+2014-10-06 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215260. -+ 2014-09-14 David Sherwood <david.sherwood@arm.com> -+ -+ * gcc.target/aarch64/vdup_lane_2.c (force_simd): Emit simd mov. -+ -+2014-10-06 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215205. 
-+ 2014-09-12 Wilco Dijkstra <wilco.dijkstra@arm.com> -+ -+ * gcc/ree.c (combine_reaching_defs): Ensure inserted copy don't change -+ the number of hard registers. -+ -+2014-10-06 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215136. -+ 2014-09-10 Xinliang David Li <davidxl@google.com> -+ -+ PR target/63209 -+ * config/arm/arm.md (movcond_addsi): Handle case where source -+ and target operands are the same. -+ -+2014-10-06 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215086. -+ 2014-09-09 Marcus Shawcroft <marcus.shawcroft@arm.com> -+ Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> -+ -+ * config/aarch64/aarch64-elf-raw.h (ENDFILE_SPEC): Add crtfastmath.o. -+ * config/aarch64/aarch64-linux.h (GNU_USER_TARGET_MATH_ENDFILE_SPEC): -+ Define. -+ (ENDFILE_SPEC): Define and use GNU_USER_TARGET_MATH_ENDFILE_SPEC. -+ -+2014-10-06 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215067. -+ 2014-09-09 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/arm/arm.c (NEON_COPYSIGNF): New enum. -+ (arm_init_neon_builtins): Support NEON_COPYSIGNF. -+ (arm_builtin_vectorized_function): Likewise. -+ * config/arm/arm_neon_builtins.def: New macro for copysignf. -+ * config/arm/neon.md (neon_copysignf<mode>): New pattern for vector -+ copysignf. -+ -+2014-10-03 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215050, r215051, r215052, r215053, r215054, -+ r215055, r215056. -+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/arm.md (vfp_pop_multiple_with_writeback): Use vldm -+ mnemonic instead of fldmfdd. -+ * config/arm/arm.c (vfp_output_fstmd): Rename to... -+ (vfp_output_vstmd): ... This. Convert output to UAL syntax. -+ Output vpush when address register is SP. -+ * config/arm/arm-protos.h (vfp_output_fstmd): Rename to... -+ (vfp_output_vstmd): ... This. -+ * config/arm/vfp.md (push_multi_vfp): Update call to -+ vfp_output_vstmd. -+ -+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/vfp.md (*movcc_vfp): Use UAL syntax. -+ -+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/vfp.md (*sqrtsf2_vfp): Use UAL assembly syntax. -+ (*sqrtdf2_vfp): Likewise. -+ (*cmpsf_vfp): Likewise. -+ (*cmpsf_trap_vfp): Likewise. -+ (*cmpdf_vfp): Likewise. -+ (*cmpdf_trap_vfp): Likewise. -+ -+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/vfp.md (*extendsfdf2_vfp): Use UAL assembly syntax. -+ (*truncdfsf2_vfp): Likewise. -+ (*truncsisf2_vfp): Likewise. -+ (*truncsidf2_vfp): Likewise. -+ (fixuns_truncsfsi2): Likewise. -+ (fixuns_truncdfsi2): Likewise. -+ (*floatsisf2_vfp): Likewise. -+ (*floatsidf2_vfp): Likewise. -+ (floatunssisf2): Likewise. -+ (floatunssidf2): Likewise. -+ -+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/vfp.md (*mulsf3_vfp): Use UAL assembly syntax. -+ (*muldf3_vfp): Likewise. -+ (*mulsf3negsf_vfp): Likewise. -+ (*muldf3negdf_vfp): Likewise. -+ (*mulsf3addsf_vfp): Likewise. -+ (*muldf3adddf_vfp): Likewise. -+ (*mulsf3subsf_vfp): Likewise. -+ (*muldf3subdf_vfp): Likewise. -+ (*mulsf3negsfaddsf_vfp): Likewise. -+ (*fmuldf3negdfadddf_vfp): Likewise. -+ (*mulsf3negsfsubsf_vfp): Likewise. -+ (*muldf3negdfsubdf_vfp): Likewise. -+ -+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/vfp.md (*abssf2_vfp): Use UAL assembly syntax. -+ (*absdf2_vfp): Likewise. -+ (*negsf2_vfp): Likewise. -+ (*negdf2_vfp): Likewise. -+ (*addsf3_vfp): Likewise. -+ (*adddf3_vfp): Likewise. -+ (*subsf3_vfp): Likewise. -+ (*subdf3_vfp): Likewise. 
-+ (*divsf3_vfp): Likewise. -+ (*divdf3_vfp): Likewise. -+ -+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/arm.c (output_move_vfp): Use UAL syntax for load/store -+ multiple. -+ (arm_print_operand): Don't convert real values to decimal -+ representation in default case. -+ (fp_immediate_constant): Delete. -+ * config/arm/arm-protos.h (fp_immediate_constant): Likewise. -+ * config/arm/vfp.md (*arm_movsi_vfp): Convert to VFP moves to UAL -+ syntax. -+ (*thumb2_movsi_vfp): Likewise. -+ (*movdi_vfp): Likewise. -+ (*movdi_vfp_cortexa8): Likewise. -+ (*movhf_vfp_neon): Likewise. -+ (*movhf_vfp): Likewise. -+ (*movsf_vfp): Likewise. -+ (*thumb2_movsf_vfp): Likewise. -+ (*movdf_vfp): Likewise. -+ (*thumb2_movdf_vfp): Likewise. -+ (*movsfcc_vfp): Likewise. -+ (*thumb2_movsfcc_vfp): Likewise. -+ (*movdfcc_vfp): Likewise. -+ (*thumb2_movdfcc_vfp): Likewise. -+ -+2014-10-03 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r214959. -+ 2014-09-05 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/cortex-a53.md (cortex_a53_fpalu): Add f_rints, f_rintd, -+ f_minmaxs, f_minmaxd types. -+ -+2014-10-03 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r214947. -+ 2014-09-05 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * config/aarch64/aarch64-builtins.c (enum aarch64_type_qualifiers): -+ Remove qualifier_const_pointer, update comment. -+ -+2014-10-03 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r214940. -+ 2014-09-05 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/aarch64/aarch64.md (sibcall_value_insn): Give operand 1 -+ DImode. -+ -+2014-10-03 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213090. -+ 2014-07-26 Andrew Pinski <apinski@cavium.com> -+ -+ * config/aarch64/aarch64.md (*extr_insv_lower_reg<mode>): Remove + -+ from the read only register. -+ -+2014-09-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ * LINARO-VERSION: Bump version. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ * LINARO-VERSION: Update. -+ -+2014-09-09 Venkataramanan Kumar <venkataramanan.kumar@linaro.org> -+ -+ Backport from trunk r215004. -+ 2014-09-07 Venkataramanan Kumar <venkataramanan.kumar@linaro.org> -+ -+ PR target/63190 -+ * config/aarch64/aarch64.md (stack_protect_test_<mode>) Add register -+ constraint for operand0 and remove write only modifier from operand3. -+ -+2014-09-09 Michael Collison <michael.collison@linaro.org> -+ -+ Backport from trunk r212178 -+ 2014-06-30 Joseph Myers <joseph@codesourcery.com> -+ -+ * var-tracking.c (add_stores): Return instead of asserting if old -+ and new values for conditional store are the same. -+ -+2014-09-03 Yvan Roux <yvan.roux@linaro.org> -+ -+ Revert: -+ 2014-09-03 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213712. -+ 2014-08-07 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/aarch64/aarch64.md (absdi2): Set simd attribute. -+ (aarch64_reload_mov<mode>): Predicate on TARGET_FLOAT. -+ (aarch64_movdi_<mode>high): Likewise. -+ (aarch64_mov<mode>high_di): Likewise. -+ (aarch64_movdi_<mode>low): Likewise. -+ (aarch64_mov<mode>low_di): Likewise. -+ (aarch64_movtilow_tilow): Likewise. -+ Add comment explaining usage of fp,simd attributes and of -+ TARGET_FLOAT and TARGET_SIMD. -+ -+2014-09-03 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213712. -+ 2014-08-07 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/aarch64/aarch64.md (absdi2): Set simd attribute. -+ (aarch64_reload_mov<mode>): Predicate on TARGET_FLOAT. 
-+ (aarch64_movdi_<mode>high): Likewise. -+ (aarch64_mov<mode>high_di): Likewise. -+ (aarch64_movdi_<mode>low): Likewise. -+ (aarch64_mov<mode>low_di): Likewise. -+ (aarch64_movtilow_tilow): Likewise. -+ Add comment explaining usage of fp,simd attributes and of -+ TARGET_FLOAT and TARGET_SIMD. -+ -+2014-09-03 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r214526. -+ 2014-08-26 Joseph Myers <joseph@codesourcery.com> -+ -+ PR target/60606 -+ PR target/61330 -+ * varasm.c (make_decl_rtl): Clear DECL_ASSEMBLER_NAME and -+ DECL_HARD_REGISTER and return for invalid register specifications. -+ * cfgexpand.c (expand_one_var): If expand_one_hard_reg_var clears -+ DECL_HARD_REGISTER, call expand_one_error_var. -+ * config/arm/arm.c (arm_hard_regno_mode_ok): Do not allow -+ CC_REGNUM with non-MODE_CC modes. -+ (arm_regno_class): Return NO_REGS for PC_REGNUM. -+ -+2014-09-03 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r214503. -+ 2014-08-26 Evandro Menezes <e.menezes@samsung.com> -+ -+ * config/arm/aarch64/aarch64.c (generic_addrcost_table): Delete -+ qi cost; add di cost. -+ (cortexa57_addrcost_table): Likewise. -+ -+2014-09-03 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213659. -+ 2014-08-06 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_evpc_dup): Enable for bigendian. -+ (aarch64_expand_vec_perm_const): Check for dup before zip. -+ -+2014-09-02 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213651. -+ 2014-08-06 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_classify_address): Use REG_P and -+ CONST_INT_P instead of GET_CODE and compare. -+ (aarch64_select_cc_mode): Likewise. -+ (aarch64_print_operand): Likewise. -+ (aarch64_rtx_costs): Likewise. -+ (aarch64_simd_valid_immediate): Likewise. -+ (aarch64_simd_check_vect_par_cnst_half): Likewise. -+ (aarch64_simd_emit_pair_result_insn): Likewise. -+ -+2014-08-29 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r212978. -+ 2014-07-24 Andreas Schwab <schwab@suse.de> -+ -+ * lib/target-supports.exp (check_effective_target_arm_nothumb): -+ Also check for __arm__. -+ -+2014-08-29 Christophe Lyon <christophe.lyon@linaro.org> -+ -+ Fix backport from trunk 211440: -+ * config.gcc (aarch64*-*-*): Restore need_64bit_hwint=yes. -+ -+ This is necessary to build aarch64* compilers on i686 host. -+ -+2014-08-26 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213627. -+ 2014-08-05 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/aarch64/aarch64-builtins.c -+ (aarch64_simd_builtin_type_mode): Delete. -+ (v8qi_UP): Remap to V8QImode. -+ (v4hi_UP): Remap to V4HImode. -+ (v2si_UP): Remap to V2SImode. -+ (v2sf_UP): Remap to V2SFmode. -+ (v1df_UP): Remap to V1DFmode. -+ (di_UP): Remap to DImode. -+ (df_UP): Remap to DFmode. -+ (v16qi_UP):V16QImode. -+ (v8hi_UP): Remap to V8HImode. -+ (v4si_UP): Remap to V4SImode. -+ (v4sf_UP): Remap to V4SFmode. -+ (v2di_UP): Remap to V2DImode. -+ (v2df_UP): Remap to V2DFmode. -+ (ti_UP): Remap to TImode. -+ (ei_UP): Remap to EImode. -+ (oi_UP): Remap to OImode. -+ (ci_UP): Map to CImode. -+ (xi_UP): Remap to XImode. -+ (si_UP): Remap to SImode. -+ (sf_UP): Remap to SFmode. -+ (hi_UP): Remap to HImode. -+ (qi_UP): Remap to QImode. -+ (aarch64_simd_builtin_datum): Make mode a machine_mode. -+ (VAR1): Build builtin name. -+ (aarch64_init_simd_builtins): Remove dead code. -+ -+2014-08-26 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213713. 
-+ 2014-08-07 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/arm.md (*cmov<mode>): Set type attribute to fcsel. -+ * config/arm/types.md (f_sels, f_seld): Delete. -+ -+2014-08-26 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213711. -+ 2014-08-07 Ian Bolton <ian.bolton@arm.com> -+ Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_expand_mov_immediate): -+ Use MOVN when one of the half-words is 0xffff. -+ -+2014-08-26 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213632. -+ 2014-08-05 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/cortex-a15.md (cortex_a15_alu_shift): Add crc type -+ to reservation. -+ * config/arm/cortex-a53.md (cortex_a53_alu_shift): Likewise. -+ -+2014-08-26 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213630. -+ 2014-08-05 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/arm.md (clzsi2): Set predicable_short_it attr to no. -+ (rbitsi2): Likewise. -+ (*arm_rev): Set predicable and predicable_short_it attributes. -+ -+2014-08-26 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213557. -+ 2014-08-04 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * doc/md.texi (clrsb): Document. -+ (clz): Change reference to x into operand 1. -+ (ctz): Likewise. -+ (popcount): Likewise. -+ -+2014-08-26 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213551, r213556. -+ 2014-08-04 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> -+ Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * sched-deps.c (try_group_insn): Generalise macro fusion hook usage -+ to any two insns. Update comment. Rename to sched_macro_fuse_insns. -+ (sched_analyze_insn): Update use of try_group_insn to -+ sched_macro_fuse_insns. -+ * config/i386/i386.c (ix86_macro_fusion_pair_p): Reject 2nd -+ arguments that are not conditional jumps. -+ -+2014-08-26 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213490. -+ 2014-08-01 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * config/aarch64/aarch64-simd-builtins.def (dup_lane, get_lane): Delete. -+ -+2014-08-26 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213488. -+ 2014-08-01 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_classify_address): Accept all offset -+ for frame access when strict_p is false. -+ -+2014-08-26 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213485, r213486, r213487. -+ 2014-08-01 Renlin Li <renlin.li@arm.com> -+ Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.c (offset_7bit_signed_scaled_p): Rename to -+ aarch64_offset_7bit_signed_scaled_p, remove static and use it. -+ * config/aarch64/aarch64-protos.h (aarch64_offset_7bit_signed_scaled_p): -+ Declaration. -+ * config/aarch64/predicates.md (aarch64_mem_pair_offset): Define new -+ predicate. -+ * config/aarch64/aarch64.md (loadwb_pair, storewb_pair): Use -+ aarch64_mem_pair_offset. -+ -+ 2014-08-01 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.md (loadwb_pair<GPI:mode>_<P:mode>): Fix -+ offset. -+ (loadwb_pair<GPI:mode>_<P:mode>): Likewise. -+ * config/aarch64/aarch64.c (aarch64_gen_loadwb_pair): Likewise. -+ -+2014-08-26 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213379. -+ 2014-07-31 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/aarch64/aarch64-builtins.c -+ (aarch64_gimple_fold_builtin): Don't fold reduction operations for -+ BYTES_BIG_ENDIAN. 
-+ -+2014-08-26 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213378. -+ 2014-07-31 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_simd_vect_par_cnst_half): Vary -+ the generated mask based on BYTES_BIG_ENDIAN. -+ (aarch64_simd_check_vect_par_cnst_half): New. -+ * config/aarch64/aarch64-protos.h -+ (aarch64_simd_check_vect_par_cnst_half): New. -+ * config/aarch64/predicates.md (vect_par_cnst_hi_half): Refactor -+ the check out to aarch64_simd_check_vect_par_cnst_half. -+ (vect_par_cnst_lo_half): Likewise. -+ * config/aarch64/aarch64-simd.md -+ (aarch64_simd_move_hi_quad_<mode>): Always use vec_par_cnst_lo_half. -+ (move_hi_quad_<mode>): Always generate a low mask. -+ -+2014-08-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r212927, r213304. -+ 2014-07-30 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/arm/arm.c (arm_get_frame_offsets): Adjust condition for -+ Thumb2. -+ -+ 2014-07-23 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/arm/arm.c (arm_get_frame_offsets): If both r3 and other -+ callee-saved registers are available for padding purpose -+ and r3 is not mandatory, then prefer use those callee-saved -+ instead of r3. -+ -+2014-08-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211717, r213692. -+ 2014-08-07 Kugan Vivekanandarajah <kuganv@linaro.org> -+ -+ * config/arm/arm.c (bdesc_2arg): Fix typo. -+ (arm_atomic_assign_expand_fenv): Remove The default implementation. -+ -+ 2014-06-17 Kugan Vivekanandarajah <kuganv@linaro.org> -+ -+ * config/arm/arm.c (arm_atomic_assign_expand_fenv): call -+ default_atomic_assign_expand_fenv for !TARGET_HARD_FLOAT. -+ (arm_init_builtins) : Initialize builtins __builtins_arm_set_fpscr and -+ __builtins_arm_get_fpscr only when TARGET_HARD_FLOAT. -+ * config/arm/vfp.md (set_fpscr): Make pattern conditional on -+ TARGET_HARD_FLOAT. -+ (get_fpscr) : Likewise. -+ -+2014-08-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r212989, r213628. -+ 2014-08-05 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * convert.c (convert_to_integer): Guard transformation to lrint by -+ -fno-math-errno. -+ -+ 2014-07-24 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ PR middle-end/61876 -+ * convert.c (convert_to_integer): Do not convert BUILT_IN_ROUND and cast -+ when flag_errno_math is on. -+ -+2014-08-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ * LINARO-VERSION: Bump version. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ * LINARO-VERSION: Update. -+ -+2014-08-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r212912, r212913. -+ 2014-07-22 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Handle CLRSB, CLZ. -+ (case UNSPEC): Handle UNSPEC_RBIT. -+ -+ 2014-07-22 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/aarch64/aarch64.md: Delete UNSPEC_CLS. -+ (clrsb<mode>2): Use clrsb RTL code instead of UNSPEC_CLS. -+ -+2014-08-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213555. -+ 2014-08-04 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ PR target/61713 -+ * gcc/optabs.c (expand_atomic_test_and_set): Do not try to emit -+ move to subtarget in serial version if result is ignored. -+ -+2014-08-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213376. -+ 2014-07-31 Charles Baylis <charles.baylis@linaro.org> -+ -+ PR target/61948 -+ * config/arm/neon.md (ashldi3_neon): Don't emit arm_ashldi3_1bit unless -+ constraints are satisfied. 
-+ (<shift>di3_neon): Likewise. -+ -+2014-08-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211270, r211271, r211273, r211275, r212943, -+ r212945, r212946, r212947, r212949, r212950, r212951, r212952, r212954, -+ r212955, r212956, r212957, r212958, r212976, r212996, r212997, r212999, -+ r213000. -+ 2014-07-24 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_popwb_single_reg): New function. -+ (aarch64_expand_epilogue): Optimize epilogue when !frame_pointer_needed. -+ -+ 2014-07-24 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_pushwb_single_reg): New function. -+ (aarch64_expand_prologue): Optimize prologue when !frame_pointer_needed. -+ -+ 2014-07-24 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_restore_callee_saves) -+ (aarch64_save_callee_saves): New parameter "skip_wb". -+ (aarch64_expand_prologue, aarch64_expand_epilogue): Update call site. -+ -+ 2014-07-24 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.h (frame): New fields "wb_candidate1" and -+ "wb_candidate2". -+ * config/aarch64/aarch64.c (aarch64_layout_frame): Initialize above. -+ -+ 2014-07-24 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_expand_epilogue): Don't -+ subtract outgoing area size when restoring stack_pointer_rtx. -+ -+ 2014-07-23 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_popwb_pair_reg) -+ (aarch64_gen_loadwb_pair): New helper function. -+ (aarch64_expand_epilogue): Simplify code using new helper functions. -+ * config/aarch64/aarch64.md (loadwb_pair<GPF:mode>_<P:mode>): Define. -+ -+ 2014-07-23 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_pushwb_pair_reg) -+ (aarch64_gen_storewb_pair): New helper function. -+ (aarch64_expand_prologue): Simplify code using new helper functions. -+ * config/aarch64/aarch64.md (storewb_pair<GPF:mode>_<P:mode>): Define. -+ -+ 2014-07-23 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.md: (aarch64_save_or_restore_callee_saves): -+ Rename to aarch64_save_callee_saves, remove restore code. -+ (aarch64_restore_callee_saves): New function. -+ -+ 2014-07-23 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_save_or_restore_fprs): Deleted. -+ (aarch64_save_callee_saves): New function to handle reg save -+ for both core and vectore regs. -+ -+ 2014-07-23 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_gen_load_pair) -+ (aarch64_gen_store_pair): New helper function. -+ (aarch64_save_or_restore_callee_save_registers) -+ (aarch64_save_or_restore_fprs): Use new helper functions. -+ -+ 2014-07-23 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_next_callee_save): New function. -+ (aarch64_save_or_restore_callee_save_registers) -+ (aarch64_save_or_restore_fprs): Use aarch64_next_callee_save. -+ -+ 2014-07-23 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.c -+ (aarch64_save_or_restore_callee_save_registers) -+ (aarch64_save_or_restore_fprs): Hoist calculation of register rtx. -+ -+ 2014-07-23 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.c -+ (aarch64_save_or_restore_callee_save_registers) -+ (aarch64_save_or_restore_fprs): Remove 'increment'. -+ -+ 2014-07-23 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.c -+ (aarch64_save_or_restore_callee_save_registers) -+ (aarch64_save_or_restore_fprs): Use register offset in -+ cfun->machine->frame.reg_offset. 
-+ -+ 2014-07-23 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.c -+ (aarch64_save_or_restore_callee_save_registers) -+ (aarch64_save_or_restore_fprs): Remove base_rtx. -+ -+ 2014-07-23 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.c -+ (aarch64_save_or_restore_callee_save_registers): Rename 'offset' -+ to 'start_offset'. Remove local variable 'start_offset'. -+ -+ 2014-07-23 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_save_or_restore_fprs): Change -+ type to HOST_WIDE_INT. -+ -+ 2014-07-23 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_expand_prologue) -+ (aarch64_save_or_restore_fprs) -+ (aarch64_save_or_restore_callee_save_registers): GNU-Stylize code. -+ -+ 2014-06-05 Marcus Shawcroft <marcus.shawcroft@arm.com> -+ -+ * config/aarch64/aarch64.h (aarch64_frame): Add hard_fp_offset and -+ frame_size. -+ * config/aarch64/aarch64.c (aarch64_layout_frame): Initialize -+ aarch64_frame hard_fp_offset and frame_size. -+ (aarch64_expand_prologue): Use aarch64_frame hard_fp_offset and -+ frame_size; remove original_frame_size. -+ (aarch64_expand_epilogue, aarch64_final_eh_return_addr): Likewise. -+ (aarch64_initial_elimination_offset): Remove frame_size and -+ offset. Use aarch64_frame frame_size. -+ -+ 2014-06-05 Marcus Shawcroft <marcus.shawcroft@arm.com> -+ Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_layout_frame): Correct -+ initialization of R30 offset. Update offset. Iterate core -+ regisers upto X30. Remove X29, X30 specific code. -+ -+ 2014-06-05 Marcus Shawcroft <marcus.shawcroft@arm.com> -+ Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.c (SLOT_NOT_REQUIRED, SLOT_REQUIRED): Define. -+ (aarch64_layout_frame): Use SLOT_NOT_REQUIRED and SLOT_REQUIRED. -+ (aarch64_register_saved_on_entry): Adjust test. -+ -+ 2014-06-05 Marcus Shawcroft <marcus.shawcroft@arm.com> -+ -+ * config/aarch64/aarch64.h (machine_function): Move -+ saved_varargs_size from here... -+ (aarch64_frameGTY): ... to here. -+ -+ * config/aarch64/aarch64.c (aarch64_expand_prologue) -+ (aarch64_expand_epilogue, aarch64_final_eh_return_addr) -+ (aarch64_initial_elimination_offset) -+ (aarch64_setup_incoming_varargs): Adjust location of -+ saved_varargs_size. -+ -+2014-08-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r212753. -+ 2014-07-17 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_frint_unspec_p): New function. -+ (aarch64_rtx_costs): Handle FIX, UNSIGNED_FIX, UNSPEC. -+ -+2014-08-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r212752. -+ 2014-07-17 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/aarch64/arm_neon.h (vmlal_high_lane_s16): Fix type. -+ (vmlal_high_lane_s32): Likewise. -+ (vmlal_high_lane_u16): Likewise. -+ (vmlal_high_lane_u32): Likewise. -+ (vmlsl_high_lane_s16): Likewise. -+ (vmlsl_high_lane_s32): Likewise. -+ (vmlsl_high_lane_u16): Likewise. -+ (vmlsl_high_lane_u32): Likewise. -+ -+2014-08-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r212512. -+ 2014-07-14 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/cortex-a15.md (cortex_a15_alu): Handle clz, rbit. -+ * config/arm/cortex-a5.md (cortex_a5_alu): Likewise. -+ * config/arm/cortex-a53.md (cortex_a53_alu): Likewise. -+ * config/arm/cortex-a7.md (cortex_a7_alu_reg): Likewise. -+ * config/arm/cortex-a9.md (cortex_a9_dp): Likewise. -+ * config/arm/cortex-m4.md (cortex_m4_alu): Likewise. 
-+ * config/arm/cortex-r4.md (cortex_r4_alu): Likewise. -+ -+2014-08-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r212358. -+ 2014-07-08 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/arm.c (cortexa5_extra_costs): New table. -+ (arm_cortex_a5_tune): Use cortexa5_extra_costs. -+ -+2014-08-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r212296. -+ 2014-07-04 Tom de Vries <tom@codesourcery.com> -+ -+ * config/aarch64/aarch64-simd.md -+ (define_insn "vec_unpack_trunc_<mode>"): Fix constraint. -+ -+2014-08-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r212142, r212225. -+ 2014-07-02 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_expand_vec_perm): Delete unused -+ variable i. -+ -+ 2014-06-30 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * config/aarch64/aarch64-simd.md (vec_perm): Enable for bigendian. -+ * config/aarch64/aarch64.c (aarch64_expand_vec_perm): Remove assert -+ against bigendian and adjust indices. -+ -+2014-08-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211779. -+ 2014-06-18 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/arm_neon.h (vadd_f32): Change #ifdef to __FAST_MATH. -+ -+2014-07-30 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211503. -+ 2014-06-12 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * config/aarch64/arm_neon.h (vmlaq_n_f64, vmlsq_n_f64, vrsrtsq_f64, -+ vcge_p8, vcgeq_p8, vcgez_p8, vcgez_u8, vcgez_u16, vcgez_u32, vcgez_u64, -+ vcgezq_p8, vcgezq_u8, vcgezq_u16, vcgezq_u32, vcgezq_u64, vcgezd_u64, -+ vcgt_p8, vcgtq_p8, vcgtz_p8, vcgtz_u8, vcgtz_u16, vcgtz_u32, vcgtz_u64, -+ vcgtzq_p8, vcgtzq_u8, vcgtzq_u16, vcgtzq_u32, vcgtzq_u64, vcgtzd_u64, -+ vcle_p8, vcleq_p8, vclez_p8, vclez_u64, vclezq_p8, vclezd_u64, vclt_p8, -+ vcltq_p8, vcltz_p8, vcltzq_p8, vcltzd_u64): Remove functions as they are -+ not in the spec. -+ -+2014-07-30 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211140. -+ 2014-06-02 Marcus Shawcroft <marcus.shawcroft@arm.com> -+ -+ * config/aarch64/aarch64.md (set_fpcr): Drop ISB after FPCR write. -+ -+2014-07-29 Yvan Roux <yvan.roux@linaro.org> -+ -+ * LINARO-VERSION: Bump version. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ * LINARO-VERSION: Update. -+ -+2014-07-20 Yvan Roux <yvan.roux@linaro.org> -+ -+ Revert: -+ 2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211129. -+ 2014-06-02 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> -+ -+ PR target/61154 -+ * config/arm/arm.h (TARGET_SUPPORTS_WIDE_INT): Define. -+ * config/arm/arm.md (mov64 splitter): Replace const_double_operand -+ with immediate_operand. -+ -+2014-07-19 Yvan Roux <yvan.roux@linaro.org> -+ -+ * LINARO-VERSION: Bump version. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ * LINARO-VERSION: Update. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211887, r211899. -+ 2014-06-23 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/aarch64/aarch64.md (addsi3_aarch64): Set "simd" attr to -+ "yes" where needed. -+ -+ 2014-06-23 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/aarch64/aarch64.md (*addsi3_aarch64): Add alternative in -+ vector registers. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211440. -+ 2014-06-11 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config.gcc (aarch64*-*-*): Add arm_acle.h to extra headers. 
-+ * Makefile.in (TEXI_GCC_FILES): Add aarch64-acle-intrinsics.texi to -+ dependencies. -+ * config/aarch64/aarch64-builtins.c (AARCH64_CRC32_BUILTINS): Define. -+ (aarch64_crc_builtin_datum): New struct. -+ (aarch64_crc_builtin_data): New. -+ (aarch64_init_crc32_builtins): New function. -+ (aarch64_init_builtins): Initialise CRC32 builtins when appropriate. -+ (aarch64_crc32_expand_builtin): New. -+ (aarch64_expand_builtin): Add CRC32 builtin expansion case. -+ * config/aarch64/aarch64.h (TARGET_CPU_CPP_BUILTINS): Define -+ __ARM_FEATURE_CRC32 when appropriate. -+ (TARGET_CRC32): Define. -+ * config/aarch64/aarch64.md (UNSPEC_CRC32B, UNSPEC_CRC32H, -+ UNSPEC_CRC32W, UNSPEC_CRC32X, UNSPEC_CRC32CB, UNSPEC_CRC32CH, -+ UNSPEC_CRC32CW, UNSPEC_CRC32CX): New unspec values. -+ (aarch64_<crc_variant>): New pattern. -+ * config/aarch64/arm_acle.h: New file. -+ * config/aarch64/iterators.md (CRC): New int iterator. -+ (crc_variant, crc_mode): New int attributes. -+ * doc/aarch64-acle-intrinsics.texi: New file. -+ * doc/extend.texi (aarch64): Document aarch64 ACLE intrinsics. -+ Include aarch64-acle-intrinsics.texi. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211174. -+ 2014-06-03 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * config/aarch64/aarch64-simd.md (aarch64_rev<REVERSE:rev-op><mode>): -+ New pattern. -+ * config/aarch64/aarch64.c (aarch64_evpc_rev): New function. -+ (aarch64_expand_vec_perm_const_1): Add call to aarch64_evpc_rev. -+ * config/aarch64/iterators.md (REVERSE): New iterator. -+ (UNSPEC_REV64, UNSPEC_REV32, UNSPEC_REV16): New enum elements. -+ (rev_op): New int_attribute. -+ * config/aarch64/arm_neon.h (vrev16_p8, vrev16_s8, vrev16_u8, -+ vrev16q_p8, vrev16q_s8, vrev16q_u8, vrev32_p8, vrev32_p16, vrev32_s8, -+ vrev32_s16, vrev32_u8, vrev32_u16, vrev32q_p8, vrev32q_p16, vrev32q_s8, -+ vrev32q_s16, vrev32q_u8, vrev32q_u16, vrev64_f32, vrev64_p8, -+ vrev64_p16, vrev64_s8, vrev64_s16, vrev64_s32, vrev64_u8, vrev64_u16, -+ vrev64_u32, vrev64q_f32, vrev64q_p8, vrev64q_p16, vrev64q_s8, -+ vrev64q_s16, vrev64q_s32, vrev64q_u8, vrev64q_u16, vrev64q_u32): -+ Replace temporary __asm__ with __builtin_shuffle. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r210216, r210218, r210219. -+ 2014-05-08 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> -+ -+ * config/arm/arm_neon.h: Update comment. -+ * config/arm/neon-docgen.ml: Delete. -+ * config/arm/neon-gen.ml: Delete. -+ * doc/arm-neon-intrinsics.texi: Update comment. -+ -+ 2014-05-08 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> -+ -+ * config/arm/arm_neon_builtins.def (vadd, vsub): Only define the v2sf -+ and v4sf versions. -+ (vand, vorr, veor, vorn, vbic): Remove. -+ * config/arm/neon.md (neon_vadd, neon_vsub, neon_vadd_unspec): Adjust -+ iterator. -+ (neon_vsub_unspec): Likewise. -+ (neon_vorr, neon_vand, neon_vbic, neon_veor, neon_vorn): Remove. -+ -+ 2014-05-08 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> -+ -+ * config/arm/arm_neon.h (vadd_s8): GNU C implementation -+ (vadd_s16): Likewise. -+ (vadd_s32): Likewise. -+ (vadd_f32): Likewise. -+ (vadd_u8): Likewise. -+ (vadd_u16): Likewise. -+ (vadd_u32): Likewise. -+ (vadd_s64): Likewise. -+ (vadd_u64): Likewise. -+ (vaddq_s8): Likewise. -+ (vaddq_s16): Likewise. -+ (vaddq_s32): Likewise. -+ (vaddq_s64): Likewise. -+ (vaddq_f32): Likewise. -+ (vaddq_u8): Likewise. -+ (vaddq_u16): Likewise. -+ (vaddq_u32): Likewise. -+ (vaddq_u64): Likewise. -+ (vmul_s8): Likewise. -+ (vmul_s16): Likewise. -+ (vmul_s32): Likewise. 
-+ (vmul_f32): Likewise. -+ (vmul_u8): Likewise. -+ (vmul_u16): Likewise. -+ (vmul_u32): Likewise. -+ (vmul_p8): Likewise. -+ (vmulq_s8): Likewise. -+ (vmulq_s16): Likewise. -+ (vmulq_s32): Likewise. -+ (vmulq_f32): Likewise. -+ (vmulq_u8): Likewise. -+ (vmulq_u16): Likewise. -+ (vmulq_u32): Likewise. -+ (vsub_s8): Likewise. -+ (vsub_s16): Likewise. -+ (vsub_s32): Likewise. -+ (vsub_f32): Likewise. -+ (vsub_u8): Likewise. -+ (vsub_u16): Likewise. -+ (vsub_u32): Likewise. -+ (vsub_s64): Likewise. -+ (vsub_u64): Likewise. -+ (vsubq_s8): Likewise. -+ (vsubq_s16): Likewise. -+ (vsubq_s32): Likewise. -+ (vsubq_s64): Likewise. -+ (vsubq_f32): Likewise. -+ (vsubq_u8): Likewise. -+ (vsubq_u16): Likewise. -+ (vsubq_u32): Likewise. -+ (vsubq_u64): Likewise. -+ (vand_s8): Likewise. -+ (vand_s16): Likewise. -+ (vand_s32): Likewise. -+ (vand_u8): Likewise. -+ (vand_u16): Likewise. -+ (vand_u32): Likewise. -+ (vand_s64): Likewise. -+ (vand_u64): Likewise. -+ (vandq_s8): Likewise. -+ (vandq_s16): Likewise. -+ (vandq_s32): Likewise. -+ (vandq_s64): Likewise. -+ (vandq_u8): Likewise. -+ (vandq_u16): Likewise. -+ (vandq_u32): Likewise. -+ (vandq_u64): Likewise. -+ (vorr_s8): Likewise. -+ (vorr_s16): Likewise. -+ (vorr_s32): Likewise. -+ (vorr_u8): Likewise. -+ (vorr_u16): Likewise. -+ (vorr_u32): Likewise. -+ (vorr_s64): Likewise. -+ (vorr_u64): Likewise. -+ (vorrq_s8): Likewise. -+ (vorrq_s16): Likewise. -+ (vorrq_s32): Likewise. -+ (vorrq_s64): Likewise. -+ (vorrq_u8): Likewise. -+ (vorrq_u16): Likewise. -+ (vorrq_u32): Likewise. -+ (vorrq_u64): Likewise. -+ (veor_s8): Likewise. -+ (veor_s16): Likewise. -+ (veor_s32): Likewise. -+ (veor_u8): Likewise. -+ (veor_u16): Likewise. -+ (veor_u32): Likewise. -+ (veor_s64): Likewise. -+ (veor_u64): Likewise. -+ (veorq_s8): Likewise. -+ (veorq_s16): Likewise. -+ (veorq_s32): Likewise. -+ (veorq_s64): Likewise. -+ (veorq_u8): Likewise. -+ (veorq_u16): Likewise. -+ (veorq_u32): Likewise. -+ (veorq_u64): Likewise. -+ (vbic_s8): Likewise. -+ (vbic_s16): Likewise. -+ (vbic_s32): Likewise. -+ (vbic_u8): Likewise. -+ (vbic_u16): Likewise. -+ (vbic_u32): Likewise. -+ (vbic_s64): Likewise. -+ (vbic_u64): Likewise. -+ (vbicq_s8): Likewise. -+ (vbicq_s16): Likewise. -+ (vbicq_s32): Likewise. -+ (vbicq_s64): Likewise. -+ (vbicq_u8): Likewise. -+ (vbicq_u16): Likewise. -+ (vbicq_u32): Likewise. -+ (vbicq_u64): Likewise. -+ (vorn_s8): Likewise. -+ (vorn_s16): Likewise. -+ (vorn_s32): Likewise. -+ (vorn_u8): Likewise. -+ (vorn_u16): Likewise. -+ (vorn_u32): Likewise. -+ (vorn_s64): Likewise. -+ (vorn_u64): Likewise. -+ (vornq_s8): Likewise. -+ (vornq_s16): Likewise. -+ (vornq_s32): Likewise. -+ (vornq_s64): Likewise. -+ (vornq_u8): Likewise. -+ (vornq_u16): Likewise. -+ (vornq_u32): Likewise. -+ (vornq_u64): Likewise. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r210151. -+ 2014-05-07 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * config/aarch64/arm_neon.h (vtrn1_f32, vtrn1_p8, vtrn1_p16, vtrn1_s8, -+ vtrn1_s16, vtrn1_s32, vtrn1_u8, vtrn1_u16, vtrn1_u32, vtrn1q_f32, -+ vtrn1q_f64, vtrn1q_p8, vtrn1q_p16, vtrn1q_s8, vtrn1q_s16, vtrn1q_s32, -+ vtrn1q_s64, vtrn1q_u8, vtrn1q_u16, vtrn1q_u32, vtrn1q_u64, vtrn2_f32, -+ vtrn2_p8, vtrn2_p16, vtrn2_s8, vtrn2_s16, vtrn2_s32, vtrn2_u8, -+ vtrn2_u16, vtrn2_u32, vtrn2q_f32, vtrn2q_f64, vtrn2q_p8, vtrn2q_p16, -+ vtrn2q_s8, vtrn2q_s16, vtrn2q_s32, vtrn2q_s64, vtrn2q_u8, vtrn2q_u16, -+ vtrn2q_u32, vtrn2q_u64): Replace temporary asm with __builtin_shuffle. 
-+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209794. -+ 2014-04-25 Marek Polacek <polacek@redhat.com> -+ -+ PR c/60114 -+ * c-parser.c (c_parser_initelt): Pass input_location to -+ process_init_element. -+ (c_parser_initval): Pass loc to process_init_element. -+ * c-tree.h (process_init_element): Adjust declaration. -+ * c-typeck.c (push_init_level): Pass input_location to -+ process_init_element. -+ (pop_init_level): Likewise. -+ (set_designator): Likewise. -+ (output_init_element): Add location_t parameter. Pass loc to -+ digest_init. -+ (output_pending_init_elements): Pass input_location to -+ output_init_element. -+ (process_init_element): Add location_t parameter. Pass loc to -+ output_init_element. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211771. -+ 2014-06-18 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * genattrtab.c (n_bypassed): New variable. -+ (process_bypasses): Initialise n_bypassed. -+ Count number of bypassed reservations. -+ (make_automaton_attrs): Allocate space for bypassed reservations -+ rather than number of bypasses. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r210861. -+ 2014-05-23 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/predicates.md (aarch64_call_insn_operand): New -+ predicate. -+ * config/aarch64/constraints.md ("Ucs", "Usf"): New constraints. -+ * config/aarch64/aarch64.md (*sibcall_insn, *sibcall_value_insn): -+ Adjust for tailcalling through registers. -+ * config/aarch64/aarch64.h (enum reg_class): New caller save -+ register class. -+ (REG_CLASS_NAMES): Likewise. -+ (REG_CLASS_CONTENTS): Likewise. -+ * config/aarch64/aarch64.c (aarch64_function_ok_for_sibcall): -+ Allow tailcalling without decls. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211314. -+ 2014-06-06 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/aarch64/aarch64-protos.h (aarch64_expand_movmem): New. -+ * config/aarch64/aarch64.c (aarch64_move_pointer): New. -+ (aarch64_progress_pointer): Likewise. -+ (aarch64_copy_one_part_and_move_pointers): Likewise. -+ (aarch64_expand_movmen): Likewise. -+ * config/aarch64/aarch64.h (MOVE_RATIO): Set low. -+ * config/aarch64/aarch64.md (movmem<mode>): New. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211185, 211186. -+ 2014-06-03 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc/config/aarch64/aarch64-builtins.c -+ (aarch64_types_binop_uus_qualifiers, -+ aarch64_types_shift_to_unsigned_qualifiers, -+ aarch64_types_unsigned_shiftacc_qualifiers): Define. -+ * gcc/config/aarch64/aarch64-simd-builtins.def (uqshl, uqrshl, uqadd, -+ uqsub, usqadd, usra_n, ursra_n, uqshrn_n, uqrshrn_n, usri_n, usli_n, -+ sqshlu_n, uqshl_n): Update qualifiers. 
-+ * gcc/config/aarch64/arm_neon.h (vqadd_u8, vqadd_u16, vqadd_u32,
-+ vqadd_u64, vqaddq_u8, vqaddq_u16, vqaddq_u32, vqaddq_u64, vqsub_u8,
-+ vqsub_u16, vqsub_u32, vqsub_u64, vqsubq_u8, vqsubq_u16, vqsubq_u32,
-+ vqsubq_u64, vqaddb_u8, vqaddh_u16, vqadds_u32, vqaddd_u64, vqrshl_u8,
-+ vqrshl_u16, vqrshl_u32, vqrshl_u64, vqrshlq_u8, vqrshlq_u16,
-+ vqrshlq_u32, vqrshlq_u64, vqrshlb_u8, vqrshlh_u16, vqrshls_u32,
-+ vqrshld_u64, vqrshrn_n_u16, vqrshrn_n_u32, vqrshrn_n_u64,
-+ vqrshrnh_n_u16, vqrshrns_n_u32, vqrshrnd_n_u64, vqshl_u8, vqshl_u16,
-+ vqshl_u32, vqshl_u64, vqshlq_u8, vqshlq_u16, vqshlq_u32, vqshlq_u64,
-+ vqshlb_u8, vqshlh_u16, vqshls_u32, vqshld_u64, vqshl_n_u8, vqshl_n_u16,
-+ vqshl_n_u32, vqshl_n_u64, vqshlq_n_u8, vqshlq_n_u16, vqshlq_n_u32,
-+ vqshlq_n_u64, vqshlb_n_u8, vqshlh_n_u16, vqshls_n_u32, vqshld_n_u64,
-+ vqshlu_n_s8, vqshlu_n_s16, vqshlu_n_s32, vqshlu_n_s64, vqshluq_n_s8,
-+ vqshluq_n_s16, vqshluq_n_s32, vqshluq_n_s64, vqshlub_n_s8,
-+ vqshluh_n_s16, vqshlus_n_s32, vqshlud_n_s64, vqshrn_n_u16,
-+ vqshrn_n_u32, vqshrn_n_u64, vqshrnh_n_u16, vqshrns_n_u32,
-+ vqshrnd_n_u64, vqsubb_u8, vqsubh_u16, vqsubs_u32, vqsubd_u64,
-+ vrsra_n_u8, vrsra_n_u16, vrsra_n_u32, vrsra_n_u64, vrsraq_n_u8,
-+ vrsraq_n_u16, vrsraq_n_u32, vrsraq_n_u64, vrsrad_n_u64, vsli_n_u8,
-+ vsli_n_u16, vsli_n_u32,vsli_n_u64, vsliq_n_u8, vsliq_n_u16,
-+ vsliq_n_u32, vsliq_n_u64, vslid_n_u64, vsqadd_u8, vsqadd_u16,
-+ vsqadd_u32, vsqadd_u64, vsqaddq_u8, vsqaddq_u16, vsqaddq_u32,
-+ vsqaddq_u64, vsqaddb_u8, vsqaddh_u16, vsqadds_u32, vsqaddd_u64,
-+ vsra_n_u8, vsra_n_u16, vsra_n_u32, vsra_n_u64, vsraq_n_u8,
-+ vsraq_n_u16, vsraq_n_u32, vsraq_n_u64, vsrad_n_u64, vsri_n_u8,
-+ vsri_n_u16, vsri_n_u32, vsri_n_u64, vsriq_n_u8, vsriq_n_u16,
-+ vsriq_n_u32, vsriq_n_u64, vsrid_n_u64): Remove casts.
-+
-+ 2014-06-03  Alan Lawrence  <alan.lawrence@arm.com>
-+
-+ * gcc/config/aarch64/aarch64-builtins.c
-+ (aarch64_types_binop_ssu_qualifiers): New static data.
-+ (TYPES_BINOP_SSU): Define.
-+ * gcc/config/aarch64/aarch64-simd-builtins.def (suqadd, ushl, urshl,
-+ urshr_n, ushll_n): Use appropriate unsigned qualifiers.
-+ * gcc/config/aarch64/arm_neon.h (vrshl_u8, vrshl_u16, vrshl_u32,
-+ vrshl_u64, vrshlq_u8, vrshlq_u16, vrshlq_u32, vrshlq_u64, vrshld_u64,
-+ vrshr_n_u8, vrshr_n_u16, vrshr_n_u32, vrshr_n_u64, vrshrq_n_u8,
-+ vrshrq_n_u16, vrshrq_n_u32, vrshrq_n_u64, vrshrd_n_u64, vshll_n_u8,
-+ vshll_n_u16, vshll_n_u32, vuqadd_s8, vuqadd_s16, vuqadd_s32,
-+ vuqadd_s64, vuqaddq_s8, vuqaddq_s16, vuqaddq_s32, vuqaddq_s64,
-+ vuqaddb_s8, vuqaddh_s16, vuqadds_s32, vuqaddd_s64): Add signedness
-+ suffix to builtin function name, remove cast.
-+ (vshl_s8, vshl_s16, vshl_s32, vshl_s64, vshl_u8, vshl_u16, vshl_u32,
-+ vshl_u64, vshlq_s8, vshlq_s16, vshlq_s32, vshlq_s64, vshlq_u8,
-+ vshlq_u16, vshlq_u32, vshlq_u64, vshld_s64, vshld_u64): Remove cast.
-+
-+2014-07-16  Yvan Roux  <yvan.roux@linaro.org>
-+
-+ Backport from trunk r211408, 211416.
-+ 2014-06-10  Marcus Shawcroft  <marcus.shawcroft@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_save_or_restore_fprs): Fix
-+ REG_CFA_RESTORE mode.
-+
-+ 2014-06-10  Jiong Wang  <jiong.wang@arm.com>
-+
-+ * config/aarch64/aarch64.c (aarch64_save_or_restore_fprs)
-+ (aarch64_save_or_restore_callee_save_registers): Fix layout.
-+
-+2014-07-16  Yvan Roux  <yvan.roux@linaro.org>
-+
-+ Backport from trunk r211418.
-+ 2014-06-10  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
-+
-+ * config/aarch64/aarch64-simd.md (move_lo_quad_<mode>):
-+ Change second alternative type to f_mcr.
-+ * config/aarch64/aarch64.md (*movsi_aarch64): Change 11th -+ and 12th alternatives' types to f_mcr and f_mrc. -+ (*movdi_aarch64): Same for 12th and 13th alternatives. -+ (*movsf_aarch64): Change 9th alternatives' type to mov_reg. -+ (aarch64_movtilow_tilow): Change type to fmov. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211371. -+ 2014-06-09 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> -+ -+ * config/arm/arm-modes.def: Remove XFmode. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211268. -+ 2014-06-05 Marcus Shawcroft <marcus.shawcroft@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_expand_prologue): Update stack -+ layout comment. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211129. -+ 2014-06-02 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> -+ -+ PR target/61154 -+ * config/arm/arm.h (TARGET_SUPPORTS_WIDE_INT): Define. -+ * config/arm/arm.md (mov64 splitter): Replace const_double_operand -+ with immediate_operand. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211073. -+ 2014-05-30 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/thumb2.md (*thumb2_movhi_insn): Set type of movw -+ to mov_imm. -+ * config/arm/vfp.md (*thumb2_movsi_vfp): Likewise. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211050. -+ 2014-05-29 Richard Earnshaw <rearnsha@arm.com> -+ Richard Sandiford <rdsandiford@googlemail.com> -+ -+ * arm/iterators.md (shiftable_ops): New code iterator. -+ (t2_binop0, arith_shift_insn): New code attributes. -+ * arm/predicates.md (shift_nomul_operator): New predicate. -+ * arm/arm.md (insn_enabled): Delete. -+ (enabled): Remove insn_enabled test. -+ (*arith_shiftsi): Delete. Replace with ... -+ (*<arith_shift_insn>_multsi): ... new pattern. -+ (*<arith_shift_insn>_shiftsi): ... new pattern. -+ * config/arm/arm.c (arm_print_operand): Handle operand format 'b'. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r210996. -+ 2014-05-27 Andrew Pinski <apinski@cavium.com> -+ -+ * config/aarch64/aarch64.md (stack_protect_set_<mode>): -+ Use <w> for the register in assembly template. -+ (stack_protect_test): Use the mode of operands[0] for the -+ result. -+ (stack_protect_test_<mode>): Use <w> for the register -+ in assembly template. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r210967. -+ 2014-05-27 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/neon.md (neon_bswap<mode>): New pattern. -+ * config/arm/arm.c (neon_itype): Add NEON_BSWAP. -+ (arm_init_neon_builtins): Handle NEON_BSWAP. -+ Define required type nodes. -+ (arm_expand_neon_builtin): Handle NEON_BSWAP. -+ (arm_builtin_vectorized_function): Handle BUILTIN_BSWAP builtins. -+ * config/arm/arm_neon_builtins.def (bswap): Define builtins. -+ * config/arm/iterators.md (VDQHSD): New mode iterator. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r210471. -+ 2014-05-15 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/arm.c (arm_option_override): Use the SCHED_PRESSURE_MODEL -+ enum name for PARAM_SCHED_PRESSURE_ALGORITHM. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r210369. -+ 2014-05-13 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/arm.c (neon_itype): Remove NEON_RESULTPAIR. -+ (arm_init_neon_builtins): Remove handling of NEON_RESULTPAIR. -+ Remove associated type declarations and initialisations. 
-+ (arm_expand_neon_builtin): Likewise. -+ (neon_emit_pair_result_insn): Delete. -+ * config/arm/arm_neon_builtins (vtrn, vzip, vuzp): Delete. -+ * config/arm/neon.md (neon_vtrn<mode>): Delete. -+ (neon_vzip<mode>): Likewise. -+ (neon_vuzp<mode>): Likewise. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211058, 211177. -+ 2014-05-29 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * config/aarch64/aarch64-builtins.c (aarch64_types_binopv_qualifiers, -+ TYPES_BINOPV): New static data. -+ * config/aarch64/aarch64-simd-builtins.def (im_lane_bound): New builtin. -+ * config/aarch64/aarch64-simd.md (aarch64_ext, aarch64_im_lane_boundsi): -+ New patterns. -+ * config/aarch64/aarch64.c (aarch64_expand_vec_perm_const_1): Match -+ patterns for EXT. -+ (aarch64_evpc_ext): New function. -+ -+ * config/aarch64/iterators.md (UNSPEC_EXT): New enum element. -+ -+ * config/aarch64/arm_neon.h (vext_f32, vext_f64, vext_p8, vext_p16, -+ vext_s8, vext_s16, vext_s32, vext_s64, vext_u8, vext_u16, vext_u32, -+ vext_u64, vextq_f32, vextq_f64, vextq_p8, vextq_p16, vextq_s8, -+ vextq_s16, vextq_s32, vextq_s64, vextq_u8, vextq_u16, vextq_u32, -+ vextq_u64): Replace __asm with __builtin_shuffle and im_lane_boundsi. -+ -+ 2014-06-03 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_evpc_ext): allow and handle -+ location == 0. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209797. -+ 2014-04-25 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/aarch-common.c (aarch_rev16_shright_mask_imm_p): -+ Use HOST_WIDE_INT_C for mask literal. -+ (aarch_rev16_shleft_mask_imm_p): Likewise. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211148. -+ 2014-06-02 Andrew Pinski <apinski@cavium.com> -+ -+ * config/aarch64/aarch64-linux.h (GLIBC_DYNAMIC_LINKER): -+ /lib/ld-linux32-aarch64.so.1 is used for ILP32. -+ (LINUX_TARGET_LINK_SPEC): Update linker script for ILP32. -+ file whose name depends on -mabi= and -mbig-endian. -+ * config/aarch64/t-aarch64-linux (MULTILIB_OSDIRNAMES): Handle LP64 -+ better and handle ilp32 too. -+ (MULTILIB_OPTIONS): Delete. -+ (MULTILIB_DIRNAMES): Delete. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r210828, r211103. -+ 2014-05-31 Kugan Vivekanandarajah <kuganv@linaro.org> -+ -+ * config/arm/arm.c (TARGET_ATOMIC_ASSIGN_EXPAND_FENV): New define. -+ (arm_builtins) : Add ARM_BUILTIN_GET_FPSCR and ARM_BUILTIN_SET_FPSCR. -+ (bdesc_2arg) : Add description for builtins __builtins_arm_set_fpscr -+ and __builtins_arm_get_fpscr. -+ (arm_init_builtins) : Initialize builtins __builtins_arm_set_fpscr and -+ __builtins_arm_get_fpscr. -+ (arm_expand_builtin) : Expand builtins __builtins_arm_set_fpscr and -+ __builtins_arm_ldfpscr. -+ (arm_atomic_assign_expand_fenv): New function. -+ * config/arm/vfp.md (set_fpscr): New pattern. -+ (get_fpscr) : Likewise. -+ * config/arm/unspecs.md (unspecv): Add VUNSPEC_GET_FPSCR and -+ VUNSPEC_SET_FPSCR. -+ * doc/extend.texi (AARCH64 Built-in Functions) : Document -+ __builtins_arm_set_fpscr, __builtins_arm_get_fpscr. -+ -+ 2014-05-23 Kugan Vivekanandarajah <kuganv@linaro.org> -+ -+ * config/aarch64/aarch64.c (TARGET_ATOMIC_ASSIGN_EXPAND_FENV): New -+ define. -+ * config/aarch64/aarch64-protos.h (aarch64_atomic_assign_expand_fenv): -+ New function declaration. -+ * config/aarch64/aarch64-builtins.c (aarch64_builtins) : Add -+ AARCH64_BUILTIN_GET_FPCR, AARCH64_BUILTIN_SET_FPCR. -+ AARCH64_BUILTIN_GET_FPSR and AARCH64_BUILTIN_SET_FPSR. 
-+ (aarch64_init_builtins) : Initialize builtins -+ __builtins_aarch64_set_fpcr, __builtins_aarch64_get_fpcr. -+ __builtins_aarch64_set_fpsr and __builtins_aarch64_get_fpsr. -+ (aarch64_expand_builtin) : Expand builtins __builtins_aarch64_set_fpcr -+ __builtins_aarch64_get_fpcr, __builtins_aarch64_get_fpsr, -+ and __builtins_aarch64_set_fpsr. -+ (aarch64_atomic_assign_expand_fenv): New function. -+ * config/aarch64/aarch64.md (set_fpcr): New pattern. -+ (get_fpcr) : Likewise. -+ (set_fpsr) : Likewise. -+ (get_fpsr) : Likewise. -+ (unspecv): Add UNSPECV_GET_FPCR and UNSPECV_SET_FPCR, UNSPECV_GET_FPSR -+ and UNSPECV_SET_FPSR. -+ * doc/extend.texi (AARCH64 Built-in Functions) : Document -+ __builtins_aarch64_set_fpcr, __builtins_aarch64_get_fpcr. -+ __builtins_aarch64_set_fpsr and __builtins_aarch64_get_fpsr. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r210355. -+ 2014-05-13 Ian Bolton <ian.bolton@arm.com> -+ -+ * config/aarch64/aarch64-protos.h -+ (aarch64_hard_regno_caller_save_mode): New prototype. -+ * config/aarch64/aarch64.c (aarch64_hard_regno_caller_save_mode): -+ New function. -+ * config/aarch64/aarch64.h (HARD_REGNO_CALLER_SAVE_MODE): New macro. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209943. -+ 2014-04-30 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * config/aarch64/arm_neon.h (vuzp1_f32, vuzp1_p8, vuzp1_p16, vuzp1_s8, -+ vuzp1_s16, vuzp1_s32, vuzp1_u8, vuzp1_u16, vuzp1_u32, vuzp1q_f32, -+ vuzp1q_f64, vuzp1q_p8, vuzp1q_p16, vuzp1q_s8, vuzp1q_s16, vuzp1q_s32, -+ vuzp1q_s64, vuzp1q_u8, vuzp1q_u16, vuzp1q_u32, vuzp1q_u64, vuzp2_f32, -+ vuzp2_p8, vuzp2_p16, vuzp2_s8, vuzp2_s16, vuzp2_s32, vuzp2_u8, -+ vuzp2_u16, vuzp2_u32, vuzp2q_f32, vuzp2q_f64, vuzp2q_p8, vuzp2q_p16, -+ vuzp2q_s8, vuzp2q_s16, vuzp2q_s32, vuzp2q_s64, vuzp2q_u8, vuzp2q_u16, -+ vuzp2q_u32, vuzp2q_u64): Replace temporary asm with __builtin_shuffle. -+ -+2014-06-26 Yvan Roux <yvan.roux@linaro.org> -+ -+ * LINARO-VERSION: Bump version. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ * LINARO-VERSION: Update. -+ -+2014-06-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ Revert: -+ 2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209643. -+ 2014-04-22 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> -+ -+ * config/aarch64/aarch64.c (TARGET_FLAGS_REGNUM): Define. -+ -+2014-06-13 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r210493, 210494, 210495, 210496, 210497, 210498, -+ 210499, 210500, 210501, 210502, 210503, 210504, 210505, 210506, 210507, -+ 210508, 210509, 210510, 210512, 211205, 211206. -+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/aarch64/aarch64-protos.h (scale_addr_mode_cost): New. -+ (cpu_addrcost_table): Use it. -+ * config/aarch64/aarch64.c (generic_addrcost_table): Initialize it. -+ (aarch64_address_cost): Rewrite using aarch64_classify_address, -+ move it. -+ -+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/aarch64/aarch64.c (cortexa57_addrcost_table): New. -+ (cortexa57_vector_cost): Likewise. -+ (cortexa57_tunings): Use them. -+ -+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_rtx_costs_wrapper): New. -+ (TARGET_RTX_COSTS): Call it. 
-+ -+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> -+ Philipp Tomsich <philipp.tomsich@theobroma-systems.com> -+ -+ * config/aarch64/aarch64.c (aarch64_build_constant): Conditionally -+ emit instructions, return number of instructions which would -+ be emitted. -+ (aarch64_add_constant): Update call to aarch64_build_constant. -+ (aarch64_output_mi_thunk): Likewise. -+ (aarch64_rtx_costs): Estimate cost of a CONST_INT, cost -+ a CONST_DOUBLE. -+ -+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> -+ Philipp Tomsich <philipp.tomsich@theobroma-systems.com> -+ -+ * config/aarch64/aarch64.c (aarch64_strip_shift_or_extend): Rename -+ to... -+ (aarch64_strip_extend): ...this, don't strip shifts, check RTX is -+ well formed. -+ (aarch64_rtx_mult_cost): New. -+ (aarch64_rtx_costs): Use it, refactor as appropriate. -+ -+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Set default costs. -+ -+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> -+ Philip Tomsich <philipp.tomsich@theobroma-systems.com> -+ -+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Improve costing -+ for SET RTX. -+ -+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> -+ Philipp Tomsich <philipp.tomsich@theobroma-systems.com> -+ -+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Use address -+ costs when costing loads and stores to memory. -+ -+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> -+ Philipp Tomsich <philipp.tomsich@theobroma-systems.com> -+ -+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Improve cost for -+ logical operations. -+ -+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> -+ Philipp Tomsich <philipp.tomsich@theobroma-systems.com> -+ -+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Cost -+ ZERO_EXTEND and SIGN_EXTEND better. -+ -+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> -+ Philipp Tomsich <philipp.tomsich@theobroma-systems.com> -+ -+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Improve costs for -+ rotates and shifts. -+ -+ 2014-03-16 James Greenhalgh <james.greenhalgh@arm.com> -+ Philipp Tomsich <philipp.tomsich@theobroma-systems.com> -+ -+ * config/aarch64/aarch64.c (aarch64_rtx_arith_op_extract_p): New. -+ (aarch64_rtx_costs): Improve costs for SIGN/ZERO_EXTRACT. -+ -+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> -+ Philipp Tomsich <philipp.tomsich@theobroma-systems.com> -+ -+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Improve costs for -+ DIV/MOD. -+ -+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> -+ Philipp Tomsich <philipp.tomsich@theobroma-systems.com> -+ -+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Cost comparison -+ operators. -+ -+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> -+ Philipp Tomsich <philipp.tomsich@theobroma-systems.com> -+ -+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Cost FMA, -+ FLOAT_EXTEND, FLOAT_TRUNCATE, ABS, SMAX, and SMIN. -+ -+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> -+ Philipp Tomsich <philipp.tomsich@theobroma-systems.com> -+ -+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Cost TRUNCATE. -+ -+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Cost SYMBOL_REF, -+ HIGH, LO_SUM. -+ -+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Handle the case -+ where we were unable to cost an RTX. 
-+ -+ 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_rtx_mult_cost): Fix FNMUL case. -+ -+ 2014-06-03 Andrew Pinski <apinski@cavium.com> -+ -+ * config/aarch64/aarch64.c (aarch64_if_then_else_costs): New function. -+ (aarch64_rtx_costs): Use aarch64_if_then_else_costs. -+ -+ 2014-06-03 Andrew Pinski <apinski@cavium.com> -+ -+ * config/aarch64/aarch64.c (aarch64_if_then_else_costs): Allow non -+ comparisons for OP0. -+ -+2014-06-13 Yvan Roux <yvan.roux@linaro.org> -+ -+ * LINARO-VERSION: Bump version. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ * LINARO-VERSION: Update. -+ -+2014-06-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211211. -+ 2014-06-04 Bin Cheng <bin.cheng@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_classify_address) -+ (aarch64_legitimize_reload_address): Support full addressing modes -+ for vector modes. -+ * config/aarch64/aarch64.md (mov<mode>, movmisalign<mode>) -+ (*aarch64_simd_mov<mode>, *aarch64_simd_mov<mode>): Relax predicates. -+ -+2014-05-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209906. -+ 2014-04-29 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * config/aarch64/arm_neon.h (vzip1_f32, vzip1_p8, vzip1_p16, vzip1_s8, -+ vzip1_s16, vzip1_s32, vzip1_u8, vzip1_u16, vzip1_u32, vzip1q_f32, -+ vzip1q_f64, vzip1q_p8, vzip1q_p16, vzip1q_s8, vzip1q_s16, vzip1q_s32, -+ vzip1q_s64, vzip1q_u8, vzip1q_u16, vzip1q_u32, vzip1q_u64, vzip2_f32, -+ vzip2_p8, vzip2_p16, vzip2_s8, vzip2_s16, vzip2_s32, vzip2_u8, -+ vzip2_u16, vzip2_u32, vzip2q_f32, vzip2q_f64, vzip2q_p8, vzip2q_p16, -+ vzip2q_s8, vzip2q_s16, vzip2q_s32, vzip2q_s64, vzip2q_u8, vzip2q_u16, -+ vzip2q_u32, vzip2q_u64): Replace inline __asm__ with __builtin_shuffle. -+ -+2014-05-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209897. -+ 2014-04-29 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * calls.c (initialize_argument_information): Always treat -+ PUSH_ARGS_REVERSED as 1, simplify code accordingly. -+ (expand_call): Likewise. -+ (emit_library_call_calue_1): Likewise. -+ * expr.c (PUSH_ARGS_REVERSED): Do not define. -+ (emit_push_insn): Always treat PUSH_ARGS_REVERSED as 1, simplify -+ code accordingly. -+ -+2014-05-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209880. -+ 2014-04-28 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/aarch64/aarch64-builtins.c -+ (aarch64_types_storestruct_lane_qualifiers): New. -+ (TYPES_STORESTRUCT_LANE): Likewise. -+ * config/aarch64/aarch64-simd-builtins.def (st2_lane): New. -+ (st3_lane): Likewise. -+ (st4_lane): Likewise. -+ * config/aarch64/aarch64-simd.md (vec_store_lanesoi_lane<mode>): New. -+ (vec_store_lanesci_lane<mode>): Likewise. -+ (vec_store_lanesxi_lane<mode>): Likewise. -+ (aarch64_st2_lane<VQ:mode>): Likewise. -+ (aarch64_st3_lane<VQ:mode>): Likewise. -+ (aarch64_st4_lane<VQ:mode>): Likewise. -+ * config/aarch64/aarch64.md (unspec): Add UNSPEC_ST{2,3,4}_LANE. -+ * config/aarch64/arm_neon.h -+ (__ST2_LANE_FUNC): Rewrite using builtins, update use points to -+ use new macro arguments. -+ (__ST3_LANE_FUNC): Likewise. -+ (__ST4_LANE_FUNC): Likewise. -+ * config/aarch64/iterators.md (V_TWO_ELEM): New. -+ (V_THREE_ELEM): Likewise. -+ (V_FOUR_ELEM): Likewise. -+ -+2014-05-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209878. -+ 2014-04-28 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/aarch64/aarch64-protos.h (aarch64_modes_tieable_p): New. 
-+ * config/aarch64/aarch64.c -+ (aarch64_cannot_change_mode_class): Weaken conditions. -+ (aarch64_modes_tieable_p): New. -+ * config/aarch64/aarch64.h (MODES_TIEABLE_P): Use it. -+ -+2014-05-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209808. -+ 2014-04-25 Jiong Wang <jiong.wang@arm.com> -+ -+ * config/arm/predicates.md (call_insn_operand): Add long_call check. -+ * config/arm/arm.md (sibcall, sibcall_value): Force the address to -+ reg for long_call. -+ * config/arm/arm.c (arm_function_ok_for_sibcall): Remove long_call -+ restriction. -+ -+2014-05-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209806. -+ 2014-04-25 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/arm.c (arm_cortex_a8_tune): Initialise -+ T16-related fields. -+ -+2014-05-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209742, 209749. -+ 2014-04-24 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_evpc_tbl): Enable for bigendian. -+ -+ 2014-04-24 Tejas Belagod <tejas.belagod@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_evpc_tbl): Reverse order of elements -+ for big-endian. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209736. -+ 2014-04-24 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/aarch64/aarch64-builtins.c -+ (aarch64_builtin_vectorized_function): Handle BUILT_IN_BSWAP16, -+ BUILT_IN_BSWAP32, BUILT_IN_BSWAP64. -+ * config/aarch64/aarch64-simd.md (bswap<mode>): New pattern. -+ * config/aarch64/aarch64-simd-builtins.def: Define vector bswap -+ builtins. -+ * config/aarch64/iterator.md (VDQHSD): New mode iterator. -+ (Vrevsuff): New mode attribute. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209712. -+ 2014-04-23 Venkataramanan Kumar <venkataramanan.kumar@linaro.org> -+ -+ * config/aarch64/aarch64.md (stack_protect_set, stack_protect_test) -+ (stack_protect_set_<mode>, stack_protect_test_<mode>): Add -+ machine descriptions for Stack Smashing Protector. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209711. -+ 2014-04-23 Richard Earnshaw <rearnsha@arm.com> -+ -+ * aarch64.md (<optab>_rol<mode>3): New pattern. -+ (<optab>_rolsi3_uxtw): Likewise. -+ * aarch64.c (aarch64_strip_shift): Handle ROTATE and ROTATERT. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209710. -+ 2014-04-23 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/arm/arm.c (arm_cortex_a57_tune): Initialize all fields. -+ (arm_cortex_a12_tune): Likewise. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209706. -+ 2014-04-23 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Handle BSWAP. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209701, 209702, 209703, 209704, 209705. -+ 2014-04-23 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/arm.md (arm_rev16si2): New pattern. -+ (arm_rev16si2_alt): Likewise. -+ * config/arm/arm.c (arm_new_rtx_costs): Handle rev16 case. -+ -+ 2014-04-23 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ * config/aarch64/aarch64.md (rev16<mode>2): New pattern. -+ (rev16<mode>2_alt): Likewise. -+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Handle rev16 case. -+ * config/arm/aarch-common.c (aarch_rev16_shright_mask_imm_p): New. -+ (aarch_rev16_shleft_mask_imm_p): Likewise. -+ (aarch_rev16_p_1): Likewise. -+ (aarch_rev16_p): Likewise. 
-+ * config/arm/aarch-common-protos.h (aarch_rev16_p): Declare extern. -+ (aarch_rev16_shright_mask_imm_p): Likewise. -+ (aarch_rev16_shleft_mask_imm_p): Likewise. -+ -+ 2014-04-23 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/aarch-common-protos.h (alu_cost_table): Add rev field. -+ * config/arm/aarch-cost-tables.h (generic_extra_costs): Specify -+ rev cost. -+ (cortex_a53_extra_costs): Likewise. -+ (cortex_a57_extra_costs): Likewise. -+ * config/arm/arm.c (cortexa9_extra_costs): Likewise. -+ (cortexa7_extra_costs): Likewise. -+ (cortexa8_extra_costs): Likewise. -+ (cortexa12_extra_costs): Likewise. -+ (cortexa15_extra_costs): Likewise. -+ (v7m_extra_costs): Likewise. -+ (arm_new_rtx_costs): Handle BSWAP. -+ -+ 2013-04-23 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/arm.c (cortexa8_extra_costs): New table. -+ (arm_cortex_a8_tune): New tuning struct. -+ * config/arm/arm-cores.def (cortex-a8): Use cortex_a8 tuning struct. -+ -+ 2014-04-23 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * config/arm/arm.c (arm_new_rtx_costs): Handle FMA. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209659. -+ 2014-04-22 Richard Henderson <rth@redhat.com> -+ -+ * config/aarch64/aarch64 (addti3, subti3): New expanders. -+ (add<GPI>3_compare0): Remove leading * from name. -+ (add<GPI>3_carryin): Likewise. -+ (sub<GPI>3_compare0): Likewise. -+ (sub<GPI>3_carryin): Likewise. -+ (<su_optab>mulditi3): New expander. -+ (multi3): New expander. -+ (madd<GPI>): Remove leading * from name. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209645. -+ 2014-04-22 Andrew Pinski <apinski@cavium.com> -+ -+ * config/aarch64/aarch64.c (aarch64_load_symref_appropriately): -+ Handle TLS for ILP32. -+ * config/aarch64/aarch64.md (tlsie_small): Rename to ... -+ (tlsie_small_<mode>): this and handle PTR. -+ (tlsie_small_sidi): New pattern. -+ (tlsle_small): Change to an expand to handle ILP32. -+ (tlsle_small_<mode>): New pattern. -+ (tlsdesc_small): Rename to ... -+ (tlsdesc_small_<mode>): this and handle PTR. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209643. -+ 2014-04-22 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> -+ -+ * config/aarch64/aarch64.c (TARGET_FLAGS_REGNUM): Define. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209641, 209642. -+ 2014-04-22 Alex Velenko <Alex.Velenko@arm.com> -+ -+ * config/aarch64/aarch64-builtins.c (TYPES_REINTERP): Removed. -+ (aarch64_types_signed_unsigned_qualifiers): Qualifier added. -+ (aarch64_types_signed_poly_qualifiers): Likewise. -+ (aarch64_types_unsigned_signed_qualifiers): Likewise. -+ (aarch64_types_poly_signed_qualifiers): Likewise. -+ (TYPES_REINTERP_SS): Type macro added. -+ (TYPES_REINTERP_SU): Likewise. -+ (TYPES_REINTERP_SP): Likewise. -+ (TYPES_REINTERP_US): Likewise. -+ (TYPES_REINTERP_PS): Likewise. -+ (aarch64_fold_builtin): New expression folding added. -+ * config/aarch64/aarch64-simd-builtins.def (REINTERP): -+ Declarations removed. -+ (REINTERP_SS): Declarations added. -+ (REINTERP_US): Likewise. -+ (REINTERP_PS): Likewise. -+ (REINTERP_SU): Likewise. -+ (REINTERP_SP): Likewise. -+ * config/aarch64/arm_neon.h (vreinterpret_p8_f64): Implemented. -+ (vreinterpretq_p8_f64): Likewise. -+ (vreinterpret_p16_f64): Likewise. -+ (vreinterpretq_p16_f64): Likewise. -+ (vreinterpret_f32_f64): Likewise. -+ (vreinterpretq_f32_f64): Likewise. -+ (vreinterpret_f64_f32): Likewise. -+ (vreinterpret_f64_p8): Likewise. 
-+ (vreinterpret_f64_p16): Likewise. -+ (vreinterpret_f64_s8): Likewise. -+ (vreinterpret_f64_s16): Likewise. -+ (vreinterpret_f64_s32): Likewise. -+ (vreinterpret_f64_s64): Likewise. -+ (vreinterpret_f64_u8): Likewise. -+ (vreinterpret_f64_u16): Likewise. -+ (vreinterpret_f64_u32): Likewise. -+ (vreinterpret_f64_u64): Likewise. -+ (vreinterpretq_f64_f32): Likewise. -+ (vreinterpretq_f64_p8): Likewise. -+ (vreinterpretq_f64_p16): Likewise. -+ (vreinterpretq_f64_s8): Likewise. -+ (vreinterpretq_f64_s16): Likewise. -+ (vreinterpretq_f64_s32): Likewise. -+ (vreinterpretq_f64_s64): Likewise. -+ (vreinterpretq_f64_u8): Likewise. -+ (vreinterpretq_f64_u16): Likewise. -+ (vreinterpretq_f64_u32): Likewise. -+ (vreinterpretq_f64_u64): Likewise. -+ (vreinterpret_s64_f64): Likewise. -+ (vreinterpretq_s64_f64): Likewise. -+ (vreinterpret_u64_f64): Likewise. -+ (vreinterpretq_u64_f64): Likewise. -+ (vreinterpret_s8_f64): Likewise. -+ (vreinterpretq_s8_f64): Likewise. -+ (vreinterpret_s16_f64): Likewise. -+ (vreinterpretq_s16_f64): Likewise. -+ (vreinterpret_s32_f64): Likewise. -+ (vreinterpretq_s32_f64): Likewise. -+ (vreinterpret_u8_f64): Likewise. -+ (vreinterpretq_u8_f64): Likewise. -+ (vreinterpret_u16_f64): Likewise. -+ (vreinterpretq_u16_f64): Likewise. -+ (vreinterpret_u32_f64): Likewise. -+ (vreinterpretq_u32_f64): Likewise. -+ -+ 2014-04-22 Alex Velenko <Alex.Velenko@arm.com> -+ -+ * config/aarch64/aarch64/aarch64-builtins.c (TYPES_REINTERP): Removed. -+ * config/aarch64/aarch64/aarch64-simd-builtins.def (REINTERP): Removed. -+ (vreinterpret_p8_s8): Likewise. -+ * config/aarch64/aarch64/arm_neon.h (vreinterpret_p8_s8): Uses cast. -+ (vreinterpret_p8_s16): Likewise. -+ (vreinterpret_p8_s32): Likewise. -+ (vreinterpret_p8_s64): Likewise. -+ (vreinterpret_p8_f32): Likewise. -+ (vreinterpret_p8_u8): Likewise. -+ (vreinterpret_p8_u16): Likewise. -+ (vreinterpret_p8_u32): Likewise. -+ (vreinterpret_p8_u64): Likewise. -+ (vreinterpret_p8_p16): Likewise. -+ (vreinterpretq_p8_s8): Likewise. -+ (vreinterpretq_p8_s16): Likewise. -+ (vreinterpretq_p8_s32): Likewise. -+ (vreinterpretq_p8_s64): Likewise. -+ (vreinterpretq_p8_f32): Likewise. -+ (vreinterpretq_p8_u8): Likewise. -+ (vreinterpretq_p8_u16): Likewise. -+ (vreinterpretq_p8_u32): Likewise. -+ (vreinterpretq_p8_u64): Likewise. -+ (vreinterpretq_p8_p16): Likewise. -+ (vreinterpret_p16_s8): Likewise. -+ (vreinterpret_p16_s16): Likewise. -+ (vreinterpret_p16_s32): Likewise. -+ (vreinterpret_p16_s64): Likewise. -+ (vreinterpret_p16_f32): Likewise. -+ (vreinterpret_p16_u8): Likewise. -+ (vreinterpret_p16_u16): Likewise. -+ (vreinterpret_p16_u32): Likewise. -+ (vreinterpret_p16_u64): Likewise. -+ (vreinterpret_p16_p8): Likewise. -+ (vreinterpretq_p16_s8): Likewise. -+ (vreinterpretq_p16_s16): Likewise. -+ (vreinterpretq_p16_s32): Likewise. -+ (vreinterpretq_p16_s64): Likewise. -+ (vreinterpretq_p16_f32): Likewise. -+ (vreinterpretq_p16_u8): Likewise. -+ (vreinterpretq_p16_u16): Likewise. -+ (vreinterpretq_p16_u32): Likewise. -+ (vreinterpretq_p16_u64): Likewise. -+ (vreinterpretq_p16_p8): Likewise. -+ (vreinterpret_f32_s8): Likewise. -+ (vreinterpret_f32_s16): Likewise. -+ (vreinterpret_f32_s32): Likewise. -+ (vreinterpret_f32_s64): Likewise. -+ (vreinterpret_f32_u8): Likewise. -+ (vreinterpret_f32_u16): Likewise. -+ (vreinterpret_f32_u32): Likewise. -+ (vreinterpret_f32_u64): Likewise. -+ (vreinterpret_f32_p8): Likewise. -+ (vreinterpret_f32_p16): Likewise. -+ (vreinterpretq_f32_s8): Likewise. -+ (vreinterpretq_f32_s16): Likewise. 
-+ (vreinterpretq_f32_s32): Likewise. -+ (vreinterpretq_f32_s64): Likewise. -+ (vreinterpretq_f32_u8): Likewise. -+ (vreinterpretq_f32_u16): Likewise. -+ (vreinterpretq_f32_u32): Likewise. -+ (vreinterpretq_f32_u64): Likewise. -+ (vreinterpretq_f32_p8): Likewise. -+ (vreinterpretq_f32_p16): Likewise. -+ (vreinterpret_s64_s8): Likewise. -+ (vreinterpret_s64_s16): Likewise. -+ (vreinterpret_s64_s32): Likewise. -+ (vreinterpret_s64_f32): Likewise. -+ (vreinterpret_s64_u8): Likewise. -+ (vreinterpret_s64_u16): Likewise. -+ (vreinterpret_s64_u32): Likewise. -+ (vreinterpret_s64_u64): Likewise. -+ (vreinterpret_s64_p8): Likewise. -+ (vreinterpret_s64_p16): Likewise. -+ (vreinterpretq_s64_s8): Likewise. -+ (vreinterpretq_s64_s16): Likewise. -+ (vreinterpretq_s64_s32): Likewise. -+ (vreinterpretq_s64_f32): Likewise. -+ (vreinterpretq_s64_u8): Likewise. -+ (vreinterpretq_s64_u16): Likewise. -+ (vreinterpretq_s64_u32): Likewise. -+ (vreinterpretq_s64_u64): Likewise. -+ (vreinterpretq_s64_p8): Likewise. -+ (vreinterpretq_s64_p16): Likewise. -+ (vreinterpret_u64_s8): Likewise. -+ (vreinterpret_u64_s16): Likewise. -+ (vreinterpret_u64_s32): Likewise. -+ (vreinterpret_u64_s64): Likewise. -+ (vreinterpret_u64_f32): Likewise. -+ (vreinterpret_u64_u8): Likewise. -+ (vreinterpret_u64_u16): Likewise. -+ (vreinterpret_u64_u32): Likewise. -+ (vreinterpret_u64_p8): Likewise. -+ (vreinterpret_u64_p16): Likewise. -+ (vreinterpretq_u64_s8): Likewise. -+ (vreinterpretq_u64_s16): Likewise. -+ (vreinterpretq_u64_s32): Likewise. -+ (vreinterpretq_u64_s64): Likewise. -+ (vreinterpretq_u64_f32): Likewise. -+ (vreinterpretq_u64_u8): Likewise. -+ (vreinterpretq_u64_u16): Likewise. -+ (vreinterpretq_u64_u32): Likewise. -+ (vreinterpretq_u64_p8): Likewise. -+ (vreinterpretq_u64_p16): Likewise. -+ (vreinterpret_s8_s16): Likewise. -+ (vreinterpret_s8_s32): Likewise. -+ (vreinterpret_s8_s64): Likewise. -+ (vreinterpret_s8_f32): Likewise. -+ (vreinterpret_s8_u8): Likewise. -+ (vreinterpret_s8_u16): Likewise. -+ (vreinterpret_s8_u32): Likewise. -+ (vreinterpret_s8_u64): Likewise. -+ (vreinterpret_s8_p8): Likewise. -+ (vreinterpret_s8_p16): Likewise. -+ (vreinterpretq_s8_s16): Likewise. -+ (vreinterpretq_s8_s32): Likewise. -+ (vreinterpretq_s8_s64): Likewise. -+ (vreinterpretq_s8_f32): Likewise. -+ (vreinterpretq_s8_u8): Likewise. -+ (vreinterpretq_s8_u16): Likewise. -+ (vreinterpretq_s8_u32): Likewise. -+ (vreinterpretq_s8_u64): Likewise. -+ (vreinterpretq_s8_p8): Likewise. -+ (vreinterpretq_s8_p16): Likewise. -+ (vreinterpret_s16_s8): Likewise. -+ (vreinterpret_s16_s32): Likewise. -+ (vreinterpret_s16_s64): Likewise. -+ (vreinterpret_s16_f32): Likewise. -+ (vreinterpret_s16_u8): Likewise. -+ (vreinterpret_s16_u16): Likewise. -+ (vreinterpret_s16_u32): Likewise. -+ (vreinterpret_s16_u64): Likewise. -+ (vreinterpret_s16_p8): Likewise. -+ (vreinterpret_s16_p16): Likewise. -+ (vreinterpretq_s16_s8): Likewise. -+ (vreinterpretq_s16_s32): Likewise. -+ (vreinterpretq_s16_s64): Likewise. -+ (vreinterpretq_s16_f32): Likewise. -+ (vreinterpretq_s16_u8): Likewise. -+ (vreinterpretq_s16_u16): Likewise. -+ (vreinterpretq_s16_u32): Likewise. -+ (vreinterpretq_s16_u64): Likewise. -+ (vreinterpretq_s16_p8): Likewise. -+ (vreinterpretq_s16_p16): Likewise. -+ (vreinterpret_s32_s8): Likewise. -+ (vreinterpret_s32_s16): Likewise. -+ (vreinterpret_s32_s64): Likewise. -+ (vreinterpret_s32_f32): Likewise. -+ (vreinterpret_s32_u8): Likewise. -+ (vreinterpret_s32_u16): Likewise. -+ (vreinterpret_s32_u32): Likewise. -+ (vreinterpret_s32_u64): Likewise. 
-+ (vreinterpret_s32_p8): Likewise. -+ (vreinterpret_s32_p16): Likewise. -+ (vreinterpretq_s32_s8): Likewise. -+ (vreinterpretq_s32_s16): Likewise. -+ (vreinterpretq_s32_s64): Likewise. -+ (vreinterpretq_s32_f32): Likewise. -+ (vreinterpretq_s32_u8): Likewise. -+ (vreinterpretq_s32_u16): Likewise. -+ (vreinterpretq_s32_u32): Likewise. -+ (vreinterpretq_s32_u64): Likewise. -+ (vreinterpretq_s32_p8): Likewise. -+ (vreinterpretq_s32_p16): Likewise. -+ (vreinterpret_u8_s8): Likewise. -+ (vreinterpret_u8_s16): Likewise. -+ (vreinterpret_u8_s32): Likewise. -+ (vreinterpret_u8_s64): Likewise. -+ (vreinterpret_u8_f32): Likewise. -+ (vreinterpret_u8_u16): Likewise. -+ (vreinterpret_u8_u32): Likewise. -+ (vreinterpret_u8_u64): Likewise. -+ (vreinterpret_u8_p8): Likewise. -+ (vreinterpret_u8_p16): Likewise. -+ (vreinterpretq_u8_s8): Likewise. -+ (vreinterpretq_u8_s16): Likewise. -+ (vreinterpretq_u8_s32): Likewise. -+ (vreinterpretq_u8_s64): Likewise. -+ (vreinterpretq_u8_f32): Likewise. -+ (vreinterpretq_u8_u16): Likewise. -+ (vreinterpretq_u8_u32): Likewise. -+ (vreinterpretq_u8_u64): Likewise. -+ (vreinterpretq_u8_p8): Likewise. -+ (vreinterpretq_u8_p16): Likewise. -+ (vreinterpret_u16_s8): Likewise. -+ (vreinterpret_u16_s16): Likewise. -+ (vreinterpret_u16_s32): Likewise. -+ (vreinterpret_u16_s64): Likewise. -+ (vreinterpret_u16_f32): Likewise. -+ (vreinterpret_u16_u8): Likewise. -+ (vreinterpret_u16_u32): Likewise. -+ (vreinterpret_u16_u64): Likewise. -+ (vreinterpret_u16_p8): Likewise. -+ (vreinterpret_u16_p16): Likewise. -+ (vreinterpretq_u16_s8): Likewise. -+ (vreinterpretq_u16_s16): Likewise. -+ (vreinterpretq_u16_s32): Likewise. -+ (vreinterpretq_u16_s64): Likewise. -+ (vreinterpretq_u16_f32): Likewise. -+ (vreinterpretq_u16_u8): Likewise. -+ (vreinterpretq_u16_u32): Likewise. -+ (vreinterpretq_u16_u64): Likewise. -+ (vreinterpretq_u16_p8): Likewise. -+ (vreinterpretq_u16_p16): Likewise. -+ (vreinterpret_u32_s8): Likewise. -+ (vreinterpret_u32_s16): Likewise. -+ (vreinterpret_u32_s32): Likewise. -+ (vreinterpret_u32_s64): Likewise. -+ (vreinterpret_u32_f32): Likewise. -+ (vreinterpret_u32_u8): Likewise. -+ (vreinterpret_u32_u16): Likewise. -+ (vreinterpret_u32_u64): Likewise. -+ (vreinterpret_u32_p8): Likewise. -+ (vreinterpret_u32_p16): Likewise. -+ (vreinterpretq_u32_s8): Likewise. -+ (vreinterpretq_u32_s16): Likewise. -+ (vreinterpretq_u32_s32): Likewise. -+ (vreinterpretq_u32_s64): Likewise. -+ (vreinterpretq_u32_f32): Likewise. -+ (vreinterpretq_u32_u8): Likewise. -+ (vreinterpretq_u32_u16): Likewise. -+ (vreinterpretq_u32_u64): Likewise. -+ (vreinterpretq_u32_p8): Likewise. -+ (vreinterpretq_u32_p16): Likewise. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209640. -+ 2014-04-22 Alex Velenko <Alex.Velenko@arm.com> -+ -+ * gcc/config/aarch64/aarch64-simd.md (aarch64_s<optab><mode>): -+ Pattern extended. -+ * config/aarch64/aarch64-simd-builtins.def (sqneg): Iterator -+ extended. -+ (sqabs): Likewise. -+ * config/aarch64/arm_neon.h (vqneg_s64): New intrinsic. -+ (vqnegd_s64): Likewise. -+ (vqabs_s64): Likewise. -+ (vqabsd_s64): Likewise. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209627, 209636. -+ 2014-04-22 Renlin <renlin.li@arm.com> -+ Jiong Wang <jiong.wang@arm.com> -+ -+ * config/aarch64/aarch64.h (aarch64_frame): Delete "fp_lr_offset". -+ * config/aarch64/aarch64.c (aarch64_layout_frame) -+ (aarch64_initial_elimination_offset): Likewise. 
-+ -+ 2014-04-22 Marcus Shawcroft <marcus.shawcroft@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_initial_elimination_offset): -+ Fix indentation. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209618. -+ 2014-04-22 Renlin Li <Renlin.Li@arm.com> -+ -+ * config/aarch64/aarch64.c (aarch64_print_operand_address): Adjust -+ the output asm format. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209617. -+ 2014-04-22 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * config/aarch64/aarch64-simd.md -+ (aarch64_cm<optab>di): Always split. -+ (*aarch64_cm<optab>di): New. -+ (aarch64_cmtstdi): Always split. -+ (*aarch64_cmtstdi): New. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209615. -+ 2014-04-22 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> -+ -+ * config/arm/arm.c (arm_hard_regno_mode_ok): Loosen -+ restrictions on core registers for DImode values in Thumb2. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209613, r209614. -+ 2014-04-22 Ian Bolton <ian.bolton@arm.com> -+ -+ * config/arm/arm.md (*anddi_notdi_zesidi): New pattern. -+ * config/arm/thumb2.md (*iordi_notdi_zesidi): New pattern. -+ -+ 2014-04-22 Ian Bolton <ian.bolton@arm.com> -+ -+ * config/arm/thumb2.md (*iordi_notdi_di): New pattern. -+ (*iordi_notzesidi_di): Likewise. -+ (*iordi_notsesidi_di): Likewise. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209561. -+ 2014-04-22 Ian Bolton <ian.bolton@arm.com> -+ -+ * config/arm/arm-protos.h (tune_params): New struct members. -+ * config/arm/arm.c: Initialise tune_params per processor. -+ (thumb2_reorg): Suppress conversion from t32 to t16 when optimizing -+ for speed, based on new tune_params. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209559. -+ 2014-04-22 Alex Velenko <Alex.Velenko@arm.com> -+ -+ * config/aarch64/aarch64-builtins.c (BUILTIN_VDQF_DF): Macro -+ added. -+ * config/aarch64/aarch64-simd-builtins.def (frintn): Use added -+ macro. -+ * config/aarch64/aarch64-simd.md (<frint_pattern>): Comment -+ corrected. -+ * config/aarch64/aarch64.md (<frint_pattern>): Likewise. -+ * config/aarch64/arm_neon.h (vrnd_f64): Added. -+ (vrnda_f64): Likewise. -+ (vrndi_f64): Likewise. -+ (vrndm_f64): Likewise. -+ (vrndn_f64): Likewise. -+ (vrndp_f64): Likewise. -+ (vrndx_f64): Likewise. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209419. -+ 2014-04-15 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ PR rtl-optimization/60663 -+ * config/arm/arm.c (arm_new_rtx_costs): Improve ASM_OPERANDS case, -+ avoid 0 cost. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209457. -+ 2014-04-16 Andrew Pinski <apinski@cavium.com> -+ -+ * config/host-linux.c (TRY_EMPTY_VM_SPACE): Change aarch64 ilp32 -+ definition. -+ -+2014-05-19 Yvan Roux <yvan.roux@linaro.org> -+ -+ * LINARO-VERSION: Bump version. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ * LINARO-VERSION: Update. -+ -+2014-05-13 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209889. -+ 2014-04-29 Zhenqiang Chen <zhenqiang.chen@linaro.org> -+ -+ * config/aarch64/aarch64.md (mov<mode>cc): New for GPF. -+ -+2014-05-13 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209556. -+ 2014-04-22 Zhenqiang Chen <zhenqiang.chen@linaro.org> -+ -+ * config/arm/arm.c (arm_print_operand, thumb_exit): Make sure -+ GET_MODE_SIZE argument is enum machine_mode. 
-+ -+2014-04-28 Yvan Roux <yvan.roux@linaro.org> -+ -+ * LINARO-VERSION: Bump version. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. -+ * LINARO-VERSION: New file. -+ * configure.ac: Add Linaro version string. ---- a/src/gcc/testsuite/gcc.target/arm/pr44788.c -+++ b/src/gcc/testsuite/gcc.target/arm/pr44788.c -@@ -2,6 +2,8 @@ - /* { dg-require-effective-target arm_thumb2_ok } */ - /* { dg-options "-Os -fno-strict-aliasing -fPIC -mthumb -march=armv7-a -mfpu=vfp3 -mfloat-abi=softfp" } */ - -+extern void foo (float *); -+ - void joint_decode(float* mlt_buffer1, int t) { - int i; - float decode_buffer[1060]; ---- a/src/gcc/testsuite/gcc.target/arm/vect-rounding-floorf.c -+++ b/src/gcc/testsuite/gcc.target/arm/vect-rounding-floorf.c -@@ -5,8 +5,11 @@ - - #define N 32 - -+float __attribute__((aligned(16))) input[N]; -+float __attribute__((aligned(16))) output[N]; -+ - void --foo (float *output, float *input) -+foo () - { - int i = 0; - /* Vectorizable. */ ---- a/src/gcc/testsuite/gcc.target/arm/vect-lceilf_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/vect-lceilf_1.c -@@ -0,0 +1,21 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_v8_neon_ok } */ -+/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-all" } */ -+/* { dg-add-options arm_v8_neon } */ -+ -+#define N 32 -+ -+float __attribute__((aligned(16))) input[N]; -+int __attribute__((aligned(16))) output[N]; -+ -+void -+foo () -+{ -+ int i = 0; -+ /* Vectorizable. */ -+ for (i = 0; i < N; i++) -+ output[i] = __builtin_lceilf (input[i]); -+} -+ -+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -+/* { dg-final { cleanup-tree-dump "vect" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/vfp-ldmdbs.c -+++ b/src/gcc/testsuite/gcc.target/arm/vfp-ldmdbs.c -@@ -3,7 +3,7 @@ - /* { dg-skip-if "need fp instructions" { *-*-* } { "-mfloat-abi=soft" } { "" } } */ - /* { dg-options "-O2 -mfpu=vfp -mfloat-abi=softfp" } */ - --extern void baz (float); -+extern void bar (float); - - void - foo (float *p, float a, int n) -@@ -13,4 +13,4 @@ - while (n--); - } - --/* { dg-final { scan-assembler "fldmdbs" } } */ -+/* { dg-final { scan-assembler "vldmdb.32" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/pr60606-4.c -+++ b/src/gcc/testsuite/gcc.target/arm/pr60606-4.c -@@ -0,0 +1,9 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O" } */ -+ -+int -+f (void) -+{ -+ register unsigned int r[50] asm ("r1"); /* { dg-error "suitable for a register" } */ -+ return r[1]; -+} ---- a/src/gcc/testsuite/gcc.target/arm/iordi3-opt.c -+++ b/src/gcc/testsuite/gcc.target/arm/iordi3-opt.c -@@ -1,4 +1,4 @@ --/* { dg-do compile } */ -+/* { dg-do compile { target { arm_arm_ok || arm_thumb2_ok} } } */ - /* { dg-options "-O1" } */ - - unsigned long long or64 (unsigned long long input) ---- a/src/gcc/testsuite/gcc.target/arm/pr58784.c -+++ b/src/gcc/testsuite/gcc.target/arm/pr58784.c -@@ -11,6 +11,9 @@ - char stepsRemoved; - ptp_tlv_t tlv[1]; - } ptp_message_announce_t; -+ -+extern void f (ptp_message_announce_t *); -+ - int ptplib_send_announce(int sequenceId, int i) - { - ptp_message_announce_t tx_packet; ---- a/src/gcc/testsuite/gcc.target/arm/iordi_notdi-1.c -+++ b/src/gcc/testsuite/gcc.target/arm/iordi_notdi-1.c -@@ -0,0 +1,65 @@ -+/* { dg-do run } */ -+/* { dg-options "-O2 -fno-inline --save-temps" } */ -+ -+extern void abort (void); -+ -+typedef long long s64int; -+typedef int s32int; -+typedef unsigned long long u64int; -+typedef unsigned int u32int; -+ -+s64int -+iordi_di_notdi (s64int a, 
s64int b) -+{ -+ return (a | ~b); -+} -+ -+s64int -+iordi_di_notzesidi (s64int a, u32int b) -+{ -+ return (a | ~(u64int) b); -+} -+ -+s64int -+iordi_notdi_zesidi (s64int a, u32int b) -+{ -+ return (~a | (u64int) b); -+} -+ -+s64int -+iordi_di_notsesidi (s64int a, s32int b) -+{ -+ return (a | ~(s64int) b); -+} -+ -+int main () -+{ -+ s64int a64 = 0xdeadbeef00000000ll; -+ s64int b64 = 0x000000004f4f0112ll; -+ s64int c64 = 0xdeadbeef000f0000ll; -+ -+ u32int c32 = 0x01124f4f; -+ s32int d32 = 0xabbaface; -+ -+ s64int z = iordi_di_notdi (a64, b64); -+ if (z != 0xffffffffb0b0feedll) -+ abort (); -+ -+ z = iordi_di_notzesidi (a64, c32); -+ if (z != 0xfffffffffeedb0b0ll) -+ abort (); -+ -+ z = iordi_notdi_zesidi (c64, c32); -+ if (z != 0x21524110fff2ffffll) -+ abort (); -+ -+ z = iordi_di_notsesidi (a64, d32); -+ if (z != 0xdeadbeef54450531ll) -+ abort (); -+ -+ return 0; -+} -+ -+/* { dg-final { scan-assembler-times "orn\t" 6 { target arm_thumb2 } } } */ -+ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/vfp-ldmias.c -+++ b/src/gcc/testsuite/gcc.target/arm/vfp-ldmias.c -@@ -3,7 +3,7 @@ - /* { dg-skip-if "need fp instructions" { *-*-* } { "-mfloat-abi=soft" } { "" } } */ - /* { dg-options "-O2 -mfpu=vfp -mfloat-abi=softfp" } */ - --extern void baz (float); -+extern void bar (float); - - void - foo (float *p, float a, int n) -@@ -13,4 +13,4 @@ - while (n--); - } - --/* { dg-final { scan-assembler "fldmias" } } */ -+/* { dg-final { scan-assembler "vldmia.32" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/cold-lc.c -+++ b/src/gcc/testsuite/gcc.target/arm/cold-lc.c -@@ -7,6 +7,7 @@ - struct task_struct *task; - }; - extern struct thread_info *current_thread_info (void); -+extern int show_stack (struct task_struct *, unsigned long *); - - void dump_stack (void) - { ---- a/src/gcc/testsuite/gcc.target/arm/vfp-ldmdbd.c -+++ b/src/gcc/testsuite/gcc.target/arm/vfp-ldmdbd.c -@@ -13,4 +13,4 @@ - while (n--); - } - --/* { dg-final { scan-assembler "fldmdbd" } } */ -+/* { dg-final { scan-assembler "vldmdb.64" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/vfp-stmdbs.c -+++ b/src/gcc/testsuite/gcc.target/arm/vfp-stmdbs.c -@@ -12,4 +12,4 @@ - while (n--); - } - --/* { dg-final { scan-assembler "fstmdbs" } } */ -+/* { dg-final { scan-assembler "vstmdb.32" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/vfp-ldmiad.c -+++ b/src/gcc/testsuite/gcc.target/arm/vfp-ldmiad.c -@@ -13,4 +13,4 @@ - while (n--); - } - --/* { dg-final { scan-assembler "fldmiad" } } */ -+/* { dg-final { scan-assembler "vldmia.64" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/vfp-stmias.c -+++ b/src/gcc/testsuite/gcc.target/arm/vfp-stmias.c -@@ -12,4 +12,4 @@ - while (n--); - } - --/* { dg-final { scan-assembler "fstmias" } } */ -+/* { dg-final { scan-assembler "vstmia.32" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/vfp-stmdbd.c -+++ b/src/gcc/testsuite/gcc.target/arm/vfp-stmdbd.c -@@ -12,4 +12,4 @@ - while (n--); - } - --/* { dg-final { scan-assembler "fstmdbd" } } */ -+/* { dg-final { scan-assembler "vstmdb.64" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/lceil-vcvt_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/lceil-vcvt_1.c -@@ -0,0 +1,21 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_v8_vfp_ok } */ -+/* { dg-options "-O2 -march=armv8-a" } */ -+/* { dg-add-options arm_v8_vfp } */ -+ -+int -+foofloat (float x) -+{ -+ return __builtin_lceilf (x); -+} -+ -+/* { dg-final { scan-assembler-times "vcvtp.s32.f32\ts\[0-9\]+, s\[0-9\]+" 1 } } */ -+ -+ -+int -+foodouble (double x) -+{ -+ 
return __builtin_lceil (x); -+} -+ -+/* { dg-final { scan-assembler-times "vcvtp.s32.f64\ts\[0-9\]+, d\[0-9\]+" 1 } } */ ---- a/src/gcc/testsuite/gcc.target/arm/vfp-stmiad.c -+++ b/src/gcc/testsuite/gcc.target/arm/vfp-stmiad.c -@@ -12,4 +12,4 @@ - while (n--); - } - --/* { dg-final { scan-assembler "fstmiad" } } */ -+/* { dg-final { scan-assembler "vstmia.64" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzips16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzips16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzips16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzips16.x" -+ -+/* { dg-final { scan-assembler-times "vzip\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrns16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrns16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vtrns16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vtrns16.x" -+ -+/* { dg-final { scan-assembler-times "vtrn\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vexts64_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vexts64_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vexts64' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/ext_s64.x" -+ -+/* Don't scan assembler for vext - it can be optimized into a move from r0. */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipu16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipu16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipu16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipu16.x" -+ -+/* { dg-final { scan-assembler-times "vzip\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqs8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqs8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipQs8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipqs8.x" -+ -+/* { dg-final { scan-assembler-times "vzip\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vextQu8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQu8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vextQu8' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/extq_u8.x" -+ -+/* { dg-final { scan-assembler-times "vext\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 15 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnu16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnu16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vtrnu16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vtrnu16.x" -+ -+/* { dg-final { scan-assembler-times "vtrn\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vtrnQs8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vtrnqs8.x" -+ -+/* { dg-final { scan-assembler-times "vtrn\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqf32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqf32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vtrnQf32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vtrnqf32.x" -+ -+/* { dg-final { scan-assembler-times "vtrn\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vextu64_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextu64_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vextu64' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/ext_u64.x" -+ -+/* Don't scan assembler for vext - it can be optimized into a move from r0. */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qp8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qp8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev64q_p8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev64qp8.x" -+ -+/* { dg-final { scan-assembler "vrev64\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqp8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqp8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vuzpQp8' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vuzpqp8.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32p8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32p8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev32p8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev32p8.x" -+ -+/* { dg-final { scan-assembler "vrev32\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vextu8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextu8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vextu8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/ext_u8.x" -+ -+/* { dg-final { scan-assembler-times "vext\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vextQs64_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQs64_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vextQs64' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/extq_s64.x" -+ -+/* { dg-final { scan-assembler-times "vext\.64\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qp16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qp16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev64q_p16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev64qp16.x" -+ -+/* { dg-final { scan-assembler "vrev64\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vtrnQs16' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vtrnqs16.x" -+ -+/* { dg-final { scan-assembler-times "vtrn\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrns8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrns8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vtrns8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vtrns8.x" -+ -+/* { dg-final { scan-assembler-times "vtrn\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qs32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qs32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev64q_s32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev64qs32.x" -+ -+/* { dg-final { scan-assembler "vrev64\.32\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vextQu64_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQu64_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vextQu64' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/extq_u64.x" -+ -+/* { dg-final { scan-assembler-times "vext\.64\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vtrnQu16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vtrnqu16.x" -+ -+/* { dg-final { scan-assembler-times "vtrn\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64s8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64s8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev64s8' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev64s8.x" -+ -+/* { dg-final { scan-assembler "vrev64\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qu32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qu32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev64q_u32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev64qu32.x" -+ -+/* { dg-final { scan-assembler "vrev64\.32\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqp16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqp16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vuzpQp16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vuzpqp16.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vextp16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextp16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vextp16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/ext_p16.x" -+ -+/* { dg-final { scan-assembler-times "vext\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vuzpQs32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vuzpqs32.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vexts32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vexts32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vexts32' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/ext_s32.x" -+ -+/* { dg-final { scan-assembler-times "vext\.32\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vuzpQu32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vuzpqu32.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzps8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzps8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vuzps8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vuzps8.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vextu32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextu32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vextu32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/ext_u32.x" -+ -+/* { dg-final { scan-assembler-times "vext\.32\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32s16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32s16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev32s16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev32s16.x" -+ -+/* { dg-final { scan-assembler "vrev32\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqp8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqp8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipQp8' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipqp8.x" -+ -+/* { dg-final { scan-assembler-times "vzip\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqp8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqp8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vtrnQp8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vtrnqp8.x" -+ -+/* { dg-final { scan-assembler-times "vtrn\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32qs8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32qs8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev32q_s8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev32qs8.x" -+ -+/* { dg-final { scan-assembler "vrev32\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32u16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32u16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev32u16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev32u16.x" -+ -+/* { dg-final { scan-assembler "vrev32\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64p16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64p16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev64p16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev64p16.x" -+ -+/* { dg-final { scan-assembler "vrev64\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64s32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64s32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev64s32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev64s32.x" -+ -+/* { dg-final { scan-assembler "vrev64\.32\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev16qs8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev16qs8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev16q_s8' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev16qs8.x" -+ -+/* { dg-final { scan-assembler "vrev16\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/simd.exp -+++ b/src/gcc/testsuite/gcc.target/arm/simd/simd.exp -@@ -0,0 +1,35 @@ -+# Copyright (C) 1997-2014 Free Software Foundation, Inc. -+ -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 3 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with GCC; see the file COPYING3. If not see -+# <http://www.gnu.org/licenses/>. -+ -+# GCC testsuite that uses the `dg.exp' driver. -+ -+# Exit immediately if this isn't an ARM target. -+if ![istarget arm*-*-*] then { -+ return -+} -+ -+# Load support procs. -+load_lib gcc-dg.exp -+ -+# Initialize `dg'. -+dg-init -+ -+# Main loop. -+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \ -+ "" "" -+ -+# All done. -+dg-finish ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64u32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64u32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev64u32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev64u32.x" -+ -+/* { dg-final { scan-assembler "vrev64\.32\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qu8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qu8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev64q_u8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev64qu8.x" -+ -+/* { dg-final { scan-assembler "vrev64\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpp16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpp16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vuzpp16' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vuzpp16.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzps32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzps32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vuzps32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vuzps32.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpu32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpu32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vuzpu32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vuzpu32.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vextQp16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQp16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vextQp16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/extq_p16.x" -+ -+/* { dg-final { scan-assembler-times "vext\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vextQs32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQs32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vextQs32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/extq_s32.x" -+ -+/* { dg-final { scan-assembler-times "vext\.32\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32qp16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32qp16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev32q_p16' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev32qp16.x" -+ -+/* { dg-final { scan-assembler "vrev32\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqp16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqp16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipQp16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipqp16.x" -+ -+/* { dg-final { scan-assembler-times "vzip\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqs32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqs32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipQs32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipqs32.x" -+ -+/* { dg-final { scan-assembler-times "vzip\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vextQu32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQu32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vextQu32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/extq_u32.x" -+ -+/* { dg-final { scan-assembler-times "vext\.32\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnp8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnp8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vtrnp8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vtrnp8.x" -+ -+/* { dg-final { scan-assembler-times "vtrn\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vuzpQu8' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vuzpqu8.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzips8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzips8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzips8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzips8.x" -+ -+/* { dg-final { scan-assembler-times "vzip\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqu32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqu32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipQu32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipqu32.x" -+ -+/* { dg-final { scan-assembler-times "vzip\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev16s8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev16s8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev16s8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev16s8.x" -+ -+/* { dg-final { scan-assembler "vrev16\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32u8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32u8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev32u8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev32u8.x" -+ -+/* { dg-final { scan-assembler "vrev32\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64p8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64p8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev64p8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev64p8.x" -+ -+/* { dg-final { scan-assembler "vrev64\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpp8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpp8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vuzpp8' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vuzpp8.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipp16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipp16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipp16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipp16.x" -+ -+/* { dg-final { scan-assembler-times "vzip\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzips32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzips32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzips32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzips32.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vextp64_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextp64_1.c -@@ -0,0 +1,26 @@ -+/* Test the `vextp64' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_crypto_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_crypto } */ -+ -+#include "arm_neon.h" -+ -+extern void abort (void); -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly64x1_t in1 = {0}; -+ poly64x1_t in2 = {1}; -+ poly64x1_t actual = vext_p64 (in1, in2, 0); -+ if (actual != in1) -+ abort (); -+ -+ return 0; -+} -+ -+/* Don't scan assembler for vext - it can be optimized into a move from r0. -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32qp8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32qp8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev32q_p8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev32qp8.x" -+ -+/* { dg-final { scan-assembler "vrev32\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnp16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnp16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vtrnp16' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vtrnp16.x" -+ -+/* { dg-final { scan-assembler-times "vtrn\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrns32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrns32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vtrns32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vtrns32.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vextQs8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQs8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vextQs8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/extq_s8.x" -+ -+/* { dg-final { scan-assembler-times "vext\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 15 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev16qp8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev16qp8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev16q_p8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev16qp8.x" -+ -+/* { dg-final { scan-assembler "vrev16\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipu32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipu32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipu32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipu32.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnu32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnu32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vtrnu32' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vtrnu32.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqu8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqu8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipQu8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipqu8.x" -+ -+/* { dg-final { scan-assembler-times "vzip\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vtrnQu8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vtrnqu8.x" -+ -+/* { dg-final { scan-assembler-times "vtrn\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qf32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qf32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev64q_f32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev64qf32.x" -+ -+/* { dg-final { scan-assembler "vrev64\.32\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqf32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqf32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vuzpQf32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vuzpqf32.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipp8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipp8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipp8' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipp8.x" -+ -+/* { dg-final { scan-assembler-times "vzip\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vextf32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextf32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vextf32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/ext_f32.x" -+ -+/* { dg-final { scan-assembler-times "vext\.32\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vextQp64_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQp64_1.c -@@ -0,0 +1,33 @@ -+/* Test the `vextQp64' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_crypto_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_crypto } */ -+ -+#include "arm_neon.h" -+ -+extern void abort (void); -+ -+poly64x2_t -+test_vextq_p64_1 (poly64x2_t a, poly64x2_t b) -+{ -+ return vextq_p64(a, b, 1); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i, off; -+ poly64x2_t in1 = {0, 1}; -+ poly64x2_t in2 = {2, 3}; -+ poly64x2_t actual = test_vextq_p64_1 (in1, in2); -+ for (i = 0; i < 2; i++) -+ if (actual[i] != i + 1) -+ abort (); -+ -+ return 0; -+} -+ -+/* { dg-final { scan-assembler-times "vext\.64\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vexts8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vexts8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vexts8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/ext_s8.x" -+ -+/* { dg-final { scan-assembler-times "vext\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev16p8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev16p8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev16p8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev16p8.x" -+ -+/* { dg-final { scan-assembler "vrev16\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqp16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqp16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vtrnQp16' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vtrnqp16.x" -+ -+/* { dg-final { scan-assembler-times "vtrn\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vtrnQs32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vtrnqs32.x" -+ -+/* { dg-final { scan-assembler-times "vtrn\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vtrnQu32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vtrnqu32.x" -+ -+/* { dg-final { scan-assembler-times "vtrn\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnu8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnu8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vtrnu8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vtrnu8.x" -+ -+/* { dg-final { scan-assembler-times "vtrn\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qs16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qs16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev64q_s16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev64qs16.x" -+ -+/* { dg-final { scan-assembler "vrev64\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64f32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64f32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev64f32' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev64f32.x" -+ -+/* { dg-final { scan-assembler "vrev64\.32\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64u8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64u8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev64u8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev64u8.x" -+ -+/* { dg-final { scan-assembler "vrev64\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qu16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qu16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev64q_u16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev64qu16.x" -+ -+/* { dg-final { scan-assembler "vrev64\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32p16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32p16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev32p16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev32p16.x" -+ -+/* { dg-final { scan-assembler "vrev32\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vextQp8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQp8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vextQp8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/extq_p8.x" -+ -+/* { dg-final { scan-assembler-times "vext\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 15 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpf32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpf32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vuzpf32' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vuzpf32.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vuzpQs16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vuzpqs16.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vexts16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vexts16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vexts16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/ext_s16.x" -+ -+/* { dg-final { scan-assembler-times "vext\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vuzpQu16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vuzpqu16.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpu8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpu8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vuzpu8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vuzpu8.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vextQf32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQf32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vextQf32' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/extq_f32.x" -+ -+/* { dg-final { scan-assembler-times "vext\.32\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vextu16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextu16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vextu16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/ext_u16.x" -+ -+/* { dg-final { scan-assembler-times "vext\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqf32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqf32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipQf32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipqf32.x" -+ -+/* { dg-final { scan-assembler-times "vzip\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32qu8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32qu8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev32q_u8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev32qu8.x" -+ -+/* { dg-final { scan-assembler "vrev32\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64s16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64s16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev64s16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev64s16.x" -+ -+/* { dg-final { scan-assembler "vrev64\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev16qu8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev16qu8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev16q_u8' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev16qu8.x" -+ -+/* { dg-final { scan-assembler "vrev16\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64u16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64u16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev64u16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev64u16.x" -+ -+/* { dg-final { scan-assembler "vrev64\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev64qs8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev64qs8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev64q_s8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev64qs8.x" -+ -+/* { dg-final { scan-assembler "vrev64\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vextp8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextp8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vextp8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/ext_p8.x" -+ -+/* { dg-final { scan-assembler-times "vext\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzps16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzps16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vuzps16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vuzps16.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vuzpQs8' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vuzpqs8.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpu16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpu16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vuzpu16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vuzpu16.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vextQs16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQs16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vextQs16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/extq_s16.x" -+ -+/* { dg-final { scan-assembler-times "vext\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32s8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32s8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev32s8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev32s8.x" -+ -+/* { dg-final { scan-assembler "vrev32\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32qs16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32qs16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev32q_s16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev32qs16.x" -+ -+/* { dg-final { scan-assembler "vrev32\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vextQu16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQu16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vextQu16' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/extq_u16.x" -+ -+/* { dg-final { scan-assembler-times "vext\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipf32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipf32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipf32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipf32.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqs16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqs16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipQs16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipqs16.x" -+ -+/* { dg-final { scan-assembler-times "vzip\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnf32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnf32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vtrnf32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vtrnf32.x" -+ -+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev32qu16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev32qu16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev32q_u16' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev32qu16.x" -+ -+/* { dg-final { scan-assembler "vrev32\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqu16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqu16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipQu16' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipqu16.x" -+ -+/* { dg-final { scan-assembler-times "vzip\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipu8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipu8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipu8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipu8.x" -+ -+/* { dg-final { scan-assembler-times "vzip\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vrev16u8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vrev16u8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vrev16u8' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vrev16u8.x" -+ -+/* { dg-final { scan-assembler "vrev16\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/vect-lfloorf_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/vect-lfloorf_1.c -@@ -0,0 +1,21 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_v8_neon_ok } */ -+/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-all" } */ -+/* { dg-add-options arm_v8_neon } */ -+ -+#define N 32 -+ -+float __attribute__((aligned(16))) input[N]; -+int __attribute__((aligned(16))) output[N]; -+ -+void -+foo () -+{ -+ int i = 0; -+ /* Vectorizable. */ -+ for (i = 0; i < N; i++) -+ output[i] = __builtin_lfloorf (input[i]); -+} -+ -+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -+/* { dg-final { cleanup-tree-dump "vect" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/pr51835.c -+++ b/src/gcc/testsuite/gcc.target/arm/pr51835.c -@@ -13,5 +13,5 @@ - return (unsigned int)d; - } - --/* { dg-final { scan-assembler-times "fmrrd\[\\t \]+r0,\[\\t \]*r1,\[\\t \]*d0" 2 { target { arm_little_endian } } } } */ --/* { dg-final { scan-assembler-times "fmrrd\[\\t \]+r1,\[\\t \]*r0,\[\\t \]*d0" 2 { target { ! arm_little_endian } } } } */ -+/* { dg-final { scan-assembler-times "vmov\[\\t \]+r0,\[\\t \]*r1,\[\\t \]*d0" 2 { target { arm_little_endian } } } } */ -+/* { dg-final { scan-assembler-times "vmov\[\\t \]+r1,\[\\t \]*r0,\[\\t \]*d0" 2 { target { ! 
arm_little_endian } } } } */ ---- a/src/gcc/testsuite/gcc.target/arm/20031108-1.c -+++ b/src/gcc/testsuite/gcc.target/arm/20031108-1.c -@@ -20,6 +20,9 @@ - - Rec_Pointer Ptr_Glob; - -+extern int Proc_7 (int, int, int *); -+ -+void - Proc_1 (Ptr_Val_Par) - Rec_Pointer Ptr_Val_Par; - { ---- a/src/gcc/testsuite/gcc.target/arm/neon-modes-2.c -+++ b/src/gcc/testsuite/gcc.target/arm/neon-modes-2.c -@@ -11,6 +11,8 @@ - - #define MANY(A) A (0), A (1), A (2), A (3), A (4), A (5) - -+extern void foo (int *, int *); -+ - void - bar (uint32_t *ptr, int y) - { ---- a/src/gcc/testsuite/gcc.target/arm/vect-rounding-roundf.c -+++ b/src/gcc/testsuite/gcc.target/arm/vect-rounding-roundf.c -@@ -5,8 +5,11 @@ - - #define N 32 - -+float __attribute__((aligned(16))) input[N]; -+float __attribute__((aligned(16))) output[N]; -+ - void --foo (float *output, float *input) -+foo () - { - int i = 0; - /* Vectorizable. */ ---- a/src/gcc/testsuite/gcc.target/arm/pr43920-2.c -+++ b/src/gcc/testsuite/gcc.target/arm/pr43920-2.c -@@ -4,6 +4,8 @@ - - #include <stdio.h> - -+extern int lseek(int, long, int); -+ - int getFileStartAndLength (int fd, int *start_, size_t *length_) - { - int start, end; ---- a/src/gcc/testsuite/gcc.target/arm/xordi3-opt.c -+++ b/src/gcc/testsuite/gcc.target/arm/xordi3-opt.c -@@ -1,4 +1,4 @@ --/* { dg-do compile } */ -+/* { dg-do compile { target { arm_arm_ok || arm_thumb2_ok} } } */ - /* { dg-options "-O1" } */ - - unsigned long long xor64 (unsigned long long input) ---- a/src/gcc/testsuite/gcc.target/arm/vect-lroundf_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/vect-lroundf_1.c -@@ -0,0 +1,21 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_v8_neon_ok } */ -+/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-all" } */ -+/* { dg-add-options arm_v8_neon } */ -+ -+#define N 32 -+ -+float __attribute__((aligned(16))) input[N]; -+int __attribute__((aligned(16))) output[N]; -+ -+void -+foo () -+{ -+ int i = 0; -+ /* Vectorizable. */ -+ for (i = 0; i < N; i++) -+ output[i] = __builtin_lroundf (input[i]); -+} -+ -+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -+/* { dg-final { cleanup-tree-dump "vect" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/tail-long-call.c -+++ b/src/gcc/testsuite/gcc.target/arm/tail-long-call.c -@@ -0,0 +1,12 @@ -+/* { dg-skip-if "need at least armv5te" { *-*-* } { "-march=armv[234]*" "-mthumb" } { "" } } */ -+/* { dg-options "-O2 -march=armv5te -marm" } */ -+/* { dg-final { scan-assembler "bx" } } */ -+/* { dg-final { scan-assembler-not "blx" } } */ -+ -+int lcal (int) __attribute__ ((long_call)); -+ -+int -+dec (int a) -+{ -+ return lcal (a); -+} ---- a/src/gcc/testsuite/gcc.target/arm/vect-rounding-btruncf.c -+++ b/src/gcc/testsuite/gcc.target/arm/vect-rounding-btruncf.c -@@ -5,8 +5,11 @@ - - #define N 32 - -+float __attribute__((aligned(16))) input[N]; -+float __attribute__((aligned(16))) output[N]; -+ - void --foo (float *output, float *input) -+foo () - { - int i = 0; - /* Vectorizable. 
*/ ---- a/src/gcc/testsuite/gcc.target/arm/pr61948.c -+++ b/src/gcc/testsuite/gcc.target/arm/pr61948.c -@@ -0,0 +1,16 @@ -+/* PR target/61948 */ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-require-effective-target arm_thumb2_ok } */ -+/* { dg-options "-O2 -mthumb" } */ -+/* { dg-add-options arm_neon } */ -+ -+long long f (long long *c) -+{ -+ long long t = c[0]; -+ asm ("nop" : : : "r0", "r3", "r4", "r5", -+ "r6", "r7", "r8", "r9", -+ "r10", "r11", "r12", "memory"); -+ return t >> 1; -+} -+ ---- a/src/gcc/testsuite/gcc.target/arm/pr51968.c -+++ b/src/gcc/testsuite/gcc.target/arm/pr51968.c -@@ -1,6 +1,6 @@ - /* PR target/51968 */ - /* { dg-do compile } */ --/* { dg-options "-O2 -march=armv7-a -mfloat-abi=softfp -mfpu=neon" } */ -+/* { dg-options "-O2 -Wno-implicit-function-declaration -march=armv7-a -mfloat-abi=softfp -mfpu=neon" } */ - /* { dg-require-effective-target arm_neon_ok } */ - - typedef __builtin_neon_qi int8x8_t __attribute__ ((__vector_size__ (8))); ---- a/src/gcc/testsuite/gcc.target/arm/lround-vcvt_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/lround-vcvt_1.c -@@ -0,0 +1,21 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_v8_vfp_ok } */ -+/* { dg-options "-O2 -march=armv8-a -ffast-math" } */ -+/* { dg-add-options arm_v8_vfp } */ -+ -+int -+foofloat (float x) -+{ -+ return __builtin_lroundf (x); -+} -+ -+/* { dg-final { scan-assembler-times "vcvta.s32.f32\ts\[0-9\]+, s\[0-9\]+" 1 } } */ -+ -+ -+int -+foodouble (double x) -+{ -+ return __builtin_lround (x); -+} -+ -+/* { dg-final { scan-assembler-times "vcvta.s32.f64\ts\[0-9\]+, d\[0-9\]+" 1 } } */ ---- a/src/gcc/testsuite/gcc.target/arm/pr60650.c -+++ b/src/gcc/testsuite/gcc.target/arm/pr60650.c -@@ -20,6 +20,10 @@ - int a, c, d; - long long e; - -+extern int foo1 (struct btrfs_root *, int, int, int); -+extern int foo2 (struct btrfs_root *, int, int); -+ -+int - truncate_one_csum (struct btrfs_root *p1, long long p2, long long p3) - { - int f, g, i = p1->fs_info->sb->s_blocksize_bits; ---- a/src/gcc/testsuite/gcc.target/arm/vfp-1.c -+++ b/src/gcc/testsuite/gcc.target/arm/vfp-1.c -@@ -11,40 +11,40 @@ - - void test_sf() { - /* abssf2_vfp */ -- /* { dg-final { scan-assembler "fabss" } } */ -+ /* { dg-final { scan-assembler "vabs.f32" } } */ - f1 = fabsf (f1); - /* negsf2_vfp */ -- /* { dg-final { scan-assembler "fnegs" } } */ -+ /* { dg-final { scan-assembler "vneg.f32" } } */ - f1 = -f1; - /* addsf3_vfp */ -- /* { dg-final { scan-assembler "fadds" } } */ -+ /* { dg-final { scan-assembler "vadd.f32" } } */ - f1 = f2 + f3; - /* subsf3_vfp */ -- /* { dg-final { scan-assembler "fsubs" } } */ -+ /* { dg-final { scan-assembler "vsub.f32" } } */ - f1 = f2 - f3; - /* divsf3_vfp */ -- /* { dg-final { scan-assembler "fdivs" } } */ -+ /* { dg-final { scan-assembler "vdiv.f32" } } */ - f1 = f2 / f3; - /* mulsf3_vfp */ -- /* { dg-final { scan-assembler "fmuls" } } */ -+ /* { dg-final { scan-assembler "vmul.f32" } } */ - f1 = f2 * f3; - /* mulsf3negsf_vfp */ -- /* { dg-final { scan-assembler "fnmuls" } } */ -+ /* { dg-final { scan-assembler "vnmul.f32" } } */ - f1 = -f2 * f3; - /* mulsf3addsf_vfp */ -- /* { dg-final { scan-assembler "fmacs" } } */ -+ /* { dg-final { scan-assembler "vmla.f32" } } */ - f1 = f2 * f3 + f1; - /* mulsf3subsf_vfp */ -- /* { dg-final { scan-assembler "fmscs" } } */ -+ /* { dg-final { scan-assembler "vnmls.f32" } } */ - f1 = f2 * f3 - f1; - /* mulsf3negsfaddsf_vfp */ -- /* { dg-final { scan-assembler "fnmacs" } } */ -+ /* { dg-final { scan-assembler 
"vmls.f32" } } */ - f1 = f2 - f3 * f1; - /* mulsf3negsfsubsf_vfp */ -- /* { dg-final { scan-assembler "fnmscs" } } */ -+ /* { dg-final { scan-assembler "vnmla.f32" } } */ - f1 = -f2 * f3 - f1; - /* sqrtsf2_vfp */ -- /* { dg-final { scan-assembler "fsqrts" } } */ -+ /* { dg-final { scan-assembler "vsqrt.f32" } } */ - f1 = sqrtf (f1); - } - -@@ -52,40 +52,40 @@ - - void test_df() { - /* absdf2_vfp */ -- /* { dg-final { scan-assembler "fabsd" } } */ -+ /* { dg-final { scan-assembler "vabs.f64" } } */ - d1 = fabs (d1); - /* negdf2_vfp */ -- /* { dg-final { scan-assembler "fnegd" } } */ -+ /* { dg-final { scan-assembler "vneg.f64" } } */ - d1 = -d1; - /* adddf3_vfp */ -- /* { dg-final { scan-assembler "faddd" } } */ -+ /* { dg-final { scan-assembler "vadd.f64" } } */ - d1 = d2 + d3; - /* subdf3_vfp */ -- /* { dg-final { scan-assembler "fsubd" } } */ -+ /* { dg-final { scan-assembler "vsub.f64" } } */ - d1 = d2 - d3; - /* divdf3_vfp */ -- /* { dg-final { scan-assembler "fdivd" } } */ -+ /* { dg-final { scan-assembler "vdiv.f64" } } */ - d1 = d2 / d3; - /* muldf3_vfp */ -- /* { dg-final { scan-assembler "fmuld" } } */ -+ /* { dg-final { scan-assembler "vmul.f64" } } */ - d1 = d2 * d3; - /* muldf3negdf_vfp */ -- /* { dg-final { scan-assembler "fnmuld" } } */ -+ /* { dg-final { scan-assembler "vnmul.f64" } } */ - d1 = -d2 * d3; - /* muldf3adddf_vfp */ -- /* { dg-final { scan-assembler "fmacd" } } */ -+ /* { dg-final { scan-assembler "vmla.f64" } } */ - d1 = d2 * d3 + d1; - /* muldf3subdf_vfp */ -- /* { dg-final { scan-assembler "fmscd" } } */ -+ /* { dg-final { scan-assembler "vnmls.f64" } } */ - d1 = d2 * d3 - d1; - /* muldf3negdfadddf_vfp */ -- /* { dg-final { scan-assembler "fnmacd" } } */ -+ /* { dg-final { scan-assembler "vmls.f64" } } */ - d1 = d2 - d3 * d1; - /* muldf3negdfsubdf_vfp */ -- /* { dg-final { scan-assembler "fnmscd" } } */ -+ /* { dg-final { scan-assembler "vnmla.f64" } } */ - d1 = -d2 * d3 - d1; - /* sqrtdf2_vfp */ -- /* { dg-final { scan-assembler "fsqrtd" } } */ -+ /* { dg-final { scan-assembler "vsqrt.f64" } } */ - d1 = sqrt (d1); - } - -@@ -94,46 +94,46 @@ - - void test_convert () { - /* extendsfdf2_vfp */ -- /* { dg-final { scan-assembler "fcvtds" } } */ -+ /* { dg-final { scan-assembler "vcvt.f64.f32" } } */ - d1 = f1; - /* truncdfsf2_vfp */ -- /* { dg-final { scan-assembler "fcvtsd" } } */ -+ /* { dg-final { scan-assembler "vcvt.f32.f64" } } */ - f1 = d1; - /* truncsisf2_vfp */ -- /* { dg-final { scan-assembler "ftosizs" } } */ -+ /* { dg-final { scan-assembler "vcvt.s32.f32" } } */ - i1 = f1; - /* truncsidf2_vfp */ -- /* { dg-final { scan-assembler "ftosizd" } } */ -+ /* { dg-final { scan-assembler "vcvt.s32.f64" } } */ - i1 = d1; - /* fixuns_truncsfsi2 */ -- /* { dg-final { scan-assembler "ftouizs" } } */ -+ /* { dg-final { scan-assembler "vcvt.u32.f32" } } */ - u1 = f1; - /* fixuns_truncdfsi2 */ -- /* { dg-final { scan-assembler "ftouizd" } } */ -+ /* { dg-final { scan-assembler "vcvt.u32.f64" } } */ - u1 = d1; - /* floatsisf2_vfp */ -- /* { dg-final { scan-assembler "fsitos" } } */ -+ /* { dg-final { scan-assembler "vcvt.f32.s32" } } */ - f1 = i1; - /* floatsidf2_vfp */ -- /* { dg-final { scan-assembler "fsitod" } } */ -+ /* { dg-final { scan-assembler "vcvt.f64.s32" } } */ - d1 = i1; - /* floatunssisf2 */ -- /* { dg-final { scan-assembler "fuitos" } } */ -+ /* { dg-final { scan-assembler "vcvt.f32.u32" } } */ - f1 = u1; - /* floatunssidf2 */ -- /* { dg-final { scan-assembler "fuitod" } } */ -+ /* { dg-final { scan-assembler "vcvt.f64.u32" } } */ - d1 = u1; - } - - 
void test_ldst (float f[], double d[]) { -- /* { dg-final { scan-assembler "flds.+ \\\[r0, #1020\\\]" } } */ -- /* { dg-final { scan-assembler "flds.+ \\\[r\[0-9\], #-1020\\\]" { target { arm32 && { ! arm_thumb2_ok } } } } } */ -+ /* { dg-final { scan-assembler "vldr.32.+ \\\[r0, #1020\\\]" } } */ -+ /* { dg-final { scan-assembler "vldr.32.+ \\\[r\[0-9\], #-1020\\\]" { target { arm32 && { ! arm_thumb2_ok } } } } } */ - /* { dg-final { scan-assembler "add.+ r0, #1024" } } */ -- /* { dg-final { scan-assembler "fsts.+ \\\[r\[0-9\]\\\]\n" } } */ -+ /* { dg-final { scan-assembler "vstr.32.+ \\\[r\[0-9\]\\\]\n" } } */ - f[256] = f[255] + f[-255]; - -- /* { dg-final { scan-assembler "fldd.+ \\\[r1, #1016\\\]" } } */ -- /* { dg-final { scan-assembler "fldd.+ \\\[r\[1-9\], #-1016\\\]" { target { arm32 && { ! arm_thumb2_ok } } } } } */ -- /* { dg-final { scan-assembler "fstd.+ \\\[r1, #256\\\]" } } */ -+ /* { dg-final { scan-assembler "vldr.64.+ \\\[r1, #1016\\\]" } } */ -+ /* { dg-final { scan-assembler "vldr.64.+ \\\[r\[1-9\], #-1016\\\]" { target { arm32 && { ! arm_thumb2_ok } } } } } */ -+ /* { dg-final { scan-assembler "vstr.64.+ \\\[r1, #256\\\]" } } */ - d[32] = d[127] + d[-127]; - } ---- a/src/gcc/testsuite/gcc.target/arm/vect-copysignf.c -+++ b/src/gcc/testsuite/gcc.target/arm/vect-copysignf.c -@@ -0,0 +1,36 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_hw } */ -+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details" } */ -+/* { dg-add-options "arm_neon" } */ -+ -+extern void abort (); -+ -+#define N 16 -+float a[N] = {-0.1f, -3.2f, -6.3f, -9.4f, -+ -12.5f, -15.6f, -18.7f, -21.8f, -+ 24.9f, 27.1f, 30.2f, 33.3f, -+ 36.4f, 39.5f, 42.6f, 45.7f}; -+float b[N] = {-1.2f, 3.4f, -5.6f, 7.8f, -+ -9.0f, 1.0f, -2.0f, 3.0f, -+ -4.0f, -5.0f, 6.0f, 7.0f, -+ -8.0f, -9.0f, 10.0f, 11.0f}; -+float r[N]; -+ -+int -+main (void) -+{ -+ int i; -+ -+ for (i = 0; i < N; i++) -+ r[i] = __builtin_copysignf (a[i], b[i]); -+ -+ /* check results: */ -+ for (i = 0; i < N; i++) -+ if (r[i] != __builtin_copysignf (a[i], b[i])) -+ abort (); -+ -+ return 0; -+} -+ -+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -+/* { dg-final { cleanup-tree-dump "vect" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/rev16.c -+++ b/src/gcc/testsuite/gcc.target/arm/rev16.c -@@ -0,0 +1,35 @@ -+/* { dg-options "-O2" } */ -+/* { dg-do run } */ -+ -+extern void abort (void); -+ -+typedef unsigned int __u32; -+ -+__u32 -+__rev16_32_alt (__u32 x) -+{ -+ return (((__u32)(x) & (__u32)0xff00ff00UL) >> 8) -+ | (((__u32)(x) & (__u32)0x00ff00ffUL) << 8); -+} -+ -+__u32 -+__rev16_32 (__u32 x) -+{ -+ return (((__u32)(x) & (__u32)0x00ff00ffUL) << 8) -+ | (((__u32)(x) & (__u32)0xff00ff00UL) >> 8); -+} -+ -+int -+main (void) -+{ -+ volatile __u32 in32 = 0x12345678; -+ volatile __u32 expected32 = 0x34127856; -+ -+ if (__rev16_32 (in32) != expected32) -+ abort (); -+ -+ if (__rev16_32_alt (in32) != expected32) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/arm/anddi_notdi-1.c -+++ b/src/gcc/testsuite/gcc.target/arm/anddi_notdi-1.c -@@ -0,0 +1,65 @@ -+/* { dg-do run } */ -+/* { dg-options "-O2 -fno-inline --save-temps" } */ -+ -+extern void abort (void); -+ -+typedef long long s64int; -+typedef int s32int; -+typedef unsigned long long u64int; -+typedef unsigned int u32int; -+ -+s64int -+anddi_di_notdi (s64int a, s64int b) -+{ -+ return (a & ~b); -+} -+ -+s64int -+anddi_di_notzesidi (s64int a, u32int b) -+{ -+ return (a & ~(u64int) b); -+} -+ -+s64int -+anddi_notdi_zesidi (s64int a, 
u32int b) -+{ -+ return (~a & (u64int) b); -+} -+ -+s64int -+anddi_di_notsesidi (s64int a, s32int b) -+{ -+ return (a & ~(s64int) b); -+} -+ -+int main () -+{ -+ s64int a64 = 0xdeadbeef0000ffffll; -+ s64int b64 = 0x000000005f470112ll; -+ s64int c64 = 0xdeadbeef300f0000ll; -+ -+ u32int c32 = 0x01124f4f; -+ s32int d32 = 0xabbaface; -+ -+ s64int z = anddi_di_notdi (c64, b64); -+ if (z != 0xdeadbeef20080000ll) -+ abort (); -+ -+ z = anddi_di_notzesidi (a64, c32); -+ if (z != 0xdeadbeef0000b0b0ll) -+ abort (); -+ -+ z = anddi_notdi_zesidi (c64, c32); -+ if (z != 0x0000000001104f4fll) -+ abort (); -+ -+ z = anddi_di_notsesidi (a64, d32); -+ if (z != 0x0000000000000531ll) -+ abort (); -+ -+ return 0; -+} -+ -+/* { dg-final { scan-assembler-times "bic\t" 6 } } */ -+ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/pr63210.c -+++ b/src/gcc/testsuite/gcc.target/arm/pr63210.c -@@ -0,0 +1,12 @@ -+/* { dg-do assemble } */ -+/* { dg-options "-mthumb -Os " } */ -+/* { dg-require-effective-target arm_thumb1_ok } */ -+ -+int foo1 (int c); -+int foo2 (int c); -+ -+int test (int c) -+{ -+ return (foo1 (c) || foo2 (c)); -+} -+/* { dg-final { object-size text <= 28 } } */ ---- a/src/gcc/testsuite/gcc.target/arm/pr60606-2.c -+++ b/src/gcc/testsuite/gcc.target/arm/pr60606-2.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O" } */ -+ -+int -+f (void) -+{ -+ register unsigned pc asm ("pc"); /* { dg-error "not general enough" } */ -+ -+ return pc > 0x12345678; -+} ---- a/src/gcc/testsuite/gcc.target/arm/vect-rounding-ceilf.c -+++ b/src/gcc/testsuite/gcc.target/arm/vect-rounding-ceilf.c -@@ -5,8 +5,11 @@ - - #define N 32 - -+float __attribute__((aligned(16))) input[N]; -+float __attribute__((aligned(16))) output[N]; -+ - void --foo (float *output, float *input) -+foo () - { - int i = 0; - /* Vectorizable. 
*/ ---- a/src/gcc/testsuite/gcc.target/arm/pr60650-2.c -+++ b/src/gcc/testsuite/gcc.target/arm/pr60650-2.c -@@ -4,17 +4,19 @@ - int a, h, j; - long long d, e, i; - int f; -+int - fn1 (void *p1, int p2) - { - switch (p2) - case 8: - { -- register b = *(long long *) p1, c asm ("r2"); -+ register int b = *(long long *) p1, c asm ("r2"); - asm ("%0": "=r" (a), "=r" (c):"r" (b), "r" (0)); - *(long long *) p1 = c; - } - } - -+int - fn2 () - { - int k; -@@ -27,8 +29,8 @@ - case 0: - ( - { -- register l asm ("r4"); -- register m asm ("r0"); -+ register int l asm ("r4"); -+ register int m asm ("r0"); - asm (" .err .endif\n\t": "=r" (h), "=r" (j):"r" (m), - "r" - (l));; ---- a/src/gcc/testsuite/gcc.target/arm/pr55642.c -+++ b/src/gcc/testsuite/gcc.target/arm/pr55642.c -@@ -2,6 +2,8 @@ - /* { dg-do compile } */ - /* { dg-require-effective-target arm_thumb2_ok } */ - -+extern int abs (int); -+ - int - foo (int v) - { ---- a/src/gcc/testsuite/gcc.target/arm/lfloor-vcvt_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/lfloor-vcvt_1.c -@@ -0,0 +1,21 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_v8_vfp_ok } */ -+/* { dg-options "-O2 -march=armv8-a" } */ -+/* { dg-add-options arm_v8_vfp } */ -+ -+int -+foofloat (float x) -+{ -+ return __builtin_lfloorf (x); -+} -+ -+/* { dg-final { scan-assembler-times "vcvtm.s32.f32\ts\[0-9\]+, s\[0-9\]+" 1 } } */ -+ -+ -+int -+foodouble (double x) -+{ -+ return __builtin_lfloor (x); -+} -+ -+/* { dg-final { scan-assembler-times "vcvtm.s32.f64\ts\[0-9\]+, d\[0-9\]+" 1 } } */ ---- a/src/gcc/testsuite/gcc.target/arm/pr60606-3.c -+++ b/src/gcc/testsuite/gcc.target/arm/pr60606-3.c -@@ -0,0 +1,9 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O" } */ -+ -+int -+f (void) -+{ -+ register unsigned int r asm ("cc"); /* { dg-error "not general enough|suitable for data type" } */ -+ return r; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_1.c -@@ -0,0 +1,19 @@ -+/* Verify: -+ * -fomit-frame-pointer. -+ * withoug outgoing. -+ * total frame size <= 256. -+ * number of callee-save reg == 1. -+ * optimized code should use "str !" for stack adjustment. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */ -+ -+#include "test_frame_common.h" -+ -+t_frame_pattern (test1, 200, ) -+t_frame_run (test1) -+ -+/* { dg-final { scan-assembler-times "str\tx30, \\\[sp, -\[0-9\]+\\\]!" 2 } } */ -+/* { dg-final { scan-assembler-times "ldr\tx30, \\\[sp\\\], \[0-9\]+" 3 } } */ -+ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_9.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_9.c -@@ -0,0 +1,17 @@ -+/* Verify: -+ * -fomit-frame-pointer. -+ * with outgoing. -+ * total frame size > 512. -+ area except outgoing <= 512 -+ * number of callee-saved reg = 1. -+ * Split stack adjustment into two subtractions. -+ the first subtractions couldn't be optimized -+ into "str !" as it's > 256. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-O2 -fomit-frame-pointer" } */ -+ -+#include "test_frame_common.h" -+ -+t_frame_pattern_outgoing (test9, 480, , 24, a[8], a[9], a[10]) -+t_frame_run (test9) ---- a/src/gcc/testsuite/gcc.target/aarch64/vldN_lane_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vldN_lane_1.c -@@ -0,0 +1,97 @@ -+/* { dg-do run } */ -+/* { dg-options "-O3 -fno-inline" } */ -+ -+#include <arm_neon.h> -+ -+extern void abort (void); -+ -+#define VARIANTS(VARIANT, STRUCT) \ -+VARIANT (uint8, , 8, _u8, 6, STRUCT) \ -+VARIANT (uint16, , 4, _u16, 3, STRUCT) \ -+VARIANT (uint32, , 2, _u32, 1, STRUCT) \ -+VARIANT (uint64, , 1, _u64, 0, STRUCT) \ -+VARIANT (int8, , 8, _s8, 5, STRUCT) \ -+VARIANT (int16, , 4, _s16, 2, STRUCT) \ -+VARIANT (int32, , 2, _s32, 0, STRUCT) \ -+VARIANT (int64, , 1, _s64, 0, STRUCT) \ -+VARIANT (poly8, , 8, _p8, 7, STRUCT) \ -+VARIANT (poly16, , 4, _p16, 1, STRUCT) \ -+VARIANT (float32, , 2, _f32, 1, STRUCT) \ -+VARIANT (float64, , 1, _f64, 0, STRUCT) \ -+VARIANT (uint8, q, 16, _u8, 14, STRUCT) \ -+VARIANT (uint16, q, 8, _u16, 4, STRUCT) \ -+VARIANT (uint32, q, 4, _u32, 3, STRUCT) \ -+VARIANT (uint64, q, 2, _u64, 0, STRUCT) \ -+VARIANT (int8, q, 16, _s8, 13, STRUCT) \ -+VARIANT (int16, q, 8, _s16, 6, STRUCT) \ -+VARIANT (int32, q, 4, _s32, 2, STRUCT) \ -+VARIANT (int64, q, 2, _s64, 1, STRUCT) \ -+VARIANT (poly8, q, 16, _p8, 12, STRUCT) \ -+VARIANT (poly16, q, 8, _p16, 5, STRUCT) \ -+VARIANT (float32, q, 4, _f32, 1, STRUCT)\ -+VARIANT (float64, q, 2, _f64, 0, STRUCT) -+ -+#define TESTMETH(BASE, Q, ELTS, SUFFIX, LANE, STRUCT) \ -+int \ -+test_vld##STRUCT##Q##_lane##SUFFIX (const BASE##_t *data, \ -+ const BASE##_t *overwrite) \ -+{ \ -+ BASE##x##ELTS##x##STRUCT##_t vectors; \ -+ BASE##_t temp[ELTS]; \ -+ int i,j; \ -+ for (i = 0; i < STRUCT; i++, data += ELTS) \ -+ vectors.val[i] = vld1##Q##SUFFIX (data); \ -+ vectors = vld##STRUCT##Q##_lane##SUFFIX (overwrite, vectors, LANE); \ -+ while (--i >= 0) \ -+ { \ -+ vst1##Q##SUFFIX (temp, vectors.val[i]); \ -+ data -= ELTS; /* Point at value loaded before vldN_lane. */ \ -+ for (j = 0; j < ELTS; j++) \ -+ if (temp[j] != (j == LANE ? overwrite[i] : data[j])) \ -+ return 1; \ -+ } \ -+ return 0; \ -+} -+ -+ -+/* Tests of vld2_dup and vld2q_dup. */ -+VARIANTS (TESTMETH, 2) -+/* Tests of vld3_dup and vld3q_dup. */ -+VARIANTS (TESTMETH, 3) -+/* Tests of vld4_dup and vld4q_dup. */ -+VARIANTS (TESTMETH, 4) -+ -+#define CHECK(BASE, Q, ELTS, SUFFIX, LANE, STRUCT) \ -+ if (test_vld##STRUCT##Q##_lane##SUFFIX ((const BASE##_t *)orig_data, \ -+ BASE##_data) != 0) \ -+ abort (); -+ -+int -+main (int argc, char **argv) -+{ -+ /* Original data for all vector formats. */ -+ uint64_t orig_data[8] = {0x1234567890abcdefULL, 0x13579bdf02468aceULL, -+ 0x012389ab4567cdefULL, 0xfeeddadacafe0431ULL, -+ 0x1032547698badcfeULL, 0xbadbadbadbad0badULL, -+ 0x0102030405060708ULL, 0x0f0e0d0c0b0a0908ULL}; -+ -+ /* Data with which vldN_lane will overwrite some of previous. 
*/ -+ uint8_t uint8_data[4] = { 7, 11, 13, 17 }; -+ uint16_t uint16_data[4] = { 257, 263, 269, 271 }; -+ uint32_t uint32_data[4] = { 65537, 65539, 65543, 65551 }; -+ uint64_t uint64_data[4] = { 0xdeadbeefcafebabeULL, 0x0123456789abcdefULL, -+ 0xfedcba9876543210LL, 0xdeadbabecafebeefLL }; -+ int8_t int8_data[4] = { -1, 3, -5, 7 }; -+ int16_t int16_data[4] = { 257, -259, 261, -263 }; -+ int32_t int32_data[4] = { 123456789, -987654321, -135792468, 975318642 }; -+ int64_t *int64_data = (int64_t *)uint64_data; -+ poly8_t poly8_data[4] = { 0, 7, 13, 18, }; -+ poly16_t poly16_data[4] = { 11111, 2222, 333, 44 }; -+ float32_t float32_data[4] = { 3.14159, 2.718, 1.414, 100.0 }; -+ float64_t float64_data[4] = { 1.010010001, 12345.6789, -9876.54321, 1.618 }; -+ -+ VARIANTS (CHECK, 2); -+ VARIANTS (CHECK, 3); -+ VARIANTS (CHECK, 4); -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/vldN_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vldN_1.c -@@ -0,0 +1,79 @@ -+/* { dg-do run } */ -+/* { dg-options "-O3" } */ -+ -+#include <arm_neon.h> -+ -+extern void abort (void); -+ -+#define TESTMETH(BASE, ELTS, STRUCT, SUFFIX) \ -+int __attribute__ ((noinline)) \ -+test_vld##STRUCT##SUFFIX () \ -+{ \ -+ BASE##_t data[ELTS * STRUCT]; \ -+ BASE##_t temp[ELTS]; \ -+ BASE##x##ELTS##x##STRUCT##_t vectors; \ -+ int i,j; \ -+ for (i = 0; i < STRUCT * ELTS; i++) \ -+ data [i] = (BASE##_t) 2*i + 1; \ -+ asm volatile ("" : : : "memory"); \ -+ vectors = vld##STRUCT##SUFFIX (data); \ -+ for (i = 0; i < STRUCT; i++) \ -+ { \ -+ vst1##SUFFIX (temp, vectors.val[i]); \ -+ asm volatile ("" : : : "memory"); \ -+ for (j = 0; j < ELTS; j++) \ -+ if (temp[j] != data[i + STRUCT*j]) \ -+ return 1; \ -+ } \ -+ return 0; \ -+} -+ -+#define VARIANTS(VARIANT, STRUCT) \ -+VARIANT (uint8, 8, STRUCT, _u8) \ -+VARIANT (uint16, 4, STRUCT, _u16) \ -+VARIANT (uint32, 2, STRUCT, _u32) \ -+VARIANT (uint64, 1, STRUCT, _u64) \ -+VARIANT (int8, 8, STRUCT, _s8) \ -+VARIANT (int16, 4, STRUCT, _s16) \ -+VARIANT (int32, 2, STRUCT, _s32) \ -+VARIANT (int64, 1, STRUCT, _s64) \ -+VARIANT (poly8, 8, STRUCT, _p8) \ -+VARIANT (poly16, 4, STRUCT, _p16) \ -+VARIANT (float32, 2, STRUCT, _f32) \ -+VARIANT (float64, 1, STRUCT, _f64) \ -+VARIANT (uint8, 16, STRUCT, q_u8) \ -+VARIANT (uint16, 8, STRUCT, q_u16) \ -+VARIANT (uint32, 4, STRUCT, q_u32) \ -+VARIANT (uint64, 2, STRUCT, q_u64) \ -+VARIANT (int8, 16, STRUCT, q_s8) \ -+VARIANT (int16, 8, STRUCT, q_s16) \ -+VARIANT (int32, 4, STRUCT, q_s32) \ -+VARIANT (int64, 2, STRUCT, q_s64) \ -+VARIANT (poly8, 16, STRUCT, q_p8) \ -+VARIANT (poly16, 8, STRUCT, q_p16) \ -+VARIANT (float32, 4, STRUCT, q_f32) \ -+VARIANT (float64, 2, STRUCT, q_f64) -+ -+/* Tests of vld2 and vld2q. */ -+VARIANTS (TESTMETH, 2) -+ -+/* Tests of vld3 and vld3q. */ -+VARIANTS (TESTMETH, 3) -+ -+/* Tests of vld4 and vld4q. */ -+VARIANTS (TESTMETH, 4) -+ -+#define CHECK(BASE, ELTS, STRUCT, SUFFIX) \ -+ if (test_vld##STRUCT##SUFFIX () != 0) \ -+ abort (); -+ -+int -+main (int argc, char **argv) -+{ -+ VARIANTS (CHECK, 2) -+ VARIANTS (CHECK, 3) -+ VARIANTS (CHECK, 4) -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/vqabs_s64_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vqabs_s64_1.c -@@ -0,0 +1,54 @@ -+/* Test vqabs_s64 intrinsics work correctly. 
*/ -+/* { dg-do run } */ -+/* { dg-options "--save-temps" } */ -+ -+#include <arm_neon.h> -+ -+extern void abort (void); -+ -+int __attribute__ ((noinline)) -+test_vqabs_s64 (int64x1_t passed, int64_t expected) -+{ -+ return vget_lane_s64 (vqabs_s64 (passed), 0) != expected; -+} -+ -+int __attribute__ ((noinline)) -+test_vqabsd_s64 (int64_t passed, int64_t expected) -+{ -+ return vqabsd_s64 (passed) != expected; -+} -+ -+/* { dg-final { scan-assembler-times "sqabs\\td\[0-9\]+, d\[0-9\]+" 2 } } */ -+ -+int -+main (int argc, char **argv) -+{ -+ /* Basic test. */ -+ if (test_vqabs_s64 (vcreate_s64 (-1), 1)) -+ abort (); -+ if (test_vqabsd_s64 (-1, 1)) -+ abort (); -+ -+ /* Getting absolute value of min int64_t. -+ Note, exact result cannot be represented in int64_t, -+ so max int64_t is expected. */ -+ if (test_vqabs_s64 (vcreate_s64 (0x8000000000000000), 0x7fffffffffffffff)) -+ abort (); -+ if (test_vqabsd_s64 (0x8000000000000000, 0x7fffffffffffffff)) -+ abort (); -+ -+ /* Another input that gets max int64_t. */ -+ if (test_vqabs_s64 (vcreate_s64 (0x8000000000000001), 0x7fffffffffffffff)) -+ abort (); -+ if (test_vqabsd_s64 (0x8000000000000001, 0x7fffffffffffffff)) -+ abort (); -+ -+ /* Checking that large positive numbers stay the same. */ -+ if (test_vqabs_s64 (vcreate_s64 (0x7fffffffffffffff), 0x7fffffffffffffff)) -+ abort (); -+ if (test_vqabsd_s64 (0x7fffffffffffffff, 0x7fffffffffffffff)) -+ abort (); -+ -+ return 0; -+} -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/acle/acle.exp -+++ b/src/gcc/testsuite/gcc.target/aarch64/acle/acle.exp -@@ -0,0 +1,35 @@ -+# Copyright (C) 2014 Free Software Foundation, Inc. -+ -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 3 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with GCC; see the file COPYING3. If not see -+# <http://www.gnu.org/licenses/>. -+ -+# GCC testsuite that uses the `dg.exp' driver. -+ -+# Exit immediately if this isn't an AArch64 target. -+if ![istarget aarch64*-*-*] then { -+ return -+} -+ -+# Load support procs. -+load_lib gcc-dg.exp -+ -+# Initialize `dg'. -+dg-init -+ -+# Main loop. -+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \ -+ "" "" -+ -+# All done. -+dg-finish ---- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32b.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32b.c -@@ -0,0 +1,15 @@ -+/* Test the crc32b ACLE intrinsic. */ -+ -+/* { dg-do assemble } */ -+/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */ -+ -+#include "arm_acle.h" -+ -+uint32_t -+test_crc32b (uint32_t arg0, uint8_t arg1) -+{ -+ return __crc32b (arg0, arg1); -+} -+ -+/* { dg-final { scan-assembler "crc32b\tw..?, w..?, w..?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32d.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32d.c -@@ -0,0 +1,15 @@ -+/* Test the crc32d ACLE intrinsic. 
*/ -+ -+/* { dg-do assemble } */ -+/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */ -+ -+#include "arm_acle.h" -+ -+uint32_t -+test_crc32d (uint32_t arg0, uint64_t arg1) -+{ -+ return __crc32d (arg0, arg1); -+} -+ -+/* { dg-final { scan-assembler "crc32x\tw..?, w..?, x..?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32cb.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32cb.c -@@ -0,0 +1,15 @@ -+/* Test the crc32cb ACLE intrinsic. */ -+ -+/* { dg-do assemble } */ -+/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */ -+ -+#include "arm_acle.h" -+ -+uint32_t -+test_crc32cb (uint32_t arg0, uint8_t arg1) -+{ -+ return __crc32cb (arg0, arg1); -+} -+ -+/* { dg-final { scan-assembler "crc32cb\tw..?, w..?, w..?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32cd.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32cd.c -@@ -0,0 +1,15 @@ -+/* Test the crc32cd ACLE intrinsic. */ -+ -+/* { dg-do assemble } */ -+/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */ -+ -+#include "arm_acle.h" -+ -+uint32_t -+test_crc32cd (uint32_t arg0, uint64_t arg1) -+{ -+ return __crc32cd (arg0, arg1); -+} -+ -+/* { dg-final { scan-assembler "crc32cx\tw..?, w..?, x..?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32w.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32w.c -@@ -0,0 +1,15 @@ -+/* Test the crc32w ACLE intrinsic. */ -+ -+/* { dg-do assemble } */ -+/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */ -+ -+#include "arm_acle.h" -+ -+uint32_t -+test_crc32w (uint32_t arg0, uint32_t arg1) -+{ -+ return __crc32w (arg0, arg1); -+} -+ -+/* { dg-final { scan-assembler "crc32w\tw..?, w..?, w..?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32h.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32h.c -@@ -0,0 +1,15 @@ -+/* Test the crc32h ACLE intrinsic. */ -+ -+/* { dg-do assemble } */ -+/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */ -+ -+#include "arm_acle.h" -+ -+uint32_t -+test_crc32h (uint32_t arg0, uint16_t arg1) -+{ -+ return __crc32h (arg0, arg1); -+} -+ -+/* { dg-final { scan-assembler "crc32h\tw..?, w..?, w..?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32cw.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32cw.c -@@ -0,0 +1,15 @@ -+/* Test the crc32cw ACLE intrinsic. */ -+ -+/* { dg-do assemble } */ -+/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */ -+ -+#include "arm_acle.h" -+ -+uint32_t -+test_crc32cw (uint32_t arg0, uint32_t arg1) -+{ -+ return __crc32cw (arg0, arg1); -+} -+ -+/* { dg-final { scan-assembler "crc32cw\tw..?, w..?, w..?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32ch.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32ch.c -@@ -0,0 +1,15 @@ -+/* Test the crc32ch ACLE intrinsic. 
*/ -+ -+/* { dg-do assemble } */ -+/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */ -+ -+#include "arm_acle.h" -+ -+uint32_t -+test_crc32ch (uint32_t arg0, uint16_t arg1) -+{ -+ return __crc32ch (arg0, arg1); -+} -+ -+/* { dg-final { scan-assembler "crc32ch\tw..?, w..?, w..?\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_13.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_13.c -@@ -0,0 +1,18 @@ -+/* Verify: -+ * without outgoing. -+ * total frame size > 512. -+ * number of callee-save reg >= 2. -+ * split the stack adjustment into two substractions, -+ the second could be optimized into "stp !". */ -+ -+/* { dg-do run } */ -+/* { dg-options "-O2 --save-temps" } */ -+ -+#include "test_frame_common.h" -+ -+t_frame_pattern (test13, 700, ) -+t_frame_run (test13) -+ -+/* { dg-final { scan-assembler-times "sub\tsp, sp, #\[0-9\]+" 1 } } */ -+/* { dg-final { scan-assembler-times "stp\tx29, x30, \\\[sp, -\[0-9\]+\\\]!" 2 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_2.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_2.c -@@ -0,0 +1,20 @@ -+/* Verify: -+ * -fomit-frame-pointer. -+ * without outgoing. -+ * total frame size <= 256. -+ * number of callee-save regs >= 2. -+ * optimized code should use "stp !" for stack adjustment. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */ -+ -+#include "test_frame_common.h" -+ -+t_frame_pattern (test2, 200, "x19") -+t_frame_run (test2) -+ -+ -+/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, -\[0-9\]+\\\]!" 1 } } */ -+/* { dg-final { scan-assembler-times "ldp\tx19, x30, \\\[sp\\\], \[0-9\]+" 2 } } */ -+ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/legitimize_stack_var_before_reload_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/legitimize_stack_var_before_reload_1.c -@@ -0,0 +1,21 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fdump-rtl-expand" } */ -+ -+extern void initialize_array (unsigned char *, int); -+ -+int -+test15 (void) -+{ -+ unsigned char a[480]; -+ -+ initialize_array (a, 480); -+ -+ if (a[0] == 0x10) -+ return 1; -+ -+ return 0; -+} -+ -+/* { dg-final { scan-rtl-dump "\\(mem\[^\\n\]*\\(plus\[^\\n\]*virtual-stack-vars" "expand" } } */ -+ -+/* { dg-final { cleanup-rtl-dump "expand" } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/vreinterpret_f64_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vreinterpret_f64_1.c -@@ -0,0 +1,596 @@ -+/* Test vreinterpret_f64_* and vreinterpret_*_f64 intrinsics work correctly. */ -+/* { dg-do run } */ -+/* { dg-options "-O3" } */ -+ -+#include <arm_neon.h> -+ -+extern void abort (void); -+ -+#define ABS(a) __builtin_fabs (a) -+#define ISNAN(a) __builtin_isnan (a) -+ -+#define DOUBLE_EQUALS(a, b, epsilon) \ -+( \ -+ ((a) == (b)) \ -+ || (ISNAN (a) && ISNAN (b)) \ -+ || (ABS (a - b) < epsilon) \ -+) -+ -+/* Pi accurate up to 16 digits. -+ Further digits are a closest binary approximation. */ -+#define PI_F64 3.14159265358979311599796346854 -+/* Hex representation in Double (IEEE754 Double precision 64-bit) is: -+ 0x400921FB54442D18. */ -+ -+/* E accurate up to 16 digits. -+ Further digits are a closest binary approximation. */ -+#define E_F64 2.71828182845904509079559829843 -+/* Hex representation in Double (IEEE754 Double precision 64-bit) is: -+ 0x4005BF0A8B145769. 
*/ -+ -+float32x2_t __attribute__ ((noinline)) -+wrap_vreinterpret_f32_f64 (float64x1_t __a) -+{ -+ return vreinterpret_f32_f64 (__a); -+} -+ -+int __attribute__ ((noinline)) -+test_vreinterpret_f32_f64 () -+{ -+ float64x1_t a; -+ float32x2_t b; -+ float64_t c[1] = { PI_F64 }; -+ /* Values { 0x54442D18, 0x400921FB } reinterpreted as f32. */ -+ float32_t d[2] = { 3.3702805504E12, 2.1426990032196044921875E0 }; -+ float32_t e[2]; -+ int i; -+ -+ a = vld1_f64 (c); -+ b = wrap_vreinterpret_f32_f64 (a); -+ vst1_f32 (e, b); -+ for (i = 0; i < 2; i++) -+ if (!DOUBLE_EQUALS (d[i], e[i], __FLT_EPSILON__)) -+ return 1; -+ return 0; -+}; -+ -+int8x8_t __attribute__ ((noinline)) -+wrap_vreinterpret_s8_f64 (float64x1_t __a) -+{ -+ return vreinterpret_s8_f64 (__a); -+} -+ -+int __attribute__ ((noinline)) -+test_vreinterpret_s8_f64 () -+{ -+ float64x1_t a; -+ int8x8_t b; -+ float64_t c[1] = { PI_F64 }; -+ int8_t d[8] = { 0x18, 0x2D, 0x44, 0x54, 0xFB, 0x21, 0x09, 0x40 }; -+ int8_t e[8]; -+ int i; -+ -+ a = vld1_f64 (c); -+ b = wrap_vreinterpret_s8_f64 (a); -+ vst1_s8 (e, b); -+ for (i = 0; i < 8; i++) -+ if (d[i] != e[i]) -+ return 1; -+ return 0; -+}; -+ -+int16x4_t __attribute__ ((noinline)) -+wrap_vreinterpret_s16_f64 (float64x1_t __a) -+{ -+ return vreinterpret_s16_f64 (__a); -+} -+ -+int __attribute__ ((noinline)) -+test_vreinterpret_s16_f64 () -+{ -+ float64x1_t a; -+ int16x4_t b; -+ float64_t c[1] = { PI_F64 }; -+ int16_t d[4] = { 0x2D18, 0x5444, 0x21FB, 0x4009 }; -+ int16_t e[4]; -+ int i; -+ -+ a = vld1_f64 (c); -+ b = wrap_vreinterpret_s16_f64 (a); -+ vst1_s16 (e, b); -+ for (i = 0; i < 4; i++) -+ if (d[i] != e[i]) -+ return 1; -+ return 0; -+}; -+ -+int32x2_t __attribute__ ((noinline)) -+wrap_vreinterpret_s32_f64 (float64x1_t __a) -+{ -+ return vreinterpret_s32_f64 (__a); -+} -+ -+int __attribute__ ((noinline)) -+test_vreinterpret_s32_f64 () -+{ -+ float64x1_t a; -+ int32x2_t b; -+ float64_t c[1] = { PI_F64 }; -+ int32_t d[2] = { 0x54442D18, 0x400921FB }; -+ int32_t e[2]; -+ int i; -+ -+ a = vld1_f64 (c); -+ b = wrap_vreinterpret_s32_f64 (a); -+ vst1_s32 (e, b); -+ for (i = 0; i < 2; i++) -+ if (d[i] != e[i]) -+ return 1; -+ return 0; -+}; -+ -+int64x1_t __attribute__ ((noinline)) -+wrap_vreinterpret_s64_f64 (float64x1_t __a) -+{ -+ return vreinterpret_s64_f64 (__a); -+} -+ -+int __attribute__ ((noinline)) -+test_vreinterpret_s64_f64 () -+{ -+ float64x1_t a; -+ int64x1_t b; -+ float64_t c[1] = { PI_F64 }; -+ int64_t d[1] = { 0x400921FB54442D18 }; -+ int64_t e[1]; -+ int i; -+ -+ a = vld1_f64 (c); -+ b = wrap_vreinterpret_s64_f64 (a); -+ vst1_s64 (e, b); -+ if (d[0] != e[0]) -+ return 1; -+ return 0; -+}; -+ -+float32x4_t __attribute__ ((noinline)) -+wrap_vreinterpretq_f32_f64 (float64x2_t __a) -+{ -+ return vreinterpretq_f32_f64 (__a); -+} -+ -+int __attribute__ ((noinline)) -+test_vreinterpretq_f32_f64 () -+{ -+ float64x2_t a; -+ float32x4_t b; -+ float64_t c[2] = { PI_F64, E_F64 }; -+ -+ /* Values corresponding to f32 reinterpret of -+ { 0x54442D18, 0x400921FB, 0x8B145769, 0x4005BF0A }. 
*/ -+ float32_t d[4] = { 3.3702805504E12, -+ 2.1426990032196044921875E0, -+ -2.8569523269651966444143014594E-32, -+ 2.089785099029541015625E0 }; -+ float32_t e[4]; -+ int i; -+ -+ a = vld1q_f64 (c); -+ b = wrap_vreinterpretq_f32_f64 (a); -+ vst1q_f32 (e, b); -+ for (i = 0; i < 4; i++) -+ { -+ if (!DOUBLE_EQUALS (d[i], e[i], __FLT_EPSILON__)) -+ return 1; -+ } -+ return 0; -+}; -+ -+int8x16_t __attribute__ ((noinline)) -+wrap_vreinterpretq_s8_f64 (float64x2_t __a) -+{ -+ return vreinterpretq_s8_f64 (__a); -+} -+ -+int __attribute__ ((noinline)) -+test_vreinterpretq_s8_f64 () -+{ -+ float64x2_t a; -+ int8x16_t b; -+ float64_t c[2] = { PI_F64, E_F64 }; -+ int8_t d[16] = { 0x18, 0x2D, 0x44, 0x54, 0xFB, 0x21, 0x09, 0x40, -+ 0x69, 0x57, 0x14, 0x8B, 0x0A, 0xBF, 0x05, 0x40 }; -+ int8_t e[16]; -+ int i; -+ -+ a = vld1q_f64 (c); -+ b = wrap_vreinterpretq_s8_f64 (a); -+ vst1q_s8 (e, b); -+ for (i = 0; i < 16; i++) -+ if (d[i] != e[i]) -+ return 1; -+ return 0; -+}; -+ -+int16x8_t __attribute__ ((noinline)) -+wrap_vreinterpretq_s16_f64 (float64x2_t __a) -+{ -+ return vreinterpretq_s16_f64 (__a); -+} -+ -+int __attribute__ ((noinline)) -+test_vreinterpretq_s16_f64 () -+{ -+ float64x2_t a; -+ int16x8_t b; -+ float64_t c[2] = { PI_F64, E_F64 }; -+ int16_t d[8] = { 0x2D18, 0x5444, 0x21FB, 0x4009, -+ 0x5769, 0x8B14, 0xBF0A, 0x4005 }; -+ int16_t e[8]; -+ int i; -+ -+ a = vld1q_f64 (c); -+ b = wrap_vreinterpretq_s16_f64 (a); -+ vst1q_s16 (e, b); -+ for (i = 0; i < 8; i++) -+ if (d[i] != e[i]) -+ return 1; -+ return 0; -+}; -+ -+int32x4_t __attribute__ ((noinline)) -+wrap_vreinterpretq_s32_f64 (float64x2_t __a) -+{ -+ return vreinterpretq_s32_f64 (__a); -+} -+ -+int __attribute__ ((noinline)) -+test_vreinterpretq_s32_f64 () -+{ -+ float64x2_t a; -+ int32x4_t b; -+ float64_t c[2] = { PI_F64, E_F64 }; -+ int32_t d[4] = { 0x54442D18, 0x400921FB, 0x8B145769, 0x4005BF0A }; -+ int32_t e[4]; -+ int i; -+ -+ a = vld1q_f64 (c); -+ b = wrap_vreinterpretq_s32_f64 (a); -+ vst1q_s32 (e, b); -+ for (i = 0; i < 4; i++) -+ if (d[i] != e[i]) -+ return 1; -+ return 0; -+}; -+ -+int64x2_t __attribute__ ((noinline)) -+wrap_vreinterpretq_s64_f64 (float64x2_t __a) -+{ -+ return vreinterpretq_s64_f64 (__a); -+} -+ -+int __attribute__ ((noinline)) -+test_vreinterpretq_s64_f64 () -+{ -+ float64x2_t a; -+ int64x2_t b; -+ float64_t c[2] = { PI_F64, E_F64 }; -+ int64_t d[2] = { 0x400921FB54442D18, 0x4005BF0A8B145769 }; -+ int64_t e[2]; -+ int i; -+ -+ a = vld1q_f64 (c); -+ b = wrap_vreinterpretq_s64_f64 (a); -+ vst1q_s64 (e, b); -+ for (i = 0; i < 2; i++) -+ if (d[i] != e[i]) -+ return 1; -+ return 0; -+}; -+ -+float64x1_t __attribute__ ((noinline)) -+wrap_vreinterpret_f64_f32 (float32x2_t __a) -+{ -+ return vreinterpret_f64_f32 (__a); -+} -+ -+int __attribute__ ((noinline)) -+test_vreinterpret_f64_f32 () -+{ -+ float32x2_t a; -+ float64x1_t b; -+ /* Values { 0x54442D18, 0x400921FB } reinterpreted as f32. 
*/ -+ float32_t c[2] = { 3.3702805504E12, 2.1426990032196044921875E0 }; -+ float64_t d[1] = { PI_F64 }; -+ float64_t e[1]; -+ int i; -+ -+ a = vld1_f32 (c); -+ b = wrap_vreinterpret_f64_f32 (a); -+ vst1_f64 (e, b); -+ if (!DOUBLE_EQUALS (d[0], e[0], __DBL_EPSILON__)) -+ return 1; -+ return 0; -+}; -+ -+float64x1_t __attribute__ ((noinline)) -+wrap_vreinterpret_f64_s8 (int8x8_t __a) -+{ -+ return vreinterpret_f64_s8 (__a); -+} -+ -+int __attribute__ ((noinline)) -+test_vreinterpret_f64_s8 () -+{ -+ int8x8_t a; -+ float64x1_t b; -+ int8_t c[8] = { 0x18, 0x2D, 0x44, 0x54, 0xFB, 0x21, 0x09, 0x40 }; -+ float64_t d[1] = { PI_F64 }; -+ float64_t e[1]; -+ int i; -+ -+ a = vld1_s8 (c); -+ b = wrap_vreinterpret_f64_s8 (a); -+ vst1_f64 (e, b); -+ if (!DOUBLE_EQUALS (d[0], e[0], __DBL_EPSILON__)) -+ return 1; -+ return 0; -+}; -+ -+float64x1_t __attribute__ ((noinline)) -+wrap_vreinterpret_f64_s16 (int16x4_t __a) -+{ -+ return vreinterpret_f64_s16 (__a); -+} -+ -+int __attribute__ ((noinline)) -+test_vreinterpret_f64_s16 () -+{ -+ int16x4_t a; -+ float64x1_t b; -+ int16_t c[4] = { 0x2D18, 0x5444, 0x21FB, 0x4009 }; -+ float64_t d[1] = { PI_F64 }; -+ float64_t e[1]; -+ int i; -+ -+ a = vld1_s16 (c); -+ b = wrap_vreinterpret_f64_s16 (a); -+ vst1_f64 (e, b); -+ if (!DOUBLE_EQUALS (d[0], e[0], __DBL_EPSILON__)) -+ return 1; -+ return 0; -+}; -+ -+float64x1_t __attribute__ ((noinline)) -+wrap_vreinterpret_f64_s32 (int32x2_t __a) -+{ -+ return vreinterpret_f64_s32 (__a); -+} -+ -+int __attribute__ ((noinline)) -+test_vreinterpret_f64_s32 () -+{ -+ int32x2_t a; -+ float64x1_t b; -+ int32_t c[2] = { 0x54442D18, 0x400921FB }; -+ float64_t d[1] = { PI_F64 }; -+ float64_t e[1]; -+ int i; -+ -+ a = vld1_s32 (c); -+ b = wrap_vreinterpret_f64_s32 (a); -+ vst1_f64 (e, b); -+ if (!DOUBLE_EQUALS (d[0], e[0], __DBL_EPSILON__)) -+ return 1; -+ return 0; -+}; -+ -+float64x1_t __attribute__ ((noinline)) -+wrap_vreinterpret_f64_s64 (int64x1_t __a) -+{ -+ return vreinterpret_f64_s64 (__a); -+} -+ -+int __attribute__ ((noinline)) -+test_vreinterpret_f64_s64 () -+{ -+ int64x1_t a; -+ float64x1_t b; -+ int64_t c[1] = { 0x400921FB54442D18 }; -+ float64_t d[1] = { PI_F64 }; -+ float64_t e[1]; -+ -+ a = vld1_s64 (c); -+ b = wrap_vreinterpret_f64_s64 (a); -+ vst1_f64 (e, b); -+ if (!DOUBLE_EQUALS (d[0], e[0], __DBL_EPSILON__)) -+ return 1; -+ return 0; -+}; -+ -+float64x2_t __attribute__ ((noinline)) -+wrap_vreinterpretq_f64_f32 (float32x4_t __a) -+{ -+ return vreinterpretq_f64_f32 (__a); -+} -+ -+int __attribute__ ((noinline)) -+test_vreinterpretq_f64_f32 () -+{ -+ float32x4_t a; -+ float64x2_t b; -+ /* Values corresponding to f32 reinterpret of -+ { 0x54442D18, 0x400921FB, 0x8B145769, 0x4005BF0A }. 
*/ -+ float32_t c[4] = { 3.3702805504E12, -+ 2.1426990032196044921875E0, -+ -2.8569523269651966444143014594E-32, -+ 2.089785099029541015625E0 }; -+ -+ float64_t d[2] = { PI_F64, E_F64 }; -+ float64_t e[2]; -+ int i; -+ -+ a = vld1q_f32 (c); -+ b = wrap_vreinterpretq_f64_f32 (a); -+ vst1q_f64 (e, b); -+ for (i = 0; i < 2; i++) -+ if (!DOUBLE_EQUALS (d[i], e[i], __DBL_EPSILON__)) -+ return 1; -+ return 0; -+}; -+ -+float64x2_t __attribute__ ((noinline)) -+wrap_vreinterpretq_f64_s8 (int8x16_t __a) -+{ -+ return vreinterpretq_f64_s8 (__a); -+} -+ -+int __attribute__ ((noinline)) -+test_vreinterpretq_f64_s8 () -+{ -+ int8x16_t a; -+ float64x2_t b; -+ int8_t c[16] = { 0x18, 0x2D, 0x44, 0x54, 0xFB, 0x21, 0x09, 0x40, -+ 0x69, 0x57, 0x14, 0x8B, 0x0A, 0xBF, 0x05, 0x40 }; -+ float64_t d[2] = { PI_F64, E_F64 }; -+ float64_t e[2]; -+ int i; -+ -+ a = vld1q_s8 (c); -+ b = wrap_vreinterpretq_f64_s8 (a); -+ vst1q_f64 (e, b); -+ for (i = 0; i < 2; i++) -+ if (!DOUBLE_EQUALS (d[i], e[i], __DBL_EPSILON__)) -+ return 1; -+ return 0; -+}; -+ -+float64x2_t __attribute__ ((noinline)) -+wrap_vreinterpretq_f64_s16 (int16x8_t __a) -+{ -+ return vreinterpretq_f64_s16 (__a); -+} -+ -+int __attribute__ ((noinline)) -+test_vreinterpretq_f64_s16 () -+{ -+ int16x8_t a; -+ float64x2_t b; -+ int16_t c[8] = { 0x2D18, 0x5444, 0x21FB, 0x4009, -+ 0x5769, 0x8B14, 0xBF0A, 0x4005 }; -+ float64_t d[2] = { PI_F64, E_F64 }; -+ float64_t e[2]; -+ int i; -+ -+ a = vld1q_s16 (c); -+ b = wrap_vreinterpretq_f64_s16 (a); -+ vst1q_f64 (e, b); -+ for (i = 0; i < 2; i++) -+ if (!DOUBLE_EQUALS (d[i], e[i], __DBL_EPSILON__)) -+ return 1; -+ return 0; -+}; -+ -+float64x2_t __attribute__ ((noinline)) -+wrap_vreinterpretq_f64_s32 (int32x4_t __a) -+{ -+ return vreinterpretq_f64_s32 (__a); -+} -+ -+int __attribute__ ((noinline)) -+test_vreinterpretq_f64_s32 () -+{ -+ int32x4_t a; -+ float64x2_t b; -+ int32_t c[4] = { 0x54442D18, 0x400921FB, 0x8B145769, 0x4005BF0A }; -+ float64_t d[2] = { PI_F64, E_F64 }; -+ float64_t e[2]; -+ int i; -+ -+ a = vld1q_s32 (c); -+ b = wrap_vreinterpretq_f64_s32 (a); -+ vst1q_f64 (e, b); -+ for (i = 0; i < 2; i++) -+ if (!DOUBLE_EQUALS (d[i], e[i], __DBL_EPSILON__)) -+ return 1; -+ return 0; -+}; -+ -+float64x2_t __attribute__ ((noinline)) -+wrap_vreinterpretq_f64_s64 (int64x2_t __a) -+{ -+ return vreinterpretq_f64_s64 (__a); -+} -+ -+int __attribute__ ((noinline)) -+test_vreinterpretq_f64_s64 () -+{ -+ int64x2_t a; -+ float64x2_t b; -+ int64_t c[2] = { 0x400921FB54442D18, 0x4005BF0A8B145769 }; -+ float64_t d[2] = { PI_F64, E_F64 }; -+ float64_t e[2]; -+ int i; -+ -+ a = vld1q_s64 (c); -+ b = wrap_vreinterpretq_f64_s64 (a); -+ vst1q_f64 (e, b); -+ for (i = 0; i < 2; i++) -+ if (!DOUBLE_EQUALS (d[i], e[i], __DBL_EPSILON__)) -+ return 1; -+ return 0; -+}; -+ -+int -+main (int argc, char **argv) -+{ -+ if (test_vreinterpret_f32_f64 ()) -+ abort (); -+ -+ if (test_vreinterpret_s8_f64 ()) -+ abort (); -+ if (test_vreinterpret_s16_f64 ()) -+ abort (); -+ if (test_vreinterpret_s32_f64 ()) -+ abort (); -+ if (test_vreinterpret_s64_f64 ()) -+ abort (); -+ -+ if (test_vreinterpretq_f32_f64 ()) -+ abort (); -+ -+ if (test_vreinterpretq_s8_f64 ()) -+ abort (); -+ if (test_vreinterpretq_s16_f64 ()) -+ abort (); -+ if (test_vreinterpretq_s32_f64 ()) -+ abort (); -+ if (test_vreinterpretq_s64_f64 ()) -+ abort (); -+ -+ if (test_vreinterpret_f64_f32 ()) -+ abort (); -+ -+ if (test_vreinterpret_f64_s8 ()) -+ abort (); -+ if (test_vreinterpret_f64_s16 ()) -+ abort (); -+ if (test_vreinterpret_f64_s32 ()) -+ abort (); -+ if 
(test_vreinterpret_f64_s64 ()) -+ abort (); -+ -+ if (test_vreinterpretq_f64_f32 ()) -+ abort (); -+ -+ if (test_vreinterpretq_f64_s8 ()) -+ abort (); -+ if (test_vreinterpretq_f64_s16 ()) -+ abort (); -+ if (test_vreinterpretq_f64_s32 ()) -+ abort (); -+ if (test_vreinterpretq_f64_s64 ()) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/test_fp_attribute_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/test_fp_attribute_1.c -@@ -21,6 +21,6 @@ - leaf (); - } - --/* { dg-final { scan-assembler-times "str\tx30, \\\[sp\\\]" 2 } } */ -+/* { dg-final { scan-assembler-times "str\tx30, \\\[sp, -\[0-9\]+\\\]!" 2 } } */ - - /* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/vect.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/vect.x -@@ -2,6 +2,7 @@ - typedef unsigned int *__restrict__ pRUINT; - typedef long long *__restrict__ pRINT64; - typedef unsigned long long *__restrict__ pRUINT64; -+extern int abs (int j); - - void test_orn (pRUINT a, pRUINT b, pRUINT c) - { ---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_14.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_14.c -@@ -0,0 +1,12 @@ -+/* Verify: -+ * with outgoing. -+ * total frame size > 512. -+ * number of callee-save reg >= 2. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-O2" } */ -+ -+#include "test_frame_common.h" -+ -+t_frame_pattern_outgoing (test14, 700, , 8, a[8]) -+t_frame_run (test14) ---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_3.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_3.c -@@ -0,0 +1,14 @@ -+/* Verify: -+ * -fomit-frame-pointer. -+ * without outgoing. -+ * total frame size <= 512 but > 256. -+ * number of callee-save reg == 1. -+ * we can't use "str !" to optimize stack adjustment. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-O2 -fomit-frame-pointer" } */ -+ -+#include "test_frame_common.h" -+ -+t_frame_pattern (test3, 400, ) -+t_frame_run (test3) ---- a/src/gcc/testsuite/gcc.target/aarch64/pic-constantpool1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/pic-constantpool1.c -@@ -2,10 +2,13 @@ - /* { dg-do compile } */ - - extern int __finite (double __value) __attribute__ ((__nothrow__)) __attribute__ ((__const__)); -+extern int __finitef (float __value) __attribute__ ((__nothrow__)) __attribute__ ((__const__)); -+extern int __signbit (double __value) __attribute__ ((__nothrow__)) __attribute__ ((__const__)); -+extern int __signbitf (float __value) __attribute__ ((__nothrow__)) __attribute__ ((__const__)); - int - __ecvt_r (value, ndigit, decpt, sign, buf, len) - double value; -- int ndigit, *decpt, *sign; -+ int ndigit, *decpt, *sign, len; - char *buf; - { - if ((sizeof (value) == sizeof (float) ? __finitef (value) : __finite (value)) && value != 0.0) ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vpaddd_s64.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vpaddd_s64.c -@@ -0,0 +1,27 @@ -+/* Test the vpaddd_s64 AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3" } */ -+ -+#include "arm_neon.h" -+ -+#define SIZE 6 -+ -+extern void abort (void); -+ -+int64_t in[SIZE] = { -4l, 4l, -2l, 2l, -1l, 1l }; -+ -+int -+main (void) -+{ -+ int i; -+ -+ for (i = 0; i < SIZE / 2; ++i) -+ if (vpaddd_s64 (vld1q_s64 (in + 2 * i)) != 0) -+ abort (); -+ -+ return 0; -+} -+ -+/* { dg-final { scan-assembler "addp\[ \t\]+\[dD\]\[0-9\]+, v\[0-9\].2d+\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s16.x -@@ -0,0 +1,114 @@ -+extern void abort (void); -+ -+int16x8_t -+test_vextq_s16_1 (int16x8_t a, int16x8_t b) -+{ -+ return vextq_s16 (a, b, 1); -+} -+ -+int16x8_t -+test_vextq_s16_2 (int16x8_t a, int16x8_t b) -+{ -+ return vextq_s16 (a, b, 2); -+} -+ -+int16x8_t -+test_vextq_s16_3 (int16x8_t a, int16x8_t b) -+{ -+ return vextq_s16 (a, b, 3); -+} -+ -+int16x8_t -+test_vextq_s16_4 (int16x8_t a, int16x8_t b) -+{ -+ return vextq_s16 (a, b, 4); -+} -+ -+int16x8_t -+test_vextq_s16_5 (int16x8_t a, int16x8_t b) -+{ -+ return vextq_s16 (a, b, 5); -+} -+ -+int16x8_t -+test_vextq_s16_6 (int16x8_t a, int16x8_t b) -+{ -+ return vextq_s16 (a, b, 6); -+} -+ -+int16x8_t -+test_vextq_s16_7 (int16x8_t a, int16x8_t b) -+{ -+ return vextq_s16 (a, b, 7); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i, off; -+ int16_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7}; -+ int16x8_t in1 = vld1q_s16 (arr1); -+ int16_t arr2[] = {8, 9, 10, 11, 12, 13, 14, 15}; -+ int16x8_t in2 = vld1q_s16 (arr2); -+ int16_t exp[8]; -+ int16x8_t expected; -+ int16x8_t actual = test_vextq_s16_1 (in1, in2); -+ -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 1; -+ expected = vld1q_s16 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_s16_2 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 2; -+ expected = vld1q_s16 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_s16_3 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 3; -+ expected = vld1q_s16 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_s16_4 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 4; -+ expected = vld1q_s16 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_s16_5 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 5; -+ expected = vld1q_s16 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_s16_6 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 6; -+ expected = vld1q_s16 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_s16_7 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 7; -+ expected = vld1q_s16 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vpaddd_u64.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vpaddd_u64.c -@@ -0,0 +1,27 @@ -+/* Test the vpaddd_u64 AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3" } */ -+ -+#include "arm_neon.h" -+ -+#define SIZE 6 -+ -+extern void abort (void); -+ -+uint64_t in[SIZE] = { 4ul, 4ul, 2ul, 2ul, 1ul, 1ul }; -+ -+int -+main (void) -+{ -+ int i; -+ -+ for (i = 0; i < SIZE / 2; ++i) -+ if (vpaddd_u64 (vld1q_u64 (in + 2 * i)) != 2 * in[2 * i]) -+ abort (); -+ -+ return 0; -+} -+ -+/* { dg-final { scan-assembler "addp\[ \t\]+\[dD\]\[0-9\]+, v\[0-9\].2d+\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u8.x -@@ -0,0 +1,114 @@ -+extern void abort (void); -+ -+uint8x8_t -+test_vext_u8_1 (uint8x8_t a, uint8x8_t b) -+{ -+ return vext_u8 (a, b, 1); -+} -+ -+uint8x8_t -+test_vext_u8_2 (uint8x8_t a, uint8x8_t b) -+{ -+ return vext_u8 (a, b, 2); -+} -+ -+uint8x8_t -+test_vext_u8_3 (uint8x8_t a, uint8x8_t b) -+{ -+ return vext_u8 (a, b, 3); -+} -+ -+uint8x8_t -+test_vext_u8_4 (uint8x8_t a, uint8x8_t b) -+{ -+ return vext_u8 (a, b, 4); -+} -+ -+uint8x8_t -+test_vext_u8_5 (uint8x8_t a, uint8x8_t b) -+{ -+ return vext_u8 (a, b, 5); -+} -+ -+uint8x8_t -+test_vext_u8_6 (uint8x8_t a, uint8x8_t b) -+{ -+ return vext_u8 (a, b, 6); -+} -+ -+uint8x8_t -+test_vext_u8_7 (uint8x8_t a, uint8x8_t b) -+{ -+ return vext_u8 (a, b, 7); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i, off; -+ uint8_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7}; -+ uint8x8_t in1 = vld1_u8 (arr1); -+ uint8_t arr2[] = {8, 9, 10, 11, 12, 13, 14, 15}; -+ uint8x8_t in2 = vld1_u8 (arr2); -+ uint8_t exp[8]; -+ uint8x8_t expected; -+ uint8x8_t actual = test_vext_u8_1 (in1, in2); -+ -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 1; -+ expected = vld1_u8 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_u8_2 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 2; -+ expected = vld1_u8 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_u8_3 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 3; -+ expected = vld1_u8 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_u8_4 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 4; -+ expected = vld1_u8 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_u8_5 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 5; -+ expected = vld1_u8 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_u8_6 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 6; -+ expected = vld1_u8 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_u8_7 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 7; -+ expected = vld1_u8 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u16.x -@@ -0,0 +1,114 @@ -+extern void abort (void); -+ -+uint16x8_t -+test_vextq_u16_1 (uint16x8_t a, uint16x8_t b) -+{ -+ return vextq_u16 (a, b, 1); -+} -+ -+uint16x8_t -+test_vextq_u16_2 (uint16x8_t a, uint16x8_t b) -+{ -+ return vextq_u16 (a, b, 2); -+} -+ -+uint16x8_t -+test_vextq_u16_3 (uint16x8_t a, uint16x8_t b) -+{ -+ return vextq_u16 (a, b, 3); -+} -+ -+uint16x8_t -+test_vextq_u16_4 (uint16x8_t a, uint16x8_t b) -+{ -+ return vextq_u16 (a, b, 4); -+} 
-+ -+uint16x8_t -+test_vextq_u16_5 (uint16x8_t a, uint16x8_t b) -+{ -+ return vextq_u16 (a, b, 5); -+} -+ -+uint16x8_t -+test_vextq_u16_6 (uint16x8_t a, uint16x8_t b) -+{ -+ return vextq_u16 (a, b, 6); -+} -+ -+uint16x8_t -+test_vextq_u16_7 (uint16x8_t a, uint16x8_t b) -+{ -+ return vextq_u16 (a, b, 7); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i, off; -+ uint16_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7}; -+ uint16x8_t in1 = vld1q_u16 (arr1); -+ uint16_t arr2[] = {8, 9, 10, 11, 12, 13, 14, 15}; -+ uint16x8_t in2 = vld1q_u16 (arr2); -+ uint16_t exp[8]; -+ uint16x8_t expected; -+ uint16x8_t actual = test_vextq_u16_1 (in1, in2); -+ -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 1; -+ expected = vld1q_u16 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_u16_2 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 2; -+ expected = vld1q_u16 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_u16_3 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 3; -+ expected = vld1q_u16 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_u16_4 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 4; -+ expected = vld1q_u16 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_u16_5 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 5; -+ expected = vld1q_u16 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_u16_6 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 6; -+ expected = vld1q_u16 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_u16_7 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 7; -+ expected = vld1q_u16 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzips16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzips16_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzip_s16' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vzips16.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs16.x -@@ -0,0 +1,26 @@ -+extern void abort (void); -+ -+int16x8x2_t -+test_vuzpqs16 (int16x8_t _a, int16x8_t _b) -+{ -+ return vuzpq_s16 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; -+ int16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; -+ int16x8x2_t result = test_vuzpqs16 (vld1q_s16 (first), vld1q_s16 (second)); -+ int16_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15}; -+ int16_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16}; -+ int16x8_t expect1 = vld1q_s16 (exp1); -+ int16x8_t expect2 = vld1q_s16 (exp2); -+ -+ for (i = 0; i < 8; i++) -+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs8_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzipq_s8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vzipqs8.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev64q_p8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev64qp8.x" -+ -+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu16_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vtrn_u16' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vtrnu16.x" -+ -+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu16.x -@@ -0,0 +1,26 @@ -+extern void abort (void); -+ -+uint16x8x2_t -+test_vuzpqu16 (uint16x8_t _a, uint16x8_t _b) -+{ -+ return vuzpq_u16 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; -+ uint16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; -+ uint16x8x2_t result = test_vuzpqu16 (vld1q_u16 (first), vld1q_u16 (second)); -+ uint16_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15}; -+ uint16_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16}; -+ uint16x8_t expect1 = vld1q_u16 (exp1); -+ uint16x8_t expect2 = vld1q_u16 (exp2); -+ -+ for (i = 0; i < 8; i++) -+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu8.x -@@ -0,0 +1,26 @@ -+extern void abort (void); -+ -+uint8x8x2_t -+test_vuzpu8 (uint8x8_t _a, uint8x8_t _b) -+{ -+ return vuzp_u8 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; -+ uint8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; -+ uint8x8x2_t result = test_vuzpu8 (vld1_u8 (first), vld1_u8 (second)); -+ uint8_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15}; -+ uint8_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16}; -+ uint8x8_t expect1 = vld1_u8 (exp1); -+ uint8x8_t expect2 = vld1_u8 (exp2); -+ -+ for (i = 0; i < 8; i++) -+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u16_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vextu16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+#include "ext_u16.x" -+ -+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vextQu8' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+#include "extq_u8.x" -+ -+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#?\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 15 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu8.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+uint8x16_t -+test_vrev64qu8 (uint8x16_t _arg) -+{ -+ return vrev64q_u8 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; -+ uint8x16_t reversed = test_vrev64qu8 (inorder); -+ uint8x16_t expected = {8, 7, 6, 5, 4, 3, 2, 1, 16, 15, 14, 13, 12, 11, 10, 9}; -+ -+ for (i = 0; i < 16; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev32_p8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev32p8.x" -+ -+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps32.x -@@ -0,0 +1,26 @@ -+extern void abort (void); -+ -+int32x2x2_t -+test_vuzps32 (int32x2_t _a, int32x2_t _b) -+{ -+ return vuzp_s32 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int32_t first[] = {1, 2}; -+ int32_t second[] = {3, 4}; -+ int32x2x2_t result = test_vuzps32 (vld1_s32 (first), vld1_s32 (second)); -+ int32_t exp1[] = {1, 3}; -+ int32_t exp2[] = {2, 4}; -+ int32x2_t expect1 = vld1_s32 (exp1); -+ int32x2_t expect2 = vld1_s32 (exp2); -+ -+ for (i = 0; i < 2; i++) -+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s64.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s64.x -@@ -0,0 +1,17 @@ -+extern void abort (void); -+ -+int -+main (int argc, char **argv) -+{ -+ int i, off; -+ int64_t arr1[] = {0}; -+ int64x1_t in1 = vld1_s64 (arr1); -+ int64_t arr2[] = {1}; -+ int64x1_t in2 = vld1_s64 (arr2); -+ int64x1_t actual = vext_s64 (in1, in2, 0); -+ if (actual != in1) -+ abort (); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu32.x -@@ -0,0 +1,26 @@ -+extern void abort (void); -+ -+uint32x2x2_t -+test_vuzpu32 (uint32x2_t _a, uint32x2_t _b) -+{ -+ return vuzp_u32 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint32_t first[] = {1, 2}; -+ uint32_t second[] = {3, 4}; -+ uint32x2x2_t result = test_vuzpu32 (vld1_u32 (first), vld1_u32 (second)); -+ uint32_t exp1[] = {1, 3}; -+ uint32_t exp2[] = {2, 4}; -+ uint32x2_t expect1 = vld1_u32 (exp1); -+ uint32x2_t expect2 = vld1_u32 (exp2); -+ -+ for (i = 0; i < 2; i++) -+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u64.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u64.x -@@ -0,0 +1,17 @@ -+extern void 
abort (void); -+ -+int -+main (int argc, char **argv) -+{ -+ int i, off; -+ uint64_t arr1[] = {0}; -+ uint64x1_t in1 = vld1_u64 (arr1); -+ uint64_t arr2[] = {1}; -+ uint64x1_t in2 = vld1_u64 (arr2); -+ uint64x1_t actual = vext_u64 (in1, in2, 0); -+ if (actual != in1) -+ abort (); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns8_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vtrn_s8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vtrns8.x" -+ -+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs16_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vtrnq_s16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vtrnqs16.x" -+ -+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs32_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev64q_s32' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev64qs32.x" -+ -+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.4s, ?v\[0-9\]+.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev64_s8' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev64s8.x" -+ -+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs16.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+int16x8x2_t -+test_vzipqs16 (int16x8_t _a, int16x8_t _b) -+{ -+ return vzipq_s16 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; -+ int16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; -+ int16x8x2_t result = test_vzipqs16 (vld1q_s16 (first), vld1q_s16 (second)); -+ int16x8_t res1 = result.val[0], res2 = result.val[1]; -+ int16_t exp1[] = {1, 9, 2, 10, 3, 11, 4, 12}; -+ int16_t exp2[] = {5, 13, 6, 14, 7, 15, 8, 16}; -+ int16x8_t expected1 = vld1q_s16 (exp1); -+ int16x8_t expected2 = vld1q_s16 (exp2); -+ -+ for (i = 0; i < 8; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipf32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipf32.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+float32x2x2_t -+test_vzipf32 (float32x2_t _a, float32x2_t _b) -+{ -+ return vzip_f32 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ float32_t first[] = {1, 2}; -+ float32_t second[] = {3, 4}; -+ float32x2x2_t result = test_vzipf32 (vld1_f32 (first), vld1_f32 (second)); -+ float32x2_t res1 = result.val[0], res2 = result.val[1]; -+ float32_t exp1[] = {1, 3}; -+ float32_t exp2[] = {2, 4}; -+ float32x2_t expected1 = vld1_f32 (exp1); -+ float32x2_t expected2 = vld1_f32 (exp2); -+ -+ for (i = 0; i < 2; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu8.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+uint8x8x2_t -+test_vzipu8 (uint8x8_t _a, uint8x8_t _b) -+{ -+ return vzip_u8 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; -+ uint8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; -+ uint8x8x2_t result = test_vzipu8 (vld1_u8 (first), vld1_u8 (second)); -+ uint8x8_t res1 = result.val[0], res2 = result.val[1]; -+ uint8_t exp1[] = {1, 9, 2, 10, 3, 11, 4, 12}; -+ uint8_t exp2[] = {5, 13, 6, 14, 7, 15, 8, 16}; -+ uint8x8_t expected1 = vld1_u8 (exp1); -+ uint8x8_t expected2 = vld1_u8 (exp2); -+ -+ for (i = 0; i < 8; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu16.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+uint16x8x2_t -+test_vzipqu16 (uint16x8_t _a, uint16x8_t _b) -+{ -+ return vzipq_u16 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; -+ uint16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; -+ uint16x8x2_t result = test_vzipqu16 (vld1q_u16 (first), vld1q_u16 (second)); -+ uint16x8_t res1 = result.val[0], res2 = result.val[1]; -+ uint16_t exp1[] = {1, 9, 2, 10, 3, 11, 4, 12}; -+ uint16_t exp2[] = {5, 13, 6, 14, 7, 15, 8, 16}; -+ uint16x8_t expected1 = vld1q_u16 (exp1); -+ uint16x8_t expected2 = 
vld1q_u16 (exp2); -+ -+ for (i = 0; i < 8; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s16_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vextQs16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+#include "extq_s16.x" -+ -+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp16_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vuzpq_p16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vuzpqp16.x" -+ -+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p8.x -@@ -0,0 +1,114 @@ -+extern void abort (void); -+ -+poly8x8_t -+test_vext_p8_1 (poly8x8_t a, poly8x8_t b) -+{ -+ return vext_p8 (a, b, 1); -+} -+ -+poly8x8_t -+test_vext_p8_2 (poly8x8_t a, poly8x8_t b) -+{ -+ return vext_p8 (a, b, 2); -+} -+ -+poly8x8_t -+test_vext_p8_3 (poly8x8_t a, poly8x8_t b) -+{ -+ return vext_p8 (a, b, 3); -+} -+ -+poly8x8_t -+test_vext_p8_4 (poly8x8_t a, poly8x8_t b) -+{ -+ return vext_p8 (a, b, 4); -+} -+ -+poly8x8_t -+test_vext_p8_5 (poly8x8_t a, poly8x8_t b) -+{ -+ return vext_p8 (a, b, 5); -+} -+ -+poly8x8_t -+test_vext_p8_6 (poly8x8_t a, poly8x8_t b) -+{ -+ return vext_p8 (a, b, 6); -+} -+ -+poly8x8_t -+test_vext_p8_7 (poly8x8_t a, poly8x8_t b) -+{ -+ return vext_p8 (a, b, 7); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i, off; -+ poly8_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7}; -+ poly8x8_t in1 = vld1_p8 (arr1); -+ poly8_t arr2[] = {8, 9, 10, 11, 12, 13, 14, 15}; -+ poly8x8_t in2 = vld1_p8 (arr2); -+ poly8_t exp[8]; -+ poly8x8_t expected; -+ poly8x8_t actual = test_vext_p8_1 (in1, in2); -+ -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 1; -+ expected = vld1_p8 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_p8_2 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 2; -+ expected = vld1_p8 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_p8_3 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 3; -+ expected = vld1_p8 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_p8_4 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 4; -+ expected = vld1_p8 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_p8_5 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 5; -+ expected = vld1_p8 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_p8_6 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 6; -+ expected = vld1_p8 (exp); -+ for (i 
= 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_p8_7 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 7; -+ expected = vld1_p8 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu32_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vuzpq_u32' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vuzpqu32.x" -+ -+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s16_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev32_s16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev32s16.x" -+ -+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.4h, ?v\[0-9\]+.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp8.x -@@ -0,0 +1,26 @@ -+extern void abort (void); -+ -+poly8x8x2_t -+test_vuzpp8 (poly8x8_t _a, poly8x8_t _b) -+{ -+ return vuzp_p8 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; -+ poly8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; -+ poly8x8x2_t result = test_vuzpp8 (vld1_p8 (first), vld1_p8 (second)); -+ poly8_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15}; -+ poly8_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16}; -+ poly8x8_t expect1 = vld1_p8 (exp1); -+ poly8x8_t expect2 = vld1_p8 (exp2); -+ -+ for (i = 0; i < 8; i++) -+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp8_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzipq_p8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vzipqp8.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev32q_s8' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev32qs8.x" -+ -+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s32_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev64_s32' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev64s32.x" -+ -+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.2s, ?v\[0-9\]+.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/simd.exp -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/simd.exp -@@ -0,0 +1,45 @@ -+# Specific regression driver for AArch64 SIMD instructions. -+# Copyright (C) 2014 Free Software Foundation, Inc. -+# Contributed by ARM Ltd. -+# -+# This file is part of GCC. -+# -+# GCC is free software; you can redistribute it and/or modify it -+# under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 3, or (at your option) -+# any later version. -+# -+# GCC is distributed in the hope that it will be useful, but -+# WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+# General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with GCC; see the file COPYING3. If not see -+# <http://www.gnu.org/licenses/>. */ -+ -+# GCC testsuite that uses the `dg.exp' driver. -+ -+# Exit immediately if this isn't an AArch64 target. -+if {![istarget aarch64*-*-*] } then { -+ return -+} -+ -+# Load support procs. -+load_lib gcc-dg.exp -+ -+# If a testcase doesn't have special options, use these. -+global DEFAULT_CFLAGS -+if ![info exists DEFAULT_CFLAGS] then { -+ set DEFAULT_CFLAGS " -ansi -pedantic-errors" -+} -+ -+# Initialize `dg'. -+dg-init -+ -+# Main loop. -+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \ -+ "" $DEFAULT_CFLAGS -+ -+# All done. -+dg-finish ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns16.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+int16x4x2_t -+test_vtrns16 (int16x4_t _a, int16x4_t _b) -+{ -+ return vtrn_s16 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int16_t first[] = {1, 2, 3, 4}; -+ int16_t second[] = {5, 6, 7, 8}; -+ int16x4x2_t result = test_vtrns16 (vld1_s16 (first), vld1_s16 (second)); -+ int16x4_t res1 = result.val[0], res2 = result.val[1]; -+ int16_t exp1[] = {1, 5, 3, 7}; -+ int16_t exp2[] = {2, 6, 4, 8}; -+ int16x4_t expected1 = vld1_s16 (exp1); -+ int16x4_t expected2 = vld1_s16 (exp2); -+ -+ for (i = 0; i < 4; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev64q_u8' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev64qu8.x" -+ -+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp8.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+poly8x16_t -+test_vrev64qp8 (poly8x16_t _arg) -+{ -+ return vrev64q_p8 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; -+ poly8x16_t reversed = test_vrev64qp8 (inorder); -+ poly8x16_t expected = {8, 7, 6, 5, 4, 3, 2, 1, 16, 15, 14, 13, 12, 11, 10, 9}; -+ -+ for (i = 0; i < 16; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu16.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+uint16x4x2_t -+test_vtrnu16 (uint16x4_t _a, uint16x4_t _b) -+{ -+ return vtrn_u16 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint16_t first[] = {1, 2, 3, 4}; -+ uint16_t second[] = {5, 6, 7, 8}; -+ uint16x4x2_t result = test_vtrnu16 (vld1_u16 (first), vld1_u16 (second)); -+ uint16x4_t res1 = result.val[0], res2 = result.val[1]; -+ uint16_t exp1[] = {1, 5, 3, 7}; -+ uint16_t exp2[] = {2, 6, 4, 8}; -+ uint16x4_t expected1 = vld1_u16 (exp1); -+ uint16x4_t expected2 = vld1_u16 (exp2); -+ -+ for (i = 0; i < 4; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p16.x -@@ -0,0 +1,58 @@ -+extern void abort (void); -+ -+poly16x4_t -+test_vext_p16_1 (poly16x4_t a, poly16x4_t b) -+{ -+ return vext_p16 (a, b, 1); -+} -+ -+poly16x4_t -+test_vext_p16_2 (poly16x4_t a, poly16x4_t b) -+{ -+ return vext_p16 (a, b, 2); -+} -+ -+poly16x4_t -+test_vext_p16_3 (poly16x4_t a, poly16x4_t b) -+{ -+ return vext_p16 (a, b, 3); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i, off; -+ poly16_t arr1[] = {0, 1, 2, 3}; -+ poly16x4_t in1 = vld1_p16 (arr1); -+ poly16_t arr2[] = {4, 5, 6, 7}; -+ poly16x4_t in2 = vld1_p16 (arr2); -+ poly16_t exp[4]; -+ poly16x4_t expected; -+ poly16x4_t actual = test_vext_p16_1 (in1, in2); -+ -+ for (i = 0; i < 4; i++) -+ exp[i] = i + 1; -+ expected = vld1_p16 (exp); -+ for (i = 0; i < 4; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_p16_2 (in1, in2); -+ for (i = 0; i < 4; i++) -+ exp[i] = i + 2; -+ expected = vld1_p16 (exp); -+ for (i = 0; i < 4; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_p16_3 (in1, in2); -+ for (i = 0; i < 4; i++) -+ exp[i] = i + 3; -+ expected = vld1_p16 (exp); -+ for (i = 0; i < 4; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp16_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vuzp_p16' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vuzpp16.x" -+ -+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu8.x -@@ -0,0 +1,29 @@ -+extern void abort (void); -+ -+uint8x16x2_t -+test_vzipqu8 (uint8x16_t _a, uint8x16_t _b) -+{ -+ return vzipq_u8 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; -+ uint8_t second[] = -+ {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; -+ uint8x16x2_t result = test_vzipqu8 (vld1q_u8 (first), vld1q_u8 (second)); -+ uint8x16_t res1 = result.val[0], res2 = result.val[1]; -+ uint8_t exp1[] = {1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, 8, 24}; -+ uint8_t exp2[] = -+ {9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31, 16, 32}; -+ uint8x16_t expected1 = vld1q_u8 (exp1); -+ uint8x16_t expected2 = vld1q_u8 (exp2); -+ -+ for (i = 0; i < 16; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u64_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u64_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vextu64' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+#include "ext_u64.x" -+ -+/* Do not scan-assembler. An EXT instruction could be emitted, but would merely -+ return its first argument, so it is legitimate to optimize it out. */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu32_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vuzp_u32' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vuzpu32.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp16_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev32q_p16' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev32qp16.x" -+ -+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.8h, ?v\[0-9\]+.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_f32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_f32.x -@@ -0,0 +1,58 @@ -+extern void abort (void); -+ -+float32x4_t -+test_vextq_f32_1 (float32x4_t a, float32x4_t b) -+{ -+ return vextq_f32 (a, b, 1); -+} -+ -+float32x4_t -+test_vextq_f32_2 (float32x4_t a, float32x4_t b) -+{ -+ return vextq_f32 (a, b, 2); -+} -+ -+float32x4_t -+test_vextq_f32_3 (float32x4_t a, float32x4_t b) -+{ -+ return vextq_f32 (a, b, 3); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i, off; -+ float32_t arr1[] = {0, 1, 2, 3}; -+ float32x4_t in1 = vld1q_f32 (arr1); -+ float32_t arr2[] = {4, 5, 6, 7}; -+ float32x4_t in2 = vld1q_f32 (arr2); -+ float32_t exp[4]; -+ float32x4_t expected; -+ float32x4_t actual = test_vextq_f32_1 (in1, in2); -+ -+ for (i = 0; i < 4; i++) -+ exp[i] = i + 1; -+ expected = vld1q_f32 (exp); -+ for (i = 0; i < 4; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_f32_2 (in1, in2); -+ for (i = 0; i < 4; i++) -+ exp[i] = i + 2; -+ expected = vld1q_f32 (exp); -+ for (i = 0; i < 4; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_f32_3 (in1, in2); -+ for (i = 0; i < 4; i++) -+ exp[i] = i + 3; -+ expected = vld1q_f32 (exp); -+ for (i = 0; i < 4; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp16_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzipq_p16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vzipqp16.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp8_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vtrn_p8' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vtrnp8.x" -+ -+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u8.x -@@ -0,0 +1,227 @@ -+extern void abort (void); -+ -+uint8x16_t -+test_vextq_u8_1 (uint8x16_t a, uint8x16_t b) -+{ -+ return vextq_u8 (a, b, 1); -+} -+ -+uint8x16_t -+test_vextq_u8_2 (uint8x16_t a, uint8x16_t b) -+{ -+ return vextq_u8 (a, b, 2); -+} -+ -+uint8x16_t -+test_vextq_u8_3 (uint8x16_t a, uint8x16_t b) -+{ -+ return vextq_u8 (a, b, 3); -+} -+ -+uint8x16_t -+test_vextq_u8_4 (uint8x16_t a, uint8x16_t b) -+{ -+ return vextq_u8 (a, b, 4); -+} -+ -+uint8x16_t -+test_vextq_u8_5 (uint8x16_t a, uint8x16_t b) -+{ -+ return vextq_u8 (a, b, 5); -+} -+ -+uint8x16_t -+test_vextq_u8_6 (uint8x16_t a, uint8x16_t b) -+{ -+ return vextq_u8 (a, b, 6); -+} -+ -+uint8x16_t -+test_vextq_u8_7 (uint8x16_t a, uint8x16_t b) -+{ -+ return vextq_u8 (a, b, 7); -+} -+ -+uint8x16_t -+test_vextq_u8_8 (uint8x16_t a, uint8x16_t b) -+{ -+ return vextq_u8 (a, b, 8); -+} -+ -+uint8x16_t -+test_vextq_u8_9 (uint8x16_t a, uint8x16_t b) -+{ -+ return vextq_u8 (a, b, 9); -+} -+ -+uint8x16_t -+test_vextq_u8_10 (uint8x16_t a, uint8x16_t b) -+{ -+ return vextq_u8 (a, b, 10); -+} -+ -+uint8x16_t -+test_vextq_u8_11 (uint8x16_t a, uint8x16_t b) -+{ -+ return vextq_u8 (a, b, 11); -+} -+ -+uint8x16_t -+test_vextq_u8_12 (uint8x16_t a, uint8x16_t b) -+{ -+ return vextq_u8 (a, b, 12); -+} -+ -+uint8x16_t -+test_vextq_u8_13 (uint8x16_t a, uint8x16_t b) -+{ -+ return vextq_u8 (a, b, 13); -+} -+ -+uint8x16_t -+test_vextq_u8_14 (uint8x16_t a, uint8x16_t b) -+{ -+ return vextq_u8 (a, b, 14); -+} -+ -+uint8x16_t -+test_vextq_u8_15 (uint8x16_t a, uint8x16_t b) -+{ -+ return vextq_u8 (a, b, 15); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint8_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; -+ uint8x16_t in1 = vld1q_u8 (arr1); -+ uint8_t arr2[] = -+ {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; -+ uint8x16_t in2 = vld1q_u8 (arr2); -+ uint8_t exp[16]; -+ uint8x16_t expected; -+ uint8x16_t actual = test_vextq_u8_1 (in1, in2); -+ -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 1; -+ expected = vld1q_u8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_u8_2 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 2; -+ expected = vld1q_u8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_u8_3 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 3; -+ expected = vld1q_u8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_u8_4 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 4; -+ expected = vld1q_u8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_u8_5 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 5; -+ expected = vld1q_u8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_u8_6 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i 
+ 6; -+ expected = vld1q_u8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_u8_7 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 7; -+ expected = vld1q_u8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_u8_8 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 8; -+ expected = vld1q_u8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_u8_9 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 9; -+ expected = vld1q_u8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_u8_10 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 10; -+ expected = vld1q_u8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_u8_11 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 11; -+ expected = vld1q_u8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_u8_12 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 12; -+ expected = vld1q_u8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_u8_13 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 13; -+ expected = vld1q_u8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_u8_14 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 14; -+ expected = vld1q_u8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_u8_15 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 15; -+ expected = vld1q_u8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu32_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzipq_u32' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vzipqu32.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev64_p8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev64p8.x" -+ -+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev32_u8' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev32u8.x" -+ -+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16s8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16s8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev16_s8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev16s8.x" -+ -+/* { dg-final { scan-assembler-times "rev16\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqf32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqf32.x -@@ -0,0 +1,26 @@ -+extern void abort (void); -+ -+float32x4x2_t -+test_vuzpqf32 (float32x4_t _a, float32x4_t _b) -+{ -+ return vuzpq_f32 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ float32_t first[] = {1, 2, 3, 4}; -+ float32_t second[] = {5, 6, 7, 8}; -+ float32x4x2_t result = test_vuzpqf32 (vld1q_f32 (first), vld1q_f32 (second)); -+ float32_t exp1[] = {1, 3, 5, 7}; -+ float32_t exp2[] = {2, 4, 6, 8}; -+ float32x4_t expect1 = vld1q_f32 (exp1); -+ float32x4_t expect2 = vld1q_f32 (exp2); -+ -+ for (i = 0; i < 4; i++) -+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp8.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+poly8x8x2_t -+test_vzipp8 (poly8x8_t _a, poly8x8_t _b) -+{ -+ return vzip_p8 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; -+ poly8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; -+ poly8x8x2_t result = test_vzipp8 (vld1_p8 (first), vld1_p8 (second)); -+ poly8x8_t res1 = result.val[0], res2 = result.val[1]; -+ poly8_t exp1[] = {1, 9, 2, 10, 3, 11, 4, 12}; -+ poly8_t exp2[] = {5, 13, 6, 14, 7, 15, 8, 16}; -+ poly8x8_t expected1 = vld1_p8 (exp1); -+ poly8x8_t expected2 = vld1_p8 (exp2); -+ -+ for (i = 0; i < 8; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs32.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+int32x4x2_t -+test_vtrnqs32 (int32x4_t _a, int32x4_t _b) -+{ -+ return vtrnq_s32 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int32_t first[] = {1, 2, 3, 4}; -+ int32_t second[] = {5, 6, 7, 8}; -+ int32x4x2_t result = test_vtrnqs32 (vld1q_s32 (first), vld1q_s32 (second)); -+ int32x4_t res1 = result.val[0], res2 = result.val[1]; -+ int32_t exp1[] = {1, 5, 3, 7}; -+ int32_t exp2[] = {2, 6, 4, 8}; -+ int32x4_t expected1 = vld1q_s32 (exp1); -+ int32x4_t expected2 = vld1q_s32 (exp2); -+ -+ for (i = 0; i < 4; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/int_comparisons_2.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/int_comparisons_2.c -@@ -0,0 +1,131 @@ -+/* { dg-do run } */ -+/* { dg-options "-O2 -fno-inline" } */ -+/* Stops the test_xxx methods being inlined into main, thus preventing constant -+ propagation. 
*/ -+ -+#include "int_comparisons.x" -+ -+extern void abort (void); -+ -+#define CHECK2(R0, R1) if (res[0] != R0 || res[1] != R1) abort () -+ -+#define TEST2(BASETYPE, SUFFIX, RESTYPE, ST1_SUFFIX) { \ -+ BASETYPE##_t _a[2] = {2, 3}; \ -+ BASETYPE##x2_t a = vld1##SUFFIX (_a); \ -+ BASETYPE##_t _b[2] = {1, 3}; \ -+ BASETYPE##x2_t b = vld1##SUFFIX (_b); \ -+ RESTYPE res[2]; \ -+ vst1##ST1_SUFFIX (res, test_vclt##SUFFIX (a, b)); CHECK2 (0, 0); \ -+ vst1##ST1_SUFFIX (res, test_vclt##SUFFIX (b, a)); CHECK2 (-1, 0); \ -+ vst1##ST1_SUFFIX (res, test_vcle##SUFFIX (a, b)); CHECK2 (0, -1); \ -+ vst1##ST1_SUFFIX (res, test_vcle##SUFFIX (b, a)); CHECK2 (-1, -1); \ -+ vst1##ST1_SUFFIX (res, test_vceq##SUFFIX (a, b)); CHECK2 (0, -1); \ -+ vst1##ST1_SUFFIX (res, test_vcge##SUFFIX (a, b)); CHECK2 (-1, -1); \ -+ vst1##ST1_SUFFIX (res, test_vcge##SUFFIX (b, a)); CHECK2 (0, -1); \ -+ vst1##ST1_SUFFIX (res, test_vcgt##SUFFIX (a, b)); CHECK2 (-1, 0); \ -+ vst1##ST1_SUFFIX (res, test_vcgt##SUFFIX (b, a)); CHECK2 (0, 0); \ -+ vst1##ST1_SUFFIX (res, test_vtst##SUFFIX (a, b)); CHECK2 (0, -1); \ -+ vst1##ST1_SUFFIX (res, test_vtst##SUFFIX (a + 1, b)); CHECK2 (-1, 0); \ -+} -+ -+#define CHECK4(T, R0, R1, R2, R3) \ -+ if (res[0] != (T)R0 || res[1] != (T)R1 \ -+ || res[2] != (T)R2 || res[3] != (T)R3) abort () -+ -+#define TEST4(BASETYPE, SUFFIX, RESTYPE, ST1_SUFFIX) { \ -+ BASETYPE##_t _a[4] = {1, 2, 3, 4}; \ -+ BASETYPE##x4_t a = vld1##SUFFIX (_a); \ -+ BASETYPE##_t _b[4] = {4, 2, 1, 3}; \ -+ BASETYPE##x4_t b = vld1##SUFFIX (_b); \ -+ RESTYPE res[4]; \ -+ vst1##ST1_SUFFIX (res, test_vclt##SUFFIX (a, b)); \ -+ CHECK4 (RESTYPE, -1, 0, 0, 0); \ -+ vst1##ST1_SUFFIX (res, test_vcle##SUFFIX (a, b)); \ -+ CHECK4 (RESTYPE, -1, -1, 0, 0); \ -+ vst1##ST1_SUFFIX (res, test_vceq##SUFFIX (a, b)); \ -+ CHECK4 (RESTYPE, 0, -1, 0, 0); \ -+ vst1##ST1_SUFFIX (res, test_vcge##SUFFIX (a, b)); \ -+ CHECK4 (RESTYPE, 0, -1, -1, -1); \ -+ vst1##ST1_SUFFIX (res, test_vcgt##SUFFIX (a, b)); \ -+ CHECK4 (RESTYPE, 0, 0, -1, -1); \ -+ vst1##ST1_SUFFIX (res, test_vtst##SUFFIX (a, b)); \ -+ CHECK4 (RESTYPE, 0, -1, -1, 0); \ -+} -+ -+#define CHECK8(T, R0, R1, R2, R3, R4, R5, R6, R7) \ -+ if (res[0] != (T)R0 || res[1] != (T)R1 || res[2] != (T)R2 || res[3] != (T)R3 \ -+ || res[4] != (T)R4 || res[5] != (T)R5 || res[6] != (T)R6 \ -+ || res[7] != (T)R7) abort () -+ -+#define TEST8(BASETYPE, SUFFIX, RESTYPE, ST1_SUFFIX) { \ -+ BASETYPE##_t _a[8] = {1, 2, 3, 4, 5, 6, 7, 8}; \ -+ BASETYPE##x8_t a = vld1##SUFFIX (_a); \ -+ BASETYPE##_t _b[8] = {4, 2, 1, 3, 2, 6, 8, 9}; \ -+ BASETYPE##x8_t b = vld1##SUFFIX (_b); \ -+ RESTYPE res[8]; \ -+ vst1##ST1_SUFFIX (res, test_vclt##SUFFIX (a, b)); \ -+ CHECK8 (RESTYPE, -1, 0, 0, 0, 0, 0, -1, -1); \ -+ vst1##ST1_SUFFIX (res, test_vcle##SUFFIX (a, b)); \ -+ CHECK8 (RESTYPE, -1, -1, 0, 0, 0, -1, -1, -1); \ -+ vst1##ST1_SUFFIX (res, test_vceq##SUFFIX (a, b)); \ -+ CHECK8 (RESTYPE, 0, -1, 0, 0, 0, -1, 0, 0); \ -+ vst1##ST1_SUFFIX (res, test_vcge##SUFFIX (a, b)); \ -+ CHECK8 (RESTYPE, 0, -1, -1, -1, -1, -1, 0, 0); \ -+ vst1##ST1_SUFFIX (res, test_vcgt##SUFFIX (a, b)); \ -+ CHECK8 (RESTYPE, 0, 0, -1, -1, -1, 0, 0, 0); \ -+ vst1##ST1_SUFFIX (res, test_vtst##SUFFIX (a, b)); \ -+ CHECK8 (RESTYPE, 0, -1, -1, 0, 0, -1, 0, -1); \ -+} -+ -+/* 16-way tests use same 8 values twice. 
*/ -+#define CHECK16(T, R0, R1, R2, R3, R4, R5, R6, R7) \ -+ if (res[0] != (T)R0 || res[1] != (T)R1 || res[2] != (T)R2 || res[3] != (T)R3 \ -+ || res[4] != (T)R4 || res[5] != (T)R5 || res[6] != (T)R6 \ -+ || res[7] != (T)R7 || res[8] != (T)R0 || res[9] != (T)R1 \ -+ || res[10] != (T)R2 || res[11] != (T)R3 || res[12] != (T)R4 \ -+ || res[13] != (T)R5 || res[14] != (T)R6 || res[15] != (T)R7) abort () -+ -+#define TEST16(BASETYPE, SUFFIX, RESTYPE, ST1_SUFFIX) { \ -+ BASETYPE##_t _a[16] = {1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8}; \ -+ BASETYPE##x16_t a = vld1##SUFFIX (_a); \ -+ BASETYPE##_t _b[16] = {4, 2, 1, 3, 2, 6, 8, 9, 4, 2, 1, 3, 2, 6, 8, 9}; \ -+ BASETYPE##x16_t b = vld1##SUFFIX (_b); \ -+ RESTYPE res[16]; \ -+ vst1##ST1_SUFFIX (res, test_vclt##SUFFIX (a, b)); \ -+ CHECK16 (RESTYPE, -1, 0, 0, 0, 0, 0, -1, -1); \ -+ vst1##ST1_SUFFIX (res, test_vcle##SUFFIX (a, b)); \ -+ CHECK16 (RESTYPE, -1, -1, 0, 0, 0, -1, -1, -1); \ -+ vst1##ST1_SUFFIX (res, test_vceq##SUFFIX (a, b)); \ -+ CHECK16 (RESTYPE, 0, -1, 0, 0, 0, -1, 0, 0); \ -+ vst1##ST1_SUFFIX (res, test_vcge##SUFFIX (a, b)); \ -+ CHECK16 (RESTYPE, 0, -1, -1, -1, -1, -1, 0, 0); \ -+ vst1##ST1_SUFFIX (res, test_vcgt##SUFFIX (a, b)); \ -+ CHECK16 (RESTYPE, 0, 0, -1, -1, -1, 0, 0, 0); \ -+ vst1##ST1_SUFFIX (res, test_vtst##SUFFIX (a, b)); \ -+ CHECK16 (RESTYPE, 0, -1, -1, 0, 0, -1, 0, -1); \ -+} -+ -+int -+main (int argc, char **argv) -+{ -+ TEST2 (int32, _s32, uint32_t, _u32); -+ TEST2 (uint32, _u32, uint32_t, _u32); -+ TEST2 (int64, q_s64, uint64_t, q_u64); -+ TEST2 (uint64, q_u64, uint64_t, q_u64); -+ -+ TEST4 (int16, _s16, uint16_t, _u16); -+ TEST4 (uint16, _u16, uint16_t, _u16); -+ TEST4 (int32, q_s32, uint32_t, q_u32); -+ TEST4 (uint32, q_u32, uint32_t, q_u32); -+ -+ TEST8 (int8, _s8, uint8_t, _u8); -+ TEST8 (uint8, _u8, uint8_t, _u8); -+ TEST8 (int16, q_s16, uint16_t, q_u16); -+ TEST8 (uint16, q_u16, uint16_t, q_u16); -+ -+ TEST16 (int8, q_s8, uint8_t, q_u8); -+ TEST16 (uint8, q_u8, uint8_t, q_u8); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu32.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+uint32x4x2_t -+test_vtrnqu32 (uint32x4_t _a, uint32x4_t _b) -+{ -+ return vtrnq_u32 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint32_t first[] = {1, 2, 3, 4}; -+ uint32_t second[] = {5, 6, 7, 8}; -+ uint32x4x2_t result = test_vtrnqu32 (vld1q_u32 (first), vld1q_u32 (second)); -+ uint32x4_t res1 = result.val[0], res2 = result.val[1]; -+ uint32_t exp1[] = {1, 5, 3, 7}; -+ uint32_t exp2[] = {2, 6, 4, 8}; -+ uint32x4_t expected1 = vld1q_u32 (exp1); -+ uint32x4_t expected2 = vld1q_u32 (exp2); -+ -+ for (i = 0; i < 4; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs32.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+int32x4_t -+test_vrev64qs32 (int32x4_t _arg) -+{ -+ return vrev64q_s32 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int32x4_t inorder = {1, 2, 3, 4}; -+ int32x4_t reversed = test_vrev64qs32 (inorder); -+ int32x4_t expected = {2, 1, 4, 3}; -+ -+ for (i = 0; i < 4; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu8.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+uint8x8x2_t 
-+test_vtrnu8 (uint8x8_t _a, uint8x8_t _b) -+{ -+ return vtrn_u8 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; -+ uint8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; -+ uint8x8x2_t result = test_vtrnu8 (vld1_u8 (first), vld1_u8 (second)); -+ uint8x8_t res1 = result.val[0], res2 = result.val[1]; -+ uint8_t exp1[] = {1, 9, 3, 11, 5, 13, 7, 15}; -+ uint8_t exp2[] = {2, 10, 4, 12, 6, 14, 8, 16}; -+ uint8x8_t expected1 = vld1_u8 (exp1); -+ uint8x8_t expected2 = vld1_u8 (exp2); -+ -+ for (i = 0; i < 8; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu32.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+uint32x4_t -+test_vrev64qu32 (uint32x4_t _arg) -+{ -+ return vrev64q_u32 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint32x4_t inorder = {1, 2, 3, 4}; -+ uint32x4_t reversed = test_vrev64qu32 (inorder); -+ uint32x4_t expected = {2, 1, 4, 3}; -+ -+ for (i = 0; i < 4; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s64_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s64_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vextQs64' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+#include "extq_s64.x" -+ -+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.8\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s8.x -@@ -0,0 +1,114 @@ -+extern void abort (void); -+ -+int8x8_t -+test_vext_s8_1 (int8x8_t a, int8x8_t b) -+{ -+ return vext_s8 (a, b, 1); -+} -+ -+int8x8_t -+test_vext_s8_2 (int8x8_t a, int8x8_t b) -+{ -+ return vext_s8 (a, b, 2); -+} -+ -+int8x8_t -+test_vext_s8_3 (int8x8_t a, int8x8_t b) -+{ -+ return vext_s8 (a, b, 3); -+} -+ -+int8x8_t -+test_vext_s8_4 (int8x8_t a, int8x8_t b) -+{ -+ return vext_s8 (a, b, 4); -+} -+ -+int8x8_t -+test_vext_s8_5 (int8x8_t a, int8x8_t b) -+{ -+ return vext_s8 (a, b, 5); -+} -+ -+int8x8_t -+test_vext_s8_6 (int8x8_t a, int8x8_t b) -+{ -+ return vext_s8 (a, b, 6); -+} -+ -+int8x8_t -+test_vext_s8_7 (int8x8_t a, int8x8_t b) -+{ -+ return vext_s8 (a, b, 7); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i, off; -+ int8_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7}; -+ int8x8_t in1 = vld1_s8 (arr1); -+ int8_t arr2[] = {8, 9, 10, 11, 12, 13, 14, 15}; -+ int8x8_t in2 = vld1_s8 (arr2); -+ int8_t exp[8]; -+ int8x8_t expected; -+ int8x8_t actual = test_vext_s8_1 (in1, in2); -+ -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 1; -+ expected = vld1_s8 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_s8_2 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 2; -+ expected = vld1_s8 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_s8_3 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 3; -+ expected = vld1_s8 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_s8_4 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 4; -+ expected = vld1_s8 (exp); -+ 
for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_s8_5 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 5; -+ expected = vld1_s8 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_s8_6 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 6; -+ expected = vld1_s8 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_s8_7 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 7; -+ expected = vld1_s8 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzips32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzips32_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzip_s32' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vzips32.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp16_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vtrn_p16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vtrnp16.x" -+ -+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev32q_p8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev32qp8.x" -+ -+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu32_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vtrn_u32' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vtrnu32.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps8.x -@@ -0,0 +1,26 @@ -+extern void abort (void); -+ -+int8x8x2_t -+test_vuzps8 (int8x8_t _a, int8x8_t _b) -+{ -+ return vuzp_s8 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; -+ int8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; -+ int8x8x2_t result = test_vuzps8 (vld1_s8 (first), vld1_s8 (second)); -+ int8_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15}; -+ int8_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16}; -+ int8x8_t expect1 = vld1_s8 (exp1); -+ int8x8_t expect2 = vld1_s8 (exp2); -+ -+ for (i = 0; i < 8; i++) -+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu8_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzipq_u8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vzipqu8.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp8.x -@@ -0,0 +1,29 @@ -+extern void abort (void); -+ -+poly8x16x2_t -+test_vzipqp8 (poly8x16_t _a, poly8x16_t _b) -+{ -+ return vzipq_p8 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; -+ poly8_t second[] = -+ {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; -+ poly8x16x2_t result = test_vzipqp8 (vld1q_p8 (first), vld1q_p8 (second)); -+ poly8x16_t res1 = result.val[0], res2 = result.val[1]; -+ poly8_t exp1[] = {1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, 8, 24}; -+ poly8_t exp2[] = -+ {9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31, 16, 32}; -+ poly8x16_t expected1 = vld1q_p8 (exp1); -+ poly8x16_t expected2 = vld1q_p8 (exp2); -+ -+ for (i = 0; i < 16; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p16_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vextp16' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+#include "ext_p16.x" -+ -+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s16.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+int16x4_t -+test_vrev32s16 (int16x4_t _arg) -+{ -+ return vrev32_s16 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int16x4_t inorder = {1, 2, 3, 4}; -+ int16x4_t reversed = test_vrev32s16 (inorder); -+ int16x4_t expected = {2, 1, 4, 3}; -+ -+ for (i = 0; i < 4; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u16.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+uint16x4_t -+test_vrev32u16 (uint16x4_t _arg) -+{ -+ return vrev32_u16 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint16x4_t inorder = {1, 2, 3, 4}; -+ uint16x4_t reversed = test_vrev32u16 (inorder); -+ uint16x4_t expected = {2, 1, 4, 3}; -+ -+ for (i = 0; i < 4; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p16.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+poly16x4_t -+test_vrev64p16 (poly16x4_t _arg) -+{ -+ return vrev64_p16 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly16x4_t inorder = {1, 2, 3, 4}; -+ poly16x4_t reversed = test_vrev64p16 (inorder); -+ poly16x4_t expected = {4, 3, 2, 1}; -+ -+ for (i = 0; i < 4; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qf32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qf32_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev64q_f32' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev64qf32.x" -+ -+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.4s, ?v\[0-9\]+.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqf32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqf32.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+float32x4x2_t -+test_vzipqf32 (float32x4_t _a, float32x4_t _b) -+{ -+ return vzipq_f32 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ float32_t first[] = {1, 2, 3, 4}; -+ float32_t second[] = {5, 6, 7, 8}; -+ float32x4x2_t result = test_vzipqf32 (vld1q_f32 (first), vld1q_f32 (second)); -+ float32x4_t res1 = result.val[0], res2 = result.val[1]; -+ float32_t exp1[] = {1, 5, 2, 6}; -+ float32_t exp2[] = {3, 7, 4, 8}; -+ float32x4_t expected1 = vld1q_f32 (exp1); -+ float32x4_t expected2 = vld1q_f32 (exp2); -+ -+ for (i = 0; i < 4; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u32_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vextu32' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+#include "ext_u32.x" -+ -+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#\[0-9\]+\(?:.4)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p8.x -@@ -0,0 +1,227 @@ -+extern void abort (void); -+ -+poly8x16_t -+test_vextq_p8_1 (poly8x16_t a, poly8x16_t b) -+{ -+ return vextq_p8 (a, b, 1); -+} -+ -+poly8x16_t -+test_vextq_p8_2 (poly8x16_t a, poly8x16_t b) -+{ -+ return vextq_p8 (a, b, 2); -+} -+ -+poly8x16_t -+test_vextq_p8_3 (poly8x16_t a, poly8x16_t b) -+{ -+ return vextq_p8 (a, b, 3); -+} -+ -+poly8x16_t -+test_vextq_p8_4 (poly8x16_t a, poly8x16_t b) -+{ -+ return vextq_p8 (a, b, 4); -+} -+ -+poly8x16_t -+test_vextq_p8_5 (poly8x16_t a, poly8x16_t b) -+{ -+ return vextq_p8 (a, b, 5); -+} -+ -+poly8x16_t -+test_vextq_p8_6 (poly8x16_t a, poly8x16_t b) -+{ -+ return vextq_p8 (a, b, 6); -+} -+ -+poly8x16_t -+test_vextq_p8_7 (poly8x16_t a, poly8x16_t b) -+{ -+ return vextq_p8 (a, b, 7); -+} -+ -+poly8x16_t -+test_vextq_p8_8 (poly8x16_t a, poly8x16_t b) -+{ -+ return vextq_p8 (a, b, 8); -+} -+ -+poly8x16_t -+test_vextq_p8_9 (poly8x16_t a, poly8x16_t b) -+{ -+ return vextq_p8 (a, b, 9); -+} -+ -+poly8x16_t -+test_vextq_p8_10 (poly8x16_t a, poly8x16_t b) -+{ -+ return vextq_p8 (a, b, 10); -+} -+ -+poly8x16_t -+test_vextq_p8_11 (poly8x16_t a, poly8x16_t b) -+{ -+ return vextq_p8 (a, b, 11); -+} -+ -+poly8x16_t -+test_vextq_p8_12 (poly8x16_t a, poly8x16_t b) -+{ -+ return vextq_p8 (a, b, 12); -+} -+ -+poly8x16_t -+test_vextq_p8_13 (poly8x16_t a, poly8x16_t b) -+{ -+ return vextq_p8 (a, b, 13); -+} -+ -+poly8x16_t -+test_vextq_p8_14 (poly8x16_t a, poly8x16_t b) -+{ -+ return vextq_p8 (a, b, 14); -+} -+ -+poly8x16_t -+test_vextq_p8_15 (poly8x16_t a, poly8x16_t b) -+{ -+ return vextq_p8 (a, b, 15); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly8_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; -+ poly8x16_t in1 = vld1q_p8 (arr1); -+ poly8_t arr2[] = -+ {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; -+ poly8x16_t in2 = vld1q_p8 (arr2); -+ poly8_t exp[16]; -+ poly8x16_t expected; -+ poly8x16_t actual = test_vextq_p8_1 (in1, in2); -+ -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 1; -+ expected = vld1q_p8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_p8_2 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 2; -+ expected = vld1q_p8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_p8_3 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 3; -+ expected = vld1q_p8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_p8_4 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 4; -+ expected = vld1q_p8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_p8_5 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 5; -+ expected = vld1q_p8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_p8_6 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 6; -+ expected = vld1q_p8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != 
expected[i]) -+ abort (); -+ -+ actual = test_vextq_p8_7 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 7; -+ expected = vld1q_p8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_p8_8 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 8; -+ expected = vld1q_p8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_p8_9 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 9; -+ expected = vld1q_p8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_p8_10 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 10; -+ expected = vld1q_p8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_p8_11 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 11; -+ expected = vld1q_p8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_p8_12 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 12; -+ expected = vld1q_p8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_p8_13 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 13; -+ expected = vld1q_p8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_p8_14 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 14; -+ expected = vld1q_p8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_p8_15 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 15; -+ expected = vld1q_p8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs8.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+int8x16_t -+test_vrev64qs8 (int8x16_t _arg) -+{ -+ return vrev64q_s8 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; -+ int8x16_t reversed = test_vrev64qs8 (inorder); -+ int8x16_t expected = {8, 7, 6, 5, 4, 3, 2, 1, 16, 15, 14, 13, 12, 11, 10, 9}; -+ -+ for (i = 0; i < 16; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16p8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16p8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev16_p8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev16p8.x" -+ -+/* { dg-final { scan-assembler-times "rev16\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs32_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vtrnq_s32' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vtrnqs32.x" -+ -+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps16.x -@@ -0,0 +1,26 @@ -+extern void abort (void); -+ -+int16x4x2_t -+test_vuzps16 (int16x4_t _a, int16x4_t _b) -+{ -+ return vuzp_s16 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int16_t first[] = {1, 2, 3, 4}; -+ int16_t second[] = {5, 6, 7, 8}; -+ int16x4x2_t result = test_vuzps16 (vld1_s16 (first), vld1_s16 (second)); -+ int16_t exp1[] = {1, 3, 5, 7}; -+ int16_t exp2[] = {2, 4, 6, 8}; -+ int16x4_t expect1 = vld1_s16 (exp1); -+ int16x4_t expect2 = vld1_s16 (exp2); -+ -+ for (i = 0; i < 4; i++) -+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu16.x -@@ -0,0 +1,26 @@ -+extern void abort (void); -+ -+uint16x4x2_t -+test_vuzpu16 (uint16x4_t _a, uint16x4_t _b) -+{ -+ return vuzp_u16 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint16_t first[] = {1, 2, 3, 4}; -+ uint16_t second[] = {5, 6, 7, 8}; -+ uint16x4x2_t result = test_vuzpu16 (vld1_u16 (first), vld1_u16 (second)); -+ uint16_t exp1[] = {1, 3, 5, 7}; -+ uint16_t exp2[] = {2, 4, 6, 8}; -+ uint16x4_t expect1 = vld1_u16 (exp1); -+ uint16x4_t expect2 = vld1_u16 (exp2); -+ -+ for (i = 0; i < 4; i++) -+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu8_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vtrn_u8' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vtrnu8.x" -+ -+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp8.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+poly8x8x2_t -+test_vtrnp8 (poly8x8_t _a, poly8x8_t _b) -+{ -+ return vtrn_p8 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; -+ poly8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; -+ poly8x8x2_t result = test_vtrnp8 (vld1_p8 (first), vld1_p8 (second)); -+ poly8x8_t res1 = result.val[0], res2 = result.val[1]; -+ poly8_t exp1[] = {1, 9, 3, 11, 5, 13, 7, 15}; -+ poly8_t exp2[] = {2, 10, 4, 12, 6, 14, 8, 16}; -+ poly8x8_t expected1 = vld1_p8 (exp1); -+ poly8x8_t expected2 = vld1_p8 (exp2); -+ -+ for (i = 0; i < 8; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs16.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+int16x8_t -+test_vrev32qs16 (int16x8_t _arg) -+{ -+ return vrev32q_s16 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int16x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; -+ int16x8_t reversed = test_vrev32qs16 (inorder); -+ int16x8_t expected = {2, 1, 4, 3, 6, 5, 8, 7}; -+ -+ for (i = 0; i < 8; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64f32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64f32_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev64_f32' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev64f32.x" -+ -+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.2s, ?v\[0-9\]+.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzips8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzips8.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+int8x8x2_t -+test_vzips8 (int8x8_t _a, int8x8_t _b) -+{ -+ return vzip_s8 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; -+ int8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; -+ int8x8x2_t result = test_vzips8 (vld1_s8 (first), vld1_s8 (second)); -+ int8x8_t res1 = result.val[0], res2 = result.val[1]; -+ int8_t exp1[] = {1, 9, 2, 10, 3, 11, 4, 12}; -+ int8_t exp2[] = {5, 13, 6, 14, 7, 15, 8, 16}; -+ int8x8_t expected1 = vld1_s8 (exp1); -+ int8x8_t expected2 = vld1_s8 (exp2); -+ -+ for (i = 0; i < 8; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s32_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vextQs32' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+#include "extq_s32.x" -+ -+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.4)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu16.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+uint16x8_t -+test_vrev32qu16 (uint16x8_t _arg) -+{ -+ return vrev32q_u16 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint16x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; -+ uint16x8_t reversed = test_vrev32qu16 (inorder); -+ uint16x8_t expected = {2, 1, 4, 3, 6, 5, 8, 7}; -+ -+ for (i = 0; i < 8; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu16_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev64q_u16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev64qu16.x" -+ -+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.8h, ?v\[0-9\]+.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev64_u8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev64u8.x" -+ -+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnf32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnf32.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+float32x2x2_t -+test_vtrnf32 (float32x2_t _a, float32x2_t _b) -+{ -+ return vtrn_f32 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ float32_t first[] = {1, 2}; -+ float32_t second[] = {3, 4}; -+ float32x2x2_t result = test_vtrnf32 (vld1_f32 (first), vld1_f32 (second)); -+ float32x2_t res1 = result.val[0], res2 = result.val[1]; -+ float32_t exp1[] = {1, 3}; -+ float32_t exp2[] = {2, 4}; -+ float32x2_t expected1 = vld1_f32 (exp1); -+ float32x2_t expected2 = vld1_f32 (exp2); -+ -+ for (i = 0; i < 2; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vexts8' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+#include "ext_s8.x" -+ -+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#?\[0-9\]+\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16u8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16u8.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+uint8x8_t -+test_vrev16u8 (uint8x8_t _arg) -+{ -+ return vrev16_u8 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; -+ uint8x8_t reversed = test_vrev16u8 (inorder); -+ uint8x8_t expected = {2, 1, 4, 3, 6, 5, 8, 7}; -+ -+ for (i = 0; i < 8; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs16_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vuzpq_s16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vuzpqs16.x" -+ -+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s64.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s64.x -@@ -0,0 +1,30 @@ -+extern void abort (void); -+ -+int64x2_t -+test_vextq_s64_1 (int64x2_t a, int64x2_t b) -+{ -+ return vextq_s64 (a, b, 1); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i, off; -+ int64_t arr1[] = {0, 1}; -+ int64x2_t in1 = vld1q_s64 (arr1); -+ int64_t arr2[] = {2, 3}; -+ int64x2_t in2 = vld1q_s64 (arr2); -+ int64_t exp[2]; -+ int64x2_t expected; -+ int64x2_t actual = test_vextq_s64_1 (in1, in2); -+ -+ for (i = 0; i < 2; i++) -+ exp[i] = i + 1; -+ expected = vld1q_s64 (exp); -+ for (i = 0; i < 2; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp16.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+poly16x4x2_t -+test_vzipp16 (poly16x4_t _a, poly16x4_t _b) -+{ -+ return vzip_p16 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly16_t first[] = {1, 2, 3, 4}; -+ poly16_t second[] = {5, 6, 7, 8}; -+ poly16x4x2_t result = test_vzipp16 (vld1_p16 (first), vld1_p16 (second)); -+ poly16x4_t res1 = result.val[0], res2 = result.val[1]; -+ poly16_t exp1[] = {1, 5, 2, 6}; -+ poly16_t exp2[] = {3, 7, 4, 8}; -+ poly16x4_t expected1 = vld1_p16 (exp1); -+ poly16x4_t expected2 = vld1_p16 (exp2); -+ -+ for (i = 0; i < 4; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u64.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u64.x -@@ -0,0 +1,30 @@ -+extern void abort (void); -+ -+uint64x2_t -+test_vextq_u64_1 (uint64x2_t a, uint64x2_t b) -+{ -+ return vextq_u64 (a, b, 1); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i, off; -+ uint64_t arr1[] = {0, 1}; -+ uint64x2_t in1 = vld1q_u64 (arr1); -+ uint64_t arr2[] = {2, 3}; -+ uint64x2_t in2 = vld1q_u64 
(arr2); -+ uint64_t exp[2]; -+ uint64x2_t expected; -+ uint64x2_t actual = test_vextq_u64_1 (in1, in2); -+ -+ for (i = 0; i < 2; i++) -+ exp[i] = i + 1; -+ expected = vld1q_u64 (exp); -+ for (i = 0; i < 2; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev32q_u8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev32qu8.x" -+ -+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u16_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev64_u16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev64u16.x" -+ -+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.4h, ?v\[0-9\]+.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs8.x -@@ -0,0 +1,29 @@ -+extern void abort (void); -+ -+int8x16x2_t -+test_vzipqs8 (int8x16_t _a, int8x16_t _b) -+{ -+ return vzipq_s8 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; -+ int8_t second[] = -+ {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; -+ int8x16x2_t result = test_vzipqs8 (vld1q_s8 (first), vld1q_s8 (second)); -+ int8x16_t res1 = result.val[0], res2 = result.val[1]; -+ int8_t exp1[] = {1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, 8, 24}; -+ int8_t exp2[] = -+ {9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31, 16, 32}; -+ int8x16_t expected1 = vld1q_s8 (exp1); -+ int8x16_t expected2 = vld1q_s8 (exp2); -+ -+ for (i = 0; i < 16; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu8.x -@@ -0,0 +1,28 @@ -+extern void abort (void); -+ -+uint8x16x2_t -+test_vtrnqu8 (uint8x16_t _a, uint8x16_t _b) -+{ -+ return vtrnq_u8 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; -+ uint8_t second[] = -+ {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; -+ uint8x16x2_t result = test_vtrnqu8 (vld1q_u8 (first), vld1q_u8 (second)); -+ uint8x16_t res1 = result.val[0], res2 = result.val[1]; -+ uint8_t exp1[] = {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31}; -+ uint8_t exp2[] = {2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30, 16, 32}; -+ uint8x16_t expected1 = vld1q_u8 (exp1); -+ uint8x16_t expected2 = vld1q_u8 (exp2); -+ -+ for (i = 0; i < 16; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s32.x -@@ -0,0 +1,30 @@ -+extern void abort (void); -+ -+int32x2_t -+test_vext_s32_1 (int32x2_t a, int32x2_t b) -+{ -+ return vext_s32 (a, b, 1); -+} -+ -+int -+main (int 
argc, char **argv) -+{ -+ int i, off; -+ int32_t arr1[] = {0, 1}; -+ int32x2_t in1 = vld1_s32 (arr1); -+ int32_t arr2[] = {2, 3}; -+ int32x2_t in2 = vld1_s32 (arr2); -+ int32_t exp[2]; -+ int32x2_t expected; -+ int32x2_t actual = test_vext_s32_1 (in1, in2); -+ -+ for (i = 0; i < 2; i++) -+ exp[i] = i + 1; -+ expected = vld1_s32 (exp); -+ for (i = 0; i < 2; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps16_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vuzp_s16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vuzps16.x" -+ -+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u32.x -@@ -0,0 +1,30 @@ -+extern void abort (void); -+ -+uint32x2_t -+test_vext_u32_1 (uint32x2_t a, uint32x2_t b) -+{ -+ return vext_u32 (a, b, 1); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i, off; -+ uint32_t arr1[] = {0, 1}; -+ uint32x2_t in1 = vld1_u32 (arr1); -+ uint32_t arr2[] = {2, 3}; -+ uint32x2_t in2 = vld1_u32 (arr2); -+ uint32_t exp[2]; -+ uint32x2_t expected; -+ uint32x2_t actual = test_vext_u32_1 (in1, in2); -+ -+ for (i = 0; i < 2; i++) -+ exp[i] = i + 1; -+ expected = vld1_u32 (exp); -+ for (i = 0; i < 2; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs8_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vuzpq_s8' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vuzpqs8.x" -+ -+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s8.x -@@ -0,0 +1,227 @@ -+extern void abort (void); -+ -+int8x16_t -+test_vextq_s8_1 (int8x16_t a, int8x16_t b) -+{ -+ return vextq_s8 (a, b, 1); -+} -+ -+int8x16_t -+test_vextq_s8_2 (int8x16_t a, int8x16_t b) -+{ -+ return vextq_s8 (a, b, 2); -+} -+ -+int8x16_t -+test_vextq_s8_3 (int8x16_t a, int8x16_t b) -+{ -+ return vextq_s8 (a, b, 3); -+} -+ -+int8x16_t -+test_vextq_s8_4 (int8x16_t a, int8x16_t b) -+{ -+ return vextq_s8 (a, b, 4); -+} -+ -+int8x16_t -+test_vextq_s8_5 (int8x16_t a, int8x16_t b) -+{ -+ return vextq_s8 (a, b, 5); -+} -+ -+int8x16_t -+test_vextq_s8_6 (int8x16_t a, int8x16_t b) -+{ -+ return vextq_s8 (a, b, 6); -+} -+ -+int8x16_t -+test_vextq_s8_7 (int8x16_t a, int8x16_t b) -+{ -+ return vextq_s8 (a, b, 7); -+} -+ -+int8x16_t -+test_vextq_s8_8 (int8x16_t a, int8x16_t b) -+{ -+ return vextq_s8 (a, b, 8); -+} -+ -+int8x16_t -+test_vextq_s8_9 (int8x16_t a, int8x16_t b) -+{ -+ return vextq_s8 (a, b, 9); -+} -+ -+int8x16_t -+test_vextq_s8_10 (int8x16_t a, int8x16_t b) -+{ -+ return vextq_s8 (a, b, 10); -+} -+ -+int8x16_t -+test_vextq_s8_11 (int8x16_t a, int8x16_t b) -+{ -+ return vextq_s8 (a, b, 11); -+} -+ -+int8x16_t -+test_vextq_s8_12 (int8x16_t a, int8x16_t b) -+{ -+ return vextq_s8 (a, b, 12); -+} -+ -+int8x16_t -+test_vextq_s8_13 (int8x16_t a, int8x16_t b) -+{ -+ return vextq_s8 (a, b, 13); -+} -+ -+int8x16_t -+test_vextq_s8_14 (int8x16_t a, int8x16_t b) -+{ -+ return vextq_s8 (a, b, 14); -+} -+ -+int8x16_t -+test_vextq_s8_15 (int8x16_t a, int8x16_t b) -+{ -+ return vextq_s8 (a, b, 15); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int8_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; -+ int8x16_t in1 = vld1q_s8 (arr1); -+ int8_t arr2[] = -+ {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; -+ int8x16_t in2 = vld1q_s8 (arr2); -+ int8_t exp[16]; -+ int8x16_t expected; -+ int8x16_t actual = test_vextq_s8_1 (in1, in2); -+ -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 1; -+ expected = vld1q_s8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_s8_2 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 2; -+ expected = vld1q_s8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_s8_3 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 3; -+ expected = vld1q_s8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_s8_4 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 4; -+ expected = vld1q_s8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_s8_5 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 5; -+ expected = vld1q_s8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_s8_6 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 6; -+ expected = vld1q_s8 (exp); -+ for (i 
= 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_s8_7 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 7; -+ expected = vld1q_s8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_s8_8 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 8; -+ expected = vld1q_s8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_s8_9 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 9; -+ expected = vld1q_s8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_s8_10 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 10; -+ expected = vld1q_s8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_s8_11 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 11; -+ expected = vld1q_s8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_s8_12 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 12; -+ expected = vld1q_s8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_s8_13 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 13; -+ expected = vld1q_s8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_s8_14 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 14; -+ expected = vld1q_s8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_s8_15 (in1, in2); -+ for (i = 0; i < 16; i++) -+ exp[i] = i + 15; -+ expected = vld1q_s8 (exp); -+ for (i = 0; i < 16; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_f64_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_f64_1.c -@@ -0,0 +1,36 @@ -+/* Test the `vextq_f64' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+extern void abort (void); -+#include <stdio.h> -+ -+float64x2_t -+test_vextq_f64_1 (float64x2_t a, float64x2_t b) -+{ -+ return vextq_f64 (a, b, 1); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i, off; -+ float64_t arr1[] = {0, 1}; -+ float64x2_t in1 = vld1q_f64 (arr1); -+ float64_t arr2[] = {2, 3}; -+ float64x2_t in2 = vld1q_f64 (arr2); -+ float64_t exp[] = {1, 2}; -+ float64x2_t expected = vld1q_f64 (exp); -+ float64x2_t actual = test_vextq_f64_1 (in1, in2); -+ -+ for (i = 0; i < 2; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ return 0; -+} -+ -+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.8\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vpaddd_f64.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vpaddd_f64.c -@@ -0,0 +1,27 @@ -+/* Test the vpaddd_f64 AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3" } */ -+ -+#include "arm_neon.h" -+ -+#define SIZE 6 -+ -+extern void abort (void); -+ -+float64_t in[SIZE] = { -4.0, 4.0, -2.0, 2.0, -1.0, 1.0 }; -+ -+int -+main (void) -+{ -+ int i; -+ -+ for (i = 0; i < SIZE / 2; ++i) -+ if (vpaddd_f64 (vld1q_f64 (in + 2 * i)) != 0.0) -+ abort (); -+ -+ return 0; -+} -+ -+/* { dg-final { scan-assembler "faddp\[ \t\]+\[dD\]\[0-9\]+, v\[0-9\].2d+\n" } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs16_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev32q_s16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev32qs16.x" -+ -+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.8h, ?v\[0-9\]+.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs16_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzipq_s16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vzipqs16.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipf32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipf32_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzip_f32' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vzipf32.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16p8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16p8.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+poly8x8_t -+test_vrev16p8 (poly8x8_t _arg) -+{ -+ return vrev16_p8 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; -+ poly8x8_t reversed = test_vrev16p8 (inorder); -+ poly8x8_t expected = {2, 1, 4, 3, 6, 5, 8, 7}; -+ -+ for (i = 0; i < 8; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16u8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16u8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev16_u8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev16u8.x" -+ -+/* { dg-final { scan-assembler-times "rev16\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vextp8' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+#include "ext_p8.x" -+ -+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#?\[0-9\]+\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns8.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+int8x8x2_t -+test_vtrns8 (int8x8_t _a, int8x8_t _b) -+{ -+ return vtrn_s8 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; -+ int8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; -+ int8x8x2_t result = test_vtrns8 (vld1_s8 (first), vld1_s8 (second)); -+ int8x8_t res1 = result.val[0], res2 = result.val[1]; -+ int8_t exp1[] = {1, 9, 3, 11, 5, 13, 7, 15}; -+ int8_t exp2[] = {2, 10, 4, 12, 6, 14, 8, 16}; -+ int8x8_t expected1 = vld1_s8 (exp1); -+ int8x8_t expected2 = vld1_s8 (exp2); -+ -+ for (i = 0; i < 8; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs16.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+int16x8x2_t -+test_vtrnqs16 (int16x8_t _a, int16x8_t _b) -+{ -+ return vtrnq_s16 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; -+ int16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; -+ int16x8x2_t result = test_vtrnqs16 (vld1q_s16 (first), vld1q_s16 (second)); -+ int16x8_t res1 = result.val[0], res2 = result.val[1]; -+ int16_t exp1[] = {1, 9, 3, 11, 5, 13, 7, 15}; -+ int16_t exp2[] = {2, 10, 4, 12, 6, 14, 8, 16}; -+ int16x8_t expected1 = vld1q_s16 (exp1); -+ int16x8_t expected2 = vld1q_s16 (exp2); -+ -+ for (i = 0; i < 8; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu16.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+uint16x8x2_t -+test_vtrnqu16 (uint16x8_t _a, uint16x8_t _b) -+{ -+ return vtrnq_u16 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; -+ uint16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; -+ uint16x8x2_t result = test_vtrnqu16 (vld1q_u16 (first), vld1q_u16 (second)); -+ uint16x8_t res1 = result.val[0], res2 = result.val[1]; -+ uint16_t exp1[] = {1, 9, 3, 11, 5, 13, 7, 15}; -+ uint16_t exp2[] = {2, 10, 4, 12, 6, 14, 8, 16}; -+ uint16x8_t expected1 = vld1q_u16 (exp1); -+ uint16x8_t expected2 = vld1q_u16 (exp2); -+ -+ for (i = 0; i < 8; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p16.x -@@ -0,0 +1,114 @@ -+extern void abort (void); -+ -+poly16x8_t -+test_vextq_p16_1 (poly16x8_t a, poly16x8_t b) -+{ -+ return vextq_p16 (a, b, 1); -+} -+ -+poly16x8_t -+test_vextq_p16_2 (poly16x8_t a, poly16x8_t b) -+{ -+ return vextq_p16 (a, b, 2); -+} -+ -+poly16x8_t -+test_vextq_p16_3 (poly16x8_t a, poly16x8_t b) -+{ -+ return vextq_p16 (a, b, 3); -+} -+ -+poly16x8_t -+test_vextq_p16_4 (poly16x8_t a, poly16x8_t b) -+{ -+ return vextq_p16 (a, b, 4); -+} -+ -+poly16x8_t 
-+test_vextq_p16_5 (poly16x8_t a, poly16x8_t b) -+{ -+ return vextq_p16 (a, b, 5); -+} -+ -+poly16x8_t -+test_vextq_p16_6 (poly16x8_t a, poly16x8_t b) -+{ -+ return vextq_p16 (a, b, 6); -+} -+ -+poly16x8_t -+test_vextq_p16_7 (poly16x8_t a, poly16x8_t b) -+{ -+ return vextq_p16 (a, b, 7); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i, off; -+ poly16_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7}; -+ poly16x8_t in1 = vld1q_p16 (arr1); -+ poly16_t arr2[] = {8, 9, 10, 11, 12, 13, 14, 15}; -+ poly16x8_t in2 = vld1q_p16 (arr2); -+ poly16_t exp[8]; -+ poly16x8_t expected; -+ poly16x8_t actual = test_vextq_p16_1 (in1, in2); -+ -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 1; -+ expected = vld1q_p16 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_p16_2 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 2; -+ expected = vld1q_p16 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_p16_3 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 3; -+ expected = vld1q_p16 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_p16_4 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 4; -+ expected = vld1q_p16 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_p16_5 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 5; -+ expected = vld1q_p16 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_p16_6 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 6; -+ expected = vld1q_p16 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_p16_7 (in1, in2); -+ for (i = 0; i < 8; i++) -+ exp[i] = i + 7; -+ expected = vld1q_p16 (exp); -+ for (i = 0; i < 8; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs16.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+int16x8_t -+test_vrev64qs16 (int16x8_t _arg) -+{ -+ return vrev64q_s16 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int16x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; -+ int16x8_t reversed = test_vrev64qs16 (inorder); -+ int16x8_t expected = {4, 3, 2, 1, 8, 7, 6, 5}; -+ -+ for (i = 0; i < 8; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu16.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+uint16x8_t -+test_vrev64qu16 (uint16x8_t _arg) -+{ -+ return vrev64q_u16 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint16x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; -+ uint16x8_t reversed = test_vrev64qu16 (inorder); -+ uint16x8_t expected = {4, 3, 2, 1, 8, 7, 6, 5}; -+ -+ for (i = 0; i < 8; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u8.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+uint8x8_t -+test_vrev64u8 (uint8x8_t _arg) -+{ -+ return vrev64_u8 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; -+ uint8x8_t reversed = test_vrev64u8 (inorder); -+ uint8x8_t expected = {8, 7, 6, 5, 4, 3, 2, 1}; -+ -+ for (i 
= 0; i < 8; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp16.x -@@ -0,0 +1,26 @@ -+extern void abort (void); -+ -+poly16x8x2_t -+test_vuzpqp16 (poly16x8_t _a, poly16x8_t _b) -+{ -+ return vuzpq_p16 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; -+ poly16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; -+ poly16x8x2_t result = test_vuzpqp16 (vld1q_p16 (first), vld1q_p16 (second)); -+ poly16_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15}; -+ poly16_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16}; -+ poly16x8_t expect1 = vld1q_p16 (exp1); -+ poly16x8_t expect2 = vld1q_p16 (exp2); -+ -+ for (i = 0; i < 8; i++) -+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns16_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vtrn_s16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vtrns16.x" -+ -+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpf32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpf32.x -@@ -0,0 +1,26 @@ -+extern void abort (void); -+ -+float32x2x2_t -+test_vuzpf32 (float32x2_t _a, float32x2_t _b) -+{ -+ return vuzp_f32 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ float32_t first[] = {1, 2}; -+ float32_t second[] = {3, 4}; -+ float32x2x2_t result = test_vuzpf32 (vld1_f32 (first), vld1_f32 (second)); -+ float32_t exp1[] = {1, 3}; -+ float32_t exp2[] = {2, 4}; -+ float32x2_t expect1 = vld1_f32 (exp1); -+ float32x2_t expect2 = vld1_f32 (exp2); -+ -+ for (i = 0; i < 2; i++) -+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu16_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzip_u16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vzipu16.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqf32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqf32_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vtrnq_f32' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vtrnqf32.x" -+ -+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs8_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vtrnq_s8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vtrnqs8.x" -+ -+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp8.x -@@ -0,0 +1,28 @@ -+extern void abort (void); -+ -+poly8x16x2_t -+test_vtrnqp8 (poly8x16_t _a, poly8x16_t _b) -+{ -+ return vtrnq_p8 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; -+ poly8_t second[] = -+ {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; -+ poly8x16x2_t result = test_vtrnqp8 (vld1q_p8 (first), vld1q_p8 (second)); -+ poly8x16_t res1 = result.val[0], res2 = result.val[1]; -+ poly8_t exp1[] = {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31}; -+ poly8_t exp2[] = {2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30, 16, 32}; -+ poly8x16_t expected1 = vld1q_p8 (exp1); -+ poly8x16_t expected2 = vld1q_p8 (exp2); -+ -+ for (i = 0; i < 16; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s32.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+int32x2_t -+test_vrev64s32 (int32x2_t _arg) -+{ -+ return vrev64_s32 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int32x2_t inorder = {1, 2}; -+ int32x2_t reversed = test_vrev64s32 (inorder); -+ int32x2_t expected = {2, 1}; -+ -+ for (i = 0; i < 2; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s16_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vexts16' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+#include "ext_s16.x" -+ -+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu8.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+uint8x16_t -+test_vrev32qu8 (uint8x16_t _arg) -+{ -+ return vrev32q_u8 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; -+ uint8x16_t reversed = test_vrev32qu8 (inorder); -+ uint8x16_t expected = {4, 3, 2, 1, 8, 7, 6, 5, 12, 11, 10, 9, 16, 15, 14, 13}; -+ -+ for (i = 0; i < 16; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u32.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+uint32x2_t -+test_vrev64u32 (uint32x2_t _arg) -+{ -+ return vrev64_u32 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint32x2_t inorder = {1, 2}; -+ uint32x2_t reversed = test_vrev64u32 (inorder); -+ uint32x2_t expected = {2, 1}; -+ -+ for (i = 0; i < 2; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_f32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_f32_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vextQf32' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+#include "extq_f32.x" -+ -+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.4)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qu8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qu8.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+uint8x16_t -+test_vrev16qu8 (uint8x16_t _arg) -+{ -+ return vrev16q_u8 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; -+ uint8x16_t reversed = test_vrev16qu8 (inorder); -+ uint8x16_t expected = {2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15}; -+ -+ for (i = 0; i < 16; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp8_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vuzpq_p8' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vuzpqp8.x" -+ -+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp16_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev64q_p16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev64qp16.x" -+ -+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.8h, ?v\[0-9\]+.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp16.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+poly16x8x2_t -+test_vzipqp16 (poly16x8_t _a, poly16x8_t _b) -+{ -+ return vzipq_p16 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; -+ poly16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; -+ poly16x8x2_t result = test_vzipqp16 (vld1q_p16 (first), vld1q_p16 (second)); -+ poly16x8_t res1 = result.val[0], res2 = result.val[1]; -+ poly16_t exp1[] = {1, 9, 2, 10, 3, 11, 4, 12}; -+ poly16_t exp2[] = {5, 13, 6, 14, 7, 15, 8, 16}; -+ poly16x8_t expected1 = vld1q_p16 (exp1); -+ poly16x8_t expected2 = vld1q_p16 (exp2); -+ -+ for (i = 0; i < 8; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu16_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vtrnq_u16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vtrnqu16.x" -+ -+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu32_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev64q_u32' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev64qu32.x" -+ -+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.4s, ?v\[0-9\]+.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu8.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+uint8x16x2_t -+test_vuzpqu8 (uint8x16_t _a, uint8x16_t _b) -+{ -+ return vuzpq_u8 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; -+ uint8_t second[] = -+ {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; -+ uint8x16x2_t result = test_vuzpqu8 (vld1q_u8 (first), vld1q_u8 (second)); -+ uint8_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}; -+ uint8_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32}; -+ uint8x16_t expect1 = vld1q_u8 (exp1); -+ uint8x16_t expect2 = vld1q_u8 (exp2); -+ -+ for (i = 0; i < 16; i++) -+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p8.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+poly8x8_t -+test_vrev64p8 (poly8x8_t _arg) -+{ -+ return vrev64_p8 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; -+ poly8x8_t reversed = test_vrev64p8 (inorder); -+ poly8x8_t expected = {8, 7, 6, 5, 4, 3, 2, 1}; -+ -+ for (i = 0; i < 8; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u8.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+uint8x8_t -+test_vrev32u8 (uint8x8_t _arg) -+{ -+ return vrev32_u8 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; -+ uint8x8_t reversed = test_vrev32u8 (inorder); -+ uint8x8_t expected = {4, 3, 2, 1, 8, 7, 6, 5}; -+ -+ for (i = 0; i < 8; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16s8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16s8.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+int8x8_t -+test_vrev16s8 (int8x8_t _arg) -+{ -+ return vrev16_s8 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; -+ int8x8_t reversed = test_vrev16s8 (inorder); -+ int8x8_t expected = {2, 1, 4, 3, 6, 5, 8, 7}; -+ -+ for (i = 0; i < 8; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vextu8' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+#include "ext_u8.x" -+ -+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#?\[0-9\]+\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u16_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vextQu16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+#include "extq_u16.x" -+ -+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/int_comparisons.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/int_comparisons.x -@@ -0,0 +1,68 @@ -+/* test_vcXXX wrappers for all the vcXXX (vector compare) and vtst intrinsics -+ in arm_neon.h (excluding the 64x1 variants as these generally produce scalar -+ not vector ops). */ -+#include "arm_neon.h" -+ -+#define DONT_FORCE(X) -+ -+#define FORCE_SIMD(V1) asm volatile ("mov %d0, %1.d[0]" \ -+ : "=w"(V1) \ -+ : "w"(V1) \ -+ : /* No clobbers */); -+ -+#define OP1(SIZE, OP, BASETYPE, SUFFIX, FORCE) uint##SIZE##_t \ -+test_v##OP##SUFFIX (BASETYPE##SIZE##_t a) \ -+{ \ -+ uint##SIZE##_t res; \ -+ FORCE (a); \ -+ res = v##OP##SUFFIX (a); \ -+ FORCE (res); \ -+ return res; \ -+} -+ -+#define OP2(SIZE, OP, BASETYPE, SUFFIX, FORCE) uint##SIZE##_t \ -+test_v##OP##SUFFIX (BASETYPE##SIZE##_t a, BASETYPE##SIZE##_t b) \ -+{ \ -+ uint##SIZE##_t res; \ -+ FORCE (a); \ -+ FORCE (b); \ -+ res = v##OP##SUFFIX (a, b); \ -+ FORCE (res); \ -+ return res; \ -+} -+ -+#define UNSIGNED_OPS(SIZE, BASETYPE, SUFFIX, FORCE) \ -+OP2 (SIZE, tst, BASETYPE, SUFFIX, FORCE) \ -+OP1 (SIZE, ceqz, BASETYPE, SUFFIX, FORCE) \ -+OP2 (SIZE, ceq, BASETYPE, SUFFIX, FORCE) \ -+OP2 (SIZE, cge, BASETYPE, SUFFIX, FORCE) \ -+OP2 (SIZE, cgt, BASETYPE, SUFFIX, FORCE) \ -+OP2 (SIZE, cle, BASETYPE, SUFFIX, FORCE) \ -+OP2 (SIZE, clt, BASETYPE, SUFFIX, FORCE) -+ -+#define ALL_OPS(SIZE, BASETYPE, SUFFIX, FORCE) \ -+OP1 (SIZE, cgez, BASETYPE, SUFFIX, FORCE) \ -+OP1 (SIZE, cgtz, BASETYPE, SUFFIX, FORCE) \ -+OP1 (SIZE, clez, BASETYPE, SUFFIX, FORCE) \ -+OP1 (SIZE, cltz, BASETYPE, SUFFIX, FORCE) \ -+UNSIGNED_OPS (SIZE, BASETYPE, SUFFIX, FORCE) -+ -+ALL_OPS (8x8, int, _s8, DONT_FORCE) -+ALL_OPS (16x4, int, _s16, DONT_FORCE) -+ALL_OPS (32x2, int, _s32, DONT_FORCE) -+ALL_OPS (64x1, int, _s64, DONT_FORCE) -+ALL_OPS (64, int, d_s64, FORCE_SIMD) -+ALL_OPS (8x16, int, q_s8, DONT_FORCE) -+ALL_OPS (16x8, int, q_s16, DONT_FORCE) -+ALL_OPS (32x4, int, q_s32, DONT_FORCE) -+ALL_OPS (64x2, int, q_s64, DONT_FORCE) -+UNSIGNED_OPS (8x8, uint, _u8, DONT_FORCE) -+UNSIGNED_OPS (16x4, uint, _u16, DONT_FORCE) -+UNSIGNED_OPS (32x2, uint, _u32, DONT_FORCE) -+UNSIGNED_OPS (64x1, uint, _u64, DONT_FORCE) -+UNSIGNED_OPS (64, uint, d_u64, FORCE_SIMD) -+UNSIGNED_OPS (8x16, uint, q_u8, DONT_FORCE) -+UNSIGNED_OPS (16x8, uint, q_u16, DONT_FORCE) -+UNSIGNED_OPS (32x4, uint, q_u32, DONT_FORCE) -+UNSIGNED_OPS (64x2, uint, q_u64, DONT_FORCE) -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs32_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vuzpq_s32' AArch64 SIMD 
intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vuzpqs32.x" -+ -+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps8_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vuzp_s8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vuzps8.x" -+ -+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp8_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vtrnq_p8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vtrnqp8.x" -+ -+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p16_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev64_p16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev64p16.x" -+ -+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.4h, ?v\[0-9\]+.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u16_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev32_u16' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev32u16.x" -+ -+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.4h, ?v\[0-9\]+.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp8.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+poly8x16_t -+test_vrev32qp8 (poly8x16_t _arg) -+{ -+ return vrev32q_p8 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; -+ poly8x16_t reversed = test_vrev32qp8 (inorder); -+ poly8x16_t expected = {4, 3, 2, 1, 8, 7, 6, 5, 12, 11, 10, 9, 16, 15, 14, 13}; -+ -+ for (i = 0; i < 16; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qs8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qs8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev16q_s8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev16qs8.x" -+ -+/* { dg-final { scan-assembler-times "rev16\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp16.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+poly16x4x2_t -+test_vtrnp16 (poly16x4_t _a, poly16x4_t _b) -+{ -+ return vtrn_p16 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly16_t first[] = {1, 2, 3, 4}; -+ poly16_t second[] = {5, 6, 7, 8}; -+ poly16x4x2_t result = test_vtrnp16 (vld1_p16 (first), vld1_p16 (second)); -+ poly16x4_t res1 = result.val[0], res2 = result.val[1]; -+ poly16_t exp1[] = {1, 5, 3, 7}; -+ poly16_t exp2[] = {2, 6, 4, 8}; -+ poly16x4_t expected1 = vld1_p16 (exp1); -+ poly16x4_t expected2 = vld1_p16 (exp2); -+ -+ for (i = 0; i < 4; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzips32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzips32.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+int32x2x2_t -+test_vzips32 (int32x2_t _a, int32x2_t _b) -+{ -+ return vzip_s32 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int32_t first[] = {1, 2}; -+ int32_t second[] = {3, 4}; -+ int32x2x2_t result = test_vzips32 (vld1_s32 (first), vld1_s32 (second)); -+ int32x2_t res1 = result.val[0], res2 = result.val[1]; -+ int32_t exp1[] = {1, 3}; -+ int32_t exp2[] = {2, 4}; -+ int32x2_t expected1 = vld1_s32 (exp1); -+ int32x2_t expected2 = vld1_s32 (exp2); -+ -+ for (i = 0; i < 2; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u32_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev64_u32' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev64u32.x" -+ -+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.2s, ?v\[0-9\]+.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qp8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qp8.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+poly8x16_t -+test_vrev16qp8 (poly8x16_t _arg) -+{ -+ return vrev16q_p8 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; -+ poly8x16_t reversed = test_vrev16qp8 (inorder); -+ poly8x16_t expected = {2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15}; -+ -+ for (i = 0; i < 16; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu32.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+uint32x2x2_t -+test_vzipu32 (uint32x2_t _a, uint32x2_t _b) -+{ -+ return vzip_u32 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint32_t first[] = {1, 2}; -+ uint32_t second[] = {3, 4}; -+ uint32x2x2_t result = test_vzipu32 (vld1_u32 (first), vld1_u32 (second)); -+ uint32x2_t res1 = result.val[0], res2 = result.val[1]; -+ uint32_t exp1[] = {1, 3}; -+ uint32_t exp2[] = {2, 4}; -+ uint32x2_t expected1 = vld1_u32 (exp1); -+ uint32x2_t expected2 = vld1_u32 (exp2); -+ -+ for (i = 0; i < 2; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqf32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqf32.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+float32x4x2_t -+test_vtrnqf32 (float32x4_t _a, float32x4_t _b) -+{ -+ return vtrnq_f32 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ float32_t first[] = {1, 2, 3, 4}; -+ float32_t second[] = {5, 6, 7, 8}; -+ float32x4x2_t result = test_vtrnqf32 (vld1q_f32 (first), vld1q_f32 (second)); -+ float32x4_t res1 = result.val[0], res2 = result.val[1]; -+ float32_t exp1[] = {1, 5, 3, 7}; -+ float32_t exp2[] = {2, 6, 4, 8}; -+ float32x4_t expected1 = vld1q_f32 (exp1); -+ float32x4_t expected2 = vld1q_f32 (exp2); -+ -+ for (i = 0; i < 4; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs8.x -@@ -0,0 +1,28 @@ -+extern void abort (void); -+ -+int8x16x2_t -+test_vtrnqs8 (int8x16_t _a, int8x16_t _b) -+{ -+ return vtrnq_s8 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; -+ int8_t second[] = -+ {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; -+ int8x16x2_t result = test_vtrnqs8 (vld1q_s8 (first), vld1q_s8 (second)); -+ int8x16_t res1 = result.val[0], res2 = result.val[1]; -+ int8_t exp1[] = {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31}; -+ int8_t exp2[] = {2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30, 16, 32}; -+ int8x16_t expected1 = vld1q_s8 (exp1); -+ int8x16_t expected2 = vld1q_s8 (exp2); -+ -+ for (i = 0; i < 16; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- 
a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s64_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s64_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vexts64' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+#include "ext_s64.x" -+ -+/* Do not scan-assembler. An EXT instruction could be emitted, but would merely -+ return its first argument, so it is legitimate to optimize it out. */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps32_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vuzp_s32' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vuzps32.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qf32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qf32.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+float32x4_t -+test_vrev64qf32 (float32x4_t _arg) -+{ -+ return vrev64q_f32 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ float32x4_t inorder = {1, 2, 3, 4}; -+ float32x4_t reversed = test_vrev64qf32 (inorder); -+ float32x4_t expected = {2, 1, 4, 3}; -+ -+ for (i = 0; i < 4; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s16.x -@@ -0,0 +1,58 @@ -+extern void abort (void); -+ -+int16x4_t -+test_vext_s16_1 (int16x4_t a, int16x4_t b) -+{ -+ return vext_s16 (a, b, 1); -+} -+ -+int16x4_t -+test_vext_s16_2 (int16x4_t a, int16x4_t b) -+{ -+ return vext_s16 (a, b, 2); -+} -+ -+int16x4_t -+test_vext_s16_3 (int16x4_t a, int16x4_t b) -+{ -+ return vext_s16 (a, b, 3); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i, off; -+ int16_t arr1[] = {0, 1, 2, 3}; -+ int16x4_t in1 = vld1_s16 (arr1); -+ int16_t arr2[] = {4, 5, 6, 7}; -+ int16x4_t in2 = vld1_s16 (arr2); -+ int16_t exp[4]; -+ int16x4_t expected; -+ int16x4_t actual = test_vext_s16_1 (in1, in2); -+ -+ for (i = 0; i < 4; i++) -+ exp[i] = i + 1; -+ expected = vld1_s16 (exp); -+ for (i = 0; i < 4; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_s16_2 (in1, in2); -+ for (i = 0; i < 4; i++) -+ exp[i] = i + 2; -+ expected = vld1_s16 (exp); -+ for (i = 0; i < 4; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_s16_3 (in1, in2); -+ for (i = 0; i < 4; i++) -+ exp[i] = i + 3; -+ expected = vld1_s16 (exp); -+ for (i = 0; i < 4; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u16.x -@@ -0,0 +1,58 @@ -+extern void abort (void); -+ -+uint16x4_t -+test_vext_u16_1 (uint16x4_t a, uint16x4_t b) -+{ -+ return vext_u16 (a, b, 1); -+} -+ -+uint16x4_t -+test_vext_u16_2 (uint16x4_t a, uint16x4_t b) -+{ -+ return vext_u16 (a, b, 2); -+} -+ -+uint16x4_t -+test_vext_u16_3 (uint16x4_t a, uint16x4_t b) -+{ -+ return vext_u16 (a, b, 3); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i, off; -+ 
uint16_t arr1[] = {0, 1, 2, 3}; -+ uint16x4_t in1 = vld1_u16 (arr1); -+ uint16_t arr2[] = {4, 5, 6, 7}; -+ uint16x4_t in2 = vld1_u16 (arr2); -+ uint16_t exp[4]; -+ uint16x4_t expected; -+ uint16x4_t actual = test_vext_u16_1 (in1, in2); -+ -+ for (i = 0; i < 4; i++) -+ exp[i] = i + 1; -+ expected = vld1_u16 (exp); -+ for (i = 0; i < 4; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_u16_2 (in1, in2); -+ for (i = 0; i < 4; i++) -+ exp[i] = i + 2; -+ expected = vld1_u16 (exp); -+ for (i = 0; i < 4; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vext_u16_3 (in1, in2); -+ for (i = 0; i < 4; i++) -+ exp[i] = i + 3; -+ expected = vld1_u16 (exp); -+ for (i = 0; i < 4; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs32_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzipq_s32' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vzipqs32.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp8.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+poly8x16x2_t -+test_vuzpqp8 (poly8x16_t _a, poly8x16_t _b) -+{ -+ return vuzpq_p8 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; -+ poly8_t second[] = -+ {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; -+ poly8x16x2_t result = test_vuzpqp8 (vld1q_p8 (first), vld1q_p8 (second)); -+ poly8_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}; -+ poly8_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32}; -+ poly8x16_t expect1 = vld1q_p8 (exp1); -+ poly8x16_t expect2 = vld1q_p8 (exp2); -+ -+ for (i = 0; i < 16; i++) -+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu8_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vuzpq_u8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vuzpqu8.x" -+ -+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzips8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzips8_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzip_s8' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vzips8.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p8.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+poly8x8_t -+test_vrev32p8 (poly8x8_t _arg) -+{ -+ return vrev32_p8 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; -+ poly8x8_t reversed = test_vrev32p8 (inorder); -+ poly8x8_t expected = {4, 3, 2, 1, 8, 7, 6, 5}; -+ -+ for (i = 0; i < 8; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s8.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+int8x8_t -+test_vrev64s8 (int8x8_t _arg) -+{ -+ return vrev64_s8 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; -+ int8x8_t reversed = test_vrev64s8 (inorder); -+ int8x8_t expected = {8, 7, 6, 5, 4, 3, 2, 1}; -+ -+ for (i = 0; i < 8; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp8_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vuzp_p8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vuzpp8.x" -+ -+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s32.x -@@ -0,0 +1,58 @@ -+extern void abort (void); -+ -+int32x4_t -+test_vextq_s32_1 (int32x4_t a, int32x4_t b) -+{ -+ return vextq_s32 (a, b, 1); -+} -+ -+int32x4_t -+test_vextq_s32_2 (int32x4_t a, int32x4_t b) -+{ -+ return vextq_s32 (a, b, 2); -+} -+ -+int32x4_t -+test_vextq_s32_3 (int32x4_t a, int32x4_t b) -+{ -+ return vextq_s32 (a, b, 3); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i, off; -+ int32_t arr1[] = {0, 1, 2, 3}; -+ int32x4_t in1 = vld1q_s32 (arr1); -+ int32_t arr2[] = {4, 5, 6, 7}; -+ int32x4_t in2 = vld1q_s32 (arr2); -+ int32_t exp[4]; -+ int32x4_t expected; -+ int32x4_t actual = test_vextq_s32_1 (in1, in2); -+ -+ for (i = 0; i < 4; i++) -+ exp[i] = i + 1; -+ expected = vld1q_s32 (exp); -+ for (i = 0; i < 4; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_s32_2 (in1, in2); -+ for (i = 0; i < 4; i++) -+ exp[i] = i + 2; -+ expected = vld1q_s32 (exp); -+ for (i = 0; i < 4; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_s32_3 (in1, in2); -+ for (i = 0; i < 4; i++) -+ exp[i] = i + 3; -+ expected = vld1q_s32 (exp); -+ for (i = 0; i < 4; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ return 0; -+} -+ ---- 
a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u32.x -@@ -0,0 +1,58 @@ -+extern void abort (void); -+ -+uint32x4_t -+test_vextq_u32_1 (uint32x4_t a, uint32x4_t b) -+{ -+ return vextq_u32 (a, b, 1); -+} -+ -+uint32x4_t -+test_vextq_u32_2 (uint32x4_t a, uint32x4_t b) -+{ -+ return vextq_u32 (a, b, 2); -+} -+ -+uint32x4_t -+test_vextq_u32_3 (uint32x4_t a, uint32x4_t b) -+{ -+ return vextq_u32 (a, b, 3); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i, off; -+ uint32_t arr1[] = {0, 1, 2, 3}; -+ uint32x4_t in1 = vld1q_u32 (arr1); -+ uint32_t arr2[] = {4, 5, 6, 7}; -+ uint32x4_t in2 = vld1q_u32 (arr2); -+ uint32_t exp[4]; -+ uint32x4_t expected; -+ uint32x4_t actual = test_vextq_u32_1 (in1, in2); -+ -+ for (i = 0; i < 4; i++) -+ exp[i] = i + 1; -+ expected = vld1q_u32 (exp); -+ for (i = 0; i < 4; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_u32_2 (in1, in2); -+ for (i = 0; i < 4; i++) -+ exp[i] = i + 2; -+ expected = vld1q_u32 (exp); -+ for (i = 0; i < 4; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ actual = test_vextq_u32_3 (in1, in2); -+ for (i = 0; i < 4; i++) -+ exp[i] = i + 3; -+ expected = vld1q_u32 (exp); -+ for (i = 0; i < 4; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u64_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u64_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vextQu64' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+#include "extq_u64.x" -+ -+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.8\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp16_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzip_p16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vzipp16.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns32_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vtrn_s32' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vtrns32.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qp8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qp8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev16q_p8' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev16qp8.x" -+ -+/* { dg-final { scan-assembler-times "rev16\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs32.x -@@ -0,0 +1,26 @@ -+extern void abort (void); -+ -+int32x4x2_t -+test_vuzpqs32 (int32x4_t _a, int32x4_t _b) -+{ -+ return vuzpq_s32 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int32_t first[] = {1, 2, 3, 4}; -+ int32_t second[] = {5, 6, 7, 8}; -+ int32x4x2_t result = test_vuzpqs32 (vld1q_s32 (first), vld1q_s32 (second)); -+ int32_t exp1[] = {1, 3, 5, 7}; -+ int32_t exp2[] = {2, 4, 6, 8}; -+ int32x4_t expect1 = vld1q_s32 (exp1); -+ int32x4_t expect2 = vld1q_s32 (exp2); -+ -+ for (i = 0; i < 4; i++) -+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu32_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzip_u32' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vzipu32.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p16.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+poly16x4_t -+test_vrev32p16 (poly16x4_t _arg) -+{ -+ return vrev32_p16 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly16x4_t inorder = {1, 2, 3, 4}; -+ poly16x4_t reversed = test_vrev32p16 (inorder); -+ poly16x4_t expected = {2, 1, 4, 3}; -+ -+ for (i = 0; i < 4; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu32.x -@@ -0,0 +1,26 @@ -+extern void abort (void); -+ -+uint32x4x2_t -+test_vuzpqu32 (uint32x4_t _a, uint32x4_t _b) -+{ -+ return vuzpq_u32 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint32_t first[] = {1, 2, 3, 4}; -+ uint32_t second[] = {5, 6, 7, 8}; -+ uint32x4x2_t result = test_vuzpqu32 (vld1q_u32 (first), vld1q_u32 (second)); -+ uint32_t exp1[] = {1, 3, 5, 7}; -+ uint32_t exp2[] = {2, 4, 6, 8}; -+ uint32x4_t expect1 = vld1q_u32 (exp1); -+ uint32x4_t expect2 = vld1q_u32 (exp2); -+ -+ for (i = 0; i < 4; i++) -+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrbit_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrbit_1.c -@@ -0,0 +1,56 @@ -+/* { dg-do run } */ -+/* { dg-options "-O2 --save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+ -+extern void abort (void); -+ -+uint64_t in1 = 0x0123456789abcdefULL; -+uint64_t expected1 = 0x80c4a2e691d5b3f7ULL; -+ -+#define TEST8(BASETYPE, SUFFIX) \ -+void test8_##SUFFIX () \ -+{ \ -+ BASETYPE##8x8_t out = vrbit_##SUFFIX (vcreate_##SUFFIX (in1)); \ -+ uint64_t res = vget_lane_u64 
(vreinterpret_u64_##SUFFIX (out), 0); \ -+ if (res != expected1) abort (); \ -+} -+ -+uint64_t in2 = 0xdeadbeefcafebabeULL; -+uint64_t expected2 = 0x7bb57df7537f5d7dULL; -+ -+#define TEST16(BASETYPE, SUFFIX) \ -+void test16_##SUFFIX () \ -+{ \ -+ BASETYPE##8x16_t in = vcombine_##SUFFIX (vcreate_##SUFFIX (in1), \ -+ vcreate_##SUFFIX (in2)); \ -+ uint64x2_t res = vreinterpretq_u64_##SUFFIX (vrbitq_##SUFFIX (in)); \ -+ uint64_t res1 = vgetq_lane_u64 (res, 0); \ -+ uint64_t res2 = vgetq_lane_u64 (res, 1); \ -+ if (res1 != expected1 || res2 != expected2) abort (); \ -+} -+ -+TEST8 (poly, p8); -+TEST8 (int, s8); -+TEST8 (uint, u8); -+ -+TEST16 (poly, p8); -+TEST16 (int, s8); -+TEST16 (uint, u8); -+ -+int -+main (int argc, char **argv) -+{ -+ test8_p8 (); -+ test8_s8 (); -+ test8_u8 (); -+ test16_p8 (); -+ test16_s8 (); -+ test16_u8 (); -+ return 0; -+} -+ -+/* { dg-final { scan-assembler-times "rbit\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\]" 3 } } */ -+/* { dg-final { scan-assembler-times "rbit\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\]" 3 } } */ -+ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s32_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vexts32' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+#include "ext_s32.x" -+ -+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#\[0-9\]+\(?:.4)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu8_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vtrnq_u8' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vtrnqu8.x" -+ -+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs8.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+int8x16_t -+test_vrev32qs8 (int8x16_t _arg) -+{ -+ return vrev32q_s8 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; -+ int8x16_t reversed = test_vrev32qs8 (inorder); -+ int8x16_t expected = {4, 3, 2, 1, 8, 7, 6, 5, 12, 11, 10, 9, 16, 15, 14, 13}; -+ -+ for (i = 0; i < 16; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qs8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qs8.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+int8x16_t -+test_vrev16qs8 (int8x16_t _arg) -+{ -+ return vrev16q_s8 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; -+ int8x16_t reversed = test_vrev16qs8 (inorder); -+ int8x16_t expected = {2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15}; -+ -+ for (i = 0; i < 16; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s16.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+int16x4_t -+test_vrev64s16 (int16x4_t _arg) -+{ -+ return vrev64_s16 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int16x4_t inorder = {1, 2, 3, 4}; -+ int16x4_t reversed = test_vrev64s16 (inorder); -+ int16x4_t expected = {4, 3, 2, 1}; -+ -+ for (i = 0; i < 4; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vextQs8' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+#include "extq_s8.x" -+ -+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#?\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 15 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u16.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+uint16x4_t -+test_vrev64u16 (uint16x4_t _arg) -+{ -+ return vrev64_u16 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint16x4_t inorder = {1, 2, 3, 4}; -+ uint16x4_t reversed = test_vrev64u16 (inorder); -+ uint16x4_t expected = {4, 3, 2, 1}; -+ -+ for (i = 0; i < 4; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp16.x -@@ -0,0 +1,26 @@ -+extern void abort (void); -+ -+poly16x4x2_t -+test_vuzpp16 (poly16x4_t _a, poly16x4_t _b) -+{ -+ return vuzp_p16 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly16_t first[] = {1, 2, 3, 4}; -+ poly16_t second[] = {5, 6, 7, 8}; -+ poly16x4x2_t result = test_vuzpp16 (vld1_p16 (first), vld1_p16 (second)); -+ poly16_t exp1[] = {1, 3, 5, 7}; -+ poly16_t exp2[] = {2, 4, 6, 8}; -+ poly16x4_t expect1 = vld1_p16 (exp1); -+ poly16x4_t expect2 = vld1_p16 (exp2); -+ -+ for (i = 0; i < 4; i++) -+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqf32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqf32_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vuzpq_f32' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vuzpqf32.x" -+ -+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp8_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzip_p8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vzipp8.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp16_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vtrnq_p16' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vtrnqp16.x" -+ -+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp16.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+poly16x8_t -+test_vrev32qp16 (poly16x8_t _arg) -+{ -+ return vrev32q_p16 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly16x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; -+ poly16x8_t reversed = test_vrev32qp16 (inorder); -+ poly16x8_t expected = {2, 1, 4, 3, 6, 5, 8, 7}; -+ -+ for (i = 0; i < 8; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu32_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vtrnq_u32' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vtrnqu32.x" -+ -+/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs8.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+int8x16x2_t -+test_vuzpqs8 (int8x16_t _a, int8x16_t _b) -+{ -+ return vuzpq_s8 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; -+ int8_t second[] = -+ {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; -+ int8x16x2_t result = test_vuzpqs8 (vld1q_s8 (first), vld1q_s8 (second)); -+ int8_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}; -+ int8_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32}; -+ int8x16_t expect1 = vld1q_s8 (exp1); -+ int8x16_t expect2 = vld1q_s8 (exp2); -+ -+ for (i = 0; i < 16; i++) -+ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs32.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+int32x4x2_t -+test_vzipqs32 (int32x4_t _a, int32x4_t _b) -+{ -+ return vzipq_s32 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int32_t first[] = {1, 2, 3, 4}; -+ int32_t second[] = {5, 6, 7, 8}; -+ int32x4x2_t result = test_vzipqs32 (vld1q_s32 (first), vld1q_s32 (second)); -+ int32x4_t res1 = result.val[0], res2 = result.val[1]; -+ int32_t exp1[] = {1, 5, 2, 6}; -+ int32_t exp2[] = {3, 7, 4, 8}; -+ int32x4_t expected1 = vld1q_s32 (exp1); -+ int32x4_t expected2 = vld1q_s32 (exp2); -+ -+ for (i = 0; i < 4; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs16_1.c -+++ 
b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs16_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev64q_s16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev64qs16.x" -+ -+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.8h, ?v\[0-9\]+.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s8.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+int8x8_t -+test_vrev32s8 (int8x8_t _arg) -+{ -+ return vrev32_s8 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; -+ int8x8_t reversed = test_vrev32s8 (inorder); -+ int8x8_t expected = {4, 3, 2, 1, 8, 7, 6, 5}; -+ -+ for (i = 0; i < 8; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p16_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vextQp16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+#include "extq_p16.x" -+ -+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu32.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+uint32x4x2_t -+test_vzipqu32 (uint32x4_t _a, uint32x4_t _b) -+{ -+ return vzipq_u32 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint32_t first[] = {1, 2, 3, 4}; -+ uint32_t second[] = {5, 6, 7, 8}; -+ uint32x4x2_t result = test_vzipqu32 (vld1q_u32 (first), vld1q_u32 (second)); -+ uint32x4_t res1 = result.val[0], res2 = result.val[1]; -+ uint32_t exp1[] = {1, 5, 2, 6}; -+ uint32_t exp2[] = {3, 7, 4, 8}; -+ uint32x4_t expected1 = vld1q_u32 (exp1); -+ uint32x4_t expected2 = vld1q_u32 (exp2); -+ -+ for (i = 0; i < 4; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u32_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vextQu32' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+#include "extq_u32.x" -+ -+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.4)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p16_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev32_p16' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev32p16.x" -+ -+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.4h, ?v\[0-9\]+.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_f32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_f32.x -@@ -0,0 +1,30 @@ -+extern void abort (void); -+ -+float32x2_t -+test_vext_f32_1 (float32x2_t a, float32x2_t b) -+{ -+ return vext_f32 (a, b, 1); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i, off; -+ float32_t arr1[] = {0, 1}; -+ float32x2_t in1 = vld1_f32 (arr1); -+ float32_t arr2[] = {2, 3}; -+ float32x2_t in2 = vld1_f32 (arr2); -+ float32_t exp[2]; -+ float32x2_t expected; -+ float32x2_t actual = test_vext_f32_1 (in1, in2); -+ -+ for (i = 0; i < 2; i++) -+ exp[i] = i + 1; -+ expected = vld1_f32 (exp); -+ for (i = 0; i < 2; i++) -+ if (actual[i] != expected[i]) -+ abort (); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_f64_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_f64_1.c -@@ -0,0 +1,25 @@ -+/* Test the `vextf64' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+ -+extern void abort (void); -+ -+int -+main (int argc, char **argv) -+{ -+ int i, off; -+ float64x1_t in1 = {0}; -+ float64x1_t in2 = {1}; -+ float64x1_t actual = vext_f64 (in1, in2, 0); -+ if (actual != in1) -+ abort (); -+ -+ return 0; -+} -+ -+/* Do not scan-assembler. An EXT instruction could be emitted, but would merely -+ return its first argument, so it is legitimate to optimize it out. */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpf32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpf32_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vuzp_f32' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vuzpf32.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu16_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vuzpq_u16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vuzpqu16.x" -+ -+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu8_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vuzp_u8' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vuzpu8.x" -+ -+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqf32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqf32_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzipq_f32' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vzipqf32.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s16_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev64_s16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev64s16.x" -+ -+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.4h, ?v\[0-9\]+.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns32.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+int32x2x2_t -+test_vtrns32 (int32x2_t _a, int32x2_t _b) -+{ -+ return vtrn_s32 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int32_t first[] = {1, 2}; -+ int32_t second[] = {3, 4}; -+ int32x2x2_t result = test_vtrns32 (vld1_s32 (first), vld1_s32 (second)); -+ int32x2_t res1 = result.val[0], res2 = result.val[1]; -+ int32_t exp1[] = {1, 3}; -+ int32_t exp2[] = {2, 4}; -+ int32x2_t expected1 = vld1_s32 (exp1); -+ int32x2_t expected2 = vld1_s32 (exp2); -+ -+ for (i = 0; i < 2; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qu8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qu8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev16q_u8' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev16qu8.x" -+ -+/* { dg-final { scan-assembler-times "rev16\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzips16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzips16.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+int16x4x2_t -+test_vzips16 (int16x4_t _a, int16x4_t _b) -+{ -+ return vzip_s16 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int16_t first[] = {1, 2, 3, 4}; -+ int16_t second[] = {5, 6, 7, 8}; -+ int16x4x2_t result = test_vzips16 (vld1_s16 (first), vld1_s16 (second)); -+ int16x4_t res1 = result.val[0], res2 = result.val[1]; -+ int16_t exp1[] = {1, 5, 2, 6}; -+ int16_t exp2[] = {3, 7, 4, 8}; -+ int16x4_t expected1 = vld1_s16 (exp1); -+ int16x4_t expected2 = vld1_s16 (exp2); -+ -+ for (i = 0; i < 4; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev64q_s8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev64qs8.x" -+ -+/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vextQp8' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+#include "extq_p8.x" -+ -+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#?\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 15 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu32.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+uint32x2x2_t -+test_vtrnu32 (uint32x2_t _a, uint32x2_t _b) -+{ -+ return vtrn_u32 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint32_t first[] = {1, 2}; -+ uint32_t second[] = {3, 4}; -+ uint32x2x2_t result = test_vtrnu32 (vld1_u32 (first), vld1_u32 (second)); -+ uint32x2_t res1 = result.val[0], res2 = result.val[1]; -+ uint32_t exp1[] = {1, 3}; -+ uint32_t exp2[] = {2, 4}; -+ uint32x2_t expected1 = vld1_u32 (exp1); -+ uint32x2_t expected2 = vld1_u32 (exp2); -+ -+ for (i = 0; i < 2; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu16.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+uint16x4x2_t -+test_vzipu16 (uint16x4_t _a, uint16x4_t _b) -+{ -+ return vzip_u16 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint16_t first[] = {1, 2, 3, 4}; -+ uint16_t second[] = {5, 6, 7, 8}; -+ uint16x4x2_t result = test_vzipu16 (vld1_u16 (first), vld1_u16 (second)); -+ uint16x4_t res1 = result.val[0], res2 = result.val[1]; -+ uint16_t exp1[] = {1, 5, 2, 6}; -+ uint16_t exp2[] = {3, 7, 4, 8}; -+ uint16x4_t expected1 = vld1_u16 (exp1); -+ uint16x4_t expected2 = vld1_u16 (exp2); -+ -+ for (i = 0; i < 4; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu16_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vuzp_u16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vuzpu16.x" -+ -+/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s8_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev32_s8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev32s8.x" -+ -+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnf32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnf32_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vtrn_f32' AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vtrnf32.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu16_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vrev32q_u16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vrev32qu16.x" -+ -+/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.8h, ?v\[0-9\]+.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu16_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzipq_u16' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vzipqu16.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu8_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzip_u8' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ -+ -+#include <arm_neon.h> -+#include "vzipu8.x" -+ -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp16.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+poly16x8x2_t -+test_vtrnqp16 (poly16x8_t _a, poly16x8_t _b) -+{ -+ return vtrnq_p16 (_a, _b); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; -+ poly16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; -+ poly16x8x2_t result = test_vtrnqp16 (vld1q_p16 (first), vld1q_p16 (second)); -+ poly16x8_t res1 = result.val[0], res2 = result.val[1]; -+ poly16_t exp1[] = {1, 9, 3, 11, 5, 13, 7, 15}; -+ poly16_t exp2[] = {2, 10, 4, 12, 6, 14, 8, 16}; -+ poly16x8_t expected1 = vld1q_p16 (exp1); -+ poly16x8_t expected2 = vld1q_p16 (exp2); -+ -+ for (i = 0; i < 8; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/int_comparisons_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/int_comparisons_1.c -@@ -0,0 +1,47 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O3 -fno-inline" } */ -+ -+/* Scan-assembler test, so, incorporate as little other code as possible. 
*/ -+ -+#include "arm_neon.h" -+#include "int_comparisons.x" -+ -+/* Operations on all 18 integer types: (q?)_[su](8|16|32|64), d_[su]64. -+ (d?)_[us]64 generate regs of form 'd0' rather than e.g. 'v0.2d'. */ -+/* { dg-final { scan-assembler-times "\[ \t\]cmeq\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*#?0" 14 } } */ -+/* { dg-final { scan-assembler-times "\[ \t\]cmeq\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]*#?0" 4 } } */ -+/* { dg-final { scan-assembler-times "\[ \t\]cmeq\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\]" 14 } } */ -+/* { dg-final { scan-assembler-times "\[ \t\]cmeq\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]+d\[0-9\]+" 4 } } */ -+/* { dg-final { scan-assembler-times "\[ \t\]cmtst\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\]" 14 } } */ -+/* { dg-final { scan-assembler-times "\[ \t\]cmtst\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]+d\[0-9\]+" 4 } } */ -+ -+/* vcge + vcle both implemented with cmge (signed) or cmhs (unsigned). */ -+/* { dg-final { scan-assembler-times "\[ \t\]cmge\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\]" 14 } } */ -+/* { dg-final { scan-assembler-times "\[ \t\]cmge\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]+d\[0-9\]+" 4 } } */ -+/* { dg-final { scan-assembler-times "\[ \t\]cmhs\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\]" 14 } } */ -+/* { dg-final { scan-assembler-times "\[ \t\]cmhs\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]+d\[0-9\]+" 4 } } */ -+ -+/* vcgt + vclt both implemented with cmgt (signed) or cmhi (unsigned). */ -+/* { dg-final { scan-assembler-times "\[ \t\]cmgt\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\]" 14 } } */ -+/* { dg-final { scan-assembler-times "\[ \t\]cmgt\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]+d\[0-9\]+" 4 } } */ -+/* { dg-final { scan-assembler-times "\[ \t\]cmhi\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\]" 14 } } */ -+/* { dg-final { scan-assembler-times "\[ \t\]cmhi\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]+d\[0-9\]+" 4 } } */ -+ -+/* Comparisons against immediate zero, on the 8 signed integer types only. */ -+ -+/* { dg-final { scan-assembler-times "\[ \t\]cmge\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*#?0" 7 } } */ -+/* For int64_t and int64x1_t, combine_simplify_rtx failure of -+ https://gcc.gnu.org/ml/gcc/2014-06/msg00253.html -+ prevents generation of cmge....#0, instead producing mvn + sshr. 
*/ -+/* { #dg-final { scan-assembler-times "\[ \t\]cmge\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]*#?0" 2 } } */ -+/* { dg-final { scan-assembler-times "\[ \t\]cmgt\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*#?0" 7 } } */ -+/* { dg-final { scan-assembler-times "\[ \t\]cmgt\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]*#?0" 2 } } */ -+/* { dg-final { scan-assembler-times "\[ \t\]cmle\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*#?0" 7 } } */ -+/* { dg-final { scan-assembler-times "\[ \t\]cmle\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]*#?0" 2 } } */ -+/* { dg-final { scan-assembler-times "\[ \t\]cmlt\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*#?0" 7 } } */ -+/* For int64_t and int64x1_t, cmlt ... #0 and sshr ... #63 are equivalent, -+ so allow either. cmgez issue above results in extra 2 * sshr....63. */ -+/* { dg-final { scan-assembler-times "\[ \t\](?:cmlt|sshr)\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]*#?(?:0|63)" 4 } } */ -+ -+// All should have been compiled into single insns without inverting result: -+/* { dg-final { scan-assembler-not "\[ \t\]not\[ \t\]" } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp16.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+poly16x8_t -+test_vrev64qp16 (poly16x8_t _arg) -+{ -+ return vrev64q_p16 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly16x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; -+ poly16x8_t reversed = test_vrev64qp16 (inorder); -+ poly16x8_t expected = {4, 3, 2, 1, 8, 7, 6, 5}; -+ -+ for (i = 0; i < 8; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_f32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_f32_1.c -@@ -0,0 +1,10 @@ -+/* Test the `vextf32' AArch64 SIMD intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O3 -fno-inline" } */ -+ -+#include "arm_neon.h" -+#include "ext_f32.x" -+ -+/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#\[0-9\]+\(?:.4)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64f32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64f32.x -@@ -0,0 +1,22 @@ -+extern void abort (void); -+ -+float32x2_t -+test_vrev64f32 (float32x2_t _arg) -+{ -+ return vrev64_f32 (_arg); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ int i; -+ float32x2_t inorder = {1, 2}; -+ float32x2_t reversed = test_vrev64f32 (inorder); -+ float32x2_t expected = {2, 1}; -+ -+ for (i = 0; i < 2; i++) -+ if (reversed[i] != expected[i]) -+ abort (); -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c -@@ -0,0 +1,430 @@ -+/* Test vdup_lane intrinsics work correctly. 
*/ -+/* { dg-do run } */ -+/* { dg-options "--save-temps -O1" } */ -+ -+#include <arm_neon.h> -+ -+extern void abort (void); -+ -+float32x2_t __attribute__ ((noinline)) -+wrap_vdup_lane_f32_0 (float32x2_t a) -+{ -+ return vdup_lane_f32 (a, 0); -+} -+ -+float32x2_t __attribute__ ((noinline)) -+wrap_vdup_lane_f32_1 (float32x2_t a) -+{ -+ return vdup_lane_f32 (a, 1); -+} -+ -+int __attribute__ ((noinline)) -+test_vdup_lane_f32 () -+{ -+ float32x2_t a; -+ float32x2_t b; -+ int i; -+ float32_t c[2] = { 0.0 , 3.14 }; -+ float32_t d[2]; -+ -+ a = vld1_f32 (c); -+ b = wrap_vdup_lane_f32_0 (a); -+ vst1_f32 (d, b); -+ for (i = 0; i < 2; i++) -+ if (c[0] != d[i]) -+ return 1; -+ -+ b = wrap_vdup_lane_f32_1 (a); -+ vst1_f32 (d, b); -+ for (i = 0; i < 2; i++) -+ if (c[1] != d[i]) -+ return 1; -+ return 0; -+} -+ -+float32x4_t __attribute__ ((noinline)) -+wrap_vdupq_lane_f32_0 (float32x2_t a) -+{ -+ return vdupq_lane_f32 (a, 0); -+} -+ -+float32x4_t __attribute__ ((noinline)) -+wrap_vdupq_lane_f32_1 (float32x2_t a) -+{ -+ return vdupq_lane_f32 (a, 1); -+} -+ -+int __attribute__ ((noinline)) -+test_vdupq_lane_f32 () -+{ -+ float32x2_t a; -+ float32x4_t b; -+ int i; -+ float32_t c[2] = { 0.0 , 3.14 }; -+ float32_t d[4]; -+ -+ a = vld1_f32 (c); -+ b = wrap_vdupq_lane_f32_0 (a); -+ vst1q_f32 (d, b); -+ for (i = 0; i < 4; i++) -+ if (c[0] != d[i]) -+ return 1; -+ -+ b = wrap_vdupq_lane_f32_1 (a); -+ vst1q_f32 (d, b); -+ for (i = 0; i < 4; i++) -+ if (c[1] != d[i]) -+ return 1; -+ return 0; -+} -+ -+int8x8_t __attribute__ ((noinline)) -+wrap_vdup_lane_s8_0 (int8x8_t a) -+{ -+ return vdup_lane_s8 (a, 0); -+} -+ -+int8x8_t __attribute__ ((noinline)) -+wrap_vdup_lane_s8_1 (int8x8_t a) -+{ -+ return vdup_lane_s8 (a, 1); -+} -+ -+int __attribute__ ((noinline)) -+test_vdup_lane_s8 () -+{ -+ int8x8_t a; -+ int8x8_t b; -+ int i; -+ /* Only two first cases are interesting. */ -+ int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; -+ int8_t d[8]; -+ -+ a = vld1_s8 (c); -+ b = wrap_vdup_lane_s8_0 (a); -+ vst1_s8 (d, b); -+ for (i = 0; i < 8; i++) -+ if (c[0] != d[i]) -+ return 1; -+ -+ b = wrap_vdup_lane_s8_1 (a); -+ vst1_s8 (d, b); -+ for (i = 0; i < 8; i++) -+ if (c[1] != d[i]) -+ return 1; -+ return 0; -+} -+ -+int8x16_t __attribute__ ((noinline)) -+wrap_vdupq_lane_s8_0 (int8x8_t a) -+{ -+ return vdupq_lane_s8 (a, 0); -+} -+ -+int8x16_t __attribute__ ((noinline)) -+wrap_vdupq_lane_s8_1 (int8x8_t a) -+{ -+ return vdupq_lane_s8 (a, 1); -+} -+ -+int __attribute__ ((noinline)) -+test_vdupq_lane_s8 () -+{ -+ int8x8_t a; -+ int8x16_t b; -+ int i; -+ /* Only two first cases are interesting. */ -+ int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; -+ int8_t d[16]; -+ -+ a = vld1_s8 (c); -+ b = wrap_vdupq_lane_s8_0 (a); -+ vst1q_s8 (d, b); -+ for (i = 0; i < 16; i++) -+ if (c[0] != d[i]) -+ return 1; -+ -+ b = wrap_vdupq_lane_s8_1 (a); -+ vst1q_s8 (d, b); -+ for (i = 0; i < 16; i++) -+ if (c[1] != d[i]) -+ return 1; -+ return 0; -+} -+ -+int16x4_t __attribute__ ((noinline)) -+wrap_vdup_lane_s16_0 (int16x4_t a) -+{ -+ return vdup_lane_s16 (a, 0); -+} -+ -+int16x4_t __attribute__ ((noinline)) -+wrap_vdup_lane_s16_1 (int16x4_t a) -+{ -+ return vdup_lane_s16 (a, 1); -+} -+ -+int __attribute__ ((noinline)) -+test_vdup_lane_s16 () -+{ -+ int16x4_t a; -+ int16x4_t b; -+ int i; -+ /* Only two first cases are interesting. 
*/ -+ int16_t c[4] = { 0, 1, 2, 3 }; -+ int16_t d[4]; -+ -+ a = vld1_s16 (c); -+ b = wrap_vdup_lane_s16_0 (a); -+ vst1_s16 (d, b); -+ for (i = 0; i < 4; i++) -+ if (c[0] != d[i]) -+ return 1; -+ -+ b = wrap_vdup_lane_s16_1 (a); -+ vst1_s16 (d, b); -+ for (i = 0; i < 4; i++) -+ if (c[1] != d[i]) -+ return 1; -+ return 0; -+} -+ -+int16x8_t __attribute__ ((noinline)) -+wrap_vdupq_lane_s16_0 (int16x4_t a) -+{ -+ return vdupq_lane_s16 (a, 0); -+} -+ -+int16x8_t __attribute__ ((noinline)) -+wrap_vdupq_lane_s16_1 (int16x4_t a) -+{ -+ return vdupq_lane_s16 (a, 1); -+} -+ -+int __attribute__ ((noinline)) -+test_vdupq_lane_s16 () -+{ -+ int16x4_t a; -+ int16x8_t b; -+ int i; -+ /* Only two first cases are interesting. */ -+ int16_t c[4] = { 0, 1, 2, 3 }; -+ int16_t d[8]; -+ -+ a = vld1_s16 (c); -+ b = wrap_vdupq_lane_s16_0 (a); -+ vst1q_s16 (d, b); -+ for (i = 0; i < 8; i++) -+ if (c[0] != d[i]) -+ return 1; -+ -+ b = wrap_vdupq_lane_s16_1 (a); -+ vst1q_s16 (d, b); -+ for (i = 0; i < 8; i++) -+ if (c[1] != d[i]) -+ return 1; -+ return 0; -+} -+ -+int32x2_t __attribute__ ((noinline)) -+wrap_vdup_lane_s32_0 (int32x2_t a) -+{ -+ return vdup_lane_s32 (a, 0); -+} -+ -+int32x2_t __attribute__ ((noinline)) -+wrap_vdup_lane_s32_1 (int32x2_t a) -+{ -+ return vdup_lane_s32 (a, 1); -+} -+ -+int __attribute__ ((noinline)) -+test_vdup_lane_s32 () -+{ -+ int32x2_t a; -+ int32x2_t b; -+ int i; -+ int32_t c[2] = { 0, 1 }; -+ int32_t d[2]; -+ -+ a = vld1_s32 (c); -+ b = wrap_vdup_lane_s32_0 (a); -+ vst1_s32 (d, b); -+ for (i = 0; i < 2; i++) -+ if (c[0] != d[i]) -+ return 1; -+ -+ b = wrap_vdup_lane_s32_1 (a); -+ vst1_s32 (d, b); -+ for (i = 0; i < 2; i++) -+ if (c[1] != d[i]) -+ return 1; -+ return 0; -+} -+ -+int32x4_t __attribute__ ((noinline)) -+wrap_vdupq_lane_s32_0 (int32x2_t a) -+{ -+ return vdupq_lane_s32 (a, 0); -+} -+ -+int32x4_t __attribute__ ((noinline)) -+wrap_vdupq_lane_s32_1 (int32x2_t a) -+{ -+ return vdupq_lane_s32 (a, 1); -+} -+ -+int __attribute__ ((noinline)) -+test_vdupq_lane_s32 () -+{ -+ int32x2_t a; -+ int32x4_t b; -+ int i; -+ int32_t c[2] = { 0, 1 }; -+ int32_t d[4]; -+ -+ a = vld1_s32 (c); -+ b = wrap_vdupq_lane_s32_0 (a); -+ vst1q_s32 (d, b); -+ for (i = 0; i < 4; i++) -+ if (c[0] != d[i]) -+ return 1; -+ -+ b = wrap_vdupq_lane_s32_1 (a); -+ vst1q_s32 (d, b); -+ for (i = 0; i < 4; i++) -+ if (c[1] != d[i]) -+ return 1; -+ return 0; -+} -+ -+int64x1_t __attribute__ ((noinline)) -+wrap_vdup_lane_s64_0 (int64x1_t a) -+{ -+ return vdup_lane_s64 (a, 0); -+} -+ -+int64x1_t __attribute__ ((noinline)) -+wrap_vdup_lane_s64_1 (int64x1_t a) -+{ -+ return vdup_lane_s64 (a, 1); -+} -+ -+int __attribute__ ((noinline)) -+test_vdup_lane_s64 () -+{ -+ int64x1_t a; -+ int64x1_t b; -+ int64_t c[1]; -+ int64_t d[1]; -+ -+ c[0] = 0; -+ a = vld1_s64 (c); -+ b = wrap_vdup_lane_s64_0 (a); -+ vst1_s64 (d, b); -+ if (c[0] != d[0]) -+ return 1; -+ -+ c[0] = 1; -+ a = vld1_s64 (c); -+ b = wrap_vdup_lane_s64_1 (a); -+ vst1_s64 (d, b); -+ if (c[0] != d[0]) -+ return 1; -+ return 0; -+} -+ -+int64x2_t __attribute__ ((noinline)) -+wrap_vdupq_lane_s64_0 (int64x1_t a) -+{ -+ return vdupq_lane_s64 (a, 0); -+} -+ -+int64x2_t __attribute__ ((noinline)) -+wrap_vdupq_lane_s64_1 (int64x1_t a) -+{ -+ return vdupq_lane_s64 (a, 1); -+} -+ -+int __attribute__ ((noinline)) -+test_vdupq_lane_s64 () -+{ -+ int64x1_t a; -+ int64x2_t b; -+ int i; -+ int64_t c[1]; -+ int64_t d[2]; -+ -+ c[0] = 0; -+ a = vld1_s64 (c); -+ b = wrap_vdupq_lane_s64_0 (a); -+ vst1q_s64 (d, b); -+ for (i = 0; i < 2; i++) -+ if (c[0] != d[i]) -+ return 1; -+ -+ 
c[0] = 1; -+ a = vld1_s64 (c); -+ b = wrap_vdupq_lane_s64_1 (a); -+ vst1q_s64 (d, b); -+ for (i = 0; i < 2; i++) -+ if (c[0] != d[i]) -+ return 1; -+ return 0; -+} -+ -+int -+main () -+{ -+ -+ if (test_vdup_lane_f32 ()) -+ abort (); -+ if (test_vdup_lane_s8 ()) -+ abort (); -+ if (test_vdup_lane_s16 ()) -+ abort (); -+ if (test_vdup_lane_s32 ()) -+ abort (); -+ if (test_vdup_lane_s64 ()) -+ abort (); -+ if (test_vdupq_lane_f32 ()) -+ abort (); -+ if (test_vdupq_lane_s8 ()) -+ abort (); -+ if (test_vdupq_lane_s16 ()) -+ abort (); -+ if (test_vdupq_lane_s32 ()) -+ abort (); -+ if (test_vdupq_lane_s64 ()) -+ abort (); -+ -+ return 0; -+} -+ -+/* Asm check for test_vdup_lane_s8. */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, v\[0-9\]+\.b\\\[0\\\]" 1 } } */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, v\[0-9\]+\.b\\\[1\\\]" 1 } } */ -+ -+/* Asm check for test_vdupq_lane_s8. */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, v\[0-9\]+\.b\\\[0\\\]" 1 } } */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, v\[0-9\]+\.b\\\[1\\\]" 1 } } */ -+ -+/* Asm check for test_vdup_lane_s16. */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */ -+/* Asm check for test_vdup_lane_s16. */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, v\[0-9\]+\.h\\\[1\\\]" 1 } } */ -+ -+/* Asm check for test_vdupq_lane_s16. */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */ -+/* Asm check for test_vdupq_lane_s16. */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, v\[0-9\]+\.h\\\[1\\\]" 1 } } */ -+ -+/* Asm check for test_vdup_lane_f32 and test_vdup_lane_s32. */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, v\[0-9\]+\.s\\\[0\\\]" 2 } } */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, v\[0-9\]+\.s\\\[1\\\]" 2 } } */ -+ -+/* Asm check for test_vdupq_lane_f32 and test_vdupq_lane_s32. */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, v\[0-9\]+\.s\\\[0\\\]" 2 } } */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, v\[0-9\]+\.s\\\[1\\\]" 2 } } */ -+ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_15.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_15.c -@@ -0,0 +1,19 @@ -+/* Verify: -+ * with outgoing. -+ * total frame size > 512. -+ area except outgoing <= 512 -+ * number of callee-save reg >= 2. -+ * split the stack adjustment into two substractions, -+ the first could be optimized into "stp !". */ -+ -+/* { dg-do run } */ -+/* { dg-options "-O2 --save-temps" } */ -+ -+#include "test_frame_common.h" -+ -+t_frame_pattern_outgoing (test15, 480, , 8, a[8]) -+t_frame_run (test15) -+ -+/* { dg-final { scan-assembler-times "sub\tsp, sp, #\[0-9\]+" 1 } } */ -+/* { dg-final { scan-assembler-times "stp\tx29, x30, \\\[sp, -\[0-9\]+\\\]!" 3 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/vbslq_u64_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vbslq_u64_1.c -@@ -0,0 +1,17 @@ -+/* Test if a BSL-like instruction can be generated from a C idiom. */ -+/* { dg-do assemble } */ -+/* { dg-options "--save-temps -O3" } */ -+ -+#include <arm_neon.h> -+ -+/* Folds to BIF. 
*/ -+ -+uint32x4_t -+vbslq_dummy_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t mask) -+{ -+ return (mask & a) | (~mask & b); -+} -+ -+/* { dg-final { scan-assembler-times "bif\\tv" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/vdup_n_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vdup_n_1.c -@@ -0,0 +1,619 @@ -+/* Test vdup_lane intrinsics work correctly. */ -+/* { dg-do run } */ -+/* { dg-options "-O1 --save-temps" } */ -+ -+#include <arm_neon.h> -+ -+extern void abort (void); -+ -+float32x2_t __attribute__ ((noinline)) -+wrap_vdup_n_f32 (float32_t a) -+{ -+ return vdup_n_f32 (a); -+} -+ -+int __attribute__ ((noinline)) -+test_vdup_n_f32 () -+{ -+ float32_t a = 1.0; -+ float32x2_t b; -+ float32_t c[2]; -+ int i; -+ -+ b = wrap_vdup_n_f32 (a); -+ vst1_f32 (c, b); -+ for (i = 0; i < 2; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+float32x4_t __attribute__ ((noinline)) -+wrap_vdupq_n_f32 (float32_t a) -+{ -+ return vdupq_n_f32 (a); -+} -+ -+int __attribute__ ((noinline)) -+test_vdupq_n_f32 () -+{ -+ float32_t a = 1.0; -+ float32x4_t b; -+ float32_t c[4]; -+ int i; -+ -+ b = wrap_vdupq_n_f32 (a); -+ vst1q_f32 (c, b); -+ for (i = 0; i < 4; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+float64x1_t __attribute__ ((noinline)) -+wrap_vdup_n_f64 (float64_t a) -+{ -+ return vdup_n_f64 (a); -+} -+ -+int __attribute__ ((noinline)) -+test_vdup_n_f64 () -+{ -+ float64_t a = 1.0; -+ float64x1_t b; -+ float64_t c[1]; -+ int i; -+ -+ b = wrap_vdup_n_f64 (a); -+ vst1_f64 (c, b); -+ for (i = 0; i < 1; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+float64x2_t __attribute__ ((noinline)) -+wrap_vdupq_n_f64 (float64_t a) -+{ -+ return vdupq_n_f64 (a); -+} -+ -+int __attribute__ ((noinline)) -+test_vdupq_n_f64 () -+{ -+ float64_t a = 1.0; -+ float64x2_t b; -+ float64_t c[2]; -+ int i; -+ -+ b = wrap_vdupq_n_f64 (a); -+ vst1q_f64 (c, b); -+ for (i = 0; i < 2; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+poly8x8_t __attribute__ ((noinline)) -+wrap_vdup_n_p8 (poly8_t a) -+{ -+ return vdup_n_p8 (a); -+} -+ -+int __attribute__ ((noinline)) -+test_vdup_n_p8 () -+{ -+ poly8_t a = 1; -+ poly8x8_t b; -+ poly8_t c[8]; -+ int i; -+ -+ b = wrap_vdup_n_p8 (a); -+ vst1_p8 (c, b); -+ for (i = 0; i < 8; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+poly8x16_t __attribute__ ((noinline)) -+wrap_vdupq_n_p8 (poly8_t a) -+{ -+ return vdupq_n_p8 (a); -+} -+ -+int __attribute__ ((noinline)) -+test_vdupq_n_p8 () -+{ -+ poly8_t a = 1; -+ poly8x16_t b; -+ poly8_t c[16]; -+ int i; -+ -+ b = wrap_vdupq_n_p8 (a); -+ vst1q_p8 (c, b); -+ for (i = 0; i < 16; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+int8x8_t __attribute__ ((noinline)) -+wrap_vdup_n_s8 (int8_t a) -+{ -+ return vdup_n_s8 (a); -+} -+ -+int __attribute__ ((noinline)) -+test_vdup_n_s8 () -+{ -+ int8_t a = 1; -+ int8x8_t b; -+ int8_t c[8]; -+ int i; -+ -+ b = wrap_vdup_n_s8 (a); -+ vst1_s8 (c, b); -+ for (i = 0; i < 8; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+int8x16_t __attribute__ ((noinline)) -+wrap_vdupq_n_s8 (int8_t a) -+{ -+ return vdupq_n_s8 (a); -+} -+ -+int __attribute__ ((noinline)) -+test_vdupq_n_s8 () -+{ -+ int8_t a = 1; -+ int8x16_t b; -+ int8_t c[16]; -+ int i; -+ -+ b = wrap_vdupq_n_s8 (a); -+ vst1q_s8 (c, b); -+ for (i = 0; i < 16; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+uint8x8_t __attribute__ ((noinline)) -+wrap_vdup_n_u8 (uint8_t a) -+{ -+ return vdup_n_u8 (a); -+} -+ -+int __attribute__ ((noinline)) 
-+test_vdup_n_u8 () -+{ -+ uint8_t a = 1; -+ uint8x8_t b; -+ uint8_t c[8]; -+ int i; -+ -+ b = wrap_vdup_n_u8 (a); -+ vst1_u8 (c, b); -+ for (i = 0; i < 8; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+uint8x16_t __attribute__ ((noinline)) -+wrap_vdupq_n_u8 (uint8_t a) -+{ -+ return vdupq_n_u8 (a); -+} -+ -+int __attribute__ ((noinline)) -+test_vdupq_n_u8 () -+{ -+ uint8_t a = 1; -+ uint8x16_t b; -+ uint8_t c[16]; -+ int i; -+ -+ b = wrap_vdupq_n_u8 (a); -+ vst1q_u8 (c, b); -+ for (i = 0; i < 16; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+poly16x4_t __attribute__ ((noinline)) -+wrap_vdup_n_p16 (poly16_t a) -+{ -+ return vdup_n_p16 (a); -+} -+ -+int __attribute__ ((noinline)) -+test_vdup_n_p16 () -+{ -+ poly16_t a = 1; -+ poly16x4_t b; -+ poly16_t c[4]; -+ int i; -+ -+ b = wrap_vdup_n_p16 (a); -+ vst1_p16 (c, b); -+ for (i = 0; i < 4; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+poly16x8_t __attribute__ ((noinline)) -+wrap_vdupq_n_p16 (poly16_t a) -+{ -+ return vdupq_n_p16 (a); -+} -+ -+int __attribute__ ((noinline)) -+test_vdupq_n_p16 () -+{ -+ poly16_t a = 1; -+ poly16x8_t b; -+ poly16_t c[8]; -+ int i; -+ -+ b = wrap_vdupq_n_p16 (a); -+ vst1q_p16 (c, b); -+ for (i = 0; i < 8; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+int16x4_t __attribute__ ((noinline)) -+wrap_vdup_n_s16 (int16_t a) -+{ -+ return vdup_n_s16 (a); -+} -+ -+int __attribute__ ((noinline)) -+test_vdup_n_s16 () -+{ -+ int16_t a = 1; -+ int16x4_t b; -+ int16_t c[4]; -+ int i; -+ -+ b = wrap_vdup_n_s16 (a); -+ vst1_s16 (c, b); -+ for (i = 0; i < 4; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+int16x8_t __attribute__ ((noinline)) -+wrap_vdupq_n_s16 (int16_t a) -+{ -+ return vdupq_n_s16 (a); -+} -+ -+int __attribute__ ((noinline)) -+test_vdupq_n_s16 () -+{ -+ int16_t a = 1; -+ int16x8_t b; -+ int16_t c[8]; -+ int i; -+ -+ b = wrap_vdupq_n_s16 (a); -+ vst1q_s16 (c, b); -+ for (i = 0; i < 8; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+uint16x4_t __attribute__ ((noinline)) -+wrap_vdup_n_u16 (uint16_t a) -+{ -+ return vdup_n_u16 (a); -+} -+ -+int __attribute__ ((noinline)) -+test_vdup_n_u16 () -+{ -+ uint16_t a = 1; -+ uint16x4_t b; -+ uint16_t c[4]; -+ int i; -+ -+ b = wrap_vdup_n_u16 (a); -+ vst1_u16 (c, b); -+ for (i = 0; i < 4; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+uint16x8_t __attribute__ ((noinline)) -+wrap_vdupq_n_u16 (uint16_t a) -+{ -+ return vdupq_n_u16 (a); -+} -+ -+int __attribute__ ((noinline)) -+test_vdupq_n_u16 () -+{ -+ uint16_t a = 1; -+ uint16x8_t b; -+ uint16_t c[8]; -+ int i; -+ -+ b = wrap_vdupq_n_u16 (a); -+ vst1q_u16 (c, b); -+ for (i = 0; i < 8; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+int32x2_t __attribute__ ((noinline)) -+wrap_vdup_n_s32 (int32_t a) -+{ -+ return vdup_n_s32 (a); -+} -+ -+int __attribute__ ((noinline)) -+test_vdup_n_s32 () -+{ -+ int32_t a = 1; -+ int32x2_t b; -+ int32_t c[2]; -+ int i; -+ -+ b = wrap_vdup_n_s32 (a); -+ vst1_s32 (c, b); -+ for (i = 0; i < 2; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+int32x4_t __attribute__ ((noinline)) -+wrap_vdupq_n_s32 (int32_t a) -+{ -+ return vdupq_n_s32 (a); -+} -+ -+int __attribute__ ((noinline)) -+test_vdupq_n_s32 () -+{ -+ int32_t a = 1; -+ int32x4_t b; -+ int32_t c[4]; -+ int i; -+ -+ b = wrap_vdupq_n_s32 (a); -+ vst1q_s32 (c, b); -+ for (i = 0; i < 4; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+uint32x2_t __attribute__ ((noinline)) -+wrap_vdup_n_u32 (uint32_t a) -+{ -+ return vdup_n_u32 (a); -+} -+ 
-+int __attribute__ ((noinline)) -+test_vdup_n_u32 () -+{ -+ uint32_t a = 1; -+ uint32x2_t b; -+ uint32_t c[2]; -+ int i; -+ -+ b = wrap_vdup_n_u32 (a); -+ vst1_u32 (c, b); -+ for (i = 0; i < 2; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+uint32x4_t __attribute__ ((noinline)) -+wrap_vdupq_n_u32 (uint32_t a) -+{ -+ return vdupq_n_u32 (a); -+} -+ -+int __attribute__ ((noinline)) -+test_vdupq_n_u32 () -+{ -+ uint32_t a = 1; -+ uint32x4_t b; -+ uint32_t c[4]; -+ int i; -+ -+ b = wrap_vdupq_n_u32 (a); -+ vst1q_u32 (c, b); -+ for (i = 0; i < 4; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+int64x1_t __attribute__ ((noinline)) -+wrap_vdup_n_s64 (int64_t a) -+{ -+ return vdup_n_s64 (a); -+} -+ -+int __attribute__ ((noinline)) -+test_vdup_n_s64 () -+{ -+ int64_t a = 1; -+ int64x1_t b; -+ int64_t c[1]; -+ int i; -+ -+ b = wrap_vdup_n_s64 (a); -+ vst1_s64 (c, b); -+ for (i = 0; i < 1; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+int64x2_t __attribute__ ((noinline)) -+wrap_vdupq_n_s64 (int64_t a) -+{ -+ return vdupq_n_s64 (a); -+} -+ -+int __attribute__ ((noinline)) -+test_vdupq_n_s64 () -+{ -+ int64_t a = 1; -+ int64x2_t b; -+ int64_t c[2]; -+ int i; -+ -+ b = wrap_vdupq_n_s64 (a); -+ vst1q_s64 (c, b); -+ for (i = 0; i < 2; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+uint64x1_t __attribute__ ((noinline)) -+wrap_vdup_n_u64 (uint64_t a) -+{ -+ return vdup_n_u64 (a); -+} -+ -+int __attribute__ ((noinline)) -+test_vdup_n_u64 () -+{ -+ uint64_t a = 1; -+ uint64x1_t b; -+ uint64_t c[1]; -+ int i; -+ -+ b = wrap_vdup_n_u64 (a); -+ vst1_u64 (c, b); -+ for (i = 0; i < 1; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+uint64x2_t __attribute__ ((noinline)) -+wrap_vdupq_n_u64 (uint64_t a) -+{ -+ return vdupq_n_u64 (a); -+} -+ -+int __attribute__ ((noinline)) -+test_vdupq_n_u64 () -+{ -+ uint64_t a = 1; -+ uint64x2_t b; -+ uint64_t c[2]; -+ int i; -+ -+ b = wrap_vdupq_n_u64 (a); -+ vst1q_u64 (c, b); -+ for (i = 0; i < 2; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} -+ -+int -+main () -+{ -+ if (test_vdup_n_f32 ()) -+ abort (); -+ if (test_vdup_n_f64 ()) -+ abort (); -+ if (test_vdup_n_p8 ()) -+ abort (); -+ if (test_vdup_n_u8 ()) -+ abort (); -+ if (test_vdup_n_s8 ()) -+ abort (); -+ if (test_vdup_n_p16 ()) -+ abort (); -+ if (test_vdup_n_s16 ()) -+ abort (); -+ if (test_vdup_n_u16 ()) -+ abort (); -+ if (test_vdup_n_s32 ()) -+ abort (); -+ if (test_vdup_n_u32 ()) -+ abort (); -+ if (test_vdup_n_s64 ()) -+ abort (); -+ if (test_vdup_n_u64 ()) -+ abort (); -+ if (test_vdupq_n_f32 ()) -+ abort (); -+ if (test_vdupq_n_f64 ()) -+ abort (); -+ if (test_vdupq_n_p8 ()) -+ abort (); -+ if (test_vdupq_n_u8 ()) -+ abort (); -+ if (test_vdupq_n_s8 ()) -+ abort (); -+ if (test_vdupq_n_p16 ()) -+ abort (); -+ if (test_vdupq_n_s16 ()) -+ abort (); -+ if (test_vdupq_n_u16 ()) -+ abort (); -+ if (test_vdupq_n_s32 ()) -+ abort (); -+ if (test_vdupq_n_u32 ()) -+ abort (); -+ if (test_vdupq_n_s64 ()) -+ abort (); -+ if (test_vdupq_n_u64 ()) -+ abort (); -+ return 0; -+} -+ -+/* No asm checks for vdup_n_f32, vdupq_n_f32, vdup_n_f64 and vdupq_n_f64. -+ Cannot force floating point value in general purpose regester. */ -+ -+/* Asm check for test_vdup_n_p8, test_vdup_n_s8, test_vdup_n_u8. */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, w\[0-9\]+" 3 } } */ -+ -+/* Asm check for test_vdupq_n_p8, test_vdupq_n_s8, test_vdupq_n_u8. 
*/ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, w\[0-9\]+" 3 } } */ -+ -+/* Asm check for test_vdup_n_p16, test_vdup_n_s16, test_vdup_n_u16. */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, w\[0-9\]+" 3 } } */ -+ -+/* Asm check for test_vdupq_n_p16, test_vdupq_n_s16, test_vdupq_n_u16. */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, w\[0-9\]+" 3 } } */ -+ -+/* Asm check for test_vdup_n_s32, test_vdup_n_u32. */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, w\[0-9\]+" 2 } } */ -+ -+/* Asm check for test_vdupq_n_s32, test_vdupq_n_u32. */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, w\[0-9\]+" 2 } } */ -+ -+/* Asm check for test_vdup_n_s64, test_vdup_n_u64 are left out. -+ Attempts to make the compiler generate "dup\\td\[0-9\]+, x\[0-9\]+" -+ are not practical. */ -+ -+/* Asm check for test_vdupq_n_s64, test_vdupq_n_u64. */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2d, x\[0-9\]+" 2 } } */ -+ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_4.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_4.c -@@ -0,0 +1,19 @@ -+/* Verify: -+ * -fomit-frame-pointer. -+ * without outgoing. -+ * total frame size <= 512 but > 256. -+ * number of callee-save reg >= 2. -+ * we can use "stp !" to optimize stack adjustment. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */ -+ -+#include "test_frame_common.h" -+ -+t_frame_pattern (test4, 400, "x19") -+t_frame_run (test4) -+ -+/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, -\[0-9\]+\\\]!" 1 } } */ -+/* { dg-final { scan-assembler-times "ldp\tx19, x30, \\\[sp\\\], \[0-9\]+" 2 } } */ -+ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/fcsel_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/fcsel_1.c -@@ -0,0 +1,22 @@ -+/* { dg-do compile } */ -+/* { dg-options " -O2 " } */ -+ -+float -+f_1 (float a, float b, float c, float d) -+{ -+ if (a > 0.0) -+ return c; -+ else -+ return 2.0; -+} -+ -+double -+f_2 (double a, double b, double c, double d) -+{ -+ if (a > b) -+ return c; -+ else -+ return d; -+} -+ -+/* { dg-final { scan-assembler-times "\tfcsel" 2 } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/vect-fp.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fp.c -@@ -8,11 +8,11 @@ - - - #define DEFN_SETV(type) \ -- set_vector_##type (pR##type a, type n) \ -- { \ -- int i; \ -- for (i=0; i<16; i++) \ -- a[i] = n; \ -+ void set_vector_##type (pR##type a, type n) \ -+ { \ -+ int i; \ -+ for (i=0; i<16; i++) \ -+ a[i] = n; \ - } - - #define DEFN_CHECKV(type) \ ---- a/src/gcc/testsuite/gcc.target/aarch64/rev16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/rev16_1.c -@@ -0,0 +1,59 @@ -+/* { dg-options "-O2" } */ -+/* { dg-do run } */ -+ -+extern void abort (void); -+ -+typedef unsigned int __u32; -+ -+__u32 -+__rev16_32_alt (__u32 x) -+{ -+ return (((__u32)(x) & (__u32)0xff00ff00UL) >> 8) -+ | (((__u32)(x) & (__u32)0x00ff00ffUL) << 8); -+} -+ -+__u32 -+__rev16_32 (__u32 x) -+{ -+ return (((__u32)(x) & (__u32)0x00ff00ffUL) << 8) -+ | (((__u32)(x) & (__u32)0xff00ff00UL) >> 8); -+} -+ -+typedef unsigned long long __u64; -+ -+__u64 -+__rev16_64_alt (__u64 x) -+{ -+ return (((__u64)(x) & (__u64)0xff00ff00ff00ff00UL) >> 8) -+ | (((__u64)(x) & (__u64)0x00ff00ff00ff00ffUL) << 8); -+} -+ -+__u64 -+__rev16_64 (__u64 x) -+{ -+ return (((__u64)(x) & (__u64)0x00ff00ff00ff00ffUL) << 8) -+ | (((__u64)(x) & (__u64)0xff00ff00ff00ff00UL) >> 8); -+} 
-+ -+int -+main (void) -+{ -+ volatile __u32 in32 = 0x12345678; -+ volatile __u32 expected32 = 0x34127856; -+ volatile __u64 in64 = 0x1234567890abcdefUL; -+ volatile __u64 expected64 = 0x34127856ab90efcdUL; -+ -+ if (__rev16_32 (in32) != expected32) -+ abort (); -+ -+ if (__rev16_32_alt (in32) != expected32) -+ abort (); -+ -+ if (__rev16_64 (in64) != expected64) -+ abort (); -+ -+ if (__rev16_64_alt (in64) != expected64) -+ abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/vget_high_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vget_high_1.c -@@ -0,0 +1,60 @@ -+/* { dg-do run } */ -+/* { dg-options "-O3 -std=c99" } */ -+ -+#include <arm_neon.h> -+ -+extern void abort (void); -+ -+#define VARIANTS(VARIANT) \ -+VARIANT (uint8_t, 8, uint8x8_t, uint8x16_t, u8) \ -+VARIANT (uint16_t, 4, uint16x4_t, uint16x8_t, u16) \ -+VARIANT (uint32_t, 2, uint32x2_t, uint32x4_t, u32) \ -+VARIANT (uint64_t, 1, uint64x1_t, uint64x2_t, u64) \ -+VARIANT (int8_t, 8, int8x8_t, int8x16_t, s8) \ -+VARIANT (int16_t, 4, int16x4_t, int16x8_t, s16) \ -+VARIANT (int32_t, 2, int32x2_t, int32x4_t, s32) \ -+VARIANT (int64_t, 1, int64x1_t, int64x2_t, s64) \ -+VARIANT (float32_t, 2, float32x2_t, float32x4_t, f32) \ -+VARIANT (float64_t, 1, float64x1_t, float64x2_t, f64) -+ -+ -+#define TESTMETH(BASETYPE, NUM64, TYPE64, TYPE128, SUFFIX) \ -+int \ -+test_vget_low_ ##SUFFIX (BASETYPE *data) \ -+{ \ -+ BASETYPE temp [NUM64]; \ -+ TYPE128 vec = vld1q_##SUFFIX (data); \ -+ TYPE64 high = vget_high_##SUFFIX (vec); \ -+ vst1_##SUFFIX (temp, high); \ -+ for (int i = 0; i < NUM64; i++) \ -+ if (temp[i] != data[i + NUM64]) \ -+ return 1; \ -+ return 0; \ -+} -+ -+VARIANTS (TESTMETH) -+ -+#define CHECK(BASETYPE, NUM64, TYPE64, TYPE128, SUFFIX) \ -+ if (test_vget_low_##SUFFIX (BASETYPE ## _ ## data) != 0) \ -+ abort (); -+ -+int -+main (int argc, char **argv) -+{ -+ uint8_t uint8_t_data[16] = -+ { 1, 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47 }; -+ uint16_t uint16_t_data[8] = { 1, 22, 333, 4444, 55555, 6666, 777, 88 }; -+ uint32_t uint32_t_data[4] = { 65537, 11, 70000, 23 }; -+ uint64_t uint64_t_data[2] = { 0xdeadbeefcafebabeULL, 0x0123456789abcdefULL }; -+ int8_t int8_t_data[16] = -+ { -1, -3, -5, -7, 9, -11, -13, 15, -17, -19, 21, -23, 25, 27, -29, -31 }; -+ int16_t int16_t_data[8] = { -17, 19, 3, -999, 44048, 505, 9999, 1000}; -+ int32_t int32_t_data[4] = { 123456789, -987654321, -135792468, 975318642 }; -+ int64_t int64_t_data[2] = {0xfedcba9876543210LL, 0xdeadbabecafebeefLL }; -+ float32_t float32_t_data[4] = { 3.14159, 2.718, 1.414, 100.0 }; -+ float64_t float64_t_data[2] = { 1.01001000100001, 12345.6789 }; -+ -+ VARIANTS (CHECK); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/vldN_dup_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vldN_dup_1.c -@@ -0,0 +1,84 @@ -+/* { dg-do run } */ -+/* { dg-options "-O3 -fno-inline" } */ -+ -+#include <arm_neon.h> -+ -+extern void abort (void); -+ -+#define VARIANTS(VARIANT, STRUCT) \ -+VARIANT (uint8, , 8, _u8, STRUCT) \ -+VARIANT (uint16, , 4, _u16, STRUCT) \ -+VARIANT (uint32, , 2, _u32, STRUCT) \ -+VARIANT (uint64, , 1, _u64, STRUCT) \ -+VARIANT (int8, , 8, _s8, STRUCT) \ -+VARIANT (int16, , 4, _s16, STRUCT) \ -+VARIANT (int32, , 2, _s32, STRUCT) \ -+VARIANT (int64, , 1, _s64, STRUCT) \ -+VARIANT (poly8, , 8, _p8, STRUCT) \ -+VARIANT (poly16, , 4, _p16, STRUCT) \ -+VARIANT (float32, , 2, _f32, STRUCT) \ -+VARIANT (float64, , 1, _f64, STRUCT) \ -+VARIANT (uint8, q, 16, _u8, STRUCT) \ -+VARIANT (uint16, q, 8, _u16, STRUCT) \ -+VARIANT 
(uint32, q, 4, _u32, STRUCT) \ -+VARIANT (uint64, q, 2, _u64, STRUCT) \ -+VARIANT (int8, q, 16, _s8, STRUCT) \ -+VARIANT (int16, q, 8, _s16, STRUCT) \ -+VARIANT (int32, q, 4, _s32, STRUCT) \ -+VARIANT (int64, q, 2, _s64, STRUCT) \ -+VARIANT (poly8, q, 16, _p8, STRUCT) \ -+VARIANT (poly16, q, 8, _p16, STRUCT) \ -+VARIANT (float32, q, 4, _f32, STRUCT) \ -+VARIANT (float64, q, 2, _f64, STRUCT) -+ -+#define TESTMETH(BASE, Q, ELTS, SUFFIX, STRUCT) \ -+int \ -+test_vld##STRUCT##Q##_dup##SUFFIX (const BASE##_t *data) \ -+{ \ -+ BASE##_t temp[ELTS]; \ -+ BASE##x##ELTS##x##STRUCT##_t vectors = \ -+ vld##STRUCT##Q##_dup##SUFFIX (data); \ -+ int i,j; \ -+ for (i = 0; i < STRUCT; i++) \ -+ { \ -+ vst1##Q##SUFFIX (temp, vectors.val[i]); \ -+ for (j = 0; j < ELTS; j++) \ -+ if (temp[j] != data[i]) \ -+ return 1; \ -+ } \ -+ return 0; \ -+} -+ -+/* Tests of vld2_dup and vld2q_dup. */ -+VARIANTS (TESTMETH, 2) -+/* Tests of vld3_dup and vld3q_dup. */ -+VARIANTS (TESTMETH, 3) -+/* Tests of vld4_dup and vld4q_dup. */ -+VARIANTS (TESTMETH, 4) -+ -+#define CHECK(BASE, Q, ELTS, SUFFIX, STRUCT) \ -+ if (test_vld##STRUCT##Q##_dup##SUFFIX (BASE ##_data) != 0) \ -+ abort (); -+ -+int -+main (int argc, char **argv) -+{ -+ uint8_t uint8_data[4] = { 7, 11, 13, 17 }; -+ uint16_t uint16_data[4] = { 257, 263, 269, 271 }; -+ uint32_t uint32_data[4] = { 65537, 65539, 65543, 65551 }; -+ uint64_t uint64_data[4] = { 0xdeadbeefcafebabeULL, 0x0123456789abcdefULL, -+ 0xfedcba9876543210LL, 0xdeadbabecafebeefLL }; -+ int8_t int8_data[4] = { -1, 3, -5, 7 }; -+ int16_t int16_data[4] = { 257, -259, 261, -263 }; -+ int32_t int32_data[4] = { 123456789, -987654321, -135792468, 975318642 }; -+ int64_t *int64_data = (int64_t *)uint64_data; -+ poly8_t poly8_data[4] = { 0, 7, 13, 18, }; -+ poly16_t poly16_data[4] = { 11111, 2222, 333, 44 }; -+ float32_t float32_data[4] = { 3.14159, 2.718, 1.414, 100.0 }; -+ float64_t float64_data[4] = { 1.010010001, 12345.6789, -9876.54321, 1.618 }; -+ -+ VARIANTS (CHECK, 2); -+ VARIANTS (CHECK, 3); -+ VARIANTS (CHECK, 4); -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c -@@ -0,0 +1,343 @@ -+/* Test vdup_lane intrinsics work correctly. 
*/ -+/* { dg-do run } */ -+/* { dg-options "-O1 --save-temps" } */ -+ -+#include <arm_neon.h> -+ -+#define force_simd(V1) asm volatile ("mov %d0, %1.d[0]" \ -+ : "=w"(V1) \ -+ : "w"(V1) \ -+ : /* No clobbers */) -+ -+extern void abort (void); -+ -+float32_t __attribute__ ((noinline)) -+wrap_vdups_lane_f32_0 (float32x2_t dummy, float32x2_t a) -+{ -+ return vdups_lane_f32 (a, 0); -+} -+ -+float32_t __attribute__ ((noinline)) -+wrap_vdups_lane_f32_1 (float32x2_t a) -+{ -+ return vdups_lane_f32 (a, 1); -+} -+ -+int __attribute__ ((noinline)) -+test_vdups_lane_f32 () -+{ -+ float32x2_t a; -+ float32_t b; -+ float32_t c[2] = { 0.0, 1.0 }; -+ -+ a = vld1_f32 (c); -+ b = wrap_vdups_lane_f32_0 (a, a); -+ if (c[0] != b) -+ return 1; -+ b = wrap_vdups_lane_f32_1 (a); -+ if (c[1] != b) -+ return 1; -+ return 0; -+} -+ -+float64_t __attribute__ ((noinline)) -+wrap_vdupd_lane_f64_0 (float64x1_t dummy, float64x1_t a) -+{ -+ return vdupd_lane_f64 (a, 0); -+} -+ -+int __attribute__ ((noinline)) -+test_vdupd_lane_f64 () -+{ -+ float64x1_t a; -+ float64_t b; -+ float64_t c[1] = { 0.0 }; -+ a = vld1_f64 (c); -+ b = wrap_vdupd_lane_f64_0 (a, a); -+ if (c[0] != b) -+ return 1; -+ return 0; -+} -+ -+int8_t __attribute__ ((noinline)) -+wrap_vdupb_lane_s8_0 (int8x8_t dummy, int8x8_t a) -+{ -+ int8_t result = vdupb_lane_s8 (a, 0); -+ force_simd (result); -+ return result; -+} -+ -+int8_t __attribute__ ((noinline)) -+wrap_vdupb_lane_s8_1 (int8x8_t a) -+{ -+ int8_t result = vdupb_lane_s8 (a, 1); -+ force_simd (result); -+ return result; -+} -+ -+int __attribute__ ((noinline)) -+test_vdupb_lane_s8 () -+{ -+ int8x8_t a; -+ int8_t b; -+ int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; -+ -+ a = vld1_s8 (c); -+ b = wrap_vdupb_lane_s8_0 (a, a); -+ if (c[0] != b) -+ return 1; -+ b = wrap_vdupb_lane_s8_1 (a); -+ if (c[1] != b) -+ return 1; -+ -+ return 0; -+} -+ -+uint8_t __attribute__ ((noinline)) -+wrap_vdupb_lane_u8_0 (uint8x8_t dummy, uint8x8_t a) -+{ -+ uint8_t result = vdupb_lane_u8 (a, 0); -+ force_simd (result); -+ return result; -+} -+ -+uint8_t __attribute__ ((noinline)) -+wrap_vdupb_lane_u8_1 (uint8x8_t a) -+{ -+ uint8_t result = vdupb_lane_u8 (a, 1); -+ force_simd (result); -+ return result; -+} -+ -+int __attribute__ ((noinline)) -+test_vdupb_lane_u8 () -+{ -+ uint8x8_t a; -+ uint8_t b; -+ uint8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; -+ -+ a = vld1_u8 (c); -+ b = wrap_vdupb_lane_u8_0 (a, a); -+ if (c[0] != b) -+ return 1; -+ b = wrap_vdupb_lane_u8_1 (a); -+ if (c[1] != b) -+ return 1; -+ return 0; -+} -+ -+int16_t __attribute__ ((noinline)) -+wrap_vduph_lane_s16_0 (int16x4_t dummy, int16x4_t a) -+{ -+ int16_t result = vduph_lane_s16 (a, 0); -+ force_simd (result); -+ return result; -+} -+ -+int16_t __attribute__ ((noinline)) -+wrap_vduph_lane_s16_1 (int16x4_t a) -+{ -+ int16_t result = vduph_lane_s16 (a, 1); -+ force_simd (result); -+ return result; -+} -+ -+int __attribute__ ((noinline)) -+test_vduph_lane_s16 () -+{ -+ int16x4_t a; -+ int16_t b; -+ int16_t c[4] = { 0, 1, 2, 3 }; -+ -+ a = vld1_s16 (c); -+ b = wrap_vduph_lane_s16_0 (a, a); -+ if (c[0] != b) -+ return 1; -+ b = wrap_vduph_lane_s16_1 (a); -+ if (c[1] != b) -+ return 1; -+ return 0; -+} -+ -+uint16_t __attribute__ ((noinline)) -+wrap_vduph_lane_u16_0 (uint16x4_t dummy, uint16x4_t a) -+{ -+ uint16_t result = vduph_lane_u16 (a, 0); -+ force_simd (result); -+ return result; -+} -+ -+uint16_t __attribute__ ((noinline)) -+wrap_vduph_lane_u16_1 (uint16x4_t a) -+{ -+ uint16_t result = vduph_lane_u16 (a, 1); -+ force_simd (result); -+ return result; -+} -+ -+int 
__attribute__ ((noinline)) -+test_vduph_lane_u16 () -+{ -+ uint16x4_t a; -+ uint16_t b; -+ uint16_t c[4] = { 0, 1, 2, 3 }; -+ -+ a = vld1_u16 (c); -+ b = wrap_vduph_lane_u16_0 (a, a); -+ if (c[0] != b) -+ return 1; -+ b = wrap_vduph_lane_u16_1 (a); -+ if (c[1] != b) -+ return 1; -+ return 0; -+} -+ -+int32_t __attribute__ ((noinline)) -+wrap_vdups_lane_s32_0 (int32x2_t dummy, int32x2_t a) -+{ -+ int32_t result = vdups_lane_s32 (a, 0); -+ force_simd (result); -+ return result; -+} -+ -+int32_t __attribute__ ((noinline)) -+wrap_vdups_lane_s32_1 (int32x2_t a) -+{ -+ int32_t result = vdups_lane_s32 (a, 1); -+ force_simd (result); -+ return result; -+} -+ -+int __attribute__ ((noinline)) -+test_vdups_lane_s32 () -+{ -+ int32x2_t a; -+ int32_t b; -+ int32_t c[2] = { 0, 1 }; -+ -+ a = vld1_s32 (c); -+ b = wrap_vdups_lane_s32_0 (vcreate_s32 (0), a); -+ if (c[0] != b) -+ return 1; -+ b = wrap_vdups_lane_s32_1 (a); -+ if (c[1] != b) -+ return 1; -+ return 0; -+} -+ -+uint32_t __attribute__ ((noinline)) -+wrap_vdups_lane_u32_0 (uint32x2_t dummy, uint32x2_t a) -+{ -+ uint32_t result = vdups_lane_u32 (a, 0); -+ force_simd (result); -+ return result; -+} -+ -+uint32_t __attribute__ ((noinline)) -+wrap_vdups_lane_u32_1 (uint32x2_t a) -+{ -+ uint32_t result = vdups_lane_u32 (a, 1); -+ force_simd (result); -+ return result; -+} -+ -+int __attribute__ ((noinline)) -+test_vdups_lane_u32 () -+{ -+ uint32x2_t a; -+ uint32_t b; -+ uint32_t c[2] = { 0, 1 }; -+ a = vld1_u32 (c); -+ b = wrap_vdups_lane_u32_0 (a, a); -+ if (c[0] != b) -+ return 1; -+ b = wrap_vdups_lane_u32_1 (a); -+ if (c[1] != b) -+ return 1; -+ return 0; -+} -+ -+uint64_t __attribute__ ((noinline)) -+wrap_vdupd_lane_u64_0 (uint64x1_t dummy, uint64x1_t a) -+{ -+ return vdupd_lane_u64 (a, 0);; -+} -+ -+int __attribute__ ((noinline)) -+test_vdupd_lane_u64 () -+{ -+ uint64x1_t a; -+ uint64_t b; -+ uint64_t c[1] = { 0 }; -+ -+ a = vld1_u64 (c); -+ b = wrap_vdupd_lane_u64_0 (a, a); -+ if (c[0] != b) -+ return 1; -+ return 0; -+} -+ -+int64_t __attribute__ ((noinline)) -+wrap_vdupd_lane_s64_0 (uint64x1_t dummy, int64x1_t a) -+{ -+ return vdupd_lane_u64 (a, 0); -+} -+ -+int __attribute__ ((noinline)) -+test_vdupd_lane_s64 () -+{ -+ int64x1_t a; -+ int64_t b; -+ int64_t c[1] = { 0 }; -+ -+ a = vld1_s64 (c); -+ b = wrap_vdupd_lane_s64_0 (a, a); -+ if (c[0] != b) -+ return 1; -+ return 0; -+} -+ -+int -+main () -+{ -+ if (test_vdups_lane_f32 ()) -+ abort (); -+ if (test_vdupd_lane_f64 ()) -+ abort (); -+ if (test_vdupb_lane_s8 ()) -+ abort (); -+ if (test_vdupb_lane_u8 ()) -+ abort (); -+ if (test_vduph_lane_s16 ()) -+ abort (); -+ if (test_vduph_lane_u16 ()) -+ abort (); -+ if (test_vdups_lane_s32 ()) -+ abort (); -+ if (test_vdups_lane_u32 ()) -+ abort (); -+ if (test_vdupd_lane_s64 ()) -+ abort (); -+ if (test_vdupd_lane_u64 ()) -+ abort (); -+ return 0; -+} -+ -+/* Asm check for vdupb_lane_s8, vdupb_lane_u8. */ -+/* { dg-final { scan-assembler-not "dup\\tb\[0-9\]+, v\[0-9\]+\.b\\\[0\\\]" } } */ -+/* { dg-final { scan-assembler-times "dup\\tb\[0-9\]+, v\[0-9\]+\.b\\\[1\\\]" 2 } } */ -+ -+/* Asm check for vduph_lane_h16, vduph_lane_h16. */ -+/* { dg-final { scan-assembler-not "dup\\th\[0-9\]+, v\[0-9\]+\.h\\\[0\\\]" } } */ -+/* { dg-final { scan-assembler-times "dup\\th\[0-9\]+, v\[0-9\]+\.h\\\[1\\\]" 2 } } */ -+ -+/* Asm check for vdups_lane_f32, vdups_lane_s32, vdups_lane_u32. */ -+/* Can't generate "dup s<n>, v<m>[0]" for vdups_lane_s32 and vdups_lane_u32. 
*/ -+/* { dg-final { scan-assembler-times "dup\\ts\[0-9\]+, v\[0-9\]+\.s\\\[0\\\]" 1} } */ -+/* { dg-final { scan-assembler-times "dup\\ts\[0-9\]+, v\[0-9\]+\.s\\\[1\\\]" 3 } } */ -+ -+/* Asm check for vdupd_lane_f64, vdupd_lane_s64, vdupd_lane_u64. */ -+/* Attempts to make the compiler generate vdupd are not practical. */ -+/* { dg-final { scan-assembler-not "dup\\td\[0-9\]+, v\[0-9\]+\.d\\\[0\\\]" } } -+ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/vbslq_u64_2.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vbslq_u64_2.c -@@ -0,0 +1,22 @@ -+/* Test vbslq_u64 can be folded. */ -+/* { dg-do assemble } */ -+/* { dg-options "--save-temps -O3" } */ -+#include <arm_neon.h> -+ -+/* Folds to BIC. */ -+ -+int32x4_t -+half_fold_int (uint32x4_t mask) -+{ -+ int32x4_t a = {0, 0, 0, 0}; -+ int32x4_t b = {2, 4, 8, 16}; -+ return vbslq_s32 (mask, a, b); -+} -+ -+/* { dg-final { scan-assembler-not "bsl\\tv" } } */ -+/* { dg-final { scan-assembler-not "bit\\tv" } } */ -+/* { dg-final { scan-assembler-not "bif\\tv" } } */ -+/* { dg-final { scan-assembler "bic\\tv" } } */ -+ -+/* { dg-final { cleanup-saved-temps } } */ -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/vdup_n_2.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vdup_n_2.c -@@ -0,0 +1,28 @@ -+/* { dg-do run } */ -+/* { dg-options "-O2 -fno-inline --save-temps" } */ -+ -+extern void abort (void); -+ -+typedef float float32x2_t __attribute__ ((__vector_size__ ((8)))); -+typedef unsigned int uint32x2_t __attribute__ ((__vector_size__ ((8)))); -+ -+float32x2_t -+test_dup_1 (float32x2_t in) -+{ -+ return __builtin_shuffle (in, (uint32x2_t) {1, 1}); -+} -+ -+int -+main (int argc, char **argv) -+{ -+ float32x2_t test = {2.718, 3.141}; -+ float32x2_t res = test_dup_1 (test); -+ if (res[0] != test[1] || res[1] != test[1]) -+ abort (); -+ return 0; -+} -+ -+/* { dg-final { scan-assembler-times "\[ \t\]*dup\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.s\\\[\[01\]\\\]" 1 } } */ -+/* { dg-final { scan-assembler-not "zip" } } */ -+/* { dg-final { cleanup-saved-temps } } */ -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_5.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_5.c -@@ -0,0 +1,13 @@ -+/* Verify: -+ * -fomit-frame-pointer. -+ * with outgoing. -+ * total frame size <= 512. -+ * one subtraction of the whole frame size. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-O2 -fomit-frame-pointer" } */ -+ -+#include "test_frame_common.h" -+ -+t_frame_pattern_outgoing (test5, 300, "x19", 8, a[8]) -+t_frame_run (test5) ---- a/src/gcc/testsuite/gcc.target/aarch64/vld1-vst1_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vld1-vst1_1.c -@@ -5,48 +5,54 @@ - - extern void abort (void); - --int __attribute__ ((noinline)) --test_vld1_vst1 () --{ -- int8x8_t a; -- int8x8_t b; -- int i = 0; -- int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; -- int8_t d[8]; -- a = vld1_s8 (c); -- asm volatile ("":::"memory"); -- vst1_s8 (d, a); -- asm volatile ("":::"memory"); -- for (; i < 8; i++) -- if (c[i] != d[i]) -- return 1; -- return 0; -+#define TESTMETH(TYPE, NUM, BASETYPE, SUFFIX) \ -+int __attribute__ ((noinline)) \ -+test_vld1_vst1##SUFFIX () \ -+{ \ -+ TYPE vec; \ -+ int i = 0; \ -+ BASETYPE src[NUM]; \ -+ BASETYPE dest[NUM]; \ -+ for (i = 0; i < NUM; i++) \ -+ src[i] = 2*i + 1; \ -+ asm volatile ("":::"memory"); \ -+ vec = vld1 ## SUFFIX (src); \ -+ asm volatile ("":::"memory"); \ -+ vst1 ## SUFFIX (dest, vec); \ -+ asm volatile ("":::"memory"); \ -+ for (i = 0; i < NUM; i++) \ -+ if (src[i] != dest[i]) \ -+ return 1; \ -+ return 0; \ - } - --int __attribute__ ((noinline)) --test_vld1q_vst1q () --{ -- int16x8_t a; -- int16x8_t b; -- int i = 0; -- int16_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; -- int16_t d[8]; -- a = vld1q_s16 (c); -- asm volatile ("":::"memory"); -- vst1q_s16 (d, a); -- asm volatile ("":::"memory"); -- for (; i < 8; i++) -- if (c[i] != d[i]) -- return 1; -- return 0; --} -+#define VARIANTS(THING) \ -+THING (int8x8_t, 8, int8_t, _s8) \ -+THING (uint8x8_t, 8, uint8_t, _u8) \ -+THING (int16x4_t, 4, int16_t, _s16) \ -+THING (uint16x4_t, 4, uint16_t, _u16) \ -+THING (int32x2_t, 2, int32_t, _s32) \ -+THING (uint32x2_t, 2, uint32_t, _u32) \ -+THING (float32x2_t, 2, float32_t, _f32) \ -+THING (int8x16_t, 16, int8_t, q_s8) \ -+THING (uint8x16_t, 16, uint8_t, q_u8) \ -+THING (int16x8_t, 8, int16_t, q_s16) \ -+THING (uint16x8_t, 8, uint16_t, q_u16) \ -+THING (int32x4_t, 4, int32_t, q_s32) \ -+THING (uint32x4_t, 4, uint32_t, q_u32) \ -+THING (int64x2_t, 2, int64_t, q_s64) \ -+THING (uint64x2_t, 2, uint64_t, q_u64) \ -+THING (float64x2_t, 2, float64_t, q_f64) - -+VARIANTS (TESTMETH) -+ -+#define DOTEST(TYPE, NUM, BASETYPE, SUFFIX) \ -+ if (test_vld1_vst1##SUFFIX ()) \ -+ abort (); -+ - int - main () - { -- if (test_vld1_vst1 ()) -- abort (); -- if (test_vld1q_vst1q ()) -- abort (); -+ VARIANTS (DOTEST); - return 0; - } ---- a/src/gcc/testsuite/gcc.target/aarch64/cvtf_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/cvtf_1.c -@@ -0,0 +1,95 @@ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline -O1" } */ -+ -+#define FCVTDEF(ftype,itype) \ -+void \ -+cvt_##itype##_to_##ftype (itype a, ftype b)\ -+{\ -+ ftype c;\ -+ c = (ftype) a;\ -+ if ( (c - b) > 0.00001) abort();\ -+} -+ -+#define force_simd_for_float(v) asm volatile ("mov %s0, %1.s[0]" :"=w" (v) :"w" (v) :) -+#define force_simd_for_double(v) asm volatile ("mov %d0, %1.d[0]" :"=w" (v) :"w" (v) :) -+ -+#define FCVTDEF_SISD(ftype,itype) \ -+void \ -+cvt_##itype##_to_##ftype##_sisd (itype a, ftype b)\ -+{\ -+ ftype c;\ -+ force_simd_for_##ftype(a);\ -+ c = (ftype) a;\ -+ if ( (c - b) > 0.00001) abort();\ -+} -+ -+#define FCVT(ftype,itype,ival,fval) cvt_##itype##_to_##ftype (ival, fval); -+#define FCVT_SISD(ftype,itype,ival,fval) cvt_##itype##_to_##ftype##_sisd (ival, fval); -+ -+typedef int int32_t; -+typedef unsigned int uint32_t; -+typedef long long int int64_t; 
-+typedef unsigned long long int uint64_t; -+ -+extern void abort(); -+ -+FCVTDEF (float, int32_t) -+/* { dg-final { scan-assembler "scvtf\ts\[0-9\]+,\ w\[0-9\]+" } } */ -+FCVTDEF (float, uint32_t) -+/* { dg-final { scan-assembler "ucvtf\ts\[0-9\]+,\ w\[0-9\]+" } } */ -+FCVTDEF (double, int32_t) -+/* "scvtf\td\[0-9\]+,\ w\[0-9\]+" */ -+FCVTDEF (double, uint32_t) -+/* "ucvtf\td\[0-9\]+,\ w\[0-9\]+" */ -+FCVTDEF (float, int64_t) -+/* "scvtf\ts\[0-9\]+,\ x\[0-9\]+" */ -+FCVTDEF (float, uint64_t) -+/* "ucvtf\ts\[0-9\]+,\ x\[0-9\]+" */ -+FCVTDEF (double, int64_t) -+/* { dg-final { scan-assembler "scvtf\td\[0-9\]+,\ x\[0-9\]+" } } */ -+FCVTDEF (double, uint64_t) -+/* { dg-final { scan-assembler "ucvtf\td\[0-9\]+,\ x\[0-9\]+" } } */ -+FCVTDEF_SISD (float, int32_t) -+/* { dg-final { scan-assembler "scvtf\ts\[0-9\]+,\ s\[0-9\]+" } } */ -+FCVTDEF_SISD (double, int64_t) -+/* { dg-final { scan-assembler "scvtf\td\[0-9\]+,\ d\[0-9\]+" } } */ -+FCVTDEF_SISD (float, uint32_t) -+/* { dg-final { scan-assembler "ucvtf\ts\[0-9\]+,\ s\[0-9\]+" } } */ -+FCVTDEF_SISD (double, uint64_t) -+/* { dg-final { scan-assembler "ucvtf\td\[0-9\]+,\ d\[0-9\]+" } } */ -+FCVTDEF_SISD (float, int64_t) -+/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\]+,\ x\[0-9\]+" 2 } } */ -+FCVTDEF_SISD (float, uint64_t) -+/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\]+,\ x\[0-9\]+" 2 } } */ -+FCVTDEF_SISD (double, int32_t) -+/* { dg-final { scan-assembler-times "scvtf\td\[0-9\]+,\ w\[0-9\]+" 2 } } */ -+FCVTDEF_SISD (double, uint32_t) -+/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\]+,\ w\[0-9\]+" 2 } } */ -+ -+int32_t ival = -1234; -+int64_t llival = -13031303L; -+uint32_t uival = 1234; -+uint64_t ullival = 13031303L; -+ -+int main () -+{ -+ float x; -+ double y; -+ -+ FCVT (float, int32_t, ival, -1234.0); -+ FCVT (float, uint32_t, uival, 1234.0); -+ FCVT (float, int64_t, llival, -13031303.0); -+ FCVT (float, uint64_t, ullival, 13031303.0); -+ FCVT (double, int32_t, ival, -1234.0); -+ FCVT (double, uint32_t, uival, 1234.0); -+ FCVT (double, int64_t, llival, -13031303.0); -+ FCVT (double, uint64_t, ullival, 13031303.0); -+ FCVT_SISD (float, int32_t, ival, -1234.0); -+ FCVT_SISD (double, int64_t, llival, -13031303.0); -+ FCVT_SISD (float, uint32_t, uival, 1234.0); -+ FCVT_SISD (double, uint64_t, ullival, 13031303.0); -+ -+ return 0; -+} -+ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/reload-valid-spoff.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/reload-valid-spoff.c -@@ -17,6 +17,11 @@ - }; - typedef struct _IO_FILE FILE; - extern char *fgets (char *__restrict __s, int __n, FILE *__restrict __stream); -+extern void *memset (void *s, int c, size_t n); -+extern void *memcpy (void *dest, const void *src, size_t n); -+extern int fprintf (FILE *stream, const char *format, ...); -+extern char * safe_strncpy (char *dst, const char *src, size_t size); -+extern size_t strlen (const char *s); - extern struct _IO_FILE *stderr; - extern int optind; - struct aftype { ---- a/src/gcc/testsuite/gcc.target/aarch64/tail_indirect_call_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/tail_indirect_call_1.c -@@ -0,0 +1,18 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+typedef void FP (int); -+ -+/* { dg-final { scan-assembler "br" } } */ -+/* { dg-final { scan-assembler-not "blr" } } */ -+void -+f1 (FP fp, int n) -+{ -+ (fp) (n); -+} -+ -+void -+f2 (int n, FP fp) -+{ -+ (fp) (n); -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/vqdml_lane_intrinsics-bad_1.c -+++ 
b/src/gcc/testsuite/gcc.target/aarch64/vqdml_lane_intrinsics-bad_1.c -@@ -0,0 +1,54 @@ -+/* { dg-do compile } */ -+ -+#include "arm_neon.h" -+ -+int32x4_t -+foo (int32x4_t a, int16x4_t b, int16x4_t c, int d) -+{ -+ return vqdmlal_lane_s16 (a, b, c, d); -+} -+ -+int32x4_t -+foo1 (int32x4_t a, int16x4_t b, int16x8_t c, int d) -+{ -+ return vqdmlal_laneq_s16 (a, b, c, d); -+} -+ -+int32x4_t -+foo2 (int32x4_t a, int16x4_t b, int16x4_t c, int d) -+{ -+ return vqdmlsl_lane_s16 (a, b, c, d); -+} -+ -+int32x4_t -+foo3 (int32x4_t a, int16x4_t b, int16x8_t c, int d) -+{ -+ return vqdmlsl_laneq_s16 (a, b, c, d); -+} -+ -+int32x4_t -+foo4 (int32x4_t a, int16x8_t b, int16x4_t c, int d) -+{ -+ return vqdmlal_high_lane_s16 (a, b, c, d); -+} -+ -+int32x4_t -+foo5 (int32x4_t a, int16x8_t b, int16x4_t c, int d) -+{ -+ return vqdmlsl_high_lane_s16 (a, b, c, d); -+} -+ -+int32x4_t -+foo6 (int32x4_t a, int16x8_t b, int16x8_t c, int d) -+{ -+ return vqdmlal_high_laneq_s16 (a, b, c, d); -+} -+ -+int32x4_t -+foo7 (int32x4_t a, int16x8_t b, int16x8_t c, int d) -+{ -+ return vqdmlsl_high_laneq_s16 (a, b, c, d); -+} -+ -+ -+/* { dg-excess-errors "incompatible type for argument" } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_6.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_6.c -@@ -0,0 +1,20 @@ -+/* Verify: -+ * -fomit-frame-pointer. -+ * without outgoing. -+ * total frame size > 512. -+ * number of callee-saved reg == 1. -+ * split stack adjustment into two subtractions. -+ the second subtraction should use "str !". */ -+ -+/* { dg-do run } */ -+/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */ -+ -+#include "test_frame_common.h" -+ -+t_frame_pattern (test6, 700, ) -+t_frame_run (test6) -+ -+/* { dg-final { scan-assembler-times "str\tx30, \\\[sp, -\[0-9\]+\\\]!" 
2 } } */ -+/* { dg-final { scan-assembler-times "ldr\tx30, \\\[sp\\\], \[0-9\]+" 3 } } */ -+ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_common.h -+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_common.h -@@ -0,0 +1,94 @@ -+extern void abort (); -+ -+#define CVT(v) ((unsigned char)(v)) -+ -+static void __attribute__((noinline)) -+check_args_8 (int a0, int a1, int a2, int a3, int a4, int a5, int a6, int a7, -+ int a8) -+{ -+ if (a0 != 0 -+ || a1 != 1 -+ || a2 != 2 -+ || a3 != 3 -+ || a4 != 4 -+ || a5 != 5 -+ || a6 != 6 -+ || a7 != 7 -+ || a8 != 8) -+ abort (); -+} -+ -+static void __attribute__((noinline)) -+check_args_24 (int a0, int a1, int a2, int a3, int a4, int a5, int a6, int a7, -+ int a8, int a9, int a10) -+{ -+ if (a0 != 0 -+ || a1 != 1 -+ || a2 != 2 -+ || a3 != 3 -+ || a4 != 4 -+ || a5 != 5 -+ || a6 != 6 -+ || a7 != 7 -+ || a8 != 8 -+ || a9 != 9 -+ || a10 != 10) -+ abort (); -+} -+ -+void __attribute__ ((noinline)) -+initialize_array (unsigned char *a, int len) -+{ -+ int i; -+ -+ for (i = 0; i < (len / 2); i++) -+ { -+ a[i] = i; -+ a[len - i - 1] = i; -+ } -+ -+ return; -+} -+ -+#define t_frame_pattern(name, local_size, callee_saved)\ -+int \ -+name (void)\ -+{\ -+ unsigned char a[local_size];\ -+ initialize_array (a, local_size); \ -+ __asm__ ("":::callee_saved); \ -+ if (a[0] != a[local_size - 1] \ -+ || a[0] != 0) \ -+ return 0; \ -+ if (a[local_size / 2 - 1] != a[local_size / 2] \ -+ || a[local_size / 2 - 1] != CVT (local_size / 2 - 1)) \ -+ return 0; \ -+ return 1; \ -+} -+ -+#define t_frame_pattern_outgoing(name, local_size, callee_saved, out_going_num, ...)\ -+int \ -+name (void)\ -+{\ -+ unsigned char a[local_size];\ -+ initialize_array (a, local_size); \ -+ __asm__ ("":::callee_saved); \ -+ if (a[0] != a[local_size - 1] \ -+ || a[0] != 0) \ -+ return 0; \ -+ if (a[local_size / 2 - 1] != a[local_size / 2] \ -+ || a[local_size / 2 - 1] != CVT (local_size / 2 - 1)) \ -+ return 0; \ -+ check_args_ ## out_going_num (a[0], a[1], a[2], a[3], a[4], a[5], a[6],\ -+ a[7], __VA_ARGS__); \ -+ return 1; \ -+} -+ -+#define t_frame_run(name) \ -+int \ -+main (int argc, char **argv) \ -+{\ -+ if (!name ())\ -+ abort ();\ -+ return 0;\ -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/vstN_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vstN_1.c -@@ -0,0 +1,76 @@ -+/* { dg-do run } */ -+/* { dg-options "-O3" } */ -+ -+#include <arm_neon.h> -+ -+extern void abort (void); -+ -+#define TESTMETH(BASE, ELTS, STRUCT, SUFFIX) \ -+int __attribute__ ((noinline)) \ -+test_vst##STRUCT##SUFFIX () \ -+{ \ -+ BASE##_t src[ELTS * STRUCT]; \ -+ BASE##_t dest[ELTS * STRUCT]; \ -+ BASE##x##ELTS##x##STRUCT##_t vectors; \ -+ int i,j; \ -+ for (i = 0; i < STRUCT * ELTS; i++) \ -+ src [i] = (BASE##_t) 2*i + 1; \ -+ for (i = 0; i < STRUCT; i++) \ -+ vectors.val[i] = vld1##SUFFIX (&src[i*ELTS]); \ -+ asm volatile ("" : : : "memory"); \ -+ vst##STRUCT##SUFFIX (dest, vectors); \ -+ asm volatile ("" : : : "memory"); \ -+ for (i = 0; i < STRUCT; i++) \ -+ { \ -+ for (j = 0; j < ELTS; j++) \ -+ if (src[i*ELTS + j] != dest[i + STRUCT*j]) \ -+ return 1; \ -+ } \ -+ return 0; \ -+} -+ -+#define VARIANTS(VARIANT, STRUCT) \ -+VARIANT (uint8, 8, STRUCT, _u8) \ -+VARIANT (uint16, 4, STRUCT, _u16) \ -+VARIANT (uint32, 2, STRUCT, _u32) \ -+VARIANT (uint64, 1, STRUCT, _u64) \ -+VARIANT (int8, 8, STRUCT, _s8) \ -+VARIANT (int16, 4, STRUCT, _s16) \ -+VARIANT (int32, 2, STRUCT, _s32) \ -+VARIANT (int64, 1, STRUCT, _s64) \ -+VARIANT (poly8, 8, STRUCT, _p8) \ -+VARIANT 
(poly16, 4, STRUCT, _p16) \ -+VARIANT (float32, 2, STRUCT, _f32) \ -+VARIANT (float64, 1, STRUCT, _f64) \ -+VARIANT (uint8, 16, STRUCT, q_u8) \ -+VARIANT (uint16, 8, STRUCT, q_u16) \ -+VARIANT (uint32, 4, STRUCT, q_u32) \ -+VARIANT (uint64, 2, STRUCT, q_u64) \ -+VARIANT (int8, 16, STRUCT, q_s8) \ -+VARIANT (int16, 8, STRUCT, q_s16) \ -+VARIANT (int32, 4, STRUCT, q_s32) \ -+VARIANT (int64, 2, STRUCT, q_s64) \ -+VARIANT (poly8, 16, STRUCT, q_p8) \ -+VARIANT (poly16, 8, STRUCT, q_p16) \ -+VARIANT (float32, 4, STRUCT, q_f32) \ -+VARIANT (float64, 2, STRUCT, q_f64) -+ -+/* Tests of vst2 and vst2q. */ -+VARIANTS (TESTMETH, 2) -+/* Tests of vst3 and vst3q. */ -+VARIANTS (TESTMETH, 3) -+/* Tests of vst4 and vst4q. */ -+VARIANTS (TESTMETH, 4) -+ -+#define CHECK(BASE, ELTS, STRUCT, SUFFIX) \ -+ if (test_vst##STRUCT##SUFFIX () != 0) \ -+ abort (); -+ -+int -+main (int argc, char **argv) -+{ -+ VARIANTS (CHECK, 2) -+ VARIANTS (CHECK, 3) -+ VARIANTS (CHECK, 4) -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/vect-fmax-fmin.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fmax-fmin.c -@@ -8,11 +8,11 @@ - #include "vect-fmaxv-fminv.x" - - #define DEFN_SETV(type) \ -- set_vector_##type (pR##type a, type n) \ -- { \ -- int i; \ -- for (i=0; i<16; i++) \ -- a[i] = n; \ -+ void set_vector_##type (pR##type a, type n) \ -+ { \ -+ int i; \ -+ for (i=0; i<16; i++) \ -+ a[i] = n; \ - } - - #define DEFN_CHECKV(type) \ ---- a/src/gcc/testsuite/gcc.target/aarch64/scalar_shift_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/scalar_shift_1.c -@@ -193,7 +193,6 @@ - return b; - } - /* { dg-final { scan-assembler "sshr\td\[0-9\]+,\ d\[0-9\]+,\ 63" } } */ --/* { dg-final { scan-assembler "shl\td\[0-9\]+,\ d\[0-9\]+,\ 1" } } */ - - Int32x1 - test_corners_sisd_si (Int32x1 b) -@@ -207,7 +206,6 @@ - return b; - } - /* { dg-final { scan-assembler "sshr\tv\[0-9\]+\.2s,\ v\[0-9\]+\.2s,\ 31" } } */ --/* { dg-final { scan-assembler "shl\tv\[0-9\]+\.2s,\ v\[0-9\]+\.2s,\ 1" } } */ - - - ---- a/src/gcc/testsuite/gcc.target/aarch64/vbslq_f64_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vbslq_f64_1.c -@@ -0,0 +1,21 @@ -+/* Test vbslq_f64 can be folded. */ -+/* { dg-do assemble } */ -+/* { dg-options "--save-temps -O3" } */ -+ -+#include <arm_neon.h> -+ -+/* Folds to ret. */ -+ -+float32x4_t -+fold_me (float32x4_t a, float32x4_t b) -+{ -+ uint32x4_t mask = {-1, -1, -1, -1}; -+ return vbslq_f32 (mask, a, b); -+} -+ -+/* { dg-final { scan-assembler-not "bsl\\tv" } } */ -+/* { dg-final { scan-assembler-not "bit\\tv" } } */ -+/* { dg-final { scan-assembler-not "bif\\tv" } } */ -+ -+/* { dg-final { cleanup-saved-temps } } */ -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/vect-ld1r.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-ld1r.x -@@ -7,7 +7,7 @@ - for (i = 0; i < 8 / sizeof (TYPE); i++) \ - output[i] = *a; \ - } \ -- foo_ ## TYPE ## _q (TYPE *a, TYPE *output) \ -+ void foo_ ## TYPE ## _q (TYPE *a, TYPE *output) \ - { \ - int i; \ - for (i = 0; i < 32 / sizeof (TYPE); i++) \ ---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_10.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_10.c -@@ -0,0 +1,21 @@ -+/* Verify: -+ * -fomit-frame-pointer. -+ * with outgoing. -+ * total frame size > 512. -+ area except outgoing <= 512 -+ * number of callee-saved reg >= 2. -+ * Split stack adjustment into two subtractions. -+ the first subtractions could be optimized into "stp !". 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */ -+ -+#include "test_frame_common.h" -+ -+t_frame_pattern_outgoing (test10, 480, "x19", 24, a[8], a[9], a[10]) -+t_frame_run (test10) -+ -+/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, -\[0-9\]+\\\]!" 1 } } */ -+/* { dg-final { scan-assembler-times "ldp\tx19, x30, \\\[sp\\\], \[0-9\]+" 1 } } */ -+ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/vrnd_f64_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vrnd_f64_1.c -@@ -0,0 +1,105 @@ -+/* Test vrnd_f64 works correctly. */ -+/* { dg-do run } */ -+/* { dg-options "--save-temps" } */ -+ -+#include "arm_neon.h" -+ -+extern void abort (void); -+ -+/* Bit offset to round mode field in FPCR. */ -+#define RMODE_START 22 -+ -+#define FPROUNDING_ZERO 3 -+ -+/* Set RMODE field of FPCR control register -+ to rounding mode passed. */ -+void __inline __attribute__ ((__always_inline__)) -+set_rounding_mode (uint32_t mode) -+{ -+ uint32_t r; -+ -+ /* Read current FPCR. */ -+ asm volatile ("mrs %[r], fpcr" : [r] "=r" (r) : :); -+ -+ /* Clear rmode. */ -+ r &= ~(3 << RMODE_START); -+ /* Calculate desired FPCR. */ -+ r |= mode << RMODE_START; -+ -+ /* Write desired FPCR back. */ -+ asm volatile ("msr fpcr, %[r]" : : [r] "r" (r) :); -+} -+ -+float64x1_t __attribute__ ((noinline)) -+compare_f64 (float64x1_t passed, float64_t expected) -+{ -+ return (__builtin_fabs (vget_lane_f64 (passed, 0) - expected) -+ > __DBL_EPSILON__); -+} -+ -+void __attribute__ ((noinline)) -+run_round_tests (float64x1_t *tests, -+ float64_t expectations[][6]) -+{ -+ int i; -+ -+ for (i = 0; i < 6; i++) -+ { -+ if (compare_f64 (vrnd_f64 (tests[i]), expectations[0][i])) -+ abort (); -+ if (compare_f64 (vrndx_f64 (tests[i]), expectations[1][i])) -+ abort (); -+ if (compare_f64 (vrndp_f64 (tests[i]), expectations[2][i])) -+ abort (); -+ if (compare_f64 (vrndn_f64 (tests[i]), expectations[3][i])) -+ abort (); -+ if (compare_f64 (vrndm_f64 (tests[i]), expectations[4][i])) -+ abort (); -+ if (compare_f64 (vrndi_f64 (tests[i]), expectations[5][i])) -+ abort (); -+ if (compare_f64 (vrnda_f64 (tests[i]), expectations[6][i])) -+ abort (); -+ } -+} -+ -+int -+main (int argc, char **argv) -+{ -+ float64x1_t tests[6] = -+ { -+ vcreate_f64 (0x3FE0000000000000), /* Hex for: 0.5. */ -+ vcreate_f64 (0x3FD999999999999A), /* Hex for: 0.4. */ -+ vcreate_f64 (0x3FE3333333333333), /* Hex for: 0.6. */ -+ vcreate_f64 (0xBFE0000000000000), /* Hex for: -0.5. */ -+ vcreate_f64 (0xBFD999999999999A), /* Hex for: -0.4. */ -+ vcreate_f64 (0xBFE3333333333333), /* Hex for: -0.6. */ -+ }; -+ -+ float64_t expectations[7][6] = -+ { -+ { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }, /* vrnd - round towards zero. */ -+ { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }, /* vrndx - round using FPCR mode. */ -+ { 1.0, 1.0, 1.0, 0.0, 0.0, 0.0 }, /* vrndp - round to plus infinity. */ -+ { 0.0, 0.0, 1.0, 0.0, 0.0, -1.0 }, /* vrndn - round ties to even. */ -+ { 0.0, 0.0, 0.0, -1.0, -1.0, -1.0 }, /* vrndm - round to minus infinity. */ -+ { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }, /* vrndi - round using FPCR mode. */ -+ { 1.0, 0.0, 1.0, -1.0, 0.0, -1.0 }, /* vrnda - round ties away from 0. */ -+ }; -+ -+ /* Set floating point control register -+ to have predictable vrndx and vrndi behaviour. 
*/ -+ set_rounding_mode (FPROUNDING_ZERO); -+ -+ run_round_tests (tests, expectations); -+ -+ return 0; -+} -+ -+/* { dg-final { scan-assembler-times "frintz\\td\[0-9\]+, d\[0-9\]+" 1 } } */ -+/* { dg-final { scan-assembler-times "frintx\\td\[0-9\]+, d\[0-9\]+" 1 } } */ -+/* { dg-final { scan-assembler-times "frintp\\td\[0-9\]+, d\[0-9\]+" 1 } } */ -+/* { dg-final { scan-assembler-times "frintn\\td\[0-9\]+, d\[0-9\]+" 1 } } */ -+/* { dg-final { scan-assembler-times "frintm\\td\[0-9\]+, d\[0-9\]+" 1 } } */ -+/* { dg-final { scan-assembler-times "frinti\\td\[0-9\]+, d\[0-9\]+" 1 } } */ -+/* { dg-final { scan-assembler-times "frinta\\td\[0-9\]+, d\[0-9\]+" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c -@@ -305,13 +305,28 @@ - return res; - } - --/* { dg-final { scan-assembler-times "\\taddp\\td\[0-9\]+, v\[0-9\]+\.2d" 1 } } */ -+/* { dg-final { scan-assembler-times "\\tfaddp\\td\[0-9\]+, v\[0-9\]+\.2d" 1 } } */ - -+float64_t -+test_vpaddd_f64 (float64x2_t a) -+{ -+ return vpaddd_f64 (a); -+} -+ -+/* { dg-final { scan-assembler-times "\\taddp\\td\[0-9\]+, v\[0-9\]+\.2d" 2 } } */ -+ -+int64_t - test_vpaddd_s64 (int64x2_t a) - { - return vpaddd_s64 (a); - } - -+uint64_t -+test_vpaddd_u64 (uint64x2_t a) -+{ -+ return vpaddd_u64 (a); -+} -+ - /* { dg-final { scan-assembler-times "\\tuqadd\\td\[0-9\]+" 1 } } */ - - uint64x1_t ---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_7.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_7.c -@@ -0,0 +1,20 @@ -+/* Verify: -+ * -fomit-frame-pointer. -+ * without outgoing. -+ * total frame size > 512. -+ * number of callee-saved reg == 2. -+ * split stack adjustment into two subtractions. -+ the second subtraction should use "stp !". */ -+ -+/* { dg-do run } */ -+/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */ -+ -+#include "test_frame_common.h" -+ -+t_frame_pattern (test7, 700, "x19") -+t_frame_run (test7) -+ -+/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, -\[0-9\]+\\\]!" 1 } } */ -+/* { dg-final { scan-assembler-times "ldp\tx19, x30, \\\[sp\\\], \[0-9\]+" 2 } } */ -+ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/pic-symrefplus.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/pic-symrefplus.c -@@ -34,6 +34,9 @@ - values []; - }; - extern const struct locale_data _nl_C_LC_TIME __attribute__ ((visibility ("hidden"))); -+extern void *memset (void *s, int c, size_t n); -+extern size_t strlen (const char *s); -+extern int __strncasecmp_l (const char *s1, const char *s2, size_t n, __locale_t locale); - char * - __strptime_internal (rp, fmt, tmp, statep , locale) - const char *rp; -@@ -40,6 +43,7 @@ - const char *fmt; - __locale_t locale; - void *statep; -+ int tmp; - { - struct locale_data *const current = locale->__locales[__LC_TIME]; - const char *rp_backup; -@@ -124,5 +128,9 @@ - } - char * - __strptime_l (buf, format, tm , locale) -+ int buf; -+ int format; -+ int tm; -+ int locale; - { - } ---- a/src/gcc/testsuite/gcc.target/aarch64/vbslq_f64_2.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vbslq_f64_2.c -@@ -0,0 +1,24 @@ -+/* Test vbslq_f64 can be folded. */ -+/* { dg-do assemble } */ -+/* { dg-options "--save-temps -O3" } */ -+ -+#include <arm_neon.h> -+ -+/* Should fold out one half of the BSL, leaving just a BIC. 
*/ -+ -+float32x4_t -+half_fold_me (uint32x4_t mask) -+{ -+ float32x4_t a = {0.0, 0.0, 0.0, 0.0}; -+ float32x4_t b = {2.0, 4.0, 8.0, 16.0}; -+ return vbslq_f32 (mask, a, b); -+ -+} -+ -+/* { dg-final { scan-assembler-not "bsl\\tv" } } */ -+/* { dg-final { scan-assembler-not "bit\\tv" } } */ -+/* { dg-final { scan-assembler-not "bif\\tv" } } */ -+/* { dg-final { scan-assembler "bic\\tv" } } */ -+ -+/* { dg-final { cleanup-saved-temps } } */ -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_11.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_11.c -@@ -0,0 +1,16 @@ -+/* Verify: -+ * without outgoing. -+ * total frame size <= 512. -+ * number of callee-save reg >= 2. -+ * optimized code should use "stp !" for stack adjustment. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-O2 --save-temps" } */ -+ -+#include "test_frame_common.h" -+ -+t_frame_pattern (test11, 400, ) -+t_frame_run (test11) -+ -+/* { dg-final { scan-assembler-times "stp\tx29, x30, \\\[sp, -\[0-9\]+\\\]!" 2 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/vqneg_s64_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vqneg_s64_1.c -@@ -0,0 +1,47 @@ -+/* Test vqneg_s64 intrinsics work correctly. */ -+/* { dg-do run } */ -+/* { dg-options "--save-temps" } */ -+ -+#include <arm_neon.h> -+ -+extern void abort (void); -+ -+int __attribute__ ((noinline)) -+test_vqneg_s64 (int64x1_t passed, int64_t expected) -+{ -+ return vget_lane_s64 (vqneg_s64 (passed), 0) != expected; -+} -+ -+int __attribute__ ((noinline)) -+test_vqnegd_s64 (int64_t passed, int64_t expected) -+{ -+ return vqnegd_s64 (passed) != expected; -+} -+ -+/* { dg-final { scan-assembler-times "sqneg\\td\[0-9\]+, d\[0-9\]+" 2 } } */ -+ -+int -+main (int argc, char **argv) -+{ -+ /* Basic test. */ -+ if (test_vqneg_s64 (vcreate_s64 (-1), 1)) -+ abort (); -+ if (test_vqnegd_s64 (-1, 1)) -+ abort (); -+ -+ /* Negating max int64_t. */ -+ if (test_vqneg_s64 (vcreate_s64 (0x7fffffffffffffff), 0x8000000000000001)) -+ abort (); -+ if (test_vqnegd_s64 (0x7fffffffffffffff, 0x8000000000000001)) -+ abort (); -+ -+ /* Negating min int64_t. -+ Note, exact negation cannot be represented as int64_t. 
*/ -+ if (test_vqneg_s64 (vcreate_s64 (0x8000000000000000), 0x7fffffffffffffff)) -+ abort (); -+ if (test_vqnegd_s64 (0x8000000000000000, 0x7fffffffffffffff)) -+ abort (); -+ -+ return 0; -+} -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/vget_low_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vget_low_1.c -@@ -0,0 +1,60 @@ -+/* { dg-do run } */ -+/* { dg-options "-O3 -std=c99" } */ -+ -+#include <arm_neon.h> -+ -+extern void abort (void); -+ -+#define VARIANTS(VARIANT) \ -+VARIANT (uint8_t, 8, uint8x8_t, uint8x16_t, u8) \ -+VARIANT (uint16_t, 4, uint16x4_t, uint16x8_t, u16) \ -+VARIANT (uint32_t, 2, uint32x2_t, uint32x4_t, u32) \ -+VARIANT (uint64_t, 1, uint64x1_t, uint64x2_t, u64) \ -+VARIANT (int8_t, 8, int8x8_t, int8x16_t, s8) \ -+VARIANT (int16_t, 4, int16x4_t, int16x8_t, s16) \ -+VARIANT (int32_t, 2, int32x2_t, int32x4_t, s32) \ -+VARIANT (int64_t, 1, int64x1_t, int64x2_t, s64) \ -+VARIANT (float32_t, 2, float32x2_t, float32x4_t, f32) \ -+VARIANT (float64_t, 1, float64x1_t, float64x2_t, f64) -+ -+ -+#define TESTMETH(BASETYPE, NUM64, TYPE64, TYPE128, SUFFIX) \ -+int \ -+test_vget_low_ ##SUFFIX (BASETYPE *data) \ -+{ \ -+ BASETYPE temp [NUM64]; \ -+ TYPE128 vec = vld1q_##SUFFIX (data); \ -+ TYPE64 low = vget_low_##SUFFIX (vec); \ -+ vst1_##SUFFIX (temp, low); \ -+ for (int i = 0; i < NUM64; i++) \ -+ if (temp[i] != data[i]) \ -+ return 1; \ -+ return 0; \ -+} -+ -+VARIANTS (TESTMETH) -+ -+#define CHECK(BASETYPE, NUM64, TYPE64, TYPE128, SUFFIX) \ -+ if (test_vget_low_##SUFFIX (BASETYPE ## _ ## data) != 0) \ -+ abort (); -+ -+int -+main (int argc, char **argv) -+{ -+ uint8_t uint8_t_data[16] = -+ { 1, 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47 }; -+ uint16_t uint16_t_data[8] = { 1, 22, 333, 4444, 55555, 6666, 777, 88 }; -+ uint32_t uint32_t_data[4] = { 65537, 11, 70000, 23 }; -+ uint64_t uint64_t_data[2] = { 0xdeadbeefcafebabeULL, 0x0123456789abcdefULL }; -+ int8_t int8_t_data[16] = -+ { -1, -3, -5, -7, 9, -11, -13, 15, -17, -19, 21, -23, 25, 27, -29, -31 }; -+ int16_t int16_t_data[8] = { -17, 19, 3, -999, 44048, 505, 9999, 1000}; -+ int32_t int32_t_data[4] = { 123456789, -987654321, -135792468, 975318642 }; -+ int64_t int64_t_data[2] = {0xfedcba9876543210LL, 0xdeadbabecafebeefLL }; -+ float32_t float32_t_data[4] = { 3.14159, 2.718, 1.414, 100.0 }; -+ float64_t float64_t_data[2] = { 1.01001000100001, 12345.6789 }; -+ -+ VARIANTS (CHECK); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_8.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_8.c -@@ -0,0 +1,18 @@ -+/* Verify: -+ * -fomit-frame-pointer. -+ * with outgoing. -+ * total frame size bigger than 512. -+ * number of callee-saved reg == 1. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */ -+ -+#include "test_frame_common.h" -+ -+t_frame_pattern_outgoing (test8, 700, , 8, a[8]) -+t_frame_run (test8) -+ -+/* { dg-final { scan-assembler-times "str\tx30, \\\[sp, -\[0-9\]+\\\]!" 
3 } } */ -+/* { dg-final { scan-assembler-times "ldr\tx30, \\\[sp\\\], \[0-9\]+" 3 } } */ -+ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/vset_lane_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vset_lane_1.c -@@ -0,0 +1,85 @@ -+/* { dg-do run } */ -+/* { dg-options "-O3 -fno-inline" } */ -+ -+#include <arm_neon.h> -+ -+extern void abort (void); -+ -+#define VARIANTS(VARIANT) \ -+VARIANT (uint8_t, , 8, uint8x8_t, _u8, 5) \ -+VARIANT (uint16_t, , 4, uint16x4_t, _u16, 3) \ -+VARIANT (uint32_t, , 2, uint32x2_t, _u32, 1) \ -+VARIANT (uint64_t, , 1, uint64x1_t, _u64, 0) \ -+VARIANT (int8_t, , 8, int8x8_t, _s8, 6) \ -+VARIANT (int16_t, , 4, int16x4_t, _s16, 2) \ -+VARIANT (int32_t, , 2, int32x2_t, _s32, 0) \ -+VARIANT (int64_t, , 1, int64x1_t, _s64, 0) \ -+VARIANT (poly8_t, , 8, poly8x8_t, _p8, 6) \ -+VARIANT (poly16_t, , 4, poly16x4_t, _p16, 2) \ -+VARIANT (float32_t, , 2, float32x2_t, _f32, 1) \ -+VARIANT (float64_t, , 1, float64x1_t, _f64, 0) \ -+VARIANT (uint8_t, q, 16, uint8x16_t, _u8, 11) \ -+VARIANT (uint16_t, q, 8, uint16x8_t, _u16, 7) \ -+VARIANT (uint32_t, q, 4, uint32x4_t, _u32, 2) \ -+VARIANT (uint64_t, q, 2, uint64x2_t, _u64, 1) \ -+VARIANT (int8_t, q, 16, int8x16_t, _s8, 13) \ -+VARIANT (int16_t, q, 8, int16x8_t, _s16, 5) \ -+VARIANT (int32_t, q, 4, int32x4_t, _s32, 3) \ -+VARIANT (int64_t, q, 2, int64x2_t, _s64, 0) \ -+VARIANT (poly8_t, q, 16, poly8x16_t, _p8, 14) \ -+VARIANT (poly16_t, q, 8, poly16x8_t, _p16, 6) \ -+VARIANT (float32_t, q, 4, float32x4_t, _f32, 2) \ -+VARIANT (float64_t, q, 2, float64x2_t, _f64, 1) -+ -+#define TESTMETH(BASETYPE, Q, NUM, TYPE, SUFFIX, INDEX) \ -+int \ -+test_vset_lane ##Q##SUFFIX (BASETYPE *data) \ -+{ \ -+ BASETYPE temp [NUM]; \ -+ TYPE vec = vld1##Q##SUFFIX (data); \ -+ TYPE vec2; \ -+ BASETYPE changed = data[INDEX] - INDEX; \ -+ int check; \ -+ vec = vset##Q##_lane##SUFFIX (changed, vec, INDEX); \ -+ asm volatile ("orr %0.16b, %1.16b, %1.16b" \ -+ : "=w"(vec2) : "w" (vec) : ); \ -+ vst1##Q##SUFFIX (temp, vec2); \ -+ for (check = 0; check < NUM; check++) \ -+ { \ -+ BASETYPE desired = data[check]; \ -+ if (check==INDEX) desired = changed; \ -+ if (temp[check] != desired) \ -+ return 1; \ -+ } \ -+ return 0; \ -+} -+ -+VARIANTS (TESTMETH) -+ -+#define CHECK(BASETYPE, Q, NUM, TYPE, SUFFIX, INDEX) \ -+ if (test_vset_lane##Q##SUFFIX (BASETYPE ## _ ## data) != 0) \ -+ abort (); -+ -+int -+main (int argc, char **argv) -+{ -+ uint8_t uint8_t_data[16] = -+ { 1, 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47 }; -+ uint16_t uint16_t_data[8] = { 1, 22, 333, 4444, 55555, 6666, 777, 88 }; -+ uint32_t uint32_t_data[4] = { 65537, 11, 70000, 23 }; -+ uint64_t uint64_t_data[2] = { 0xdeadbeefcafebabeULL, 0x0123456789abcdefULL }; -+ int8_t int8_t_data[16] = -+ { -1, -3, -5, -7, 9, -11, -13, 15, -17, -19, 21, -23, 25, 27, -29, -31 }; -+ int16_t int16_t_data[8] = { -17, 19, 3, -999, 44048, 505, 9999, 1000}; -+ int32_t int32_t_data[4] = { 123456789, -987654321, -135792468, 975318642 }; -+ int64_t int64_t_data[2] = {0xfedcba9876543210LL, 0xdeadbabecafebeefLL }; -+ poly8_t poly8_t_data[16] = -+ { 0, 7, 13, 18, 22, 25, 27, 28, 29, 31, 34, 38, 43, 49, 56, 64 }; -+ poly16_t poly16_t_data[8] = { 11111, 2222, 333, 44, 5, 65432, 54321, 43210 }; -+ float32_t float32_t_data[4] = { 3.14159, 2.718, 1.414, 100.0 }; -+ float64_t float64_t_data[2] = { 1.01001000100001, 12345.6789 }; -+ -+ VARIANTS (CHECK); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_12.c -+++ 
b/src/gcc/testsuite/gcc.target/aarch64/test_frame_12.c -@@ -0,0 +1,19 @@ -+/* Verify: -+ * with outgoing. -+ * total frame size <= 512. -+ * number of callee-save reg >= 2. */ -+ -+/* { dg-do run } */ -+/* { dg-options "-O2 --save-temps" } */ -+ -+#include "test_frame_common.h" -+ -+t_frame_pattern_outgoing (test12, 400, , 8, a[8]) -+t_frame_run (test12) -+ -+/* { dg-final { scan-assembler-times "sub\tsp, sp, #\[0-9\]+" 1 } } */ -+ -+/* Check epilogue using write-back. */ -+/* { dg-final { scan-assembler-times "ldp\tx29, x30, \\\[sp\\\], \[0-9\]+" 3 } } */ -+ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/lib/gcc.exp -+++ b/src/gcc/testsuite/lib/gcc.exp -@@ -126,7 +126,9 @@ - global GCC_UNDER_TEST - global TOOL_OPTIONS - global TEST_ALWAYS_FLAGS -- -+ global flags_to_postpone -+ global board_info -+ - if {[target_info needs_status_wrapper] != "" && \ - [target_info needs_status_wrapper] != "0" && \ - [info exists gluefile] } { -@@ -162,8 +164,26 @@ - set options [concat "{additional_flags=$TOOL_OPTIONS}" $options] - } - -+ # bind_pic_locally adds -fpie/-fPIE flags to flags_to_postpone and it is -+ # appended here to multilib_flags as it can be overridden by the latter -+ # if it was added earlier. After the target_compile, multilib_flags is -+ # restored to its orignal content. -+ set tboard [target_info name] -+ if {[board_info $tboard exists multilib_flags]} { -+ set orig_multilib_flags "[board_info [target_info name] multilib_flags]" -+ append board_info($tboard,multilib_flags) " $flags_to_postpone" -+ } -+ - lappend options "timeout=[timeout_value]" - lappend options "compiler=$GCC_UNDER_TEST" - set options [dg-additional-files-options $options $source] -- return [target_compile $source $dest $type $options] -+ set return_val [target_compile $source $dest $type $options] -+ -+ if {[board_info $tboard exists multilib_flags]} { -+ set board_info($tboard,multilib_flags) $orig_multilib_flags -+ set flags_to_postpone "" -+ } -+ -+ return $return_val - } -+ ---- a/src/gcc/testsuite/lib/g++.exp -+++ b/src/gcc/testsuite/lib/g++.exp -@@ -288,6 +288,8 @@ - global gluefile wrap_flags - global ALWAYS_CXXFLAGS - global GXX_UNDER_TEST -+ global flags_to_postpone -+ global board_info - - if { [target_info needs_status_wrapper] != "" && [info exists gluefile] } { - lappend options "libs=${gluefile}" -@@ -313,10 +315,25 @@ - exec rm -f $rponame - } - -+ # bind_pic_locally adds -fpie/-fPIE flags to flags_to_postpone and it is -+ # appended here to multilib_flags as it can be overridden by the latter -+ # if it was added earlier. After the target_compile, multilib_flags is -+ # restored to its orignal content. -+ set tboard [target_info name] -+ if {[board_info $tboard exists multilib_flags]} { -+ set orig_multilib_flags "[board_info [target_info name] multilib_flags]" -+ append board_info($tboard,multilib_flags) " $flags_to_postpone" -+ } -+ - set options [dg-additional-files-options $options $source] - - set result [target_compile $source $dest $type $options] - -+ if {[board_info $tboard exists multilib_flags]} { -+ set board_info($tboard,multilib_flags) $orig_multilib_flags -+ set flags_to_postpone "" -+ } -+ - return $result - } - ---- a/src/gcc/testsuite/lib/wrapper.exp -+++ b/src/gcc/testsuite/lib/wrapper.exp -@@ -34,9 +34,11 @@ - # became true for dejagnu-1.4.4. The set of warnings and code - # that gcc objects on may change, so just make sure -w is always - # passed to turn off all warnings. 
-+ unset_currtarget_info wrap_compile_flags - set_currtarget_info wrap_compile_flags \ - "$saved_wrap_compile_flags -w $flags" - set result [build_wrapper $filename] -+ unset_currtarget_info wrap_compile_flags - set_currtarget_info wrap_compile_flags "$saved_wrap_compile_flags" - if { $result != "" } { - set gluefile [lindex $result 0] ---- a/src/gcc/testsuite/lib/compat.exp -+++ b/src/gcc/testsuite/lib/compat.exp -@@ -134,7 +134,6 @@ - "$options"] - if ![${tool}_check_compile "$testcase $testname link" "" \ - $dest $comp_output] then { -- unresolved "$testcase $testname execute $optstr" - return - } - ---- a/src/gcc/testsuite/lib/gcc-defs.exp -+++ b/src/gcc/testsuite/lib/gcc-defs.exp -@@ -54,14 +54,19 @@ - if { [info proc ${tool}-dg-prune] != "" } { - global target_triplet - set gcc_output [${tool}-dg-prune $target_triplet $gcc_output] -+ if [string match "*::unsupported::*" $gcc_output] then { -+ regsub -- "::unsupported::" $gcc_output "" gcc_output -+ unsupported "$testcase: $gcc_output" -+ return 0 -+ } -+ } else { -+ set unsupported_message [${tool}_check_unsupported_p $gcc_output] -+ if { $unsupported_message != "" } { -+ unsupported "$testcase: $unsupported_message" -+ return 0 -+ } - } - -- set unsupported_message [${tool}_check_unsupported_p $gcc_output] -- if { $unsupported_message != "" } { -- unsupported "$testcase: $unsupported_message" -- return 0 -- } -- - # remove any leftover LF/CR to make sure any output is legit - regsub -all -- "\[\r\n\]*" $gcc_output "" gcc_output - ---- a/src/gcc/testsuite/lib/gfortran.exp -+++ b/src/gcc/testsuite/lib/gfortran.exp -@@ -234,6 +234,8 @@ - global gluefile wrap_flags - global ALWAYS_GFORTRANFLAGS - global GFORTRAN_UNDER_TEST -+ global flags_to_postpone -+ global board_info - - if { [target_info needs_status_wrapper] != "" && [info exists gluefile] } { - lappend options "libs=${gluefile}" -@@ -240,10 +242,27 @@ - lappend options "ldflags=${wrap_flags}" - } - -+ # bind_pic_locally adds -fpie/-fPIE flags to flags_to_postpone and it is -+ # appended here to multilib_flags as it can be overridden by the latter -+ # if it was added earlier. After the target_compile, multilib_flags is -+ # restored to its orignal content. 
-+ set tboard [target_info name] -+ if {[board_info $tboard exists multilib_flags]} { -+ set orig_multilib_flags "[board_info [target_info name] multilib_flags]" -+ append board_info($tboard,multilib_flags) " $flags_to_postpone" -+ } -+ - lappend options "compiler=$GFORTRAN_UNDER_TEST" - lappend options "timeout=[timeout_value]" - - set options [concat "$ALWAYS_GFORTRANFLAGS" $options] - set options [dg-additional-files-options $options $source] -- return [target_compile $source $dest $type $options] -+ set return_val [target_compile $source $dest $type $options] -+ -+ if {[board_info $tboard exists multilib_flags]} { -+ set board_info($tboard,multilib_flags) $orig_multilib_flags -+ set flags_to_postpone "" -+ } -+ -+ return $return_val - } ---- a/src/gcc/testsuite/lib/target-supports.exp -+++ b/src/gcc/testsuite/lib/target-supports.exp -@@ -2261,7 +2261,7 @@ - }] - } - --# Return 1 is this is an arm target using 32-bit instructions -+# Return 1 if this is an arm target using 32-bit instructions - proc check_effective_target_arm32 { } { - return [check_no_compiler_messages arm32 assembly { - #if !defined(__arm__) || (defined(__thumb__) && !defined(__thumb2__)) -@@ -2270,10 +2270,10 @@ - }] - } - --# Return 1 is this is an arm target not using Thumb -+# Return 1 if this is an arm target not using Thumb - proc check_effective_target_arm_nothumb { } { - return [check_no_compiler_messages arm_nothumb assembly { -- #if (defined(__thumb__) || defined(__thumb2__)) -+ #if !defined(__arm__) || (defined(__thumb__) || defined(__thumb2__)) - #error FOO - #endif - }] -@@ -2394,6 +2394,7 @@ - foreach flags {"" "-mfloat-abi=softfp" "-mfpu=crypto-neon-fp-armv8" "-mfpu=crypto-neon-fp-armv8 -mfloat-abi=softfp"} { - if { [check_no_compiler_messages_nocache arm_crypto_ok object { - #include "arm_neon.h" -+ extern uint8x16_t vaeseq_u8 (uint8x16_t, uint8x16_t); - uint8x16_t - foo (uint8x16_t a, uint8x16_t b) - { -@@ -2538,6 +2539,7 @@ - "-mfpu=neon-fp16 -mfloat-abi=softfp"} { - if { [check_no_compiler_messages_nocache arm_neon_fp_16_ok object { - #include "arm_neon.h" -+ extern float16x4_t vcvt_f16_f32 (float32x4_t); - float16x4_t - foo (float32x4_t arg) - { -@@ -2613,6 +2615,7 @@ - foreach flags {"" "-mfloat-abi=softfp" "-mfpu=neon-vfpv4" "-mfpu=neon-vfpv4 -mfloat-abi=softfp"} { - if { [check_no_compiler_messages_nocache arm_neonv2_ok object { - #include "arm_neon.h" -+ extern float32x2_t vfma_f32 (float32x2_t, float32x2_t, float32x2_t); - float32x2_t - foo (float32x2_t a, float32x2_t b, float32x2_t c) - { -@@ -3324,6 +3327,43 @@ - return $et_vect_shift_saved - } - -+proc check_effective_target_whole_vector_shift { } { -+ if { [istarget x86_64-*-*] -+ || [istarget ia64-*-*] -+ || ([check_effective_target_arm32] -+ && [check_effective_target_arm_little_endian]) -+ || ([istarget mips*-*-*] -+ && [check_effective_target_mips_loongson]) } { -+ set answer 1 -+ } else { -+ set answer 0 -+ } -+ -+ verbose "check_effective_target_vect_long: returning $answer" 2 -+ return $answer -+} -+ -+# Return 1 if the target supports vector bswap operations. 
-+ -+proc check_effective_target_vect_bswap { } { -+ global et_vect_bswap_saved -+ -+ if [info exists et_vect_bswap_saved] { -+ verbose "check_effective_target_vect_bswap: using cached result" 2 -+ } else { -+ set et_vect_bswap_saved 0 -+ if { [istarget aarch64*-*-*] -+ || ([istarget arm*-*-*] -+ && [check_effective_target_arm_neon]) -+ } { -+ set et_vect_bswap_saved 1 -+ } -+ } -+ -+ verbose "check_effective_target_vect_bswap: returning $et_vect_bswap_saved" 2 -+ return $et_vect_bswap_saved -+} -+ - # Return 1 if the target supports hardware vector shift operation for char. - - proc check_effective_target_vect_shift_char { } { -@@ -3522,8 +3562,7 @@ - } else { - set et_vect_perm_saved 0 - if { [is-effective-target arm_neon_ok] -- || ([istarget aarch64*-*-*] -- && [is-effective-target aarch64_little_endian]) -+ || [istarget aarch64*-*-*] - || [istarget powerpc*-*-*] - || [istarget spu-*-*] - || [istarget i?86-*-*] -@@ -5206,16 +5245,26 @@ - return $flags - } - -+if {![info exists flags_to_postpone]} { -+ set flags_to_postpone "" -+} -+ - # Add to FLAGS the flags needed to enable functions to bind locally - # when using pic/PIC passes in the testsuite. -+proc add_options_for_bind_pic_locally { flags } { -+ global flags_to_postpone - --proc add_options_for_bind_pic_locally { flags } { -+ # Instead of returning 'flags' with the -fPIE or -fpie appended, we save it -+ # in 'flags_to_postpone' and append it later in gcc_target_compile procedure in -+ # order to make sure that the multilib_flags doesn't override this. -+ - if {[check_no_compiler_messages using_pic2 assembly { - #if __PIC__ != 2 - #error FOO - #endif - }]} { -- return "$flags -fPIE" -+ set flags_to_postpone "-fPIE" -+ return $flags - } - if {[check_no_compiler_messages using_pic1 assembly { - #if __PIC__ != 1 -@@ -5222,9 +5271,9 @@ - #error FOO - #endif - }]} { -- return "$flags -fpie" -+ set flags_to_postpone "-fpie" -+ return $flags - } -- - return $flags - } - ---- a/src/gcc/testsuite/ChangeLog.linaro -+++ b/src/gcc/testsuite/ChangeLog.linaro -@@ -0,0 +1,1031 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2015-01-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r218451. -+ 2014-12-06 James Greenhalgh <james.greenhalgh@arm.com> -+ Sebastian Pop <s.pop@samsung.com> -+ Brian Rzycki <b.rzycki@samsung.com> -+ -+ PR tree-optimization/54742 -+ * gcc.dg/tree-ssa/ssa-dom-thread-6.c: New test. -+ * gcc.dg/tree-ssa/ssa-dom-thread-7.c: New test. -+ -+2015-01-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211075. -+ 2014-04-30 Alan Lawrence <alan.lawrence@arm.com> -+ -+ gcc.target/arm/simd/vrev16p8_1.c: New file. -+ gcc.target/arm/simd/vrev16qp8_1.c: New file. -+ gcc.target/arm/simd/vrev16qs8_1.c: New file. -+ gcc.target/arm/simd/vrev16qu8_1.c: New file. -+ gcc.target/arm/simd/vrev16s8_1.c: New file. -+ gcc.target/arm/simd/vrev16u8_1.c: New file. -+ gcc.target/arm/simd/vrev32p16_1.c: New file. -+ gcc.target/arm/simd/vrev32p8_1.c: New file. -+ gcc.target/arm/simd/vrev32qp16_1.c: New file. -+ gcc.target/arm/simd/vrev32qp8_1.c: New file. -+ gcc.target/arm/simd/vrev32qs16_1.c: New file. -+ gcc.target/arm/simd/vrev32qs8_1.c: New file. -+ gcc.target/arm/simd/vrev32qu16_1.c: New file. -+ gcc.target/arm/simd/vrev32qu8_1.c: New file. -+ gcc.target/arm/simd/vrev32s16_1.c: New file. -+ gcc.target/arm/simd/vrev32s8_1.c: New file. -+ gcc.target/arm/simd/vrev32u16_1.c: New file. -+ gcc.target/arm/simd/vrev32u8_1.c: New file. -+ gcc.target/arm/simd/vrev64f32_1.c: New file. 
-+ gcc.target/arm/simd/vrev64p16_1.c: New file. -+ gcc.target/arm/simd/vrev64p8_1.c: New file. -+ gcc.target/arm/simd/vrev64qf32_1.c: New file. -+ gcc.target/arm/simd/vrev64qp16_1.c: New file. -+ gcc.target/arm/simd/vrev64qp8_1.c: New file. -+ gcc.target/arm/simd/vrev64qs16_1.c: New file. -+ gcc.target/arm/simd/vrev64qs32_1.c: New file. -+ gcc.target/arm/simd/vrev64qs8_1.c: New file. -+ gcc.target/arm/simd/vrev64qu16_1.c: New file. -+ gcc.target/arm/simd/vrev64qu32_1.c: New file. -+ gcc.target/arm/simd/vrev64qu8_1.c: New file. -+ gcc.target/arm/simd/vrev64s16_1.c: New file. -+ gcc.target/arm/simd/vrev64s32_1.c: New file. -+ gcc.target/arm/simd/vrev64s8_1.c: New file. -+ gcc.target/arm/simd/vrev64u16_1.c: New file. -+ gcc.target/arm/simd/vrev64u32_1.c: New file. -+ gcc.target/arm/simd/vrev64u8_1.c: New file. -+ -+2015-01-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209620. -+ 2014-04-22 Vidya Praveen <vidyapraveen@arm.com> -+ -+ * gcc.target/aarch64/cvtf_1.c: New. -+ -+2015-01-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r217362. -+ 2014-11-11 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * gcc.target/aarch64/vbslq_f64_1.c: New. -+ * gcc.target/aarch64/vbslq_f64_2.c: Likewise. -+ * gcc.target/aarch64/vbslq_u64_1.c: Likewise. -+ * gcc.target/aarch64/vbslq_u64_2.c: Likewise. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r217742. -+ 2014-11-18 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ PR target/63937 -+ * gcc.dg/memset-2.c: New. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r216638. -+ 2014-10-24 Christophe Lyon <christophe.lyon@linaro.org> -+ -+ * lib/wrapper.exp ({tool}_maybe_build_wrapper): Clear -+ wrap_compile_flags before setting it. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r216544. -+ 2014-10-22 Jiong Wang <jiong.wang@arm.com> -+ -+ * gcc.target/aarch64/pic-constantpool1.c: Add explicit declaration. -+ * gcc.target/aarch64/pic-symrefplus.c: Likewise. -+ * gcc.target/aarch64/reload-valid-spoff.c: Likewise. -+ * gcc.target/aarch64/vect.x: Likewise. -+ * gcc.target/aarch64/vect-ld1r.x: Add return type. -+ * gcc.target/aarch64/vect-fmax-fmin.c: Likewise. -+ * gcc.target/aarch64/vect-fp.c: Likewise. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r216543. -+ 2014-10-22 Jiong Wang <jiong.wang@arm.com> -+ -+ * lib/compat.exp (compat-run): Remove "unresolved". -+ * lib/gcc-defs.exp (${tools}_check_compile): Update code logic for -+ unsupported testcase. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r216517. -+ 2014-10-21 Jiong Wang <jiong.wang@arm.com> -+ -+ * gcc.target/arm/20031108-1.c (Proc_7): Add explicit declaration. -+ (Proc_1): Add return type. -+ * gcc.target/arm/cold-lc.c (show_stack): Add explict declaration. -+ * gcc.target/arm/neon-modes-2.c (foo): Likewise. -+ * gcc.target/arm/pr43920-2.c (lseek): Likewise. -+ * gcc.target/arm/pr44788.c (foo): Likewise. -+ * gcc.target/arm/pr55642.c (abs): Likewise. -+ * gcc.target/arm/pr58784.c (f): Likewise. -+ * gcc.target/arm/pr60650.c (foo1, foo2): Likewise. -+ * gcc.target/arm/vfp-ldmdbs.c (bar): Likewise. -+ * gcc.target/arm/vfp-ldmias.c (bar): Likewise. -+ * gcc.target/arm/pr60650-2.c (fn1, fn2): Add return type and add type -+ for local variables. -+ * lib/target-supports.exp -+ (check_effective_target_arm_crypto_ok_nocache): Add declaration for -+ vaeseq_u8. 
-+ (check_effective_target_arm_neon_fp16_ok_nocache): Add declaration for -+ vcvt_f16_f32. -+ (check_effective_target_arm_neonv2_ok_nocache): Add declaration for -+ vfma_f32. -+ * gcc.target/arm/pr51968.c: Add -Wno-implicit-function-declaration. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215071. -+ 2014-09-09 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.target/aarch64/simd/int_comparisons_1.c: Tighten regexp. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215540. -+ 2014-09-24 Zhenqiang Chen <zhenqiang.chen@arm.com> -+ -+ * gcc.target/arm/pr63210.c: New test. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215475. -+ 2014-09-22 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.dg/vect/vect-reduc-or_1.c: New test. -+ * gcc.dg/vect/vect-reduc-or_2.c: Likewise. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215473. -+ 2014-09-22 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * lib/target-supports.exp (check_effective_target_whole_vector_shift): -+ New. -+ -+ * gcc.dg/vect/vect-reduc-mul_1.c: New test. -+ * gcc.dg/vect/vect-reduc-mul_2.c: New test. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215177. -+ 2014-09-11 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.target/aarch64/vset_lane_1.c: New test. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215129. -+ 2014-09-10 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.target/aarch64/vstN_1.c: New test. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215126. -+ 2014-09-10 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.target/aarch64/vldN_lane_1.c: New test. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215078. -+ 2014-09-09 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.target/aarch64/vldN_dup_1.c: New test. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215077. -+ 2014-09-09 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.target/aarch64/vld1-vst1_1.c: Rewrite to test all variants. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215072. -+ 2014-09-09 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.target/aarch64/vldN_1.c: New test. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215047. -+ 2014-09-09 Tony Wang <tony.wang@arm.com> -+ -+ * gcc.target/arm/xordi3-opt.c: Disable this -+ test case for thumb1 target. -+ * gcc.target/arm/iordi3-opt.c: Ditto. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215046. -+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ PR target/61749 -+ * gcc.target/aarch64/vqdml_lane_intrinsics-bad_1.c: New test. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r214950. -+ 2014-09-05 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.target/aarch64/vget_high_1.c: New test. -+ * gcc.target/aarch64/vget_low_1.c: Likewise. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r214948. -+ 2014-09-05 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.target/aarch64/simd/int_comparisons.x: New file. -+ * gcc.target/aarch64/simd/int_comparisons_1.c: New test. -+ * gcc.target/aarch64/simd/int_comparisons_2.c: Ditto. -+ -+2014-12-04 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213382. 
-+ 2014-07-31 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * gcc.target/aarch64/scalar_intrinsics.c (test_vpaddd_f64): New. -+ (test_vpaddd_s64): Likewise. -+ (test_vpaddd_s64): Likewise. -+ * gcc.target/aarch64/simd/vpaddd_f64: New. -+ * gcc.target/aarch64/simd/vpaddd_s64: New. -+ * gcc.target/aarch64/simd/vpaddd_u64: New. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-10-08 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r214825, r214826, r215085. -+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * gcc.target/arm/vect-lceilf_1.c: Make input and output arrays global -+ and 16-byte aligned. -+ * gcc.target/arm/vect-lfloorf_1.c: Likewise. -+ * gcc.target/arm/vect-lroundf_1.c: Likewise. -+ * gcc.target/arm/vect-rounding-btruncf.c: Likewise. -+ * gcc.target/arm/vect-rounding-ceilf.c: Likewise. -+ * gcc.target/arm/vect-rounding-floorf.c: Likewise. -+ * gcc.target/arm/vect-rounding-roundf.c: Likewise. -+ -+ 2014-09-02 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ PR target/62275 -+ * gcc.target/arm/vect-lceilf_1.c: New test. -+ * gcc.target/arm/vect-lfloorf_1.c: Likewise. -+ * gcc.target/arm/vect-lroundf_1.c: Likewise. -+ -+ 2014-09-02 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ PR target/62275 -+ * gcc.target/arm/lceil-vcvt_1.c: New test. -+ * gcc.target/arm/lfloor-vcvt_1.c: Likewise. -+ * gcc.target/arm/lround-vcvt_1.c: Likewise. -+ -+2014-10-06 Venkataramanan Kumar <venkataramanan.kumar@linaro.org> -+ -+ Backport from trunk r214943. -+ 2014-09-05 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.target/aarch64/simd/vrbit_1.c: New test. -+ -+2014-10-06 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215385. -+ 2014-09-19 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * gcc.dg/ssp-3.c: New. -+ * gcc.dg/ssp-4.c: Likewise. -+ -+2014-10-06 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215136. -+ 2014-09-10 Xinliang David Li <davidxl@google.com> -+ -+ PR target/63209 -+ * gcc.c-torture/execute/pr63209.c: New test. -+ -+2014-10-06 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215067. -+ 2014-09-09 Jiong Wang <jiong.wang@arm.com> -+ -+ * gcc.target/arm/vect-copysignf.c: New testcase. -+ -+2014-10-03 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r215050, r215051, r215052, r215053, r215054. -+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * gcc.target/arm/vfp-1.c: Updated expected assembly. -+ -+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * gcc.target/arm/vfp-1.c: Updated expected assembly. -+ -+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * gcc.target/arm/vfp-1.c: Updated expected assembly. -+ -+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * gcc.target/arm/vfp-1.c: Updated expected assembly. -+ -+ 2014-09-09 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * gcc.target/arm/pr51835.c: Update expected assembly. -+ * gcc.target/arm/vfp-1.c: Likewise. -+ * gcc.target/arm/vfp-ldmdbd.c: Likewise. -+ * gcc.target/arm/vfp-ldmdbs.c: Likewise. -+ * gcc.target/arm/vfp-ldmiad.c: Likewise. -+ * gcc.target/arm/vfp-ldmias.c: Likewise. -+ * gcc.target/arm/vfp-stmdbd.c: Likewise. -+ * gcc.target/arm/vfp-stmdbs.c: Likewise. -+ * gcc.target/arm/vfp-stmiad.c: Likewise. -+ * gcc.target/arm/vfp-stmias.c: Likewise. 
-+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-09-03 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r214526. -+ 2014-08-26 Joseph Myers <joseph@codesourcery.com> -+ -+ PR target/60606 -+ PR target/61330 -+ * gcc.dg/torture/pr60606-1.c, gcc.target/arm/pr60606-2.c, -+ gcc.target/arm/pr60606-3.c, gcc.target/arm/pr60606-4.c: New tests. -+ -+2014-09-03 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213659. -+ 2014-08-06 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.target/aarch64/vdup_n_2.c: New test. -+ -+2014-08-26 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213701. -+ 2014-08-07 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * gcc.dg/pr61756.c: Remove arm-specific dg-options. -+ -+2014-08-26 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213488, r213489. -+ 2014-08-01 Jiong Wang <jiong.wang@arm.com> -+ -+ * gcc.target/aarch64/legitimize_stack_var_before_reload_1.c: New -+ testcase. -+ -+2014-08-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r212927. -+ 2014-07-23 Jiong Wang <jiong.wang@arm.com> -+ -+ * gcc.dg/ira-shrinkwrap-prep-1.c (target): Add arm_nothumb. -+ * gcc.dg/ira-shrinkwrap-prep-2.c (target): Likewise. -+ * gcc.dg/pr10474.c (target): Likewise. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-08-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213555. -+ 2014-08-04 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ PR target/61713 -+ * gcc.dg/pr61756.c: New test. -+ -+2014-08-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r213376. -+ 2014-07-31 Charles Baylis <charles.baylis@linaro.org> -+ -+ PR target/61948 -+ * gcc.target/arm/pr61948.c: New test case. -+ -+2014-08-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r212959, r212976, r212999, r213000. -+ 2014-07-24 Jiong Wang <jiong.wang@arm.com> -+ -+ * gcc.target/aarch64/test_frame_1.c: Match optimized instruction -+ sequences. -+ * gcc.target/aarch64/test_frame_2.c: Likewise. -+ * gcc.target/aarch64/test_frame_4.c: Likewise. -+ * gcc.target/aarch64/test_frame_6.c: Likewise. -+ * gcc.target/aarch64/test_frame_7.c: Likewise. -+ * gcc.target/aarch64/test_frame_8.c: Likewise. -+ * gcc.target/aarch64/test_frame_10.c: Likewise. -+ -+ 2014-07-24 Jiong Wang <jiong.wang@arm.com> -+ -+ * gcc.target/aarch64/test_frame_1.c: Match optimized instruction -+ sequences. -+ * gcc.target/aarch64/test_frame_10.c: Likewise. -+ * gcc.target/aarch64/test_frame_2.c: Likewise. -+ * gcc.target/aarch64/test_frame_4.c: Likewise. -+ * gcc.target/aarch64/test_frame_6.c: Likewise. -+ * gcc.target/aarch64/test_frame_7.c: Likewise. -+ * gcc.target/aarch64/test_frame_8.c: Likewise. -+ * gcc.target/aarch64/test_fp_attribute_1.c: Likewise. -+ -+ 2014-07-24 Jiong Wang <jiong.wang@arm.com> -+ -+ * gcc.target/aarch64/test_frame_12.c: Match optimized instruction -+ sequences. -+ -+ 2014-07-23 Jiong Wang <jiong.wang@arm.com> -+ -+ * gcc.target/aarch64/test_frame_common.h: New file. -+ * gcc.target/aarch64/test_frame_1.c: Likewise. -+ * gcc.target/aarch64/test_frame_2.c: Likewise. -+ * gcc.target/aarch64/test_frame_3.c: Likewise. -+ * gcc.target/aarch64/test_frame_4.c: Likewise. -+ * gcc.target/aarch64/test_frame_5.c: Likewise. -+ * gcc.target/aarch64/test_frame_6.c: Likewise. -+ * gcc.target/aarch64/test_frame_7.c: Likewise. -+ * gcc.target/aarch64/test_frame_8.c: Likewise. -+ * gcc.target/aarch64/test_frame_9.c: Likewise. 
-+ * gcc.target/aarch64/test_frame_10.c: Likewise. -+ * gcc.target/aarch64/test_frame_11.c: Likewise. -+ * gcc.target/aarch64/test_frame_12.c: Likewise. -+ * gcc.target/aarch64/test_frame_13.c: Likewise. -+ * gcc.target/aarch64/test_frame_14.c: Likewise. -+ * gcc.target/aarch64/test_frame_15.c: Likewise. -+ -+2014-08-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r212023, r212024. -+ 2014-06-26 Vidya Praveen <vidyapraveen@arm.com> -+ -+ * gcc.dg/inline-22.c: Add bind_pic_locally. -+ * gcc.dg/inline_4.c: Ditto. -+ * gcc.dg/fail_always_inline.c: Ditto. -+ * g++.dg/ipa/devirt-25.C: Ditto. -+ -+ 2014-06-26 Vidya Praveen <vidyapraveen@arm.com> -+ -+ * lib/target-support.exp (bind_pic_locally): Save the flags to -+ 'flags_to_postpone' instead of appending to 'flags'. -+ * lib/gcc.exp (gcc_target_compile): Append board_info's multilib_flags -+ with flags_to_postpone and revert after target_compile. -+ * lib/g++.exp (g++_target_compile): Ditto. -+ * lib/gfortran.exp (gfortran_target_compile): Ditto. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211887. -+ 2014-06-23 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * gcc.target/aarch64/scalar_shift_1.c: Fix expected assembler. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211441. -+ 2014-06-11 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * gcc.target/aarch64/acle/acle.exp: New. -+ * gcc.target/aarch64/acle/crc32b.c: New test. -+ * gcc.target/aarch64/acle/crc32cb.c: Likewise. -+ * gcc.target/aarch64/acle/crc32cd.c: Likewise. -+ * gcc.target/aarch64/acle/crc32ch.c: Likewise. -+ * gcc.target/aarch64/acle/crc32cw.c: Likewise. -+ * gcc.target/aarch64/acle/crc32d.c: Likewise. -+ * gcc.target/aarch64/acle/crc32h.c: Likewise. -+ * gcc.target/aarch64/acle/crc32w.c: Likewise. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r210153. -+ 2014-05-07 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.target/aarch64/simd/vrev16p8_1.c: New file. -+ * gcc.target/aarch64/simd/vrev16p8.x: New file. -+ * gcc.target/aarch64/simd/vrev16qp8_1.c: New file. -+ * gcc.target/aarch64/simd/vrev16qp8.x: New file. -+ * gcc.target/aarch64/simd/vrev16qs8_1.c: New file. -+ * gcc.target/aarch64/simd/vrev16qs8.x: New file. -+ * gcc.target/aarch64/simd/vrev16qu8_1.c: New file. -+ * gcc.target/aarch64/simd/vrev16qu8.x: New file. -+ * gcc.target/aarch64/simd/vrev16s8_1.c: New file. -+ * gcc.target/aarch64/simd/vrev16s8.x: New file. -+ * gcc.target/aarch64/simd/vrev16u8_1.c: New file. -+ * gcc.target/aarch64/simd/vrev16u8.x: New file. -+ * gcc.target/aarch64/simd/vrev32p16_1.c: New file. -+ * gcc.target/aarch64/simd/vrev32p16.x: New file. -+ * gcc.target/aarch64/simd/vrev32p8_1.c: New file. -+ * gcc.target/aarch64/simd/vrev32p8.x: New file. -+ * gcc.target/aarch64/simd/vrev32qp16_1.c: New file. -+ * gcc.target/aarch64/simd/vrev32qp16.x: New file. -+ * gcc.target/aarch64/simd/vrev32qp8_1.c: New file. -+ * gcc.target/aarch64/simd/vrev32qp8.x: New file. -+ * gcc.target/aarch64/simd/vrev32qs16_1.c: New file. -+ * gcc.target/aarch64/simd/vrev32qs16.x: New file. -+ * gcc.target/aarch64/simd/vrev32qs8_1.c: New file. -+ * gcc.target/aarch64/simd/vrev32qs8.x: New file. -+ * gcc.target/aarch64/simd/vrev32qu16_1.c: New file. -+ * gcc.target/aarch64/simd/vrev32qu16.x: New file. 
-+ * gcc.target/aarch64/simd/vrev32qu8_1.c: New file. -+ * gcc.target/aarch64/simd/vrev32qu8.x: New file. -+ * gcc.target/aarch64/simd/vrev32s16_1.c: New file. -+ * gcc.target/aarch64/simd/vrev32s16.x: New file. -+ * gcc.target/aarch64/simd/vrev32s8_1.c: New file. -+ * gcc.target/aarch64/simd/vrev32s8.x: New file. -+ * gcc.target/aarch64/simd/vrev32u16_1.c: New file. -+ * gcc.target/aarch64/simd/vrev32u16.x: New file. -+ * gcc.target/aarch64/simd/vrev32u8_1.c: New file. -+ * gcc.target/aarch64/simd/vrev32u8.x: New file. -+ * gcc.target/aarch64/simd/vrev64f32_1.c: New file. -+ * gcc.target/aarch64/simd/vrev64f32.x: New file. -+ * gcc.target/aarch64/simd/vrev64p16_1.c: New file. -+ * gcc.target/aarch64/simd/vrev64p16.x: New file. -+ * gcc.target/aarch64/simd/vrev64p8_1.c: New file. -+ * gcc.target/aarch64/simd/vrev64p8.x: New file. -+ * gcc.target/aarch64/simd/vrev64qf32_1.c: New file. -+ * gcc.target/aarch64/simd/vrev64qf32.x: New file. -+ * gcc.target/aarch64/simd/vrev64qp16_1.c: New file. -+ * gcc.target/aarch64/simd/vrev64qp16.x: New file. -+ * gcc.target/aarch64/simd/vrev64qp8_1.c: New file. -+ * gcc.target/aarch64/simd/vrev64qp8.x: New file. -+ * gcc.target/aarch64/simd/vrev64qs16_1.c: New file. -+ * gcc.target/aarch64/simd/vrev64qs16.x: New file. -+ * gcc.target/aarch64/simd/vrev64qs32_1.c: New file. -+ * gcc.target/aarch64/simd/vrev64qs32.x: New file. -+ * gcc.target/aarch64/simd/vrev64qs8_1.c: New file. -+ * gcc.target/aarch64/simd/vrev64qs8.x: New file. -+ * gcc.target/aarch64/simd/vrev64qu16_1.c: New file. -+ * gcc.target/aarch64/simd/vrev64qu16.x: New file. -+ * gcc.target/aarch64/simd/vrev64qu32_1.c: New file. -+ * gcc.target/aarch64/simd/vrev64qu32.x: New file. -+ * gcc.target/aarch64/simd/vrev64qu8_1.c: New file. -+ * gcc.target/aarch64/simd/vrev64qu8.x: New file. -+ * gcc.target/aarch64/simd/vrev64s16_1.c: New file. -+ * gcc.target/aarch64/simd/vrev64s16.x: New file. -+ * gcc.target/aarch64/simd/vrev64s32_1.c: New file. -+ * gcc.target/aarch64/simd/vrev64s32.x: New file. -+ * gcc.target/aarch64/simd/vrev64s8_1.c: New file. -+ * gcc.target/aarch64/simd/vrev64s8.x: New file. -+ * gcc.target/aarch64/simd/vrev64u16_1.c: New file. -+ * gcc.target/aarch64/simd/vrev64u16.x: New file. -+ * gcc.target/aarch64/simd/vrev64u32_1.c: New file. -+ * gcc.target/aarch64/simd/vrev64u32.x: New file. -+ * gcc.target/aarch64/simd/vrev64u8_1.c: New file. -+ * gcc.target/aarch64/simd/vrev64u8.x: New file. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r210148, r210151, r210422. -+ 2014-05-14 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.target/arm/simd/vtrnqf32_1.c: New file. -+ * gcc.target/arm/simd/vtrnqp16_1.c: New file. -+ * gcc.target/arm/simd/vtrnqp8_1.c: New file. -+ * gcc.target/arm/simd/vtrnqs16_1.c: New file. -+ * gcc.target/arm/simd/vtrnqs32_1.c: New file. -+ * gcc.target/arm/simd/vtrnqs8_1.c: New file. -+ * gcc.target/arm/simd/vtrnqu16_1.c: New file. -+ * gcc.target/arm/simd/vtrnqu32_1.c: New file. -+ * gcc.target/arm/simd/vtrnqu8_1.c: New file. -+ * gcc.target/arm/simd/vtrnf32_1.c: New file. -+ * gcc.target/arm/simd/vtrnp16_1.c: New file. -+ * gcc.target/arm/simd/vtrnp8_1.c: New file. -+ * gcc.target/arm/simd/vtrns16_1.c: New file. -+ * gcc.target/arm/simd/vtrns32_1.c: New file. -+ * gcc.target/arm/simd/vtrns8_1.c: New file. -+ * gcc.target/arm/simd/vtrnu16_1.c: New file. -+ * gcc.target/arm/simd/vtrnu32_1.c: New file. -+ * gcc.target/arm/simd/vtrnu8_1.c: New file. 
-+ -+ 2014-05-07 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.target/aarch64/vtrns32.c: Expect zip[12] insn rather than trn[12]. -+ * gcc.target/aarch64/vtrnu32.c: Likewise. -+ * gcc.target/aarch64/vtrnf32.c: Likewise. -+ -+ 2014-05-07 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.target/aarch64/simd/vtrnf32_1.c: New file. -+ * gcc.target/aarch64/simd/vtrnf32.x: New file. -+ * gcc.target/aarch64/simd/vtrnp16_1.c: New file. -+ * gcc.target/aarch64/simd/vtrnp16.x: New file. -+ * gcc.target/aarch64/simd/vtrnp8_1.c: New file. -+ * gcc.target/aarch64/simd/vtrnp8.x: New file. -+ * gcc.target/aarch64/simd/vtrnqf32_1.c: New file. -+ * gcc.target/aarch64/simd/vtrnqf32.x: New file. -+ * gcc.target/aarch64/simd/vtrnqp16_1.c: New file. -+ * gcc.target/aarch64/simd/vtrnqp16.x: New file. -+ * gcc.target/aarch64/simd/vtrnqp8_1.c: New file. -+ * gcc.target/aarch64/simd/vtrnqp8.x: New file. -+ * gcc.target/aarch64/simd/vtrnqs16_1.c: New file. -+ * gcc.target/aarch64/simd/vtrnqs16.x: New file. -+ * gcc.target/aarch64/simd/vtrnqs32_1.c: New file. -+ * gcc.target/aarch64/simd/vtrnqs32.x: New file. -+ * gcc.target/aarch64/simd/vtrnqs8_1.c: New file. -+ * gcc.target/aarch64/simd/vtrnqs8.x: New file. -+ * gcc.target/aarch64/simd/vtrnqu16_1.c: New file. -+ * gcc.target/aarch64/simd/vtrnqu16.x: New file. -+ * gcc.target/aarch64/simd/vtrnqu32_1.c: New file. -+ * gcc.target/aarch64/simd/vtrnqu32.x: New file. -+ * gcc.target/aarch64/simd/vtrnqu8_1.c: New file. -+ * gcc.target/aarch64/simd/vtrnqu8.x: New file. -+ * gcc.target/aarch64/simd/vtrns16_1.c: New file. -+ * gcc.target/aarch64/simd/vtrns16.x: New file. -+ * gcc.target/aarch64/simd/vtrns32_1.c: New file. -+ * gcc.target/aarch64/simd/vtrns32.x: New file. -+ * gcc.target/aarch64/simd/vtrns8_1.c: New file. -+ * gcc.target/aarch64/simd/vtrns8.x: New file. -+ * gcc.target/aarch64/simd/vtrnu16_1.c: New file. -+ * gcc.target/aarch64/simd/vtrnu16.x: New file. -+ * gcc.target/aarch64/simd/vtrnu32_1.c: New file. -+ * gcc.target/aarch64/simd/vtrnu32.x: New file. -+ * gcc.target/aarch64/simd/vtrnu8_1.c: New file. -+ * gcc.target/aarch64/simd/vtrnu8.x: New file. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209794, 209858. -+ 2014-04-25 Marek Polacek <polacek@redhat.com> -+ -+ PR c/60114 -+ * gcc.dg/pr60114.c: New test. -+ -+ 2014-04-28 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ PR c/60983 -+ * gcc.dg/pr60114.c: Use signed chars. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r210861. -+ 2014-05-23 Jiong Wang <jiong.wang@arm.com> -+ -+ * gcc.target/aarch64/tail_indirect_call_1.c: New. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211314. -+ 2014-06-06 James Greenhalgh <james.greenhalgh@arm.com> -+ -+ * gcc.dg/tree-ssa/pr42585.c: Skip for AArch64. -+ * gcc.dg/tree-ssa/sra-12.c: Likewise. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r210967. -+ 2014-05-27 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * lib/target-supports.exp (check_effective_target_vect_bswap): -+ Specify arm*-*-* support. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r210152, 211059. -+ 2014-05-29 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.target/arm/simd/vextQf32_1.c: New file. -+ * gcc.target/arm/simd/vextQp16_1.c: New file. -+ * gcc.target/arm/simd/vextQp8_1.c: New file. -+ * gcc.target/arm/simd/vextQs16_1.c: New file. -+ * gcc.target/arm/simd/vextQs32_1.c: New file. -+ * gcc.target/arm/simd/vextQs64_1.c: New file. 
-+ * gcc.target/arm/simd/vextQs8_1.c: New file. -+ * gcc.target/arm/simd/vextQu16_1.c: New file. -+ * gcc.target/arm/simd/vextQu32_1.c: New file. -+ * gcc.target/arm/simd/vextQu64_1.c: New file. -+ * gcc.target/arm/simd/vextQu8_1.c: New file. -+ * gcc.target/arm/simd/vextQp64_1.c: New file. -+ * gcc.target/arm/simd/vextf32_1.c: New file. -+ * gcc.target/arm/simd/vextp16_1.c: New file. -+ * gcc.target/arm/simd/vextp8_1.c: New file. -+ * gcc.target/arm/simd/vexts16_1.c: New file. -+ * gcc.target/arm/simd/vexts32_1.c: New file. -+ * gcc.target/arm/simd/vexts64_1.c: New file. -+ * gcc.target/arm/simd/vexts8_1.c: New file. -+ * gcc.target/arm/simd/vextu16_1.c: New file. -+ * gcc.target/arm/simd/vextu32_1.c: New file. -+ * gcc.target/arm/simd/vextu64_1.c: New file. -+ * gcc.target/arm/simd/vextu8_1.c: New file. -+ * gcc.target/arm/simd/vextp64_1.c: New file. -+ -+ 2014-05-07 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.target/aarch64/simd/ext_f32.x: New file. -+ * gcc.target/aarch64/simd/ext_f32_1.c: New file. -+ * gcc.target/aarch64/simd/ext_p16.x: New file. -+ * gcc.target/aarch64/simd/ext_p16_1.c: New file. -+ * gcc.target/aarch64/simd/ext_p8.x: New file. -+ * gcc.target/aarch64/simd/ext_p8_1.c: New file. -+ * gcc.target/aarch64/simd/ext_s16.x: New file. -+ * gcc.target/aarch64/simd/ext_s16_1.c: New file. -+ * gcc.target/aarch64/simd/ext_s32.x: New file. -+ * gcc.target/aarch64/simd/ext_s32_1.c: New file. -+ * gcc.target/aarch64/simd/ext_s64.x: New file. -+ * gcc.target/aarch64/simd/ext_s64_1.c: New file. -+ * gcc.target/aarch64/simd/ext_s8.x: New file. -+ * gcc.target/aarch64/simd/ext_s8_1.c: New file. -+ * gcc.target/aarch64/simd/ext_u16.x: New file. -+ * gcc.target/aarch64/simd/ext_u16_1.c: New file. -+ * gcc.target/aarch64/simd/ext_u32.x: New file. -+ * gcc.target/aarch64/simd/ext_u32_1.c: New file. -+ * gcc.target/aarch64/simd/ext_u64.x: New file. -+ * gcc.target/aarch64/simd/ext_u64_1.c: New file. -+ * gcc.target/aarch64/simd/ext_u8.x: New file. -+ * gcc.target/aarch64/simd/ext_u8_1.c: New file. -+ * gcc.target/aarch64/simd/ext_f64.c: New file. -+ * gcc.target/aarch64/simd/extq_f32.x: New file. -+ * gcc.target/aarch64/simd/extq_f32_1.c: New file. -+ * gcc.target/aarch64/simd/extq_p16.x: New file. -+ * gcc.target/aarch64/simd/extq_p16_1.c: New file. -+ * gcc.target/aarch64/simd/extq_p8.x: New file. -+ * gcc.target/aarch64/simd/extq_p8_1.c: New file. -+ * gcc.target/aarch64/simd/extq_s16.x: New file. -+ * gcc.target/aarch64/simd/extq_s16_1.c: New file. -+ * gcc.target/aarch64/simd/extq_s32.x: New file. -+ * gcc.target/aarch64/simd/extq_s32_1.c: New file. -+ * gcc.target/aarch64/simd/extq_s64.x: New file. -+ * gcc.target/aarch64/simd/extq_s64_1.c: New file. -+ * gcc.target/aarch64/simd/extq_s8.x: New file. -+ * gcc.target/aarch64/simd/extq_s8_1.c: New file. -+ * gcc.target/aarch64/simd/extq_u16.x: New file. -+ * gcc.target/aarch64/simd/extq_u16_1.c: New file. -+ * gcc.target/aarch64/simd/extq_u32.x: New file. -+ -+2014-07-16 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209940, r209943, r209947. -+ 2014-04-30 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.target/arm/simd/vuzpqf32_1.c: New file. -+ * gcc.target/arm/simd/vuzpqp16_1.c: New file. -+ * gcc.target/arm/simd/vuzpqp8_1.c: New file. -+ * gcc.target/arm/simd/vuzpqs16_1.c: New file. -+ * gcc.target/arm/simd/vuzpqs32_1.c: New file. -+ * gcc.target/arm/simd/vuzpqs8_1.c: New file. -+ * gcc.target/arm/simd/vuzpqu16_1.c: New file. -+ * gcc.target/arm/simd/vuzpqu32_1.c: New file. 
-+ * gcc.target/arm/simd/vuzpqu8_1.c: New file. -+ * gcc.target/arm/simd/vuzpf32_1.c: New file. -+ * gcc.target/arm/simd/vuzpp16_1.c: New file. -+ * gcc.target/arm/simd/vuzpp8_1.c: New file. -+ * gcc.target/arm/simd/vuzps16_1.c: New file. -+ * gcc.target/arm/simd/vuzps32_1.c: New file. -+ * gcc.target/arm/simd/vuzps8_1.c: New file. -+ * gcc.target/arm/simd/vuzpu16_1.c: New file. -+ * gcc.target/arm/simd/vuzpu32_1.c: New file. -+ * gcc.target/arm/simd/vuzpu8_1.c: New file. -+ -+ 2014-04-30 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.target/aarch64/vuzps32_1.c: Expect zip1/2 insn rather than uzp1/2. -+ * gcc.target/aarch64/vuzpu32_1.c: Likewise. -+ * gcc.target/aarch64/vuzpf32_1.c: Likewise. -+ -+ 2014-04-30 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.target/aarch64/simd/vuzpf32_1.c: New file. -+ * gcc.target/aarch64/simd/vuzpf32.x: New file. -+ * gcc.target/aarch64/simd/vuzpp16_1.c: New file. -+ * gcc.target/aarch64/simd/vuzpp16.x: New file. -+ * gcc.target/aarch64/simd/vuzpp8_1.c: New file. -+ * gcc.target/aarch64/simd/vuzpp8.x: New file. -+ * gcc.target/aarch64/simd/vuzpqf32_1.c: New file. -+ * gcc.target/aarch64/simd/vuzpqf32.x: New file. -+ * gcc.target/aarch64/simd/vuzpqp16_1.c: New file. -+ * gcc.target/aarch64/simd/vuzpqp16.x: New file. -+ * gcc.target/aarch64/simd/vuzpqp8_1.c: New file. -+ * gcc.target/aarch64/simd/vuzpqp8.x: New file. -+ * gcc.target/aarch64/simd/vuzpqs16_1.c: New file. -+ * gcc.target/aarch64/simd/vuzpqs16.x: New file. -+ * gcc.target/aarch64/simd/vuzpqs32_1.c: New file. -+ * gcc.target/aarch64/simd/vuzpqs32.x: New file. -+ * gcc.target/aarch64/simd/vuzpqs8_1.c: New file. -+ * gcc.target/aarch64/simd/vuzpqs8.x: New file. -+ * gcc.target/aarch64/simd/vuzpqu16_1.c: New file. -+ * gcc.target/aarch64/simd/vuzpqu16.x: New file. -+ * gcc.target/aarch64/simd/vuzpqu32_1.c: New file. -+ * gcc.target/aarch64/simd/vuzpqu32.x: New file. -+ * gcc.target/aarch64/simd/vuzpqu8_1.c: New file. -+ * gcc.target/aarch64/simd/vuzpqu8.x: New file. -+ * gcc.target/aarch64/simd/vuzps16_1.c: New file. -+ * gcc.target/aarch64/simd/vuzps16.x: New file. -+ * gcc.target/aarch64/simd/vuzps32_1.c: New file. -+ * gcc.target/aarch64/simd/vuzps32.x: New file. -+ * gcc.target/aarch64/simd/vuzps8_1.c: New file. -+ * gcc.target/aarch64/simd/vuzps8.x: New file. -+ * gcc.target/aarch64/simd/vuzpu16_1.c: New file. -+ * gcc.target/aarch64/simd/vuzpu16.x: New file. -+ * gcc.target/aarch64/simd/vuzpu32_1.c: New file. -+ * gcc.target/aarch64/simd/vuzpu32.x: New file. -+ * gcc.target/aarch64/simd/vuzpu8_1.c: New file. -+ * gcc.target/aarch64/simd/vuzpu8.x: New file. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-13 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r211206. -+ 2014-06-03 Andrew Pinski <apinski@cavium.com> -+ -+ * gcc.c-torture/compile/20140528-1.c: New testcase. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209908. -+ 2013-04-29 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.target/arm/simd/simd.exp: New file. -+ * gcc.target/arm/simd/vzipqf32_1.c: New file. -+ * gcc.target/arm/simd/vzipqp16_1.c: New file. -+ * gcc.target/arm/simd/vzipqp8_1.c: New file. -+ * gcc.target/arm/simd/vzipqs16_1.c: New file. -+ * gcc.target/arm/simd/vzipqs32_1.c: New file. -+ * gcc.target/arm/simd/vzipqs8_1.c: New file. -+ * gcc.target/arm/simd/vzipqu16_1.c: New file. -+ * gcc.target/arm/simd/vzipqu32_1.c: New file. 
-+ * gcc.target/arm/simd/vzipqu8_1.c: New file. -+ * gcc.target/arm/simd/vzipf32_1.c: New file. -+ * gcc.target/arm/simd/vzipp16_1.c: New file. -+ * gcc.target/arm/simd/vzipp8_1.c: New file. -+ * gcc.target/arm/simd/vzips16_1.c: New file. -+ * gcc.target/arm/simd/vzips32_1.c: New file. -+ * gcc.target/arm/simd/vzips8_1.c: New file. -+ * gcc.target/arm/simd/vzipu16_1.c: New file. -+ * gcc.target/arm/simd/vzipu32_1.c: New file. -+ * gcc.target/arm/simd/vzipu8_1.c: New file. -+ -+2014-05-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209893. -+ 2014-04-29 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * gcc.target/aarch64/simd/simd.exp: New file. -+ * gcc.target/aarch64/simd/vzipf32_1.c: New file. -+ * gcc.target/aarch64/simd/vzipf32.x: New file. -+ * gcc.target/aarch64/simd/vzipp16_1.c: New file. -+ * gcc.target/aarch64/simd/vzipp16.x: New file. -+ * gcc.target/aarch64/simd/vzipp8_1.c: New file. -+ * gcc.target/aarch64/simd/vzipp8.x: New file. -+ * gcc.target/aarch64/simd/vzipqf32_1.c: New file. -+ * gcc.target/aarch64/simd/vzipqf32.x: New file. -+ * gcc.target/aarch64/simd/vzipqp16_1.c: New file. -+ * gcc.target/aarch64/simd/vzipqp16.x: New file. -+ * gcc.target/aarch64/simd/vzipqp8_1.c: New file. -+ * gcc.target/aarch64/simd/vzipqp8.x: New file. -+ * gcc.target/aarch64/simd/vzipqs16_1.c: New file. -+ * gcc.target/aarch64/simd/vzipqs16.x: New file. -+ * gcc.target/aarch64/simd/vzipqs32_1.c: New file. -+ * gcc.target/aarch64/simd/vzipqs32.x: New file. -+ * gcc.target/aarch64/simd/vzipqs8_1.c: New file. -+ * gcc.target/aarch64/simd/vzipqs8.x: New file. -+ * gcc.target/aarch64/simd/vzipqu16_1.c: New file. -+ * gcc.target/aarch64/simd/vzipqu16.x: New file. -+ * gcc.target/aarch64/simd/vzipqu32_1.c: New file. -+ * gcc.target/aarch64/simd/vzipqu32.x: New file. -+ * gcc.target/aarch64/simd/vzipqu8_1.c: New file. -+ * gcc.target/aarch64/simd/vzipqu8.x: New file. -+ * gcc.target/aarch64/simd/vzips16_1.c: New file. -+ * gcc.target/aarch64/simd/vzips16.x: New file. -+ * gcc.target/aarch64/simd/vzips32_1.c: New file. -+ * gcc.target/aarch64/simd/vzips32.x: New file. -+ * gcc.target/aarch64/simd/vzips8_1.c: New file. -+ * gcc.target/aarch64/simd/vzips8.x: New file. -+ * gcc.target/aarch64/simd/vzipu16_1.c: New file. -+ * gcc.target/aarch64/simd/vzipu16.x: New file. -+ * gcc.target/aarch64/simd/vzipu32_1.c: New file. -+ * gcc.target/aarch64/simd/vzipu32.x: New file. -+ * gcc.target/aarch64/simd/vzipu8_1.c: New file. -+ * gcc.target/aarch64/simd/vzipu8.x: New file. -+ -+2014-05-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209808. -+ 2014-04-25 Jiong Wang <jiong.wang@arm.com> -+ -+ * gcc.target/arm/tail-long-call.c: New test. -+ -+2014-05-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209749. -+ 2014-04-24 Alan Lawrence <alan.lawrence@arm.com> -+ -+ * lib/target-supports.exp (check_effective_target_vect_perm): Return -+ true for aarch64_be. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209736. -+ 2014-04-24 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * lib/target-supports.exp (check_effective_target_vect_bswap): New. -+ * gcc.dg/vect/vect-bswap16: New test. -+ * gcc.dg/vect/vect-bswap32: Likewise. -+ * gcc.dg/vect/vect-bswap64: Likewise. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209713. -+ 2014-04-23 Alex Velenko <Alex.Velenko@arm.com> -+ -+ * gcc.target/aarch64/vdup_lane_1.c: New testcase. -+ * gcc.target/aarch64/vdup_lane_2.c: New testcase. -+ * gcc.target/aarch64/vdup_n_1.c: New testcase. 
-+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209704, 209705. -+ 2014-04-23 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * gcc.target/arm/rev16.c: New test. -+ -+ 2014-04-23 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * gcc.target/aarch64/rev16_1.c: New test. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209642. -+ 2014-04-22 Alex Velenko <Alex.Velenko@arm.com> -+ -+ * gcc.target/aarch64/vreinterpret_f64_1.c: New. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209640. -+ 2014-04-22 Alex Velenko <Alex.Velenko@arm.com> -+ -+ * gcc.target/aarch64/vqneg_s64_1.c: New testcase. -+ * gcc.target/aarch64/vqabs_s64_1.c: New testcase. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209613, 209614. -+ 2014-04-22 Ian Bolton <ian.bolton@arm.com> -+ -+ * gcc.target/arm/anddi_notdi-1.c: New test. -+ * gcc.target/arm/iordi_notdi-1.c: New test case. -+ -+ 2014-04-22 Ian Bolton <ian.bolton@arm.com> -+ -+ * gcc.target/arm/iordi_notdi-1.c: New test. -+ -+2014-05-23 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209559. -+ 2014-04-22 Alex Velenko <Alex.Velenko@arm.com> -+ -+ * gcc.target/aarch64/vrnd_f64_1.c : New file. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-05-13 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209889. -+ 2014-04-29 Zhenqiang Chen <zhenqiang.chen@linaro.org> -+ -+ * gcc.target/aarch64/fcsel_1.c: New test case. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/gcc/testsuite/gcc.c-torture/compile/20140528-1.c -+++ b/src/gcc/testsuite/gcc.c-torture/compile/20140528-1.c -@@ -0,0 +1,9 @@ -+unsigned f(unsigned flags, unsigned capabilities) -+{ -+ unsigned gfp_mask; -+ unsigned gfp_notmask = 0; -+ gfp_mask = flags & ((1 << 25) - 1); -+ if (!(capabilities & 0x00000001)) -+ gfp_mask |= 0x1000000u; -+ return (gfp_mask & ~gfp_notmask); -+} ---- a/src/gcc/testsuite/gcc.dg/fail_always_inline.c -+++ b/src/gcc/testsuite/gcc.dg/fail_always_inline.c -@@ -1,4 +1,5 @@ - /* { dg-do compile } */ -+/* { dg-add-options bind_pic_locally } */ - - extern __attribute__ ((always_inline)) void - bar() { } /* { dg-warning "function might not be inlinable" } */ ---- a/src/gcc/testsuite/gcc.dg/ira-shrinkwrap-prep-1.c -+++ b/src/gcc/testsuite/gcc.dg/ira-shrinkwrap-prep-1.c -@@ -1,4 +1,4 @@ --/* { dg-do compile { target { { x86_64-*-* && lp64 } || { powerpc*-*-* && lp64 } } } } */ -+/* { dg-do compile { target { { x86_64-*-* && lp64 } || { { powerpc*-*-* && lp64 } || arm_nothumb } } } } */ - /* { dg-options "-O3 -fdump-rtl-ira -fdump-rtl-pro_and_epilogue" } */ - - long __attribute__((noinline, noclone)) ---- a/src/gcc/testsuite/gcc.dg/pr10474.c -+++ b/src/gcc/testsuite/gcc.dg/pr10474.c -@@ -1,4 +1,4 @@ --/* { dg-do compile { target { { x86_64-*-* && lp64 } || { powerpc*-*-* && lp64 } } } } */ -+/* { dg-do compile { target { { x86_64-*-* && lp64 } || { { powerpc*-*-* && lp64 } || arm_nothumb } } } } */ - /* { dg-options "-O3 -fdump-rtl-pro_and_epilogue" } */ - - void f(int *i) ---- a/src/gcc/testsuite/gcc.dg/ssp-4.c -+++ b/src/gcc/testsuite/gcc.dg/ssp-4.c -@@ -0,0 +1,18 @@ -+/* { dg-do assemble } */ -+/* { dg-options "-fstack-protector-strong -O1 -frename-registers" } */ -+/* { dg-require-effective-target fstack_protector } */ -+ -+typedef unsigned int uint32_t; -+struct ctx -+{ -+ uint32_t A; -+}; -+ -+void * -+buffer_copy (const struct ctx *ctx, void *resbuf) -+{ -+ uint32_t 
buffer[4]; -+ buffer[0] = (ctx->A); -+ __builtin_memcpy (resbuf, buffer, sizeof (buffer)); -+ return resbuf; -+} ---- a/src/gcc/testsuite/gcc.dg/ira-shrinkwrap-prep-2.c -+++ b/src/gcc/testsuite/gcc.dg/ira-shrinkwrap-prep-2.c -@@ -1,4 +1,4 @@ --/* { dg-do compile { target { { x86_64-*-* && lp64 } || { powerpc*-*-* && lp64 } } } } */ -+/* { dg-do compile { target { { x86_64-*-* && lp64 } || { { powerpc*-*-* && lp64 } || arm_nothumb } } } } */ - /* { dg-options "-O3 -fdump-rtl-ira -fdump-rtl-pro_and_epilogue" } */ - - long __attribute__((noinline, noclone)) ---- a/src/gcc/testsuite/gcc.dg/inline-22.c -+++ b/src/gcc/testsuite/gcc.dg/inline-22.c -@@ -1,5 +1,6 @@ - /* { dg-do compile } */ - /* { dg-options "-funit-at-a-time -Wno-attributes" } */ -+/* { dg-add-options bind_pic_locally } */ - /* Verify we can inline without a complete prototype and with promoted - arguments. See also PR32492. */ - __attribute__((always_inline)) void f1() {} ---- a/src/gcc/testsuite/gcc.dg/memset-2.c -+++ b/src/gcc/testsuite/gcc.dg/memset-2.c -@@ -0,0 +1,11 @@ -+/* PR target/63937 */ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O2" } */ -+ -+void -+foo (char *p) -+{ -+ p = __builtin_assume_aligned (p, 64); -+ __builtin_memset (p, 0, 0x100000001ULL); -+} -+ ---- a/src/gcc/testsuite/gcc.dg/inline_4.c -+++ b/src/gcc/testsuite/gcc.dg/inline_4.c -@@ -1,5 +1,6 @@ - /* { dg-do compile } */ - /* { dg-options "-O2 -fdump-tree-optimized -fdisable-tree-einline=foo2 -fdisable-ipa-inline -Wno-attributes" } */ -+/* { dg-add-options bind_pic_locally } */ - int g; - __attribute__((always_inline)) void bar (void) - { ---- a/src/gcc/testsuite/gcc.dg/torture/pr60606-1.c -+++ b/src/gcc/testsuite/gcc.dg/torture/pr60606-1.c -@@ -0,0 +1,9 @@ -+/* { dg-do compile } */ -+/* { dg-options "-ffat-lto-objects" } */ -+ -+int -+f (void) -+{ -+ register unsigned int r asm ("no-such-register"); /* { dg-error "invalid register name" } */ -+ return r; -+} ---- a/src/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-6.c -+++ b/src/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-6.c -@@ -0,0 +1,43 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fdump-tree-dom1-details" } */ -+/* { dg-final { scan-tree-dump-times "FSM" 6 "dom1" } } */ -+/* { dg-final { cleanup-tree-dump "dom1" } } */ -+ -+int sum0, sum1, sum2, sum3; -+int foo (char *s, char **ret) -+{ -+ int state=0; -+ char c; -+ -+ for (; *s && state != 4; s++) -+ { -+ c = *s; -+ if (c == '*') -+ { -+ s++; -+ break; -+ } -+ switch (state) -+ { -+ case 0: -+ if (c == '+') -+ state = 1; -+ else if (c != '-') -+ sum0+=c; -+ break; -+ case 1: -+ if (c == '+') -+ state = 2; -+ else if (c == '-') -+ state = 0; -+ else -+ sum1+=c; -+ break; -+ default: -+ break; -+ } -+ -+ } -+ *ret = s; -+ return state; -+} ---- a/src/gcc/testsuite/gcc.dg/tree-ssa/pr42585.c -+++ b/src/gcc/testsuite/gcc.dg/tree-ssa/pr42585.c -@@ -35,6 +35,6 @@ - /* Whether the structs are totally scalarized or not depends on the - MOVE_RATIO macro definition in the back end. The scalarization will - not take place when using small values for MOVE_RATIO. */ --/* { dg-final { scan-tree-dump-times "struct _fat_ptr _ans" 0 "optimized" { target { ! "arm*-*-* avr-*-* nds32*-*-* powerpc*-*-* s390*-*-* sh*-*-*" } } } } */ --/* { dg-final { scan-tree-dump-times "struct _fat_ptr _T2" 0 "optimized" { target { ! "arm*-*-* avr-*-* nds32*-*-* powerpc*-*-* s390*-*-* sh*-*-*" } } } } */ -+/* { dg-final { scan-tree-dump-times "struct _fat_ptr _ans" 0 "optimized" { target { ! 
"aarch64*-*-* arm*-*-* avr-*-* nds32*-*-* powerpc*-*-* s390*-*-* sh*-*-*" } } } } */ -+/* { dg-final { scan-tree-dump-times "struct _fat_ptr _T2" 0 "optimized" { target { ! "aarch64*-*-* arm*-*-* avr-*-* nds32*-*-* powerpc*-*-* s390*-*-* sh*-*-*" } } } } */ - /* { dg-final { cleanup-tree-dump "optimized" } } */ ---- a/src/gcc/testsuite/gcc.dg/tree-ssa/sra-12.c -+++ b/src/gcc/testsuite/gcc.dg/tree-ssa/sra-12.c -@@ -21,5 +21,5 @@ - *p = l; - } - --/* { dg-final { scan-tree-dump-times "l;" 0 "release_ssa" { target { ! "avr*-*-* nds32*-*-*" } } } } */ -+/* { dg-final { scan-tree-dump-times "l;" 0 "release_ssa" { target { ! "aarch64*-*-* avr*-*-* nds32*-*-*" } } } } */ - /* { dg-final { cleanup-tree-dump "release_ssa" } } */ ---- a/src/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c -+++ b/src/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c -@@ -0,0 +1,127 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fdump-tree-dom1-details" } */ -+/* { dg-final { scan-tree-dump-times "FSM" 19 "dom1" } } */ -+/* { dg-final { cleanup-tree-dump "dom1" } } */ -+ -+enum STATE { -+ S0=0, -+ SI, -+ S1, -+ S2, -+ S3, -+ S4, -+ S5, -+ S6 -+}; -+ -+int bar (enum STATE s); -+ -+enum STATE foo (unsigned char **y, unsigned *c) -+{ -+ unsigned char *x = *y; -+ unsigned char n; -+ enum STATE s = S0; -+ -+ for( ; *x && s != SI; x++ ) -+ { -+ n = *x; -+ if (n == 'x') -+ { -+ x++; -+ break; -+ } -+ switch(s) -+ { -+ case S0: -+ if(bar(n)) -+ s = S3; -+ else if( n == 'a' || n == 'b' ) -+ s = S1; -+ else if( n == 'c' ) -+ s = S4; -+ else -+ { -+ s = SI; -+ c[SI]++; -+ } -+ c[S0]++; -+ break; -+ case S1: -+ if(bar(n)) -+ { -+ s = S3; -+ c[S1]++; -+ } -+ else if( n == 'c' ) -+ { -+ s = S4; -+ c[S1]++; -+ } -+ else -+ { -+ s = SI; -+ c[S1]++; -+ } -+ break; -+ case S3: -+ if( n == 'c' ) -+ { -+ s = S4; -+ c[S3]++; -+ } -+ else if(!bar(n)) -+ { -+ s = SI; -+ c[S3]++; -+ } -+ break; -+ case S4: -+ if( n == 'E' || n == 'e' ) -+ { -+ s = S2; -+ c[S4]++; -+ } -+ else if(!bar(n)) -+ { -+ s = SI; -+ c[S4]++; -+ } -+ break; -+ case S2: -+ if( n == 'a' || n == 'b' ) -+ { -+ s = S5; -+ c[S2]++; -+ } -+ else -+ { -+ s = SI; -+ c[S2]++; -+ } -+ break; -+ case S5: -+ if(bar(n)) -+ { -+ s = S6; -+ c[S5]++; -+ } -+ else -+ { -+ s = SI; -+ c[S5]++; -+ } -+ break; -+ case S6: -+ if(!bar(n)) -+ { -+ s = SI; -+ c[SI]++; -+ } -+ break; -+ default: -+ break; -+ } -+ } -+ *y=x; -+ return s; -+} ---- a/src/gcc/testsuite/gcc.dg/pr60114.c -+++ b/src/gcc/testsuite/gcc.dg/pr60114.c -@@ -0,0 +1,31 @@ -+/* PR c/60114 */ -+/* { dg-do compile } */ -+/* { dg-options "-Wconversion" } */ -+ -+struct S { int n, u[2]; }; -+const signed char z[] = { -+ [0] = 0x100, /* { dg-warning "9:overflow in implicit constant conversion" } */ -+ [2] = 0x101, /* { dg-warning "9:overflow in implicit constant conversion" } */ -+}; -+int A[] = { -+ 0, 0x80000000, /* { dg-warning "16:conversion of unsigned constant value to negative integer" } */ -+ 0xA, 0x80000000, /* { dg-warning "18:conversion of unsigned constant value to negative integer" } */ -+ 0xA, 0xA, 0x80000000 /* { dg-warning "23:conversion of unsigned constant value to negative integer" } */ -+ }; -+int *p = (int []) { 0x80000000 }; /* { dg-warning "21:conversion of unsigned constant value to negative integer" } */ -+union { int k; } u = { .k = 0x80000000 }; /* { dg-warning "29:conversion of unsigned constant value to negative integer" } */ -+typedef int H[]; -+void -+foo (void) -+{ -+ signed char a[][3] = { { 0x100, /* { dg-warning "28:overflow in implicit constant conversion" } */ -+ 1, 0x100 }, /* { dg-warning 
"24:overflow in implicit constant conversion" } */ -+ { '\0', 0x100, '\0' } /* { dg-warning "27:overflow in implicit constant conversion" } */ -+ }; -+ (const signed char []) { 0x100 }; /* { dg-warning "28:overflow in implicit constant conversion" } */ -+ (const float []) { 1e0, 1e1, 1e100 }; /* { dg-warning "32:conversion" } */ -+ struct S s1 = { 0x80000000 }; /* { dg-warning "19:conversion of unsigned constant value to negative integer" } */ -+ struct S s2 = { .n = 0x80000000 }; /* { dg-warning "24:conversion of unsigned constant value to negative integer" } */ -+ struct S s3 = { .u[1] = 0x80000000 }; /* { dg-warning "27:conversion of unsigned constant value to negative integer" } */ -+ H h = { 1, 2, 0x80000000 }; /* { dg-warning "17:conversion of unsigned constant value to negative integer" } */ -+} ---- a/src/gcc/testsuite/gcc.dg/vect/vect-reduc-mul_1.c -+++ b/src/gcc/testsuite/gcc.dg/vect/vect-reduc-mul_1.c -@@ -0,0 +1,36 @@ -+/* { dg-require-effective-target vect_int_mult } */ -+/* { dg-require-effective-target whole_vector_shift } */ -+ -+/* Write a reduction loop to be reduced using vector shifts. */ -+ -+extern void abort(void); -+ -+unsigned char in[16]; -+ -+int -+main (unsigned char argc, char **argv) -+{ -+ unsigned char i = 0; -+ unsigned char sum = 1; -+ -+ for (i = 0; i < 16; i++) -+ in[i] = i + i + 1; -+ -+ /* Prevent constant propagation of the entire loop below. */ -+ asm volatile ("" : : : "memory"); -+ -+ for (i = 0; i < 16; i++) -+ sum *= in[i]; -+ -+ if (sum != 33) -+ { -+ __builtin_printf("Failed %d\n", sum); -+ abort(); -+ } -+ -+ return 0; -+} -+ -+/* { dg-final { scan-tree-dump "Reduce using vector shifts" "vect" } } */ -+/* { dg-final { cleanup-tree-dump "vect" } } */ -+ ---- a/src/gcc/testsuite/gcc.dg/vect/vect-reduc-mul_2.c -+++ b/src/gcc/testsuite/gcc.dg/vect/vect-reduc-mul_2.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target vect_int_mult } */ -+/* { dg-require-effective-target whole_vector_shift } */ -+ -+/* Write a reduction loop to be reduced using vector shifts and folded. */ -+ -+extern void abort(void); -+ -+int -+main (unsigned char argc, char **argv) -+{ -+ unsigned char in[16]; -+ unsigned char i = 0; -+ unsigned char sum = 1; -+ -+ for (i = 0; i < 16; i++) -+ in[i] = i + i + 1; -+ -+ for (i = 0; i < 16; i++) -+ sum *= in[i]; -+ -+ if (sum != 33) -+ { -+ __builtin_printf("Failed %d\n", sum); -+ abort(); -+ } -+ -+ return 0; -+} -+ -+/* { dg-final { scan-tree-dump "Reduce using vector shifts" "vect" } } */ -+/* { dg-final { cleanup-tree-dump "vect" } } */ -+ ---- a/src/gcc/testsuite/gcc.dg/vect/vect-reduc-or_1.c -+++ b/src/gcc/testsuite/gcc.dg/vect/vect-reduc-or_1.c -@@ -0,0 +1,35 @@ -+/* { dg-require-effective-target whole_vector_shift } */ -+ -+/* Write a reduction loop to be reduced using vector shifts. */ -+ -+extern void abort(void); -+ -+unsigned char in[16] __attribute__((__aligned__(16))); -+ -+int -+main (unsigned char argc, char **argv) -+{ -+ unsigned char i = 0; -+ unsigned char sum = 1; -+ -+ for (i = 0; i < 16; i++) -+ in[i] = (i + i + 1) & 0xfd; -+ -+ /* Prevent constant propagation of the entire loop below. 
*/ -+ asm volatile ("" : : : "memory"); -+ -+ for (i = 0; i < 16; i++) -+ sum |= in[i]; -+ -+ if (sum != 29) -+ { -+ __builtin_printf("Failed %d\n", sum); -+ abort(); -+ } -+ -+ return 0; -+} -+ -+/* { dg-final { scan-tree-dump "Reduce using vector shifts" "vect" } } */ -+/* { dg-final { cleanup-tree-dump "vect" } } */ -+ ---- a/src/gcc/testsuite/gcc.dg/vect/vect-bswap32.c -+++ b/src/gcc/testsuite/gcc.dg/vect/vect-bswap32.c -@@ -0,0 +1,44 @@ -+/* { dg-require-effective-target vect_bswap } */ -+ -+#include "tree-vect.h" -+ -+#define N 128 -+ -+volatile int y = 0; -+ -+static inline void -+vfoo32 (unsigned int* a) -+{ -+ int i = 0; -+ for (i = 0; i < N; ++i) -+ a[i] = __builtin_bswap32 (a[i]); -+} -+ -+int -+main (void) -+{ -+ unsigned int arr[N]; -+ unsigned int expect[N]; -+ int i; -+ -+ for (i = 0; i < N; ++i) -+ { -+ arr[i] = i; -+ expect[i] = __builtin_bswap32 (i); -+ if (y) /* Avoid vectorisation. */ -+ abort (); -+ } -+ -+ vfoo32 (arr); -+ -+ for (i = 0; i < N; ++i) -+ { -+ if (arr[i] != expect[i]) -+ abort (); -+ } -+ -+ return 0; -+} -+ -+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -+/* { dg-final { cleanup-tree-dump "vect" } } */ ---- a/src/gcc/testsuite/gcc.dg/vect/vect-reduc-or_2.c -+++ b/src/gcc/testsuite/gcc.dg/vect/vect-reduc-or_2.c -@@ -0,0 +1,31 @@ -+/* { dg-require-effective-target whole_vector_shift } */ -+ -+/* Write a reduction loop to be reduced using vector shifts and folded. */ -+ -+extern void abort(void); -+ -+int -+main (unsigned char argc, char **argv) -+{ -+ unsigned char in[16] __attribute__((aligned(16))); -+ unsigned char i = 0; -+ unsigned char sum = 1; -+ -+ for (i = 0; i < 16; i++) -+ in[i] = (i + i + 1) & 0xfd; -+ -+ for (i = 0; i < 16; i++) -+ sum |= in[i]; -+ -+ if (sum != 29) -+ { -+ __builtin_printf("Failed %d\n", sum); -+ abort(); -+ } -+ -+ return 0; -+} -+ -+/* { dg-final { scan-tree-dump "Reduce using vector shifts" "vect" } } */ -+/* { dg-final { cleanup-tree-dump "vect" } } */ -+ ---- a/src/gcc/testsuite/gcc.dg/vect/vect-bswap16.c -+++ b/src/gcc/testsuite/gcc.dg/vect/vect-bswap16.c -@@ -0,0 +1,44 @@ -+/* { dg-require-effective-target vect_bswap } */ -+ -+#include "tree-vect.h" -+ -+#define N 128 -+ -+volatile int y = 0; -+ -+static inline void -+vfoo16 (unsigned short int* a) -+{ -+ int i = 0; -+ for (i = 0; i < N; ++i) -+ a[i] = __builtin_bswap16 (a[i]); -+} -+ -+int -+main (void) -+{ -+ unsigned short arr[N]; -+ unsigned short expect[N]; -+ int i; -+ -+ for (i = 0; i < N; ++i) -+ { -+ arr[i] = i; -+ expect[i] = __builtin_bswap16 (i); -+ if (y) /* Avoid vectorisation. */ -+ abort (); -+ } -+ -+ vfoo16 (arr); -+ -+ for (i = 0; i < N; ++i) -+ { -+ if (arr[i] != expect[i]) -+ abort (); -+ } -+ -+ return 0; -+} -+ -+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -+/* { dg-final { cleanup-tree-dump "vect" } } */ ---- a/src/gcc/testsuite/gcc.dg/vect/vect-bswap64.c -+++ b/src/gcc/testsuite/gcc.dg/vect/vect-bswap64.c -@@ -0,0 +1,44 @@ -+/* { dg-require-effective-target vect_bswap } */ -+ -+#include "tree-vect.h" -+ -+#define N 128 -+ -+volatile int y = 0; -+ -+static inline void -+vfoo64 (unsigned long long* a) -+{ -+ int i = 0; -+ for (i = 0; i < N; ++i) -+ a[i] = __builtin_bswap64 (a[i]); -+} -+ -+int -+main (void) -+{ -+ unsigned long long arr[N]; -+ unsigned long long expect[N]; -+ int i; -+ -+ for (i = 0; i < N; ++i) -+ { -+ arr[i] = i; -+ expect[i] = __builtin_bswap64 (i); -+ if (y) /* Avoid vectorisation. 
*/ -+ abort (); -+ } -+ -+ vfoo64 (arr); -+ -+ for (i = 0; i < N; ++i) -+ { -+ if (arr[i] != expect[i]) -+ abort (); -+ } -+ -+ return 0; -+} -+ -+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -+/* { dg-final { cleanup-tree-dump "vect" } } */ ---- a/src/gcc/testsuite/gcc.dg/ssp-3.c -+++ b/src/gcc/testsuite/gcc.dg/ssp-3.c -@@ -0,0 +1,16 @@ -+/* { dg-do assemble } */ -+/* { dg-options "-fstack-protector-strong -O1 -frename-registers" } */ -+/* { dg-require-effective-target fstack_protector } */ -+ -+extern int bar (const char *s, int *argc); -+extern int baz (const char *s); -+ -+char -+foo (const char *s) -+{ -+ int argc; -+ int ret; -+ if ( !bar (s, &argc)) -+ ret = baz (s); -+ return *s; -+} ---- a/src/gcc/testsuite/g++.dg/ipa/devirt-25.C -+++ b/src/gcc/testsuite/g++.dg/ipa/devirt-25.C -@@ -1,5 +1,6 @@ - /* { dg-do compile } */ - /* { dg-options "-O3 -fdump-ipa-cp" } */ -+/* { dg-add-options bind_pic_locally } */ - - class ert_RefCounter { - protected: ---- a/src/gcc/objcp/ChangeLog.linaro -+++ b/src/gcc/objcp/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/gcc/cp/ChangeLog.linaro -+++ b/src/gcc/cp/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. 
---- a/src/gcc/compare-elim.c -+++ b/src/gcc/compare-elim.c -@@ -100,6 +100,9 @@ - constants. */ - rtx in_a, in_b; - -+ /* The REG_EH_REGION of the comparison. */ -+ rtx eh_note; -+ - /* Information about how this comparison is used. */ - struct comparison_use uses[MAX_CMP_USE]; - -@@ -262,6 +265,7 @@ - struct comparison *last_cmp; - rtx insn, next, last_clobber; - bool last_cmp_valid; -+ bool need_purge = false; - bitmap killed; - - killed = BITMAP_ALLOC (NULL); -@@ -303,44 +307,60 @@ - if (src) - { - enum machine_mode src_mode = GET_MODE (src); -+ rtx eh_note = NULL; - -- /* Eliminate a compare that's redundant with the previous. */ -- if (last_cmp_valid -- && rtx_equal_p (last_cmp->in_a, XEXP (src, 0)) -- && rtx_equal_p (last_cmp->in_b, XEXP (src, 1))) -- { -- rtx flags, x; -- enum machine_mode new_mode -- = targetm.cc_modes_compatible (last_cmp->orig_mode, src_mode); -+ if (flag_non_call_exceptions) -+ eh_note = find_reg_note (insn, REG_EH_REGION, NULL); - -- /* New mode is incompatible with the previous compare mode. */ -- if (new_mode == VOIDmode) -- continue; -+ if (!last_cmp_valid) -+ goto dont_delete; - -- if (new_mode != last_cmp->orig_mode) -- { -- flags = gen_rtx_REG (src_mode, targetm.flags_regnum); -+ /* Take care that it's in the same EH region. */ -+ if (flag_non_call_exceptions -+ && !rtx_equal_p (eh_note, last_cmp->eh_note)) -+ goto dont_delete; - -- /* Generate new comparison for substitution. */ -- x = gen_rtx_COMPARE (new_mode, XEXP (src, 0), XEXP (src, 1)); -- x = gen_rtx_SET (VOIDmode, flags, x); -+ /* Make sure the compare is redundant with the previous. */ -+ if (!rtx_equal_p (last_cmp->in_a, XEXP (src, 0)) -+ || !rtx_equal_p (last_cmp->in_b, XEXP (src, 1))) -+ goto dont_delete; - -- if (!validate_change (last_cmp->insn, -- &PATTERN (last_cmp->insn), x, false)) -- continue; -+ /* New mode must be compatible with the previous compare mode. */ -+ { -+ enum machine_mode new_mode -+ = targetm.cc_modes_compatible (last_cmp->orig_mode, src_mode); -+ if (new_mode == VOIDmode) -+ goto dont_delete; - -- last_cmp->orig_mode = new_mode; -- } -+ if (new_mode != last_cmp->orig_mode) -+ { -+ rtx x, flags = gen_rtx_REG (src_mode, targetm.flags_regnum); - -- delete_insn (insn); -- continue; -- } -+ /* Generate new comparison for substitution. */ -+ x = gen_rtx_COMPARE (new_mode, XEXP (src, 0), XEXP (src, 1)); -+ x = gen_rtx_SET (VOIDmode, flags, x); - -+ if (!validate_change (last_cmp->insn, -+ &PATTERN (last_cmp->insn), x, false)) -+ goto dont_delete; -+ -+ last_cmp->orig_mode = new_mode; -+ } -+ } -+ -+ /* All tests and substitutions succeeded! */ -+ if (eh_note) -+ need_purge = true; -+ delete_insn (insn); -+ continue; -+ -+ dont_delete: - last_cmp = XCNEW (struct comparison); - last_cmp->insn = insn; - last_cmp->prev_clobber = last_clobber; - last_cmp->in_a = XEXP (src, 0); - last_cmp->in_b = XEXP (src, 1); -+ last_cmp->eh_note = eh_note; - last_cmp->orig_mode = src_mode; - all_compares.safe_push (last_cmp); - -@@ -404,6 +424,11 @@ - } - } - } -+ -+ /* If we deleted a compare with a REG_EH_REGION note, we may need to -+ remove EH edges. */ -+ if (need_purge) -+ purge_dead_edges (bb); - } - - /* Find all comparisons in the function. */ ---- a/src/gcc/ira-int.h -+++ b/src/gcc/ira-int.h -@@ -281,6 +281,9 @@ - /* Mode of the allocno which is the mode of the corresponding - pseudo-register. */ - ENUM_BITFIELD (machine_mode) mode : 8; -+ /* Widest mode of the allocno which in at least one case could be -+ for paradoxical subregs where wmode > mode. 
*/ -+ ENUM_BITFIELD (machine_mode) wmode : 8; - /* Register class which should be used for allocation for given - allocno. NO_REGS means that we should use memory. */ - ENUM_BITFIELD (reg_class) aclass : 16; -@@ -313,7 +316,7 @@ - number (0, ...) - 2. Value -1 is used for allocnos spilled by the - reload (at this point pseudo-register has only one allocno) which - did not get stack slot yet. */ -- short int hard_regno; -+ signed int hard_regno : 16; - /* Allocnos with the same regno are linked by the following member. - Allocnos corresponding to inner loops are first in the list (it - corresponds to depth-first traverse of the loops). */ -@@ -430,6 +433,7 @@ - #define ALLOCNO_BAD_SPILL_P(A) ((A)->bad_spill_p) - #define ALLOCNO_ASSIGNED_P(A) ((A)->assigned_p) - #define ALLOCNO_MODE(A) ((A)->mode) -+#define ALLOCNO_WMODE(A) ((A)->wmode) - #define ALLOCNO_PREFS(A) ((A)->allocno_prefs) - #define ALLOCNO_COPIES(A) ((A)->allocno_copies) - #define ALLOCNO_HARD_REG_COSTS(A) ((A)->hard_reg_costs) ---- a/src/gcc/ira-color.c -+++ b/src/gcc/ira-color.c -@@ -1711,6 +1711,7 @@ - { - ira_allocno_t conflict_a = OBJECT_ALLOCNO (conflict_obj); - enum reg_class conflict_aclass; -+ allocno_color_data_t data = ALLOCNO_COLOR_DATA (conflict_a); - - /* Reload can give another class so we need to check all - allocnos. */ -@@ -1782,7 +1783,12 @@ - hard_regno = ira_class_hard_regs[aclass][j]; - ira_assert (hard_regno >= 0); - k = ira_class_hard_reg_index[conflict_aclass][hard_regno]; -- if (k < 0) -+ if (k < 0 -+ /* If HARD_REGNO is not available for CONFLICT_A, -+ the conflict would be ignored, since HARD_REGNO -+ will never be assigned to CONFLICT_A. */ -+ || !TEST_HARD_REG_BIT (data->profitable_hard_regs, -+ hard_regno)) - continue; - full_costs[j] -= conflict_costs[k]; - } ---- a/src/gcc/ifcvt.c -+++ b/src/gcc/ifcvt.c -@@ -1432,10 +1432,17 @@ - end_sequence (); - } - -- /* Don't even try if the comparison operands are weird. */ -+ /* Don't even try if the comparison operands are weird -+ except that the target supports cbranchcc4. */ - if (! general_operand (cmp_a, GET_MODE (cmp_a)) - || ! general_operand (cmp_b, GET_MODE (cmp_b))) -- return NULL_RTX; -+ { -+#if HAVE_cbranchcc4 -+ if (GET_MODE_CLASS (GET_MODE (cmp_a)) != MODE_CC -+ || cmp_b != const0_rtx) -+#endif -+ return NULL_RTX; -+ } - - #if HAVE_conditional_move - unsignedp = (code == LTU || code == GEU -@@ -1753,7 +1760,12 @@ - { - rtx cond, set, insn; - int reverse; -+ int allow_cc_mode = false; -+#if HAVE_cbranchcc4 -+ allow_cc_mode = true; -+#endif - -+ - /* If target is already mentioned in the known condition, return it. */ - if (reg_mentioned_p (target, if_info->cond)) - { -@@ -1874,7 +1886,7 @@ - } - - cond = canonicalize_condition (if_info->jump, cond, reverse, -- earliest, target, false, true); -+ earliest, target, allow_cc_mode, true); - if (! cond || ! reg_mentioned_p (target, cond)) - return NULL; - -@@ -2325,6 +2337,10 @@ - { - rtx cond, set, tmp; - bool reverse; -+ int allow_cc_mode = false; -+#if HAVE_cbranchcc4 -+ allow_cc_mode = true; -+#endif - - if (! any_condjump_p (jump)) - return NULL_RTX; -@@ -2361,7 +2377,7 @@ - /* Otherwise, fall back on canonicalize_condition to do the dirty - work of manipulating MODE_CC values and COMPARE rtx codes. */ - tmp = canonicalize_condition (jump, cond, reverse, earliest, -- NULL_RTX, false, true); -+ NULL_RTX, allow_cc_mode, true); - - /* We don't handle side-effects in the condition, like handling - REG_INC notes and making sure no duplicate conditions are emitted. 
*/ ---- a/src/gcc/expr.c -+++ b/src/gcc/expr.c -@@ -68,22 +68,6 @@ - #include "tree-ssa-address.h" - #include "cfgexpand.h" - --/* Decide whether a function's arguments should be processed -- from first to last or from last to first. -- -- They should if the stack and args grow in opposite directions, but -- only if we have push insns. */ -- --#ifdef PUSH_ROUNDING -- --#ifndef PUSH_ARGS_REVERSED --#if defined (STACK_GROWS_DOWNWARD) != defined (ARGS_GROW_DOWNWARD) --#define PUSH_ARGS_REVERSED /* If it's last to first. */ --#endif --#endif -- --#endif -- - #ifndef STACK_PUSH_CODE - #ifdef STACK_GROWS_DOWNWARD - #define STACK_PUSH_CODE PRE_DEC -@@ -172,37 +156,6 @@ - static rtx const_vector_from_tree (tree); - static void write_complex_part (rtx, rtx, bool); - --/* This macro is used to determine whether move_by_pieces should be called -- to perform a structure copy. */ --#ifndef MOVE_BY_PIECES_P --#define MOVE_BY_PIECES_P(SIZE, ALIGN) \ -- (move_by_pieces_ninsns (SIZE, ALIGN, MOVE_MAX_PIECES + 1) \ -- < (unsigned int) MOVE_RATIO (optimize_insn_for_speed_p ())) --#endif -- --/* This macro is used to determine whether clear_by_pieces should be -- called to clear storage. */ --#ifndef CLEAR_BY_PIECES_P --#define CLEAR_BY_PIECES_P(SIZE, ALIGN) \ -- (move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES + 1) \ -- < (unsigned int) CLEAR_RATIO (optimize_insn_for_speed_p ())) --#endif -- --/* This macro is used to determine whether store_by_pieces should be -- called to "memset" storage with byte values other than zero. */ --#ifndef SET_BY_PIECES_P --#define SET_BY_PIECES_P(SIZE, ALIGN) \ -- (move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES + 1) \ -- < (unsigned int) SET_RATIO (optimize_insn_for_speed_p ())) --#endif -- --/* This macro is used to determine whether store_by_pieces should be -- called to "memcpy" storage when the source is a constant string. */ --#ifndef STORE_BY_PIECES_P --#define STORE_BY_PIECES_P(SIZE, ALIGN) \ -- (move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES + 1) \ -- < (unsigned int) MOVE_RATIO (optimize_insn_for_speed_p ())) --#endif - - /* This is run to set up which modes can be used - directly in memory and to initialize the block move optab. It is run -@@ -843,22 +796,16 @@ - return mode; - } - --/* STORE_MAX_PIECES is the number of bytes at a time that we can -- store efficiently. Due to internal GCC limitations, this is -- MOVE_MAX_PIECES limited by the number of bytes GCC can represent -- for an immediate constant. */ -- --#define STORE_MAX_PIECES MIN (MOVE_MAX_PIECES, 2 * sizeof (HOST_WIDE_INT)) -- - /* Determine whether the LEN bytes can be moved by using several move - instructions. Return nonzero if a call to move_by_pieces should - succeed. 
*/ - - int --can_move_by_pieces (unsigned HOST_WIDE_INT len ATTRIBUTE_UNUSED, -- unsigned int align ATTRIBUTE_UNUSED) -+can_move_by_pieces (unsigned HOST_WIDE_INT len, -+ unsigned int align) - { -- return MOVE_BY_PIECES_P (len, align); -+ return targetm.use_by_pieces_infrastructure_p (len, align, MOVE_BY_PIECES, -+ optimize_insn_for_speed_p ()); - } - - /* Generate several move instructions to copy LEN bytes from block FROM to -@@ -1195,7 +1142,7 @@ - set_mem_size (y, INTVAL (size)); - } - -- if (CONST_INT_P (size) && MOVE_BY_PIECES_P (INTVAL (size), align)) -+ if (CONST_INT_P (size) && can_move_by_pieces (INTVAL (size), align)) - move_by_pieces (x, y, INTVAL (size), align, 0); - else if (emit_block_move_via_movmem (x, y, size, align, - expected_align, expected_size, -@@ -2396,6 +2343,18 @@ - = gen_rtx_EXPR_LIST (mode, gen_rtx_USE (VOIDmode, reg), *call_fusage); - } - -+/* Add a CLOBBER expression for REG to the (possibly empty) list pointed -+ to by CALL_FUSAGE. REG must denote a hard register. */ -+ -+void -+clobber_reg_mode (rtx *call_fusage, rtx reg, enum machine_mode mode) -+{ -+ gcc_assert (REG_P (reg) && REGNO (reg) < FIRST_PSEUDO_REGISTER); -+ -+ *call_fusage -+ = gen_rtx_EXPR_LIST (mode, gen_rtx_CLOBBER (VOIDmode, reg), *call_fusage); -+} -+ - /* Add USE expressions to *CALL_FUSAGE for each of NREGS consecutive regs, - starting at REGNO. All of these registers must be hard registers. */ - -@@ -2498,9 +2457,11 @@ - if (len == 0) - return 1; - -- if (! (memsetp -- ? SET_BY_PIECES_P (len, align) -- : STORE_BY_PIECES_P (len, align))) -+ if (!targetm.use_by_pieces_infrastructure_p (len, align, -+ memsetp -+ ? SET_BY_PIECES -+ : STORE_BY_PIECES, -+ optimize_insn_for_speed_p ())) - return 0; - - align = alignment_for_piecewise_move (STORE_MAX_PIECES, align); -@@ -2576,9 +2537,13 @@ - return to; - } - -- gcc_assert (memsetp -- ? SET_BY_PIECES_P (len, align) -- : STORE_BY_PIECES_P (len, align)); -+ gcc_assert (targetm.use_by_pieces_infrastructure_p -+ (len, align, -+ memsetp -+ ? SET_BY_PIECES -+ : STORE_BY_PIECES, -+ optimize_insn_for_speed_p ())); -+ - data.constfun = constfun; - data.constfundata = constfundata; - data.len = len; -@@ -2815,7 +2780,9 @@ - align = MEM_ALIGN (object); - - if (CONST_INT_P (size) -- && CLEAR_BY_PIECES_P (INTVAL (size), align)) -+ && targetm.use_by_pieces_infrastructure_p (INTVAL (size), align, -+ CLEAR_BY_PIECES, -+ optimize_insn_for_speed_p ())) - clear_by_pieces (object, INTVAL (size), align); - else if (set_storage_via_setmem (object, size, const0_rtx, align, - expected_align, expected_size, -@@ -4221,7 +4188,7 @@ - && CONST_INT_P (size) - && skip == 0 - && MEM_ALIGN (xinner) >= align -- && (MOVE_BY_PIECES_P ((unsigned) INTVAL (size) - used, align)) -+ && can_move_by_pieces ((unsigned) INTVAL (size) - used, align) - /* Here we avoid the case of a structure whose weak alignment - forces many pushes of a small amount of data, - and such small pushes do rounding that causes trouble. */ -@@ -4353,11 +4320,7 @@ - /* Loop over all the words allocated on the stack for this arg. */ - /* We can do it by words, because any scalar bigger than a word - has a size a multiple of a word. */ --#ifndef PUSH_ARGS_REVERSED -- for (i = not_stack; i < size; i++) --#else - for (i = size - 1; i >= not_stack; i--) --#endif - if (i >= not_stack + offset) - emit_push_insn (operand_subword_force (x, i, mode), - word_mode, NULL_TREE, NULL_RTX, align, 0, NULL_RTX, -@@ -7838,7 +7801,7 @@ - && ! 
(target != 0 && safe_from_p (target, exp, 1))) - || TREE_ADDRESSABLE (exp) - || (tree_fits_uhwi_p (TYPE_SIZE_UNIT (type)) -- && (! MOVE_BY_PIECES_P -+ && (! can_move_by_pieces - (tree_to_uhwi (TYPE_SIZE_UNIT (type)), - TYPE_ALIGN (type))) - && ! mostly_zeros_p (exp)))) ---- a/src/gcc/expr.h -+++ b/src/gcc/expr.h -@@ -346,6 +346,7 @@ - /* Mark REG as holding a parameter for the next CALL_INSN. - Mode is TYPE_MODE of the non-promoted parameter, or VOIDmode. */ - extern void use_reg_mode (rtx *, rtx, enum machine_mode); -+extern void clobber_reg_mode (rtx *, rtx, enum machine_mode); - - extern rtx copy_blkmode_to_reg (enum machine_mode, tree); - -@@ -356,6 +357,13 @@ - use_reg_mode (fusage, reg, VOIDmode); - } - -+/* Mark REG as clobbered by the call with FUSAGE as CALL_INSN_FUNCTION_USAGE. */ -+static inline void -+clobber_reg (rtx *fusage, rtx reg) -+{ -+ clobber_reg_mode (fusage, reg, VOIDmode); -+} -+ - /* Mark NREGS consecutive regs, starting at REGNO, as holding parameters - for the next CALL_INSN. */ - extern void use_regs (rtx *, int, int); ---- a/src/gcc/go/ChangeLog.linaro -+++ b/src/gcc/go/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/gcc/genattrtab.c -+++ b/src/gcc/genattrtab.c -@@ -4765,6 +4765,7 @@ - - static struct bypass_list *all_bypasses; - static size_t n_bypasses; -+static size_t n_bypassed; - - static void - gen_bypass_1 (const char *s, size_t len) -@@ -4810,12 +4811,18 @@ - struct bypass_list *b; - struct insn_reserv *r; - -+ n_bypassed = 0; -+ - /* The reservation list is likely to be much longer than the bypass - list. */ - for (r = all_insn_reservs; r; r = r->next) - for (b = all_bypasses; b; b = b->next) - if (fnmatch (b->pattern, r->name, 0) == 0) -- r->bypassed = true; -+ { -+ n_bypassed++; -+ r->bypassed = true; -+ break; -+ } - } - - /* Check that attribute NAME is used in define_insn_reservation condition -@@ -5074,7 +5081,7 @@ - process_bypasses (); - - byps_exp = rtx_alloc (COND); -- XVEC (byps_exp, 0) = rtvec_alloc (n_bypasses * 2); -+ XVEC (byps_exp, 0) = rtvec_alloc (n_bypassed * 2); - XEXP (byps_exp, 1) = make_numeric_value (0); - for (decl = all_insn_reservs, i = 0; - decl; ---- a/src/gcc/ada/ChangeLog.linaro -+++ b/src/gcc/ada/ChangeLog.linaro -@@ -0,0 +1,95 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. 
-+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-05-13 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209653,209866,209871. -+ -+ 2014-04-28 Richard Henderson <rth@redhat.com> -+ -+ * gcc-interface/Makefile.in: Support aarch64-linux. -+ -+ 2014-04-28 Eric Botcazou <ebotcazou@adacore.com> -+ -+ * exp_dbug.ads (Get_External_Name): Add 'False' default to Has_Suffix, -+ add 'Suffix' parameter and adjust comment. -+ (Get_External_Name_With_Suffix): Delete. -+ * exp_dbug.adb (Get_External_Name_With_Suffix): Merge into... -+ (Get_External_Name): ...here. Add 'False' default to Has_Suffix, add -+ 'Suffix' parameter. -+ (Get_Encoded_Name): Remove 2nd argument in call to Get_External_Name. -+ Call Get_External_Name instead of Get_External_Name_With_Suffix. -+ (Get_Secondary_DT_External_Name): Likewise. -+ * exp_cg.adb (Write_Call_Info): Likewise. -+ * exp_disp.adb (Export_DT): Likewise. -+ (Import_DT): Likewise. -+ * comperr.ads (Compiler_Abort): Remove Code parameter and add From_GCC -+ parameter with False default. -+ * comperr.adb (Compiler_Abort): Likewise. Adjust accordingly. -+ * types.h (Fat_Pointer): Rename into... -+ (String_Pointer): ...this. Add comment on interfacing rules. -+ * fe.h (Compiler_Abort): Adjust for above renaming. -+ (Error_Msg_N): Likewise. -+ (Error_Msg_NE): Likewise. -+ (Get_External_Name): Likewise. Add third parameter. -+ (Get_External_Name_With_Suffix): Delete. -+ * gcc-interface/decl.c (STDCALL_PREFIX): Define. -+ (create_concat_name): Adjust call to Get_External_Name, remove call to -+ Get_External_Name_With_Suffix, use STDCALL_PREFIX, adjust for renaming. -+ * gcc-interface/trans.c (post_error): Likewise. -+ (post_error_ne): Likewise. -+ * gcc-interface/misc.c (internal_error_function): Likewise. -+ -+ 2014-04-22 Richard Henderson <rth@redhat.com> -+ -+ * init.c [__linux__] (HAVE_GNAT_ALTERNATE_STACK): New define. -+ (__gnat_alternate_stack): Enable for all linux except ia64. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/gcc/common/config/aarch64/aarch64-common.c -+++ b/src/gcc/common/config/aarch64/aarch64-common.c -@@ -44,6 +44,8 @@ - { - /* Enable section anchors by default at -O1 or higher. */ - { OPT_LEVELS_1_PLUS, OPT_fsection_anchors, NULL, 1 }, -+ /* Enable -fsched-pressure by default when optimizing. */ -+ { OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 }, - /* Enable redundant extension instructions removal at -O2 and higher. 
*/ - { OPT_LEVELS_2_PLUS, OPT_free, NULL, 1 }, - { OPT_LEVELS_NONE, 0, NULL, 0 } ---- a/src/gcc/fortran/ChangeLog.linaro -+++ b/src/gcc/fortran/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/gcc/configure.ac -+++ b/src/gcc/configure.ac -@@ -809,7 +809,7 @@ - ) - AC_SUBST(CONFIGURE_SPECS) - --ACX_PKGVERSION([GCC]) -+ACX_PKGVERSION([Linaro GCC `cat $srcdir/LINARO-VERSION`]) - ACX_BUGURL([http://gcc.gnu.org/bugs.html]) - - # Sanity check enable_languages in case someone does not run the toplevel ---- a/src/gcc/ira-build.c -+++ b/src/gcc/ira-build.c -@@ -523,6 +523,7 @@ - ALLOCNO_BAD_SPILL_P (a) = false; - ALLOCNO_ASSIGNED_P (a) = false; - ALLOCNO_MODE (a) = (regno < 0 ? VOIDmode : PSEUDO_REGNO_MODE (regno)); -+ ALLOCNO_WMODE (a) = ALLOCNO_MODE (a); - ALLOCNO_PREFS (a) = NULL; - ALLOCNO_COPIES (a) = NULL; - ALLOCNO_HARD_REG_COSTS (a) = NULL; -@@ -892,6 +893,7 @@ - parent = ALLOCNO_LOOP_TREE_NODE (a)->parent; - cap = ira_create_allocno (ALLOCNO_REGNO (a), true, parent); - ALLOCNO_MODE (cap) = ALLOCNO_MODE (a); -+ ALLOCNO_WMODE (cap) = ALLOCNO_WMODE (a); - aclass = ALLOCNO_CLASS (a); - ira_set_allocno_class (cap, aclass); - ira_create_allocno_objects (cap); -@@ -1856,9 +1858,9 @@ - - /* This recursive function creates allocnos corresponding to - pseudo-registers containing in X. True OUTPUT_P means that X is -- a lvalue. */ -+ an lvalue. PARENT corresponds to the parent expression of X. 
*/ - static void --create_insn_allocnos (rtx x, bool output_p) -+create_insn_allocnos (rtx x, rtx outer, bool output_p) - { - int i, j; - const char *fmt; -@@ -1873,7 +1875,15 @@ - ira_allocno_t a; - - if ((a = ira_curr_regno_allocno_map[regno]) == NULL) -- a = ira_create_allocno (regno, false, ira_curr_loop_tree_node); -+ { -+ a = ira_create_allocno (regno, false, ira_curr_loop_tree_node); -+ if (outer != NULL && GET_CODE (outer) == SUBREG) -+ { -+ enum machine_mode wmode = GET_MODE (outer); -+ if (GET_MODE_SIZE (wmode) > GET_MODE_SIZE (ALLOCNO_WMODE (a))) -+ ALLOCNO_WMODE (a) = wmode; -+ } -+ } - - ALLOCNO_NREFS (a)++; - ALLOCNO_FREQ (a) += REG_FREQ_FROM_BB (curr_bb); -@@ -1884,25 +1894,25 @@ - } - else if (code == SET) - { -- create_insn_allocnos (SET_DEST (x), true); -- create_insn_allocnos (SET_SRC (x), false); -+ create_insn_allocnos (SET_DEST (x), NULL, true); -+ create_insn_allocnos (SET_SRC (x), NULL, false); - return; - } - else if (code == CLOBBER) - { -- create_insn_allocnos (XEXP (x, 0), true); -+ create_insn_allocnos (XEXP (x, 0), NULL, true); - return; - } - else if (code == MEM) - { -- create_insn_allocnos (XEXP (x, 0), false); -+ create_insn_allocnos (XEXP (x, 0), NULL, false); - return; - } - else if (code == PRE_DEC || code == POST_DEC || code == PRE_INC || - code == POST_INC || code == POST_MODIFY || code == PRE_MODIFY) - { -- create_insn_allocnos (XEXP (x, 0), true); -- create_insn_allocnos (XEXP (x, 0), false); -+ create_insn_allocnos (XEXP (x, 0), NULL, true); -+ create_insn_allocnos (XEXP (x, 0), NULL, false); - return; - } - -@@ -1910,10 +1920,10 @@ - for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) - { - if (fmt[i] == 'e') -- create_insn_allocnos (XEXP (x, i), output_p); -+ create_insn_allocnos (XEXP (x, i), x, output_p); - else if (fmt[i] == 'E') - for (j = 0; j < XVECLEN (x, i); j++) -- create_insn_allocnos (XVECEXP (x, i, j), output_p); -+ create_insn_allocnos (XVECEXP (x, i, j), x, output_p); - } - } - -@@ -1932,7 +1942,7 @@ - ira_assert (bb != NULL); - FOR_BB_INSNS_REVERSE (bb, insn) - if (NONDEBUG_INSN_P (insn)) -- create_insn_allocnos (PATTERN (insn), false); -+ create_insn_allocnos (PATTERN (insn), NULL, false); - /* It might be a allocno living through from one subloop to - another. */ - EXECUTE_IF_SET_IN_REG_SET (df_get_live_in (bb), FIRST_PSEUDO_REGISTER, i, bi) ---- a/src/gcc/calls.c -+++ b/src/gcc/calls.c -@@ -1104,8 +1104,6 @@ - { - CUMULATIVE_ARGS *args_so_far_pnt = get_cumulative_args (args_so_far); - location_t loc = EXPR_LOCATION (exp); -- /* 1 if scanning parms front to back, -1 if scanning back to front. */ -- int inc; - - /* Count arg position in order args appear. */ - int argpos; -@@ -1116,22 +1114,9 @@ - args_size->var = 0; - - /* In this loop, we consider args in the order they are written. -- We fill up ARGS from the front or from the back if necessary -- so that in any case the first arg to be pushed ends up at the front. */ -+ We fill up ARGS from the back. */ - -- if (PUSH_ARGS_REVERSED) -- { -- i = num_actuals - 1, inc = -1; -- /* In this case, must reverse order of args -- so that we compute and push the last arg first. */ -- } -- else -- { -- i = 0, inc = 1; -- } -- -- /* First fill in the actual arguments in the ARGS array, splitting -- complex arguments if necessary. 
*/ -+ i = num_actuals - 1; - { - int j = i; - call_expr_arg_iterator iter; -@@ -1140,7 +1125,7 @@ - if (struct_value_addr_value) - { - args[j].tree_value = struct_value_addr_value; -- j += inc; -+ j--; - } - FOR_EACH_CALL_EXPR_ARG (arg, iter, exp) - { -@@ -1152,17 +1137,17 @@ - { - tree subtype = TREE_TYPE (argtype); - args[j].tree_value = build1 (REALPART_EXPR, subtype, arg); -- j += inc; -+ j--; - args[j].tree_value = build1 (IMAGPART_EXPR, subtype, arg); - } - else - args[j].tree_value = arg; -- j += inc; -+ j--; - } - } - - /* I counts args in order (to be) pushed; ARGPOS counts in order written. */ -- for (argpos = 0; argpos < num_actuals; i += inc, argpos++) -+ for (argpos = 0; argpos < num_actuals; i--, argpos++) - { - tree type = TREE_TYPE (args[i].tree_value); - int unsignedp; -@@ -2952,9 +2937,8 @@ - - compute_argument_addresses (args, argblock, num_actuals); - -- /* If we push args individually in reverse order, perform stack alignment -- before the first push (the last arg). */ -- if (PUSH_ARGS_REVERSED && argblock == 0 -+ /* Perform stack alignment before the first push (the last arg). */ -+ if (argblock == 0 - && adjusted_args_size.constant > reg_parm_stack_space - && adjusted_args_size.constant != unadjusted_args_size) - { -@@ -3097,12 +3081,6 @@ - sibcall_failure = 1; - } - -- /* If we pushed args in forward order, perform stack alignment -- after pushing the last arg. */ -- if (!PUSH_ARGS_REVERSED && argblock == 0) -- anti_adjust_stack (GEN_INT (adjusted_args_size.constant -- - unadjusted_args_size)); -- - /* If register arguments require space on the stack and stack space - was not preallocated, allocate stack space here for arguments - passed in registers. */ -@@ -3152,8 +3130,7 @@ - if (pass == 1 && (return_flags & ERF_RETURNS_ARG)) - { - int arg_nr = return_flags & ERF_RETURN_ARG_MASK; -- if (PUSH_ARGS_REVERSED) -- arg_nr = num_actuals - arg_nr - 1; -+ arg_nr = num_actuals - arg_nr - 1; - if (arg_nr >= 0 - && arg_nr < num_actuals - && args[arg_nr].reg -@@ -3597,7 +3574,6 @@ - isn't present here, so we default to native calling abi here. */ - tree fndecl ATTRIBUTE_UNUSED = NULL_TREE; /* library calls default to host calling abi ? */ - tree fntype ATTRIBUTE_UNUSED = NULL_TREE; /* library calls default to host calling abi ? */ -- int inc; - int count; - rtx argblock = 0; - CUMULATIVE_ARGS args_so_far_v; -@@ -3946,22 +3922,13 @@ - argblock = push_block (GEN_INT (args_size.constant), 0, 0); - } - -- /* If we push args individually in reverse order, perform stack alignment -+ /* We push args individually in reverse order, perform stack alignment - before the first push (the last arg). */ -- if (argblock == 0 && PUSH_ARGS_REVERSED) -+ if (argblock == 0) - anti_adjust_stack (GEN_INT (args_size.constant - - original_args_size.constant)); - -- if (PUSH_ARGS_REVERSED) -- { -- inc = -1; -- argnum = nargs - 1; -- } -- else -- { -- inc = 1; -- argnum = 0; -- } -+ argnum = nargs - 1; - - #ifdef REG_PARM_STACK_SPACE - if (ACCUMULATE_OUTGOING_ARGS) -@@ -3978,7 +3945,7 @@ - - /* ARGNUM indexes the ARGVEC array in the order in which the arguments - are to be pushed. */ -- for (count = 0; count < nargs; count++, argnum += inc) -+ for (count = 0; count < nargs; count++, argnum--) - { - enum machine_mode mode = argvec[argnum].mode; - rtx val = argvec[argnum].value; -@@ -4080,17 +4047,8 @@ - } - } - -- /* If we pushed args in forward order, perform stack alignment -- after pushing the last arg. 
*/ -- if (argblock == 0 && !PUSH_ARGS_REVERSED) -- anti_adjust_stack (GEN_INT (args_size.constant -- - original_args_size.constant)); -+ argnum = nargs - 1; - -- if (PUSH_ARGS_REVERSED) -- argnum = nargs - 1; -- else -- argnum = 0; -- - fun = prepare_call_address (NULL, fun, NULL, &call_fusage, 0, 0); - - /* Now load any reg parms into their regs. */ -@@ -4097,7 +4055,7 @@ - - /* ARGNUM indexes the ARGVEC array in the order in which the arguments - are to be pushed. */ -- for (count = 0; count < nargs; count++, argnum += inc) -+ for (count = 0; count < nargs; count++, argnum--) - { - enum machine_mode mode = argvec[argnum].mode; - rtx val = argvec[argnum].value; ---- a/src/gcc/cfgexpand.c -+++ b/src/gcc/cfgexpand.c -@@ -1292,7 +1292,12 @@ - else if (TREE_CODE (var) == VAR_DECL && DECL_HARD_REGISTER (var)) - { - if (really_expand) -- expand_one_hard_reg_var (var); -+ { -+ expand_one_hard_reg_var (var); -+ if (!DECL_HARD_REGISTER (var)) -+ /* Invalid register specification. */ -+ expand_one_error_var (var); -+ } - } - else if (use_register_for_decl (var)) - { ---- a/src/gcc/explow.c -+++ b/src/gcc/explow.c -@@ -329,11 +329,13 @@ - an address in the address space's address mode, or vice versa (TO_MODE says - which way). We take advantage of the fact that pointers are not allowed to - overflow by commuting arithmetic operations over conversions so that address -- arithmetic insns can be used. */ -+ arithmetic insns can be used. IN_CONST is true if this conversion is inside -+ a CONST. */ - --rtx --convert_memory_address_addr_space (enum machine_mode to_mode ATTRIBUTE_UNUSED, -- rtx x, addr_space_t as ATTRIBUTE_UNUSED) -+static rtx -+convert_memory_address_addr_space_1 (enum machine_mode to_mode ATTRIBUTE_UNUSED, -+ rtx x, addr_space_t as ATTRIBUTE_UNUSED, -+ bool in_const) - { - #ifndef POINTERS_EXTEND_UNSIGNED - gcc_assert (GET_MODE (x) == to_mode || GET_MODE (x) == VOIDmode); -@@ -389,32 +391,29 @@ - - case CONST: - return gen_rtx_CONST (to_mode, -- convert_memory_address_addr_space -- (to_mode, XEXP (x, 0), as)); -+ convert_memory_address_addr_space_1 -+ (to_mode, XEXP (x, 0), as, true)); - break; - - case PLUS: - case MULT: -- /* FIXME: For addition, we used to permute the conversion and -- addition operation only if one operand is a constant and -- converting the constant does not change it or if one operand -- is a constant and we are using a ptr_extend instruction -- (POINTERS_EXTEND_UNSIGNED < 0) even if the resulting address -- may overflow/underflow. We relax the condition to include -- zero-extend (POINTERS_EXTEND_UNSIGNED > 0) since the other -- parts of the compiler depend on it. See PR 49721. -- -+ /* For addition we can safely permute the conversion and addition -+ operation if one operand is a constant and converting the constant -+ does not change it or if one operand is a constant and we are -+ using a ptr_extend instruction (POINTERS_EXTEND_UNSIGNED < 0). - We can always safely permute them if we are making the address -- narrower. */ -+ narrower. Inside a CONST RTL, this is safe for both pointers -+ zero or sign extended as pointers cannot wrap. 
*/ - if (GET_MODE_SIZE (to_mode) < GET_MODE_SIZE (from_mode) - || (GET_CODE (x) == PLUS - && CONST_INT_P (XEXP (x, 1)) -- && (POINTERS_EXTEND_UNSIGNED != 0 -- || XEXP (x, 1) == convert_memory_address_addr_space -- (to_mode, XEXP (x, 1), as)))) -+ && ((in_const && POINTERS_EXTEND_UNSIGNED != 0) -+ || XEXP (x, 1) == convert_memory_address_addr_space_1 -+ (to_mode, XEXP (x, 1), as, in_const) -+ || POINTERS_EXTEND_UNSIGNED < 0))) - return gen_rtx_fmt_ee (GET_CODE (x), to_mode, -- convert_memory_address_addr_space -- (to_mode, XEXP (x, 0), as), -+ convert_memory_address_addr_space_1 -+ (to_mode, XEXP (x, 0), as, in_const), - XEXP (x, 1)); - break; - -@@ -426,6 +425,18 @@ - x, POINTERS_EXTEND_UNSIGNED); - #endif /* defined(POINTERS_EXTEND_UNSIGNED) */ - } -+ -+/* Given X, a memory address in address space AS' pointer mode, convert it to -+ an address in the address space's address mode, or vice versa (TO_MODE says -+ which way). We take advantage of the fact that pointers are not allowed to -+ overflow by commuting arithmetic operations over conversions so that address -+ arithmetic insns can be used. */ -+ -+rtx -+convert_memory_address_addr_space (enum machine_mode to_mode, rtx x, addr_space_t as) -+{ -+ return convert_memory_address_addr_space_1 (to_mode, x, as, false); -+} - - /* Return something equivalent to X but valid as a memory address for something - of mode MODE in the named address space AS. When X is not itself valid, ---- a/src/gcc/lto/ChangeLog.linaro -+++ b/src/gcc/lto/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/gcc/po/ChangeLog.linaro -+++ b/src/gcc/po/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. 
-+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/gcc/varasm.c -+++ b/src/gcc/varasm.c -@@ -1335,6 +1335,11 @@ - /* As a register variable, it has no section. */ - return; - } -+ /* Avoid internal errors from invalid register -+ specifications. */ -+ SET_DECL_ASSEMBLER_NAME (decl, NULL_TREE); -+ DECL_HARD_REGISTER (decl) = 0; -+ return; - } - /* Now handle ordinary static variables and functions (in memory). - Also handle vars declared register invalidly. */ ---- a/src/gcc/sched-deps.c -+++ b/src/gcc/sched-deps.c -@@ -2828,35 +2828,42 @@ - sched_deps_info->finish_rhs (); - } - --/* Try to group comparison and the following conditional jump INSN if -- they're already adjacent. This is to prevent scheduler from scheduling -- them apart. */ -+/* Try to group two fuseable insns together to prevent scheduler -+ from scheduling them apart. */ - - static void --try_group_insn (rtx insn) -+sched_macro_fuse_insns (rtx insn) - { -- unsigned int condreg1, condreg2; -- rtx cc_reg_1; - rtx prev; - -- if (!any_condjump_p (insn)) -- return; -+ if (any_condjump_p (insn)) -+ { -+ unsigned int condreg1, condreg2; -+ rtx cc_reg_1; -+ targetm.fixed_condition_code_regs (&condreg1, &condreg2); -+ cc_reg_1 = gen_rtx_REG (CCmode, condreg1); -+ prev = prev_nonnote_nondebug_insn (insn); -+ if (!reg_referenced_p (cc_reg_1, PATTERN (insn)) -+ || !prev -+ || !modified_in_p (cc_reg_1, prev)) -+ return; -+ } -+ else -+ { -+ rtx insn_set = single_set (insn); - -- targetm.fixed_condition_code_regs (&condreg1, &condreg2); -- cc_reg_1 = gen_rtx_REG (CCmode, condreg1); -- prev = prev_nonnote_nondebug_insn (insn); -- if (!reg_referenced_p (cc_reg_1, PATTERN (insn)) -- || !prev -- || !modified_in_p (cc_reg_1, prev)) -- return; -+ prev = prev_nonnote_nondebug_insn (insn); -+ if (!prev -+ || !insn_set -+ || !single_set (prev) -+ || !modified_in_p (SET_DEST (insn_set), prev)) -+ return; - -- /* Different microarchitectures support macro fusions for different -- combinations of insn pairs. */ -- if (!targetm.sched.macro_fusion_pair_p -- || !targetm.sched.macro_fusion_pair_p (prev, insn)) -- return; -+ } - -- SCHED_GROUP_P (insn) = 1; -+ if (targetm.sched.macro_fusion_pair_p (prev, insn)) -+ SCHED_GROUP_P (insn) = 1; -+ - } - - /* Analyze an INSN with pattern X to find all dependencies. */ -@@ -2885,7 +2892,7 @@ - /* Group compare and branch insns for macro-fusion. 
*/ - if (targetm.sched.macro_fusion_p - && targetm.sched.macro_fusion_p ()) -- try_group_insn (insn); -+ sched_macro_fuse_insns (insn); - - if (may_trap_p (x)) - /* Avoid moving trapping instructions across function calls that might ---- a/src/gcc/var-tracking.c -+++ b/src/gcc/var-tracking.c -@@ -5997,7 +5997,8 @@ - { - cselib_val *oval = cselib_lookup (oloc, GET_MODE (oloc), 0, VOIDmode); - -- gcc_assert (oval != v); -+ if (oval == v) -+ return; - gcc_assert (REG_P (oloc) || MEM_P (oloc)); - - if (oval && !cselib_preserved_value_p (oval)) ---- a/src/gcc/system.h -+++ b/src/gcc/system.h -@@ -830,7 +830,8 @@ - CAN_DEBUG_WITHOUT_FP UNLIKELY_EXECUTED_TEXT_SECTION_NAME \ - HOT_TEXT_SECTION_NAME LEGITIMATE_CONSTANT_P ALWAYS_STRIP_DOTDOT \ - OUTPUT_ADDR_CONST_EXTRA SMALL_REGISTER_CLASSES ASM_OUTPUT_IDENT \ -- ASM_BYTE_OP MEMBER_TYPE_FORCES_BLK -+ ASM_BYTE_OP MEMBER_TYPE_FORCES_BLK CLEAR_BY_PIECES_P \ -+ MOVE_BY_PIECES_P SET_BY_PIECES_P STORE_BY_PIECES_P - - /* Target macros only used for code built for the target, that have - moved to libgcc-tm.h or have never been present elsewhere. */ -@@ -912,7 +913,8 @@ - USE_COMMON_FOR_ONE_ONLY IFCVT_EXTRA_FIELDS IFCVT_INIT_EXTRA_FIELDS \ - CASE_USE_BIT_TESTS FIXUNS_TRUNC_LIKE_FIX_TRUNC \ - GO_IF_MODE_DEPENDENT_ADDRESS DELAY_SLOTS_FOR_EPILOGUE \ -- ELIGIBLE_FOR_EPILOGUE_DELAY TARGET_C99_FUNCTIONS TARGET_HAS_SINCOS -+ ELIGIBLE_FOR_EPILOGUE_DELAY TARGET_C99_FUNCTIONS TARGET_HAS_SINCOS \ -+ LARGEST_EXPONENT_IS_NORNAL ROUND_TOWARDS_ZERO - - /* Hooks that are no longer used. */ - #pragma GCC poison LANG_HOOKS_FUNCTION_MARK LANG_HOOKS_FUNCTION_FREE \ ---- a/src/gcc/config.gcc -+++ b/src/gcc/config.gcc -@@ -312,8 +312,9 @@ - aarch64*-*-*) - cpu_type=aarch64 - need_64bit_hwint=yes -- extra_headers="arm_neon.h" -+ extra_headers="arm_neon.h arm_acle.h" - extra_objs="aarch64-builtins.o aarch-common.o" -+ target_gtfiles="\$(srcdir)/config/aarch64/aarch64-builtins.c" - target_has_targetm_common=yes - ;; - alpha*-*-*) ---- a/src/gcc/Makefile.in -+++ b/src/gcc/Makefile.in -@@ -814,10 +814,12 @@ - DEVPHASE := $(srcdir)/DEV-PHASE # experimental, prerelease, "" - DATESTAMP := $(srcdir)/DATESTAMP # YYYYMMDD or empty - REVISION := $(srcdir)/REVISION # [BRANCH revision XXXXXX] -+LINAROVER := $(srcdir)/LINARO-VERSION # M.x-YYYY.MM[-S][~dev] - - BASEVER_c := $(shell cat $(BASEVER)) - DEVPHASE_c := $(shell cat $(DEVPHASE)) - DATESTAMP_c := $(shell cat $(DATESTAMP)) -+LINAROVER_c := $(shell cat $(LINAROVER)) - - ifeq (,$(wildcard $(REVISION))) - REVISION_c := -@@ -838,6 +840,7 @@ - DATESTAMP_s := "\"$(if $(DEVPHASE_c), $(DATESTAMP_c))\"" - PKGVERSION_s:= "\"@PKGVERSION@\"" - BUGURL_s := "\"@REPORT_BUGS_TO@\"" -+LINAROVER_s := "\"$(LINAROVER_c)\"" - - PKGVERSION := @PKGVERSION@ - BUGURL_TEXI := @REPORT_BUGS_TEXI@ -@@ -2542,8 +2545,9 @@ - -DSTANDARD_EXEC_PREFIX=\"$(libdir)/gcc/\" \ - @TARGET_SYSTEM_ROOT_DEFINE@ - --CFLAGS-cppbuiltin.o += $(PREPROCESSOR_DEFINES) -DBASEVER=$(BASEVER_s) --cppbuiltin.o: $(BASEVER) -+CFLAGS-cppbuiltin.o += $(PREPROCESSOR_DEFINES) -DBASEVER=$(BASEVER_s) \ -+ -DLINAROVER=$(LINAROVER_s) -+cppbuiltin.o: $(BASEVER) $(LINAROVER) - - CFLAGS-cppdefault.o += $(PREPROCESSOR_DEFINES) - -@@ -2799,8 +2803,7 @@ - gcov.texi trouble.texi bugreport.texi service.texi \ - contribute.texi compat.texi funding.texi gnu.texi gpl_v3.texi \ - fdl.texi contrib.texi cppenv.texi cppopts.texi avr-mmcu.texi \ -- implement-c.texi implement-cxx.texi arm-neon-intrinsics.texi \ -- arm-acle-intrinsics.texi -+ implement-c.texi implement-cxx.texi - - # we explicitly use $(srcdir)/doc/tm.texi 
here to avoid confusion with - # the generated tm.texi; the latter might have a more recent timestamp, ---- a/src/gcc/tree-cfg.c -+++ b/src/gcc/tree-cfg.c -@@ -2594,7 +2594,7 @@ - near its "logical" location. This is of most help to humans looking - at debugging dumps. */ - --static basic_block -+basic_block - split_edge_bb_loc (edge edge_in) - { - basic_block dest = edge_in->dest; ---- a/src/gcc/tree-cfg.h -+++ b/src/gcc/tree-cfg.h -@@ -62,6 +62,7 @@ - extern tree gimple_block_label (basic_block); - extern void add_phi_args_after_copy_bb (basic_block); - extern void add_phi_args_after_copy (basic_block *, unsigned, edge); -+extern basic_block split_edge_bb_loc (edge); - extern bool gimple_duplicate_sese_region (edge, edge, basic_block *, unsigned, - basic_block *, bool); - extern bool gimple_duplicate_sese_tail (edge, edge, basic_block *, unsigned, ---- a/src/gcc/ree.c -+++ b/src/gcc/ree.c -@@ -794,6 +794,14 @@ - if (!SCALAR_INT_MODE_P (GET_MODE (SET_DEST (PATTERN (cand->insn))))) - return false; - -+ enum machine_mode dst_mode = GET_MODE (SET_DEST (PATTERN (cand->insn))); -+ rtx src_reg = get_extended_src_reg (SET_SRC (PATTERN (cand->insn))); -+ -+ /* Ensure the number of hard registers of the copy match. */ -+ if (HARD_REGNO_NREGS (REGNO (src_reg), dst_mode) -+ != HARD_REGNO_NREGS (REGNO (src_reg), GET_MODE (src_reg))) -+ return false; -+ - /* There's only one reaching def. */ - rtx def_insn = state->defs_list[0]; - -@@ -843,7 +851,7 @@ - start_sequence (); - rtx pat = PATTERN (cand->insn); - rtx new_dst = gen_rtx_REG (GET_MODE (SET_DEST (pat)), -- REGNO (XEXP (SET_SRC (pat), 0))); -+ REGNO (get_extended_src_reg (SET_SRC (pat)))); - rtx new_src = gen_rtx_REG (GET_MODE (SET_DEST (pat)), - REGNO (SET_DEST (pat))); - emit_move_insn (new_dst, new_src); ---- a/src/gcc/config/s390/s390.c -+++ b/src/gcc/config/s390/s390.c -@@ -12066,6 +12066,18 @@ - register_pass (&insert_pass_s390_early_mach); - } - -+/* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */ -+ -+static bool -+s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size, -+ unsigned int align ATTRIBUTE_UNUSED, -+ enum by_pieces_operation op ATTRIBUTE_UNUSED, -+ bool speed_p ATTRIBUTE_UNUSED) -+{ -+ return (size == 1 || size == 2 -+ || size == 4 || (TARGET_ZARCH && size == 8)); -+} -+ - /* Initialize GCC target structure. */ - - #undef TARGET_ASM_ALIGNED_HI_OP -@@ -12248,6 +12260,10 @@ - #undef TARGET_SET_UP_BY_PROLOGUE - #define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue - -+#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P -+#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \ -+ s390_use_by_pieces_infrastructure_p -+ - struct gcc_target targetm = TARGET_INITIALIZER; - - #include "gt-s390.h" ---- a/src/gcc/config/s390/s390.h -+++ b/src/gcc/config/s390/s390.h -@@ -752,24 +752,6 @@ - #define MOVE_MAX_PIECES (TARGET_ZARCH ? 8 : 4) - #define MAX_MOVE_MAX 16 - --/* Determine whether to use move_by_pieces or block move insn. */ --#define MOVE_BY_PIECES_P(SIZE, ALIGN) \ -- ( (SIZE) == 1 || (SIZE) == 2 || (SIZE) == 4 \ -- || (TARGET_ZARCH && (SIZE) == 8) ) -- --/* Determine whether to use clear_by_pieces or block clear insn. */ --#define CLEAR_BY_PIECES_P(SIZE, ALIGN) \ -- ( (SIZE) == 1 || (SIZE) == 2 || (SIZE) == 4 \ -- || (TARGET_ZARCH && (SIZE) == 8) ) -- --/* This macro is used to determine whether store_by_pieces should be -- called to "memcpy" storage when the source is a constant string. 
*/ --#define STORE_BY_PIECES_P(SIZE, ALIGN) MOVE_BY_PIECES_P (SIZE, ALIGN) -- --/* Likewise to decide whether to "memset" storage with byte values -- other than zero. */ --#define SET_BY_PIECES_P(SIZE, ALIGN) STORE_BY_PIECES_P (SIZE, ALIGN) -- - /* Don't perform CSE on function addresses. */ - #define NO_FUNCTION_CSE - ---- a/src/gcc/config/i386/i386.c -+++ b/src/gcc/config/i386/i386.c -@@ -25796,6 +25796,9 @@ - rtx compare_set = NULL_RTX, test_if, cond; - rtx alu_set = NULL_RTX, addr = NULL_RTX; - -+ if (!any_condjump_p (condjmp)) -+ return false; -+ - if (get_attr_type (condgen) != TYPE_TEST - && get_attr_type (condgen) != TYPE_ICMP - && get_attr_type (condgen) != TYPE_INCDEC ---- a/src/gcc/config/sh/sh.c -+++ b/src/gcc/config/sh/sh.c -@@ -317,6 +317,10 @@ - static bool sh_legitimate_constant_p (enum machine_mode, rtx); - static int mov_insn_size (enum machine_mode, bool); - static int mov_insn_alignment_mask (enum machine_mode, bool); -+static bool sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT, -+ unsigned int, -+ enum by_pieces_operation, -+ bool); - static bool sequence_insn_p (rtx); - static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool); - static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&, -@@ -601,6 +605,10 @@ - #undef TARGET_FIXED_CONDITION_CODE_REGS - #define TARGET_FIXED_CONDITION_CODE_REGS sh_fixed_condition_code_regs - -+#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P -+#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \ -+ sh_use_by_pieces_infrastructure_p -+ - /* Machine-specific symbol_ref flags. */ - #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0) - -@@ -13533,4 +13541,27 @@ - return NULL_RTX; - } - -+/* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */ -+ -+static bool -+sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size, -+ unsigned int align, -+ enum by_pieces_operation op, -+ bool speed_p) -+{ -+ switch (op) -+ { -+ case MOVE_BY_PIECES: -+ return move_by_pieces_ninsns (size, align, MOVE_MAX_PIECES + 1) -+ < (!speed_p ? 2 : (align >= 32) ? 16 : 2); -+ case STORE_BY_PIECES: -+ case SET_BY_PIECES: -+ return move_by_pieces_ninsns (size, align, STORE_MAX_PIECES + 1) -+ < (!speed_p ? 2 : (align >= 32) ? 16 : 2); -+ default: -+ return default_use_by_pieces_infrastructure_p (size, align, -+ op, speed_p); -+ } -+} -+ - #include "gt-sh.h" ---- a/src/gcc/config/sh/sh.h -+++ b/src/gcc/config/sh/sh.h -@@ -1584,16 +1584,6 @@ - #define USE_STORE_PRE_DECREMENT(mode) ((mode == SImode || mode == DImode) \ - ? 0 : TARGET_SH1) - --#define MOVE_BY_PIECES_P(SIZE, ALIGN) \ -- (move_by_pieces_ninsns (SIZE, ALIGN, MOVE_MAX_PIECES + 1) \ -- < (optimize_size ? 2 : ((ALIGN >= 32) ? 16 : 2))) -- --#define STORE_BY_PIECES_P(SIZE, ALIGN) \ -- (move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES + 1) \ -- < (optimize_size ? 2 : ((ALIGN >= 32) ? 16 : 2))) -- --#define SET_BY_PIECES_P(SIZE, ALIGN) STORE_BY_PIECES_P(SIZE, ALIGN) -- - /* Macros to check register numbers against specific register classes. */ - - /* These assume that REGNO is a hard or pseudo reg number. 
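The s390 and sh hunks above replace the compile-time *_BY_PIECES_P target macros with the TARGET_USE_BY_PIECES_INFRASTRUCTURE_P hook, which receives the size, alignment, operation kind and a speed/size flag, so each backend keeps its old heuristic in a single function instead of four macros. As an illustration only (not part of gcc-linaro.diff), the size test the s390 hook encodes can be restated as a small standalone C function; use_by_pieces_ok and target_zarch are hypothetical names standing in for the hook and the TARGET_ZARCH flag, and the alignment/operation/speed arguments are omitted here because the s390 version ignores them.

/* Illustrative sketch, not patch content: the s390 by-pieces size check as plain C.  */
#include <stdbool.h>
#include <stdio.h>

static bool
use_by_pieces_ok (unsigned long long size, bool target_zarch)
{
  /* 1-, 2- and 4-byte blocks are moved/cleared by pieces; 8 bytes only on z/Architecture.  */
  return size == 1 || size == 2 || size == 4 || (target_zarch && size == 8);
}

int
main (void)
{
  /* An 8-byte block is rejected without z/Architecture support and accepted with it.  */
  printf ("%d %d\n", use_by_pieces_ok (8, false), use_by_pieces_ok (8, true));
  return 0;
}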
---- a/src/gcc/config/host-linux.c -+++ b/src/gcc/config/host-linux.c -@@ -86,6 +86,8 @@ - # define TRY_EMPTY_VM_SPACE 0x60000000 - #elif defined(__mc68000__) - # define TRY_EMPTY_VM_SPACE 0x40000000 -+#elif defined(__aarch64__) && defined(__ILP32__) -+# define TRY_EMPTY_VM_SPACE 0x60000000 - #elif defined(__aarch64__) - # define TRY_EMPTY_VM_SPACE 0x1000000000 - #elif defined(__ARM_EABI__) ---- a/src/gcc/config/cris/cris.h -+++ b/src/gcc/config/cris/cris.h -@@ -80,15 +80,7 @@ - /* Which CPU version this is. The parsed and adjusted cris_cpu_str. */ - extern int cris_cpu_version; - --/* Changing the order used to be necessary to put the fourth __make_dp -- argument (a DImode parameter) in registers, to fit with the libfunc -- parameter passing scheme used for intrinsic functions. FIXME: Check -- performance. */ --#ifdef IN_LIBGCC2 --#define __make_dp(a,b,c,d) __cris_make_dp(d,a,b,c) --#endif - -- - /* Node: Driver */ - - /* Also provide canonical vN definitions when user specifies an alias. */ ---- a/src/gcc/config/aarch64/geniterators.sh -+++ b/src/gcc/config/aarch64/geniterators.sh -@@ -0,0 +1,45 @@ -+#!/bin/sh -+# -+# Copyright (C) 2014 Free Software Foundation, Inc. -+# Contributed by ARM Ltd. -+# -+# This file is part of GCC. -+# -+# GCC is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 3, or (at your option) -+# any later version. -+# -+# GCC is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with GCC; see the file COPYING3. If not see -+# <http://www.gnu.org/licenses/>. -+ -+# Generate aarch64-builtin-iterators.h, a file containing a series of -+# BUILTIN_<ITERATOR> macros, which expand to VAR<N> Macros covering the -+# same set of modes as the iterator in iterators.md -+ -+echo "/* -*- buffer-read-only: t -*- */" -+echo "/* Generated automatically by geniterators.sh from iterators.md. */" -+echo "#ifndef GCC_AARCH64_ITERATORS_H" -+echo "#define GCC_AARCH64_ITERATORS_H" -+ -+# Strip newlines, create records marked ITERATOR, and strip junk (anything -+# which does not have a matching brace because it contains characters we -+# don't want to or can't handle (e.g P, PTR iterators change depending on -+# Pmode and ptr_mode). -+cat $1 | tr "\n" " " \ -+ | sed 's/(define_mode_iterator \([A-Za-z0-9_]*\) \([]\[A-Z0-9 \t]*\)/\n#define BUILTIN_\1(T, N, MAP) \\ \2\n/g' \ -+ | grep '#define [A-Z0-9_(), \\]* \[[A-Z0-9[:space:]]*]' \ -+ | sed 's/\t//g' \ -+ | sed 's/ \+/ /g' \ -+ | sed 's/ \[\([A-Z0-9 ]*\)]/\n\L\1/' \ -+ | awk ' BEGIN { FS = " " ; OFS = ", "} \ -+ /#/ { print } \ -+ ! /#/ { $1 = $1 ; printf " VAR%d (T, N, MAP, %s)\n", NF, $0 }' -+ -+echo "#endif /* GCC_AARCH64_ITERATORS_H */" ---- a/src/gcc/config/aarch64/aarch64-simd.md -+++ b/src/gcc/config/aarch64/aarch64-simd.md -@@ -19,8 +19,8 @@ - ;; <http://www.gnu.org/licenses/>. 
- - (define_expand "mov<mode>" -- [(set (match_operand:VALL 0 "aarch64_simd_nonimmediate_operand" "") -- (match_operand:VALL 1 "aarch64_simd_general_operand" ""))] -+ [(set (match_operand:VALL 0 "nonimmediate_operand" "") -+ (match_operand:VALL 1 "general_operand" ""))] - "TARGET_SIMD" - " - if (GET_CODE (operands[0]) == MEM) -@@ -29,8 +29,8 @@ - ) - - (define_expand "movmisalign<mode>" -- [(set (match_operand:VALL 0 "aarch64_simd_nonimmediate_operand" "") -- (match_operand:VALL 1 "aarch64_simd_general_operand" ""))] -+ [(set (match_operand:VALL 0 "nonimmediate_operand" "") -+ (match_operand:VALL 1 "general_operand" ""))] - "TARGET_SIMD" - { - /* This pattern is not permitted to fail during expansion: if both arguments -@@ -91,9 +91,9 @@ - ) - - (define_insn "*aarch64_simd_mov<mode>" -- [(set (match_operand:VD 0 "aarch64_simd_nonimmediate_operand" -+ [(set (match_operand:VD 0 "nonimmediate_operand" - "=w, m, w, ?r, ?w, ?r, w") -- (match_operand:VD 1 "aarch64_simd_general_operand" -+ (match_operand:VD 1 "general_operand" - "m, w, w, w, r, r, Dn"))] - "TARGET_SIMD - && (register_operand (operands[0], <MODE>mode) -@@ -119,9 +119,9 @@ - ) - - (define_insn "*aarch64_simd_mov<mode>" -- [(set (match_operand:VQ 0 "aarch64_simd_nonimmediate_operand" -+ [(set (match_operand:VQ 0 "nonimmediate_operand" - "=w, m, w, ?r, ?w, ?r, w") -- (match_operand:VQ 1 "aarch64_simd_general_operand" -+ (match_operand:VQ 1 "general_operand" - "m, w, w, w, r, r, Dn"))] - "TARGET_SIMD - && (register_operand (operands[0], <MODE>mode) -@@ -286,6 +286,23 @@ - [(set_attr "type" "neon_mul_<Vetype><q>")] - ) - -+(define_insn "bswap<mode>" -+ [(set (match_operand:VDQHSD 0 "register_operand" "=w") -+ (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))] -+ "TARGET_SIMD" -+ "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>" -+ [(set_attr "type" "neon_rev<q>")] -+) -+ -+(define_insn "aarch64_rbit<mode>" -+ [(set (match_operand:VB 0 "register_operand" "=w") -+ (unspec:VB [(match_operand:VB 1 "register_operand" "w")] -+ UNSPEC_RBIT))] -+ "TARGET_SIMD" -+ "rbit\\t%0.<Vbtype>, %1.<Vbtype>" -+ [(set_attr "type" "neon_rbit")] -+) -+ - (define_insn "*aarch64_mul3_elt<mode>" - [(set (match_operand:VMUL 0 "register_operand" "=w") - (mult:VMUL -@@ -954,7 +971,7 @@ - dup\\t%d0, %1.d[0] - fmov\\t%d0, %1 - dup\\t%d0, %1" -- [(set_attr "type" "neon_dup<q>,fmov,neon_dup<q>") -+ [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>") - (set_attr "simd" "yes,*,yes") - (set_attr "fp" "*,yes,*") - (set_attr "length" "4")] -@@ -1046,7 +1063,7 @@ - (match_operand:<VHALF> 1 "register_operand" "w,r") - (vec_select:<VHALF> - (match_dup 0) -- (match_operand:VQ 2 "vect_par_cnst_hi_half" ""))))] -+ (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))] - "TARGET_SIMD && BYTES_BIG_ENDIAN" - "@ - ins\\t%0.d[1], %1.d[0] -@@ -1059,7 +1076,7 @@ - (match_operand:<VHALF> 1 "register_operand" "")] - "TARGET_SIMD" - { -- rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, BYTES_BIG_ENDIAN); -+ rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false); - if (BYTES_BIG_ENDIAN) - emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0], - operands[1], p)); -@@ -1099,7 +1116,7 @@ - ;; For quads. 
- - (define_insn "vec_pack_trunc_<mode>" -- [(set (match_operand:<VNARROWQ2> 0 "register_operand" "+&w") -+ [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w") - (vec_concat:<VNARROWQ2> - (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")) - (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))] -@@ -1541,7 +1558,7 @@ - ) - - ;; Vector versions of the floating-point frint patterns. --;; Expands to btrunc, ceil, floor, nearbyint, rint, round. -+;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn. - (define_insn "<frint_pattern><mode>2" - [(set (match_operand:VDQF 0 "register_operand" "=w") - (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")] -@@ -1853,15 +1870,15 @@ - ;; bif op0, op1, mask - - (define_insn "aarch64_simd_bsl<mode>_internal" -- [(set (match_operand:VALLDIF 0 "register_operand" "=w,w,w") -- (ior:VALLDIF -- (and:VALLDIF -- (match_operand:<V_cmp_result> 1 "register_operand" " 0,w,w") -- (match_operand:VALLDIF 2 "register_operand" " w,w,0")) -- (and:VALLDIF -+ [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w") -+ (ior:VSDQ_I_DI -+ (and:VSDQ_I_DI - (not:<V_cmp_result> -- (match_dup:<V_cmp_result> 1)) -- (match_operand:VALLDIF 3 "register_operand" " w,0,w")) -+ (match_operand:<V_cmp_result> 1 "register_operand" " 0,w,w")) -+ (match_operand:VSDQ_I_DI 3 "register_operand" " w,0,w")) -+ (and:VSDQ_I_DI -+ (match_dup:<V_cmp_result> 1) -+ (match_operand:VSDQ_I_DI 2 "register_operand" " w,w,0")) - ))] - "TARGET_SIMD" - "@ -@@ -1879,9 +1896,21 @@ - "TARGET_SIMD" - { - /* We can't alias operands together if they have different modes. */ -+ rtx tmp = operands[0]; -+ if (FLOAT_MODE_P (<MODE>mode)) -+ { -+ operands[2] = gen_lowpart (<V_cmp_result>mode, operands[2]); -+ operands[3] = gen_lowpart (<V_cmp_result>mode, operands[3]); -+ tmp = gen_reg_rtx (<V_cmp_result>mode); -+ } - operands[1] = gen_lowpart (<V_cmp_result>mode, operands[1]); -- emit_insn (gen_aarch64_simd_bsl<mode>_internal (operands[0], operands[1], -- operands[2], operands[3])); -+ emit_insn (gen_aarch64_simd_bsl<v_cmp_result>_internal (tmp, -+ operands[1], -+ operands[2], -+ operands[3])); -+ if (tmp != operands[0]) -+ emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp)); -+ - DONE; - }) - -@@ -1895,58 +1924,94 @@ - (match_operand:VDQ 2 "nonmemory_operand")))] - "TARGET_SIMD" - { -- int inverse = 0, has_zero_imm_form = 0; - rtx op1 = operands[1]; - rtx op2 = operands[2]; - rtx mask = gen_reg_rtx (<MODE>mode); -+ enum rtx_code code = GET_CODE (operands[3]); - -- switch (GET_CODE (operands[3])) -+ /* Switching OP1 and OP2 is necessary for NE (to output a cmeq insn), -+ and desirable for other comparisons if it results in FOO ? -1 : 0 -+ (this allows direct use of the comparison result without a bsl). */ -+ if (code == NE -+ || (code != EQ -+ && op1 == CONST0_RTX (<V_cmp_result>mode) -+ && op2 == CONSTM1_RTX (<V_cmp_result>mode))) - { -+ op1 = operands[2]; -+ op2 = operands[1]; -+ switch (code) -+ { -+ case LE: code = GT; break; -+ case LT: code = GE; break; -+ case GE: code = LT; break; -+ case GT: code = LE; break; -+ /* No case EQ. */ -+ case NE: code = EQ; break; -+ case LTU: code = GEU; break; -+ case LEU: code = GTU; break; -+ case GTU: code = LEU; break; -+ case GEU: code = LTU; break; -+ default: gcc_unreachable (); -+ } -+ } -+ -+ /* Make sure we can handle the last operand. */ -+ switch (code) -+ { -+ case NE: -+ /* Normalized to EQ above. */ -+ gcc_unreachable (); -+ - case LE: - case LT: -- case NE: -- inverse = 1; -- /* Fall through. 
*/ - case GE: - case GT: - case EQ: -- has_zero_imm_form = 1; -- break; -- case LEU: -- case LTU: -- inverse = 1; -- break; -+ /* These instructions have a form taking an immediate zero. */ -+ if (operands[5] == CONST0_RTX (<MODE>mode)) -+ break; -+ /* Fall through, as may need to load into register. */ - default: -+ if (!REG_P (operands[5])) -+ operands[5] = force_reg (<MODE>mode, operands[5]); - break; - } - -- if (!REG_P (operands[5]) -- && (operands[5] != CONST0_RTX (<MODE>mode) || !has_zero_imm_form)) -- operands[5] = force_reg (<MODE>mode, operands[5]); -- -- switch (GET_CODE (operands[3])) -+ switch (code) - { - case LT: -+ emit_insn (gen_aarch64_cmlt<mode> (mask, operands[4], operands[5])); -+ break; -+ - case GE: - emit_insn (gen_aarch64_cmge<mode> (mask, operands[4], operands[5])); - break; - - case LE: -+ emit_insn (gen_aarch64_cmle<mode> (mask, operands[4], operands[5])); -+ break; -+ - case GT: - emit_insn (gen_aarch64_cmgt<mode> (mask, operands[4], operands[5])); - break; - - case LTU: -+ emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[5], operands[4])); -+ break; -+ - case GEU: - emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[4], operands[5])); - break; - - case LEU: -+ emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[5], operands[4])); -+ break; -+ - case GTU: - emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[4], operands[5])); - break; - -- case NE: -+ /* NE has been normalized to EQ above. */ - case EQ: - emit_insn (gen_aarch64_cmeq<mode> (mask, operands[4], operands[5])); - break; -@@ -1955,12 +2020,6 @@ - gcc_unreachable (); - } - -- if (inverse) -- { -- op1 = operands[2]; -- op2 = operands[1]; -- } -- - /* If we have (a = (b CMP c) ? -1 : 0); - Then we can simply move the generated mask. */ - -@@ -2348,6 +2407,15 @@ - DONE; - }) - -+(define_expand "aarch64_reinterpretdf<mode>" -+ [(match_operand:DF 0 "register_operand" "") -+ (match_operand:VD_RE 1 "register_operand" "")] -+ "TARGET_SIMD" -+{ -+ aarch64_simd_reinterpret (operands[0], operands[1]); -+ DONE; -+}) -+ - (define_expand "aarch64_reinterpretv16qi<mode>" - [(match_operand:V16QI 0 "register_operand" "") - (match_operand:VQ 1 "register_operand" "")] -@@ -2734,9 +2802,9 @@ - ;; <su>q<absneg> - - (define_insn "aarch64_s<optab><mode>" -- [(set (match_operand:VSDQ_I_BHSI 0 "register_operand" "=w") -- (UNQOPS:VSDQ_I_BHSI -- (match_operand:VSDQ_I_BHSI 1 "register_operand" "w")))] -+ [(set (match_operand:VSDQ_I 0 "register_operand" "=w") -+ (UNQOPS:VSDQ_I -+ (match_operand:VSDQ_I 1 "register_operand" "w")))] - "TARGET_SIMD" - "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>" - [(set_attr "type" "neon_<optab><q>")] -@@ -3788,26 +3856,46 @@ - ))) - (clobber (reg:CC CC_REGNUM))] - "TARGET_SIMD" -- "@ -- cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2> -- cm<optab>\t%d0, %d1, #0 -- #" -- "reload_completed -- /* We need to prevent the split from -- happening in the 'w' constraint cases. 
*/ -- && GP_REGNUM_P (REGNO (operands[0])) -- && GP_REGNUM_P (REGNO (operands[1]))" -- [(const_int 0)] -+ "#" -+ "reload_completed" -+ [(set (match_operand:DI 0 "register_operand") -+ (neg:DI -+ (COMPARISONS:DI -+ (match_operand:DI 1 "register_operand") -+ (match_operand:DI 2 "aarch64_simd_reg_or_zero") -+ )))] - { -- enum machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]); -- rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]); -- rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]); -- emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); -- DONE; -+ /* If we are in the general purpose register file, -+ we split to a sequence of comparison and store. */ -+ if (GP_REGNUM_P (REGNO (operands[0])) -+ && GP_REGNUM_P (REGNO (operands[1]))) -+ { -+ enum machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]); -+ rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]); -+ rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]); -+ emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); -+ DONE; -+ } -+ /* Otherwise, we expand to a similar pattern which does not -+ clobber CC_REGNUM. */ - } - [(set_attr "type" "neon_compare, neon_compare_zero, multiple")] - ) - -+(define_insn "*aarch64_cm<optab>di" -+ [(set (match_operand:DI 0 "register_operand" "=w,w") -+ (neg:DI -+ (COMPARISONS:DI -+ (match_operand:DI 1 "register_operand" "w,w") -+ (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz") -+ )))] -+ "TARGET_SIMD && reload_completed" -+ "@ -+ cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2> -+ cm<optab>\t%d0, %d1, #0" -+ [(set_attr "type" "neon_compare, neon_compare_zero")] -+) -+ - ;; cm(hs|hi) - - (define_insn "aarch64_cm<optab><mode>" -@@ -3831,35 +3919,62 @@ - ))) - (clobber (reg:CC CC_REGNUM))] - "TARGET_SIMD" -- "@ -- cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2> -- #" -- "reload_completed -- /* We need to prevent the split from -- happening in the 'w' constraint cases. */ -- && GP_REGNUM_P (REGNO (operands[0])) -- && GP_REGNUM_P (REGNO (operands[1]))" -- [(const_int 0)] -+ "#" -+ "reload_completed" -+ [(set (match_operand:DI 0 "register_operand") -+ (neg:DI -+ (UCOMPARISONS:DI -+ (match_operand:DI 1 "register_operand") -+ (match_operand:DI 2 "aarch64_simd_reg_or_zero") -+ )))] - { -- enum machine_mode mode = CCmode; -- rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]); -- rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]); -- emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); -- DONE; -+ /* If we are in the general purpose register file, -+ we split to a sequence of comparison and store. */ -+ if (GP_REGNUM_P (REGNO (operands[0])) -+ && GP_REGNUM_P (REGNO (operands[1]))) -+ { -+ enum machine_mode mode = CCmode; -+ rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]); -+ rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]); -+ emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); -+ DONE; -+ } -+ /* Otherwise, we expand to a similar pattern which does not -+ clobber CC_REGNUM. 
*/ - } -- [(set_attr "type" "neon_compare, neon_compare_zero")] -+ [(set_attr "type" "neon_compare,multiple")] - ) - -+(define_insn "*aarch64_cm<optab>di" -+ [(set (match_operand:DI 0 "register_operand" "=w") -+ (neg:DI -+ (UCOMPARISONS:DI -+ (match_operand:DI 1 "register_operand" "w") -+ (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w") -+ )))] -+ "TARGET_SIMD && reload_completed" -+ "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>" -+ [(set_attr "type" "neon_compare")] -+) -+ - ;; cmtst - -+;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst, -+;; we don't have any insns using ne, and aarch64_vcond_internal outputs -+;; not (neg (eq (and x y) 0)) -+;; which is rewritten by simplify_rtx as -+;; plus (eq (and x y) 0) -1. -+ - (define_insn "aarch64_cmtst<mode>" - [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w") -- (neg:<V_cmp_result> -- (ne:<V_cmp_result> -+ (plus:<V_cmp_result> -+ (eq:<V_cmp_result> - (and:VDQ - (match_operand:VDQ 1 "register_operand" "w") - (match_operand:VDQ 2 "register_operand" "w")) -- (vec_duplicate:<V_cmp_result> (const_int 0)))))] -+ (match_operand:VDQ 3 "aarch64_simd_imm_zero")) -+ (match_operand:<V_cmp_result> 4 "aarch64_simd_imm_minus_one"))) -+ ] - "TARGET_SIMD" - "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" - [(set_attr "type" "neon_tst<q>")] -@@ -3875,23 +3990,44 @@ - (const_int 0)))) - (clobber (reg:CC CC_REGNUM))] - "TARGET_SIMD" -- "@ -- cmtst\t%d0, %d1, %d2 -- #" -- "reload_completed -- /* We need to prevent the split from -- happening in the 'w' constraint cases. */ -- && GP_REGNUM_P (REGNO (operands[0])) -- && GP_REGNUM_P (REGNO (operands[1]))" -- [(const_int 0)] -+ "#" -+ "reload_completed" -+ [(set (match_operand:DI 0 "register_operand") -+ (neg:DI -+ (ne:DI -+ (and:DI -+ (match_operand:DI 1 "register_operand") -+ (match_operand:DI 2 "register_operand")) -+ (const_int 0))))] - { -- rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]); -- enum machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx); -- rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx); -- rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx); -- emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); -- DONE; -+ /* If we are in the general purpose register file, -+ we split to a sequence of comparison and store. */ -+ if (GP_REGNUM_P (REGNO (operands[0])) -+ && GP_REGNUM_P (REGNO (operands[1]))) -+ { -+ rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]); -+ enum machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx); -+ rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx); -+ rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx); -+ emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); -+ DONE; -+ } -+ /* Otherwise, we expand to a similar pattern which does not -+ clobber CC_REGNUM. 
*/ - } -+ [(set_attr "type" "neon_tst,multiple")] -+) -+ -+(define_insn "*aarch64_cmtstdi" -+ [(set (match_operand:DI 0 "register_operand" "=w") -+ (neg:DI -+ (ne:DI -+ (and:DI -+ (match_operand:DI 1 "register_operand" "w") -+ (match_operand:DI 2 "register_operand" "w")) -+ (const_int 0))))] -+ "TARGET_SIMD" -+ "cmtst\t%d0, %d1, %d2" - [(set_attr "type" "neon_tst")] - ) - -@@ -3972,6 +4108,16 @@ - [(set_attr "type" "neon_load2_2reg<q>")] - ) - -+(define_insn "aarch64_simd_ld2r<mode>" -+ [(set (match_operand:OI 0 "register_operand" "=w") -+ (unspec:OI [(match_operand:<V_TWO_ELEM> 1 "aarch64_simd_struct_operand" "Utv") -+ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ] -+ UNSPEC_LD2_DUP))] -+ "TARGET_SIMD" -+ "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1" -+ [(set_attr "type" "neon_load2_all_lanes<q>")] -+) -+ - (define_insn "vec_store_lanesoi<mode>" - [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv") - (unspec:OI [(match_operand:OI 1 "register_operand" "w") -@@ -3982,6 +4128,17 @@ - [(set_attr "type" "neon_store2_2reg<q>")] - ) - -+(define_insn "vec_store_lanesoi_lane<mode>" -+ [(set (match_operand:<V_TWO_ELEM> 0 "aarch64_simd_struct_operand" "=Utv") -+ (unspec:<V_TWO_ELEM> [(match_operand:OI 1 "register_operand" "w") -+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) -+ (match_operand:SI 2 "immediate_operand" "i")] -+ UNSPEC_ST2_LANE))] -+ "TARGET_SIMD" -+ "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0" -+ [(set_attr "type" "neon_store3_one_lane<q>")] -+) -+ - (define_insn "vec_load_lanesci<mode>" - [(set (match_operand:CI 0 "register_operand" "=w") - (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv") -@@ -3992,6 +4149,16 @@ - [(set_attr "type" "neon_load3_3reg<q>")] - ) - -+(define_insn "aarch64_simd_ld3r<mode>" -+ [(set (match_operand:CI 0 "register_operand" "=w") -+ (unspec:CI [(match_operand:<V_THREE_ELEM> 1 "aarch64_simd_struct_operand" "Utv") -+ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ] -+ UNSPEC_LD3_DUP))] -+ "TARGET_SIMD" -+ "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1" -+ [(set_attr "type" "neon_load3_all_lanes<q>")] -+) -+ - (define_insn "vec_store_lanesci<mode>" - [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv") - (unspec:CI [(match_operand:CI 1 "register_operand" "w") -@@ -4002,6 +4169,17 @@ - [(set_attr "type" "neon_store3_3reg<q>")] - ) - -+(define_insn "vec_store_lanesci_lane<mode>" -+ [(set (match_operand:<V_THREE_ELEM> 0 "aarch64_simd_struct_operand" "=Utv") -+ (unspec:<V_THREE_ELEM> [(match_operand:CI 1 "register_operand" "w") -+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) -+ (match_operand:SI 2 "immediate_operand" "i")] -+ UNSPEC_ST3_LANE))] -+ "TARGET_SIMD" -+ "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0" -+ [(set_attr "type" "neon_store3_one_lane<q>")] -+) -+ - (define_insn "vec_load_lanesxi<mode>" - [(set (match_operand:XI 0 "register_operand" "=w") - (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv") -@@ -4012,6 +4190,16 @@ - [(set_attr "type" "neon_load4_4reg<q>")] - ) - -+(define_insn "aarch64_simd_ld4r<mode>" -+ [(set (match_operand:XI 0 "register_operand" "=w") -+ (unspec:XI [(match_operand:<V_FOUR_ELEM> 1 "aarch64_simd_struct_operand" "Utv") -+ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ] -+ UNSPEC_LD4_DUP))] -+ "TARGET_SIMD" -+ "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1" -+ [(set_attr "type" "neon_load4_all_lanes<q>")] -+) -+ - (define_insn "vec_store_lanesxi<mode>" - [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv") - (unspec:XI [(match_operand:XI 1 
"register_operand" "w") -@@ -4022,6 +4210,17 @@ - [(set_attr "type" "neon_store4_4reg<q>")] - ) - -+(define_insn "vec_store_lanesxi_lane<mode>" -+ [(set (match_operand:<V_FOUR_ELEM> 0 "aarch64_simd_struct_operand" "=Utv") -+ (unspec:<V_FOUR_ELEM> [(match_operand:XI 1 "register_operand" "w") -+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) -+ (match_operand:SI 2 "immediate_operand" "i")] -+ UNSPEC_ST4_LANE))] -+ "TARGET_SIMD" -+ "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0" -+ [(set_attr "type" "neon_store4_one_lane<q>")] -+) -+ - ;; Reload patterns for AdvSIMD register list operands. - - (define_expand "mov<mode>" -@@ -4141,6 +4340,45 @@ - aarch64_simd_disambiguate_copy (operands, dest, src, 4); - }) - -+(define_expand "aarch64_ld2r<mode>" -+ [(match_operand:OI 0 "register_operand" "=w") -+ (match_operand:DI 1 "register_operand" "w") -+ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] -+ "TARGET_SIMD" -+{ -+ enum machine_mode mode = <V_TWO_ELEM>mode; -+ rtx mem = gen_rtx_MEM (mode, operands[1]); -+ -+ emit_insn (gen_aarch64_simd_ld2r<mode> (operands[0], mem)); -+ DONE; -+}) -+ -+(define_expand "aarch64_ld3r<mode>" -+ [(match_operand:CI 0 "register_operand" "=w") -+ (match_operand:DI 1 "register_operand" "w") -+ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] -+ "TARGET_SIMD" -+{ -+ enum machine_mode mode = <V_THREE_ELEM>mode; -+ rtx mem = gen_rtx_MEM (mode, operands[1]); -+ -+ emit_insn (gen_aarch64_simd_ld3r<mode> (operands[0], mem)); -+ DONE; -+}) -+ -+(define_expand "aarch64_ld4r<mode>" -+ [(match_operand:XI 0 "register_operand" "=w") -+ (match_operand:DI 1 "register_operand" "w") -+ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] -+ "TARGET_SIMD" -+{ -+ enum machine_mode mode = <V_FOUR_ELEM>mode; -+ rtx mem = gen_rtx_MEM (mode, operands[1]); -+ -+ emit_insn (gen_aarch64_simd_ld4r<mode> (operands[0],mem)); -+ DONE; -+}) -+ - (define_insn "aarch64_ld2<mode>_dreg" - [(set (match_operand:OI 0 "register_operand" "=w") - (subreg:OI -@@ -4375,7 +4613,7 @@ - (match_operand:VB 1 "register_operand") - (match_operand:VB 2 "register_operand") - (match_operand:VB 3 "register_operand")] -- "TARGET_SIMD && !BYTES_BIG_ENDIAN" -+ "TARGET_SIMD" - { - aarch64_expand_vec_perm (operands[0], operands[1], - operands[2], operands[3]); -@@ -4430,6 +4668,44 @@ - [(set_attr "type" "neon_permute<q>")] - ) - -+;; Note immediate (third) operand is lane index not byte index. -+(define_insn "aarch64_ext<mode>" -+ [(set (match_operand:VALL 0 "register_operand" "=w") -+ (unspec:VALL [(match_operand:VALL 1 "register_operand" "w") -+ (match_operand:VALL 2 "register_operand" "w") -+ (match_operand:SI 3 "immediate_operand" "i")] -+ UNSPEC_EXT))] -+ "TARGET_SIMD" -+{ -+ operands[3] = GEN_INT (INTVAL (operands[3]) -+ * GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode))); -+ return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3"; -+} -+ [(set_attr "type" "neon_ext<q>")] -+) -+ -+;; This exists solely to check the arguments to the corresponding __builtin. -+;; Used where we want an error for out-of-range indices which would otherwise -+;; be silently wrapped (e.g. the mask to a __builtin_shuffle). 
-+(define_expand "aarch64_im_lane_boundsi" -+ [(match_operand:SI 0 "immediate_operand" "i") -+ (match_operand:SI 1 "immediate_operand" "i")] -+ "TARGET_SIMD" -+{ -+ aarch64_simd_lane_bounds (operands[0], 0, INTVAL (operands[1])); -+ DONE; -+} -+) -+ -+(define_insn "aarch64_rev<REVERSE:rev_op><mode>" -+ [(set (match_operand:VALL 0 "register_operand" "=w") -+ (unspec:VALL [(match_operand:VALL 1 "register_operand" "w")] -+ REVERSE))] -+ "TARGET_SIMD" -+ "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>" -+ [(set_attr "type" "neon_rev<q>")] -+) -+ - (define_insn "aarch64_st2<mode>_dreg" - [(set (match_operand:TI 0 "aarch64_simd_struct_operand" "=Utv") - (unspec:TI [(match_operand:OI 1 "register_operand" "w") -@@ -4516,6 +4792,57 @@ - DONE; - }) - -+(define_expand "aarch64_st2_lane<VQ:mode>" -+ [(match_operand:DI 0 "register_operand" "r") -+ (match_operand:OI 1 "register_operand" "w") -+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) -+ (match_operand:SI 2 "immediate_operand")] -+ "TARGET_SIMD" -+{ -+ enum machine_mode mode = <V_TWO_ELEM>mode; -+ rtx mem = gen_rtx_MEM (mode, operands[0]); -+ operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); -+ -+ emit_insn (gen_vec_store_lanesoi_lane<VQ:mode> (mem, -+ operands[1], -+ operands[2])); -+ DONE; -+}) -+ -+(define_expand "aarch64_st3_lane<VQ:mode>" -+ [(match_operand:DI 0 "register_operand" "r") -+ (match_operand:CI 1 "register_operand" "w") -+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) -+ (match_operand:SI 2 "immediate_operand")] -+ "TARGET_SIMD" -+{ -+ enum machine_mode mode = <V_THREE_ELEM>mode; -+ rtx mem = gen_rtx_MEM (mode, operands[0]); -+ operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); -+ -+ emit_insn (gen_vec_store_lanesci_lane<VQ:mode> (mem, -+ operands[1], -+ operands[2])); -+ DONE; -+}) -+ -+(define_expand "aarch64_st4_lane<VQ:mode>" -+ [(match_operand:DI 0 "register_operand" "r") -+ (match_operand:XI 1 "register_operand" "w") -+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) -+ (match_operand:SI 2 "immediate_operand")] -+ "TARGET_SIMD" -+{ -+ enum machine_mode mode = <V_FOUR_ELEM>mode; -+ rtx mem = gen_rtx_MEM (mode, operands[0]); -+ operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); -+ -+ emit_insn (gen_vec_store_lanesxi_lane<VQ:mode> (mem, -+ operands[1], -+ operands[2])); -+ DONE; -+}) -+ - (define_expand "aarch64_st1<VALL:mode>" - [(match_operand:DI 0 "register_operand") - (match_operand:VALL 1 "register_operand")] ---- a/src/gcc/config/aarch64/predicates.md -+++ b/src/gcc/config/aarch64/predicates.md -@@ -26,6 +26,10 @@ - && GET_MODE_CLASS (GET_MODE (op)) == MODE_CC")))) - ) - -+(define_predicate "aarch64_call_insn_operand" -+ (ior (match_code "symbol_ref") -+ (match_operand 0 "register_operand"))) -+ - (define_predicate "aarch64_simd_register" - (and (match_code "reg") - (ior (match_test "REGNO_REG_CLASS (REGNO (op)) == FP_LO_REGS") -@@ -119,6 +123,10 @@ - (match_test "INTVAL (op) != 0 - && (unsigned) exact_log2 (INTVAL (op)) < 64"))) - -+(define_predicate "aarch64_mem_pair_offset" -+ (and (match_code "const_int") -+ (match_test "aarch64_offset_7bit_signed_scaled_p (mode, INTVAL (op))"))) -+ - (define_predicate "aarch64_mem_pair_operand" - (and (match_code "mem") - (match_test "aarch64_legitimate_address_p (mode, XEXP (op, 0), PARALLEL, -@@ -194,6 +202,18 @@ - (define_special_predicate "aarch64_comparison_operator" - (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu,unordered,ordered,unlt,unle,unge,ungt")) - -+(define_special_predicate 
"aarch64_comparison_operation" -+ (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu,unordered,ordered,unlt,unle,unge,ungt") -+{ -+ if (XEXP (op, 1) != const0_rtx) -+ return false; -+ rtx op0 = XEXP (op, 0); -+ if (!REG_P (op0) || REGNO (op0) != CC_REGNUM) -+ return false; -+ return aarch64_get_condition_code (op) >= 0; -+}) -+ -+ - ;; True if the operand is memory reference suitable for a load/store exclusive. - (define_predicate "aarch64_sync_memory_operand" - (and (match_operand 0 "memory_operand") -@@ -203,62 +223,15 @@ - (define_special_predicate "vect_par_cnst_hi_half" - (match_code "parallel") - { -- HOST_WIDE_INT count = XVECLEN (op, 0); -- int nunits = GET_MODE_NUNITS (mode); -- int i; -- -- if (count < 1 -- || count != nunits / 2) -- return false; -- -- if (!VECTOR_MODE_P (mode)) -- return false; -- -- for (i = 0; i < count; i++) -- { -- rtx elt = XVECEXP (op, 0, i); -- int val; -- -- if (GET_CODE (elt) != CONST_INT) -- return false; -- -- val = INTVAL (elt); -- if (val != (nunits / 2) + i) -- return false; -- } -- return true; -+ return aarch64_simd_check_vect_par_cnst_half (op, mode, true); - }) - - (define_special_predicate "vect_par_cnst_lo_half" - (match_code "parallel") - { -- HOST_WIDE_INT count = XVECLEN (op, 0); -- int nunits = GET_MODE_NUNITS (mode); -- int i; -- -- if (count < 1 -- || count != nunits / 2) -- return false; -- -- if (!VECTOR_MODE_P (mode)) -- return false; -- -- for (i = 0; i < count; i++) -- { -- rtx elt = XVECEXP (op, 0, i); -- int val; -- -- if (GET_CODE (elt) != CONST_INT) -- return false; -- -- val = INTVAL (elt); -- if (val != i) -- return false; -- } -- return true; -+ return aarch64_simd_check_vect_par_cnst_half (op, mode, false); - }) - -- - (define_special_predicate "aarch64_simd_lshift_imm" - (match_code "const_vector") - { -@@ -300,3 +273,9 @@ - { - return aarch64_simd_imm_zero_p (op, mode); - }) -+ -+(define_special_predicate "aarch64_simd_imm_minus_one" -+ (match_code "const_vector") -+{ -+ return aarch64_const_vec_all_same_int_p (op, -1); -+}) ---- a/src/gcc/config/aarch64/arm_neon.h -+++ b/src/gcc/config/aarch64/arm_neon.h -@@ -2113,29 +2113,26 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vqadd_u8 (uint8x8_t __a, uint8x8_t __b) - { -- return (uint8x8_t) __builtin_aarch64_uqaddv8qi ((int8x8_t) __a, -- (int8x8_t) __b); -+ return __builtin_aarch64_uqaddv8qi_uuu (__a, __b); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vqadd_u16 (uint16x4_t __a, uint16x4_t __b) - { -- return (uint16x4_t) __builtin_aarch64_uqaddv4hi ((int16x4_t) __a, -- (int16x4_t) __b); -+ return __builtin_aarch64_uqaddv4hi_uuu (__a, __b); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vqadd_u32 (uint32x2_t __a, uint32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_uqaddv2si ((int32x2_t) __a, -- (int32x2_t) __b); -+ return __builtin_aarch64_uqaddv2si_uuu (__a, __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vqadd_u64 (uint64x1_t __a, uint64x1_t __b) - { -- return (uint64x1_t) __builtin_aarch64_uqadddi ((int64x1_t) __a, -- (int64x1_t) __b); -+ return (uint64x1_t) __builtin_aarch64_uqadddi_uuu ((uint64_t) __a, -+ (uint64_t) __b); - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -@@ -2165,29 +2162,25 @@ - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vqaddq_u8 (uint8x16_t __a, uint8x16_t __b) - { -- return (uint8x16_t) __builtin_aarch64_uqaddv16qi 
((int8x16_t) __a, -- (int8x16_t) __b); -+ return __builtin_aarch64_uqaddv16qi_uuu (__a, __b); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vqaddq_u16 (uint16x8_t __a, uint16x8_t __b) - { -- return (uint16x8_t) __builtin_aarch64_uqaddv8hi ((int16x8_t) __a, -- (int16x8_t) __b); -+ return __builtin_aarch64_uqaddv8hi_uuu (__a, __b); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vqaddq_u32 (uint32x4_t __a, uint32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_uqaddv4si ((int32x4_t) __a, -- (int32x4_t) __b); -+ return __builtin_aarch64_uqaddv4si_uuu (__a, __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vqaddq_u64 (uint64x2_t __a, uint64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_uqaddv2di ((int64x2_t) __a, -- (int64x2_t) __b); -+ return __builtin_aarch64_uqaddv2di_uuu (__a, __b); - } - - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -@@ -2217,29 +2210,26 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vqsub_u8 (uint8x8_t __a, uint8x8_t __b) - { -- return (uint8x8_t) __builtin_aarch64_uqsubv8qi ((int8x8_t) __a, -- (int8x8_t) __b); -+ return __builtin_aarch64_uqsubv8qi_uuu (__a, __b); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vqsub_u16 (uint16x4_t __a, uint16x4_t __b) - { -- return (uint16x4_t) __builtin_aarch64_uqsubv4hi ((int16x4_t) __a, -- (int16x4_t) __b); -+ return __builtin_aarch64_uqsubv4hi_uuu (__a, __b); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vqsub_u32 (uint32x2_t __a, uint32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_uqsubv2si ((int32x2_t) __a, -- (int32x2_t) __b); -+ return __builtin_aarch64_uqsubv2si_uuu (__a, __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vqsub_u64 (uint64x1_t __a, uint64x1_t __b) - { -- return (uint64x1_t) __builtin_aarch64_uqsubdi ((int64x1_t) __a, -- (int64x1_t) __b); -+ return (uint64x1_t) __builtin_aarch64_uqsubdi_uuu ((uint64_t) __a, -+ (uint64_t) __b); - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -@@ -2269,29 +2259,25 @@ - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vqsubq_u8 (uint8x16_t __a, uint8x16_t __b) - { -- return (uint8x16_t) __builtin_aarch64_uqsubv16qi ((int8x16_t) __a, -- (int8x16_t) __b); -+ return __builtin_aarch64_uqsubv16qi_uuu (__a, __b); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vqsubq_u16 (uint16x8_t __a, uint16x8_t __b) - { -- return (uint16x8_t) __builtin_aarch64_uqsubv8hi ((int16x8_t) __a, -- (int16x8_t) __b); -+ return __builtin_aarch64_uqsubv8hi_uuu (__a, __b); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vqsubq_u32 (uint32x4_t __a, uint32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_uqsubv4si ((int32x4_t) __a, -- (int32x4_t) __b); -+ return __builtin_aarch64_uqsubv4si_uuu (__a, __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vqsubq_u64 (uint64x2_t __a, uint64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_uqsubv2di ((int64x2_t) __a, -- (int64x2_t) __b); -+ return __builtin_aarch64_uqsubv2di_uuu (__a, __b); - } - - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -@@ -2312,6 +2298,12 @@ - return (int32x2_t) __builtin_aarch64_sqnegv2si (__a); 
- } - -+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+vqneg_s64 (int64x1_t __a) -+{ -+ return __builtin_aarch64_sqnegdi (__a); -+} -+ - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vqnegq_s8 (int8x16_t __a) - { -@@ -2348,6 +2340,12 @@ - return (int32x2_t) __builtin_aarch64_sqabsv2si (__a); - } - -+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+vqabs_s64 (int64x1_t __a) -+{ -+ return __builtin_aarch64_sqabsdi (__a); -+} -+ - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vqabsq_s8 (int8x16_t __a) - { -@@ -2637,1352 +2635,1587 @@ - /* vreinterpret */ - - __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+vreinterpret_p8_f64 (float64x1_t __a) -+{ -+ return __builtin_aarch64_reinterpretv8qidf_ps (__a); -+} -+ -+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) - vreinterpret_p8_s8 (int8x8_t __a) - { -- return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv8qi (__a); -+ return (poly8x8_t) __a; - } - - __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) - vreinterpret_p8_s16 (int16x4_t __a) - { -- return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a); -+ return (poly8x8_t) __a; - } - - __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) - vreinterpret_p8_s32 (int32x2_t __a) - { -- return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a); -+ return (poly8x8_t) __a; - } - - __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) - vreinterpret_p8_s64 (int64x1_t __a) - { -- return (poly8x8_t) __builtin_aarch64_reinterpretv8qidi (__a); -+ return (poly8x8_t) __a; - } - - __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) - vreinterpret_p8_f32 (float32x2_t __a) - { -- return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a); -+ return (poly8x8_t) __a; - } - - __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) - vreinterpret_p8_u8 (uint8x8_t __a) - { -- return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a); -+ return (poly8x8_t) __a; - } - - __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) - vreinterpret_p8_u16 (uint16x4_t __a) - { -- return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a); -+ return (poly8x8_t) __a; - } - - __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) - vreinterpret_p8_u32 (uint32x2_t __a) - { -- return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a); -+ return (poly8x8_t) __a; - } - - __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) - vreinterpret_p8_u64 (uint64x1_t __a) - { -- return (poly8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a); -+ return (poly8x8_t) __a; - } - - __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) - vreinterpret_p8_p16 (poly16x4_t __a) - { -- return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a); -+ return (poly8x8_t) __a; - } - - __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+vreinterpretq_p8_f64 (float64x2_t __a) -+{ -+ return (poly8x16_t) __a; -+} -+ -+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_p8_s8 (int8x16_t __a) - { -- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv16qi (__a); -+ return (poly8x16_t) __a; - } - - __extension__ static __inline poly8x16_t 
__attribute__ ((__always_inline__)) - vreinterpretq_p8_s16 (int16x8_t __a) - { -- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a); -+ return (poly8x16_t) __a; - } - - __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_p8_s32 (int32x4_t __a) - { -- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a); -+ return (poly8x16_t) __a; - } - - __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_p8_s64 (int64x2_t __a) - { -- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a); -+ return (poly8x16_t) __a; - } - - __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_p8_f32 (float32x4_t __a) - { -- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a); -+ return (poly8x16_t) __a; - } - - __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_p8_u8 (uint8x16_t __a) - { -- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t) -- __a); -+ return (poly8x16_t) __a; - } - - __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_p8_u16 (uint16x8_t __a) - { -- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) -- __a); -+ return (poly8x16_t) __a; - } - - __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_p8_u32 (uint32x4_t __a) - { -- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t) -- __a); -+ return (poly8x16_t) __a; - } - - __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_p8_u64 (uint64x2_t __a) - { -- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t) -- __a); -+ return (poly8x16_t) __a; - } - - __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_p8_p16 (poly16x8_t __a) - { -- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) -- __a); -+ return (poly8x16_t) __a; - } - - __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+vreinterpret_p16_f64 (float64x1_t __a) -+{ -+ return __builtin_aarch64_reinterpretv4hidf_ps (__a); -+} -+ -+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) - vreinterpret_p16_s8 (int8x8_t __a) - { -- return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a); -+ return (poly16x4_t) __a; - } - - __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) - vreinterpret_p16_s16 (int16x4_t __a) - { -- return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv4hi (__a); -+ return (poly16x4_t) __a; - } - - __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) - vreinterpret_p16_s32 (int32x2_t __a) - { -- return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a); -+ return (poly16x4_t) __a; - } - - __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) - vreinterpret_p16_s64 (int64x1_t __a) - { -- return (poly16x4_t) __builtin_aarch64_reinterpretv4hidi (__a); -+ return (poly16x4_t) __a; - } - - __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) - vreinterpret_p16_f32 (float32x2_t __a) - { -- return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a); -+ return (poly16x4_t) __a; - } - - __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) - vreinterpret_p16_u8 (uint8x8_t __a) - { -- return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi 
((int8x8_t) __a); -+ return (poly16x4_t) __a; - } - - __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) - vreinterpret_p16_u16 (uint16x4_t __a) - { -- return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a); -+ return (poly16x4_t) __a; - } - - __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) - vreinterpret_p16_u32 (uint32x2_t __a) - { -- return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a); -+ return (poly16x4_t) __a; - } - - __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) - vreinterpret_p16_u64 (uint64x1_t __a) - { -- return (poly16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a); -+ return (poly16x4_t) __a; - } - - __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) - vreinterpret_p16_p8 (poly8x8_t __a) - { -- return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a); -+ return (poly16x4_t) __a; - } - - __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+vreinterpretq_p16_f64 (float64x2_t __a) -+{ -+ return (poly16x8_t) __a; -+} -+ -+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_p16_s8 (int8x16_t __a) - { -- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a); -+ return (poly16x8_t) __a; - } - - __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_p16_s16 (int16x8_t __a) - { -- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv8hi (__a); -+ return (poly16x8_t) __a; - } - - __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_p16_s32 (int32x4_t __a) - { -- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a); -+ return (poly16x8_t) __a; - } - - __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_p16_s64 (int64x2_t __a) - { -- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a); -+ return (poly16x8_t) __a; - } - - __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_p16_f32 (float32x4_t __a) - { -- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a); -+ return (poly16x8_t) __a; - } - - __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_p16_u8 (uint8x16_t __a) - { -- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) -- __a); -+ return (poly16x8_t) __a; - } - - __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_p16_u16 (uint16x8_t __a) - { -- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a); -+ return (poly16x8_t) __a; - } - - __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_p16_u32 (uint32x4_t __a) - { -- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a); -+ return (poly16x8_t) __a; - } - - __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_p16_u64 (uint64x2_t __a) - { -- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a); -+ return (poly16x8_t) __a; - } - - __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_p16_p8 (poly8x16_t __a) - { -- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) -- __a); -+ return (poly16x8_t) __a; - } - - __extension__ static __inline float32x2_t __attribute__ 
((__always_inline__)) -+vreinterpret_f32_f64 (float64x1_t __a) -+{ -+ return __builtin_aarch64_reinterpretv2sfdf (__a); -+} -+ -+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) - vreinterpret_f32_s8 (int8x8_t __a) - { -- return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi (__a); -+ return (float32x2_t) __a; - } - - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) - vreinterpret_f32_s16 (int16x4_t __a) - { -- return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi (__a); -+ return (float32x2_t) __a; - } - - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) - vreinterpret_f32_s32 (int32x2_t __a) - { -- return (float32x2_t) __builtin_aarch64_reinterpretv2sfv2si (__a); -+ return (float32x2_t) __a; - } - - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) - vreinterpret_f32_s64 (int64x1_t __a) - { -- return (float32x2_t) __builtin_aarch64_reinterpretv2sfdi (__a); -+ return (float32x2_t) __a; - } - - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) - vreinterpret_f32_u8 (uint8x8_t __a) - { -- return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi ((int8x8_t) __a); -+ return (float32x2_t) __a; - } - - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) - vreinterpret_f32_u16 (uint16x4_t __a) - { -- return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi ((int16x4_t) -- __a); -+ return (float32x2_t) __a; - } - - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) - vreinterpret_f32_u32 (uint32x2_t __a) - { -- return (float32x2_t) __builtin_aarch64_reinterpretv2sfv2si ((int32x2_t) -- __a); -+ return (float32x2_t) __a; - } - - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) - vreinterpret_f32_u64 (uint64x1_t __a) - { -- return (float32x2_t) __builtin_aarch64_reinterpretv2sfdi ((int64x1_t) __a); -+ return (float32x2_t) __a; - } - - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) - vreinterpret_f32_p8 (poly8x8_t __a) - { -- return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi ((int8x8_t) __a); -+ return (float32x2_t) __a; - } - - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) - vreinterpret_f32_p16 (poly16x4_t __a) - { -- return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi ((int16x4_t) -- __a); -+ return (float32x2_t) __a; - } - - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+vreinterpretq_f32_f64 (float64x2_t __a) -+{ -+ return (float32x4_t) __a; -+} -+ -+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_f32_s8 (int8x16_t __a) - { -- return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi (__a); -+ return (float32x4_t) __a; - } - - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_f32_s16 (int16x8_t __a) - { -- return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi (__a); -+ return (float32x4_t) __a; - } - - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_f32_s32 (int32x4_t __a) - { -- return (float32x4_t) __builtin_aarch64_reinterpretv4sfv4si (__a); -+ return (float32x4_t) __a; - } - - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_f32_s64 (int64x2_t __a) - { -- return (float32x4_t) __builtin_aarch64_reinterpretv4sfv2di (__a); -+ return (float32x4_t) __a; - } - - 
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_f32_u8 (uint8x16_t __a) - { -- return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi ((int8x16_t) -- __a); -+ return (float32x4_t) __a; - } - - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_f32_u16 (uint16x8_t __a) - { -- return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi ((int16x8_t) -- __a); -+ return (float32x4_t) __a; - } - - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_f32_u32 (uint32x4_t __a) - { -- return (float32x4_t) __builtin_aarch64_reinterpretv4sfv4si ((int32x4_t) -- __a); -+ return (float32x4_t) __a; - } - - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_f32_u64 (uint64x2_t __a) - { -- return (float32x4_t) __builtin_aarch64_reinterpretv4sfv2di ((int64x2_t) -- __a); -+ return (float32x4_t) __a; - } - - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_f32_p8 (poly8x16_t __a) - { -- return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi ((int8x16_t) -- __a); -+ return (float32x4_t) __a; - } - - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_f32_p16 (poly16x8_t __a) - { -- return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi ((int16x8_t) -- __a); -+ return (float32x4_t) __a; - } - -+__extension__ static __inline float64x1_t __attribute__((__always_inline__)) -+vreinterpret_f64_f32 (float32x2_t __a) -+{ -+ return __builtin_aarch64_reinterpretdfv2sf (__a); -+} -+ -+__extension__ static __inline float64x1_t __attribute__((__always_inline__)) -+vreinterpret_f64_p8 (poly8x8_t __a) -+{ -+ return __builtin_aarch64_reinterpretdfv8qi_sp (__a); -+} -+ -+__extension__ static __inline float64x1_t __attribute__((__always_inline__)) -+vreinterpret_f64_p16 (poly16x4_t __a) -+{ -+ return __builtin_aarch64_reinterpretdfv4hi_sp (__a); -+} -+ -+__extension__ static __inline float64x1_t __attribute__((__always_inline__)) -+vreinterpret_f64_s8 (int8x8_t __a) -+{ -+ return __builtin_aarch64_reinterpretdfv8qi (__a); -+} -+ -+__extension__ static __inline float64x1_t __attribute__((__always_inline__)) -+vreinterpret_f64_s16 (int16x4_t __a) -+{ -+ return __builtin_aarch64_reinterpretdfv4hi (__a); -+} -+ -+__extension__ static __inline float64x1_t __attribute__((__always_inline__)) -+vreinterpret_f64_s32 (int32x2_t __a) -+{ -+ return __builtin_aarch64_reinterpretdfv2si (__a); -+} -+ -+__extension__ static __inline float64x1_t __attribute__((__always_inline__)) -+vreinterpret_f64_s64 (int64x1_t __a) -+{ -+ return __builtin_aarch64_createdf ((uint64_t) vget_lane_s64 (__a, 0)); -+} -+ -+__extension__ static __inline float64x1_t __attribute__((__always_inline__)) -+vreinterpret_f64_u8 (uint8x8_t __a) -+{ -+ return __builtin_aarch64_reinterpretdfv8qi_su (__a); -+} -+ -+__extension__ static __inline float64x1_t __attribute__((__always_inline__)) -+vreinterpret_f64_u16 (uint16x4_t __a) -+{ -+ return __builtin_aarch64_reinterpretdfv4hi_su (__a); -+} -+ -+__extension__ static __inline float64x1_t __attribute__((__always_inline__)) -+vreinterpret_f64_u32 (uint32x2_t __a) -+{ -+ return __builtin_aarch64_reinterpretdfv2si_su (__a); -+} -+ -+__extension__ static __inline float64x1_t __attribute__((__always_inline__)) -+vreinterpret_f64_u64 (uint64x1_t __a) -+{ -+ return __builtin_aarch64_createdf (vget_lane_u64 (__a, 0)); -+} -+ -+__extension__ static __inline 
float64x2_t __attribute__((__always_inline__)) -+vreinterpretq_f64_f32 (float32x4_t __a) -+{ -+ return (float64x2_t) __a; -+} -+ -+__extension__ static __inline float64x2_t __attribute__((__always_inline__)) -+vreinterpretq_f64_p8 (poly8x16_t __a) -+{ -+ return (float64x2_t) __a; -+} -+ -+__extension__ static __inline float64x2_t __attribute__((__always_inline__)) -+vreinterpretq_f64_p16 (poly16x8_t __a) -+{ -+ return (float64x2_t) __a; -+} -+ -+__extension__ static __inline float64x2_t __attribute__((__always_inline__)) -+vreinterpretq_f64_s8 (int8x16_t __a) -+{ -+ return (float64x2_t) __a; -+} -+ -+__extension__ static __inline float64x2_t __attribute__((__always_inline__)) -+vreinterpretq_f64_s16 (int16x8_t __a) -+{ -+ return (float64x2_t) __a; -+} -+ -+__extension__ static __inline float64x2_t __attribute__((__always_inline__)) -+vreinterpretq_f64_s32 (int32x4_t __a) -+{ -+ return (float64x2_t) __a; -+} -+ -+__extension__ static __inline float64x2_t __attribute__((__always_inline__)) -+vreinterpretq_f64_s64 (int64x2_t __a) -+{ -+ return (float64x2_t) __a; -+} -+ -+__extension__ static __inline float64x2_t __attribute__((__always_inline__)) -+vreinterpretq_f64_u8 (uint8x16_t __a) -+{ -+ return (float64x2_t) __a; -+} -+ -+__extension__ static __inline float64x2_t __attribute__((__always_inline__)) -+vreinterpretq_f64_u16 (uint16x8_t __a) -+{ -+ return (float64x2_t) __a; -+} -+ -+__extension__ static __inline float64x2_t __attribute__((__always_inline__)) -+vreinterpretq_f64_u32 (uint32x4_t __a) -+{ -+ return (float64x2_t) __a; -+} -+ -+__extension__ static __inline float64x2_t __attribute__((__always_inline__)) -+vreinterpretq_f64_u64 (uint64x2_t __a) -+{ -+ return (float64x2_t) __a; -+} -+ - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+vreinterpret_s64_f64 (float64x1_t __a) -+{ -+ return __builtin_aarch64_reinterpretdidf (__a); -+} -+ -+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vreinterpret_s64_s8 (int8x8_t __a) - { -- return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi (__a); -+ return (int64x1_t) __a; - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vreinterpret_s64_s16 (int16x4_t __a) - { -- return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi (__a); -+ return (int64x1_t) __a; - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vreinterpret_s64_s32 (int32x2_t __a) - { -- return (int64x1_t) __builtin_aarch64_reinterpretdiv2si (__a); -+ return (int64x1_t) __a; - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vreinterpret_s64_f32 (float32x2_t __a) - { -- return (int64x1_t) __builtin_aarch64_reinterpretdiv2sf (__a); -+ return (int64x1_t) __a; - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vreinterpret_s64_u8 (uint8x8_t __a) - { -- return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a); -+ return (int64x1_t) __a; - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vreinterpret_s64_u16 (uint16x4_t __a) - { -- return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a); -+ return (int64x1_t) __a; - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vreinterpret_s64_u32 (uint32x2_t __a) - { -- return (int64x1_t) __builtin_aarch64_reinterpretdiv2si ((int32x2_t) __a); -+ return (int64x1_t) __a; - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - 
vreinterpret_s64_u64 (uint64x1_t __a) - { -- return (int64x1_t) __builtin_aarch64_reinterpretdidi ((int64x1_t) __a); -+ return (int64x1_t) __a; - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vreinterpret_s64_p8 (poly8x8_t __a) - { -- return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a); -+ return (int64x1_t) __a; - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vreinterpret_s64_p16 (poly16x4_t __a) - { -- return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a); -+ return (int64x1_t) __a; - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+vreinterpretq_s64_f64 (float64x2_t __a) -+{ -+ return (int64x2_t) __a; -+} -+ -+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_s64_s8 (int8x16_t __a) - { -- return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi (__a); -+ return (int64x2_t) __a; - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_s64_s16 (int16x8_t __a) - { -- return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi (__a); -+ return (int64x2_t) __a; - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_s64_s32 (int32x4_t __a) - { -- return (int64x2_t) __builtin_aarch64_reinterpretv2div4si (__a); -+ return (int64x2_t) __a; - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_s64_f32 (float32x4_t __a) - { -- return (int64x2_t) __builtin_aarch64_reinterpretv2div4sf (__a); -+ return (int64x2_t) __a; - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_s64_u8 (uint8x16_t __a) - { -- return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) __a); -+ return (int64x2_t) __a; - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_s64_u16 (uint16x8_t __a) - { -- return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a); -+ return (int64x2_t) __a; - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_s64_u32 (uint32x4_t __a) - { -- return (int64x2_t) __builtin_aarch64_reinterpretv2div4si ((int32x4_t) __a); -+ return (int64x2_t) __a; - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_s64_u64 (uint64x2_t __a) - { -- return (int64x2_t) __builtin_aarch64_reinterpretv2div2di ((int64x2_t) __a); -+ return (int64x2_t) __a; - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_s64_p8 (poly8x16_t __a) - { -- return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) __a); -+ return (int64x2_t) __a; - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_s64_p16 (poly16x8_t __a) - { -- return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a); -+ return (int64x2_t) __a; - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+vreinterpret_u64_f64 (float64x1_t __a) -+{ -+ return __builtin_aarch64_reinterpretdidf_us (__a); -+} -+ -+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vreinterpret_u64_s8 (int8x8_t __a) - { -- return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi (__a); -+ return (uint64x1_t) __a; - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - 
vreinterpret_u64_s16 (int16x4_t __a) - { -- return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi (__a); -+ return (uint64x1_t) __a; - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vreinterpret_u64_s32 (int32x2_t __a) - { -- return (uint64x1_t) __builtin_aarch64_reinterpretdiv2si (__a); -+ return (uint64x1_t) __a; - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vreinterpret_u64_s64 (int64x1_t __a) - { -- return (uint64x1_t) __builtin_aarch64_reinterpretdidi (__a); -+ return (uint64x1_t) __a; - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vreinterpret_u64_f32 (float32x2_t __a) - { -- return (uint64x1_t) __builtin_aarch64_reinterpretdiv2sf (__a); -+ return (uint64x1_t) __a; - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vreinterpret_u64_u8 (uint8x8_t __a) - { -- return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a); -+ return (uint64x1_t) __a; - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vreinterpret_u64_u16 (uint16x4_t __a) - { -- return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a); -+ return (uint64x1_t) __a; - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vreinterpret_u64_u32 (uint32x2_t __a) - { -- return (uint64x1_t) __builtin_aarch64_reinterpretdiv2si ((int32x2_t) __a); -+ return (uint64x1_t) __a; - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vreinterpret_u64_p8 (poly8x8_t __a) - { -- return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a); -+ return (uint64x1_t) __a; - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vreinterpret_u64_p16 (poly16x4_t __a) - { -- return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a); -+ return (uint64x1_t) __a; - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+vreinterpretq_u64_f64 (float64x2_t __a) -+{ -+ return (uint64x2_t) __a; -+} -+ -+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_u64_s8 (int8x16_t __a) - { -- return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi (__a); -+ return (uint64x2_t) __a; - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_u64_s16 (int16x8_t __a) - { -- return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi (__a); -+ return (uint64x2_t) __a; - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_u64_s32 (int32x4_t __a) - { -- return (uint64x2_t) __builtin_aarch64_reinterpretv2div4si (__a); -+ return (uint64x2_t) __a; - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_u64_s64 (int64x2_t __a) - { -- return (uint64x2_t) __builtin_aarch64_reinterpretv2div2di (__a); -+ return (uint64x2_t) __a; - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_u64_f32 (float32x4_t __a) - { -- return (uint64x2_t) __builtin_aarch64_reinterpretv2div4sf (__a); -+ return (uint64x2_t) __a; - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_u64_u8 (uint8x16_t __a) - { -- return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) -- __a); -+ return (uint64x2_t) __a; - } - - __extension__ static __inline uint64x2_t 
__attribute__ ((__always_inline__)) - vreinterpretq_u64_u16 (uint16x8_t __a) - { -- return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a); -+ return (uint64x2_t) __a; - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_u64_u32 (uint32x4_t __a) - { -- return (uint64x2_t) __builtin_aarch64_reinterpretv2div4si ((int32x4_t) __a); -+ return (uint64x2_t) __a; - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_u64_p8 (poly8x16_t __a) - { -- return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) -- __a); -+ return (uint64x2_t) __a; - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_u64_p16 (poly16x8_t __a) - { -- return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a); -+ return (uint64x2_t) __a; - } - - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+vreinterpret_s8_f64 (float64x1_t __a) -+{ -+ return __builtin_aarch64_reinterpretv8qidf (__a); -+} -+ -+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vreinterpret_s8_s16 (int16x4_t __a) - { -- return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a); -+ return (int8x8_t) __a; - } - - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vreinterpret_s8_s32 (int32x2_t __a) - { -- return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a); -+ return (int8x8_t) __a; - } - - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vreinterpret_s8_s64 (int64x1_t __a) - { -- return (int8x8_t) __builtin_aarch64_reinterpretv8qidi (__a); -+ return (int8x8_t) __a; - } - - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vreinterpret_s8_f32 (float32x2_t __a) - { -- return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a); -+ return (int8x8_t) __a; - } - - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vreinterpret_s8_u8 (uint8x8_t __a) - { -- return (int8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a); -+ return (int8x8_t) __a; - } - - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vreinterpret_s8_u16 (uint16x4_t __a) - { -- return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a); -+ return (int8x8_t) __a; - } - - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vreinterpret_s8_u32 (uint32x2_t __a) - { -- return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a); -+ return (int8x8_t) __a; - } - - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vreinterpret_s8_u64 (uint64x1_t __a) - { -- return (int8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a); -+ return (int8x8_t) __a; - } - - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vreinterpret_s8_p8 (poly8x8_t __a) - { -- return (int8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a); -+ return (int8x8_t) __a; - } - - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vreinterpret_s8_p16 (poly16x4_t __a) - { -- return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a); -+ return (int8x8_t) __a; - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+vreinterpretq_s8_f64 (float64x2_t __a) -+{ -+ return (int8x16_t) __a; -+} -+ -+__extension__ static __inline int8x16_t __attribute__ 
((__always_inline__)) - vreinterpretq_s8_s16 (int16x8_t __a) - { -- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a); -+ return (int8x16_t) __a; - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_s8_s32 (int32x4_t __a) - { -- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a); -+ return (int8x16_t) __a; - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_s8_s64 (int64x2_t __a) - { -- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a); -+ return (int8x16_t) __a; - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_s8_f32 (float32x4_t __a) - { -- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a); -+ return (int8x16_t) __a; - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_s8_u8 (uint8x16_t __a) - { -- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t) -- __a); -+ return (int8x16_t) __a; - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_s8_u16 (uint16x8_t __a) - { -- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) __a); -+ return (int8x16_t) __a; - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_s8_u32 (uint32x4_t __a) - { -- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t) __a); -+ return (int8x16_t) __a; - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_s8_u64 (uint64x2_t __a) - { -- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t) __a); -+ return (int8x16_t) __a; - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_s8_p8 (poly8x16_t __a) - { -- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t) -- __a); -+ return (int8x16_t) __a; - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_s8_p16 (poly16x8_t __a) - { -- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) __a); -+ return (int8x16_t) __a; - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+vreinterpret_s16_f64 (float64x1_t __a) -+{ -+ return __builtin_aarch64_reinterpretv4hidf (__a); -+} -+ -+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vreinterpret_s16_s8 (int8x8_t __a) - { -- return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a); -+ return (int16x4_t) __a; - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vreinterpret_s16_s32 (int32x2_t __a) - { -- return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a); -+ return (int16x4_t) __a; - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vreinterpret_s16_s64 (int64x1_t __a) - { -- return (int16x4_t) __builtin_aarch64_reinterpretv4hidi (__a); -+ return (int16x4_t) __a; - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vreinterpret_s16_f32 (float32x2_t __a) - { -- return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a); -+ return (int16x4_t) __a; - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vreinterpret_s16_u8 (uint8x8_t __a) - { -- return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a); -+ return (int16x4_t) __a; - } - - 
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vreinterpret_s16_u16 (uint16x4_t __a) - { -- return (int16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a); -+ return (int16x4_t) __a; - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vreinterpret_s16_u32 (uint32x2_t __a) - { -- return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a); -+ return (int16x4_t) __a; - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vreinterpret_s16_u64 (uint64x1_t __a) - { -- return (int16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a); -+ return (int16x4_t) __a; - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vreinterpret_s16_p8 (poly8x8_t __a) - { -- return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a); -+ return (int16x4_t) __a; - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vreinterpret_s16_p16 (poly16x4_t __a) - { -- return (int16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a); -+ return (int16x4_t) __a; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+vreinterpretq_s16_f64 (float64x2_t __a) -+{ -+ return (int16x8_t) __a; -+} -+ -+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_s16_s8 (int8x16_t __a) - { -- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a); -+ return (int16x8_t) __a; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_s16_s32 (int32x4_t __a) - { -- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a); -+ return (int16x8_t) __a; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_s16_s64 (int64x2_t __a) - { -- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a); -+ return (int16x8_t) __a; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_s16_f32 (float32x4_t __a) - { -- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a); -+ return (int16x8_t) __a; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_s16_u8 (uint8x16_t __a) - { -- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) __a); -+ return (int16x8_t) __a; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_s16_u16 (uint16x8_t __a) - { -- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a); -+ return (int16x8_t) __a; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_s16_u32 (uint32x4_t __a) - { -- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a); -+ return (int16x8_t) __a; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_s16_u64 (uint64x2_t __a) - { -- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a); -+ return (int16x8_t) __a; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_s16_p8 (poly8x16_t __a) - { -- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) __a); -+ return (int16x8_t) __a; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_s16_p16 (poly16x8_t __a) - { -- return (int16x8_t) 
__builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a); -+ return (int16x8_t) __a; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+vreinterpret_s32_f64 (float64x1_t __a) -+{ -+ return __builtin_aarch64_reinterpretv2sidf (__a); -+} -+ -+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vreinterpret_s32_s8 (int8x8_t __a) - { -- return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi (__a); -+ return (int32x2_t) __a; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vreinterpret_s32_s16 (int16x4_t __a) - { -- return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi (__a); -+ return (int32x2_t) __a; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vreinterpret_s32_s64 (int64x1_t __a) - { -- return (int32x2_t) __builtin_aarch64_reinterpretv2sidi (__a); -+ return (int32x2_t) __a; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vreinterpret_s32_f32 (float32x2_t __a) - { -- return (int32x2_t) __builtin_aarch64_reinterpretv2siv2sf (__a); -+ return (int32x2_t) __a; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vreinterpret_s32_u8 (uint8x8_t __a) - { -- return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a); -+ return (int32x2_t) __a; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vreinterpret_s32_u16 (uint16x4_t __a) - { -- return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a); -+ return (int32x2_t) __a; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vreinterpret_s32_u32 (uint32x2_t __a) - { -- return (int32x2_t) __builtin_aarch64_reinterpretv2siv2si ((int32x2_t) __a); -+ return (int32x2_t) __a; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vreinterpret_s32_u64 (uint64x1_t __a) - { -- return (int32x2_t) __builtin_aarch64_reinterpretv2sidi ((int64x1_t) __a); -+ return (int32x2_t) __a; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vreinterpret_s32_p8 (poly8x8_t __a) - { -- return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a); -+ return (int32x2_t) __a; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vreinterpret_s32_p16 (poly16x4_t __a) - { -- return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a); -+ return (int32x2_t) __a; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+vreinterpretq_s32_f64 (float64x2_t __a) -+{ -+ return (int32x4_t) __a; -+} -+ -+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_s32_s8 (int8x16_t __a) - { -- return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi (__a); -+ return (int32x4_t) __a; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_s32_s16 (int16x8_t __a) - { -- return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi (__a); -+ return (int32x4_t) __a; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_s32_s64 (int64x2_t __a) - { -- return (int32x4_t) __builtin_aarch64_reinterpretv4siv2di (__a); -+ return (int32x4_t) __a; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_s32_f32 (float32x4_t __a) - { -- return (int32x4_t) 
__builtin_aarch64_reinterpretv4siv4sf (__a); -+ return (int32x4_t) __a; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_s32_u8 (uint8x16_t __a) - { -- return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) __a); -+ return (int32x4_t) __a; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_s32_u16 (uint16x8_t __a) - { -- return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a); -+ return (int32x4_t) __a; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_s32_u32 (uint32x4_t __a) - { -- return (int32x4_t) __builtin_aarch64_reinterpretv4siv4si ((int32x4_t) __a); -+ return (int32x4_t) __a; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_s32_u64 (uint64x2_t __a) - { -- return (int32x4_t) __builtin_aarch64_reinterpretv4siv2di ((int64x2_t) __a); -+ return (int32x4_t) __a; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_s32_p8 (poly8x16_t __a) - { -- return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) __a); -+ return (int32x4_t) __a; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_s32_p16 (poly16x8_t __a) - { -- return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a); -+ return (int32x4_t) __a; - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+vreinterpret_u8_f64 (float64x1_t __a) -+{ -+ return __builtin_aarch64_reinterpretv8qidf_us (__a); -+} -+ -+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vreinterpret_u8_s8 (int8x8_t __a) - { -- return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv8qi (__a); -+ return (uint8x8_t) __a; - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vreinterpret_u8_s16 (int16x4_t __a) - { -- return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a); -+ return (uint8x8_t) __a; - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vreinterpret_u8_s32 (int32x2_t __a) - { -- return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a); -+ return (uint8x8_t) __a; - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vreinterpret_u8_s64 (int64x1_t __a) - { -- return (uint8x8_t) __builtin_aarch64_reinterpretv8qidi (__a); -+ return (uint8x8_t) __a; - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vreinterpret_u8_f32 (float32x2_t __a) - { -- return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a); -+ return (uint8x8_t) __a; - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vreinterpret_u8_u16 (uint16x4_t __a) - { -- return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a); -+ return (uint8x8_t) __a; - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vreinterpret_u8_u32 (uint32x2_t __a) - { -- return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a); -+ return (uint8x8_t) __a; - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vreinterpret_u8_u64 (uint64x1_t __a) - { -- return (uint8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a); -+ return (uint8x8_t) __a; - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - 
vreinterpret_u8_p8 (poly8x8_t __a) - { -- return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a); -+ return (uint8x8_t) __a; - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vreinterpret_u8_p16 (poly16x4_t __a) - { -- return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a); -+ return (uint8x8_t) __a; - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+vreinterpretq_u8_f64 (float64x2_t __a) -+{ -+ return (uint8x16_t) __a; -+} -+ -+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_u8_s8 (int8x16_t __a) - { -- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv16qi (__a); -+ return (uint8x16_t) __a; - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_u8_s16 (int16x8_t __a) - { -- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a); -+ return (uint8x16_t) __a; - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_u8_s32 (int32x4_t __a) - { -- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a); -+ return (uint8x16_t) __a; - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_u8_s64 (int64x2_t __a) - { -- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a); -+ return (uint8x16_t) __a; - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_u8_f32 (float32x4_t __a) - { -- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a); -+ return (uint8x16_t) __a; - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_u8_u16 (uint16x8_t __a) - { -- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) -- __a); -+ return (uint8x16_t) __a; - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_u8_u32 (uint32x4_t __a) - { -- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t) -- __a); -+ return (uint8x16_t) __a; - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_u8_u64 (uint64x2_t __a) - { -- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t) -- __a); -+ return (uint8x16_t) __a; - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_u8_p8 (poly8x16_t __a) - { -- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t) -- __a); -+ return (uint8x16_t) __a; - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_u8_p16 (poly16x8_t __a) - { -- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) -- __a); -+ return (uint8x16_t) __a; - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+vreinterpret_u16_f64 (float64x1_t __a) -+{ -+ return __builtin_aarch64_reinterpretv4hidf_us (__a); -+} -+ -+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vreinterpret_u16_s8 (int8x8_t __a) - { -- return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a); -+ return (uint16x4_t) __a; - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vreinterpret_u16_s16 (int16x4_t __a) - { -- return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv4hi (__a); -+ return (uint16x4_t) __a; - } - - __extension__ static __inline uint16x4_t 
__attribute__ ((__always_inline__)) - vreinterpret_u16_s32 (int32x2_t __a) - { -- return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a); -+ return (uint16x4_t) __a; - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vreinterpret_u16_s64 (int64x1_t __a) - { -- return (uint16x4_t) __builtin_aarch64_reinterpretv4hidi (__a); -+ return (uint16x4_t) __a; - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vreinterpret_u16_f32 (float32x2_t __a) - { -- return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a); -+ return (uint16x4_t) __a; - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vreinterpret_u16_u8 (uint8x8_t __a) - { -- return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a); -+ return (uint16x4_t) __a; - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vreinterpret_u16_u32 (uint32x2_t __a) - { -- return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a); -+ return (uint16x4_t) __a; - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vreinterpret_u16_u64 (uint64x1_t __a) - { -- return (uint16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a); -+ return (uint16x4_t) __a; - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vreinterpret_u16_p8 (poly8x8_t __a) - { -- return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a); -+ return (uint16x4_t) __a; - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vreinterpret_u16_p16 (poly16x4_t __a) - { -- return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a); -+ return (uint16x4_t) __a; - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+vreinterpretq_u16_f64 (float64x2_t __a) -+{ -+ return (uint16x8_t) __a; -+} -+ -+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_u16_s8 (int8x16_t __a) - { -- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a); -+ return (uint16x8_t) __a; - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_u16_s16 (int16x8_t __a) - { -- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv8hi (__a); -+ return (uint16x8_t) __a; - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_u16_s32 (int32x4_t __a) - { -- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a); -+ return (uint16x8_t) __a; - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_u16_s64 (int64x2_t __a) - { -- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a); -+ return (uint16x8_t) __a; - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_u16_f32 (float32x4_t __a) - { -- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a); -+ return (uint16x8_t) __a; - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_u16_u8 (uint8x16_t __a) - { -- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) -- __a); -+ return (uint16x8_t) __a; - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_u16_u32 (uint32x4_t __a) - { -- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a); -+ return 
(uint16x8_t) __a; - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_u16_u64 (uint64x2_t __a) - { -- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a); -+ return (uint16x8_t) __a; - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_u16_p8 (poly8x16_t __a) - { -- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) -- __a); -+ return (uint16x8_t) __a; - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_u16_p16 (poly16x8_t __a) - { -- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a); -+ return (uint16x8_t) __a; - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+vreinterpret_u32_f64 (float64x1_t __a) -+{ -+ return __builtin_aarch64_reinterpretv2sidf_us (__a); -+} -+ -+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vreinterpret_u32_s8 (int8x8_t __a) - { -- return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi (__a); -+ return (uint32x2_t) __a; - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vreinterpret_u32_s16 (int16x4_t __a) - { -- return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi (__a); -+ return (uint32x2_t) __a; - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vreinterpret_u32_s32 (int32x2_t __a) - { -- return (uint32x2_t) __builtin_aarch64_reinterpretv2siv2si (__a); -+ return (uint32x2_t) __a; - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vreinterpret_u32_s64 (int64x1_t __a) - { -- return (uint32x2_t) __builtin_aarch64_reinterpretv2sidi (__a); -+ return (uint32x2_t) __a; - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vreinterpret_u32_f32 (float32x2_t __a) - { -- return (uint32x2_t) __builtin_aarch64_reinterpretv2siv2sf (__a); -+ return (uint32x2_t) __a; - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vreinterpret_u32_u8 (uint8x8_t __a) - { -- return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a); -+ return (uint32x2_t) __a; - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vreinterpret_u32_u16 (uint16x4_t __a) - { -- return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a); -+ return (uint32x2_t) __a; - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vreinterpret_u32_u64 (uint64x1_t __a) - { -- return (uint32x2_t) __builtin_aarch64_reinterpretv2sidi ((int64x1_t) __a); -+ return (uint32x2_t) __a; - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vreinterpret_u32_p8 (poly8x8_t __a) - { -- return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a); -+ return (uint32x2_t) __a; - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vreinterpret_u32_p16 (poly16x4_t __a) - { -- return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a); -+ return (uint32x2_t) __a; - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+vreinterpretq_u32_f64 (float64x2_t __a) -+{ -+ return (uint32x4_t) __a; -+} -+ -+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_u32_s8 (int8x16_t __a) - { -- return (uint32x4_t) 
__builtin_aarch64_reinterpretv4siv16qi (__a); -+ return (uint32x4_t) __a; - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_u32_s16 (int16x8_t __a) - { -- return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi (__a); -+ return (uint32x4_t) __a; - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_u32_s32 (int32x4_t __a) - { -- return (uint32x4_t) __builtin_aarch64_reinterpretv4siv4si (__a); -+ return (uint32x4_t) __a; - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_u32_s64 (int64x2_t __a) - { -- return (uint32x4_t) __builtin_aarch64_reinterpretv4siv2di (__a); -+ return (uint32x4_t) __a; - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_u32_f32 (float32x4_t __a) - { -- return (uint32x4_t) __builtin_aarch64_reinterpretv4siv4sf (__a); -+ return (uint32x4_t) __a; - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_u32_u8 (uint8x16_t __a) - { -- return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) -- __a); -+ return (uint32x4_t) __a; - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_u32_u16 (uint16x8_t __a) - { -- return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a); -+ return (uint32x4_t) __a; - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_u32_u64 (uint64x2_t __a) - { -- return (uint32x4_t) __builtin_aarch64_reinterpretv4siv2di ((int64x2_t) __a); -+ return (uint32x4_t) __a; - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_u32_p8 (poly8x16_t __a) - { -- return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) -- __a); -+ return (uint32x4_t) __a; - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_u32_p16 (poly16x8_t __a) - { -- return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a); -+ return (uint32x4_t) __a; - } - - #define __GET_LOW(__TYPE) \ -@@ -4064,6 +4297,85 @@ - - #undef __GET_LOW - -+#define __GET_HIGH(__TYPE) \ -+ uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a); \ -+ uint64x1_t hi = vcreate_u64 (vgetq_lane_u64 (tmp, 1)); \ -+ return vreinterpret_##__TYPE##_u64 (hi); -+ -+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+vget_high_f32 (float32x4_t __a) -+{ -+ __GET_HIGH (f32); -+} -+ -+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -+vget_high_f64 (float64x2_t __a) -+{ -+ __GET_HIGH (f64); -+} -+ -+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+vget_high_p8 (poly8x16_t __a) -+{ -+ __GET_HIGH (p8); -+} -+ -+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+vget_high_p16 (poly16x8_t __a) -+{ -+ __GET_HIGH (p16); -+} -+ -+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+vget_high_s8 (int8x16_t __a) -+{ -+ __GET_HIGH (s8); -+} -+ -+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+vget_high_s16 (int16x8_t __a) -+{ -+ __GET_HIGH (s16); -+} -+ -+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+vget_high_s32 (int32x4_t __a) -+{ -+ __GET_HIGH (s32); -+} -+ -+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+vget_high_s64 (int64x2_t 
__a) -+{ -+ __GET_HIGH (s64); -+} -+ -+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+vget_high_u8 (uint8x16_t __a) -+{ -+ __GET_HIGH (u8); -+} -+ -+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+vget_high_u16 (uint16x8_t __a) -+{ -+ __GET_HIGH (u16); -+} -+ -+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+vget_high_u32 (uint32x4_t __a) -+{ -+ __GET_HIGH (u32); -+} -+ -+#undef __GET_HIGH -+ -+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+vget_high_u64 (uint64x2_t __a) -+{ -+ return vcreate_u64 (vgetq_lane_u64 (__a, 1)); -+} -+ - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vcombine_s8 (int8x8_t __a, int8x8_t __b) - { -@@ -5408,318 +5720,6 @@ - return result; - } - --#define vext_f32(a, b, c) \ -- __extension__ \ -- ({ \ -- float32x2_t b_ = (b); \ -- float32x2_t a_ = (a); \ -- float32x2_t result; \ -- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vext_f64(a, b, c) \ -- __extension__ \ -- ({ \ -- float64x1_t b_ = (b); \ -- float64x1_t a_ = (a); \ -- float64x1_t result; \ -- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vext_p8(a, b, c) \ -- __extension__ \ -- ({ \ -- poly8x8_t b_ = (b); \ -- poly8x8_t a_ = (a); \ -- poly8x8_t result; \ -- __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vext_p16(a, b, c) \ -- __extension__ \ -- ({ \ -- poly16x4_t b_ = (b); \ -- poly16x4_t a_ = (a); \ -- poly16x4_t result; \ -- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vext_s8(a, b, c) \ -- __extension__ \ -- ({ \ -- int8x8_t b_ = (b); \ -- int8x8_t a_ = (a); \ -- int8x8_t result; \ -- __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vext_s16(a, b, c) \ -- __extension__ \ -- ({ \ -- int16x4_t b_ = (b); \ -- int16x4_t a_ = (a); \ -- int16x4_t result; \ -- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vext_s32(a, b, c) \ -- __extension__ \ -- ({ \ -- int32x2_t b_ = (b); \ -- int32x2_t a_ = (a); \ -- int32x2_t result; \ -- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vext_s64(a, b, c) \ -- __extension__ \ -- ({ \ -- int64x1_t b_ = (b); \ -- int64x1_t a_ = (a); \ -- int64x1_t result; \ -- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vext_u8(a, b, c) \ -- __extension__ \ -- ({ \ -- uint8x8_t b_ = (b); \ -- uint8x8_t a_ = (a); \ -- uint8x8_t result; \ -- __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vext_u16(a, b, c) \ -- __extension__ \ -- ({ \ -- uint16x4_t b_ = (b); \ -- uint16x4_t a_ = (a); \ -- uint16x4_t result; \ -- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), 
"i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vext_u32(a, b, c) \ -- __extension__ \ -- ({ \ -- uint32x2_t b_ = (b); \ -- uint32x2_t a_ = (a); \ -- uint32x2_t result; \ -- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vext_u64(a, b, c) \ -- __extension__ \ -- ({ \ -- uint64x1_t b_ = (b); \ -- uint64x1_t a_ = (a); \ -- uint64x1_t result; \ -- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vextq_f32(a, b, c) \ -- __extension__ \ -- ({ \ -- float32x4_t b_ = (b); \ -- float32x4_t a_ = (a); \ -- float32x4_t result; \ -- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vextq_f64(a, b, c) \ -- __extension__ \ -- ({ \ -- float64x2_t b_ = (b); \ -- float64x2_t a_ = (a); \ -- float64x2_t result; \ -- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vextq_p8(a, b, c) \ -- __extension__ \ -- ({ \ -- poly8x16_t b_ = (b); \ -- poly8x16_t a_ = (a); \ -- poly8x16_t result; \ -- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vextq_p16(a, b, c) \ -- __extension__ \ -- ({ \ -- poly16x8_t b_ = (b); \ -- poly16x8_t a_ = (a); \ -- poly16x8_t result; \ -- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vextq_s8(a, b, c) \ -- __extension__ \ -- ({ \ -- int8x16_t b_ = (b); \ -- int8x16_t a_ = (a); \ -- int8x16_t result; \ -- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vextq_s16(a, b, c) \ -- __extension__ \ -- ({ \ -- int16x8_t b_ = (b); \ -- int16x8_t a_ = (a); \ -- int16x8_t result; \ -- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vextq_s32(a, b, c) \ -- __extension__ \ -- ({ \ -- int32x4_t b_ = (b); \ -- int32x4_t a_ = (a); \ -- int32x4_t result; \ -- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vextq_s64(a, b, c) \ -- __extension__ \ -- ({ \ -- int64x2_t b_ = (b); \ -- int64x2_t a_ = (a); \ -- int64x2_t result; \ -- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vextq_u8(a, b, c) \ -- __extension__ \ -- ({ \ -- uint8x16_t b_ = (b); \ -- uint8x16_t a_ = (a); \ -- uint8x16_t result; \ -- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vextq_u16(a, b, c) \ -- __extension__ \ -- ({ \ -- uint16x8_t b_ = (b); \ -- uint16x8_t a_ = (a); \ -- uint16x8_t result; \ -- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vextq_u32(a, b, c) \ -- __extension__ \ -- ({ \ -- uint32x4_t b_ = (b); \ -- uint32x4_t 
a_ = (a); \ -- uint32x4_t result; \ -- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vextq_u64(a, b, c) \ -- __extension__ \ -- ({ \ -- uint64x2_t b_ = (b); \ -- uint64x2_t a_ = (a); \ -- uint64x2_t result; \ -- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) - vfma_f32 (float32x2_t a, float32x2_t b, float32x2_t c) - { -@@ -5819,139 +5819,7 @@ - return result; - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vget_high_f32 (float32x4_t a) --{ -- float32x2_t result; -- __asm__ ("ins %0.d[0], %1.d[1]" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) --vget_high_f64 (float64x2_t a) --{ -- float64x1_t result; -- __asm__ ("ins %0.d[0], %1.d[1]" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vget_high_p8 (poly8x16_t a) --{ -- poly8x8_t result; -- __asm__ ("ins %0.d[0], %1.d[1]" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) --vget_high_p16 (poly16x8_t a) --{ -- poly16x4_t result; -- __asm__ ("ins %0.d[0], %1.d[1]" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vget_high_s8 (int8x16_t a) --{ -- int8x8_t result; -- __asm__ ("ins %0.d[0], %1.d[1]" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vget_high_s16 (int16x8_t a) --{ -- int16x4_t result; -- __asm__ ("ins %0.d[0], %1.d[1]" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vget_high_s32 (int32x4_t a) --{ -- int32x2_t result; -- __asm__ ("ins %0.d[0], %1.d[1]" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) --vget_high_s64 (int64x2_t a) --{ -- int64x1_t result; -- __asm__ ("ins %0.d[0], %1.d[1]" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vget_high_u8 (uint8x16_t a) --{ -- uint8x8_t result; -- __asm__ ("ins %0.d[0], %1.d[1]" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vget_high_u16 (uint16x8_t a) --{ -- uint16x4_t result; -- __asm__ ("ins %0.d[0], %1.d[1]" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vget_high_u32 (uint32x4_t a) --{ -- uint32x2_t result; -- __asm__ ("ins %0.d[0], %1.d[1]" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vget_high_u64 (uint64x2_t a) --{ -- uint64x1_t result; -- __asm__ 
("ins %0.d[0], %1.d[1]" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vhsub_s8 (int8x8_t a, int8x8_t b) - { - int8x8_t result; -@@ -6784,7 +6652,7 @@ - #define vmlal_high_lane_s16(a, b, c, d) \ - __extension__ \ - ({ \ -- int16x8_t c_ = (c); \ -+ int16x4_t c_ = (c); \ - int16x8_t b_ = (b); \ - int32x4_t a_ = (a); \ - int32x4_t result; \ -@@ -6798,7 +6666,7 @@ - #define vmlal_high_lane_s32(a, b, c, d) \ - __extension__ \ - ({ \ -- int32x4_t c_ = (c); \ -+ int32x2_t c_ = (c); \ - int32x4_t b_ = (b); \ - int64x2_t a_ = (a); \ - int64x2_t result; \ -@@ -6812,7 +6680,7 @@ - #define vmlal_high_lane_u16(a, b, c, d) \ - __extension__ \ - ({ \ -- uint16x8_t c_ = (c); \ -+ uint16x4_t c_ = (c); \ - uint16x8_t b_ = (b); \ - uint32x4_t a_ = (a); \ - uint32x4_t result; \ -@@ -6826,7 +6694,7 @@ - #define vmlal_high_lane_u32(a, b, c, d) \ - __extension__ \ - ({ \ -- uint32x4_t c_ = (c); \ -+ uint32x2_t c_ = (c); \ - uint32x4_t b_ = (b); \ - uint64x2_t a_ = (a); \ - uint64x2_t result; \ -@@ -7237,18 +7105,6 @@ - return result; - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vmlaq_n_f64 (float64x2_t a, float64x2_t b, float64_t c) --{ -- float64x2_t result; -- float64x2_t t1; -- __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fadd %0.2d, %0.2d, %1.2d" -- : "=w"(result), "=w"(t1) -- : "0"(a), "w"(b), "w"(c) -- : /* No clobbers */); -- return result; --} -- - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vmlaq_n_s16 (int16x8_t a, int16x8_t b, int16_t c) - { -@@ -7484,7 +7340,7 @@ - #define vmlsl_high_lane_s16(a, b, c, d) \ - __extension__ \ - ({ \ -- int16x8_t c_ = (c); \ -+ int16x4_t c_ = (c); \ - int16x8_t b_ = (b); \ - int32x4_t a_ = (a); \ - int32x4_t result; \ -@@ -7498,7 +7354,7 @@ - #define vmlsl_high_lane_s32(a, b, c, d) \ - __extension__ \ - ({ \ -- int32x4_t c_ = (c); \ -+ int32x2_t c_ = (c); \ - int32x4_t b_ = (b); \ - int64x2_t a_ = (a); \ - int64x2_t result; \ -@@ -7512,7 +7368,7 @@ - #define vmlsl_high_lane_u16(a, b, c, d) \ - __extension__ \ - ({ \ -- uint16x8_t c_ = (c); \ -+ uint16x4_t c_ = (c); \ - uint16x8_t b_ = (b); \ - uint32x4_t a_ = (a); \ - uint32x4_t result; \ -@@ -7526,7 +7382,7 @@ - #define vmlsl_high_lane_u32(a, b, c, d) \ - __extension__ \ - ({ \ -- uint32x4_t c_ = (c); \ -+ uint32x2_t c_ = (c); \ - uint32x4_t b_ = (b); \ - uint64x2_t a_ = (a); \ - uint64x2_t result; \ -@@ -7937,18 +7793,6 @@ - return result; - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vmlsq_n_f64 (float64x2_t a, float64x2_t b, float64_t c) --{ -- float64x2_t result; -- float64x2_t t1; -- __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fsub %0.2d, %0.2d, %1.2d" -- : "=w"(result), "=w"(t1) -- : "0"(a), "w"(b), "x"(c) -- : /* No clobbers */); -- return result; --} -- - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vmlsq_n_s16 (int16x8_t a, int16x8_t b, int16_t c) - { -@@ -9312,57 +9156,7 @@ - return result; - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vpadd_s8 (int8x8_t __a, int8x8_t __b) --{ -- return __builtin_aarch64_addpv8qi (__a, __b); --} -- - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vpadd_s16 (int16x4_t __a, int16x4_t __b) --{ -- return __builtin_aarch64_addpv4hi (__a, __b); --} -- --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vpadd_s32 (int32x2_t __a, int32x2_t 
__b) --{ -- return __builtin_aarch64_addpv2si (__a, __b); --} -- --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vpadd_u8 (uint8x8_t __a, uint8x8_t __b) --{ -- return (uint8x8_t) __builtin_aarch64_addpv8qi ((int8x8_t) __a, -- (int8x8_t) __b); --} -- --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vpadd_u16 (uint16x4_t __a, uint16x4_t __b) --{ -- return (uint16x4_t) __builtin_aarch64_addpv4hi ((int16x4_t) __a, -- (int16x4_t) __b); --} -- --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vpadd_u32 (uint32x2_t __a, uint32x2_t __b) --{ -- return (uint32x2_t) __builtin_aarch64_addpv2si ((int32x2_t) __a, -- (int32x2_t) __b); --} -- --__extension__ static __inline float64_t __attribute__ ((__always_inline__)) --vpaddd_f64 (float64x2_t a) --{ -- float64_t result; -- __asm__ ("faddp %d0,%1.2d" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vpaddl_s8 (int8x8_t a) - { - int16x4_t result; -@@ -10556,50 +10350,6 @@ - result; \ - }) - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vrbit_s8 (int8x8_t a) --{ -- int8x8_t result; -- __asm__ ("rbit %0.8b,%1.8b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vrbit_u8 (uint8x8_t a) --{ -- uint8x8_t result; -- __asm__ ("rbit %0.8b,%1.8b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vrbitq_s8 (int8x16_t a) --{ -- int8x16_t result; -- __asm__ ("rbit %0.16b,%1.16b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vrbitq_u8 (uint8x16_t a) --{ -- uint8x16_t result; -- __asm__ ("rbit %0.16b,%1.16b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vrecpe_u32 (uint32x2_t a) - { -@@ -10622,402 +10372,6 @@ - return result; - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vrev16_p8 (poly8x8_t a) --{ -- poly8x8_t result; -- __asm__ ("rev16 %0.8b,%1.8b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vrev16_s8 (int8x8_t a) --{ -- int8x8_t result; -- __asm__ ("rev16 %0.8b,%1.8b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vrev16_u8 (uint8x8_t a) --{ -- uint8x8_t result; -- __asm__ ("rev16 %0.8b,%1.8b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vrev16q_p8 (poly8x16_t a) --{ -- poly8x16_t result; -- __asm__ ("rev16 %0.16b,%1.16b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vrev16q_s8 (int8x16_t a) --{ -- int8x16_t result; -- __asm__ ("rev16 %0.16b,%1.16b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint8x16_t __attribute__ 
((__always_inline__)) --vrev16q_u8 (uint8x16_t a) --{ -- uint8x16_t result; -- __asm__ ("rev16 %0.16b,%1.16b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vrev32_p8 (poly8x8_t a) --{ -- poly8x8_t result; -- __asm__ ("rev32 %0.8b,%1.8b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) --vrev32_p16 (poly16x4_t a) --{ -- poly16x4_t result; -- __asm__ ("rev32 %0.4h,%1.4h" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vrev32_s8 (int8x8_t a) --{ -- int8x8_t result; -- __asm__ ("rev32 %0.8b,%1.8b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vrev32_s16 (int16x4_t a) --{ -- int16x4_t result; -- __asm__ ("rev32 %0.4h,%1.4h" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vrev32_u8 (uint8x8_t a) --{ -- uint8x8_t result; -- __asm__ ("rev32 %0.8b,%1.8b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vrev32_u16 (uint16x4_t a) --{ -- uint16x4_t result; -- __asm__ ("rev32 %0.4h,%1.4h" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vrev32q_p8 (poly8x16_t a) --{ -- poly8x16_t result; -- __asm__ ("rev32 %0.16b,%1.16b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) --vrev32q_p16 (poly16x8_t a) --{ -- poly16x8_t result; -- __asm__ ("rev32 %0.8h,%1.8h" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vrev32q_s8 (int8x16_t a) --{ -- int8x16_t result; -- __asm__ ("rev32 %0.16b,%1.16b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vrev32q_s16 (int16x8_t a) --{ -- int16x8_t result; -- __asm__ ("rev32 %0.8h,%1.8h" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vrev32q_u8 (uint8x16_t a) --{ -- uint8x16_t result; -- __asm__ ("rev32 %0.16b,%1.16b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vrev32q_u16 (uint16x8_t a) --{ -- uint16x8_t result; -- __asm__ ("rev32 %0.8h,%1.8h" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vrev64_f32 (float32x2_t a) --{ -- float32x2_t result; -- __asm__ ("rev64 %0.2s,%1.2s" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vrev64_p8 (poly8x8_t a) --{ -- poly8x8_t result; -- __asm__ ("rev64 %0.8b,%1.8b" -- : "=w"(result) -- : "w"(a) 
-- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) --vrev64_p16 (poly16x4_t a) --{ -- poly16x4_t result; -- __asm__ ("rev64 %0.4h,%1.4h" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vrev64_s8 (int8x8_t a) --{ -- int8x8_t result; -- __asm__ ("rev64 %0.8b,%1.8b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vrev64_s16 (int16x4_t a) --{ -- int16x4_t result; -- __asm__ ("rev64 %0.4h,%1.4h" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vrev64_s32 (int32x2_t a) --{ -- int32x2_t result; -- __asm__ ("rev64 %0.2s,%1.2s" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vrev64_u8 (uint8x8_t a) --{ -- uint8x8_t result; -- __asm__ ("rev64 %0.8b,%1.8b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vrev64_u16 (uint16x4_t a) --{ -- uint16x4_t result; -- __asm__ ("rev64 %0.4h,%1.4h" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vrev64_u32 (uint32x2_t a) --{ -- uint32x2_t result; -- __asm__ ("rev64 %0.2s,%1.2s" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vrev64q_f32 (float32x4_t a) --{ -- float32x4_t result; -- __asm__ ("rev64 %0.4s,%1.4s" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vrev64q_p8 (poly8x16_t a) --{ -- poly8x16_t result; -- __asm__ ("rev64 %0.16b,%1.16b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) --vrev64q_p16 (poly16x8_t a) --{ -- poly16x8_t result; -- __asm__ ("rev64 %0.8h,%1.8h" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vrev64q_s8 (int8x16_t a) --{ -- int8x16_t result; -- __asm__ ("rev64 %0.16b,%1.16b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vrev64q_s16 (int16x8_t a) --{ -- int16x8_t result; -- __asm__ ("rev64 %0.8h,%1.8h" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vrev64q_s32 (int32x4_t a) --{ -- int32x4_t result; -- __asm__ ("rev64 %0.4s,%1.4s" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vrev64q_u8 (uint8x16_t a) --{ -- uint8x16_t result; -- __asm__ ("rev64 %0.16b,%1.16b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 
--vrev64q_u16 (uint16x8_t a) --{ -- uint16x8_t result; -- __asm__ ("rev64 %0.8h,%1.8h" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vrev64q_u32 (uint32x4_t a) --{ -- uint32x4_t result; -- __asm__ ("rev64 %0.4s,%1.4s" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -- - #define vrshrn_high_n_s16(a, b, c) \ - __extension__ \ - ({ \ -@@ -11323,17 +10677,6 @@ - return result; - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vrsrtsq_f64 (float64x2_t a, float64x2_t b) --{ -- float64x2_t result; -- __asm__ ("frsqrts %0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vrsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c) - { -@@ -12441,469 +11784,7 @@ - return result; - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vtrn1_f32 (float32x2_t a, float32x2_t b) --{ -- float32x2_t result; -- __asm__ ("trn1 %0.2s,%1.2s,%2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vtrn1_p8 (poly8x8_t a, poly8x8_t b) --{ -- poly8x8_t result; -- __asm__ ("trn1 %0.8b,%1.8b,%2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) --vtrn1_p16 (poly16x4_t a, poly16x4_t b) --{ -- poly16x4_t result; -- __asm__ ("trn1 %0.4h,%1.4h,%2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vtrn1_s8 (int8x8_t a, int8x8_t b) --{ -- int8x8_t result; -- __asm__ ("trn1 %0.8b,%1.8b,%2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vtrn1_s16 (int16x4_t a, int16x4_t b) --{ -- int16x4_t result; -- __asm__ ("trn1 %0.4h,%1.4h,%2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vtrn1_s32 (int32x2_t a, int32x2_t b) --{ -- int32x2_t result; -- __asm__ ("trn1 %0.2s,%1.2s,%2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vtrn1_u8 (uint8x8_t a, uint8x8_t b) --{ -- uint8x8_t result; -- __asm__ ("trn1 %0.8b,%1.8b,%2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vtrn1_u16 (uint16x4_t a, uint16x4_t b) --{ -- uint16x4_t result; -- __asm__ ("trn1 %0.4h,%1.4h,%2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vtrn1_u32 (uint32x2_t a, uint32x2_t b) --{ -- uint32x2_t result; -- __asm__ ("trn1 %0.2s,%1.2s,%2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vtrn1q_f32 (float32x4_t a, float32x4_t b) --{ -- float32x4_t 
result; -- __asm__ ("trn1 %0.4s,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vtrn1q_f64 (float64x2_t a, float64x2_t b) --{ -- float64x2_t result; -- __asm__ ("trn1 %0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vtrn1q_p8 (poly8x16_t a, poly8x16_t b) --{ -- poly8x16_t result; -- __asm__ ("trn1 %0.16b,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) --vtrn1q_p16 (poly16x8_t a, poly16x8_t b) --{ -- poly16x8_t result; -- __asm__ ("trn1 %0.8h,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vtrn1q_s8 (int8x16_t a, int8x16_t b) --{ -- int8x16_t result; -- __asm__ ("trn1 %0.16b,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vtrn1q_s16 (int16x8_t a, int16x8_t b) --{ -- int16x8_t result; -- __asm__ ("trn1 %0.8h,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vtrn1q_s32 (int32x4_t a, int32x4_t b) --{ -- int32x4_t result; -- __asm__ ("trn1 %0.4s,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vtrn1q_s64 (int64x2_t a, int64x2_t b) --{ -- int64x2_t result; -- __asm__ ("trn1 %0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vtrn1q_u8 (uint8x16_t a, uint8x16_t b) --{ -- uint8x16_t result; -- __asm__ ("trn1 %0.16b,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vtrn1q_u16 (uint16x8_t a, uint16x8_t b) --{ -- uint16x8_t result; -- __asm__ ("trn1 %0.8h,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vtrn1q_u32 (uint32x4_t a, uint32x4_t b) --{ -- uint32x4_t result; -- __asm__ ("trn1 %0.4s,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vtrn1q_u64 (uint64x2_t a, uint64x2_t b) --{ -- uint64x2_t result; -- __asm__ ("trn1 %0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vtrn2_f32 (float32x2_t a, float32x2_t b) --{ -- float32x2_t result; -- __asm__ ("trn2 %0.2s,%1.2s,%2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vtrn2_p8 (poly8x8_t a, poly8x8_t b) --{ -- poly8x8_t result; -- __asm__ ("trn2 
%0.8b,%1.8b,%2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) --vtrn2_p16 (poly16x4_t a, poly16x4_t b) --{ -- poly16x4_t result; -- __asm__ ("trn2 %0.4h,%1.4h,%2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vtrn2_s8 (int8x8_t a, int8x8_t b) --{ -- int8x8_t result; -- __asm__ ("trn2 %0.8b,%1.8b,%2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vtrn2_s16 (int16x4_t a, int16x4_t b) --{ -- int16x4_t result; -- __asm__ ("trn2 %0.4h,%1.4h,%2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vtrn2_s32 (int32x2_t a, int32x2_t b) --{ -- int32x2_t result; -- __asm__ ("trn2 %0.2s,%1.2s,%2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vtrn2_u8 (uint8x8_t a, uint8x8_t b) --{ -- uint8x8_t result; -- __asm__ ("trn2 %0.8b,%1.8b,%2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vtrn2_u16 (uint16x4_t a, uint16x4_t b) --{ -- uint16x4_t result; -- __asm__ ("trn2 %0.4h,%1.4h,%2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vtrn2_u32 (uint32x2_t a, uint32x2_t b) --{ -- uint32x2_t result; -- __asm__ ("trn2 %0.2s,%1.2s,%2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vtrn2q_f32 (float32x4_t a, float32x4_t b) --{ -- float32x4_t result; -- __asm__ ("trn2 %0.4s,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vtrn2q_f64 (float64x2_t a, float64x2_t b) --{ -- float64x2_t result; -- __asm__ ("trn2 %0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vtrn2q_p8 (poly8x16_t a, poly8x16_t b) --{ -- poly8x16_t result; -- __asm__ ("trn2 %0.16b,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) --vtrn2q_p16 (poly16x8_t a, poly16x8_t b) --{ -- poly16x8_t result; -- __asm__ ("trn2 %0.8h,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vtrn2q_s8 (int8x16_t a, int8x16_t b) --{ -- int8x16_t result; -- __asm__ ("trn2 %0.16b,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vtrn2q_s16 (int16x8_t a, int16x8_t b) --{ -- int16x8_t result; -- __asm__ ("trn2 %0.8h,%1.8h,%2.8h" -- : "=w"(result) -- : 
"w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vtrn2q_s32 (int32x4_t a, int32x4_t b) --{ -- int32x4_t result; -- __asm__ ("trn2 %0.4s,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vtrn2q_s64 (int64x2_t a, int64x2_t b) --{ -- int64x2_t result; -- __asm__ ("trn2 %0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vtrn2q_u8 (uint8x16_t a, uint8x16_t b) --{ -- uint8x16_t result; -- __asm__ ("trn2 %0.16b,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vtrn2q_u16 (uint16x8_t a, uint16x8_t b) --{ -- uint16x8_t result; -- __asm__ ("trn2 %0.8h,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vtrn2q_u32 (uint32x4_t a, uint32x4_t b) --{ -- uint32x4_t result; -- __asm__ ("trn2 %0.4s,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vtrn2q_u64 (uint64x2_t a, uint64x2_t b) --{ -- uint64x2_t result; -- __asm__ ("trn2 %0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vtst_p8 (poly8x8_t a, poly8x8_t b) - { - uint8x8_t result; -@@ -12946,930 +11827,7 @@ - : /* No clobbers */); - return result; - } --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vuzp1_f32 (float32x2_t a, float32x2_t b) --{ -- float32x2_t result; -- __asm__ ("uzp1 %0.2s,%1.2s,%2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vuzp1_p8 (poly8x8_t a, poly8x8_t b) --{ -- poly8x8_t result; -- __asm__ ("uzp1 %0.8b,%1.8b,%2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) --vuzp1_p16 (poly16x4_t a, poly16x4_t b) --{ -- poly16x4_t result; -- __asm__ ("uzp1 %0.4h,%1.4h,%2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vuzp1_s8 (int8x8_t a, int8x8_t b) --{ -- int8x8_t result; -- __asm__ ("uzp1 %0.8b,%1.8b,%2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vuzp1_s16 (int16x4_t a, int16x4_t b) --{ -- int16x4_t result; -- __asm__ ("uzp1 %0.4h,%1.4h,%2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vuzp1_s32 (int32x2_t a, int32x2_t b) --{ -- int32x2_t result; -- __asm__ ("uzp1 %0.2s,%1.2s,%2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint8x8_t 
__attribute__ ((__always_inline__)) --vuzp1_u8 (uint8x8_t a, uint8x8_t b) --{ -- uint8x8_t result; -- __asm__ ("uzp1 %0.8b,%1.8b,%2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vuzp1_u16 (uint16x4_t a, uint16x4_t b) --{ -- uint16x4_t result; -- __asm__ ("uzp1 %0.4h,%1.4h,%2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vuzp1_u32 (uint32x2_t a, uint32x2_t b) --{ -- uint32x2_t result; -- __asm__ ("uzp1 %0.2s,%1.2s,%2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vuzp1q_f32 (float32x4_t a, float32x4_t b) --{ -- float32x4_t result; -- __asm__ ("uzp1 %0.4s,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vuzp1q_f64 (float64x2_t a, float64x2_t b) --{ -- float64x2_t result; -- __asm__ ("uzp1 %0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vuzp1q_p8 (poly8x16_t a, poly8x16_t b) --{ -- poly8x16_t result; -- __asm__ ("uzp1 %0.16b,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) --vuzp1q_p16 (poly16x8_t a, poly16x8_t b) --{ -- poly16x8_t result; -- __asm__ ("uzp1 %0.8h,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vuzp1q_s8 (int8x16_t a, int8x16_t b) --{ -- int8x16_t result; -- __asm__ ("uzp1 %0.16b,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vuzp1q_s16 (int16x8_t a, int16x8_t b) --{ -- int16x8_t result; -- __asm__ ("uzp1 %0.8h,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vuzp1q_s32 (int32x4_t a, int32x4_t b) --{ -- int32x4_t result; -- __asm__ ("uzp1 %0.4s,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vuzp1q_s64 (int64x2_t a, int64x2_t b) --{ -- int64x2_t result; -- __asm__ ("uzp1 %0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vuzp1q_u8 (uint8x16_t a, uint8x16_t b) --{ -- uint8x16_t result; -- __asm__ ("uzp1 %0.16b,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vuzp1q_u16 (uint16x8_t a, uint16x8_t b) --{ -- uint16x8_t result; -- __asm__ ("uzp1 %0.8h,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint32x4_t __attribute__ 
((__always_inline__)) --vuzp1q_u32 (uint32x4_t a, uint32x4_t b) --{ -- uint32x4_t result; -- __asm__ ("uzp1 %0.4s,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vuzp1q_u64 (uint64x2_t a, uint64x2_t b) --{ -- uint64x2_t result; -- __asm__ ("uzp1 %0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vuzp2_f32 (float32x2_t a, float32x2_t b) --{ -- float32x2_t result; -- __asm__ ("uzp2 %0.2s,%1.2s,%2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vuzp2_p8 (poly8x8_t a, poly8x8_t b) --{ -- poly8x8_t result; -- __asm__ ("uzp2 %0.8b,%1.8b,%2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) --vuzp2_p16 (poly16x4_t a, poly16x4_t b) --{ -- poly16x4_t result; -- __asm__ ("uzp2 %0.4h,%1.4h,%2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vuzp2_s8 (int8x8_t a, int8x8_t b) --{ -- int8x8_t result; -- __asm__ ("uzp2 %0.8b,%1.8b,%2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vuzp2_s16 (int16x4_t a, int16x4_t b) --{ -- int16x4_t result; -- __asm__ ("uzp2 %0.4h,%1.4h,%2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vuzp2_s32 (int32x2_t a, int32x2_t b) --{ -- int32x2_t result; -- __asm__ ("uzp2 %0.2s,%1.2s,%2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vuzp2_u8 (uint8x8_t a, uint8x8_t b) --{ -- uint8x8_t result; -- __asm__ ("uzp2 %0.8b,%1.8b,%2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vuzp2_u16 (uint16x4_t a, uint16x4_t b) --{ -- uint16x4_t result; -- __asm__ ("uzp2 %0.4h,%1.4h,%2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vuzp2_u32 (uint32x2_t a, uint32x2_t b) --{ -- uint32x2_t result; -- __asm__ ("uzp2 %0.2s,%1.2s,%2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vuzp2q_f32 (float32x4_t a, float32x4_t b) --{ -- float32x4_t result; -- __asm__ ("uzp2 %0.4s,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vuzp2q_f64 (float64x2_t a, float64x2_t b) --{ -- float64x2_t result; -- __asm__ ("uzp2 %0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vuzp2q_p8 
(poly8x16_t a, poly8x16_t b) --{ -- poly8x16_t result; -- __asm__ ("uzp2 %0.16b,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) --vuzp2q_p16 (poly16x8_t a, poly16x8_t b) --{ -- poly16x8_t result; -- __asm__ ("uzp2 %0.8h,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vuzp2q_s8 (int8x16_t a, int8x16_t b) --{ -- int8x16_t result; -- __asm__ ("uzp2 %0.16b,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vuzp2q_s16 (int16x8_t a, int16x8_t b) --{ -- int16x8_t result; -- __asm__ ("uzp2 %0.8h,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vuzp2q_s32 (int32x4_t a, int32x4_t b) --{ -- int32x4_t result; -- __asm__ ("uzp2 %0.4s,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vuzp2q_s64 (int64x2_t a, int64x2_t b) --{ -- int64x2_t result; -- __asm__ ("uzp2 %0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vuzp2q_u8 (uint8x16_t a, uint8x16_t b) --{ -- uint8x16_t result; -- __asm__ ("uzp2 %0.16b,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vuzp2q_u16 (uint16x8_t a, uint16x8_t b) --{ -- uint16x8_t result; -- __asm__ ("uzp2 %0.8h,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vuzp2q_u32 (uint32x4_t a, uint32x4_t b) --{ -- uint32x4_t result; -- __asm__ ("uzp2 %0.4s,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vuzp2q_u64 (uint64x2_t a, uint64x2_t b) --{ -- uint64x2_t result; -- __asm__ ("uzp2 %0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vzip1_f32 (float32x2_t a, float32x2_t b) --{ -- float32x2_t result; -- __asm__ ("zip1 %0.2s,%1.2s,%2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vzip1_p8 (poly8x8_t a, poly8x8_t b) --{ -- poly8x8_t result; -- __asm__ ("zip1 %0.8b,%1.8b,%2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) --vzip1_p16 (poly16x4_t a, poly16x4_t b) --{ -- poly16x4_t result; -- __asm__ ("zip1 %0.4h,%1.4h,%2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vzip1_s8 (int8x8_t a, int8x8_t b) --{ -- 
int8x8_t result; -- __asm__ ("zip1 %0.8b,%1.8b,%2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vzip1_s16 (int16x4_t a, int16x4_t b) --{ -- int16x4_t result; -- __asm__ ("zip1 %0.4h,%1.4h,%2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vzip1_s32 (int32x2_t a, int32x2_t b) --{ -- int32x2_t result; -- __asm__ ("zip1 %0.2s,%1.2s,%2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vzip1_u8 (uint8x8_t a, uint8x8_t b) --{ -- uint8x8_t result; -- __asm__ ("zip1 %0.8b,%1.8b,%2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vzip1_u16 (uint16x4_t a, uint16x4_t b) --{ -- uint16x4_t result; -- __asm__ ("zip1 %0.4h,%1.4h,%2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vzip1_u32 (uint32x2_t a, uint32x2_t b) --{ -- uint32x2_t result; -- __asm__ ("zip1 %0.2s,%1.2s,%2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vzip1q_f32 (float32x4_t a, float32x4_t b) --{ -- float32x4_t result; -- __asm__ ("zip1 %0.4s,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vzip1q_f64 (float64x2_t a, float64x2_t b) --{ -- float64x2_t result; -- __asm__ ("zip1 %0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vzip1q_p8 (poly8x16_t a, poly8x16_t b) --{ -- poly8x16_t result; -- __asm__ ("zip1 %0.16b,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) --vzip1q_p16 (poly16x8_t a, poly16x8_t b) --{ -- poly16x8_t result; -- __asm__ ("zip1 %0.8h,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vzip1q_s8 (int8x16_t a, int8x16_t b) --{ -- int8x16_t result; -- __asm__ ("zip1 %0.16b,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vzip1q_s16 (int16x8_t a, int16x8_t b) --{ -- int16x8_t result; -- __asm__ ("zip1 %0.8h,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vzip1q_s32 (int32x4_t a, int32x4_t b) --{ -- int32x4_t result; -- __asm__ ("zip1 %0.4s,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vzip1q_s64 (int64x2_t a, int64x2_t b) --{ -- int64x2_t result; -- __asm__ ("zip1 
%0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vzip1q_u8 (uint8x16_t a, uint8x16_t b) --{ -- uint8x16_t result; -- __asm__ ("zip1 %0.16b,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vzip1q_u16 (uint16x8_t a, uint16x8_t b) --{ -- uint16x8_t result; -- __asm__ ("zip1 %0.8h,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vzip1q_u32 (uint32x4_t a, uint32x4_t b) --{ -- uint32x4_t result; -- __asm__ ("zip1 %0.4s,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vzip1q_u64 (uint64x2_t a, uint64x2_t b) --{ -- uint64x2_t result; -- __asm__ ("zip1 %0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vzip2_f32 (float32x2_t a, float32x2_t b) --{ -- float32x2_t result; -- __asm__ ("zip2 %0.2s,%1.2s,%2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vzip2_p8 (poly8x8_t a, poly8x8_t b) --{ -- poly8x8_t result; -- __asm__ ("zip2 %0.8b,%1.8b,%2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) --vzip2_p16 (poly16x4_t a, poly16x4_t b) --{ -- poly16x4_t result; -- __asm__ ("zip2 %0.4h,%1.4h,%2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vzip2_s8 (int8x8_t a, int8x8_t b) --{ -- int8x8_t result; -- __asm__ ("zip2 %0.8b,%1.8b,%2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vzip2_s16 (int16x4_t a, int16x4_t b) --{ -- int16x4_t result; -- __asm__ ("zip2 %0.4h,%1.4h,%2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vzip2_s32 (int32x2_t a, int32x2_t b) --{ -- int32x2_t result; -- __asm__ ("zip2 %0.2s,%1.2s,%2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vzip2_u8 (uint8x8_t a, uint8x8_t b) --{ -- uint8x8_t result; -- __asm__ ("zip2 %0.8b,%1.8b,%2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vzip2_u16 (uint16x4_t a, uint16x4_t b) --{ -- uint16x4_t result; -- __asm__ ("zip2 %0.4h,%1.4h,%2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vzip2_u32 (uint32x2_t a, uint32x2_t b) --{ -- uint32x2_t result; -- __asm__ ("zip2 %0.2s,%1.2s,%2.2s" -- : "=w"(result) -- : 
"w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vzip2q_f32 (float32x4_t a, float32x4_t b) --{ -- float32x4_t result; -- __asm__ ("zip2 %0.4s,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vzip2q_f64 (float64x2_t a, float64x2_t b) --{ -- float64x2_t result; -- __asm__ ("zip2 %0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vzip2q_p8 (poly8x16_t a, poly8x16_t b) --{ -- poly8x16_t result; -- __asm__ ("zip2 %0.16b,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) --vzip2q_p16 (poly16x8_t a, poly16x8_t b) --{ -- poly16x8_t result; -- __asm__ ("zip2 %0.8h,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vzip2q_s8 (int8x16_t a, int8x16_t b) --{ -- int8x16_t result; -- __asm__ ("zip2 %0.16b,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vzip2q_s16 (int16x8_t a, int16x8_t b) --{ -- int16x8_t result; -- __asm__ ("zip2 %0.8h,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vzip2q_s32 (int32x4_t a, int32x4_t b) --{ -- int32x4_t result; -- __asm__ ("zip2 %0.4s,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vzip2q_s64 (int64x2_t a, int64x2_t b) --{ -- int64x2_t result; -- __asm__ ("zip2 %0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vzip2q_u8 (uint8x16_t a, uint8x16_t b) --{ -- uint8x16_t result; -- __asm__ ("zip2 %0.16b,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vzip2q_u16 (uint16x8_t a, uint16x8_t b) --{ -- uint16x8_t result; -- __asm__ ("zip2 %0.8h,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vzip2q_u32 (uint32x4_t a, uint32x4_t b) --{ -- uint32x4_t result; -- __asm__ ("zip2 %0.4s,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vzip2q_u64 (uint64x2_t a, uint64x2_t b) --{ -- uint64x2_t result; -- __asm__ ("zip2 %0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- - /* End of temporary inline asm implementations. */ - - /* Start of temporary inline asm for vldn, vstn and friends. 
*/ -@@ -13953,46 +11911,6 @@ - __STRUCTN (float, 64, 4) - #undef __STRUCTN - --#define __LD2R_FUNC(rettype, structtype, ptrtype, \ -- regsuffix, funcsuffix, Q) \ -- __extension__ static __inline rettype \ -- __attribute__ ((__always_inline__)) \ -- vld2 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \ -- { \ -- rettype result; \ -- __asm__ ("ld2r {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \ -- "st1 {v16." #regsuffix ", v17." #regsuffix "}, %0\n\t" \ -- : "=Q"(result) \ -- : "Q"(*(const structtype *)ptr) \ -- : "memory", "v16", "v17"); \ -- return result; \ -- } -- --__LD2R_FUNC (float32x2x2_t, float32x2_t, float32_t, 2s, f32,) --__LD2R_FUNC (float64x1x2_t, float64x2_t, float64_t, 1d, f64,) --__LD2R_FUNC (poly8x8x2_t, poly8x2_t, poly8_t, 8b, p8,) --__LD2R_FUNC (poly16x4x2_t, poly16x2_t, poly16_t, 4h, p16,) --__LD2R_FUNC (int8x8x2_t, int8x2_t, int8_t, 8b, s8,) --__LD2R_FUNC (int16x4x2_t, int16x2_t, int16_t, 4h, s16,) --__LD2R_FUNC (int32x2x2_t, int32x2_t, int32_t, 2s, s32,) --__LD2R_FUNC (int64x1x2_t, int64x2_t, int64_t, 1d, s64,) --__LD2R_FUNC (uint8x8x2_t, uint8x2_t, uint8_t, 8b, u8,) --__LD2R_FUNC (uint16x4x2_t, uint16x2_t, uint16_t, 4h, u16,) --__LD2R_FUNC (uint32x2x2_t, uint32x2_t, uint32_t, 2s, u32,) --__LD2R_FUNC (uint64x1x2_t, uint64x2_t, uint64_t, 1d, u64,) --__LD2R_FUNC (float32x4x2_t, float32x2_t, float32_t, 4s, f32, q) --__LD2R_FUNC (float64x2x2_t, float64x2_t, float64_t, 2d, f64, q) --__LD2R_FUNC (poly8x16x2_t, poly8x2_t, poly8_t, 16b, p8, q) --__LD2R_FUNC (poly16x8x2_t, poly16x2_t, poly16_t, 8h, p16, q) --__LD2R_FUNC (int8x16x2_t, int8x2_t, int8_t, 16b, s8, q) --__LD2R_FUNC (int16x8x2_t, int16x2_t, int16_t, 8h, s16, q) --__LD2R_FUNC (int32x4x2_t, int32x2_t, int32_t, 4s, s32, q) --__LD2R_FUNC (int64x2x2_t, int64x2_t, int64_t, 2d, s64, q) --__LD2R_FUNC (uint8x16x2_t, uint8x2_t, uint8_t, 16b, u8, q) --__LD2R_FUNC (uint16x8x2_t, uint16x2_t, uint16_t, 8h, u16, q) --__LD2R_FUNC (uint32x4x2_t, uint32x2_t, uint32_t, 4s, u32, q) --__LD2R_FUNC (uint64x2x2_t, uint64x2_t, uint64_t, 2d, u64, q) -- - #define __LD2_LANE_FUNC(rettype, ptrtype, regsuffix, \ - lnsuffix, funcsuffix, Q) \ - __extension__ static __inline rettype \ -@@ -14035,46 +11953,6 @@ - __LD2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q) - __LD2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q) - --#define __LD3R_FUNC(rettype, structtype, ptrtype, \ -- regsuffix, funcsuffix, Q) \ -- __extension__ static __inline rettype \ -- __attribute__ ((__always_inline__)) \ -- vld3 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \ -- { \ -- rettype result; \ -- __asm__ ("ld3r {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \ -- "st1 {v16." #regsuffix " - v18." 
#regsuffix "}, %0\n\t" \ -- : "=Q"(result) \ -- : "Q"(*(const structtype *)ptr) \ -- : "memory", "v16", "v17", "v18"); \ -- return result; \ -- } -- --__LD3R_FUNC (float32x2x3_t, float32x3_t, float32_t, 2s, f32,) --__LD3R_FUNC (float64x1x3_t, float64x3_t, float64_t, 1d, f64,) --__LD3R_FUNC (poly8x8x3_t, poly8x3_t, poly8_t, 8b, p8,) --__LD3R_FUNC (poly16x4x3_t, poly16x3_t, poly16_t, 4h, p16,) --__LD3R_FUNC (int8x8x3_t, int8x3_t, int8_t, 8b, s8,) --__LD3R_FUNC (int16x4x3_t, int16x3_t, int16_t, 4h, s16,) --__LD3R_FUNC (int32x2x3_t, int32x3_t, int32_t, 2s, s32,) --__LD3R_FUNC (int64x1x3_t, int64x3_t, int64_t, 1d, s64,) --__LD3R_FUNC (uint8x8x3_t, uint8x3_t, uint8_t, 8b, u8,) --__LD3R_FUNC (uint16x4x3_t, uint16x3_t, uint16_t, 4h, u16,) --__LD3R_FUNC (uint32x2x3_t, uint32x3_t, uint32_t, 2s, u32,) --__LD3R_FUNC (uint64x1x3_t, uint64x3_t, uint64_t, 1d, u64,) --__LD3R_FUNC (float32x4x3_t, float32x3_t, float32_t, 4s, f32, q) --__LD3R_FUNC (float64x2x3_t, float64x3_t, float64_t, 2d, f64, q) --__LD3R_FUNC (poly8x16x3_t, poly8x3_t, poly8_t, 16b, p8, q) --__LD3R_FUNC (poly16x8x3_t, poly16x3_t, poly16_t, 8h, p16, q) --__LD3R_FUNC (int8x16x3_t, int8x3_t, int8_t, 16b, s8, q) --__LD3R_FUNC (int16x8x3_t, int16x3_t, int16_t, 8h, s16, q) --__LD3R_FUNC (int32x4x3_t, int32x3_t, int32_t, 4s, s32, q) --__LD3R_FUNC (int64x2x3_t, int64x3_t, int64_t, 2d, s64, q) --__LD3R_FUNC (uint8x16x3_t, uint8x3_t, uint8_t, 16b, u8, q) --__LD3R_FUNC (uint16x8x3_t, uint16x3_t, uint16_t, 8h, u16, q) --__LD3R_FUNC (uint32x4x3_t, uint32x3_t, uint32_t, 4s, u32, q) --__LD3R_FUNC (uint64x2x3_t, uint64x3_t, uint64_t, 2d, u64, q) -- - #define __LD3_LANE_FUNC(rettype, ptrtype, regsuffix, \ - lnsuffix, funcsuffix, Q) \ - __extension__ static __inline rettype \ -@@ -14117,46 +11995,6 @@ - __LD3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q) - __LD3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q) - --#define __LD4R_FUNC(rettype, structtype, ptrtype, \ -- regsuffix, funcsuffix, Q) \ -- __extension__ static __inline rettype \ -- __attribute__ ((__always_inline__)) \ -- vld4 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \ -- { \ -- rettype result; \ -- __asm__ ("ld4r {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \ -- "st1 {v16." #regsuffix " - v19." 
#regsuffix "}, %0\n\t" \ -- : "=Q"(result) \ -- : "Q"(*(const structtype *)ptr) \ -- : "memory", "v16", "v17", "v18", "v19"); \ -- return result; \ -- } -- --__LD4R_FUNC (float32x2x4_t, float32x4_t, float32_t, 2s, f32,) --__LD4R_FUNC (float64x1x4_t, float64x4_t, float64_t, 1d, f64,) --__LD4R_FUNC (poly8x8x4_t, poly8x4_t, poly8_t, 8b, p8,) --__LD4R_FUNC (poly16x4x4_t, poly16x4_t, poly16_t, 4h, p16,) --__LD4R_FUNC (int8x8x4_t, int8x4_t, int8_t, 8b, s8,) --__LD4R_FUNC (int16x4x4_t, int16x4_t, int16_t, 4h, s16,) --__LD4R_FUNC (int32x2x4_t, int32x4_t, int32_t, 2s, s32,) --__LD4R_FUNC (int64x1x4_t, int64x4_t, int64_t, 1d, s64,) --__LD4R_FUNC (uint8x8x4_t, uint8x4_t, uint8_t, 8b, u8,) --__LD4R_FUNC (uint16x4x4_t, uint16x4_t, uint16_t, 4h, u16,) --__LD4R_FUNC (uint32x2x4_t, uint32x4_t, uint32_t, 2s, u32,) --__LD4R_FUNC (uint64x1x4_t, uint64x4_t, uint64_t, 1d, u64,) --__LD4R_FUNC (float32x4x4_t, float32x4_t, float32_t, 4s, f32, q) --__LD4R_FUNC (float64x2x4_t, float64x4_t, float64_t, 2d, f64, q) --__LD4R_FUNC (poly8x16x4_t, poly8x4_t, poly8_t, 16b, p8, q) --__LD4R_FUNC (poly16x8x4_t, poly16x4_t, poly16_t, 8h, p16, q) --__LD4R_FUNC (int8x16x4_t, int8x4_t, int8_t, 16b, s8, q) --__LD4R_FUNC (int16x8x4_t, int16x4_t, int16_t, 8h, s16, q) --__LD4R_FUNC (int32x4x4_t, int32x4_t, int32_t, 4s, s32, q) --__LD4R_FUNC (int64x2x4_t, int64x4_t, int64_t, 2d, s64, q) --__LD4R_FUNC (uint8x16x4_t, uint8x4_t, uint8_t, 16b, u8, q) --__LD4R_FUNC (uint16x8x4_t, uint16x4_t, uint16_t, 8h, u16, q) --__LD4R_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, 4s, u32, q) --__LD4R_FUNC (uint64x2x4_t, uint64x4_t, uint64_t, 2d, u64, q) -- - #define __LD4_LANE_FUNC(rettype, ptrtype, regsuffix, \ - lnsuffix, funcsuffix, Q) \ - __extension__ static __inline rettype \ -@@ -14199,132 +12037,225 @@ - __LD4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q) - __LD4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q) - --#define __ST2_LANE_FUNC(intype, ptrtype, regsuffix, \ -- lnsuffix, funcsuffix, Q) \ -- typedef struct { ptrtype __x[2]; } __ST2_LANE_STRUCTURE_##intype; \ -- __extension__ static __inline void \ -- __attribute__ ((__always_inline__)) \ -- vst2 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \ -- intype b, const int c) \ -- { \ -- __ST2_LANE_STRUCTURE_##intype *__p = \ -- (__ST2_LANE_STRUCTURE_##intype *)ptr; \ -- __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \ -- "st2 {v16." #lnsuffix ", v17." 
#lnsuffix "}[%2], %0\n\t" \ -- : "=Q"(*__p) \ -- : "Q"(b), "i"(c) \ -- : "v16", "v17"); \ -- } -+#define __ST2_LANE_FUNC(intype, largetype, ptrtype, \ -+ mode, ptr_mode, funcsuffix, signedtype) \ -+__extension__ static __inline void \ -+__attribute__ ((__always_inline__)) \ -+vst2_lane_ ## funcsuffix (ptrtype *__ptr, \ -+ intype __b, const int __c) \ -+{ \ -+ __builtin_aarch64_simd_oi __o; \ -+ largetype __temp; \ -+ __temp.val[0] \ -+ = vcombine_##funcsuffix (__b.val[0], \ -+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -+ __temp.val[1] \ -+ = vcombine_##funcsuffix (__b.val[1], \ -+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -+ __o = __builtin_aarch64_set_qregoi##mode (__o, \ -+ (signedtype) __temp.val[0], 0); \ -+ __o = __builtin_aarch64_set_qregoi##mode (__o, \ -+ (signedtype) __temp.val[1], 1); \ -+ __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ -+ __ptr, __o, __c); \ -+} - --__ST2_LANE_FUNC (int8x8x2_t, int8_t, 8b, b, s8,) --__ST2_LANE_FUNC (float32x2x2_t, float32_t, 2s, s, f32,) --__ST2_LANE_FUNC (float64x1x2_t, float64_t, 1d, d, f64,) --__ST2_LANE_FUNC (poly8x8x2_t, poly8_t, 8b, b, p8,) --__ST2_LANE_FUNC (poly16x4x2_t, poly16_t, 4h, h, p16,) --__ST2_LANE_FUNC (int16x4x2_t, int16_t, 4h, h, s16,) --__ST2_LANE_FUNC (int32x2x2_t, int32_t, 2s, s, s32,) --__ST2_LANE_FUNC (int64x1x2_t, int64_t, 1d, d, s64,) --__ST2_LANE_FUNC (uint8x8x2_t, uint8_t, 8b, b, u8,) --__ST2_LANE_FUNC (uint16x4x2_t, uint16_t, 4h, h, u16,) --__ST2_LANE_FUNC (uint32x2x2_t, uint32_t, 2s, s, u32,) --__ST2_LANE_FUNC (uint64x1x2_t, uint64_t, 1d, d, u64,) --__ST2_LANE_FUNC (float32x4x2_t, float32_t, 4s, s, f32, q) --__ST2_LANE_FUNC (float64x2x2_t, float64_t, 2d, d, f64, q) --__ST2_LANE_FUNC (poly8x16x2_t, poly8_t, 16b, b, p8, q) --__ST2_LANE_FUNC (poly16x8x2_t, poly16_t, 8h, h, p16, q) --__ST2_LANE_FUNC (int8x16x2_t, int8_t, 16b, b, s8, q) --__ST2_LANE_FUNC (int16x8x2_t, int16_t, 8h, h, s16, q) --__ST2_LANE_FUNC (int32x4x2_t, int32_t, 4s, s, s32, q) --__ST2_LANE_FUNC (int64x2x2_t, int64_t, 2d, d, s64, q) --__ST2_LANE_FUNC (uint8x16x2_t, uint8_t, 16b, b, u8, q) --__ST2_LANE_FUNC (uint16x8x2_t, uint16_t, 8h, h, u16, q) --__ST2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q) --__ST2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q) -+__ST2_LANE_FUNC (float32x2x2_t, float32x4x2_t, float32_t, v4sf, sf, f32, -+ float32x4_t) -+__ST2_LANE_FUNC (float64x1x2_t, float64x2x2_t, float64_t, v2df, df, f64, -+ float64x2_t) -+__ST2_LANE_FUNC (poly8x8x2_t, poly8x16x2_t, poly8_t, v16qi, qi, p8, int8x16_t) -+__ST2_LANE_FUNC (poly16x4x2_t, poly16x8x2_t, poly16_t, v8hi, hi, p16, -+ int16x8_t) -+__ST2_LANE_FUNC (int8x8x2_t, int8x16x2_t, int8_t, v16qi, qi, s8, int8x16_t) -+__ST2_LANE_FUNC (int16x4x2_t, int16x8x2_t, int16_t, v8hi, hi, s16, int16x8_t) -+__ST2_LANE_FUNC (int32x2x2_t, int32x4x2_t, int32_t, v4si, si, s32, int32x4_t) -+__ST2_LANE_FUNC (int64x1x2_t, int64x2x2_t, int64_t, v2di, di, s64, int64x2_t) -+__ST2_LANE_FUNC (uint8x8x2_t, uint8x16x2_t, uint8_t, v16qi, qi, u8, int8x16_t) -+__ST2_LANE_FUNC (uint16x4x2_t, uint16x8x2_t, uint16_t, v8hi, hi, u16, -+ int16x8_t) -+__ST2_LANE_FUNC (uint32x2x2_t, uint32x4x2_t, uint32_t, v4si, si, u32, -+ int32x4_t) -+__ST2_LANE_FUNC (uint64x1x2_t, uint64x2x2_t, uint64_t, v2di, di, u64, -+ int64x2_t) - --#define __ST3_LANE_FUNC(intype, ptrtype, regsuffix, \ -- lnsuffix, funcsuffix, Q) \ -- typedef struct { ptrtype __x[3]; } __ST3_LANE_STRUCTURE_##intype; \ -- __extension__ static __inline void \ -- __attribute__ ((__always_inline__)) \ -- vst3 ## Q ## _lane_ ## 
funcsuffix (ptrtype *ptr, \ -- intype b, const int c) \ -- { \ -- __ST3_LANE_STRUCTURE_##intype *__p = \ -- (__ST3_LANE_STRUCTURE_##intype *)ptr; \ -- __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \ -- "st3 {v16." #lnsuffix " - v18." #lnsuffix "}[%2], %0\n\t" \ -- : "=Q"(*__p) \ -- : "Q"(b), "i"(c) \ -- : "v16", "v17", "v18"); \ -- } -+#undef __ST2_LANE_FUNC -+#define __ST2_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \ -+__extension__ static __inline void \ -+__attribute__ ((__always_inline__)) \ -+vst2q_lane_ ## funcsuffix (ptrtype *__ptr, \ -+ intype __b, const int __c) \ -+{ \ -+ union { intype __i; \ -+ __builtin_aarch64_simd_oi __o; } __temp = { __b }; \ -+ __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ -+ __ptr, __temp.__o, __c); \ -+} - --__ST3_LANE_FUNC (int8x8x3_t, int8_t, 8b, b, s8,) --__ST3_LANE_FUNC (float32x2x3_t, float32_t, 2s, s, f32,) --__ST3_LANE_FUNC (float64x1x3_t, float64_t, 1d, d, f64,) --__ST3_LANE_FUNC (poly8x8x3_t, poly8_t, 8b, b, p8,) --__ST3_LANE_FUNC (poly16x4x3_t, poly16_t, 4h, h, p16,) --__ST3_LANE_FUNC (int16x4x3_t, int16_t, 4h, h, s16,) --__ST3_LANE_FUNC (int32x2x3_t, int32_t, 2s, s, s32,) --__ST3_LANE_FUNC (int64x1x3_t, int64_t, 1d, d, s64,) --__ST3_LANE_FUNC (uint8x8x3_t, uint8_t, 8b, b, u8,) --__ST3_LANE_FUNC (uint16x4x3_t, uint16_t, 4h, h, u16,) --__ST3_LANE_FUNC (uint32x2x3_t, uint32_t, 2s, s, u32,) --__ST3_LANE_FUNC (uint64x1x3_t, uint64_t, 1d, d, u64,) --__ST3_LANE_FUNC (float32x4x3_t, float32_t, 4s, s, f32, q) --__ST3_LANE_FUNC (float64x2x3_t, float64_t, 2d, d, f64, q) --__ST3_LANE_FUNC (poly8x16x3_t, poly8_t, 16b, b, p8, q) --__ST3_LANE_FUNC (poly16x8x3_t, poly16_t, 8h, h, p16, q) --__ST3_LANE_FUNC (int8x16x3_t, int8_t, 16b, b, s8, q) --__ST3_LANE_FUNC (int16x8x3_t, int16_t, 8h, h, s16, q) --__ST3_LANE_FUNC (int32x4x3_t, int32_t, 4s, s, s32, q) --__ST3_LANE_FUNC (int64x2x3_t, int64_t, 2d, d, s64, q) --__ST3_LANE_FUNC (uint8x16x3_t, uint8_t, 16b, b, u8, q) --__ST3_LANE_FUNC (uint16x8x3_t, uint16_t, 8h, h, u16, q) --__ST3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q) --__ST3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q) -+__ST2_LANE_FUNC (float32x4x2_t, float32_t, v4sf, sf, f32) -+__ST2_LANE_FUNC (float64x2x2_t, float64_t, v2df, df, f64) -+__ST2_LANE_FUNC (poly8x16x2_t, poly8_t, v16qi, qi, p8) -+__ST2_LANE_FUNC (poly16x8x2_t, poly16_t, v8hi, hi, p16) -+__ST2_LANE_FUNC (int8x16x2_t, int8_t, v16qi, qi, s8) -+__ST2_LANE_FUNC (int16x8x2_t, int16_t, v8hi, hi, s16) -+__ST2_LANE_FUNC (int32x4x2_t, int32_t, v4si, si, s32) -+__ST2_LANE_FUNC (int64x2x2_t, int64_t, v2di, di, s64) -+__ST2_LANE_FUNC (uint8x16x2_t, uint8_t, v16qi, qi, u8) -+__ST2_LANE_FUNC (uint16x8x2_t, uint16_t, v8hi, hi, u16) -+__ST2_LANE_FUNC (uint32x4x2_t, uint32_t, v4si, si, u32) -+__ST2_LANE_FUNC (uint64x2x2_t, uint64_t, v2di, di, u64) - --#define __ST4_LANE_FUNC(intype, ptrtype, regsuffix, \ -- lnsuffix, funcsuffix, Q) \ -- typedef struct { ptrtype __x[4]; } __ST4_LANE_STRUCTURE_##intype; \ -- __extension__ static __inline void \ -- __attribute__ ((__always_inline__)) \ -- vst4 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \ -- intype b, const int c) \ -- { \ -- __ST4_LANE_STRUCTURE_##intype *__p = \ -- (__ST4_LANE_STRUCTURE_##intype *)ptr; \ -- __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \ -- "st4 {v16." #lnsuffix " - v19." 
#lnsuffix "}[%2], %0\n\t" \ -- : "=Q"(*__p) \ -- : "Q"(b), "i"(c) \ -- : "v16", "v17", "v18", "v19"); \ -- } -+#define __ST3_LANE_FUNC(intype, largetype, ptrtype, \ -+ mode, ptr_mode, funcsuffix, signedtype) \ -+__extension__ static __inline void \ -+__attribute__ ((__always_inline__)) \ -+vst3_lane_ ## funcsuffix (ptrtype *__ptr, \ -+ intype __b, const int __c) \ -+{ \ -+ __builtin_aarch64_simd_ci __o; \ -+ largetype __temp; \ -+ __temp.val[0] \ -+ = vcombine_##funcsuffix (__b.val[0], \ -+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -+ __temp.val[1] \ -+ = vcombine_##funcsuffix (__b.val[1], \ -+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -+ __temp.val[2] \ -+ = vcombine_##funcsuffix (__b.val[2], \ -+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -+ __o = __builtin_aarch64_set_qregci##mode (__o, \ -+ (signedtype) __temp.val[0], 0); \ -+ __o = __builtin_aarch64_set_qregci##mode (__o, \ -+ (signedtype) __temp.val[1], 1); \ -+ __o = __builtin_aarch64_set_qregci##mode (__o, \ -+ (signedtype) __temp.val[2], 2); \ -+ __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ -+ __ptr, __o, __c); \ -+} - --__ST4_LANE_FUNC (int8x8x4_t, int8_t, 8b, b, s8,) --__ST4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,) --__ST4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,) --__ST4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,) --__ST4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,) --__ST4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,) --__ST4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,) --__ST4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,) --__ST4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,) --__ST4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,) --__ST4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,) --__ST4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,) --__ST4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q) --__ST4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q) --__ST4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q) --__ST4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q) --__ST4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q) --__ST4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q) --__ST4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q) --__ST4_LANE_FUNC (int64x2x4_t, int64_t, 2d, d, s64, q) --__ST4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q) --__ST4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q) --__ST4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q) --__ST4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q) -+__ST3_LANE_FUNC (float32x2x3_t, float32x4x3_t, float32_t, v4sf, sf, f32, -+ float32x4_t) -+__ST3_LANE_FUNC (float64x1x3_t, float64x2x3_t, float64_t, v2df, df, f64, -+ float64x2_t) -+__ST3_LANE_FUNC (poly8x8x3_t, poly8x16x3_t, poly8_t, v16qi, qi, p8, int8x16_t) -+__ST3_LANE_FUNC (poly16x4x3_t, poly16x8x3_t, poly16_t, v8hi, hi, p16, -+ int16x8_t) -+__ST3_LANE_FUNC (int8x8x3_t, int8x16x3_t, int8_t, v16qi, qi, s8, int8x16_t) -+__ST3_LANE_FUNC (int16x4x3_t, int16x8x3_t, int16_t, v8hi, hi, s16, int16x8_t) -+__ST3_LANE_FUNC (int32x2x3_t, int32x4x3_t, int32_t, v4si, si, s32, int32x4_t) -+__ST3_LANE_FUNC (int64x1x3_t, int64x2x3_t, int64_t, v2di, di, s64, int64x2_t) -+__ST3_LANE_FUNC (uint8x8x3_t, uint8x16x3_t, uint8_t, v16qi, qi, u8, int8x16_t) -+__ST3_LANE_FUNC (uint16x4x3_t, uint16x8x3_t, uint16_t, v8hi, hi, u16, -+ int16x8_t) -+__ST3_LANE_FUNC (uint32x2x3_t, uint32x4x3_t, uint32_t, v4si, si, u32, -+ int32x4_t) -+__ST3_LANE_FUNC (uint64x1x3_t, uint64x2x3_t, uint64_t, v2di, di, u64, -+ int64x2_t) - -+#undef __ST3_LANE_FUNC -+#define 
__ST3_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \ -+__extension__ static __inline void \ -+__attribute__ ((__always_inline__)) \ -+vst3q_lane_ ## funcsuffix (ptrtype *__ptr, \ -+ intype __b, const int __c) \ -+{ \ -+ union { intype __i; \ -+ __builtin_aarch64_simd_ci __o; } __temp = { __b }; \ -+ __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ -+ __ptr, __temp.__o, __c); \ -+} -+ -+__ST3_LANE_FUNC (float32x4x3_t, float32_t, v4sf, sf, f32) -+__ST3_LANE_FUNC (float64x2x3_t, float64_t, v2df, df, f64) -+__ST3_LANE_FUNC (poly8x16x3_t, poly8_t, v16qi, qi, p8) -+__ST3_LANE_FUNC (poly16x8x3_t, poly16_t, v8hi, hi, p16) -+__ST3_LANE_FUNC (int8x16x3_t, int8_t, v16qi, qi, s8) -+__ST3_LANE_FUNC (int16x8x3_t, int16_t, v8hi, hi, s16) -+__ST3_LANE_FUNC (int32x4x3_t, int32_t, v4si, si, s32) -+__ST3_LANE_FUNC (int64x2x3_t, int64_t, v2di, di, s64) -+__ST3_LANE_FUNC (uint8x16x3_t, uint8_t, v16qi, qi, u8) -+__ST3_LANE_FUNC (uint16x8x3_t, uint16_t, v8hi, hi, u16) -+__ST3_LANE_FUNC (uint32x4x3_t, uint32_t, v4si, si, u32) -+__ST3_LANE_FUNC (uint64x2x3_t, uint64_t, v2di, di, u64) -+ -+#define __ST4_LANE_FUNC(intype, largetype, ptrtype, \ -+ mode, ptr_mode, funcsuffix, signedtype) \ -+__extension__ static __inline void \ -+__attribute__ ((__always_inline__)) \ -+vst4_lane_ ## funcsuffix (ptrtype *__ptr, \ -+ intype __b, const int __c) \ -+{ \ -+ __builtin_aarch64_simd_xi __o; \ -+ largetype __temp; \ -+ __temp.val[0] \ -+ = vcombine_##funcsuffix (__b.val[0], \ -+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -+ __temp.val[1] \ -+ = vcombine_##funcsuffix (__b.val[1], \ -+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -+ __temp.val[2] \ -+ = vcombine_##funcsuffix (__b.val[2], \ -+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -+ __temp.val[3] \ -+ = vcombine_##funcsuffix (__b.val[3], \ -+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -+ __o = __builtin_aarch64_set_qregxi##mode (__o, \ -+ (signedtype) __temp.val[0], 0); \ -+ __o = __builtin_aarch64_set_qregxi##mode (__o, \ -+ (signedtype) __temp.val[1], 1); \ -+ __o = __builtin_aarch64_set_qregxi##mode (__o, \ -+ (signedtype) __temp.val[2], 2); \ -+ __o = __builtin_aarch64_set_qregxi##mode (__o, \ -+ (signedtype) __temp.val[3], 3); \ -+ __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ -+ __ptr, __o, __c); \ -+} -+ -+__ST4_LANE_FUNC (float32x2x4_t, float32x4x4_t, float32_t, v4sf, sf, f32, -+ float32x4_t) -+__ST4_LANE_FUNC (float64x1x4_t, float64x2x4_t, float64_t, v2df, df, f64, -+ float64x2_t) -+__ST4_LANE_FUNC (poly8x8x4_t, poly8x16x4_t, poly8_t, v16qi, qi, p8, int8x16_t) -+__ST4_LANE_FUNC (poly16x4x4_t, poly16x8x4_t, poly16_t, v8hi, hi, p16, -+ int16x8_t) -+__ST4_LANE_FUNC (int8x8x4_t, int8x16x4_t, int8_t, v16qi, qi, s8, int8x16_t) -+__ST4_LANE_FUNC (int16x4x4_t, int16x8x4_t, int16_t, v8hi, hi, s16, int16x8_t) -+__ST4_LANE_FUNC (int32x2x4_t, int32x4x4_t, int32_t, v4si, si, s32, int32x4_t) -+__ST4_LANE_FUNC (int64x1x4_t, int64x2x4_t, int64_t, v2di, di, s64, int64x2_t) -+__ST4_LANE_FUNC (uint8x8x4_t, uint8x16x4_t, uint8_t, v16qi, qi, u8, int8x16_t) -+__ST4_LANE_FUNC (uint16x4x4_t, uint16x8x4_t, uint16_t, v8hi, hi, u16, -+ int16x8_t) -+__ST4_LANE_FUNC (uint32x2x4_t, uint32x4x4_t, uint32_t, v4si, si, u32, -+ int32x4_t) -+__ST4_LANE_FUNC (uint64x1x4_t, uint64x2x4_t, uint64_t, v2di, di, u64, -+ int64x2_t) -+ -+#undef __ST4_LANE_FUNC -+#define __ST4_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \ -+__extension__ static __inline void \ -+__attribute__ ((__always_inline__)) \ 
-+vst4q_lane_ ## funcsuffix (ptrtype *__ptr, \ -+ intype __b, const int __c) \ -+{ \ -+ union { intype __i; \ -+ __builtin_aarch64_simd_xi __o; } __temp = { __b }; \ -+ __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ -+ __ptr, __temp.__o, __c); \ -+} -+ -+__ST4_LANE_FUNC (float32x4x4_t, float32_t, v4sf, sf, f32) -+__ST4_LANE_FUNC (float64x2x4_t, float64_t, v2df, df, f64) -+__ST4_LANE_FUNC (poly8x16x4_t, poly8_t, v16qi, qi, p8) -+__ST4_LANE_FUNC (poly16x8x4_t, poly16_t, v8hi, hi, p16) -+__ST4_LANE_FUNC (int8x16x4_t, int8_t, v16qi, qi, s8) -+__ST4_LANE_FUNC (int16x8x4_t, int16_t, v8hi, hi, s16) -+__ST4_LANE_FUNC (int32x4x4_t, int32_t, v4si, si, s32) -+__ST4_LANE_FUNC (int64x2x4_t, int64_t, v2di, di, s64) -+__ST4_LANE_FUNC (uint8x16x4_t, uint8_t, v16qi, qi, u8) -+__ST4_LANE_FUNC (uint16x8x4_t, uint16_t, v8hi, hi, u16) -+__ST4_LANE_FUNC (uint32x4x4_t, uint32_t, v4si, si, u32) -+__ST4_LANE_FUNC (uint64x2x4_t, uint64_t, v2di, di, u64) -+ - __extension__ static __inline int64_t __attribute__ ((__always_inline__)) - vaddlv_s32 (int32x2_t a) - { -@@ -14341,12 +12272,6 @@ - return result; - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) --vpaddd_s64 (int64x2_t __a) --{ -- return __builtin_aarch64_addpdi (__a); --} -- - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vqdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c) - { -@@ -15706,7 +13631,7 @@ - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vceq_f32 (float32x2_t __a, float32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b); -+ return (uint32x2_t) (__a == __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -15718,26 +13643,25 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vceq_p8 (poly8x8_t __a, poly8x8_t __b) - { -- return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a, -- (int8x8_t) __b); -+ return (uint8x8_t) (__a == __b); - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vceq_s8 (int8x8_t __a, int8x8_t __b) - { -- return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b); -+ return (uint8x8_t) (__a == __b); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vceq_s16 (int16x4_t __a, int16x4_t __b) - { -- return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b); -+ return (uint16x4_t) (__a == __b); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vceq_s32 (int32x2_t __a, int32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b); -+ return (uint32x2_t) (__a == __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -15749,22 +13673,19 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vceq_u8 (uint8x8_t __a, uint8x8_t __b) - { -- return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a, -- (int8x8_t) __b); -+ return (__a == __b); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vceq_u16 (uint16x4_t __a, uint16x4_t __b) - { -- return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a, -- (int16x4_t) __b); -+ return (__a == __b); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vceq_u32 (uint32x2_t __a, uint32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a, -- (int32x2_t) __b); -+ return 
(__a == __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -15776,72 +13697,67 @@ - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vceqq_f32 (float32x4_t __a, float32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b); -+ return (uint32x4_t) (__a == __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vceqq_f64 (float64x2_t __a, float64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b); -+ return (uint64x2_t) (__a == __b); - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vceqq_p8 (poly8x16_t __a, poly8x16_t __b) - { -- return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a, -- (int8x16_t) __b); -+ return (uint8x16_t) (__a == __b); - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vceqq_s8 (int8x16_t __a, int8x16_t __b) - { -- return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b); -+ return (uint8x16_t) (__a == __b); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vceqq_s16 (int16x8_t __a, int16x8_t __b) - { -- return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b); -+ return (uint16x8_t) (__a == __b); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vceqq_s32 (int32x4_t __a, int32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b); -+ return (uint32x4_t) (__a == __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vceqq_s64 (int64x2_t __a, int64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b); -+ return (uint64x2_t) (__a == __b); - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vceqq_u8 (uint8x16_t __a, uint8x16_t __b) - { -- return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a, -- (int8x16_t) __b); -+ return (__a == __b); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vceqq_u16 (uint16x8_t __a, uint16x8_t __b) - { -- return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a, -- (int16x8_t) __b); -+ return (__a == __b); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vceqq_u32 (uint32x4_t __a, uint32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a, -- (int32x4_t) __b); -+ return (__a == __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vceqq_u64 (uint64x2_t __a, uint64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a, -- (int64x2_t) __b); -+ return (__a == __b); - } - - /* vceq - scalar. 
*/ -@@ -15875,8 +13791,7 @@ - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vceqz_f32 (float32x2_t __a) - { -- float32x2_t __b = {0.0f, 0.0f}; -- return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b); -+ return (uint32x2_t) (__a == 0.0f); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -15888,30 +13803,25 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vceqz_p8 (poly8x8_t __a) - { -- poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a, -- (int8x8_t) __b); -+ return (uint8x8_t) (__a == 0); - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vceqz_s8 (int8x8_t __a) - { -- int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b); -+ return (uint8x8_t) (__a == 0); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vceqz_s16 (int16x4_t __a) - { -- int16x4_t __b = {0, 0, 0, 0}; -- return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b); -+ return (uint16x4_t) (__a == 0); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vceqz_s32 (int32x2_t __a) - { -- int32x2_t __b = {0, 0}; -- return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b); -+ return (uint32x2_t) (__a == 0); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -15923,25 +13833,19 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vceqz_u8 (uint8x8_t __a) - { -- uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a, -- (int8x8_t) __b); -+ return (__a == 0); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vceqz_u16 (uint16x4_t __a) - { -- uint16x4_t __b = {0, 0, 0, 0}; -- return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a, -- (int16x4_t) __b); -+ return (__a == 0); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vceqz_u32 (uint32x2_t __a) - { -- uint32x2_t __b = {0, 0}; -- return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a, -- (int32x2_t) __b); -+ return (__a == 0); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -15953,86 +13857,67 @@ - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vceqzq_f32 (float32x4_t __a) - { -- float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f}; -- return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b); -+ return (uint32x4_t) (__a == 0.0f); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vceqzq_f64 (float64x2_t __a) - { -- float64x2_t __b = {0.0, 0.0}; -- return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b); -+ return (uint64x2_t) (__a == 0.0f); - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vceqzq_p8 (poly8x16_t __a) - { -- poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, -- 0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a, -- (int8x16_t) __b); -+ return (uint8x16_t) (__a == 0); - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vceqzq_s8 (int8x16_t __a) - { -- int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, -- 0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b); -+ return (uint8x16_t) (__a == 0); - } - - __extension__ static 
__inline uint16x8_t __attribute__ ((__always_inline__)) - vceqzq_s16 (int16x8_t __a) - { -- int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b); -+ return (uint16x8_t) (__a == 0); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vceqzq_s32 (int32x4_t __a) - { -- int32x4_t __b = {0, 0, 0, 0}; -- return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b); -+ return (uint32x4_t) (__a == 0); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vceqzq_s64 (int64x2_t __a) - { -- int64x2_t __b = {0, 0}; -- return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b); -+ return (uint64x2_t) (__a == __AARCH64_INT64_C (0)); - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vceqzq_u8 (uint8x16_t __a) - { -- uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, -- 0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a, -- (int8x16_t) __b); -+ return (__a == 0); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vceqzq_u16 (uint16x8_t __a) - { -- uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a, -- (int16x8_t) __b); -+ return (__a == 0); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vceqzq_u32 (uint32x4_t __a) - { -- uint32x4_t __b = {0, 0, 0, 0}; -- return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a, -- (int32x4_t) __b); -+ return (__a == 0); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vceqzq_u64 (uint64x2_t __a) - { -- uint64x2_t __b = {0, 0}; -- return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a, -- (int64x2_t) __b); -+ return (__a == __AARCH64_UINT64_C (0)); - } - - /* vceqz - scalar. 
*/ -@@ -16066,7 +13951,7 @@ - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vcge_f32 (float32x2_t __a, float32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b); -+ return (uint32x2_t) (__a >= __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -16076,28 +13961,21 @@ - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vcge_p8 (poly8x8_t __a, poly8x8_t __b) --{ -- return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a, -- (int8x8_t) __b); --} -- --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vcge_s8 (int8x8_t __a, int8x8_t __b) - { -- return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b); -+ return (uint8x8_t) (__a >= __b); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vcge_s16 (int16x4_t __a, int16x4_t __b) - { -- return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b); -+ return (uint16x4_t) (__a >= __b); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vcge_s32 (int32x2_t __a, int32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b); -+ return (uint32x2_t) (__a >= __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -16109,22 +13987,19 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vcge_u8 (uint8x8_t __a, uint8x8_t __b) - { -- return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a, -- (int8x8_t) __b); -+ return (__a >= __b); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vcge_u16 (uint16x4_t __a, uint16x4_t __b) - { -- return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a, -- (int16x4_t) __b); -+ return (__a >= __b); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vcge_u32 (uint32x2_t __a, uint32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a, -- (int32x2_t) __b); -+ return (__a >= __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -16136,72 +14011,61 @@ - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vcgeq_f32 (float32x4_t __a, float32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b); -+ return (uint32x4_t) (__a >= __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vcgeq_f64 (float64x2_t __a, float64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b); -+ return (uint64x2_t) (__a >= __b); - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vcgeq_p8 (poly8x16_t __a, poly8x16_t __b) --{ -- return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a, -- (int8x16_t) __b); --} -- --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vcgeq_s8 (int8x16_t __a, int8x16_t __b) - { -- return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b); -+ return (uint8x16_t) (__a >= __b); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vcgeq_s16 (int16x8_t __a, int16x8_t __b) - { -- return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b); -+ return (uint16x8_t) (__a >= __b); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vcgeq_s32 (int32x4_t __a, int32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_cmgev4si 
(__a, __b); -+ return (uint32x4_t) (__a >= __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vcgeq_s64 (int64x2_t __a, int64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b); -+ return (uint64x2_t) (__a >= __b); - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vcgeq_u8 (uint8x16_t __a, uint8x16_t __b) - { -- return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a, -- (int8x16_t) __b); -+ return (__a >= __b); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vcgeq_u16 (uint16x8_t __a, uint16x8_t __b) - { -- return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a, -- (int16x8_t) __b); -+ return (__a >= __b); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vcgeq_u32 (uint32x4_t __a, uint32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a, -- (int32x4_t) __b); -+ return (__a >= __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vcgeq_u64 (uint64x2_t __a, uint64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a, -- (int64x2_t) __b); -+ return (__a >= __b); - } - - /* vcge - scalar. */ -@@ -16235,8 +14099,7 @@ - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vcgez_f32 (float32x2_t __a) - { -- float32x2_t __b = {0.0f, 0.0f}; -- return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b); -+ return (uint32x2_t) (__a >= 0.0f); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -16246,32 +14109,21 @@ - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vcgez_p8 (poly8x8_t __a) --{ -- poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a, -- (int8x8_t) __b); --} -- --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vcgez_s8 (int8x8_t __a) - { -- int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b); -+ return (uint8x8_t) (__a >= 0); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vcgez_s16 (int16x4_t __a) - { -- int16x4_t __b = {0, 0, 0, 0}; -- return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b); -+ return (uint16x4_t) (__a >= 0); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vcgez_s32 (int32x2_t __a) - { -- int32x2_t __b = {0, 0}; -- return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b); -+ return (uint32x2_t) (__a >= 0); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -16280,121 +14132,42 @@ - return __a >= 0ll ? 
-1ll : 0ll; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vcgez_u8 (uint8x8_t __a) --{ -- uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a, -- (int8x8_t) __b); --} -- --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vcgez_u16 (uint16x4_t __a) --{ -- uint16x4_t __b = {0, 0, 0, 0}; -- return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a, -- (int16x4_t) __b); --} -- --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vcgez_u32 (uint32x2_t __a) --{ -- uint32x2_t __b = {0, 0}; -- return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a, -- (int32x2_t) __b); --} -- --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vcgez_u64 (uint64x1_t __a) --{ -- return __a >= 0ll ? -1ll : 0ll; --} -- - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vcgezq_f32 (float32x4_t __a) - { -- float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f}; -- return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b); -+ return (uint32x4_t) (__a >= 0.0f); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vcgezq_f64 (float64x2_t __a) - { -- float64x2_t __b = {0.0, 0.0}; -- return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b); -+ return (uint64x2_t) (__a >= 0.0); - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vcgezq_p8 (poly8x16_t __a) --{ -- poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, -- 0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a, -- (int8x16_t) __b); --} -- --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vcgezq_s8 (int8x16_t __a) - { -- int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, -- 0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b); -+ return (uint8x16_t) (__a >= 0); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vcgezq_s16 (int16x8_t __a) - { -- int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b); -+ return (uint16x8_t) (__a >= 0); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vcgezq_s32 (int32x4_t __a) - { -- int32x4_t __b = {0, 0, 0, 0}; -- return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b); -+ return (uint32x4_t) (__a >= 0); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vcgezq_s64 (int64x2_t __a) - { -- int64x2_t __b = {0, 0}; -- return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b); -+ return (uint64x2_t) (__a >= __AARCH64_INT64_C (0)); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vcgezq_u8 (uint8x16_t __a) --{ -- uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, -- 0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a, -- (int8x16_t) __b); --} -- --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vcgezq_u16 (uint16x8_t __a) --{ -- uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a, -- (int16x8_t) __b); --} -- --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vcgezq_u32 (uint32x4_t __a) --{ -- uint32x4_t __b = {0, 0, 0, 0}; -- return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a, -- (int32x4_t) __b); --} -- 
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vcgezq_u64 (uint64x2_t __a) --{ -- uint64x2_t __b = {0, 0}; -- return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a, -- (int64x2_t) __b); --} -- - /* vcgez - scalar. */ - - __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -@@ -16409,12 +14182,6 @@ - return __a >= 0 ? -1ll : 0ll; - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vcgezd_u64 (int64x1_t __a) --{ -- return __a >= 0 ? -1ll : 0ll; --} -- - __extension__ static __inline uint64_t __attribute__ ((__always_inline__)) - vcgezd_f64 (float64_t __a) - { -@@ -16426,7 +14193,7 @@ - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vcgt_f32 (float32x2_t __a, float32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b); -+ return (uint32x2_t) (__a > __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -16436,28 +14203,21 @@ - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vcgt_p8 (poly8x8_t __a, poly8x8_t __b) --{ -- return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a, -- (int8x8_t) __b); --} -- --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vcgt_s8 (int8x8_t __a, int8x8_t __b) - { -- return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b); -+ return (uint8x8_t) (__a > __b); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vcgt_s16 (int16x4_t __a, int16x4_t __b) - { -- return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b); -+ return (uint16x4_t) (__a > __b); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vcgt_s32 (int32x2_t __a, int32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b); -+ return (uint32x2_t) (__a > __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -16469,22 +14229,19 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vcgt_u8 (uint8x8_t __a, uint8x8_t __b) - { -- return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a, -- (int8x8_t) __b); -+ return (__a > __b); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vcgt_u16 (uint16x4_t __a, uint16x4_t __b) - { -- return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a, -- (int16x4_t) __b); -+ return (__a > __b); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vcgt_u32 (uint32x2_t __a, uint32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a, -- (int32x2_t) __b); -+ return (__a > __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -16496,72 +14253,61 @@ - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vcgtq_f32 (float32x4_t __a, float32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b); -+ return (uint32x4_t) (__a > __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vcgtq_f64 (float64x2_t __a, float64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b); -+ return (uint64x2_t) (__a > __b); - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vcgtq_p8 (poly8x16_t __a, poly8x16_t __b) --{ -- return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a, -- 
(int8x16_t) __b); --} -- --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vcgtq_s8 (int8x16_t __a, int8x16_t __b) - { -- return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b); -+ return (uint8x16_t) (__a > __b); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vcgtq_s16 (int16x8_t __a, int16x8_t __b) - { -- return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b); -+ return (uint16x8_t) (__a > __b); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vcgtq_s32 (int32x4_t __a, int32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b); -+ return (uint32x4_t) (__a > __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vcgtq_s64 (int64x2_t __a, int64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b); -+ return (uint64x2_t) (__a > __b); - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vcgtq_u8 (uint8x16_t __a, uint8x16_t __b) - { -- return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a, -- (int8x16_t) __b); -+ return (__a > __b); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vcgtq_u16 (uint16x8_t __a, uint16x8_t __b) - { -- return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a, -- (int16x8_t) __b); -+ return (__a > __b); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vcgtq_u32 (uint32x4_t __a, uint32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a, -- (int32x4_t) __b); -+ return (__a > __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vcgtq_u64 (uint64x2_t __a, uint64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a, -- (int64x2_t) __b); -+ return (__a > __b); - } - - /* vcgt - scalar. */ -@@ -16595,8 +14341,7 @@ - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vcgtz_f32 (float32x2_t __a) - { -- float32x2_t __b = {0.0f, 0.0f}; -- return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b); -+ return (uint32x2_t) (__a > 0.0f); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -16606,32 +14351,21 @@ - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vcgtz_p8 (poly8x8_t __a) --{ -- poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a, -- (int8x8_t) __b); --} -- --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vcgtz_s8 (int8x8_t __a) - { -- int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b); -+ return (uint8x8_t) (__a > 0); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vcgtz_s16 (int16x4_t __a) - { -- int16x4_t __b = {0, 0, 0, 0}; -- return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b); -+ return (uint16x4_t) (__a > 0); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vcgtz_s32 (int32x2_t __a) - { -- int32x2_t __b = {0, 0}; -- return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b); -+ return (uint32x2_t) (__a > 0); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -16640,121 +14374,42 @@ - return __a > 0ll ? 
-1ll : 0ll; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vcgtz_u8 (uint8x8_t __a) --{ -- uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a, -- (int8x8_t) __b); --} -- --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vcgtz_u16 (uint16x4_t __a) --{ -- uint16x4_t __b = {0, 0, 0, 0}; -- return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a, -- (int16x4_t) __b); --} -- --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vcgtz_u32 (uint32x2_t __a) --{ -- uint32x2_t __b = {0, 0}; -- return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a, -- (int32x2_t) __b); --} -- --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vcgtz_u64 (uint64x1_t __a) --{ -- return __a > 0ll ? -1ll : 0ll; --} -- - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vcgtzq_f32 (float32x4_t __a) - { -- float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f}; -- return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b); -+ return (uint32x4_t) (__a > 0.0f); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vcgtzq_f64 (float64x2_t __a) - { -- float64x2_t __b = {0.0, 0.0}; -- return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b); -+ return (uint64x2_t) (__a > 0.0); - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vcgtzq_p8 (poly8x16_t __a) --{ -- poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, -- 0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a, -- (int8x16_t) __b); --} -- --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vcgtzq_s8 (int8x16_t __a) - { -- int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, -- 0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b); -+ return (uint8x16_t) (__a > 0); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vcgtzq_s16 (int16x8_t __a) - { -- int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b); -+ return (uint16x8_t) (__a > 0); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vcgtzq_s32 (int32x4_t __a) - { -- int32x4_t __b = {0, 0, 0, 0}; -- return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b); -+ return (uint32x4_t) (__a > 0); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vcgtzq_s64 (int64x2_t __a) - { -- int64x2_t __b = {0, 0}; -- return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b); -+ return (uint64x2_t) (__a > __AARCH64_INT64_C (0)); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vcgtzq_u8 (uint8x16_t __a) --{ -- uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, -- 0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a, -- (int8x16_t) __b); --} -- --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vcgtzq_u16 (uint16x8_t __a) --{ -- uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a, -- (int16x8_t) __b); --} -- --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vcgtzq_u32 (uint32x4_t __a) --{ -- uint32x4_t __b = {0, 0, 0, 0}; -- return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a, -- (int32x4_t) __b); --} -- --__extension__ 
static __inline uint64x2_t __attribute__ ((__always_inline__)) --vcgtzq_u64 (uint64x2_t __a) --{ -- uint64x2_t __b = {0, 0}; -- return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a, -- (int64x2_t) __b); --} -- - /* vcgtz - scalar. */ - - __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -@@ -16769,12 +14424,6 @@ - return __a > 0 ? -1ll : 0ll; - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vcgtzd_u64 (int64x1_t __a) --{ -- return __a > 0 ? -1ll : 0ll; --} -- - __extension__ static __inline uint64_t __attribute__ ((__always_inline__)) - vcgtzd_f64 (float64_t __a) - { -@@ -16786,7 +14435,7 @@ - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vcle_f32 (float32x2_t __a, float32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_cmgev2sf (__b, __a); -+ return (uint32x2_t) (__a <= __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -16796,28 +14445,21 @@ - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vcle_p8 (poly8x8_t __a, poly8x8_t __b) --{ -- return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __b, -- (int8x8_t) __a); --} -- --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vcle_s8 (int8x8_t __a, int8x8_t __b) - { -- return (uint8x8_t) __builtin_aarch64_cmgev8qi (__b, __a); -+ return (uint8x8_t) (__a <= __b); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vcle_s16 (int16x4_t __a, int16x4_t __b) - { -- return (uint16x4_t) __builtin_aarch64_cmgev4hi (__b, __a); -+ return (uint16x4_t) (__a <= __b); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vcle_s32 (int32x2_t __a, int32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_cmgev2si (__b, __a); -+ return (uint32x2_t) (__a <= __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -16829,22 +14471,19 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vcle_u8 (uint8x8_t __a, uint8x8_t __b) - { -- return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __b, -- (int8x8_t) __a); -+ return (__a <= __b); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vcle_u16 (uint16x4_t __a, uint16x4_t __b) - { -- return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __b, -- (int16x4_t) __a); -+ return (__a <= __b); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vcle_u32 (uint32x2_t __a, uint32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __b, -- (int32x2_t) __a); -+ return (__a <= __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -16856,72 +14495,61 @@ - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vcleq_f32 (float32x4_t __a, float32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_cmgev4sf (__b, __a); -+ return (uint32x4_t) (__a <= __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vcleq_f64 (float64x2_t __a, float64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_cmgev2df (__b, __a); -+ return (uint64x2_t) (__a <= __b); - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vcleq_p8 (poly8x16_t __a, poly8x16_t __b) --{ -- return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __b, -- (int8x16_t) __a); 
--} -- --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vcleq_s8 (int8x16_t __a, int8x16_t __b) - { -- return (uint8x16_t) __builtin_aarch64_cmgev16qi (__b, __a); -+ return (uint8x16_t) (__a <= __b); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vcleq_s16 (int16x8_t __a, int16x8_t __b) - { -- return (uint16x8_t) __builtin_aarch64_cmgev8hi (__b, __a); -+ return (uint16x8_t) (__a <= __b); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vcleq_s32 (int32x4_t __a, int32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_cmgev4si (__b, __a); -+ return (uint32x4_t) (__a <= __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vcleq_s64 (int64x2_t __a, int64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_cmgev2di (__b, __a); -+ return (uint64x2_t) (__a <= __b); - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vcleq_u8 (uint8x16_t __a, uint8x16_t __b) - { -- return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __b, -- (int8x16_t) __a); -+ return (__a <= __b); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vcleq_u16 (uint16x8_t __a, uint16x8_t __b) - { -- return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __b, -- (int16x8_t) __a); -+ return (__a <= __b); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vcleq_u32 (uint32x4_t __a, uint32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __b, -- (int32x4_t) __a); -+ return (__a <= __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vcleq_u64 (uint64x2_t __a, uint64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __b, -- (int64x2_t) __a); -+ return (__a <= __b); - } - - /* vcle - scalar. */ -@@ -16955,8 +14583,7 @@ - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vclez_f32 (float32x2_t __a) - { -- float32x2_t __b = {0.0f, 0.0f}; -- return (uint32x2_t) __builtin_aarch64_cmlev2sf (__a, __b); -+ return (uint32x2_t) (__a <= 0.0f); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -16966,32 +14593,21 @@ - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vclez_p8 (poly8x8_t __a) --{ -- poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x8_t) __builtin_aarch64_cmlev8qi ((int8x8_t) __a, -- (int8x8_t) __b); --} -- --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vclez_s8 (int8x8_t __a) - { -- int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x8_t) __builtin_aarch64_cmlev8qi (__a, __b); -+ return (uint8x8_t) (__a <= 0); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vclez_s16 (int16x4_t __a) - { -- int16x4_t __b = {0, 0, 0, 0}; -- return (uint16x4_t) __builtin_aarch64_cmlev4hi (__a, __b); -+ return (uint16x4_t) (__a <= 0); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vclez_s32 (int32x2_t __a) - { -- int32x2_t __b = {0, 0}; -- return (uint32x2_t) __builtin_aarch64_cmlev2si (__a, __b); -+ return (uint32x2_t) (__a <= 0); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -17000,62 +14616,40 @@ - return __a <= 0ll ? 
-1ll : 0ll; - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vclez_u64 (uint64x1_t __a) --{ -- return __a <= 0ll ? -1ll : 0ll; --} -- - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vclezq_f32 (float32x4_t __a) - { -- float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f}; -- return (uint32x4_t) __builtin_aarch64_cmlev4sf (__a, __b); -+ return (uint32x4_t) (__a <= 0.0f); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vclezq_f64 (float64x2_t __a) - { -- float64x2_t __b = {0.0, 0.0}; -- return (uint64x2_t) __builtin_aarch64_cmlev2df (__a, __b); -+ return (uint64x2_t) (__a <= 0.0); - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vclezq_p8 (poly8x16_t __a) --{ -- poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, -- 0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x16_t) __builtin_aarch64_cmlev16qi ((int8x16_t) __a, -- (int8x16_t) __b); --} -- --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vclezq_s8 (int8x16_t __a) - { -- int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, -- 0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x16_t) __builtin_aarch64_cmlev16qi (__a, __b); -+ return (uint8x16_t) (__a <= 0); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vclezq_s16 (int16x8_t __a) - { -- int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint16x8_t) __builtin_aarch64_cmlev8hi (__a, __b); -+ return (uint16x8_t) (__a <= 0); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vclezq_s32 (int32x4_t __a) - { -- int32x4_t __b = {0, 0, 0, 0}; -- return (uint32x4_t) __builtin_aarch64_cmlev4si (__a, __b); -+ return (uint32x4_t) (__a <= 0); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vclezq_s64 (int64x2_t __a) - { -- int64x2_t __b = {0, 0}; -- return (uint64x2_t) __builtin_aarch64_cmlev2di (__a, __b); -+ return (uint64x2_t) (__a <= __AARCH64_INT64_C (0)); - } - - /* vclez - scalar. */ -@@ -17072,12 +14666,6 @@ - return __a <= 0 ? -1ll : 0ll; - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vclezd_u64 (int64x1_t __a) --{ -- return __a <= 0 ? 
-1ll : 0ll; --} -- - __extension__ static __inline uint64_t __attribute__ ((__always_inline__)) - vclezd_f64 (float64_t __a) - { -@@ -17089,7 +14677,7 @@ - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vclt_f32 (float32x2_t __a, float32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__b, __a); -+ return (uint32x2_t) (__a < __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -17099,28 +14687,21 @@ - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vclt_p8 (poly8x8_t __a, poly8x8_t __b) --{ -- return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __b, -- (int8x8_t) __a); --} -- --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vclt_s8 (int8x8_t __a, int8x8_t __b) - { -- return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__b, __a); -+ return (uint8x8_t) (__a < __b); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vclt_s16 (int16x4_t __a, int16x4_t __b) - { -- return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__b, __a); -+ return (uint16x4_t) (__a < __b); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vclt_s32 (int32x2_t __a, int32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_cmgtv2si (__b, __a); -+ return (uint32x2_t) (__a < __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -17132,22 +14713,19 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vclt_u8 (uint8x8_t __a, uint8x8_t __b) - { -- return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __b, -- (int8x8_t) __a); -+ return (__a < __b); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vclt_u16 (uint16x4_t __a, uint16x4_t __b) - { -- return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __b, -- (int16x4_t) __a); -+ return (__a < __b); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vclt_u32 (uint32x2_t __a, uint32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __b, -- (int32x2_t) __a); -+ return (__a < __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -17159,72 +14737,61 @@ - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vcltq_f32 (float32x4_t __a, float32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__b, __a); -+ return (uint32x4_t) (__a < __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vcltq_f64 (float64x2_t __a, float64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_cmgtv2df (__b, __a); -+ return (uint64x2_t) (__a < __b); - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vcltq_p8 (poly8x16_t __a, poly8x16_t __b) --{ -- return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __b, -- (int8x16_t) __a); --} -- --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vcltq_s8 (int8x16_t __a, int8x16_t __b) - { -- return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__b, __a); -+ return (uint8x16_t) (__a < __b); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vcltq_s16 (int16x8_t __a, int16x8_t __b) - { -- return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__b, __a); -+ return (uint16x8_t) (__a < __b); - } - - __extension__ static __inline uint32x4_t __attribute__ 
((__always_inline__)) - vcltq_s32 (int32x4_t __a, int32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_cmgtv4si (__b, __a); -+ return (uint32x4_t) (__a < __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vcltq_s64 (int64x2_t __a, int64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_cmgtv2di (__b, __a); -+ return (uint64x2_t) (__a < __b); - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vcltq_u8 (uint8x16_t __a, uint8x16_t __b) - { -- return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __b, -- (int8x16_t) __a); -+ return (__a < __b); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vcltq_u16 (uint16x8_t __a, uint16x8_t __b) - { -- return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __b, -- (int16x8_t) __a); -+ return (__a < __b); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vcltq_u32 (uint32x4_t __a, uint32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __b, -- (int32x4_t) __a); -+ return (__a < __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vcltq_u64 (uint64x2_t __a, uint64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __b, -- (int64x2_t) __a); -+ return (__a < __b); - } - - /* vclt - scalar. */ -@@ -17258,8 +14825,7 @@ - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vcltz_f32 (float32x2_t __a) - { -- float32x2_t __b = {0.0f, 0.0f}; -- return (uint32x2_t) __builtin_aarch64_cmltv2sf (__a, __b); -+ return (uint32x2_t) (__a < 0.0f); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -17269,32 +14835,21 @@ - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vcltz_p8 (poly8x8_t __a) --{ -- poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x8_t) __builtin_aarch64_cmltv8qi ((int8x8_t) __a, -- (int8x8_t) __b); --} -- --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vcltz_s8 (int8x8_t __a) - { -- int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x8_t) __builtin_aarch64_cmltv8qi (__a, __b); -+ return (uint8x8_t) (__a < 0); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vcltz_s16 (int16x4_t __a) - { -- int16x4_t __b = {0, 0, 0, 0}; -- return (uint16x4_t) __builtin_aarch64_cmltv4hi (__a, __b); -+ return (uint16x4_t) (__a < 0); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vcltz_s32 (int32x2_t __a) - { -- int32x2_t __b = {0, 0}; -- return (uint32x2_t) __builtin_aarch64_cmltv2si (__a, __b); -+ return (uint32x2_t) (__a < 0); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -17306,53 +14861,37 @@ - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vcltzq_f32 (float32x4_t __a) - { -- float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f}; -- return (uint32x4_t) __builtin_aarch64_cmltv4sf (__a, __b); -+ return (uint32x4_t) (__a < 0.0f); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vcltzq_f64 (float64x2_t __a) - { -- float64x2_t __b = {0.0, 0.0}; -- return (uint64x2_t) __builtin_aarch64_cmltv2df (__a, __b); -+ return (uint64x2_t) (__a < 0.0); - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vcltzq_p8 (poly8x16_t __a) --{ -- poly8x16_t __b = {0, 0, 0, 
0, 0, 0, 0, 0, -- 0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x16_t) __builtin_aarch64_cmltv16qi ((int8x16_t) __a, -- (int8x16_t) __b); --} -- --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vcltzq_s8 (int8x16_t __a) - { -- int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, -- 0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint8x16_t) __builtin_aarch64_cmltv16qi (__a, __b); -+ return (uint8x16_t) (__a < 0); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vcltzq_s16 (int16x8_t __a) - { -- int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; -- return (uint16x8_t) __builtin_aarch64_cmltv8hi (__a, __b); -+ return (uint16x8_t) (__a < 0); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vcltzq_s32 (int32x4_t __a) - { -- int32x4_t __b = {0, 0, 0, 0}; -- return (uint32x4_t) __builtin_aarch64_cmltv4si (__a, __b); -+ return (uint32x4_t) (__a < 0); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vcltzq_s64 (int64x2_t __a) - { -- int64x2_t __b = {0, 0}; -- return (uint64x2_t) __builtin_aarch64_cmltv2di (__a, __b); -+ return (uint64x2_t) (__a < __AARCH64_INT64_C (0)); - } - - /* vcltz - scalar. */ -@@ -17369,12 +14908,6 @@ - return __a < 0 ? -1ll : 0ll; - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vcltzd_u64 (int64x1_t __a) --{ -- return __a < 0 ? -1ll : 0ll; --} -- - __extension__ static __inline uint64_t __attribute__ ((__always_inline__)) - vcltzd_f64 (float64_t __a) - { -@@ -18483,6 +16016,292 @@ - return __aarch64_vgetq_lane_u64 (__a, __b); - } - -+/* vext */ -+ -+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+vext_f32 (float32x2_t __a, float32x2_t __b, __const int __c) -+{ -+ __builtin_aarch64_im_lane_boundsi (__c, 2); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1}); -+#endif -+} -+ -+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -+vext_f64 (float64x1_t __a, float64x1_t __b, __const int __c) -+{ -+ /* The only possible index to the assembler instruction returns element 0. 
*/ -+ __builtin_aarch64_im_lane_boundsi (__c, 1); -+ return __a; -+} -+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+vext_p8 (poly8x8_t __a, poly8x8_t __b, __const int __c) -+{ -+ __builtin_aarch64_im_lane_boundsi (__c, 8); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint8x8_t) -+ {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); -+#endif -+} -+ -+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+vext_p16 (poly16x4_t __a, poly16x4_t __b, __const int __c) -+{ -+ __builtin_aarch64_im_lane_boundsi (__c, 4); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, -+ (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3}); -+#endif -+} -+ -+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+vext_s8 (int8x8_t __a, int8x8_t __b, __const int __c) -+{ -+ __builtin_aarch64_im_lane_boundsi (__c, 8); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint8x8_t) -+ {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); -+#endif -+} -+ -+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+vext_s16 (int16x4_t __a, int16x4_t __b, __const int __c) -+{ -+ __builtin_aarch64_im_lane_boundsi (__c, 4); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, -+ (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3}); -+#endif -+} -+ -+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+vext_s32 (int32x2_t __a, int32x2_t __b, __const int __c) -+{ -+ __builtin_aarch64_im_lane_boundsi (__c, 2); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1}); -+#endif -+} -+ -+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+vext_s64 (int64x1_t __a, int64x1_t __b, __const int __c) -+{ -+ /* The only possible index to the assembler instruction returns element 0. 
*/ -+ __builtin_aarch64_im_lane_boundsi (__c, 1); -+ return __a; -+} -+ -+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+vext_u8 (uint8x8_t __a, uint8x8_t __b, __const int __c) -+{ -+ __builtin_aarch64_im_lane_boundsi (__c, 8); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint8x8_t) -+ {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); -+#endif -+} -+ -+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+vext_u16 (uint16x4_t __a, uint16x4_t __b, __const int __c) -+{ -+ __builtin_aarch64_im_lane_boundsi (__c, 4); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, -+ (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3}); -+#endif -+} -+ -+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+vext_u32 (uint32x2_t __a, uint32x2_t __b, __const int __c) -+{ -+ __builtin_aarch64_im_lane_boundsi (__c, 2); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1}); -+#endif -+} -+ -+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+vext_u64 (uint64x1_t __a, uint64x1_t __b, __const int __c) -+{ -+ /* The only possible index to the assembler instruction returns element 0. */ -+ __builtin_aarch64_im_lane_boundsi (__c, 1); -+ return __a; -+} -+ -+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+vextq_f32 (float32x4_t __a, float32x4_t __b, __const int __c) -+{ -+ __builtin_aarch64_im_lane_boundsi (__c, 4); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, -+ (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3}); -+#endif -+} -+ -+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -+vextq_f64 (float64x2_t __a, float64x2_t __b, __const int __c) -+{ -+ __builtin_aarch64_im_lane_boundsi (__c, 2); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1}); -+#endif -+} -+ -+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+vextq_p8 (poly8x16_t __a, poly8x16_t __b, __const int __c) -+{ -+ __builtin_aarch64_im_lane_boundsi (__c, 16); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint8x16_t) -+ {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c, -+ 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, -+ __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15}); -+#endif -+} -+ -+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+vextq_p16 (poly16x8_t __a, poly16x8_t __b, __const int __c) -+{ -+ __builtin_aarch64_im_lane_boundsi (__c, 8); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint16x8_t) -+ {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); -+#endif -+} -+ -+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) 
-+vextq_s8 (int8x16_t __a, int8x16_t __b, __const int __c) -+{ -+ __builtin_aarch64_im_lane_boundsi (__c, 16); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint8x16_t) -+ {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c, -+ 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, -+ __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15}); -+#endif -+} -+ -+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+vextq_s16 (int16x8_t __a, int16x8_t __b, __const int __c) -+{ -+ __builtin_aarch64_im_lane_boundsi (__c, 8); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint16x8_t) -+ {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); -+#endif -+} -+ -+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+vextq_s32 (int32x4_t __a, int32x4_t __b, __const int __c) -+{ -+ __builtin_aarch64_im_lane_boundsi (__c, 4); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, -+ (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3}); -+#endif -+} -+ -+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+vextq_s64 (int64x2_t __a, int64x2_t __b, __const int __c) -+{ -+ __builtin_aarch64_im_lane_boundsi (__c, 2); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1}); -+#endif -+} -+ -+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+vextq_u8 (uint8x16_t __a, uint8x16_t __b, __const int __c) -+{ -+ __builtin_aarch64_im_lane_boundsi (__c, 16); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint8x16_t) -+ {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c, -+ 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, -+ __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15}); -+#endif -+} -+ -+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+vextq_u16 (uint16x8_t __a, uint16x8_t __b, __const int __c) -+{ -+ __builtin_aarch64_im_lane_boundsi (__c, 8); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint16x8_t) -+ {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); -+#endif -+} -+ -+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+vextq_u32 (uint32x4_t __a, uint32x4_t __b, __const int __c) -+{ -+ __builtin_aarch64_im_lane_boundsi (__c, 4); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, -+ (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3}); -+#endif -+} -+ -+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+vextq_u64 (uint64x2_t __a, uint64x2_t __b, __const int __c) -+{ -+ __builtin_aarch64_im_lane_boundsi (__c, 2); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c}); -+#else -+ return 
__builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1}); -+#endif -+} -+ - /* vfma_lane */ - - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -@@ -19712,6 +17531,872 @@ - return ret; - } - -+/* vldn_dup */ -+ -+__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__)) -+vld2_dup_s8 (const int8_t * __a) -+{ -+ int8x8x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); -+ ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__)) -+vld2_dup_s16 (const int16_t * __a) -+{ -+ int16x4x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); -+ ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__)) -+vld2_dup_s32 (const int32_t * __a) -+{ -+ int32x2x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv2si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0); -+ ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) -+vld2_dup_f32 (const float32_t * __a) -+{ -+ float32x2x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv2sf ((const __builtin_aarch64_simd_sf *) __a); -+ ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0); -+ ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline float64x1x2_t __attribute__ ((__always_inline__)) -+vld2_dup_f64 (const float64_t * __a) -+{ -+ float64x1x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rdf ((const __builtin_aarch64_simd_df *) __a); -+ ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 0)}; -+ ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 1)}; -+ return ret; -+} -+ -+__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__)) -+vld2_dup_u8 (const uint8_t * __a) -+{ -+ uint8x8x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); -+ ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__)) -+vld2_dup_u16 (const uint16_t * __a) -+{ -+ uint16x4x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); -+ ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__)) -+vld2_dup_u32 (const uint32_t * __a) -+{ -+ uint32x2x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv2si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0); -+ ret.val[1] = (uint32x2_t) 
__builtin_aarch64_get_dregoiv2si (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__)) -+vld2_dup_p8 (const poly8_t * __a) -+{ -+ poly8x8x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); -+ ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__)) -+vld2_dup_p16 (const poly16_t * __a) -+{ -+ poly16x4x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); -+ ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__)) -+vld2_dup_s64 (const int64_t * __a) -+{ -+ int64x1x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rdi ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0); -+ ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__)) -+vld2_dup_u64 (const uint64_t * __a) -+{ -+ uint64x1x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rdi ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0); -+ ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__)) -+vld2q_dup_s8 (const int8_t * __a) -+{ -+ int8x16x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); -+ ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__)) -+vld2q_dup_p8 (const poly8_t * __a) -+{ -+ poly8x16x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); -+ ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__)) -+vld2q_dup_s16 (const int16_t * __a) -+{ -+ int16x8x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); -+ ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__)) -+vld2q_dup_p16 (const poly16_t * __a) -+{ -+ poly16x8x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); -+ ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__)) -+vld2q_dup_s32 (const int32_t * __a) -+{ -+ 
int32x4x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv4si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0); -+ ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline int64x2x2_t __attribute__ ((__always_inline__)) -+vld2q_dup_s64 (const int64_t * __a) -+{ -+ int64x2x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0); -+ ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__)) -+vld2q_dup_u8 (const uint8_t * __a) -+{ -+ uint8x16x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); -+ ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__)) -+vld2q_dup_u16 (const uint16_t * __a) -+{ -+ uint16x8x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); -+ ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__)) -+vld2q_dup_u32 (const uint32_t * __a) -+{ -+ uint32x4x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv4si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0); -+ ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline uint64x2x2_t __attribute__ ((__always_inline__)) -+vld2q_dup_u64 (const uint64_t * __a) -+{ -+ uint64x2x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0); -+ ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__)) -+vld2q_dup_f32 (const float32_t * __a) -+{ -+ float32x4x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv4sf ((const __builtin_aarch64_simd_sf *) __a); -+ ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0); -+ ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline float64x2x2_t __attribute__ ((__always_inline__)) -+vld2q_dup_f64 (const float64_t * __a) -+{ -+ float64x2x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv2df ((const __builtin_aarch64_simd_df *) __a); -+ ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0); -+ ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1); -+ return ret; -+} -+ -+__extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__)) -+vld3_dup_s64 (const int64_t * __a) -+{ -+ int64x1x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rdi ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (int64x1_t) 
__builtin_aarch64_get_dregcidi (__o, 0); -+ ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1); -+ ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2); -+ return ret; -+} -+ -+__extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__)) -+vld3_dup_u64 (const uint64_t * __a) -+{ -+ uint64x1x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rdi ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0); -+ ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1); -+ ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2); -+ return ret; -+} -+ -+__extension__ static __inline float64x1x3_t __attribute__ ((__always_inline__)) -+vld3_dup_f64 (const float64_t * __a) -+{ -+ float64x1x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rdf ((const __builtin_aarch64_simd_df *) __a); -+ ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 0)}; -+ ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 1)}; -+ ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 2)}; -+ return ret; -+} -+ -+__extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__)) -+vld3_dup_s8 (const int8_t * __a) -+{ -+ int8x8x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0); -+ ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1); -+ ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); -+ return ret; -+} -+ -+__extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__)) -+vld3_dup_p8 (const poly8_t * __a) -+{ -+ poly8x8x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0); -+ ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1); -+ ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); -+ return ret; -+} -+ -+__extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__)) -+vld3_dup_s16 (const int16_t * __a) -+{ -+ int16x4x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); -+ ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); -+ ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2); -+ return ret; -+} -+ -+__extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__)) -+vld3_dup_p16 (const poly16_t * __a) -+{ -+ poly16x4x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); -+ ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); -+ ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2); -+ return ret; -+} -+ -+__extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__)) -+vld3_dup_s32 (const int32_t * __a) -+{ -+ int32x2x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv2si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0); -+ ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1); -+ ret.val[2] = 
(int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2); -+ return ret; -+} -+ -+__extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__)) -+vld3_dup_u8 (const uint8_t * __a) -+{ -+ uint8x8x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0); -+ ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1); -+ ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); -+ return ret; -+} -+ -+__extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__)) -+vld3_dup_u16 (const uint16_t * __a) -+{ -+ uint16x4x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); -+ ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); -+ ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2); -+ return ret; -+} -+ -+__extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__)) -+vld3_dup_u32 (const uint32_t * __a) -+{ -+ uint32x2x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv2si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0); -+ ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1); -+ ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2); -+ return ret; -+} -+ -+__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__)) -+vld3_dup_f32 (const float32_t * __a) -+{ -+ float32x2x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv2sf ((const __builtin_aarch64_simd_sf *) __a); -+ ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0); -+ ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1); -+ ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2); -+ return ret; -+} -+ -+__extension__ static __inline int8x16x3_t __attribute__ ((__always_inline__)) -+vld3q_dup_s8 (const int8_t * __a) -+{ -+ int8x16x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); -+ ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); -+ ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); -+ return ret; -+} -+ -+__extension__ static __inline poly8x16x3_t __attribute__ ((__always_inline__)) -+vld3q_dup_p8 (const poly8_t * __a) -+{ -+ poly8x16x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); -+ ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); -+ ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); -+ return ret; -+} -+ -+__extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__)) -+vld3q_dup_s16 (const int16_t * __a) -+{ -+ int16x8x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); -+ ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); -+ ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); -+ return ret; -+} -+ -+__extension__ 
static __inline poly16x8x3_t __attribute__ ((__always_inline__)) -+vld3q_dup_p16 (const poly16_t * __a) -+{ -+ poly16x8x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); -+ ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); -+ ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); -+ return ret; -+} -+ -+__extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__)) -+vld3q_dup_s32 (const int32_t * __a) -+{ -+ int32x4x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv4si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0); -+ ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1); -+ ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2); -+ return ret; -+} -+ -+__extension__ static __inline int64x2x3_t __attribute__ ((__always_inline__)) -+vld3q_dup_s64 (const int64_t * __a) -+{ -+ int64x2x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0); -+ ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1); -+ ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2); -+ return ret; -+} -+ -+__extension__ static __inline uint8x16x3_t __attribute__ ((__always_inline__)) -+vld3q_dup_u8 (const uint8_t * __a) -+{ -+ uint8x16x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); -+ ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); -+ ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); -+ return ret; -+} -+ -+__extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__)) -+vld3q_dup_u16 (const uint16_t * __a) -+{ -+ uint16x8x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); -+ ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); -+ ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); -+ return ret; -+} -+ -+__extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__)) -+vld3q_dup_u32 (const uint32_t * __a) -+{ -+ uint32x4x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv4si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0); -+ ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1); -+ ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2); -+ return ret; -+} -+ -+__extension__ static __inline uint64x2x3_t __attribute__ ((__always_inline__)) -+vld3q_dup_u64 (const uint64_t * __a) -+{ -+ uint64x2x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0); -+ ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1); -+ ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2); -+ return ret; -+} -+ -+__extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__)) -+vld3q_dup_f32 (const 
float32_t * __a) -+{ -+ float32x4x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv4sf ((const __builtin_aarch64_simd_sf *) __a); -+ ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0); -+ ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1); -+ ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2); -+ return ret; -+} -+ -+__extension__ static __inline float64x2x3_t __attribute__ ((__always_inline__)) -+vld3q_dup_f64 (const float64_t * __a) -+{ -+ float64x2x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv2df ((const __builtin_aarch64_simd_df *) __a); -+ ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0); -+ ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1); -+ ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2); -+ return ret; -+} -+ -+__extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__)) -+vld4_dup_s64 (const int64_t * __a) -+{ -+ int64x1x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rdi ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0); -+ ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1); -+ ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2); -+ ret.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3); -+ return ret; -+} -+ -+__extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__)) -+vld4_dup_u64 (const uint64_t * __a) -+{ -+ uint64x1x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rdi ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0); -+ ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1); -+ ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2); -+ ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3); -+ return ret; -+} -+ -+__extension__ static __inline float64x1x4_t __attribute__ ((__always_inline__)) -+vld4_dup_f64 (const float64_t * __a) -+{ -+ float64x1x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rdf ((const __builtin_aarch64_simd_df *) __a); -+ ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 0)}; -+ ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 1)}; -+ ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 2)}; -+ ret.val[3] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 3)}; -+ return ret; -+} -+ -+__extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__)) -+vld4_dup_s8 (const int8_t * __a) -+{ -+ int8x8x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0); -+ ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1); -+ ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2); -+ ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3); -+ return ret; -+} -+ -+__extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__)) -+vld4_dup_p8 (const poly8_t * __a) -+{ -+ poly8x8x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0); -+ ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1); -+ ret.val[2] = (poly8x8_t) 
__builtin_aarch64_get_dregxiv8qi (__o, 2); -+ ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3); -+ return ret; -+} -+ -+__extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__)) -+vld4_dup_s16 (const int16_t * __a) -+{ -+ int16x4x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0); -+ ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1); -+ ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2); -+ ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3); -+ return ret; -+} -+ -+__extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__)) -+vld4_dup_p16 (const poly16_t * __a) -+{ -+ poly16x4x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0); -+ ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1); -+ ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2); -+ ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3); -+ return ret; -+} -+ -+__extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__)) -+vld4_dup_s32 (const int32_t * __a) -+{ -+ int32x2x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv2si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0); -+ ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1); -+ ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2); -+ ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3); -+ return ret; -+} -+ -+__extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__)) -+vld4_dup_u8 (const uint8_t * __a) -+{ -+ uint8x8x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0); -+ ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1); -+ ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2); -+ ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3); -+ return ret; -+} -+ -+__extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__)) -+vld4_dup_u16 (const uint16_t * __a) -+{ -+ uint16x4x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0); -+ ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1); -+ ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2); -+ ret.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3); -+ return ret; -+} -+ -+__extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__)) -+vld4_dup_u32 (const uint32_t * __a) -+{ -+ uint32x2x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv2si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0); -+ ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1); -+ ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2); -+ ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3); -+ return ret; -+} -+ -+__extension__ static __inline 
float32x2x4_t __attribute__ ((__always_inline__)) -+vld4_dup_f32 (const float32_t * __a) -+{ -+ float32x2x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv2sf ((const __builtin_aarch64_simd_sf *) __a); -+ ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0); -+ ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 1); -+ ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2); -+ ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3); -+ return ret; -+} -+ -+__extension__ static __inline int8x16x4_t __attribute__ ((__always_inline__)) -+vld4q_dup_s8 (const int8_t * __a) -+{ -+ int8x16x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); -+ ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); -+ ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); -+ ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); -+ return ret; -+} -+ -+__extension__ static __inline poly8x16x4_t __attribute__ ((__always_inline__)) -+vld4q_dup_p8 (const poly8_t * __a) -+{ -+ poly8x16x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); -+ ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); -+ ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); -+ ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); -+ return ret; -+} -+ -+__extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__)) -+vld4q_dup_s16 (const int16_t * __a) -+{ -+ int16x8x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0); -+ ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1); -+ ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); -+ ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3); -+ return ret; -+} -+ -+__extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__)) -+vld4q_dup_p16 (const poly16_t * __a) -+{ -+ poly16x8x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0); -+ ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1); -+ ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); -+ ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3); -+ return ret; -+} -+ -+__extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__)) -+vld4q_dup_s32 (const int32_t * __a) -+{ -+ int32x4x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv4si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0); -+ ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1); -+ ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2); -+ ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3); -+ return ret; -+} -+ -+__extension__ static __inline int64x2x4_t __attribute__ ((__always_inline__)) -+vld4q_dup_s64 (const int64_t * __a) -+{ -+ int64x2x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = 
__builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0); -+ ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1); -+ ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2); -+ ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3); -+ return ret; -+} -+ -+__extension__ static __inline uint8x16x4_t __attribute__ ((__always_inline__)) -+vld4q_dup_u8 (const uint8_t * __a) -+{ -+ uint8x16x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); -+ ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); -+ ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); -+ ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); -+ return ret; -+} -+ -+__extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__)) -+vld4q_dup_u16 (const uint16_t * __a) -+{ -+ uint16x8x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0); -+ ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1); -+ ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); -+ ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3); -+ return ret; -+} -+ -+__extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__)) -+vld4q_dup_u32 (const uint32_t * __a) -+{ -+ uint32x4x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv4si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0); -+ ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1); -+ ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2); -+ ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3); -+ return ret; -+} -+ -+__extension__ static __inline uint64x2x4_t __attribute__ ((__always_inline__)) -+vld4q_dup_u64 (const uint64_t * __a) -+{ -+ uint64x2x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0); -+ ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1); -+ ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2); -+ ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3); -+ return ret; -+} -+ -+__extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__)) -+vld4q_dup_f32 (const float32_t * __a) -+{ -+ float32x4x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv4sf ((const __builtin_aarch64_simd_sf *) __a); -+ ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0); -+ ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1); -+ ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2); -+ ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3); -+ return ret; -+} -+ -+__extension__ static __inline float64x2x4_t __attribute__ ((__always_inline__)) -+vld4q_dup_f64 (const float64_t * __a) -+{ -+ float64x2x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv2df ((const __builtin_aarch64_simd_df *) __a); -+ ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df 
(__o, 0); -+ ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1); -+ ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2); -+ ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3); -+ return ret; -+} -+ - /* vmax */ - - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -@@ -20911,6 +19596,65 @@ - return -__a; - } - -+/* vpadd */ -+ -+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+vpadd_s8 (int8x8_t __a, int8x8_t __b) -+{ -+ return __builtin_aarch64_addpv8qi (__a, __b); -+} -+ -+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+vpadd_s16 (int16x4_t __a, int16x4_t __b) -+{ -+ return __builtin_aarch64_addpv4hi (__a, __b); -+} -+ -+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+vpadd_s32 (int32x2_t __a, int32x2_t __b) -+{ -+ return __builtin_aarch64_addpv2si (__a, __b); -+} -+ -+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+vpadd_u8 (uint8x8_t __a, uint8x8_t __b) -+{ -+ return (uint8x8_t) __builtin_aarch64_addpv8qi ((int8x8_t) __a, -+ (int8x8_t) __b); -+} -+ -+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+vpadd_u16 (uint16x4_t __a, uint16x4_t __b) -+{ -+ return (uint16x4_t) __builtin_aarch64_addpv4hi ((int16x4_t) __a, -+ (int16x4_t) __b); -+} -+ -+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+vpadd_u32 (uint32x2_t __a, uint32x2_t __b) -+{ -+ return (uint32x2_t) __builtin_aarch64_addpv2si ((int32x2_t) __a, -+ (int32x2_t) __b); -+} -+ -+__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -+vpaddd_f64 (float64x2_t __a) -+{ -+ return vgetq_lane_f64 (__builtin_aarch64_reduc_splus_v2df (__a), 0); -+} -+ -+__extension__ static __inline int64_t __attribute__ ((__always_inline__)) -+vpaddd_s64 (int64x2_t __a) -+{ -+ return __builtin_aarch64_addpdi (__a); -+} -+ -+__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) -+vpaddd_u64 (uint64x2_t __a) -+{ -+ return __builtin_aarch64_addpdi ((int64x2_t) __a); -+} -+ - /* vqabs */ - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -@@ -20937,6 +19681,12 @@ - return (int32_t) __builtin_aarch64_sqabssi (__a); - } - -+__extension__ static __inline int64_t __attribute__ ((__always_inline__)) -+vqabsd_s64 (int64_t __a) -+{ -+ return __builtin_aarch64_sqabsdi (__a); -+} -+ - /* vqadd */ - - __extension__ static __inline int8_t __attribute__ ((__always_inline__)) -@@ -20966,25 +19716,26 @@ - __extension__ static __inline uint8_t __attribute__ ((__always_inline__)) - vqaddb_u8 (uint8_t __a, uint8_t __b) - { -- return (uint8_t) __builtin_aarch64_uqaddqi (__a, __b); -+ return (uint8_t) __builtin_aarch64_uqaddqi_uuu (__a, __b); - } - - __extension__ static __inline uint16_t __attribute__ ((__always_inline__)) - vqaddh_u16 (uint16_t __a, uint16_t __b) - { -- return (uint16_t) __builtin_aarch64_uqaddhi (__a, __b); -+ return (uint16_t) __builtin_aarch64_uqaddhi_uuu (__a, __b); - } - - __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) - vqadds_u32 (uint32_t __a, uint32_t __b) - { -- return (uint32_t) __builtin_aarch64_uqaddsi (__a, __b); -+ return (uint32_t) __builtin_aarch64_uqaddsi_uuu (__a, __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vqaddd_u64 (uint64x1_t __a, uint64x1_t __b) - { -- return (uint64x1_t) __builtin_aarch64_uqadddi (__a, __b); -+ return (uint64x1_t) 
__builtin_aarch64_uqadddi_uuu ((uint64_t) __a, -+ (uint64_t) __b); - } - - /* vqdmlal */ -@@ -21549,6 +20300,12 @@ - return (int32_t) __builtin_aarch64_sqnegsi (__a); - } - -+__extension__ static __inline int64_t __attribute__ ((__always_inline__)) -+vqnegd_s64 (int64_t __a) -+{ -+ return __builtin_aarch64_sqnegdi (__a); -+} -+ - /* vqrdmulh */ - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -@@ -21628,25 +20385,25 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vqrshl_u8 (uint8x8_t __a, int8x8_t __b) - { -- return (uint8x8_t) __builtin_aarch64_uqrshlv8qi ((int8x8_t) __a, __b); -+ return __builtin_aarch64_uqrshlv8qi_uus ( __a, __b); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vqrshl_u16 (uint16x4_t __a, int16x4_t __b) - { -- return (uint16x4_t) __builtin_aarch64_uqrshlv4hi ((int16x4_t) __a, __b); -+ return __builtin_aarch64_uqrshlv4hi_uus ( __a, __b); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vqrshl_u32 (uint32x2_t __a, int32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_uqrshlv2si ((int32x2_t) __a, __b); -+ return __builtin_aarch64_uqrshlv2si_uus ( __a, __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vqrshl_u64 (uint64x1_t __a, int64x1_t __b) - { -- return (uint64x1_t) __builtin_aarch64_uqrshldi ((int64x1_t) __a, __b); -+ return __builtin_aarch64_uqrshldi_uus ( __a, __b); - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -@@ -21676,25 +20433,25 @@ - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vqrshlq_u8 (uint8x16_t __a, int8x16_t __b) - { -- return (uint8x16_t) __builtin_aarch64_uqrshlv16qi ((int8x16_t) __a, __b); -+ return __builtin_aarch64_uqrshlv16qi_uus ( __a, __b); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vqrshlq_u16 (uint16x8_t __a, int16x8_t __b) - { -- return (uint16x8_t) __builtin_aarch64_uqrshlv8hi ((int16x8_t) __a, __b); -+ return __builtin_aarch64_uqrshlv8hi_uus ( __a, __b); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vqrshlq_u32 (uint32x4_t __a, int32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_uqrshlv4si ((int32x4_t) __a, __b); -+ return __builtin_aarch64_uqrshlv4si_uus ( __a, __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vqrshlq_u64 (uint64x2_t __a, int64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_uqrshlv2di ((int64x2_t) __a, __b); -+ return __builtin_aarch64_uqrshlv2di_uus ( __a, __b); - } - - __extension__ static __inline int8_t __attribute__ ((__always_inline__)) -@@ -21724,25 +20481,25 @@ - __extension__ static __inline uint8_t __attribute__ ((__always_inline__)) - vqrshlb_u8 (uint8_t __a, uint8_t __b) - { -- return (uint8_t) __builtin_aarch64_uqrshlqi (__a, __b); -+ return __builtin_aarch64_uqrshlqi_uus (__a, __b); - } - - __extension__ static __inline uint16_t __attribute__ ((__always_inline__)) - vqrshlh_u16 (uint16_t __a, uint16_t __b) - { -- return (uint16_t) __builtin_aarch64_uqrshlhi (__a, __b); -+ return __builtin_aarch64_uqrshlhi_uus (__a, __b); - } - - __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) - vqrshls_u32 (uint32_t __a, uint32_t __b) - { -- return (uint32_t) __builtin_aarch64_uqrshlsi (__a, __b); -+ return __builtin_aarch64_uqrshlsi_uus (__a, __b); - } - - __extension__ static __inline uint64x1_t 
__attribute__ ((__always_inline__)) - vqrshld_u64 (uint64x1_t __a, uint64x1_t __b) - { -- return (uint64x1_t) __builtin_aarch64_uqrshldi (__a, __b); -+ return __builtin_aarch64_uqrshldi_uus (__a, __b); - } - - /* vqrshrn */ -@@ -21768,19 +20525,19 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vqrshrn_n_u16 (uint16x8_t __a, const int __b) - { -- return (uint8x8_t) __builtin_aarch64_uqrshrn_nv8hi ((int16x8_t) __a, __b); -+ return __builtin_aarch64_uqrshrn_nv8hi_uus ( __a, __b); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vqrshrn_n_u32 (uint32x4_t __a, const int __b) - { -- return (uint16x4_t) __builtin_aarch64_uqrshrn_nv4si ((int32x4_t) __a, __b); -+ return __builtin_aarch64_uqrshrn_nv4si_uus ( __a, __b); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vqrshrn_n_u64 (uint64x2_t __a, const int __b) - { -- return (uint32x2_t) __builtin_aarch64_uqrshrn_nv2di ((int64x2_t) __a, __b); -+ return __builtin_aarch64_uqrshrn_nv2di_uus ( __a, __b); - } - - __extension__ static __inline int8_t __attribute__ ((__always_inline__)) -@@ -21804,19 +20561,19 @@ - __extension__ static __inline uint8_t __attribute__ ((__always_inline__)) - vqrshrnh_n_u16 (uint16_t __a, const int __b) - { -- return (uint8_t) __builtin_aarch64_uqrshrn_nhi (__a, __b); -+ return __builtin_aarch64_uqrshrn_nhi_uus (__a, __b); - } - - __extension__ static __inline uint16_t __attribute__ ((__always_inline__)) - vqrshrns_n_u32 (uint32_t __a, const int __b) - { -- return (uint16_t) __builtin_aarch64_uqrshrn_nsi (__a, __b); -+ return __builtin_aarch64_uqrshrn_nsi_uus (__a, __b); - } - - __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) - vqrshrnd_n_u64 (uint64x1_t __a, const int __b) - { -- return (uint32_t) __builtin_aarch64_uqrshrn_ndi (__a, __b); -+ return __builtin_aarch64_uqrshrn_ndi_uus (__a, __b); - } - - /* vqrshrun */ -@@ -21886,25 +20643,25 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vqshl_u8 (uint8x8_t __a, int8x8_t __b) - { -- return (uint8x8_t) __builtin_aarch64_uqshlv8qi ((int8x8_t) __a, __b); -+ return __builtin_aarch64_uqshlv8qi_uus ( __a, __b); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vqshl_u16 (uint16x4_t __a, int16x4_t __b) - { -- return (uint16x4_t) __builtin_aarch64_uqshlv4hi ((int16x4_t) __a, __b); -+ return __builtin_aarch64_uqshlv4hi_uus ( __a, __b); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vqshl_u32 (uint32x2_t __a, int32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_uqshlv2si ((int32x2_t) __a, __b); -+ return __builtin_aarch64_uqshlv2si_uus ( __a, __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vqshl_u64 (uint64x1_t __a, int64x1_t __b) - { -- return (uint64x1_t) __builtin_aarch64_uqshldi ((int64x1_t) __a, __b); -+ return __builtin_aarch64_uqshldi_uus ( __a, __b); - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -@@ -21934,25 +20691,25 @@ - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vqshlq_u8 (uint8x16_t __a, int8x16_t __b) - { -- return (uint8x16_t) __builtin_aarch64_uqshlv16qi ((int8x16_t) __a, __b); -+ return __builtin_aarch64_uqshlv16qi_uus ( __a, __b); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vqshlq_u16 (uint16x8_t __a, int16x8_t __b) - { -- return (uint16x8_t) 
__builtin_aarch64_uqshlv8hi ((int16x8_t) __a, __b); -+ return __builtin_aarch64_uqshlv8hi_uus ( __a, __b); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vqshlq_u32 (uint32x4_t __a, int32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_uqshlv4si ((int32x4_t) __a, __b); -+ return __builtin_aarch64_uqshlv4si_uus ( __a, __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vqshlq_u64 (uint64x2_t __a, int64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_uqshlv2di ((int64x2_t) __a, __b); -+ return __builtin_aarch64_uqshlv2di_uus ( __a, __b); - } - - __extension__ static __inline int8_t __attribute__ ((__always_inline__)) -@@ -21982,25 +20739,25 @@ - __extension__ static __inline uint8_t __attribute__ ((__always_inline__)) - vqshlb_u8 (uint8_t __a, uint8_t __b) - { -- return (uint8_t) __builtin_aarch64_uqshlqi (__a, __b); -+ return __builtin_aarch64_uqshlqi_uus (__a, __b); - } - - __extension__ static __inline uint16_t __attribute__ ((__always_inline__)) - vqshlh_u16 (uint16_t __a, uint16_t __b) - { -- return (uint16_t) __builtin_aarch64_uqshlhi (__a, __b); -+ return __builtin_aarch64_uqshlhi_uus (__a, __b); - } - - __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) - vqshls_u32 (uint32_t __a, uint32_t __b) - { -- return (uint32_t) __builtin_aarch64_uqshlsi (__a, __b); -+ return __builtin_aarch64_uqshlsi_uus (__a, __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vqshld_u64 (uint64x1_t __a, uint64x1_t __b) - { -- return (uint64x1_t) __builtin_aarch64_uqshldi (__a, __b); -+ return __builtin_aarch64_uqshldi_uus (__a, __b); - } - - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -@@ -22030,25 +20787,25 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vqshl_n_u8 (uint8x8_t __a, const int __b) - { -- return (uint8x8_t) __builtin_aarch64_uqshl_nv8qi ((int8x8_t) __a, __b); -+ return __builtin_aarch64_uqshl_nv8qi_uus (__a, __b); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vqshl_n_u16 (uint16x4_t __a, const int __b) - { -- return (uint16x4_t) __builtin_aarch64_uqshl_nv4hi ((int16x4_t) __a, __b); -+ return __builtin_aarch64_uqshl_nv4hi_uus (__a, __b); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vqshl_n_u32 (uint32x2_t __a, const int __b) - { -- return (uint32x2_t) __builtin_aarch64_uqshl_nv2si ((int32x2_t) __a, __b); -+ return __builtin_aarch64_uqshl_nv2si_uus (__a, __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vqshl_n_u64 (uint64x1_t __a, const int __b) - { -- return (uint64x1_t) __builtin_aarch64_uqshl_ndi ((int64x1_t) __a, __b); -+ return __builtin_aarch64_uqshl_ndi_uus (__a, __b); - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -@@ -22078,25 +20835,25 @@ - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vqshlq_n_u8 (uint8x16_t __a, const int __b) - { -- return (uint8x16_t) __builtin_aarch64_uqshl_nv16qi ((int8x16_t) __a, __b); -+ return __builtin_aarch64_uqshl_nv16qi_uus (__a, __b); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vqshlq_n_u16 (uint16x8_t __a, const int __b) - { -- return (uint16x8_t) __builtin_aarch64_uqshl_nv8hi ((int16x8_t) __a, __b); -+ return __builtin_aarch64_uqshl_nv8hi_uus (__a, __b); - } - - __extension__ static __inline 
uint32x4_t __attribute__ ((__always_inline__)) - vqshlq_n_u32 (uint32x4_t __a, const int __b) - { -- return (uint32x4_t) __builtin_aarch64_uqshl_nv4si ((int32x4_t) __a, __b); -+ return __builtin_aarch64_uqshl_nv4si_uus (__a, __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vqshlq_n_u64 (uint64x2_t __a, const int __b) - { -- return (uint64x2_t) __builtin_aarch64_uqshl_nv2di ((int64x2_t) __a, __b); -+ return __builtin_aarch64_uqshl_nv2di_uus (__a, __b); - } - - __extension__ static __inline int8_t __attribute__ ((__always_inline__)) -@@ -22126,25 +20883,25 @@ - __extension__ static __inline uint8_t __attribute__ ((__always_inline__)) - vqshlb_n_u8 (uint8_t __a, const int __b) - { -- return (uint8_t) __builtin_aarch64_uqshl_nqi (__a, __b); -+ return __builtin_aarch64_uqshl_nqi_uus (__a, __b); - } - - __extension__ static __inline uint16_t __attribute__ ((__always_inline__)) - vqshlh_n_u16 (uint16_t __a, const int __b) - { -- return (uint16_t) __builtin_aarch64_uqshl_nhi (__a, __b); -+ return __builtin_aarch64_uqshl_nhi_uus (__a, __b); - } - - __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) - vqshls_n_u32 (uint32_t __a, const int __b) - { -- return (uint32_t) __builtin_aarch64_uqshl_nsi (__a, __b); -+ return __builtin_aarch64_uqshl_nsi_uus (__a, __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vqshld_n_u64 (uint64x1_t __a, const int __b) - { -- return (uint64x1_t) __builtin_aarch64_uqshl_ndi (__a, __b); -+ return __builtin_aarch64_uqshl_ndi_uus (__a, __b); - } - - /* vqshlu */ -@@ -22152,73 +20909,73 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vqshlu_n_s8 (int8x8_t __a, const int __b) - { -- return (uint8x8_t) __builtin_aarch64_sqshlu_nv8qi (__a, __b); -+ return __builtin_aarch64_sqshlu_nv8qi_uss (__a, __b); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vqshlu_n_s16 (int16x4_t __a, const int __b) - { -- return (uint16x4_t) __builtin_aarch64_sqshlu_nv4hi (__a, __b); -+ return __builtin_aarch64_sqshlu_nv4hi_uss (__a, __b); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vqshlu_n_s32 (int32x2_t __a, const int __b) - { -- return (uint32x2_t) __builtin_aarch64_sqshlu_nv2si (__a, __b); -+ return __builtin_aarch64_sqshlu_nv2si_uss (__a, __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vqshlu_n_s64 (int64x1_t __a, const int __b) - { -- return (uint64x1_t) __builtin_aarch64_sqshlu_ndi (__a, __b); -+ return __builtin_aarch64_sqshlu_ndi_uss (__a, __b); - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vqshluq_n_s8 (int8x16_t __a, const int __b) - { -- return (uint8x16_t) __builtin_aarch64_sqshlu_nv16qi (__a, __b); -+ return __builtin_aarch64_sqshlu_nv16qi_uss (__a, __b); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vqshluq_n_s16 (int16x8_t __a, const int __b) - { -- return (uint16x8_t) __builtin_aarch64_sqshlu_nv8hi (__a, __b); -+ return __builtin_aarch64_sqshlu_nv8hi_uss (__a, __b); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vqshluq_n_s32 (int32x4_t __a, const int __b) - { -- return (uint32x4_t) __builtin_aarch64_sqshlu_nv4si (__a, __b); -+ return __builtin_aarch64_sqshlu_nv4si_uss (__a, __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vqshluq_n_s64 (int64x2_t __a, 
const int __b) - { -- return (uint64x2_t) __builtin_aarch64_sqshlu_nv2di (__a, __b); -+ return __builtin_aarch64_sqshlu_nv2di_uss (__a, __b); - } - - __extension__ static __inline int8_t __attribute__ ((__always_inline__)) - vqshlub_n_s8 (int8_t __a, const int __b) - { -- return (int8_t) __builtin_aarch64_sqshlu_nqi (__a, __b); -+ return (int8_t) __builtin_aarch64_sqshlu_nqi_uss (__a, __b); - } - - __extension__ static __inline int16_t __attribute__ ((__always_inline__)) - vqshluh_n_s16 (int16_t __a, const int __b) - { -- return (int16_t) __builtin_aarch64_sqshlu_nhi (__a, __b); -+ return (int16_t) __builtin_aarch64_sqshlu_nhi_uss (__a, __b); - } - - __extension__ static __inline int32_t __attribute__ ((__always_inline__)) - vqshlus_n_s32 (int32_t __a, const int __b) - { -- return (int32_t) __builtin_aarch64_sqshlu_nsi (__a, __b); -+ return (int32_t) __builtin_aarch64_sqshlu_nsi_uss (__a, __b); - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vqshlud_n_s64 (int64x1_t __a, const int __b) - { -- return (int64x1_t) __builtin_aarch64_sqshlu_ndi (__a, __b); -+ return (int64x1_t) __builtin_aarch64_sqshlu_ndi_uss (__a, __b); - } - - /* vqshrn */ -@@ -22244,19 +21001,19 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vqshrn_n_u16 (uint16x8_t __a, const int __b) - { -- return (uint8x8_t) __builtin_aarch64_uqshrn_nv8hi ((int16x8_t) __a, __b); -+ return __builtin_aarch64_uqshrn_nv8hi_uus ( __a, __b); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vqshrn_n_u32 (uint32x4_t __a, const int __b) - { -- return (uint16x4_t) __builtin_aarch64_uqshrn_nv4si ((int32x4_t) __a, __b); -+ return __builtin_aarch64_uqshrn_nv4si_uus ( __a, __b); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vqshrn_n_u64 (uint64x2_t __a, const int __b) - { -- return (uint32x2_t) __builtin_aarch64_uqshrn_nv2di ((int64x2_t) __a, __b); -+ return __builtin_aarch64_uqshrn_nv2di_uus ( __a, __b); - } - - __extension__ static __inline int8_t __attribute__ ((__always_inline__)) -@@ -22280,19 +21037,19 @@ - __extension__ static __inline uint8_t __attribute__ ((__always_inline__)) - vqshrnh_n_u16 (uint16_t __a, const int __b) - { -- return (uint8_t) __builtin_aarch64_uqshrn_nhi (__a, __b); -+ return __builtin_aarch64_uqshrn_nhi_uus (__a, __b); - } - - __extension__ static __inline uint16_t __attribute__ ((__always_inline__)) - vqshrns_n_u32 (uint32_t __a, const int __b) - { -- return (uint16_t) __builtin_aarch64_uqshrn_nsi (__a, __b); -+ return __builtin_aarch64_uqshrn_nsi_uus (__a, __b); - } - - __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) - vqshrnd_n_u64 (uint64x1_t __a, const int __b) - { -- return (uint32_t) __builtin_aarch64_uqshrn_ndi (__a, __b); -+ return __builtin_aarch64_uqshrn_ndi_uus (__a, __b); - } - - /* vqshrun */ -@@ -22362,27 +21119,66 @@ - __extension__ static __inline uint8_t __attribute__ ((__always_inline__)) - vqsubb_u8 (uint8_t __a, uint8_t __b) - { -- return (uint8_t) __builtin_aarch64_uqsubqi (__a, __b); -+ return (uint8_t) __builtin_aarch64_uqsubqi_uuu (__a, __b); - } - - __extension__ static __inline uint16_t __attribute__ ((__always_inline__)) - vqsubh_u16 (uint16_t __a, uint16_t __b) - { -- return (uint16_t) __builtin_aarch64_uqsubhi (__a, __b); -+ return (uint16_t) __builtin_aarch64_uqsubhi_uuu (__a, __b); - } - - __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) - vqsubs_u32 (uint32_t __a, uint32_t __b) - { -- 
return (uint32_t) __builtin_aarch64_uqsubsi (__a, __b); -+ return (uint32_t) __builtin_aarch64_uqsubsi_uuu (__a, __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vqsubd_u64 (uint64x1_t __a, uint64x1_t __b) - { -- return (uint64x1_t) __builtin_aarch64_uqsubdi (__a, __b); -+ return (uint64x1_t) __builtin_aarch64_uqsubdi_uuu ((uint64_t) __a, -+ (uint64_t) __b); - } - -+/* vrbit */ -+ -+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+vrbit_p8 (poly8x8_t __a) -+{ -+ return (poly8x8_t) __builtin_aarch64_rbitv8qi ((int8x8_t) __a); -+} -+ -+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+vrbit_s8 (int8x8_t __a) -+{ -+ return __builtin_aarch64_rbitv8qi (__a); -+} -+ -+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+vrbit_u8 (uint8x8_t __a) -+{ -+ return (uint8x8_t) __builtin_aarch64_rbitv8qi ((int8x8_t) __a); -+} -+ -+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+vrbitq_p8 (poly8x16_t __a) -+{ -+ return (poly8x16_t) __builtin_aarch64_rbitv16qi ((int8x16_t)__a); -+} -+ -+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+vrbitq_s8 (int8x16_t __a) -+{ -+ return __builtin_aarch64_rbitv16qi (__a); -+} -+ -+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+vrbitq_u8 (uint8x16_t __a) -+{ -+ return (uint8x16_t) __builtin_aarch64_rbitv16qi ((int8x16_t) __a); -+} -+ - /* vrecpe */ - - __extension__ static __inline float32_t __attribute__ ((__always_inline__)) -@@ -22461,6 +21257,234 @@ - return __builtin_aarch64_frecpxdf (__a); - } - -+ -+/* vrev */ -+ -+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+vrev16_p8 (poly8x8_t a) -+{ -+ return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); -+} -+ -+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+vrev16_s8 (int8x8_t a) -+{ -+ return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); -+} -+ -+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+vrev16_u8 (uint8x8_t a) -+{ -+ return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); -+} -+ -+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+vrev16q_p8 (poly8x16_t a) -+{ -+ return __builtin_shuffle (a, -+ (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }); -+} -+ -+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+vrev16q_s8 (int8x16_t a) -+{ -+ return __builtin_shuffle (a, -+ (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }); -+} -+ -+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+vrev16q_u8 (uint8x16_t a) -+{ -+ return __builtin_shuffle (a, -+ (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }); -+} -+ -+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+vrev32_p8 (poly8x8_t a) -+{ -+ return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); -+} -+ -+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+vrev32_p16 (poly16x4_t a) -+{ -+ return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 }); -+} -+ -+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+vrev32_s8 (int8x8_t a) -+{ -+ return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); -+} -+ -+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 
-+vrev32_s16 (int16x4_t a) -+{ -+ return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 }); -+} -+ -+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+vrev32_u8 (uint8x8_t a) -+{ -+ return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); -+} -+ -+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+vrev32_u16 (uint16x4_t a) -+{ -+ return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 }); -+} -+ -+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+vrev32q_p8 (poly8x16_t a) -+{ -+ return __builtin_shuffle (a, -+ (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }); -+} -+ -+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+vrev32q_p16 (poly16x8_t a) -+{ -+ return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); -+} -+ -+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+vrev32q_s8 (int8x16_t a) -+{ -+ return __builtin_shuffle (a, -+ (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }); -+} -+ -+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+vrev32q_s16 (int16x8_t a) -+{ -+ return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); -+} -+ -+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+vrev32q_u8 (uint8x16_t a) -+{ -+ return __builtin_shuffle (a, -+ (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }); -+} -+ -+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+vrev32q_u16 (uint16x8_t a) -+{ -+ return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); -+} -+ -+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+vrev64_f32 (float32x2_t a) -+{ -+ return __builtin_shuffle (a, (uint32x2_t) { 1, 0 }); -+} -+ -+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+vrev64_p8 (poly8x8_t a) -+{ -+ return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); -+} -+ -+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+vrev64_p16 (poly16x4_t a) -+{ -+ return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 }); -+} -+ -+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+vrev64_s8 (int8x8_t a) -+{ -+ return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); -+} -+ -+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+vrev64_s16 (int16x4_t a) -+{ -+ return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 }); -+} -+ -+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+vrev64_s32 (int32x2_t a) -+{ -+ return __builtin_shuffle (a, (uint32x2_t) { 1, 0 }); -+} -+ -+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+vrev64_u8 (uint8x8_t a) -+{ -+ return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); -+} -+ -+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+vrev64_u16 (uint16x4_t a) -+{ -+ return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 }); -+} -+ -+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+vrev64_u32 (uint32x2_t a) -+{ -+ return __builtin_shuffle (a, (uint32x2_t) { 1, 0 }); -+} -+ -+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+vrev64q_f32 (float32x4_t a) -+{ -+ return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 }); -+} -+ -+__extension__ static __inline 
poly8x16_t __attribute__ ((__always_inline__)) -+vrev64q_p8 (poly8x16_t a) -+{ -+ return __builtin_shuffle (a, -+ (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }); -+} -+ -+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+vrev64q_p16 (poly16x8_t a) -+{ -+ return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); -+} -+ -+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+vrev64q_s8 (int8x16_t a) -+{ -+ return __builtin_shuffle (a, -+ (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }); -+} -+ -+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+vrev64q_s16 (int16x8_t a) -+{ -+ return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); -+} -+ -+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+vrev64q_s32 (int32x4_t a) -+{ -+ return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 }); -+} -+ -+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+vrev64q_u8 (uint8x16_t a) -+{ -+ return __builtin_shuffle (a, -+ (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }); -+} -+ -+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+vrev64q_u16 (uint16x8_t a) -+{ -+ return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); -+} -+ -+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+vrev64q_u32 (uint32x4_t a) -+{ -+ return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 }); -+} -+ - /* vrnd */ - - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -@@ -22469,6 +21493,12 @@ - return __builtin_aarch64_btruncv2sf (__a); - } - -+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -+vrnd_f64 (float64x1_t __a) -+{ -+ return vset_lane_f64 (__builtin_trunc (vget_lane_f64 (__a, 0)), __a, 0); -+} -+ - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vrndq_f32 (float32x4_t __a) - { -@@ -22489,6 +21519,12 @@ - return __builtin_aarch64_roundv2sf (__a); - } - -+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -+vrnda_f64 (float64x1_t __a) -+{ -+ return vset_lane_f64 (__builtin_round (vget_lane_f64 (__a, 0)), __a, 0); -+} -+ - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vrndaq_f32 (float32x4_t __a) - { -@@ -22509,6 +21545,12 @@ - return __builtin_aarch64_nearbyintv2sf (__a); - } - -+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -+vrndi_f64 (float64x1_t __a) -+{ -+ return vset_lane_f64 (__builtin_nearbyint (vget_lane_f64 (__a, 0)), __a, 0); -+} -+ - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vrndiq_f32 (float32x4_t __a) - { -@@ -22529,6 +21571,12 @@ - return __builtin_aarch64_floorv2sf (__a); - } - -+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -+vrndm_f64 (float64x1_t __a) -+{ -+ return vset_lane_f64 (__builtin_floor (vget_lane_f64 (__a, 0)), __a, 0); -+} -+ - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vrndmq_f32 (float32x4_t __a) - { -@@ -22548,6 +21596,13 @@ - { - return __builtin_aarch64_frintnv2sf (__a); - } -+ -+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -+vrndn_f64 (float64x1_t __a) -+{ -+ return __builtin_aarch64_frintndf (__a); -+} -+ - __extension__ static __inline float32x4_t __attribute__ 
((__always_inline__)) - vrndnq_f32 (float32x4_t __a) - { -@@ -22568,6 +21623,12 @@ - return __builtin_aarch64_ceilv2sf (__a); - } - -+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -+vrndp_f64 (float64x1_t __a) -+{ -+ return vset_lane_f64 (__builtin_ceil (vget_lane_f64 (__a, 0)), __a, 0); -+} -+ - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vrndpq_f32 (float32x4_t __a) - { -@@ -22588,6 +21649,12 @@ - return __builtin_aarch64_rintv2sf (__a); - } - -+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -+vrndx_f64 (float64x1_t __a) -+{ -+ return vset_lane_f64 (__builtin_rint (vget_lane_f64 (__a, 0)), __a, 0); -+} -+ - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vrndxq_f32 (float32x4_t __a) - { -@@ -22629,25 +21696,25 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vrshl_u8 (uint8x8_t __a, int8x8_t __b) - { -- return (uint8x8_t) __builtin_aarch64_urshlv8qi ((int8x8_t) __a, __b); -+ return __builtin_aarch64_urshlv8qi_uus (__a, __b); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vrshl_u16 (uint16x4_t __a, int16x4_t __b) - { -- return (uint16x4_t) __builtin_aarch64_urshlv4hi ((int16x4_t) __a, __b); -+ return __builtin_aarch64_urshlv4hi_uus (__a, __b); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vrshl_u32 (uint32x2_t __a, int32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_urshlv2si ((int32x2_t) __a, __b); -+ return __builtin_aarch64_urshlv2si_uus (__a, __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vrshl_u64 (uint64x1_t __a, int64x1_t __b) - { -- return (uint64x1_t) __builtin_aarch64_urshldi ((int64x1_t) __a, __b); -+ return __builtin_aarch64_urshldi_uus (__a, __b); - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -@@ -22677,25 +21744,25 @@ - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vrshlq_u8 (uint8x16_t __a, int8x16_t __b) - { -- return (uint8x16_t) __builtin_aarch64_urshlv16qi ((int8x16_t) __a, __b); -+ return __builtin_aarch64_urshlv16qi_uus (__a, __b); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vrshlq_u16 (uint16x8_t __a, int16x8_t __b) - { -- return (uint16x8_t) __builtin_aarch64_urshlv8hi ((int16x8_t) __a, __b); -+ return __builtin_aarch64_urshlv8hi_uus (__a, __b); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vrshlq_u32 (uint32x4_t __a, int32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_urshlv4si ((int32x4_t) __a, __b); -+ return __builtin_aarch64_urshlv4si_uus (__a, __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vrshlq_u64 (uint64x2_t __a, int64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_urshlv2di ((int64x2_t) __a, __b); -+ return __builtin_aarch64_urshlv2di_uus (__a, __b); - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -@@ -22707,7 +21774,7 @@ - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vrshld_u64 (uint64x1_t __a, uint64x1_t __b) - { -- return (uint64x1_t) __builtin_aarch64_urshldi (__a, __b); -+ return __builtin_aarch64_urshldi_uus (__a, __b); - } - - /* vrshr */ -@@ -22739,25 +21806,25 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vrshr_n_u8 (uint8x8_t __a, 
const int __b) - { -- return (uint8x8_t) __builtin_aarch64_urshr_nv8qi ((int8x8_t) __a, __b); -+ return __builtin_aarch64_urshr_nv8qi_uus (__a, __b); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vrshr_n_u16 (uint16x4_t __a, const int __b) - { -- return (uint16x4_t) __builtin_aarch64_urshr_nv4hi ((int16x4_t) __a, __b); -+ return __builtin_aarch64_urshr_nv4hi_uus (__a, __b); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vrshr_n_u32 (uint32x2_t __a, const int __b) - { -- return (uint32x2_t) __builtin_aarch64_urshr_nv2si ((int32x2_t) __a, __b); -+ return __builtin_aarch64_urshr_nv2si_uus (__a, __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vrshr_n_u64 (uint64x1_t __a, const int __b) - { -- return (uint64x1_t) __builtin_aarch64_urshr_ndi ((int64x1_t) __a, __b); -+ return __builtin_aarch64_urshr_ndi_uus (__a, __b); - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -@@ -22787,25 +21854,25 @@ - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vrshrq_n_u8 (uint8x16_t __a, const int __b) - { -- return (uint8x16_t) __builtin_aarch64_urshr_nv16qi ((int8x16_t) __a, __b); -+ return __builtin_aarch64_urshr_nv16qi_uus (__a, __b); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vrshrq_n_u16 (uint16x8_t __a, const int __b) - { -- return (uint16x8_t) __builtin_aarch64_urshr_nv8hi ((int16x8_t) __a, __b); -+ return __builtin_aarch64_urshr_nv8hi_uus (__a, __b); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vrshrq_n_u32 (uint32x4_t __a, const int __b) - { -- return (uint32x4_t) __builtin_aarch64_urshr_nv4si ((int32x4_t) __a, __b); -+ return __builtin_aarch64_urshr_nv4si_uus (__a, __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vrshrq_n_u64 (uint64x2_t __a, const int __b) - { -- return (uint64x2_t) __builtin_aarch64_urshr_nv2di ((int64x2_t) __a, __b); -+ return __builtin_aarch64_urshr_nv2di_uus (__a, __b); - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -@@ -22817,7 +21884,7 @@ - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vrshrd_n_u64 (uint64x1_t __a, const int __b) - { -- return (uint64x1_t) __builtin_aarch64_urshr_ndi (__a, __b); -+ return __builtin_aarch64_urshr_ndi_uus (__a, __b); - } - - /* vrsra */ -@@ -22849,29 +21916,25 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vrsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) - { -- return (uint8x8_t) __builtin_aarch64_ursra_nv8qi ((int8x8_t) __a, -- (int8x8_t) __b, __c); -+ return __builtin_aarch64_ursra_nv8qi_uuus (__a, __b, __c); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vrsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) - { -- return (uint16x4_t) __builtin_aarch64_ursra_nv4hi ((int16x4_t) __a, -- (int16x4_t) __b, __c); -+ return __builtin_aarch64_ursra_nv4hi_uuus (__a, __b, __c); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vrsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) - { -- return (uint32x2_t) __builtin_aarch64_ursra_nv2si ((int32x2_t) __a, -- (int32x2_t) __b, __c); -+ return __builtin_aarch64_ursra_nv2si_uuus (__a, __b, __c); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vrsra_n_u64 
(uint64x1_t __a, uint64x1_t __b, const int __c) - { -- return (uint64x1_t) __builtin_aarch64_ursra_ndi ((int64x1_t) __a, -- (int64x1_t) __b, __c); -+ return __builtin_aarch64_ursra_ndi_uuus (__a, __b, __c); - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -@@ -22901,29 +21964,25 @@ - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vrsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) - { -- return (uint8x16_t) __builtin_aarch64_ursra_nv16qi ((int8x16_t) __a, -- (int8x16_t) __b, __c); -+ return __builtin_aarch64_ursra_nv16qi_uuus (__a, __b, __c); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vrsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) - { -- return (uint16x8_t) __builtin_aarch64_ursra_nv8hi ((int16x8_t) __a, -- (int16x8_t) __b, __c); -+ return __builtin_aarch64_ursra_nv8hi_uuus (__a, __b, __c); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vrsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) - { -- return (uint32x4_t) __builtin_aarch64_ursra_nv4si ((int32x4_t) __a, -- (int32x4_t) __b, __c); -+ return __builtin_aarch64_ursra_nv4si_uuus (__a, __b, __c); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) - { -- return (uint64x2_t) __builtin_aarch64_ursra_nv2di ((int64x2_t) __a, -- (int64x2_t) __b, __c); -+ return __builtin_aarch64_ursra_nv2di_uuus (__a, __b, __c); - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -@@ -22935,7 +21994,7 @@ - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vrsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) - { -- return (uint64x1_t) __builtin_aarch64_ursra_ndi (__a, __b, __c); -+ return __builtin_aarch64_ursra_ndi_uuus (__a, __b, __c); - } - - #ifdef __ARM_FEATURE_CRYPTO -@@ -23128,109 +22187,109 @@ - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vshl_s8 (int8x8_t __a, int8x8_t __b) - { -- return (int8x8_t) __builtin_aarch64_sshlv8qi (__a, __b); -+ return __builtin_aarch64_sshlv8qi (__a, __b); - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vshl_s16 (int16x4_t __a, int16x4_t __b) - { -- return (int16x4_t) __builtin_aarch64_sshlv4hi (__a, __b); -+ return __builtin_aarch64_sshlv4hi (__a, __b); - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vshl_s32 (int32x2_t __a, int32x2_t __b) - { -- return (int32x2_t) __builtin_aarch64_sshlv2si (__a, __b); -+ return __builtin_aarch64_sshlv2si (__a, __b); - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vshl_s64 (int64x1_t __a, int64x1_t __b) - { -- return (int64x1_t) __builtin_aarch64_sshldi (__a, __b); -+ return __builtin_aarch64_sshldi (__a, __b); - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vshl_u8 (uint8x8_t __a, int8x8_t __b) - { -- return (uint8x8_t) __builtin_aarch64_ushlv8qi ((int8x8_t) __a, __b); -+ return __builtin_aarch64_ushlv8qi_uus (__a, __b); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vshl_u16 (uint16x4_t __a, int16x4_t __b) - { -- return (uint16x4_t) __builtin_aarch64_ushlv4hi ((int16x4_t) __a, __b); -+ return __builtin_aarch64_ushlv4hi_uus (__a, __b); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - 
vshl_u32 (uint32x2_t __a, int32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_ushlv2si ((int32x2_t) __a, __b); -+ return __builtin_aarch64_ushlv2si_uus (__a, __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vshl_u64 (uint64x1_t __a, int64x1_t __b) - { -- return (uint64x1_t) __builtin_aarch64_ushldi ((int64x1_t) __a, __b); -+ return __builtin_aarch64_ushldi_uus (__a, __b); - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vshlq_s8 (int8x16_t __a, int8x16_t __b) - { -- return (int8x16_t) __builtin_aarch64_sshlv16qi (__a, __b); -+ return __builtin_aarch64_sshlv16qi (__a, __b); - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vshlq_s16 (int16x8_t __a, int16x8_t __b) - { -- return (int16x8_t) __builtin_aarch64_sshlv8hi (__a, __b); -+ return __builtin_aarch64_sshlv8hi (__a, __b); - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vshlq_s32 (int32x4_t __a, int32x4_t __b) - { -- return (int32x4_t) __builtin_aarch64_sshlv4si (__a, __b); -+ return __builtin_aarch64_sshlv4si (__a, __b); - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vshlq_s64 (int64x2_t __a, int64x2_t __b) - { -- return (int64x2_t) __builtin_aarch64_sshlv2di (__a, __b); -+ return __builtin_aarch64_sshlv2di (__a, __b); - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vshlq_u8 (uint8x16_t __a, int8x16_t __b) - { -- return (uint8x16_t) __builtin_aarch64_ushlv16qi ((int8x16_t) __a, __b); -+ return __builtin_aarch64_ushlv16qi_uus (__a, __b); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vshlq_u16 (uint16x8_t __a, int16x8_t __b) - { -- return (uint16x8_t) __builtin_aarch64_ushlv8hi ((int16x8_t) __a, __b); -+ return __builtin_aarch64_ushlv8hi_uus (__a, __b); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vshlq_u32 (uint32x4_t __a, int32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_ushlv4si ((int32x4_t) __a, __b); -+ return __builtin_aarch64_ushlv4si_uus (__a, __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vshlq_u64 (uint64x2_t __a, int64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_ushlv2di ((int64x2_t) __a, __b); -+ return __builtin_aarch64_ushlv2di_uus (__a, __b); - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vshld_s64 (int64x1_t __a, int64x1_t __b) - { -- return (int64x1_t) __builtin_aarch64_sshldi (__a, __b); -+ return __builtin_aarch64_sshldi (__a, __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vshld_u64 (uint64x1_t __a, uint64x1_t __b) - { -- return (uint64x1_t) __builtin_aarch64_ushldi (__a, __b); -+ return __builtin_aarch64_ushldi_uus (__a, __b); - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -@@ -23290,19 +22349,19 @@ - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vshll_n_u8 (uint8x8_t __a, const int __b) - { -- return (uint16x8_t) __builtin_aarch64_ushll_nv8qi ((int8x8_t) __a, __b); -+ return __builtin_aarch64_ushll_nv8qi_uus (__a, __b); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vshll_n_u16 (uint16x4_t __a, const int __b) - { -- return (uint32x4_t) __builtin_aarch64_ushll_nv4hi ((int16x4_t) __a, __b); -+ return __builtin_aarch64_ushll_nv4hi_uus (__a, __b); - 
} - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vshll_n_u32 (uint32x2_t __a, const int __b) - { -- return (uint64x2_t) __builtin_aarch64_ushll_nv2si ((int32x2_t) __a, __b); -+ return __builtin_aarch64_ushll_nv2si_uus (__a, __b); - } - - /* vshr */ -@@ -23444,29 +22503,25 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vsli_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) - { -- return (uint8x8_t) __builtin_aarch64_usli_nv8qi ((int8x8_t) __a, -- (int8x8_t) __b, __c); -+ return __builtin_aarch64_usli_nv8qi_uuus (__a, __b, __c); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vsli_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) - { -- return (uint16x4_t) __builtin_aarch64_usli_nv4hi ((int16x4_t) __a, -- (int16x4_t) __b, __c); -+ return __builtin_aarch64_usli_nv4hi_uuus (__a, __b, __c); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vsli_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) - { -- return (uint32x2_t) __builtin_aarch64_usli_nv2si ((int32x2_t) __a, -- (int32x2_t) __b, __c); -+ return __builtin_aarch64_usli_nv2si_uuus (__a, __b, __c); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vsli_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) - { -- return (uint64x1_t) __builtin_aarch64_usli_ndi ((int64x1_t) __a, -- (int64x1_t) __b, __c); -+ return __builtin_aarch64_usli_ndi_uuus (__a, __b, __c); - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -@@ -23496,29 +22551,25 @@ - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) - { -- return (uint8x16_t) __builtin_aarch64_usli_nv16qi ((int8x16_t) __a, -- (int8x16_t) __b, __c); -+ return __builtin_aarch64_usli_nv16qi_uuus (__a, __b, __c); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) - { -- return (uint16x8_t) __builtin_aarch64_usli_nv8hi ((int16x8_t) __a, -- (int16x8_t) __b, __c); -+ return __builtin_aarch64_usli_nv8hi_uuus (__a, __b, __c); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) - { -- return (uint32x4_t) __builtin_aarch64_usli_nv4si ((int32x4_t) __a, -- (int32x4_t) __b, __c); -+ return __builtin_aarch64_usli_nv4si_uuus (__a, __b, __c); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) - { -- return (uint64x2_t) __builtin_aarch64_usli_nv2di ((int64x2_t) __a, -- (int64x2_t) __b, __c); -+ return __builtin_aarch64_usli_nv2di_uuus (__a, __b, __c); - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -@@ -23530,7 +22581,7 @@ - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vslid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) - { -- return (uint64x1_t) __builtin_aarch64_usli_ndi (__a, __b, __c); -+ return __builtin_aarch64_usli_ndi_uuus (__a, __b, __c); - } - - /* vsqadd */ -@@ -23538,80 +22589,73 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vsqadd_u8 (uint8x8_t __a, int8x8_t __b) - { -- return (uint8x8_t) __builtin_aarch64_usqaddv8qi ((int8x8_t) __a, -- (int8x8_t) __b); -+ return __builtin_aarch64_usqaddv8qi_uus (__a, 
__b); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vsqadd_u16 (uint16x4_t __a, int16x4_t __b) - { -- return (uint16x4_t) __builtin_aarch64_usqaddv4hi ((int16x4_t) __a, -- (int16x4_t) __b); -+ return __builtin_aarch64_usqaddv4hi_uus (__a, __b); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vsqadd_u32 (uint32x2_t __a, int32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_usqaddv2si ((int32x2_t) __a, -- (int32x2_t) __b); -+ return __builtin_aarch64_usqaddv2si_uus (__a, __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vsqadd_u64 (uint64x1_t __a, int64x1_t __b) - { -- return (uint64x1_t) __builtin_aarch64_usqadddi ((int64x1_t) __a, __b); -+ return __builtin_aarch64_usqadddi_uus (__a, __b); - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vsqaddq_u8 (uint8x16_t __a, int8x16_t __b) - { -- return (uint8x16_t) __builtin_aarch64_usqaddv16qi ((int8x16_t) __a, -- (int8x16_t) __b); -+ return __builtin_aarch64_usqaddv16qi_uus (__a, __b); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vsqaddq_u16 (uint16x8_t __a, int16x8_t __b) - { -- return (uint16x8_t) __builtin_aarch64_usqaddv8hi ((int16x8_t) __a, -- (int16x8_t) __b); -+ return __builtin_aarch64_usqaddv8hi_uus (__a, __b); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vsqaddq_u32 (uint32x4_t __a, int32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_usqaddv4si ((int32x4_t) __a, -- (int32x4_t) __b); -+ return __builtin_aarch64_usqaddv4si_uus (__a, __b); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vsqaddq_u64 (uint64x2_t __a, int64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_usqaddv2di ((int64x2_t) __a, -- (int64x2_t) __b); -+ return __builtin_aarch64_usqaddv2di_uus (__a, __b); - } - - __extension__ static __inline uint8_t __attribute__ ((__always_inline__)) - vsqaddb_u8 (uint8_t __a, int8_t __b) - { -- return (uint8_t) __builtin_aarch64_usqaddqi ((int8_t) __a, __b); -+ return __builtin_aarch64_usqaddqi_uus (__a, __b); - } - - __extension__ static __inline uint16_t __attribute__ ((__always_inline__)) - vsqaddh_u16 (uint16_t __a, int16_t __b) - { -- return (uint16_t) __builtin_aarch64_usqaddhi ((int16_t) __a, __b); -+ return __builtin_aarch64_usqaddhi_uus (__a, __b); - } - - __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) - vsqadds_u32 (uint32_t __a, int32_t __b) - { -- return (uint32_t) __builtin_aarch64_usqaddsi ((int32_t) __a, __b); -+ return __builtin_aarch64_usqaddsi_uus (__a, __b); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vsqaddd_u64 (uint64x1_t __a, int64x1_t __b) - { -- return (uint64x1_t) __builtin_aarch64_usqadddi ((int64x1_t) __a, __b); -+ return __builtin_aarch64_usqadddi_uus (__a, __b); - } - - /* vsqrt */ -@@ -23662,29 +22706,25 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) - { -- return (uint8x8_t) __builtin_aarch64_usra_nv8qi ((int8x8_t) __a, -- (int8x8_t) __b, __c); -+ return __builtin_aarch64_usra_nv8qi_uuus (__a, __b, __c); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) - { -- return (uint16x4_t) __builtin_aarch64_usra_nv4hi ((int16x4_t) __a, -- (int16x4_t) __b, __c); -+ 
return __builtin_aarch64_usra_nv4hi_uuus (__a, __b, __c); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) - { -- return (uint32x2_t) __builtin_aarch64_usra_nv2si ((int32x2_t) __a, -- (int32x2_t) __b, __c); -+ return __builtin_aarch64_usra_nv2si_uuus (__a, __b, __c); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) - { -- return (uint64x1_t) __builtin_aarch64_usra_ndi ((int64x1_t) __a, -- (int64x1_t) __b, __c); -+ return __builtin_aarch64_usra_ndi_uuus (__a, __b, __c); - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -@@ -23714,29 +22754,25 @@ - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) - { -- return (uint8x16_t) __builtin_aarch64_usra_nv16qi ((int8x16_t) __a, -- (int8x16_t) __b, __c); -+ return __builtin_aarch64_usra_nv16qi_uuus (__a, __b, __c); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) - { -- return (uint16x8_t) __builtin_aarch64_usra_nv8hi ((int16x8_t) __a, -- (int16x8_t) __b, __c); -+ return __builtin_aarch64_usra_nv8hi_uuus (__a, __b, __c); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) - { -- return (uint32x4_t) __builtin_aarch64_usra_nv4si ((int32x4_t) __a, -- (int32x4_t) __b, __c); -+ return __builtin_aarch64_usra_nv4si_uuus (__a, __b, __c); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) - { -- return (uint64x2_t) __builtin_aarch64_usra_nv2di ((int64x2_t) __a, -- (int64x2_t) __b, __c); -+ return __builtin_aarch64_usra_nv2di_uuus (__a, __b, __c); - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -@@ -23748,7 +22784,7 @@ - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) - { -- return (uint64x1_t) __builtin_aarch64_usra_ndi (__a, __b, __c); -+ return __builtin_aarch64_usra_ndi_uuus (__a, __b, __c); - } - - /* vsri */ -@@ -23780,29 +22816,25 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) - { -- return (uint8x8_t) __builtin_aarch64_usri_nv8qi ((int8x8_t) __a, -- (int8x8_t) __b, __c); -+ return __builtin_aarch64_usri_nv8qi_uuus (__a, __b, __c); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) - { -- return (uint16x4_t) __builtin_aarch64_usri_nv4hi ((int16x4_t) __a, -- (int16x4_t) __b, __c); -+ return __builtin_aarch64_usri_nv4hi_uuus (__a, __b, __c); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) - { -- return (uint32x2_t) __builtin_aarch64_usri_nv2si ((int32x2_t) __a, -- (int32x2_t) __b, __c); -+ return __builtin_aarch64_usri_nv2si_uuus (__a, __b, __c); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) - { -- return (uint64x1_t) __builtin_aarch64_usri_ndi ((int64x1_t) 
__a, -- (int64x1_t) __b, __c); -+ return __builtin_aarch64_usri_ndi_uuus (__a, __b, __c); - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -@@ -23832,29 +22864,25 @@ - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) - { -- return (uint8x16_t) __builtin_aarch64_usri_nv16qi ((int8x16_t) __a, -- (int8x16_t) __b, __c); -+ return __builtin_aarch64_usri_nv16qi_uuus (__a, __b, __c); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) - { -- return (uint16x8_t) __builtin_aarch64_usri_nv8hi ((int16x8_t) __a, -- (int16x8_t) __b, __c); -+ return __builtin_aarch64_usri_nv8hi_uuus (__a, __b, __c); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) - { -- return (uint32x4_t) __builtin_aarch64_usri_nv4si ((int32x4_t) __a, -- (int32x4_t) __b, __c); -+ return __builtin_aarch64_usri_nv4si_uuus (__a, __b, __c); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) - { -- return (uint64x2_t) __builtin_aarch64_usri_nv2di ((int64x2_t) __a, -- (int64x2_t) __b, __c); -+ return __builtin_aarch64_usri_nv2di_uuus (__a, __b, __c); - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -@@ -23866,7 +22894,7 @@ - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vsrid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) - { -- return (uint64x1_t) __builtin_aarch64_usri_ndi (__a, __b, __c); -+ return __builtin_aarch64_usri_ndi_uuus (__a, __b, __c); - } - - /* vst1 */ -@@ -24970,6 +23998,438 @@ - - /* vtrn */ - -+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+vtrn1_f32 (float32x2_t __a, float32x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); -+#endif -+} -+ -+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+vtrn1_p8 (poly8x8_t __a, poly8x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); -+#endif -+} -+ -+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+vtrn1_p16 (poly16x4_t __a, poly16x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6}); -+#endif -+} -+ -+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+vtrn1_s8 (int8x8_t __a, int8x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); -+#endif -+} -+ -+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+vtrn1_s16 (int16x4_t __a, int16x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6}); -+#endif -+} -+ -+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+vtrn1_s32 (int32x2_t __a, 
int32x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); -+#endif -+} -+ -+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+vtrn1_u8 (uint8x8_t __a, uint8x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); -+#endif -+} -+ -+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+vtrn1_u16 (uint16x4_t __a, uint16x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6}); -+#endif -+} -+ -+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+vtrn1_u32 (uint32x2_t __a, uint32x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); -+#endif -+} -+ -+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+vtrn1q_f32 (float32x4_t __a, float32x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6}); -+#endif -+} -+ -+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -+vtrn1q_f64 (float64x2_t __a, float64x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); -+#endif -+} -+ -+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+vtrn1q_p8 (poly8x16_t __a, poly8x16_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30}); -+#endif -+} -+ -+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+vtrn1q_p16 (poly16x8_t __a, poly16x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); -+#endif -+} -+ -+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+vtrn1q_s8 (int8x16_t __a, int8x16_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30}); -+#endif -+} -+ -+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+vtrn1q_s16 (int16x8_t __a, int16x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); -+#endif -+} -+ -+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+vtrn1q_s32 (int32x4_t __a, int32x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6}); -+#endif -+} -+ 
-+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+vtrn1q_s64 (int64x2_t __a, int64x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); -+#endif -+} -+ -+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+vtrn1q_u8 (uint8x16_t __a, uint8x16_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30}); -+#endif -+} -+ -+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+vtrn1q_u16 (uint16x8_t __a, uint16x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); -+#endif -+} -+ -+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+vtrn1q_u32 (uint32x4_t __a, uint32x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6}); -+#endif -+} -+ -+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+vtrn1q_u64 (uint64x2_t __a, uint64x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); -+#endif -+} -+ -+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+vtrn2_f32 (float32x2_t __a, float32x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); -+#endif -+} -+ -+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+vtrn2_p8 (poly8x8_t __a, poly8x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); -+#endif -+} -+ -+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+vtrn2_p16 (poly16x4_t __a, poly16x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7}); -+#endif -+} -+ -+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+vtrn2_s8 (int8x8_t __a, int8x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); -+#endif -+} -+ -+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+vtrn2_s16 (int16x4_t __a, int16x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7}); -+#endif -+} -+ -+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+vtrn2_s32 (int32x2_t __a, int32x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); -+#endif -+} -+ -+__extension__ static 
__inline uint8x8_t __attribute__ ((__always_inline__)) -+vtrn2_u8 (uint8x8_t __a, uint8x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); -+#endif -+} -+ -+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+vtrn2_u16 (uint16x4_t __a, uint16x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7}); -+#endif -+} -+ -+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+vtrn2_u32 (uint32x2_t __a, uint32x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); -+#endif -+} -+ -+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+vtrn2q_f32 (float32x4_t __a, float32x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7}); -+#endif -+} -+ -+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -+vtrn2q_f64 (float64x2_t __a, float64x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); -+#endif -+} -+ -+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+vtrn2q_p8 (poly8x16_t __a, poly8x16_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31}); -+#endif -+} -+ -+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+vtrn2q_p16 (poly16x8_t __a, poly16x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); -+#endif -+} -+ -+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+vtrn2q_s8 (int8x16_t __a, int8x16_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31}); -+#endif -+} -+ -+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+vtrn2q_s16 (int16x8_t __a, int16x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); -+#endif -+} -+ -+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+vtrn2q_s32 (int32x4_t __a, int32x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7}); -+#endif -+} -+ -+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+vtrn2q_s64 (int64x2_t __a, int64x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); -+#else -+ 
return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); -+#endif -+} -+ -+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+vtrn2q_u8 (uint8x16_t __a, uint8x16_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31}); -+#endif -+} -+ -+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+vtrn2q_u16 (uint16x8_t __a, uint16x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); -+#endif -+} -+ -+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+vtrn2q_u32 (uint32x4_t __a, uint32x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7}); -+#endif -+} -+ -+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+vtrn2q_u64 (uint64x2_t __a, uint64x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); -+#endif -+} -+ - __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) - vtrn_f32 (float32x2_t a, float32x2_t b) - { -@@ -25083,19 +24543,19 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vtst_s8 (int8x8_t __a, int8x8_t __b) - { -- return (uint8x8_t) __builtin_aarch64_cmtstv8qi (__a, __b); -+ return (uint8x8_t) ((__a & __b) != 0); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vtst_s16 (int16x4_t __a, int16x4_t __b) - { -- return (uint16x4_t) __builtin_aarch64_cmtstv4hi (__a, __b); -+ return (uint16x4_t) ((__a & __b) != 0); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vtst_s32 (int32x2_t __a, int32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_cmtstv2si (__a, __b); -+ return (uint32x2_t) ((__a & __b) != 0); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -25107,22 +24567,19 @@ - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vtst_u8 (uint8x8_t __a, uint8x8_t __b) - { -- return (uint8x8_t) __builtin_aarch64_cmtstv8qi ((int8x8_t) __a, -- (int8x8_t) __b); -+ return ((__a & __b) != 0); - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vtst_u16 (uint16x4_t __a, uint16x4_t __b) - { -- return (uint16x4_t) __builtin_aarch64_cmtstv4hi ((int16x4_t) __a, -- (int16x4_t) __b); -+ return ((__a & __b) != 0); - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vtst_u32 (uint32x2_t __a, uint32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_cmtstv2si ((int32x2_t) __a, -- (int32x2_t) __b); -+ return ((__a & __b) != 0); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -25134,53 +24591,49 @@ - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vtstq_s8 (int8x16_t __a, int8x16_t __b) - { -- return (uint8x16_t) __builtin_aarch64_cmtstv16qi (__a, __b); -+ return (uint8x16_t) ((__a & __b) != 0); - } - - __extension__ static __inline uint16x8_t __attribute__ 
((__always_inline__)) - vtstq_s16 (int16x8_t __a, int16x8_t __b) - { -- return (uint16x8_t) __builtin_aarch64_cmtstv8hi (__a, __b); -+ return (uint16x8_t) ((__a & __b) != 0); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vtstq_s32 (int32x4_t __a, int32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_cmtstv4si (__a, __b); -+ return (uint32x4_t) ((__a & __b) != 0); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vtstq_s64 (int64x2_t __a, int64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_cmtstv2di (__a, __b); -+ return (uint64x2_t) ((__a & __b) != __AARCH64_INT64_C (0)); - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vtstq_u8 (uint8x16_t __a, uint8x16_t __b) - { -- return (uint8x16_t) __builtin_aarch64_cmtstv16qi ((int8x16_t) __a, -- (int8x16_t) __b); -+ return ((__a & __b) != 0); - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vtstq_u16 (uint16x8_t __a, uint16x8_t __b) - { -- return (uint16x8_t) __builtin_aarch64_cmtstv8hi ((int16x8_t) __a, -- (int16x8_t) __b); -+ return ((__a & __b) != 0); - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vtstq_u32 (uint32x4_t __a, uint32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_cmtstv4si ((int32x4_t) __a, -- (int32x4_t) __b); -+ return ((__a & __b) != 0); - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vtstq_u64 (uint64x2_t __a, uint64x2_t __b) - { -- return (uint64x2_t) __builtin_aarch64_cmtstv2di ((int64x2_t) __a, -- (int64x2_t) __b); -+ return ((__a & __b) != __AARCH64_UINT64_C (0)); - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -@@ -25200,73 +24653,73 @@ - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vuqadd_s8 (int8x8_t __a, uint8x8_t __b) - { -- return (int8x8_t) __builtin_aarch64_suqaddv8qi (__a, (int8x8_t) __b); -+ return __builtin_aarch64_suqaddv8qi_ssu (__a, __b); - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vuqadd_s16 (int16x4_t __a, uint16x4_t __b) - { -- return (int16x4_t) __builtin_aarch64_suqaddv4hi (__a, (int16x4_t) __b); -+ return __builtin_aarch64_suqaddv4hi_ssu (__a, __b); - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vuqadd_s32 (int32x2_t __a, uint32x2_t __b) - { -- return (int32x2_t) __builtin_aarch64_suqaddv2si (__a, (int32x2_t) __b); -+ return __builtin_aarch64_suqaddv2si_ssu (__a, __b); - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vuqadd_s64 (int64x1_t __a, uint64x1_t __b) - { -- return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b); -+ return __builtin_aarch64_suqadddi_ssu (__a, __b); - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vuqaddq_s8 (int8x16_t __a, uint8x16_t __b) - { -- return (int8x16_t) __builtin_aarch64_suqaddv16qi (__a, (int8x16_t) __b); -+ return __builtin_aarch64_suqaddv16qi_ssu (__a, __b); - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vuqaddq_s16 (int16x8_t __a, uint16x8_t __b) - { -- return (int16x8_t) __builtin_aarch64_suqaddv8hi (__a, (int16x8_t) __b); -+ return __builtin_aarch64_suqaddv8hi_ssu (__a, __b); - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vuqaddq_s32 (int32x4_t __a, uint32x4_t __b) - { -- return (int32x4_t) 
__builtin_aarch64_suqaddv4si (__a, (int32x4_t) __b); -+ return __builtin_aarch64_suqaddv4si_ssu (__a, __b); - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vuqaddq_s64 (int64x2_t __a, uint64x2_t __b) - { -- return (int64x2_t) __builtin_aarch64_suqaddv2di (__a, (int64x2_t) __b); -+ return __builtin_aarch64_suqaddv2di_ssu (__a, __b); - } - - __extension__ static __inline int8_t __attribute__ ((__always_inline__)) - vuqaddb_s8 (int8_t __a, uint8_t __b) - { -- return (int8_t) __builtin_aarch64_suqaddqi (__a, (int8_t) __b); -+ return __builtin_aarch64_suqaddqi_ssu (__a, __b); - } - - __extension__ static __inline int16_t __attribute__ ((__always_inline__)) - vuqaddh_s16 (int16_t __a, uint16_t __b) - { -- return (int16_t) __builtin_aarch64_suqaddhi (__a, (int16_t) __b); -+ return __builtin_aarch64_suqaddhi_ssu (__a, __b); - } - - __extension__ static __inline int32_t __attribute__ ((__always_inline__)) - vuqadds_s32 (int32_t __a, uint32_t __b) - { -- return (int32_t) __builtin_aarch64_suqaddsi (__a, (int32_t) __b); -+ return __builtin_aarch64_suqaddsi_ssu (__a, __b); - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vuqaddd_s64 (int64x1_t __a, uint64x1_t __b) - { -- return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b); -+ return __builtin_aarch64_suqadddi_ssu (__a, __b); - } - - #define __DEFINTERLEAVE(op, rettype, intype, funcsuffix, Q) \ -@@ -25300,10 +24753,880 @@ - - /* vuzp */ - -+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+vuzp1_f32 (float32x2_t __a, float32x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); -+#endif -+} -+ -+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+vuzp1_p8 (poly8x8_t __a, poly8x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); -+#endif -+} -+ -+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+vuzp1_p16 (poly16x4_t __a, poly16x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6}); -+#endif -+} -+ -+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+vuzp1_s8 (int8x8_t __a, int8x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); -+#endif -+} -+ -+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+vuzp1_s16 (int16x4_t __a, int16x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6}); -+#endif -+} -+ -+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+vuzp1_s32 (int32x2_t __a, int32x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); -+#endif -+} -+ -+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+vuzp1_u8 (uint8x8_t __a, uint8x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, 
(uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); -+#endif -+} -+ -+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+vuzp1_u16 (uint16x4_t __a, uint16x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6}); -+#endif -+} -+ -+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+vuzp1_u32 (uint32x2_t __a, uint32x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); -+#endif -+} -+ -+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+vuzp1q_f32 (float32x4_t __a, float32x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6}); -+#endif -+} -+ -+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -+vuzp1q_f64 (float64x2_t __a, float64x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); -+#endif -+} -+ -+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+vuzp1q_p8 (poly8x16_t __a, poly8x16_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}); -+#endif -+} -+ -+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+vuzp1q_p16 (poly16x8_t __a, poly16x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); -+#endif -+} -+ -+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+vuzp1q_s8 (int8x16_t __a, int8x16_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}); -+#endif -+} -+ -+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+vuzp1q_s16 (int16x8_t __a, int16x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); -+#endif -+} -+ -+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+vuzp1q_s32 (int32x4_t __a, int32x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6}); -+#endif -+} -+ -+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+vuzp1q_s64 (int64x2_t __a, int64x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); -+#endif -+} -+ -+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+vuzp1q_u8 
(uint8x16_t __a, uint8x16_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}); -+#endif -+} -+ -+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+vuzp1q_u16 (uint16x8_t __a, uint16x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); -+#endif -+} -+ -+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+vuzp1q_u32 (uint32x4_t __a, uint32x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6}); -+#endif -+} -+ -+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+vuzp1q_u64 (uint64x2_t __a, uint64x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); -+#endif -+} -+ -+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+vuzp2_f32 (float32x2_t __a, float32x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); -+#endif -+} -+ -+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+vuzp2_p8 (poly8x8_t __a, poly8x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); -+#endif -+} -+ -+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+vuzp2_p16 (poly16x4_t __a, poly16x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7}); -+#endif -+} -+ -+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+vuzp2_s8 (int8x8_t __a, int8x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); -+#endif -+} -+ -+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+vuzp2_s16 (int16x4_t __a, int16x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7}); -+#endif -+} -+ -+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+vuzp2_s32 (int32x2_t __a, int32x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); -+#endif -+} -+ -+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+vuzp2_u8 (uint8x8_t __a, uint8x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); -+#endif -+} -+ -+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 
-+vuzp2_u16 (uint16x4_t __a, uint16x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7}); -+#endif -+} -+ -+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+vuzp2_u32 (uint32x2_t __a, uint32x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); -+#endif -+} -+ -+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+vuzp2q_f32 (float32x4_t __a, float32x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7}); -+#endif -+} -+ -+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -+vuzp2q_f64 (float64x2_t __a, float64x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); -+#endif -+} -+ -+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+vuzp2q_p8 (poly8x16_t __a, poly8x16_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}); -+#endif -+} -+ -+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+vuzp2q_p16 (poly16x8_t __a, poly16x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); -+#endif -+} -+ -+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+vuzp2q_s8 (int8x16_t __a, int8x16_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}); -+#endif -+} -+ -+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+vuzp2q_s16 (int16x8_t __a, int16x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); -+#endif -+} -+ -+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+vuzp2q_s32 (int32x4_t __a, int32x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7}); -+#endif -+} -+ -+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+vuzp2q_s64 (int64x2_t __a, int64x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); -+#endif -+} -+ -+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+vuzp2q_u8 (uint8x16_t __a, uint8x16_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14}); -+#else -+ return __builtin_shuffle (__a, __b, 
(uint8x16_t) -+ {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}); -+#endif -+} -+ -+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+vuzp2q_u16 (uint16x8_t __a, uint16x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); -+#endif -+} -+ -+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+vuzp2q_u32 (uint32x4_t __a, uint32x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7}); -+#endif -+} -+ -+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+vuzp2q_u64 (uint64x2_t __a, uint64x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); -+#endif -+} -+ - __INTERLEAVE_LIST (uzp) - - /* vzip */ - -+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+vzip1_f32 (float32x2_t __a, float32x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); -+#endif -+} -+ -+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+vzip1_p8 (poly8x8_t __a, poly8x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); -+#endif -+} -+ -+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+vzip1_p16 (poly16x4_t __a, poly16x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5}); -+#endif -+} -+ -+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+vzip1_s8 (int8x8_t __a, int8x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); -+#endif -+} -+ -+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+vzip1_s16 (int16x4_t __a, int16x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5}); -+#endif -+} -+ -+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+vzip1_s32 (int32x2_t __a, int32x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); -+#endif -+} -+ -+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+vzip1_u8 (uint8x8_t __a, uint8x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); -+#endif -+} -+ -+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+vzip1_u16 (uint16x4_t __a, uint16x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3}); -+#else -+ return __builtin_shuffle (__a, __b, 
(uint16x4_t) {0, 4, 1, 5}); -+#endif -+} -+ -+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+vzip1_u32 (uint32x2_t __a, uint32x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); -+#endif -+} -+ -+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+vzip1q_f32 (float32x4_t __a, float32x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5}); -+#endif -+} -+ -+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -+vzip1q_f64 (float64x2_t __a, float64x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); -+#endif -+} -+ -+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+vzip1q_p8 (poly8x16_t __a, poly8x16_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}); -+#endif -+} -+ -+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+vzip1q_p16 (poly16x8_t __a, poly16x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) -+ {12, 4, 13, 5, 14, 6, 15, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); -+#endif -+} -+ -+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+vzip1q_s8 (int8x16_t __a, int8x16_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}); -+#endif -+} -+ -+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+vzip1q_s16 (int16x8_t __a, int16x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) -+ {12, 4, 13, 5, 14, 6, 15, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); -+#endif -+} -+ -+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+vzip1q_s32 (int32x4_t __a, int32x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5}); -+#endif -+} -+ -+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+vzip1q_s64 (int64x2_t __a, int64x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); -+#endif -+} -+ -+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+vzip1q_u8 (uint8x16_t __a, uint8x16_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}); -+#endif -+} -+ -+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 
-+vzip1q_u16 (uint16x8_t __a, uint16x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) -+ {12, 4, 13, 5, 14, 6, 15, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); -+#endif -+} -+ -+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+vzip1q_u32 (uint32x4_t __a, uint32x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5}); -+#endif -+} -+ -+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+vzip1q_u64 (uint64x2_t __a, uint64x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); -+#endif -+} -+ -+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+vzip2_f32 (float32x2_t __a, float32x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); -+#endif -+} -+ -+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+vzip2_p8 (poly8x8_t __a, poly8x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15}); -+#endif -+} -+ -+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+vzip2_p16 (poly16x4_t __a, poly16x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7}); -+#endif -+} -+ -+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+vzip2_s8 (int8x8_t __a, int8x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15}); -+#endif -+} -+ -+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+vzip2_s16 (int16x4_t __a, int16x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7}); -+#endif -+} -+ -+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+vzip2_s32 (int32x2_t __a, int32x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); -+#endif -+} -+ -+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+vzip2_u8 (uint8x8_t __a, uint8x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15}); -+#endif -+} -+ -+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+vzip2_u16 (uint16x4_t __a, uint16x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7}); -+#endif -+} -+ -+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+vzip2_u32 (uint32x2_t __a, uint32x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return 
__builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); -+#endif -+} -+ -+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+vzip2q_f32 (float32x4_t __a, float32x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7}); -+#endif -+} -+ -+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -+vzip2q_f64 (float64x2_t __a, float64x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); -+#endif -+} -+ -+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+vzip2q_p8 (poly8x16_t __a, poly8x16_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}); -+#endif -+} -+ -+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+vzip2q_p16 (poly16x8_t __a, poly16x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) -+ {4, 12, 5, 13, 6, 14, 7, 15}); -+#endif -+} -+ -+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+vzip2q_s8 (int8x16_t __a, int8x16_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}); -+#endif -+} -+ -+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+vzip2q_s16 (int16x8_t __a, int16x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) -+ {4, 12, 5, 13, 6, 14, 7, 15}); -+#endif -+} -+ -+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+vzip2q_s32 (int32x4_t __a, int32x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7}); -+#endif -+} -+ -+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+vzip2q_s64 (int64x2_t __a, int64x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); -+#endif -+} -+ -+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+vzip2q_u8 (uint8x16_t __a, uint8x16_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}); -+#endif -+} -+ -+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+vzip2q_u16 (uint16x8_t __a, uint16x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) -+ 
{4, 12, 5, 13, 6, 14, 7, 15}); -+#endif -+} -+ -+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+vzip2q_u32 (uint32x4_t __a, uint32x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7}); -+#endif -+} -+ -+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+vzip2q_u64 (uint64x2_t __a, uint64x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); -+#endif -+} -+ - __INTERLEAVE_LIST (zip) - - #undef __INTERLEAVE_LIST ---- a/src/gcc/config/aarch64/t-aarch64-linux -+++ b/src/gcc/config/aarch64/t-aarch64-linux -@@ -22,10 +22,7 @@ - LIB1ASMFUNCS = _aarch64_sync_cache_range - - AARCH_BE = $(if $(findstring TARGET_BIG_ENDIAN_DEFAULT=1, $(tm_defines)),_be) --MULTILIB_OSDIRNAMES = .=../lib64$(call if_multiarch,:aarch64$(AARCH_BE)-linux-gnu) -+MULTILIB_OSDIRNAMES = mabi.lp64=../lib64$(call if_multiarch,:aarch64$(AARCH_BE)-linux-gnu) - MULTIARCH_DIRNAME = $(call if_multiarch,aarch64$(AARCH_BE)-linux-gnu) - --# Disable the multilib for linux-gnu targets for the time being; focus --# on the baremetal targets. --MULTILIB_OPTIONS = --MULTILIB_DIRNAMES = -+MULTILIB_OSDIRNAMES += mabi.ilp32=../libilp32 ---- a/src/gcc/config/aarch64/aarch64.md -+++ b/src/gcc/config/aarch64/aarch64.md -@@ -67,7 +67,14 @@ - - (define_c_enum "unspec" [ - UNSPEC_CASESI -- UNSPEC_CLS -+ UNSPEC_CRC32B -+ UNSPEC_CRC32CB -+ UNSPEC_CRC32CH -+ UNSPEC_CRC32CW -+ UNSPEC_CRC32CX -+ UNSPEC_CRC32H -+ UNSPEC_CRC32W -+ UNSPEC_CRC32X - UNSPEC_FRECPE - UNSPEC_FRECPS - UNSPEC_FRECPX -@@ -83,8 +90,11 @@ - UNSPEC_GOTTINYPIC - UNSPEC_LD1 - UNSPEC_LD2 -+ UNSPEC_LD2_DUP - UNSPEC_LD3 -+ UNSPEC_LD3_DUP - UNSPEC_LD4 -+ UNSPEC_LD4_DUP - UNSPEC_MB - UNSPEC_NOP - UNSPEC_PRLG_STK -@@ -98,15 +108,24 @@ - UNSPEC_ST2 - UNSPEC_ST3 - UNSPEC_ST4 -+ UNSPEC_ST2_LANE -+ UNSPEC_ST3_LANE -+ UNSPEC_ST4_LANE - UNSPEC_TLS - UNSPEC_TLSDESC - UNSPEC_USHL_2S - UNSPEC_USHR64 - UNSPEC_VSTRUCTDUMMY -+ UNSPEC_SP_SET -+ UNSPEC_SP_TEST - ]) - - (define_c_enum "unspecv" [ - UNSPECV_EH_RETURN ; Represent EH_RETURN -+ UNSPECV_GET_FPCR ; Represent fetch of FPCR content. -+ UNSPECV_SET_FPCR ; Represent assign of FPCR content. -+ UNSPECV_GET_FPSR ; Represent fetch of FPSR content. -+ UNSPECV_SET_FPSR ; Represent assign of FPSR content. 
- ] - ) - -@@ -159,7 +178,7 @@ - - (define_attr "generic_sched" "yes,no" - (const (if_then_else -- (eq_attr "tune" "cortexa53,cortexa15") -+ (eq_attr "tune" "cortexa53,cortexa15,thunderx") - (const_string "no") - (const_string "yes")))) - -@@ -166,6 +185,7 @@ - ;; Scheduling - (include "../arm/cortex-a53.md") - (include "../arm/cortex-a15.md") -+(include "thunderx.md") - - ;; ------------------------------------------------------------------- - ;; Jumps and other miscellaneous insns -@@ -514,6 +534,10 @@ - (use (match_operand 2 "" ""))])] - "" - { -+ if (!REG_P (XEXP (operands[0], 0)) -+ && (GET_CODE (XEXP (operands[0], 0)) != SYMBOL_REF)) -+ XEXP (operands[0], 0) = force_reg (Pmode, XEXP (operands[0], 0)); -+ - if (operands[2] == NULL_RTX) - operands[2] = const0_rtx; - } -@@ -527,6 +551,10 @@ - (use (match_operand 3 "" ""))])] - "" - { -+ if (!REG_P (XEXP (operands[1], 0)) -+ && (GET_CODE (XEXP (operands[1], 0)) != SYMBOL_REF)) -+ XEXP (operands[1], 0) = force_reg (Pmode, XEXP (operands[1], 0)); -+ - if (operands[3] == NULL_RTX) - operands[3] = const0_rtx; - } -@@ -533,25 +561,29 @@ - ) - - (define_insn "*sibcall_insn" -- [(call (mem:DI (match_operand:DI 0 "" "X")) -+ [(call (mem:DI (match_operand:DI 0 "aarch64_call_insn_operand" "Ucs, Usf")) - (match_operand 1 "" "")) - (return) - (use (match_operand 2 "" ""))] -- "GET_CODE (operands[0]) == SYMBOL_REF" -- "b\\t%a0" -- [(set_attr "type" "branch")] -- -+ "SIBLING_CALL_P (insn)" -+ "@ -+ br\\t%0 -+ b\\t%a0" -+ [(set_attr "type" "branch, branch")] - ) - - (define_insn "*sibcall_value_insn" - [(set (match_operand 0 "" "") -- (call (mem:DI (match_operand 1 "" "X")) -+ (call (mem:DI -+ (match_operand:DI 1 "aarch64_call_insn_operand" "Ucs, Usf")) - (match_operand 2 "" ""))) - (return) - (use (match_operand 3 "" ""))] -- "GET_CODE (operands[1]) == SYMBOL_REF" -- "b\\t%a1" -- [(set_attr "type" "branch")] -+ "SIBLING_CALL_P (insn)" -+ "@ -+ br\\t%1 -+ b\\t%a1" -+ [(set_attr "type" "branch, branch")] - ) - - ;; Call subroutine returning any type. -@@ -641,17 +673,20 @@ - if (GET_CODE (operands[0]) == MEM && operands[1] != const0_rtx) - operands[1] = force_reg (<MODE>mode, operands[1]); - -- if (CONSTANT_P (operands[1])) -- { -- aarch64_expand_mov_immediate (operands[0], operands[1]); -- DONE; -- } -+ /* FIXME: RR we still need to fix up what we are doing with -+ symbol_refs and other types of constants. 
*/ -+ if (CONSTANT_P (operands[1]) -+ && !CONST_INT_P (operands[1])) -+ { -+ aarch64_expand_mov_immediate (operands[0], operands[1]); -+ DONE; -+ } - " - ) - --(define_insn "*movsi_aarch64" -- [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,*w,m, m,r,r ,*w, r,*w") -- (match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,m, m,rZ,*w,S,Ush,rZ,*w,*w"))] -+(define_insn_and_split "*movsi_aarch64" -+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m, m,r,r ,*w, r,*w") -+ (match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,n,m, m,rZ,*w,S,Ush,rZ,*w,*w"))] - "(register_operand (operands[0], SImode) - || aarch64_reg_or_zero (operands[1], SImode))" - "@ -@@ -659,6 +694,7 @@ - mov\\t%w0, %w1 - mov\\t%w0, %w1 - mov\\t%w0, %1 -+ # - ldr\\t%w0, %1 - ldr\\t%s0, %1 - str\\t%w1, %0 -@@ -668,14 +704,20 @@ - fmov\\t%s0, %w1 - fmov\\t%w0, %s1 - fmov\\t%s0, %s1" -- [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,load1,load1,store1,store1,\ -- adr,adr,fmov,fmov,fmov") -- (set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes")] -+ "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode)" -+ [(const_int 0)] -+ "{ -+ aarch64_expand_mov_immediate (operands[0], operands[1]); -+ DONE; -+ }" -+ [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,\ -+ adr,adr,f_mcr,f_mrc,fmov") -+ (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes")] - ) - --(define_insn "*movdi_aarch64" -- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,*w,m, m,r,r, *w, r,*w,w") -- (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,m, m,rZ,*w,S,Ush,rZ,*w,*w,Dd"))] -+(define_insn_and_split "*movdi_aarch64" -+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m, m,r,r, *w, r,*w,w") -+ (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,n,m, m,rZ,*w,S,Ush,rZ,*w,*w,Dd"))] - "(register_operand (operands[0], DImode) - || aarch64_reg_or_zero (operands[1], DImode))" - "@ -@@ -683,6 +725,7 @@ - mov\\t%0, %x1 - mov\\t%x0, %1 - mov\\t%x0, %1 -+ # - ldr\\t%x0, %1 - ldr\\t%d0, %1 - str\\t%x1, %0 -@@ -693,10 +736,16 @@ - fmov\\t%x0, %d1 - fmov\\t%d0, %d1 - movi\\t%d0, %1" -- [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,load1,load1,store1,store1,\ -- adr,adr,fmov,fmov,fmov,fmov") -- (set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*") -- (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,yes")] -+ "(CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), DImode))" -+ [(const_int 0)] -+ "{ -+ aarch64_expand_mov_immediate (operands[0], operands[1]); -+ DONE; -+ }" -+ [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,\ -+ adr,adr,f_mcr,f_mrc,fmov,fmov") -+ (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*") -+ (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")] - ) - - (define_insn "insv_imm<mode>" -@@ -789,7 +838,7 @@ - str\\t%w1, %0 - mov\\t%w0, %w1" - [(set_attr "type" "f_mcr,f_mrc,fmov,fconsts,\ -- f_loads,f_stores,f_loads,f_stores,fmov")] -+ f_loads,f_stores,f_loads,f_stores,mov_reg")] - ) - - (define_insn "*movdf_aarch64" -@@ -863,6 +912,24 @@ - } - ) - -+;; 0 is dst -+;; 1 is src -+;; 2 is size of move in bytes -+;; 3 is alignment -+ -+(define_expand "movmemdi" -+ [(match_operand:BLK 0 "memory_operand") -+ (match_operand:BLK 1 "memory_operand") -+ (match_operand:DI 2 "immediate_operand") -+ (match_operand:DI 3 "immediate_operand")] -+ "!STRICT_ALIGNMENT" -+{ -+ if (aarch64_expand_movmem (operands)) -+ DONE; -+ FAIL; -+} -+) -+ - ;; Operands 1 and 3 are tied together by the final condition; 
so we allow - ;; fairly lax checking on the second memory operation. - (define_insn "load_pair<mode>" -@@ -923,31 +990,45 @@ - [(set_attr "type" "neon_store1_2reg<q>")] - ) - --;; Load pair with writeback. This is primarily used in function epilogues --;; when restoring [fp,lr] -+;; Load pair with post-index writeback. This is primarily used in function -+;; epilogues. - (define_insn "loadwb_pair<GPI:mode>_<P:mode>" - [(parallel - [(set (match_operand:P 0 "register_operand" "=k") - (plus:P (match_operand:P 1 "register_operand" "0") -- (match_operand:P 4 "const_int_operand" "n"))) -+ (match_operand:P 4 "aarch64_mem_pair_offset" "n"))) - (set (match_operand:GPI 2 "register_operand" "=r") -- (mem:GPI (plus:P (match_dup 1) -- (match_dup 4)))) -+ (mem:GPI (match_dup 1))) - (set (match_operand:GPI 3 "register_operand" "=r") - (mem:GPI (plus:P (match_dup 1) - (match_operand:P 5 "const_int_operand" "n"))))])] -- "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (<GPI:MODE>mode)" -+ "INTVAL (operands[5]) == GET_MODE_SIZE (<GPI:MODE>mode)" - "ldp\\t%<w>2, %<w>3, [%1], %4" - [(set_attr "type" "load2")] - ) - --;; Store pair with writeback. This is primarily used in function prologues --;; when saving [fp,lr] -+(define_insn "loadwb_pair<GPF:mode>_<P:mode>" -+ [(parallel -+ [(set (match_operand:P 0 "register_operand" "=k") -+ (plus:P (match_operand:P 1 "register_operand" "0") -+ (match_operand:P 4 "aarch64_mem_pair_offset" "n"))) -+ (set (match_operand:GPF 2 "register_operand" "=w") -+ (mem:GPF (match_dup 1))) -+ (set (match_operand:GPF 3 "register_operand" "=w") -+ (mem:GPF (plus:P (match_dup 1) -+ (match_operand:P 5 "const_int_operand" "n"))))])] -+ "INTVAL (operands[5]) == GET_MODE_SIZE (<GPF:MODE>mode)" -+ "ldp\\t%<w>2, %<w>3, [%1], %4" -+ [(set_attr "type" "neon_load1_2reg")] -+) -+ -+;; Store pair with pre-index writeback. This is primarily used in function -+;; prologues. - (define_insn "storewb_pair<GPI:mode>_<P:mode>" - [(parallel - [(set (match_operand:P 0 "register_operand" "=&k") - (plus:P (match_operand:P 1 "register_operand" "0") -- (match_operand:P 4 "const_int_operand" "n"))) -+ (match_operand:P 4 "aarch64_mem_pair_offset" "n"))) - (set (mem:GPI (plus:P (match_dup 0) - (match_dup 4))) - (match_operand:GPI 2 "register_operand" "r")) -@@ -959,6 +1040,22 @@ - [(set_attr "type" "store2")] - ) - -+(define_insn "storewb_pair<GPF:mode>_<P:mode>" -+ [(parallel -+ [(set (match_operand:P 0 "register_operand" "=&k") -+ (plus:P (match_operand:P 1 "register_operand" "0") -+ (match_operand:P 4 "aarch64_mem_pair_offset" "n"))) -+ (set (mem:GPF (plus:P (match_dup 0) -+ (match_dup 4))) -+ (match_operand:GPF 2 "register_operand" "w")) -+ (set (mem:GPF (plus:P (match_dup 0) -+ (match_operand:P 5 "const_int_operand" "n"))) -+ (match_operand:GPF 3 "register_operand" "w"))])] -+ "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (<GPF:MODE>mode)" -+ "stp\\t%<w>2, %<w>3, [%0, %4]!" 
-+ [(set_attr "type" "neon_store1_2reg<q>")] -+) -+ - ;; ------------------------------------------------------------------- - ;; Sign/Zero extension - ;; ------------------------------------------------------------------- -@@ -1063,16 +1160,18 @@ - - (define_insn "*addsi3_aarch64" - [(set -- (match_operand:SI 0 "register_operand" "=rk,rk,rk") -+ (match_operand:SI 0 "register_operand" "=rk,rk,w,rk") - (plus:SI -- (match_operand:SI 1 "register_operand" "%rk,rk,rk") -- (match_operand:SI 2 "aarch64_plus_operand" "I,r,J")))] -+ (match_operand:SI 1 "register_operand" "%rk,rk,w,rk") -+ (match_operand:SI 2 "aarch64_plus_operand" "I,r,w,J")))] - "" - "@ - add\\t%w0, %w1, %2 - add\\t%w0, %w1, %w2 -+ add\\t%0.2s, %1.2s, %2.2s - sub\\t%w0, %w1, #%n2" -- [(set_attr "type" "alu_imm,alu_reg,alu_imm")] -+ [(set_attr "type" "alu_imm,alu_reg,neon_add,alu_imm") -+ (set_attr "simd" "*,*,yes,*")] - ) - - ;; zero_extend version of above -@@ -1106,7 +1205,26 @@ - (set_attr "simd" "*,*,*,yes")] - ) - --(define_insn "*add<mode>3_compare0" -+(define_expand "addti3" -+ [(set (match_operand:TI 0 "register_operand" "") -+ (plus:TI (match_operand:TI 1 "register_operand" "") -+ (match_operand:TI 2 "register_operand" "")))] -+ "" -+{ -+ rtx low = gen_reg_rtx (DImode); -+ emit_insn (gen_adddi3_compare0 (low, gen_lowpart (DImode, operands[1]), -+ gen_lowpart (DImode, operands[2]))); -+ -+ rtx high = gen_reg_rtx (DImode); -+ emit_insn (gen_adddi3_carryin (high, gen_highpart (DImode, operands[1]), -+ gen_highpart (DImode, operands[2]))); -+ -+ emit_move_insn (gen_lowpart (DImode, operands[0]), low); -+ emit_move_insn (gen_highpart (DImode, operands[0]), high); -+ DONE; -+}) -+ -+(define_insn "add<mode>3_compare0" - [(set (reg:CC_NZ CC_REGNUM) - (compare:CC_NZ - (plus:GPI (match_operand:GPI 1 "register_operand" "%r,r,r") -@@ -1390,7 +1508,7 @@ - [(set_attr "type" "alu_ext")] - ) - --(define_insn "*add<mode>3_carryin" -+(define_insn "add<mode>3_carryin" - [(set - (match_operand:GPI 0 "register_operand" "=r") - (plus:GPI (geu:GPI (reg:CC CC_REGNUM) (const_int 0)) -@@ -1558,8 +1676,26 @@ - (set_attr "simd" "*,yes")] - ) - -+(define_expand "subti3" -+ [(set (match_operand:TI 0 "register_operand" "") -+ (minus:TI (match_operand:TI 1 "register_operand" "") -+ (match_operand:TI 2 "register_operand" "")))] -+ "" -+{ -+ rtx low = gen_reg_rtx (DImode); -+ emit_insn (gen_subdi3_compare0 (low, gen_lowpart (DImode, operands[1]), -+ gen_lowpart (DImode, operands[2]))); - --(define_insn "*sub<mode>3_compare0" -+ rtx high = gen_reg_rtx (DImode); -+ emit_insn (gen_subdi3_carryin (high, gen_highpart (DImode, operands[1]), -+ gen_highpart (DImode, operands[2]))); -+ -+ emit_move_insn (gen_lowpart (DImode, operands[0]), low); -+ emit_move_insn (gen_highpart (DImode, operands[0]), high); -+ DONE; -+}) -+ -+(define_insn "sub<mode>3_compare0" - [(set (reg:CC_NZ CC_REGNUM) - (compare:CC_NZ (minus:GPI (match_operand:GPI 1 "register_operand" "r") - (match_operand:GPI 2 "register_operand" "r")) -@@ -1706,7 +1842,7 @@ - [(set_attr "type" "alu_ext")] - ) - --(define_insn "*sub<mode>3_carryin" -+(define_insn "sub<mode>3_carryin" - [(set - (match_operand:GPI 0 "register_operand" "=r") - (minus:GPI (minus:GPI -@@ -1935,7 +2071,7 @@ - [(set_attr "type" "mul")] - ) - --(define_insn "*madd<mode>" -+(define_insn "madd<mode>" - [(set (match_operand:GPI 0 "register_operand" "=r") - (plus:GPI (mult:GPI (match_operand:GPI 1 "register_operand" "r") - (match_operand:GPI 2 "register_operand" "r")) -@@ -2045,6 +2181,48 @@ - [(set_attr "type" "<su>mull")] - ) - 
-+(define_expand "<su_optab>mulditi3" -+ [(set (match_operand:TI 0 "register_operand") -+ (mult:TI (ANY_EXTEND:TI (match_operand:DI 1 "register_operand")) -+ (ANY_EXTEND:TI (match_operand:DI 2 "register_operand"))))] -+ "" -+{ -+ rtx low = gen_reg_rtx (DImode); -+ emit_insn (gen_muldi3 (low, operands[1], operands[2])); -+ -+ rtx high = gen_reg_rtx (DImode); -+ emit_insn (gen_<su>muldi3_highpart (high, operands[1], operands[2])); -+ -+ emit_move_insn (gen_lowpart (DImode, operands[0]), low); -+ emit_move_insn (gen_highpart (DImode, operands[0]), high); -+ DONE; -+}) -+ -+;; The default expansion of multi3 using umuldi3_highpart will perform -+;; the additions in an order that fails to combine into two madd insns. -+(define_expand "multi3" -+ [(set (match_operand:TI 0 "register_operand") -+ (mult:TI (match_operand:TI 1 "register_operand") -+ (match_operand:TI 2 "register_operand")))] -+ "" -+{ -+ rtx l0 = gen_reg_rtx (DImode); -+ rtx l1 = gen_lowpart (DImode, operands[1]); -+ rtx l2 = gen_lowpart (DImode, operands[2]); -+ rtx h0 = gen_reg_rtx (DImode); -+ rtx h1 = gen_highpart (DImode, operands[1]); -+ rtx h2 = gen_highpart (DImode, operands[2]); -+ -+ emit_insn (gen_muldi3 (l0, l1, l2)); -+ emit_insn (gen_umuldi3_highpart (h0, l1, l2)); -+ emit_insn (gen_madddi (h0, h1, l2, h0)); -+ emit_insn (gen_madddi (h0, l1, h2, h0)); -+ -+ emit_move_insn (gen_lowpart (DImode, operands[0]), l0); -+ emit_move_insn (gen_highpart (DImode, operands[0]), h0); -+ DONE; -+}) -+ - (define_insn "<su>muldi3_highpart" - [(set (match_operand:DI 0 "register_operand" "=r") - (truncate:DI -@@ -2345,11 +2523,46 @@ - } - ) - -+(define_expand "mov<mode>cc" -+ [(set (match_operand:GPF 0 "register_operand" "") -+ (if_then_else:GPF (match_operand 1 "aarch64_comparison_operator" "") -+ (match_operand:GPF 2 "register_operand" "") -+ (match_operand:GPF 3 "register_operand" "")))] -+ "" -+ { -+ rtx ccreg; -+ enum rtx_code code = GET_CODE (operands[1]); -+ -+ if (code == UNEQ || code == LTGT) -+ FAIL; -+ -+ ccreg = aarch64_gen_compare_reg (code, XEXP (operands[1], 0), -+ XEXP (operands[1], 1)); -+ operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx); -+ } -+) -+ -+ -+;; CRC32 instructions. 
-+(define_insn "aarch64_<crc_variant>" -+ [(set (match_operand:SI 0 "register_operand" "=r") -+ (unspec:SI [(match_operand:SI 1 "register_operand" "r") -+ (match_operand:<crc_mode> 2 "register_operand" "r")] -+ CRC))] -+ "TARGET_CRC32" -+ { -+ if (GET_MODE_BITSIZE (GET_MODE (operands[2])) >= 64) -+ return "<crc_variant>\\t%w0, %w1, %x2"; -+ else -+ return "<crc_variant>\\t%w0, %w1, %w2"; -+ } -+ [(set_attr "type" "crc")] -+) -+ - (define_insn "*csinc2<mode>_insn" - [(set (match_operand:GPI 0 "register_operand" "=r") -- (plus:GPI (match_operator:GPI 2 "aarch64_comparison_operator" -- [(match_operand:CC 3 "cc_register" "") (const_int 0)]) -- (match_operand:GPI 1 "register_operand" "r")))] -+ (plus:GPI (match_operand 2 "aarch64_comparison_operation" "") -+ (match_operand:GPI 1 "register_operand" "r")))] - "" - "csinc\\t%<w>0, %<w>1, %<w>1, %M2" - [(set_attr "type" "csel")] -@@ -2358,13 +2571,12 @@ - (define_insn "csinc3<mode>_insn" - [(set (match_operand:GPI 0 "register_operand" "=r") - (if_then_else:GPI -- (match_operator:GPI 1 "aarch64_comparison_operator" -- [(match_operand:CC 2 "cc_register" "") (const_int 0)]) -- (plus:GPI (match_operand:GPI 3 "register_operand" "r") -+ (match_operand 1 "aarch64_comparison_operation" "") -+ (plus:GPI (match_operand:GPI 2 "register_operand" "r") - (const_int 1)) -- (match_operand:GPI 4 "aarch64_reg_or_zero" "rZ")))] -+ (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")))] - "" -- "csinc\\t%<w>0, %<w>4, %<w>3, %M1" -+ "csinc\\t%<w>0, %<w>3, %<w>2, %M1" - [(set_attr "type" "csel")] - ) - -@@ -2371,12 +2583,11 @@ - (define_insn "*csinv3<mode>_insn" - [(set (match_operand:GPI 0 "register_operand" "=r") - (if_then_else:GPI -- (match_operator:GPI 1 "aarch64_comparison_operator" -- [(match_operand:CC 2 "cc_register" "") (const_int 0)]) -- (not:GPI (match_operand:GPI 3 "register_operand" "r")) -- (match_operand:GPI 4 "aarch64_reg_or_zero" "rZ")))] -+ (match_operand 1 "aarch64_comparison_operation" "") -+ (not:GPI (match_operand:GPI 2 "register_operand" "r")) -+ (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")))] - "" -- "csinv\\t%<w>0, %<w>4, %<w>3, %M1" -+ "csinv\\t%<w>0, %<w>3, %<w>2, %M1" - [(set_attr "type" "csel")] - ) - -@@ -2383,12 +2594,11 @@ - (define_insn "*csneg3<mode>_insn" - [(set (match_operand:GPI 0 "register_operand" "=r") - (if_then_else:GPI -- (match_operator:GPI 1 "aarch64_comparison_operator" -- [(match_operand:CC 2 "cc_register" "") (const_int 0)]) -- (neg:GPI (match_operand:GPI 3 "register_operand" "r")) -- (match_operand:GPI 4 "aarch64_reg_or_zero" "rZ")))] -+ (match_operand 1 "aarch64_comparison_operation" "") -+ (neg:GPI (match_operand:GPI 2 "register_operand" "r")) -+ (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")))] - "" -- "csneg\\t%<w>0, %<w>4, %<w>3, %M1" -+ "csneg\\t%<w>0, %<w>3, %<w>2, %M1" - [(set_attr "type" "csel")] - ) - -@@ -2486,7 +2696,18 @@ - [(set_attr "type" "logic_shift_imm")] - ) - --;; zero_extend version of above -+(define_insn "*<optab>_rol<mode>3" -+ [(set (match_operand:GPI 0 "register_operand" "=r") -+ (LOGICAL:GPI (rotate:GPI -+ (match_operand:GPI 1 "register_operand" "r") -+ (match_operand:QI 2 "aarch64_shift_imm_<mode>" "n")) -+ (match_operand:GPI 3 "register_operand" "r")))] -+ "" -+ "<logical>\\t%<w>0, %<w>3, %<w>1, ror (<sizen> - %2)" -+ [(set_attr "type" "logic_shift_imm")] -+) -+ -+;; zero_extend versions of above - (define_insn "*<LOGICAL:optab>_<SHIFT:optab>si3_uxtw" - [(set (match_operand:DI 0 "register_operand" "=r") - (zero_extend:DI -@@ -2499,6 +2720,18 @@ - [(set_attr "type" "logic_shift_imm")] - ) 
- -+(define_insn "*<optab>_rolsi3_uxtw" -+ [(set (match_operand:DI 0 "register_operand" "=r") -+ (zero_extend:DI -+ (LOGICAL:SI (rotate:SI -+ (match_operand:SI 1 "register_operand" "r") -+ (match_operand:QI 2 "aarch64_shift_imm_si" "n")) -+ (match_operand:SI 3 "register_operand" "r"))))] -+ "" -+ "<logical>\\t%w0, %w3, %w1, ror (32 - %2)" -+ [(set_attr "type" "logic_shift_imm")] -+) -+ - (define_insn "one_cmpl<mode>2" - [(set (match_operand:GPI 0 "register_operand" "=r") - (not:GPI (match_operand:GPI 1 "register_operand" "r")))] -@@ -2622,7 +2855,7 @@ - - emit_insn (gen_rbit<mode>2 (operands[0], operands[1])); - emit_insn (gen_clz<mode>2 (operands[0], operands[0])); -- emit_insn (gen_csinc3<mode>_insn (operands[0], x, ccreg, operands[0], const0_rtx)); -+ emit_insn (gen_csinc3<mode>_insn (operands[0], x, operands[0], const0_rtx)); - DONE; - } - ) -@@ -2629,7 +2862,7 @@ - - (define_insn "clrsb<mode>2" - [(set (match_operand:GPI 0 "register_operand" "=r") -- (unspec:GPI [(match_operand:GPI 1 "register_operand" "r")] UNSPEC_CLS))] -+ (clrsb:GPI (match_operand:GPI 1 "register_operand" "r")))] - "" - "cls\\t%<w>0, %<w>1" - [(set_attr "type" "clz")] -@@ -3125,7 +3358,7 @@ - [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r") - (match_operand 1 "const_int_operand" "n") - (const_int 0)) -- (zero_extract:GPI (match_operand:GPI 2 "register_operand" "+r") -+ (zero_extract:GPI (match_operand:GPI 2 "register_operand" "r") - (match_dup 1) - (match_operand 3 "const_int_operand" "n")))] - "!(UINTVAL (operands[1]) == 0 -@@ -3180,6 +3413,38 @@ - [(set_attr "type" "rev")] - ) - -+;; There are no canonicalisation rules for the position of the lshiftrt, ashift -+;; operations within an IOR/AND RTX, therefore we have two patterns matching -+;; each valid permutation. -+ -+(define_insn "rev16<mode>2" -+ [(set (match_operand:GPI 0 "register_operand" "=r") -+ (ior:GPI (and:GPI (ashift:GPI (match_operand:GPI 1 "register_operand" "r") -+ (const_int 8)) -+ (match_operand:GPI 3 "const_int_operand" "n")) -+ (and:GPI (lshiftrt:GPI (match_dup 1) -+ (const_int 8)) -+ (match_operand:GPI 2 "const_int_operand" "n"))))] -+ "aarch_rev16_shleft_mask_imm_p (operands[3], <MODE>mode) -+ && aarch_rev16_shright_mask_imm_p (operands[2], <MODE>mode)" -+ "rev16\\t%<w>0, %<w>1" -+ [(set_attr "type" "rev")] -+) -+ -+(define_insn "rev16<mode>2_alt" -+ [(set (match_operand:GPI 0 "register_operand" "=r") -+ (ior:GPI (and:GPI (lshiftrt:GPI (match_operand:GPI 1 "register_operand" "r") -+ (const_int 8)) -+ (match_operand:GPI 2 "const_int_operand" "n")) -+ (and:GPI (ashift:GPI (match_dup 1) -+ (const_int 8)) -+ (match_operand:GPI 3 "const_int_operand" "n"))))] -+ "aarch_rev16_shleft_mask_imm_p (operands[3], <MODE>mode) -+ && aarch_rev16_shright_mask_imm_p (operands[2], <MODE>mode)" -+ "rev16\\t%<w>0, %<w>1" -+ [(set_attr "type" "rev")] -+) -+ - ;; zero_extend version of above - (define_insn "*bswapsi2_uxtw" - [(set (match_operand:DI 0 "register_operand" "=r") -@@ -3194,7 +3459,7 @@ - ;; ------------------------------------------------------------------- - - ;; frint floating-point round to integral standard patterns. --;; Expands to btrunc, ceil, floor, nearbyint, rint, round. -+;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn. 
- - (define_insn "<frint_pattern><mode>2" - [(set (match_operand:GPF 0 "register_operand" "=w") -@@ -3305,20 +3570,24 @@ - [(set_attr "type" "f_cvtf2i")] - ) - --(define_insn "float<GPI:mode><GPF:mode>2" -- [(set (match_operand:GPF 0 "register_operand" "=w") -- (float:GPF (match_operand:GPI 1 "register_operand" "r")))] -- "TARGET_FLOAT" -- "scvtf\\t%<GPF:s>0, %<GPI:w>1" -- [(set_attr "type" "f_cvti2f")] -+(define_insn "<optab><fcvt_target><GPF:mode>2" -+ [(set (match_operand:GPF 0 "register_operand" "=w,w") -+ (FLOATUORS:GPF (match_operand:<FCVT_TARGET> 1 "register_operand" "w,r")))] -+ "" -+ "@ -+ <su_optab>cvtf\t%<GPF:s>0, %<s>1 -+ <su_optab>cvtf\t%<GPF:s>0, %<w1>1" -+ [(set_attr "simd" "yes,no") -+ (set_attr "fp" "no,yes") -+ (set_attr "type" "neon_int_to_fp_<Vetype>,f_cvti2f")] - ) - --(define_insn "floatuns<GPI:mode><GPF:mode>2" -+(define_insn "<optab><fcvt_iesize><GPF:mode>2" - [(set (match_operand:GPF 0 "register_operand" "=w") -- (unsigned_float:GPF (match_operand:GPI 1 "register_operand" "r")))] -+ (FLOATUORS:GPF (match_operand:<FCVT_IESIZE> 1 "register_operand" "r")))] - "TARGET_FLOAT" -- "ucvtf\\t%<GPF:s>0, %<GPI:w>1" -- [(set_attr "type" "f_cvt")] -+ "<su_optab>cvtf\t%<GPF:s>0, %<w2>1" -+ [(set_attr "type" "f_cvti2f")] - ) - - ;; ------------------------------------------------------------------- -@@ -3490,7 +3759,7 @@ - (truncate:DI (match_operand:TI 1 "register_operand" "w"))))] - "reload_completed || reload_in_progress" - "fmov\\t%d0, %d1" -- [(set_attr "type" "f_mcr") -+ [(set_attr "type" "fmov") - (set_attr "length" "4") - ]) - -@@ -3588,36 +3857,63 @@ - [(set_attr "type" "call") - (set_attr "length" "16")]) - --(define_insn "tlsie_small" -- [(set (match_operand:DI 0 "register_operand" "=r") -- (unspec:DI [(match_operand:DI 1 "aarch64_tls_ie_symref" "S")] -+(define_insn "tlsie_small_<mode>" -+ [(set (match_operand:PTR 0 "register_operand" "=r") -+ (unspec:PTR [(match_operand 1 "aarch64_tls_ie_symref" "S")] - UNSPEC_GOTSMALLTLS))] - "" -- "adrp\\t%0, %A1\;ldr\\t%0, [%0, #%L1]" -+ "adrp\\t%0, %A1\;ldr\\t%<w>0, [%0, #%L1]" - [(set_attr "type" "load1") - (set_attr "length" "8")] - ) - --(define_insn "tlsle_small" -+(define_insn "tlsie_small_sidi" - [(set (match_operand:DI 0 "register_operand" "=r") -- (unspec:DI [(match_operand:DI 1 "register_operand" "r") -- (match_operand:DI 2 "aarch64_tls_le_symref" "S")] -+ (zero_extend:DI -+ (unspec:SI [(match_operand 1 "aarch64_tls_ie_symref" "S")] -+ UNSPEC_GOTSMALLTLS)))] -+ "" -+ "adrp\\t%0, %A1\;ldr\\t%w0, [%0, #%L1]" -+ [(set_attr "type" "load1") -+ (set_attr "length" "8")] -+) -+ -+(define_expand "tlsle_small" -+ [(set (match_operand 0 "register_operand" "=r") -+ (unspec [(match_operand 1 "register_operand" "r") -+ (match_operand 2 "aarch64_tls_le_symref" "S")] -+ UNSPEC_GOTSMALLTLS))] -+ "" -+{ -+ enum machine_mode mode = GET_MODE (operands[0]); -+ emit_insn ((mode == DImode -+ ? 
gen_tlsle_small_di -+ : gen_tlsle_small_si) (operands[0], -+ operands[1], -+ operands[2])); -+ DONE; -+}) -+ -+(define_insn "tlsle_small_<mode>" -+ [(set (match_operand:P 0 "register_operand" "=r") -+ (unspec:P [(match_operand:P 1 "register_operand" "r") -+ (match_operand 2 "aarch64_tls_le_symref" "S")] - UNSPEC_GOTSMALLTLS))] - "" -- "add\\t%0, %1, #%G2\;add\\t%0, %0, #%L2" -+ "add\\t%<w>0, %<w>1, #%G2\;add\\t%<w>0, %<w>0, #%L2" - [(set_attr "type" "alu_reg") - (set_attr "length" "8")] - ) - --(define_insn "tlsdesc_small" -- [(set (reg:DI R0_REGNUM) -- (unspec:DI [(match_operand:DI 0 "aarch64_valid_symref" "S")] -+(define_insn "tlsdesc_small_<mode>" -+ [(set (reg:PTR R0_REGNUM) -+ (unspec:PTR [(match_operand 0 "aarch64_valid_symref" "S")] - UNSPEC_TLSDESC)) - (clobber (reg:DI LR_REGNUM)) - (clobber (reg:CC CC_REGNUM)) - (clobber (match_scratch:DI 1 "=r"))] - "TARGET_TLS_DESC" -- "adrp\\tx0, %A0\;ldr\\t%1, [x0, #%L0]\;add\\tx0, x0, %L0\;.tlsdesccall\\t%0\;blr\\t%1" -+ "adrp\\tx0, %A0\;ldr\\t%<w>1, [x0, #%L0]\;add\\t<w>0, <w>0, %L0\;.tlsdesccall\\t%0\;blr\\t%1" - [(set_attr "type" "call") - (set_attr "length" "16")]) - -@@ -3642,6 +3938,135 @@ - DONE; - }) - -+;; Named patterns for stack smashing protection. -+(define_expand "stack_protect_set" -+ [(match_operand 0 "memory_operand") -+ (match_operand 1 "memory_operand")] -+ "" -+{ -+ enum machine_mode mode = GET_MODE (operands[0]); -+ -+ emit_insn ((mode == DImode -+ ? gen_stack_protect_set_di -+ : gen_stack_protect_set_si) (operands[0], operands[1])); -+ DONE; -+}) -+ -+(define_insn "stack_protect_set_<mode>" -+ [(set (match_operand:PTR 0 "memory_operand" "=m") -+ (unspec:PTR [(match_operand:PTR 1 "memory_operand" "m")] -+ UNSPEC_SP_SET)) -+ (set (match_scratch:PTR 2 "=&r") (const_int 0))] -+ "" -+ "ldr\\t%<w>2, %1\;str\\t%<w>2, %0\;mov\t%<w>2,0" -+ [(set_attr "length" "12") -+ (set_attr "type" "multiple")]) -+ -+(define_expand "stack_protect_test" -+ [(match_operand 0 "memory_operand") -+ (match_operand 1 "memory_operand") -+ (match_operand 2)] -+ "" -+{ -+ rtx result; -+ enum machine_mode mode = GET_MODE (operands[0]); -+ -+ result = gen_reg_rtx(mode); -+ -+ emit_insn ((mode == DImode -+ ? gen_stack_protect_test_di -+ : gen_stack_protect_test_si) (result, -+ operands[0], -+ operands[1])); -+ -+ if (mode == DImode) -+ emit_jump_insn (gen_cbranchdi4 (gen_rtx_EQ (VOIDmode, result, const0_rtx), -+ result, const0_rtx, operands[2])); -+ else -+ emit_jump_insn (gen_cbranchsi4 (gen_rtx_EQ (VOIDmode, result, const0_rtx), -+ result, const0_rtx, operands[2])); -+ DONE; -+}) -+ -+(define_insn "stack_protect_test_<mode>" -+ [(set (match_operand:PTR 0 "register_operand" "=r") -+ (unspec:PTR [(match_operand:PTR 1 "memory_operand" "m") -+ (match_operand:PTR 2 "memory_operand" "m")] -+ UNSPEC_SP_TEST)) -+ (clobber (match_scratch:PTR 3 "=&r"))] -+ "" -+ "ldr\t%<w>3, %x1\;ldr\t%<w>0, %x2\;eor\t%<w>0, %<w>3, %<w>0" -+ [(set_attr "length" "12") -+ (set_attr "type" "multiple")]) -+ -+;; Write Floating-point Control Register. -+(define_insn "set_fpcr" -+ [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] UNSPECV_SET_FPCR)] -+ "" -+ "msr\\tfpcr, %0" -+ [(set_attr "type" "mrs")]) -+ -+;; Read Floating-point Control Register. -+(define_insn "get_fpcr" -+ [(set (match_operand:SI 0 "register_operand" "=r") -+ (unspec_volatile:SI [(const_int 0)] UNSPECV_GET_FPCR))] -+ "" -+ "mrs\\t%0, fpcr" -+ [(set_attr "type" "mrs")]) -+ -+;; Write Floating-point Status Register. 
-+(define_insn "set_fpsr" -+ [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] UNSPECV_SET_FPSR)] -+ "" -+ "msr\\tfpsr, %0" -+ [(set_attr "type" "mrs")]) -+ -+;; Read Floating-point Status Register. -+(define_insn "get_fpsr" -+ [(set (match_operand:SI 0 "register_operand" "=r") -+ (unspec_volatile:SI [(const_int 0)] UNSPECV_GET_FPSR))] -+ "" -+ "mrs\\t%0, fpsr" -+ [(set_attr "type" "mrs")]) -+ -+ -+;; Define the subtract-one-and-jump insns so loop.c -+;; knows what to generate. -+(define_expand "doloop_end" -+ [(use (match_operand 0 "" "")) ; loop pseudo -+ (use (match_operand 1 "" ""))] ; label -+ "optimize > 0 && flag_modulo_sched" -+{ -+ rtx s0; -+ rtx bcomp; -+ rtx loc_ref; -+ rtx cc_reg; -+ rtx insn; -+ rtx cmp; -+ -+ /* Currently SMS relies on the do-loop pattern to recognize loops -+ where (1) the control part consists of all insns defining and/or -+ using a certain 'count' register and (2) the loop count can be -+ adjusted by modifying this register prior to the loop. -+ ??? The possible introduction of a new block to initialize the -+ new IV can potentially affect branch optimizations. */ -+ -+ if (GET_MODE (operands[0]) != DImode) -+ FAIL; -+ -+ s0 = operands [0]; -+ insn = emit_insn (gen_adddi3_compare0 (s0, s0, GEN_INT (-1))); -+ -+ cmp = XVECEXP (PATTERN (insn), 0, 0); -+ cc_reg = SET_DEST (cmp); -+ bcomp = gen_rtx_NE (VOIDmode, cc_reg, const0_rtx); -+ loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands [1]); -+ emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, -+ gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp, -+ loc_ref, pc_rtx))); -+ DONE; -+}) -+ - ;; AdvSIMD Stuff - (include "aarch64-simd.md") - ---- a/src/gcc/config/aarch64/t-aarch64 -+++ b/src/gcc/config/aarch64/t-aarch64 -@@ -31,10 +31,17 @@ - $(SYSTEM_H) coretypes.h $(TM_H) \ - $(RTL_H) $(TREE_H) expr.h $(TM_P_H) $(RECOG_H) langhooks.h \ - $(DIAGNOSTIC_CORE_H) $(OPTABS_H) \ -- $(srcdir)/config/aarch64/aarch64-simd-builtins.def -+ $(srcdir)/config/aarch64/aarch64-simd-builtins.def \ -+ aarch64-builtin-iterators.h - $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ - $(srcdir)/config/aarch64/aarch64-builtins.c - -+aarch64-builtin-iterators.h: $(srcdir)/config/aarch64/geniterators.sh \ -+ $(srcdir)/config/aarch64/iterators.md -+ $(SHELL) $(srcdir)/config/aarch64/geniterators.sh \ -+ $(srcdir)/config/aarch64/iterators.md > \ -+ aarch64-builtin-iterators.h -+ - aarch-common.o: $(srcdir)/config/arm/aarch-common.c $(CONFIG_H) $(SYSTEM_H) \ - coretypes.h $(TM_H) $(TM_P_H) $(RTL_H) $(TREE_H) output.h $(C_COMMON_H) - $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ ---- a/src/gcc/config/aarch64/arm_acle.h -+++ b/src/gcc/config/aarch64/arm_acle.h -@@ -0,0 +1,90 @@ -+/* AArch64 Non-NEON ACLE intrinsics include file. -+ -+ Copyright (C) 2014 Free Software Foundation, Inc. -+ Contributed by ARM Ltd. -+ -+ This file is part of GCC. -+ -+ GCC is free software; you can redistribute it and/or modify it -+ under the terms of the GNU General Public License as published -+ by the Free Software Foundation; either version 3, or (at your -+ option) any later version. -+ -+ GCC is distributed in the hope that it will be useful, but WITHOUT -+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public -+ License for more details. -+ -+ Under Section 7 of GPL version 3, you are granted additional -+ permissions described in the GCC Runtime Library Exception, version -+ 3.1, as published by the Free Software Foundation. 
-+ -+ You should have received a copy of the GNU General Public License and -+ a copy of the GCC Runtime Library Exception along with this program; -+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -+ <http://www.gnu.org/licenses/>. */ -+ -+#ifndef _GCC_ARM_ACLE_H -+#define _GCC_ARM_ACLE_H -+ -+#include <stdint.h> -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+#ifdef __ARM_FEATURE_CRC32 -+__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -+__crc32b (uint32_t __a, uint8_t __b) -+{ -+ return __builtin_aarch64_crc32b (__a, __b); -+} -+ -+__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -+__crc32cb (uint32_t __a, uint8_t __b) -+{ -+ return __builtin_aarch64_crc32cb (__a, __b); -+} -+ -+__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -+__crc32ch (uint32_t __a, uint16_t __b) -+{ -+ return __builtin_aarch64_crc32ch (__a, __b); -+} -+ -+__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -+__crc32cw (uint32_t __a, uint32_t __b) -+{ -+ return __builtin_aarch64_crc32cw (__a, __b); -+} -+ -+__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -+__crc32cd (uint32_t __a, uint64_t __b) -+{ -+ return __builtin_aarch64_crc32cx (__a, __b); -+} -+ -+__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -+__crc32h (uint32_t __a, uint16_t __b) -+{ -+ return __builtin_aarch64_crc32h (__a, __b); -+} -+ -+__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -+__crc32w (uint32_t __a, uint32_t __b) -+{ -+ return __builtin_aarch64_crc32w (__a, __b); -+} -+ -+__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -+__crc32d (uint32_t __a, uint64_t __b) -+{ -+ return __builtin_aarch64_crc32x (__a, __b); -+} -+ -+#endif -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif ---- a/src/gcc/config/aarch64/aarch64-cost-tables.h -+++ b/src/gcc/config/aarch64/aarch64-cost-tables.h -@@ -0,0 +1,131 @@ -+/* RTX cost tables for AArch64. -+ -+ Copyright (C) 2014 Free Software Foundation, Inc. -+ -+ This file is part of GCC. -+ -+ GCC is free software; you can redistribute it and/or modify it -+ under the terms of the GNU General Public License as published -+ by the Free Software Foundation; either version 3, or (at your -+ option) any later version. -+ -+ GCC is distributed in the hope that it will be useful, but WITHOUT -+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public -+ License for more details. -+ -+ You should have received a copy of the GNU General Public License -+ along with GCC; see the file COPYING3. If not see -+ <http://www.gnu.org/licenses/>. */ -+ -+#ifndef GCC_AARCH64_COST_TABLES_H -+#define GCC_AARCH64_COST_TABLES_H -+ -+#include "config/arm/aarch-cost-tables.h" -+ -+/* ThunderX does not have implement AArch32. */ -+const struct cpu_cost_table thunderx_extra_costs = -+{ -+ /* ALU */ -+ { -+ 0, /* Arith. */ -+ 0, /* Logical. */ -+ 0, /* Shift. */ -+ 0, /* Shift_reg. */ -+ COSTS_N_INSNS (1), /* Arith_shift. */ -+ COSTS_N_INSNS (1), /* Arith_shift_reg. */ -+ COSTS_N_INSNS (1), /* UNUSED: Log_shift. */ -+ COSTS_N_INSNS (1), /* UNUSED: Log_shift_reg. */ -+ 0, /* Extend. */ -+ COSTS_N_INSNS (1), /* Extend_arith. */ -+ 0, /* Bfi. */ -+ 0, /* Bfx. */ -+ COSTS_N_INSNS (5), /* Clz. */ -+ 0, /* rev. */ -+ 0, /* UNUSED: non_exec. */ -+ false /* UNUSED: non_exec_costs_exec. 
*/ -+ }, -+ { -+ /* MULT SImode */ -+ { -+ COSTS_N_INSNS (3), /* Simple. */ -+ 0, /* Flag_setting. */ -+ 0, /* Extend. */ -+ 0, /* Add. */ -+ COSTS_N_INSNS (1), /* Extend_add. */ -+ COSTS_N_INSNS (21) /* Idiv. */ -+ }, -+ /* MULT DImode */ -+ { -+ COSTS_N_INSNS (3), /* Simple. */ -+ 0, /* Flag_setting. */ -+ 0, /* Extend. */ -+ 0, /* Add. */ -+ COSTS_N_INSNS (1), /* Extend_add. */ -+ COSTS_N_INSNS (37) /* Idiv. */ -+ }, -+ }, -+ /* LD/ST */ -+ { -+ COSTS_N_INSNS (2), /* Load. */ -+ COSTS_N_INSNS (2), /* Load_sign_extend. */ -+ COSTS_N_INSNS (2), /* Ldrd. */ -+ 0, /* N/A: Ldm_1st. */ -+ 0, /* N/A: Ldm_regs_per_insn_1st. */ -+ 0, /* N/A: Ldm_regs_per_insn_subsequent. */ -+ COSTS_N_INSNS (3), /* Loadf. */ -+ COSTS_N_INSNS (3), /* Loadd. */ -+ 0, /* N/A: Load_unaligned. */ -+ 0, /* Store. */ -+ 0, /* Strd. */ -+ 0, /* N/A: Stm_1st. */ -+ 0, /* N/A: Stm_regs_per_insn_1st. */ -+ 0, /* N/A: Stm_regs_per_insn_subsequent. */ -+ 0, /* Storef. */ -+ 0, /* Stored. */ -+ COSTS_N_INSNS (1) /* Store_unaligned. */ -+ }, -+ { -+ /* FP SFmode */ -+ { -+ COSTS_N_INSNS (11), /* Div. */ -+ COSTS_N_INSNS (5), /* Mult. */ -+ COSTS_N_INSNS (5), /* Mult_addsub. */ -+ COSTS_N_INSNS (5), /* Fma. */ -+ COSTS_N_INSNS (3), /* Addsub. */ -+ 0, /* Fpconst. */ -+ COSTS_N_INSNS (1), /* Neg. */ -+ 0, /* Compare. */ -+ COSTS_N_INSNS (5), /* Widen. */ -+ COSTS_N_INSNS (5), /* Narrow. */ -+ COSTS_N_INSNS (5), /* Toint. */ -+ COSTS_N_INSNS (5), /* Fromint. */ -+ COSTS_N_INSNS (1) /* Roundint. */ -+ }, -+ /* FP DFmode */ -+ { -+ COSTS_N_INSNS (21), /* Div. */ -+ COSTS_N_INSNS (5), /* Mult. */ -+ COSTS_N_INSNS (5), /* Mult_addsub. */ -+ COSTS_N_INSNS (5), /* Fma. */ -+ COSTS_N_INSNS (3), /* Addsub. */ -+ 0, /* Fpconst. */ -+ COSTS_N_INSNS (1), /* Neg. */ -+ 0, /* Compare. */ -+ COSTS_N_INSNS (5), /* Widen. */ -+ COSTS_N_INSNS (5), /* Narrow. */ -+ COSTS_N_INSNS (5), /* Toint. */ -+ COSTS_N_INSNS (5), /* Fromint. */ -+ COSTS_N_INSNS (1) /* Roundint. */ -+ } -+ }, -+ /* Vector */ -+ { -+ COSTS_N_INSNS (1) /* Alu. */ -+ } -+}; -+ -+ -+ -+#endif -+ ---- a/src/gcc/config/aarch64/aarch64-cores.def -+++ b/src/gcc/config/aarch64/aarch64-cores.def -@@ -34,9 +34,10 @@ - - /* V8 Architecture Processors. */ - --AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8, AARCH64_FL_FPSIMD | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa53) --AARCH64_CORE("cortex-a57", cortexa15, cortexa15, 8, AARCH64_FL_FPSIMD | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57) -+AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8, AARCH64_FL_FPSIMD | AARCH64_FL_CRC, cortexa53) -+AARCH64_CORE("cortex-a57", cortexa15, cortexa15, 8, AARCH64_FL_FPSIMD | AARCH64_FL_CRC, cortexa57) -+AARCH64_CORE("thunderx", thunderx, thunderx, 8, AARCH64_FL_FPSIMD | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx) - - /* V8 big.LITTLE implementations. 
*/ - --AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8, AARCH64_FL_FPSIMD | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57) -+AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8, AARCH64_FL_FPSIMD | AARCH64_FL_CRC, cortexa57) ---- a/src/gcc/config/aarch64/atomics.md -+++ b/src/gcc/config/aarch64/atomics.md -@@ -119,7 +119,7 @@ - [(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "+Q") - (unspec_volatile:ALLI - [(atomic_op:ALLI (match_dup 0) -- (match_operand:ALLI 1 "<atomic_op_operand>" "rn")) -+ (match_operand:ALLI 1 "<atomic_op_operand>" "r<lconst_atomic>")) - (match_operand:SI 2 "const_int_operand")] ;; model - UNSPECV_ATOMIC_OP)) - (clobber (reg:CC CC_REGNUM)) -@@ -141,7 +141,7 @@ - (unspec_volatile:ALLI - [(not:ALLI - (and:ALLI (match_dup 0) -- (match_operand:ALLI 1 "aarch64_logical_operand" "rn"))) -+ (match_operand:ALLI 1 "aarch64_logical_operand" "r<lconst_atomic>"))) - (match_operand:SI 2 "const_int_operand")] ;; model - UNSPECV_ATOMIC_OP)) - (clobber (reg:CC CC_REGNUM)) -@@ -164,7 +164,7 @@ - (set (match_dup 1) - (unspec_volatile:ALLI - [(atomic_op:ALLI (match_dup 1) -- (match_operand:ALLI 2 "<atomic_op_operand>" "rn")) -+ (match_operand:ALLI 2 "<atomic_op_operand>" "r<lconst_atomic>")) - (match_operand:SI 3 "const_int_operand")] ;; model - UNSPECV_ATOMIC_OP)) - (clobber (reg:CC CC_REGNUM)) -@@ -188,7 +188,7 @@ - (unspec_volatile:ALLI - [(not:ALLI - (and:ALLI (match_dup 1) -- (match_operand:ALLI 2 "aarch64_logical_operand" "rn"))) -+ (match_operand:ALLI 2 "aarch64_logical_operand" "r<lconst_atomic>"))) - (match_operand:SI 3 "const_int_operand")] ;; model - UNSPECV_ATOMIC_OP)) - (clobber (reg:CC CC_REGNUM)) -@@ -209,7 +209,7 @@ - [(set (match_operand:ALLI 0 "register_operand" "=&r") - (atomic_op:ALLI - (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q") -- (match_operand:ALLI 2 "<atomic_op_operand>" "rn"))) -+ (match_operand:ALLI 2 "<atomic_op_operand>" "r<lconst_atomic>"))) - (set (match_dup 1) - (unspec_volatile:ALLI - [(match_dup 1) (match_dup 2) -@@ -233,7 +233,7 @@ - (not:ALLI - (and:ALLI - (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q") -- (match_operand:ALLI 2 "aarch64_logical_operand" "rn")))) -+ (match_operand:ALLI 2 "aarch64_logical_operand" "r<lconst_atomic>")))) - (set (match_dup 1) - (unspec_volatile:ALLI - [(match_dup 1) (match_dup 2) ---- a/src/gcc/config/aarch64/aarch64-tune.md -+++ b/src/gcc/config/aarch64/aarch64-tune.md -@@ -1,5 +1,5 @@ - ;; -*- buffer-read-only: t -*- - ;; Generated automatically by gentune.sh from aarch64-cores.def - (define_attr "tune" -- "cortexa53,cortexa15,cortexa57cortexa53" -+ "cortexa53,cortexa15,thunderx,cortexa57cortexa53" - (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) ---- a/src/gcc/config/aarch64/aarch64-builtins.c -+++ b/src/gcc/config/aarch64/aarch64-builtins.c -@@ -47,52 +47,27 @@ - #include "gimple.h" - #include "gimple-iterator.h" - --enum aarch64_simd_builtin_type_mode --{ -- T_V8QI, -- T_V4HI, -- T_V2SI, -- T_V2SF, -- T_DI, -- T_DF, -- T_V16QI, -- T_V8HI, -- T_V4SI, -- T_V4SF, -- T_V2DI, -- T_V2DF, -- T_TI, -- T_EI, -- T_OI, -- T_XI, -- T_SI, -- T_SF, -- T_HI, -- T_QI, -- T_MAX --}; -- --#define v8qi_UP T_V8QI --#define v4hi_UP T_V4HI --#define v2si_UP T_V2SI --#define v2sf_UP T_V2SF --#define di_UP T_DI --#define df_UP T_DF --#define v16qi_UP T_V16QI --#define v8hi_UP T_V8HI --#define v4si_UP T_V4SI --#define v4sf_UP T_V4SF --#define v2di_UP T_V2DI --#define v2df_UP T_V2DF --#define ti_UP T_TI --#define ei_UP T_EI --#define oi_UP T_OI --#define xi_UP 
T_XI --#define si_UP T_SI --#define sf_UP T_SF --#define hi_UP T_HI --#define qi_UP T_QI -- -+#define v8qi_UP V8QImode -+#define v4hi_UP V4HImode -+#define v2si_UP V2SImode -+#define v2sf_UP V2SFmode -+#define di_UP DImode -+#define df_UP DFmode -+#define v16qi_UP V16QImode -+#define v8hi_UP V8HImode -+#define v4si_UP V4SImode -+#define v4sf_UP V4SFmode -+#define v2di_UP V2DImode -+#define v2df_UP V2DFmode -+#define ti_UP TImode -+#define ei_UP EImode -+#define oi_UP OImode -+#define ci_UP CImode -+#define xi_UP XImode -+#define si_UP SImode -+#define sf_UP SFmode -+#define hi_UP HImode -+#define qi_UP QImode - #define UP(X) X##_UP - - #define SIMD_MAX_BUILTIN_ARGS 5 -@@ -107,8 +82,6 @@ - qualifier_const = 0x2, /* 1 << 1 */ - /* T *foo. */ - qualifier_pointer = 0x4, /* 1 << 2 */ -- /* const T *foo. */ -- qualifier_const_pointer = 0x6, /* qualifier_const | qualifier_pointer */ - /* Used when expanding arguments if an operand could - be an immediate. */ - qualifier_immediate = 0x8, /* 1 << 3 */ -@@ -123,7 +96,7 @@ - qualifier_map_mode = 0x80, /* 1 << 7 */ - /* qualifier_pointer | qualifier_map_mode */ - qualifier_pointer_map_mode = 0x84, -- /* qualifier_const_pointer | qualifier_map_mode */ -+ /* qualifier_const | qualifier_pointer | qualifier_map_mode */ - qualifier_const_pointer_map_mode = 0x86, - /* Polynomial types. */ - qualifier_poly = 0x100 -@@ -132,7 +105,7 @@ - typedef struct - { - const char *name; -- enum aarch64_simd_builtin_type_mode mode; -+ enum machine_mode mode; - const enum insn_code code; - unsigned int fcode; - enum aarch64_type_qualifiers *qualifiers; -@@ -147,16 +120,49 @@ - = { qualifier_unsigned, qualifier_unsigned }; - #define TYPES_UNOPU (aarch64_types_unopu_qualifiers) - #define TYPES_CREATE (aarch64_types_unop_qualifiers) --#define TYPES_REINTERP (aarch64_types_unop_qualifiers) -+#define TYPES_REINTERP_SS (aarch64_types_unop_qualifiers) - static enum aarch64_type_qualifiers -+aarch64_types_unop_su_qualifiers[SIMD_MAX_BUILTIN_ARGS] -+ = { qualifier_none, qualifier_unsigned }; -+#define TYPES_REINTERP_SU (aarch64_types_unop_su_qualifiers) -+static enum aarch64_type_qualifiers -+aarch64_types_unop_sp_qualifiers[SIMD_MAX_BUILTIN_ARGS] -+ = { qualifier_none, qualifier_poly }; -+#define TYPES_REINTERP_SP (aarch64_types_unop_sp_qualifiers) -+static enum aarch64_type_qualifiers -+aarch64_types_unop_us_qualifiers[SIMD_MAX_BUILTIN_ARGS] -+ = { qualifier_unsigned, qualifier_none }; -+#define TYPES_REINTERP_US (aarch64_types_unop_us_qualifiers) -+static enum aarch64_type_qualifiers -+aarch64_types_unop_ps_qualifiers[SIMD_MAX_BUILTIN_ARGS] -+ = { qualifier_poly, qualifier_none }; -+#define TYPES_REINTERP_PS (aarch64_types_unop_ps_qualifiers) -+static enum aarch64_type_qualifiers - aarch64_types_binop_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_none, qualifier_none, qualifier_maybe_immediate }; - #define TYPES_BINOP (aarch64_types_binop_qualifiers) - static enum aarch64_type_qualifiers -+aarch64_types_cmtst_qualifiers[SIMD_MAX_BUILTIN_ARGS] -+ = { qualifier_none, qualifier_none, qualifier_none, -+ qualifier_internal, qualifier_internal }; -+#define TYPES_TST (aarch64_types_cmtst_qualifiers) -+static enum aarch64_type_qualifiers -+aarch64_types_binopv_qualifiers[SIMD_MAX_BUILTIN_ARGS] -+ = { qualifier_void, qualifier_none, qualifier_none }; -+#define TYPES_BINOPV (aarch64_types_binopv_qualifiers) -+static enum aarch64_type_qualifiers - aarch64_types_binopu_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned }; - #define 
TYPES_BINOPU (aarch64_types_binopu_qualifiers) - static enum aarch64_type_qualifiers -+aarch64_types_binop_uus_qualifiers[SIMD_MAX_BUILTIN_ARGS] -+ = { qualifier_unsigned, qualifier_unsigned, qualifier_none }; -+#define TYPES_BINOP_UUS (aarch64_types_binop_uus_qualifiers) -+static enum aarch64_type_qualifiers -+aarch64_types_binop_ssu_qualifiers[SIMD_MAX_BUILTIN_ARGS] -+ = { qualifier_none, qualifier_none, qualifier_unsigned }; -+#define TYPES_BINOP_SSU (aarch64_types_binop_ssu_qualifiers) -+static enum aarch64_type_qualifiers - aarch64_types_binopp_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_poly, qualifier_poly, qualifier_poly }; - #define TYPES_BINOPP (aarch64_types_binopp_qualifiers) -@@ -172,10 +178,10 @@ - #define TYPES_TERNOPU (aarch64_types_ternopu_qualifiers) - - static enum aarch64_type_qualifiers --aarch64_types_quadop_qualifiers[SIMD_MAX_BUILTIN_ARGS] -+aarch64_types_ternop_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_none, qualifier_none, qualifier_none, -- qualifier_none, qualifier_none }; --#define TYPES_QUADOP (aarch64_types_quadop_qualifiers) -+ qualifier_none, qualifier_immediate }; -+#define TYPES_TERNOP_LANE (aarch64_types_ternop_lane_qualifiers) - - static enum aarch64_type_qualifiers - aarch64_types_getlane_qualifiers[SIMD_MAX_BUILTIN_ARGS] -@@ -183,9 +189,14 @@ - #define TYPES_GETLANE (aarch64_types_getlane_qualifiers) - #define TYPES_SHIFTIMM (aarch64_types_getlane_qualifiers) - static enum aarch64_type_qualifiers -+aarch64_types_shift_to_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS] -+ = { qualifier_unsigned, qualifier_none, qualifier_immediate }; -+#define TYPES_SHIFTIMM_USS (aarch64_types_shift_to_unsigned_qualifiers) -+static enum aarch64_type_qualifiers - aarch64_types_unsigned_shift_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate }; - #define TYPES_USHIFTIMM (aarch64_types_unsigned_shift_qualifiers) -+ - static enum aarch64_type_qualifiers - aarch64_types_setlane_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate }; -@@ -194,6 +205,13 @@ - #define TYPES_SHIFTACC (aarch64_types_setlane_qualifiers) - - static enum aarch64_type_qualifiers -+aarch64_types_unsigned_shiftacc_qualifiers[SIMD_MAX_BUILTIN_ARGS] -+ = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned, -+ qualifier_immediate }; -+#define TYPES_USHIFTACC (aarch64_types_unsigned_shiftacc_qualifiers) -+ -+ -+static enum aarch64_type_qualifiers - aarch64_types_combine_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_none, qualifier_none, qualifier_none }; - #define TYPES_COMBINE (aarch64_types_combine_qualifiers) -@@ -230,6 +248,11 @@ - = { qualifier_void, qualifier_pointer_map_mode, qualifier_none }; - #define TYPES_STORE1 (aarch64_types_store1_qualifiers) - #define TYPES_STORESTRUCT (aarch64_types_store1_qualifiers) -+static enum aarch64_type_qualifiers -+aarch64_types_storestruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS] -+ = { qualifier_void, qualifier_pointer_map_mode, -+ qualifier_none, qualifier_none }; -+#define TYPES_STORESTRUCT_LANE (aarch64_types_storestruct_lane_qualifiers) - - #define CF0(N, X) CODE_FOR_aarch64_##N##X - #define CF1(N, X) CODE_FOR_##N##X##1 -@@ -239,7 +262,7 @@ - #define CF10(N, X) CODE_FOR_##N##X - - #define VAR1(T, N, MAP, A) \ -- {#N, UP (A), CF##MAP (N, A), 0, TYPES_##T}, -+ {#N #A, UP (A), CF##MAP (N, A), 0, TYPES_##T}, - #define VAR2(T, N, MAP, A, B) \ - VAR1 (T, N, MAP, A) \ - VAR1 (T, N, MAP, B) -@@ -274,96 +297,34 @@ - VAR11 (T, N, MAP, A, B, 
C, D, E, F, G, H, I, J, K) \ - VAR1 (T, N, MAP, L) - --/* BUILTIN_<ITERATOR> macros should expand to cover the same range of -- modes as is given for each define_mode_iterator in -- config/aarch64/iterators.md. */ -+#include "aarch64-builtin-iterators.h" - --#define BUILTIN_DX(T, N, MAP) \ -- VAR2 (T, N, MAP, di, df) --#define BUILTIN_GPF(T, N, MAP) \ -- VAR2 (T, N, MAP, sf, df) --#define BUILTIN_SDQ_I(T, N, MAP) \ -- VAR4 (T, N, MAP, qi, hi, si, di) --#define BUILTIN_SD_HSI(T, N, MAP) \ -- VAR2 (T, N, MAP, hi, si) --#define BUILTIN_V2F(T, N, MAP) \ -- VAR2 (T, N, MAP, v2sf, v2df) --#define BUILTIN_VALL(T, N, MAP) \ -- VAR10 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, \ -- v4si, v2di, v2sf, v4sf, v2df) --#define BUILTIN_VALLDI(T, N, MAP) \ -- VAR11 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, \ -- v4si, v2di, v2sf, v4sf, v2df, di) --#define BUILTIN_VALLDIF(T, N, MAP) \ -- VAR12 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, \ -- v4si, v2di, v2sf, v4sf, v2df, di, df) --#define BUILTIN_VB(T, N, MAP) \ -- VAR2 (T, N, MAP, v8qi, v16qi) --#define BUILTIN_VD(T, N, MAP) \ -- VAR4 (T, N, MAP, v8qi, v4hi, v2si, v2sf) --#define BUILTIN_VDC(T, N, MAP) \ -- VAR6 (T, N, MAP, v8qi, v4hi, v2si, v2sf, di, df) --#define BUILTIN_VDIC(T, N, MAP) \ -- VAR3 (T, N, MAP, v8qi, v4hi, v2si) --#define BUILTIN_VDN(T, N, MAP) \ -- VAR3 (T, N, MAP, v4hi, v2si, di) --#define BUILTIN_VDQ(T, N, MAP) \ -- VAR7 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di) --#define BUILTIN_VDQF(T, N, MAP) \ -- VAR3 (T, N, MAP, v2sf, v4sf, v2df) --#define BUILTIN_VDQH(T, N, MAP) \ -- VAR2 (T, N, MAP, v4hi, v8hi) --#define BUILTIN_VDQHS(T, N, MAP) \ -- VAR4 (T, N, MAP, v4hi, v8hi, v2si, v4si) --#define BUILTIN_VDQIF(T, N, MAP) \ -- VAR9 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2sf, v4sf, v2df) --#define BUILTIN_VDQM(T, N, MAP) \ -- VAR6 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si) --#define BUILTIN_VDQV(T, N, MAP) \ -- VAR5 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v4si) --#define BUILTIN_VDQQH(T, N, MAP) \ -- VAR4 (T, N, MAP, v8qi, v16qi, v4hi, v8hi) --#define BUILTIN_VDQ_BHSI(T, N, MAP) \ -- VAR6 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si) --#define BUILTIN_VDQ_I(T, N, MAP) \ -- VAR7 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di) --#define BUILTIN_VDW(T, N, MAP) \ -- VAR3 (T, N, MAP, v8qi, v4hi, v2si) --#define BUILTIN_VD_BHSI(T, N, MAP) \ -- VAR3 (T, N, MAP, v8qi, v4hi, v2si) --#define BUILTIN_VD_HSI(T, N, MAP) \ -- VAR2 (T, N, MAP, v4hi, v2si) --#define BUILTIN_VD_RE(T, N, MAP) \ -- VAR6 (T, N, MAP, v8qi, v4hi, v2si, v2sf, di, df) --#define BUILTIN_VQ(T, N, MAP) \ -- VAR6 (T, N, MAP, v16qi, v8hi, v4si, v2di, v4sf, v2df) --#define BUILTIN_VQN(T, N, MAP) \ -- VAR3 (T, N, MAP, v8hi, v4si, v2di) --#define BUILTIN_VQW(T, N, MAP) \ -- VAR3 (T, N, MAP, v16qi, v8hi, v4si) --#define BUILTIN_VQ_HSI(T, N, MAP) \ -- VAR2 (T, N, MAP, v8hi, v4si) --#define BUILTIN_VQ_S(T, N, MAP) \ -- VAR6 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si) --#define BUILTIN_VSDQ_HSI(T, N, MAP) \ -- VAR6 (T, N, MAP, v4hi, v8hi, v2si, v4si, hi, si) --#define BUILTIN_VSDQ_I(T, N, MAP) \ -- VAR11 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, qi, hi, si, di) --#define BUILTIN_VSDQ_I_BHSI(T, N, MAP) \ -- VAR10 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, qi, hi, si) --#define BUILTIN_VSDQ_I_DI(T, N, MAP) \ -- VAR8 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, di) --#define BUILTIN_VSD_HSI(T, N, MAP) \ -- VAR4 (T, N, MAP, v4hi, v2si, hi, si) --#define BUILTIN_VSQN_HSDI(T, N, MAP) \ -- VAR6 (T, N, MAP, v8hi, v4si, v2di, hi, 
si, di) --#define BUILTIN_VSTRUCT(T, N, MAP) \ -- VAR3 (T, N, MAP, oi, ci, xi) -- - static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = { - #include "aarch64-simd-builtins.def" - }; - -+/* There's only 8 CRC32 builtins. Probably not worth their own .def file. */ -+#define AARCH64_CRC32_BUILTINS \ -+ CRC32_BUILTIN (crc32b, QI) \ -+ CRC32_BUILTIN (crc32h, HI) \ -+ CRC32_BUILTIN (crc32w, SI) \ -+ CRC32_BUILTIN (crc32x, DI) \ -+ CRC32_BUILTIN (crc32cb, QI) \ -+ CRC32_BUILTIN (crc32ch, HI) \ -+ CRC32_BUILTIN (crc32cw, SI) \ -+ CRC32_BUILTIN (crc32cx, DI) -+ -+typedef struct -+{ -+ const char *name; -+ enum machine_mode mode; -+ const enum insn_code icode; -+ unsigned int fcode; -+} aarch64_crc_builtin_datum; -+ -+#define CRC32_BUILTIN(N, M) \ -+ AARCH64_BUILTIN_##N, -+ - #undef VAR1 - #define VAR1(T, N, MAP, A) \ - AARCH64_SIMD_BUILTIN_##T##_##N##A, -@@ -371,13 +332,32 @@ - enum aarch64_builtins - { - AARCH64_BUILTIN_MIN, -+ -+ AARCH64_BUILTIN_GET_FPCR, -+ AARCH64_BUILTIN_SET_FPCR, -+ AARCH64_BUILTIN_GET_FPSR, -+ AARCH64_BUILTIN_SET_FPSR, -+ - AARCH64_SIMD_BUILTIN_BASE, - #include "aarch64-simd-builtins.def" - AARCH64_SIMD_BUILTIN_MAX = AARCH64_SIMD_BUILTIN_BASE - + ARRAY_SIZE (aarch64_simd_builtin_data), -+ AARCH64_CRC32_BUILTIN_BASE, -+ AARCH64_CRC32_BUILTINS -+ AARCH64_CRC32_BUILTIN_MAX, - AARCH64_BUILTIN_MAX - }; - -+#undef CRC32_BUILTIN -+#define CRC32_BUILTIN(N, M) \ -+ {"__builtin_aarch64_"#N, M##mode, CODE_FOR_aarch64_##N, AARCH64_BUILTIN_##N}, -+ -+static aarch64_crc_builtin_datum aarch64_crc_builtin_data[] = { -+ AARCH64_CRC32_BUILTINS -+}; -+ -+#undef CRC32_BUILTIN -+ - static GTY(()) tree aarch64_builtin_decls[AARCH64_BUILTIN_MAX]; - - #define NUM_DREG_TYPES 6 -@@ -639,25 +619,10 @@ - bool print_type_signature_p = false; - char type_signature[SIMD_MAX_BUILTIN_ARGS] = { 0 }; - aarch64_simd_builtin_datum *d = &aarch64_simd_builtin_data[i]; -- const char *const modenames[] = -- { -- "v8qi", "v4hi", "v2si", "v2sf", "di", "df", -- "v16qi", "v8hi", "v4si", "v4sf", "v2di", "v2df", -- "ti", "ei", "oi", "xi", "si", "sf", "hi", "qi" -- }; -- const enum machine_mode modes[] = -- { -- V8QImode, V4HImode, V2SImode, V2SFmode, DImode, DFmode, -- V16QImode, V8HImode, V4SImode, V4SFmode, V2DImode, -- V2DFmode, TImode, EImode, OImode, XImode, SImode, -- SFmode, HImode, QImode -- }; - char namebuf[60]; - tree ftype = NULL; - tree fndecl = NULL; - -- gcc_assert (ARRAY_SIZE (modenames) == T_MAX); -- - d->fcode = fcode; - - /* We must track two variables here. op_num is -@@ -705,7 +670,7 @@ - /* Some builtins have different user-facing types - for certain arguments, encoded in d->mode. */ - if (qualifiers & qualifier_map_mode) -- op_mode = modes[d->mode]; -+ op_mode = d->mode; - - /* For pointers, we want a pointer to the basic type - of the vector. 
*/ -@@ -737,11 +702,11 @@ - gcc_assert (ftype != NULL); - - if (print_type_signature_p) -- snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s%s_%s", -- d->name, modenames[d->mode], type_signature); -+ snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s_%s", -+ d->name, type_signature); - else -- snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s%s", -- d->name, modenames[d->mode]); -+ snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s", -+ d->name); - - fndecl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, - NULL, NULL_TREE); -@@ -749,11 +714,49 @@ - } - } - -+static void -+aarch64_init_crc32_builtins () -+{ -+ tree usi_type = aarch64_build_unsigned_type (SImode); -+ unsigned int i = 0; -+ -+ for (i = 0; i < ARRAY_SIZE (aarch64_crc_builtin_data); ++i) -+ { -+ aarch64_crc_builtin_datum* d = &aarch64_crc_builtin_data[i]; -+ tree argtype = aarch64_build_unsigned_type (d->mode); -+ tree ftype = build_function_type_list (usi_type, usi_type, argtype, NULL_TREE); -+ tree fndecl = add_builtin_function (d->name, ftype, d->fcode, -+ BUILT_IN_MD, NULL, NULL_TREE); -+ -+ aarch64_builtin_decls[d->fcode] = fndecl; -+ } -+} -+ - void - aarch64_init_builtins (void) - { -+ tree ftype_set_fpr -+ = build_function_type_list (void_type_node, unsigned_type_node, NULL); -+ tree ftype_get_fpr -+ = build_function_type_list (unsigned_type_node, NULL); -+ -+ aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR] -+ = add_builtin_function ("__builtin_aarch64_get_fpcr", ftype_get_fpr, -+ AARCH64_BUILTIN_GET_FPCR, BUILT_IN_MD, NULL, NULL_TREE); -+ aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR] -+ = add_builtin_function ("__builtin_aarch64_set_fpcr", ftype_set_fpr, -+ AARCH64_BUILTIN_SET_FPCR, BUILT_IN_MD, NULL, NULL_TREE); -+ aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR] -+ = add_builtin_function ("__builtin_aarch64_get_fpsr", ftype_get_fpr, -+ AARCH64_BUILTIN_GET_FPSR, BUILT_IN_MD, NULL, NULL_TREE); -+ aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR] -+ = add_builtin_function ("__builtin_aarch64_set_fpsr", ftype_set_fpr, -+ AARCH64_BUILTIN_SET_FPSR, BUILT_IN_MD, NULL, NULL_TREE); -+ - if (TARGET_SIMD) - aarch64_init_simd_builtins (); -+ if (TARGET_CRC32) -+ aarch64_init_crc32_builtins (); - } - - tree -@@ -774,9 +777,8 @@ - - static rtx - aarch64_simd_expand_args (rtx target, int icode, int have_retval, -- tree exp, ...) -+ tree exp, builtin_simd_arg *args) - { -- va_list ap; - rtx pat; - tree arg[SIMD_MAX_BUILTIN_ARGS]; - rtx op[SIMD_MAX_BUILTIN_ARGS]; -@@ -790,11 +792,9 @@ - || !(*insn_data[icode].operand[0].predicate) (target, tmode))) - target = gen_reg_rtx (tmode); - -- va_start (ap, exp); -- - for (;;) - { -- builtin_simd_arg thisarg = (builtin_simd_arg) va_arg (ap, int); -+ builtin_simd_arg thisarg = args[argc]; - - if (thisarg == SIMD_ARG_STOP) - break; -@@ -818,8 +818,11 @@ - case SIMD_ARG_CONSTANT: - if (!(*insn_data[icode].operand[argc + have_retval].predicate) - (op[argc], mode[argc])) -+ { - error_at (EXPR_LOCATION (exp), "incompatible type for argument %d, " - "expected %<const int%>", argc + 1); -+ return const0_rtx; -+ } - break; - - case SIMD_ARG_STOP: -@@ -830,8 +833,6 @@ - } - } - -- va_end (ap); -- - if (have_retval) - switch (argc) - { -@@ -886,7 +887,7 @@ - } - - if (!pat) -- return 0; -+ return NULL_RTX; - - emit_insn (pat); - -@@ -945,14 +946,45 @@ - /* The interface to aarch64_simd_expand_args expects a 0 if - the function is void, and a 1 if it is not. 
*/ - return aarch64_simd_expand_args -- (target, icode, !is_void, exp, -- args[1], -- args[2], -- args[3], -- args[4], -- SIMD_ARG_STOP); -+ (target, icode, !is_void, exp, &args[1]); - } - -+rtx -+aarch64_crc32_expand_builtin (int fcode, tree exp, rtx target) -+{ -+ rtx pat; -+ aarch64_crc_builtin_datum *d -+ = &aarch64_crc_builtin_data[fcode - (AARCH64_CRC32_BUILTIN_BASE + 1)]; -+ enum insn_code icode = d->icode; -+ tree arg0 = CALL_EXPR_ARG (exp, 0); -+ tree arg1 = CALL_EXPR_ARG (exp, 1); -+ rtx op0 = expand_normal (arg0); -+ rtx op1 = expand_normal (arg1); -+ enum machine_mode tmode = insn_data[icode].operand[0].mode; -+ enum machine_mode mode0 = insn_data[icode].operand[1].mode; -+ enum machine_mode mode1 = insn_data[icode].operand[2].mode; -+ -+ if (! target -+ || GET_MODE (target) != tmode -+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) -+ target = gen_reg_rtx (tmode); -+ -+ gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode) -+ && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)); -+ -+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) -+ op0 = copy_to_mode_reg (mode0, op0); -+ if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) -+ op1 = copy_to_mode_reg (mode1, op1); -+ -+ pat = GEN_FCN (icode) (target, op0, op1); -+ if (!pat) -+ return NULL_RTX; -+ -+ emit_insn (pat); -+ return target; -+} -+ - /* Expand an expression EXP that calls a built-in function, - with result going to TARGET if that's convenient. */ - rtx -@@ -964,11 +996,43 @@ - { - tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); - int fcode = DECL_FUNCTION_CODE (fndecl); -+ int icode; -+ rtx pat, op0; -+ tree arg0; - -- if (fcode >= AARCH64_SIMD_BUILTIN_BASE) -+ switch (fcode) -+ { -+ case AARCH64_BUILTIN_GET_FPCR: -+ case AARCH64_BUILTIN_SET_FPCR: -+ case AARCH64_BUILTIN_GET_FPSR: -+ case AARCH64_BUILTIN_SET_FPSR: -+ if ((fcode == AARCH64_BUILTIN_GET_FPCR) -+ || (fcode == AARCH64_BUILTIN_GET_FPSR)) -+ { -+ icode = (fcode == AARCH64_BUILTIN_GET_FPSR) ? -+ CODE_FOR_get_fpsr : CODE_FOR_get_fpcr; -+ target = gen_reg_rtx (SImode); -+ pat = GEN_FCN (icode) (target); -+ } -+ else -+ { -+ target = NULL_RTX; -+ icode = (fcode == AARCH64_BUILTIN_SET_FPSR) ? 
-+ CODE_FOR_set_fpsr : CODE_FOR_set_fpcr; -+ arg0 = CALL_EXPR_ARG (exp, 0); -+ op0 = expand_normal (arg0); -+ pat = GEN_FCN (icode) (op0); -+ } -+ emit_insn (pat); -+ return target; -+ } -+ -+ if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <= AARCH64_SIMD_BUILTIN_MAX) - return aarch64_simd_expand_builtin (fcode, exp, target); -+ else if (fcode >= AARCH64_CRC32_BUILTIN_BASE && fcode <= AARCH64_CRC32_BUILTIN_MAX) -+ return aarch64_crc32_expand_builtin (fcode, exp, target); - -- return NULL_RTX; -+ gcc_unreachable (); - } - - tree -@@ -1086,7 +1150,29 @@ - - return aarch64_builtin_decls[builtin]; - } -- -+ case BUILT_IN_BSWAP16: -+#undef AARCH64_CHECK_BUILTIN_MODE -+#define AARCH64_CHECK_BUILTIN_MODE(C, N) \ -+ (out_mode == N##Imode && out_n == C \ -+ && in_mode == N##Imode && in_n == C) -+ if (AARCH64_CHECK_BUILTIN_MODE (4, H)) -+ return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv4hi]; -+ else if (AARCH64_CHECK_BUILTIN_MODE (8, H)) -+ return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv8hi]; -+ else -+ return NULL_TREE; -+ case BUILT_IN_BSWAP32: -+ if (AARCH64_CHECK_BUILTIN_MODE (2, S)) -+ return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv2si]; -+ else if (AARCH64_CHECK_BUILTIN_MODE (4, S)) -+ return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv4si]; -+ else -+ return NULL_TREE; -+ case BUILT_IN_BSWAP64: -+ if (AARCH64_CHECK_BUILTIN_MODE (2, D)) -+ return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv2di]; -+ else -+ return NULL_TREE; - default: - return NULL_TREE; - } -@@ -1111,22 +1197,25 @@ - BUILTIN_VALLDI (UNOP, abs, 2) - return fold_build1 (ABS_EXPR, type, args[0]); - break; -- BUILTIN_VALLDI (BINOP, cmge, 0) -- return fold_build2 (GE_EXPR, type, args[0], args[1]); -- break; -- BUILTIN_VALLDI (BINOP, cmgt, 0) -- return fold_build2 (GT_EXPR, type, args[0], args[1]); -- break; -- BUILTIN_VALLDI (BINOP, cmeq, 0) -- return fold_build2 (EQ_EXPR, type, args[0], args[1]); -- break; -- BUILTIN_VSDQ_I_DI (BINOP, cmtst, 0) -- { -- tree and_node = fold_build2 (BIT_AND_EXPR, type, args[0], args[1]); -- tree vec_zero_node = build_zero_cst (type); -- return fold_build2 (NE_EXPR, type, and_node, vec_zero_node); -- break; -- } -+ VAR1 (REINTERP_SS, reinterpretdi, 0, df) -+ VAR1 (REINTERP_SS, reinterpretv8qi, 0, df) -+ VAR1 (REINTERP_SS, reinterpretv4hi, 0, df) -+ VAR1 (REINTERP_SS, reinterpretv2si, 0, df) -+ VAR1 (REINTERP_SS, reinterpretv2sf, 0, df) -+ BUILTIN_VD (REINTERP_SS, reinterpretdf, 0) -+ BUILTIN_VD (REINTERP_SU, reinterpretdf, 0) -+ VAR1 (REINTERP_US, reinterpretdi, 0, df) -+ VAR1 (REINTERP_US, reinterpretv8qi, 0, df) -+ VAR1 (REINTERP_US, reinterpretv4hi, 0, df) -+ VAR1 (REINTERP_US, reinterpretv2si, 0, df) -+ VAR1 (REINTERP_US, reinterpretv2sf, 0, df) -+ BUILTIN_VD (REINTERP_SP, reinterpretdf, 0) -+ VAR1 (REINTERP_PS, reinterpretdi, 0, df) -+ VAR1 (REINTERP_PS, reinterpretv8qi, 0, df) -+ VAR1 (REINTERP_PS, reinterpretv4hi, 0, df) -+ VAR1 (REINTERP_PS, reinterpretv2si, 0, df) -+ VAR1 (REINTERP_PS, reinterpretv2sf, 0, df) -+ return fold_build1 (VIEW_CONVERT_EXPR, type, args[0]); - VAR1 (UNOP, floatv2si, 2, v2sf) - VAR1 (UNOP, floatv4si, 2, v4sf) - VAR1 (UNOP, floatv2di, 2, v2df) -@@ -1146,6 +1235,20 @@ - tree call = gimple_call_fn (stmt); - tree fndecl; - gimple new_stmt = NULL; -+ -+ /* The operations folded below are reduction operations. These are -+ defined to leave their result in the 0'th element (from the perspective -+ of GCC). 
The architectural instruction we are folding will leave the -+ result in the 0'th element (from the perspective of the architecture). -+ For big-endian systems, these perspectives are not aligned. -+ -+ It is therefore wrong to perform this fold on big-endian. There -+ are some tricks we could play with shuffling, but the mid-end is -+ inconsistent in the way it treats reduction operations, so we will -+ end up in difficulty. Until we fix the ambiguity - just bail out. */ -+ if (BYTES_BIG_ENDIAN) -+ return false; -+ - if (call) - { - fndecl = gimple_call_fndecl (stmt); -@@ -1196,43 +1299,108 @@ - return changed; - } - -+void -+aarch64_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) -+{ -+ const unsigned AARCH64_FE_INVALID = 1; -+ const unsigned AARCH64_FE_DIVBYZERO = 2; -+ const unsigned AARCH64_FE_OVERFLOW = 4; -+ const unsigned AARCH64_FE_UNDERFLOW = 8; -+ const unsigned AARCH64_FE_INEXACT = 16; -+ const unsigned HOST_WIDE_INT AARCH64_FE_ALL_EXCEPT = (AARCH64_FE_INVALID -+ | AARCH64_FE_DIVBYZERO -+ | AARCH64_FE_OVERFLOW -+ | AARCH64_FE_UNDERFLOW -+ | AARCH64_FE_INEXACT); -+ const unsigned HOST_WIDE_INT AARCH64_FE_EXCEPT_SHIFT = 8; -+ tree fenv_cr, fenv_sr, get_fpcr, set_fpcr, mask_cr, mask_sr; -+ tree ld_fenv_cr, ld_fenv_sr, masked_fenv_cr, masked_fenv_sr, hold_fnclex_cr; -+ tree hold_fnclex_sr, new_fenv_var, reload_fenv, restore_fnenv, get_fpsr, set_fpsr; -+ tree update_call, atomic_feraiseexcept, hold_fnclex, masked_fenv, ld_fenv; -+ -+ /* Generate the equivalence of : -+ unsigned int fenv_cr; -+ fenv_cr = __builtin_aarch64_get_fpcr (); -+ -+ unsigned int fenv_sr; -+ fenv_sr = __builtin_aarch64_get_fpsr (); -+ -+ Now set all exceptions to non-stop -+ unsigned int mask_cr -+ = ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT); -+ unsigned int masked_cr; -+ masked_cr = fenv_cr & mask_cr; -+ -+ And clear all exception flags -+ unsigned int maske_sr = ~AARCH64_FE_ALL_EXCEPT; -+ unsigned int masked_cr; -+ masked_sr = fenv_sr & mask_sr; -+ -+ __builtin_aarch64_set_cr (masked_cr); -+ __builtin_aarch64_set_sr (masked_sr); */ -+ -+ fenv_cr = create_tmp_var (unsigned_type_node, NULL); -+ fenv_sr = create_tmp_var (unsigned_type_node, NULL); -+ -+ get_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR]; -+ set_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR]; -+ get_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR]; -+ set_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR]; -+ -+ mask_cr = build_int_cst (unsigned_type_node, -+ ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT)); -+ mask_sr = build_int_cst (unsigned_type_node, -+ ~(AARCH64_FE_ALL_EXCEPT)); -+ -+ ld_fenv_cr = build2 (MODIFY_EXPR, unsigned_type_node, -+ fenv_cr, build_call_expr (get_fpcr, 0)); -+ ld_fenv_sr = build2 (MODIFY_EXPR, unsigned_type_node, -+ fenv_sr, build_call_expr (get_fpsr, 0)); -+ -+ masked_fenv_cr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_cr, mask_cr); -+ masked_fenv_sr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_sr, mask_sr); -+ -+ hold_fnclex_cr = build_call_expr (set_fpcr, 1, masked_fenv_cr); -+ hold_fnclex_sr = build_call_expr (set_fpsr, 1, masked_fenv_sr); -+ -+ hold_fnclex = build2 (COMPOUND_EXPR, void_type_node, hold_fnclex_cr, -+ hold_fnclex_sr); -+ masked_fenv = build2 (COMPOUND_EXPR, void_type_node, masked_fenv_cr, -+ masked_fenv_sr); -+ ld_fenv = build2 (COMPOUND_EXPR, void_type_node, ld_fenv_cr, ld_fenv_sr); -+ -+ *hold = build2 (COMPOUND_EXPR, void_type_node, -+ build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv), -+ hold_fnclex); -+ -+ /* Store 
the value of masked_fenv to clear the exceptions: -+ __builtin_aarch64_set_fpsr (masked_fenv_sr); */ -+ -+ *clear = build_call_expr (set_fpsr, 1, masked_fenv_sr); -+ -+ /* Generate the equivalent of : -+ unsigned int new_fenv_var; -+ new_fenv_var = __builtin_aarch64_get_fpsr (); -+ -+ __builtin_aarch64_set_fpsr (fenv_sr); -+ -+ __atomic_feraiseexcept (new_fenv_var); */ -+ -+ new_fenv_var = create_tmp_var (unsigned_type_node, NULL); -+ reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, -+ new_fenv_var, build_call_expr (get_fpsr, 0)); -+ restore_fnenv = build_call_expr (set_fpsr, 1, fenv_sr); -+ atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT); -+ update_call = build_call_expr (atomic_feraiseexcept, 1, -+ fold_convert (integer_type_node, new_fenv_var)); -+ *update = build2 (COMPOUND_EXPR, void_type_node, -+ build2 (COMPOUND_EXPR, void_type_node, -+ reload_fenv, restore_fnenv), update_call); -+} -+ -+ - #undef AARCH64_CHECK_BUILTIN_MODE - #undef AARCH64_FIND_FRINT_VARIANT --#undef BUILTIN_DX --#undef BUILTIN_SDQ_I --#undef BUILTIN_SD_HSI --#undef BUILTIN_V2F --#undef BUILTIN_VALL --#undef BUILTIN_VB --#undef BUILTIN_VD --#undef BUILTIN_VDC --#undef BUILTIN_VDIC --#undef BUILTIN_VDN --#undef BUILTIN_VDQ --#undef BUILTIN_VDQF --#undef BUILTIN_VDQH --#undef BUILTIN_VDQHS --#undef BUILTIN_VDQIF --#undef BUILTIN_VDQM --#undef BUILTIN_VDQV --#undef BUILTIN_VDQ_BHSI --#undef BUILTIN_VDQ_I --#undef BUILTIN_VDW --#undef BUILTIN_VD_BHSI --#undef BUILTIN_VD_HSI --#undef BUILTIN_VD_RE --#undef BUILTIN_VQ --#undef BUILTIN_VQN --#undef BUILTIN_VQW --#undef BUILTIN_VQ_HSI --#undef BUILTIN_VQ_S --#undef BUILTIN_VSDQ_HSI --#undef BUILTIN_VSDQ_I --#undef BUILTIN_VSDQ_I_BHSI --#undef BUILTIN_VSDQ_I_DI --#undef BUILTIN_VSD_HSI --#undef BUILTIN_VSQN_HSDI --#undef BUILTIN_VSTRUCT - #undef CF0 - #undef CF1 - #undef CF2 -@@ -1251,3 +1419,4 @@ - #undef VAR10 - #undef VAR11 - -+#include "gt-aarch64-builtins.h" ---- a/src/gcc/config/aarch64/thunderx.md -+++ b/src/gcc/config/aarch64/thunderx.md -@@ -0,0 +1,260 @@ -+;; Cavium ThunderX pipeline description -+;; Copyright (C) 2014 Free Software Foundation, Inc. -+;; -+;; Written by Andrew Pinski <apinski@cavium.com> -+ -+;; This file is part of GCC. -+ -+;; GCC is free software; you can redistribute it and/or modify -+;; it under the terms of the GNU General Public License as published by -+;; the Free Software Foundation; either version 3, or (at your option) -+;; any later version. -+ -+;; GCC is distributed in the hope that it will be useful, -+;; but WITHOUT ANY WARRANTY; without even the implied warranty of -+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+;; GNU General Public License for more details. -+ -+;; You should have received a copy of the GNU General Public License -+;; along with GCC; see the file COPYING3. If not see -+;; <http://www.gnu.org/licenses/>. -+;; Copyright (C) 2004, 2005, 2006 Cavium Networks. -+ -+ -+;; Thunder is a dual-issue processor that can issue all instructions on -+;; pipe0 and a subset on pipe1. 
-+ -+ -+(define_automaton "thunderx_main, thunderx_mult, thunderx_divide, thunderx_simd") -+ -+(define_cpu_unit "thunderx_pipe0" "thunderx_main") -+(define_cpu_unit "thunderx_pipe1" "thunderx_main") -+(define_cpu_unit "thunderx_mult" "thunderx_mult") -+(define_cpu_unit "thunderx_divide" "thunderx_divide") -+(define_cpu_unit "thunderx_simd" "thunderx_simd") -+ -+(define_insn_reservation "thunderx_add" 1 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "adc_imm,adc_reg,adr,alu_imm,alu_reg,alus_imm,alus_reg,extend,logic_imm,logic_reg,logics_imm,logics_reg,mov_imm,mov_reg")) -+ "thunderx_pipe0 | thunderx_pipe1") -+ -+(define_insn_reservation "thunderx_shift" 1 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "bfm,extend,shift_imm,shift_reg")) -+ "thunderx_pipe0 | thunderx_pipe1") -+ -+ -+;; Arthimentic instructions with an extra shift or extend is two cycles. -+;; FIXME: This needs more attributes on aarch64 than what is currently there; -+;; this is conserative for now. -+;; Except this is not correct as this is only for !(LSL && shift by 0/1/2/3) -+;; Except this is not correct as this is only for !(zero extend) -+ -+(define_insn_reservation "thunderx_arith_shift" 2 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "alu_ext,alu_shift_imm,alu_shift_reg,alus_ext,logic_shift_imm,logic_shift_reg,logics_shift_imm,logics_shift_reg,alus_shift_imm")) -+ "thunderx_pipe0 | thunderx_pipe1") -+ -+(define_insn_reservation "thunderx_csel" 2 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "csel")) -+ "thunderx_pipe0 | thunderx_pipe1") -+ -+;; Multiply and mulitply accumulate and count leading zeros can only happen on pipe 1 -+ -+(define_insn_reservation "thunderx_mul" 4 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "mul,muls,mla,mlas,clz,smull,umull,smlal,umlal")) -+ "thunderx_pipe1 + thunderx_mult") -+ -+;; Multiply high instructions take an extra cycle and cause the muliply unit to -+;; be busy for an extra cycle. 
-+ -+;(define_insn_reservation "thunderx_mul_high" 5 -+; (and (eq_attr "tune" "thunderx") -+; (eq_attr "type" "smull,umull")) -+; "thunderx_pipe1 + thunderx_mult") -+ -+(define_insn_reservation "thunderx_div32" 22 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "udiv,sdiv")) -+ "thunderx_pipe1 + thunderx_divide, thunderx_divide * 21") -+ -+;(define_insn_reservation "thunderx_div64" 38 -+; (and (eq_attr "tune" "thunderx") -+; (eq_attr "type" "udiv,sdiv") -+; (eq_attr "mode" "DI")) -+; "thunderx_pipe1 + thunderx_divide, thunderx_divide * 34") -+ -+;; Stores take one cycle in pipe 0 -+(define_insn_reservation "thunderx_store" 1 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "store1")) -+ "thunderx_pipe0") -+ -+;; Store pair are single issued -+(define_insn_reservation "thunderx_storepair" 1 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "store2")) -+ "thunderx_pipe0 + thunderx_pipe1") -+ -+ -+;; loads (and load pairs) from L1 take 3 cycles in pipe 0 -+(define_insn_reservation "thunderx_load" 3 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "load1, load2")) -+ "thunderx_pipe0") -+ -+(define_insn_reservation "thunderx_brj" 1 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "branch,trap,call")) -+ "thunderx_pipe1") -+ -+;; FPU -+ -+(define_insn_reservation "thunderx_fadd" 4 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "faddd,fadds")) -+ "thunderx_pipe1") -+ -+(define_insn_reservation "thunderx_fconst" 1 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "fconsts,fconstd")) -+ "thunderx_pipe1") -+ -+;; Moves between fp are 2 cycles including min/max/select/abs/neg -+(define_insn_reservation "thunderx_fmov" 2 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "fmov,f_minmaxs,f_minmaxd,fcsel,ffarithd,ffariths")) -+ "thunderx_pipe1") -+ -+(define_insn_reservation "thunderx_fmovgpr" 2 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "f_mrc, f_mcr")) -+ "thunderx_pipe1") -+ -+(define_insn_reservation "thunderx_fmul" 6 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "fmacs,fmacd,fmuls,fmuld")) -+ "thunderx_pipe1") -+ -+(define_insn_reservation "thunderx_fdivs" 12 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "fdivs")) -+ "thunderx_pipe1 + thunderx_divide, thunderx_divide*8") -+ -+(define_insn_reservation "thunderx_fdivd" 22 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "fdivd")) -+ "thunderx_pipe1 + thunderx_divide, thunderx_divide*18") -+ -+(define_insn_reservation "thunderx_fsqrts" 17 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "fsqrts")) -+ "thunderx_pipe1 + thunderx_divide, thunderx_divide*13") -+ -+(define_insn_reservation "thunderx_fsqrtd" 28 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "fsqrtd")) -+ "thunderx_pipe1 + thunderx_divide, thunderx_divide*31") -+ -+;; The rounding conversion inside fp is 4 cycles -+(define_insn_reservation "thunderx_frint" 4 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "f_rints,f_rintd")) -+ "thunderx_pipe1") -+ -+;; Float to integer with a move from int to/from float is 6 cycles -+(define_insn_reservation "thunderx_f_cvt" 6 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "f_cvt,f_cvtf2i,f_cvti2f")) -+ "thunderx_pipe1") -+ -+;; FP/SIMD load/stores happen in pipe 0 -+;; 64bit Loads register/pairs are 4 cycles from L1 -+(define_insn_reservation "thunderx_64simd_fp_load" 4 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "f_loadd,f_loads,neon_load1_1reg,\ -+ neon_load1_1reg_q,neon_load1_2reg")) -+ "thunderx_pipe0") -+ -+;; 128bit load pair is 
singled issue and 4 cycles from L1 -+(define_insn_reservation "thunderx_128simd_pair_load" 4 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "neon_load1_2reg_q")) -+ "thunderx_pipe0+thunderx_pipe1") -+ -+;; FP/SIMD Stores takes one cycle in pipe 0 -+(define_insn_reservation "thunderx_simd_fp_store" 1 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "f_stored,f_stores,neon_store1_1reg,neon_store1_1reg_q")) -+ "thunderx_pipe0") -+ -+;; 64bit neon store pairs are single issue for one cycle -+(define_insn_reservation "thunderx_64neon_storepair" 1 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "neon_store1_2reg")) -+ "thunderx_pipe0 + thunderx_pipe1") -+ -+;; 128bit neon store pair are single issued for two cycles -+(define_insn_reservation "thunderx_128neon_storepair" 2 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "neon_store1_2reg_q")) -+ "(thunderx_pipe0 + thunderx_pipe1)*2") -+ -+ -+;; SIMD/NEON (q forms take an extra cycle) -+ -+;; Thunder simd move instruction types - 2/3 cycles -+(define_insn_reservation "thunderx_neon_move" 2 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "neon_logic, neon_bsl, neon_fp_compare_s, \ -+ neon_fp_compare_d, neon_move")) -+ "thunderx_pipe1 + thunderx_simd") -+ -+(define_insn_reservation "thunderx_neon_move_q" 3 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "neon_logic_q, neon_bsl_q, neon_fp_compare_s_q, \ -+ neon_fp_compare_d_q, neon_move_q")) -+ "thunderx_pipe1 + thunderx_simd, thunderx_simd") -+ -+ -+;; Thunder simd simple/add instruction types - 4/5 cycles -+ -+(define_insn_reservation "thunderx_neon_add" 4 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "neon_reduc_add, neon_reduc_minmax, neon_fp_reduc_add_s, \ -+ neon_fp_reduc_add_d, neon_fp_to_int_s, neon_fp_to_int_d, \ -+ neon_add_halve, neon_sub_halve, neon_qadd, neon_compare, \ -+ neon_compare_zero, neon_minmax, neon_abd, neon_add, neon_sub, \ -+ neon_fp_minmax_s, neon_fp_minmax_d, neon_reduc_add, neon_cls, \ -+ neon_qabs, neon_qneg, neon_fp_addsub_s, neon_fp_addsub_d")) -+ "thunderx_pipe1 + thunderx_simd") -+ -+;; BIG NOTE: neon_add_long/neon_sub_long don't have a q form which is incorrect -+ -+(define_insn_reservation "thunderx_neon_add_q" 5 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "neon_reduc_add_q, neon_reduc_minmax_q, neon_fp_reduc_add_s_q, \ -+ neon_fp_reduc_add_d_q, neon_fp_to_int_s_q, neon_fp_to_int_d_q, \ -+ neon_add_halve_q, neon_sub_halve_q, neon_qadd_q, neon_compare_q, \ -+ neon_compare_zero_q, neon_minmax_q, neon_abd_q, neon_add_q, neon_sub_q, \ -+ neon_fp_minmax_s_q, neon_fp_minmax_d_q, neon_reduc_add_q, neon_cls_q, \ -+ neon_qabs_q, neon_qneg_q, neon_fp_addsub_s_q, neon_fp_addsub_d_q, \ -+ neon_add_long, neon_sub_long")) -+ "thunderx_pipe1 + thunderx_simd, thunderx_simd") -+ -+ -+;; Thunder 128bit SIMD reads the upper halve in cycle 2 and writes in the last cycle -+(define_bypass 2 "thunderx_neon_move_q" "thunderx_neon_move_q, thunderx_neon_add_q") -+(define_bypass 4 "thunderx_neon_add_q" "thunderx_neon_move_q, thunderx_neon_add_q") -+ -+;; Assume both pipes are needed for unknown and multiple-instruction -+;; patterns. -+ -+(define_insn_reservation "thunderx_unknown" 1 -+ (and (eq_attr "tune" "thunderx") -+ (eq_attr "type" "untyped,multiple")) -+ "thunderx_pipe0 + thunderx_pipe1") -+ -+ ---- a/src/gcc/config/aarch64/aarch64-protos.h -+++ b/src/gcc/config/aarch64/aarch64-protos.h -@@ -108,9 +108,22 @@ - cost models and vectors for address cost calculations, register - move costs and memory move costs. 
*/ - -+/* Scaled addressing modes can vary cost depending on the mode of the -+ value to be loaded/stored. QImode values cannot use scaled -+ addressing modes. */ -+ -+struct scale_addr_mode_cost -+{ -+ const int hi; -+ const int si; -+ const int di; -+ const int ti; -+}; -+ - /* Additional cost for addresses. */ - struct cpu_addrcost_table - { -+ const struct scale_addr_mode_cost addr_scale_costs; - const int pre_modify; - const int post_modify; - const int register_offset; -@@ -160,6 +173,7 @@ - }; - - HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned); -+int aarch64_get_condition_code (rtx); - bool aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode); - bool aarch64_cannot_change_mode_class (enum machine_mode, - enum machine_mode, -@@ -166,7 +180,9 @@ - enum reg_class); - enum aarch64_symbol_type - aarch64_classify_symbolic_expression (rtx, enum aarch64_symbol_context); -+bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT); - bool aarch64_constant_address_p (rtx); -+bool aarch64_expand_movmem (rtx *); - bool aarch64_float_const_zero_rtx_p (rtx); - bool aarch64_function_arg_regno_p (unsigned); - bool aarch64_gen_movmemqi (rtx *); -@@ -175,9 +191,12 @@ - bool aarch64_is_long_call_p (rtx); - bool aarch64_label_mentioned_p (rtx); - bool aarch64_legitimate_pic_operand_p (rtx); -+bool aarch64_modes_tieable_p (enum machine_mode mode1, -+ enum machine_mode mode2); - bool aarch64_move_imm (HOST_WIDE_INT, enum machine_mode); - bool aarch64_mov_operand_p (rtx, enum aarch64_symbol_context, - enum machine_mode); -+bool aarch64_offset_7bit_signed_scaled_p (enum machine_mode, HOST_WIDE_INT); - char *aarch64_output_scalar_simd_mov_immediate (rtx, enum machine_mode); - char *aarch64_output_simd_mov_immediate (rtx, enum machine_mode, unsigned); - bool aarch64_pad_arg_upward (enum machine_mode, const_tree); -@@ -184,6 +203,8 @@ - bool aarch64_pad_reg_upward (enum machine_mode, const_tree, bool); - bool aarch64_regno_ok_for_base_p (int, bool); - bool aarch64_regno_ok_for_index_p (int, bool); -+bool aarch64_simd_check_vect_par_cnst_half (rtx op, enum machine_mode mode, -+ bool high); - bool aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode); - bool aarch64_simd_imm_zero_p (rtx, enum machine_mode); - bool aarch64_simd_scalar_immediate_valid_for_move (rtx, enum machine_mode); -@@ -200,6 +221,8 @@ - enum aarch64_symbol_type aarch64_classify_tls_symbol (rtx); - enum reg_class aarch64_regno_regclass (unsigned); - int aarch64_asm_preferred_eh_data_format (int, int); -+enum machine_mode aarch64_hard_regno_caller_save_mode (unsigned, unsigned, -+ enum machine_mode); - int aarch64_hard_regno_mode_ok (unsigned, enum machine_mode); - int aarch64_hard_regno_nregs (unsigned, enum machine_mode); - int aarch64_simd_attr_length_move (rtx); -@@ -291,4 +314,5 @@ - extern void aarch64_final_prescan_insn (rtx); - extern bool - aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel); -+void aarch64_atomic_assign_expand_fenv (tree *, tree *, tree *); - #endif /* GCC_AARCH64_PROTOS_H */ ---- a/src/gcc/config/aarch64/aarch64-simd-builtins.def -+++ b/src/gcc/config/aarch64/aarch64-simd-builtins.def -@@ -47,36 +47,44 @@ - VAR1 (UNOP, addp, 0, di) - BUILTIN_VDQ_BHSI (UNOP, clz, 2) - -- BUILTIN_VALL (GETLANE, get_lane, 0) -- VAR1 (GETLANE, get_lane, 0, di) - BUILTIN_VALL (GETLANE, be_checked_get_lane, 0) - -- BUILTIN_VD_RE (REINTERP, reinterpretdi, 0) -- BUILTIN_VDC (REINTERP, reinterpretv8qi, 0) -- BUILTIN_VDC (REINTERP, reinterpretv4hi, 0) -- BUILTIN_VDC (REINTERP, 
reinterpretv2si, 0) -- BUILTIN_VDC (REINTERP, reinterpretv2sf, 0) -- BUILTIN_VQ (REINTERP, reinterpretv16qi, 0) -- BUILTIN_VQ (REINTERP, reinterpretv8hi, 0) -- BUILTIN_VQ (REINTERP, reinterpretv4si, 0) -- BUILTIN_VQ (REINTERP, reinterpretv4sf, 0) -- BUILTIN_VQ (REINTERP, reinterpretv2di, 0) -- BUILTIN_VQ (REINTERP, reinterpretv2df, 0) -+ VAR1 (REINTERP_SS, reinterpretdi, 0, df) -+ VAR1 (REINTERP_SS, reinterpretv8qi, 0, df) -+ VAR1 (REINTERP_SS, reinterpretv4hi, 0, df) -+ VAR1 (REINTERP_SS, reinterpretv2si, 0, df) -+ VAR1 (REINTERP_SS, reinterpretv2sf, 0, df) -+ BUILTIN_VD (REINTERP_SS, reinterpretdf, 0) - -- BUILTIN_VDQ_I (BINOP, dup_lane, 0) -+ BUILTIN_VD (REINTERP_SU, reinterpretdf, 0) -+ -+ VAR1 (REINTERP_US, reinterpretdi, 0, df) -+ VAR1 (REINTERP_US, reinterpretv8qi, 0, df) -+ VAR1 (REINTERP_US, reinterpretv4hi, 0, df) -+ VAR1 (REINTERP_US, reinterpretv2si, 0, df) -+ VAR1 (REINTERP_US, reinterpretv2sf, 0, df) -+ -+ BUILTIN_VD (REINTERP_SP, reinterpretdf, 0) -+ -+ VAR1 (REINTERP_PS, reinterpretdi, 0, df) -+ VAR1 (REINTERP_PS, reinterpretv8qi, 0, df) -+ VAR1 (REINTERP_PS, reinterpretv4hi, 0, df) -+ VAR1 (REINTERP_PS, reinterpretv2si, 0, df) -+ VAR1 (REINTERP_PS, reinterpretv2sf, 0, df) -+ - /* Implemented by aarch64_<sur>q<r>shl<mode>. */ - BUILTIN_VSDQ_I (BINOP, sqshl, 0) -- BUILTIN_VSDQ_I (BINOP, uqshl, 0) -+ BUILTIN_VSDQ_I (BINOP_UUS, uqshl, 0) - BUILTIN_VSDQ_I (BINOP, sqrshl, 0) -- BUILTIN_VSDQ_I (BINOP, uqrshl, 0) -+ BUILTIN_VSDQ_I (BINOP_UUS, uqrshl, 0) - /* Implemented by aarch64_<su_optab><optab><mode>. */ - BUILTIN_VSDQ_I (BINOP, sqadd, 0) -- BUILTIN_VSDQ_I (BINOP, uqadd, 0) -+ BUILTIN_VSDQ_I (BINOPU, uqadd, 0) - BUILTIN_VSDQ_I (BINOP, sqsub, 0) -- BUILTIN_VSDQ_I (BINOP, uqsub, 0) -+ BUILTIN_VSDQ_I (BINOPU, uqsub, 0) - /* Implemented by aarch64_<sur>qadd<mode>. */ -- BUILTIN_VSDQ_I (BINOP, suqadd, 0) -- BUILTIN_VSDQ_I (BINOP, usqadd, 0) -+ BUILTIN_VSDQ_I (BINOP_SSU, suqadd, 0) -+ BUILTIN_VSDQ_I (BINOP_UUS, usqadd, 0) - - /* Implemented by aarch64_get_dreg<VSTRUCT:mode><VDC:mode>. */ - BUILTIN_VDC (GETLANE, get_dregoi, 0) -@@ -98,6 +106,10 @@ - BUILTIN_VQ (LOADSTRUCT, ld2, 0) - BUILTIN_VQ (LOADSTRUCT, ld3, 0) - BUILTIN_VQ (LOADSTRUCT, ld4, 0) -+ /* Implemented by aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>. */ -+ BUILTIN_VALLDIF (LOADSTRUCT, ld2r, 0) -+ BUILTIN_VALLDIF (LOADSTRUCT, ld3r, 0) -+ BUILTIN_VALLDIF (LOADSTRUCT, ld4r, 0) - /* Implemented by aarch64_st<VSTRUCT:nregs><VDC:mode>. */ - BUILTIN_VDC (STORESTRUCT, st2, 0) - BUILTIN_VDC (STORESTRUCT, st3, 0) -@@ -107,6 +119,10 @@ - BUILTIN_VQ (STORESTRUCT, st3, 0) - BUILTIN_VQ (STORESTRUCT, st4, 0) - -+ BUILTIN_VQ (STORESTRUCT_LANE, st2_lane, 0) -+ BUILTIN_VQ (STORESTRUCT_LANE, st3_lane, 0) -+ BUILTIN_VQ (STORESTRUCT_LANE, st4_lane, 0) -+ - BUILTIN_VQW (BINOP, saddl2, 0) - BUILTIN_VQW (BINOP, uaddl2, 0) - BUILTIN_VQW (BINOP, ssubl2, 0) -@@ -142,19 +158,19 @@ - BUILTIN_VSQN_HSDI (UNOP, sqmovn, 0) - BUILTIN_VSQN_HSDI (UNOP, uqmovn, 0) - /* Implemented by aarch64_s<optab><mode>. 
*/ -- BUILTIN_VSDQ_I_BHSI (UNOP, sqabs, 0) -- BUILTIN_VSDQ_I_BHSI (UNOP, sqneg, 0) -+ BUILTIN_VSDQ_I (UNOP, sqabs, 0) -+ BUILTIN_VSDQ_I (UNOP, sqneg, 0) - -- BUILTIN_VSD_HSI (QUADOP, sqdmlal_lane, 0) -- BUILTIN_VSD_HSI (QUADOP, sqdmlsl_lane, 0) -- BUILTIN_VSD_HSI (QUADOP, sqdmlal_laneq, 0) -- BUILTIN_VSD_HSI (QUADOP, sqdmlsl_laneq, 0) -+ BUILTIN_VSD_HSI (TERNOP_LANE, sqdmlal_lane, 0) -+ BUILTIN_VSD_HSI (TERNOP_LANE, sqdmlsl_lane, 0) -+ BUILTIN_VSD_HSI (TERNOP_LANE, sqdmlal_laneq, 0) -+ BUILTIN_VSD_HSI (TERNOP_LANE, sqdmlsl_laneq, 0) - BUILTIN_VQ_HSI (TERNOP, sqdmlal2, 0) - BUILTIN_VQ_HSI (TERNOP, sqdmlsl2, 0) -- BUILTIN_VQ_HSI (QUADOP, sqdmlal2_lane, 0) -- BUILTIN_VQ_HSI (QUADOP, sqdmlsl2_lane, 0) -- BUILTIN_VQ_HSI (QUADOP, sqdmlal2_laneq, 0) -- BUILTIN_VQ_HSI (QUADOP, sqdmlsl2_laneq, 0) -+ BUILTIN_VQ_HSI (TERNOP_LANE, sqdmlal2_lane, 0) -+ BUILTIN_VQ_HSI (TERNOP_LANE, sqdmlsl2_lane, 0) -+ BUILTIN_VQ_HSI (TERNOP_LANE, sqdmlal2_laneq, 0) -+ BUILTIN_VQ_HSI (TERNOP_LANE, sqdmlsl2_laneq, 0) - BUILTIN_VQ_HSI (TERNOP, sqdmlal2_n, 0) - BUILTIN_VQ_HSI (TERNOP, sqdmlsl2_n, 0) - /* Implemented by aarch64_sqdml<SBINQOPS:as>l<mode>. */ -@@ -186,9 +202,9 @@ - BUILTIN_VSDQ_I_DI (BINOP, ashl, 3) - /* Implemented by aarch64_<sur>shl<mode>. */ - BUILTIN_VSDQ_I_DI (BINOP, sshl, 0) -- BUILTIN_VSDQ_I_DI (BINOP, ushl, 0) -+ BUILTIN_VSDQ_I_DI (BINOP_UUS, ushl, 0) - BUILTIN_VSDQ_I_DI (BINOP, srshl, 0) -- BUILTIN_VSDQ_I_DI (BINOP, urshl, 0) -+ BUILTIN_VSDQ_I_DI (BINOP_UUS, urshl, 0) - - BUILTIN_VDQ_I (SHIFTIMM, ashr, 3) - VAR1 (SHIFTIMM, ashr_simd, 0, di) -@@ -196,15 +212,15 @@ - VAR1 (USHIFTIMM, lshr_simd, 0, di) - /* Implemented by aarch64_<sur>shr_n<mode>. */ - BUILTIN_VSDQ_I_DI (SHIFTIMM, srshr_n, 0) -- BUILTIN_VSDQ_I_DI (SHIFTIMM, urshr_n, 0) -+ BUILTIN_VSDQ_I_DI (USHIFTIMM, urshr_n, 0) - /* Implemented by aarch64_<sur>sra_n<mode>. */ - BUILTIN_VSDQ_I_DI (SHIFTACC, ssra_n, 0) -- BUILTIN_VSDQ_I_DI (SHIFTACC, usra_n, 0) -+ BUILTIN_VSDQ_I_DI (USHIFTACC, usra_n, 0) - BUILTIN_VSDQ_I_DI (SHIFTACC, srsra_n, 0) -- BUILTIN_VSDQ_I_DI (SHIFTACC, ursra_n, 0) -+ BUILTIN_VSDQ_I_DI (USHIFTACC, ursra_n, 0) - /* Implemented by aarch64_<sur>shll_n<mode>. */ - BUILTIN_VDW (SHIFTIMM, sshll_n, 0) -- BUILTIN_VDW (SHIFTIMM, ushll_n, 0) -+ BUILTIN_VDW (USHIFTIMM, ushll_n, 0) - /* Implemented by aarch64_<sur>shll2_n<mode>. */ - BUILTIN_VQW (SHIFTIMM, sshll2_n, 0) - BUILTIN_VQW (SHIFTIMM, ushll2_n, 0) -@@ -212,30 +228,19 @@ - BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrun_n, 0) - BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrun_n, 0) - BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrn_n, 0) -- BUILTIN_VSQN_HSDI (SHIFTIMM, uqshrn_n, 0) -+ BUILTIN_VSQN_HSDI (USHIFTIMM, uqshrn_n, 0) - BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrn_n, 0) -- BUILTIN_VSQN_HSDI (SHIFTIMM, uqrshrn_n, 0) -+ BUILTIN_VSQN_HSDI (USHIFTIMM, uqrshrn_n, 0) - /* Implemented by aarch64_<sur>s<lr>i_n<mode>. */ - BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssri_n, 0) -- BUILTIN_VSDQ_I_DI (SHIFTINSERT, usri_n, 0) -+ BUILTIN_VSDQ_I_DI (USHIFTACC, usri_n, 0) - BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssli_n, 0) -- BUILTIN_VSDQ_I_DI (SHIFTINSERT, usli_n, 0) -+ BUILTIN_VSDQ_I_DI (USHIFTACC, usli_n, 0) - /* Implemented by aarch64_<sur>qshl<u>_n<mode>. */ -- BUILTIN_VSDQ_I (SHIFTIMM, sqshlu_n, 0) -+ BUILTIN_VSDQ_I (SHIFTIMM_USS, sqshlu_n, 0) - BUILTIN_VSDQ_I (SHIFTIMM, sqshl_n, 0) -- BUILTIN_VSDQ_I (SHIFTIMM, uqshl_n, 0) -+ BUILTIN_VSDQ_I (USHIFTIMM, uqshl_n, 0) - -- /* Implemented by aarch64_cm<cmp><mode>. 
*/ -- BUILTIN_VALLDI (BINOP, cmeq, 0) -- BUILTIN_VALLDI (BINOP, cmge, 0) -- BUILTIN_VALLDI (BINOP, cmgt, 0) -- BUILTIN_VALLDI (BINOP, cmle, 0) -- BUILTIN_VALLDI (BINOP, cmlt, 0) -- /* Implemented by aarch64_cm<cmp><mode>. */ -- BUILTIN_VSDQ_I_DI (BINOP, cmgeu, 0) -- BUILTIN_VSDQ_I_DI (BINOP, cmgtu, 0) -- BUILTIN_VSDQ_I_DI (BINOP, cmtst, 0) -- - /* Implemented by reduc_<sur>plus_<mode>. */ - BUILTIN_VALL (UNOP, reduc_splus_, 10) - BUILTIN_VDQ (UNOP, reduc_uplus_, 10) -@@ -265,7 +270,7 @@ - BUILTIN_VDQF (UNOP, nearbyint, 2) - BUILTIN_VDQF (UNOP, rint, 2) - BUILTIN_VDQF (UNOP, round, 2) -- BUILTIN_VDQF (UNOP, frintn, 2) -+ BUILTIN_VDQF_DF (UNOP, frintn, 2) - - /* Implemented by l<fcvt_pattern><su_optab><VQDF:mode><vcvt_target>2. */ - VAR1 (UNOP, lbtruncv2sf, 2, v2si) -@@ -330,6 +335,10 @@ - VAR1 (UNOP, floatunsv4si, 2, v4sf) - VAR1 (UNOP, floatunsv2di, 2, v2df) - -+ VAR5 (UNOPU, bswap, 10, v4hi, v8hi, v2si, v4si, v2di) -+ -+ BUILTIN_VB (UNOP, rbit, 0) -+ - /* Implemented by - aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>. */ - BUILTIN_VALL (BINOP, zip1, 0) -@@ -393,3 +402,6 @@ - /* Implemented by aarch64_crypto_pmull<mode>. */ - VAR1 (BINOPP, crypto_pmull, 0, di) - VAR1 (BINOPP, crypto_pmull, 0, v2di) -+ -+ /* Meta-op to check lane bounds of immediate in aarch64_expand_builtin. */ -+ VAR1 (BINOPV, im_lane_bound, 0, si) ---- a/src/gcc/config/aarch64/constraints.md -+++ b/src/gcc/config/aarch64/constraints.md -@@ -21,6 +21,9 @@ - (define_register_constraint "k" "STACK_REG" - "@internal The stack register.") - -+(define_register_constraint "Ucs" "CALLER_SAVE_REGS" -+ "@internal The caller save registers.") -+ - (define_register_constraint "w" "FP_REGS" - "Floating point and SIMD vector registers.") - -@@ -92,6 +95,10 @@ - (and (match_code "const_int") - (match_test "(unsigned HOST_WIDE_INT) ival < 64"))) - -+(define_constraint "Usf" -+ "@internal Usf is a symbol reference." -+ (match_code "symbol_ref")) -+ - (define_constraint "UsM" - "@internal - A constraint that matches the immediate constant -1." ---- a/src/gcc/config/aarch64/aarch64.c -+++ b/src/gcc/config/aarch64/aarch64.c -@@ -62,7 +62,8 @@ - #include "dwarf2.h" - #include "cfgloop.h" - #include "tree-vectorizer.h" --#include "config/arm/aarch-cost-tables.h" -+#include "aarch64-cost-tables.h" -+#include "dumpfile.h" - - /* Defined for convenience. */ - #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT) -@@ -136,12 +137,13 @@ - static void aarch64_override_options_after_change (void); - static bool aarch64_vector_mode_supported_p (enum machine_mode); - static unsigned bit_count (unsigned HOST_WIDE_INT); --static bool aarch64_const_vec_all_same_int_p (rtx, -- HOST_WIDE_INT, HOST_WIDE_INT); -- - static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode, - const unsigned char *sel); -+static int aarch64_address_cost (rtx, enum machine_mode, addr_space_t, bool); - -+/* Major revision number of the ARM Architecture implemented by the target. */ -+unsigned aarch64_architecture_version; -+ - /* The processor for which instructions should be scheduled. 
*/ - enum aarch64_processor aarch64_tune = cortexa53; - -@@ -171,6 +173,15 @@ - #endif - static const struct cpu_addrcost_table generic_addrcost_table = - { -+#if HAVE_DESIGNATED_INITIALIZERS -+ .addr_scale_costs = -+#endif -+ { -+ NAMED_PARAM (hi, 0), -+ NAMED_PARAM (si, 0), -+ NAMED_PARAM (di, 0), -+ NAMED_PARAM (ti, 0), -+ }, - NAMED_PARAM (pre_modify, 0), - NAMED_PARAM (post_modify, 0), - NAMED_PARAM (register_offset, 0), -@@ -181,14 +192,60 @@ - #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 - __extension__ - #endif -+static const struct cpu_addrcost_table cortexa57_addrcost_table = -+{ -+#if HAVE_DESIGNATED_INITIALIZERS -+ .addr_scale_costs = -+#endif -+ { -+ NAMED_PARAM (hi, 1), -+ NAMED_PARAM (si, 0), -+ NAMED_PARAM (di, 0), -+ NAMED_PARAM (ti, 1), -+ }, -+ NAMED_PARAM (pre_modify, 0), -+ NAMED_PARAM (post_modify, 0), -+ NAMED_PARAM (register_offset, 0), -+ NAMED_PARAM (register_extend, 0), -+ NAMED_PARAM (imm_offset, 0), -+}; -+ -+#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 -+__extension__ -+#endif - static const struct cpu_regmove_cost generic_regmove_cost = - { - NAMED_PARAM (GP2GP, 1), - NAMED_PARAM (GP2FP, 2), - NAMED_PARAM (FP2GP, 2), -- /* We currently do not provide direct support for TFmode Q->Q move. -- Therefore we need to raise the cost above 2 in order to have -- reload handle the situation. */ -+ NAMED_PARAM (FP2FP, 2) -+}; -+ -+static const struct cpu_regmove_cost cortexa57_regmove_cost = -+{ -+ NAMED_PARAM (GP2GP, 1), -+ /* Avoid the use of slow int<->fp moves for spilling by setting -+ their cost higher than memmov_cost. */ -+ NAMED_PARAM (GP2FP, 5), -+ NAMED_PARAM (FP2GP, 5), -+ NAMED_PARAM (FP2FP, 2) -+}; -+ -+static const struct cpu_regmove_cost cortexa53_regmove_cost = -+{ -+ NAMED_PARAM (GP2GP, 1), -+ /* Avoid the use of slow int<->fp moves for spilling by setting -+ their cost higher than memmov_cost. */ -+ NAMED_PARAM (GP2FP, 5), -+ NAMED_PARAM (FP2GP, 5), -+ NAMED_PARAM (FP2FP, 2) -+}; -+ -+static const struct cpu_regmove_cost thunderx_regmove_cost = -+{ -+ NAMED_PARAM (GP2GP, 2), -+ NAMED_PARAM (GP2FP, 2), -+ NAMED_PARAM (FP2GP, 6), - NAMED_PARAM (FP2FP, 4) - }; - -@@ -212,9 +269,29 @@ - NAMED_PARAM (cond_not_taken_branch_cost, 1) - }; - -+/* Generic costs for vector insn classes. 
*/ - #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 - __extension__ - #endif -+static const struct cpu_vector_cost cortexa57_vector_cost = -+{ -+ NAMED_PARAM (scalar_stmt_cost, 1), -+ NAMED_PARAM (scalar_load_cost, 4), -+ NAMED_PARAM (scalar_store_cost, 1), -+ NAMED_PARAM (vec_stmt_cost, 3), -+ NAMED_PARAM (vec_to_scalar_cost, 8), -+ NAMED_PARAM (scalar_to_vec_cost, 8), -+ NAMED_PARAM (vec_align_load_cost, 5), -+ NAMED_PARAM (vec_unalign_load_cost, 5), -+ NAMED_PARAM (vec_unalign_store_cost, 1), -+ NAMED_PARAM (vec_store_cost, 1), -+ NAMED_PARAM (cond_taken_branch_cost, 1), -+ NAMED_PARAM (cond_not_taken_branch_cost, 1) -+}; -+ -+#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 -+__extension__ -+#endif - static const struct tune_params generic_tunings = - { - &cortexa57_extra_costs, -@@ -229,7 +306,7 @@ - { - &cortexa53_extra_costs, - &generic_addrcost_table, -- &generic_regmove_cost, -+ &cortexa53_regmove_cost, - &generic_vector_cost, - NAMED_PARAM (memmov_cost, 4), - NAMED_PARAM (issue_rate, 2) -@@ -238,13 +315,23 @@ - static const struct tune_params cortexa57_tunings = - { - &cortexa57_extra_costs, -- &generic_addrcost_table, -- &generic_regmove_cost, -- &generic_vector_cost, -+ &cortexa57_addrcost_table, -+ &cortexa57_regmove_cost, -+ &cortexa57_vector_cost, - NAMED_PARAM (memmov_cost, 4), - NAMED_PARAM (issue_rate, 3) - }; - -+static const struct tune_params thunderx_tunings = -+{ -+ &thunderx_extra_costs, -+ &generic_addrcost_table, -+ &thunderx_regmove_cost, -+ &generic_vector_cost, -+ NAMED_PARAM (memmov_cost, 6), -+ NAMED_PARAM (issue_rate, 2) -+}; -+ - /* A processor implementing AArch64. */ - struct processor - { -@@ -251,6 +338,7 @@ - const char *const name; - enum aarch64_processor core; - const char *arch; -+ unsigned architecture_version; - const unsigned long flags; - const struct tune_params *const tune; - }; -@@ -259,11 +347,13 @@ - static const struct processor all_cores[] = - { - #define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \ -- {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings}, -+ {NAME, IDENT, #ARCH, ARCH,\ -+ FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings}, - #include "aarch64-cores.def" - #undef AARCH64_CORE -- {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings}, -- {NULL, aarch64_none, NULL, 0, NULL} -+ {"generic", cortexa53, "8", 8,\ -+ AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings}, -+ {NULL, aarch64_none, NULL, 0, 0, NULL} - }; - - /* Architectures implementing AArch64. */ -@@ -270,10 +360,10 @@ - static const struct processor all_architectures[] = - { - #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \ -- {NAME, CORE, #ARCH, FLAGS, NULL}, -+ {NAME, CORE, #ARCH, ARCH, FLAGS, NULL}, - #include "aarch64-arches.def" - #undef AARCH64_ARCH -- {NULL, aarch64_none, NULL, 0, NULL} -+ {NULL, aarch64_none, NULL, 0, 0, NULL} - }; - - /* Target specification. These are populated as commandline arguments -@@ -424,6 +514,24 @@ - return 0; - } - -+/* Implement HARD_REGNO_CALLER_SAVE_MODE. */ -+enum machine_mode -+aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs, -+ enum machine_mode mode) -+{ -+ /* Handle modes that fit within single registers. */ -+ if (nregs == 1 && GET_MODE_SIZE (mode) <= 16) -+ { -+ if (GET_MODE_SIZE (mode) >= 4) -+ return mode; -+ else -+ return SImode; -+ } -+ /* Fall back to generic for multi-reg and very large modes. 
*/ -+ else -+ return choose_hard_reg_mode (regno, nregs, false); -+} -+ - /* Return true if calls to DECL should be treated as - long-calls (ie called via a register). */ - static bool -@@ -444,7 +552,7 @@ - represent an expression that matches an extend operation. The - operands represent the paramters from - -- (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)). */ -+ (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */ - bool - aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm, - rtx extract_imm) -@@ -636,12 +744,24 @@ - - case SYMBOL_SMALL_TLSDESC: - { -- rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM); -+ enum machine_mode mode = GET_MODE (dest); -+ rtx x0 = gen_rtx_REG (mode, R0_REGNUM); - rtx tp; - -- emit_insn (gen_tlsdesc_small (imm)); -+ gcc_assert (mode == Pmode || mode == ptr_mode); -+ -+ /* In ILP32, the got entry is always of SImode size. Unlike -+ small GOT, the dest is fixed at reg 0. */ -+ if (TARGET_ILP32) -+ emit_insn (gen_tlsdesc_small_si (imm)); -+ else -+ emit_insn (gen_tlsdesc_small_di (imm)); - tp = aarch64_load_tp (NULL); -- emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0))); -+ -+ if (mode != Pmode) -+ tp = gen_lowpart (mode, tp); -+ -+ emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, x0))); - set_unique_reg_note (get_last_insn (), REG_EQUIV, imm); - return; - } -@@ -648,10 +768,34 @@ - - case SYMBOL_SMALL_GOTTPREL: - { -- rtx tmp_reg = gen_reg_rtx (Pmode); -+ /* In ILP32, the mode of dest can be either SImode or DImode, -+ while the got entry is always of SImode size. The mode of -+ dest depends on how dest is used: if dest is assigned to a -+ pointer (e.g. in the memory), it has SImode; it may have -+ DImode if dest is dereferenced to access the memeory. -+ This is why we have to handle three different tlsie_small -+ patterns here (two patterns for ILP32). */ -+ enum machine_mode mode = GET_MODE (dest); -+ rtx tmp_reg = gen_reg_rtx (mode); - rtx tp = aarch64_load_tp (NULL); -- emit_insn (gen_tlsie_small (tmp_reg, imm)); -- emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg))); -+ -+ if (mode == ptr_mode) -+ { -+ if (mode == DImode) -+ emit_insn (gen_tlsie_small_di (tmp_reg, imm)); -+ else -+ { -+ emit_insn (gen_tlsie_small_si (tmp_reg, imm)); -+ tp = gen_lowpart (mode, tp); -+ } -+ } -+ else -+ { -+ gcc_assert (mode == Pmode); -+ emit_insn (gen_tlsie_small_sidi (tmp_reg, imm)); -+ } -+ -+ emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, tmp_reg))); - set_unique_reg_note (get_last_insn (), REG_EQUIV, imm); - return; - } -@@ -889,10 +1033,10 @@ - return plus_constant (mode, reg, offset); - } - --void --aarch64_expand_mov_immediate (rtx dest, rtx imm) -+static int -+aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate, -+ machine_mode mode) - { -- enum machine_mode mode = GET_MODE (dest); - unsigned HOST_WIDE_INT mask; - int i; - bool first; -@@ -899,86 +1043,15 @@ - unsigned HOST_WIDE_INT val; - bool subtargets; - rtx subtarget; -- int one_match, zero_match; -+ int one_match, zero_match, first_not_ffff_match; -+ int num_insns = 0; - -- gcc_assert (mode == SImode || mode == DImode); -- -- /* Check on what type of symbol it is. */ -- if (GET_CODE (imm) == SYMBOL_REF -- || GET_CODE (imm) == LABEL_REF -- || GET_CODE (imm) == CONST) -- { -- rtx mem, base, offset; -- enum aarch64_symbol_type sty; -- -- /* If we have (const (plus symbol offset)), separate out the offset -- before we start classifying the symbol. 
*/ -- split_const (imm, &base, &offset); -- -- sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR); -- switch (sty) -- { -- case SYMBOL_FORCE_TO_MEM: -- if (offset != const0_rtx -- && targetm.cannot_force_const_mem (mode, imm)) -- { -- gcc_assert (can_create_pseudo_p ()); -- base = aarch64_force_temporary (mode, dest, base); -- base = aarch64_add_offset (mode, NULL, base, INTVAL (offset)); -- aarch64_emit_move (dest, base); -- return; -- } -- mem = force_const_mem (ptr_mode, imm); -- gcc_assert (mem); -- if (mode != ptr_mode) -- mem = gen_rtx_ZERO_EXTEND (mode, mem); -- emit_insn (gen_rtx_SET (VOIDmode, dest, mem)); -- return; -- -- case SYMBOL_SMALL_TLSGD: -- case SYMBOL_SMALL_TLSDESC: -- case SYMBOL_SMALL_GOTTPREL: -- case SYMBOL_SMALL_GOT: -- case SYMBOL_TINY_GOT: -- if (offset != const0_rtx) -- { -- gcc_assert(can_create_pseudo_p ()); -- base = aarch64_force_temporary (mode, dest, base); -- base = aarch64_add_offset (mode, NULL, base, INTVAL (offset)); -- aarch64_emit_move (dest, base); -- return; -- } -- /* FALLTHRU */ -- -- case SYMBOL_SMALL_TPREL: -- case SYMBOL_SMALL_ABSOLUTE: -- case SYMBOL_TINY_ABSOLUTE: -- aarch64_load_symref_appropriately (dest, imm, sty); -- return; -- -- default: -- gcc_unreachable (); -- } -- } -- - if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode)) - { -- emit_insn (gen_rtx_SET (VOIDmode, dest, imm)); -- return; -- } -- -- if (!CONST_INT_P (imm)) -- { -- if (GET_CODE (imm) == HIGH) -+ if (generate) - emit_insn (gen_rtx_SET (VOIDmode, dest, imm)); -- else -- { -- rtx mem = force_const_mem (mode, imm); -- gcc_assert (mem); -- emit_insn (gen_rtx_SET (VOIDmode, dest, mem)); -- } -- -- return; -+ num_insns++; -+ return num_insns; - } - - if (mode == SImode) -@@ -986,10 +1059,15 @@ - /* We know we can't do this in 1 insn, and we must be able to do it - in two; so don't mess around looking for sequences that don't buy - us anything. */ -- emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff))); -- emit_insn (gen_insv_immsi (dest, GEN_INT (16), -- GEN_INT ((INTVAL (imm) >> 16) & 0xffff))); -- return; -+ if (generate) -+ { -+ emit_insn (gen_rtx_SET (VOIDmode, dest, -+ GEN_INT (INTVAL (imm) & 0xffff))); -+ emit_insn (gen_insv_immsi (dest, GEN_INT (16), -+ GEN_INT ((INTVAL (imm) >> 16) & 0xffff))); -+ } -+ num_insns += 2; -+ return num_insns; - } - - /* Remaining cases are all for DImode. */ -@@ -1000,29 +1078,34 @@ - one_match = 0; - zero_match = 0; - mask = 0xffff; -+ first_not_ffff_match = -1; - - for (i = 0; i < 64; i += 16, mask <<= 16) - { -- if ((val & mask) == 0) -- zero_match++; -- else if ((val & mask) == mask) -+ if ((val & mask) == mask) - one_match++; -+ else -+ { -+ if (first_not_ffff_match < 0) -+ first_not_ffff_match = i; -+ if ((val & mask) == 0) -+ zero_match++; -+ } - } - - if (one_match == 2) - { -- mask = 0xffff; -- for (i = 0; i < 64; i += 16, mask <<= 16) -+ /* Set one of the quarters and then insert back into result. 
*/ -+ mask = 0xffffll << first_not_ffff_match; -+ if (generate) - { -- if ((val & mask) != mask) -- { -- emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask))); -- emit_insn (gen_insv_immdi (dest, GEN_INT (i), -- GEN_INT ((val >> i) & 0xffff))); -- return; -- } -+ emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask))); -+ emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match), -+ GEN_INT ((val >> first_not_ffff_match) -+ & 0xffff))); - } -- gcc_unreachable (); -+ num_insns += 2; -+ return num_insns; - } - - if (zero_match == 2) -@@ -1035,42 +1118,55 @@ - - if (aarch64_uimm12_shift (val - (val & mask))) - { -- subtarget = subtargets ? gen_reg_rtx (DImode) : dest; -- -- emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask))); -- emit_insn (gen_adddi3 (dest, subtarget, -- GEN_INT (val - (val & mask)))); -- return; -+ if (generate) -+ { -+ subtarget = subtargets ? gen_reg_rtx (DImode) : dest; -+ emit_insn (gen_rtx_SET (VOIDmode, subtarget, -+ GEN_INT (val & mask))); -+ emit_insn (gen_adddi3 (dest, subtarget, -+ GEN_INT (val - (val & mask)))); -+ } -+ num_insns += 2; -+ return num_insns; - } - else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask)))) - { -- subtarget = subtargets ? gen_reg_rtx (DImode) : dest; -- -- emit_insn (gen_rtx_SET (VOIDmode, subtarget, -- GEN_INT ((val + comp) & mask))); -- emit_insn (gen_adddi3 (dest, subtarget, -- GEN_INT (val - ((val + comp) & mask)))); -- return; -+ if (generate) -+ { -+ subtarget = subtargets ? gen_reg_rtx (DImode) : dest; -+ emit_insn (gen_rtx_SET (VOIDmode, subtarget, -+ GEN_INT ((val + comp) & mask))); -+ emit_insn (gen_adddi3 (dest, subtarget, -+ GEN_INT (val - ((val + comp) & mask)))); -+ } -+ num_insns += 2; -+ return num_insns; - } - else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask))) - { -- subtarget = subtargets ? gen_reg_rtx (DImode) : dest; -- -- emit_insn (gen_rtx_SET (VOIDmode, subtarget, -- GEN_INT ((val - comp) | ~mask))); -- emit_insn (gen_adddi3 (dest, subtarget, -- GEN_INT (val - ((val - comp) | ~mask)))); -- return; -+ if (generate) -+ { -+ subtarget = subtargets ? gen_reg_rtx (DImode) : dest; -+ emit_insn (gen_rtx_SET (VOIDmode, subtarget, -+ GEN_INT ((val - comp) | ~mask))); -+ emit_insn (gen_adddi3 (dest, subtarget, -+ GEN_INT (val - ((val - comp) | ~mask)))); -+ } -+ num_insns += 2; -+ return num_insns; - } - else if (aarch64_uimm12_shift (-(val - (val | ~mask)))) - { -- subtarget = subtargets ? gen_reg_rtx (DImode) : dest; -- -- emit_insn (gen_rtx_SET (VOIDmode, subtarget, -- GEN_INT (val | ~mask))); -- emit_insn (gen_adddi3 (dest, subtarget, -- GEN_INT (val - (val | ~mask)))); -- return; -+ if (generate) -+ { -+ subtarget = subtargets ? gen_reg_rtx (DImode) : dest; -+ emit_insn (gen_rtx_SET (VOIDmode, subtarget, -+ GEN_INT (val | ~mask))); -+ emit_insn (gen_adddi3 (dest, subtarget, -+ GEN_INT (val - (val | ~mask)))); -+ } -+ num_insns += 2; -+ return num_insns; - } - } - -@@ -1084,12 +1180,16 @@ - if (aarch64_uimm12_shift (val - aarch64_bitmasks[i]) - || aarch64_uimm12_shift (-val + aarch64_bitmasks[i])) - { -- subtarget = subtargets ? gen_reg_rtx (DImode) : dest; -- emit_insn (gen_rtx_SET (VOIDmode, subtarget, -- GEN_INT (aarch64_bitmasks[i]))); -- emit_insn (gen_adddi3 (dest, subtarget, -- GEN_INT (val - aarch64_bitmasks[i]))); -- return; -+ if (generate) -+ { -+ subtarget = subtargets ? 
gen_reg_rtx (DImode) : dest; -+ emit_insn (gen_rtx_SET (VOIDmode, subtarget, -+ GEN_INT (aarch64_bitmasks[i]))); -+ emit_insn (gen_adddi3 (dest, subtarget, -+ GEN_INT (val - aarch64_bitmasks[i]))); -+ } -+ num_insns += 2; -+ return num_insns; - } - - for (j = 0; j < 64; j += 16, mask <<= 16) -@@ -1096,11 +1196,15 @@ - { - if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask)) - { -- emit_insn (gen_rtx_SET (VOIDmode, dest, -- GEN_INT (aarch64_bitmasks[i]))); -- emit_insn (gen_insv_immdi (dest, GEN_INT (j), -- GEN_INT ((val >> j) & 0xffff))); -- return; -+ if (generate) -+ { -+ emit_insn (gen_rtx_SET (VOIDmode, dest, -+ GEN_INT (aarch64_bitmasks[i]))); -+ emit_insn (gen_insv_immdi (dest, GEN_INT (j), -+ GEN_INT ((val >> j) & 0xffff))); -+ } -+ num_insns += 2; -+ return num_insns; - } - } - } -@@ -1115,12 +1219,16 @@ - for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++) - if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j])) - { -- subtarget = subtargets ? gen_reg_rtx (mode) : dest; -- emit_insn (gen_rtx_SET (VOIDmode, subtarget, -- GEN_INT (aarch64_bitmasks[i]))); -- emit_insn (gen_iordi3 (dest, subtarget, -- GEN_INT (aarch64_bitmasks[j]))); -- return; -+ if (generate) -+ { -+ subtarget = subtargets ? gen_reg_rtx (mode) : dest; -+ emit_insn (gen_rtx_SET (VOIDmode, subtarget, -+ GEN_INT (aarch64_bitmasks[i]))); -+ emit_insn (gen_iordi3 (dest, subtarget, -+ GEN_INT (aarch64_bitmasks[j]))); -+ } -+ num_insns += 2; -+ return num_insns; - } - } - else if ((val & aarch64_bitmasks[i]) == val) -@@ -1130,17 +1238,44 @@ - for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++) - if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i])) - { -- -- subtarget = subtargets ? gen_reg_rtx (mode) : dest; -- emit_insn (gen_rtx_SET (VOIDmode, subtarget, -- GEN_INT (aarch64_bitmasks[j]))); -- emit_insn (gen_anddi3 (dest, subtarget, -- GEN_INT (aarch64_bitmasks[i]))); -- return; -+ if (generate) -+ { -+ subtarget = subtargets ? gen_reg_rtx (mode) : dest; -+ emit_insn (gen_rtx_SET (VOIDmode, subtarget, -+ GEN_INT (aarch64_bitmasks[j]))); -+ emit_insn (gen_anddi3 (dest, subtarget, -+ GEN_INT (aarch64_bitmasks[i]))); -+ } -+ num_insns += 2; -+ return num_insns; - } - } - } - -+ if (one_match > zero_match) -+ { -+ /* Set either first three quarters or all but the third. */ -+ mask = 0xffffll << (16 - first_not_ffff_match); -+ if (generate) -+ emit_insn (gen_rtx_SET (VOIDmode, dest, -+ GEN_INT (val | mask | 0xffffffff00000000ull))); -+ num_insns ++; -+ -+ /* Now insert other two quarters. */ -+ for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1); -+ i < 64; i += 16, mask <<= 16) -+ { -+ if ((val & mask) != mask) -+ { -+ if (generate) -+ emit_insn (gen_insv_immdi (dest, GEN_INT (i), -+ GEN_INT ((val >> i) & 0xffff))); -+ num_insns ++; -+ } -+ } -+ return num_insns; -+ } -+ - simple_sequence: - first = true; - mask = 0xffff; -@@ -1150,30 +1285,113 @@ - { - if (first) - { -- emit_insn (gen_rtx_SET (VOIDmode, dest, -- GEN_INT (val & mask))); -+ if (generate) -+ emit_insn (gen_rtx_SET (VOIDmode, dest, -+ GEN_INT (val & mask))); -+ num_insns ++; - first = false; - } - else -- emit_insn (gen_insv_immdi (dest, GEN_INT (i), -- GEN_INT ((val >> i) & 0xffff))); -+ { -+ if (generate) -+ emit_insn (gen_insv_immdi (dest, GEN_INT (i), -+ GEN_INT ((val >> i) & 0xffff))); -+ num_insns ++; -+ } - } - } -+ -+ return num_insns; - } - --static bool --aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) -+ -+void -+aarch64_expand_mov_immediate (rtx dest, rtx imm) - { -- /* Indirect calls are not currently supported. 
*/ -- if (decl == NULL) -- return false; -+ machine_mode mode = GET_MODE (dest); - -- /* Cannot tail-call to long-calls, since these are outside of the -- range of a branch instruction (we could handle this if we added -- support for indirect tail-calls. */ -- if (aarch64_decl_is_long_call_p (decl)) -- return false; -+ gcc_assert (mode == SImode || mode == DImode); - -+ /* Check on what type of symbol it is. */ -+ if (GET_CODE (imm) == SYMBOL_REF -+ || GET_CODE (imm) == LABEL_REF -+ || GET_CODE (imm) == CONST) -+ { -+ rtx mem, base, offset; -+ enum aarch64_symbol_type sty; -+ -+ /* If we have (const (plus symbol offset)), separate out the offset -+ before we start classifying the symbol. */ -+ split_const (imm, &base, &offset); -+ -+ sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR); -+ switch (sty) -+ { -+ case SYMBOL_FORCE_TO_MEM: -+ if (offset != const0_rtx -+ && targetm.cannot_force_const_mem (mode, imm)) -+ { -+ gcc_assert (can_create_pseudo_p ()); -+ base = aarch64_force_temporary (mode, dest, base); -+ base = aarch64_add_offset (mode, NULL, base, INTVAL (offset)); -+ aarch64_emit_move (dest, base); -+ return; -+ } -+ mem = force_const_mem (ptr_mode, imm); -+ gcc_assert (mem); -+ if (mode != ptr_mode) -+ mem = gen_rtx_ZERO_EXTEND (mode, mem); -+ emit_insn (gen_rtx_SET (VOIDmode, dest, mem)); -+ return; -+ -+ case SYMBOL_SMALL_TLSGD: -+ case SYMBOL_SMALL_TLSDESC: -+ case SYMBOL_SMALL_GOTTPREL: -+ case SYMBOL_SMALL_GOT: -+ case SYMBOL_TINY_GOT: -+ if (offset != const0_rtx) -+ { -+ gcc_assert(can_create_pseudo_p ()); -+ base = aarch64_force_temporary (mode, dest, base); -+ base = aarch64_add_offset (mode, NULL, base, INTVAL (offset)); -+ aarch64_emit_move (dest, base); -+ return; -+ } -+ /* FALLTHRU */ -+ -+ case SYMBOL_SMALL_TPREL: -+ case SYMBOL_SMALL_ABSOLUTE: -+ case SYMBOL_TINY_ABSOLUTE: -+ aarch64_load_symref_appropriately (dest, imm, sty); -+ return; -+ -+ default: -+ gcc_unreachable (); -+ } -+ } -+ -+ if (!CONST_INT_P (imm)) -+ { -+ if (GET_CODE (imm) == HIGH) -+ emit_insn (gen_rtx_SET (VOIDmode, dest, imm)); -+ else -+ { -+ rtx mem = force_const_mem (mode, imm); -+ gcc_assert (mem); -+ emit_insn (gen_rtx_SET (VOIDmode, dest, mem)); -+ } -+ -+ return; -+ } -+ -+ aarch64_internal_mov_immediate (dest, imm, true, GET_MODE (dest)); -+} -+ -+static bool -+aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED, -+ tree exp ATTRIBUTE_UNUSED) -+{ -+ /* Currently, always true. */ - return true; - } - -@@ -1688,11 +1906,6 @@ - static bool - aarch64_frame_pointer_required (void) - { -- /* If the function contains dynamic stack allocations, we need to -- use the frame pointer to access the static parts of the frame. */ -- if (cfun->calls_alloca) -- return true; -- - /* In aarch64_override_options_after_change - flag_omit_leaf_frame_pointer turns off the frame pointer by - default. Turn it back on now if we've not got a leaf -@@ -1716,268 +1929,312 @@ - if (reload_completed && cfun->machine->frame.laid_out) - return; - -- cfun->machine->frame.fp_lr_offset = 0; -+#define SLOT_NOT_REQUIRED (-2) -+#define SLOT_REQUIRED (-1) - -+ cfun->machine->frame.wb_candidate1 = FIRST_PSEUDO_REGISTER; -+ cfun->machine->frame.wb_candidate2 = FIRST_PSEUDO_REGISTER; -+ - /* First mark all the registers that really need to be saved... 
*/ - for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++) -- cfun->machine->frame.reg_offset[regno] = -1; -+ cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED; - - for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) -- cfun->machine->frame.reg_offset[regno] = -1; -+ cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED; - - /* ... that includes the eh data registers (if needed)... */ - if (crtl->calls_eh_return) - for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++) -- cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0; -+ cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] -+ = SLOT_REQUIRED; - - /* ... and any callee saved register that dataflow says is live. */ - for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++) - if (df_regs_ever_live_p (regno) - && !call_used_regs[regno]) -- cfun->machine->frame.reg_offset[regno] = 0; -+ cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED; - - for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) - if (df_regs_ever_live_p (regno) - && !call_used_regs[regno]) -- cfun->machine->frame.reg_offset[regno] = 0; -+ cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED; - - if (frame_pointer_needed) - { -- cfun->machine->frame.reg_offset[R30_REGNUM] = 0; -+ /* FP and LR are placed in the linkage record. */ - cfun->machine->frame.reg_offset[R29_REGNUM] = 0; -+ cfun->machine->frame.wb_candidate1 = R29_REGNUM; -+ cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD; -+ cfun->machine->frame.wb_candidate2 = R30_REGNUM; - cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD; -+ offset += 2 * UNITS_PER_WORD; - } - - /* Now assign stack slots for them. */ -- for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++) -- if (cfun->machine->frame.reg_offset[regno] != -1) -+ for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++) -+ if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED) - { - cfun->machine->frame.reg_offset[regno] = offset; -+ if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER) -+ cfun->machine->frame.wb_candidate1 = regno; -+ else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER) -+ cfun->machine->frame.wb_candidate2 = regno; - offset += UNITS_PER_WORD; - } - - for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) -- if (cfun->machine->frame.reg_offset[regno] != -1) -+ if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED) - { - cfun->machine->frame.reg_offset[regno] = offset; -+ if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER) -+ cfun->machine->frame.wb_candidate1 = regno; -+ else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER -+ && cfun->machine->frame.wb_candidate1 >= V0_REGNUM) -+ cfun->machine->frame.wb_candidate2 = regno; - offset += UNITS_PER_WORD; - } - -- if (frame_pointer_needed) -- { -- cfun->machine->frame.reg_offset[R29_REGNUM] = offset; -- offset += UNITS_PER_WORD; -- cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD; -- } -- -- if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1) -- { -- cfun->machine->frame.reg_offset[R30_REGNUM] = offset; -- offset += UNITS_PER_WORD; -- cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD; -- } -- - cfun->machine->frame.padding0 = - (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset); - offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT); - - cfun->machine->frame.saved_regs_size = offset; -+ -+ cfun->machine->frame.hard_fp_offset -+ = AARCH64_ROUND_UP (cfun->machine->frame.saved_varargs_size -+ + get_frame_size () -+ + 
cfun->machine->frame.saved_regs_size, -+ STACK_BOUNDARY / BITS_PER_UNIT); -+ -+ cfun->machine->frame.frame_size -+ = AARCH64_ROUND_UP (cfun->machine->frame.hard_fp_offset -+ + crtl->outgoing_args_size, -+ STACK_BOUNDARY / BITS_PER_UNIT); -+ - cfun->machine->frame.laid_out = true; - } - --/* Make the last instruction frame-related and note that it performs -- the operation described by FRAME_PATTERN. */ -+static bool -+aarch64_register_saved_on_entry (int regno) -+{ -+ return cfun->machine->frame.reg_offset[regno] >= 0; -+} - -+static unsigned -+aarch64_next_callee_save (unsigned regno, unsigned limit) -+{ -+ while (regno <= limit && !aarch64_register_saved_on_entry (regno)) -+ regno ++; -+ return regno; -+} -+ - static void --aarch64_set_frame_expr (rtx frame_pattern) -+aarch64_pushwb_single_reg (enum machine_mode mode, unsigned regno, -+ HOST_WIDE_INT adjustment) -+ { -+ rtx base_rtx = stack_pointer_rtx; -+ rtx insn, reg, mem; -+ -+ reg = gen_rtx_REG (mode, regno); -+ mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx, -+ plus_constant (Pmode, base_rtx, -adjustment)); -+ mem = gen_rtx_MEM (mode, mem); -+ -+ insn = emit_move_insn (mem, reg); -+ RTX_FRAME_RELATED_P (insn) = 1; -+} -+ -+static rtx -+aarch64_gen_storewb_pair (enum machine_mode mode, rtx base, rtx reg, rtx reg2, -+ HOST_WIDE_INT adjustment) - { -+ switch (mode) -+ { -+ case DImode: -+ return gen_storewb_pairdi_di (base, base, reg, reg2, -+ GEN_INT (-adjustment), -+ GEN_INT (UNITS_PER_WORD - adjustment)); -+ case DFmode: -+ return gen_storewb_pairdf_di (base, base, reg, reg2, -+ GEN_INT (-adjustment), -+ GEN_INT (UNITS_PER_WORD - adjustment)); -+ default: -+ gcc_unreachable (); -+ } -+} -+ -+static void -+aarch64_pushwb_pair_reg (enum machine_mode mode, unsigned regno1, -+ unsigned regno2, HOST_WIDE_INT adjustment) -+{ - rtx insn; -+ rtx reg1 = gen_rtx_REG (mode, regno1); -+ rtx reg2 = gen_rtx_REG (mode, regno2); - -- insn = get_last_insn (); -+ insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1, -+ reg2, adjustment)); -+ RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1; -+ -+ RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; - RTX_FRAME_RELATED_P (insn) = 1; -- RTX_FRAME_RELATED_P (frame_pattern) = 1; -- REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR, -- frame_pattern, -- REG_NOTES (insn)); - } - --static bool --aarch64_register_saved_on_entry (int regno) -+static rtx -+aarch64_gen_loadwb_pair (enum machine_mode mode, rtx base, rtx reg, rtx reg2, -+ HOST_WIDE_INT adjustment) - { -- return cfun->machine->frame.reg_offset[regno] != -1; -+ switch (mode) -+ { -+ case DImode: -+ return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment), -+ GEN_INT (UNITS_PER_WORD)); -+ case DFmode: -+ return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment), -+ GEN_INT (UNITS_PER_WORD)); -+ default: -+ gcc_unreachable (); -+ } - } - -+static rtx -+aarch64_gen_store_pair (enum machine_mode mode, rtx mem1, rtx reg1, rtx mem2, -+ rtx reg2) -+{ -+ switch (mode) -+ { -+ case DImode: -+ return gen_store_pairdi (mem1, reg1, mem2, reg2); - --static void --aarch64_save_or_restore_fprs (int start_offset, int increment, -- bool restore, rtx base_rtx) -+ case DFmode: -+ return gen_store_pairdf (mem1, reg1, mem2, reg2); - -+ default: -+ gcc_unreachable (); -+ } -+} -+ -+static rtx -+aarch64_gen_load_pair (enum machine_mode mode, rtx reg1, rtx mem1, rtx reg2, -+ rtx mem2) - { -+ switch (mode) -+ { -+ case DImode: -+ return gen_load_pairdi (reg1, mem1, reg2, mem2); -+ -+ case DFmode: -+ 
return gen_load_pairdf (reg1, mem1, reg2, mem2); -+ -+ default: -+ gcc_unreachable (); -+ } -+} -+ -+ -+static void -+aarch64_save_callee_saves (enum machine_mode mode, HOST_WIDE_INT start_offset, -+ unsigned start, unsigned limit, bool skip_wb) -+{ -+ rtx insn; -+ rtx (*gen_mem_ref) (enum machine_mode, rtx) = (frame_pointer_needed -+ ? gen_frame_mem : gen_rtx_MEM); - unsigned regno; - unsigned regno2; -- rtx insn; -- rtx (*gen_mem_ref)(enum machine_mode, rtx) -- = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM; - -- -- for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) -+ for (regno = aarch64_next_callee_save (start, limit); -+ regno <= limit; -+ regno = aarch64_next_callee_save (regno + 1, limit)) - { -- if (aarch64_register_saved_on_entry (regno)) -- { -- rtx mem; -- mem = gen_mem_ref (DFmode, -- plus_constant (Pmode, -- base_rtx, -- start_offset)); -+ rtx reg, mem; -+ HOST_WIDE_INT offset; - -- for (regno2 = regno + 1; -- regno2 <= V31_REGNUM -- && !aarch64_register_saved_on_entry (regno2); -- regno2++) -- { -- /* Empty loop. */ -- } -- if (regno2 <= V31_REGNUM && -- aarch64_register_saved_on_entry (regno2)) -- { -- rtx mem2; -- /* Next highest register to be saved. */ -- mem2 = gen_mem_ref (DFmode, -- plus_constant -- (Pmode, -- base_rtx, -- start_offset + increment)); -- if (restore == false) -- { -- insn = emit_insn -- ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno), -- mem2, gen_rtx_REG (DFmode, regno2))); -+ if (skip_wb -+ && (regno == cfun->machine->frame.wb_candidate1 -+ || regno == cfun->machine->frame.wb_candidate2)) -+ continue; - -- } -- else -- { -- insn = emit_insn -- ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem, -- gen_rtx_REG (DFmode, regno2), mem2)); -+ reg = gen_rtx_REG (mode, regno); -+ offset = start_offset + cfun->machine->frame.reg_offset[regno]; -+ mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx, -+ offset)); - -- add_reg_note (insn, REG_CFA_RESTORE, -- gen_rtx_REG (DFmode, regno)); -- add_reg_note (insn, REG_CFA_RESTORE, -- gen_rtx_REG (DFmode, regno2)); -- } -+ regno2 = aarch64_next_callee_save (regno + 1, limit); - -- /* The first part of a frame-related parallel insn -- is always assumed to be relevant to the frame -- calculations; subsequent parts, are only -- frame-related if explicitly marked. */ -- RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; -- regno = regno2; -- start_offset += increment * 2; -- } -- else -- { -- if (restore == false) -- insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno)); -- else -- { -- insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem); -- add_reg_note (insn, REG_CFA_RESTORE, -- gen_rtx_REG (DImode, regno)); -- } -- start_offset += increment; -- } -- RTX_FRAME_RELATED_P (insn) = 1; -+ if (regno2 <= limit -+ && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD) -+ == cfun->machine->frame.reg_offset[regno2])) -+ -+ { -+ rtx reg2 = gen_rtx_REG (mode, regno2); -+ rtx mem2; -+ -+ offset = start_offset + cfun->machine->frame.reg_offset[regno2]; -+ mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx, -+ offset)); -+ insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2, -+ reg2)); -+ -+ /* The first part of a frame-related parallel insn is -+ always assumed to be relevant to the frame -+ calculations; subsequent parts, are only -+ frame-related if explicitly marked. 
*/ -+ RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; -+ regno = regno2; - } -+ else -+ insn = emit_move_insn (mem, reg); -+ -+ RTX_FRAME_RELATED_P (insn) = 1; - } -- - } - -- --/* offset from the stack pointer of where the saves and -- restore's have to happen. */ - static void --aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset, -- bool restore) -+aarch64_restore_callee_saves (enum machine_mode mode, -+ HOST_WIDE_INT start_offset, unsigned start, -+ unsigned limit, bool skip_wb, rtx *cfi_ops) - { -- rtx insn; - rtx base_rtx = stack_pointer_rtx; -- HOST_WIDE_INT start_offset = offset; -- HOST_WIDE_INT increment = UNITS_PER_WORD; -- rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM; -- unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM; -+ rtx (*gen_mem_ref) (enum machine_mode, rtx) = (frame_pointer_needed -+ ? gen_frame_mem : gen_rtx_MEM); - unsigned regno; - unsigned regno2; -+ HOST_WIDE_INT offset; - -- for (regno = R0_REGNUM; regno <= limit; regno++) -+ for (regno = aarch64_next_callee_save (start, limit); -+ regno <= limit; -+ regno = aarch64_next_callee_save (regno + 1, limit)) - { -- if (aarch64_register_saved_on_entry (regno)) -- { -- rtx mem; -- mem = gen_mem_ref (Pmode, -- plus_constant (Pmode, -- base_rtx, -- start_offset)); -+ rtx reg, mem; - -- for (regno2 = regno + 1; -- regno2 <= limit -- && !aarch64_register_saved_on_entry (regno2); -- regno2++) -- { -- /* Empty loop. */ -- } -- if (regno2 <= limit && -- aarch64_register_saved_on_entry (regno2)) -- { -- rtx mem2; -- /* Next highest register to be saved. */ -- mem2 = gen_mem_ref (Pmode, -- plus_constant -- (Pmode, -- base_rtx, -- start_offset + increment)); -- if (restore == false) -- { -- insn = emit_insn -- ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno), -- mem2, gen_rtx_REG (DImode, regno2))); -+ if (skip_wb -+ && (regno == cfun->machine->frame.wb_candidate1 -+ || regno == cfun->machine->frame.wb_candidate2)) -+ continue; - -- } -- else -- { -- insn = emit_insn -- ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem, -- gen_rtx_REG (DImode, regno2), mem2)); -+ reg = gen_rtx_REG (mode, regno); -+ offset = start_offset + cfun->machine->frame.reg_offset[regno]; -+ mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset)); - -- add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno)); -- add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2)); -- } -+ regno2 = aarch64_next_callee_save (regno + 1, limit); - -- /* The first part of a frame-related parallel insn -- is always assumed to be relevant to the frame -- calculations; subsequent parts, are only -- frame-related if explicitly marked. 
*/ -- RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, -- 1)) = 1; -- regno = regno2; -- start_offset += increment * 2; -- } -- else -- { -- if (restore == false) -- insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno)); -- else -- { -- insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem); -- add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno)); -- } -- start_offset += increment; -- } -- RTX_FRAME_RELATED_P (insn) = 1; -+ if (regno2 <= limit -+ && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD) -+ == cfun->machine->frame.reg_offset[regno2])) -+ { -+ rtx reg2 = gen_rtx_REG (mode, regno2); -+ rtx mem2; -+ -+ offset = start_offset + cfun->machine->frame.reg_offset[regno2]; -+ mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset)); -+ emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2)); -+ -+ *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops); -+ regno = regno2; - } -+ else -+ emit_move_insn (reg, mem); -+ *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops); - } -- -- aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx); -- - } - - /* AArch64 stack frames generated by this compiler look like: -@@ -1986,37 +2243,35 @@ - | | - | incoming stack arguments | - | | -- +-------------------------------+ <-- arg_pointer_rtx -- | | -+ +-------------------------------+ -+ | | <-- incoming stack pointer (aligned) - | callee-allocated save area | - | for register varargs | - | | -- +-------------------------------+ <-- frame_pointer_rtx -+ +-------------------------------+ -+ | local variables | <-- frame_pointer_rtx - | | -- | local variables | -- | | - +-------------------------------+ - | padding0 | \ - +-------------------------------+ | -- | | | -- | | | - | callee-saved registers | | frame.saved_regs_size -- | | | - +-------------------------------+ | - | LR' | | - +-------------------------------+ | -- | FP' | / -- P +-------------------------------+ <-- hard_frame_pointer_rtx -+ | FP' | / <- hard_frame_pointer_rtx (aligned) -+ +-------------------------------+ - | dynamic allocation | - +-------------------------------+ -- | | -- | outgoing stack arguments | -- | | -- +-------------------------------+ <-- stack_pointer_rtx -+ | padding | -+ +-------------------------------+ -+ | outgoing stack arguments | <-- arg_pointer -+ | | -+ +-------------------------------+ -+ | | <-- stack_pointer_rtx (aligned) - -- Dynamic stack allocations such as alloca insert data at point P. -- They decrease stack_pointer_rtx but leave frame_pointer_rtx and -- hard_frame_pointer_rtx unchanged. */ -+ Dynamic stack allocations via alloca() decrease stack_pointer_rtx -+ but leave frame_pointer_rtx and hard_frame_pointer_rtx -+ unchanged. */ - - /* Generate the prologue instructions for entry into a function. - Establish the stack frame by decreasing the stack pointer with a -@@ -2034,27 +2289,20 @@ - - sub sp, sp, <final_adjustment_if_any> - */ -- HOST_WIDE_INT original_frame_size; /* local variables + vararg save */ - HOST_WIDE_INT frame_size, offset; -- HOST_WIDE_INT fp_offset; /* FP offset from SP */ -+ HOST_WIDE_INT fp_offset; /* Offset from hard FP to SP. 
*/ -+ HOST_WIDE_INT hard_fp_offset; - rtx insn; - - aarch64_layout_frame (); -- original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size; -- gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg) -- && (cfun->stdarg || !cfun->machine->saved_varargs_size)); -- frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size -- + crtl->outgoing_args_size); -- offset = frame_size = AARCH64_ROUND_UP (frame_size, -- STACK_BOUNDARY / BITS_PER_UNIT); - -+ offset = frame_size = cfun->machine->frame.frame_size; -+ hard_fp_offset = cfun->machine->frame.hard_fp_offset; -+ fp_offset = frame_size - hard_fp_offset; -+ - if (flag_stack_usage_info) - current_function_static_stack_size = frame_size; - -- fp_offset = (offset -- - original_frame_size -- - cfun->machine->frame.saved_regs_size); -- - /* Store pairs and load pairs have a range only -512 to 504. */ - if (offset >= 512) - { -@@ -2064,7 +2312,7 @@ - register area. This will allow the pre-index write-back - store pair instructions to be used for setting up the stack frame - efficiently. */ -- offset = original_frame_size + cfun->machine->frame.saved_regs_size; -+ offset = hard_fp_offset; - if (offset >= 512) - offset = cfun->machine->frame.saved_regs_size; - -@@ -2075,29 +2323,29 @@ - { - rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM); - emit_move_insn (op0, GEN_INT (-frame_size)); -- emit_insn (gen_add2_insn (stack_pointer_rtx, op0)); -- aarch64_set_frame_expr (gen_rtx_SET -- (Pmode, stack_pointer_rtx, -- plus_constant (Pmode, -- stack_pointer_rtx, -- -frame_size))); -+ insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0)); -+ -+ add_reg_note (insn, REG_CFA_ADJUST_CFA, -+ gen_rtx_SET (VOIDmode, stack_pointer_rtx, -+ plus_constant (Pmode, stack_pointer_rtx, -+ -frame_size))); -+ RTX_FRAME_RELATED_P (insn) = 1; - } - else if (frame_size > 0) - { -- if ((frame_size & 0xfff) != frame_size) -+ int hi_ofs = frame_size & 0xfff000; -+ int lo_ofs = frame_size & 0x000fff; -+ -+ if (hi_ofs) - { - insn = emit_insn (gen_add2_insn -- (stack_pointer_rtx, -- GEN_INT (-(frame_size -- & ~(HOST_WIDE_INT)0xfff)))); -+ (stack_pointer_rtx, GEN_INT (-hi_ofs))); - RTX_FRAME_RELATED_P (insn) = 1; - } -- if ((frame_size & 0xfff) != 0) -+ if (lo_ofs) - { - insn = emit_insn (gen_add2_insn -- (stack_pointer_rtx, -- GEN_INT (-(frame_size -- & (HOST_WIDE_INT)0xfff)))); -+ (stack_pointer_rtx, GEN_INT (-lo_ofs))); - RTX_FRAME_RELATED_P (insn) = 1; - } - } -@@ -2107,12 +2355,11 @@ - - if (offset > 0) - { -- /* Save the frame pointer and lr if the frame pointer is needed -- first. Make the frame pointer point to the location of the -- old frame pointer on the stack. 
*/ -+ bool skip_wb = false; -+ - if (frame_pointer_needed) - { -- rtx mem_fp, mem_lr; -+ skip_wb = true; - - if (fp_offset) - { -@@ -2119,67 +2366,52 @@ - insn = emit_insn (gen_add2_insn (stack_pointer_rtx, - GEN_INT (-offset))); - RTX_FRAME_RELATED_P (insn) = 1; -- aarch64_set_frame_expr (gen_rtx_SET -- (Pmode, stack_pointer_rtx, -- gen_rtx_MINUS (Pmode, -- stack_pointer_rtx, -- GEN_INT (offset)))); -- mem_fp = gen_frame_mem (DImode, -- plus_constant (Pmode, -- stack_pointer_rtx, -- fp_offset)); -- mem_lr = gen_frame_mem (DImode, -- plus_constant (Pmode, -- stack_pointer_rtx, -- fp_offset -- + UNITS_PER_WORD)); -- insn = emit_insn (gen_store_pairdi (mem_fp, -- hard_frame_pointer_rtx, -- mem_lr, -- gen_rtx_REG (DImode, -- LR_REGNUM))); -+ -+ aarch64_save_callee_saves (DImode, fp_offset, R29_REGNUM, -+ R30_REGNUM, false); - } - else -- { -- insn = emit_insn (gen_storewb_pairdi_di -- (stack_pointer_rtx, stack_pointer_rtx, -- hard_frame_pointer_rtx, -- gen_rtx_REG (DImode, LR_REGNUM), -- GEN_INT (-offset), -- GEN_INT (GET_MODE_SIZE (DImode) - offset))); -- RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1; -- } -+ aarch64_pushwb_pair_reg (DImode, R29_REGNUM, R30_REGNUM, offset); - -- /* The first part of a frame-related parallel insn is always -- assumed to be relevant to the frame calculations; -- subsequent parts, are only frame-related if explicitly -- marked. */ -- RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; -- RTX_FRAME_RELATED_P (insn) = 1; -- - /* Set up frame pointer to point to the location of the - previous frame pointer on the stack. */ - insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx, - stack_pointer_rtx, - GEN_INT (fp_offset))); -- aarch64_set_frame_expr (gen_rtx_SET -- (Pmode, hard_frame_pointer_rtx, -- plus_constant (Pmode, -- stack_pointer_rtx, -- fp_offset))); - RTX_FRAME_RELATED_P (insn) = 1; -- insn = emit_insn (gen_stack_tie (stack_pointer_rtx, -- hard_frame_pointer_rtx)); -+ emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx)); - } - else - { -- insn = emit_insn (gen_add2_insn (stack_pointer_rtx, -- GEN_INT (-offset))); -- RTX_FRAME_RELATED_P (insn) = 1; -+ unsigned reg1 = cfun->machine->frame.wb_candidate1; -+ unsigned reg2 = cfun->machine->frame.wb_candidate2; -+ -+ if (fp_offset -+ || reg1 == FIRST_PSEUDO_REGISTER -+ || (reg2 == FIRST_PSEUDO_REGISTER -+ && offset >= 256)) -+ { -+ insn = emit_insn (gen_add2_insn (stack_pointer_rtx, -+ GEN_INT (-offset))); -+ RTX_FRAME_RELATED_P (insn) = 1; -+ } -+ else -+ { -+ enum machine_mode mode1 = (reg1 <= R30_REGNUM) ? 
DImode : DFmode; -+ -+ skip_wb = true; -+ -+ if (reg2 == FIRST_PSEUDO_REGISTER) -+ aarch64_pushwb_single_reg (mode1, reg1, offset); -+ else -+ aarch64_pushwb_pair_reg (mode1, reg1, reg2, offset); -+ } - } - -- aarch64_save_or_restore_callee_save_registers -- (fp_offset + cfun->machine->frame.hardfp_offset, 0); -+ aarch64_save_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM, -+ skip_wb); -+ aarch64_save_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM, -+ skip_wb); - } - - /* when offset >= 512, -@@ -2200,28 +2432,21 @@ - void - aarch64_expand_epilogue (bool for_sibcall) - { -- HOST_WIDE_INT original_frame_size, frame_size, offset; -+ HOST_WIDE_INT frame_size, offset; - HOST_WIDE_INT fp_offset; -+ HOST_WIDE_INT hard_fp_offset; - rtx insn; -- rtx cfa_reg; - - aarch64_layout_frame (); -- original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size; -- frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size -- + crtl->outgoing_args_size); -- offset = frame_size = AARCH64_ROUND_UP (frame_size, -- STACK_BOUNDARY / BITS_PER_UNIT); - -- fp_offset = (offset -- - original_frame_size -- - cfun->machine->frame.saved_regs_size); -+ offset = frame_size = cfun->machine->frame.frame_size; -+ hard_fp_offset = cfun->machine->frame.hard_fp_offset; -+ fp_offset = frame_size - hard_fp_offset; - -- cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx; -- - /* Store pairs and load pairs have a range only -512 to 504. */ - if (offset >= 512) - { -- offset = original_frame_size + cfun->machine->frame.saved_regs_size; -+ offset = hard_fp_offset; - if (offset >= 512) - offset = cfun->machine->frame.saved_regs_size; - -@@ -2247,72 +2472,51 @@ - { - insn = emit_insn (gen_add3_insn (stack_pointer_rtx, - hard_frame_pointer_rtx, -- GEN_INT (- fp_offset))); -- RTX_FRAME_RELATED_P (insn) = 1; -- /* As SP is set to (FP - fp_offset), according to the rules in -- dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated -- from the value of SP from now on. */ -- cfa_reg = stack_pointer_rtx; -+ GEN_INT (0))); -+ offset = offset - fp_offset; - } - -- aarch64_save_or_restore_callee_save_registers -- (fp_offset + cfun->machine->frame.hardfp_offset, 1); -- -- /* Restore the frame pointer and lr if the frame pointer is needed. */ - if (offset > 0) - { -+ unsigned reg1 = cfun->machine->frame.wb_candidate1; -+ unsigned reg2 = cfun->machine->frame.wb_candidate2; -+ bool skip_wb = true; -+ rtx cfi_ops = NULL; -+ - if (frame_pointer_needed) -+ fp_offset = 0; -+ else if (fp_offset -+ || reg1 == FIRST_PSEUDO_REGISTER -+ || (reg2 == FIRST_PSEUDO_REGISTER -+ && offset >= 256)) -+ skip_wb = false; -+ -+ aarch64_restore_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM, -+ skip_wb, &cfi_ops); -+ aarch64_restore_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM, -+ skip_wb, &cfi_ops); -+ -+ if (skip_wb) - { -- rtx mem_fp, mem_lr; -+ enum machine_mode mode1 = (reg1 <= R30_REGNUM) ? 
DImode : DFmode; -+ rtx rreg1 = gen_rtx_REG (mode1, reg1); - -- if (fp_offset) -+ cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg1, cfi_ops); -+ if (reg2 == FIRST_PSEUDO_REGISTER) - { -- mem_fp = gen_frame_mem (DImode, -- plus_constant (Pmode, -- stack_pointer_rtx, -- fp_offset)); -- mem_lr = gen_frame_mem (DImode, -- plus_constant (Pmode, -- stack_pointer_rtx, -- fp_offset -- + UNITS_PER_WORD)); -- insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx, -- mem_fp, -- gen_rtx_REG (DImode, -- LR_REGNUM), -- mem_lr)); -+ rtx mem = plus_constant (Pmode, stack_pointer_rtx, offset); -+ mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem); -+ mem = gen_rtx_MEM (mode1, mem); -+ insn = emit_move_insn (rreg1, mem); - } - else - { -- insn = emit_insn (gen_loadwb_pairdi_di -- (stack_pointer_rtx, -- stack_pointer_rtx, -- hard_frame_pointer_rtx, -- gen_rtx_REG (DImode, LR_REGNUM), -- GEN_INT (offset), -- GEN_INT (GET_MODE_SIZE (DImode) + offset))); -- RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1; -- add_reg_note (insn, REG_CFA_ADJUST_CFA, -- (gen_rtx_SET (Pmode, stack_pointer_rtx, -- plus_constant (Pmode, cfa_reg, -- offset)))); -- } -+ rtx rreg2 = gen_rtx_REG (mode1, reg2); - -- /* The first part of a frame-related parallel insn -- is always assumed to be relevant to the frame -- calculations; subsequent parts, are only -- frame-related if explicitly marked. */ -- RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; -- RTX_FRAME_RELATED_P (insn) = 1; -- add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx); -- add_reg_note (insn, REG_CFA_RESTORE, -- gen_rtx_REG (DImode, LR_REGNUM)); -- -- if (fp_offset) -- { -- insn = emit_insn (gen_add2_insn (stack_pointer_rtx, -- GEN_INT (offset))); -- RTX_FRAME_RELATED_P (insn) = 1; -+ cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg2, cfi_ops); -+ insn = emit_insn (aarch64_gen_loadwb_pair -+ (mode1, stack_pointer_rtx, rreg1, -+ rreg2, offset)); - } - } - else -@@ -2319,79 +2523,57 @@ - { - insn = emit_insn (gen_add2_insn (stack_pointer_rtx, - GEN_INT (offset))); -- RTX_FRAME_RELATED_P (insn) = 1; - } -- } - -- /* Stack adjustment for exception handler. */ -- if (crtl->calls_eh_return) -- { -- /* We need to unwind the stack by the offset computed by -- EH_RETURN_STACKADJ_RTX. However, at this point the CFA is -- based on SP. Ideally we would update the SP and define the -- CFA along the lines of: -- -- SP = SP + EH_RETURN_STACKADJ_RTX -- (regnote CFA = SP - EH_RETURN_STACKADJ_RTX) -- -- However the dwarf emitter only understands a constant -- register offset. -- -- The solution chosen here is to use the otherwise unused IP0 -- as a temporary register to hold the current SP value. The -- CFA is described using IP0 then SP is modified. */ -- -- rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM); -- -- insn = emit_move_insn (ip0, stack_pointer_rtx); -- add_reg_note (insn, REG_CFA_DEF_CFA, ip0); -+ /* Reset the CFA to be SP + FRAME_SIZE. */ -+ rtx new_cfa = stack_pointer_rtx; -+ if (frame_size > 0) -+ new_cfa = plus_constant (Pmode, new_cfa, frame_size); -+ cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops); -+ REG_NOTES (insn) = cfi_ops; - RTX_FRAME_RELATED_P (insn) = 1; -- -- emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX)); -- -- /* Ensure the assignment to IP0 does not get optimized away. 
*/ -- emit_use (ip0); - } - -- if (frame_size > -1) -+ if (frame_size > 0) - { - if (frame_size >= 0x1000000) - { - rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM); - emit_move_insn (op0, GEN_INT (frame_size)); -- emit_insn (gen_add2_insn (stack_pointer_rtx, op0)); -- aarch64_set_frame_expr (gen_rtx_SET -- (Pmode, stack_pointer_rtx, -- plus_constant (Pmode, -- stack_pointer_rtx, -- frame_size))); -+ insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0)); - } -- else if (frame_size > 0) -+ else - { -- if ((frame_size & 0xfff) != 0) -+ int hi_ofs = frame_size & 0xfff000; -+ int lo_ofs = frame_size & 0x000fff; -+ -+ if (hi_ofs && lo_ofs) - { - insn = emit_insn (gen_add2_insn -- (stack_pointer_rtx, -- GEN_INT ((frame_size -- & (HOST_WIDE_INT) 0xfff)))); -+ (stack_pointer_rtx, GEN_INT (hi_ofs))); - RTX_FRAME_RELATED_P (insn) = 1; -+ frame_size = lo_ofs; - } -- if ((frame_size & 0xfff) != frame_size) -- { -- insn = emit_insn (gen_add2_insn -- (stack_pointer_rtx, -- GEN_INT ((frame_size -- & ~ (HOST_WIDE_INT) 0xfff)))); -- RTX_FRAME_RELATED_P (insn) = 1; -- } -+ insn = emit_insn (gen_add2_insn -+ (stack_pointer_rtx, GEN_INT (frame_size))); - } - -- aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx, -- plus_constant (Pmode, -- stack_pointer_rtx, -- offset))); -+ /* Reset the CFA to be SP + 0. */ -+ add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx); -+ RTX_FRAME_RELATED_P (insn) = 1; - } - -+ /* Stack adjustment for exception handler. */ -+ if (crtl->calls_eh_return) -+ { -+ /* We need to unwind the stack by the offset computed by -+ EH_RETURN_STACKADJ_RTX. We have already reset the CFA -+ to be SP; letting the CFA move during this adjustment -+ is just as correct as retaining the CFA from the body -+ of the function. Therefore, do nothing special. */ -+ emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX)); -+ } -+ - emit_use (gen_rtx_REG (DImode, LR_REGNUM)); - if (!for_sibcall) - emit_jump_insn (ret_rtx); -@@ -2403,17 +2585,13 @@ - rtx - aarch64_final_eh_return_addr (void) - { -- HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset; -+ HOST_WIDE_INT fp_offset; -+ - aarch64_layout_frame (); -- original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size; -- frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size -- + crtl->outgoing_args_size); -- offset = frame_size = AARCH64_ROUND_UP (frame_size, -- STACK_BOUNDARY / BITS_PER_UNIT); -- fp_offset = offset -- - original_frame_size -- - cfun->machine->frame.saved_regs_size; - -+ fp_offset = cfun->machine->frame.frame_size -+ - cfun->machine->frame.hard_fp_offset; -+ - if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0) - return gen_rtx_REG (DImode, LR_REGNUM); - -@@ -2449,12 +2627,22 @@ - - 2 * UNITS_PER_WORD)); - } - --/* Output code to build up a constant in a register. */ --static void --aarch64_build_constant (int regnum, HOST_WIDE_INT val) -+/* Possibly output code to build up a constant in a register. For -+ the benefit of the costs infrastructure, returns the number of -+ instructions which would be emitted. GENERATE inhibits or -+ enables code generation. */ -+ -+static int -+aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate) - { -+ int insns = 0; -+ - if (aarch64_bitmask_imm (val, DImode)) -- emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val)); -+ { -+ if (generate) -+ emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val)); -+ insns = 1; -+ } - else - { - int i; -@@ -2485,15 +2673,19 @@ - the same. 
*/ - if (ncount < zcount) - { -- emit_move_insn (gen_rtx_REG (Pmode, regnum), -- GEN_INT (val | ~(HOST_WIDE_INT) 0xffff)); -+ if (generate) -+ emit_move_insn (gen_rtx_REG (Pmode, regnum), -+ GEN_INT (val | ~(HOST_WIDE_INT) 0xffff)); - tval = 0xffff; -+ insns++; - } - else - { -- emit_move_insn (gen_rtx_REG (Pmode, regnum), -- GEN_INT (val & 0xffff)); -+ if (generate) -+ emit_move_insn (gen_rtx_REG (Pmode, regnum), -+ GEN_INT (val & 0xffff)); - tval = 0; -+ insns++; - } - - val >>= 16; -@@ -2501,11 +2693,17 @@ - for (i = 16; i < 64; i += 16) - { - if ((val & 0xffff) != tval) -- emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum), -- GEN_INT (i), GEN_INT (val & 0xffff))); -+ { -+ if (generate) -+ emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum), -+ GEN_INT (i), -+ GEN_INT (val & 0xffff))); -+ insns++; -+ } - val >>= 16; - } - } -+ return insns; - } - - static void -@@ -2520,7 +2718,7 @@ - - if (mdelta >= 4096 * 4096) - { -- aarch64_build_constant (scratchreg, delta); -+ (void) aarch64_build_constant (scratchreg, delta, true); - emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx)); - } - else if (mdelta > 0) -@@ -2594,7 +2792,7 @@ - addr = plus_constant (Pmode, temp0, vcall_offset); - else - { -- aarch64_build_constant (IP1_REGNUM, vcall_offset); -+ (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true); - addr = gen_rtx_PLUS (Pmode, temp0, temp1); - } - -@@ -3011,8 +3209,8 @@ - return false; - } - --static inline bool --offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset) -+bool -+aarch64_offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset) - { - return (offset >= -64 * GET_MODE_SIZE (mode) - && offset < 64 * GET_MODE_SIZE (mode) -@@ -3046,11 +3244,11 @@ - enum rtx_code code = GET_CODE (x); - rtx op0, op1; - bool allow_reg_index_p = -- outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16; -- -+ outer_code != PARALLEL && (GET_MODE_SIZE (mode) != 16 -+ || aarch64_vector_mode_supported_p (mode)); - /* Don't support anything other than POST_INC or REG addressing for - AdvSIMD. */ -- if (aarch64_vector_mode_p (mode) -+ if (aarch64_vect_struct_mode_p (mode) - && (code != POST_INC && code != REG)) - return false; - -@@ -3066,6 +3264,21 @@ - case PLUS: - op0 = XEXP (x, 0); - op1 = XEXP (x, 1); -+ -+ if (! strict_p -+ && REG_P (op0) -+ && (op0 == virtual_stack_vars_rtx -+ || op0 == frame_pointer_rtx -+ || op0 == arg_pointer_rtx) -+ && CONST_INT_P (op1)) -+ { -+ info->type = ADDRESS_REG_IMM; -+ info->base = op0; -+ info->offset = op1; -+ -+ return true; -+ } -+ - if (GET_MODE_SIZE (mode) != 0 - && CONST_INT_P (op1) - && aarch64_base_register_rtx_p (op0, strict_p)) -@@ -3084,12 +3297,12 @@ - We conservatively require an offset representable in either mode. - */ - if (mode == TImode || mode == TFmode) -- return (offset_7bit_signed_scaled_p (mode, offset) -+ return (aarch64_offset_7bit_signed_scaled_p (mode, offset) - && offset_9bit_signed_unscaled_p (mode, offset)); - - if (outer_code == PARALLEL) - return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8) -- && offset_7bit_signed_scaled_p (mode, offset)); -+ && aarch64_offset_7bit_signed_scaled_p (mode, offset)); - else - return (offset_9bit_signed_unscaled_p (mode, offset) - || offset_12bit_unsigned_scaled_p (mode, offset)); -@@ -3144,12 +3357,12 @@ - We conservatively require an offset representable in either mode. 
- */ - if (mode == TImode || mode == TFmode) -- return (offset_7bit_signed_scaled_p (mode, offset) -+ return (aarch64_offset_7bit_signed_scaled_p (mode, offset) - && offset_9bit_signed_unscaled_p (mode, offset)); - - if (outer_code == PARALLEL) - return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8) -- && offset_7bit_signed_scaled_p (mode, offset)); -+ && aarch64_offset_7bit_signed_scaled_p (mode, offset)); - else - return offset_9bit_signed_unscaled_p (mode, offset); - } -@@ -3333,7 +3546,7 @@ - the comparison will have to be swapped when we emit the assembly - code. */ - if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode) -- && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG) -+ && (REG_P (y) || GET_CODE (y) == SUBREG) - && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT - || GET_CODE (x) == LSHIFTRT - || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)) -@@ -3342,7 +3555,7 @@ - /* Similarly for a negated operand, but we can only do this for - equalities. */ - if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode) -- && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG) -+ && (REG_P (y) || GET_CODE (y) == SUBREG) - && (code == EQ || code == NE) - && GET_CODE (x) == NEG) - return CC_Zmode; -@@ -3359,7 +3572,7 @@ - return CCmode; - } - --static unsigned -+int - aarch64_get_condition_code (rtx x) - { - enum machine_mode mode = GET_MODE (XEXP (x, 0)); -@@ -3386,7 +3599,7 @@ - case UNLE: return AARCH64_LE; - case UNGT: return AARCH64_HI; - case UNGE: return AARCH64_PL; -- default: gcc_unreachable (); -+ default: return -1; - } - break; - -@@ -3403,7 +3616,7 @@ - case GTU: return AARCH64_HI; - case LEU: return AARCH64_LS; - case LTU: return AARCH64_CC; -- default: gcc_unreachable (); -+ default: return -1; - } - break; - -@@ -3422,7 +3635,7 @@ - case GTU: return AARCH64_CC; - case LEU: return AARCH64_CS; - case LTU: return AARCH64_HI; -- default: gcc_unreachable (); -+ default: return -1; - } - break; - -@@ -3433,7 +3646,7 @@ - case EQ: return AARCH64_EQ; - case GE: return AARCH64_PL; - case LT: return AARCH64_MI; -- default: gcc_unreachable (); -+ default: return -1; - } - break; - -@@ -3442,16 +3655,46 @@ - { - case NE: return AARCH64_NE; - case EQ: return AARCH64_EQ; -- default: gcc_unreachable (); -+ default: return -1; - } - break; - - default: -- gcc_unreachable (); -+ return -1; - break; - } - } - -+bool -+aarch64_const_vec_all_same_in_range_p (rtx x, -+ HOST_WIDE_INT minval, -+ HOST_WIDE_INT maxval) -+{ -+ HOST_WIDE_INT firstval; -+ int count, i; -+ -+ if (GET_CODE (x) != CONST_VECTOR -+ || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT) -+ return false; -+ -+ firstval = INTVAL (CONST_VECTOR_ELT (x, 0)); -+ if (firstval < minval || firstval > maxval) -+ return false; -+ -+ count = CONST_VECTOR_NUNITS (x); -+ for (i = 1; i < count; i++) -+ if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval) -+ return false; -+ -+ return true; -+} -+ -+bool -+aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val) -+{ -+ return aarch64_const_vec_all_same_in_range_p (x, val, val); -+} -+ - static unsigned - bit_count (unsigned HOST_WIDE_INT value) - { -@@ -3502,7 +3745,7 @@ - { - int n; - -- if (GET_CODE (x) != CONST_INT -+ if (!CONST_INT_P (x) - || (n = exact_log2 (INTVAL (x) & ~7)) <= 0) - { - output_operand_lossage ("invalid operand for '%%%c'", code); -@@ -3532,7 +3775,7 @@ - int n; - - /* Print N such that 2^N == X. 
*/ -- if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0) -+ if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0) - { - output_operand_lossage ("invalid operand for '%%%c'", code); - return; -@@ -3544,7 +3787,7 @@ - - case 'P': - /* Print the number of non-zero bits in X (a const_int). */ -- if (GET_CODE (x) != CONST_INT) -+ if (!CONST_INT_P (x)) - { - output_operand_lossage ("invalid operand for '%%%c'", code); - return; -@@ -3555,7 +3798,7 @@ - - case 'H': - /* Print the higher numbered register of a pair (TImode) of regs. */ -- if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1)) -+ if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1)) - { - output_operand_lossage ("invalid operand for '%%%c'", code); - return; -@@ -3565,39 +3808,48 @@ - break; - - case 'm': -- /* Print a condition (eq, ne, etc). */ -+ { -+ int cond_code; -+ /* Print a condition (eq, ne, etc). */ - -- /* CONST_TRUE_RTX means always -- that's the default. */ -- if (x == const_true_rtx) -- return; -- -- if (!COMPARISON_P (x)) -- { -- output_operand_lossage ("invalid operand for '%%%c'", code); -+ /* CONST_TRUE_RTX means always -- that's the default. */ -+ if (x == const_true_rtx) - return; -- } - -- fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f); -+ if (!COMPARISON_P (x)) -+ { -+ output_operand_lossage ("invalid operand for '%%%c'", code); -+ return; -+ } -+ -+ cond_code = aarch64_get_condition_code (x); -+ gcc_assert (cond_code >= 0); -+ fputs (aarch64_condition_codes[cond_code], f); -+ } - break; - - case 'M': -- /* Print the inverse of a condition (eq <-> ne, etc). */ -+ { -+ int cond_code; -+ /* Print the inverse of a condition (eq <-> ne, etc). */ - -- /* CONST_TRUE_RTX means never -- that's the default. */ -- if (x == const_true_rtx) -- { -- fputs ("nv", f); -- return; -- } -+ /* CONST_TRUE_RTX means never -- that's the default. */ -+ if (x == const_true_rtx) -+ { -+ fputs ("nv", f); -+ return; -+ } - -- if (!COMPARISON_P (x)) -- { -- output_operand_lossage ("invalid operand for '%%%c'", code); -- return; -- } -- -- fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE -- (aarch64_get_condition_code (x))], f); -+ if (!COMPARISON_P (x)) -+ { -+ output_operand_lossage ("invalid operand for '%%%c'", code); -+ return; -+ } -+ cond_code = aarch64_get_condition_code (x); -+ gcc_assert (cond_code >= 0); -+ fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE -+ (cond_code)], f); -+ } - break; - - case 'b': -@@ -3629,7 +3881,7 @@ - - case 'X': - /* Print bottom 16 bits of integer constant in hex. 
*/ -- if (GET_CODE (x) != CONST_INT) -+ if (!CONST_INT_P (x)) - { - output_operand_lossage ("invalid operand for '%%%c'", code); - return; -@@ -3694,9 +3946,10 @@ - case CONST_VECTOR: - if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT) - { -- gcc_assert (aarch64_const_vec_all_same_int_p (x, -- HOST_WIDE_INT_MIN, -- HOST_WIDE_INT_MAX)); -+ gcc_assert ( -+ aarch64_const_vec_all_same_in_range_p (x, -+ HOST_WIDE_INT_MIN, -+ HOST_WIDE_INT_MAX)); - asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0))); - } - else if (aarch64_simd_imm_zero_p (x, GET_MODE (x))) -@@ -3839,34 +4092,34 @@ - if (addr.offset == const0_rtx) - asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]); - else -- asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)], -+ asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)], - INTVAL (addr.offset)); - return; - - case ADDRESS_REG_REG: - if (addr.shift == 0) -- asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)], -+ asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)], - reg_names [REGNO (addr.offset)]); - else -- asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)], -+ asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)], - reg_names [REGNO (addr.offset)], addr.shift); - return; - - case ADDRESS_REG_UXTW: - if (addr.shift == 0) -- asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)], -+ asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)], - REGNO (addr.offset) - R0_REGNUM); - else -- asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)], -+ asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)], - REGNO (addr.offset) - R0_REGNUM, addr.shift); - return; - - case ADDRESS_REG_SXTW: - if (addr.shift == 0) -- asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)], -+ asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)], - REGNO (addr.offset) - R0_REGNUM); - else -- asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)], -+ asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)], - REGNO (addr.offset) - R0_REGNUM, addr.shift); - return; - -@@ -3874,27 +4127,27 @@ - switch (GET_CODE (x)) - { - case PRE_INC: -- asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)], -+ asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)], - GET_MODE_SIZE (aarch64_memory_reference_mode)); - return; - case POST_INC: -- asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)], -+ asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)], - GET_MODE_SIZE (aarch64_memory_reference_mode)); - return; - case PRE_DEC: -- asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)], -+ asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)], - GET_MODE_SIZE (aarch64_memory_reference_mode)); - return; - case POST_DEC: -- asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)], -+ asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)], - GET_MODE_SIZE (aarch64_memory_reference_mode)); - return; - case PRE_MODIFY: -- asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)], -+ asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)], - INTVAL (addr.offset)); - return; - case POST_MODIFY: -- asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)], -+ asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)], - INTVAL (addr.offset)); - return; - default: -@@ -3903,7 +4156,7 @@ - break; - - case ADDRESS_LO_SUM: -- asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]); -+ asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]); - 
output_addr_const (f, addr.offset); - asm_fprintf (f, "]"); - return; -@@ -3954,7 +4207,7 @@ - aarch64_regno_regclass (unsigned regno) - { - if (GP_REGNUM_P (regno)) -- return CORE_REGS; -+ return GENERAL_REGS; - - if (regno == SP_REGNUM) - return STACK_REG; -@@ -3969,6 +4222,47 @@ - return NO_REGS; - } - -+static rtx -+aarch64_legitimize_address (rtx x, rtx /* orig_x */, enum machine_mode mode) -+{ -+ /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask), -+ where mask is selected by alignment and size of the offset. -+ We try to pick as large a range for the offset as possible to -+ maximize the chance of a CSE. However, for aligned addresses -+ we limit the range to 4k so that structures with different sized -+ elements are likely to use the same base. */ -+ -+ if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))) -+ { -+ HOST_WIDE_INT offset = INTVAL (XEXP (x, 1)); -+ HOST_WIDE_INT base_offset; -+ -+ /* Does it look like we'll need a load/store-pair operation? */ -+ if (GET_MODE_SIZE (mode) > 16 -+ || mode == TImode) -+ base_offset = ((offset + 64 * GET_MODE_SIZE (mode)) -+ & ~((128 * GET_MODE_SIZE (mode)) - 1)); -+ /* For offsets aren't a multiple of the access size, the limit is -+ -256...255. */ -+ else if (offset & (GET_MODE_SIZE (mode) - 1)) -+ base_offset = (offset + 0x100) & ~0x1ff; -+ else -+ base_offset = offset & ~0xfff; -+ -+ if (base_offset == 0) -+ return x; -+ -+ offset -= base_offset; -+ rtx base_reg = gen_reg_rtx (Pmode); -+ rtx val = force_operand (plus_constant (Pmode, XEXP (x, 0), base_offset), -+ NULL_RTX); -+ emit_move_insn (base_reg, val); -+ x = plus_constant (Pmode, base_reg, offset); -+ } -+ -+ return x; -+} -+ - /* Try a machine-dependent way of reloading an illegitimate address - operand. If we find one, push the reload and return the new rtx. */ - -@@ -3980,8 +4274,8 @@ - { - rtx x = *x_p; - -- /* Do not allow mem (plus (reg, const)) if vector mode. */ -- if (aarch64_vector_mode_p (mode) -+ /* Do not allow mem (plus (reg, const)) if vector struct mode. */ -+ if (aarch64_vect_struct_mode_p (mode) - && GET_CODE (x) == PLUS - && REG_P (XEXP (x, 0)) - && CONST_INT_P (XEXP (x, 1))) -@@ -4105,12 +4399,12 @@ - /* A TFmode or TImode memory access should be handled via an FP_REGS - because AArch64 has richer addressing modes for LDR/STR instructions - than LDP/STP instructions. 
*/ -- if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS -+ if (!TARGET_GENERAL_REGS_ONLY && rclass == GENERAL_REGS - && GET_MODE_SIZE (mode) == 16 && MEM_P (x)) - return FP_REGS; - - if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x)) -- return CORE_REGS; -+ return GENERAL_REGS; - - return NO_REGS; - } -@@ -4142,43 +4436,28 @@ - HOST_WIDE_INT - aarch64_initial_elimination_offset (unsigned from, unsigned to) - { -- HOST_WIDE_INT frame_size; -- HOST_WIDE_INT offset; -- - aarch64_layout_frame (); -- frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size -- + crtl->outgoing_args_size -- + cfun->machine->saved_varargs_size); - -- frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT); -- offset = frame_size; -+ if (to == HARD_FRAME_POINTER_REGNUM) -+ { -+ if (from == ARG_POINTER_REGNUM) -+ return cfun->machine->frame.frame_size - crtl->outgoing_args_size; - -- if (to == HARD_FRAME_POINTER_REGNUM) -- { -- if (from == ARG_POINTER_REGNUM) -- return offset - crtl->outgoing_args_size; -+ if (from == FRAME_POINTER_REGNUM) -+ return (cfun->machine->frame.hard_fp_offset -+ - cfun->machine->frame.saved_varargs_size); -+ } - -- if (from == FRAME_POINTER_REGNUM) -- return cfun->machine->frame.saved_regs_size + get_frame_size (); -- } -+ if (to == STACK_POINTER_REGNUM) -+ { -+ if (from == FRAME_POINTER_REGNUM) -+ return (cfun->machine->frame.frame_size -+ - cfun->machine->frame.saved_varargs_size); -+ } - -- if (to == STACK_POINTER_REGNUM) -- { -- if (from == FRAME_POINTER_REGNUM) -- { -- HOST_WIDE_INT elim = crtl->outgoing_args_size -- + cfun->machine->frame.saved_regs_size -- + get_frame_size () -- - cfun->machine->frame.fp_lr_offset; -- elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT); -- return elim; -- } -- } -- -- return offset; -+ return cfun->machine->frame.frame_size; - } - -- - /* Implement RETURN_ADDR_RTX. We do not support moving back to a - previous frame. */ - -@@ -4242,7 +4521,7 @@ - { - switch (regclass) - { -- case CORE_REGS: -+ case CALLER_SAVE_REGS: - case POINTER_REGS: - case GENERAL_REGS: - case ALL_REGS: -@@ -4443,9 +4722,13 @@ - { - rtx op = x; - -+ /* We accept both ROTATERT and ROTATE: since the RHS must be a constant -+ we can convert both to ROR during final output. */ - if ((GET_CODE (op) == ASHIFT - || GET_CODE (op) == ASHIFTRT -- || GET_CODE (op) == LSHIFTRT) -+ || GET_CODE (op) == LSHIFTRT -+ || GET_CODE (op) == ROTATERT -+ || GET_CODE (op) == ROTATE) - && CONST_INT_P (XEXP (op, 1))) - return XEXP (op, 0); - -@@ -4457,12 +4740,12 @@ - return x; - } - --/* Helper function for rtx cost calculation. Strip a shift or extend -+/* Helper function for rtx cost calculation. Strip an extend - expression from X. Returns the inner operand if successful, or the - original expression on failure. We deal with a number of possible - canonicalization variations here. */ - static rtx --aarch64_strip_shift_or_extend (rtx x) -+aarch64_strip_extend (rtx x) - { - rtx op = x; - -@@ -4469,6 +4752,7 @@ - /* Zero and sign extraction of a widened value. */ - if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT) - && XEXP (op, 2) == const0_rtx -+ && GET_CODE (XEXP (op, 0)) == MULT - && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1), - XEXP (op, 1))) - return XEXP (XEXP (op, 0), 0); -@@ -4497,9 +4781,335 @@ - if (op != x) - return op; - -- return aarch64_strip_shift (x); -+ return x; - } - -+/* Helper function for rtx cost calculation. 
Calculate the cost of -+ a MULT, which may be part of a multiply-accumulate rtx. Return -+ the calculated cost of the expression, recursing manually in to -+ operands where needed. */ -+ -+static int -+aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed) -+{ -+ rtx op0, op1; -+ const struct cpu_cost_table *extra_cost -+ = aarch64_tune_params->insn_extra_cost; -+ int cost = 0; -+ bool maybe_fma = (outer == PLUS || outer == MINUS); -+ enum machine_mode mode = GET_MODE (x); -+ -+ gcc_checking_assert (code == MULT); -+ -+ op0 = XEXP (x, 0); -+ op1 = XEXP (x, 1); -+ -+ if (VECTOR_MODE_P (mode)) -+ mode = GET_MODE_INNER (mode); -+ -+ /* Integer multiply/fma. */ -+ if (GET_MODE_CLASS (mode) == MODE_INT) -+ { -+ /* The multiply will be canonicalized as a shift, cost it as such. */ -+ if (CONST_INT_P (op1) -+ && exact_log2 (INTVAL (op1)) > 0) -+ { -+ if (speed) -+ { -+ if (maybe_fma) -+ /* ADD (shifted register). */ -+ cost += extra_cost->alu.arith_shift; -+ else -+ /* LSL (immediate). */ -+ cost += extra_cost->alu.shift; -+ } -+ -+ cost += rtx_cost (op0, GET_CODE (op0), 0, speed); -+ -+ return cost; -+ } -+ -+ /* Integer multiplies or FMAs have zero/sign extending variants. */ -+ if ((GET_CODE (op0) == ZERO_EXTEND -+ && GET_CODE (op1) == ZERO_EXTEND) -+ || (GET_CODE (op0) == SIGN_EXTEND -+ && GET_CODE (op1) == SIGN_EXTEND)) -+ { -+ cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed) -+ + rtx_cost (XEXP (op1, 0), MULT, 1, speed); -+ -+ if (speed) -+ { -+ if (maybe_fma) -+ /* MADD/SMADDL/UMADDL. */ -+ cost += extra_cost->mult[0].extend_add; -+ else -+ /* MUL/SMULL/UMULL. */ -+ cost += extra_cost->mult[0].extend; -+ } -+ -+ return cost; -+ } -+ -+ /* This is either an integer multiply or an FMA. In both cases -+ we want to recurse and cost the operands. */ -+ cost += rtx_cost (op0, MULT, 0, speed) -+ + rtx_cost (op1, MULT, 1, speed); -+ -+ if (speed) -+ { -+ if (maybe_fma) -+ /* MADD. */ -+ cost += extra_cost->mult[mode == DImode].add; -+ else -+ /* MUL. */ -+ cost += extra_cost->mult[mode == DImode].simple; -+ } -+ -+ return cost; -+ } -+ else -+ { -+ if (speed) -+ { -+ /* Floating-point FMA/FMUL can also support negations of the -+ operands. */ -+ if (GET_CODE (op0) == NEG) -+ op0 = XEXP (op0, 0); -+ if (GET_CODE (op1) == NEG) -+ op1 = XEXP (op1, 0); -+ -+ if (maybe_fma) -+ /* FMADD/FNMADD/FNMSUB/FMSUB. */ -+ cost += extra_cost->fp[mode == DFmode].fma; -+ else -+ /* FMUL/FNMUL. */ -+ cost += extra_cost->fp[mode == DFmode].mult; -+ } -+ -+ cost += rtx_cost (op0, MULT, 0, speed) -+ + rtx_cost (op1, MULT, 1, speed); -+ return cost; -+ } -+} -+ -+static int -+aarch64_address_cost (rtx x, -+ enum machine_mode mode, -+ addr_space_t as ATTRIBUTE_UNUSED, -+ bool speed) -+{ -+ enum rtx_code c = GET_CODE (x); -+ const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost; -+ struct aarch64_address_info info; -+ int cost = 0; -+ info.shift = 0; -+ -+ if (!aarch64_classify_address (&info, x, mode, c, false)) -+ { -+ if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF) -+ { -+ /* This is a CONST or SYMBOL ref which will be split -+ in a different way depending on the code model in use. -+ Cost it through the generic infrastructure. */ -+ int cost_symbol_ref = rtx_cost (x, MEM, 1, speed); -+ /* Divide through by the cost of one instruction to -+ bring it to the same units as the address costs. */ -+ cost_symbol_ref /= COSTS_N_INSNS (1); -+ /* The cost is then the cost of preparing the address, -+ followed by an immediate (possibly 0) offset. 
*/ -+ return cost_symbol_ref + addr_cost->imm_offset; -+ } -+ else -+ { -+ /* This is most likely a jump table from a case -+ statement. */ -+ return addr_cost->register_offset; -+ } -+ } -+ -+ switch (info.type) -+ { -+ case ADDRESS_LO_SUM: -+ case ADDRESS_SYMBOLIC: -+ case ADDRESS_REG_IMM: -+ cost += addr_cost->imm_offset; -+ break; -+ -+ case ADDRESS_REG_WB: -+ if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY) -+ cost += addr_cost->pre_modify; -+ else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY) -+ cost += addr_cost->post_modify; -+ else -+ gcc_unreachable (); -+ -+ break; -+ -+ case ADDRESS_REG_REG: -+ cost += addr_cost->register_offset; -+ break; -+ -+ case ADDRESS_REG_UXTW: -+ case ADDRESS_REG_SXTW: -+ cost += addr_cost->register_extend; -+ break; -+ -+ default: -+ gcc_unreachable (); -+ } -+ -+ -+ if (info.shift > 0) -+ { -+ /* For the sake of calculating the cost of the shifted register -+ component, we can treat same sized modes in the same way. */ -+ switch (GET_MODE_BITSIZE (mode)) -+ { -+ case 16: -+ cost += addr_cost->addr_scale_costs.hi; -+ break; -+ -+ case 32: -+ cost += addr_cost->addr_scale_costs.si; -+ break; -+ -+ case 64: -+ cost += addr_cost->addr_scale_costs.di; -+ break; -+ -+ /* We can't tell, or this is a 128-bit vector. */ -+ default: -+ cost += addr_cost->addr_scale_costs.ti; -+ break; -+ } -+ } -+ -+ return cost; -+} -+ -+/* Return true if the RTX X in mode MODE is a zero or sign extract -+ usable in an ADD or SUB (extended register) instruction. */ -+static bool -+aarch64_rtx_arith_op_extract_p (rtx x, enum machine_mode mode) -+{ -+ /* Catch add with a sign extract. -+ This is add_<optab><mode>_multp2. */ -+ if (GET_CODE (x) == SIGN_EXTRACT -+ || GET_CODE (x) == ZERO_EXTRACT) -+ { -+ rtx op0 = XEXP (x, 0); -+ rtx op1 = XEXP (x, 1); -+ rtx op2 = XEXP (x, 2); -+ -+ if (GET_CODE (op0) == MULT -+ && CONST_INT_P (op1) -+ && op2 == const0_rtx -+ && CONST_INT_P (XEXP (op0, 1)) -+ && aarch64_is_extend_from_extract (mode, -+ XEXP (op0, 1), -+ op1)) -+ { -+ return true; -+ } -+ } -+ -+ return false; -+} -+ -+static bool -+aarch64_frint_unspec_p (unsigned int u) -+{ -+ switch (u) -+ { -+ case UNSPEC_FRINTZ: -+ case UNSPEC_FRINTP: -+ case UNSPEC_FRINTM: -+ case UNSPEC_FRINTA: -+ case UNSPEC_FRINTN: -+ case UNSPEC_FRINTX: -+ case UNSPEC_FRINTI: -+ return true; -+ -+ default: -+ return false; -+ } -+} -+ -+/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)), -+ storing it in *COST. Result is true if the total cost of the operation -+ has now been calculated. */ -+static bool -+aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed) -+{ -+ rtx inner; -+ rtx comparator; -+ enum rtx_code cmpcode; -+ -+ if (COMPARISON_P (op0)) -+ { -+ inner = XEXP (op0, 0); -+ comparator = XEXP (op0, 1); -+ cmpcode = GET_CODE (op0); -+ } -+ else -+ { -+ inner = op0; -+ comparator = const0_rtx; -+ cmpcode = NE; -+ } -+ -+ if (GET_CODE (op1) == PC || GET_CODE (op2) == PC) -+ { -+ /* Conditional branch. */ -+ if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC) -+ return true; -+ else -+ { -+ if (cmpcode == NE || cmpcode == EQ) -+ { -+ if (comparator == const0_rtx) -+ { -+ /* TBZ/TBNZ/CBZ/CBNZ. */ -+ if (GET_CODE (inner) == ZERO_EXTRACT) -+ /* TBZ/TBNZ. */ -+ *cost += rtx_cost (XEXP (inner, 0), ZERO_EXTRACT, -+ 0, speed); -+ else -+ /* CBZ/CBNZ. */ -+ *cost += rtx_cost (inner, cmpcode, 0, speed); -+ -+ return true; -+ } -+ } -+ else if (cmpcode == LT || cmpcode == GE) -+ { -+ /* TBZ/TBNZ. 
*/ -+ if (comparator == const0_rtx) -+ return true; -+ } -+ } -+ } -+ else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC) -+ { -+ /* It's a conditional operation based on the status flags, -+ so it must be some flavor of CSEL. */ -+ -+ /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */ -+ if (GET_CODE (op1) == NEG -+ || GET_CODE (op1) == NOT -+ || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx)) -+ op1 = XEXP (op1, 0); -+ -+ *cost += rtx_cost (op1, IF_THEN_ELSE, 1, speed); -+ *cost += rtx_cost (op2, IF_THEN_ELSE, 2, speed); -+ return true; -+ } -+ -+ /* We don't know what this is, cost all operands. */ -+ return false; -+} -+ - /* Calculate the cost of calculating X, storing it in *COST. Result - is true if the total cost of the operation has now been calculated. */ - static bool -@@ -4506,13 +5116,31 @@ - aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, - int param ATTRIBUTE_UNUSED, int *cost, bool speed) - { -- rtx op0, op1; -+ rtx op0, op1, op2; - const struct cpu_cost_table *extra_cost - = aarch64_tune_params->insn_extra_cost; -+ enum machine_mode mode = GET_MODE (x); - -+ /* By default, assume that everything has equivalent cost to the -+ cheapest instruction. Any additional costs are applied as a delta -+ above this default. */ -+ *cost = COSTS_N_INSNS (1); -+ -+ /* TODO: The cost infrastructure currently does not handle -+ vector operations. Assume that all vector operations -+ are equally expensive. */ -+ if (VECTOR_MODE_P (mode)) -+ { -+ if (speed) -+ *cost += extra_cost->vect.alu; -+ return true; -+ } -+ - switch (code) - { - case SET: -+ /* The cost depends entirely on the operands to SET. */ -+ *cost = 0; - op0 = SET_DEST (x); - op1 = SET_SRC (x); - -@@ -4520,52 +5148,194 @@ - { - case MEM: - if (speed) -- *cost += extra_cost->ldst.store; -+ { -+ rtx address = XEXP (op0, 0); -+ if (GET_MODE_CLASS (mode) == MODE_INT) -+ *cost += extra_cost->ldst.store; -+ else if (mode == SFmode) -+ *cost += extra_cost->ldst.storef; -+ else if (mode == DFmode) -+ *cost += extra_cost->ldst.stored; - -- if (op1 != const0_rtx) -- *cost += rtx_cost (op1, SET, 1, speed); -+ *cost += -+ COSTS_N_INSNS (aarch64_address_cost (address, mode, -+ 0, speed)); -+ } -+ -+ *cost += rtx_cost (op1, SET, 1, speed); - return true; - - case SUBREG: - if (! REG_P (SUBREG_REG (op0))) - *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed); -+ - /* Fall through. */ - case REG: -- /* Cost is just the cost of the RHS of the set. */ -- *cost += rtx_cost (op1, SET, 1, true); -+ /* const0_rtx is in general free, but we will use an -+ instruction to set a register to 0. */ -+ if (REG_P (op1) || op1 == const0_rtx) -+ { -+ /* The cost is 1 per register copied. */ -+ int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1) -+ / UNITS_PER_WORD; -+ *cost = COSTS_N_INSNS (n_minus_1 + 1); -+ } -+ else -+ /* Cost is just the cost of the RHS of the set. */ -+ *cost += rtx_cost (op1, SET, 1, speed); - return true; - -- case ZERO_EXTRACT: /* Bit-field insertion. */ -+ case ZERO_EXTRACT: - case SIGN_EXTRACT: -- /* Strip any redundant widening of the RHS to meet the width of -- the target. */ -+ /* Bit-field insertion. Strip any redundant widening of -+ the RHS to meet the width of the target. 
*/ - if (GET_CODE (op1) == SUBREG) - op1 = SUBREG_REG (op1); - if ((GET_CODE (op1) == ZERO_EXTEND - || GET_CODE (op1) == SIGN_EXTEND) -- && GET_CODE (XEXP (op0, 1)) == CONST_INT -+ && CONST_INT_P (XEXP (op0, 1)) - && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0))) - >= INTVAL (XEXP (op0, 1)))) - op1 = XEXP (op1, 0); -- *cost += rtx_cost (op1, SET, 1, speed); -+ -+ if (CONST_INT_P (op1)) -+ { -+ /* MOV immediate is assumed to always be cheap. */ -+ *cost = COSTS_N_INSNS (1); -+ } -+ else -+ { -+ /* BFM. */ -+ if (speed) -+ *cost += extra_cost->alu.bfi; -+ *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed); -+ } -+ - return true; - - default: -- break; -+ /* We can't make sense of this, assume default cost. */ -+ *cost = COSTS_N_INSNS (1); -+ return false; - } - return false; - -+ case CONST_INT: -+ /* If an instruction can incorporate a constant within the -+ instruction, the instruction's expression avoids calling -+ rtx_cost() on the constant. If rtx_cost() is called on a -+ constant, then it is usually because the constant must be -+ moved into a register by one or more instructions. -+ -+ The exception is constant 0, which can be expressed -+ as XZR/WZR and is therefore free. The exception to this is -+ if we have (set (reg) (const0_rtx)) in which case we must cost -+ the move. However, we can catch that when we cost the SET, so -+ we don't need to consider that here. */ -+ if (x == const0_rtx) -+ *cost = 0; -+ else -+ { -+ /* To an approximation, building any other constant is -+ proportionally expensive to the number of instructions -+ required to build that constant. This is true whether we -+ are compiling for SPEED or otherwise. */ -+ *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate -+ (NULL_RTX, x, false, mode)); -+ } -+ return true; -+ -+ case CONST_DOUBLE: -+ if (speed) -+ { -+ /* mov[df,sf]_aarch64. */ -+ if (aarch64_float_const_representable_p (x)) -+ /* FMOV (scalar immediate). */ -+ *cost += extra_cost->fp[mode == DFmode].fpconst; -+ else if (!aarch64_float_const_zero_rtx_p (x)) -+ { -+ /* This will be a load from memory. */ -+ if (mode == DFmode) -+ *cost += extra_cost->ldst.loadd; -+ else -+ *cost += extra_cost->ldst.loadf; -+ } -+ else -+ /* Otherwise this is +0.0. We get this using MOVI d0, #0 -+ or MOV v0.s[0], wzr - neither of which are modeled by the -+ cost tables. Just use the default cost. */ -+ { -+ } -+ } -+ -+ return true; -+ - case MEM: - if (speed) -- *cost += extra_cost->ldst.load; -+ { -+ /* For loads we want the base cost of a load, plus an -+ approximation for the additional cost of the addressing -+ mode. */ -+ rtx address = XEXP (x, 0); -+ if (GET_MODE_CLASS (mode) == MODE_INT) -+ *cost += extra_cost->ldst.load; -+ else if (mode == SFmode) -+ *cost += extra_cost->ldst.loadf; -+ else if (mode == DFmode) -+ *cost += extra_cost->ldst.loadd; - -+ *cost += -+ COSTS_N_INSNS (aarch64_address_cost (address, mode, -+ 0, speed)); -+ } -+ - return true; - - case NEG: -- op0 = CONST0_RTX (GET_MODE (x)); -- op1 = XEXP (x, 0); -- goto cost_minus; -+ op0 = XEXP (x, 0); - -+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) -+ { -+ if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE -+ || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE) -+ { -+ /* CSETM. */ -+ *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed); -+ return true; -+ } -+ -+ /* Cost this as SUB wzr, X. 
*/ -+ op0 = CONST0_RTX (GET_MODE (x)); -+ op1 = XEXP (x, 0); -+ goto cost_minus; -+ } -+ -+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) -+ { -+ /* Support (neg(fma...)) as a single instruction only if -+ sign of zeros is unimportant. This matches the decision -+ making in aarch64.md. */ -+ if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0))) -+ { -+ /* FNMADD. */ -+ *cost = rtx_cost (op0, NEG, 0, speed); -+ return true; -+ } -+ if (speed) -+ /* FNEG. */ -+ *cost += extra_cost->fp[mode == DFmode].neg; -+ return false; -+ } -+ -+ return false; -+ -+ case CLRSB: -+ case CLZ: -+ if (speed) -+ *cost += extra_cost->alu.clz; -+ -+ return false; -+ - case COMPARE: - op0 = XEXP (x, 0); - op1 = XEXP (x, 1); -@@ -4577,96 +5347,228 @@ - goto cost_logic; - } - -- /* Comparisons can work if the order is swapped. -- Canonicalization puts the more complex operation first, but -- we want it in op1. */ -- if (! (REG_P (op0) -- || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0))))) -- { -- op0 = XEXP (x, 1); -- op1 = XEXP (x, 0); -- } -- goto cost_minus; -+ if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT) -+ { -+ /* TODO: A write to the CC flags possibly costs extra, this -+ needs encoding in the cost tables. */ - -+ /* CC_ZESWPmode supports zero extend for free. */ -+ if (GET_MODE (x) == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND) -+ op0 = XEXP (op0, 0); -+ -+ /* ANDS. */ -+ if (GET_CODE (op0) == AND) -+ { -+ x = op0; -+ goto cost_logic; -+ } -+ -+ if (GET_CODE (op0) == PLUS) -+ { -+ /* ADDS (and CMN alias). */ -+ x = op0; -+ goto cost_plus; -+ } -+ -+ if (GET_CODE (op0) == MINUS) -+ { -+ /* SUBS. */ -+ x = op0; -+ goto cost_minus; -+ } -+ -+ if (GET_CODE (op1) == NEG) -+ { -+ /* CMN. */ -+ if (speed) -+ *cost += extra_cost->alu.arith; -+ -+ *cost += rtx_cost (op0, COMPARE, 0, speed); -+ *cost += rtx_cost (XEXP (op1, 0), NEG, 1, speed); -+ return true; -+ } -+ -+ /* CMP. -+ -+ Compare can freely swap the order of operands, and -+ canonicalization puts the more complex operation first. -+ But the integer MINUS logic expects the shift/extend -+ operation in op1. */ -+ if (! (REG_P (op0) -+ || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0))))) -+ { -+ op0 = XEXP (x, 1); -+ op1 = XEXP (x, 0); -+ } -+ goto cost_minus; -+ } -+ -+ if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT) -+ { -+ /* FCMP. */ -+ if (speed) -+ *cost += extra_cost->fp[mode == DFmode].compare; -+ -+ if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1)) -+ { -+ /* FCMP supports constant 0.0 for no extra cost. */ -+ return true; -+ } -+ return false; -+ } -+ -+ return false; -+ - case MINUS: -- op0 = XEXP (x, 0); -- op1 = XEXP (x, 1); -+ { -+ op0 = XEXP (x, 0); -+ op1 = XEXP (x, 1); - -- cost_minus: -- if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT -- || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC -- && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)) -- { -- if (op0 != const0_rtx) -+cost_minus: -+ /* Detect valid immediates. */ -+ if ((GET_MODE_CLASS (mode) == MODE_INT -+ || (GET_MODE_CLASS (mode) == MODE_CC -+ && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)) -+ && CONST_INT_P (op1) -+ && aarch64_uimm12_shift (INTVAL (op1))) -+ { - *cost += rtx_cost (op0, MINUS, 0, speed); - -- if (CONST_INT_P (op1)) -- { -- if (!aarch64_uimm12_shift (INTVAL (op1))) -- *cost += rtx_cost (op1, MINUS, 1, speed); -- } -- else -- { -- op1 = aarch64_strip_shift_or_extend (op1); -- *cost += rtx_cost (op1, MINUS, 1, speed); -- } -- return true; -- } -+ if (speed) -+ /* SUB(S) (immediate). 
*/ -+ *cost += extra_cost->alu.arith; -+ return true; - -- return false; -+ } - -+ /* Look for SUB (extended register). */ -+ if (aarch64_rtx_arith_op_extract_p (op1, mode)) -+ { -+ if (speed) -+ *cost += extra_cost->alu.arith_shift; -+ -+ *cost += rtx_cost (XEXP (XEXP (op1, 0), 0), -+ (enum rtx_code) GET_CODE (op1), -+ 0, speed); -+ return true; -+ } -+ -+ rtx new_op1 = aarch64_strip_extend (op1); -+ -+ /* Cost this as an FMA-alike operation. */ -+ if ((GET_CODE (new_op1) == MULT -+ || GET_CODE (new_op1) == ASHIFT) -+ && code != COMPARE) -+ { -+ *cost += aarch64_rtx_mult_cost (new_op1, MULT, -+ (enum rtx_code) code, -+ speed); -+ *cost += rtx_cost (op0, MINUS, 0, speed); -+ return true; -+ } -+ -+ *cost += rtx_cost (new_op1, MINUS, 1, speed); -+ -+ if (speed) -+ { -+ if (GET_MODE_CLASS (mode) == MODE_INT) -+ /* SUB(S). */ -+ *cost += extra_cost->alu.arith; -+ else if (GET_MODE_CLASS (mode) == MODE_FLOAT) -+ /* FSUB. */ -+ *cost += extra_cost->fp[mode == DFmode].addsub; -+ } -+ return true; -+ } -+ - case PLUS: -- op0 = XEXP (x, 0); -- op1 = XEXP (x, 1); -+ { -+ rtx new_op0; - -- if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) -- { -- if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1))) -- { -- *cost += rtx_cost (op0, PLUS, 0, speed); -- } -- else -- { -- rtx new_op0 = aarch64_strip_shift_or_extend (op0); -+ op0 = XEXP (x, 0); -+ op1 = XEXP (x, 1); - -- if (new_op0 == op0 -- && GET_CODE (op0) == MULT) -- { -- if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND -- && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND) -- || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND -- && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND)) -- { -- *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0, -- speed) -- + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1, -- speed) -- + rtx_cost (op1, PLUS, 1, speed)); -- if (speed) -- *cost += -- extra_cost->mult[GET_MODE (x) == DImode].extend_add; -- return true; -- } -+cost_plus: -+ if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE -+ || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE) -+ { -+ /* CSINC. */ -+ *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed); -+ *cost += rtx_cost (op1, PLUS, 1, speed); -+ return true; -+ } - -- *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed) -- + rtx_cost (XEXP (op0, 1), MULT, 1, speed) -- + rtx_cost (op1, PLUS, 1, speed)); -+ if (GET_MODE_CLASS (mode) == MODE_INT -+ && CONST_INT_P (op1) -+ && aarch64_uimm12_shift (INTVAL (op1))) -+ { -+ *cost += rtx_cost (op0, PLUS, 0, speed); - -- if (speed) -- *cost += extra_cost->mult[GET_MODE (x) == DImode].add; -+ if (speed) -+ /* ADD (immediate). */ -+ *cost += extra_cost->alu.arith; -+ return true; -+ } - -- return true; -- } -+ /* Look for ADD (extended register). */ -+ if (aarch64_rtx_arith_op_extract_p (op0, mode)) -+ { -+ if (speed) -+ *cost += extra_cost->alu.arith_shift; - -- *cost += (rtx_cost (new_op0, PLUS, 0, speed) -- + rtx_cost (op1, PLUS, 1, speed)); -- } -- return true; -- } -+ *cost += rtx_cost (XEXP (XEXP (op0, 0), 0), -+ (enum rtx_code) GET_CODE (op0), -+ 0, speed); -+ return true; -+ } - -+ /* Strip any extend, leave shifts behind as we will -+ cost them through mult_cost. */ -+ new_op0 = aarch64_strip_extend (op0); -+ -+ if (GET_CODE (new_op0) == MULT -+ || GET_CODE (new_op0) == ASHIFT) -+ { -+ *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS, -+ speed); -+ *cost += rtx_cost (op1, PLUS, 1, speed); -+ return true; -+ } -+ -+ *cost += (rtx_cost (new_op0, PLUS, 0, speed) -+ + rtx_cost (op1, PLUS, 1, speed)); -+ -+ if (speed) -+ { -+ if (GET_MODE_CLASS (mode) == MODE_INT) -+ /* ADD. 
*/ -+ *cost += extra_cost->alu.arith; -+ else if (GET_MODE_CLASS (mode) == MODE_FLOAT) -+ /* FADD. */ -+ *cost += extra_cost->fp[mode == DFmode].addsub; -+ } -+ return true; -+ } -+ -+ case BSWAP: -+ *cost = COSTS_N_INSNS (1); -+ -+ if (speed) -+ *cost += extra_cost->alu.rev; -+ - return false; - - case IOR: -+ if (aarch_rev16_p (x)) -+ { -+ *cost = COSTS_N_INSNS (1); -+ -+ if (speed) -+ *cost += extra_cost->alu.rev; -+ -+ return true; -+ } -+ /* Fall through. */ - case XOR: - case AND: - cost_logic: -@@ -4673,117 +5575,252 @@ - op0 = XEXP (x, 0); - op1 = XEXP (x, 1); - -+ if (code == AND -+ && GET_CODE (op0) == MULT -+ && CONST_INT_P (XEXP (op0, 1)) -+ && CONST_INT_P (op1) -+ && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))), -+ INTVAL (op1)) != 0) -+ { -+ /* This is a UBFM/SBFM. */ -+ *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed); -+ if (speed) -+ *cost += extra_cost->alu.bfx; -+ return true; -+ } -+ - if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) - { -+ /* We possibly get the immediate for free, this is not -+ modelled. */ - if (CONST_INT_P (op1) - && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x))) - { -- *cost += rtx_cost (op0, AND, 0, speed); -+ *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed); -+ -+ if (speed) -+ *cost += extra_cost->alu.logical; -+ -+ return true; - } - else - { -+ rtx new_op0 = op0; -+ -+ /* Handle ORN, EON, or BIC. */ - if (GET_CODE (op0) == NOT) - op0 = XEXP (op0, 0); -- op0 = aarch64_strip_shift (op0); -- *cost += (rtx_cost (op0, AND, 0, speed) -- + rtx_cost (op1, AND, 1, speed)); -+ -+ new_op0 = aarch64_strip_shift (op0); -+ -+ /* If we had a shift on op0 then this is a logical-shift- -+ by-register/immediate operation. Otherwise, this is just -+ a logical operation. */ -+ if (speed) -+ { -+ if (new_op0 != op0) -+ { -+ /* Shift by immediate. */ -+ if (CONST_INT_P (XEXP (op0, 1))) -+ *cost += extra_cost->alu.log_shift; -+ else -+ *cost += extra_cost->alu.log_shift_reg; -+ } -+ else -+ *cost += extra_cost->alu.logical; -+ } -+ -+ /* In both cases we want to cost both operands. */ -+ *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed) -+ + rtx_cost (op1, (enum rtx_code) code, 1, speed); -+ -+ return true; - } -- return true; - } - return false; - -+ case NOT: -+ /* MVN. */ -+ if (speed) -+ *cost += extra_cost->alu.logical; -+ -+ /* The logical instruction could have the shifted register form, -+ but the cost is the same if the shift is processed as a separate -+ instruction, so we don't bother with it here. */ -+ return false; -+ - case ZERO_EXTEND: -- if ((GET_MODE (x) == DImode -- && GET_MODE (XEXP (x, 0)) == SImode) -- || GET_CODE (XEXP (x, 0)) == MEM) -+ -+ op0 = XEXP (x, 0); -+ /* If a value is written in SI mode, then zero extended to DI -+ mode, the operation will in general be free as a write to -+ a 'w' register implicitly zeroes the upper bits of an 'x' -+ register. However, if this is -+ -+ (set (reg) (zero_extend (reg))) -+ -+ we must cost the explicit register move. */ -+ if (mode == DImode -+ && GET_MODE (op0) == SImode -+ && outer == SET) - { -- *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed); -+ int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed); -+ -+ if (!op_cost && speed) -+ /* MOV. */ -+ *cost += extra_cost->alu.extend; -+ else -+ /* Free, the cost is that of the SI mode operation. */ -+ *cost = op_cost; -+ - return true; - } -+ else if (MEM_P (XEXP (x, 0))) -+ { -+ /* All loads can zero extend to any size for free. 
*/ -+ *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed); -+ return true; -+ } -+ -+ /* UXTB/UXTH. */ -+ if (speed) -+ *cost += extra_cost->alu.extend; -+ - return false; - - case SIGN_EXTEND: -- if (GET_CODE (XEXP (x, 0)) == MEM) -+ if (MEM_P (XEXP (x, 0))) - { -- *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed); -+ /* LDRSH. */ -+ if (speed) -+ { -+ rtx address = XEXP (XEXP (x, 0), 0); -+ *cost += extra_cost->ldst.load_sign_extend; -+ -+ *cost += -+ COSTS_N_INSNS (aarch64_address_cost (address, mode, -+ 0, speed)); -+ } - return true; - } -+ -+ if (speed) -+ *cost += extra_cost->alu.extend; - return false; - -+ case ASHIFT: -+ op0 = XEXP (x, 0); -+ op1 = XEXP (x, 1); -+ -+ if (CONST_INT_P (op1)) -+ { -+ /* LSL (immediate), UBMF, UBFIZ and friends. These are all -+ aliases. */ -+ if (speed) -+ *cost += extra_cost->alu.shift; -+ -+ /* We can incorporate zero/sign extend for free. */ -+ if (GET_CODE (op0) == ZERO_EXTEND -+ || GET_CODE (op0) == SIGN_EXTEND) -+ op0 = XEXP (op0, 0); -+ -+ *cost += rtx_cost (op0, ASHIFT, 0, speed); -+ return true; -+ } -+ else -+ { -+ /* LSLV. */ -+ if (speed) -+ *cost += extra_cost->alu.shift_reg; -+ -+ return false; /* All arguments need to be in registers. */ -+ } -+ - case ROTATE: -- if (!CONST_INT_P (XEXP (x, 1))) -- *cost += COSTS_N_INSNS (2); -- /* Fall through. */ - case ROTATERT: - case LSHIFTRT: -- case ASHIFT: - case ASHIFTRT: -+ op0 = XEXP (x, 0); -+ op1 = XEXP (x, 1); - -- /* Shifting by a register often takes an extra cycle. */ -- if (speed && !CONST_INT_P (XEXP (x, 1))) -- *cost += extra_cost->alu.arith_shift_reg; -+ if (CONST_INT_P (op1)) -+ { -+ /* ASR (immediate) and friends. */ -+ if (speed) -+ *cost += extra_cost->alu.shift; - -- *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed); -+ *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed); -+ return true; -+ } -+ else -+ { -+ -+ /* ASR (register) and friends. */ -+ if (speed) -+ *cost += extra_cost->alu.shift_reg; -+ -+ return false; /* All arguments need to be in registers. */ -+ } -+ -+ case SYMBOL_REF: -+ -+ if (aarch64_cmodel == AARCH64_CMODEL_LARGE) -+ { -+ /* LDR. */ -+ if (speed) -+ *cost += extra_cost->ldst.load; -+ } -+ else if (aarch64_cmodel == AARCH64_CMODEL_SMALL -+ || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC) -+ { -+ /* ADRP, followed by ADD. */ -+ *cost += COSTS_N_INSNS (1); -+ if (speed) -+ *cost += 2 * extra_cost->alu.arith; -+ } -+ else if (aarch64_cmodel == AARCH64_CMODEL_TINY -+ || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC) -+ { -+ /* ADR. */ -+ if (speed) -+ *cost += extra_cost->alu.arith; -+ } -+ -+ if (flag_pic) -+ { -+ /* One extra load instruction, after accessing the GOT. */ -+ *cost += COSTS_N_INSNS (1); -+ if (speed) -+ *cost += extra_cost->ldst.load; -+ } - return true; - - case HIGH: -- if (!CONSTANT_P (XEXP (x, 0))) -- *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed); -- return true; -- - case LO_SUM: -- if (!CONSTANT_P (XEXP (x, 1))) -- *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed); -- *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed); -+ /* ADRP/ADD (immediate). */ -+ if (speed) -+ *cost += extra_cost->alu.arith; - return true; - - case ZERO_EXTRACT: - case SIGN_EXTRACT: -- *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed); -+ /* UBFX/SBFX. */ -+ if (speed) -+ *cost += extra_cost->alu.bfx; -+ -+ /* We can trust that the immediates used will be correct (there -+ are no by-register forms), so we need only cost op0. 
*/ -+ *cost += rtx_cost (XEXP (x, 0), (enum rtx_code) code, 0, speed); - return true; - - case MULT: -- op0 = XEXP (x, 0); -- op1 = XEXP (x, 1); -+ *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed); -+ /* aarch64_rtx_mult_cost always handles recursion to its -+ operands. */ -+ return true; - -- *cost = COSTS_N_INSNS (1); -- if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) -- { -- if (CONST_INT_P (op1) -- && exact_log2 (INTVAL (op1)) > 0) -- { -- *cost += rtx_cost (op0, ASHIFT, 0, speed); -- return true; -- } -- -- if ((GET_CODE (op0) == ZERO_EXTEND -- && GET_CODE (op1) == ZERO_EXTEND) -- || (GET_CODE (op0) == SIGN_EXTEND -- && GET_CODE (op1) == SIGN_EXTEND)) -- { -- *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed) -- + rtx_cost (XEXP (op1, 0), MULT, 1, speed)); -- if (speed) -- *cost += extra_cost->mult[GET_MODE (x) == DImode].extend; -- return true; -- } -- -- if (speed) -- *cost += extra_cost->mult[GET_MODE (x) == DImode].simple; -- } -- else if (speed) -- { -- if (GET_MODE (x) == DFmode) -- *cost += extra_cost->fp[1].mult; -- else if (GET_MODE (x) == SFmode) -- *cost += extra_cost->fp[0].mult; -- } -- -- return false; /* All arguments need to be in registers. */ -- - case MOD: - case UMOD: -- *cost = COSTS_N_INSNS (2); - if (speed) - { - if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) -@@ -4800,53 +5837,222 @@ - - case DIV: - case UDIV: -- *cost = COSTS_N_INSNS (1); -+ case SQRT: - if (speed) - { -- if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) -- *cost += extra_cost->mult[GET_MODE (x) == DImode].idiv; -- else if (GET_MODE (x) == DFmode) -- *cost += extra_cost->fp[1].div; -- else if (GET_MODE (x) == SFmode) -- *cost += extra_cost->fp[0].div; -+ if (GET_MODE_CLASS (mode) == MODE_INT) -+ /* There is no integer SQRT, so only DIV and UDIV can get -+ here. */ -+ *cost += extra_cost->mult[mode == DImode].idiv; -+ else -+ *cost += extra_cost->fp[mode == DFmode].div; - } - return false; /* All arguments need to be in registers. */ - -+ case IF_THEN_ELSE: -+ return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1), -+ XEXP (x, 2), cost, speed); -+ -+ case EQ: -+ case NE: -+ case GT: -+ case GTU: -+ case LT: -+ case LTU: -+ case GE: -+ case GEU: -+ case LE: -+ case LEU: -+ -+ return false; /* All arguments must be in registers. */ -+ -+ case FMA: -+ op0 = XEXP (x, 0); -+ op1 = XEXP (x, 1); -+ op2 = XEXP (x, 2); -+ -+ if (speed) -+ *cost += extra_cost->fp[mode == DFmode].fma; -+ -+ /* FMSUB, FNMADD, and FNMSUB are free. */ -+ if (GET_CODE (op0) == NEG) -+ op0 = XEXP (op0, 0); -+ -+ if (GET_CODE (op2) == NEG) -+ op2 = XEXP (op2, 0); -+ -+ /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1, -+ and the by-element operand as operand 0. */ -+ if (GET_CODE (op1) == NEG) -+ op1 = XEXP (op1, 0); -+ -+ /* Catch vector-by-element operations. The by-element operand can -+ either be (vec_duplicate (vec_select (x))) or just -+ (vec_select (x)), depending on whether we are multiplying by -+ a vector or a scalar. -+ -+ Canonicalization is not very good in these cases, FMA4 will put the -+ by-element operand as operand 0, FNMA4 will have it as operand 1. */ -+ if (GET_CODE (op0) == VEC_DUPLICATE) -+ op0 = XEXP (op0, 0); -+ else if (GET_CODE (op1) == VEC_DUPLICATE) -+ op1 = XEXP (op1, 0); -+ -+ if (GET_CODE (op0) == VEC_SELECT) -+ op0 = XEXP (op0, 0); -+ else if (GET_CODE (op1) == VEC_SELECT) -+ op1 = XEXP (op1, 0); -+ -+ /* If the remaining parameters are not registers, -+ get the cost to put them into registers. 
*/ -+ *cost += rtx_cost (op0, FMA, 0, speed); -+ *cost += rtx_cost (op1, FMA, 1, speed); -+ *cost += rtx_cost (op2, FMA, 2, speed); -+ return true; -+ -+ case FLOAT_EXTEND: -+ if (speed) -+ *cost += extra_cost->fp[mode == DFmode].widen; -+ return false; -+ -+ case FLOAT_TRUNCATE: -+ if (speed) -+ *cost += extra_cost->fp[mode == DFmode].narrow; -+ return false; -+ -+ case FIX: -+ case UNSIGNED_FIX: -+ x = XEXP (x, 0); -+ /* Strip the rounding part. They will all be implemented -+ by the fcvt* family of instructions anyway. */ -+ if (GET_CODE (x) == UNSPEC) -+ { -+ unsigned int uns_code = XINT (x, 1); -+ -+ if (uns_code == UNSPEC_FRINTA -+ || uns_code == UNSPEC_FRINTM -+ || uns_code == UNSPEC_FRINTN -+ || uns_code == UNSPEC_FRINTP -+ || uns_code == UNSPEC_FRINTZ) -+ x = XVECEXP (x, 0, 0); -+ } -+ -+ if (speed) -+ *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint; -+ -+ *cost += rtx_cost (x, (enum rtx_code) code, 0, speed); -+ return true; -+ -+ case ABS: -+ if (GET_MODE_CLASS (mode) == MODE_FLOAT) -+ { -+ /* FABS and FNEG are analogous. */ -+ if (speed) -+ *cost += extra_cost->fp[mode == DFmode].neg; -+ } -+ else -+ { -+ /* Integer ABS will either be split to -+ two arithmetic instructions, or will be an ABS -+ (scalar), which we don't model. */ -+ *cost = COSTS_N_INSNS (2); -+ if (speed) -+ *cost += 2 * extra_cost->alu.arith; -+ } -+ return false; -+ -+ case SMAX: -+ case SMIN: -+ if (speed) -+ { -+ /* FMAXNM/FMINNM/FMAX/FMIN. -+ TODO: This may not be accurate for all implementations, but -+ we do not model this in the cost tables. */ -+ *cost += extra_cost->fp[mode == DFmode].addsub; -+ } -+ return false; -+ -+ case UNSPEC: -+ /* The floating point round to integer frint* instructions. */ -+ if (aarch64_frint_unspec_p (XINT (x, 1))) -+ { -+ if (speed) -+ *cost += extra_cost->fp[mode == DFmode].roundint; -+ -+ return false; -+ } -+ -+ if (XINT (x, 1) == UNSPEC_RBIT) -+ { -+ if (speed) -+ *cost += extra_cost->alu.rev; -+ -+ return false; -+ } -+ break; -+ -+ case TRUNCATE: -+ -+ /* Decompose <su>muldi3_highpart. */ -+ if (/* (truncate:DI */ -+ mode == DImode -+ /* (lshiftrt:TI */ -+ && GET_MODE (XEXP (x, 0)) == TImode -+ && GET_CODE (XEXP (x, 0)) == LSHIFTRT -+ /* (mult:TI */ -+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT -+ /* (ANY_EXTEND:TI (reg:DI)) -+ (ANY_EXTEND:TI (reg:DI))) */ -+ && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND -+ && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND) -+ || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND -+ && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)) -+ && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode -+ && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode -+ /* (const_int 64) */ -+ && CONST_INT_P (XEXP (XEXP (x, 0), 1)) -+ && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64) -+ { -+ /* UMULH/SMULH. */ -+ if (speed) -+ *cost += extra_cost->mult[mode == DImode].extend; -+ *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0), -+ MULT, 0, speed); -+ *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0), -+ MULT, 1, speed); -+ return true; -+ } -+ -+ /* Fall through. */ - default: - break; - } -- return false; -+ -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ fprintf (dump_file, -+ "\nFailed to cost RTX. 
Assuming default cost.\n"); -+ -+ return true; - } - --static int --aarch64_address_cost (rtx x ATTRIBUTE_UNUSED, -- enum machine_mode mode ATTRIBUTE_UNUSED, -- addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED) -+/* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost -+ calculated for X. This cost is stored in *COST. Returns true -+ if the total cost of X was calculated. */ -+static bool -+aarch64_rtx_costs_wrapper (rtx x, int code, int outer, -+ int param, int *cost, bool speed) - { -- enum rtx_code c = GET_CODE (x); -- const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost; -+ bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed); - -- if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY) -- return addr_cost->pre_modify; -- -- if (c == POST_INC || c == POST_DEC || c == POST_MODIFY) -- return addr_cost->post_modify; -- -- if (c == PLUS) -+ if (dump_file && (dump_flags & TDF_DETAILS)) - { -- if (GET_CODE (XEXP (x, 1)) == CONST_INT) -- return addr_cost->imm_offset; -- else if (GET_CODE (XEXP (x, 0)) == MULT -- || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND -- || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND) -- return addr_cost->register_extend; -- -- return addr_cost->register_offset; -+ print_rtl_single (dump_file, x); -+ fprintf (dump_file, "\n%s cost: %d (%s)\n", -+ speed ? "Hot" : "Cold", -+ *cost, result ? "final" : "partial"); - } -- else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF) -- return addr_cost->imm_offset; - -- return 0; -+ return result; - } - - static int -@@ -4858,6 +6064,13 @@ - const struct cpu_regmove_cost *regmove_cost - = aarch64_tune_params->regmove_cost; - -+ /* Caller save and pointer regs are equivalent to GENERAL_REGS. */ -+ if (to == CALLER_SAVE_REGS || to == POINTER_REGS) -+ to = GENERAL_REGS; -+ -+ if (from == CALLER_SAVE_REGS || from == POINTER_REGS) -+ from = GENERAL_REGS; -+ - /* Moving between GPR and stack cost is the same as GP2GP. */ - if ((from == GENERAL_REGS && to == STACK_REG) - || (to == GENERAL_REGS && from == STACK_REG)) -@@ -4880,7 +6093,7 @@ - secondary reload. A general register is used as a scratch to move - the upper DI value and the lower DI value is moved directly, - hence the cost is the sum of three moves. */ -- if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 128) -+ if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 16) - return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP; - - return regmove_cost->FP2FP; -@@ -5253,6 +6466,7 @@ - aarch64_tune_flags = selected_tune->flags; - aarch64_tune = selected_tune->core; - aarch64_tune_params = selected_tune->tune; -+ aarch64_architecture_version = selected_cpu->architecture_version; - - if (aarch64_fix_a53_err835769 == 2) - { -@@ -5998,7 +7212,7 @@ - - /* We don't save the size into *PRETEND_SIZE because we want to avoid - any complication of having crtl->args.pretend_args_size changed. 
*/ -- cfun->machine->saved_varargs_size -+ cfun->machine->frame.saved_varargs_size - = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD, - STACK_BOUNDARY / BITS_PER_UNIT) - + vr_saved * UNITS_PER_VREG); -@@ -6685,7 +7899,7 @@ - unsigned HOST_WIDE_INT elpart; - unsigned int part, parts; - -- if (GET_CODE (el) == CONST_INT) -+ if (CONST_INT_P (el)) - { - elpart = INTVAL (el); - parts = 1; -@@ -6816,30 +8030,6 @@ - #undef CHECK - } - --static bool --aarch64_const_vec_all_same_int_p (rtx x, -- HOST_WIDE_INT minval, -- HOST_WIDE_INT maxval) --{ -- HOST_WIDE_INT firstval; -- int count, i; -- -- if (GET_CODE (x) != CONST_VECTOR -- || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT) -- return false; -- -- firstval = INTVAL (CONST_VECTOR_ELT (x, 0)); -- if (firstval < minval || firstval > maxval) -- return false; -- -- count = CONST_VECTOR_NUNITS (x); -- for (i = 1; i < count; i++) -- if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval) -- return false; -- -- return true; --} -- - /* Check of immediate shift constants are within range. */ - bool - aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left) -@@ -6846,9 +8036,9 @@ - { - int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT; - if (left) -- return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1); -+ return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1); - else -- return aarch64_const_vec_all_same_int_p (x, 1, bit_width); -+ return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width); - } - - /* Return true if X is a uniform vector where all elements -@@ -6886,7 +8076,7 @@ - && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0)))) - return true; - -- if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode)) -+ if (CONST_INT_P (x)) - return true; - - if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x)) -@@ -6923,17 +8113,43 @@ - return aarch64_simd_valid_immediate (op_v, vmode, false, NULL); - } - --/* Construct and return a PARALLEL RTX vector. */ -+/* Construct and return a PARALLEL RTX vector with elements numbering the -+ lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of -+ the vector - from the perspective of the architecture. This does not -+ line up with GCC's perspective on lane numbers, so we end up with -+ different masks depending on our target endian-ness. The diagram -+ below may help. We must draw the distinction when building masks -+ which select one half of the vector. An instruction selecting -+ architectural low-lanes for a big-endian target, must be described using -+ a mask selecting GCC high-lanes. -+ -+ Big-Endian Little-Endian -+ -+GCC 0 1 2 3 3 2 1 0 -+ | x | x | x | x | | x | x | x | x | -+Architecture 3 2 1 0 3 2 1 0 -+ -+Low Mask: { 2, 3 } { 0, 1 } -+High Mask: { 0, 1 } { 2, 3 } -+*/ -+ - rtx - aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high) - { - int nunits = GET_MODE_NUNITS (mode); - rtvec v = rtvec_alloc (nunits / 2); -- int base = high ? nunits / 2 : 0; -+ int high_base = nunits / 2; -+ int low_base = 0; -+ int base; - rtx t1; - int i; - -- for (i=0; i < nunits / 2; i++) -+ if (BYTES_BIG_ENDIAN) -+ base = high ? low_base : high_base; -+ else -+ base = high ? 
high_base : low_base; -+ -+ for (i = 0; i < nunits / 2; i++) - RTVEC_ELT (v, i) = GEN_INT (base + i); - - t1 = gen_rtx_PARALLEL (mode, v); -@@ -6940,6 +8156,38 @@ - return t1; - } - -+/* Check OP for validity as a PARALLEL RTX vector with elements -+ numbering the lanes of either the high (HIGH == TRUE) or low lanes, -+ from the perspective of the architecture. See the diagram above -+ aarch64_simd_vect_par_cnst_half for more details. */ -+ -+bool -+aarch64_simd_check_vect_par_cnst_half (rtx op, enum machine_mode mode, -+ bool high) -+{ -+ rtx ideal = aarch64_simd_vect_par_cnst_half (mode, high); -+ HOST_WIDE_INT count_op = XVECLEN (op, 0); -+ HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0); -+ int i = 0; -+ -+ if (!VECTOR_MODE_P (mode)) -+ return false; -+ -+ if (count_op != count_ideal) -+ return false; -+ -+ for (i = 0; i < count_ideal; i++) -+ { -+ rtx elt_op = XVECEXP (op, 0, i); -+ rtx elt_ideal = XVECEXP (ideal, 0, i); -+ -+ if (!CONST_INT_P (elt_op) -+ || INTVAL (elt_ideal) != INTVAL (elt_op)) -+ return false; -+ } -+ return true; -+} -+ - /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and - HIGH (exclusive). */ - void -@@ -6946,7 +8194,7 @@ - aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high) - { - HOST_WIDE_INT lane; -- gcc_assert (GET_CODE (operand) == CONST_INT); -+ gcc_assert (CONST_INT_P (operand)); - lane = INTVAL (operand); - - if (lane < low || lane >= high) -@@ -6956,7 +8204,7 @@ - void - aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high) - { -- gcc_assert (GET_CODE (operand) == CONST_INT); -+ gcc_assert (CONST_INT_P (operand)); - HOST_WIDE_INT lane = INTVAL (operand); - - if (lane < low || lane >= high) -@@ -6994,7 +8242,7 @@ - aarch64_simd_mem_operand_p (rtx op) - { - return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC -- || GET_CODE (XEXP (op, 0)) == REG); -+ || REG_P (XEXP (op, 0))); - } - - /* Set up OPERANDS for a register copy from SRC to DEST, taking care -@@ -7647,6 +8895,9 @@ - if (!CONST_DOUBLE_P (x)) - return false; - -+ if (GET_MODE (x) == VOIDmode) -+ return false; -+ - REAL_VALUE_FROM_CONST_DOUBLE (r, x); - - /* We cannot represent infinities, NaNs or +/-zero. We won't -@@ -7899,20 +9150,26 @@ - aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel) - { - enum machine_mode vmode = GET_MODE (target); -- unsigned int i, nelt = GET_MODE_NUNITS (vmode); -+ unsigned int nelt = GET_MODE_NUNITS (vmode); - bool one_vector_p = rtx_equal_p (op0, op1); -- rtx rmask[MAX_VECT_LEN], mask; -+ rtx mask; - -- gcc_checking_assert (!BYTES_BIG_ENDIAN); -- - /* The TBL instruction does not use a modulo index, so we must take care - of that ourselves. */ -- mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1); -- for (i = 0; i < nelt; ++i) -- rmask[i] = mask; -- mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask)); -+ mask = aarch64_simd_gen_const_vector_dup (vmode, -+ one_vector_p ? nelt - 1 : 2 * nelt - 1); - sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN); - -+ /* For big-endian, we also need to reverse the index within the vector -+ (but not which vector). */ -+ if (BYTES_BIG_ENDIAN) -+ { -+ /* If one_vector_p, mask is a vector of (nelt - 1)'s already. 
*/ -+ if (!one_vector_p) -+ mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1); -+ sel = expand_simple_binop (vmode, XOR, sel, mask, -+ NULL, 0, OPTAB_LIB_WIDEN); -+ } - aarch64_expand_vec_perm_1 (target, op0, op1, sel); - } - -@@ -8171,7 +9428,145 @@ - return true; - } - -+/* Recognize patterns for the EXT insn. */ -+ - static bool -+aarch64_evpc_ext (struct expand_vec_perm_d *d) -+{ -+ unsigned int i, nelt = d->nelt; -+ rtx (*gen) (rtx, rtx, rtx, rtx); -+ rtx offset; -+ -+ unsigned int location = d->perm[0]; /* Always < nelt. */ -+ -+ /* Check if the extracted indices are increasing by one. */ -+ for (i = 1; i < nelt; i++) -+ { -+ unsigned int required = location + i; -+ if (d->one_vector_p) -+ { -+ /* We'll pass the same vector in twice, so allow indices to wrap. */ -+ required &= (nelt - 1); -+ } -+ if (d->perm[i] != required) -+ return false; -+ } -+ -+ switch (d->vmode) -+ { -+ case V16QImode: gen = gen_aarch64_extv16qi; break; -+ case V8QImode: gen = gen_aarch64_extv8qi; break; -+ case V4HImode: gen = gen_aarch64_extv4hi; break; -+ case V8HImode: gen = gen_aarch64_extv8hi; break; -+ case V2SImode: gen = gen_aarch64_extv2si; break; -+ case V4SImode: gen = gen_aarch64_extv4si; break; -+ case V2SFmode: gen = gen_aarch64_extv2sf; break; -+ case V4SFmode: gen = gen_aarch64_extv4sf; break; -+ case V2DImode: gen = gen_aarch64_extv2di; break; -+ case V2DFmode: gen = gen_aarch64_extv2df; break; -+ default: -+ return false; -+ } -+ -+ /* Success! */ -+ if (d->testing_p) -+ return true; -+ -+ /* The case where (location == 0) is a no-op for both big- and little-endian, -+ and is removed by the mid-end at optimization levels -O1 and higher. */ -+ -+ if (BYTES_BIG_ENDIAN && (location != 0)) -+ { -+ /* After setup, we want the high elements of the first vector (stored -+ at the LSB end of the register), and the low elements of the second -+ vector (stored at the MSB end of the register). So swap. */ -+ rtx temp = d->op0; -+ d->op0 = d->op1; -+ d->op1 = temp; -+ /* location != 0 (above), so safe to assume (nelt - location) < nelt. */ -+ location = nelt - location; -+ } -+ -+ offset = GEN_INT (location); -+ emit_insn (gen (d->target, d->op0, d->op1, offset)); -+ return true; -+} -+ -+/* Recognize patterns for the REV insns. 
*/ -+ -+static bool -+aarch64_evpc_rev (struct expand_vec_perm_d *d) -+{ -+ unsigned int i, j, diff, nelt = d->nelt; -+ rtx (*gen) (rtx, rtx); -+ -+ if (!d->one_vector_p) -+ return false; -+ -+ diff = d->perm[0]; -+ switch (diff) -+ { -+ case 7: -+ switch (d->vmode) -+ { -+ case V16QImode: gen = gen_aarch64_rev64v16qi; break; -+ case V8QImode: gen = gen_aarch64_rev64v8qi; break; -+ default: -+ return false; -+ } -+ break; -+ case 3: -+ switch (d->vmode) -+ { -+ case V16QImode: gen = gen_aarch64_rev32v16qi; break; -+ case V8QImode: gen = gen_aarch64_rev32v8qi; break; -+ case V8HImode: gen = gen_aarch64_rev64v8hi; break; -+ case V4HImode: gen = gen_aarch64_rev64v4hi; break; -+ default: -+ return false; -+ } -+ break; -+ case 1: -+ switch (d->vmode) -+ { -+ case V16QImode: gen = gen_aarch64_rev16v16qi; break; -+ case V8QImode: gen = gen_aarch64_rev16v8qi; break; -+ case V8HImode: gen = gen_aarch64_rev32v8hi; break; -+ case V4HImode: gen = gen_aarch64_rev32v4hi; break; -+ case V4SImode: gen = gen_aarch64_rev64v4si; break; -+ case V2SImode: gen = gen_aarch64_rev64v2si; break; -+ case V4SFmode: gen = gen_aarch64_rev64v4sf; break; -+ case V2SFmode: gen = gen_aarch64_rev64v2sf; break; -+ default: -+ return false; -+ } -+ break; -+ default: -+ return false; -+ } -+ -+ for (i = 0; i < nelt ; i += diff + 1) -+ for (j = 0; j <= diff; j += 1) -+ { -+ /* This is guaranteed to be true as the value of diff -+ is 7, 3, 1 and we should have enough elements in the -+ queue to generate this. Getting a vector mask with a -+ value of diff other than these values implies that -+ something is wrong by the time we get here. */ -+ gcc_assert (i + j < nelt); -+ if (d->perm[i + j] != i + diff - j) -+ return false; -+ } -+ -+ /* Success! */ -+ if (d->testing_p) -+ return true; -+ -+ emit_insn (gen (d->target, d->op0)); -+ return true; -+} -+ -+static bool - aarch64_evpc_dup (struct expand_vec_perm_d *d) - { - rtx (*gen) (rtx, rtx, rtx); -@@ -8181,10 +9576,6 @@ - unsigned int i, elt, nelt = d->nelt; - rtx lane; - -- /* TODO: This may not be big-endian safe. */ -- if (BYTES_BIG_ENDIAN) -- return false; -- - elt = d->perm[0]; - for (i = 1; i < nelt; i++) - { -@@ -8198,7 +9589,7 @@ - use d->op0 and need not do any extra arithmetic to get the - correct lane number. */ - in0 = d->op0; -- lane = GEN_INT (elt); -+ lane = GEN_INT (elt); /* The pattern corrects for big-endian. */ - - switch (vmode) - { -@@ -8227,11 +9618,6 @@ - enum machine_mode vmode = d->vmode; - unsigned int i, nelt = d->nelt; - -- /* TODO: ARM's TBL indexing is little-endian. In order to handle GCC's -- numbering of elements for big-endian, we must reverse the order. */ -- if (BYTES_BIG_ENDIAN) -- return false; -- - if (d->testing_p) - return true; - -@@ -8242,7 +9628,15 @@ - return false; - - for (i = 0; i < nelt; ++i) -- rperm[i] = GEN_INT (d->perm[i]); -+ { -+ int nunits = GET_MODE_NUNITS (vmode); -+ -+ /* If big-endian and two vectors we end up with a weird mixed-endian -+ mode on NEON. Reverse the index within each word but not the word -+ itself. */ -+ rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? 
d->perm[i] ^ (nunits - 1) -+ : d->perm[i]); -+ } - sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm)); - sel = force_reg (vmode, sel); - -@@ -8271,14 +9665,18 @@ - - if (TARGET_SIMD) - { -- if (aarch64_evpc_zip (d)) -+ if (aarch64_evpc_rev (d)) - return true; -+ else if (aarch64_evpc_ext (d)) -+ return true; -+ else if (aarch64_evpc_dup (d)) -+ return true; -+ else if (aarch64_evpc_zip (d)) -+ return true; - else if (aarch64_evpc_uzp (d)) - return true; - else if (aarch64_evpc_trn (d)) - return true; -- else if (aarch64_evpc_dup (d)) -- return true; - return aarch64_evpc_tbl (d); - } - return false; -@@ -8397,7 +9795,8 @@ - /* Limited combinations of subregs are safe on FPREGs. Particularly, - 1. Vector Mode to Scalar mode where 1 unit of the vector is accessed. - 2. Scalar to Scalar for integer modes or same size float modes. -- 3. Vector to Vector modes. */ -+ 3. Vector to Vector modes. -+ 4. On little-endian only, Vector-Structure to Vector modes. */ - if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to)) - { - if (aarch64_vector_mode_supported_p (from) -@@ -8413,11 +9812,215 @@ - if (aarch64_vector_mode_supported_p (from) - && aarch64_vector_mode_supported_p (to)) - return false; -+ -+ /* Within an vector structure straddling multiple vector registers -+ we are in a mixed-endian representation. As such, we can't -+ easily change modes for BYTES_BIG_ENDIAN. Otherwise, we can -+ switch between vectors and vector structures cheaply. */ -+ if (!BYTES_BIG_ENDIAN) -+ if ((aarch64_vector_mode_supported_p (from) -+ && aarch64_vect_struct_mode_p (to)) -+ || (aarch64_vector_mode_supported_p (to) -+ && aarch64_vect_struct_mode_p (from))) -+ return false; - } - - return true; - } - -+/* Implement MODES_TIEABLE_P. */ -+ -+bool -+aarch64_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2) -+{ -+ if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2)) -+ return true; -+ -+ /* We specifically want to allow elements of "structure" modes to -+ be tieable to the structure. This more general condition allows -+ other rarer situations too. */ -+ if (TARGET_SIMD -+ && aarch64_vector_mode_p (mode1) -+ && aarch64_vector_mode_p (mode2)) -+ return true; -+ -+ return false; -+} -+ -+/* Return a new RTX holding the result of moving POINTER forward by -+ AMOUNT bytes. */ -+ -+static rtx -+aarch64_move_pointer (rtx pointer, int amount) -+{ -+ rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount); -+ -+ return adjust_automodify_address (pointer, GET_MODE (pointer), -+ next, amount); -+} -+ -+/* Return a new RTX holding the result of moving POINTER forward by the -+ size of the mode it points to. */ -+ -+static rtx -+aarch64_progress_pointer (rtx pointer) -+{ -+ HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer)); -+ -+ return aarch64_move_pointer (pointer, amount); -+} -+ -+/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by -+ MODE bytes. */ -+ -+static void -+aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst, -+ enum machine_mode mode) -+{ -+ rtx reg = gen_reg_rtx (mode); -+ -+ /* "Cast" the pointers to the correct mode. */ -+ *src = adjust_address (*src, mode, 0); -+ *dst = adjust_address (*dst, mode, 0); -+ /* Emit the memcpy. */ -+ emit_move_insn (reg, *src); -+ emit_move_insn (*dst, reg); -+ /* Move the pointers forward. */ -+ *src = aarch64_progress_pointer (*src); -+ *dst = aarch64_progress_pointer (*dst); -+} -+ -+/* Expand movmem, as if from a __builtin_memcpy. Return true if -+ we succeed, otherwise return false. 
*/ -+ -+bool -+aarch64_expand_movmem (rtx *operands) -+{ -+ unsigned int n; -+ rtx dst = operands[0]; -+ rtx src = operands[1]; -+ rtx base; -+ bool speed_p = !optimize_function_for_size_p (cfun); -+ -+ /* When optimizing for size, give a better estimate of the length of a -+ memcpy call, but use the default otherwise. */ -+ unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2; -+ -+ /* We can't do anything smart if the amount to copy is not constant. */ -+ if (!CONST_INT_P (operands[2])) -+ return false; -+ -+ n = UINTVAL (operands[2]); -+ -+ /* Try to keep the number of instructions low. For cases below 16 bytes we -+ need to make at most two moves. For cases above 16 bytes it will be one -+ move for each 16 byte chunk, then at most two additional moves. */ -+ if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions) -+ return false; -+ -+ base = copy_to_mode_reg (Pmode, XEXP (dst, 0)); -+ dst = adjust_automodify_address (dst, VOIDmode, base, 0); -+ -+ base = copy_to_mode_reg (Pmode, XEXP (src, 0)); -+ src = adjust_automodify_address (src, VOIDmode, base, 0); -+ -+ /* Simple cases. Copy 0-3 bytes, as (if applicable) a 2-byte, then a -+ 1-byte chunk. */ -+ if (n < 4) -+ { -+ if (n >= 2) -+ { -+ aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode); -+ n -= 2; -+ } -+ -+ if (n == 1) -+ aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode); -+ -+ return true; -+ } -+ -+ /* Copy 4-8 bytes. First a 4-byte chunk, then (if applicable) a second -+ 4-byte chunk, partially overlapping with the previously copied chunk. */ -+ if (n < 8) -+ { -+ aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode); -+ n -= 4; -+ if (n > 0) -+ { -+ int move = n - 4; -+ -+ src = aarch64_move_pointer (src, move); -+ dst = aarch64_move_pointer (dst, move); -+ aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode); -+ } -+ return true; -+ } -+ -+ /* Copy more than 8 bytes. Copy chunks of 16 bytes until we run out of -+ them, then (if applicable) an 8-byte chunk. */ -+ while (n >= 8) -+ { -+ if (n / 16) -+ { -+ aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode); -+ n -= 16; -+ } -+ else -+ { -+ aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode); -+ n -= 8; -+ } -+ } -+ -+ /* Finish the final bytes of the copy. We can always do this in one -+ instruction. We either copy the exact amount we need, or partially -+ overlap with the previous chunk we copied and copy 8-bytes. */ -+ if (n == 0) -+ return true; -+ else if (n == 1) -+ aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode); -+ else if (n == 2) -+ aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode); -+ else if (n == 4) -+ aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode); -+ else -+ { -+ if (n == 3) -+ { -+ src = aarch64_move_pointer (src, -1); -+ dst = aarch64_move_pointer (dst, -1); -+ aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode); -+ } -+ else -+ { -+ int move = n - 8; -+ -+ src = aarch64_move_pointer (src, move); -+ dst = aarch64_move_pointer (dst, move); -+ aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode); -+ } -+ } -+ -+ return true; -+} -+ -+static bool -+aarch64_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size, -+ unsigned int align, -+ enum by_pieces_operation op, -+ bool speed_p) -+{ -+ /* STORE_BY_PIECES can be used when copying a constant string, but -+ in that case each 64-bit chunk takes 5 insns instead of 2 (LDR/STR). 
-+ For now we always fail this and let the move_by_pieces code copy -+ the string from read-only memory. */ -+ if (op == STORE_BY_PIECES) -+ return false; -+ -+ return default_use_by_pieces_infrastructure_p (size, align, op, speed_p); -+} -+ - #undef TARGET_ADDRESS_COST - #define TARGET_ADDRESS_COST aarch64_address_cost - -@@ -8588,7 +10191,7 @@ - #define TARGET_RETURN_IN_MSB aarch64_return_in_msb - - #undef TARGET_RTX_COSTS --#define TARGET_RTX_COSTS aarch64_rtx_costs -+#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper - - #undef TARGET_SCHED_ISSUE_RATE - #define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate -@@ -8626,6 +10229,10 @@ - #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \ - aarch64_autovectorize_vector_sizes - -+#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV -+#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \ -+ aarch64_atomic_assign_expand_fenv -+ - /* Section anchor support. */ - - #undef TARGET_MIN_ANCHOR_OFFSET -@@ -8654,6 +10261,19 @@ - #undef TARGET_FIXED_CONDITION_CODE_REGS - #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs - -+#undef TARGET_FLAGS_REGNUM -+#define TARGET_FLAGS_REGNUM CC_REGNUM -+ -+#undef TARGET_LEGITIMIZE_ADDRESS -+#define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address -+ -+#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P -+#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \ -+ aarch64_use_by_pieces_infrastructure_p -+ -+#undef TARGET_CAN_USE_DOLOOP_P -+#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost -+ - struct gcc_target targetm = TARGET_INITIALIZER; - - #include "gt-aarch64.h" ---- a/src/gcc/config/aarch64/aarch64-elf-raw.h -+++ b/src/gcc/config/aarch64/aarch64-elf-raw.h -@@ -23,7 +23,9 @@ - #define GCC_AARCH64_ELF_RAW_H - - #define STARTFILE_SPEC " crti%O%s crtbegin%O%s crt0%O%s" --#define ENDFILE_SPEC " crtend%O%s crtn%O%s" -+#define ENDFILE_SPEC \ -+ " crtend%O%s crtn%O%s " \ -+ "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}" - - #ifdef TARGET_FIX_ERR_A53_835769_DEFAULT - #define CA53_ERR_835769_SPEC \ ---- a/src/gcc/config/aarch64/aarch64-linux.h -+++ b/src/gcc/config/aarch64/aarch64-linux.h -@@ -21,7 +21,7 @@ - #ifndef GCC_AARCH64_LINUX_H - #define GCC_AARCH64_LINUX_H - --#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux-aarch64%{mbig-endian:_be}.so.1" -+#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux-aarch64%{mbig-endian:_be}%{mabi=ilp32:_ilp32}.so.1" - - #define CPP_SPEC "%{pthread:-D_REENTRANT}" - -@@ -33,7 +33,7 @@ - -dynamic-linker " GNU_USER_DYNAMIC_LINKER " \ - -X \ - %{mbig-endian:-EB} %{mlittle-endian:-EL} \ -- -maarch64linux%{mbig-endian:b}" -+ -maarch64linux%{mabi=ilp32:32}%{mbig-endian:b}" - - #ifdef TARGET_FIX_ERR_A53_835769_DEFAULT - #define CA53_ERR_835769_SPEC \ -@@ -46,6 +46,14 @@ - #define LINK_SPEC LINUX_TARGET_LINK_SPEC \ - CA53_ERR_835769_SPEC - -+#define GNU_USER_TARGET_MATHFILE_SPEC \ -+ "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}" -+ -+#undef ENDFILE_SPEC -+#define ENDFILE_SPEC \ -+ GNU_USER_TARGET_MATHFILE_SPEC " " \ -+ GNU_USER_TARGET_ENDFILE_SPEC -+ - #define TARGET_OS_CPP_BUILTINS() \ - do \ - { \ ---- a/src/gcc/config/aarch64/iterators.md -+++ b/src/gcc/config/aarch64/iterators.md -@@ -95,6 +95,9 @@ - ;; Vector Float modes. - (define_mode_iterator VDQF [V2SF V4SF V2DF]) - -+;; Vector Float modes, and DF. -+(define_mode_iterator VDQF_DF [V2SF V4SF V2DF DF]) -+ - ;; Vector single Float modes. - (define_mode_iterator VDQSF [V2SF V4SF]) - -@@ -156,6 +159,9 @@ - ;; Vector modes for H and S types. 
- (define_mode_iterator VDQHS [V4HI V8HI V2SI V4SI]) - -+;; Vector modes for H, S and D types. -+(define_mode_iterator VDQHSD [V4HI V8HI V2SI V4SI V2DI]) -+ - ;; Vector modes for Q, H and S types. - (define_mode_iterator VDQQHS [V8QI V16QI V4HI V8HI V2SI V4SI]) - -@@ -273,6 +279,10 @@ - UNSPEC_UZP2 ; Used in vector permute patterns. - UNSPEC_TRN1 ; Used in vector permute patterns. - UNSPEC_TRN2 ; Used in vector permute patterns. -+ UNSPEC_EXT ; Used in aarch64-simd.md. -+ UNSPEC_REV64 ; Used in vector reverse patterns (permute). -+ UNSPEC_REV32 ; Used in vector reverse patterns (permute). -+ UNSPEC_REV16 ; Used in vector reverse patterns (permute). - UNSPEC_AESE ; Used in aarch64-simd.md. - UNSPEC_AESD ; Used in aarch64-simd.md. - UNSPEC_AESMC ; Used in aarch64-simd.md. -@@ -299,6 +309,10 @@ - ;; 32-bit version and "%x0" in the 64-bit version. - (define_mode_attr w [(QI "w") (HI "w") (SI "w") (DI "x") (SF "s") (DF "d")]) - -+;; For inequal width int to float conversion -+(define_mode_attr w1 [(SF "w") (DF "x")]) -+(define_mode_attr w2 [(SF "x") (DF "w")]) -+ - ;; For constraints used in scalar immediate vector moves - (define_mode_attr hq [(HI "h") (QI "q")]) - -@@ -348,6 +362,9 @@ - ;; Attribute to describe constants acceptable in logical operations - (define_mode_attr lconst [(SI "K") (DI "L")]) - -+;; Attribute to describe constants acceptable in atomic logical operations -+(define_mode_attr lconst_atomic [(QI "K") (HI "K") (SI "K") (DI "L")]) -+ - ;; Map a mode to a specific constraint character. - (define_mode_attr cmode [(QI "q") (HI "h") (SI "s") (DI "d")]) - -@@ -358,6 +375,9 @@ - (V2DI "2d") (V2SF "2s") - (V4SF "4s") (V2DF "2d")]) - -+(define_mode_attr Vrevsuff [(V4HI "16") (V8HI "16") (V2SI "32") -+ (V4SI "32") (V2DI "64")]) -+ - (define_mode_attr Vmtype [(V8QI ".8b") (V16QI ".16b") - (V4HI ".4h") (V8HI ".8h") - (V2SI ".2s") (V4SI ".4s") -@@ -552,13 +572,43 @@ - - (define_mode_attr VSTRUCT_DREG [(OI "TI") (CI "EI") (XI "OI")]) - -+;; Mode of pair of elements for each vector mode, to define transfer -+;; size for structure lane/dup loads and stores. -+(define_mode_attr V_TWO_ELEM [(V8QI "HI") (V16QI "HI") -+ (V4HI "SI") (V8HI "SI") -+ (V2SI "V2SI") (V4SI "V2SI") -+ (DI "V2DI") (V2DI "V2DI") -+ (V2SF "V2SF") (V4SF "V2SF") -+ (DF "V2DI") (V2DF "V2DI")]) -+ -+;; Similar, for three elements. -+(define_mode_attr V_THREE_ELEM [(V8QI "BLK") (V16QI "BLK") -+ (V4HI "BLK") (V8HI "BLK") -+ (V2SI "BLK") (V4SI "BLK") -+ (DI "EI") (V2DI "EI") -+ (V2SF "BLK") (V4SF "BLK") -+ (DF "EI") (V2DF "EI")]) -+ -+;; Similar, for four elements. 
-+(define_mode_attr V_FOUR_ELEM [(V8QI "SI") (V16QI "SI") -+ (V4HI "V4HI") (V8HI "V4HI") -+ (V2SI "V4SI") (V4SI "V4SI") -+ (DI "OI") (V2DI "OI") -+ (V2SF "V4SF") (V4SF "V4SF") -+ (DF "OI") (V2DF "OI")]) -+ -+ - ;; Mode for atomic operation suffixes - (define_mode_attr atomic_sfx - [(QI "b") (HI "h") (SI "") (DI "")]) - --(define_mode_attr fcvt_target [(V2DF "v2di") (V4SF "v4si") (V2SF "v2si")]) --(define_mode_attr FCVT_TARGET [(V2DF "V2DI") (V4SF "V4SI") (V2SF "V2SI")]) -+(define_mode_attr fcvt_target [(V2DF "v2di") (V4SF "v4si") (V2SF "v2si") (SF "si") (DF "di")]) -+(define_mode_attr FCVT_TARGET [(V2DF "V2DI") (V4SF "V4SI") (V2SF "V2SI") (SF "SI") (DF "DI")]) - -+;; for the inequal width integer to fp conversions -+(define_mode_attr fcvt_iesize [(SF "di") (DF "si")]) -+(define_mode_attr FCVT_IESIZE [(SF "DI") (DF "SI")]) -+ - (define_mode_attr VSWAP_WIDTH [(V8QI "V16QI") (V16QI "V8QI") - (V4HI "V8HI") (V8HI "V4HI") - (V2SI "V4SI") (V4SI "V2SI") -@@ -853,6 +903,8 @@ - UNSPEC_TRN1 UNSPEC_TRN2 - UNSPEC_UZP1 UNSPEC_UZP2]) - -+(define_int_iterator REVERSE [UNSPEC_REV64 UNSPEC_REV32 UNSPEC_REV16]) -+ - (define_int_iterator FRINT [UNSPEC_FRINTZ UNSPEC_FRINTP UNSPEC_FRINTM - UNSPEC_FRINTN UNSPEC_FRINTI UNSPEC_FRINTX - UNSPEC_FRINTA]) -@@ -862,6 +914,10 @@ - - (define_int_iterator FRECP [UNSPEC_FRECPE UNSPEC_FRECPX]) - -+(define_int_iterator CRC [UNSPEC_CRC32B UNSPEC_CRC32H UNSPEC_CRC32W -+ UNSPEC_CRC32X UNSPEC_CRC32CB UNSPEC_CRC32CH -+ UNSPEC_CRC32CW UNSPEC_CRC32CX]) -+ - (define_int_iterator CRYPTO_AES [UNSPEC_AESE UNSPEC_AESD]) - (define_int_iterator CRYPTO_AESMC [UNSPEC_AESMC UNSPEC_AESIMC]) - -@@ -980,6 +1036,10 @@ - (UNSPEC_TRN1 "trn") (UNSPEC_TRN2 "trn") - (UNSPEC_UZP1 "uzp") (UNSPEC_UZP2 "uzp")]) - -+; op code for REV instructions (size within which elements are reversed). 
-+(define_int_attr rev_op [(UNSPEC_REV64 "64") (UNSPEC_REV32 "32") -+ (UNSPEC_REV16 "16")]) -+ - (define_int_attr perm_hilo [(UNSPEC_ZIP1 "1") (UNSPEC_ZIP2 "2") - (UNSPEC_TRN1 "1") (UNSPEC_TRN2 "2") - (UNSPEC_UZP1 "1") (UNSPEC_UZP2 "2")]) -@@ -986,6 +1046,16 @@ - - (define_int_attr frecp_suffix [(UNSPEC_FRECPE "e") (UNSPEC_FRECPX "x")]) - -+(define_int_attr crc_variant [(UNSPEC_CRC32B "crc32b") (UNSPEC_CRC32H "crc32h") -+ (UNSPEC_CRC32W "crc32w") (UNSPEC_CRC32X "crc32x") -+ (UNSPEC_CRC32CB "crc32cb") (UNSPEC_CRC32CH "crc32ch") -+ (UNSPEC_CRC32CW "crc32cw") (UNSPEC_CRC32CX "crc32cx")]) -+ -+(define_int_attr crc_mode [(UNSPEC_CRC32B "QI") (UNSPEC_CRC32H "HI") -+ (UNSPEC_CRC32W "SI") (UNSPEC_CRC32X "DI") -+ (UNSPEC_CRC32CB "QI") (UNSPEC_CRC32CH "HI") -+ (UNSPEC_CRC32CW "SI") (UNSPEC_CRC32CX "DI")]) -+ - (define_int_attr aes_op [(UNSPEC_AESE "e") (UNSPEC_AESD "d")]) - (define_int_attr aesmc_op [(UNSPEC_AESMC "mc") (UNSPEC_AESIMC "imc")]) - ---- a/src/gcc/config/aarch64/aarch64.h -+++ b/src/gcc/config/aarch64/aarch64.h -@@ -26,14 +26,48 @@ - #define TARGET_CPU_CPP_BUILTINS() \ - do \ - { \ -- builtin_define ("__aarch64__"); \ -+ builtin_define ("__aarch64__"); \ -+ builtin_define ("__ARM_64BIT_STATE"); \ -+ builtin_define_with_int_value \ -+ ("__ARM_ARCH", aarch64_architecture_version); \ -+ cpp_define_formatted \ -+ (parse_in, "__ARM_ARCH_%dA", aarch64_architecture_version); \ -+ builtin_define ("__ARM_ARCH_ISA_A64"); \ -+ builtin_define_with_int_value \ -+ ("__ARM_ARCH_PROFILE", 'A'); \ -+ builtin_define ("__ARM_FEATURE_CLZ"); \ -+ builtin_define ("__ARM_FEATURE_IDIV"); \ -+ builtin_define ("__ARM_FEATURE_UNALIGNED"); \ -+ if (flag_unsafe_math_optimizations) \ -+ builtin_define ("__ARM_FP_FAST"); \ -+ builtin_define ("__ARM_PCS_AAPCS64"); \ -+ builtin_define_with_int_value \ -+ ("__ARM_SIZEOF_WCHAR_T", WCHAR_TYPE_SIZE / 8); \ -+ builtin_define_with_int_value \ -+ ("__ARM_SIZEOF_MINIMAL_ENUM", \ -+ flag_short_enums? 1 : 4); \ - if (TARGET_BIG_END) \ -- builtin_define ("__AARCH64EB__"); \ -+ { \ -+ builtin_define ("__AARCH64EB__"); \ -+ builtin_define ("__ARM_BIG_ENDIAN"); \ -+ } \ - else \ - builtin_define ("__AARCH64EL__"); \ - \ -- if (TARGET_SIMD) \ -- builtin_define ("__ARM_NEON"); \ -+ if (TARGET_FLOAT) \ -+ { \ -+ builtin_define ("__ARM_FEATURE_FMA"); \ -+ builtin_define_with_int_value ("__ARM_FP", 0x0C); \ -+ } \ -+ if (TARGET_SIMD) \ -+ { \ -+ builtin_define ("__ARM_FEATURE_NUMERIC_MAXMIN"); \ -+ builtin_define ("__ARM_NEON"); \ -+ builtin_define_with_int_value ("__ARM_NEON_FP", 0x0C);\ -+ } \ -+ \ -+ if (TARGET_CRC32) \ -+ builtin_define ("__ARM_FEATURE_CRC32"); \ - \ - switch (aarch64_cmodel) \ - { \ -@@ -155,6 +189,8 @@ - - #define PCC_BITFIELD_TYPE_MATTERS 1 - -+/* Major revision number of the ARM Architecture implemented by the target. */ -+extern unsigned aarch64_architecture_version; - - /* Instruction tuning/selection flags. */ - -@@ -188,6 +224,9 @@ - /* Crypto is an optional extension to AdvSIMD. */ - #define TARGET_CRYPTO (TARGET_SIMD && AARCH64_ISA_CRYPTO) - -+/* CRC instructions that can be enabled through +crc arch extension. */ -+#define TARGET_CRC32 (AARCH64_ISA_CRC) -+ - /* Standard register usage. 
*/ - - /* 31 64-bit general purpose registers R0-R30: -@@ -365,8 +404,7 @@ - - #define HARD_REGNO_MODE_OK(REGNO, MODE) aarch64_hard_regno_mode_ok (REGNO, MODE) - --#define MODES_TIEABLE_P(MODE1, MODE2) \ -- (GET_MODE_CLASS (MODE1) == GET_MODE_CLASS (MODE2)) -+#define MODES_TIEABLE_P(MODE1, MODE2) aarch64_modes_tieable_p (MODE1, MODE2) - - #define DWARF2_UNWIND_INFO 1 - -@@ -409,7 +447,7 @@ - enum reg_class - { - NO_REGS, -- CORE_REGS, -+ CALLER_SAVE_REGS, - GENERAL_REGS, - STACK_REG, - POINTER_REGS, -@@ -424,7 +462,7 @@ - #define REG_CLASS_NAMES \ - { \ - "NO_REGS", \ -- "CORE_REGS", \ -+ "CALLER_SAVE_REGS", \ - "GENERAL_REGS", \ - "STACK_REG", \ - "POINTER_REGS", \ -@@ -436,7 +474,7 @@ - #define REG_CLASS_CONTENTS \ - { \ - { 0x00000000, 0x00000000, 0x00000000 }, /* NO_REGS */ \ -- { 0x7fffffff, 0x00000000, 0x00000003 }, /* CORE_REGS */ \ -+ { 0x0007ffff, 0x00000000, 0x00000000 }, /* CALLER_SAVE_REGS */ \ - { 0x7fffffff, 0x00000000, 0x00000003 }, /* GENERAL_REGS */ \ - { 0x80000000, 0x00000000, 0x00000000 }, /* STACK_REG */ \ - { 0xffffffff, 0x00000000, 0x00000003 }, /* POINTER_REGS */ \ -@@ -447,7 +485,7 @@ - - #define REGNO_REG_CLASS(REGNO) aarch64_regno_regclass (REGNO) - --#define INDEX_REG_CLASS CORE_REGS -+#define INDEX_REG_CLASS GENERAL_REGS - #define BASE_REG_CLASS POINTER_REGS - - /* Register pairs used to eliminate unneeded registers that point into -@@ -524,13 +562,33 @@ - struct GTY (()) aarch64_frame - { - HOST_WIDE_INT reg_offset[FIRST_PSEUDO_REGISTER]; -+ -+ /* The number of extra stack bytes taken up by register varargs. -+ This area is allocated by the callee at the very top of the -+ frame. This value is rounded up to a multiple of -+ STACK_BOUNDARY. */ -+ HOST_WIDE_INT saved_varargs_size; -+ - HOST_WIDE_INT saved_regs_size; - /* Padding if needed after the all the callee save registers have - been saved. */ - HOST_WIDE_INT padding0; - HOST_WIDE_INT hardfp_offset; /* HARD_FRAME_POINTER_REGNUM */ -- HOST_WIDE_INT fp_lr_offset; /* Space needed for saving fp and/or lr */ - -+ /* Offset from the base of the frame (incomming SP) to the -+ hard_frame_pointer. This value is always a multiple of -+ STACK_BOUNDARY. */ -+ HOST_WIDE_INT hard_fp_offset; -+ -+ /* The size of the frame. This value is the offset from base of the -+ * frame (incomming SP) to the stack_pointer. This value is always -+ * a multiple of STACK_BOUNDARY. */ -+ -+ unsigned wb_candidate1; -+ unsigned wb_candidate2; -+ -+ HOST_WIDE_INT frame_size; -+ - bool laid_out; - }; - -@@ -537,11 +595,6 @@ - typedef struct GTY (()) machine_function - { - struct aarch64_frame frame; -- -- /* The number of extra stack bytes taken up by register varargs. -- This area is allocated by the callee at the very top of the frame. */ -- HOST_WIDE_INT saved_varargs_size; -- - } machine_function; - #endif - -@@ -565,11 +618,7 @@ - }; - - --extern enum arm_pcs arm_pcs_variant; - --#ifndef ARM_DEFAULT_PCS --#define ARM_DEFAULT_PCS ARM_PCS_AAPCS64 --#endif - - /* We can't use enum machine_mode inside a generator file because it - hasn't been created yet; we shouldn't be using any code that -@@ -670,12 +719,14 @@ - /* The base cost overhead of a memcpy call, for MOVE_RATIO and friends. */ - #define AARCH64_CALL_RATIO 8 - --/* When optimizing for size, give a better estimate of the length of a memcpy -- call, but use the default otherwise. But move_by_pieces_ninsns() counts -- memory-to-memory moves, and we'll have to generate a load & store for each, -- so halve the value to take that into account. 
*/ -+/* MOVE_RATIO dictates when we will use the move_by_pieces infrastructure. -+ move_by_pieces will continually copy the largest safe chunks. So a -+ 7-byte copy is a 4-byte + 2-byte + byte copy. This proves inefficient -+ for both size and speed of copy, so we will instead use the "movmem" -+ standard name to implement the copy. This logic does not apply when -+ targeting -mstrict-align, so keep a sensible default in that case. */ - #define MOVE_RATIO(speed) \ -- (((speed) ? 15 : AARCH64_CALL_RATIO) / 2) -+ (!STRICT_ALIGNMENT ? 2 : (((speed) ? 15 : AARCH64_CALL_RATIO) / 2)) - - /* For CLEAR_RATIO, when optimizing for size, give a better estimate - of the length of a memset call, but use the default otherwise. */ -@@ -688,12 +739,6 @@ - #define SET_RATIO(speed) \ - ((speed) ? 15 : AARCH64_CALL_RATIO - 2) - --/* STORE_BY_PIECES_P can be used when copying a constant string, but -- in that case each 64-bit chunk takes 5 insns instead of 2 (LDR/STR). -- For now we always fail this and let the move_by_pieces code copy -- the string from read-only memory. */ --#define STORE_BY_PIECES_P(SIZE, ALIGN) 0 -- - /* Disable auto-increment in move_by_pieces et al. Use of auto-increment is - rarely a good idea in straight-line code since it adds an extra address - dependency between each instruction. Better to use incrementing offsets. */ -@@ -835,6 +880,11 @@ - - #define SHIFT_COUNT_TRUNCATED !TARGET_SIMD - -+/* Choose appropriate mode for caller saves, so we do the minimum -+ required size of load/store. */ -+#define HARD_REGNO_CALLER_SAVE_MODE(REGNO, NREGS, MODE) \ -+ aarch64_hard_regno_caller_save_mode ((REGNO), (NREGS), (MODE)) -+ - /* Callee only saves lower 64-bits of a 128-bit register. Tell the - compiler the callee clobbers the top 64-bits when restoring the - bottom 64-bits. */ ---- a/src/gcc/config/arc/arc.c -+++ b/src/gcc/config/arc/arc.c -@@ -398,6 +398,11 @@ - - static bool arc_frame_pointer_required (void); - -+static bool arc_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT, -+ unsigned int, -+ enum by_pieces_operation op, -+ bool); -+ - /* Implements target hook vector_mode_supported_p. */ - - static bool -@@ -512,6 +517,10 @@ - #undef TARGET_DELEGITIMIZE_ADDRESS - #define TARGET_DELEGITIMIZE_ADDRESS arc_delegitimize_address - -+#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P -+#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \ -+ arc_use_by_pieces_infrastructure_p -+ - /* Usually, we will be able to scale anchor offsets. - When this fails, we want LEGITIMIZE_ADDRESS to kick in. */ - #undef TARGET_MIN_ANCHOR_OFFSET -@@ -9355,6 +9364,21 @@ - return false; - } - -+/* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */ -+ -+static bool -+arc_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size, -+ unsigned int align, -+ enum by_pieces_operation op, -+ bool speed_p) -+{ -+ /* Let the movmem expander handle small block moves. */ -+ if (op == MOVE_BY_PIECES) -+ return false; -+ -+ return default_use_by_pieces_infrastructure_p (size, align, op, speed_p); -+} -+ - struct gcc_target targetm = TARGET_INITIALIZER; - - #include "gt-arc.h" ---- a/src/gcc/config/arc/arc.h -+++ b/src/gcc/config/arc/arc.h -@@ -1553,12 +1553,6 @@ - in one reasonably fast instruction. */ - #define MOVE_MAX 4 - --/* Let the movmem expander handle small block moves. 
*/ --#define MOVE_BY_PIECES_P(LEN, ALIGN) 0 --#define CAN_MOVE_BY_PIECES(SIZE, ALIGN) \ -- (move_by_pieces_ninsns (SIZE, ALIGN, MOVE_MAX_PIECES + 1) \ -- < (unsigned int) MOVE_RATIO (!optimize_size)) -- - /* Undo the effects of the movmem pattern presence on STORE_BY_PIECES_P . */ - #define MOVE_RATIO(SPEED) ((SPEED) ? 15 : 3) - ---- a/src/gcc/config/arm/aarch-cost-tables.h -+++ b/src/gcc/config/arm/aarch-cost-tables.h -@@ -39,6 +39,7 @@ - 0, /* bfi. */ - 0, /* bfx. */ - 0, /* clz. */ -+ 0, /* rev. */ - COSTS_N_INSNS (1), /* non_exec. */ - false /* non_exec_costs_exec. */ - }, -@@ -139,6 +140,7 @@ - COSTS_N_INSNS (1), /* bfi. */ - COSTS_N_INSNS (1), /* bfx. */ - 0, /* clz. */ -+ 0, /* rev. */ - 0, /* non_exec. */ - true /* non_exec_costs_exec. */ - }, -@@ -239,6 +241,7 @@ - COSTS_N_INSNS (1), /* bfi. */ - 0, /* bfx. */ - 0, /* clz. */ -+ 0, /* rev. */ - 0, /* non_exec. */ - true /* non_exec_costs_exec. */ - }, ---- a/src/gcc/config/arm/cortex-a15.md -+++ b/src/gcc/config/arm/cortex-a15.md -@@ -64,7 +64,7 @@ - (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ - alu_reg,alus_reg,logic_reg,logics_reg,\ - adc_imm,adcs_imm,adc_reg,adcs_reg,\ -- adr,bfm,rev,\ -+ adr,bfm,clz,rbit,rev,\ - shift_imm,shift_reg,\ - mov_imm,mov_reg,\ - mvn_imm,mvn_reg,\ -@@ -72,11 +72,14 @@ - "ca15_issue1,(ca15_sx1,ca15_sx1_alu)|(ca15_sx2,ca15_sx2_alu)") - - ;; ALU ops with immediate shift -+;; crc is also included here so that appropriate scheduling of CRC32 ARMv8-A -+;; instructions can be performed when tuning for the Cortex-A57 since that -+;; core reuses the Cortex-A15 pipeline description for the moment. - (define_insn_reservation "cortex_a15_alu_shift" 3 - (and (eq_attr "tune" "cortexa15") - (eq_attr "type" "extend,\ - alu_shift_imm,alus_shift_imm,\ -- logic_shift_imm,logics_shift_imm,\ -+ crc,logic_shift_imm,logics_shift_imm,\ - mov_shift,mvn_shift")) - "ca15_issue1,(ca15_sx1,ca15_sx1+ca15_sx1_shf,ca15_sx1_alu)\ - |(ca15_sx2,ca15_sx2+ca15_sx2_shf,ca15_sx2_alu)") ---- a/src/gcc/config/arm/arm-tables.opt -+++ b/src/gcc/config/arm/arm-tables.opt -@@ -274,6 +274,9 @@ - Enum(processor_type) String(cortex-r7) Value(cortexr7) - - EnumValue -+Enum(processor_type) String(cortex-m7) Value(cortexm7) -+ -+EnumValue - Enum(processor_type) String(cortex-m4) Value(cortexm4) - - EnumValue -@@ -423,17 +426,23 @@ - Enum(arm_fpu) String(fpv4-sp-d16) Value(11) - - EnumValue --Enum(arm_fpu) String(neon-vfpv4) Value(12) -+Enum(arm_fpu) String(fpv5-sp-d16) Value(12) - - EnumValue --Enum(arm_fpu) String(fp-armv8) Value(13) -+Enum(arm_fpu) String(fpv5-d16) Value(13) - - EnumValue --Enum(arm_fpu) String(neon-fp-armv8) Value(14) -+Enum(arm_fpu) String(neon-vfpv4) Value(14) - - EnumValue --Enum(arm_fpu) String(crypto-neon-fp-armv8) Value(15) -+Enum(arm_fpu) String(fp-armv8) Value(15) - - EnumValue --Enum(arm_fpu) String(vfp3) Value(16) -+Enum(arm_fpu) String(neon-fp-armv8) Value(16) - -+EnumValue -+Enum(arm_fpu) String(crypto-neon-fp-armv8) Value(17) -+ -+EnumValue -+Enum(arm_fpu) String(vfp3) Value(18) -+ ---- a/src/gcc/config/arm/thumb2.md -+++ b/src/gcc/config/arm/thumb2.md -@@ -329,7 +329,7 @@ - movw%?\\t%0, %L1\\t%@ movhi - str%(h%)\\t%1, %0\\t%@ movhi - ldr%(h%)\\t%0, %1\\t%@ movhi" -- [(set_attr "type" "mov_reg,mov_imm,mov_imm,mov_reg,store1,load1") -+ [(set_attr "type" "mov_reg,mov_imm,mov_imm,mov_imm,store1,load1") - (set_attr "predicable" "yes") - (set_attr "predicable_short_it" "yes,no,yes,no,no,no") - (set_attr "length" "2,4,2,4,4,4") -@@ -1370,6 +1370,103 @@ - (set_attr "type" "alu_reg")] - ) - -+; Constants for op 2 
will never be given to these patterns. -+(define_insn_and_split "*iordi_notdi_di" -+ [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") -+ (ior:DI (not:DI (match_operand:DI 1 "s_register_operand" "0,r")) -+ (match_operand:DI 2 "s_register_operand" "r,0")))] -+ "TARGET_THUMB2" -+ "#" -+ "TARGET_THUMB2 && reload_completed" -+ [(set (match_dup 0) (ior:SI (not:SI (match_dup 1)) (match_dup 2))) -+ (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))] -+ " -+ { -+ operands[3] = gen_highpart (SImode, operands[0]); -+ operands[0] = gen_lowpart (SImode, operands[0]); -+ operands[4] = gen_highpart (SImode, operands[1]); -+ operands[1] = gen_lowpart (SImode, operands[1]); -+ operands[5] = gen_highpart (SImode, operands[2]); -+ operands[2] = gen_lowpart (SImode, operands[2]); -+ }" -+ [(set_attr "length" "8") -+ (set_attr "predicable" "yes") -+ (set_attr "predicable_short_it" "no") -+ (set_attr "type" "multiple")] -+) -+ -+(define_insn_and_split "*iordi_notzesidi_di" -+ [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") -+ (ior:DI (not:DI (zero_extend:DI -+ (match_operand:SI 2 "s_register_operand" "r,r"))) -+ (match_operand:DI 1 "s_register_operand" "0,?r")))] -+ "TARGET_THUMB2" -+ "#" -+ ; (not (zero_extend...)) means operand0 will always be 0xffffffff -+ "TARGET_THUMB2 && reload_completed" -+ [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1))) -+ (set (match_dup 3) (const_int -1))] -+ " -+ { -+ operands[3] = gen_highpart (SImode, operands[0]); -+ operands[0] = gen_lowpart (SImode, operands[0]); -+ operands[1] = gen_lowpart (SImode, operands[1]); -+ }" -+ [(set_attr "length" "4,8") -+ (set_attr "predicable" "yes") -+ (set_attr "predicable_short_it" "no") -+ (set_attr "type" "multiple")] -+) -+ -+(define_insn_and_split "*iordi_notdi_zesidi" -+ [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") -+ (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "0,?r")) -+ (zero_extend:DI -+ (match_operand:SI 1 "s_register_operand" "r,r"))))] -+ "TARGET_THUMB2" -+ "#" -+ "TARGET_THUMB2 && reload_completed" -+ [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1))) -+ (set (match_dup 3) (not:SI (match_dup 4)))] -+ " -+ { -+ operands[3] = gen_highpart (SImode, operands[0]); -+ operands[0] = gen_lowpart (SImode, operands[0]); -+ operands[1] = gen_lowpart (SImode, operands[1]); -+ operands[4] = gen_highpart (SImode, operands[2]); -+ operands[2] = gen_lowpart (SImode, operands[2]); -+ }" -+ [(set_attr "length" "8") -+ (set_attr "predicable" "yes") -+ (set_attr "predicable_short_it" "no") -+ (set_attr "type" "multiple")] -+) -+ -+(define_insn_and_split "*iordi_notsesidi_di" -+ [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") -+ (ior:DI (not:DI (sign_extend:DI -+ (match_operand:SI 2 "s_register_operand" "r,r"))) -+ (match_operand:DI 1 "s_register_operand" "0,r")))] -+ "TARGET_THUMB2" -+ "#" -+ "TARGET_THUMB2 && reload_completed" -+ [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1))) -+ (set (match_dup 3) (ior:SI (not:SI -+ (ashiftrt:SI (match_dup 2) (const_int 31))) -+ (match_dup 4)))] -+ " -+ { -+ operands[3] = gen_highpart (SImode, operands[0]); -+ operands[0] = gen_lowpart (SImode, operands[0]); -+ operands[4] = gen_highpart (SImode, operands[1]); -+ operands[1] = gen_lowpart (SImode, operands[1]); -+ }" -+ [(set_attr "length" "8") -+ (set_attr "predicable" "yes") -+ (set_attr "predicable_short_it" "no") -+ (set_attr "type" "multiple")] -+) -+ - (define_insn "*orsi_notsi_si" - [(set (match_operand:SI 0 "s_register_operand" "=r") - 
(ior:SI (not:SI (match_operand:SI 2 "s_register_operand" "r")) ---- a/src/gcc/config/arm/arm.c -+++ b/src/gcc/config/arm/arm.c -@@ -50,6 +50,7 @@ - #include "except.h" - #include "tm_p.h" - #include "target.h" -+#include "sched-int.h" - #include "target-def.h" - #include "debug.h" - #include "langhooks.h" -@@ -59,6 +60,7 @@ - #include "params.h" - #include "opts.h" - #include "dumpfile.h" -+#include "gimple-expr.h" - - /* Forward definitions of types. */ - typedef struct minipool_node Mnode; -@@ -93,6 +95,7 @@ - static bool thumb_force_lr_save (void); - static unsigned arm_size_return_regs (void); - static bool arm_assemble_integer (rtx, unsigned int, int); -+static void arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update); - static void arm_print_operand (FILE *, rtx, int); - static void arm_print_operand_address (FILE *, rtx); - static bool arm_print_operand_punct_valid_p (unsigned char code); -@@ -584,6 +587,9 @@ - #undef TARGET_MANGLE_TYPE - #define TARGET_MANGLE_TYPE arm_mangle_type - -+#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV -+#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv -+ - #undef TARGET_BUILD_BUILTIN_VA_LIST - #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list - #undef TARGET_EXPAND_BUILTIN_VA_START -@@ -985,6 +991,7 @@ - COSTS_N_INSNS (1), /* bfi. */ - COSTS_N_INSNS (1), /* bfx. */ - 0, /* clz. */ -+ 0, /* rev. */ - 0, /* non_exec. */ - true /* non_exec_costs_exec. */ - }, -@@ -1068,7 +1075,210 @@ - } - }; - -+const struct cpu_cost_table cortexa8_extra_costs = -+{ -+ /* ALU */ -+ { -+ 0, /* arith. */ -+ 0, /* logical. */ -+ COSTS_N_INSNS (1), /* shift. */ -+ 0, /* shift_reg. */ -+ COSTS_N_INSNS (1), /* arith_shift. */ -+ 0, /* arith_shift_reg. */ -+ COSTS_N_INSNS (1), /* log_shift. */ -+ 0, /* log_shift_reg. */ -+ 0, /* extend. */ -+ 0, /* extend_arith. */ -+ 0, /* bfi. */ -+ 0, /* bfx. */ -+ 0, /* clz. */ -+ 0, /* rev. */ -+ 0, /* non_exec. */ -+ true /* non_exec_costs_exec. */ -+ }, -+ { -+ /* MULT SImode */ -+ { -+ COSTS_N_INSNS (1), /* simple. */ -+ COSTS_N_INSNS (1), /* flag_setting. */ -+ COSTS_N_INSNS (1), /* extend. */ -+ COSTS_N_INSNS (1), /* add. */ -+ COSTS_N_INSNS (1), /* extend_add. */ -+ COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */ -+ }, -+ /* MULT DImode */ -+ { -+ 0, /* simple (N/A). */ -+ 0, /* flag_setting (N/A). */ -+ COSTS_N_INSNS (2), /* extend. */ -+ 0, /* add (N/A). */ -+ COSTS_N_INSNS (2), /* extend_add. */ -+ 0 /* idiv (N/A). */ -+ } -+ }, -+ /* LD/ST */ -+ { -+ COSTS_N_INSNS (1), /* load. */ -+ COSTS_N_INSNS (1), /* load_sign_extend. */ -+ COSTS_N_INSNS (1), /* ldrd. */ -+ COSTS_N_INSNS (1), /* ldm_1st. */ -+ 1, /* ldm_regs_per_insn_1st. */ -+ 2, /* ldm_regs_per_insn_subsequent. */ -+ COSTS_N_INSNS (1), /* loadf. */ -+ COSTS_N_INSNS (1), /* loadd. */ -+ COSTS_N_INSNS (1), /* load_unaligned. */ -+ COSTS_N_INSNS (1), /* store. */ -+ COSTS_N_INSNS (1), /* strd. */ -+ COSTS_N_INSNS (1), /* stm_1st. */ -+ 1, /* stm_regs_per_insn_1st. */ -+ 2, /* stm_regs_per_insn_subsequent. */ -+ COSTS_N_INSNS (1), /* storef. */ -+ COSTS_N_INSNS (1), /* stored. */ -+ COSTS_N_INSNS (1) /* store_unaligned. */ -+ }, -+ { -+ /* FP SFmode */ -+ { -+ COSTS_N_INSNS (36), /* div. */ -+ COSTS_N_INSNS (11), /* mult. */ -+ COSTS_N_INSNS (20), /* mult_addsub. */ -+ COSTS_N_INSNS (30), /* fma. */ -+ COSTS_N_INSNS (9), /* addsub. */ -+ COSTS_N_INSNS (3), /* fpconst. */ -+ COSTS_N_INSNS (3), /* neg. */ -+ COSTS_N_INSNS (6), /* compare. */ -+ COSTS_N_INSNS (4), /* widen. */ -+ COSTS_N_INSNS (4), /* narrow. 
*/ -+ COSTS_N_INSNS (8), /* toint. */ -+ COSTS_N_INSNS (8), /* fromint. */ -+ COSTS_N_INSNS (8) /* roundint. */ -+ }, -+ /* FP DFmode */ -+ { -+ COSTS_N_INSNS (64), /* div. */ -+ COSTS_N_INSNS (16), /* mult. */ -+ COSTS_N_INSNS (25), /* mult_addsub. */ -+ COSTS_N_INSNS (30), /* fma. */ -+ COSTS_N_INSNS (9), /* addsub. */ -+ COSTS_N_INSNS (3), /* fpconst. */ -+ COSTS_N_INSNS (3), /* neg. */ -+ COSTS_N_INSNS (6), /* compare. */ -+ COSTS_N_INSNS (6), /* widen. */ -+ COSTS_N_INSNS (6), /* narrow. */ -+ COSTS_N_INSNS (8), /* toint. */ -+ COSTS_N_INSNS (8), /* fromint. */ -+ COSTS_N_INSNS (8) /* roundint. */ -+ } -+ }, -+ /* Vector */ -+ { -+ COSTS_N_INSNS (1) /* alu. */ -+ } -+}; - -+const struct cpu_cost_table cortexa5_extra_costs = -+{ -+ /* ALU */ -+ { -+ 0, /* arith. */ -+ 0, /* logical. */ -+ COSTS_N_INSNS (1), /* shift. */ -+ COSTS_N_INSNS (1), /* shift_reg. */ -+ COSTS_N_INSNS (1), /* arith_shift. */ -+ COSTS_N_INSNS (1), /* arith_shift_reg. */ -+ COSTS_N_INSNS (1), /* log_shift. */ -+ COSTS_N_INSNS (1), /* log_shift_reg. */ -+ COSTS_N_INSNS (1), /* extend. */ -+ COSTS_N_INSNS (1), /* extend_arith. */ -+ COSTS_N_INSNS (1), /* bfi. */ -+ COSTS_N_INSNS (1), /* bfx. */ -+ COSTS_N_INSNS (1), /* clz. */ -+ COSTS_N_INSNS (1), /* rev. */ -+ 0, /* non_exec. */ -+ true /* non_exec_costs_exec. */ -+ }, -+ -+ { -+ /* MULT SImode */ -+ { -+ 0, /* simple. */ -+ COSTS_N_INSNS (1), /* flag_setting. */ -+ COSTS_N_INSNS (1), /* extend. */ -+ COSTS_N_INSNS (1), /* add. */ -+ COSTS_N_INSNS (1), /* extend_add. */ -+ COSTS_N_INSNS (7) /* idiv. */ -+ }, -+ /* MULT DImode */ -+ { -+ 0, /* simple (N/A). */ -+ 0, /* flag_setting (N/A). */ -+ COSTS_N_INSNS (1), /* extend. */ -+ 0, /* add. */ -+ COSTS_N_INSNS (2), /* extend_add. */ -+ 0 /* idiv (N/A). */ -+ } -+ }, -+ /* LD/ST */ -+ { -+ COSTS_N_INSNS (1), /* load. */ -+ COSTS_N_INSNS (1), /* load_sign_extend. */ -+ COSTS_N_INSNS (6), /* ldrd. */ -+ COSTS_N_INSNS (1), /* ldm_1st. */ -+ 1, /* ldm_regs_per_insn_1st. */ -+ 2, /* ldm_regs_per_insn_subsequent. */ -+ COSTS_N_INSNS (2), /* loadf. */ -+ COSTS_N_INSNS (4), /* loadd. */ -+ COSTS_N_INSNS (1), /* load_unaligned. */ -+ COSTS_N_INSNS (1), /* store. */ -+ COSTS_N_INSNS (3), /* strd. */ -+ COSTS_N_INSNS (1), /* stm_1st. */ -+ 1, /* stm_regs_per_insn_1st. */ -+ 2, /* stm_regs_per_insn_subsequent. */ -+ COSTS_N_INSNS (2), /* storef. */ -+ COSTS_N_INSNS (2), /* stored. */ -+ COSTS_N_INSNS (1) /* store_unaligned. */ -+ }, -+ { -+ /* FP SFmode */ -+ { -+ COSTS_N_INSNS (15), /* div. */ -+ COSTS_N_INSNS (3), /* mult. */ -+ COSTS_N_INSNS (7), /* mult_addsub. */ -+ COSTS_N_INSNS (7), /* fma. */ -+ COSTS_N_INSNS (3), /* addsub. */ -+ COSTS_N_INSNS (3), /* fpconst. */ -+ COSTS_N_INSNS (3), /* neg. */ -+ COSTS_N_INSNS (3), /* compare. */ -+ COSTS_N_INSNS (3), /* widen. */ -+ COSTS_N_INSNS (3), /* narrow. */ -+ COSTS_N_INSNS (3), /* toint. */ -+ COSTS_N_INSNS (3), /* fromint. */ -+ COSTS_N_INSNS (3) /* roundint. */ -+ }, -+ /* FP DFmode */ -+ { -+ COSTS_N_INSNS (30), /* div. */ -+ COSTS_N_INSNS (6), /* mult. */ -+ COSTS_N_INSNS (10), /* mult_addsub. */ -+ COSTS_N_INSNS (7), /* fma. */ -+ COSTS_N_INSNS (3), /* addsub. */ -+ COSTS_N_INSNS (3), /* fpconst. */ -+ COSTS_N_INSNS (3), /* neg. */ -+ COSTS_N_INSNS (3), /* compare. */ -+ COSTS_N_INSNS (3), /* widen. */ -+ COSTS_N_INSNS (3), /* narrow. */ -+ COSTS_N_INSNS (3), /* toint. */ -+ COSTS_N_INSNS (3), /* fromint. */ -+ COSTS_N_INSNS (3) /* roundint. */ -+ } -+ }, -+ /* Vector */ -+ { -+ COSTS_N_INSNS (1) /* alu. 
*/ -+ } -+}; -+ -+ - const struct cpu_cost_table cortexa7_extra_costs = - { - /* ALU */ -@@ -1086,6 +1296,7 @@ - COSTS_N_INSNS (1), /* bfi. */ - COSTS_N_INSNS (1), /* bfx. */ - COSTS_N_INSNS (1), /* clz. */ -+ COSTS_N_INSNS (1), /* rev. */ - 0, /* non_exec. */ - true /* non_exec_costs_exec. */ - }, -@@ -1187,6 +1398,7 @@ - 0, /* bfi. */ - COSTS_N_INSNS (1), /* bfx. */ - COSTS_N_INSNS (1), /* clz. */ -+ COSTS_N_INSNS (1), /* rev. */ - 0, /* non_exec. */ - true /* non_exec_costs_exec. */ - }, -@@ -1287,6 +1499,7 @@ - COSTS_N_INSNS (1), /* bfi. */ - 0, /* bfx. */ - 0, /* clz. */ -+ 0, /* rev. */ - 0, /* non_exec. */ - true /* non_exec_costs_exec. */ - }, -@@ -1387,6 +1600,7 @@ - 0, /* bfi. */ - 0, /* bfx. */ - 0, /* clz. */ -+ 0, /* rev. */ - COSTS_N_INSNS (1), /* non_exec. */ - false /* non_exec_costs_exec. */ - }, -@@ -1483,7 +1697,8 @@ - false, /* Prefer LDRD/STRD. */ - {true, true}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ - }; - - const struct tune_params arm_fastmul_tune = -@@ -1499,7 +1714,8 @@ - false, /* Prefer LDRD/STRD. */ - {true, true}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ - }; - - /* StrongARM has early execution of branches, so a sequence that is worth -@@ -1518,7 +1734,8 @@ - false, /* Prefer LDRD/STRD. */ - {true, true}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ - }; - - const struct tune_params arm_xscale_tune = -@@ -1534,7 +1751,8 @@ - false, /* Prefer LDRD/STRD. */ - {true, true}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ - }; - - const struct tune_params arm_9e_tune = -@@ -1550,7 +1768,8 @@ - false, /* Prefer LDRD/STRD. */ - {true, true}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ - }; - - const struct tune_params arm_v6t2_tune = -@@ -1566,7 +1785,8 @@ - false, /* Prefer LDRD/STRD. */ - {true, true}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ - }; - - /* Generic Cortex tuning. Use more specific tunings if appropriate. */ -@@ -1583,9 +1803,27 @@ - false, /* Prefer LDRD/STRD. */ - {true, true}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ - }; - -+const struct tune_params arm_cortex_a8_tune = -+{ -+ arm_9e_rtx_costs, -+ &cortexa8_extra_costs, -+ NULL, /* Sched adj cost. */ -+ 1, /* Constant limit. */ -+ 5, /* Max cond insns. */ -+ ARM_PREFETCH_NOT_BENEFICIAL, -+ false, /* Prefer constant pool. 
*/ -+ arm_default_branch_cost, -+ false, /* Prefer LDRD/STRD. */ -+ {true, true}, /* Prefer non short circuit. */ -+ &arm_default_vec_cost, /* Vectorizer costs. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ -+}; -+ - const struct tune_params arm_cortex_a7_tune = - { - arm_9e_rtx_costs, -@@ -1599,7 +1837,8 @@ - false, /* Prefer LDRD/STRD. */ - {true, true}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ - }; - - const struct tune_params arm_cortex_a15_tune = -@@ -1615,7 +1854,8 @@ - true, /* Prefer LDRD/STRD. */ - {true, true}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ true, true /* Prefer 32-bit encodings. */ - }; - - const struct tune_params arm_cortex_a53_tune = -@@ -1631,7 +1871,8 @@ - false, /* Prefer LDRD/STRD. */ - {true, true}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ - }; - - const struct tune_params arm_cortex_a57_tune = -@@ -1647,7 +1888,8 @@ - true, /* Prefer LDRD/STRD. */ - {true, true}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ true, true /* Prefer 32-bit encodings. */ - }; - - /* Branches can be dual-issued on Cortex-A5, so conditional execution is -@@ -1656,7 +1898,7 @@ - const struct tune_params arm_cortex_a5_tune = - { - arm_9e_rtx_costs, -- NULL, -+ &cortexa5_extra_costs, - NULL, /* Sched adj cost. */ - 1, /* Constant limit. */ - 1, /* Max cond insns. */ -@@ -1666,7 +1908,8 @@ - false, /* Prefer LDRD/STRD. */ - {false, false}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ - }; - - const struct tune_params arm_cortex_a9_tune = -@@ -1682,7 +1925,8 @@ - false, /* Prefer LDRD/STRD. */ - {true, true}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ - }; - - const struct tune_params arm_cortex_a12_tune = -@@ -1698,7 +1942,8 @@ - true, /* Prefer LDRD/STRD. */ - {true, true}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ - }; - - /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single -@@ -1721,7 +1966,8 @@ - false, /* Prefer LDRD/STRD. */ - {false, false}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ - }; - - /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than -@@ -1739,7 +1985,8 @@ - false, /* Prefer LDRD/STRD. */ - {false, false}, /* Prefer non short circuit. 
*/ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ - }; - - const struct tune_params arm_fa726te_tune = -@@ -1755,7 +2002,8 @@ - false, /* Prefer LDRD/STRD. */ - {true, true}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ - }; - - -@@ -2806,7 +3054,7 @@ - prefer_neon_for_64bits = true; - - /* Use the alternative scheduling-pressure algorithm by default. */ -- maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2, -+ maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL, - global_options.x_param_values, - global_options_set.x_param_values); - -@@ -6079,11 +6327,6 @@ - if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl)) - return false; - -- /* Cannot tail-call to long calls, since these are out of range of -- a branch instruction. */ -- if (decl && arm_is_long_call_p (decl)) -- return false; -- - /* If we are interworking and the function is not declared static - then we can't tail-call it unless we know that it exists in this - compilation unit (since it might be a Thumb routine). */ -@@ -9337,6 +9580,47 @@ - *cost = LIBCALL_COST (2); - return false; - -+ case BSWAP: -+ if (arm_arch6) -+ { -+ if (mode == SImode) -+ { -+ *cost = COSTS_N_INSNS (1); -+ if (speed_p) -+ *cost += extra_cost->alu.rev; -+ -+ return false; -+ } -+ } -+ else -+ { -+ /* No rev instruction available. Look at arm_legacy_rev -+ and thumb_legacy_rev for the form of RTL used then. */ -+ if (TARGET_THUMB) -+ { -+ *cost = COSTS_N_INSNS (10); -+ -+ if (speed_p) -+ { -+ *cost += 6 * extra_cost->alu.shift; -+ *cost += 3 * extra_cost->alu.logical; -+ } -+ } -+ else -+ { -+ *cost = COSTS_N_INSNS (5); -+ -+ if (speed_p) -+ { -+ *cost += 2 * extra_cost->alu.shift; -+ *cost += extra_cost->alu.arith_shift; -+ *cost += 2 * extra_cost->alu.logical; -+ } -+ } -+ return true; -+ } -+ return false; -+ - case MINUS: - if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT - && (mode == SFmode || !TARGET_VFP_SINGLE)) -@@ -9719,8 +10003,17 @@ - /* Vector mode? */ - *cost = LIBCALL_COST (2); - return false; -+ case IOR: -+ if (mode == SImode && arm_arch6 && aarch_rev16_p (x)) -+ { -+ *cost = COSTS_N_INSNS (1); -+ if (speed_p) -+ *cost += extra_cost->alu.rev; - -- case AND: case XOR: case IOR: -+ return true; -+ } -+ /* Fall through. */ -+ case AND: case XOR: - if (mode == SImode) - { - enum rtx_code subcode = GET_CODE (XEXP (x, 0)); -@@ -10619,6 +10912,36 @@ - *cost = LIBCALL_COST (1); - return false; - -+ case FMA: -+ if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA) -+ { -+ rtx op0 = XEXP (x, 0); -+ rtx op1 = XEXP (x, 1); -+ rtx op2 = XEXP (x, 2); -+ -+ *cost = COSTS_N_INSNS (1); -+ -+ /* vfms or vfnma. */ -+ if (GET_CODE (op0) == NEG) -+ op0 = XEXP (op0, 0); -+ -+ /* vfnms or vfnma. */ -+ if (GET_CODE (op2) == NEG) -+ op2 = XEXP (op2, 0); -+ -+ *cost += rtx_cost (op0, FMA, 0, speed_p); -+ *cost += rtx_cost (op1, FMA, 1, speed_p); -+ *cost += rtx_cost (op2, FMA, 2, speed_p); -+ -+ if (speed_p) -+ *cost += extra_cost->fp[mode ==DFmode].fma; -+ -+ return true; -+ } -+ -+ *cost = LIBCALL_COST (3); -+ return false; -+ - case FIX: - case UNSIGNED_FIX: - if (TARGET_HARD_FLOAT) -@@ -10669,10 +10992,16 @@ - return true; - - case ASM_OPERANDS: -- /* Just a guess. 
Cost one insn per input. */ -- *cost = COSTS_N_INSNS (ASM_OPERANDS_INPUT_LENGTH (x)); -- return true; -+ { -+ /* Just a guess. Guess number of instructions in the asm -+ plus one insn per input. Always a minimum of COSTS_N_INSNS (1) -+ though (see PR60663). */ -+ int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x))); -+ int num_operands = ASM_OPERANDS_INPUT_LENGTH (x); - -+ *cost = COSTS_N_INSNS (asm_length + num_operands); -+ return true; -+ } - default: - if (mode != VOIDmode) - *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode)); -@@ -12566,7 +12895,11 @@ - || (type == 0 && GET_CODE (ind) == PRE_DEC)) - return arm_address_register_rtx_p (XEXP (ind, 0), 0); - -- /* FIXME: vld1 allows register post-modify. */ -+ /* Allow post-increment by register for VLDn */ -+ if (type == 2 && GET_CODE (ind) == POST_MODIFY -+ && GET_CODE (XEXP (ind, 1)) == PLUS -+ && REG_P (XEXP (XEXP (ind, 1), 1))) -+ return true; - - /* Match: - (plus (reg) -@@ -16787,9 +17120,20 @@ - compute_bb_for_insn (); - df_analyze (); - -+ enum Convert_Action {SKIP, CONV, SWAP_CONV}; -+ - FOR_EACH_BB_FN (bb, cfun) - { -+ if (current_tune->disparage_flag_setting_t16_encodings -+ && optimize_bb_for_speed_p (bb)) -+ continue; -+ - rtx insn; -+ Convert_Action action = SKIP; -+ Convert_Action action_for_partial_flag_setting -+ = (current_tune->disparage_partial_flag_setting_t16_encodings -+ && optimize_bb_for_speed_p (bb)) -+ ? SKIP : CONV; - - COPY_REG_SET (&live, DF_LR_OUT (bb)); - df_simulate_initialize_backwards (bb, &live); -@@ -16799,7 +17143,7 @@ - && !REGNO_REG_SET_P (&live, CC_REGNUM) - && GET_CODE (PATTERN (insn)) == SET) - { -- enum {SKIP, CONV, SWAP_CONV} action = SKIP; -+ action = SKIP; - rtx pat = PATTERN (insn); - rtx dst = XEXP (pat, 0); - rtx src = XEXP (pat, 1); -@@ -16880,10 +17224,11 @@ - /* ANDS <Rdn>,<Rm> */ - if (rtx_equal_p (dst, op0) - && low_register_operand (op1, SImode)) -- action = CONV; -+ action = action_for_partial_flag_setting; - else if (rtx_equal_p (dst, op1) - && low_register_operand (op0, SImode)) -- action = SWAP_CONV; -+ action = action_for_partial_flag_setting == SKIP -+ ? SKIP : SWAP_CONV; - break; - - case ASHIFTRT: -@@ -16894,7 +17239,7 @@ - /* LSLS <Rdn>,<Rm> */ - if (rtx_equal_p (dst, op0) - && low_register_operand (op1, SImode)) -- action = CONV; -+ action = action_for_partial_flag_setting; - /* ASRS <Rd>,<Rm>,#<imm5> */ - /* LSRS <Rd>,<Rm>,#<imm5> */ - /* LSLS <Rd>,<Rm>,#<imm5> */ -@@ -16901,7 +17246,7 @@ - else if (low_register_operand (op0, SImode) - && CONST_INT_P (op1) - && IN_RANGE (INTVAL (op1), 0, 31)) -- action = CONV; -+ action = action_for_partial_flag_setting; - break; - - case ROTATERT: -@@ -16908,12 +17253,16 @@ - /* RORS <Rdn>,<Rm> */ - if (rtx_equal_p (dst, op0) - && low_register_operand (op1, SImode)) -- action = CONV; -+ action = action_for_partial_flag_setting; - break; - - case NOT: -+ /* MVNS <Rd>,<Rm> */ -+ if (low_register_operand (op0, SImode)) -+ action = action_for_partial_flag_setting; -+ break; -+ - case NEG: -- /* MVNS <Rd>,<Rm> */ - /* NEGS <Rd>,<Rm> (a.k.a RSBS) */ - if (low_register_operand (op0, SImode)) - action = CONV; -@@ -16923,7 +17272,7 @@ - /* MOVS <Rd>,#<imm8> */ - if (CONST_INT_P (src) - && IN_RANGE (INTVAL (src), 0, 255)) -- action = CONV; -+ action = action_for_partial_flag_setting; - break; - - case REG: -@@ -17144,24 +17493,7 @@ - - /* Routines to output assembly language. */ - --/* If the rtx is the correct value then return the string of the number. 
-- In this way we can ensure that valid double constants are generated even -- when cross compiling. */ --const char * --fp_immediate_constant (rtx x) --{ -- REAL_VALUE_TYPE r; -- -- if (!fp_consts_inited) -- init_fp_table (); -- -- REAL_VALUE_FROM_CONST_DOUBLE (r, x); -- -- gcc_assert (REAL_VALUES_EQUAL (r, value_fp0)); -- return "0"; --} -- --/* As for fp_immediate_constant, but value is passed directly, not in rtx. */ -+/* Return string representation of passed in real value. */ - static const char * - fp_const_from_val (REAL_VALUE_TYPE *r) - { -@@ -17252,14 +17584,22 @@ - /* Output the assembly for a store multiple. */ - - const char * --vfp_output_fstmd (rtx * operands) -+vfp_output_vstmd (rtx * operands) - { - char pattern[100]; - int p; - int base; - int i; -+ rtx addr_reg = REG_P (XEXP (operands[0], 0)) -+ ? XEXP (operands[0], 0) -+ : XEXP (XEXP (operands[0], 0), 0); -+ bool push_p = REGNO (addr_reg) == SP_REGNUM; - -- strcpy (pattern, "fstmfdd%?\t%m0!, {%P1"); -+ if (push_p) -+ strcpy (pattern, "vpush%?.64\t{%P1"); -+ else -+ strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1"); -+ - p = strlen (pattern); - - gcc_assert (REG_P (operands[1])); -@@ -17387,6 +17727,15 @@ - require_pic_register (); - use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg); - } -+ -+ if (TARGET_AAPCS_BASED) -+ { -+ /* For AAPCS, IP and CC can be clobbered by veneers inserted by the -+ linker. */ -+ rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn); -+ clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM)); -+ clobber_reg (fusage, gen_rtx_REG (word_mode, CC_REGNUM)); -+ } - } - - /* Output a 'call' insn. */ -@@ -18066,19 +18415,19 @@ - switch (GET_CODE (addr)) - { - case PRE_DEC: -- templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s"; -+ templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s"; - ops[0] = XEXP (addr, 0); - ops[1] = reg; - break; - - case POST_INC: -- templ = "f%smia%c%%?\t%%0!, {%%%s1}%s"; -+ templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s"; - ops[0] = XEXP (addr, 0); - ops[1] = reg; - break; - - default: -- templ = "f%s%c%%?\t%%%s0, %%1%s"; -+ templ = "v%sr%%?.%s\t%%%s0, %%1%s"; - ops[0] = reg; - ops[1] = mem; - break; -@@ -18086,7 +18435,7 @@ - - sprintf (buff, templ, - load ? "ld" : "st", -- dp ? 'd' : 's', -+ dp ? "64" : "32", - dp ? "P" : "", - integer_p ? "\t%@ int" : ""); - output_asm_insn (buff, ops); -@@ -20426,6 +20775,18 @@ - { - int reg = -1; - -+ /* Register r3 is caller-saved. Normally it does not need to be -+ saved on entry by the prologue. However if we choose to save -+ it for padding then we may confuse the compiler into thinking -+ a prologue sequence is required when in fact it is not. This -+ will occur when shrink-wrapping if r3 is used as a scratch -+ register and there are no other callee-saved writes. -+ -+ This situation can be avoided when other callee-saved registers -+ are available and r3 is not mandatory if we choose a callee-saved -+ register for padding. */ -+ bool prefer_callee_reg_p = false; -+ - /* If it is safe to use r3, then do so. This sometimes - generates better code on Thumb-2 by avoiding the need to - use 32-bit push/pop instructions. */ -@@ -20432,24 +20793,29 @@ - if (! any_sibcall_could_use_r3 () - && arm_size_return_regs () <= 12 - && (offsets->saved_regs_mask & (1 << 3)) == 0 -- && (TARGET_THUMB2 -+ && (TARGET_THUMB2 - || !(TARGET_LDRD && current_tune->prefer_ldrd_strd))) - { - reg = 3; -+ if (!TARGET_THUMB2) -+ prefer_callee_reg_p = true; - } -- else -- for (i = 4; i <= (TARGET_THUMB1 ? 
LAST_LO_REGNUM : 11); i++) -- { -- /* Avoid fixed registers; they may be changed at -- arbitrary times so it's unsafe to restore them -- during the epilogue. */ -- if (!fixed_regs[i] -- && (offsets->saved_regs_mask & (1 << i)) == 0) -- { -- reg = i; -- break; -- } -- } -+ if (reg == -1 -+ || prefer_callee_reg_p) -+ { -+ for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++) -+ { -+ /* Avoid fixed registers; they may be changed at -+ arbitrary times so it's unsafe to restore them -+ during the epilogue. */ -+ if (!fixed_regs[i] -+ && (offsets->saved_regs_mask & (1 << i)) == 0) -+ { -+ reg = i; -+ break; -+ } -+ } -+ } - - if (reg != -1) - { -@@ -21039,7 +21405,15 @@ - } - - --/* If CODE is 'd', then the X is a condition operand and the instruction -+/* Globally reserved letters: acln -+ Puncutation letters currently used: @_|?().!# -+ Lower case letters currently used: bcdefhimpqtvwxyz -+ Upper case letters currently used: ABCDFGHJKLMNOPQRSTU -+ Letters previously used, but now deprecated/obsolete: sVWXYZ. -+ -+ Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P. -+ -+ If CODE is 'd', then the X is a condition operand and the instruction - should only be executed if the condition is true. - if CODE is 'D', then the X is a condition operand and the instruction - should only be executed if the condition is false: however, if the mode -@@ -21179,6 +21553,19 @@ - } - return; - -+ case 'b': -+ /* Print the log2 of a CONST_INT. */ -+ { -+ HOST_WIDE_INT val; -+ -+ if (!CONST_INT_P (x) -+ || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0) -+ output_operand_lossage ("Unsupported operand for code '%c'", code); -+ else -+ fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val); -+ } -+ return; -+ - case 'L': - /* The low 16 bits of an immediate constant. */ - fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff); -@@ -21421,7 +21808,7 @@ - register. */ - case 'p': - { -- int mode = GET_MODE (x); -+ enum machine_mode mode = GET_MODE (x); - int regno; - - if (GET_MODE_SIZE (mode) != 8 || !REG_P (x)) -@@ -21445,7 +21832,7 @@ - case 'P': - case 'q': - { -- int mode = GET_MODE (x); -+ enum machine_mode mode = GET_MODE (x); - int is_quad = (code == 'q'); - int regno; - -@@ -21481,7 +21868,7 @@ - case 'e': - case 'f': - { -- int mode = GET_MODE (x); -+ enum machine_mode mode = GET_MODE (x); - int regno; - - if ((GET_MODE_SIZE (mode) != 16 -@@ -21563,6 +21950,7 @@ - { - rtx addr; - bool postinc = FALSE; -+ rtx postinc_reg = NULL; - unsigned align, memsize, align_bits; - - gcc_assert (MEM_P (x)); -@@ -21572,6 +21960,11 @@ - postinc = 1; - addr = XEXP (addr, 0); - } -+ if (GET_CODE (addr) == POST_MODIFY) -+ { -+ postinc_reg = XEXP( XEXP (addr, 1), 1); -+ addr = XEXP (addr, 0); -+ } - asm_fprintf (stream, "[%r", REGNO (addr)); - - /* We know the alignment of this access, so we can emit a hint in the -@@ -21597,6 +21990,8 @@ - - if (postinc) - fputs("!", stream); -+ if (postinc_reg) -+ asm_fprintf (stream, ", %r", REGNO (postinc_reg)); - } - return; - -@@ -21614,7 +22009,7 @@ - /* Translate an S register number into a D register number and element index. */ - case 'y': - { -- int mode = GET_MODE (x); -+ enum machine_mode mode = GET_MODE (x); - int regno; - - if (GET_MODE_SIZE (mode) != 4 || !REG_P (x)) -@@ -21648,7 +22043,7 @@ - number into a D register number and element index. 
*/ - case 'z': - { -- int mode = GET_MODE (x); -+ enum machine_mode mode = GET_MODE (x); - int regno; - - if (GET_MODE_SIZE (mode) != 2 || !REG_P (x)) -@@ -21688,15 +22083,12 @@ - break; - - case CONST_DOUBLE: -- if (TARGET_NEON) -- { -- char fpstr[20]; -- real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x), -- sizeof (fpstr), 0, 1); -- fprintf (stream, "#%s", fpstr); -- } -- else -- fprintf (stream, "#%s", fp_immediate_constant (x)); -+ { -+ char fpstr[20]; -+ real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x), -+ sizeof (fpstr), 0, 1); -+ fprintf (stream, "#%s", fpstr); -+ } - break; - - default: -@@ -22564,6 +22956,9 @@ - || (TARGET_HARD_FLOAT && TARGET_VFP - && regno == VFPCC_REGNUM)); - -+ if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC) -+ return false; -+ - if (TARGET_THUMB1) - /* For the Thumb we only allow values bigger than SImode in - registers 0 - 6, so that there is always a second low -@@ -22609,13 +23004,20 @@ - } - - /* We allow almost any value to be stored in the general registers. -- Restrict doubleword quantities to even register pairs so that we can -- use ldrd. Do not allow very large Neon structure opaque modes in -- general registers; they would use too many. */ -+ Restrict doubleword quantities to even register pairs in ARM state -+ so that we can use ldrd. Do not allow very large Neon structure -+ opaque modes in general registers; they would use too many. */ - if (regno <= LAST_ARM_REGNUM) -- return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0) -- && ARM_NUM_REGS (mode) <= 4; -+ { -+ if (ARM_NUM_REGS (mode) > 4) -+ return FALSE; - -+ if (TARGET_THUMB2) -+ return TRUE; -+ -+ return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0); -+ } -+ - if (regno == FRAME_POINTER_REGNUM - || regno == ARG_POINTER_REGNUM) - /* We only allow integers in the fake hard registers. 
*/ -@@ -22653,6 +23055,9 @@ - enum reg_class - arm_regno_class (int regno) - { -+ if (regno == PC_REGNUM) -+ return NO_REGS; -+ - if (TARGET_THUMB1) - { - if (regno == STACK_POINTER_REGNUM) -@@ -22826,10 +23231,12 @@ - NEON_BINOP, - NEON_TERNOP, - NEON_UNOP, -+ NEON_BSWAP, - NEON_GETLANE, - NEON_SETLANE, - NEON_CREATE, - NEON_RINT, -+ NEON_COPYSIGNF, - NEON_DUP, - NEON_DUPLANE, - NEON_COMBINE, -@@ -22847,7 +23254,6 @@ - NEON_FLOAT_NARROW, - NEON_FIXCONV, - NEON_SELECT, -- NEON_RESULTPAIR, - NEON_REINTERP, - NEON_VTBL, - NEON_VTBX, -@@ -23216,6 +23622,9 @@ - ARM_BUILTIN_CRC32CH, - ARM_BUILTIN_CRC32CW, - -+ ARM_BUILTIN_GET_FPSCR, -+ ARM_BUILTIN_SET_FPSCR, -+ - #undef CRYPTO1 - #undef CRYPTO2 - #undef CRYPTO3 -@@ -23293,14 +23702,19 @@ - - tree V8QI_type_node; - tree V4HI_type_node; -+ tree V4UHI_type_node; - tree V4HF_type_node; - tree V2SI_type_node; -+ tree V2USI_type_node; - tree V2SF_type_node; - tree V16QI_type_node; - tree V8HI_type_node; -+ tree V8UHI_type_node; - tree V4SI_type_node; -+ tree V4USI_type_node; - tree V4SF_type_node; - tree V2DI_type_node; -+ tree V2UDI_type_node; - - tree intUQI_type_node; - tree intUHI_type_node; -@@ -23312,27 +23726,6 @@ - tree intCI_type_node; - tree intXI_type_node; - -- tree V8QI_pointer_node; -- tree V4HI_pointer_node; -- tree V2SI_pointer_node; -- tree V2SF_pointer_node; -- tree V16QI_pointer_node; -- tree V8HI_pointer_node; -- tree V4SI_pointer_node; -- tree V4SF_pointer_node; -- tree V2DI_pointer_node; -- -- tree void_ftype_pv8qi_v8qi_v8qi; -- tree void_ftype_pv4hi_v4hi_v4hi; -- tree void_ftype_pv2si_v2si_v2si; -- tree void_ftype_pv2sf_v2sf_v2sf; -- tree void_ftype_pdi_di_di; -- tree void_ftype_pv16qi_v16qi_v16qi; -- tree void_ftype_pv8hi_v8hi_v8hi; -- tree void_ftype_pv4si_v4si_v4si; -- tree void_ftype_pv4sf_v4sf_v4sf; -- tree void_ftype_pv2di_v2di_v2di; -- - tree reinterp_ftype_dreg[NUM_DREG_TYPES][NUM_DREG_TYPES]; - tree reinterp_ftype_qreg[NUM_QREG_TYPES][NUM_QREG_TYPES]; - tree dreg_types[NUM_DREG_TYPES], qreg_types[NUM_QREG_TYPES]; -@@ -23396,6 +23789,12 @@ - const_intDI_pointer_node = build_pointer_type (const_intDI_node); - const_float_pointer_node = build_pointer_type (const_float_node); - -+ /* Unsigned integer types for various mode sizes. */ -+ intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode)); -+ intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode)); -+ intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode)); -+ intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode)); -+ neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode)); - /* Now create vector types based on our NEON element types. */ - /* 64-bit vectors. */ - V8QI_type_node = -@@ -23402,10 +23801,14 @@ - build_vector_type_for_mode (neon_intQI_type_node, V8QImode); - V4HI_type_node = - build_vector_type_for_mode (neon_intHI_type_node, V4HImode); -+ V4UHI_type_node = -+ build_vector_type_for_mode (intUHI_type_node, V4HImode); - V4HF_type_node = - build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode); - V2SI_type_node = - build_vector_type_for_mode (neon_intSI_type_node, V2SImode); -+ V2USI_type_node = -+ build_vector_type_for_mode (intUSI_type_node, V2SImode); - V2SF_type_node = - build_vector_type_for_mode (neon_float_type_node, V2SFmode); - /* 128-bit vectors. 
*/ -@@ -23413,21 +23816,20 @@ - build_vector_type_for_mode (neon_intQI_type_node, V16QImode); - V8HI_type_node = - build_vector_type_for_mode (neon_intHI_type_node, V8HImode); -+ V8UHI_type_node = -+ build_vector_type_for_mode (intUHI_type_node, V8HImode); - V4SI_type_node = - build_vector_type_for_mode (neon_intSI_type_node, V4SImode); -+ V4USI_type_node = -+ build_vector_type_for_mode (intUSI_type_node, V4SImode); - V4SF_type_node = - build_vector_type_for_mode (neon_float_type_node, V4SFmode); - V2DI_type_node = - build_vector_type_for_mode (neon_intDI_type_node, V2DImode); -+ V2UDI_type_node = -+ build_vector_type_for_mode (intUDI_type_node, V2DImode); - -- /* Unsigned integer types for various mode sizes. */ -- intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode)); -- intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode)); -- intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode)); -- intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode)); -- neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode)); - -- - (*lang_hooks.types.register_builtin_type) (intUQI_type_node, - "__builtin_neon_uqi"); - (*lang_hooks.types.register_builtin_type) (intUHI_type_node, -@@ -23458,53 +23860,8 @@ - (*lang_hooks.types.register_builtin_type) (intXI_type_node, - "__builtin_neon_xi"); - -- /* Pointers to vector types. */ -- V8QI_pointer_node = build_pointer_type (V8QI_type_node); -- V4HI_pointer_node = build_pointer_type (V4HI_type_node); -- V2SI_pointer_node = build_pointer_type (V2SI_type_node); -- V2SF_pointer_node = build_pointer_type (V2SF_type_node); -- V16QI_pointer_node = build_pointer_type (V16QI_type_node); -- V8HI_pointer_node = build_pointer_type (V8HI_type_node); -- V4SI_pointer_node = build_pointer_type (V4SI_type_node); -- V4SF_pointer_node = build_pointer_type (V4SF_type_node); -- V2DI_pointer_node = build_pointer_type (V2DI_type_node); -- -- /* Operations which return results as pairs. 
*/ -- void_ftype_pv8qi_v8qi_v8qi = -- build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node, -- V8QI_type_node, NULL); -- void_ftype_pv4hi_v4hi_v4hi = -- build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node, -- V4HI_type_node, NULL); -- void_ftype_pv2si_v2si_v2si = -- build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node, -- V2SI_type_node, NULL); -- void_ftype_pv2sf_v2sf_v2sf = -- build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node, -- V2SF_type_node, NULL); -- void_ftype_pdi_di_di = -- build_function_type_list (void_type_node, intDI_pointer_node, -- neon_intDI_type_node, neon_intDI_type_node, NULL); -- void_ftype_pv16qi_v16qi_v16qi = -- build_function_type_list (void_type_node, V16QI_pointer_node, -- V16QI_type_node, V16QI_type_node, NULL); -- void_ftype_pv8hi_v8hi_v8hi = -- build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node, -- V8HI_type_node, NULL); -- void_ftype_pv4si_v4si_v4si = -- build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node, -- V4SI_type_node, NULL); -- void_ftype_pv4sf_v4sf_v4sf = -- build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node, -- V4SF_type_node, NULL); -- void_ftype_pv2di_v2di_v2di = -- build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node, -- V2DI_type_node, NULL); -- - if (TARGET_CRYPTO && TARGET_HARD_FLOAT) - { -- tree V4USI_type_node = -- build_vector_type_for_mode (intUSI_type_node, V4SImode); - - tree V16UQI_type_node = - build_vector_type_for_mode (intUQI_type_node, V16QImode); -@@ -23790,25 +24147,6 @@ - } - break; - -- case NEON_RESULTPAIR: -- { -- switch (insn_data[d->code].operand[1].mode) -- { -- case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break; -- case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break; -- case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break; -- case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break; -- case DImode: ftype = void_ftype_pdi_di_di; break; -- case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break; -- case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break; -- case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break; -- case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break; -- case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break; -- default: gcc_unreachable (); -- } -- } -- break; -- - case NEON_REINTERP: - { - /* We iterate over NUM_DREG_TYPES doubleword types, -@@ -23868,6 +24206,47 @@ - ftype = build_function_type_list (return_type, eltype, NULL); - break; - } -+ case NEON_BSWAP: -+ { -+ tree eltype = NULL_TREE; -+ switch (insn_data[d->code].operand[1].mode) -+ { -+ case V4HImode: -+ eltype = V4UHI_type_node; -+ break; -+ case V8HImode: -+ eltype = V8UHI_type_node; -+ break; -+ case V2SImode: -+ eltype = V2USI_type_node; -+ break; -+ case V4SImode: -+ eltype = V4USI_type_node; -+ break; -+ case V2DImode: -+ eltype = V2UDI_type_node; -+ break; -+ default: gcc_unreachable (); -+ } -+ ftype = build_function_type_list (eltype, eltype, NULL); -+ break; -+ } -+ case NEON_COPYSIGNF: -+ { -+ tree eltype = NULL_TREE; -+ switch (insn_data[d->code].operand[1].mode) -+ { -+ case V2SFmode: -+ eltype = V2SF_type_node; -+ break; -+ case V4SFmode: -+ eltype = V4SF_type_node; -+ break; -+ default: gcc_unreachable (); -+ } -+ ftype = build_function_type_list (eltype, eltype, NULL); -+ break; -+ } - default: - gcc_unreachable (); - } -@@ -24014,6 +24393,15 @@ - IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ) - IWMMXT_BUILTIN2 
(iwmmxt_wmacsz, WMACSZ) - -+ -+#define FP_BUILTIN(L, U) \ -+ {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \ -+ UNKNOWN, 0}, -+ -+ FP_BUILTIN (get_fpscr, GET_FPSCR) -+ FP_BUILTIN (set_fpscr, SET_FPSCR) -+#undef FP_BUILTIN -+ - #define CRC32_BUILTIN(L, U) \ - {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \ - UNKNOWN, 0}, -@@ -24528,6 +24916,21 @@ - - if (TARGET_CRC32) - arm_init_crc32_builtins (); -+ -+ if (TARGET_VFP && TARGET_HARD_FLOAT) -+ { -+ tree ftype_set_fpscr -+ = build_function_type_list (void_type_node, unsigned_type_node, NULL); -+ tree ftype_get_fpscr -+ = build_function_type_list (unsigned_type_node, NULL); -+ -+ arm_builtin_decls[ARM_BUILTIN_GET_FPSCR] -+ = add_builtin_function ("__builtin_arm_ldfscr", ftype_get_fpscr, -+ ARM_BUILTIN_GET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE); -+ arm_builtin_decls[ARM_BUILTIN_SET_FPSCR] -+ = add_builtin_function ("__builtin_arm_stfscr", ftype_set_fpscr, -+ ARM_BUILTIN_SET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE); -+ } - } - - /* Return the ARM builtin for CODE. */ -@@ -25042,20 +25445,17 @@ - case NEON_SPLIT: - case NEON_FLOAT_WIDEN: - case NEON_FLOAT_NARROW: -+ case NEON_BSWAP: - case NEON_REINTERP: - return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, - NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); - -+ case NEON_COPYSIGNF: - case NEON_COMBINE: - case NEON_VTBL: - return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, - NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); - -- case NEON_RESULTPAIR: -- return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode, -- NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, -- NEON_ARG_STOP); -- - case NEON_LANEMUL: - case NEON_LANEMULL: - case NEON_LANEMULH: -@@ -25117,24 +25517,6 @@ - emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src)); - } - --/* Emit code to place a Neon pair result in memory locations (with equal -- registers). */ --void --neon_emit_pair_result_insn (enum machine_mode mode, -- rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr, -- rtx op1, rtx op2) --{ -- rtx mem = gen_rtx_MEM (mode, destaddr); -- rtx tmp1 = gen_reg_rtx (mode); -- rtx tmp2 = gen_reg_rtx (mode); -- -- emit_insn (intfn (tmp1, op1, op2, tmp2)); -- -- emit_move_insn (mem, tmp1); -- mem = adjust_address (mem, mode, GET_MODE_SIZE (mode)); -- emit_move_insn (mem, tmp2); --} -- - /* Set up OPERANDS for a register copy from SRC to DEST, taking care - not to early-clobber SRC registers in the process. 
- -@@ -25255,6 +25637,25 @@ - - switch (fcode) - { -+ case ARM_BUILTIN_GET_FPSCR: -+ case ARM_BUILTIN_SET_FPSCR: -+ if (fcode == ARM_BUILTIN_GET_FPSCR) -+ { -+ icode = CODE_FOR_get_fpscr; -+ target = gen_reg_rtx (SImode); -+ pat = GEN_FCN (icode) (target); -+ } -+ else -+ { -+ target = NULL_RTX; -+ icode = CODE_FOR_set_fpscr; -+ arg0 = CALL_EXPR_ARG (exp, 0); -+ op0 = expand_normal (arg0); -+ pat = GEN_FCN (icode) (op0); -+ } -+ emit_insn (pat); -+ return target; -+ - case ARM_BUILTIN_TEXTRMSB: - case ARM_BUILTIN_TEXTRMUB: - case ARM_BUILTIN_TEXTRMSH: -@@ -25888,7 +26289,7 @@ - int pops_needed; - unsigned available; - unsigned required; -- int mode; -+ enum machine_mode mode; - int size; - int restore_a4 = FALSE; - -@@ -29555,10 +29956,10 @@ - { - enum machine_mode in_mode, out_mode; - int in_n, out_n; -+ bool out_unsigned_p = TYPE_UNSIGNED (type_out); - - if (TREE_CODE (type_out) != VECTOR_TYPE -- || TREE_CODE (type_in) != VECTOR_TYPE -- || !(TARGET_NEON && TARGET_FPU_ARMV8 && flag_unsafe_math_optimizations)) -+ || TREE_CODE (type_in) != VECTOR_TYPE) - return NULL_TREE; - - out_mode = TYPE_MODE (TREE_TYPE (type_out)); -@@ -29570,7 +29971,13 @@ - decl of the vectorized builtin for the appropriate vector mode. - NULL_TREE is returned if no such builtin is available. */ - #undef ARM_CHECK_BUILTIN_MODE --#define ARM_CHECK_BUILTIN_MODE(C) \ -+#define ARM_CHECK_BUILTIN_MODE(C) \ -+ (TARGET_NEON && TARGET_FPU_ARMV8 \ -+ && flag_unsafe_math_optimizations \ -+ && ARM_CHECK_BUILTIN_MODE_1 (C)) -+ -+#undef ARM_CHECK_BUILTIN_MODE_1 -+#define ARM_CHECK_BUILTIN_MODE_1(C) \ - (out_mode == SFmode && out_n == C \ - && in_mode == SFmode && in_n == C) - -@@ -29595,6 +30002,67 @@ - return ARM_FIND_VRINT_VARIANT (vrintz); - case BUILT_IN_ROUNDF: - return ARM_FIND_VRINT_VARIANT (vrinta); -+#undef ARM_CHECK_BUILTIN_MODE_1 -+#define ARM_CHECK_BUILTIN_MODE_1(C) \ -+ (out_mode == SImode && out_n == C \ -+ && in_mode == SFmode && in_n == C) -+ -+#define ARM_FIND_VCVT_VARIANT(N) \ -+ (ARM_CHECK_BUILTIN_MODE (2) \ -+ ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sfv2si, false) \ -+ : (ARM_CHECK_BUILTIN_MODE (4) \ -+ ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sfv4si, false) \ -+ : NULL_TREE)) -+ -+#define ARM_FIND_VCVTU_VARIANT(N) \ -+ (ARM_CHECK_BUILTIN_MODE (2) \ -+ ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv2sfv2si, false) \ -+ : (ARM_CHECK_BUILTIN_MODE (4) \ -+ ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv4sfv4si, false) \ -+ : NULL_TREE)) -+ case BUILT_IN_LROUNDF: -+ return out_unsigned_p -+ ? ARM_FIND_VCVTU_VARIANT (vcvta) -+ : ARM_FIND_VCVT_VARIANT (vcvta); -+ case BUILT_IN_LCEILF: -+ return out_unsigned_p -+ ? ARM_FIND_VCVTU_VARIANT (vcvtp) -+ : ARM_FIND_VCVT_VARIANT (vcvtp); -+ case BUILT_IN_LFLOORF: -+ return out_unsigned_p -+ ? 
ARM_FIND_VCVTU_VARIANT (vcvtm) -+ : ARM_FIND_VCVT_VARIANT (vcvtm); -+#undef ARM_CHECK_BUILTIN_MODE -+#define ARM_CHECK_BUILTIN_MODE(C, N) \ -+ (out_mode == N##mode && out_n == C \ -+ && in_mode == N##mode && in_n == C) -+ case BUILT_IN_BSWAP16: -+ if (ARM_CHECK_BUILTIN_MODE (4, HI)) -+ return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4hi, false); -+ else if (ARM_CHECK_BUILTIN_MODE (8, HI)) -+ return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv8hi, false); -+ else -+ return NULL_TREE; -+ case BUILT_IN_BSWAP32: -+ if (ARM_CHECK_BUILTIN_MODE (2, SI)) -+ return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2si, false); -+ else if (ARM_CHECK_BUILTIN_MODE (4, SI)) -+ return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4si, false); -+ else -+ return NULL_TREE; -+ case BUILT_IN_BSWAP64: -+ if (ARM_CHECK_BUILTIN_MODE (2, DI)) -+ return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2di, false); -+ else -+ return NULL_TREE; -+ case BUILT_IN_COPYSIGNF: -+ if (ARM_CHECK_BUILTIN_MODE (2, SF)) -+ return arm_builtin_decl (ARM_BUILTIN_NEON_copysignfv2sf, false); -+ else if (ARM_CHECK_BUILTIN_MODE (4, SF)) -+ return arm_builtin_decl (ARM_BUILTIN_NEON_copysignfv4sf, false); -+ else -+ return NULL_TREE; -+ - default: - return NULL_TREE; - } -@@ -29601,9 +30069,12 @@ - } - return NULL_TREE; - } -+#undef ARM_FIND_VCVT_VARIANT -+#undef ARM_FIND_VCVTU_VARIANT - #undef ARM_CHECK_BUILTIN_MODE - #undef ARM_FIND_VRINT_VARIANT - -+ - /* The AAPCS sets the maximum alignment of a vector to 64 bits. */ - static HOST_WIDE_INT - arm_vector_alignment (const_tree type) -@@ -31174,6 +31645,75 @@ - return false; - } - -+static void -+arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) -+{ -+ const unsigned ARM_FE_INVALID = 1; -+ const unsigned ARM_FE_DIVBYZERO = 2; -+ const unsigned ARM_FE_OVERFLOW = 4; -+ const unsigned ARM_FE_UNDERFLOW = 8; -+ const unsigned ARM_FE_INEXACT = 16; -+ const unsigned HOST_WIDE_INT ARM_FE_ALL_EXCEPT = (ARM_FE_INVALID -+ | ARM_FE_DIVBYZERO -+ | ARM_FE_OVERFLOW -+ | ARM_FE_UNDERFLOW -+ | ARM_FE_INEXACT); -+ const unsigned HOST_WIDE_INT ARM_FE_EXCEPT_SHIFT = 8; -+ tree fenv_var, get_fpscr, set_fpscr, mask, ld_fenv, masked_fenv; -+ tree new_fenv_var, reload_fenv, restore_fnenv; -+ tree update_call, atomic_feraiseexcept, hold_fnclex; -+ -+ if (!TARGET_VFP || !TARGET_HARD_FLOAT) -+ return; -+ -+ /* Generate the equivalent of : -+ unsigned int fenv_var; -+ fenv_var = __builtin_arm_get_fpscr (); -+ -+ unsigned int masked_fenv; -+ masked_fenv = fenv_var & mask; -+ -+ __builtin_arm_set_fpscr (masked_fenv); */ -+ -+ fenv_var = create_tmp_var (unsigned_type_node, NULL); -+ get_fpscr = arm_builtin_decls[ARM_BUILTIN_GET_FPSCR]; -+ set_fpscr = arm_builtin_decls[ARM_BUILTIN_SET_FPSCR]; -+ mask = build_int_cst (unsigned_type_node, -+ ~((ARM_FE_ALL_EXCEPT << ARM_FE_EXCEPT_SHIFT) -+ | ARM_FE_ALL_EXCEPT)); -+ ld_fenv = build2 (MODIFY_EXPR, unsigned_type_node, -+ fenv_var, build_call_expr (get_fpscr, 0)); -+ masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask); -+ hold_fnclex = build_call_expr (set_fpscr, 1, masked_fenv); -+ *hold = build2 (COMPOUND_EXPR, void_type_node, -+ build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv), -+ hold_fnclex); -+ -+ /* Store the value of masked_fenv to clear the exceptions: -+ __builtin_arm_set_fpscr (masked_fenv); */ -+ -+ *clear = build_call_expr (set_fpscr, 1, masked_fenv); -+ -+ /* Generate the equivalent of : -+ unsigned int new_fenv_var; -+ new_fenv_var = __builtin_arm_get_fpscr (); -+ -+ __builtin_arm_set_fpscr (fenv_var); -+ -+ 
__atomic_feraiseexcept (new_fenv_var); */ -+ -+ new_fenv_var = create_tmp_var (unsigned_type_node, NULL); -+ reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, new_fenv_var, -+ build_call_expr (get_fpscr, 0)); -+ restore_fnenv = build_call_expr (set_fpscr, 1, fenv_var); -+ atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT); -+ update_call = build_call_expr (atomic_feraiseexcept, 1, -+ fold_convert (integer_type_node, new_fenv_var)); -+ *update = build2 (COMPOUND_EXPR, void_type_node, -+ build2 (COMPOUND_EXPR, void_type_node, -+ reload_fenv, restore_fnenv), update_call); -+} -+ - /* return TRUE if x is a reference to a value in a constant pool */ - extern bool - arm_is_constant_pool_ref (rtx x) ---- a/src/gcc/config/arm/arm.h -+++ b/src/gcc/config/arm/arm.h -@@ -166,7 +166,10 @@ - builtin_define ("__ARM_EABI__"); \ - } \ - if (TARGET_IDIV) \ -- builtin_define ("__ARM_ARCH_EXT_IDIV__"); \ -+ { \ -+ builtin_define ("__ARM_ARCH_EXT_IDIV__"); \ -+ builtin_define ("__ARM_FEATURE_IDIV"); \ -+ } \ - } while (0) - - #include "config/arm/arm-opts.h" -@@ -298,6 +301,9 @@ - /* FPU supports VFPv3 instructions. */ - #define TARGET_VFP3 (TARGET_VFP && arm_fpu_desc->rev >= 3) - -+/* FPU supports FPv5 instructions. */ -+#define TARGET_VFP5 (TARGET_VFP && arm_fpu_desc->rev >= 5) -+ - /* FPU only supports VFP single-precision instructions. */ - #define TARGET_VFP_SINGLE (TARGET_VFP && arm_fpu_desc->regs == VFP_REG_SINGLE) - -@@ -442,9 +448,6 @@ - #define TARGET_DEFAULT_FLOAT_ABI ARM_FLOAT_ABI_SOFT - #endif - --#define LARGEST_EXPONENT_IS_NORMAL(bits) \ -- ((bits) == 16 && arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE) -- - #ifndef ARM_DEFAULT_ABI - #define ARM_DEFAULT_ABI ARM_ABI_APCS - #endif ---- a/src/gcc/config/arm/unspecs.md -+++ b/src/gcc/config/arm/unspecs.md -@@ -143,6 +143,8 @@ - VUNSPEC_SLX ; Represent a store-register-release-exclusive. - VUNSPEC_LDA ; Represent a store-register-acquire. - VUNSPEC_STL ; Represent a store-register-release. -+ VUNSPEC_GET_FPSCR ; Represent fetch of FPSCR content. -+ VUNSPEC_SET_FPSCR ; Represent assign of FPSCR content. - ]) - - ;; Enumerators for NEON unspecs. ---- a/src/gcc/config/arm/cortex-m4.md -+++ b/src/gcc/config/arm/cortex-m4.md -@@ -34,7 +34,7 @@ - (ior (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ - alu_reg,alus_reg,logic_reg,logics_reg,\ - adc_imm,adcs_imm,adc_reg,adcs_reg,\ -- adr,bfm,rev,\ -+ adr,bfm,clz,rbit,rev,\ - shift_imm,shift_reg,extend,\ - alu_shift_imm,alus_shift_imm,\ - logic_shift_imm,logics_shift_imm,\ ---- a/src/gcc/config/arm/arm-modes.def -+++ b/src/gcc/config/arm/arm-modes.def -@@ -21,9 +21,6 @@ - along with GCC; see the file COPYING3. If not see - <http://www.gnu.org/licenses/>. */ - --/* Extended precision floating point. -- FIXME What format is this? 
*/ --FLOAT_MODE (XF, 12, 0); - - /* Half-precision floating point */ - FLOAT_MODE (HF, 2, 0); ---- a/src/gcc/config/arm/arm-cores.def -+++ b/src/gcc/config/arm/arm-cores.def -@@ -141,7 +141,7 @@ - ARM_CORE("generic-armv7-a", genericv7a, genericv7a, 7A, FL_LDSCHED, cortex) - ARM_CORE("cortex-a5", cortexa5, cortexa5, 7A, FL_LDSCHED, cortex_a5) - ARM_CORE("cortex-a7", cortexa7, cortexa7, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a7) --ARM_CORE("cortex-a8", cortexa8, cortexa8, 7A, FL_LDSCHED, cortex) -+ARM_CORE("cortex-a8", cortexa8, cortexa8, 7A, FL_LDSCHED, cortex_a8) - ARM_CORE("cortex-a9", cortexa9, cortexa9, 7A, FL_LDSCHED, cortex_a9) - ARM_CORE("cortex-a12", cortexa12, cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a12) - ARM_CORE("cortex-a15", cortexa15, cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a15) -@@ -149,6 +149,7 @@ - ARM_CORE("cortex-r4f", cortexr4f, cortexr4f, 7R, FL_LDSCHED, cortex) - ARM_CORE("cortex-r5", cortexr5, cortexr5, 7R, FL_LDSCHED | FL_ARM_DIV, cortex) - ARM_CORE("cortex-r7", cortexr7, cortexr7, 7R, FL_LDSCHED | FL_ARM_DIV, cortex) -+ARM_CORE("cortex-m7", cortexm7, cortexm7, 7EM, FL_LDSCHED, v7m) - ARM_CORE("cortex-m4", cortexm4, cortexm4, 7EM, FL_LDSCHED, v7m) - ARM_CORE("cortex-m3", cortexm3, cortexm3, 7M, FL_LDSCHED, v7m) - ARM_CORE("marvell-pj4", marvell_pj4, marvell_pj4, 7A, FL_LDSCHED, 9e) ---- a/src/gcc/config/arm/cortex-r4.md -+++ b/src/gcc/config/arm/cortex-r4.md -@@ -81,7 +81,7 @@ - (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ - alu_reg,alus_reg,logic_reg,logics_reg,\ - adc_imm,adcs_imm,adc_reg,adcs_reg,\ -- adr,bfm,rev,\ -+ adr,bfm,clz,rbit,rev,\ - shift_imm,shift_reg,mvn_imm,mvn_reg")) - "cortex_r4_alu") - ---- a/src/gcc/config/arm/arm-tune.md -+++ b/src/gcc/config/arm/arm-tune.md -@@ -28,7 +28,8 @@ - genericv7a,cortexa5,cortexa7, - cortexa8,cortexa9,cortexa12, - cortexa15,cortexr4,cortexr4f, -- cortexr5,cortexr7,cortexm4, -- cortexm3,marvell_pj4,cortexa15cortexa7, -- cortexa53,cortexa57,cortexa57cortexa53" -+ cortexr5,cortexr7,cortexm7, -+ cortexm4,cortexm3,marvell_pj4, -+ cortexa15cortexa7,cortexa53,cortexa57, -+ cortexa57cortexa53" - (const (symbol_ref "((enum attr_tune) arm_tune)"))) ---- a/src/gcc/config/arm/arm-protos.h -+++ b/src/gcc/config/arm/arm-protos.h -@@ -126,7 +126,6 @@ - extern int arm_const_double_inline_cost (rtx); - extern bool arm_const_double_by_parts (rtx); - extern bool arm_const_double_by_immediates (rtx); --extern const char *fp_immediate_constant (rtx); - extern void arm_emit_call_insn (rtx, rtx); - extern const char *output_call (rtx *); - extern const char *output_call_mem (rtx *); -@@ -150,7 +149,7 @@ - extern int arm_emit_vector_const (FILE *, rtx); - extern void arm_emit_fp16_const (rtx c); - extern const char * arm_output_load_gr (rtx *); --extern const char *vfp_output_fstmd (rtx *); -+extern const char *vfp_output_vstmd (rtx *); - extern void arm_output_multireg_pop (rtx *, bool, rtx, bool, bool); - extern void arm_set_return_address (rtx, rtx); - extern int arm_eliminable_register (rtx); -@@ -273,6 +272,11 @@ - const struct cpu_vec_costs* vec_costs; - /* Prefer Neon for 64-bit bitops. */ - bool prefer_neon_for_64bits; -+ /* Prefer 32-bit encoding instead of flag-setting 16-bit encoding. */ -+ bool disparage_flag_setting_t16_encodings; -+ /* Prefer 32-bit encoding instead of 16-bit encoding where subset of flags -+ would be set. 
*/ -+ bool disparage_partial_flag_setting_t16_encodings; - }; - - extern const struct tune_params *current_tune; ---- a/src/gcc/config/arm/vfp.md -+++ b/src/gcc/config/arm/vfp.md -@@ -41,11 +41,11 @@ - case 5: - return \"str%?\\t%1, %0\"; - case 6: -- return \"fmsr%?\\t%0, %1\\t%@ int\"; -+ return \"vmov%?\\t%0, %1\\t%@ int\"; - case 7: -- return \"fmrs%?\\t%0, %1\\t%@ int\"; -+ return \"vmov%?\\t%0, %1\\t%@ int\"; - case 8: -- return \"fcpys%?\\t%0, %1\\t%@ int\"; -+ return \"vmov%?.f32\\t%0, %1\\t%@ int\"; - case 9: case 10: - return output_move_vfp (operands); - default: -@@ -87,11 +87,11 @@ - case 8: - return \"str%?\\t%1, %0\"; - case 9: -- return \"fmsr%?\\t%0, %1\\t%@ int\"; -+ return \"vmov%?\\t%0, %1\\t%@ int\"; - case 10: -- return \"fmrs%?\\t%0, %1\\t%@ int\"; -+ return \"vmov%?\\t%0, %1\\t%@ int\"; - case 11: -- return \"fcpys%?\\t%0, %1\\t%@ int\"; -+ return \"vmov%?.f32\\t%0, %1\\t%@ int\"; - case 12: case 13: - return output_move_vfp (operands); - default: -@@ -100,7 +100,7 @@ - " - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no,no,no,no,no,no") -- (set_attr "type" "mov_reg,mov_reg,mov_reg,mvn_reg,mov_reg,load1,load1,store1,store1,f_mcr,f_mrc,fmov,f_loads,f_stores") -+ (set_attr "type" "mov_reg,mov_reg,mov_reg,mvn_reg,mov_imm,load1,load1,store1,store1,f_mcr,f_mrc,fmov,f_loads,f_stores") - (set_attr "length" "2,4,2,4,4,4,4,4,4,4,4,4,4,4") - (set_attr "pool_range" "*,*,*,*,*,1018,4094,*,*,*,*,*,1018,*") - (set_attr "neg_pool_range" "*,*,*,*,*, 0, 0,*,*,*,*,*,1008,*")] -@@ -130,14 +130,14 @@ - case 6: - return output_move_double (operands, true, NULL); - case 7: -- return \"fmdrr%?\\t%P0, %Q1, %R1\\t%@ int\"; -+ return \"vmov%?\\t%P0, %Q1, %R1\\t%@ int\"; - case 8: -- return \"fmrrd%?\\t%Q0, %R0, %P1\\t%@ int\"; -+ return \"vmov%?\\t%Q0, %R0, %P1\\t%@ int\"; - case 9: - if (TARGET_VFP_SINGLE) -- return \"fcpys%?\\t%0, %1\\t%@ int\;fcpys%?\\t%p0, %p1\\t%@ int\"; -+ return \"vmov%?.f32\\t%0, %1\\t%@ int\;vmov%?.f32\\t%p0, %p1\\t%@ int\"; - else -- return \"fcpyd%?\\t%P0, %P1\\t%@ int\"; -+ return \"vmov%?.f64\\t%P0, %P1\\t%@ int\"; - case 10: case 11: - return output_move_vfp (operands); - default: -@@ -181,11 +181,11 @@ - case 6: - return output_move_double (operands, true, NULL); - case 7: -- return \"fmdrr%?\\t%P0, %Q1, %R1\\t%@ int\"; -+ return \"vmov%?\\t%P0, %Q1, %R1\\t%@ int\"; - case 8: -- return \"fmrrd%?\\t%Q0, %R0, %P1\\t%@ int\"; -+ return \"vmov%?\\t%Q0, %R0, %P1\\t%@ int\"; - case 9: -- return \"fcpyd%?\\t%P0, %P1\\t%@ int\"; -+ return \"vmov%?.f64\\t%P0, %P1\\t%@ int\"; - case 10: case 11: - return output_move_vfp (operands); - default: -@@ -229,13 +229,13 @@ - case 3: /* memory from ARM register */ - return \"strh\\t%1, %0\\t%@ __fp16\"; - case 4: /* S register from S register */ -- return \"fcpys\\t%0, %1\"; -+ return \"vmov.f32\\t%0, %1\"; - case 5: /* ARM register from ARM register */ - return \"mov\\t%0, %1\\t%@ __fp16\"; - case 6: /* S register from ARM register */ -- return \"fmsr\\t%0, %1\"; -+ return \"vmov\\t%0, %1\"; - case 7: /* ARM register from S register */ -- return \"fmrs\\t%0, %1\"; -+ return \"vmov\\t%0, %1\"; - case 8: /* ARM register from constant */ - { - REAL_VALUE_TYPE r; -@@ -280,13 +280,13 @@ - case 1: /* memory from ARM register */ - return \"strh\\t%1, %0\\t%@ __fp16\"; - case 2: /* S register from S register */ -- return \"fcpys\\t%0, %1\"; -+ return \"vmov.f32\\t%0, %1\"; - case 3: /* ARM register from ARM register */ - return \"mov\\t%0, %1\\t%@ __fp16\"; - case 4: /* S register from ARM 
register */ -- return \"fmsr\\t%0, %1\"; -+ return \"vmov\\t%0, %1\"; - case 5: /* ARM register from S register */ -- return \"fmrs\\t%0, %1\"; -+ return \"vmov\\t%0, %1\"; - case 6: /* ARM register from constant */ - { - REAL_VALUE_TYPE r; -@@ -322,7 +322,7 @@ - - (define_insn "*movsf_vfp" - [(set (match_operand:SF 0 "nonimmediate_operand" "=t,?r,t ,t ,Uv,r ,m,t,r") -- (match_operand:SF 1 "general_operand" " ?r,t,Dv,UvE,t, mE,r,t,r"))] -+ (match_operand:SF 1 "general_operand" " ?r,t,Dv,UvE,t, mE,r,t,r"))] - "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP - && ( s_register_operand (operands[0], SFmode) - || s_register_operand (operands[1], SFmode))" -@@ -330,11 +330,11 @@ - switch (which_alternative) - { - case 0: -- return \"fmsr%?\\t%0, %1\"; -+ return \"vmov%?\\t%0, %1\"; - case 1: -- return \"fmrs%?\\t%0, %1\"; -+ return \"vmov%?\\t%0, %1\"; - case 2: -- return \"fconsts%?\\t%0, #%G1\"; -+ return \"vmov%?.f32\\t%0, %1\"; - case 3: case 4: - return output_move_vfp (operands); - case 5: -@@ -342,7 +342,7 @@ - case 6: - return \"str%?\\t%1, %0\\t%@ float\"; - case 7: -- return \"fcpys%?\\t%0, %1\"; -+ return \"vmov%?.f32\\t%0, %1\"; - case 8: - return \"mov%?\\t%0, %1\\t%@ float\"; - default: -@@ -366,11 +366,11 @@ - switch (which_alternative) - { - case 0: -- return \"fmsr%?\\t%0, %1\"; -+ return \"vmov%?\\t%0, %1\"; - case 1: -- return \"fmrs%?\\t%0, %1\"; -+ return \"vmov%?\\t%0, %1\"; - case 2: -- return \"fconsts%?\\t%0, #%G1\"; -+ return \"vmov%?.f32\\t%0, %1\"; - case 3: case 4: - return output_move_vfp (operands); - case 5: -@@ -378,7 +378,7 @@ - case 6: - return \"str%?\\t%1, %0\\t%@ float\"; - case 7: -- return \"fcpys%?\\t%0, %1\"; -+ return \"vmov%?.f32\\t%0, %1\"; - case 8: - return \"mov%?\\t%0, %1\\t%@ float\"; - default: -@@ -406,12 +406,12 @@ - switch (which_alternative) - { - case 0: -- return \"fmdrr%?\\t%P0, %Q1, %R1\"; -+ return \"vmov%?\\t%P0, %Q1, %R1\"; - case 1: -- return \"fmrrd%?\\t%Q0, %R0, %P1\"; -+ return \"vmov%?\\t%Q0, %R0, %P1\"; - case 2: - gcc_assert (TARGET_VFP_DOUBLE); -- return \"fconstd%?\\t%P0, #%G1\"; -+ return \"vmov%?.f64\\t%P0, %1\"; - case 3: case 4: - return output_move_vfp (operands); - case 5: case 6: -@@ -418,9 +418,9 @@ - return output_move_double (operands, true, NULL); - case 7: - if (TARGET_VFP_SINGLE) -- return \"fcpys%?\\t%0, %1\;fcpys%?\\t%p0, %p1\"; -+ return \"vmov%?.f32\\t%0, %1\;vmov%?.f32\\t%p0, %p1\"; - else -- return \"fcpyd%?\\t%P0, %P1\"; -+ return \"vmov%?.f64\\t%P0, %P1\"; - case 8: - return \"#\"; - default: -@@ -453,12 +453,12 @@ - switch (which_alternative) - { - case 0: -- return \"fmdrr%?\\t%P0, %Q1, %R1\"; -+ return \"vmov%?\\t%P0, %Q1, %R1\"; - case 1: -- return \"fmrrd%?\\t%Q0, %R0, %P1\"; -+ return \"vmov%?\\t%Q0, %R0, %P1\"; - case 2: - gcc_assert (TARGET_VFP_DOUBLE); -- return \"fconstd%?\\t%P0, #%G1\"; -+ return \"vmov%?.f64\\t%P0, %1\"; - case 3: case 4: - return output_move_vfp (operands); - case 5: case 6: case 8: -@@ -465,9 +465,9 @@ - return output_move_double (operands, true, NULL); - case 7: - if (TARGET_VFP_SINGLE) -- return \"fcpys%?\\t%0, %1\;fcpys%?\\t%p0, %p1\"; -+ return \"vmov%?.f32\\t%0, %1\;vmov%?.f32\\t%p0, %p1\"; - else -- return \"fcpyd%?\\t%P0, %P1\"; -+ return \"vmov%?.f64\\t%P0, %P1\"; - default: - abort (); - } -@@ -498,15 +498,15 @@ - (match_operand:SF 2 "s_register_operand" "t,0,t,?r,0,?r,t,0,t")))] - "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP" - "@ -- fcpys%D3\\t%0, %2 -- fcpys%d3\\t%0, %1 -- fcpys%D3\\t%0, %2\;fcpys%d3\\t%0, %1 -- fmsr%D3\\t%0, %2 -- fmsr%d3\\t%0, %1 -- 
fmsr%D3\\t%0, %2\;fmsr%d3\\t%0, %1 -- fmrs%D3\\t%0, %2 -- fmrs%d3\\t%0, %1 -- fmrs%D3\\t%0, %2\;fmrs%d3\\t%0, %1" -+ vmov%D3.f32\\t%0, %2 -+ vmov%d3.f32\\t%0, %1 -+ vmov%D3.f32\\t%0, %2\;vmov%d3.f32\\t%0, %1 -+ vmov%D3\\t%0, %2 -+ vmov%d3\\t%0, %1 -+ vmov%D3\\t%0, %2\;vmov%d3\\t%0, %1 -+ vmov%D3\\t%0, %2 -+ vmov%d3\\t%0, %1 -+ vmov%D3\\t%0, %2\;vmov%d3\\t%0, %1" - [(set_attr "conds" "use") - (set_attr "length" "4,4,8,4,4,8,4,4,8") - (set_attr "type" "fmov,fmov,fmov,f_mcr,f_mcr,f_mcr,f_mrc,f_mrc,f_mrc")] -@@ -521,15 +521,15 @@ - (match_operand:SF 2 "s_register_operand" "t,0,t,?r,0,?r,t,0,t")))] - "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP && !arm_restrict_it" - "@ -- it\\t%D3\;fcpys%D3\\t%0, %2 -- it\\t%d3\;fcpys%d3\\t%0, %1 -- ite\\t%D3\;fcpys%D3\\t%0, %2\;fcpys%d3\\t%0, %1 -- it\\t%D3\;fmsr%D3\\t%0, %2 -- it\\t%d3\;fmsr%d3\\t%0, %1 -- ite\\t%D3\;fmsr%D3\\t%0, %2\;fmsr%d3\\t%0, %1 -- it\\t%D3\;fmrs%D3\\t%0, %2 -- it\\t%d3\;fmrs%d3\\t%0, %1 -- ite\\t%D3\;fmrs%D3\\t%0, %2\;fmrs%d3\\t%0, %1" -+ it\\t%D3\;vmov%D3.f32\\t%0, %2 -+ it\\t%d3\;vmov%d3.f32\\t%0, %1 -+ ite\\t%D3\;vmov%D3.f32\\t%0, %2\;vmov%d3.f32\\t%0, %1 -+ it\\t%D3\;vmov%D3\\t%0, %2 -+ it\\t%d3\;vmov%d3\\t%0, %1 -+ ite\\t%D3\;vmov%D3\\t%0, %2\;vmov%d3\\t%0, %1 -+ it\\t%D3\;vmov%D3\\t%0, %2 -+ it\\t%d3\;vmov%d3\\t%0, %1 -+ ite\\t%D3\;vmov%D3\\t%0, %2\;vmov%d3\\t%0, %1" - [(set_attr "conds" "use") - (set_attr "length" "6,6,10,6,6,10,6,6,10") - (set_attr "type" "fmov,fmov,fmov,f_mcr,f_mcr,f_mcr,f_mrc,f_mrc,f_mrc")] -@@ -544,15 +544,15 @@ - (match_operand:DF 2 "s_register_operand" "w,0,w,?r,0,?r,w,0,w")))] - "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" - "@ -- fcpyd%D3\\t%P0, %P2 -- fcpyd%d3\\t%P0, %P1 -- fcpyd%D3\\t%P0, %P2\;fcpyd%d3\\t%P0, %P1 -- fmdrr%D3\\t%P0, %Q2, %R2 -- fmdrr%d3\\t%P0, %Q1, %R1 -- fmdrr%D3\\t%P0, %Q2, %R2\;fmdrr%d3\\t%P0, %Q1, %R1 -- fmrrd%D3\\t%Q0, %R0, %P2 -- fmrrd%d3\\t%Q0, %R0, %P1 -- fmrrd%D3\\t%Q0, %R0, %P2\;fmrrd%d3\\t%Q0, %R0, %P1" -+ vmov%D3.f64\\t%P0, %P2 -+ vmov%d3.f64\\t%P0, %P1 -+ vmov%D3.f64\\t%P0, %P2\;vmov%d3.f64\\t%P0, %P1 -+ vmov%D3\\t%P0, %Q2, %R2 -+ vmov%d3\\t%P0, %Q1, %R1 -+ vmov%D3\\t%P0, %Q2, %R2\;vmov%d3\\t%P0, %Q1, %R1 -+ vmov%D3\\t%Q0, %R0, %P2 -+ vmov%d3\\t%Q0, %R0, %P1 -+ vmov%D3\\t%Q0, %R0, %P2\;vmov%d3\\t%Q0, %R0, %P1" - [(set_attr "conds" "use") - (set_attr "length" "4,4,8,4,4,8,4,4,8") - (set_attr "type" "ffarithd,ffarithd,ffarithd,f_mcr,f_mcr,f_mcr,f_mrrc,f_mrrc,f_mrrc")] -@@ -567,15 +567,15 @@ - (match_operand:DF 2 "s_register_operand" "w,0,w,?r,0,?r,w,0,w")))] - "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE && !arm_restrict_it" - "@ -- it\\t%D3\;fcpyd%D3\\t%P0, %P2 -- it\\t%d3\;fcpyd%d3\\t%P0, %P1 -- ite\\t%D3\;fcpyd%D3\\t%P0, %P2\;fcpyd%d3\\t%P0, %P1 -- it\t%D3\;fmdrr%D3\\t%P0, %Q2, %R2 -- it\t%d3\;fmdrr%d3\\t%P0, %Q1, %R1 -- ite\\t%D3\;fmdrr%D3\\t%P0, %Q2, %R2\;fmdrr%d3\\t%P0, %Q1, %R1 -- it\t%D3\;fmrrd%D3\\t%Q0, %R0, %P2 -- it\t%d3\;fmrrd%d3\\t%Q0, %R0, %P1 -- ite\\t%D3\;fmrrd%D3\\t%Q0, %R0, %P2\;fmrrd%d3\\t%Q0, %R0, %P1" -+ it\\t%D3\;vmov%D3.f64\\t%P0, %P2 -+ it\\t%d3\;vmov%d3.f64\\t%P0, %P1 -+ ite\\t%D3\;vmov%D3.f64\\t%P0, %P2\;vmov%d3.f64\\t%P0, %P1 -+ it\t%D3\;vmov%D3\\t%P0, %Q2, %R2 -+ it\t%d3\;vmov%d3\\t%P0, %Q1, %R1 -+ ite\\t%D3\;vmov%D3\\t%P0, %Q2, %R2\;vmov%d3\\t%P0, %Q1, %R1 -+ it\t%D3\;vmov%D3\\t%Q0, %R0, %P2 -+ it\t%d3\;vmov%d3\\t%Q0, %R0, %P1 -+ ite\\t%D3\;vmov%D3\\t%Q0, %R0, %P2\;vmov%d3\\t%Q0, %R0, %P1" - [(set_attr "conds" "use") - (set_attr "length" "6,6,10,6,6,10,6,6,10") - (set_attr "type" 
"ffarithd,ffarithd,ffarithd,f_mcr,f_mcr,f_mcrr,f_mrrc,f_mrrc,f_mrrc")] -@@ -588,7 +588,7 @@ - [(set (match_operand:SF 0 "s_register_operand" "=t") - (abs:SF (match_operand:SF 1 "s_register_operand" "t")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -- "fabss%?\\t%0, %1" -+ "vabs%?.f32\\t%0, %1" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "ffariths")] -@@ -598,7 +598,7 @@ - [(set (match_operand:DF 0 "s_register_operand" "=w") - (abs:DF (match_operand:DF 1 "s_register_operand" "w")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" -- "fabsd%?\\t%P0, %P1" -+ "vabs%?.f64\\t%P0, %P1" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "ffarithd")] -@@ -609,7 +609,7 @@ - (neg:SF (match_operand:SF 1 "s_register_operand" "t,r")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" - "@ -- fnegs%?\\t%0, %1 -+ vneg%?.f32\\t%0, %1 - eor%?\\t%0, %1, #-2147483648" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") -@@ -621,7 +621,7 @@ - (neg:DF (match_operand:DF 1 "s_register_operand" "w,0,r")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" - "@ -- fnegd%?\\t%P0, %P1 -+ vneg%?.f64\\t%P0, %P1 - # - #" - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE && reload_completed -@@ -671,7 +671,7 @@ - (plus:SF (match_operand:SF 1 "s_register_operand" "t") - (match_operand:SF 2 "s_register_operand" "t")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -- "fadds%?\\t%0, %1, %2" -+ "vadd%?.f32\\t%0, %1, %2" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "fadds")] -@@ -682,7 +682,7 @@ - (plus:DF (match_operand:DF 1 "s_register_operand" "w") - (match_operand:DF 2 "s_register_operand" "w")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" -- "faddd%?\\t%P0, %P1, %P2" -+ "vadd%?.f64\\t%P0, %P1, %P2" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "faddd")] -@@ -694,7 +694,7 @@ - (minus:SF (match_operand:SF 1 "s_register_operand" "t") - (match_operand:SF 2 "s_register_operand" "t")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -- "fsubs%?\\t%0, %1, %2" -+ "vsub%?.f32\\t%0, %1, %2" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "fadds")] -@@ -705,7 +705,7 @@ - (minus:DF (match_operand:DF 1 "s_register_operand" "w") - (match_operand:DF 2 "s_register_operand" "w")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" -- "fsubd%?\\t%P0, %P1, %P2" -+ "vsub%?.f64\\t%P0, %P1, %P2" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "faddd")] -@@ -719,7 +719,7 @@ - (div:SF (match_operand:SF 1 "s_register_operand" "t") - (match_operand:SF 2 "s_register_operand" "t")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -- "fdivs%?\\t%0, %1, %2" -+ "vdiv%?.f32\\t%0, %1, %2" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "fdivs")] -@@ -730,7 +730,7 @@ - (div:DF (match_operand:DF 1 "s_register_operand" "w") - (match_operand:DF 2 "s_register_operand" "w")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" -- "fdivd%?\\t%P0, %P1, %P2" -+ "vdiv%?.f64\\t%P0, %P1, %P2" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "fdivd")] -@@ -744,7 +744,7 @@ - (mult:SF (match_operand:SF 1 "s_register_operand" "t") - (match_operand:SF 2 "s_register_operand" "t")))] - "TARGET_32BIT && 
TARGET_HARD_FLOAT && TARGET_VFP" -- "fmuls%?\\t%0, %1, %2" -+ "vmul%?.f32\\t%0, %1, %2" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "fmuls")] -@@ -755,7 +755,7 @@ - (mult:DF (match_operand:DF 1 "s_register_operand" "w") - (match_operand:DF 2 "s_register_operand" "w")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" -- "fmuld%?\\t%P0, %P1, %P2" -+ "vmul%?.f64\\t%P0, %P1, %P2" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "fmuld")] -@@ -766,7 +766,7 @@ - (mult:SF (neg:SF (match_operand:SF 1 "s_register_operand" "t")) - (match_operand:SF 2 "s_register_operand" "t")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -- "fnmuls%?\\t%0, %1, %2" -+ "vnmul%?.f32\\t%0, %1, %2" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "fmuls")] -@@ -777,7 +777,7 @@ - (mult:DF (neg:DF (match_operand:DF 1 "s_register_operand" "w")) - (match_operand:DF 2 "s_register_operand" "w")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" -- "fnmuld%?\\t%P0, %P1, %P2" -+ "vnmul%?.f64\\t%P0, %P1, %P2" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "fmuld")] -@@ -793,7 +793,7 @@ - (match_operand:SF 3 "s_register_operand" "t")) - (match_operand:SF 1 "s_register_operand" "0")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -- "fmacs%?\\t%0, %2, %3" -+ "vmla%?.f32\\t%0, %2, %3" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "fmacs")] -@@ -805,7 +805,7 @@ - (match_operand:DF 3 "s_register_operand" "w")) - (match_operand:DF 1 "s_register_operand" "0")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" -- "fmacd%?\\t%P0, %P2, %P3" -+ "vmla%?.f64\\t%P0, %P2, %P3" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "fmacd")] -@@ -818,7 +818,7 @@ - (match_operand:SF 3 "s_register_operand" "t")) - (match_operand:SF 1 "s_register_operand" "0")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -- "fmscs%?\\t%0, %2, %3" -+ "vnmls%?.f32\\t%0, %2, %3" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "fmacs")] -@@ -830,7 +830,7 @@ - (match_operand:DF 3 "s_register_operand" "w")) - (match_operand:DF 1 "s_register_operand" "0")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" -- "fmscd%?\\t%P0, %P2, %P3" -+ "vnmls%?.f64\\t%P0, %P2, %P3" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "fmacd")] -@@ -843,7 +843,7 @@ - (mult:SF (match_operand:SF 2 "s_register_operand" "t") - (match_operand:SF 3 "s_register_operand" "t"))))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -- "fnmacs%?\\t%0, %2, %3" -+ "vmls%?.f32\\t%0, %2, %3" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "fmacs")] -@@ -855,7 +855,7 @@ - (mult:DF (match_operand:DF 2 "s_register_operand" "w") - (match_operand:DF 3 "s_register_operand" "w"))))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" -- "fnmacd%?\\t%P0, %P2, %P3" -+ "vmls%?.f64\\t%P0, %P2, %P3" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "fmacd")] -@@ -870,7 +870,7 @@ - (match_operand:SF 3 "s_register_operand" "t")) - (match_operand:SF 1 "s_register_operand" "0")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -- "fnmscs%?\\t%0, %2, %3" -+ "vnmla%?.f32\\t%0, %2, %3" - [(set_attr "predicable" 
"yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "fmacs")] -@@ -883,7 +883,7 @@ - (match_operand:DF 3 "s_register_operand" "w")) - (match_operand:DF 1 "s_register_operand" "0")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" -- "fnmscd%?\\t%P0, %P2, %P3" -+ "vnmla%?.f64\\t%P0, %P2, %P3" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "fmacd")] -@@ -948,7 +948,7 @@ - [(set (match_operand:DF 0 "s_register_operand" "=w") - (float_extend:DF (match_operand:SF 1 "s_register_operand" "t")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" -- "fcvtds%?\\t%P0, %1" -+ "vcvt%?.f64.f32\\t%P0, %1" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "f_cvt")] -@@ -958,7 +958,7 @@ - [(set (match_operand:SF 0 "s_register_operand" "=t") - (float_truncate:SF (match_operand:DF 1 "s_register_operand" "w")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" -- "fcvtsd%?\\t%0, %P1" -+ "vcvt%?.f32.f64\\t%0, %P1" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "f_cvt")] -@@ -988,7 +988,7 @@ - [(set (match_operand:SI 0 "s_register_operand" "=t") - (fix:SI (fix:SF (match_operand:SF 1 "s_register_operand" "t"))))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -- "ftosizs%?\\t%0, %1" -+ "vcvt%?.s32.f32\\t%0, %1" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "f_cvtf2i")] -@@ -998,7 +998,7 @@ - [(set (match_operand:SI 0 "s_register_operand" "=t") - (fix:SI (fix:DF (match_operand:DF 1 "s_register_operand" "w"))))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" -- "ftosizd%?\\t%0, %P1" -+ "vcvt%?.s32.f64\\t%0, %P1" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "f_cvtf2i")] -@@ -1009,7 +1009,7 @@ - [(set (match_operand:SI 0 "s_register_operand" "=t") - (unsigned_fix:SI (fix:SF (match_operand:SF 1 "s_register_operand" "t"))))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -- "ftouizs%?\\t%0, %1" -+ "vcvt%?.u32.f32\\t%0, %1" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "f_cvtf2i")] -@@ -1019,7 +1019,7 @@ - [(set (match_operand:SI 0 "s_register_operand" "=t") - (unsigned_fix:SI (fix:DF (match_operand:DF 1 "s_register_operand" "t"))))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" -- "ftouizd%?\\t%0, %P1" -+ "vcvt%?.u32.f64\\t%0, %P1" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "f_cvtf2i")] -@@ -1030,7 +1030,7 @@ - [(set (match_operand:SF 0 "s_register_operand" "=t") - (float:SF (match_operand:SI 1 "s_register_operand" "t")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -- "fsitos%?\\t%0, %1" -+ "vcvt%?.f32.s32\\t%0, %1" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "f_cvti2f")] -@@ -1040,7 +1040,7 @@ - [(set (match_operand:DF 0 "s_register_operand" "=w") - (float:DF (match_operand:SI 1 "s_register_operand" "t")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" -- "fsitod%?\\t%P0, %1" -+ "vcvt%?.f64.s32\\t%P0, %1" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "f_cvti2f")] -@@ -1051,7 +1051,7 @@ - [(set (match_operand:SF 0 "s_register_operand" "=t") - (unsigned_float:SF (match_operand:SI 1 "s_register_operand" "t")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -- "fuitos%?\\t%0, %1" -+ "vcvt%?.f32.u32\\t%0, %1" 
- [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "f_cvti2f")] -@@ -1061,7 +1061,7 @@ - [(set (match_operand:DF 0 "s_register_operand" "=w") - (unsigned_float:DF (match_operand:SI 1 "s_register_operand" "t")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" -- "fuitod%?\\t%P0, %1" -+ "vcvt%?.f64.u32\\t%P0, %1" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "f_cvti2f")] -@@ -1074,7 +1074,7 @@ - [(set (match_operand:SF 0 "s_register_operand" "=t") - (sqrt:SF (match_operand:SF 1 "s_register_operand" "t")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -- "fsqrts%?\\t%0, %1" -+ "vsqrt%?.f32\\t%0, %1" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "fsqrts")] -@@ -1084,7 +1084,7 @@ - [(set (match_operand:DF 0 "s_register_operand" "=w") - (sqrt:DF (match_operand:DF 1 "s_register_operand" "w")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" -- "fsqrtd%?\\t%P0, %P1" -+ "vsqrt%?.f64\\t%P0, %P1" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "fsqrtd")] -@@ -1097,7 +1097,7 @@ - [(set (reg CC_REGNUM) - (reg VFPCC_REGNUM))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -- "fmstat%?" -+ "vmrs%?\\tAPSR_nzcv, FPSCR" - [(set_attr "conds" "set") - (set_attr "type" "f_flag")] - ) -@@ -1165,6 +1165,9 @@ - - ;; Comparison patterns - -+;; In the compare with FP zero case the ARM Architecture Reference Manual -+;; specifies the immediate to be #0.0. However, some buggy assemblers only -+;; accept #0. We don't want to autodetect broken assemblers, so output #0. - (define_insn "*cmpsf_vfp" - [(set (reg:CCFP VFPCC_REGNUM) - (compare:CCFP (match_operand:SF 0 "s_register_operand" "t,t") -@@ -1171,8 +1174,8 @@ - (match_operand:SF 1 "vfp_compare_operand" "t,G")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" - "@ -- fcmps%?\\t%0, %1 -- fcmpzs%?\\t%0" -+ vcmp%?.f32\\t%0, %1 -+ vcmp%?.f32\\t%0, #0" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "fcmps")] -@@ -1184,8 +1187,8 @@ - (match_operand:SF 1 "vfp_compare_operand" "t,G")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" - "@ -- fcmpes%?\\t%0, %1 -- fcmpezs%?\\t%0" -+ vcmpe%?.f32\\t%0, %1 -+ vcmpe%?.f32\\t%0, #0" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "fcmps")] -@@ -1197,8 +1200,8 @@ - (match_operand:DF 1 "vfp_compare_operand" "w,G")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" - "@ -- fcmpd%?\\t%P0, %P1 -- fcmpzd%?\\t%P0" -+ vcmp%?.f64\\t%P0, %P1 -+ vcmp%?.f64\\t%P0, #0" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "fcmpd")] -@@ -1210,8 +1213,8 @@ - (match_operand:DF 1 "vfp_compare_operand" "w,G")))] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" - "@ -- fcmped%?\\t%P0, %P1 -- fcmpezd%?\\t%P0" -+ vcmpe%?.f64\\t%P0, %P1 -+ vcmpe%?.f64\\t%P0, #0" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "fcmpd")] -@@ -1272,7 +1275,7 @@ - (unspec:BLK [(match_operand:DF 1 "vfp_register_operand" "")] - UNSPEC_PUSH_MULT))])] - "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -- "* return vfp_output_fstmd (operands);" -+ "* return vfp_output_vstmd (operands);" - [(set_attr "type" "f_stored")] - ) - -@@ -1285,7 +1288,7 @@ - (unspec:SDF [(match_operand:SDF 1 - "register_operand" "<F_constraint>")] - VRINT))] -- "TARGET_HARD_FLOAT && 
TARGET_FPU_ARMV8 <vfp_double_cond>" -+ "TARGET_HARD_FLOAT && TARGET_VFP5 <vfp_double_cond>" - "vrint<vrint_variant>%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1" - [(set_attr "predicable" "<vrint_predicable>") - (set_attr "predicable_short_it" "no") -@@ -1293,6 +1296,18 @@ - (set_attr "conds" "<vrint_conds>")] - ) - -+;; Implements the lround, lfloor and lceil optabs. -+(define_insn "l<vrint_pattern><su_optab><mode>si2" -+ [(set (match_operand:SI 0 "register_operand" "=t") -+ (FIXUORS:SI (unspec:SDF -+ [(match_operand:SDF 1 -+ "register_operand" "<F_constraint>")] VCVT)))] -+ "TARGET_HARD_FLOAT && TARGET_FPU_ARMV8 <vfp_double_cond>" -+ "vcvt<vrint_variant>%?.<su>32.<V_if_elem>\\t%0, %<V_reg>1" -+ [(set_attr "predicable" "no") -+ (set_attr "type" "f_cvtf2i")] -+) -+ - ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL. - ;; The 'smax' and 'smin' RTL standard pattern names do not specify which - ;; operand will be returned when both operands are zero (i.e. they may not -@@ -1304,7 +1319,7 @@ - [(set (match_operand:SDF 0 "register_operand" "=<F_constraint>") - (smax:SDF (match_operand:SDF 1 "register_operand" "<F_constraint>") - (match_operand:SDF 2 "register_operand" "<F_constraint>")))] -- "TARGET_HARD_FLOAT && TARGET_FPU_ARMV8 <vfp_double_cond>" -+ "TARGET_HARD_FLOAT && TARGET_VFP5 <vfp_double_cond>" - "vmaxnm.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set_attr "type" "f_minmax<vfp_type>") - (set_attr "conds" "unconditional")] -@@ -1314,12 +1329,28 @@ - [(set (match_operand:SDF 0 "register_operand" "=<F_constraint>") - (smin:SDF (match_operand:SDF 1 "register_operand" "<F_constraint>") - (match_operand:SDF 2 "register_operand" "<F_constraint>")))] -- "TARGET_HARD_FLOAT && TARGET_FPU_ARMV8 <vfp_double_cond>" -+ "TARGET_HARD_FLOAT && TARGET_VFP5 <vfp_double_cond>" - "vminnm.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set_attr "type" "f_minmax<vfp_type>") - (set_attr "conds" "unconditional")] - ) - -+;; Write Floating-point Status and Control Register. -+(define_insn "set_fpscr" -+ [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] VUNSPEC_SET_FPSCR)] -+ "TARGET_VFP && TARGET_HARD_FLOAT" -+ "mcr\\tp10, 7, %0, cr1, cr0, 0\\t @SET_FPSCR" -+ [(set_attr "type" "mrs")]) -+ -+;; Read Floating-point Status and Control Register. 
-+(define_insn "get_fpscr" -+ [(set (match_operand:SI 0 "register_operand" "=r") -+ (unspec_volatile:SI [(const_int 0)] VUNSPEC_GET_FPSCR))] -+ "TARGET_VFP && TARGET_HARD_FLOAT" -+ "mrc\\tp10, 7, %0, cr1, cr0, 0\\t @GET_FPSCR" -+ [(set_attr "type" "mrs")]) -+ -+ - ;; Unimplemented insns: - ;; fldm* - ;; fstm* ---- a/src/gcc/config/arm/neon.md -+++ b/src/gcc/config/arm/neon.md -@@ -296,7 +296,7 @@ - UNSPEC_MISALIGNED_ACCESS))] - "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access" - "vld1.<V_sz_elem>\t{%q0}, %A1" -- [(set_attr "type" "neon_store1_1reg<q>")]) -+ [(set_attr "type" "neon_load1_1reg<q>")]) - - (define_insn "vec_set<mode>_internal" - [(set (match_operand:VD 0 "s_register_operand" "=w,w") -@@ -629,6 +629,17 @@ - [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")] - ) - -+(define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>" -+ [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w") -+ (FIXUORS:<V_cmp_result> (unspec:VCVTF -+ [(match_operand:VCVTF 1 "register_operand" "w")] -+ NEON_VCVT)))] -+ "TARGET_NEON && TARGET_FPU_ARMV8" -+ "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1" -+ [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>") -+ (set_attr "predicable" "no")] -+) -+ - (define_insn "ior<mode>3" - [(set (match_operand:VDQ 0 "s_register_operand" "=w,w") - (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0") -@@ -1041,7 +1052,9 @@ - } - else - { -- if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1) -+ if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1 -+ && (!reg_overlap_mentioned_p (operands[0], operands[1]) -+ || REGNO (operands[0]) == REGNO (operands[1]))) - /* This clobbers CC. */ - emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1])); - else -@@ -1141,7 +1154,9 @@ - } - else - { -- if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1) -+ if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1 -+ && (!reg_overlap_mentioned_p (operands[0], operands[1]) -+ || REGNO (operands[0]) == REGNO (operands[1]))) - /* This clobbers CC. */ - emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1])); - else -@@ -1334,33 +1349,47 @@ - - ;; Reduction operations - --(define_expand "reduc_splus_<mode>" -- [(match_operand:VD 0 "s_register_operand" "") -+(define_expand "reduc_plus_scal_<mode>" -+ [(match_operand:<V_elem> 0 "nonimmediate_operand" "") - (match_operand:VD 1 "s_register_operand" "")] - "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" - { -- neon_pairwise_reduce (operands[0], operands[1], <MODE>mode, -+ rtx vec = gen_reg_rtx (<MODE>mode); -+ neon_pairwise_reduce (vec, operands[1], <MODE>mode, - &gen_neon_vpadd_internal<mode>); -+ /* The same result is actually computed into every element. 
*/ -+ emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx)); - DONE; - }) - --(define_expand "reduc_splus_<mode>" -- [(match_operand:VQ 0 "s_register_operand" "") -+(define_expand "reduc_plus_scal_<mode>" -+ [(match_operand:<V_elem> 0 "nonimmediate_operand" "") - (match_operand:VQ 1 "s_register_operand" "")] - "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations) - && !BYTES_BIG_ENDIAN" - { - rtx step1 = gen_reg_rtx (<V_HALF>mode); -- rtx res_d = gen_reg_rtx (<V_HALF>mode); - - emit_insn (gen_quad_halves_plus<mode> (step1, operands[1])); -- emit_insn (gen_reduc_splus_<V_half> (res_d, step1)); -- emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d)); -+ emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1)); - - DONE; - }) - --(define_insn "reduc_splus_v2di" -+(define_expand "reduc_plus_scal_v2di" -+ [(match_operand:DI 0 "nonimmediate_operand" "=w") -+ (match_operand:V2DI 1 "s_register_operand" "")] -+ "TARGET_NEON && !BYTES_BIG_ENDIAN" -+{ -+ rtx vec = gen_reg_rtx (V2DImode); -+ -+ emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1])); -+ emit_insn (gen_vec_extractv2di (operands[0], vec, const0_rtx)); -+ -+ DONE; -+}) -+ -+(define_insn "arm_reduc_plus_internal_v2di" - [(set (match_operand:V2DI 0 "s_register_operand" "=w") - (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")] - UNSPEC_VPADD))] -@@ -1369,115 +1398,109 @@ - [(set_attr "type" "neon_add_q")] - ) - --;; NEON does not distinguish between signed and unsigned addition except on --;; widening operations. --(define_expand "reduc_uplus_<mode>" -- [(match_operand:VDQI 0 "s_register_operand" "") -- (match_operand:VDQI 1 "s_register_operand" "")] -- "TARGET_NEON && (<Is_d_reg> || !BYTES_BIG_ENDIAN)" --{ -- emit_insn (gen_reduc_splus_<mode> (operands[0], operands[1])); -- DONE; --}) -- --(define_expand "reduc_smin_<mode>" -- [(match_operand:VD 0 "s_register_operand" "") -+(define_expand "reduc_smin_scal_<mode>" -+ [(match_operand:<V_elem> 0 "nonimmediate_operand" "") - (match_operand:VD 1 "s_register_operand" "")] - "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" - { -- neon_pairwise_reduce (operands[0], operands[1], <MODE>mode, -+ rtx vec = gen_reg_rtx (<MODE>mode); -+ -+ neon_pairwise_reduce (vec, operands[1], <MODE>mode, - &gen_neon_vpsmin<mode>); -+ /* The result is computed into every element of the vector. 
*/ -+ emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx)); - DONE; - }) - --(define_expand "reduc_smin_<mode>" -- [(match_operand:VQ 0 "s_register_operand" "") -+(define_expand "reduc_smin_scal_<mode>" -+ [(match_operand:<V_elem> 0 "nonimmediate_operand" "") - (match_operand:VQ 1 "s_register_operand" "")] - "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations) - && !BYTES_BIG_ENDIAN" - { - rtx step1 = gen_reg_rtx (<V_HALF>mode); -- rtx res_d = gen_reg_rtx (<V_HALF>mode); - - emit_insn (gen_quad_halves_smin<mode> (step1, operands[1])); -- emit_insn (gen_reduc_smin_<V_half> (res_d, step1)); -- emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d)); -+ emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1)); - - DONE; - }) - --(define_expand "reduc_smax_<mode>" -- [(match_operand:VD 0 "s_register_operand" "") -+(define_expand "reduc_smax_scal_<mode>" -+ [(match_operand:<V_elem> 0 "nonimmediate_operand" "") - (match_operand:VD 1 "s_register_operand" "")] - "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" - { -- neon_pairwise_reduce (operands[0], operands[1], <MODE>mode, -+ rtx vec = gen_reg_rtx (<MODE>mode); -+ neon_pairwise_reduce (vec, operands[1], <MODE>mode, - &gen_neon_vpsmax<mode>); -+ /* The result is computed into every element of the vector. */ -+ emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx)); - DONE; - }) - --(define_expand "reduc_smax_<mode>" -- [(match_operand:VQ 0 "s_register_operand" "") -+(define_expand "reduc_smax_scal_<mode>" -+ [(match_operand:<V_elem> 0 "nonimmediate_operand" "") - (match_operand:VQ 1 "s_register_operand" "")] - "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations) - && !BYTES_BIG_ENDIAN" - { - rtx step1 = gen_reg_rtx (<V_HALF>mode); -- rtx res_d = gen_reg_rtx (<V_HALF>mode); - - emit_insn (gen_quad_halves_smax<mode> (step1, operands[1])); -- emit_insn (gen_reduc_smax_<V_half> (res_d, step1)); -- emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d)); -+ emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1)); - - DONE; - }) - --(define_expand "reduc_umin_<mode>" -- [(match_operand:VDI 0 "s_register_operand" "") -+(define_expand "reduc_umin_scal_<mode>" -+ [(match_operand:<V_elem> 0 "nonimmediate_operand" "") - (match_operand:VDI 1 "s_register_operand" "")] - "TARGET_NEON" - { -- neon_pairwise_reduce (operands[0], operands[1], <MODE>mode, -+ rtx vec = gen_reg_rtx (<MODE>mode); -+ neon_pairwise_reduce (vec, operands[1], <MODE>mode, - &gen_neon_vpumin<mode>); -+ /* The result is computed into every element of the vector. 
*/ -+ emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx)); - DONE; - }) - --(define_expand "reduc_umin_<mode>" -- [(match_operand:VQI 0 "s_register_operand" "") -+(define_expand "reduc_umin_scal_<mode>" -+ [(match_operand:<V_elem> 0 "nonimmediate_operand" "") - (match_operand:VQI 1 "s_register_operand" "")] - "TARGET_NEON && !BYTES_BIG_ENDIAN" - { - rtx step1 = gen_reg_rtx (<V_HALF>mode); -- rtx res_d = gen_reg_rtx (<V_HALF>mode); - - emit_insn (gen_quad_halves_umin<mode> (step1, operands[1])); -- emit_insn (gen_reduc_umin_<V_half> (res_d, step1)); -- emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d)); -+ emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1)); - - DONE; - }) - --(define_expand "reduc_umax_<mode>" -- [(match_operand:VDI 0 "s_register_operand" "") -+(define_expand "reduc_umax_scal_<mode>" -+ [(match_operand:<V_elem> 0 "nonimmediate_operand" "") - (match_operand:VDI 1 "s_register_operand" "")] - "TARGET_NEON" - { -- neon_pairwise_reduce (operands[0], operands[1], <MODE>mode, -+ rtx vec = gen_reg_rtx (<MODE>mode); -+ neon_pairwise_reduce (vec, operands[1], <MODE>mode, - &gen_neon_vpumax<mode>); -+ /* The result is computed into every element of the vector. */ -+ emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx)); - DONE; - }) - --(define_expand "reduc_umax_<mode>" -- [(match_operand:VQI 0 "s_register_operand" "") -+(define_expand "reduc_umax_scal_<mode>" -+ [(match_operand:<V_elem> 0 "nonimmediate_operand" "") - (match_operand:VQI 1 "s_register_operand" "")] - "TARGET_NEON && !BYTES_BIG_ENDIAN" - { - rtx step1 = gen_reg_rtx (<V_HALF>mode); -- rtx res_d = gen_reg_rtx (<V_HALF>mode); - - emit_insn (gen_quad_halves_umax<mode> (step1, operands[1])); -- emit_insn (gen_reduc_umax_<V_half> (res_d, step1)); -- emit_insn (gen_move_lo_quad_<mode> (operands[0], res_d)); -+ emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1)); - - DONE; - }) -@@ -1842,9 +1865,9 @@ - ; good for plain vadd, vaddq. - - (define_expand "neon_vadd<mode>" -- [(match_operand:VDQX 0 "s_register_operand" "=w") -- (match_operand:VDQX 1 "s_register_operand" "w") -- (match_operand:VDQX 2 "s_register_operand" "w") -+ [(match_operand:VCVTF 0 "s_register_operand" "=w") -+ (match_operand:VCVTF 1 "s_register_operand" "w") -+ (match_operand:VCVTF 2 "s_register_operand" "w") - (match_operand:SI 3 "immediate_operand" "i")] - "TARGET_NEON" - { -@@ -1869,9 +1892,9 @@ - ; Used for intrinsics when flag_unsafe_math_optimizations is false. - - (define_insn "neon_vadd<mode>_unspec" -- [(set (match_operand:VDQX 0 "s_register_operand" "=w") -- (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w") -- (match_operand:VDQX 2 "s_register_operand" "w")] -+ [(set (match_operand:VCVTF 0 "s_register_operand" "=w") -+ (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") -+ (match_operand:VCVTF 2 "s_register_operand" "w")] - UNSPEC_VADD))] - "TARGET_NEON" - "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" -@@ -2132,9 +2155,9 @@ - ) - - (define_expand "neon_vsub<mode>" -- [(match_operand:VDQX 0 "s_register_operand" "=w") -- (match_operand:VDQX 1 "s_register_operand" "w") -- (match_operand:VDQX 2 "s_register_operand" "w") -+ [(match_operand:VCVTF 0 "s_register_operand" "=w") -+ (match_operand:VCVTF 1 "s_register_operand" "w") -+ (match_operand:VCVTF 2 "s_register_operand" "w") - (match_operand:SI 3 "immediate_operand" "i")] - "TARGET_NEON" - { -@@ -2149,9 +2172,9 @@ - ; Used for intrinsics when flag_unsafe_math_optimizations is false. 
- - (define_insn "neon_vsub<mode>_unspec" -- [(set (match_operand:VDQX 0 "s_register_operand" "=w") -- (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w") -- (match_operand:VDQX 2 "s_register_operand" "w")] -+ [(set (match_operand:VCVTF 0 "s_register_operand" "=w") -+ (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") -+ (match_operand:VCVTF 2 "s_register_operand" "w")] - UNSPEC_VSUB))] - "TARGET_NEON" - "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" -@@ -2547,6 +2570,14 @@ - [(set_attr "type" "neon_qabs<q>")] - ) - -+(define_insn "neon_bswap<mode>" -+ [(set (match_operand:VDQHSD 0 "register_operand" "=w") -+ (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))] -+ "TARGET_NEON" -+ "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1" -+ [(set_attr "type" "neon_rev<q>")] -+) -+ - (define_expand "neon_vneg<mode>" - [(match_operand:VDQW 0 "s_register_operand" "") - (match_operand:VDQW 1 "s_register_operand" "") -@@ -2557,6 +2588,33 @@ - DONE; - }) - -+(define_expand "neon_copysignf<mode>" -+ [(match_operand:VCVTF 0 "register_operand") -+ (match_operand:VCVTF 1 "register_operand") -+ (match_operand:VCVTF 2 "register_operand")] -+ "TARGET_NEON" -+ "{ -+ rtx v_bitmask_cast; -+ rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode); -+ int i, n_elt = GET_MODE_NUNITS (<MODE>mode); -+ rtvec v = rtvec_alloc (n_elt); -+ -+ /* Create bitmask for vector select. */ -+ for (i = 0; i < n_elt; ++i) -+ RTVEC_ELT (v, i) = GEN_INT (0x80000000); -+ -+ emit_move_insn (v_bitmask, -+ gen_rtx_CONST_VECTOR (<VCVTF:V_cmp_result>mode, v)); -+ emit_move_insn (operands[0], operands[2]); -+ v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask, -+ <VCVTF:V_cmp_result>mode, 0); -+ emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0], -+ operands[1])); -+ -+ DONE; -+ }" -+) -+ - (define_insn "neon_vqneg<mode>" - [(set (match_operand:VDQIW 0 "s_register_operand" "=w") - (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w") -@@ -4140,17 +4198,6 @@ - [(set_attr "type" "neon_permute<q>")] - ) - --(define_expand "neon_vtrn<mode>" -- [(match_operand:SI 0 "s_register_operand" "r") -- (match_operand:VDQW 1 "s_register_operand" "w") -- (match_operand:VDQW 2 "s_register_operand" "w")] -- "TARGET_NEON" --{ -- neon_emit_pair_result_insn (<MODE>mode, gen_neon_vtrn<mode>_internal, -- operands[0], operands[1], operands[2]); -- DONE; --}) -- - (define_expand "neon_vzip<mode>_internal" - [(parallel - [(set (match_operand:VDQW 0 "s_register_operand" "") -@@ -4177,17 +4224,6 @@ - [(set_attr "type" "neon_zip<q>")] - ) - --(define_expand "neon_vzip<mode>" -- [(match_operand:SI 0 "s_register_operand" "r") -- (match_operand:VDQW 1 "s_register_operand" "w") -- (match_operand:VDQW 2 "s_register_operand" "w")] -- "TARGET_NEON" --{ -- neon_emit_pair_result_insn (<MODE>mode, gen_neon_vzip<mode>_internal, -- operands[0], operands[1], operands[2]); -- DONE; --}) -- - (define_expand "neon_vuzp<mode>_internal" - [(parallel - [(set (match_operand:VDQW 0 "s_register_operand" "") -@@ -4214,17 +4250,6 @@ - [(set_attr "type" "neon_zip<q>")] - ) - --(define_expand "neon_vuzp<mode>" -- [(match_operand:SI 0 "s_register_operand" "r") -- (match_operand:VDQW 1 "s_register_operand" "w") -- (match_operand:VDQW 2 "s_register_operand" "w")] -- "TARGET_NEON" --{ -- neon_emit_pair_result_insn (<MODE>mode, gen_neon_vuzp<mode>_internal, -- operands[0], operands[1], operands[2]); -- DONE; --}) -- - (define_expand "neon_vreinterpretv8qi<mode>" - [(match_operand:V8QI 0 "s_register_operand" "") - 
(match_operand:VDX 1 "s_register_operand" "")] -@@ -5357,61 +5382,6 @@ - [(set_attr "type" "neon_store4_4reg<q>")] - ) - --(define_expand "neon_vand<mode>" -- [(match_operand:VDQX 0 "s_register_operand" "") -- (match_operand:VDQX 1 "s_register_operand" "") -- (match_operand:VDQX 2 "neon_inv_logic_op2" "") -- (match_operand:SI 3 "immediate_operand" "")] -- "TARGET_NEON" --{ -- emit_insn (gen_and<mode>3 (operands[0], operands[1], operands[2])); -- DONE; --}) -- --(define_expand "neon_vorr<mode>" -- [(match_operand:VDQX 0 "s_register_operand" "") -- (match_operand:VDQX 1 "s_register_operand" "") -- (match_operand:VDQX 2 "neon_logic_op2" "") -- (match_operand:SI 3 "immediate_operand" "")] -- "TARGET_NEON" --{ -- emit_insn (gen_ior<mode>3 (operands[0], operands[1], operands[2])); -- DONE; --}) -- --(define_expand "neon_veor<mode>" -- [(match_operand:VDQX 0 "s_register_operand" "") -- (match_operand:VDQX 1 "s_register_operand" "") -- (match_operand:VDQX 2 "s_register_operand" "") -- (match_operand:SI 3 "immediate_operand" "")] -- "TARGET_NEON" --{ -- emit_insn (gen_xor<mode>3 (operands[0], operands[1], operands[2])); -- DONE; --}) -- --(define_expand "neon_vbic<mode>" -- [(match_operand:VDQX 0 "s_register_operand" "") -- (match_operand:VDQX 1 "s_register_operand" "") -- (match_operand:VDQX 2 "neon_logic_op2" "") -- (match_operand:SI 3 "immediate_operand" "")] -- "TARGET_NEON" --{ -- emit_insn (gen_bic<mode>3_neon (operands[0], operands[1], operands[2])); -- DONE; --}) -- --(define_expand "neon_vorn<mode>" -- [(match_operand:VDQX 0 "s_register_operand" "") -- (match_operand:VDQX 1 "s_register_operand" "") -- (match_operand:VDQX 2 "neon_inv_logic_op2" "") -- (match_operand:SI 3 "immediate_operand" "")] -- "TARGET_NEON" --{ -- emit_insn (gen_orn<mode>3_neon (operands[0], operands[1], operands[2])); -- DONE; --}) -- - (define_insn "neon_vec_unpack<US>_lo_<mode>" - [(set (match_operand:<V_unpack> 0 "register_operand" "=w") - (SE:<V_unpack> (vec_select:<V_HALF> ---- a/src/gcc/config/arm/types.md -+++ b/src/gcc/config/arm/types.md -@@ -66,7 +66,6 @@ - ; f_mrc transfer vfp to arm reg. - ; f_mrrc transfer vfp to two arm regs. - ; f_rint[d,s] double/single floating point rount to integral. --; f_sel[d,s] double/single floating byte select. - ; f_store[d,s] double/single store to memory. Used for VFP unit. - ; fadd[d,s] double/single floating-point scalar addition. - ; fcmp[d,s] double/single floating-point compare. -@@ -571,8 +570,6 @@ - f_mrrc,\ - f_rintd,\ - f_rints,\ -- f_seld,\ -- f_sels,\ - f_stored,\ - f_stores,\ - faddd,\ ---- a/src/gcc/config/arm/arm_neon_builtins.def -+++ b/src/gcc/config/arm/arm_neon_builtins.def -@@ -18,8 +18,7 @@ - along with GCC; see the file COPYING3. If not see - <http://www.gnu.org/licenses/>. 
*/ - --VAR10 (BINOP, vadd, -- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), -+VAR2 (BINOP, vadd, v2sf, v4sf), - VAR3 (BINOP, vaddl, v8qi, v4hi, v2si), - VAR3 (BINOP, vaddw, v8qi, v4hi, v2si), - VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si), -@@ -54,7 +53,7 @@ - VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), - VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si), - VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), --VAR10 (BINOP, vsub, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), -+VAR2 (BINOP, vsub, v2sf, v4sf), - VAR3 (BINOP, vsubl, v8qi, v4hi, v2si), - VAR3 (BINOP, vsubw, v8qi, v4hi, v2si), - VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), -@@ -89,6 +88,7 @@ - VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si), - VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si), - VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si), -+VAR5 (BSWAP, bswap, v4hi, v8hi, v2si, v4si, v2di), - VAR2 (UNOP, vcnt, v8qi, v16qi), - VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf), - VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf), -@@ -135,6 +135,7 @@ - VAR1 (FLOAT_NARROW, vcvtv4hf, v4sf), - VAR10 (SELECT, vbsl, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), -+VAR2 (COPYSIGNF, copysignf, v2sf, v4sf), - VAR2 (RINT, vrintn, v2sf, v4sf), - VAR2 (RINT, vrinta, v2sf, v4sf), - VAR2 (RINT, vrintp, v2sf, v4sf), -@@ -141,6 +142,18 @@ - VAR2 (RINT, vrintm, v2sf, v4sf), - VAR2 (RINT, vrintz, v2sf, v4sf), - VAR2 (RINT, vrintx, v2sf, v4sf), -+VAR1 (RINT, vcvtav2sf, v2si), -+VAR1 (RINT, vcvtav4sf, v4si), -+VAR1 (RINT, vcvtauv2sf, v2si), -+VAR1 (RINT, vcvtauv4sf, v4si), -+VAR1 (RINT, vcvtpv2sf, v2si), -+VAR1 (RINT, vcvtpv4sf, v4si), -+VAR1 (RINT, vcvtpuv2sf, v2si), -+VAR1 (RINT, vcvtpuv4sf, v4si), -+VAR1 (RINT, vcvtmv2sf, v2si), -+VAR1 (RINT, vcvtmv4sf, v4si), -+VAR1 (RINT, vcvtmuv2sf, v2si), -+VAR1 (RINT, vcvtmuv4sf, v4si), - VAR1 (VTBL, vtbl1, v8qi), - VAR1 (VTBL, vtbl2, v8qi), - VAR1 (VTBL, vtbl3, v8qi), -@@ -149,9 +162,6 @@ - VAR1 (VTBX, vtbx2, v8qi), - VAR1 (VTBX, vtbx3, v8qi), - VAR1 (VTBX, vtbx4, v8qi), --VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), --VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), --VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), - VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di), - VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di), - VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di), -@@ -199,14 +209,4 @@ - VAR9 (STORESTRUCT, vst4, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf), - VAR7 (STORESTRUCTLANE, vst4_lane, -- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf), --VAR10 (LOGICBINOP, vand, -- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), --VAR10 (LOGICBINOP, vorr, -- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), --VAR10 (BINOP, veor, -- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), --VAR10 (LOGICBINOP, vbic, -- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), --VAR10 (LOGICBINOP, vorn, -- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) -+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) ---- a/src/gcc/config/arm/cortex-a7.md -+++ b/src/gcc/config/arm/cortex-a7.md -@@ -137,7 +137,7 @@ - (and (eq_attr "tune" "cortexa7") - (eq_attr "type" "alu_reg,alus_reg,logic_reg,logics_reg,\ - adc_imm,adcs_imm,adc_reg,adcs_reg,\ -- bfm,rev,\ -+ bfm,clz,rbit,rev,\ - shift_imm,shift_reg,mov_reg,mvn_reg")) - "cortex_a7_ex1") - 
---- a/src/gcc/config/arm/aarch-common-protos.h -+++ b/src/gcc/config/arm/aarch-common-protos.h -@@ -24,6 +24,9 @@ - #define GCC_AARCH_COMMON_PROTOS_H - - extern int aarch_crypto_can_dual_issue (rtx, rtx); -+extern bool aarch_rev16_p (rtx); -+extern bool aarch_rev16_shleft_mask_imm_p (rtx, enum machine_mode); -+extern bool aarch_rev16_shright_mask_imm_p (rtx, enum machine_mode); - extern int arm_early_load_addr_dep (rtx, rtx); - extern int arm_early_store_addr_dep (rtx, rtx); - extern int arm_mac_accumulator_is_mul_result (rtx, rtx); -@@ -54,6 +57,7 @@ - const int bfi; /* Bit-field insert. */ - const int bfx; /* Bit-field extraction. */ - const int clz; /* Count Leading Zeros. */ -+ const int rev; /* Reverse bits/bytes. */ - const int non_exec; /* Extra cost when not executing insn. */ - const bool non_exec_costs_exec; /* True if non-execution must add the exec - cost. */ ---- a/src/gcc/config/arm/predicates.md -+++ b/src/gcc/config/arm/predicates.md -@@ -291,6 +291,15 @@ - || ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32"))) - (match_test "mode == GET_MODE (op)"))) - -+(define_special_predicate "shift_nomul_operator" -+ (and (ior (and (match_code "rotate") -+ (match_test "CONST_INT_P (XEXP (op, 1)) -+ && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32")) -+ (and (match_code "ashift,ashiftrt,lshiftrt,rotatert") -+ (match_test "!CONST_INT_P (XEXP (op, 1)) -+ || ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32"))) -+ (match_test "mode == GET_MODE (op)"))) -+ - ;; True for shift operators which can be used with saturation instructions. - (define_special_predicate "sat_shift_operator" - (and (ior (and (match_code "mult") -@@ -681,5 +690,6 @@ - (match_code "reg" "0"))) - - (define_predicate "call_insn_operand" -- (ior (match_code "symbol_ref") -+ (ior (and (match_code "symbol_ref") -+ (match_test "!arm_is_long_call_p (SYMBOL_REF_DECL (op))")) - (match_operand 0 "s_register_operand"))) ---- a/src/gcc/config/arm/arm_neon.h -+++ b/src/gcc/config/arm/arm_neon.h -@@ -452,114 +452,121 @@ - } poly64x2x4_t; - #endif - -- -- -+/* vadd */ - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vadd_s8 (int8x8_t __a, int8x8_t __b) - { -- return (int8x8_t)__builtin_neon_vaddv8qi (__a, __b, 1); -+ return __a + __b; - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vadd_s16 (int16x4_t __a, int16x4_t __b) - { -- return (int16x4_t)__builtin_neon_vaddv4hi (__a, __b, 1); -+ return __a + __b; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vadd_s32 (int32x2_t __a, int32x2_t __b) - { -- return (int32x2_t)__builtin_neon_vaddv2si (__a, __b, 1); -+ return __a + __b; - } - - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) - vadd_f32 (float32x2_t __a, float32x2_t __b) - { -- return (float32x2_t)__builtin_neon_vaddv2sf (__a, __b, 3); -+#ifdef __FAST_MATH -+ return __a + __b; -+#else -+ return (float32x2_t) __builtin_neon_vaddv2sf (__a, __b, 3); -+#endif - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vadd_u8 (uint8x8_t __a, uint8x8_t __b) - { -- return (uint8x8_t)__builtin_neon_vaddv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); -+ return __a + __b; - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vadd_u16 (uint16x4_t __a, uint16x4_t __b) - { -- return (uint16x4_t)__builtin_neon_vaddv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); -+ return __a + __b; - } - - __extension__ static __inline uint32x2_t __attribute__ 
((__always_inline__)) - vadd_u32 (uint32x2_t __a, uint32x2_t __b) - { -- return (uint32x2_t)__builtin_neon_vaddv2si ((int32x2_t) __a, (int32x2_t) __b, 0); -+ return __a + __b; - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vadd_s64 (int64x1_t __a, int64x1_t __b) - { -- return (int64x1_t)__builtin_neon_vadddi (__a, __b, 1); -+ return __a + __b; - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vadd_u64 (uint64x1_t __a, uint64x1_t __b) - { -- return (uint64x1_t)__builtin_neon_vadddi ((int64x1_t) __a, (int64x1_t) __b, 0); -+ return __a + __b; - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vaddq_s8 (int8x16_t __a, int8x16_t __b) - { -- return (int8x16_t)__builtin_neon_vaddv16qi (__a, __b, 1); -+ return __a + __b; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vaddq_s16 (int16x8_t __a, int16x8_t __b) - { -- return (int16x8_t)__builtin_neon_vaddv8hi (__a, __b, 1); -+ return __a + __b; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vaddq_s32 (int32x4_t __a, int32x4_t __b) - { -- return (int32x4_t)__builtin_neon_vaddv4si (__a, __b, 1); -+ return __a + __b; - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vaddq_s64 (int64x2_t __a, int64x2_t __b) - { -- return (int64x2_t)__builtin_neon_vaddv2di (__a, __b, 1); -+ return __a + __b; - } - - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vaddq_f32 (float32x4_t __a, float32x4_t __b) - { -- return (float32x4_t)__builtin_neon_vaddv4sf (__a, __b, 3); -+#ifdef __FAST_MATH -+ return __a + __b; -+#else -+ return (float32x4_t) __builtin_neon_vaddv4sf (__a, __b, 3); -+#endif - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vaddq_u8 (uint8x16_t __a, uint8x16_t __b) - { -- return (uint8x16_t)__builtin_neon_vaddv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); -+ return __a + __b; - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vaddq_u16 (uint16x8_t __a, uint16x8_t __b) - { -- return (uint16x8_t)__builtin_neon_vaddv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); -+ return __a + __b; - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vaddq_u32 (uint32x4_t __a, uint32x4_t __b) - { -- return (uint32x4_t)__builtin_neon_vaddv4si ((int32x4_t) __a, (int32x4_t) __b, 0); -+ return __a + __b; - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vaddq_u64 (uint64x2_t __a, uint64x2_t __b) - { -- return (uint64x2_t)__builtin_neon_vaddv2di ((int64x2_t) __a, (int64x2_t) __b, 0); -+ return __a + __b; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -@@ -949,93 +956,102 @@ - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vmul_s8 (int8x8_t __a, int8x8_t __b) - { -- return (int8x8_t)__builtin_neon_vmulv8qi (__a, __b, 1); -+ return __a * __b; - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vmul_s16 (int16x4_t __a, int16x4_t __b) - { -- return (int16x4_t)__builtin_neon_vmulv4hi (__a, __b, 1); -+ return __a * __b; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vmul_s32 (int32x2_t __a, int32x2_t __b) - { -- return (int32x2_t)__builtin_neon_vmulv2si (__a, __b, 1); -+ return __a * __b; - } - - __extension__ static __inline float32x2_t __attribute__ 
((__always_inline__)) - vmul_f32 (float32x2_t __a, float32x2_t __b) - { -- return (float32x2_t)__builtin_neon_vmulv2sf (__a, __b, 3); -+#ifdef __FAST_MATH -+ return __a * __b; -+#else -+ return (float32x2_t) __builtin_neon_vmulv2sf (__a, __b, 3); -+#endif -+ - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vmul_u8 (uint8x8_t __a, uint8x8_t __b) - { -- return (uint8x8_t)__builtin_neon_vmulv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); -+ return __a * __b; - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vmul_u16 (uint16x4_t __a, uint16x4_t __b) - { -- return (uint16x4_t)__builtin_neon_vmulv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); -+ return __a * __b; - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vmul_u32 (uint32x2_t __a, uint32x2_t __b) - { -- return (uint32x2_t)__builtin_neon_vmulv2si ((int32x2_t) __a, (int32x2_t) __b, 0); -+ return __a * __b; - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vmul_p8 (poly8x8_t __a, poly8x8_t __b) --{ -- return (poly8x8_t)__builtin_neon_vmulv8qi ((int8x8_t) __a, (int8x8_t) __b, 2); --} -- - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vmulq_s8 (int8x16_t __a, int8x16_t __b) - { -- return (int8x16_t)__builtin_neon_vmulv16qi (__a, __b, 1); -+ return __a * __b; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vmulq_s16 (int16x8_t __a, int16x8_t __b) - { -- return (int16x8_t)__builtin_neon_vmulv8hi (__a, __b, 1); -+ return __a * __b; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vmulq_s32 (int32x4_t __a, int32x4_t __b) - { -- return (int32x4_t)__builtin_neon_vmulv4si (__a, __b, 1); -+ return __a * __b; - } - - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vmulq_f32 (float32x4_t __a, float32x4_t __b) - { -- return (float32x4_t)__builtin_neon_vmulv4sf (__a, __b, 3); -+#ifdef __FAST_MATH -+ return __a * __b; -+#else -+ return (float32x4_t) __builtin_neon_vmulv4sf (__a, __b, 3); -+#endif - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vmulq_u8 (uint8x16_t __a, uint8x16_t __b) - { -- return (uint8x16_t)__builtin_neon_vmulv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); -+ return __a * __b; - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vmulq_u16 (uint16x8_t __a, uint16x8_t __b) - { -- return (uint16x8_t)__builtin_neon_vmulv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); -+ return __a * __b; - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vmulq_u32 (uint32x4_t __a, uint32x4_t __b) - { -- return (uint32x4_t)__builtin_neon_vmulv4si ((int32x4_t) __a, (int32x4_t) __b, 0); -+ return __a * __b; - } - -+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+vmul_p8 (poly8x8_t __a, poly8x8_t __b) -+{ -+ return (poly8x8_t)__builtin_neon_vmulv8qi ((int8x8_t) __a, (int8x8_t) __b, 2); -+} -+ - __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) - vmulq_p8 (poly8x16_t __a, poly8x16_t __b) - { -@@ -1520,112 +1536,121 @@ - } - - #endif -+ - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vsub_s8 (int8x8_t __a, int8x8_t __b) - { -- return (int8x8_t)__builtin_neon_vsubv8qi (__a, __b, 1); -+ return __a - __b; - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vsub_s16 (int16x4_t 
__a, int16x4_t __b) - { -- return (int16x4_t)__builtin_neon_vsubv4hi (__a, __b, 1); -+ return __a - __b; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vsub_s32 (int32x2_t __a, int32x2_t __b) - { -- return (int32x2_t)__builtin_neon_vsubv2si (__a, __b, 1); -+ return __a - __b; - } - - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) - vsub_f32 (float32x2_t __a, float32x2_t __b) - { -- return (float32x2_t)__builtin_neon_vsubv2sf (__a, __b, 3); -+#ifdef __FAST_MATH -+ return __a - __b; -+#else -+ return (float32x2_t) __builtin_neon_vsubv2sf (__a, __b, 3); -+#endif - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vsub_u8 (uint8x8_t __a, uint8x8_t __b) - { -- return (uint8x8_t)__builtin_neon_vsubv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); -+ return __a - __b; - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vsub_u16 (uint16x4_t __a, uint16x4_t __b) - { -- return (uint16x4_t)__builtin_neon_vsubv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); -+ return __a - __b; - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vsub_u32 (uint32x2_t __a, uint32x2_t __b) - { -- return (uint32x2_t)__builtin_neon_vsubv2si ((int32x2_t) __a, (int32x2_t) __b, 0); -+ return __a - __b; - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vsub_s64 (int64x1_t __a, int64x1_t __b) - { -- return (int64x1_t)__builtin_neon_vsubdi (__a, __b, 1); -+ return __a - __b; - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vsub_u64 (uint64x1_t __a, uint64x1_t __b) - { -- return (uint64x1_t)__builtin_neon_vsubdi ((int64x1_t) __a, (int64x1_t) __b, 0); -+ return __a - __b; - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vsubq_s8 (int8x16_t __a, int8x16_t __b) - { -- return (int8x16_t)__builtin_neon_vsubv16qi (__a, __b, 1); -+ return __a - __b; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vsubq_s16 (int16x8_t __a, int16x8_t __b) - { -- return (int16x8_t)__builtin_neon_vsubv8hi (__a, __b, 1); -+ return __a - __b; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vsubq_s32 (int32x4_t __a, int32x4_t __b) - { -- return (int32x4_t)__builtin_neon_vsubv4si (__a, __b, 1); -+ return __a - __b; - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vsubq_s64 (int64x2_t __a, int64x2_t __b) - { -- return (int64x2_t)__builtin_neon_vsubv2di (__a, __b, 1); -+ return __a - __b; - } - - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vsubq_f32 (float32x4_t __a, float32x4_t __b) - { -- return (float32x4_t)__builtin_neon_vsubv4sf (__a, __b, 3); -+#ifdef __FAST_MATH -+ return __a - __b; -+#else -+ return (float32x4_t) __builtin_neon_vsubv4sf (__a, __b, 3); -+#endif - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vsubq_u8 (uint8x16_t __a, uint8x16_t __b) - { -- return (uint8x16_t)__builtin_neon_vsubv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); -+ return __a - __b; - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vsubq_u16 (uint16x8_t __a, uint16x8_t __b) - { -- return (uint16x8_t)__builtin_neon_vsubv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); -+ return __a - __b; - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vsubq_u32 (uint32x4_t __a, 
uint32x4_t __b) - { -- return (uint32x4_t)__builtin_neon_vsubv4si ((int32x4_t) __a, (int32x4_t) __b, 0); -+ return __a - __b; - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vsubq_u64 (uint64x2_t __a, uint64x2_t __b) - { -- return (uint64x2_t)__builtin_neon_vsubv2di ((int64x2_t) __a, (int64x2_t) __b, 0); -+ return __a - __b; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -@@ -11295,484 +11320,483 @@ - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vand_s8 (int8x8_t __a, int8x8_t __b) - { -- return (int8x8_t)__builtin_neon_vandv8qi (__a, __b, 1); -+ return __a & __b; - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vand_s16 (int16x4_t __a, int16x4_t __b) - { -- return (int16x4_t)__builtin_neon_vandv4hi (__a, __b, 1); -+ return __a & __b; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vand_s32 (int32x2_t __a, int32x2_t __b) - { -- return (int32x2_t)__builtin_neon_vandv2si (__a, __b, 1); -+ return __a & __b; - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vand_u8 (uint8x8_t __a, uint8x8_t __b) - { -- return (uint8x8_t)__builtin_neon_vandv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); -+ return __a & __b; - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vand_u16 (uint16x4_t __a, uint16x4_t __b) - { -- return (uint16x4_t)__builtin_neon_vandv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); -+ return __a & __b; - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vand_u32 (uint32x2_t __a, uint32x2_t __b) - { -- return (uint32x2_t)__builtin_neon_vandv2si ((int32x2_t) __a, (int32x2_t) __b, 0); -+ return __a & __b; - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vand_s64 (int64x1_t __a, int64x1_t __b) - { -- return (int64x1_t)__builtin_neon_vanddi (__a, __b, 1); -+ return __a & __b; - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vand_u64 (uint64x1_t __a, uint64x1_t __b) - { -- return (uint64x1_t)__builtin_neon_vanddi ((int64x1_t) __a, (int64x1_t) __b, 0); -+ return __a & __b; - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vandq_s8 (int8x16_t __a, int8x16_t __b) - { -- return (int8x16_t)__builtin_neon_vandv16qi (__a, __b, 1); -+ return __a & __b; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vandq_s16 (int16x8_t __a, int16x8_t __b) - { -- return (int16x8_t)__builtin_neon_vandv8hi (__a, __b, 1); -+ return __a & __b; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vandq_s32 (int32x4_t __a, int32x4_t __b) - { -- return (int32x4_t)__builtin_neon_vandv4si (__a, __b, 1); -+ return __a & __b; - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vandq_s64 (int64x2_t __a, int64x2_t __b) - { -- return (int64x2_t)__builtin_neon_vandv2di (__a, __b, 1); -+ return __a & __b; - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vandq_u8 (uint8x16_t __a, uint8x16_t __b) - { -- return (uint8x16_t)__builtin_neon_vandv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); -+ return __a & __b; - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vandq_u16 (uint16x8_t __a, uint16x8_t __b) - { -- return (uint16x8_t)__builtin_neon_vandv8hi ((int16x8_t) __a, (int16x8_t) 
__b, 0); -+ return __a & __b; - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vandq_u32 (uint32x4_t __a, uint32x4_t __b) - { -- return (uint32x4_t)__builtin_neon_vandv4si ((int32x4_t) __a, (int32x4_t) __b, 0); -+ return __a & __b; - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vandq_u64 (uint64x2_t __a, uint64x2_t __b) - { -- return (uint64x2_t)__builtin_neon_vandv2di ((int64x2_t) __a, (int64x2_t) __b, 0); -+ return __a & __b; - } - - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vorr_s8 (int8x8_t __a, int8x8_t __b) - { -- return (int8x8_t)__builtin_neon_vorrv8qi (__a, __b, 1); -+ return __a | __b; - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vorr_s16 (int16x4_t __a, int16x4_t __b) - { -- return (int16x4_t)__builtin_neon_vorrv4hi (__a, __b, 1); -+ return __a | __b; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vorr_s32 (int32x2_t __a, int32x2_t __b) - { -- return (int32x2_t)__builtin_neon_vorrv2si (__a, __b, 1); -+ return __a | __b; - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vorr_u8 (uint8x8_t __a, uint8x8_t __b) - { -- return (uint8x8_t)__builtin_neon_vorrv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); -+ return __a | __b; - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vorr_u16 (uint16x4_t __a, uint16x4_t __b) - { -- return (uint16x4_t)__builtin_neon_vorrv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); -+ return __a | __b; - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vorr_u32 (uint32x2_t __a, uint32x2_t __b) - { -- return (uint32x2_t)__builtin_neon_vorrv2si ((int32x2_t) __a, (int32x2_t) __b, 0); -+ return __a | __b; - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vorr_s64 (int64x1_t __a, int64x1_t __b) - { -- return (int64x1_t)__builtin_neon_vorrdi (__a, __b, 1); -+ return __a | __b; - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vorr_u64 (uint64x1_t __a, uint64x1_t __b) - { -- return (uint64x1_t)__builtin_neon_vorrdi ((int64x1_t) __a, (int64x1_t) __b, 0); -+ return __a | __b; - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vorrq_s8 (int8x16_t __a, int8x16_t __b) - { -- return (int8x16_t)__builtin_neon_vorrv16qi (__a, __b, 1); -+ return __a | __b; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vorrq_s16 (int16x8_t __a, int16x8_t __b) - { -- return (int16x8_t)__builtin_neon_vorrv8hi (__a, __b, 1); -+ return __a | __b; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vorrq_s32 (int32x4_t __a, int32x4_t __b) - { -- return (int32x4_t)__builtin_neon_vorrv4si (__a, __b, 1); -+ return __a | __b; - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vorrq_s64 (int64x2_t __a, int64x2_t __b) - { -- return (int64x2_t)__builtin_neon_vorrv2di (__a, __b, 1); -+ return __a | __b; - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vorrq_u8 (uint8x16_t __a, uint8x16_t __b) - { -- return (uint8x16_t)__builtin_neon_vorrv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); -+ return __a | __b; - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vorrq_u16 (uint16x8_t __a, uint16x8_t __b) - { -- return 
(uint16x8_t)__builtin_neon_vorrv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); -+ return __a | __b; - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vorrq_u32 (uint32x4_t __a, uint32x4_t __b) - { -- return (uint32x4_t)__builtin_neon_vorrv4si ((int32x4_t) __a, (int32x4_t) __b, 0); -+ return __a | __b; - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vorrq_u64 (uint64x2_t __a, uint64x2_t __b) - { -- return (uint64x2_t)__builtin_neon_vorrv2di ((int64x2_t) __a, (int64x2_t) __b, 0); -+ return __a | __b; - } - - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - veor_s8 (int8x8_t __a, int8x8_t __b) - { -- return (int8x8_t)__builtin_neon_veorv8qi (__a, __b, 1); -+ return __a ^ __b; - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - veor_s16 (int16x4_t __a, int16x4_t __b) - { -- return (int16x4_t)__builtin_neon_veorv4hi (__a, __b, 1); -+ return __a ^ __b; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - veor_s32 (int32x2_t __a, int32x2_t __b) - { -- return (int32x2_t)__builtin_neon_veorv2si (__a, __b, 1); -+ return __a ^ __b; - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - veor_u8 (uint8x8_t __a, uint8x8_t __b) - { -- return (uint8x8_t)__builtin_neon_veorv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); -+ return __a ^ __b; - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - veor_u16 (uint16x4_t __a, uint16x4_t __b) - { -- return (uint16x4_t)__builtin_neon_veorv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); -+ return __a ^ __b; - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - veor_u32 (uint32x2_t __a, uint32x2_t __b) - { -- return (uint32x2_t)__builtin_neon_veorv2si ((int32x2_t) __a, (int32x2_t) __b, 0); -+ return __a ^ __b; - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - veor_s64 (int64x1_t __a, int64x1_t __b) - { -- return (int64x1_t)__builtin_neon_veordi (__a, __b, 1); -+ return __a ^ __b; - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - veor_u64 (uint64x1_t __a, uint64x1_t __b) - { -- return (uint64x1_t)__builtin_neon_veordi ((int64x1_t) __a, (int64x1_t) __b, 0); -+ return __a ^ __b; - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - veorq_s8 (int8x16_t __a, int8x16_t __b) - { -- return (int8x16_t)__builtin_neon_veorv16qi (__a, __b, 1); -+ return __a ^ __b; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - veorq_s16 (int16x8_t __a, int16x8_t __b) - { -- return (int16x8_t)__builtin_neon_veorv8hi (__a, __b, 1); -+ return __a ^ __b; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - veorq_s32 (int32x4_t __a, int32x4_t __b) - { -- return (int32x4_t)__builtin_neon_veorv4si (__a, __b, 1); -+ return __a ^ __b; - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - veorq_s64 (int64x2_t __a, int64x2_t __b) - { -- return (int64x2_t)__builtin_neon_veorv2di (__a, __b, 1); -+ return __a ^ __b; - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - veorq_u8 (uint8x16_t __a, uint8x16_t __b) - { -- return (uint8x16_t)__builtin_neon_veorv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); -+ return __a ^ __b; - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - veorq_u16 
(uint16x8_t __a, uint16x8_t __b) - { -- return (uint16x8_t)__builtin_neon_veorv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); -+ return __a ^ __b; - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - veorq_u32 (uint32x4_t __a, uint32x4_t __b) - { -- return (uint32x4_t)__builtin_neon_veorv4si ((int32x4_t) __a, (int32x4_t) __b, 0); -+ return __a ^ __b; - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - veorq_u64 (uint64x2_t __a, uint64x2_t __b) - { -- return (uint64x2_t)__builtin_neon_veorv2di ((int64x2_t) __a, (int64x2_t) __b, 0); -+ return __a ^ __b; - } - - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vbic_s8 (int8x8_t __a, int8x8_t __b) - { -- return (int8x8_t)__builtin_neon_vbicv8qi (__a, __b, 1); -+ return __a & ~__b; - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vbic_s16 (int16x4_t __a, int16x4_t __b) - { -- return (int16x4_t)__builtin_neon_vbicv4hi (__a, __b, 1); -+ return __a & ~__b; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vbic_s32 (int32x2_t __a, int32x2_t __b) - { -- return (int32x2_t)__builtin_neon_vbicv2si (__a, __b, 1); -+ return __a & ~__b; - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vbic_u8 (uint8x8_t __a, uint8x8_t __b) - { -- return (uint8x8_t)__builtin_neon_vbicv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); -+ return __a & ~__b; - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vbic_u16 (uint16x4_t __a, uint16x4_t __b) - { -- return (uint16x4_t)__builtin_neon_vbicv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); -+ return __a & ~__b; - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vbic_u32 (uint32x2_t __a, uint32x2_t __b) - { -- return (uint32x2_t)__builtin_neon_vbicv2si ((int32x2_t) __a, (int32x2_t) __b, 0); -+ return __a & ~__b; - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vbic_s64 (int64x1_t __a, int64x1_t __b) - { -- return (int64x1_t)__builtin_neon_vbicdi (__a, __b, 1); -+ return __a & ~__b; - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vbic_u64 (uint64x1_t __a, uint64x1_t __b) - { -- return (uint64x1_t)__builtin_neon_vbicdi ((int64x1_t) __a, (int64x1_t) __b, 0); -+ return __a & ~__b; - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vbicq_s8 (int8x16_t __a, int8x16_t __b) - { -- return (int8x16_t)__builtin_neon_vbicv16qi (__a, __b, 1); -+ return __a & ~__b; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vbicq_s16 (int16x8_t __a, int16x8_t __b) - { -- return (int16x8_t)__builtin_neon_vbicv8hi (__a, __b, 1); -+ return __a & ~__b; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vbicq_s32 (int32x4_t __a, int32x4_t __b) - { -- return (int32x4_t)__builtin_neon_vbicv4si (__a, __b, 1); -+ return __a & ~__b; - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vbicq_s64 (int64x2_t __a, int64x2_t __b) - { -- return (int64x2_t)__builtin_neon_vbicv2di (__a, __b, 1); -+ return __a & ~__b; - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vbicq_u8 (uint8x16_t __a, uint8x16_t __b) - { -- return (uint8x16_t)__builtin_neon_vbicv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); -+ return __a & ~__b; - } - - __extension__ static __inline 
uint16x8_t __attribute__ ((__always_inline__)) - vbicq_u16 (uint16x8_t __a, uint16x8_t __b) - { -- return (uint16x8_t)__builtin_neon_vbicv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); -+ return __a & ~__b; - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vbicq_u32 (uint32x4_t __a, uint32x4_t __b) - { -- return (uint32x4_t)__builtin_neon_vbicv4si ((int32x4_t) __a, (int32x4_t) __b, 0); -+ return __a & ~__b; - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vbicq_u64 (uint64x2_t __a, uint64x2_t __b) - { -- return (uint64x2_t)__builtin_neon_vbicv2di ((int64x2_t) __a, (int64x2_t) __b, 0); -+ return __a & ~__b; - } - - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vorn_s8 (int8x8_t __a, int8x8_t __b) - { -- return (int8x8_t)__builtin_neon_vornv8qi (__a, __b, 1); -+ return __a | ~__b; - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vorn_s16 (int16x4_t __a, int16x4_t __b) - { -- return (int16x4_t)__builtin_neon_vornv4hi (__a, __b, 1); -+ return __a | ~__b; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vorn_s32 (int32x2_t __a, int32x2_t __b) - { -- return (int32x2_t)__builtin_neon_vornv2si (__a, __b, 1); -+ return __a | ~__b; - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vorn_u8 (uint8x8_t __a, uint8x8_t __b) - { -- return (uint8x8_t)__builtin_neon_vornv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); -+ return __a | ~__b; - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vorn_u16 (uint16x4_t __a, uint16x4_t __b) - { -- return (uint16x4_t)__builtin_neon_vornv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); -+ return __a | ~__b; - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vorn_u32 (uint32x2_t __a, uint32x2_t __b) - { -- return (uint32x2_t)__builtin_neon_vornv2si ((int32x2_t) __a, (int32x2_t) __b, 0); -+ return __a | ~__b; - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vorn_s64 (int64x1_t __a, int64x1_t __b) - { -- return (int64x1_t)__builtin_neon_vorndi (__a, __b, 1); -+ return __a | ~__b; - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vorn_u64 (uint64x1_t __a, uint64x1_t __b) - { -- return (uint64x1_t)__builtin_neon_vorndi ((int64x1_t) __a, (int64x1_t) __b, 0); -+ return __a | ~__b; - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vornq_s8 (int8x16_t __a, int8x16_t __b) - { -- return (int8x16_t)__builtin_neon_vornv16qi (__a, __b, 1); -+ return __a | ~__b; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vornq_s16 (int16x8_t __a, int16x8_t __b) - { -- return (int16x8_t)__builtin_neon_vornv8hi (__a, __b, 1); -+ return __a | ~__b; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vornq_s32 (int32x4_t __a, int32x4_t __b) - { -- return (int32x4_t)__builtin_neon_vornv4si (__a, __b, 1); -+ return __a | ~__b; - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vornq_s64 (int64x2_t __a, int64x2_t __b) - { -- return (int64x2_t)__builtin_neon_vornv2di (__a, __b, 1); -+ return __a | ~__b; - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vornq_u8 (uint8x16_t __a, uint8x16_t __b) - { -- return (uint8x16_t)__builtin_neon_vornv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); -+ 
return __a | ~__b; - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vornq_u16 (uint16x8_t __a, uint16x8_t __b) - { -- return (uint16x8_t)__builtin_neon_vornv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); -+ return __a | ~__b; - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vornq_u32 (uint32x4_t __a, uint32x4_t __b) - { -- return (uint32x4_t)__builtin_neon_vornv4si ((int32x4_t) __a, (int32x4_t) __b, 0); -+ return __a | ~__b; - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vornq_u64 (uint64x2_t __a, uint64x2_t __b) - { -- return (uint64x2_t)__builtin_neon_vornv2di ((int64x2_t) __a, (int64x2_t) __b, 0); -+ return __a | ~__b; - } - -- - __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) - vreinterpret_p8_p16 (poly16x4_t __a) - { ---- a/src/gcc/config/arm/aarch-common.c -+++ b/src/gcc/config/arm/aarch-common.c -@@ -191,6 +191,83 @@ - return 0; - } - -+bool -+aarch_rev16_shright_mask_imm_p (rtx val, enum machine_mode mode) -+{ -+ return CONST_INT_P (val) -+ && INTVAL (val) -+ == trunc_int_for_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff), -+ mode); -+} -+ -+bool -+aarch_rev16_shleft_mask_imm_p (rtx val, enum machine_mode mode) -+{ -+ return CONST_INT_P (val) -+ && INTVAL (val) -+ == trunc_int_for_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff00), -+ mode); -+} -+ -+ -+static bool -+aarch_rev16_p_1 (rtx lhs, rtx rhs, enum machine_mode mode) -+{ -+ if (GET_CODE (lhs) == AND -+ && GET_CODE (XEXP (lhs, 0)) == ASHIFT -+ && CONST_INT_P (XEXP (XEXP (lhs, 0), 1)) -+ && INTVAL (XEXP (XEXP (lhs, 0), 1)) == 8 -+ && REG_P (XEXP (XEXP (lhs, 0), 0)) -+ && CONST_INT_P (XEXP (lhs, 1)) -+ && GET_CODE (rhs) == AND -+ && GET_CODE (XEXP (rhs, 0)) == LSHIFTRT -+ && REG_P (XEXP (XEXP (rhs, 0), 0)) -+ && CONST_INT_P (XEXP (XEXP (rhs, 0), 1)) -+ && INTVAL (XEXP (XEXP (rhs, 0), 1)) == 8 -+ && CONST_INT_P (XEXP (rhs, 1)) -+ && REGNO (XEXP (XEXP (rhs, 0), 0)) == REGNO (XEXP (XEXP (lhs, 0), 0))) -+ -+ { -+ rtx lhs_mask = XEXP (lhs, 1); -+ rtx rhs_mask = XEXP (rhs, 1); -+ -+ return aarch_rev16_shright_mask_imm_p (rhs_mask, mode) -+ && aarch_rev16_shleft_mask_imm_p (lhs_mask, mode); -+ } -+ -+ return false; -+} -+ -+/* Recognise a sequence of bitwise operations corresponding to a rev16 operation. -+ These will be of the form: -+ ((x >> 8) & 0x00ff00ff) -+ | ((x << 8) & 0xff00ff00) -+ for SImode and with similar but wider bitmasks for DImode. -+ The two sub-expressions of the IOR can appear on either side so check both -+ permutations with the help of aarch_rev16_p_1 above. */ -+ -+bool -+aarch_rev16_p (rtx x) -+{ -+ rtx left_sub_rtx, right_sub_rtx; -+ bool is_rev = false; -+ -+ if (GET_CODE (x) != IOR) -+ return false; -+ -+ left_sub_rtx = XEXP (x, 0); -+ right_sub_rtx = XEXP (x, 1); -+ -+ /* There are no canonicalisation rules for the position of the two shifts -+ involved in a rev, so try both permutations. */ -+ is_rev = aarch_rev16_p_1 (left_sub_rtx, right_sub_rtx, GET_MODE (x)); -+ -+ if (!is_rev) -+ is_rev = aarch_rev16_p_1 (right_sub_rtx, left_sub_rtx, GET_MODE (x)); -+ -+ return is_rev; -+} -+ - /* Return nonzero if the CONSUMER instruction (a load) does need - PRODUCER's value to calculate the address. 
*/ - int ---- a/src/gcc/config/arm/arm-fpus.def -+++ b/src/gcc/config/arm/arm-fpus.def -@@ -37,6 +37,8 @@ - ARM_FPU("vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, false, true, false) - ARM_FPU("vfpv4-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_D16, false, true, false) - ARM_FPU("fpv4-sp-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_SINGLE, false, true, false) -+ARM_FPU("fpv5-sp-d16", ARM_FP_MODEL_VFP, 5, VFP_REG_SINGLE, false, true, false) -+ARM_FPU("fpv5-d16", ARM_FP_MODEL_VFP, 5, VFP_REG_D16, false, true, false) - ARM_FPU("neon-vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, true, true, false) - ARM_FPU("fp-armv8", ARM_FP_MODEL_VFP, 8, VFP_REG_D32, false, true, false) - ARM_FPU("neon-fp-armv8",ARM_FP_MODEL_VFP, 8, VFP_REG_D32, true, true, false) ---- a/src/gcc/config/arm/cortex-a53.md -+++ b/src/gcc/config/arm/cortex-a53.md -@@ -75,7 +75,7 @@ - (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ - alu_reg,alus_reg,logic_reg,logics_reg,\ - adc_imm,adcs_imm,adc_reg,adcs_reg,\ -- adr,bfm,csel,rev,\ -+ adr,bfm,csel,clz,rbit,rev,\ - shift_imm,shift_reg,\ - mov_imm,mov_reg,mvn_imm,mvn_reg,\ - mrs,multiple,no_insn")) -@@ -84,8 +84,8 @@ - (define_insn_reservation "cortex_a53_alu_shift" 2 - (and (eq_attr "tune" "cortexa53") - (eq_attr "type" "alu_shift_imm,alus_shift_imm,\ -- logic_shift_imm,logics_shift_imm,\ -- alu_shift_reg,alus_shift_reg,\ -+ crc,logic_shift_imm,logics_shift_imm,\ -+ alu_ext,alus_ext,alu_shift_reg,alus_shift_reg,\ - logic_shift_reg,logics_shift_reg,\ - extend,mov_shift,mov_shift_reg,\ - mvn_shift,mvn_shift_reg")) -@@ -216,7 +216,8 @@ - (and (eq_attr "tune" "cortexa53") - (eq_attr "type" "ffariths, fadds, ffarithd, faddd, fmov, fmuls,\ - f_cvt,f_cvtf2i,f_cvti2f,\ -- fcmps, fcmpd, fcsel")) -+ fcmps, fcmpd, fcsel, f_rints, f_rintd, f_minmaxs,\ -+ f_minmaxd")) - "cortex_a53_slot0+cortex_a53_fpadd_pipe") - - (define_insn_reservation "cortex_a53_fconst" 2 ---- a/src/gcc/config/arm/bpabi.h -+++ b/src/gcc/config/arm/bpabi.h -@@ -73,7 +73,7 @@ - |mcpu=generic-armv7-a \ - |march=armv7ve \ - |march=armv7-m|mcpu=cortex-m3 \ -- |march=armv7e-m|mcpu=cortex-m4 \ -+ |march=armv7e-m|mcpu=cortex-m4|mcpu=cortex-m7 \ - |march=armv6-m|mcpu=cortex-m0 \ - |march=armv8-a \ - :%{!r:--be8}}}" -@@ -91,7 +91,7 @@ - |mcpu=generic-armv7-a \ - |march=armv7ve \ - |march=armv7-m|mcpu=cortex-m3 \ -- |march=armv7e-m|mcpu=cortex-m4 \ -+ |march=armv7e-m|mcpu=cortex-m4|mcpu=cortex-m7 \ - |march=armv6-m|mcpu=cortex-m0 \ - |march=armv8-a \ - :%{!r:--be8}}}" ---- a/src/gcc/config/arm/iterators.md -+++ b/src/gcc/config/arm/iterators.md -@@ -116,6 +116,9 @@ - ;; Vector modes including 64-bit integer elements, but no floats. - (define_mode_iterator VDQIX [V8QI V16QI V4HI V8HI V2SI V4SI DI V2DI]) - -+;; Vector modes for H, S and D types. -+(define_mode_iterator VDQHSD [V4HI V8HI V2SI V4SI V2DI]) -+ - ;; Vector modes for float->int conversions. - (define_mode_iterator VCVTF [V2SF V4SF]) - -@@ -191,6 +194,23 @@ - ;; Right shifts - (define_code_iterator rshifts [ashiftrt lshiftrt]) - -+;; Iterator for integer conversions -+(define_code_iterator FIXUORS [fix unsigned_fix]) -+ -+;; Binary operators whose second operand can be shifted. -+(define_code_iterator shiftable_ops [plus minus ior xor and]) -+ -+;; plus and minus are the only shiftable_ops for which Thumb2 allows -+;; a stack pointer opoerand. The minus operation is a candidate for an rsub -+;; and hence only plus is supported. 
-+(define_code_attr t2_binop0 -+ [(plus "rk") (minus "r") (ior "r") (xor "r") (and "r")]) -+ -+;; The instruction to use when a shiftable_ops has a shift operation as -+;; its first operand. -+(define_code_attr arith_shift_insn -+ [(plus "add") (minus "rsb") (ior "orr") (xor "eor") (and "and")]) -+ - ;;---------------------------------------------------------------------------- - ;; Int iterators - ;;---------------------------------------------------------------------------- -@@ -198,9 +218,13 @@ - (define_int_iterator VRINT [UNSPEC_VRINTZ UNSPEC_VRINTP UNSPEC_VRINTM - UNSPEC_VRINTR UNSPEC_VRINTX UNSPEC_VRINTA]) - -+(define_int_iterator VCVT [UNSPEC_VRINTP UNSPEC_VRINTM UNSPEC_VRINTA]) -+ - (define_int_iterator NEON_VRINT [UNSPEC_NVRINTP UNSPEC_NVRINTZ UNSPEC_NVRINTM - UNSPEC_NVRINTX UNSPEC_NVRINTA UNSPEC_NVRINTN]) - -+(define_int_iterator NEON_VCVT [UNSPEC_NVRINTP UNSPEC_NVRINTM UNSPEC_NVRINTA]) -+ - (define_int_iterator CRC [UNSPEC_CRC32B UNSPEC_CRC32H UNSPEC_CRC32W - UNSPEC_CRC32CB UNSPEC_CRC32CH UNSPEC_CRC32CW]) - -@@ -502,6 +526,13 @@ - ;; Assembler mnemonics for signedness of widening operations. - (define_code_attr US [(sign_extend "s") (zero_extend "u")]) - -+;; Signedness suffix for float->fixed conversions. Empty for signed -+;; conversion. -+(define_code_attr su_optab [(fix "") (unsigned_fix "u")]) -+ -+;; Sign prefix to use in instruction type suffixes, i.e. s32, u32. -+(define_code_attr su [(fix "s") (unsigned_fix "u")]) -+ - ;; Right shifts - (define_code_attr shift [(ashiftrt "ashr") (lshiftrt "lshr")]) - (define_code_attr shifttype [(ashiftrt "signed") (lshiftrt "unsigned")]) ---- a/src/gcc/config/arm/arm.md -+++ b/src/gcc/config/arm/arm.md -@@ -205,17 +205,9 @@ - (const_string "yes")] - (const_string "no"))) - --; Allows an insn to disable certain alternatives for reasons other than --; arch support. --(define_attr "insn_enabled" "no,yes" -- (const_string "yes")) -- - ; Enable all alternatives that are both arch_enabled and insn_enabled. 
- (define_attr "enabled" "no,yes" -- (cond [(eq_attr "insn_enabled" "no") -- (const_string "no") -- -- (and (eq_attr "predicable_short_it" "no") -+ (cond [(and (eq_attr "predicable_short_it" "no") - (and (eq_attr "predicated" "yes") - (match_test "arm_restrict_it"))) - (const_string "no") -@@ -2868,6 +2860,28 @@ - (set_attr "type" "multiple")] - ) - -+(define_insn_and_split "*anddi_notdi_zesidi" -+ [(set (match_operand:DI 0 "s_register_operand" "=r") -+ (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "r")) -+ (zero_extend:DI -+ (match_operand:SI 1 "s_register_operand" "r"))))] -+ "TARGET_32BIT" -+ "#" -+ "TARGET_32BIT && reload_completed" -+ [(set (match_dup 0) (and:SI (not:SI (match_dup 2)) (match_dup 1))) -+ (set (match_dup 3) (const_int 0))] -+ " -+ { -+ operands[3] = gen_highpart (SImode, operands[0]); -+ operands[0] = gen_lowpart (SImode, operands[0]); -+ operands[2] = gen_lowpart (SImode, operands[2]); -+ }" -+ [(set_attr "length" "8") -+ (set_attr "predicable" "yes") -+ (set_attr "predicable_short_it" "no") -+ (set_attr "type" "multiple")] -+) -+ - (define_insn_and_split "*anddi_notsesidi_di" - [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") - (and:DI (not:DI (sign_extend:DI -@@ -8906,7 +8920,7 @@ - return \"\"; - }" - [(set_attr "conds" "use") -- (set_attr "type" "f_sel<vfp_type>")] -+ (set_attr "type" "fcsel")] - ) - - (define_insn_and_split "*movsicc_insn" -@@ -9343,8 +9357,10 @@ - "TARGET_32BIT" - " - { -- if (!REG_P (XEXP (operands[0], 0)) -- && (GET_CODE (XEXP (operands[0], 0)) != SYMBOL_REF)) -+ if ((!REG_P (XEXP (operands[0], 0)) -+ && GET_CODE (XEXP (operands[0], 0)) != SYMBOL_REF) -+ || (GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF -+ && arm_is_long_call_p (SYMBOL_REF_DECL (XEXP (operands[0], 0))))) - XEXP (operands[0], 0) = force_reg (SImode, XEXP (operands[0], 0)); - - if (operands[2] == NULL_RTX) -@@ -9361,8 +9377,10 @@ - "TARGET_32BIT" - " - { -- if (!REG_P (XEXP (operands[1], 0)) && -- (GET_CODE (XEXP (operands[1],0)) != SYMBOL_REF)) -+ if ((!REG_P (XEXP (operands[1], 0)) -+ && GET_CODE (XEXP (operands[1], 0)) != SYMBOL_REF) -+ || (GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF -+ && arm_is_long_call_p (SYMBOL_REF_DECL (XEXP (operands[1], 0))))) - XEXP (operands[1], 0) = force_reg (SImode, XEXP (operands[1], 0)); - - if (operands[3] == NULL_RTX) -@@ -9848,39 +9866,35 @@ - - ;; Patterns to allow combination of arithmetic, cond code and shifts - --(define_insn "*arith_shiftsi" -- [(set (match_operand:SI 0 "s_register_operand" "=r,r,r,r") -- (match_operator:SI 1 "shiftable_operator" -- [(match_operator:SI 3 "shift_operator" -- [(match_operand:SI 4 "s_register_operand" "r,r,r,r") -- (match_operand:SI 5 "shift_amount_operand" "M,M,M,r")]) -- (match_operand:SI 2 "s_register_operand" "rk,rk,r,rk")]))] -+(define_insn "*<arith_shift_insn>_multsi" -+ [(set (match_operand:SI 0 "s_register_operand" "=r,r") -+ (shiftable_ops:SI -+ (mult:SI (match_operand:SI 2 "s_register_operand" "r,r") -+ (match_operand:SI 3 "power_of_two_operand" "")) -+ (match_operand:SI 1 "s_register_operand" "rk,<t2_binop0>")))] - "TARGET_32BIT" -- "%i1%?\\t%0, %2, %4%S3" -+ "<arith_shift_insn>%?\\t%0, %1, %2, lsl %b3" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "shift" "4") -- (set_attr "arch" "a,t2,t2,a") -- ;; Thumb2 doesn't allow the stack pointer to be used for -- ;; operand1 for all operations other than add and sub. In this case -- ;; the minus operation is a candidate for an rsub and hence needs -- ;; to be disabled. 
-- ;; We have to make sure to disable the fourth alternative if -- ;; the shift_operator is MULT, since otherwise the insn will -- ;; also match a multiply_accumulate pattern and validate_change -- ;; will allow a replacement of the constant with a register -- ;; despite the checks done in shift_operator. -- (set_attr_alternative "insn_enabled" -- [(const_string "yes") -- (if_then_else -- (match_operand:SI 1 "add_operator" "") -- (const_string "yes") (const_string "no")) -- (const_string "yes") -- (if_then_else -- (match_operand:SI 3 "mult_operator" "") -- (const_string "no") (const_string "yes"))]) -- (set_attr "type" "alu_shift_imm,alu_shift_imm,alu_shift_imm,alu_shift_reg")]) -+ (set_attr "arch" "a,t2") -+ (set_attr "type" "alu_shift_imm")]) - -+(define_insn "*<arith_shift_insn>_shiftsi" -+ [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") -+ (shiftable_ops:SI -+ (match_operator:SI 2 "shift_nomul_operator" -+ [(match_operand:SI 3 "s_register_operand" "r,r,r") -+ (match_operand:SI 4 "shift_amount_operand" "M,M,r")]) -+ (match_operand:SI 1 "s_register_operand" "rk,<t2_binop0>,rk")))] -+ "TARGET_32BIT && GET_CODE (operands[3]) != MULT" -+ "<arith_shift_insn>%?\\t%0, %1, %3%S2" -+ [(set_attr "predicable" "yes") -+ (set_attr "predicable_short_it" "no") -+ (set_attr "shift" "4") -+ (set_attr "arch" "a,t2,a") -+ (set_attr "type" "alu_shift_imm,alu_shift_imm,alu_shift_reg")]) -+ - (define_split - [(set (match_operand:SI 0 "s_register_operand" "") - (match_operator:SI 1 "shiftable_operator" -@@ -12169,7 +12183,7 @@ - int num_regs = XVECLEN (operands[0], 0); - char pattern[100]; - rtx op_list[2]; -- strcpy (pattern, \"fldmfdd\\t\"); -+ strcpy (pattern, \"vldm\\t\"); - strcat (pattern, reg_names[REGNO (SET_DEST (XVECEXP (operands[0], 0, 0)))]); - strcat (pattern, \"!, {\"); - op_list[0] = XEXP (XVECEXP (operands[0], 0, 1), 0); -@@ -12373,6 +12387,7 @@ - "TARGET_32BIT && arm_arch5" - "clz%?\\t%0, %1" - [(set_attr "predicable" "yes") -+ (set_attr "predicable_short_it" "no") - (set_attr "type" "clz")]) - - (define_insn "rbitsi2" -@@ -12381,6 +12396,7 @@ - "TARGET_32BIT && arm_arch_thumb2" - "rbit%?\\t%0, %1" - [(set_attr "predicable" "yes") -+ (set_attr "predicable_short_it" "no") - (set_attr "type" "clz")]) - - (define_expand "ctzsi2" -@@ -12556,6 +12572,8 @@ - rev%?\t%0, %1" - [(set_attr "arch" "t1,t2,32") - (set_attr "length" "2,2,4") -+ (set_attr "predicable" "no,yes,yes") -+ (set_attr "predicable_short_it" "no") - (set_attr "type" "rev")] - ) - -@@ -12673,6 +12691,44 @@ - (set_attr "type" "rev")] - ) - -+;; There are no canonicalisation rules for the position of the lshiftrt, ashift -+;; operations within an IOR/AND RTX, therefore we have two patterns matching -+;; each valid permutation. 
-+ -+(define_insn "arm_rev16si2" -+ [(set (match_operand:SI 0 "register_operand" "=l,l,r") -+ (ior:SI (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "l,l,r") -+ (const_int 8)) -+ (match_operand:SI 3 "const_int_operand" "n,n,n")) -+ (and:SI (lshiftrt:SI (match_dup 1) -+ (const_int 8)) -+ (match_operand:SI 2 "const_int_operand" "n,n,n"))))] -+ "arm_arch6 -+ && aarch_rev16_shleft_mask_imm_p (operands[3], SImode) -+ && aarch_rev16_shright_mask_imm_p (operands[2], SImode)" -+ "rev16\\t%0, %1" -+ [(set_attr "arch" "t1,t2,32") -+ (set_attr "length" "2,2,4") -+ (set_attr "type" "rev")] -+) -+ -+(define_insn "arm_rev16si2_alt" -+ [(set (match_operand:SI 0 "register_operand" "=l,l,r") -+ (ior:SI (and:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" "l,l,r") -+ (const_int 8)) -+ (match_operand:SI 2 "const_int_operand" "n,n,n")) -+ (and:SI (ashift:SI (match_dup 1) -+ (const_int 8)) -+ (match_operand:SI 3 "const_int_operand" "n,n,n"))))] -+ "arm_arch6 -+ && aarch_rev16_shleft_mask_imm_p (operands[3], SImode) -+ && aarch_rev16_shright_mask_imm_p (operands[2], SImode)" -+ "rev16\\t%0, %1" -+ [(set_attr "arch" "t1,t2,32") -+ (set_attr "length" "2,2,4") -+ (set_attr "type" "rev")] -+) -+ - (define_expand "bswaphi2" - [(set (match_operand:HI 0 "s_register_operand" "=r") - (bswap:HI (match_operand:HI 1 "s_register_operand" "r")))] ---- a/src/gcc/config/arm/cortex-a5.md -+++ b/src/gcc/config/arm/cortex-a5.md -@@ -61,7 +61,7 @@ - (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ - alu_reg,alus_reg,logic_reg,logics_reg,\ - adc_imm,adcs_imm,adc_reg,adcs_reg,\ -- adr,bfm,rev,\ -+ adr,bfm,clz,rbit,rev,\ - shift_imm,shift_reg,\ - mov_imm,mov_reg,mvn_imm,mvn_reg,\ - mrs,multiple,no_insn")) ---- a/src/gcc/config/arm/cortex-a9.md -+++ b/src/gcc/config/arm/cortex-a9.md -@@ -83,7 +83,7 @@ - (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ - alu_reg,alus_reg,logic_reg,logics_reg,\ - adc_imm,adcs_imm,adc_reg,adcs_reg,\ -- adr,bfm,rev,\ -+ adr,bfm,clz,rbit,rev,\ - shift_imm,shift_reg,\ - mov_imm,mov_reg,mvn_imm,mvn_reg,\ - mov_shift_reg,mov_shift,\ ---- a/src/gcc/config/mips/mips.c -+++ b/src/gcc/config/mips/mips.c -@@ -7197,12 +7197,17 @@ - emit_insn (gen_slt_sf (dest, fp2, fp1)); - } - --/* Implement MOVE_BY_PIECES_P. */ -+/* Implement TARGET_USE_MOVE_BY_PIECES_INFRASTRUCTURE_P. */ - - bool --mips_move_by_pieces_p (unsigned HOST_WIDE_INT size, unsigned int align) -+mips_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size, -+ unsigned int align, -+ enum by_pieces_operation op, -+ bool speed_p) - { -- if (HAVE_movmemsi) -+ if (op == STORE_BY_PIECES) -+ return mips_store_by_pieces_p (size, align); -+ if (op == MOVE_BY_PIECES && HAVE_movmemsi) - { - /* movmemsi is meant to generate code that is at least as good as - move_by_pieces. However, movmemsi effectively uses a by-pieces -@@ -7219,13 +7224,12 @@ - return size < UNITS_PER_WORD; - return size <= MIPS_MAX_MOVE_BYTES_STRAIGHT; - } -- /* The default value. If this becomes a target hook, we should -- call the default definition instead. */ -- return (move_by_pieces_ninsns (size, align, MOVE_MAX_PIECES + 1) -- < (unsigned int) MOVE_RATIO (optimize_insn_for_speed_p ())); -+ -+ return default_use_by_pieces_infrastructure_p (size, align, op, speed_p); - } - --/* Implement STORE_BY_PIECES_P. */ -+/* Implement a handler for STORE_BY_PIECES operations -+ for TARGET_USE_MOVE_BY_PIECES_INFRASTRUCTURE_P. 
*/ - - bool - mips_store_by_pieces_p (unsigned HOST_WIDE_INT size, unsigned int align) -@@ -19134,6 +19138,10 @@ - #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV - #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV mips_atomic_assign_expand_fenv - -+#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P -+#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \ -+ mips_use_by_pieces_infrastructure_p -+ - struct gcc_target targetm = TARGET_INITIALIZER; - - #include "gt-mips.h" ---- a/src/gcc/config/mips/mips.h -+++ b/src/gcc/config/mips/mips.h -@@ -2867,9 +2867,6 @@ - ? MIPS_MAX_MOVE_BYTES_STRAIGHT / MOVE_MAX \ - : MIPS_CALL_RATIO / 2) - --#define MOVE_BY_PIECES_P(SIZE, ALIGN) \ -- mips_move_by_pieces_p (SIZE, ALIGN) -- - /* For CLEAR_RATIO, when optimizing for size, give a better estimate - of the length of a memset call, but use the default otherwise. */ - -@@ -2882,9 +2879,6 @@ - - #define SET_RATIO(speed) \ - ((speed) ? 15 : MIPS_CALL_RATIO - 2) -- --#define STORE_BY_PIECES_P(SIZE, ALIGN) \ -- mips_store_by_pieces_p (SIZE, ALIGN) - - /* Since the bits of the _init and _fini function is spread across - many object files, each potentially with its own GP, we must assume ---- a/src/gcc/params.def -+++ b/src/gcc/params.def -@@ -303,7 +303,7 @@ - DEFPARAM(PARAM_MAX_COMPLETELY_PEELED_INSNS, - "max-completely-peeled-insns", - "The maximum number of insns of a completely peeled loop", -- 100, 0, 0) -+ 200, 0, 0) - /* The maximum number of peelings of a single loop that is peeled completely. */ - DEFPARAM(PARAM_MAX_COMPLETELY_PEEL_TIMES, - "max-completely-peel-times", -@@ -1095,6 +1095,21 @@ - "Maximum number of nested calls to search for control dependencies " - "during uninitialized variable analysis", - 1000, 1, 0) -+ -+DEFPARAM (PARAM_MAX_FSM_THREAD_PATH_INSNS, -+ "max-fsm-thread-path-insns", -+ "Maximum number of instructions to copy when duplicating blocks on a finite state automaton jump thread path", -+ 100, 1, 999999) -+ -+DEFPARAM (PARAM_MAX_FSM_THREAD_LENGTH, -+ "max-fsm-thread-length", -+ "Maximum number of basic blocks on a finite state automaton jump thread path", -+ 10, 1, 999999) -+ -+DEFPARAM (PARAM_MAX_FSM_THREAD_PATHS, -+ "max-fsm-thread-paths", -+ "Maximum number of new jump thread paths to create for a finite state automaton", -+ 50, 1, 999999) - /* - - Local variables: ---- a/src/gcc/tree-ssa-threadedge.c -+++ b/src/gcc/tree-ssa-threadedge.c -@@ -617,6 +617,7 @@ - rather than use a relational operator. These are simpler to handle. */ - if (TREE_CODE (cond) == SSA_NAME) - { -+ tree original_lhs = cond; - cached_lhs = cond; - - /* Get the variable's current value from the equivalence chains. -@@ -638,6 +639,12 @@ - pass specific callback to try and simplify it further. */ - if (cached_lhs && ! is_gimple_min_invariant (cached_lhs)) - cached_lhs = (*simplify) (stmt, stmt); -+ -+ /* We couldn't find an invariant. But, callers of this -+ function may be able to do something useful with the -+ unmodified destination. */ -+ if (!cached_lhs) -+ cached_lhs = original_lhs; - } - else - cached_lhs = NULL; -@@ -897,6 +904,248 @@ - return false; - } - -+/* Return true if the CFG contains at least one path from START_BB to END_BB. -+ When a path is found, record in PATH the blocks from END_BB to START_BB. -+ VISITED_BBS is used to make sure we don't fall into an infinite loop. Bound -+ the recursion to basic blocks belonging to LOOP. 
*/ -+ -+static bool -+fsm_find_thread_path (basic_block start_bb, basic_block end_bb, -+ vec<basic_block, va_gc> *&path, -+ pointer_set_t *visited_bbs, loop_p loop) -+{ -+ if (loop != start_bb->loop_father) -+ return false; -+ -+ if (start_bb == end_bb) -+ { -+ vec_safe_push (path, start_bb); -+ return true; -+ } -+ -+ if (!pointer_set_insert (visited_bbs, start_bb)) -+ { -+ edge e; -+ edge_iterator ei; -+ FOR_EACH_EDGE (e, ei, start_bb->succs) -+ if (fsm_find_thread_path (e->dest, end_bb, path, visited_bbs, loop)) -+ { -+ vec_safe_push (path, start_bb); -+ return true; -+ } -+ } -+ -+ return false; -+} -+ -+static int max_threaded_paths; -+ -+/* We trace the value of the variable EXPR back through any phi nodes looking -+ for places where it gets a constant value and save the path. Stop after -+ having recorded MAX_PATHS jump threading paths. */ -+ -+static void -+fsm_find_control_statement_thread_paths (tree expr, -+ pointer_set_t *visited_phis, -+ vec<basic_block, va_gc> *&path) -+{ -+ tree var = SSA_NAME_VAR (expr); -+ gimple def_stmt = SSA_NAME_DEF_STMT (expr); -+ basic_block var_bb = gimple_bb (def_stmt); -+ -+ if (var == NULL || var_bb == NULL) -+ return; -+ -+ /* For the moment we assume that an SSA chain only contains phi nodes, and -+ eventually one of the phi arguments will be an integer constant. In the -+ future, this could be extended to also handle simple assignments of -+ arithmetic operations. */ -+ if (gimple_code (def_stmt) != GIMPLE_PHI) -+ return; -+ -+ /* Avoid infinite recursion. */ -+ if (pointer_set_insert (visited_phis, def_stmt)) -+ return; -+ -+ int next_path_length = 0; -+ basic_block last_bb_in_path = path->last (); -+ -+ /* Following the chain of SSA_NAME definitions, we jumped from a definition in -+ LAST_BB_IN_PATH to a definition in VAR_BB. When these basic blocks are -+ different, append to PATH the blocks from LAST_BB_IN_PATH to VAR_BB. */ -+ if (var_bb != last_bb_in_path) -+ { -+ edge e; -+ int e_count = 0; -+ edge_iterator ei; -+ vec<basic_block, va_gc> *next_path; -+ vec_alloc (next_path, n_basic_blocks_for_fn (cfun)); -+ -+ FOR_EACH_EDGE (e, ei, last_bb_in_path->preds) -+ { -+ pointer_set_t *visited_bbs = pointer_set_create (); -+ -+ if (fsm_find_thread_path (var_bb, e->src, next_path, visited_bbs, -+ e->src->loop_father)) -+ ++e_count; -+ -+ pointer_set_destroy (visited_bbs); -+ -+ /* If there is more than one path, stop. */ -+ if (e_count > 1) -+ { -+ vec_free (next_path); -+ return; -+ } -+ } -+ -+ /* Stop if we have not found a path: this could occur when the recursion -+ is stopped by one of the bounds. */ -+ if (e_count == 0) -+ { -+ vec_free (next_path); -+ return; -+ } -+ -+ /* Append all the nodes from NEXT_PATH to PATH. */ -+ vec_safe_splice (path, next_path); -+ next_path_length = next_path->length (); -+ vec_free (next_path); -+ } -+ -+ gcc_assert (path->last () == var_bb); -+ -+ /* Iterate over the arguments of PHI. */ -+ unsigned int i; -+ for (i = 0; i < gimple_phi_num_args (def_stmt); i++) -+ { -+ tree arg = gimple_phi_arg_def (def_stmt, i); -+ basic_block bbi = gimple_phi_arg_edge (def_stmt, i)->src; -+ -+ /* Skip edges pointing outside the current loop. */ -+ if (!arg || var_bb->loop_father != bbi->loop_father) -+ continue; -+ -+ if (TREE_CODE (arg) == SSA_NAME) -+ { -+ vec_safe_push (path, bbi); -+ /* Recursively follow SSA_NAMEs looking for a constant definition. 
*/ -+ fsm_find_control_statement_thread_paths (arg, visited_phis, path); -+ path->pop (); -+ continue; -+ } -+ -+ if (TREE_CODE (arg) != INTEGER_CST) -+ continue; -+ -+ int path_length = path->length (); -+ /* A path with less than 2 basic blocks should not be jump-threaded. */ -+ if (path_length < 2) -+ continue; -+ -+ if (path_length > PARAM_VALUE (PARAM_MAX_FSM_THREAD_LENGTH)) -+ { -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ fprintf (dump_file, "FSM jump-thread path not considered: " -+ "the number of basic blocks on the path " -+ "exceeds PARAM_MAX_FSM_THREAD_LENGTH.\n"); -+ continue; -+ } -+ -+ if (max_threaded_paths <= 0) -+ { -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ fprintf (dump_file, "FSM jump-thread path not considered: " -+ "the number of previously recorded FSM paths to thread " -+ "exceeds PARAM_MAX_FSM_THREAD_PATHS.\n"); -+ continue; -+ } -+ -+ /* Add BBI to the path. */ -+ vec_safe_push (path, bbi); -+ ++path_length; -+ -+ int n_insns = 0; -+ gimple_stmt_iterator gsi; -+ int j; -+ loop_p loop = (*path)[0]->loop_father; -+ bool path_crosses_loops = false; -+ -+ /* Count the number of instructions on the path: as these instructions -+ will have to be duplicated, we will not record the path if there are -+ too many instructions on the path. Also check that all the blocks in -+ the path belong to a single loop. */ -+ for (j = 1; j < path_length - 1; j++) -+ { -+ basic_block bb = (*path)[j]; -+ -+ if (bb->loop_father != loop) -+ { -+ path_crosses_loops = true; -+ break; -+ } -+ -+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) -+ { -+ gimple stmt = gsi_stmt (gsi); -+ /* Do not count empty statements and labels. */ -+ if (gimple_code (stmt) != GIMPLE_NOP -+ && gimple_code (stmt) != GIMPLE_LABEL -+ && !is_gimple_debug (stmt)) -+ ++n_insns; -+ } -+ } -+ -+ if (path_crosses_loops) -+ { -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ fprintf (dump_file, "FSM jump-thread path not considered: " -+ "the path crosses loops.\n"); -+ path->pop (); -+ continue; -+ } -+ -+ if (n_insns >= PARAM_VALUE (PARAM_MAX_FSM_THREAD_PATH_INSNS)) -+ { -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ fprintf (dump_file, "FSM jump-thread path not considered: " -+ "the number of instructions on the path " -+ "exceeds PARAM_MAX_FSM_THREAD_PATH_INSNS.\n"); -+ path->pop (); -+ continue; -+ } -+ -+ vec<jump_thread_edge *> *jump_thread_path -+ = new vec<jump_thread_edge *> (); -+ -+ /* Record the edges between the blocks in PATH. */ -+ for (j = 0; j < path_length - 1; j++) -+ { -+ edge e = find_edge ((*path)[path_length - j - 1], -+ (*path)[path_length - j - 2]); -+ gcc_assert (e); -+ jump_thread_edge *x = new jump_thread_edge (e, EDGE_FSM_THREAD); -+ jump_thread_path->safe_push (x); -+ } -+ -+ /* Add the edge taken when the control variable has value ARG. */ -+ edge taken_edge = find_taken_edge ((*path)[0], arg); -+ jump_thread_edge *x -+ = new jump_thread_edge (taken_edge, EDGE_NO_COPY_SRC_BLOCK); -+ jump_thread_path->safe_push (x); -+ -+ register_jump_thread (jump_thread_path); -+ --max_threaded_paths; -+ -+ /* Remove BBI from the path. */ -+ path->pop (); -+ } -+ -+ /* Remove all the nodes that we added from NEXT_PATH. */ -+ if (next_path_length) -+ vec_safe_truncate (path, (path->length () - next_path_length)); -+} -+ - /* We are exiting E->src, see if E->dest ends with a conditional - jump which has a known value when reached via E. 
- -@@ -982,7 +1231,10 @@ - cond = simplify_control_stmt_condition (e, stmt, dummy_cond, simplify, - handle_dominating_asserts); - -- if (cond && is_gimple_min_invariant (cond)) -+ if (!cond) -+ return 0; -+ -+ if (is_gimple_min_invariant (cond)) - { - edge taken_edge = find_taken_edge (e->dest, cond); - basic_block dest = (taken_edge ? taken_edge->dest : NULL); -@@ -1028,6 +1280,27 @@ - backedge_seen_p); - return 1; - } -+ -+ if (!flag_expensive_optimizations -+ || optimize_function_for_size_p (cfun) -+ || TREE_CODE (cond) != SSA_NAME -+ || e->dest->loop_father != e->src->loop_father -+ || loop_depth (e->dest->loop_father) == 0) -+ return 0; -+ -+ /* When COND cannot be simplified, try to find paths from a control -+ statement back through the PHI nodes which would affect that control -+ statement. */ -+ vec<basic_block, va_gc> *bb_path; -+ vec_alloc (bb_path, n_basic_blocks_for_fn (cfun)); -+ vec_safe_push (bb_path, e->dest); -+ pointer_set_t *visited_phis = pointer_set_create (); -+ -+ max_threaded_paths = PARAM_VALUE (PARAM_MAX_FSM_THREAD_PATHS); -+ fsm_find_control_statement_thread_paths (cond, visited_phis, bb_path); -+ -+ pointer_set_destroy (visited_phis); -+ vec_free (bb_path); - } - return 0; - } ---- a/src/gcc/convert.c -+++ b/src/gcc/convert.c -@@ -471,8 +471,8 @@ - break; - - CASE_FLT_FN (BUILT_IN_ROUND): -- /* Only convert in ISO C99 mode. */ -- if (!targetm.libc_has_function (function_c99_misc)) -+ /* Only convert in ISO C99 mode and with -fno-math-errno. */ -+ if (!targetm.libc_has_function (function_c99_misc) || flag_errno_math) - break; - if (outprec < TYPE_PRECISION (integer_type_node) - || (outprec == TYPE_PRECISION (integer_type_node) -@@ -492,8 +492,8 @@ - break; - /* ... Fall through ... */ - CASE_FLT_FN (BUILT_IN_RINT): -- /* Only convert in ISO C99 mode. */ -- if (!targetm.libc_has_function (function_c99_misc)) -+ /* Only convert in ISO C99 mode and with -fno-math-errno. */ -+ if (!targetm.libc_has_function (function_c99_misc) || flag_errno_math) - break; - if (outprec < TYPE_PRECISION (integer_type_node) - || (outprec == TYPE_PRECISION (integer_type_node) ---- a/src/libobjc/ChangeLog.linaro -+++ b/src/libobjc/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/libvtv/ChangeLog.linaro -+++ b/src/libvtv/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. 
-+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/libgfortran/configure -+++ b/src/libgfortran/configure -@@ -25941,7 +25941,7 @@ - # test is copied from libgomp, and modified to not link in -lrt as - # libgfortran calls clock_gettime via a weak reference if it's found - # in librt. --if test $ac_cv_func_clock_gettime = no; then -+if test "$ac_cv_func_clock_gettime" = no; then - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for clock_gettime in -lrt" >&5 - $as_echo_n "checking for clock_gettime in -lrt... " >&6; } - if test "${ac_cv_lib_rt_clock_gettime+set}" = set; then : ---- a/src/libgfortran/configure.ac -+++ b/src/libgfortran/configure.ac -@@ -511,7 +511,7 @@ - # test is copied from libgomp, and modified to not link in -lrt as - # libgfortran calls clock_gettime via a weak reference if it's found - # in librt. --if test $ac_cv_func_clock_gettime = no; then -+if test "$ac_cv_func_clock_gettime" = no; then - AC_CHECK_LIB(rt, clock_gettime, - [AC_DEFINE(HAVE_CLOCK_GETTIME_LIBRT, 1, - [Define to 1 if you have the `clock_gettime' function in librt.])]) ---- a/src/libgfortran/ChangeLog.linaro -+++ b/src/libgfortran/ChangeLog.linaro -@@ -0,0 +1,59 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ Backport from trunk r209747. -+ 2014-04-24 Kyrylo Tkachov <kyrylo.tkachov@arm.com> -+ -+ * configure.ac: Quote usage of ac_cv_func_clock_gettime in if test. -+ * configure: Regenerate. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. 
-+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/libada/ChangeLog.linaro -+++ b/src/libada/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/libffi/ChangeLog.linaro -+++ b/src/libffi/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/libssp/ChangeLog.linaro -+++ b/src/libssp/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. 
-+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/libcilkrts/ChangeLog.linaro -+++ b/src/libcilkrts/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/libcpp/ChangeLog.linaro -+++ b/src/libcpp/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/libcpp/po/ChangeLog.linaro -+++ b/src/libcpp/po/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. 
-+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. ---- a/src/fixincludes/ChangeLog.linaro -+++ b/src/fixincludes/ChangeLog.linaro -@@ -0,0 +1,51 @@ -+2015-01-15 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2015.01 released. -+ -+2014-12-11 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.12 released. -+ -+2014-11-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.11 released. -+ -+2014-10-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10-1 released. -+ -+2014-10-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.10 released. -+ -+2014-09-10 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.09 released. -+ -+2014-08-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.08 released. -+ -+2014-07-24 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07-1 released. -+ -+2014-07-17 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.07 released. -+ -+2014-06-25 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06-1 released. -+ -+2014-06-12 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.06 released. -+ -+2014-05-14 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.05 released. -+ -+2014-04-22 Yvan Roux <yvan.roux@linaro.org> -+ -+ GCC Linaro 4.9-2014.04 released. |